Tuesday, February 16, 2021

[Google Question] Implement Search Autocomplete System

Problem: Design a search autocomplete system for a search engine. Users may input a sentence (at least one word and end with a special character '#'). For each character they type except '#', you need to return the top 3 historical sentences that have prefix the same as the part of sentence already typed. Here are the specific rules:

The rank for a sentence is defined as the number of times a user typed the exactly same sentence before.

  1. The returned top 3 hot sentences should be sorted by rank (The first is having the greatest rank). If several sentences have the same rank, you need to use ASCII-code order (smaller one appears first).
  2. If less than 3 sentences exist, then just return as many as you can.
  3. When the input is a special character, it means the sentence ends, and in this case, you need to return an empty list.

Here we need to implement two methods:

  1. Constructor which will take the initial sentences and their ranks / frequencies.
  2. Input(char ch): The input ch is the next character typed by the user. The character will only be lower-case letters ('a' to 'z'), blank space (' ') or a special character ('#'). Also, the previously typed sentence should be recorded in your system. The output will be the top 3 historical hot sentences that have prefix the same as the part of sentence already typed.


Approach: Given that this problem is a kind of prefix search, we can use Trie here. Here I am storing following additional data on Trie node:

  1. int Frequency: In case of leaf node, recording the frequency/rank of the sentence. By default it is 0.
  2. AutocompleteSystemEntry[] AutocompleteSystemEntries: An  AutocompleteSystemEntry will have the sentence and its rank. We will be storing 3 such entries on every node so that we don't have to parse the whole path every time user type. We just return the top 3 sentences from the node itself.

Note that in case of end of user input or character '#', we need to update the AutocompleteSystemEntries on every node related to given prefix.


Implementation in C#:

public class AutocompleteSystemEntry

    {

        public string Sentence { get; set; }

        public int Frequency { get; set; }


        public AutocompleteSystemEntry(string sentence, int frequency = 1)

        {

            this.Sentence = sentence;

            this.Frequency = frequency;

        }

    }


    public class AutocompleteSystemTrieNode

    {

        private const short ALPHABET_SIZE = 26;

        private const int NumOfEntriesToBeReturned = 3;


        public AutocompleteSystemTrieNode(char value)

        {

            this.Value = value;

            this.Childrens = new AutocompleteSystemTrieNode[ALPHABET_SIZE + 1]; // 1 is for space

            this.AutocompleteSystemEntries = new AutocompleteSystemEntry[NumOfEntriesToBeReturned];

            this.Frequency = 0;

        }


        public void UpdateEntriesOnNode(string sentence, int frequency)

        {

            int i = 0;

            bool found = false;

            for (; i < NumOfEntriesToBeReturned && this.AutocompleteSystemEntries[i] != null; ++i)

            {

                if (this.AutocompleteSystemEntries[i].Sentence == sentence)

                {

                    found = true;

                    break;

                }

            }


            if (found)

            {

                this.AutocompleteSystemEntries[i].Frequency = frequency;

                while (i > 0 && 

                    (this.AutocompleteSystemEntries[i].Frequency > this.AutocompleteSystemEntries[i - 1].Frequency

                    || (this.AutocompleteSystemEntries[i].Frequency == this.AutocompleteSystemEntries[i - 1].Frequency && this.AutocompleteSystemEntries[i].Sentence.CompareTo(this.AutocompleteSystemEntries[i - 1].Sentence) < 0)))

                {

                    AutocompleteSystemEntry tempEntry = this.AutocompleteSystemEntries[i];

                    this.AutocompleteSystemEntries[i] = this.AutocompleteSystemEntries[i - 1];

                    this.AutocompleteSystemEntries[i - 1] = tempEntry;

                    --i;

                }

                return;

            }

            for (i = 0; i < NumOfEntriesToBeReturned; ++i)

            {

                if (this.AutocompleteSystemEntries[i] == null  || this.AutocompleteSystemEntries[i].Frequency < frequency || (this.AutocompleteSystemEntries[i].Frequency == frequency && this.AutocompleteSystemEntries[i].Sentence.CompareTo(sentence) > 0))

                {

                    break;

                }

            }

            if (i == NumOfEntriesToBeReturned)

            {

                return;

            }

            if (this.AutocompleteSystemEntries[i] == null)

            {

                this.AutocompleteSystemEntries[i] = new AutocompleteSystemEntry(sentence, frequency);

            }

            else

            {

                int j = NumOfEntriesToBeReturned - 1;

                while (j > i)

                {

                    this.AutocompleteSystemEntries[j] = this.AutocompleteSystemEntries[j - 1];

                    --j;

                }

                this.AutocompleteSystemEntries[i] = new AutocompleteSystemEntry(sentence, frequency);

            }

        }


        public AutocompleteSystemEntry[] AutocompleteSystemEntries { get; set; }

        public char Value { get; set; }

        public int Frequency { get; set; }

        public AutocompleteSystemTrieNode[] Childrens { get; set; }

    }


    public class AutocompleteSystemTrie

    {

        private const char EndOfSentence = '#'; // Leaf node

        private const char CharOfSentence = '*';

        public AutocompleteSystemTrie()

        {

            this.root = new AutocompleteSystemTrieNode('x'); // special charater for root

            this.currInputNodes = new Stack<AutocompleteSystemTrieNode>();

            this.currInputNodes.Push(this.root);

        }


        public void Insert(string sentence, int frequency)

        {

            AutocompleteSystemTrieNode node = root;

            foreach(char ch in sentence)

            {

                if (ch == ' ')

                {

                    if (node.Childrens[node.Childrens.Length - 1] == null)

                    {

                        node.Childrens[node.Childrens.Length - 1] = new AutocompleteSystemTrieNode(CharOfSentence); 

                    }

                    node = node.Childrens[node.Childrens.Length - 1];

                }

                else

                {

                    if (node.Childrens[ch - 'a'] == null)

                    {

                        node.Childrens[ch - 'a'] = new AutocompleteSystemTrieNode(CharOfSentence);

                    }

                    node = node.Childrens[ch - 'a'];

                }

                node.UpdateEntriesOnNode(sentence, frequency);

            }

            node.Value = EndOfSentence;

            node.Frequency = frequency;

        }


        public void MarkItAsEndOfSentence(string sentence)

        {

            if (this.currInputNodes.Count <= 1)

            {

                return;

            }

            AutocompleteSystemTrieNode node = this.currInputNodes.Pop();

            node.Value = EndOfSentence;

            int currFrequency = ++node.Frequency;

            node.UpdateEntriesOnNode(sentence, currFrequency);

            while (this.currInputNodes.Count > 0)

            {

                node = this.currInputNodes.Pop();

                if (node != this.root)

                {

                    node.UpdateEntriesOnNode(sentence, currFrequency);

                }

            }

            this.currInputNodes.Push(this.root);

        }


        public List<string> Search(char ch)

        {

            AutocompleteSystemTrieNode node = this.currInputNodes.Peek();

            if (ch == ' ')

            {

                if (node.Childrens.Last() == null)

                {

                    node.Childrens[node.Childrens.Length - 1] = new AutocompleteSystemTrieNode(CharOfSentence);

                }

                node = node.Childrens.Last();

            }

            else

            {

                if (node.Childrens[ch - 'a'] == null)

                {

                    node.Childrens[ch - 'a'] = new AutocompleteSystemTrieNode(CharOfSentence);

                }

                node = node.Childrens[ch - 'a'];

            }

            this.currInputNodes.Push(node);

            return node.AutocompleteSystemEntries.Where(e => e != null).Select(e => e.Sentence).ToList();

        }


        private Stack<AutocompleteSystemTrieNode> currInputNodes;

        private AutocompleteSystemTrieNode root;

    }


    public class AutocompleteSystem

    {

        public AutocompleteSystem(string[] sentences, int[] times)

        {

            this.trie = new AutocompleteSystemTrie();

            int length = sentences.Length;

            for (int i = 0; i < length; ++i)

            {

                this.trie.Insert(sentences[i], times[i]);

            }

            this.currSentence = string.Empty;

        }


        public IList<string> Input(char c)

        {

            List<string> result = new List<string>();

            if (c == '#')

            {

                this.trie.MarkItAsEndOfSentence(this.currSentence);

                this.currSentence = string.Empty;

                return result;

            }

            else

            {

                this.currSentence += c;

                result =  this.trie.Search(c);

                return result;

            }

        }


        private AutocompleteSystemTrie trie;

        private string currSentence;

    }


Complexity: Construction of trie: O(n), Input: O(m) where m is the length of the prefix

No comments:

Post a Comment