Problem: Design a search autocomplete system for a search engine. Users may input a sentence (at least one word and end with a special character '#'). For each character they type except '#', you need to return the top 3 historical sentences that have prefix the same as the part of sentence already typed. Here are the specific rules:
The rank for a sentence is defined as the number of times a user typed the exactly same sentence before.
- The returned top 3 hot sentences should be sorted by rank (The first is having the greatest rank). If several sentences have the same rank, you need to use ASCII-code order (smaller one appears first).
- If less than 3 sentences exist, then just return as many as you can.
- When the input is a special character, it means the sentence ends, and in this case, you need to return an empty list.
Here we need to implement two methods:
- Constructor which will take the initial sentences and their ranks / frequencies.
- Input(char ch): The input ch is the next character typed by the user. The character will only be lower-case letters ('a' to 'z'), blank space (' ') or a special character ('#'). Also, the previously typed sentence should be recorded in your system. The output will be the top 3 historical hot sentences that have prefix the same as the part of sentence already typed.
Approach: Given that this problem is a kind of prefix search, we can use Trie here. Here I am storing following additional data on Trie node:
- int Frequency: In case of leaf node, recording the frequency/rank of the sentence. By default it is 0.
- AutocompleteSystemEntry[] AutocompleteSystemEntries: An AutocompleteSystemEntry will have the sentence and its rank. We will be storing 3 such entries on every node so that we don't have to parse the whole path every time user type. We just return the top 3 sentences from the node itself.
Note that in case of end of user input or character '#', we need to update the AutocompleteSystemEntries on every node related to given prefix.
Implementation in C#:
public class AutocompleteSystemEntry
{
public string Sentence { get; set; }
public int Frequency { get; set; }
public AutocompleteSystemEntry(string sentence, int frequency = 1)
{
this.Sentence = sentence;
this.Frequency = frequency;
}
}
public class AutocompleteSystemTrieNode
{
private const short ALPHABET_SIZE = 26;
private const int NumOfEntriesToBeReturned = 3;
public AutocompleteSystemTrieNode(char value)
{
this.Value = value;
this.Childrens = new AutocompleteSystemTrieNode[ALPHABET_SIZE + 1]; // 1 is for space
this.AutocompleteSystemEntries = new AutocompleteSystemEntry[NumOfEntriesToBeReturned];
this.Frequency = 0;
}
public void UpdateEntriesOnNode(string sentence, int frequency)
{
int i = 0;
bool found = false;
for (; i < NumOfEntriesToBeReturned && this.AutocompleteSystemEntries[i] != null; ++i)
{
if (this.AutocompleteSystemEntries[i].Sentence == sentence)
{
found = true;
break;
}
}
if (found)
{
this.AutocompleteSystemEntries[i].Frequency = frequency;
while (i > 0 &&
(this.AutocompleteSystemEntries[i].Frequency > this.AutocompleteSystemEntries[i - 1].Frequency
|| (this.AutocompleteSystemEntries[i].Frequency == this.AutocompleteSystemEntries[i - 1].Frequency && this.AutocompleteSystemEntries[i].Sentence.CompareTo(this.AutocompleteSystemEntries[i - 1].Sentence) < 0)))
{
AutocompleteSystemEntry tempEntry = this.AutocompleteSystemEntries[i];
this.AutocompleteSystemEntries[i] = this.AutocompleteSystemEntries[i - 1];
this.AutocompleteSystemEntries[i - 1] = tempEntry;
--i;
}
return;
}
for (i = 0; i < NumOfEntriesToBeReturned; ++i)
{
if (this.AutocompleteSystemEntries[i] == null || this.AutocompleteSystemEntries[i].Frequency < frequency || (this.AutocompleteSystemEntries[i].Frequency == frequency && this.AutocompleteSystemEntries[i].Sentence.CompareTo(sentence) > 0))
{
break;
}
}
if (i == NumOfEntriesToBeReturned)
{
return;
}
if (this.AutocompleteSystemEntries[i] == null)
{
this.AutocompleteSystemEntries[i] = new AutocompleteSystemEntry(sentence, frequency);
}
else
{
int j = NumOfEntriesToBeReturned - 1;
while (j > i)
{
this.AutocompleteSystemEntries[j] = this.AutocompleteSystemEntries[j - 1];
--j;
}
this.AutocompleteSystemEntries[i] = new AutocompleteSystemEntry(sentence, frequency);
}
}
public AutocompleteSystemEntry[] AutocompleteSystemEntries { get; set; }
public char Value { get; set; }
public int Frequency { get; set; }
public AutocompleteSystemTrieNode[] Childrens { get; set; }
}
public class AutocompleteSystemTrie
{
private const char EndOfSentence = '#'; // Leaf node
private const char CharOfSentence = '*';
public AutocompleteSystemTrie()
{
this.root = new AutocompleteSystemTrieNode('x'); // special charater for root
this.currInputNodes = new Stack<AutocompleteSystemTrieNode>();
this.currInputNodes.Push(this.root);
}
public void Insert(string sentence, int frequency)
{
AutocompleteSystemTrieNode node = root;
foreach(char ch in sentence)
{
if (ch == ' ')
{
if (node.Childrens[node.Childrens.Length - 1] == null)
{
node.Childrens[node.Childrens.Length - 1] = new AutocompleteSystemTrieNode(CharOfSentence);
}
node = node.Childrens[node.Childrens.Length - 1];
}
else
{
if (node.Childrens[ch - 'a'] == null)
{
node.Childrens[ch - 'a'] = new AutocompleteSystemTrieNode(CharOfSentence);
}
node = node.Childrens[ch - 'a'];
}
node.UpdateEntriesOnNode(sentence, frequency);
}
node.Value = EndOfSentence;
node.Frequency = frequency;
}
public void MarkItAsEndOfSentence(string sentence)
{
if (this.currInputNodes.Count <= 1)
{
return;
}
AutocompleteSystemTrieNode node = this.currInputNodes.Pop();
node.Value = EndOfSentence;
int currFrequency = ++node.Frequency;
node.UpdateEntriesOnNode(sentence, currFrequency);
while (this.currInputNodes.Count > 0)
{
node = this.currInputNodes.Pop();
if (node != this.root)
{
node.UpdateEntriesOnNode(sentence, currFrequency);
}
}
this.currInputNodes.Push(this.root);
}
public List<string> Search(char ch)
{
AutocompleteSystemTrieNode node = this.currInputNodes.Peek();
if (ch == ' ')
{
if (node.Childrens.Last() == null)
{
node.Childrens[node.Childrens.Length - 1] = new AutocompleteSystemTrieNode(CharOfSentence);
}
node = node.Childrens.Last();
}
else
{
if (node.Childrens[ch - 'a'] == null)
{
node.Childrens[ch - 'a'] = new AutocompleteSystemTrieNode(CharOfSentence);
}
node = node.Childrens[ch - 'a'];
}
this.currInputNodes.Push(node);
return node.AutocompleteSystemEntries.Where(e => e != null).Select(e => e.Sentence).ToList();
}
private Stack<AutocompleteSystemTrieNode> currInputNodes;
private AutocompleteSystemTrieNode root;
}
public class AutocompleteSystem
{
public AutocompleteSystem(string[] sentences, int[] times)
{
this.trie = new AutocompleteSystemTrie();
int length = sentences.Length;
for (int i = 0; i < length; ++i)
{
this.trie.Insert(sentences[i], times[i]);
}
this.currSentence = string.Empty;
}
public IList<string> Input(char c)
{
List<string> result = new List<string>();
if (c == '#')
{
this.trie.MarkItAsEndOfSentence(this.currSentence);
this.currSentence = string.Empty;
return result;
}
else
{
this.currSentence += c;
result = this.trie.Search(c);
return result;
}
}
private AutocompleteSystemTrie trie;
private string currSentence;
}
Complexity: Construction of trie: O(n), Input: O(m) where m is the length of the prefix
No comments:
Post a Comment