A Trie (from retrieval), is a multi-way tree structure useful for storing strings over an alphabet. It has been used to store large dictionaries of English (say) words in spelling-checking programs and in natural-language "understanding" programs. Using Trie, search complexities can be brought to optimal limit (key length).
Example:
Given the following strings:
an, ant, all, allot, alloy, aloe, are, ate, be
the corresponding Trie would be:
Implementation in C#:
public class TrieNode
{
public const short ALPHABET_SIZE = 26;
public TrieNode(char value)
{
this.Value = value;
this.Childrens = new TrieNode[ALPHABET_SIZE];
}
public char Value { get; set; }
public TrieNode[] Childrens { get; set; }
}
public class Trie
{
public const char EndOfWord = '#'; // Leaf node
public const char CharOfWord = '*';
public Trie()
{
this.root = new TrieNode('x'); // special charater for root
}
public void Insert(string word)
{
TrieNode node = root;
foreach(char ch in word)
{
if (node.Childrens[ch - 'a'] == null)
{
node.Childrens[ch - 'a'] = new TrieNode(CharOfWord);
}
node = node.Childrens[ch - 'a'];
}
node.Value = EndOfWord;
}
public bool Search(string word)
{
TrieNode node = this.root;
foreach(char ch in word)
{
if (node.Childrens[ch - 'a'] == null)
{
return false;
}
node = node.Childrens[ch - 'a'];
}
return node.Value == EndOfWord;
}
public bool StartsWith(string prefix)
{
TrieNode node = this.root;
foreach (char ch in prefix)
{
if (node.Childrens[ch - 'a'] == null)
{
return false;
}
node = node.Childrens[ch - 'a'];
}
return true;
}
public bool PatternSearch(string word)
{
return this.DFSPatternSearch(this.root.Childrens, word, 0);
}
private bool DFSPatternSearch(TrieNode[] nodes, string word, int startIndex)
{
if (startIndex == word.Length)
{
return false;
}
if (word[startIndex] == '.')
{
bool result = false;
foreach(TrieNode node in nodes)
{
if (node == null)
{
continue;
}
if (startIndex == word.Length - 1 && node.Value == EndOfWord)
{
return true;
}
if (this.DFSPatternSearch(node.Childrens, word, startIndex + 1))
{
result = true;
}
}
return result;
}
else if (nodes[word[startIndex] - 'a'] != null)
{
if (startIndex == word.Length - 1 && nodes[word[startIndex] -'a'].Value == EndOfWord)
{
return true;
}
return this.DFSPatternSearch(nodes[word[startIndex] - 'a'].Childrens, word, startIndex + 1);
}
else
{
return false;
}
}
private TrieNode root;
}
Complexity: O(n) for insertion, search and startswith where n is the length of pattern
No comments:
Post a Comment