Tuesday, November 1, 2011

Infibeam Question: Implement T9 Dictionary

The solution is to use a hash whose key is the digit sequence and whose value is the list of words that can be typed by pressing those digits. For example, 4663 --> good, home

LoadWords Operation --

   1. Read words and their popularity one by one from file.
   2. Get the number corresponding to the word.
   3. Make an object which contains the word and its popularity.
   4. hash[number].insert(wordObject).

DisplayWords Operation --

It was given that the 5 most popular words need to be displayed.
  1. Maintain a heap of size 5 ordered by popularity.
  2. Keep the keys of the hash in sorted string order (a map can be used for the hash).
  3. Look up the keys that start with the given number.

Source Code --

#include<iostream>
#include<string>
#include<map>
#include<vector>
#include<fstream>
#include<algorithm>

using namespace std;

// Word: a dictionary entry that pairs a word with its popularity score.
// The accessors are const so entries can be read through const references
// (for example inside comparators) without being copied.
class Word
{
public:
    // w: text of the word (defaults to empty), p: popularity (defaults to 0)
    Word(const std::string& w = "", int p = 0);
    const std::string& getWord() const;   // text of the word
    int getPopularity() const;            // popularity score
private:
    std::string word;     //actual word
    int popularity;       //popularity of word
};

Word::Word(const std::string& w, int p):word(w), popularity(p)
{
}

const std::string& Word::getWord() const
{
    return word;
}

int Word::getPopularity() const
{
    return popularity;
}

// Comparator used to order Words: returns true when a must come before b.
// Higher popularity comes first; words with equal popularity are ordered
// alphabetically. Arguments are taken by const reference so that sorting and
// heap operations do not copy the words on every comparison.
bool wordCompare(const Word& a, const Word& b)
{
    if(a.getPopularity() != b.getPopularity())
        return a.getPopularity() > b.getPopularity();
    return a.getWord() < b.getWord();
}

//This is helper class to T9Dictionary class. It uses heap to maintain the top 5 i.e. total words to display
class T9DictDisplayHelperHeap
{
public:
    T9DictDisplayHelperHeap(int num = 5);  //num is number of words to display
    void pushHeap(Word word);   //insert word into heap
    void displayWords();        //display the words
private:
    int count;
    vector<Word> vectWord;        //heap
    const short wordsToDisplay;
};

// Builds an empty helper that displays at most `num` words.
// The initialisers are listed in member declaration order (count, vectWord,
// wordsToDisplay), which is the order in which they are actually executed.
T9DictDisplayHelperHeap::T9DictDisplayHelperHeap(int num):count(0), vectWord(), wordsToDisplay(num)
{
    // A negative limit would be converted to a huge unsigned size by
    // reserve() and make it throw, so only pre-allocate for a positive limit.
    if(wordsToDisplay > 0)
        vectWord.reserve(wordsToDisplay);
}

void T9DictDisplayHelperHeap::pushHeap(Word word)
{
    if(count<wordsToDisplay)
    {
        vectWord.push_back(word);
        count++;
    }
    else if(count == wordsToDisplay)
    {
        make_heap(vectWord.begin(), vectWord.end(), wordCompare);
        if(vectWord.front().getPopularity() < word.getPopularity())
        {
            pop_heap(vectWord.begin(), vectWord.end(), wordCompare);
            vectWord.pop_back();
            vectWord.push_back(word);
            push_heap(vectWord.begin(), vectWord.end(), wordCompare);
        }
        count++;
    }
    else
    {
        if(vectWord.front().getPopularity() < word.getPopularity())
        {
            pop_heap(vectWord.begin(), vectWord.end(), wordCompare);
            vectWord.pop_back();
            vectWord.push_back(word);
            push_heap(vectWord.begin(), vectWord.end(), wordCompare);
        }
    }
}

void T9DictDisplayHelperHeap::displayWords()
{
    if(count < wordsToDisplay)
        sort(vectWord.begin(), vectWord.end(), wordCompare);
    else
    {
        sort_heap(vectWord.begin(), vectWord.end(), wordCompare);
    }
    int size = vectWord.size();
    for(int i = 0; i < size; ++i)
        cout<<vectWord[i].getWord()<<" : "<<vectWord[i].getPopularity()<<'\n';
}

//Dictionary Class. It is using map in which key is the number and the value is vector of corresponding words
class T9Dict
{
public:
    T9Dict(int count = 5);   //count is number of words to display
    bool loadWords(string fileName);   // load words from a file, fileName is the path of the file which contains words to be inserted in dictionary
    void displayWords(string num);       
private:
    const short wordsToDisplay;
    void addWord(string key, Word w);
    map<string, vector<Word> > mapWords;
};

// Creates an empty dictionary and records `count` as the number of words to
// display. The value is stored in a short, hence the conversion.
T9Dict::T9Dict(int count)
    : wordsToDisplay(static_cast<short>(count))
{
}

// For each alphabet it is taking the corresponding number like for a,b,c corresponding number is 2
// For space ' ' number is 0
// For digits number is same as the digit given
// For all other special charcters number is 1

bool T9Dict::loadWords(string fileName)
{
    ifstream in(fileName.c_str());
    if(!in)
    {
        cout<<"File: "<<fileName<<" does not exist or not accessible\n";
        return false;
    }
    int popularity = 0;
    char *temp = new char[256];
    string word;
    while(!in.eof())
    {
        string key = "";
        in>>popularity;
        //in>>word;
        in.getline(temp, 255);
        word.clear();
        word = string(temp);
        word = word.substr(1);
        int len = word.length();
        for(int i=0; i<len; ++i)
        {
            word[i] = tolower(word[i]);
            if(word[i] == ' ')
                key += '0';
            else if(word[i] >= '0' && word[i] <= '9')
            {
                key += word[i];
            }
            else if(word[i] >= 'a' && word[i] <= 'o')
            {
                key += (((word[i] - 'a') / 3) + 2 + '0') ;
            }
            else if(word[i] >= 'p' && word[i] <= 's')
            {
                key += '7';
            }
            else if(word[i] >= 't' && word[i] <= 'v')
            {
                key += '8';
            }
            else if(word[i] >= 'w' && word[i] <= 'z')
            {
                key += '9';
            }
            else
            {
                key += '1';
            }
        }
        addWord(key, Word(word, popularity));
    }

    delete temp;
    for(map<string, vector<Word> >::iterator it = mapWords.begin(); it != mapWords.end(); 
    ++it)
    {
        sort(it->second.begin(), it->second.end(), wordCompare);
    }
    return true;
}

void T9Dict::addWord(string key, Word word)
{
    mapWords[key].push_back(word);
}

void T9Dict::displayWords(string num)
{
    T9DictDisplayHelperHeap heap;
   
    map<string, vector<Word> >::iterator it = mapWords.begin();
    while(it != mapWords.end() && it->first < num)
    {
        it++;
    }
    int len = num.length();
    while(it != mapWords.end() && (it->first.substr(0, len) == num))
    {
        for(unsigned int i=0; i < it->second.size(); ++i)
            heap.pushHeap(it->second[i]);
        it++;
    }

    heap.displayWords();
}

No comments:

Post a Comment