#include <cmath>
#include <iostream>
#include <limits>
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <vector>
using namespace std;
// Sentinel tokens used to pad every sentence so that n-grams at the sentence
// boundaries have a well-defined context.
const std::string START_SYMBOL = "#@#";
const std::string END_SYMBOL = "@!#@";

/// Maximum-likelihood n-gram language model over space-separated sentences.
///
/// Each sentence is tokenized on spaces and padded with (n - 1) copies of
/// START_SYMBOL in front and one END_SYMBOL at the back. The model counts every
/// window of n consecutive tokens together with its (n - 1)-token prefix (the
/// context), so that
///     P(w | context) = count(context + w) / count(context).
/// No smoothing is applied.
class Ngram {
public:
    /// Builds the count tables from the training sentences.
    ///
    /// @param data training sentences, words separated by spaces
    /// @param n    order of the model (1 = unigram, 2 = bigram, ...)
    /// @throws std::invalid_argument if n is smaller than 1
    Ngram(const std::vector<std::string>& data, int n) : order(n) {
        if (n < 1) {
            throw std::invalid_argument("Ngram: n must be at least 1");
        }
        for (const std::string& sentence : data) {
            const std::vector<std::string> tokens = pad(split(sentence));
            // A padded sentence always holds at least `order` tokens, and the
            // bound is written as an addition so nothing can underflow.
            const int count = static_cast<int>(tokens.size());
            for (int i = 0; i + order <= count; ++i) {
                ++ngrams[concat(tokens, i, i + order)];
                ++contexts[concat(tokens, i, i + order - 1)];
            }
        }
    }

    /// Returns the natural-log probability of `sentence` under the model.
    ///
    /// The result is the sum of log P(token | previous n-1 tokens) over every
    /// token of the padded sentence, END_SYMBOL included.
    ///
    /// @param sentence words separated by spaces
    /// @return the log probability, or negative infinity when the sentence
    ///         contains an n-gram that never occurred in the training data
    double LogProb(const std::string& sentence) const {
        const std::vector<std::string> tokens = pad(split(sentence));
        const int count = static_cast<int>(tokens.size());
        double log_prob = 0.0;
        for (int i = 0; i + order <= count; ++i) {
            // find() instead of operator[] so that scoring never inserts
            // zero-count entries into the tables.
            const auto gram = ngrams.find(concat(tokens, i, i + order));
            const auto context = contexts.find(concat(tokens, i, i + order - 1));
            if (gram == ngrams.end() || context == contexts.end()) {
                return -std::numeric_limits<double>::infinity();
            }
            log_prob += std::log(static_cast<double>(gram->second) /
                                 static_cast<double>(context->second));
        }
        return log_prob;
    }

    /// Splits `curr_string` into words on the space character.
    ///
    /// Leading, trailing and repeated spaces produce no empty words.
    static std::vector<std::string> split(const std::string& curr_string) {
        std::vector<std::string> words;
        std::size_t pos = 0;
        while ((pos = curr_string.find_first_not_of(' ', pos)) != std::string::npos) {
            // `stop` is npos for the last word; substr then runs to the end.
            const std::size_t stop = curr_string.find(' ', pos);
            words.push_back(curr_string.substr(pos, stop - pos));
            pos = stop;
        }
        return words;
    }

    /// Joins words[start, end) with single spaces; an empty range gives "".
    ///
    /// The caller must ensure 0 <= start and end <= words.size().
    static std::string concat(const std::vector<std::string>& words, int start, int end) {
        std::string joined;
        for (int i = start; i < end; ++i) {
            if (i != start) {
                joined += ' ';
            }
            joined += words[static_cast<std::size_t>(i)];
        }
        return joined;
    }

    /// Occurrence count of every n-gram, keyed by its space-joined tokens.
    std::unordered_map<std::string, int> ngrams;

private:
    /// Surrounds `words` with (order - 1) START_SYMBOL tokens and one END_SYMBOL.
    std::vector<std::string> pad(const std::vector<std::string>& words) const {
        std::vector<std::string> tokens;
        tokens.reserve(words.size() + static_cast<std::size_t>(order));
        tokens.insert(tokens.end(), static_cast<std::size_t>(order - 1), START_SYMBOL);
        tokens.insert(tokens.end(), words.begin(), words.end());
        tokens.push_back(END_SYMBOL);
        return tokens;
    }

    /// Occurrence count of every (n-1)-token context; for a unigram model the
    /// only key is "" and its count is the total number of tokens.
    std::unordered_map<std::string, int> contexts;

    /// Order of the model (the n in n-gram); always >= 1 after construction.
    int order;
};
#ifndef __main__
#define __main__
int main() {
vector
"I am Sam",
"Sam I am",
"Sam likes green eggs and ham",
"I like green eggs and pizza",
"I do not like green eggs and ham",
"I do not eat green eggs and ham",
};
Ngram ngram(data, 3);
// should print the log (natural log) probability of the sentence
cout << ngram.LogProb("I like green eggs and ham") << endl;
return 0;
}
#endif