#include "kgramstats.h"
// NOTE(review): the three standard headers below were chosen to match what
// this file uses directly (vector, cout/endl, rand) -- confirm against the
// project's original include list.
#include <vector>
#include <iostream>
#include <cstdlib>

// NOTE(review): this file relies on declarations from kgramstats.h that are
// not visible here. It assumes `kgram` is a list<string>-compatible typedef,
// that `stats` is a `map<kgram, map<string, int>* >*` member, and that the
// std names are usable unqualified -- confirm against the header.

/**
 * Builds the k-gram statistics table from a corpus.
 *
 * The corpus is split on single space characters (consecutive spaces yield
 * empty tokens, and an empty corpus yields one empty token). For every
 * k in [0, maxK] and every position i, the k tokens starting at i form the
 * context and the token right after them is counted as one observed
 * extension of that context.
 *
 * @param corpus  space-separated text to learn from
 * @param maxK    longest context length to record
 */
kgramstats::kgramstats(string corpus, int maxK)
{
    this->maxK = maxK;

    // Tokenise. Indices are kept as string::size_type so that comparing
    // against string::npos never relies on signed/unsigned wraparound.
    vector<string> tokens;
    string::size_type start = 0;
    while (true)
    {
        const string::size_type end = corpus.find(' ', start);
        if (end == string::npos)
        {
            tokens.push_back(corpus.substr(start));
            break;
        }

        tokens.push_back(corpus.substr(start, end - start));
        start = end + 1;
    }

    typedef vector<string>::size_type index_t;
    const index_t tokenCount = tokens.size();

    // NOTE(review): this table and the inner maps are never released in this
    // file; freeing them needs a destructor declared in the header.
    stats = new map<kgram, map<string, int>* >();

    for (int k = 0; k <= maxK; k++)
    {
        const index_t len = static_cast<index_t>(k);

        // A context of `len` tokens needs one more token after it. Once len
        // reaches the token count nothing can be recorded, and the unsigned
        // expression `tokenCount - len` would wrap around, so stop here.
        if (len >= tokenCount)
        {
            break;
        }

        for (index_t i = 0; i + len < tokenCount; i++)
        {
            kgram seq(tokens.begin() + i, tokens.begin() + i + len);

            // Single lookup: operator[] creates a NULL slot on first sight.
            map<string, int>*& exts = (*stats)[seq];
            if (exts == NULL)
            {
                exts = new map<string, int>();
            }

            ++(*exts)[tokens[i + len]];
        }
    }
}

/**
 * Looks up the extension counts recorded for a context.
 *
 * @param tk  the context (sequence of tokens) to look up
 * @return    the token -> count table for tk, or NULL if tk was never
 *            recorded. The lookup does not modify the statistics table.
 */
map<string, int>* kgramstats::lookupExts(kgram tk)
{
    // find() instead of operator[] so that querying an unknown context does
    // not insert a NULL entry into the table.
    map<kgram, map<string, int>* >::iterator found = stats->find(tk);
    if (found == stats->end())
    {
        return NULL;
    }

    return found->second;
}

/** Returns the maximum context length passed to the constructor. */
int kgramstats::getMaxK()
{
    return maxK;
}

/**
 * Writes every token of k to standard output, each followed by a single
 * space, then ends the line.
 */
void printKgram(kgram k)
{
    for (kgram::iterator it = k.begin(); it != k.end(); ++it)
    {
        cout << *it << " ";
    }

    cout << endl;
}

/**
 * Generates up to n tokens by repeatedly sampling an extension of the
 * current context, weighted by the recorded counts.
 *
 * If the current context was never followed by anything in the corpus, the
 * oldest context tokens are dropped until a recorded context is found.
 * Generation stops early if no usable table exists at all.
 *
 * Uses rand(); seeding is left to the caller.
 *
 * @param n  number of tokens requested
 * @return   the generated tokens, in order
 */
vector<string> kgramstats::randomSentence(int n)
{
    vector<string> result;
    list<string> cur;

    for (int i = 0; i < n; i++)
    {
        // Back off to shorter contexts instead of dereferencing a NULL table.
        map<string, int>* probtable = lookupExts(cur);
        while (probtable == NULL && !cur.empty())
        {
            cur.pop_front();
            probtable = lookupExts(cur);
        }

        if (probtable == NULL || probtable->empty())
        {
            break;
        }

        int total = 0;
        for (map<string, int>::iterator it = probtable->begin(); it != probtable->end(); ++it)
        {
            total += it->second;
        }

        if (total <= 0)
        {
            break;
        }

        // r lies in [0, total); each entry owns a slice of that range as
        // wide as its count, and the entry whose slice contains r is chosen.
        int r = rand() % total;
        string next = probtable->begin()->first;
        for (map<string, int>::iterator it = probtable->begin(); it != probtable->end(); ++it)
        {
            if (r < it->second)
            {
                next = it->first;
                break;
            }

            r -= it->second;
        }

        result.push_back(next);

        // Append first, then trim from the front, so pop_front() is never
        // called on an empty list (which the old size()==maxK test allowed
        // when maxK was 0).
        cur.push_back(next);
        while (!cur.empty()
               && (maxK <= 0 || cur.size() > static_cast<list<string>::size_type>(maxK)))
        {
            cur.pop_front();
        }
    }

    return result;
}