From 8de3134bf2cd26ff81359df703e5fbc6280448d7 Mon Sep 17 00:00:00 2001 From: Feffernoose Date: Tue, 1 Oct 2013 18:15:22 -0400 Subject: Wrote program --- kgramstats.cpp | 110 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 110 insertions(+) create mode 100644 kgramstats.cpp (limited to 'kgramstats.cpp') diff --git a/kgramstats.cpp b/kgramstats.cpp new file mode 100644 index 0000000..142b5aa --- /dev/null +++ b/kgramstats.cpp @@ -0,0 +1,110 @@ +#include "kgramstats.h" +#include +#include +#include + +kgramstats::kgramstats(string corpus, int maxK) +{ + this->maxK = maxK; + + vector tokens; + int start = 0; + int end = 0; + + while (end != string::npos) + { + end = corpus.find(" ", start); + + tokens.push_back(corpus.substr(start, (end == string::npos) ? string::npos : end - start)); + + start = ((end > (string::npos - 1) ) ? string::npos : end + 1); + } + + stats = new map* >(); + for (int k=0; k<=maxK; k++) + { + for (int i=0; i<(tokens.size() - k); i++) + { + kgram seq(tokens.begin()+i, tokens.begin()+i+k); + string f = tokens[i+k]; + + if ((*stats)[seq] == NULL) + { + (*stats)[seq] = new map(); + } + + (*((*stats)[seq]))[f]++; + } + } +} + +map* kgramstats::lookupExts(kgram tk) +{ + return (*stats)[tk]; +} + +int kgramstats::getMaxK() +{ + return maxK; +} + +void printKgram(kgram k) +{ + for (kgram::iterator it = k.begin(); it != k.end(); it++) + { + cout << *it << " "; + } + cout << endl; +} + +vector kgramstats::randomSentence(int n) +{ + vector result; + list cur; + + for (int i=0; i* probtable = lookupExts(cur); + int max = 0; + for (map::iterator it = probtable->begin(); it != probtable->end(); ++it) + { + max += it->second; + } + + int r = rand() % (max+1); + string next = probtable->begin()->first; + for (map::iterator it = probtable->begin(); it != probtable->end(); ++it) + { + if (it->second > r) + { + break; + } else { + next = it->first; + r -= it->second; + } + } + + if (cur.size() == maxK) + { + cur.pop_front(); + } + + cur.push_back(next); + result.push_back(next); + } + + return result; +} \ No newline at end of file -- cgit 1.4.1