about summary refs log tree commit diff stats
path: root/kgramstats.cpp
diff options
context:
space:
mode:
author Feffernoose <fefferburbia@gmail.com> 2013-10-01 18:15:22 -0400
committer Feffernoose <fefferburbia@gmail.com> 2013-10-01 18:15:22 -0400
commit 8de3134bf2cd26ff81359df703e5fbc6280448d7 (patch)
tree 325e99abac6b7e3316334af7961645e9381e6517 /kgramstats.cpp
parent 2b1f8c3363ef667bc20f33bbb5a856a35f2591ba (diff)
download rawr-ebooks-8de3134bf2cd26ff81359df703e5fbc6280448d7.tar.gz
rawr-ebooks-8de3134bf2cd26ff81359df703e5fbc6280448d7.tar.bz2
rawr-ebooks-8de3134bf2cd26ff81359df703e5fbc6280448d7.zip
Wrote program
Diffstat (limited to 'kgramstats.cpp')
-rw-r--r-- kgramstats.cpp 110
1 files changed, 110 insertions, 0 deletions
diff --git a/kgramstats.cpp b/kgramstats.cpp
new file mode 100644
index 0000000..142b5aa
--- /dev/null
+++ b/kgramstats.cpp
@@ -0,0 +1,110 @@
1#include "kgramstats.h"
2#include <vector>
3#include <iostream>
4#include <cstdlib>
5
6kgramstats::kgramstats(string corpus, int maxK)
7{
8 this->maxK = maxK;
9
10 vector<string> tokens;
11 int start = 0;
12 int end = 0;
13
14 while (end != string::npos)
15 {
16 end = corpus.find(" ", start);
17
18 tokens.push_back(corpus.substr(start, (end == string::npos) ? string::npos : end - start));
19
20 start = ((end > (string::npos - 1) ) ? string::npos : end + 1);
21 }
22
23 stats = new map<kgram, map<string, int>* >();
24 for (int k=0; k<=maxK; k++)
25 {
26 for (int i=0; i<(tokens.size() - k); i++)
27 {
28 kgram seq(tokens.begin()+i, tokens.begin()+i+k);
29 string f = tokens[i+k];
30
31 if ((*stats)[seq] == NULL)
32 {
33 (*stats)[seq] = new map<string, int>();
34 }
35
36 (*((*stats)[seq]))[f]++;
37 }
38 }
39}
40
41map<string, int>* kgramstats::lookupExts(kgram tk)
42{
43 return (*stats)[tk];
44}
45
46int kgramstats::getMaxK()
47{
48 return maxK;
49}
50
51void printKgram(kgram k)
52{
53 for (kgram::iterator it = k.begin(); it != k.end(); it++)
54 {
55 cout << *it << " ";
56 }
57 cout << endl;
58}
59
60vector<string> kgramstats::randomSentence(int n)
61{
62 vector<string> result;
63 list<string> cur;
64
65 for (int i=0; i<n; i++)
66 {
67 if ((rand() % 4) != 0)
68 {
69 for (int i=0; i<cur.size(); i++)
70 {
71 if ((rand() % 3) != 0)
72 {
73 cur.pop_front();
74 } else {
75 break;
76 }
77 }
78 }
79
80 map<string, int>* probtable = lookupExts(cur);
81 int max = 0;
82 for (map<string, int>::iterator it = probtable->begin(); it != probtable->end(); ++it)
83 {
84 max += it->second;
85 }
86
87 int r = rand() % (max+1);
88 string next = probtable->begin()->first;
89 for (map<string, int>::iterator it = probtable->begin(); it != probtable->end(); ++it)
90 {
91 if (it->second > r)
92 {
93 break;
94 } else {
95 next = it->first;
96 r -= it->second;
97 }
98 }
99
100 if (cur.size() == maxK)
101 {
102 cur.pop_front();
103 }
104
105 cur.push_back(next);
106 result.push_back(next);
107 }
108
109 return result;
110} \ No newline at end of file