about summary refs log tree commit diff stats
path: root/kgramstats.h
diff options
context:
space:
mode:
author Feffernoose <fefferburbia@gmail.com> 2013-10-05 19:14:53 -0400
committer Feffernoose <fefferburbia@gmail.com> 2013-10-05 19:14:53 -0400
commit eb076ca2c6c8932fd251419563cf0078c5ee0914 (patch)
tree bcd96acd0613fafa27b847cc5937420755b3d748 /kgramstats.h
parent 92a4a0e7db8336f8ccc11c053dc29847a303ad88 (diff)
download rawr-ebooks-eb076ca2c6c8932fd251419563cf0078c5ee0914.tar.gz
rawr-ebooks-eb076ca2c6c8932fd251419563cf0078c5ee0914.tar.bz2
rawr-ebooks-eb076ca2c6c8932fd251419563cf0078c5ee0914.zip
Rewrote weighted random number generator
The previous method of picking which token was the next one was flawed in some mysterious way that ended up
picking various words that occurred only once in the input corpus as the first word of the generated output
(most notably, "hysterically," "Anarchy," "Yorkshire," and "impunity.").
Diffstat (limited to 'kgramstats.h')
-rw-r--r-- kgramstats.h 3
1 files changed, 2 insertions, 1 deletions
diff --git a/kgramstats.h b/kgramstats.h
index 248b193..b40e1ab 100644
--- a/kgramstats.h
+++ b/kgramstats.h
@@ -23,9 +23,10 @@ private:
23 int titlecase; 23 int titlecase;
24 int uppercase; 24 int uppercase;
25 int period; 25 int period;
26 string* token;
26 } token_data; 27 } token_data;
27 int maxK; 28 int maxK;
28 map<kgram, map<string, token_data*>* >* stats; 29 map<kgram, map<int, token_data*>* >* stats;
29}; 30};
30 31
31void printKgram(kgram k); 32void printKgram(kgram k);