From eb076ca2c6c8932fd251419563cf0078c5ee0914 Mon Sep 17 00:00:00 2001 From: Feffernoose Date: Sat, 5 Oct 2013 19:14:53 -0400 Subject: Rewrote weighted random number generator The previous method of choosing the next token was flawed in some mysterious way: it ended up picking various words that occurred only once in the input corpus as the first word of the generated output (most notably "hysterically," "Anarchy," "Yorkshire," and "impunity"). --- kgramstats.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kgramstats.h') diff --git a/kgramstats.h b/kgramstats.h index 248b193..b40e1ab 100644 --- a/kgramstats.h +++ b/kgramstats.h @@ -23,9 +23,10 @@ private: int titlecase; int uppercase; int period; + string* token; } token_data; int maxK; - map* >* stats; + map* >* stats; }; void printKgram(kgram k); -- cgit 1.4.1