about summary refs log tree commit diff stats
path: root/kgramstats.cpp
diff options
context:
space:
mode:
author Kelly Rauchenberger <fefferburbia@gmail.com> 2015-11-23 18:04:11 -0500
committer Kelly Rauchenberger <fefferburbia@gmail.com> 2015-11-23 18:04:11 -0500
commit 006c6502872cfc51eafd1df06ccb01c3c140a1ed (patch)
tree 7c4a83ae21215a0dd5861072c3cce169253c957f /kgramstats.cpp
parent a7127a63cc025ab3b95cde125a7c0db552603862 (diff)
download rawr-ebooks-006c6502872cfc51eafd1df06ccb01c3c140a1ed.tar.gz
rawr-ebooks-006c6502872cfc51eafd1df06ccb01c3c140a1ed.tar.bz2
rawr-ebooks-006c6502872cfc51eafd1df06ccb01c3c140a1ed.zip
You guessed it,,, twerked the algo
Diffstat (limited to 'kgramstats.cpp')
-rw-r--r-- kgramstats.cpp 85
1 file changed, 41 insertions, 44 deletions
diff --git a/kgramstats.cpp b/kgramstats.cpp
index 17598de..41517ca 100644
--- a/kgramstats.cpp
+++ b/kgramstats.cpp
@@ -206,7 +206,7 @@ std::vector<std::string> kgramstats::randomSentence(int n)
206 206
207 for (int i=0; i<n; i++) 207 for (int i=0; i<n; i++)
208 { 208 {
209 if ((cur.size() > 0) && (cur != newKgram)) 209 /*if ((cur.size() > 0) && (cur != newKgram))
210 { 210 {
211 if (rand() % (maxK - cur.size() + 1) == 0) 211 if (rand() % (maxK - cur.size() + 1) == 0)
212 { 212 {
@@ -223,7 +223,7 @@ std::vector<std::string> kgramstats::randomSentence(int n)
223 } 223 }
224 224
225 cuts++; 225 cuts++;
226 } 226 }*/
227 227
228 std::map<int, token_data*> distribution = *(*stats)[cur]; 228 std::map<int, token_data*> distribution = *(*stats)[cur];
229 int max = distribution.rbegin()->first; 229 int max = distribution.rbegin()->first;
@@ -241,12 +241,9 @@ std::vector<std::string> kgramstats::randomSentence(int n)
241 if (casing < next->uppercase) 241 if (casing < next->uppercase)
242 { 242 {
243 std::transform(nextToken.begin(), nextToken.end(), nextToken.begin(), ::toupper); 243 std::transform(nextToken.begin(), nextToken.end(), nextToken.begin(), ::toupper);
244 } else if ((casing - next->uppercase) < next->titlecase)
245 {
246 nextToken[0] = toupper(nextToken[0]);
247 } 244 }
248 245
249 if ((cur == newKgram) && (rand() % 3 < 2)) 246 if ((cur == newKgram) && (rand() % 15 > 0))
250 { 247 {
251 nextToken[0] = toupper(nextToken[0]); 248 nextToken[0] = toupper(nextToken[0]);
252 } 249 }
@@ -255,48 +252,48 @@ std::vector<std::string> kgramstats::randomSentence(int n)
255 if (mess) 252 if (mess)
256 { 253 {
257 nextToken = mstats.alternate(nextToken); 254 nextToken = mstats.alternate(nextToken);
255 }
258 256
259 if (startquote < next->startquote) 257 if (startquote < next->startquote)
258 {
259 nextToken = "\"" + nextToken;
260 } else if (startparen < next->startparen)
261 {
262 nextToken = "(" + nextToken;
263 }
264
265 if (period < next->period)
266 {
267 if (endquote < next->endquote)
260 { 268 {
261 nextToken = "\"" + nextToken; 269 nextToken += "\"";
262 } else if (startparen < next->startparen) 270 } else if (endparen < next->endparen)
263 { 271 {
264 nextToken = "(" + nextToken; 272 nextToken += ")";
265 } 273 }
266 274
267 if (period < next->period) 275 int type = rand() % 6;
268 { 276
269 if (endquote < next->endquote) 277 if (type < 3)
270 {
271 nextToken += "\"";
272 } else if (endparen < next->endparen)
273 {
274 nextToken += ")";
275 }
276
277 int type = rand() % 6;
278
279 if (type < 3)
280 {
281 nextToken += ".";
282 } else if (type < 5)
283 {
284 nextToken += "!";
285 } else {
286 nextToken += "?";
287 }
288 } else if (comma < next->comma)
289 { 278 {
290 if (endquote < next->endquote) 279 nextToken += ".";
291 { 280 } else if (type < 5)
292 nextToken += "\""; 281 {
293 } else if (endparen < next->endparen) 282 nextToken += "!";
294 { 283 } else {
295 nextToken += ")"; 284 nextToken += "?";
296 }
297
298 nextToken += ",";
299 } 285 }
286 } else if (comma < next->comma)
287 {
288 if (endquote < next->endquote)
289 {
290 nextToken += "\"";
291 } else if (endparen < next->endparen)
292 {
293 nextToken += ")";
294 }
295
296 nextToken += ",";
300 } 297 }
301 298
302 if (cur.size() == maxK) 299 if (cur.size() == maxK)
@@ -324,7 +321,7 @@ std::vector<std::string> kgramstats::randomSentence(int n)
324 cur.pop_front(); 321 cur.pop_front();
325 } 322 }
326 323
327 if ((period < next->period) && ((rand() % 2) == 0)) 324 if ((period < next->period) && ((rand() % 3) == 0))
328 { 325 {
329 cur = newKgram; 326 cur = newKgram;
330 } else if ((comma < next->comma) && ((rand() % 3) == 0)) 327 } else if ((comma < next->comma) && ((rand() % 3) == 0))
@@ -361,5 +358,5 @@ std::string canonize(std::string f)
361 std::string result; 358 std::string result;
362 std::remove_copy_if(canonical.begin(), canonical.end(), std::back_inserter(result), removeIf); 359 std::remove_copy_if(canonical.begin(), canonical.end(), std::back_inserter(result), removeIf);
363 360
364 return canonical; 361 return result;
365} 362}