about summary refs log tree commit diff stats
path: root/kgramstats.cpp
diff options
context:
space:
mode:
author Kelly Rauchenberger <fefferburbia@gmail.com> 2016-02-09 23:24:57 -0500
committer Kelly Rauchenberger <fefferburbia@gmail.com> 2016-02-09 23:24:57 -0500
commit ab46925fccf86a361426a5363cf644c0a6b03057 (patch)
tree 0be01ecf578bdd3b1008e4fed8db283490dfd562 /kgramstats.cpp
parent 814858d7a30fbbe8aa16e16c2297bca47497e754 (diff)
download rawr-ebooks-ab46925fccf86a361426a5363cf644c0a6b03057.tar.gz
rawr-ebooks-ab46925fccf86a361426a5363cf644c0a6b03057.tar.bz2
rawr-ebooks-ab46925fccf86a361426a5363cf644c0a6b03057.zip
Tweaked kgram cut rate again
Diffstat (limited to 'kgramstats.cpp')
-rw-r--r-- kgramstats.cpp 12
1 files changed, 8 insertions, 4 deletions
diff --git a/kgramstats.cpp b/kgramstats.cpp
index e6048d9..ac694f3 100644
--- a/kgramstats.cpp
+++ b/kgramstats.cpp
@@ -437,9 +437,9 @@ std::string kgramstats::randomSentence(int n)
437 { 437 {
438 if (rand() % (maxK - cur.size() + 1) == 0) 438 if (rand() % (maxK - cur.size() + 1) == 0)
439 { 439 {
440 while (cur.size() > 2) 440 while ((cur.size() > 2) && (cuts > 0))
441 { 441 {
442 if ((rand() % (n)) < cuts) 442 if ((rand() % cuts) > 2)
443 { 443 {
444 cur.pop_front(); 444 cur.pop_front();
445 cuts--; 445 cuts--;
@@ -448,8 +448,6 @@ std::string kgramstats::randomSentence(int n)
448 } 448 }
449 } 449 }
450 } 450 }
451
452 cuts++;
453 } 451 }
454 452
455 // Gotta circumvent the last line of the input corpus 453 // Gotta circumvent the last line of the input corpus
@@ -556,6 +554,12 @@ std::string kgramstats::randomSentence(int n)
556 { 554 {
557 nextToken.append(","); 555 nextToken.append(",");
558 } 556 }
557
558 // If this pick was guaranteed, increase cut chance
559 if (next.all == max)
560 {
561 cuts++;
562 }
559 563
560 /* DEBUG */ 564 /* DEBUG */
561 printKgram(cur); 565 printKgram(cur);