about summary refs log tree commit diff stats
path: root/kgramstats.cpp
diff options
context:
space:
mode:
author Kelly Rauchenberger <fefferburbia@gmail.com> 2016-02-09 23:28:09 -0500
committer Kelly Rauchenberger <fefferburbia@gmail.com> 2016-02-09 23:28:09 -0500
commit cc8167a392eb435f657e1411d6bf6d7cbc6f3e3f (patch)
tree 0be01ecf578bdd3b1008e4fed8db283490dfd562 /kgramstats.cpp
parent ed4e289590d53623311ce42e44e873292211f7ab (diff)
parent ab46925fccf86a361426a5363cf644c0a6b03057 (diff)
download rawr-ebooks-cc8167a392eb435f657e1411d6bf6d7cbc6f3e3f.tar.gz
rawr-ebooks-cc8167a392eb435f657e1411d6bf6d7cbc6f3e3f.tar.bz2
rawr-ebooks-cc8167a392eb435f657e1411d6bf6d7cbc6f3e3f.zip
Merge in changes to older kgram cutting strategy
Diffstat (limited to 'kgramstats.cpp')
-rw-r--r-- kgramstats.cpp 30
1 file changed, 21 insertions, 9 deletions
diff --git a/kgramstats.cpp b/kgramstats.cpp
index b25c61f..ac694f3 100644
--- a/kgramstats.cpp
+++ b/kgramstats.cpp
@@ -423,7 +423,7 @@ std::string kgramstats::randomSentence(int n)
423{ 423{
424 std::string result; 424 std::string result;
425 kgram cur(1, wildcardQuery); 425 kgram cur(1, wildcardQuery);
426 bool cut = false; 426 int cuts = 0;
427 std::stack<parentype> open_delimiters; 427 std::stack<parentype> open_delimiters;
428 428
429 for (int i=0; i<n; i++) 429 for (int i=0; i<n; i++)
@@ -433,10 +433,21 @@ std::string kgramstats::randomSentence(int n)
433 cur.pop_front(); 433 cur.pop_front();
434 } 434 }
435 435
436 if ((cur.size() > 0) && cut) 436 if (cur.size() > 0)
437 { 437 {
438 cur.pop_front(); 438 if (rand() % (maxK - cur.size() + 1) == 0)
439 cut = false; 439 {
440 while ((cur.size() > 2) && (cuts > 0))
441 {
442 if ((rand() % cuts) > 2)
443 {
444 cur.pop_front();
445 cuts--;
446 } else {
447 break;
448 }
449 }
450 }
440 } 451 }
441 452
442 // Gotta circumvent the last line of the input corpus 453 // Gotta circumvent the last line of the input corpus
@@ -543,6 +554,12 @@ std::string kgramstats::randomSentence(int n)
543 { 554 {
544 nextToken.append(","); 555 nextToken.append(",");
545 } 556 }
557
558 // If this pick was guaranteed, increase cut chance
559 if (next.all == max)
560 {
561 cuts++;
562 }
546 563
547 /* DEBUG */ 564 /* DEBUG */
548 printKgram(cur); 565 printKgram(cur);
@@ -556,11 +573,6 @@ std::string kgramstats::randomSentence(int n)
556 { 573 {
557 break; 574 break;
558 } 575 }
559
560 if ((next.all == max) && (rand() % 3 == 0))
561 {
562 cut = true;
563 }
564 } 576 }
565 577
566 // Remove the trailing space 578 // Remove the trailing space