about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- kgramstats.cpp 31
1 files changed, 14 insertions, 17 deletions
diff --git a/kgramstats.cpp b/kgramstats.cpp
index 17c3c2d..4a7eb9d 100644
--- a/kgramstats.cpp
+++ b/kgramstats.cpp
@@ -327,7 +327,12 @@ void rawr::compile(int maxK)
327 } else { 327 } else {
328 tk.suffix = suffixtype::terminating; 328 tk.suffix = suffixtype::terminating;
329 329
330 w.terms.add({suffix, newline}); 330 if (!newline)
331 {
332 w.terms.add({suffix, false});
333 } else {
334 w.terms.add({".", false});
335 }
331 } 336 }
332 } 337 }
333 } 338 }
@@ -573,21 +578,10 @@ std::string rawr::randomSentence(int maxL) const
573 cur.pop_front(); 578 cur.pop_front();
574 } 579 }
575 580
576 if (cur.size() > 0) 581 if ((cur.size() > 2) && (cuts > 0) && ((rand() % cuts) > 0))
577 { 582 {
578 if (rand() % (_maxK - cur.size() + 1) == 0) 583 cur.pop_front();
579 { 584 cuts /= 2;
580 while ((cur.size() > 2) && (cuts > 0))
581 {
582 if ((rand() % cuts) > 2)
583 {
584 cur.pop_front();
585 cuts--;
586 } else {
587 break;
588 }
589 }
590 }
591 } 585 }
592 586
593 // Gotta circumvent the last line of the input corpus 587 // Gotta circumvent the last line of the input corpus
@@ -711,10 +705,13 @@ std::string rawr::randomSentence(int maxL) const
711 nextToken.append(" "); 705 nextToken.append(" ");
712 } 706 }
713 707
714 // If this pick was guaranteed, increase cut chance
715 if (next.all == max) 708 if (next.all == max)
716 { 709 {
710 // If this pick was guaranteed, increase cut chance
717 cuts++; 711 cuts++;
712 } else if (cuts > 0) {
713 // Otherwise, decrease cut chance
714 cuts--;
718 } 715 }
719 716
720 if (next.corpora.size() == 1) 717 if (next.corpora.size() == 1)
@@ -728,7 +725,7 @@ std::string rawr::randomSentence(int maxL) const
728 { 725 {
729 std::cout << " " << cor; 726 std::cout << " " << cor;
730 } 727 }
731 std::cout << std::endl; 728 std::cout << "; l=" << cur.size() << ",cuts=" << cuts << std::endl;
732 729
733 cur.push_back(next.tok); 730 cur.push_back(next.tok);
734 result.append(nextToken); 731 result.append(nextToken);