diff options
-rw-r--r-- | kgramstats.cpp | 31 |
1 files changed, 14 insertions, 17 deletions
diff --git a/kgramstats.cpp b/kgramstats.cpp index 17c3c2d..4a7eb9d 100644 --- a/kgramstats.cpp +++ b/kgramstats.cpp | |||
@@ -327,7 +327,12 @@ void rawr::compile(int maxK) | |||
327 | } else { | 327 | } else { |
328 | tk.suffix = suffixtype::terminating; | 328 | tk.suffix = suffixtype::terminating; |
329 | 329 | ||
330 | w.terms.add({suffix, newline}); | 330 | if (!newline) |
331 | { | ||
332 | w.terms.add({suffix, false}); | ||
333 | } else { | ||
334 | w.terms.add({".", false}); | ||
335 | } | ||
331 | } | 336 | } |
332 | } | 337 | } |
333 | } | 338 | } |
@@ -573,21 +578,10 @@ std::string rawr::randomSentence(int maxL) const | |||
573 | cur.pop_front(); | 578 | cur.pop_front(); |
574 | } | 579 | } |
575 | 580 | ||
576 | if (cur.size() > 0) | 581 | if ((cur.size() > 2) && (cuts > 0) && ((rand() % cuts) > 0)) |
577 | { | 582 | { |
578 | if (rand() % (_maxK - cur.size() + 1) == 0) | 583 | cur.pop_front(); |
579 | { | 584 | cuts /= 2; |
580 | while ((cur.size() > 2) && (cuts > 0)) | ||
581 | { | ||
582 | if ((rand() % cuts) > 2) | ||
583 | { | ||
584 | cur.pop_front(); | ||
585 | cuts--; | ||
586 | } else { | ||
587 | break; | ||
588 | } | ||
589 | } | ||
590 | } | ||
591 | } | 585 | } |
592 | 586 | ||
593 | // Gotta circumvent the last line of the input corpus | 587 | // Gotta circumvent the last line of the input corpus |
@@ -711,10 +705,13 @@ std::string rawr::randomSentence(int maxL) const | |||
711 | nextToken.append(" "); | 705 | nextToken.append(" "); |
712 | } | 706 | } |
713 | 707 | ||
714 | // If this pick was guaranteed, increase cut chance | ||
715 | if (next.all == max) | 708 | if (next.all == max) |
716 | { | 709 | { |
710 | // If this pick was guaranteed, increase cut chance | ||
717 | cuts++; | 711 | cuts++; |
712 | } else if (cuts > 0) { | ||
713 | // Otherwise, decrease cut chance | ||
714 | cuts--; | ||
718 | } | 715 | } |
719 | 716 | ||
720 | if (next.corpora.size() == 1) | 717 | if (next.corpora.size() == 1) |
@@ -728,7 +725,7 @@ std::string rawr::randomSentence(int maxL) const | |||
728 | { | 725 | { |
729 | std::cout << " " << cor; | 726 | std::cout << " " << cor; |
730 | } | 727 | } |
731 | std::cout << std::endl; | 728 | std::cout << "; l=" << cur.size() << ",cuts=" << cuts << std::endl; |
732 | 729 | ||
733 | cur.push_back(next.tok); | 730 | cur.push_back(next.tok); |
734 | result.append(nextToken); | 731 | result.append(nextToken); |