diff options
| -rw-r--r-- | kgramstats.cpp | 31 |
1 file changed, 14 insertions, 17 deletions
| diff --git a/kgramstats.cpp b/kgramstats.cpp index 17c3c2d..4a7eb9d 100644 --- a/kgramstats.cpp +++ b/kgramstats.cpp | |||
| @@ -327,7 +327,12 @@ void rawr::compile(int maxK) | |||
| 327 | } else { | 327 | } else { |
| 328 | tk.suffix = suffixtype::terminating; | 328 | tk.suffix = suffixtype::terminating; |
| 329 | 329 | ||
| 330 | w.terms.add({suffix, newline}); | 330 | if (!newline) |
| 331 | { | ||
| 332 | w.terms.add({suffix, false}); | ||
| 333 | } else { | ||
| 334 | w.terms.add({".", false}); | ||
| 335 | } | ||
| 331 | } | 336 | } |
| 332 | } | 337 | } |
| 333 | } | 338 | } |
| @@ -573,21 +578,10 @@ std::string rawr::randomSentence(int maxL) const | |||
| 573 | cur.pop_front(); | 578 | cur.pop_front(); |
| 574 | } | 579 | } |
| 575 | 580 | ||
| 576 | if (cur.size() > 0) | 581 | if ((cur.size() > 2) && (cuts > 0) && ((rand() % cuts) > 0)) |
| 577 | { | 582 | { |
| 578 | if (rand() % (_maxK - cur.size() + 1) == 0) | 583 | cur.pop_front(); |
| 579 | { | 584 | cuts /= 2; |
| 580 | while ((cur.size() > 2) && (cuts > 0)) | ||
| 581 | { | ||
| 582 | if ((rand() % cuts) > 2) | ||
| 583 | { | ||
| 584 | cur.pop_front(); | ||
| 585 | cuts--; | ||
| 586 | } else { | ||
| 587 | break; | ||
| 588 | } | ||
| 589 | } | ||
| 590 | } | ||
| 591 | } | 585 | } |
| 592 | 586 | ||
| 593 | // Gotta circumvent the last line of the input corpus | 587 | // Gotta circumvent the last line of the input corpus |
| @@ -711,10 +705,13 @@ std::string rawr::randomSentence(int maxL) const | |||
| 711 | nextToken.append(" "); | 705 | nextToken.append(" "); |
| 712 | } | 706 | } |
| 713 | 707 | ||
| 714 | // If this pick was guaranteed, increase cut chance | ||
| 715 | if (next.all == max) | 708 | if (next.all == max) |
| 716 | { | 709 | { |
| 710 | // If this pick was guaranteed, increase cut chance | ||
| 717 | cuts++; | 711 | cuts++; |
| 712 | } else if (cuts > 0) { | ||
| 713 | // Otherwise, decrease cut chance | ||
| 714 | cuts--; | ||
| 718 | } | 715 | } |
| 719 | 716 | ||
| 720 | if (next.corpora.size() == 1) | 717 | if (next.corpora.size() == 1) |
| @@ -728,7 +725,7 @@ std::string rawr::randomSentence(int maxL) const | |||
| 728 | { | 725 | { |
| 729 | std::cout << " " << cor; | 726 | std::cout << " " << cor; |
| 730 | } | 727 | } |
| 731 | std::cout << std::endl; | 728 | std::cout << "; l=" << cur.size() << ",cuts=" << cuts << std::endl; |
| 732 | 729 | ||
| 733 | cur.push_back(next.tok); | 730 | cur.push_back(next.tok); |
| 734 | result.append(nextToken); | 731 | result.append(nextToken); |
