diff options
| author | Kelly Rauchenberger <fefferburbia@gmail.com> | 2016-02-09 23:28:09 -0500 |
|---|---|---|
| committer | Kelly Rauchenberger <fefferburbia@gmail.com> | 2016-02-09 23:28:09 -0500 |
| commit | cc8167a392eb435f657e1411d6bf6d7cbc6f3e3f (patch) | |
| tree | 0be01ecf578bdd3b1008e4fed8db283490dfd562 | |
| parent | ed4e289590d53623311ce42e44e873292211f7ab (diff) | |
| parent | ab46925fccf86a361426a5363cf644c0a6b03057 (diff) | |
| download | rawr-ebooks-cc8167a392eb435f657e1411d6bf6d7cbc6f3e3f.tar.gz rawr-ebooks-cc8167a392eb435f657e1411d6bf6d7cbc6f3e3f.tar.bz2 rawr-ebooks-cc8167a392eb435f657e1411d6bf6d7cbc6f3e3f.zip | |
Merge in changes to older kgram cutting strategy
| -rw-r--r-- | kgramstats.cpp | 30 |
1 file changed, 21 insertions, 9 deletions
| diff --git a/kgramstats.cpp b/kgramstats.cpp index b25c61f..ac694f3 100644 --- a/kgramstats.cpp +++ b/kgramstats.cpp | |||
| @@ -423,7 +423,7 @@ std::string kgramstats::randomSentence(int n) | |||
| 423 | { | 423 | { |
| 424 | std::string result; | 424 | std::string result; |
| 425 | kgram cur(1, wildcardQuery); | 425 | kgram cur(1, wildcardQuery); |
| 426 | bool cut = false; | 426 | int cuts = 0; |
| 427 | std::stack<parentype> open_delimiters; | 427 | std::stack<parentype> open_delimiters; |
| 428 | 428 | ||
| 429 | for (int i=0; i<n; i++) | 429 | for (int i=0; i<n; i++) |
| @@ -433,10 +433,21 @@ std::string kgramstats::randomSentence(int n) | |||
| 433 | cur.pop_front(); | 433 | cur.pop_front(); |
| 434 | } | 434 | } |
| 435 | 435 | ||
| 436 | if ((cur.size() > 0) && cut) | 436 | if (cur.size() > 0) |
| 437 | { | 437 | { |
| 438 | cur.pop_front(); | 438 | if (rand() % (maxK - cur.size() + 1) == 0) |
| 439 | cut = false; | 439 | { |
| 440 | while ((cur.size() > 2) && (cuts > 0)) | ||
| 441 | { | ||
| 442 | if ((rand() % cuts) > 2) | ||
| 443 | { | ||
| 444 | cur.pop_front(); | ||
| 445 | cuts--; | ||
| 446 | } else { | ||
| 447 | break; | ||
| 448 | } | ||
| 449 | } | ||
| 450 | } | ||
| 440 | } | 451 | } |
| 441 | 452 | ||
| 442 | // Gotta circumvent the last line of the input corpus | 453 | // Gotta circumvent the last line of the input corpus |
| @@ -543,6 +554,12 @@ std::string kgramstats::randomSentence(int n) | |||
| 543 | { | 554 | { |
| 544 | nextToken.append(","); | 555 | nextToken.append(","); |
| 545 | } | 556 | } |
| 557 | |||
| 558 | // If this pick was guaranteed, increase cut chance | ||
| 559 | if (next.all == max) | ||
| 560 | { | ||
| 561 | cuts++; | ||
| 562 | } | ||
| 546 | 563 | ||
| 547 | /* DEBUG */ | 564 | /* DEBUG */ |
| 548 | printKgram(cur); | 565 | printKgram(cur); |
| @@ -556,11 +573,6 @@ std::string kgramstats::randomSentence(int n) | |||
| 556 | { | 573 | { |
| 557 | break; | 574 | break; |
| 558 | } | 575 | } |
| 559 | |||
| 560 | if ((next.all == max) && (rand() % 3 == 0)) | ||
| 561 | { | ||
| 562 | cut = true; | ||
| 563 | } | ||
| 564 | } | 576 | } |
| 565 | 577 | ||
| 566 | // Remove the trailing space | 578 | // Remove the trailing space |
