diff options
| author | Kelly Rauchenberger <fefferburbia@gmail.com> | 2016-03-08 14:37:16 -0500 |
|---|---|---|
| committer | Kelly Rauchenberger <fefferburbia@gmail.com> | 2016-03-08 14:37:16 -0500 |
| commit | 73821856c9648d030f4d148d2bc50f07f43ad369 (patch) | |
| tree | 98f554e8cd55c859b71d5b2de5b9762baae6d563 /kgramstats.cpp | |
| parent | a791091a4da2335ee45f3716cfe68466e5ebd679 (diff) | |
| download | rawr-ebooks-73821856c9648d030f4d148d2bc50f07f43ad369.tar.gz rawr-ebooks-73821856c9648d030f4d148d2bc50f07f43ad369.tar.bz2 rawr-ebooks-73821856c9648d030f4d148d2bc50f07f43ad369.zip | |
Full sentences mode!
Diffstat (limited to 'kgramstats.cpp')
| -rw-r--r-- | kgramstats.cpp | 17 |
1 file changed, 15 insertions, 2 deletions
| diff --git a/kgramstats.cpp b/kgramstats.cpp index 933165a..899ad20 100644 --- a/kgramstats.cpp +++ b/kgramstats.cpp | |||
| @@ -457,14 +457,14 @@ void printKgram(kgram k) | |||
| 457 | } | 457 | } |
| 458 | 458 | ||
| 459 | // runs in O(n log t) time where n is the input number of sentences and t is the number of tokens in the input corpus | 459 | // runs in O(n log t) time where n is the input number of sentences and t is the number of tokens in the input corpus |
| 460 | std::string kgramstats::randomSentence(int n) | 460 | std::string kgramstats::randomSentence(int max) |
| 461 | { | 461 | { |
| 462 | std::string result; | 462 | std::string result; |
| 463 | kgram cur(1, wildcardQuery); | 463 | kgram cur(1, wildcardQuery); |
| 464 | int cuts = 0; | 464 | int cuts = 0; |
| 465 | std::stack<parentype> open_delimiters; | 465 | std::stack<parentype> open_delimiters; |
| 466 | 466 | ||
| 467 | for (int i=0; i<n; i++) | 467 | for (;;) |
| 468 | { | 468 | { |
| 469 | if (cur.size() == maxK) | 469 | if (cur.size() == maxK) |
| 470 | { | 470 | { |
| @@ -611,6 +611,19 @@ std::string kgramstats::randomSentence(int n) | |||
| 611 | { | 611 | { |
| 612 | break; | 612 | break; |
| 613 | } | 613 | } |
| 614 | |||
| 615 | // Went over the limit, so reset | ||
| 616 | if (result.length() > max) | ||
| 617 | { | ||
| 618 | result = ""; | ||
| 619 | cur = kgram(1, wildcardQuery); | ||
| 620 | cuts = 0; | ||
| 621 | |||
| 622 | while (!open_delimiters.empty()) | ||
| 623 | { | ||
| 624 | open_delimiters.pop(); | ||
| 625 | } | ||
| 626 | } | ||
| 614 | } | 627 | } |
| 615 | 628 | ||
| 616 | // Remove the trailing space | 629 | // Remove the trailing space |
