diff options
author | Kelly Rauchenberger <fefferburbia@gmail.com> | 2016-03-08 14:37:16 -0500 |
---|---|---|
committer | Kelly Rauchenberger <fefferburbia@gmail.com> | 2016-03-08 14:37:16 -0500 |
commit | 73821856c9648d030f4d148d2bc50f07f43ad369 (patch) | |
tree | 98f554e8cd55c859b71d5b2de5b9762baae6d563 /kgramstats.cpp | |
parent | a791091a4da2335ee45f3716cfe68466e5ebd679 (diff) | |
download | rawr-ebooks-73821856c9648d030f4d148d2bc50f07f43ad369.tar.gz rawr-ebooks-73821856c9648d030f4d148d2bc50f07f43ad369.tar.bz2 rawr-ebooks-73821856c9648d030f4d148d2bc50f07f43ad369.zip |
Full sentences mode!
Diffstat (limited to 'kgramstats.cpp')
-rw-r--r-- | kgramstats.cpp | 17 |
1 file changed, 15 insertions, 2 deletions
diff --git a/kgramstats.cpp b/kgramstats.cpp index 933165a..899ad20 100644 --- a/kgramstats.cpp +++ b/kgramstats.cpp | |||
@@ -457,14 +457,14 @@ void printKgram(kgram k) | |||
457 | } | 457 | } |
458 | 458 | ||
459 | // runs in O(n log t) time where n is the input number of sentences and t is the number of tokens in the input corpus | 459 | // runs in O(n log t) time where n is the input number of sentences and t is the number of tokens in the input corpus |
460 | std::string kgramstats::randomSentence(int n) | 460 | std::string kgramstats::randomSentence(int max) |
461 | { | 461 | { |
462 | std::string result; | 462 | std::string result; |
463 | kgram cur(1, wildcardQuery); | 463 | kgram cur(1, wildcardQuery); |
464 | int cuts = 0; | 464 | int cuts = 0; |
465 | std::stack<parentype> open_delimiters; | 465 | std::stack<parentype> open_delimiters; |
466 | 466 | ||
467 | for (int i=0; i<n; i++) | 467 | for (;;) |
468 | { | 468 | { |
469 | if (cur.size() == maxK) | 469 | if (cur.size() == maxK) |
470 | { | 470 | { |
@@ -611,6 +611,19 @@ std::string kgramstats::randomSentence(int n) | |||
611 | { | 611 | { |
612 | break; | 612 | break; |
613 | } | 613 | } |
614 | |||
615 | // Went over the limit, so reset | ||
616 | if (result.length() > max) | ||
617 | { | ||
618 | result = ""; | ||
619 | cur = kgram(1, wildcardQuery); | ||
620 | cuts = 0; | ||
621 | |||
622 | while (!open_delimiters.empty()) | ||
623 | { | ||
624 | open_delimiters.pop(); | ||
625 | } | ||
626 | } | ||
614 | } | 627 | } |
615 | 628 | ||
616 | // Remove the trailing space | 629 | // Remove the trailing space |