about summary refs log tree commit diff stats
path: root/kgramstats.cpp
diff options
context:
space:
mode:
author Kelly Rauchenberger <fefferburbia@gmail.com> 2016-03-08 14:37:16 -0500
committer Kelly Rauchenberger <fefferburbia@gmail.com> 2016-03-08 14:37:16 -0500
commit 73821856c9648d030f4d148d2bc50f07f43ad369 (patch)
tree 98f554e8cd55c859b71d5b2de5b9762baae6d563 /kgramstats.cpp
parent a791091a4da2335ee45f3716cfe68466e5ebd679 (diff)
download rawr-ebooks-73821856c9648d030f4d148d2bc50f07f43ad369.tar.gz
rawr-ebooks-73821856c9648d030f4d148d2bc50f07f43ad369.tar.bz2
rawr-ebooks-73821856c9648d030f4d148d2bc50f07f43ad369.zip
Full sentences mode!
Diffstat (limited to 'kgramstats.cpp')
-rw-r--r-- kgramstats.cpp 17
1 files changed, 15 insertions, 2 deletions
diff --git a/kgramstats.cpp b/kgramstats.cpp
index 933165a..899ad20 100644
--- a/kgramstats.cpp
+++ b/kgramstats.cpp
@@ -457,14 +457,14 @@ void printKgram(kgram k)
457} 457}
458 458
459// runs in O(n log t) time where n is the input number of sentences and t is the number of tokens in the input corpus 459// runs in O(n log t) time where n is the input number of sentences and t is the number of tokens in the input corpus
460std::string kgramstats::randomSentence(int n) 460std::string kgramstats::randomSentence(int max)
461{ 461{
462 std::string result; 462 std::string result;
463 kgram cur(1, wildcardQuery); 463 kgram cur(1, wildcardQuery);
464 int cuts = 0; 464 int cuts = 0;
465 std::stack<parentype> open_delimiters; 465 std::stack<parentype> open_delimiters;
466 466
467 for (int i=0; i<n; i++) 467 for (;;)
468 { 468 {
469 if (cur.size() == maxK) 469 if (cur.size() == maxK)
470 { 470 {
@@ -611,6 +611,19 @@ std::string kgramstats::randomSentence(int n)
611 { 611 {
612 break; 612 break;
613 } 613 }
614
615 // Went over the limit, so reset
616 if (result.length() > max)
617 {
618 result = "";
619 cur = kgram(1, wildcardQuery);
620 cuts = 0;
621
622 while (!open_delimiters.empty())
623 {
624 open_delimiters.pop();
625 }
626 }
614 } 627 }
615 628
616 // Remove the trailing space 629 // Remove the trailing space