From 73821856c9648d030f4d148d2bc50f07f43ad369 Mon Sep 17 00:00:00 2001 From: Kelly Rauchenberger Date: Tue, 8 Mar 2016 14:37:16 -0500 Subject: Full sentences mode! --- ebooks.cpp | 2 +- gen.cpp | 2 +- kgramstats.cpp | 17 +++++++++++++++-- kgramstats.h | 2 +- 4 files changed, 18 insertions(+), 5 deletions(-) diff --git a/ebooks.cpp b/ebooks.cpp index 7d2724c..b586d63 100644 --- a/ebooks.cpp +++ b/ebooks.cpp @@ -43,7 +43,7 @@ int main(int argc, char** args) std::cout << "Generating..." << std::endl; for (;;) { - std::string doc = stats->randomSentence(rand() % 45 + 5); + std::string doc = stats->randomSentence(140); std::string hi = doc; hi.resize(140); diff --git a/gen.cpp b/gen.cpp index a963740..0319283 100644 --- a/gen.cpp +++ b/gen.cpp @@ -51,7 +51,7 @@ int main(int argc, char** args) std::cout << "Generating..." << std::endl; for (;;) { - std::string doc = stats->randomSentence(rand() % 35 + 15); + std::string doc = stats->randomSentence(140); std::string hi = doc; hi.resize(140); diff --git a/kgramstats.cpp b/kgramstats.cpp index 933165a..899ad20 100644 --- a/kgramstats.cpp +++ b/kgramstats.cpp @@ -457,14 +457,14 @@ void printKgram(kgram k) } // runs in O(n log t) time where n is the input number of sentences and t is the number of tokens in the input corpus -std::string kgramstats::randomSentence(int n) +std::string kgramstats::randomSentence(int max) { std::string result; kgram cur(1, wildcardQuery); int cuts = 0; std::stack open_delimiters; - for (int i=0; i max) + { + result = ""; + cur = kgram(1, wildcardQuery); + cuts = 0; + + while (!open_delimiters.empty()) + { + open_delimiters.pop(); + } + } } // Remove the trailing space diff --git a/kgramstats.h b/kgramstats.h index 4acde65..a024184 100644 --- a/kgramstats.h +++ b/kgramstats.h @@ -97,7 +97,7 @@ class kgramstats { public: kgramstats(std::string corpus, int maxK); - std::string randomSentence(int n); + std::string randomSentence(int max); private: struct token_data -- cgit 1.4.1