From 0a5c6bd740aff9be53e7ef117e9e926fde3c289e Mon Sep 17 00:00:00 2001
From: Kelly Rauchenberger
Date: Wed, 30 Dec 2015 22:01:37 -0500
Subject: guess what! the algorithm this time it's a literal algorithm again not canonizing away punctuation newlines are actually considered new sentences now we look for the end of a sentence and then start after that

---
 ebooks.cpp | 26 ++++++++++++++++----------
 1 file changed, 16 insertions(+), 10 deletions(-)

(limited to 'ebooks.cpp')

diff --git a/ebooks.cpp b/ebooks.cpp
index 6bbe25e..27591f4 100644
--- a/ebooks.cpp
+++ b/ebooks.cpp
@@ -24,11 +24,11 @@ int main(int argc, char** args)
   std::string line;
   while (getline(infile, line))
   {
-    corpus += " " + line;
+    corpus += line + "\n ";
   }
 
   std::cout << "Preprocessing corpus..." << std::endl;
-  kgramstats* stats = new kgramstats(corpus, 3);
+  kgramstats* stats = new kgramstats(corpus, 4);
 
   std::cout << "Preprocessing freevars..." << std::endl;
   freevars* vars = new freevars();
@@ -38,20 +38,26 @@ int main(int argc, char** args)
   std::cout << "Generating..." << std::endl;
   for (;;)
   {
-    std::vector<std::string> doc = stats->randomSentence(rand() % 25 + 5);
+    std::vector<std::string> doc = stats->randomSentence(rand() % 45 + 5);
     std::string hi;
     for (std::vector<std::string>::iterator it = doc.begin(); it != doc.end(); ++it)
     {
       hi += vars->parse(*it) + " ";
     }
 
-    size_t lastperiod = hi.find_last_of(".");
-    if ((lastperiod != std::string::npos) && (rand() % 3 > 0))
-    {
-      hi = hi.substr(0, lastperiod+1);
-    }
-
-    hi = hi.substr(0,140);
+    size_t firstperiod = hi.find_first_of(".!?");
+    if (firstperiod != std::string::npos)
+    {
+      hi = hi.substr(firstperiod+2);
+    }
+
+    hi.resize(140);
+
+    size_t lastperiod = hi.find_last_of(".!?");
+    if ((lastperiod != std::string::npos) && (rand() % 3 > 0))
+    {
+      hi = hi.substr(0, lastperiod+1);
+    }
 
     twitCurl twitterObj;
     std::string tmpStr, tmpStr2;
-- 
cgit 1.4.1