From 0a5c6bd740aff9be53e7ef117e9e926fde3c289e Mon Sep 17 00:00:00 2001
From: Kelly Rauchenberger
Date: Wed, 30 Dec 2015 22:01:37 -0500
Subject: guess what! the algorithm this time it's a literal algorithm again not canonizing away punctuation newlines are actually considered new sentences now we look for the end of a sentence and then start after that

---
 gen.cpp | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

(limited to 'gen.cpp')

diff --git a/gen.cpp b/gen.cpp
index 3284ffa..7e47d45 100644
--- a/gen.cpp
+++ b/gen.cpp
@@ -38,11 +38,11 @@ int main(int argc, char** args)
   std::string line;
   while (getline(infile, line))
   {
-    corpus += " " + line;
+    corpus += line + "\n ";
   }
 
   std::cout << "Preprocessing corpus..." << std::endl;
-  kgramstats* stats = new kgramstats(corpus, 3);
+  kgramstats* stats = new kgramstats(corpus, 4);
 
   std::cout << "Preprocessing freevars..." << std::endl;
   freevars* vars = new freevars();
@@ -52,14 +52,22 @@ int main(int argc, char** args)
   std::cout << "Generating..." << std::endl;
   for (;;)
   {
-    std::vector doc = stats->randomSentence(rand() % 35 + 15);
+    std::vector doc = stats->randomSentence(rand() % 35 + 45);
     std::string hi;
     for (std::vector::iterator it = doc.begin(); it != doc.end(); ++it)
     {
       hi += vars->parse(*it) + " ";
     }
+
+    size_t firstperiod = hi.find_first_of(".!?");
+    if (firstperiod != std::string::npos)
+    {
+      hi = hi.substr(firstperiod+2);
+    }
+
+    hi.resize(140);
 
-    size_t lastperiod = hi.find_last_of(".");
+    size_t lastperiod = hi.find_last_of(".!?");
     if ((lastperiod != std::string::npos) && (rand() % 3 > 0))
     {
       hi = hi.substr(0, lastperiod+1);
-- 
cgit 1.4.1