guess what! the algorithm

This time it's a literal algorithm again: we are no longer canonizing away punctuation; newlines are now actually considered new sentences; and we look for the end of a sentence and then start after that.
author: Kelly Rauchenberger <fefferburbia@gmail.com> 2015-12-30 22:01:37 -0500
committer: Kelly Rauchenberger <fefferburbia@gmail.com> 2015-12-30 22:01:37 -0500
commit: 0a5c6bd740aff9be53e7ef117e9e926fde3c289e (patch)
tree: 7a0a5c95433b5505b890c8a3176c863a5e802b8e /ebooks.cpp
parent: 006c6502872cfc51eafd1df06ccb01c3c140a1ed (diff)
download: rawr-ebooks-0a5c6bd740aff9be53e7ef117e9e926fde3c289e.tar.gz
rawr-ebooks-0a5c6bd740aff9be53e7ef117e9e926fde3c289e.tar.bz2
rawr-ebooks-0a5c6bd740aff9be53e7ef117e9e926fde3c289e.zip
1 files changed, 16 insertions, 10 deletions
diff --git a/ebooks.cpp b/ebooks.cpp
index 6bbe25e..27591f4 100644
--- a/ebooks.cpp
+++ b/ebooks.cpp

@@ -24,11 +24,11 @@ int main(int argc, char** args)
        std::string line;
        while (getline(infile, line))
        {
-                corpus += " " + line;
+                corpus += line + "\n ";
        }
    std::cout << "Preprocessing corpus..." << std::endl;
-        kgramstats* stats = new kgramstats(corpus, 3);
+        kgramstats* stats = new kgramstats(corpus, 4);
    
    std::cout << "Preprocessing freevars..." << std::endl;
    freevars* vars = new freevars();
@@ -38,20 +38,26 @@ int main(int argc, char** args)
    std::cout << "Generating..." << std::endl;
        for (;;)
        {
-                std::vector<std::string> doc = stats->randomSentence(rand() % 25 + 5);
+                std::vector<std::string> doc = stats->randomSentence(rand() % 45 + 5);
                std::string hi;
                for (std::vector<std::string>::iterator it = doc.begin(); it != doc.end(); ++it)
                {
                        hi += vars->parse(*it) + " ";
                }
-                size_t lastperiod = hi.find_last_of(".");
+    size_t firstperiod = hi.find_first_of(".!?");
-                if ((lastperiod != std::string::npos) && (rand() % 3 > 0))
+    if (firstperiod != std::string::npos)
-                {
+    {
-                        hi = hi.substr(0, lastperiod+1);
+      hi = hi.substr(firstperiod+2);
-                }
+    }
-        
+    
-                hi = hi.substr(0,140);
+    hi.resize(140);
+                size_t lastperiod = hi.find_last_of(".!?");
+                if ((lastperiod != std::string::npos) && (rand() % 3 > 0))
+                {
+                        hi = hi.substr(0, lastperiod+1);
+                }
            twitCurl twitterObj;
            std::string tmpStr, tmpStr2;
author	Kelly Rauchenberger <fefferburbia@gmail.com>	2015-12-30 22:01:37 -0500
committer	Kelly Rauchenberger <fefferburbia@gmail.com>	2015-12-30 22:01:37 -0500
commit	0a5c6bd740aff9be53e7ef117e9e926fde3c289e (patch)
tree	7a0a5c95433b5505b890c8a3176c863a5e802b8e /ebooks.cpp
parent	006c6502872cfc51eafd1df06ccb01c3c140a1ed (diff)
download	rawr-ebooks-0a5c6bd740aff9be53e7ef117e9e926fde3c289e.tar.gz rawr-ebooks-0a5c6bd740aff9be53e7ef117e9e926fde3c289e.tar.bz2 rawr-ebooks-0a5c6bd740aff9be53e7ef117e9e926fde3c289e.zip