guess what! the algorithm

This time it's a literal algorithm again, not canonizing away punctuation. Newlines are actually considered new sentences. Now we look for the end of a sentence and then start after that.
author: Kelly Rauchenberger <fefferburbia@gmail.com> 2015-12-30 22:01:37 -0500
committer: Kelly Rauchenberger <fefferburbia@gmail.com> 2015-12-30 22:01:37 -0500
commit: 0a5c6bd740aff9be53e7ef117e9e926fde3c289e (patch)
tree: 7a0a5c95433b5505b890c8a3176c863a5e802b8e /gen.cpp
parent: 006c6502872cfc51eafd1df06ccb01c3c140a1ed (diff)
download: rawr-ebooks-0a5c6bd740aff9be53e7ef117e9e926fde3c289e.tar.gz
rawr-ebooks-0a5c6bd740aff9be53e7ef117e9e926fde3c289e.tar.bz2
rawr-ebooks-0a5c6bd740aff9be53e7ef117e9e926fde3c289e.zip
1 files changed, 12 insertions, 4 deletions
diff --git a/gen.cpp b/gen.cpp
index 3284ffa..7e47d45 100644
--- a/gen.cpp
+++ b/gen.cpp

@@ -38,11 +38,11 @@ int main(int argc, char** args)
        std::string line;
        while (getline(infile, line))
        {
-                corpus += " " + line;
+                corpus += line + "\n ";
        }
        
    std::cout << "Preprocessing corpus..." << std::endl;
-        kgramstats* stats = new kgramstats(corpus, 3);
+        kgramstats* stats = new kgramstats(corpus, 4);
    
    std::cout << "Preprocessing freevars..." << std::endl;
    freevars* vars = new freevars();
@@ -52,14 +52,22 @@ int main(int argc, char** args)
    std::cout << "Generating..." << std::endl;
        for (;;)
        {
-                std::vector<std::string> doc = stats->randomSentence(rand() % 35 + 15);
+                std::vector<std::string> doc = stats->randomSentence(rand() % 35 + 45);
                std::string hi;
                for (std::vector<std::string>::iterator it = doc.begin(); it != doc.end(); ++it)
                {
                        hi += vars->parse(*it) + " ";
                }
+    
+    size_t firstperiod = hi.find_first_of(".!?");
+    if (firstperiod != std::string::npos)
+    {
+      hi = hi.substr(firstperiod+2);
+    }
+    
+    hi.resize(140);
-                size_t lastperiod = hi.find_last_of(".");
+                size_t lastperiod = hi.find_last_of(".!?");
                if ((lastperiod != std::string::npos) && (rand() % 3 > 0))
                {
                        hi = hi.substr(0, lastperiod+1);
author	Kelly Rauchenberger <fefferburbia@gmail.com>	2015-12-30 22:01:37 -0500
committer	Kelly Rauchenberger <fefferburbia@gmail.com>	2015-12-30 22:01:37 -0500
commit	0a5c6bd740aff9be53e7ef117e9e926fde3c289e (patch)
tree	7a0a5c95433b5505b890c8a3176c863a5e802b8e /gen.cpp
parent	006c6502872cfc51eafd1df06ccb01c3c140a1ed (diff)
download	rawr-ebooks-0a5c6bd740aff9be53e7ef117e9e926fde3c289e.tar.gz rawr-ebooks-0a5c6bd740aff9be53e7ef117e9e926fde3c289e.tar.bz2 rawr-ebooks-0a5c6bd740aff9be53e7ef117e9e926fde3c289e.zip