about summary refs log tree commit diff stats
path: root/gen.cpp
diff options
context:
space:
mode:
author Kelly Rauchenberger <fefferburbia@gmail.com> 2015-12-30 22:01:37 -0500
committer Kelly Rauchenberger <fefferburbia@gmail.com> 2015-12-30 22:01:37 -0500
commit 0a5c6bd740aff9be53e7ef117e9e926fde3c289e (patch)
tree 7a0a5c95433b5505b890c8a3176c863a5e802b8e /gen.cpp
parent 006c6502872cfc51eafd1df06ccb01c3c140a1ed (diff)
download rawr-ebooks-0a5c6bd740aff9be53e7ef117e9e926fde3c289e.tar.gz
rawr-ebooks-0a5c6bd740aff9be53e7ef117e9e926fde3c289e.tar.bz2
rawr-ebooks-0a5c6bd740aff9be53e7ef117e9e926fde3c289e.zip
guess what! the algorithm
this time it's a literal algorithm again
not canonizing away punctuation
newlines are actually considered new sentences now
we look for the end of a sentence and then start after that
Diffstat (limited to 'gen.cpp')
-rw-r--r-- gen.cpp 16
1 files changed, 12 insertions, 4 deletions
diff --git a/gen.cpp b/gen.cpp
index 3284ffa..7e47d45 100644
--- a/gen.cpp
+++ b/gen.cpp
@@ -38,11 +38,11 @@ int main(int argc, char** args)
38 std::string line; 38 std::string line;
39 while (getline(infile, line)) 39 while (getline(infile, line))
40 { 40 {
41 corpus += " " + line; 41 corpus += line + "\n ";
42 } 42 }
43 43
44 std::cout << "Preprocessing corpus..." << std::endl; 44 std::cout << "Preprocessing corpus..." << std::endl;
45 kgramstats* stats = new kgramstats(corpus, 3); 45 kgramstats* stats = new kgramstats(corpus, 4);
46 46
47 std::cout << "Preprocessing freevars..." << std::endl; 47 std::cout << "Preprocessing freevars..." << std::endl;
48 freevars* vars = new freevars(); 48 freevars* vars = new freevars();
@@ -52,14 +52,22 @@ int main(int argc, char** args)
52 std::cout << "Generating..." << std::endl; 52 std::cout << "Generating..." << std::endl;
53 for (;;) 53 for (;;)
54 { 54 {
55 std::vector<std::string> doc = stats->randomSentence(rand() % 35 + 15); 55 std::vector<std::string> doc = stats->randomSentence(rand() % 35 + 45);
56 std::string hi; 56 std::string hi;
57 for (std::vector<std::string>::iterator it = doc.begin(); it != doc.end(); ++it) 57 for (std::vector<std::string>::iterator it = doc.begin(); it != doc.end(); ++it)
58 { 58 {
59 hi += vars->parse(*it) + " "; 59 hi += vars->parse(*it) + " ";
60 } 60 }
61
62 size_t firstperiod = hi.find_first_of(".!?");
63 if (firstperiod != std::string::npos)
64 {
65 hi = hi.substr(firstperiod+2);
66 }
67
68 hi.resize(140);
61 69
62 size_t lastperiod = hi.find_last_of("."); 70 size_t lastperiod = hi.find_last_of(".!?");
63 if ((lastperiod != std::string::npos) && (rand() % 3 > 0)) 71 if ((lastperiod != std::string::npos) && (rand() % 3 > 0))
64 { 72 {
65 hi = hi.substr(0, lastperiod+1); 73 hi = hi.substr(0, lastperiod+1);