about summary refs log tree commit diff stats
path: root/ebooks.cpp
diff options
context:
space:
mode:
author Kelly Rauchenberger <fefferburbia@gmail.com> 2015-12-30 22:01:37 -0500
committer Kelly Rauchenberger <fefferburbia@gmail.com> 2015-12-30 22:01:37 -0500
commit 0a5c6bd740aff9be53e7ef117e9e926fde3c289e (patch)
tree 7a0a5c95433b5505b890c8a3176c863a5e802b8e /ebooks.cpp
parent 006c6502872cfc51eafd1df06ccb01c3c140a1ed (diff)
download rawr-ebooks-0a5c6bd740aff9be53e7ef117e9e926fde3c289e.tar.gz
rawr-ebooks-0a5c6bd740aff9be53e7ef117e9e926fde3c289e.tar.bz2
rawr-ebooks-0a5c6bd740aff9be53e7ef117e9e926fde3c289e.zip
guess what! the algorithm
this time it's a literal algorithm again
not canonizing away punctuation
newlines are actually considered new sentences now
we look for the end of a sentence and then start after that
Diffstat (limited to 'ebooks.cpp')
-rw-r--r-- ebooks.cpp 26
1 files changed, 16 insertions, 10 deletions
diff --git a/ebooks.cpp b/ebooks.cpp
index 6bbe25e..27591f4 100644
--- a/ebooks.cpp
+++ b/ebooks.cpp
@@ -24,11 +24,11 @@ int main(int argc, char** args)
24 std::string line; 24 std::string line;
25 while (getline(infile, line)) 25 while (getline(infile, line))
26 { 26 {
27 corpus += " " + line; 27 corpus += line + "\n ";
28 } 28 }
29 29
30 std::cout << "Preprocessing corpus..." << std::endl; 30 std::cout << "Preprocessing corpus..." << std::endl;
31 kgramstats* stats = new kgramstats(corpus, 3); 31 kgramstats* stats = new kgramstats(corpus, 4);
32 32
33 std::cout << "Preprocessing freevars..." << std::endl; 33 std::cout << "Preprocessing freevars..." << std::endl;
34 freevars* vars = new freevars(); 34 freevars* vars = new freevars();
@@ -38,20 +38,26 @@ int main(int argc, char** args)
38 std::cout << "Generating..." << std::endl; 38 std::cout << "Generating..." << std::endl;
39 for (;;) 39 for (;;)
40 { 40 {
41 std::vector<std::string> doc = stats->randomSentence(rand() % 25 + 5); 41 std::vector<std::string> doc = stats->randomSentence(rand() % 45 + 5);
42 std::string hi; 42 std::string hi;
43 for (std::vector<std::string>::iterator it = doc.begin(); it != doc.end(); ++it) 43 for (std::vector<std::string>::iterator it = doc.begin(); it != doc.end(); ++it)
44 { 44 {
45 hi += vars->parse(*it) + " "; 45 hi += vars->parse(*it) + " ";
46 } 46 }
47 47
48 size_t lastperiod = hi.find_last_of("."); 48 size_t firstperiod = hi.find_first_of(".!?");
49 if ((lastperiod != std::string::npos) && (rand() % 3 > 0)) 49 if (firstperiod != std::string::npos)
50 { 50 {
51 hi = hi.substr(0, lastperiod+1); 51 hi = hi.substr(firstperiod+2);
52 } 52 }
53 53
54 hi = hi.substr(0,140); 54 hi.resize(140);
55
56 size_t lastperiod = hi.find_last_of(".!?");
57 if ((lastperiod != std::string::npos) && (rand() % 3 > 0))
58 {
59 hi = hi.substr(0, lastperiod+1);
60 }
55 61
56 twitCurl twitterObj; 62 twitCurl twitterObj;
57 std::string tmpStr, tmpStr2; 63 std::string tmpStr, tmpStr2;