about summary refs log tree commit diff stats
path: root/gen.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'gen.cpp')
-rw-r--r-- gen.cpp 98
1 files changed, 46 insertions, 52 deletions
diff --git a/gen.cpp b/gen.cpp
index 7e47d45..400c0a5 100644
--- a/gen.cpp
+++ b/gen.cpp
@@ -11,72 +11,66 @@
11 11
12int main(int argc, char** args) 12int main(int argc, char** args)
13{ 13{
14 srand(time(NULL)); 14 srand(time(NULL));
15 15
16 if (argc == 1) 16 if (argc == 1)
17 { 17 {
18 std::cout << "rawr-gen, version 1.0" << std::endl; 18 std::cout << "rawr-gen, version 1.0" << std::endl;
19 std::cout << "Usage: rawr-gen corpus-file" << std::endl; 19 std::cout << "Usage: rawr-gen corpus-file" << std::endl;
20 std::cout << " where 'corpus-file' is the path to your input" << std::endl; 20 std::cout << " where 'corpus-file' is the path to your input" << std::endl;
21 21
22 return 0; 22 return 0;
23 } 23 }
24 24
25 std::ifstream infile(args[1]); 25 std::ifstream infile(args[1]);
26 if (!infile) 26 if (!infile)
27 { 27 {
28 std::cout << "rawr-gen, version 1.0" << std::endl; 28 std::cout << "rawr-gen, version 1.0" << std::endl;
29 std::cout << "Usage: rawr-gen corpus-file" << std::endl; 29 std::cout << "Usage: rawr-gen corpus-file" << std::endl;
30 std::cout << " where 'corpus-file' is the path to your input" << std::endl; 30 std::cout << " where 'corpus-file' is the path to your input" << std::endl;
31 std::cout << std::endl; 31 std::cout << std::endl;
32 std::cout << "The file you specified does not exist." << std::endl; 32 std::cout << "The file you specified does not exist." << std::endl;
33 33
34 return 0; 34 return 0;
35 } 35 }
36 36
37 std::string corpus; 37 std::string corpus;
38 std::string line; 38 std::string line;
39 while (getline(infile, line)) 39 while (getline(infile, line))
40 { 40 {
41 corpus += line + "\n "; 41 corpus += line + "\n ";
42 } 42 }
43 43
44 std::cout << "Preprocessing corpus..." << std::endl; 44 std::cout << "Preprocessing corpus..." << std::endl;
45 kgramstats* stats = new kgramstats(corpus, 4); 45 kgramstats* stats = new kgramstats(corpus, 4);
46 46
47 std::cout << "Preprocessing freevars..." << std::endl; 47 std::cout << "Preprocessing freevars..." << std::endl;
48 freevars* vars = new freevars(); 48 freevars* vars = new freevars();
49 vars->addVar("name", "names.txt"); 49 vars->addVar("name", "names.txt");
50 vars->addVar("noun", "nouns.txt"); 50 vars->addVar("noun", "nouns.txt");
51 51
52 std::cout << "Generating..." << std::endl; 52 std::cout << "Generating..." << std::endl;
53 for (;;) 53 for (;;)
54 { 54 {
55 std::vector<std::string> doc = stats->randomSentence(rand() % 35 + 45); 55 std::vector<std::string> doc = stats->randomSentence(rand() % 35 + 15);
56 std::string hi; 56 std::string hi;
57 for (std::vector<std::string>::iterator it = doc.begin(); it != doc.end(); ++it) 57 for (std::vector<std::string>::iterator it = doc.begin(); it != doc.end(); ++it)
58 {
59 hi += vars->parse(*it) + " ";
60 }
61
62 size_t firstperiod = hi.find_first_of(".!?");
63 if (firstperiod != std::string::npos)
64 { 58 {
65 hi = hi.substr(firstperiod+2); 59 hi += vars->parse(*it) + " ";
66 } 60 }
67 61
68 hi.resize(140); 62 hi.resize(140);
69 63
70 size_t lastperiod = hi.find_last_of(".!?"); 64 size_t lastperiod = hi.find_last_of(".!?,");
71 if ((lastperiod != std::string::npos) && (rand() % 3 > 0)) 65 if ((lastperiod != std::string::npos) && (rand() % 3 > 0))
72 { 66 {
73 hi = hi.substr(0, lastperiod+1); 67 hi = hi.substr(0, lastperiod+1);
74 } 68 }
75 69
76 std::cout << hi << std::endl; 70 std::cout << hi << std::endl;
77 71
78 getc(stdin); 72 getc(stdin);
79 } 73 }
80 74
81 return 0; 75 return 0;
82} 76}