diff options
Diffstat (limited to 'gen.cpp')
-rw-r--r-- | gen.cpp | 98 |
1 files changed, 46 insertions, 52 deletions
diff --git a/gen.cpp b/gen.cpp
index 7e47d45..400c0a5 100644
--- a/gen.cpp
+++ b/gen.cpp
@@ -11,72 +11,66 @@ | |||
11 | 11 | ||
12 | int main(int argc, char** args) | 12 | int main(int argc, char** args) |
13 | { | 13 | { |
14 | srand(time(NULL)); | 14 | srand(time(NULL)); |
15 | 15 | ||
16 | if (argc == 1) | 16 | if (argc == 1) |
17 | { | 17 | { |
18 | std::cout << "rawr-gen, version 1.0" << std::endl; | 18 | std::cout << "rawr-gen, version 1.0" << std::endl; |
19 | std::cout << "Usage: rawr-gen corpus-file" << std::endl; | 19 | std::cout << "Usage: rawr-gen corpus-file" << std::endl; |
20 | std::cout << " where 'corpus-file' is the path to your input" << std::endl; | 20 | std::cout << " where 'corpus-file' is the path to your input" << std::endl; |
21 | 21 | ||
22 | return 0; | 22 | return 0; |
23 | } | 23 | } |
24 | 24 | ||
25 | std::ifstream infile(args[1]); | 25 | std::ifstream infile(args[1]); |
26 | if (!infile) | 26 | if (!infile) |
27 | { | 27 | { |
28 | std::cout << "rawr-gen, version 1.0" << std::endl; | 28 | std::cout << "rawr-gen, version 1.0" << std::endl; |
29 | std::cout << "Usage: rawr-gen corpus-file" << std::endl; | 29 | std::cout << "Usage: rawr-gen corpus-file" << std::endl; |
30 | std::cout << " where 'corpus-file' is the path to your input" << std::endl; | 30 | std::cout << " where 'corpus-file' is the path to your input" << std::endl; |
31 | std::cout << std::endl; | 31 | std::cout << std::endl; |
32 | std::cout << "The file you specified does not exist." << std::endl; | 32 | std::cout << "The file you specified does not exist." << std::endl; |
33 | 33 | ||
34 | return 0; | 34 | return 0; |
35 | } | 35 | } |
36 | 36 | ||
37 | std::string corpus; | 37 | std::string corpus; |
38 | std::string line; | 38 | std::string line; |
39 | while (getline(infile, line)) | 39 | while (getline(infile, line)) |
40 | { | 40 | { |
41 | corpus += line + "\n "; | 41 | corpus += line + "\n "; |
42 | } | 42 | } |
43 | 43 | ||
44 | std::cout << "Preprocessing corpus..." << std::endl; | 44 | std::cout << "Preprocessing corpus..." << std::endl; |
45 | kgramstats* stats = new kgramstats(corpus, 4); | 45 | kgramstats* stats = new kgramstats(corpus, 4); |
46 | 46 | ||
47 | std::cout << "Preprocessing freevars..." << std::endl; | 47 | std::cout << "Preprocessing freevars..." << std::endl; |
48 | freevars* vars = new freevars(); | 48 | freevars* vars = new freevars(); |
49 | vars->addVar("name", "names.txt"); | 49 | vars->addVar("name", "names.txt"); |
50 | vars->addVar("noun", "nouns.txt"); | 50 | vars->addVar("noun", "nouns.txt"); |
51 | 51 | ||
52 | std::cout << "Generating..." << std::endl; | 52 | std::cout << "Generating..." << std::endl; |
53 | for (;;) | 53 | for (;;) |
54 | { | 54 | { |
55 | std::vector<std::string> doc = stats->randomSentence(rand() % 35 + 45); | 55 | std::vector<std::string> doc = stats->randomSentence(rand() % 35 + 15); |
56 | std::string hi; | 56 | std::string hi; |
57 | for (std::vector<std::string>::iterator it = doc.begin(); it != doc.end(); ++it) | 57 | for (std::vector<std::string>::iterator it = doc.begin(); it != doc.end(); ++it) |
58 | { | ||
59 | hi += vars->parse(*it) + " "; | ||
60 | } | ||
61 | |||
62 | size_t firstperiod = hi.find_first_of(".!?"); | ||
63 | if (firstperiod != std::string::npos) | ||
64 | { | 58 | { |
65 | hi = hi.substr(firstperiod+2); | 59 | hi += vars->parse(*it) + " "; |
66 | } | 60 | } |
67 | 61 | ||
68 | hi.resize(140); | 62 | hi.resize(140); |
69 | 63 | ||
70 | size_t lastperiod = hi.find_last_of(".!?"); | 64 | size_t lastperiod = hi.find_last_of(".!?,"); |
71 | if ((lastperiod != std::string::npos) && (rand() % 3 > 0)) | 65 | if ((lastperiod != std::string::npos) && (rand() % 3 > 0)) |
72 | { | 66 | { |
73 | hi = hi.substr(0, lastperiod+1); | 67 | hi = hi.substr(0, lastperiod+1); |
74 | } | 68 | } |
75 | 69 | ||
76 | std::cout << hi << std::endl; | 70 | std::cout << hi << std::endl; |
77 | 71 | ||
78 | getc(stdin); | 72 | getc(stdin); |
79 | } | 73 | } |
80 | 74 | ||
81 | return 0; | 75 | return 0; |
82 | } | 76 | } |