diff options
Diffstat (limited to 'gen.cpp')
| -rw-r--r-- | gen.cpp | 98 |
1 file changed, 46 insertions, 52 deletions
| diff --git a/gen.cpp b/gen.cpp index 7e47d45..400c0a5 100644 --- a/gen.cpp +++ b/gen.cpp | |||
| @@ -11,72 +11,66 @@ | |||
| 11 | 11 | ||
| 12 | int main(int argc, char** args) | 12 | int main(int argc, char** args) |
| 13 | { | 13 | { |
| 14 | srand(time(NULL)); | 14 | srand(time(NULL)); |
| 15 | 15 | ||
| 16 | if (argc == 1) | 16 | if (argc == 1) |
| 17 | { | 17 | { |
| 18 | std::cout << "rawr-gen, version 1.0" << std::endl; | 18 | std::cout << "rawr-gen, version 1.0" << std::endl; |
| 19 | std::cout << "Usage: rawr-gen corpus-file" << std::endl; | 19 | std::cout << "Usage: rawr-gen corpus-file" << std::endl; |
| 20 | std::cout << " where 'corpus-file' is the path to your input" << std::endl; | 20 | std::cout << " where 'corpus-file' is the path to your input" << std::endl; |
| 21 | 21 | ||
| 22 | return 0; | 22 | return 0; |
| 23 | } | 23 | } |
| 24 | 24 | ||
| 25 | std::ifstream infile(args[1]); | 25 | std::ifstream infile(args[1]); |
| 26 | if (!infile) | 26 | if (!infile) |
| 27 | { | 27 | { |
| 28 | std::cout << "rawr-gen, version 1.0" << std::endl; | 28 | std::cout << "rawr-gen, version 1.0" << std::endl; |
| 29 | std::cout << "Usage: rawr-gen corpus-file" << std::endl; | 29 | std::cout << "Usage: rawr-gen corpus-file" << std::endl; |
| 30 | std::cout << " where 'corpus-file' is the path to your input" << std::endl; | 30 | std::cout << " where 'corpus-file' is the path to your input" << std::endl; |
| 31 | std::cout << std::endl; | 31 | std::cout << std::endl; |
| 32 | std::cout << "The file you specified does not exist." << std::endl; | 32 | std::cout << "The file you specified does not exist." << std::endl; |
| 33 | 33 | ||
| 34 | return 0; | 34 | return 0; |
| 35 | } | 35 | } |
| 36 | 36 | ||
| 37 | std::string corpus; | 37 | std::string corpus; |
| 38 | std::string line; | 38 | std::string line; |
| 39 | while (getline(infile, line)) | 39 | while (getline(infile, line)) |
| 40 | { | 40 | { |
| 41 | corpus += line + "\n "; | 41 | corpus += line + "\n "; |
| 42 | } | 42 | } |
| 43 | 43 | ||
| 44 | std::cout << "Preprocessing corpus..." << std::endl; | 44 | std::cout << "Preprocessing corpus..." << std::endl; |
| 45 | kgramstats* stats = new kgramstats(corpus, 4); | 45 | kgramstats* stats = new kgramstats(corpus, 4); |
| 46 | 46 | ||
| 47 | std::cout << "Preprocessing freevars..." << std::endl; | 47 | std::cout << "Preprocessing freevars..." << std::endl; |
| 48 | freevars* vars = new freevars(); | 48 | freevars* vars = new freevars(); |
| 49 | vars->addVar("name", "names.txt"); | 49 | vars->addVar("name", "names.txt"); |
| 50 | vars->addVar("noun", "nouns.txt"); | 50 | vars->addVar("noun", "nouns.txt"); |
| 51 | 51 | ||
| 52 | std::cout << "Generating..." << std::endl; | 52 | std::cout << "Generating..." << std::endl; |
| 53 | for (;;) | 53 | for (;;) |
| 54 | { | 54 | { |
| 55 | std::vector<std::string> doc = stats->randomSentence(rand() % 35 + 45); | 55 | std::vector<std::string> doc = stats->randomSentence(rand() % 35 + 15); |
| 56 | std::string hi; | 56 | std::string hi; |
| 57 | for (std::vector<std::string>::iterator it = doc.begin(); it != doc.end(); ++it) | 57 | for (std::vector<std::string>::iterator it = doc.begin(); it != doc.end(); ++it) |
| 58 | { | ||
| 59 | hi += vars->parse(*it) + " "; | ||
| 60 | } | ||
| 61 | |||
| 62 | size_t firstperiod = hi.find_first_of(".!?"); | ||
| 63 | if (firstperiod != std::string::npos) | ||
| 64 | { | 58 | { |
| 65 | hi = hi.substr(firstperiod+2); | 59 | hi += vars->parse(*it) + " "; |
| 66 | } | 60 | } |
| 67 | 61 | ||
| 68 | hi.resize(140); | 62 | hi.resize(140); |
| 69 | 63 | ||
| 70 | size_t lastperiod = hi.find_last_of(".!?"); | 64 | size_t lastperiod = hi.find_last_of(".!?,"); |
| 71 | if ((lastperiod != std::string::npos) && (rand() % 3 > 0)) | 65 | if ((lastperiod != std::string::npos) && (rand() % 3 > 0)) |
| 72 | { | 66 | { |
| 73 | hi = hi.substr(0, lastperiod+1); | 67 | hi = hi.substr(0, lastperiod+1); |
| 74 | } | 68 | } |
| 75 | 69 | ||
| 76 | std::cout << hi << std::endl; | 70 | std::cout << hi << std::endl; |
| 77 | 71 | ||
| 78 | getc(stdin); | 72 | getc(stdin); |
| 79 | } | 73 | } |
| 80 | 74 | ||
| 81 | return 0; | 75 | return 0; |
| 82 | } | 76 | } |
