diff options
Diffstat (limited to 'gen.cpp')
-rw-r--r-- | gen.cpp | 16 |
1 files changed, 12 insertions, 4 deletions
diff --git a/gen.cpp b/gen.cpp index 3284ffa..7e47d45 100644 --- a/gen.cpp +++ b/gen.cpp | |||
@@ -38,11 +38,11 @@ int main(int argc, char** args) | |||
38 | std::string line; | 38 | std::string line; |
39 | while (getline(infile, line)) | 39 | while (getline(infile, line)) |
40 | { | 40 | { |
41 | corpus += " " + line; | 41 | corpus += line + "\n "; |
42 | } | 42 | } |
43 | 43 | ||
44 | std::cout << "Preprocessing corpus..." << std::endl; | 44 | std::cout << "Preprocessing corpus..." << std::endl; |
45 | kgramstats* stats = new kgramstats(corpus, 3); | 45 | kgramstats* stats = new kgramstats(corpus, 4); |
46 | 46 | ||
47 | std::cout << "Preprocessing freevars..." << std::endl; | 47 | std::cout << "Preprocessing freevars..." << std::endl; |
48 | freevars* vars = new freevars(); | 48 | freevars* vars = new freevars(); |
@@ -52,14 +52,22 @@ int main(int argc, char** args) | |||
52 | std::cout << "Generating..." << std::endl; | 52 | std::cout << "Generating..." << std::endl; |
53 | for (;;) | 53 | for (;;) |
54 | { | 54 | { |
55 | std::vector<std::string> doc = stats->randomSentence(rand() % 35 + 15); | 55 | std::vector<std::string> doc = stats->randomSentence(rand() % 35 + 45); |
56 | std::string hi; | 56 | std::string hi; |
57 | for (std::vector<std::string>::iterator it = doc.begin(); it != doc.end(); ++it) | 57 | for (std::vector<std::string>::iterator it = doc.begin(); it != doc.end(); ++it) |
58 | { | 58 | { |
59 | hi += vars->parse(*it) + " "; | 59 | hi += vars->parse(*it) + " "; |
60 | } | 60 | } |
61 | |||
62 | size_t firstperiod = hi.find_first_of(".!?"); | ||
63 | if (firstperiod != std::string::npos) | ||
64 | { | ||
65 | hi = hi.substr(firstperiod+2); | ||
66 | } | ||
67 | |||
68 | hi.resize(140); | ||
61 | 69 | ||
62 | size_t lastperiod = hi.find_last_of("."); | 70 | size_t lastperiod = hi.find_last_of(".!?"); |
63 | if ((lastperiod != std::string::npos) && (rand() % 3 > 0)) | 71 | if ((lastperiod != std::string::npos) && (rand() % 3 > 0)) |
64 | { | 72 | { |
65 | hi = hi.substr(0, lastperiod+1); | 73 | hi = hi.substr(0, lastperiod+1); |