diff options
| author | Kelly Rauchenberger <fefferburbia@gmail.com> | 2016-05-20 23:14:06 -0400 |
|---|---|---|
| committer | Kelly Rauchenberger <fefferburbia@gmail.com> | 2016-05-20 23:15:10 -0400 |
| commit | 8c3022e759191e90b5e12bcb6b0b5a6a48b37840 (patch) | |
| tree | 0d9a8a12616d6ea335fdc687049b05f679e8ccc6 /ebooks.cpp | |
| parent | a9c391efd5f0f73b5374dcfd807cdf59ed663e6b (diff) | |
| download | rawr-ebooks-8c3022e759191e90b5e12bcb6b0b5a6a48b37840.tar.gz rawr-ebooks-8c3022e759191e90b5e12bcb6b0b5a6a48b37840.tar.bz2 rawr-ebooks-8c3022e759191e90b5e12bcb6b0b5a6a48b37840.zip | |
Pulled the ebooks functionality out into a library
Diffstat (limited to 'ebooks.cpp')
| -rw-r--r-- | ebooks.cpp | 38 |
1 file changed, 35 insertions, 3 deletions
| diff --git a/ebooks.cpp b/ebooks.cpp<br>index aa690c2..c01cdc9 100644<br>--- a/ebooks.cpp<br>+++ b/ebooks.cpp | |||
| @@ -39,9 +39,41 @@ int main(int argc, char** args) | |||
| 39 | 39 | ||
| 40 | corpus += line + "\n "; | 40 | corpus += line + "\n "; |
| 41 | } | 41 | } |
| 42 | |||
| 43 | // Replace old-style freevars while I can't be bothered to remake the corpus yet | ||
| 44 | std::vector<std::string> fv_names; | ||
| 45 | std::ifstream namefile("names.txt"); | ||
| 46 | if (namefile.is_open()) | ||
| 47 | { | ||
| 48 | while (!namefile.eof()) | ||
| 49 | { | ||
| 50 | std::string l; | ||
| 51 | getline(namefile, l); | ||
| 52 | if (l.back() == '\r') | ||
| 53 | { | ||
| 54 | l.pop_back(); | ||
| 55 | } | ||
| 56 | |||
| 57 | fv_names.push_back(l); | ||
| 58 | } | ||
| 59 | } | ||
| 60 | |||
| 61 | namefile.close(); | ||
| 42 | 62 | ||
| 43 | std::cout << "Preprocessing corpus..." << std::endl; | 63 | std::cout << "Preprocessing corpus..." << std::endl; |
| 44 | kgramstats* stats = new kgramstats(corpus, 4); | 64 | rawr kgramstats; |
| 65 | kgramstats.addCorpus(corpus); | ||
| 66 | kgramstats.compile(4); | ||
| 67 | kgramstats.setTransformCallback([&] (std::string canonical, std::string) { | ||
| 68 | size_t pos = canonical.find("$name$"); | ||
| 69 | if (pos != std::string::npos) | ||
| 70 | { | ||
| 71 | canonical.replace(pos, 6, fv_names[rand() % fv_names.size()]); | ||
| 72 | } | ||
| 73 | |||
| 74 | return canonical; | ||
| 75 | }); | ||
| 76 | |||
| 45 | std::mutex stats_mutex; | 77 | std::mutex stats_mutex; |
| 46 | 78 | ||
| 47 | client.setUserStreamNotifyCallback([&] (twitter::notification n) { | 79 | client.setUserStreamNotifyCallback([&] (twitter::notification n) { |
| @@ -60,7 +92,7 @@ int main(int argc, char** args) | |||
| 60 | std::string doc = "@" + n.getTweet().getAuthor().getScreenName() + " "; | 92 | std::string doc = "@" + n.getTweet().getAuthor().getScreenName() + " "; |
| 61 | { | 93 | { |
| 62 | std::lock_guard<std::mutex> stats_lock(stats_mutex); | 94 | std::lock_guard<std::mutex> stats_lock(stats_mutex); |
| 63 | doc += stats->randomSentence(140 - doc.length()); | 95 | doc += kgramstats.randomSentence(140 - doc.length()); |
| 64 | doc.resize(140); | 96 | doc.resize(140); |
| 65 | } | 97 | } |
| 66 | 98 | ||
| @@ -84,7 +116,7 @@ int main(int argc, char** args) | |||
| 84 | std::string doc; | 116 | std::string doc; |
| 85 | { | 117 | { |
| 86 | std::lock_guard<std::mutex> stats_lock(stats_mutex); | 118 | std::lock_guard<std::mutex> stats_lock(stats_mutex); |
| 87 | doc = stats->randomSentence(140); | 119 | doc = kgramstats.randomSentence(140); |
| 88 | } | 120 | } |
| 89 | doc.resize(140); | 121 | doc.resize(140); |
| 90 | 122 | ||
