diff options
-rw-r--r-- | CMakeLists.txt | 12 | ||||
-rw-r--r-- | chemist.cpp | 183 | ||||
-rw-r--r-- | data.txt | 310 | ||||
-rw-r--r-- | insult.cpp | 108 | ||||
-rw-r--r-- | patterner.cpp | 138 | ||||
-rw-r--r-- | patterner.h | 21 | ||||
m--------- | vendor/libtwittercpp | 0 | ||||
m--------- | vendor/verbly | 0 |
8 files changed, 304 insertions, 468 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 49b0d2e..b9a143f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt | |||
@@ -1,14 +1,12 @@ | |||
1 | cmake_minimum_required (VERSION 3.1) | 1 | cmake_minimum_required (VERSION 3.1) |
2 | project (chemist) | 2 | project (insult) |
3 | |||
4 | set(CMAKE_BUILD_TYPE Debug) | ||
5 | 3 | ||
6 | add_subdirectory(vendor/verbly) | 4 | add_subdirectory(vendor/verbly) |
7 | add_subdirectory(vendor/yaml-cpp EXCLUDE_FROM_ALL) | 5 | add_subdirectory(vendor/yaml-cpp EXCLUDE_FROM_ALL) |
8 | add_subdirectory(vendor/libtwittercpp) | 6 | add_subdirectory(vendor/libtwittercpp) |
9 | 7 | ||
10 | include_directories(vendor/verbly/lib vendor/yaml-cpp/include vendor/libtwittercpp/src) | 8 | include_directories(vendor/verbly/lib vendor/yaml-cpp/include vendor/libtwittercpp/src) |
11 | add_executable(chemist chemist.cpp) | 9 | add_executable(insult insult.cpp patterner.cpp) |
12 | set_property(TARGET chemist PROPERTY CXX_STANDARD 11) | 10 | set_property(TARGET insult PROPERTY CXX_STANDARD 11) |
13 | set_property(TARGET chemist PROPERTY CXX_STANDARD_REQUIRED ON) | 11 | set_property(TARGET insult PROPERTY CXX_STANDARD_REQUIRED ON) |
14 | target_link_libraries(chemist verbly twitter++ yaml-cpp) | 12 | target_link_libraries(insult verbly twitter++ yaml-cpp) |
diff --git a/chemist.cpp b/chemist.cpp deleted file mode 100644 index 06e1992..0000000 --- a/chemist.cpp +++ /dev/null | |||
@@ -1,183 +0,0 @@ | |||
1 | #include <yaml-cpp/yaml.h> | ||
2 | #include <iostream> | ||
3 | #include <sstream> | ||
4 | #include <verbly.h> | ||
5 | #include <fstream> | ||
6 | #include <twitter.h> | ||
7 | #include <random> | ||
8 | #include <chrono> | ||
9 | #include <thread> | ||
10 | |||
11 | int main(int argc, char** argv) | ||
12 | { | ||
13 | if (argc != 2) | ||
14 | { | ||
15 | std::cout << "usage: chemist [configfile]" << std::endl; | ||
16 | return -1; | ||
17 | } | ||
18 | |||
19 | std::string configfile(argv[1]); | ||
20 | YAML::Node config = YAML::LoadFile(configfile); | ||
21 | |||
22 | twitter::auth auth; | ||
23 | auth.setConsumerKey(config["consumer_key"].as<std::string>()); | ||
24 | auth.setConsumerSecret(config["consumer_secret"].as<std::string>()); | ||
25 | auth.setAccessKey(config["access_key"].as<std::string>()); | ||
26 | auth.setAccessSecret(config["access_secret"].as<std::string>()); | ||
27 | |||
28 | twitter::client client(auth); | ||
29 | |||
30 | std::map<std::string, std::vector<std::string>> groups; | ||
31 | std::ifstream datafile(config["forms_file"].as<std::string>()); | ||
32 | if (!datafile.is_open()) | ||
33 | { | ||
34 | std::cout << "Could not find datafile" << std::endl; | ||
35 | return 1; | ||
36 | } | ||
37 | |||
38 | bool newgroup = true; | ||
39 | std::string line; | ||
40 | std::string curgroup; | ||
41 | while (getline(datafile, line)) | ||
42 | { | ||
43 | if (line.back() == '\r') | ||
44 | { | ||
45 | line.pop_back(); | ||
46 | } | ||
47 | |||
48 | if (newgroup) | ||
49 | { | ||
50 | curgroup = line; | ||
51 | newgroup = false; | ||
52 | } else { | ||
53 | if (line.empty()) | ||
54 | { | ||
55 | newgroup = true; | ||
56 | } else { | ||
57 | groups[curgroup].push_back(line); | ||
58 | } | ||
59 | } | ||
60 | } | ||
61 | |||
62 | std::random_device random_device; | ||
63 | std::mt19937 random_engine{random_device()}; | ||
64 | |||
65 | verbly::data database {config["verbly_datafile"].as<std::string>()}; | ||
66 | for (;;) | ||
67 | { | ||
68 | std::cout << "Generating tweet" << std::endl; | ||
69 | std::string action = "{Main}"; | ||
70 | int tknloc; | ||
71 | while ((tknloc = action.find("{")) != std::string::npos) | ||
72 | { | ||
73 | std::string token = action.substr(tknloc+1, action.find("}")-tknloc-1); | ||
74 | std::string modifier; | ||
75 | int modloc; | ||
76 | if ((modloc = token.find(":")) != std::string::npos) | ||
77 | { | ||
78 | modifier = token.substr(modloc+1); | ||
79 | token = token.substr(0, modloc); | ||
80 | } | ||
81 | |||
82 | std::string canontkn; | ||
83 | std::transform(std::begin(token), std::end(token), std::back_inserter(canontkn), [] (char ch) { | ||
84 | return std::toupper(ch); | ||
85 | }); | ||
86 | |||
87 | std::string result; | ||
88 | if (canontkn == "NOUN") | ||
89 | { | ||
90 | result = database.nouns().is_not_proper().random().limit(1).with_complexity(1).run().front().singular_form(); | ||
91 | } else if (canontkn == "ATTRIBUTE") | ||
92 | { | ||
93 | result = database.nouns().random().limit(1).full_hyponym_of(database.nouns().with_wnid(100024264).limit(1).run().front()).run().front().singular_form(); | ||
94 | } else if (canontkn == "ADJECTIVE") | ||
95 | { | ||
96 | result = database.adjectives().with_complexity(1).random().limit(1).run().front().base_form(); | ||
97 | } else if (canontkn == "VERBING") | ||
98 | { | ||
99 | result = database.verbs().random().limit(1).run().front().ing_form(); | ||
100 | } else if (canontkn == "YEAR") | ||
101 | { | ||
102 | std::uniform_int_distribution<int> yeardist(1916,2015); | ||
103 | int year = yeardist(random_engine); | ||
104 | result = std::to_string(year); | ||
105 | } else if (canontkn == "REGION") | ||
106 | { | ||
107 | auto hem1 = database.nouns().with_singular_form("eastern hemisphere").limit(1).run().front(); | ||
108 | auto hem2 = database.nouns().with_singular_form("western hemisphere").limit(1).run().front(); | ||
109 | verbly::filter<verbly::noun> region{hem1, hem2}; | ||
110 | region.set_orlogic(true); | ||
111 | |||
112 | result = database.nouns().full_part_holonym_of(region).random().limit(1).run().front().singular_form(); | ||
113 | } else if (canontkn == "FAMOUSNAME") | ||
114 | { | ||
115 | auto person = database.nouns().with_singular_form("person").limit(1).run().front(); | ||
116 | auto ptypes = database.nouns().full_hyponym_of({person}).is_class().random().limit(1).run().front(); | ||
117 | result = database.nouns().instance_of({ptypes}).random().limit(1).run().front().singular_form(); | ||
118 | } else if (canontkn == "BODYPART") | ||
119 | { | ||
120 | auto bp = database.nouns().with_singular_form("body part").limit(1).run().front(); | ||
121 | result = database.nouns().full_hyponym_of({bp}).with_complexity(1).random().limit(1).run().front().singular_form(); | ||
122 | } else if (canontkn == "\\N") | ||
123 | { | ||
124 | result = "\n"; | ||
125 | } else { | ||
126 | auto group = groups[canontkn]; | ||
127 | std::uniform_int_distribution<int> groupdist(0, group.size()-1); | ||
128 | int groupind = groupdist(random_engine); | ||
129 | result = group[groupind]; | ||
130 | } | ||
131 | |||
132 | if (modifier == "indefinite") | ||
133 | { | ||
134 | if ((result.length() > 1) && (isupper(result[0])) && (isupper(result[1]))) | ||
135 | { | ||
136 | result = "an " + result; | ||
137 | } else if ((result[0] == 'a') || (result[0] == 'e') || (result[0] == 'i') || (result[0] == 'o') || (result[0] == 'u')) | ||
138 | { | ||
139 | result = "an " + result; | ||
140 | } else { | ||
141 | result = "a " + result; | ||
142 | } | ||
143 | } | ||
144 | |||
145 | std::string finalresult; | ||
146 | if (islower(token[0])) | ||
147 | { | ||
148 | std::transform(std::begin(result), std::end(result), std::back_inserter(finalresult), [] (char ch) { | ||
149 | return std::tolower(ch); | ||
150 | }); | ||
151 | } else if (isupper(token[0]) && !isupper(token[1])) | ||
152 | { | ||
153 | auto words = verbly::split<std::list<std::string>>(result, " "); | ||
154 | for (auto& word : words) | ||
155 | { | ||
156 | word[0] = std::toupper(word[0]); | ||
157 | } | ||
158 | |||
159 | finalresult = verbly::implode(std::begin(words), std::end(words), " "); | ||
160 | } else { | ||
161 | finalresult = result; | ||
162 | } | ||
163 | |||
164 | action.replace(tknloc, action.find("}")-tknloc+1, finalresult); | ||
165 | } | ||
166 | |||
167 | action.resize(140); | ||
168 | |||
169 | try | ||
170 | { | ||
171 | client.updateStatus(action); | ||
172 | |||
173 | std::cout << "Tweeted!" << std::endl; | ||
174 | } catch (const twitter::twitter_error& e) | ||
175 | { | ||
176 | std::cout << "Twitter error: " << e.what() << std::endl; | ||
177 | } | ||
178 | |||
179 | std::cout << "Waiting..." << std::endl; | ||
180 | |||
181 | std::this_thread::sleep_for(std::chrono::hours(1)); | ||
182 | } | ||
183 | } | ||
diff --git a/data.txt b/data.txt index 77179de..f25657f 100644 --- a/data.txt +++ b/data.txt | |||
@@ -1,279 +1,33 @@ | |||
1 | MAIN | 1 | MAIN |
2 | {NAME} ({CLASS}){\n}{PRIMARY}{\n}{SECONDARY} | 2 | {INSULT} |
3 | 3 | {INSULT} | |
4 | PRIMARY | 4 | {INSULT} |
5 | Used to treat {SYNDROME} | 5 | {INSULT} |
6 | Treats the {adjective} symptoms of {SYNDROME} | 6 | {Insult} |
7 | Cures {SYNDROME} | 7 | {INSULT}! |
8 | Approved to treat {SYNDROME} and {SYNDROME} | 8 | {INSULT}! |
9 | Prescribed for {SYNDROME} | 9 | {INSULT}!!!! |
10 | Used with {EXISTENT} to treat {SYNDROME} | 10 | {INSULT}!!!! |
11 | Used recreationally as {CLASS:indefinite} | 11 | {INSULT}? |
12 | Used recreationally for {verbing} | 12 | {INSULT}???? |
13 | 13 | {INSULT}?!?!?!?! | |
14 | SECONDARY | 14 | |
15 | Developed after {year} {Noun} Pandemic | 15 | INSULT |
16 | Often used off-label for {SYNDROME} | 16 | {START} {END} |
17 | Notable for its {adjective} effect | 17 | What the {WORD} did you say to me you little {WORD}? |
18 | Controlled in the US due to its {adjective} effect | 18 | |
19 | Developed in {year} to replace {EXISTENT} | 19 | INSULT,START |
20 | May cause {ADJECTIVE} feelings | 20 | {WORD} off |
21 | Obsoleted by {EXISTENT} | 21 | {WORD} you |
22 | Contraindicated by {SYNDROME} | 22 | {WORD} you and the horse you rode in on |
23 | Contraindicated by {EXISTENT} | 23 | {WORD} you in particular |
24 | Decreases the {adjective} effects of {EXISTENT} | 24 | go {WORD} yourself |
25 | 25 | go to {WORD} | |
26 | SYNDROME | 26 | leave me the {WORD} alone |
27 | irritable {noun} syndrome | 27 | you're a piece of {WORD} |
28 | {adjective} {noun} syndrome | 28 | what the {WORD} |
29 | severe {noun} | 29 | what the {WORD}ing {WORD} |
30 | major {adjective} disorder | 30 | kindly catch the 9am train to {Word}sville |
31 | {adjective} {noun} disorder | 31 | |
32 | obsessive {noun} disorder | 32 | INSULT,END |
33 | clinical {noun} | 33 | you piece of {WORD} \ No newline at end of file |
34 | {adjective} personality disorder | ||
35 | respiratory {adjective} disease | ||
36 | {bodypart} cancer | ||
37 | restless {bodypart} | ||
38 | {bodypart} failure | ||
39 | congenital {noun} disease | ||
40 | {FamousName}'s disease | ||
41 | {adjective} fever | ||
42 | hypo{noun}ism | ||
43 | {EXISTENT} overdose | ||
44 | {CLASS} overdose | ||
45 | {CLASS} discontinuation syndrome | ||
46 | {noun} syndrome | ||
47 | low {ATTRIBUTE} | ||
48 | |||
49 | CLASSIFIER | ||
50 | {CLASS:indefinite} | ||
51 | {NAME} | ||
52 | {EXISTENT} | ||
53 | |||
54 | CLASS | ||
55 | analgesic | ||
56 | painkiller | ||
57 | anaesthetic | ||
58 | antihistamine | ||
59 | anticonvulsant | ||
60 | antiepileptic | ||
61 | antidepressant | ||
62 | antimigraine | ||
63 | antipsychotic | ||
64 | benzodiazepine | ||
65 | antiparkinsonian | ||
66 | immunosuppressive | ||
67 | antianaemia | ||
68 | anticoagulant | ||
69 | blood thinner | ||
70 | antiarrhythmic | ||
71 | antithrombotic | ||
72 | antifungal | ||
73 | anti-infective | ||
74 | anti-inflammatory | ||
75 | disinfectant | ||
76 | antiseptic | ||
77 | antiemetic | ||
78 | diuretic | ||
79 | opiod painkiller | ||
80 | antiulcer | ||
81 | laxative | ||
82 | sedative | ||
83 | hormone | ||
84 | estrogen | ||
85 | androgen | ||
86 | contraceptive | ||
87 | ovulation inducer | ||
88 | thyroid stimulant | ||
89 | antithyroid | ||
90 | insulin | ||
91 | vaccine | ||
92 | oxytocin | ||
93 | SSRI | ||
94 | anxiolytic | ||
95 | antipanic agent | ||
96 | tricyclic | ||
97 | tetracyclic | ||
98 | MAOI | ||
99 | SNRI | ||
100 | antiandrogen | ||
101 | psychedelic | ||
102 | vitamin | ||
103 | probiotic | ||
104 | antibiotic | ||
105 | antiviral drug | ||
106 | stimulant | ||
107 | depressant | ||
108 | aphrodisiac | ||
109 | {EXISTENT} prodrug | ||
110 | |||
111 | EXISTENT | ||
112 | fluoxetine | ||
113 | Prozac | ||
114 | sertraline | ||
115 | Zoloft | ||
116 | escitalopram | ||
117 | Lexapro | ||
118 | venlafaxine | ||
119 | Effexor | ||
120 | aripiprazole | ||
121 | Abilify | ||
122 | alprazolam | ||
123 | Xanax | ||
124 | diazepam | ||
125 | Valium | ||
126 | lamotrigine | ||
127 | Lamictal | ||
128 | gabapentin | ||
129 | Neurontin | ||
130 | acetaminophen | ||
131 | Tylenol | ||
132 | ibuprofin | ||
133 | Advil | ||
134 | lurasidone | ||
135 | Latuda | ||
136 | lithium | ||
137 | activated charcoal | ||
138 | estradiol | ||
139 | AndroGel | ||
140 | ziprasidone | ||
141 | Geodon | ||
142 | risperidone | ||
143 | Risperdal | ||
144 | quetiapine | ||
145 | Seroquel | ||
146 | Cymbalta | ||
147 | duloxetine | ||
148 | bupropion | ||
149 | Welbutrin | ||
150 | buspirone | ||
151 | Buspar | ||
152 | oxycontin | ||
153 | Oxycodone | ||
154 | Concerta | ||
155 | methylphenidate | ||
156 | Ritalin | ||
157 | Vyvanse | ||
158 | lisdexamfetamine | ||
159 | Adderall | ||
160 | epinephrine | ||
161 | adrenaline | ||
162 | testosterone gel | ||
163 | Cialis | ||
164 | Viagra | ||
165 | heroin | ||
166 | morphine | ||
167 | crystal meth | ||
168 | mirtazapine | ||
169 | Remeron | ||
170 | Luvox | ||
171 | fluvoxamine | ||
172 | |||
173 | NAME | ||
174 | {PRENAME}{NAMEMID}{NAMEIFX} | ||
175 | |||
176 | PRENAME | ||
177 | Oxy | ||
178 | Ari | ||
179 | Zi | ||
180 | Quetia | ||
181 | Mor | ||
182 | Her | ||
183 | Cia | ||
184 | Via | ||
185 | He | ||
186 | Con | ||
187 | Flu | ||
188 | Ser | ||
189 | Es | ||
190 | Ven | ||
191 | Al | ||
192 | Dia | ||
193 | Lamo | ||
194 | Gaba | ||
195 | Aceta | ||
196 | Ibu | ||
197 | Lura | ||
198 | Ris | ||
199 | Li | ||
200 | Estra | ||
201 | Du | ||
202 | Bus | ||
203 | Epin | ||
204 | Co | ||
205 | Lido | ||
206 | Pro | ||
207 | Pri | ||
208 | Bu | ||
209 | Levo | ||
210 | Ro | ||
211 | Me | ||
212 | Dibu | ||
213 | Des | ||
214 | Ha | ||
215 | Mir | ||
216 | |||
217 | NAMEMID | ||
218 | pipra | ||
219 | pi | ||
220 | to | ||
221 | stero | ||
222 | cer | ||
223 | oxe | ||
224 | tra | ||
225 | cita | ||
226 | lo | ||
227 | la | ||
228 | fa | ||
229 | pra | ||
230 | zo | ||
231 | ze | ||
232 | tri | ||
233 | pen | ||
234 | mino | ||
235 | pro | ||
236 | si | ||
237 | peri | ||
238 | thi | ||
239 | loxe | ||
240 | con | ||
241 | epher | ||
242 | piva | ||
243 | bupi | ||
244 | va | ||
245 | piva | ||
246 | flu | ||
247 | oxy | ||
248 | taz | ||
249 | {NAMEMID}{NAMEMID} | ||
250 | {NAMEMID}{NAMEMID} | ||
251 | |||
252 | NAMEIFX | ||
253 | zole | ||
254 | ne | ||
255 | tine | ||
256 | lin | ||
257 | tamine | ||
258 | gra | ||
259 | phine | ||
260 | ta | ||
261 | line | ||
262 | pram | ||
263 | xine | ||
264 | lam | ||
265 | pam | ||
266 | gine | ||
267 | tin | ||
268 | phen | ||
269 | fin | ||
270 | done | ||
271 | um | ||
272 | diol | ||
273 | tin | ||
274 | rone | ||
275 | ine | ||
276 | caine | ||
277 | rane | ||
278 | ide | ||
279 | epine \ No newline at end of file | ||
diff --git a/insult.cpp b/insult.cpp new file mode 100644 index 0000000..2612d4e --- /dev/null +++ b/insult.cpp | |||
@@ -0,0 +1,108 @@ | |||
1 | #include <yaml-cpp/yaml.h> | ||
2 | #include <iostream> | ||
3 | #include <sstream> | ||
4 | #include <verbly.h> | ||
5 | #include <fstream> | ||
6 | #include <twitter.h> | ||
7 | #include <random> | ||
8 | #include <chrono> | ||
9 | #include <thread> | ||
10 | #include "patterner.h" | ||
11 | |||
12 | int main(int argc, char** argv) | ||
13 | { | ||
14 | if (argc != 2) | ||
15 | { | ||
16 | std::cout << "usage: insult [configfile]" << std::endl; | ||
17 | return -1; | ||
18 | } | ||
19 | |||
20 | std::string configfile(argv[1]); | ||
21 | YAML::Node config = YAML::LoadFile(configfile); | ||
22 | |||
23 | twitter::auth auth; | ||
24 | auth.setConsumerKey(config["consumer_key"].as<std::string>()); | ||
25 | auth.setConsumerSecret(config["consumer_secret"].as<std::string>()); | ||
26 | auth.setAccessKey(config["access_key"].as<std::string>()); | ||
27 | auth.setAccessSecret(config["access_secret"].as<std::string>()); | ||
28 | |||
29 | twitter::client client(auth); | ||
30 | |||
31 | std::random_device randomDevice; | ||
32 | std::mt19937 rng(randomDevice()); | ||
33 | |||
34 | try | ||
35 | { | ||
36 | verbly::database database(config["verbly_datafile"].as<std::string>()); | ||
37 | patterner pgen(config["forms_file"].as<std::string>(), database, rng); | ||
38 | |||
39 | std::cout << "Starting streaming..." << std::endl; | ||
40 | |||
41 | twitter::stream userStream(client, [&pgen, &client] | ||
42 | (const twitter::notification& n) { | ||
43 | if (n.getType() == twitter::notification::type::tweet) | ||
44 | { | ||
45 | if ((!n.getTweet().isRetweet()) | ||
46 | && (n.getTweet().getAuthor() != client.getUser())) | ||
47 | { | ||
48 | std::string original = n.getTweet().getText(); | ||
49 | std::string canonical; | ||
50 | |||
51 | std::transform(std::begin(original), std::end(original), | ||
52 | std::back_inserter(canonical), [] (char ch) | ||
53 | { | ||
54 | return std::tolower(ch); | ||
55 | }); | ||
56 | |||
57 | if (canonical.find("@teammeanies") != std::string::npos) | ||
58 | { | ||
59 | std::string doc = | ||
60 | n.getTweet().generateReplyPrefill(client.getUser()); | ||
61 | |||
62 | doc += pgen.generate(); | ||
63 | doc.resize(140); | ||
64 | |||
65 | try | ||
66 | { | ||
67 | client.replyToTweet(doc, n.getTweet()); | ||
68 | } catch (const twitter::twitter_error& error) | ||
69 | { | ||
70 | std::cout << "Twitter error while tweeting: " | ||
71 | << error.what() << std::endl; | ||
72 | } | ||
73 | } | ||
74 | } | ||
75 | } | ||
76 | }); | ||
77 | |||
78 | std::this_thread::sleep_for(std::chrono::minutes(1)); | ||
79 | |||
80 | for (;;) | ||
81 | { | ||
82 | std::cout << "Generating tweet..." << std::endl; | ||
83 | |||
84 | std::string action = pgen.generate(); | ||
85 | action.resize(140); | ||
86 | |||
87 | std::cout << action << std::endl; | ||
88 | |||
89 | try | ||
90 | { | ||
91 | client.updateStatus(action); | ||
92 | |||
93 | std::cout << "Tweeted!" << std::endl; | ||
94 | } catch (const twitter::twitter_error& e) | ||
95 | { | ||
96 | std::cout << "Twitter error: " << e.what() << std::endl; | ||
97 | } | ||
98 | |||
99 | std::cout << "Waiting..." << std::endl; | ||
100 | |||
101 | std::this_thread::sleep_for(std::chrono::hours(1)); | ||
102 | } | ||
103 | } catch (std::invalid_argument& e) | ||
104 | { | ||
105 | std::cout << e.what() << std::endl; | ||
106 | return -1; | ||
107 | } | ||
108 | } | ||
diff --git a/patterner.cpp b/patterner.cpp new file mode 100644 index 0000000..2c92428 --- /dev/null +++ b/patterner.cpp | |||
@@ -0,0 +1,138 @@ | |||
1 | #include "patterner.h" | ||
2 | #include <fstream> | ||
3 | #include <stdexcept> | ||
4 | |||
5 | patterner::patterner( | ||
6 | std::string datapath, | ||
7 | verbly::database& data, | ||
8 | std::mt19937& rng) : | ||
9 | data_(data), | ||
10 | rng_(rng) | ||
11 | { | ||
12 | std::ifstream datafile(datapath); | ||
13 | if (!datafile.is_open()) | ||
14 | { | ||
15 | throw std::invalid_argument("Could not find datafile"); | ||
16 | } | ||
17 | |||
18 | bool newgroup = true; | ||
19 | std::string line; | ||
20 | std::list<std::string> curgroups; | ||
21 | while (getline(datafile, line)) | ||
22 | { | ||
23 | if (line.back() == '\r') | ||
24 | { | ||
25 | line.pop_back(); | ||
26 | } | ||
27 | |||
28 | if (newgroup) | ||
29 | { | ||
30 | curgroups = verbly::split<std::list<std::string>>(line, ","); | ||
31 | newgroup = false; | ||
32 | } else { | ||
33 | if (line.empty()) | ||
34 | { | ||
35 | newgroup = true; | ||
36 | } else { | ||
37 | for (std::string curgroup : curgroups) | ||
38 | { | ||
39 | groups_[curgroup].push_back(line); | ||
40 | } | ||
41 | } | ||
42 | } | ||
43 | } | ||
44 | } | ||
45 | |||
46 | std::string patterner::generate() | ||
47 | { | ||
48 | std::string action = "{MAIN}"; | ||
49 | int tknloc; | ||
50 | while ((tknloc = action.find("{")) != std::string::npos) | ||
51 | { | ||
52 | std::string token = action.substr(tknloc+1, action.find("}")-tknloc-1); | ||
53 | std::string modifier; | ||
54 | int modloc; | ||
55 | if ((modloc = token.find(":")) != std::string::npos) | ||
56 | { | ||
57 | modifier = token.substr(modloc+1); | ||
58 | token = token.substr(0, modloc); | ||
59 | } | ||
60 | |||
61 | std::string canontkn; | ||
62 | std::transform(std::begin(token), std::end(token), | ||
63 | std::back_inserter(canontkn), [] (char ch) { | ||
64 | return std::toupper(ch); | ||
65 | }); | ||
66 | |||
67 | std::string result; | ||
68 | if (canontkn == "WORD") | ||
69 | { | ||
70 | result = data_.words( | ||
71 | (verbly::word::forms(verbly::inflection::base) %= | ||
72 | (verbly::form::complexity == 1) | ||
73 | && (verbly::form::length == 4) | ||
74 | && (verbly::form::proper == false) | ||
75 | && (verbly::pronunciation::numOfSyllables == 1)) | ||
76 | && !(verbly::word::usageDomains %= | ||
77 | (verbly::notion::wnid == 106718862))) // Blacklist ethnic slurs | ||
78 | .first().getBaseForm().getText(); | ||
79 | } else if (canontkn == "\\N") | ||
80 | { | ||
81 | result = "\n"; | ||
82 | } else { | ||
83 | auto group = groups_[canontkn]; | ||
84 | std::uniform_int_distribution<int> groupdist(0, group.size()-1); | ||
85 | int groupind = groupdist(rng_); | ||
86 | result = group[groupind]; | ||
87 | } | ||
88 | |||
89 | if (modifier == "indefinite") | ||
90 | { | ||
91 | if ((result.length() > 1) && (isupper(result[0])) && (isupper(result[1]))) | ||
92 | { | ||
93 | result = "an " + result; | ||
94 | } else if ((result[0] == 'a') || (result[0] == 'e') || (result[0] == 'i') || (result[0] == 'o') || (result[0] == 'u')) | ||
95 | { | ||
96 | result = "an " + result; | ||
97 | } else { | ||
98 | result = "a " + result; | ||
99 | } | ||
100 | } | ||
101 | |||
102 | std::string finalresult; | ||
103 | if (islower(token[0])) | ||
104 | { | ||
105 | std::transform(std::begin(result), std::end(result), std::back_inserter(finalresult), [] (char ch) { | ||
106 | return std::tolower(ch); | ||
107 | }); | ||
108 | } else if (isupper(token[0]) && !isupper(token[1])) | ||
109 | { | ||
110 | auto words = verbly::split<std::list<std::string>>(result, " "); | ||
111 | for (auto& word : words) | ||
112 | { | ||
113 | if (word[0] == '{') | ||
114 | { | ||
115 | word[1] = std::toupper(word[1]); | ||
116 | |||
117 | for (int k=2; k<word.length(); k++) | ||
118 | { | ||
119 | if (std::isalpha(word[k])) | ||
120 | { | ||
121 | word[k] = std::tolower(word[k]); | ||
122 | } | ||
123 | } | ||
124 | } else { | ||
125 | word[0] = std::toupper(word[0]); | ||
126 | } | ||
127 | } | ||
128 | |||
129 | finalresult = verbly::implode(std::begin(words), std::end(words), " "); | ||
130 | } else { | ||
131 | finalresult = result; | ||
132 | } | ||
133 | |||
134 | action.replace(tknloc, action.find("}")-tknloc+1, finalresult); | ||
135 | } | ||
136 | |||
137 | return action; | ||
138 | } | ||
diff --git a/patterner.h b/patterner.h new file mode 100644 index 0000000..df631b1 --- /dev/null +++ b/patterner.h | |||
@@ -0,0 +1,21 @@ | |||
1 | #ifndef PATTERNER_H_AB6883F5 | ||
2 | #define PATTERNER_H_AB6883F5 | ||
3 | |||
4 | #include <verbly.h> | ||
5 | #include <random> | ||
6 | |||
7 | class patterner { | ||
8 | public: | ||
9 | |||
10 | patterner(std::string datafile, verbly::database& data, std::mt19937& rng); | ||
11 | |||
12 | std::string generate(); | ||
13 | |||
14 | private: | ||
15 | |||
16 | std::map<std::string, std::vector<std::string>> groups_; | ||
17 | verbly::database& data_; | ||
18 | std::mt19937& rng_; | ||
19 | }; | ||
20 | |||
21 | #endif /* end of include guard: PATTERNER_H_AB6883F5 */ | ||
diff --git a/vendor/libtwittercpp b/vendor/libtwittercpp | |||
Subproject d90a1e74c77ba67f25a812609fd49d479bc464d | Subproject df906121dd862c0f704e44f28ee079158c431c4 | ||
diff --git a/vendor/verbly b/vendor/verbly | |||
Subproject 1f898f3bd66c29672275c2c884b17ba662ced62 | Subproject 1fd518d1c2b1d4e88ad88218b606a284b712810 | ||