about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Kelly Rauchenberger <fefferburbia@gmail.com> 2017-02-04 10:32:55 -0500
committer Kelly Rauchenberger <fefferburbia@gmail.com> 2017-02-04 10:32:55 -0500
commit 8c2e7c646f2a549ea9b4db831d8088f57b3287ae (patch)
tree fa9070e88a34284c6089b2939ea1d1ca6bef147e
parent cb77fded0b9a8a9034ace592be04176c8778ddca (diff)
download furries-8c2e7c646f2a549ea9b4db831d8088f57b3287ae.tar.gz
furries-8c2e7c646f2a549ea9b4db831d8088f57b3287ae.tar.bz2
furries-8c2e7c646f2a549ea9b4db831d8088f57b3287ae.zip
Updated verbly (new API)
Notably, the bot should not be able to use ethnic slurs now.

sentence.cpp is basically just copied from advice.
-rw-r--r-- CMakeLists.txt 2
-rw-r--r-- furries.cpp 460
-rw-r--r-- sentence.cpp 682
-rw-r--r-- sentence.h 39
m--------- vendor/verbly 0
5 files changed, 738 insertions, 445 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index e6a8aa7..cbc9e68 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -6,7 +6,7 @@ add_subdirectory(vendor/verbly)
6add_subdirectory(vendor/yaml-cpp EXCLUDE_FROM_ALL) 6add_subdirectory(vendor/yaml-cpp EXCLUDE_FROM_ALL)
7 7
8include_directories(vendor/libtwittercpp/src vendor/verbly/lib vendor/yaml-cpp/include) 8include_directories(vendor/libtwittercpp/src vendor/verbly/lib vendor/yaml-cpp/include)
9add_executable(furries furries.cpp) 9add_executable(furries furries.cpp sentence.cpp)
10set_property(TARGET furries PROPERTY CXX_STANDARD 11) 10set_property(TARGET furries PROPERTY CXX_STANDARD 11)
11set_property(TARGET furries PROPERTY CXX_STANDARD_REQUIRED ON) 11set_property(TARGET furries PROPERTY CXX_STANDARD_REQUIRED ON)
12target_link_libraries(furries verbly twitter++ yaml-cpp) 12target_link_libraries(furries verbly twitter++ yaml-cpp)
diff --git a/furries.cpp b/furries.cpp
index de2fa02..3f9c76d 100644
--- a/furries.cpp
+++ b/furries.cpp
@@ -6,429 +6,7 @@
6#include <chrono> 6#include <chrono>
7#include <thread> 7#include <thread>
8#include <random> 8#include <random>
9 9#include "sentence.h"
10class fill_blanks {
11 private:
12 verbly::data& database;
13
14 public:
15 fill_blanks(verbly::data& database) : database(database)
16 {
17
18 }
19
20 verbly::filter<verbly::noun> parse_selrestrs(verbly::frame::selrestr selrestr)
21 {
22 switch (selrestr.get_type())
23 {
24 case verbly::frame::selrestr::type::empty:
25 {
26 return verbly::filter<verbly::noun>{};
27 }
28
29 case verbly::frame::selrestr::type::singleton:
30 {
31 verbly::noun n;
32
33 if (selrestr.get_restriction() == "concrete")
34 {
35 n = database.nouns().with_singular_form("physical entity").limit(1).run().front();
36 } else if (selrestr.get_restriction() == "time")
37 {
38 n = database.nouns().with_singular_form("time").limit(1).run().front();
39 } else if (selrestr.get_restriction() == "state")
40 {
41 n = database.nouns().with_singular_form("state").limit(1).run().front();
42 } else if (selrestr.get_restriction() == "abstract")
43 {
44 n = database.nouns().with_singular_form("abstract entity").limit(1).run().front();
45 } else if (selrestr.get_restriction() == "time")
46 {
47 n = database.nouns().with_singular_form("time").limit(1).run().front();
48 } else if (selrestr.get_restriction() == "scalar")
49 {
50 n = database.nouns().with_singular_form("number").limit(1).run().front();
51 } else if (selrestr.get_restriction() == "currency")
52 {
53 auto nn2 = database.nouns().with_singular_form("currency").limit(2).run();
54 std::vector<verbly::noun> nn(std::begin(nn2), std::end(nn2));
55 n = nn[1];
56 } else if (selrestr.get_restriction() == "location")
57 {
58 n = database.nouns().with_singular_form("location").limit(1).run().front();
59 } else if (selrestr.get_restriction() == "organization")
60 {
61 n = database.nouns().with_singular_form("organization").limit(1).run().front();
62 } else if (selrestr.get_restriction() == "int_control")
63 {
64 n = database.nouns().with_singular_form("causal agent").limit(1).run().front();
65 } else if (selrestr.get_restriction() == "natural")
66 {
67 n = database.nouns().with_singular_form("natural object").limit(1).run().front();
68 } else if (selrestr.get_restriction() == "phys_obj")
69 {
70 n = database.nouns().with_singular_form("physical object").limit(1).run().front();
71 } else if (selrestr.get_restriction() == "solid")
72 {
73 n = database.nouns().with_singular_form("solid").limit(1).run().front();
74 } else if (selrestr.get_restriction() == "shape")
75 {
76 n = database.nouns().with_singular_form("shape").limit(1).run().front();
77 } else if (selrestr.get_restriction() == "substance")
78 {
79 n = database.nouns().with_singular_form("substance").limit(1).run().front();
80 } else if (selrestr.get_restriction() == "idea")
81 {
82 n = database.nouns().with_singular_form("idea").limit(1).run().front();
83 } else if (selrestr.get_restriction() == "sound")
84 {
85 auto nn2 = database.nouns().with_singular_form("sound").limit(4).run();
86 std::vector<verbly::noun> nn(std::begin(nn2), std::end(nn2));
87 n = nn[3];
88 } else if (selrestr.get_restriction() == "communication")
89 {
90 n = database.nouns().with_singular_form("communication").limit(1).run().front();
91 } else if (selrestr.get_restriction() == "region")
92 {
93 n = database.nouns().with_singular_form("region").limit(1).run().front();
94 } else if (selrestr.get_restriction() == "place")
95 {
96 n = database.nouns().with_singular_form("place").limit(1).run().front();
97 } else if (selrestr.get_restriction() == "machine")
98 {
99 n = database.nouns().with_singular_form("machine").limit(1).run().front();
100 } else if (selrestr.get_restriction() == "animate")
101 {
102 n = database.nouns().with_singular_form("animate being").limit(1).run().front();
103 } else if (selrestr.get_restriction() == "plant")
104 {
105 auto nn2 = database.nouns().with_singular_form("plant").limit(2).run();
106 std::vector<verbly::noun> nn(std::begin(nn2), std::end(nn2));
107 n = nn[1];
108 } else if (selrestr.get_restriction() == "comestible")
109 {
110 n = database.nouns().with_singular_form("food").limit(1).run().front();
111 } else if (selrestr.get_restriction() == "artifact")
112 {
113 n = database.nouns().with_singular_form("artifact").limit(1).run().front();
114 } else if (selrestr.get_restriction() == "vehicle")
115 {
116 n = database.nouns().with_singular_form("vehicle").limit(1).run().front();
117 } else if (selrestr.get_restriction() == "human")
118 {
119 n = database.nouns().with_singular_form("person").limit(1).run().front();
120 } else if (selrestr.get_restriction() == "animal")
121 {
122 n = database.nouns().with_singular_form("animal").limit(1).run().front();
123 } else if (selrestr.get_restriction() == "body_part")
124 {
125 n = database.nouns().with_singular_form("body part").limit(1).run().front();
126 } else if (selrestr.get_restriction() == "garment")
127 {
128 n = database.nouns().with_singular_form("clothing").limit(1).run().front();
129 } else if (selrestr.get_restriction() == "tool")
130 {
131 n = database.nouns().with_singular_form("tool").limit(1).run().front();
132 } else {
133 return verbly::filter<verbly::noun>{};
134 }
135
136 return verbly::filter<verbly::noun>{n, !selrestr.get_pos()};
137 }
138
139 case verbly::frame::selrestr::type::group:
140 {
141 verbly::filter<verbly::noun> ret;
142 ret.set_orlogic(selrestr.get_orlogic());
143
144 std::transform(std::begin(selrestr), std::end(selrestr), std::back_inserter(ret), [&] (verbly::frame::selrestr sr) {
145 return parse_selrestrs(sr);
146 });
147
148 return ret;
149 }
150 }
151 }
152
153 template <typename RNG>
154 void visit(verbly::token& it, RNG&& rng)
155 {
156 switch (it.get_type())
157 {
158 case verbly::token::type::utterance:
159 {
160 for (auto& tkn : it)
161 {
162 if (!tkn.is_complete())
163 {
164 visit(tkn, rng);
165
166 break;
167 }
168 }
169
170 break;
171 }
172
173 case verbly::token::type::fillin:
174 {
175 switch (it.get_fillin_type())
176 {
177 case verbly::token::fillin_type::participle_phrase:
178 {
179 for (;;)
180 {
181 verbly::verb v = database.verbs().has_frames().random().limit(1).run().front();
182 auto frames = v.frames().run();
183 std::vector<verbly::frame> filtered;
184 std::remove_copy_if(std::begin(frames), std::end(frames), std::back_inserter(filtered), [] (verbly::frame& f) {
185 if (f.parts().size() < 2)
186 {
187 return true;
188 }
189
190 if (f.parts()[0].get_type() != verbly::frame::part::type::noun_phrase)
191 {
192 return true;
193 }
194
195 if (f.parts()[0].get_role() != "Agent")
196 {
197 return true;
198 }
199
200 if (f.parts()[1].get_type() != verbly::frame::part::type::verb)
201 {
202 return true;
203 }
204
205 return false;
206 });
207
208 if (filtered.empty())
209 {
210 continue;
211 }
212
213 int fr_i = std::uniform_int_distribution<int>(0, filtered.size()-1)(rng);
214 verbly::frame fr = filtered[fr_i];
215 verbly::token utter;
216 for (auto part : fr.parts())
217 {
218 switch (part.get_type())
219 {
220 case verbly::frame::part::type::noun_phrase:
221 {
222 if (part.get_role() == "Agent")
223 {
224 continue;
225 }
226
227 if (part.get_synrestrs().count("adjp") == 1)
228 {
229 utter << verbly::token{verbly::token::fillin_type::adjective_phrase};
230
231 continue;
232 } else if ((part.get_synrestrs().count("be_sc_ing") == 1)
233 || (part.get_synrestrs().count("ac_ing") == 1)
234 || (part.get_synrestrs().count("sc_ing") == 1)
235 || (part.get_synrestrs().count("np_omit_ing") == 1)
236 || (part.get_synrestrs().count("oc_ing") == 1))
237 {
238 utter << verbly::token{verbly::token::fillin_type::participle_phrase};
239
240 continue;
241 } else if ((part.get_synrestrs().count("poss_ing") == 1)
242 || (part.get_synrestrs().count("possing") == 1)
243 || (part.get_synrestrs().count("pos_ing") == 1))
244 {
245 utter << verbly::token{"their"};
246 utter << verbly::token{verbly::token::fillin_type::participle_phrase};
247
248 continue;
249 } else if (part.get_synrestrs().count("genitive") == 1)
250 {
251 utter << verbly::token{"their"};
252
253 continue;
254 } else if (part.get_synrestrs().count("adv_loc") == 1)
255 {
256 if (std::bernoulli_distribution(1.0/2.0)(rng))
257 {
258 utter << verbly::token{"here"};
259 } else {
260 utter << verbly::token{"there"};
261 }
262
263 continue;
264 } else if (part.get_synrestrs().count("refl") == 1)
265 {
266 utter << verbly::token{"themselves"};
267
268 continue;
269 } else if ((part.get_synrestrs().count("sc_to_inf") == 1)
270 || (part.get_synrestrs().count("ac_to_inf") == 1)
271 || (part.get_synrestrs().count("vc_to_inf") == 1)
272 || (part.get_synrestrs().count("rs_to_inf") == 1)
273 || (part.get_synrestrs().count("oc_to_inf") == 1))
274 {
275 utter << verbly::token{verbly::token::fillin_type::infinitive_phrase};
276
277 continue;
278 } else if (part.get_synrestrs().count("oc_bare_inf") == 1)
279 {
280 verbly::token tkn{verbly::token::fillin_type::infinitive_phrase};
281 tkn.set_extra(1);
282
283 utter << tkn;
284
285 continue;
286 }
287
288 auto selrestrs = fr.roles()[part.get_role()];
289 auto query = database.nouns().limit(1).random().is_not_proper().full_hyponym_of(parse_selrestrs(selrestrs));
290 verbly::noun n = query.run().front();
291 if ((std::bernoulli_distribution(1.0/2.0)(rng)) && (part.get_synrestrs().count("definite") == 0))
292 {
293 utter << verbly::token{"the"};
294 } else {
295 if (n.starts_with_vowel_sound())
296 {
297 utter << verbly::token{"an"};
298 } else {
299 utter << verbly::token{"a"};
300 }
301 }
302
303 if (part.get_synrestrs().count("plural") == 1)
304 {
305 utter << verbly::token{n, verbly::token::noun_inflection::plural};
306 } else {
307 utter << verbly::token{n};
308 }
309
310 if (part.get_synrestrs().count("acc_ing") == 1)
311 {
312 utter << verbly::token{verbly::token::fillin_type::participle_phrase};
313 }
314
315 break;
316 }
317
318 case verbly::frame::part::type::verb:
319 {
320 utter << verbly::token{v, verbly::token::verb_inflection::ing_form};
321
322 break;
323 }
324
325 case verbly::frame::part::type::literal_preposition:
326 {
327 int ch_i = std::uniform_int_distribution<int>(0, part.get_choices().size()-1)(rng);
328 utter << verbly::token{part.get_choices()[ch_i]};
329
330 break;
331 }
332
333 case verbly::frame::part::type::selection_preposition:
334 {
335 auto query = database.prepositions();
336 for (auto preprestr : part.get_preprestrs())
337 {
338 query.in_group(preprestr);
339 }
340 utter << verbly::token{query.random().limit(1).run().front()};
341
342 break;
343 }
344
345 case verbly::frame::part::type::adjective:
346 {
347 utter << verbly::token{verbly::token::fillin_type::adjective_phrase};
348
349 break;
350 }
351
352 case verbly::frame::part::type::adverb:
353 {
354 utter << verbly::token{verbly::token::fillin_type::adverb_phrase};
355
356 break;
357 }
358
359 case verbly::frame::part::type::literal:
360 {
361 utter << verbly::token{part.get_literal()};
362
363 break;
364 }
365 }
366 }
367
368 it = utter;
369
370 break;
371 }
372
373 break;
374 }
375
376 case verbly::token::fillin_type::adjective_phrase:
377 {
378 verbly::token phrase;
379
380 if (std::bernoulli_distribution(1.0/4.0)(rng))
381 {
382 phrase << verbly::token{verbly::token::fillin_type::adverb_phrase};
383 }
384
385 if (std::bernoulli_distribution(1.0/2.0)(rng))
386 {
387 phrase << verbly::token{verbly::token::fillin_type::participle_phrase};
388 } else {
389 phrase << verbly::token{database.adjectives().random().limit(1).run().front()};
390 }
391
392 it = phrase;
393
394 break;
395 }
396
397 case verbly::token::fillin_type::adverb_phrase:
398 {
399 it = verbly::token{database.adverbs().random().limit(1).run().front()};
400
401 break;
402 }
403
404 case verbly::token::fillin_type::infinitive_phrase:
405 {
406 verbly::token utter;
407 if (it.get_extra() != 1)
408 {
409 utter << verbly::token{"to"};
410 }
411
412 utter << verbly::token{database.verbs().random().limit(1).run().front()};
413
414 it = utter;
415
416 break;
417 }
418
419 default:
420 {
421 it = verbly::token{"*the reality of the situation*"};
422
423 break;
424 }
425 }
426
427 break;
428 }
429 }
430 }
431};
432 10
433int main(int argc, char** argv) 11int main(int argc, char** argv)
434{ 12{
@@ -437,51 +15,45 @@ int main(int argc, char** argv)
437 std::cout << "usage: furries [configfile]" << std::endl; 15 std::cout << "usage: furries [configfile]" << std::endl;
438 return -1; 16 return -1;
439 } 17 }
440 18
441 std::string configfile(argv[1]); 19 std::string configfile(argv[1]);
442 YAML::Node config = YAML::LoadFile(configfile); 20 YAML::Node config = YAML::LoadFile(configfile);
443 21
444 std::random_device random_device; 22 std::random_device random_device;
445 std::mt19937 random_engine{random_device()}; 23 std::mt19937 random_engine{random_device()};
446 24
447 twitter::auth auth; 25 twitter::auth auth;
448 auth.setConsumerKey(config["consumer_key"].as<std::string>()); 26 auth.setConsumerKey(config["consumer_key"].as<std::string>());
449 auth.setConsumerSecret(config["consumer_secret"].as<std::string>()); 27 auth.setConsumerSecret(config["consumer_secret"].as<std::string>());
450 auth.setAccessKey(config["access_key"].as<std::string>()); 28 auth.setAccessKey(config["access_key"].as<std::string>());
451 auth.setAccessSecret(config["access_secret"].as<std::string>()); 29 auth.setAccessSecret(config["access_secret"].as<std::string>());
452 30
453 twitter::client client(auth); 31 twitter::client client(auth);
454 verbly::data database {config["verbly_datafile"].as<std::string>()}; 32 verbly::database database(config["verbly_datafile"].as<std::string>());
455 33 sentence generator(database, random_engine);
34
456 for (;;) 35 for (;;)
457 { 36 {
458 std::cout << "Generating tweet..." << std::endl; 37 std::cout << "Generating tweet..." << std::endl;
459 38
460 fill_blanks yeah {database}; 39 std::string result = generator.generate();
461 verbly::token action{
462 {"the furries are"},
463 {verbly::token::fillin_type::adjective_phrase}
464 };
465 while (!action.is_complete())
466 {
467 yeah.visit(action, random_engine);
468 }
469
470 std::string result = action.compile();
471 result.resize(140); 40 result.resize(140);
472 41
42 std::cout << result << std::endl;
43
473 try 44 try
474 { 45 {
475 client.updateStatus(result); 46 client.updateStatus(result);
476 47
477 std::cout << "Tweeted!" << std::endl; 48 std::cout << "Tweeted!" << std::endl;
49 std::cout << "Waiting..." << std::endl;
50
51 std::this_thread::sleep_for(std::chrono::hours(1));
52
53 std::cout << std::endl;
478 } catch (const twitter::twitter_error& e) 54 } catch (const twitter::twitter_error& e)
479 { 55 {
480 std::cout << "Twitter error: " << e.what() << std::endl; 56 std::cout << "Twitter error: " << e.what() << std::endl;
481 } 57 }
482
483 std::cout << "Waiting..." << std::endl;
484
485 std::this_thread::sleep_for(std::chrono::hours(1));
486 } 58 }
487} 59}
diff --git a/sentence.cpp b/sentence.cpp
new file mode 100644
index 0000000..fdf883e
--- /dev/null
+++ b/sentence.cpp
@@ -0,0 +1,682 @@
1#include "sentence.h"
2#include <algorithm>
3#include <list>
4#include <set>
5
6sentence::sentence(
7 const verbly::database& database,
8 std::mt19937& rng) :
9 database_(database),
10 rng_(rng)
11{
12}
13
14std::string sentence::generate() const
15{
16 // Generate the form that the title should take.
17 verbly::token form;
18 form << "the" << "furries" << "are";
19
20 std::set<std::string> synrestrs {"adjective_phrase"};
21 form << synrestrs;
22
23 // Compile the form.
24 while (!form.isComplete())
25 {
26 visit(form);
27 }
28
29 return form.compile();
30}
31
32verbly::filter sentence::parseSelrestrs(
33 verbly::selrestr selrestr) const
34{
35 switch (selrestr.getType())
36 {
37 case verbly::selrestr::type::empty:
38 {
39 return {};
40 }
41
42 case verbly::selrestr::type::singleton:
43 {
44 verbly::filter result;
45
46 if (selrestr.getRestriction() == "concrete")
47 {
48 result = (verbly::notion::wnid == 100001930); // physical entity
49 } else if (selrestr.getRestriction() == "time")
50 {
51 result = (verbly::notion::wnid == 100028270); // time
52 } else if (selrestr.getRestriction() == "state")
53 {
54 result = (verbly::notion::wnid == 100024720); // state
55 } else if (selrestr.getRestriction() == "abstract")
56 {
57 result = (verbly::notion::wnid == 100002137); // abstract entity
58 } else if (selrestr.getRestriction() == "scalar")
59 {
60 result = (verbly::notion::wnid == 103835412); // number
61 } else if (selrestr.getRestriction() == "currency")
62 {
63 result = (verbly::notion::wnid == 105050379); // currency
64 } else if (selrestr.getRestriction() == "location")
65 {
66 result = (verbly::notion::wnid == 100027167); // location
67 } else if (selrestr.getRestriction() == "organization")
68 {
69 result = (verbly::notion::wnid == 100237078); // organization
70 } else if (selrestr.getRestriction() == "int_control")
71 {
72 result = (verbly::notion::wnid == 100007347); // causal agent
73 } else if (selrestr.getRestriction() == "natural")
74 {
75 result = (verbly::notion::wnid == 100019128); // natural object
76 } else if (selrestr.getRestriction() == "phys_obj")
77 {
78 result = (verbly::notion::wnid == 100002684); // physical object
79 } else if (selrestr.getRestriction() == "solid")
80 {
81 result = (verbly::notion::wnid == 113860793); // solid
82 } else if (selrestr.getRestriction() == "shape")
83 {
84 result = (verbly::notion::wnid == 100027807); // shape
85 } else if (selrestr.getRestriction() == "substance")
86 {
87 result = (verbly::notion::wnid == 100019613); // substance
88 } else if (selrestr.getRestriction() == "idea")
89 {
90 result = (verbly::notion::wnid == 105803379); // idea
91 } else if (selrestr.getRestriction() == "sound")
92 {
93 result = (verbly::notion::wnid == 107111047); // sound
94 } else if (selrestr.getRestriction() == "communication")
95 {
96 result = (verbly::notion::wnid == 100033020); // communication
97 } else if (selrestr.getRestriction() == "region")
98 {
99 result = (verbly::notion::wnid == 105221895); // region
100 } else if (selrestr.getRestriction() == "place")
101 {
102 result = (verbly::notion::wnid == 100586262); // place
103 } else if (selrestr.getRestriction() == "machine")
104 {
105 result = (verbly::notion::wnid == 102958343); // machine
106 } else if (selrestr.getRestriction() == "animate")
107 {
108 result = (verbly::notion::wnid == 100004258); // animate thing
109 } else if (selrestr.getRestriction() == "plant")
110 {
111 result = (verbly::notion::wnid == 103956922); // plant
112 } else if (selrestr.getRestriction() == "comestible")
113 {
114 result = (verbly::notion::wnid == 100021265); // food
115 } else if (selrestr.getRestriction() == "artifact")
116 {
117 result = (verbly::notion::wnid == 100021939); // artifact
118 } else if (selrestr.getRestriction() == "vehicle")
119 {
120 result = (verbly::notion::wnid == 104524313); // vehicle
121 } else if (selrestr.getRestriction() == "human")
122 {
123 result = (verbly::notion::wnid == 100007846); // person
124 } else if (selrestr.getRestriction() == "animal")
125 {
126 result = (verbly::notion::wnid == 100015388); // animal
127 } else if (selrestr.getRestriction() == "body_part")
128 {
129 result = (verbly::notion::wnid == 105220461); // body part
130 } else if (selrestr.getRestriction() == "garment")
131 {
132 result = (verbly::notion::wnid == 103051540); // clothing
133 } else if (selrestr.getRestriction() == "tool")
134 {
135 result = (verbly::notion::wnid == 104451818); // tool
136 } else {
137 return {};
138 }
139
140 std::cout << selrestr.getRestriction() << " (" << selrestr.getPos() << ")" << std::endl;
141
142 if (selrestr.getPos())
143 {
144 return (verbly::notion::fullHypernyms %= result);
145 } else {
146 return !(verbly::notion::fullHypernyms %= result);
147 }
148 }
149
150 case verbly::selrestr::type::group:
151 {
152 std::cout << "or: " << selrestr.getOrlogic() << std::endl;
153 verbly::filter ret(selrestr.getOrlogic());
154
155 for (const verbly::selrestr& child : selrestr)
156 {
157 ret += parseSelrestrs(child);
158 }
159
160 return ret;
161 }
162 }
163}
164
165bool sentence::requiresSelrestr(
166 std::string restriction,
167 verbly::selrestr selrestr) const
168{
169 switch (selrestr.getType())
170 {
171 case verbly::selrestr::type::empty:
172 {
173 return false;
174 }
175
176 case verbly::selrestr::type::singleton:
177 {
178 if (selrestr.getRestriction() == restriction)
179 {
180 return selrestr.getPos();
181 } else {
182 return false;
183 }
184 }
185
186 case verbly::selrestr::type::group:
187 {
188 if (selrestr.getOrlogic())
189 {
190 return std::all_of(std::begin(selrestr), std::end(selrestr), [=] (const verbly::selrestr& s) {
191 return requiresSelrestr(restriction, s);
192 });
193 } else {
194 return std::any_of(std::begin(selrestr), std::end(selrestr), [=] (const verbly::selrestr& s) {
195 return requiresSelrestr(restriction, s);
196 });
197 }
198 }
199 }
200}
201
202verbly::word sentence::generateStandardNoun(
203 std::string role,
204 verbly::selrestr selrestrs) const
205{
206 std::geometric_distribution<int> tagdist(0.5); // 0.06
207 std::vector<verbly::word> result;
208 bool trySelection = true;
209
210 while (result.empty())
211 {
212 verbly::filter condition =
213 (verbly::notion::partOfSpeech == verbly::part_of_speech::noun)
214 && (verbly::form::proper == false)
215 && (verbly::word::tagCount >= 1)
216 && !(verbly::word::usageDomains %= (verbly::notion::wnid == 106718862)); // Blacklist ethnic slurs
217
218 // Only use selection restrictions for a first attempt.
219 if (trySelection)
220 {
221 verbly::filter selrestrCondition = parseSelrestrs(selrestrs).compact();
222
223 if (selrestrCondition.getType() != verbly::filter::type::empty)
224 {
225 condition &= std::move(selrestrCondition);
226 } else if (role == "Attribute")
227 {
228 condition &= (verbly::notion::fullHypernyms %= (verbly::notion::wnid == 100024264)); // attribute
229 } else if (role == "Instrument")
230 {
231 condition &= (verbly::notion::fullHypernyms %= (verbly::notion::wnid == 104451818)); // tool
232 } else if (role == "Agent")
233 {
234 condition &= (verbly::notion::fullHypernyms %= (verbly::notion::wnid == 100007347)); // causal agent
235 }
236
237 trySelection = false;
238 } else {
239 std::cout << "Selection failed" << std::endl;
240 }
241
242 result = database_.words(condition).all();
243 }
244
245 return result.front();
246}
247
248verbly::token sentence::generateStandardNounPhrase(
249 const verbly::word& noun,
250 std::string role,
251 bool plural,
252 bool definite) const
253{
254 verbly::token utter;
255 verbly::word sounder = noun;
256 verbly::word descript;
257
258 if (std::bernoulli_distribution(1.0/8.0)(rng_))
259 {
260 std::geometric_distribution<int> tagdist(0.2);
261 descript = database_.words(
262 (verbly::word::tagCount >= tagdist(rng_))
263 && (verbly::notion::partOfSpeech == verbly::part_of_speech::adjective)).first();
264
265 sounder = descript;
266 }
267
268 if ((std::bernoulli_distribution(1.0/3.0)(rng_)) && (definite))
269 {
270 utter << "the";
271
272 if (std::bernoulli_distribution(1.0/2.0)(rng_))
273 {
274 plural = true;
275 }
276 } else {
277 if ((role != "Theme") && (role != "Attribute") && std::bernoulli_distribution(1.0/2.0)(rng_))
278 {
279 utter << "your";
280 } else if (!plural) {
281 if (sounder.getLemma().getBaseForm().startsWithVowelSound())
282 {
283 utter << "an";
284 } else {
285 utter << "a";
286 }
287 }
288 }
289
290 if (descript)
291 {
292 utter << descript;
293 }
294
295 if (plural && noun.getLemma().hasInflection(verbly::inflection::plural))
296 {
297 utter << verbly::token(noun, verbly::inflection::plural);
298 } else {
299 utter << noun;
300 }
301
302 return utter;
303}
304
305verbly::token sentence::generateClause(
306 const verbly::token& it) const
307{
308 verbly::token utter;
309 std::geometric_distribution<int> tagdist(0.07);
310 std::vector<verbly::word> verbDataset;
311
312 verbly::filter frameCondition =
313 (verbly::frame::length >= 2)
314 && (verbly::frame::parts(0) %= (
315 (verbly::part::type == verbly::part_type::noun_phrase)
316 && (verbly::part::role == "Agent"))
317 && (verbly::frame::parts(1) %=
318 (verbly::part::type == verbly::part_type::verb))
319 && !(verbly::frame::parts() %= (
320 verbly::part::synrestrs %= "adjp")));
321
322 if (it.hasSynrestr("experiencer"))
323 {
324 frameCondition &=
325 (verbly::frame::parts(2) %=
326 (verbly::part::type == verbly::part_type::noun_phrase)
327 && !(verbly::part::synrestrs %= "genitive")
328 && ((verbly::part::role == "Patient")
329 || (verbly::part::role == "Experiencer")));
330 }
331
332 verbly::filter verbCondition =
333 (verbly::notion::partOfSpeech == verbly::part_of_speech::verb)
334 && frameCondition;
335
336 if (it.hasSynrestr("participle_phrase"))
337 {
338 verbCondition &= (verbly::lemma::forms(verbly::inflection::ing_form));
339 } else if (it.hasSynrestr("progressive"))
340 {
341 verbCondition &= (verbly::lemma::forms(verbly::inflection::s_form));
342 } else if (it.hasSynrestr("past_participle"))
343 {
344 verbCondition &= (verbly::lemma::forms(verbly::inflection::past_participle));
345 }
346
347 // Because of the tag distribution, it's possible (albeit extremely unlikely)
348 // for the verb query to fail, so we loop until it succeeds.
349 while (verbDataset.empty())
350 {
351 verbDataset = database_.words(
352 verbCondition
353 && (verbly::word::tagCount >= tagdist(rng_))
354 ).all();
355 }
356
357 verbly::word verb = verbDataset.front();
358 verbly::frame frame = database_.frames(frameCondition && verb).first();
359 std::list<verbly::part> parts(std::begin(frame.getParts()), std::end(frame.getParts()));
360
361 if (it.hasSynrestr("experiencer"))
362 {
363 // Ignore the direct object.
364 parts.erase(std::next(parts.begin(), 2));
365 }
366
367 if (it.hasSynrestr("subjectless"))
368 {
369 // Ignore the subject.
370 parts.pop_front();
371 }
372
373 for (const verbly::part& part : parts)
374 {
375 switch (part.getType())
376 {
377 case verbly::part_type::noun_phrase:
378 {
379 std::cout << "NP: ";
380 for (auto& s : part.getNounSynrestrs())
381 {
382 std::cout << s << " ";
383 }
384 std::cout << std::endl;
385
386 if (requiresSelrestr("currency", part.getNounSelrestrs()))
387 {
388 int lead = std::uniform_int_distribution<int>(1,9)(rng_);
389 int tail = std::uniform_int_distribution<int>(0,6)(rng_);
390 std::string tailStr(tail, '0');
391
392 utter << ("$" + std::to_string(lead) + tailStr);
393 } else if (part.nounHasSynrestr("adjp"))
394 {
395 utter << std::set<std::string>({"adjective_phrase"});
396 } else if ((part.nounHasSynrestr("be_sc_ing"))
397 || (part.nounHasSynrestr("ac_ing"))
398 || (part.nounHasSynrestr("sc_ing"))
399 || (part.nounHasSynrestr("np_omit_ing"))
400 || (part.nounHasSynrestr("oc_ing")))
401 {
402 utter << std::set<std::string>({"participle_phrase", "subjectless"});
403 } else if ((part.nounHasSynrestr("poss_ing"))
404 || (part.nounHasSynrestr("possing"))
405 || (part.nounHasSynrestr("pos_ing")))
406 {
407 utter << "your";
408 utter << std::set<std::string>({"participle_phrase", "subjectless"});
409 } else if (part.nounHasSynrestr("genitive"))
410 {
411 utter << "your";
412 } else if (part.nounHasSynrestr("adv_loc"))
413 {
414 if (std::bernoulli_distribution(1.0/2.0)(rng_))
415 {
416 utter << "here";
417 } else {
418 utter << "there";
419 }
420 } else if (part.nounHasSynrestr("refl"))
421 {
422 utter << "yourself";
423 } else if ((part.nounHasSynrestr("sc_to_inf"))
424 || (part.nounHasSynrestr("ac_to_inf"))
425 || (part.nounHasSynrestr("vc_to_inf"))
426 || (part.nounHasSynrestr("rs_to_inf"))
427 || (part.nounHasSynrestr("oc_to_inf")))
428 {
429 utter << std::set<std::string>({"infinitive_phrase", "subjectless"});
430 } else if (part.nounHasSynrestr("oc_bare_inf"))
431 {
432 utter << std::set<std::string>({"infinitive_phrase", "bare", "subjectless"});
433 } else if (part.nounHasSynrestr("wh_comp"))
434 {
435 utter << "whether";
436
437 verbly::token sentence(std::set<std::string>({"progressive"}));
438 utter << generateClause(sentence);
439 } else if (part.nounHasSynrestr("that_comp"))
440 {
441 utter << "that";
442 utter << "they";
443
444 verbly::token sentence(std::set<std::string>({"subjectless"}));
445 utter << generateClause(sentence);
446 } else if (part.nounHasSynrestr("what_extract"))
447 {
448 utter << "what";
449
450 verbly::token sentence(std::set<std::string>({"progressive", "experiencer"}));
451 utter << generateClause(sentence);
452 } else if (part.nounHasSynrestr("how_extract"))
453 {
454 utter << "how";
455
456 verbly::token sentence(std::set<std::string>({"progressive"}));
457 utter << generateClause(sentence);
458 } else if (part.nounHasSynrestr("wh_inf"))
459 {
460 utter << "how";
461
462 verbly::token sentence(std::set<std::string>({"infinitive_phrase", "subjectless"}));
463 utter << generateClause(sentence);
464 } else if (part.nounHasSynrestr("what_inf"))
465 {
466 utter << "what";
467
468 verbly::token sentence(std::set<std::string>({"infinitive_phrase", "subjectless", "experiencer"}));
469 utter << generateClause(sentence);
470 } else if (part.nounHasSynrestr("wheth_inf"))
471 {
472 utter << "whether";
473
474 verbly::token sentence(std::set<std::string>({"infinitive_phrase", "subjectless"}));
475 utter << generateClause(sentence);
476 } else if (part.nounHasSynrestr("quotation"))
477 {
478 verbly::token sentence(std::set<std::string>({"participle_phrase"}));
479 while (!sentence.isComplete())
480 {
481 visit(sentence);
482 }
483
484 utter << ("\"" + sentence.compile() + "\"");
485 } else {
486 verbly::word noun = generateStandardNoun(part.getNounRole(), part.getNounSelrestrs());
487
488 bool plural = part.nounHasSynrestr("plural");
489 if (!plural)
490 {
491 plural = requiresSelrestr("plural", part.getNounSelrestrs());
492 }
493
494 utter << generateStandardNounPhrase(
495 noun,
496 part.getNounRole(),
497 plural,
498 part.nounHasSynrestr("definite"));
499
500 if (part.nounHasSynrestr("acc_ing") || part.nounHasSynrestr("ac_ing"))
501 {
502 utter << std::set<std::string>({"participle_phrase", "subjectless"});
503 }
504 }
505
506 break;
507 }
508
509 case verbly::part_type::verb:
510 {
511 std::cout << "V: " << verb.getBaseForm() << std::endl;
512
513 if (it.hasSynrestr("progressive"))
514 {
515 utter << verbly::token(verb, verbly::inflection::s_form);
516 } else if (it.hasSynrestr("past_participle"))
517 {
518 utter << verbly::token(verb, verbly::inflection::past_participle);
519 } else if (it.hasSynrestr("infinitive_phrase"))
520 {
521 if (!it.hasSynrestr("bare"))
522 {
523 utter << "to";
524 }
525
526 utter << verb;
527 } else if (it.hasSynrestr("participle_phrase"))
528 {
529 utter << verbly::token(verb, verbly::inflection::ing_form);
530 } else {
531 utter << verb;
532 }
533
534 break;
535 }
536
537 case verbly::part_type::preposition:
538 {
539 std::cout << "PREP" << std::endl;
540
541 if (part.isPrepositionLiteral())
542 {
543 int choiceIndex = std::uniform_int_distribution<int>(0, part.getPrepositionChoices().size()-1)(rng_);
544 utter << part.getPrepositionChoices()[choiceIndex];
545 } else {
546 verbly::filter pgf(true);
547 for (const std::string& choice : part.getPrepositionChoices())
548 {
549 pgf += (verbly::notion::prepositionGroups == choice);
550 }
551
552 utter << database_.words(pgf && (verbly::notion::partOfSpeech == verbly::part_of_speech::preposition)).first();
553 }
554
555 break;
556 }
557
558 case verbly::part_type::adjective:
559 {
560 std::cout << "ADJ" << std::endl;
561
562 utter << std::set<std::string>({"adjective_phrase"});
563
564 break;
565 }
566
567 case verbly::part_type::adverb:
568 {
569 std::cout << "ADV" << std::endl;
570
571 utter << std::set<std::string>({"adverb_phrase"});
572
573 break;
574 }
575
576 case verbly::part_type::literal:
577 {
578 std::cout << "LIT" << std::endl;
579
580 utter << part.getLiteralValue();
581
582 break;
583 }
584
585 case verbly::part_type::invalid:
586 {
587 // Nope
588
589 break;
590 }
591 }
592 }
593
594 if ((parts.size() == 1) && (std::bernoulli_distribution(1.0/4.0)(rng_)))
595 {
596 utter << std::set<std::string>({"adverb_phrase"});
597 }
598
599 return utter;
600}
601
602void sentence::visit(verbly::token& it) const
603{
604 switch (it.getType())
605 {
606 case verbly::token::type::utterance:
607 {
608 for (verbly::token& token : it)
609 {
610 if (!token.isComplete())
611 {
612 visit(token);
613
614 break;
615 }
616 }
617
618 break;
619 }
620
621 case verbly::token::type::fillin:
622 {
623 if (it.hasSynrestr("infinitive_phrase"))
624 {
625 it = generateClause(it);
626 } else if (it.hasSynrestr("adjective_phrase"))
627 {
628 verbly::token phrase;
629
630 if (std::bernoulli_distribution(1.0/2.0)(rng_))
631 {
632 phrase << std::set<std::string>({"participle_phrase", "subjectless"});
633 } else {
634 if (std::bernoulli_distribution(1.0/6.0)(rng_))
635 {
636 phrase << std::set<std::string>({"adverb_phrase"});
637 }
638
639 std::geometric_distribution<int> tagdist(0.2);
640 phrase << database_.words(
641 (verbly::word::tagCount >= tagdist(rng_))
642 && (verbly::notion::partOfSpeech == verbly::part_of_speech::adjective)).first();
643 }
644
645 it = phrase;
646 } else if (it.hasSynrestr("adverb_phrase"))
647 {
648 std::geometric_distribution<int> tagdist(1.0/23.0);
649
650 it = database_.words(
651 (verbly::notion::partOfSpeech == verbly::part_of_speech::adverb)
652 && (verbly::word::tagCount >= tagdist(rng_))
653 ).first();
654 } else if (it.hasSynrestr("participle_phrase"))
655 {
656 if (std::bernoulli_distribution(1.0/2.0)(rng_))
657 {
658 it = verbly::token(
659 database_.words(
660 (verbly::notion::partOfSpeech == verbly::part_of_speech::verb)
661 && (verbly::lemma::forms(verbly::inflection::ing_form))).first(),
662 verbly::inflection::ing_form);
663 } else {
664 it = generateClause(it);
665 }
666 } else {
667 it = "*the reality of the situation*";
668 }
669
670 break;
671 }
672
673 case verbly::token::type::word:
674 case verbly::token::type::literal:
675 case verbly::token::type::part:
676 {
677 // Nope
678
679 break;
680 }
681 }
682}
diff --git a/sentence.h b/sentence.h new file mode 100644 index 0000000..abf6860 --- /dev/null +++ b/sentence.h
@@ -0,0 +1,39 @@
1#ifndef SENTENCE_H_81987F60
2#define SENTENCE_H_81987F60
3
4#include <verbly.h>
5#include <random>
6#include <string>
7
8class sentence {
9public:
10
11 sentence(
12 const verbly::database& database,
13 std::mt19937& rng);
14
15 std::string generate() const;
16
17private:
18
19 verbly::filter parseSelrestrs(verbly::selrestr selrestr) const;
20
21 bool requiresSelrestr(std::string restriction, verbly::selrestr selrestr) const;
22
23 verbly::word generateStandardNoun(std::string role, verbly::selrestr selrestrs) const;
24
25 verbly::token generateStandardNounPhrase(
26 const verbly::word& noun,
27 std::string role,
28 bool plural,
29 bool definite) const;
30
31 verbly::token generateClause(const verbly::token& it) const;
32
33 void visit(verbly::token& it) const;
34
35 const verbly::database& database_;
36 std::mt19937& rng_;
37};
38
39#endif /* end of include guard: SENTENCE_H_81987F60 */
diff --git a/vendor/verbly b/vendor/verbly
Subproject 1f898f3bd66c29672275c2c884b17ba662ced62 Subproject bea3673ae1b3d19585dec56e96dbcd8a56b96e6