about summary refs log tree commit diff stats
path: root/sentence.cpp
diff options
context:
space:
mode:
author Kelly Rauchenberger <fefferburbia@gmail.com> 2017-02-04 10:32:55 -0500
committer Kelly Rauchenberger <fefferburbia@gmail.com> 2017-02-04 10:32:55 -0500
commit 8c2e7c646f2a549ea9b4db831d8088f57b3287ae (patch)
tree fa9070e88a34284c6089b2939ea1d1ca6bef147e /sentence.cpp
parent cb77fded0b9a8a9034ace592be04176c8778ddca (diff)
download furries-8c2e7c646f2a549ea9b4db831d8088f57b3287ae.tar.gz
furries-8c2e7c646f2a549ea9b4db831d8088f57b3287ae.tar.bz2
furries-8c2e7c646f2a549ea9b4db831d8088f57b3287ae.zip
Updated verbly (new API)
Notably, the bot should not be able to use ethnic slurs now.

sentence.cpp is basically just copied from advice.
Diffstat (limited to 'sentence.cpp')
-rw-r--r-- sentence.cpp 682
1 files changed, 682 insertions, 0 deletions
diff --git a/sentence.cpp b/sentence.cpp
new file mode 100644
index 0000000..fdf883e
--- /dev/null
+++ b/sentence.cpp
@@ -0,0 +1,682 @@
1#include "sentence.h"
2#include <algorithm>
3#include <list>
4#include <set>
5
6sentence::sentence(
7 const verbly::database& database,
8 std::mt19937& rng) :
9 database_(database),
10 rng_(rng)
11{
12}
13
14std::string sentence::generate() const
15{
16 // Generate the form that the title should take.
17 verbly::token form;
18 form << "the" << "furries" << "are";
19
20 std::set<std::string> synrestrs {"adjective_phrase"};
21 form << synrestrs;
22
23 // Compile the form.
24 while (!form.isComplete())
25 {
26 visit(form);
27 }
28
29 return form.compile();
30}
31
32verbly::filter sentence::parseSelrestrs(
33 verbly::selrestr selrestr) const
34{
35 switch (selrestr.getType())
36 {
37 case verbly::selrestr::type::empty:
38 {
39 return {};
40 }
41
42 case verbly::selrestr::type::singleton:
43 {
44 verbly::filter result;
45
46 if (selrestr.getRestriction() == "concrete")
47 {
48 result = (verbly::notion::wnid == 100001930); // physical entity
49 } else if (selrestr.getRestriction() == "time")
50 {
51 result = (verbly::notion::wnid == 100028270); // time
52 } else if (selrestr.getRestriction() == "state")
53 {
54 result = (verbly::notion::wnid == 100024720); // state
55 } else if (selrestr.getRestriction() == "abstract")
56 {
57 result = (verbly::notion::wnid == 100002137); // abstract entity
58 } else if (selrestr.getRestriction() == "scalar")
59 {
60 result = (verbly::notion::wnid == 103835412); // number
61 } else if (selrestr.getRestriction() == "currency")
62 {
63 result = (verbly::notion::wnid == 105050379); // currency
64 } else if (selrestr.getRestriction() == "location")
65 {
66 result = (verbly::notion::wnid == 100027167); // location
67 } else if (selrestr.getRestriction() == "organization")
68 {
69 result = (verbly::notion::wnid == 100237078); // organization
70 } else if (selrestr.getRestriction() == "int_control")
71 {
72 result = (verbly::notion::wnid == 100007347); // causal agent
73 } else if (selrestr.getRestriction() == "natural")
74 {
75 result = (verbly::notion::wnid == 100019128); // natural object
76 } else if (selrestr.getRestriction() == "phys_obj")
77 {
78 result = (verbly::notion::wnid == 100002684); // physical object
79 } else if (selrestr.getRestriction() == "solid")
80 {
81 result = (verbly::notion::wnid == 113860793); // solid
82 } else if (selrestr.getRestriction() == "shape")
83 {
84 result = (verbly::notion::wnid == 100027807); // shape
85 } else if (selrestr.getRestriction() == "substance")
86 {
87 result = (verbly::notion::wnid == 100019613); // substance
88 } else if (selrestr.getRestriction() == "idea")
89 {
90 result = (verbly::notion::wnid == 105803379); // idea
91 } else if (selrestr.getRestriction() == "sound")
92 {
93 result = (verbly::notion::wnid == 107111047); // sound
94 } else if (selrestr.getRestriction() == "communication")
95 {
96 result = (verbly::notion::wnid == 100033020); // communication
97 } else if (selrestr.getRestriction() == "region")
98 {
99 result = (verbly::notion::wnid == 105221895); // region
100 } else if (selrestr.getRestriction() == "place")
101 {
102 result = (verbly::notion::wnid == 100586262); // place
103 } else if (selrestr.getRestriction() == "machine")
104 {
105 result = (verbly::notion::wnid == 102958343); // machine
106 } else if (selrestr.getRestriction() == "animate")
107 {
108 result = (verbly::notion::wnid == 100004258); // animate thing
109 } else if (selrestr.getRestriction() == "plant")
110 {
111 result = (verbly::notion::wnid == 103956922); // plant
112 } else if (selrestr.getRestriction() == "comestible")
113 {
114 result = (verbly::notion::wnid == 100021265); // food
115 } else if (selrestr.getRestriction() == "artifact")
116 {
117 result = (verbly::notion::wnid == 100021939); // artifact
118 } else if (selrestr.getRestriction() == "vehicle")
119 {
120 result = (verbly::notion::wnid == 104524313); // vehicle
121 } else if (selrestr.getRestriction() == "human")
122 {
123 result = (verbly::notion::wnid == 100007846); // person
124 } else if (selrestr.getRestriction() == "animal")
125 {
126 result = (verbly::notion::wnid == 100015388); // animal
127 } else if (selrestr.getRestriction() == "body_part")
128 {
129 result = (verbly::notion::wnid == 105220461); // body part
130 } else if (selrestr.getRestriction() == "garment")
131 {
132 result = (verbly::notion::wnid == 103051540); // clothing
133 } else if (selrestr.getRestriction() == "tool")
134 {
135 result = (verbly::notion::wnid == 104451818); // tool
136 } else {
137 return {};
138 }
139
140 std::cout << selrestr.getRestriction() << " (" << selrestr.getPos() << ")" << std::endl;
141
142 if (selrestr.getPos())
143 {
144 return (verbly::notion::fullHypernyms %= result);
145 } else {
146 return !(verbly::notion::fullHypernyms %= result);
147 }
148 }
149
150 case verbly::selrestr::type::group:
151 {
152 std::cout << "or: " << selrestr.getOrlogic() << std::endl;
153 verbly::filter ret(selrestr.getOrlogic());
154
155 for (const verbly::selrestr& child : selrestr)
156 {
157 ret += parseSelrestrs(child);
158 }
159
160 return ret;
161 }
162 }
163}
164
165bool sentence::requiresSelrestr(
166 std::string restriction,
167 verbly::selrestr selrestr) const
168{
169 switch (selrestr.getType())
170 {
171 case verbly::selrestr::type::empty:
172 {
173 return false;
174 }
175
176 case verbly::selrestr::type::singleton:
177 {
178 if (selrestr.getRestriction() == restriction)
179 {
180 return selrestr.getPos();
181 } else {
182 return false;
183 }
184 }
185
186 case verbly::selrestr::type::group:
187 {
188 if (selrestr.getOrlogic())
189 {
190 return std::all_of(std::begin(selrestr), std::end(selrestr), [=] (const verbly::selrestr& s) {
191 return requiresSelrestr(restriction, s);
192 });
193 } else {
194 return std::any_of(std::begin(selrestr), std::end(selrestr), [=] (const verbly::selrestr& s) {
195 return requiresSelrestr(restriction, s);
196 });
197 }
198 }
199 }
200}
201
202verbly::word sentence::generateStandardNoun(
203 std::string role,
204 verbly::selrestr selrestrs) const
205{
206 std::geometric_distribution<int> tagdist(0.5); // 0.06
207 std::vector<verbly::word> result;
208 bool trySelection = true;
209
210 while (result.empty())
211 {
212 verbly::filter condition =
213 (verbly::notion::partOfSpeech == verbly::part_of_speech::noun)
214 && (verbly::form::proper == false)
215 && (verbly::word::tagCount >= 1)
216 && !(verbly::word::usageDomains %= (verbly::notion::wnid == 106718862)); // Blacklist ethnic slurs
217
218 // Only use selection restrictions for a first attempt.
219 if (trySelection)
220 {
221 verbly::filter selrestrCondition = parseSelrestrs(selrestrs).compact();
222
223 if (selrestrCondition.getType() != verbly::filter::type::empty)
224 {
225 condition &= std::move(selrestrCondition);
226 } else if (role == "Attribute")
227 {
228 condition &= (verbly::notion::fullHypernyms %= (verbly::notion::wnid == 100024264)); // attribute
229 } else if (role == "Instrument")
230 {
231 condition &= (verbly::notion::fullHypernyms %= (verbly::notion::wnid == 104451818)); // tool
232 } else if (role == "Agent")
233 {
234 condition &= (verbly::notion::fullHypernyms %= (verbly::notion::wnid == 100007347)); // causal agent
235 }
236
237 trySelection = false;
238 } else {
239 std::cout << "Selection failed" << std::endl;
240 }
241
242 result = database_.words(condition).all();
243 }
244
245 return result.front();
246}
247
248verbly::token sentence::generateStandardNounPhrase(
249 const verbly::word& noun,
250 std::string role,
251 bool plural,
252 bool definite) const
253{
254 verbly::token utter;
255 verbly::word sounder = noun;
256 verbly::word descript;
257
258 if (std::bernoulli_distribution(1.0/8.0)(rng_))
259 {
260 std::geometric_distribution<int> tagdist(0.2);
261 descript = database_.words(
262 (verbly::word::tagCount >= tagdist(rng_))
263 && (verbly::notion::partOfSpeech == verbly::part_of_speech::adjective)).first();
264
265 sounder = descript;
266 }
267
268 if ((std::bernoulli_distribution(1.0/3.0)(rng_)) && (definite))
269 {
270 utter << "the";
271
272 if (std::bernoulli_distribution(1.0/2.0)(rng_))
273 {
274 plural = true;
275 }
276 } else {
277 if ((role != "Theme") && (role != "Attribute") && std::bernoulli_distribution(1.0/2.0)(rng_))
278 {
279 utter << "your";
280 } else if (!plural) {
281 if (sounder.getLemma().getBaseForm().startsWithVowelSound())
282 {
283 utter << "an";
284 } else {
285 utter << "a";
286 }
287 }
288 }
289
290 if (descript)
291 {
292 utter << descript;
293 }
294
295 if (plural && noun.getLemma().hasInflection(verbly::inflection::plural))
296 {
297 utter << verbly::token(noun, verbly::inflection::plural);
298 } else {
299 utter << noun;
300 }
301
302 return utter;
303}
304
305verbly::token sentence::generateClause(
306 const verbly::token& it) const
307{
308 verbly::token utter;
309 std::geometric_distribution<int> tagdist(0.07);
310 std::vector<verbly::word> verbDataset;
311
312 verbly::filter frameCondition =
313 (verbly::frame::length >= 2)
314 && (verbly::frame::parts(0) %= (
315 (verbly::part::type == verbly::part_type::noun_phrase)
316 && (verbly::part::role == "Agent"))
317 && (verbly::frame::parts(1) %=
318 (verbly::part::type == verbly::part_type::verb))
319 && !(verbly::frame::parts() %= (
320 verbly::part::synrestrs %= "adjp")));
321
322 if (it.hasSynrestr("experiencer"))
323 {
324 frameCondition &=
325 (verbly::frame::parts(2) %=
326 (verbly::part::type == verbly::part_type::noun_phrase)
327 && !(verbly::part::synrestrs %= "genitive")
328 && ((verbly::part::role == "Patient")
329 || (verbly::part::role == "Experiencer")));
330 }
331
332 verbly::filter verbCondition =
333 (verbly::notion::partOfSpeech == verbly::part_of_speech::verb)
334 && frameCondition;
335
336 if (it.hasSynrestr("participle_phrase"))
337 {
338 verbCondition &= (verbly::lemma::forms(verbly::inflection::ing_form));
339 } else if (it.hasSynrestr("progressive"))
340 {
341 verbCondition &= (verbly::lemma::forms(verbly::inflection::s_form));
342 } else if (it.hasSynrestr("past_participle"))
343 {
344 verbCondition &= (verbly::lemma::forms(verbly::inflection::past_participle));
345 }
346
347 // Because of the tag distribution, it's possible (albeit extremely unlikely)
348 // for the verb query to fail, so we loop until it succeeds.
349 while (verbDataset.empty())
350 {
351 verbDataset = database_.words(
352 verbCondition
353 && (verbly::word::tagCount >= tagdist(rng_))
354 ).all();
355 }
356
357 verbly::word verb = verbDataset.front();
358 verbly::frame frame = database_.frames(frameCondition && verb).first();
359 std::list<verbly::part> parts(std::begin(frame.getParts()), std::end(frame.getParts()));
360
361 if (it.hasSynrestr("experiencer"))
362 {
363 // Ignore the direct object.
364 parts.erase(std::next(parts.begin(), 2));
365 }
366
367 if (it.hasSynrestr("subjectless"))
368 {
369 // Ignore the subject.
370 parts.pop_front();
371 }
372
373 for (const verbly::part& part : parts)
374 {
375 switch (part.getType())
376 {
377 case verbly::part_type::noun_phrase:
378 {
379 std::cout << "NP: ";
380 for (auto& s : part.getNounSynrestrs())
381 {
382 std::cout << s << " ";
383 }
384 std::cout << std::endl;
385
386 if (requiresSelrestr("currency", part.getNounSelrestrs()))
387 {
388 int lead = std::uniform_int_distribution<int>(1,9)(rng_);
389 int tail = std::uniform_int_distribution<int>(0,6)(rng_);
390 std::string tailStr(tail, '0');
391
392 utter << ("$" + std::to_string(lead) + tailStr);
393 } else if (part.nounHasSynrestr("adjp"))
394 {
395 utter << std::set<std::string>({"adjective_phrase"});
396 } else if ((part.nounHasSynrestr("be_sc_ing"))
397 || (part.nounHasSynrestr("ac_ing"))
398 || (part.nounHasSynrestr("sc_ing"))
399 || (part.nounHasSynrestr("np_omit_ing"))
400 || (part.nounHasSynrestr("oc_ing")))
401 {
402 utter << std::set<std::string>({"participle_phrase", "subjectless"});
403 } else if ((part.nounHasSynrestr("poss_ing"))
404 || (part.nounHasSynrestr("possing"))
405 || (part.nounHasSynrestr("pos_ing")))
406 {
407 utter << "your";
408 utter << std::set<std::string>({"participle_phrase", "subjectless"});
409 } else if (part.nounHasSynrestr("genitive"))
410 {
411 utter << "your";
412 } else if (part.nounHasSynrestr("adv_loc"))
413 {
414 if (std::bernoulli_distribution(1.0/2.0)(rng_))
415 {
416 utter << "here";
417 } else {
418 utter << "there";
419 }
420 } else if (part.nounHasSynrestr("refl"))
421 {
422 utter << "yourself";
423 } else if ((part.nounHasSynrestr("sc_to_inf"))
424 || (part.nounHasSynrestr("ac_to_inf"))
425 || (part.nounHasSynrestr("vc_to_inf"))
426 || (part.nounHasSynrestr("rs_to_inf"))
427 || (part.nounHasSynrestr("oc_to_inf")))
428 {
429 utter << std::set<std::string>({"infinitive_phrase", "subjectless"});
430 } else if (part.nounHasSynrestr("oc_bare_inf"))
431 {
432 utter << std::set<std::string>({"infinitive_phrase", "bare", "subjectless"});
433 } else if (part.nounHasSynrestr("wh_comp"))
434 {
435 utter << "whether";
436
437 verbly::token sentence(std::set<std::string>({"progressive"}));
438 utter << generateClause(sentence);
439 } else if (part.nounHasSynrestr("that_comp"))
440 {
441 utter << "that";
442 utter << "they";
443
444 verbly::token sentence(std::set<std::string>({"subjectless"}));
445 utter << generateClause(sentence);
446 } else if (part.nounHasSynrestr("what_extract"))
447 {
448 utter << "what";
449
450 verbly::token sentence(std::set<std::string>({"progressive", "experiencer"}));
451 utter << generateClause(sentence);
452 } else if (part.nounHasSynrestr("how_extract"))
453 {
454 utter << "how";
455
456 verbly::token sentence(std::set<std::string>({"progressive"}));
457 utter << generateClause(sentence);
458 } else if (part.nounHasSynrestr("wh_inf"))
459 {
460 utter << "how";
461
462 verbly::token sentence(std::set<std::string>({"infinitive_phrase", "subjectless"}));
463 utter << generateClause(sentence);
464 } else if (part.nounHasSynrestr("what_inf"))
465 {
466 utter << "what";
467
468 verbly::token sentence(std::set<std::string>({"infinitive_phrase", "subjectless", "experiencer"}));
469 utter << generateClause(sentence);
470 } else if (part.nounHasSynrestr("wheth_inf"))
471 {
472 utter << "whether";
473
474 verbly::token sentence(std::set<std::string>({"infinitive_phrase", "subjectless"}));
475 utter << generateClause(sentence);
476 } else if (part.nounHasSynrestr("quotation"))
477 {
478 verbly::token sentence(std::set<std::string>({"participle_phrase"}));
479 while (!sentence.isComplete())
480 {
481 visit(sentence);
482 }
483
484 utter << ("\"" + sentence.compile() + "\"");
485 } else {
486 verbly::word noun = generateStandardNoun(part.getNounRole(), part.getNounSelrestrs());
487
488 bool plural = part.nounHasSynrestr("plural");
489 if (!plural)
490 {
491 plural = requiresSelrestr("plural", part.getNounSelrestrs());
492 }
493
494 utter << generateStandardNounPhrase(
495 noun,
496 part.getNounRole(),
497 plural,
498 part.nounHasSynrestr("definite"));
499
500 if (part.nounHasSynrestr("acc_ing") || part.nounHasSynrestr("ac_ing"))
501 {
502 utter << std::set<std::string>({"participle_phrase", "subjectless"});
503 }
504 }
505
506 break;
507 }
508
509 case verbly::part_type::verb:
510 {
511 std::cout << "V: " << verb.getBaseForm() << std::endl;
512
513 if (it.hasSynrestr("progressive"))
514 {
515 utter << verbly::token(verb, verbly::inflection::s_form);
516 } else if (it.hasSynrestr("past_participle"))
517 {
518 utter << verbly::token(verb, verbly::inflection::past_participle);
519 } else if (it.hasSynrestr("infinitive_phrase"))
520 {
521 if (!it.hasSynrestr("bare"))
522 {
523 utter << "to";
524 }
525
526 utter << verb;
527 } else if (it.hasSynrestr("participle_phrase"))
528 {
529 utter << verbly::token(verb, verbly::inflection::ing_form);
530 } else {
531 utter << verb;
532 }
533
534 break;
535 }
536
537 case verbly::part_type::preposition:
538 {
539 std::cout << "PREP" << std::endl;
540
541 if (part.isPrepositionLiteral())
542 {
543 int choiceIndex = std::uniform_int_distribution<int>(0, part.getPrepositionChoices().size()-1)(rng_);
544 utter << part.getPrepositionChoices()[choiceIndex];
545 } else {
546 verbly::filter pgf(true);
547 for (const std::string& choice : part.getPrepositionChoices())
548 {
549 pgf += (verbly::notion::prepositionGroups == choice);
550 }
551
552 utter << database_.words(pgf && (verbly::notion::partOfSpeech == verbly::part_of_speech::preposition)).first();
553 }
554
555 break;
556 }
557
558 case verbly::part_type::adjective:
559 {
560 std::cout << "ADJ" << std::endl;
561
562 utter << std::set<std::string>({"adjective_phrase"});
563
564 break;
565 }
566
567 case verbly::part_type::adverb:
568 {
569 std::cout << "ADV" << std::endl;
570
571 utter << std::set<std::string>({"adverb_phrase"});
572
573 break;
574 }
575
576 case verbly::part_type::literal:
577 {
578 std::cout << "LIT" << std::endl;
579
580 utter << part.getLiteralValue();
581
582 break;
583 }
584
585 case verbly::part_type::invalid:
586 {
587 // Nope
588
589 break;
590 }
591 }
592 }
593
594 if ((parts.size() == 1) && (std::bernoulli_distribution(1.0/4.0)(rng_)))
595 {
596 utter << std::set<std::string>({"adverb_phrase"});
597 }
598
599 return utter;
600}
601
602void sentence::visit(verbly::token& it) const
603{
604 switch (it.getType())
605 {
606 case verbly::token::type::utterance:
607 {
608 for (verbly::token& token : it)
609 {
610 if (!token.isComplete())
611 {
612 visit(token);
613
614 break;
615 }
616 }
617
618 break;
619 }
620
621 case verbly::token::type::fillin:
622 {
623 if (it.hasSynrestr("infinitive_phrase"))
624 {
625 it = generateClause(it);
626 } else if (it.hasSynrestr("adjective_phrase"))
627 {
628 verbly::token phrase;
629
630 if (std::bernoulli_distribution(1.0/2.0)(rng_))
631 {
632 phrase << std::set<std::string>({"participle_phrase", "subjectless"});
633 } else {
634 if (std::bernoulli_distribution(1.0/6.0)(rng_))
635 {
636 phrase << std::set<std::string>({"adverb_phrase"});
637 }
638
639 std::geometric_distribution<int> tagdist(0.2);
640 phrase << database_.words(
641 (verbly::word::tagCount >= tagdist(rng_))
642 && (verbly::notion::partOfSpeech == verbly::part_of_speech::adjective)).first();
643 }
644
645 it = phrase;
646 } else if (it.hasSynrestr("adverb_phrase"))
647 {
648 std::geometric_distribution<int> tagdist(1.0/23.0);
649
650 it = database_.words(
651 (verbly::notion::partOfSpeech == verbly::part_of_speech::adverb)
652 && (verbly::word::tagCount >= tagdist(rng_))
653 ).first();
654 } else if (it.hasSynrestr("participle_phrase"))
655 {
656 if (std::bernoulli_distribution(1.0/2.0)(rng_))
657 {
658 it = verbly::token(
659 database_.words(
660 (verbly::notion::partOfSpeech == verbly::part_of_speech::verb)
661 && (verbly::lemma::forms(verbly::inflection::ing_form))).first(),
662 verbly::inflection::ing_form);
663 } else {
664 it = generateClause(it);
665 }
666 } else {
667 it = "*the reality of the situation*";
668 }
669
670 break;
671 }
672
673 case verbly::token::type::word:
674 case verbly::token::type::literal:
675 case verbly::token::type::part:
676 {
677 // Nope
678
679 break;
680 }
681 }
682}