diff options
Diffstat (limited to 'sentence.cpp')
-rw-r--r-- | sentence.cpp | 754 |
1 files changed, 754 insertions, 0 deletions
diff --git a/sentence.cpp b/sentence.cpp new file mode 100644 index 0000000..421aaf6 --- /dev/null +++ b/sentence.cpp | |||
@@ -0,0 +1,754 @@ | |||
#include "sentence.h"
#include <algorithm>
#include <cctype>
#include <iostream>
#include <list>
#include <set>
#include <stdexcept>
5 | |||
6 | sentence::sentence( | ||
7 | const verbly::database& database, | ||
8 | std::mt19937& rng) : | ||
9 | database_(database), | ||
10 | rng_(rng) | ||
11 | { | ||
12 | } | ||
13 | |||
14 | std::string sentence::generate() const | ||
15 | { | ||
16 | // Generate the form that the title should take. | ||
17 | verbly::token form; | ||
18 | std::set<std::string> synrestrs {"infinitive_phrase", "bare", "subjectless"}; | ||
19 | std::set<std::string> secondSyn {"participle_phrase", "subjectless"}; | ||
20 | std::set<std::string> adjSyn {"adjective_phrase"}; | ||
21 | |||
22 | if (std::bernoulli_distribution(1.0/6.0)(rng_)) | ||
23 | { | ||
24 | form << "not"; | ||
25 | } | ||
26 | |||
27 | if (std::bernoulli_distribution(1.0/6.0)(rng_)) | ||
28 | { | ||
29 | form << "be"; | ||
30 | form << adjSyn; | ||
31 | } else { | ||
32 | if (std::bernoulli_distribution(1.0/6.0)(rng_)) | ||
33 | { | ||
34 | form << "get"; | ||
35 | synrestrs.insert("experiencer"); | ||
36 | synrestrs.insert("past_participle"); | ||
37 | } | ||
38 | |||
39 | form << synrestrs; | ||
40 | } | ||
41 | |||
42 | if (std::bernoulli_distribution(1.0/5.0)(rng_)) | ||
43 | { | ||
44 | if (std::bernoulli_distribution(1.0/4.0)(rng_)) | ||
45 | { | ||
46 | form << "without"; | ||
47 | } else { | ||
48 | form << "while"; | ||
49 | } | ||
50 | |||
51 | form << secondSyn; | ||
52 | } | ||
53 | |||
54 | // Attempt to compile the form, restarting if a bad word is generated. | ||
55 | std::set<std::string> badWords = {"raped"}; | ||
56 | |||
57 | verbly::token tok = form; | ||
58 | std::list<std::string> words; | ||
59 | for (;;) | ||
60 | { | ||
61 | // Compile the form. | ||
62 | while (!tok.isComplete()) | ||
63 | { | ||
64 | visit(tok); | ||
65 | } | ||
66 | |||
67 | std::string compiled = tok.compile(); | ||
68 | words = verbly::split<std::list<std::string>>(compiled, " "); | ||
69 | |||
70 | // Ensure that there are no bad words in the output. | ||
71 | if (!std::any_of(std::begin(words), std::end(words), [&badWords] (const std::string& word) { | ||
72 | std::string canonWord; | ||
73 | |||
74 | for (char ch : word) | ||
75 | { | ||
76 | if (std::isalpha(ch)) | ||
77 | { | ||
78 | canonWord.push_back(std::tolower(ch)); | ||
79 | } | ||
80 | } | ||
81 | |||
82 | return (badWords.count(canonWord) == 1); | ||
83 | })) { | ||
84 | break; | ||
85 | } else { | ||
86 | std::cout << "Bad word generated." << std::endl; | ||
87 | } | ||
88 | } | ||
89 | |||
90 | // Put the form into title case. | ||
91 | for (std::string& word : words) | ||
92 | { | ||
93 | if ((word[0] == '"') && (word.length() > 1)) | ||
94 | { | ||
95 | word[1] = std::toupper(word[1]); | ||
96 | } else { | ||
97 | word[0] = std::toupper(word[0]); | ||
98 | } | ||
99 | } | ||
100 | |||
101 | return verbly::implode(std::begin(words), std::end(words), " "); | ||
102 | } | ||
103 | |||
104 | verbly::filter sentence::parseSelrestrs( | ||
105 | verbly::selrestr selrestr) const | ||
106 | { | ||
107 | switch (selrestr.getType()) | ||
108 | { | ||
109 | case verbly::selrestr::type::empty: | ||
110 | { | ||
111 | return {}; | ||
112 | } | ||
113 | |||
114 | case verbly::selrestr::type::singleton: | ||
115 | { | ||
116 | verbly::filter result; | ||
117 | |||
118 | if (selrestr.getRestriction() == "concrete") | ||
119 | { | ||
120 | result = (verbly::notion::wnid == 100001930); // physical entity | ||
121 | } else if (selrestr.getRestriction() == "time") | ||
122 | { | ||
123 | result = (verbly::notion::wnid == 100028270); // time | ||
124 | } else if (selrestr.getRestriction() == "state") | ||
125 | { | ||
126 | result = (verbly::notion::wnid == 100024720); // state | ||
127 | } else if (selrestr.getRestriction() == "abstract") | ||
128 | { | ||
129 | result = (verbly::notion::wnid == 100002137); // abstract entity | ||
130 | } else if (selrestr.getRestriction() == "scalar") | ||
131 | { | ||
132 | result = (verbly::notion::wnid == 103835412); // number | ||
133 | } else if (selrestr.getRestriction() == "currency") | ||
134 | { | ||
135 | result = (verbly::notion::wnid == 105050379); // currency | ||
136 | } else if (selrestr.getRestriction() == "location") | ||
137 | { | ||
138 | result = (verbly::notion::wnid == 100027167); // location | ||
139 | } else if (selrestr.getRestriction() == "organization") | ||
140 | { | ||
141 | result = (verbly::notion::wnid == 100237078); // organization | ||
142 | } else if (selrestr.getRestriction() == "int_control") | ||
143 | { | ||
144 | result = (verbly::notion::wnid == 100007347); // causal agent | ||
145 | } else if (selrestr.getRestriction() == "natural") | ||
146 | { | ||
147 | result = (verbly::notion::wnid == 100019128); // natural object | ||
148 | } else if (selrestr.getRestriction() == "phys_obj") | ||
149 | { | ||
150 | result = (verbly::notion::wnid == 100002684); // physical object | ||
151 | } else if (selrestr.getRestriction() == "solid") | ||
152 | { | ||
153 | result = (verbly::notion::wnid == 113860793); // solid | ||
154 | } else if (selrestr.getRestriction() == "shape") | ||
155 | { | ||
156 | result = (verbly::notion::wnid == 100027807); // shape | ||
157 | } else if (selrestr.getRestriction() == "substance") | ||
158 | { | ||
159 | result = (verbly::notion::wnid == 100019613); // substance | ||
160 | } else if (selrestr.getRestriction() == "idea") | ||
161 | { | ||
162 | result = (verbly::notion::wnid == 105803379); // idea | ||
163 | } else if (selrestr.getRestriction() == "sound") | ||
164 | { | ||
165 | result = (verbly::notion::wnid == 107111047); // sound | ||
166 | } else if (selrestr.getRestriction() == "communication") | ||
167 | { | ||
168 | result = (verbly::notion::wnid == 100033020); // communication | ||
169 | } else if (selrestr.getRestriction() == "region") | ||
170 | { | ||
171 | result = (verbly::notion::wnid == 105221895); // region | ||
172 | } else if (selrestr.getRestriction() == "place") | ||
173 | { | ||
174 | result = (verbly::notion::wnid == 100586262); // place | ||
175 | } else if (selrestr.getRestriction() == "machine") | ||
176 | { | ||
177 | result = (verbly::notion::wnid == 102958343); // machine | ||
178 | } else if (selrestr.getRestriction() == "animate") | ||
179 | { | ||
180 | result = (verbly::notion::wnid == 100004258); // animate thing | ||
181 | } else if (selrestr.getRestriction() == "plant") | ||
182 | { | ||
183 | result = (verbly::notion::wnid == 103956922); // plant | ||
184 | } else if (selrestr.getRestriction() == "comestible") | ||
185 | { | ||
186 | result = (verbly::notion::wnid == 100021265); // food | ||
187 | } else if (selrestr.getRestriction() == "artifact") | ||
188 | { | ||
189 | result = (verbly::notion::wnid == 100021939); // artifact | ||
190 | } else if (selrestr.getRestriction() == "vehicle") | ||
191 | { | ||
192 | result = (verbly::notion::wnid == 104524313); // vehicle | ||
193 | } else if (selrestr.getRestriction() == "human") | ||
194 | { | ||
195 | result = (verbly::notion::wnid == 100007846); // person | ||
196 | } else if (selrestr.getRestriction() == "animal") | ||
197 | { | ||
198 | result = (verbly::notion::wnid == 100015388); // animal | ||
199 | } else if (selrestr.getRestriction() == "body_part") | ||
200 | { | ||
201 | result = (verbly::notion::wnid == 105220461); // body part | ||
202 | } else if (selrestr.getRestriction() == "garment") | ||
203 | { | ||
204 | result = (verbly::notion::wnid == 103051540); // clothing | ||
205 | } else if (selrestr.getRestriction() == "tool") | ||
206 | { | ||
207 | result = (verbly::notion::wnid == 104451818); // tool | ||
208 | } else { | ||
209 | return {}; | ||
210 | } | ||
211 | |||
212 | std::cout << selrestr.getRestriction() << " (" << selrestr.getPos() << ")" << std::endl; | ||
213 | |||
214 | if (selrestr.getPos()) | ||
215 | { | ||
216 | return (verbly::notion::fullHypernyms %= result); | ||
217 | } else { | ||
218 | return !(verbly::notion::fullHypernyms %= result); | ||
219 | } | ||
220 | } | ||
221 | |||
222 | case verbly::selrestr::type::group: | ||
223 | { | ||
224 | std::cout << "or: " << selrestr.getOrlogic() << std::endl; | ||
225 | verbly::filter ret(selrestr.getOrlogic()); | ||
226 | |||
227 | for (const verbly::selrestr& child : selrestr) | ||
228 | { | ||
229 | ret += parseSelrestrs(child); | ||
230 | } | ||
231 | |||
232 | return ret; | ||
233 | } | ||
234 | } | ||
235 | } | ||
236 | |||
237 | bool sentence::requiresSelrestr( | ||
238 | std::string restriction, | ||
239 | verbly::selrestr selrestr) const | ||
240 | { | ||
241 | switch (selrestr.getType()) | ||
242 | { | ||
243 | case verbly::selrestr::type::empty: | ||
244 | { | ||
245 | return false; | ||
246 | } | ||
247 | |||
248 | case verbly::selrestr::type::singleton: | ||
249 | { | ||
250 | if (selrestr.getRestriction() == restriction) | ||
251 | { | ||
252 | return selrestr.getPos(); | ||
253 | } else { | ||
254 | return false; | ||
255 | } | ||
256 | } | ||
257 | |||
258 | case verbly::selrestr::type::group: | ||
259 | { | ||
260 | if (selrestr.getOrlogic()) | ||
261 | { | ||
262 | return std::all_of(std::begin(selrestr), std::end(selrestr), [=] (const verbly::selrestr& s) { | ||
263 | return requiresSelrestr(restriction, s); | ||
264 | }); | ||
265 | } else { | ||
266 | return std::any_of(std::begin(selrestr), std::end(selrestr), [=] (const verbly::selrestr& s) { | ||
267 | return requiresSelrestr(restriction, s); | ||
268 | }); | ||
269 | } | ||
270 | } | ||
271 | } | ||
272 | } | ||
273 | |||
274 | verbly::word sentence::generateStandardNoun( | ||
275 | std::string role, | ||
276 | verbly::selrestr selrestrs) const | ||
277 | { | ||
278 | std::geometric_distribution<int> tagdist(0.5); // 0.06 | ||
279 | std::vector<verbly::word> result; | ||
280 | bool trySelection = true; | ||
281 | |||
282 | while (result.empty()) | ||
283 | { | ||
284 | verbly::filter condition = | ||
285 | (verbly::notion::partOfSpeech == verbly::part_of_speech::noun) | ||
286 | && (verbly::form::proper == false) | ||
287 | //&& (verbly::form::complexity == 1) | ||
288 | // && (verbly::word::tagCount >= tagdist(rng_)) // Favor more common words | ||
289 | && (verbly::word::tagCount >= 1) | ||
290 | && !(verbly::word::usageDomains %= (verbly::notion::wnid == 106718862)); // Blacklist ethnic slurs | ||
291 | |||
292 | // Only use selection restrictions for a first attempt. | ||
293 | if (trySelection) | ||
294 | { | ||
295 | verbly::filter selrestrCondition = parseSelrestrs(selrestrs).compact(); | ||
296 | |||
297 | if (selrestrCondition.getType() != verbly::filter::type::empty) | ||
298 | { | ||
299 | condition &= std::move(selrestrCondition); | ||
300 | } else if (role == "Attribute") | ||
301 | { | ||
302 | condition &= (verbly::notion::fullHypernyms %= (verbly::notion::wnid == 100024264)); // attribute | ||
303 | } else if (role == "Instrument") | ||
304 | { | ||
305 | condition &= (verbly::notion::fullHypernyms %= (verbly::notion::wnid == 104451818)); // tool | ||
306 | } else if (role == "Agent") | ||
307 | { | ||
308 | condition &= (verbly::notion::fullHypernyms %= (verbly::notion::wnid == 100007347)); // causal agent | ||
309 | } | ||
310 | |||
311 | trySelection = false; | ||
312 | } else { | ||
313 | std::cout << "Selection failed" << std::endl; | ||
314 | } | ||
315 | |||
316 | result = database_.words(condition).all(); | ||
317 | } | ||
318 | |||
319 | return result.front(); | ||
320 | } | ||
321 | |||
322 | verbly::token sentence::generateStandardNounPhrase( | ||
323 | const verbly::word& noun, | ||
324 | std::string role, | ||
325 | bool plural, | ||
326 | bool definite) const | ||
327 | { | ||
328 | verbly::token utter; | ||
329 | verbly::word sounder = noun; | ||
330 | verbly::word descript; | ||
331 | |||
332 | if (std::bernoulli_distribution(1.0/8.0)(rng_)) | ||
333 | { | ||
334 | std::geometric_distribution<int> tagdist(0.2); | ||
335 | descript = database_.words( | ||
336 | (verbly::word::tagCount >= tagdist(rng_)) | ||
337 | && (verbly::notion::partOfSpeech == verbly::part_of_speech::adjective)).first(); | ||
338 | |||
339 | sounder = descript; | ||
340 | } | ||
341 | |||
342 | if ((std::bernoulli_distribution(1.0/3.0)(rng_)) && (definite)) | ||
343 | { | ||
344 | utter << "the"; | ||
345 | |||
346 | if (std::bernoulli_distribution(1.0/2.0)(rng_)) | ||
347 | { | ||
348 | plural = true; | ||
349 | } | ||
350 | } else { | ||
351 | if ((role != "Theme") && (role != "Attribute") && std::bernoulli_distribution(1.0/2.0)(rng_)) | ||
352 | { | ||
353 | utter << "your"; | ||
354 | } else if (!plural) { | ||
355 | if (sounder.getLemma().getBaseForm().startsWithVowelSound()) | ||
356 | { | ||
357 | utter << "an"; | ||
358 | } else { | ||
359 | utter << "a"; | ||
360 | } | ||
361 | } | ||
362 | } | ||
363 | |||
364 | if (descript) | ||
365 | { | ||
366 | utter << descript; | ||
367 | } | ||
368 | |||
369 | if (plural && noun.getLemma().hasInflection(verbly::inflection::plural)) | ||
370 | { | ||
371 | utter << verbly::token(noun, verbly::inflection::plural); | ||
372 | } else { | ||
373 | utter << noun; | ||
374 | } | ||
375 | |||
376 | return utter; | ||
377 | } | ||
378 | |||
379 | verbly::token sentence::generateClause( | ||
380 | const verbly::token& it) const | ||
381 | { | ||
382 | verbly::token utter; | ||
383 | std::geometric_distribution<int> tagdist(0.07); | ||
384 | std::vector<verbly::word> verbDataset; | ||
385 | |||
386 | verbly::filter frameCondition = | ||
387 | (verbly::frame::length >= 2) | ||
388 | && (verbly::frame::part(0) %= ( | ||
389 | (verbly::part::type == verbly::part_type::noun_phrase) | ||
390 | && (verbly::part::role == "Agent")) | ||
391 | && !(verbly::frame::part() %= ( | ||
392 | verbly::part::synrestr %= "adjp"))); | ||
393 | |||
394 | if (it.hasSynrestr("experiencer")) | ||
395 | { | ||
396 | frameCondition &= | ||
397 | (verbly::frame::part(2) %= | ||
398 | (verbly::part::type == verbly::part_type::noun_phrase) | ||
399 | && !(verbly::part::synrestr %= "genitive") | ||
400 | && ((verbly::part::role == "Patient") | ||
401 | || (verbly::part::role == "Experiencer"))); | ||
402 | } | ||
403 | |||
404 | verbly::filter verbCondition = | ||
405 | (verbly::notion::partOfSpeech == verbly::part_of_speech::verb) | ||
406 | && frameCondition; | ||
407 | |||
408 | if (it.hasSynrestr("participle_phrase")) | ||
409 | { | ||
410 | verbCondition &= (verbly::lemma::form(verbly::inflection::ing_form)); | ||
411 | } else if (it.hasSynrestr("progressive")) | ||
412 | { | ||
413 | verbCondition &= (verbly::lemma::form(verbly::inflection::s_form)); | ||
414 | } else if (it.hasSynrestr("past_participle")) | ||
415 | { | ||
416 | verbCondition &= (verbly::lemma::form(verbly::inflection::past_participle)); | ||
417 | } | ||
418 | |||
419 | // Because of the tag distribution, it's possible (albeit extremely unlikely) | ||
420 | // for the verb query to fail, so we loop until it succeeds. | ||
421 | while (verbDataset.empty()) | ||
422 | { | ||
423 | verbDataset = database_.words( | ||
424 | verbCondition | ||
425 | && (verbly::word::tagCount >= tagdist(rng_)) | ||
426 | ).all(); | ||
427 | } | ||
428 | |||
429 | verbly::word verb = verbDataset.front(); | ||
430 | verbly::frame frame = database_.frames(frameCondition && verb).first(); | ||
431 | std::list<verbly::part> parts(std::begin(frame.getParts()), std::end(frame.getParts())); | ||
432 | |||
433 | if (it.hasSynrestr("experiencer")) | ||
434 | { | ||
435 | // Ignore the direct object. | ||
436 | parts.erase(std::next(parts.begin(), 2)); | ||
437 | } | ||
438 | |||
439 | if (it.hasSynrestr("subjectless")) | ||
440 | { | ||
441 | // Ignore the subject. | ||
442 | parts.pop_front(); | ||
443 | } | ||
444 | |||
445 | for (const verbly::part& part : parts) | ||
446 | { | ||
447 | switch (part.getType()) | ||
448 | { | ||
449 | case verbly::part_type::noun_phrase: | ||
450 | { | ||
451 | std::cout << "NP: "; | ||
452 | for (auto& s : part.getNounSynrestrs()) | ||
453 | { | ||
454 | std::cout << s << " "; | ||
455 | } | ||
456 | std::cout << std::endl; | ||
457 | |||
458 | if (requiresSelrestr("currency", part.getNounSelrestrs())) | ||
459 | { | ||
460 | int lead = std::uniform_int_distribution<int>(1,9)(rng_); | ||
461 | int tail = std::uniform_int_distribution<int>(0,6)(rng_); | ||
462 | std::string tailStr(tail, '0'); | ||
463 | |||
464 | utter << ("$" + std::to_string(lead) + tailStr); | ||
465 | } else if (part.nounHasSynrestr("adjp")) | ||
466 | { | ||
467 | utter << std::set<std::string>({"adjective_phrase"}); | ||
468 | } else if ((part.nounHasSynrestr("be_sc_ing")) | ||
469 | || (part.nounHasSynrestr("ac_ing")) | ||
470 | || (part.nounHasSynrestr("sc_ing")) | ||
471 | || (part.nounHasSynrestr("np_omit_ing")) | ||
472 | || (part.nounHasSynrestr("oc_ing"))) | ||
473 | { | ||
474 | utter << std::set<std::string>({"participle_phrase", "subjectless"}); | ||
475 | } else if ((part.nounHasSynrestr("poss_ing")) | ||
476 | || (part.nounHasSynrestr("possing")) | ||
477 | || (part.nounHasSynrestr("pos_ing"))) | ||
478 | { | ||
479 | utter << "your"; | ||
480 | utter << std::set<std::string>({"participle_phrase", "subjectless"}); | ||
481 | } else if (part.nounHasSynrestr("genitive")) | ||
482 | { | ||
483 | utter << "your"; | ||
484 | } else if (part.nounHasSynrestr("adv_loc")) | ||
485 | { | ||
486 | if (std::bernoulli_distribution(1.0/2.0)(rng_)) | ||
487 | { | ||
488 | utter << "here"; | ||
489 | } else { | ||
490 | utter << "there"; | ||
491 | } | ||
492 | } else if (part.nounHasSynrestr("refl")) | ||
493 | { | ||
494 | utter << "yourself"; | ||
495 | } else if ((part.nounHasSynrestr("sc_to_inf")) | ||
496 | || (part.nounHasSynrestr("ac_to_inf")) | ||
497 | || (part.nounHasSynrestr("vc_to_inf")) | ||
498 | || (part.nounHasSynrestr("rs_to_inf")) | ||
499 | || (part.nounHasSynrestr("oc_to_inf"))) | ||
500 | { | ||
501 | utter << std::set<std::string>({"infinitive_phrase", "subjectless"}); | ||
502 | } else if (part.nounHasSynrestr("oc_bare_inf")) | ||
503 | { | ||
504 | utter << std::set<std::string>({"infinitive_phrase", "bare", "subjectless"}); | ||
505 | } else if (part.nounHasSynrestr("wh_comp")) | ||
506 | { | ||
507 | utter << "whether"; | ||
508 | |||
509 | verbly::token sentence(std::set<std::string>({"progressive"})); | ||
510 | utter << generateClause(sentence); | ||
511 | } else if (part.nounHasSynrestr("that_comp")) | ||
512 | { | ||
513 | utter << "that"; | ||
514 | utter << "they"; | ||
515 | |||
516 | verbly::token sentence(std::set<std::string>({"subjectless"})); | ||
517 | utter << generateClause(sentence); | ||
518 | } else if (part.nounHasSynrestr("what_extract")) | ||
519 | { | ||
520 | utter << "what"; | ||
521 | |||
522 | verbly::token sentence(std::set<std::string>({"progressive", "experiencer"})); | ||
523 | utter << generateClause(sentence); | ||
524 | } else if (part.nounHasSynrestr("how_extract")) | ||
525 | { | ||
526 | utter << "how"; | ||
527 | |||
528 | verbly::token sentence(std::set<std::string>({"progressive"})); | ||
529 | utter << generateClause(sentence); | ||
530 | } else if (part.nounHasSynrestr("wh_inf")) | ||
531 | { | ||
532 | utter << "how"; | ||
533 | |||
534 | verbly::token sentence(std::set<std::string>({"infinitive_phrase", "subjectless"})); | ||
535 | utter << generateClause(sentence); | ||
536 | } else if (part.nounHasSynrestr("what_inf")) | ||
537 | { | ||
538 | utter << "what"; | ||
539 | |||
540 | verbly::token sentence(std::set<std::string>({"infinitive_phrase", "subjectless", "experiencer"})); | ||
541 | utter << generateClause(sentence); | ||
542 | } else if (part.nounHasSynrestr("wheth_inf")) | ||
543 | { | ||
544 | utter << "whether"; | ||
545 | |||
546 | verbly::token sentence(std::set<std::string>({"infinitive_phrase", "subjectless"})); | ||
547 | utter << generateClause(sentence); | ||
548 | } else if (part.nounHasSynrestr("quotation")) | ||
549 | { | ||
550 | verbly::token sentence(std::set<std::string>({"participle_phrase"})); | ||
551 | while (!sentence.isComplete()) | ||
552 | { | ||
553 | visit(sentence); | ||
554 | } | ||
555 | |||
556 | utter << ("\"" + sentence.compile() + "\""); | ||
557 | } else { | ||
558 | verbly::word noun = generateStandardNoun(part.getNounRole(), part.getNounSelrestrs()); | ||
559 | |||
560 | bool plural = part.nounHasSynrestr("plural"); | ||
561 | if (!plural) | ||
562 | { | ||
563 | plural = requiresSelrestr("plural", part.getNounSelrestrs()); | ||
564 | } | ||
565 | |||
566 | utter << generateStandardNounPhrase( | ||
567 | noun, | ||
568 | part.getNounRole(), | ||
569 | plural, | ||
570 | part.nounHasSynrestr("definite")); | ||
571 | |||
572 | if (part.nounHasSynrestr("acc_ing") || part.nounHasSynrestr("ac_ing")) | ||
573 | { | ||
574 | utter << std::set<std::string>({"participle_phrase", "subjectless"}); | ||
575 | } | ||
576 | } | ||
577 | |||
578 | break; | ||
579 | } | ||
580 | |||
581 | case verbly::part_type::verb: | ||
582 | { | ||
583 | std::cout << "V: " << verb.getBaseForm() << std::endl; | ||
584 | |||
585 | if (it.hasSynrestr("progressive")) | ||
586 | { | ||
587 | utter << verbly::token(verb, verbly::inflection::s_form); | ||
588 | } else if (it.hasSynrestr("past_participle")) | ||
589 | { | ||
590 | utter << verbly::token(verb, verbly::inflection::past_participle); | ||
591 | } else if (it.hasSynrestr("infinitive_phrase")) | ||
592 | { | ||
593 | if (!it.hasSynrestr("bare")) | ||
594 | { | ||
595 | utter << "to"; | ||
596 | } | ||
597 | |||
598 | utter << verb; | ||
599 | } else if (it.hasSynrestr("participle_phrase")) | ||
600 | { | ||
601 | utter << verbly::token(verb, verbly::inflection::ing_form); | ||
602 | } else { | ||
603 | utter << verb; | ||
604 | } | ||
605 | |||
606 | break; | ||
607 | } | ||
608 | |||
609 | case verbly::part_type::preposition: | ||
610 | { | ||
611 | std::cout << "PREP" << std::endl; | ||
612 | |||
613 | if (part.isPrepositionLiteral()) | ||
614 | { | ||
615 | int choiceIndex = std::uniform_int_distribution<int>(0, part.getPrepositionChoices().size()-1)(rng_); | ||
616 | utter << part.getPrepositionChoices()[choiceIndex]; | ||
617 | } else { | ||
618 | verbly::filter pgf(true); | ||
619 | for (const std::string& choice : part.getPrepositionChoices()) | ||
620 | { | ||
621 | pgf += (verbly::notion::prepositionGroup == choice); | ||
622 | } | ||
623 | |||
624 | utter << database_.words(pgf && (verbly::notion::partOfSpeech == verbly::part_of_speech::preposition)).first(); | ||
625 | } | ||
626 | |||
627 | break; | ||
628 | } | ||
629 | |||
630 | case verbly::part_type::adjective: | ||
631 | { | ||
632 | std::cout << "ADJ" << std::endl; | ||
633 | |||
634 | utter << std::set<std::string>({"adjective_phrase"}); | ||
635 | |||
636 | break; | ||
637 | } | ||
638 | |||
639 | case verbly::part_type::adverb: | ||
640 | { | ||
641 | std::cout << "ADV" << std::endl; | ||
642 | |||
643 | utter << std::set<std::string>({"adverb_phrase"}); | ||
644 | |||
645 | break; | ||
646 | } | ||
647 | |||
648 | case verbly::part_type::literal: | ||
649 | { | ||
650 | std::cout << "LIT" << std::endl; | ||
651 | |||
652 | utter << part.getLiteralValue(); | ||
653 | |||
654 | break; | ||
655 | } | ||
656 | |||
657 | case verbly::part_type::invalid: | ||
658 | { | ||
659 | // Nope | ||
660 | |||
661 | break; | ||
662 | } | ||
663 | } | ||
664 | } | ||
665 | |||
666 | if ((parts.size() == 1) && (std::bernoulli_distribution(1.0/4.0)(rng_))) | ||
667 | { | ||
668 | utter << std::set<std::string>({"adverb_phrase"}); | ||
669 | } | ||
670 | |||
671 | return utter; | ||
672 | } | ||
673 | |||
674 | void sentence::visit(verbly::token& it) const | ||
675 | { | ||
676 | switch (it.getType()) | ||
677 | { | ||
678 | case verbly::token::type::utterance: | ||
679 | { | ||
680 | for (verbly::token& token : it) | ||
681 | { | ||
682 | if (!token.isComplete()) | ||
683 | { | ||
684 | visit(token); | ||
685 | |||
686 | break; | ||
687 | } | ||
688 | } | ||
689 | |||
690 | break; | ||
691 | } | ||
692 | |||
693 | case verbly::token::type::fillin: | ||
694 | { | ||
695 | if (it.hasSynrestr("infinitive_phrase")) | ||
696 | { | ||
697 | it = generateClause(it); | ||
698 | } else if (it.hasSynrestr("adjective_phrase")) | ||
699 | { | ||
700 | verbly::token phrase; | ||
701 | |||
702 | if (std::bernoulli_distribution(1.0/6.0)(rng_)) | ||
703 | { | ||
704 | phrase << std::set<std::string>({"adverb_phrase"}); | ||
705 | } | ||
706 | |||
707 | if (std::bernoulli_distribution(1.0/4.0)(rng_)) | ||
708 | { | ||
709 | phrase << std::set<std::string>({"participle_phrase", "subjectless"}); | ||
710 | } else { | ||
711 | std::geometric_distribution<int> tagdist(0.2); | ||
712 | phrase << database_.words( | ||
713 | (verbly::word::tagCount >= tagdist(rng_)) | ||
714 | && (verbly::notion::partOfSpeech == verbly::part_of_speech::adjective)).first(); | ||
715 | } | ||
716 | |||
717 | it = phrase; | ||
718 | } else if (it.hasSynrestr("adverb_phrase")) | ||
719 | { | ||
720 | std::geometric_distribution<int> tagdist(1.0/23.0); | ||
721 | |||
722 | it = database_.words( | ||
723 | (verbly::notion::partOfSpeech == verbly::part_of_speech::adverb) | ||
724 | && (verbly::word::tagCount >= tagdist(rng_)) | ||
725 | ).first(); | ||
726 | } else if (it.hasSynrestr("participle_phrase")) | ||
727 | { | ||
728 | if (std::bernoulli_distribution(1.0/2.0)(rng_)) | ||
729 | { | ||
730 | it = verbly::token( | ||
731 | database_.words( | ||
732 | (verbly::notion::partOfSpeech == verbly::part_of_speech::verb) | ||
733 | && (verbly::lemma::form(verbly::inflection::ing_form))).first(), | ||
734 | verbly::inflection::ing_form); | ||
735 | } else { | ||
736 | it = generateClause(it); | ||
737 | } | ||
738 | } else { | ||
739 | it = "*the reality of the situation*"; | ||
740 | } | ||
741 | |||
742 | break; | ||
743 | } | ||
744 | |||
745 | case verbly::token::type::word: | ||
746 | case verbly::token::type::literal: | ||
747 | case verbly::token::type::part: | ||
748 | { | ||
749 | // Nope | ||
750 | |||
751 | break; | ||
752 | } | ||
753 | } | ||
754 | } | ||