diff options
Diffstat (limited to 'sentence.cpp')
-rw-r--r-- | sentence.cpp | 139 |
1 file changed, 59 insertions, 80 deletions
diff --git a/sentence.cpp b/sentence.cpp
index 5f1b03a..a9b40c2 100644
--- a/sentence.cpp
+++ b/sentence.cpp
@@ -9,6 +9,19 @@ sentence::sentence( | |||
9 | database_(database), | 9 | database_(database), |
10 | rng_(rng) | 10 | rng_(rng) |
11 | { | 11 | { |
12 | verbly::filter blacklist; | ||
13 | |||
14 | for (std::string word : { | ||
15 | "raped", "Negro" | ||
16 | }) | ||
17 | { | ||
18 | blacklist |= (verbly::form::text == word); | ||
19 | } | ||
20 | |||
21 | badWords_ = !blacklist; | ||
22 | |||
23 | // Blacklist ethnic slurs | ||
24 | badWords_ &= !(verbly::word::usageDomains %= (verbly::notion::wnid == 106718862)); | ||
12 | } | 25 | } |
13 | 26 | ||
14 | std::string sentence::generate() const | 27 | std::string sentence::generate() const |
@@ -51,54 +64,18 @@ std::string sentence::generate() const | |||
51 | form << secondSyn; | 64 | form << secondSyn; |
52 | } | 65 | } |
53 | 66 | ||
54 | // Attempt to compile the form, restarting if a bad word is generated. | 67 | // Compile the form. |
55 | std::set<std::string> badWords = {"raped"}; | 68 | verbly::token tok = verbly::token::capitalize( |
69 | verbly::token::casing::title_case, form); | ||
56 | 70 | ||
57 | verbly::token tok = form; | 71 | while (!tok.isComplete()) |
58 | std::list<std::string> words; | ||
59 | for (;;) | ||
60 | { | 72 | { |
61 | // Compile the form. | 73 | visit(tok); |
62 | while (!tok.isComplete()) | ||
63 | { | ||
64 | visit(tok); | ||
65 | } | ||
66 | |||
67 | std::string compiled = tok.compile(); | ||
68 | words = verbly::split<std::list<std::string>>(compiled, " "); | ||
69 | |||
70 | // Ensure that there are no bad words in the output. | ||
71 | if (!std::any_of(std::begin(words), std::end(words), [&badWords] (const std::string& word) { | ||
72 | std::string canonWord; | ||
73 | |||
74 | for (char ch : word) | ||
75 | { | ||
76 | if (std::isalpha(ch)) | ||
77 | { | ||
78 | canonWord.push_back(std::tolower(ch)); | ||
79 | } | ||
80 | } | ||
81 | |||
82 | return (badWords.count(canonWord) == 1); | ||
83 | })) { | ||
84 | break; | ||
85 | } else { | ||
86 | std::cout << "Bad word generated." << std::endl; | ||
87 | } | ||
88 | } | 74 | } |
89 | 75 | ||
90 | // Put the form into title case. | 76 | std::string compiled = tok.compile(); |
91 | for (std::string& word : words) | ||
92 | { | ||
93 | if ((word[0] == '"') && (word.length() > 1)) | ||
94 | { | ||
95 | word[1] = std::toupper(word[1]); | ||
96 | } else { | ||
97 | word[0] = std::toupper(word[0]); | ||
98 | } | ||
99 | } | ||
100 | 77 | ||
101 | return verbly::implode(std::begin(words), std::end(words), " "); | 78 | return compiled; |
102 | } | 79 | } |
103 | 80 | ||
104 | bool sentence::chooseSelrestr(std::set<std::string> selrestrs, std::set<std::string> choices) const | 81 | bool sentence::chooseSelrestr(std::set<std::string> selrestrs, std::set<std::string> choices) const |
@@ -111,7 +88,7 @@ bool sentence::chooseSelrestr(std::set<std::string> selrestrs, std::set<std::str | |||
111 | validChoices++; | 88 | validChoices++; |
112 | } | 89 | } |
113 | } | 90 | } |
114 | 91 | ||
115 | return std::bernoulli_distribution(static_cast<double>(validChoices)/static_cast<double>(selrestrs.size()))(rng_); | 92 | return std::bernoulli_distribution(static_cast<double>(validChoices)/static_cast<double>(selrestrs.size()))(rng_); |
116 | } | 93 | } |
117 | 94 | ||
@@ -131,7 +108,7 @@ verbly::word sentence::generateStandardNoun( | |||
131 | //&& (verbly::form::complexity == 1) | 108 | //&& (verbly::form::complexity == 1) |
132 | // && (verbly::word::tagCount >= tagdist(rng_)) // Favor more common words | 109 | // && (verbly::word::tagCount >= tagdist(rng_)) // Favor more common words |
133 | && (verbly::word::tagCount >= 1) | 110 | && (verbly::word::tagCount >= 1) |
134 | && !(verbly::word::usageDomains %= (verbly::notion::wnid == 106718862)); // Blacklist ethnic slurs | 111 | && badWords_; |
135 | 112 | ||
136 | // Only use selection restrictions for a first attempt. | 113 | // Only use selection restrictions for a first attempt. |
137 | if (trySelection) | 114 | if (trySelection) |
@@ -248,7 +225,7 @@ verbly::word sentence::generateStandardNoun( | |||
248 | selection += (verbly::notion::wnid == 103670849); // line | 225 | selection += (verbly::notion::wnid == 103670849); // line |
249 | } | 226 | } |
250 | } | 227 | } |
251 | 228 | ||
252 | if (selection.compact().getType() != verbly::filter::type::empty) | 229 | if (selection.compact().getType() != verbly::filter::type::empty) |
253 | { | 230 | { |
254 | condition &= (verbly::notion::fullHypernyms %= std::move(selection)); | 231 | condition &= (verbly::notion::fullHypernyms %= std::move(selection)); |
@@ -281,18 +258,7 @@ verbly::token sentence::generateStandardNounPhrase( | |||
281 | bool definite) const | 258 | bool definite) const |
282 | { | 259 | { |
283 | verbly::token utter; | 260 | verbly::token utter; |
284 | verbly::word sounder = noun; | 261 | bool indefiniteArticle = false; |
285 | verbly::word descript; | ||
286 | |||
287 | if (std::bernoulli_distribution(1.0/8.0)(rng_)) | ||
288 | { | ||
289 | std::geometric_distribution<int> tagdist(0.2); | ||
290 | descript = database_.words( | ||
291 | (verbly::word::tagCount >= tagdist(rng_)) | ||
292 | && (verbly::notion::partOfSpeech == verbly::part_of_speech::adjective)).first(); | ||
293 | |||
294 | sounder = descript; | ||
295 | } | ||
296 | 262 | ||
297 | if ((std::bernoulli_distribution(1.0/3.0)(rng_)) && (definite)) | 263 | if ((std::bernoulli_distribution(1.0/3.0)(rng_)) && (definite)) |
298 | { | 264 | { |
@@ -307,18 +273,18 @@ verbly::token sentence::generateStandardNounPhrase( | |||
307 | { | 273 | { |
308 | utter << "your"; | 274 | utter << "your"; |
309 | } else if (!plural) { | 275 | } else if (!plural) { |
310 | if (sounder.getBaseForm().startsWithVowelSound()) | 276 | indefiniteArticle = true; |
311 | { | ||
312 | utter << "an"; | ||
313 | } else { | ||
314 | utter << "a"; | ||
315 | } | ||
316 | } | 277 | } |
317 | } | 278 | } |
318 | 279 | ||
319 | if (descript.isValid()) | 280 | if (std::bernoulli_distribution(1.0/8.0)(rng_)) |
320 | { | 281 | { |
321 | utter << descript; | 282 | std::geometric_distribution<int> tagdist(0.2); |
283 | |||
284 | utter << database_.words( | ||
285 | (verbly::word::tagCount >= tagdist(rng_)) | ||
286 | && (verbly::notion::partOfSpeech == verbly::part_of_speech::adjective) | ||
287 | && badWords_).first(); | ||
322 | } | 288 | } |
323 | 289 | ||
324 | if (plural && noun.hasInflection(verbly::inflection::plural)) | 290 | if (plural && noun.hasInflection(verbly::inflection::plural)) |
@@ -328,7 +294,12 @@ verbly::token sentence::generateStandardNounPhrase( | |||
328 | utter << noun; | 294 | utter << noun; |
329 | } | 295 | } |
330 | 296 | ||
331 | return utter; | 297 | if (indefiniteArticle) |
298 | { | ||
299 | return verbly::token::indefiniteArticle(utter); | ||
300 | } else { | ||
301 | return utter; | ||
302 | } | ||
332 | } | 303 | } |
333 | 304 | ||
334 | verbly::token sentence::generateClause( | 305 | verbly::token sentence::generateClause( |
@@ -360,7 +331,8 @@ verbly::token sentence::generateClause( | |||
360 | 331 | ||
361 | verbly::filter verbCondition = | 332 | verbly::filter verbCondition = |
362 | (verbly::notion::partOfSpeech == verbly::part_of_speech::verb) | 333 | (verbly::notion::partOfSpeech == verbly::part_of_speech::verb) |
363 | && frameCondition; | 334 | && frameCondition |
335 | && badWords_; | ||
364 | 336 | ||
365 | if (it.hasSynrestr("participle_phrase")) | 337 | if (it.hasSynrestr("participle_phrase")) |
366 | { | 338 | { |
@@ -501,20 +473,15 @@ verbly::token sentence::generateClause( | |||
501 | utter << generateClause(sentence); | 473 | utter << generateClause(sentence); |
502 | } else if (part.nounHasSynrestr("quotation")) | 474 | } else if (part.nounHasSynrestr("quotation")) |
503 | { | 475 | { |
504 | verbly::token sentence(std::set<std::string>({"participle_phrase"})); | 476 | utter << verbly::token::quote("\"", "\"", |
505 | while (!sentence.isComplete()) | 477 | verbly::token(std::set<std::string>({"past_participle"}))); |
506 | { | ||
507 | visit(sentence); | ||
508 | } | ||
509 | |||
510 | utter << ("\"" + sentence.compile() + "\""); | ||
511 | } else { | 478 | } else { |
512 | if (part.nounHasSynrestr("genitive")) | 479 | if (part.nounHasSynrestr("genitive")) |
513 | { | 480 | { |
514 | verbly::word noun = generateStandardNoun("Passive", {"animate"}); | 481 | verbly::word noun = generateStandardNoun("Passive", {"animate"}); |
515 | verbly::token owner = generateStandardNounPhrase(noun, "Passive", false, true); | 482 | verbly::token owner = generateStandardNounPhrase(noun, "Passive", false, true); |
516 | std::string ownerStr = owner.compile() + "'s"; | 483 | |
517 | utter << ownerStr; | 484 | utter << verbly::token::punctuation("'s", owner); |
518 | } | 485 | } |
519 | 486 | ||
520 | verbly::word noun = generateStandardNoun(part.getNounRole(), part.getNounSelrestrs()); | 487 | verbly::word noun = generateStandardNoun(part.getNounRole(), part.getNounSelrestrs()); |
@@ -669,7 +636,8 @@ void sentence::visit(verbly::token& it) const | |||
669 | std::geometric_distribution<int> tagdist(0.2); | 636 | std::geometric_distribution<int> tagdist(0.2); |
670 | phrase << database_.words( | 637 | phrase << database_.words( |
671 | (verbly::word::tagCount >= tagdist(rng_)) | 638 | (verbly::word::tagCount >= tagdist(rng_)) |
672 | && (verbly::notion::partOfSpeech == verbly::part_of_speech::adjective)).first(); | 639 | && (verbly::notion::partOfSpeech == verbly::part_of_speech::adjective) |
640 | && badWords_).first(); | ||
673 | } | 641 | } |
674 | 642 | ||
675 | it = phrase; | 643 | it = phrase; |
@@ -680,7 +648,7 @@ void sentence::visit(verbly::token& it) const | |||
680 | it = database_.words( | 648 | it = database_.words( |
681 | (verbly::notion::partOfSpeech == verbly::part_of_speech::adverb) | 649 | (verbly::notion::partOfSpeech == verbly::part_of_speech::adverb) |
682 | && (verbly::word::tagCount >= tagdist(rng_)) | 650 | && (verbly::word::tagCount >= tagdist(rng_)) |
683 | ).first(); | 651 | && badWords_).first(); |
684 | } else if (it.hasSynrestr("participle_phrase")) | 652 | } else if (it.hasSynrestr("participle_phrase")) |
685 | { | 653 | { |
686 | if (std::bernoulli_distribution(1.0/2.0)(rng_)) | 654 | if (std::bernoulli_distribution(1.0/2.0)(rng_)) |
@@ -688,11 +656,15 @@ void sentence::visit(verbly::token& it) const | |||
688 | it = verbly::token( | 656 | it = verbly::token( |
689 | database_.words( | 657 | database_.words( |
690 | (verbly::notion::partOfSpeech == verbly::part_of_speech::verb) | 658 | (verbly::notion::partOfSpeech == verbly::part_of_speech::verb) |
691 | && (verbly::word::forms(verbly::inflection::ing_form))).first(), | 659 | && (verbly::word::forms(verbly::inflection::ing_form)) |
660 | && badWords_).first(), | ||
692 | verbly::inflection::ing_form); | 661 | verbly::inflection::ing_form); |
693 | } else { | 662 | } else { |
694 | it = generateClause(it); | 663 | it = generateClause(it); |
695 | } | 664 | } |
665 | } else if (it.hasSynrestr("past_participle")) | ||
666 | { | ||
667 | it = generateClause(it); | ||
696 | } else { | 668 | } else { |
697 | it = "*the reality of the situation*"; | 669 | it = "*the reality of the situation*"; |
698 | } | 670 | } |
@@ -700,6 +672,13 @@ void sentence::visit(verbly::token& it) const | |||
700 | break; | 672 | break; |
701 | } | 673 | } |
702 | 674 | ||
675 | case verbly::token::type::transform: | ||
676 | { | ||
677 | visit(it.getInnerToken()); | ||
678 | |||
679 | break; | ||
680 | } | ||
681 | |||
703 | case verbly::token::type::word: | 682 | case verbly::token::type::word: |
704 | case verbly::token::type::literal: | 683 | case verbly::token::type::literal: |
705 | case verbly::token::type::part: | 684 | case verbly::token::type::part: |