summary refs log tree commit diff stats
path: root/sentence.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'sentence.cpp')
-rw-r--r--sentence.cpp139
1 files changed, 59 insertions, 80 deletions
diff --git a/sentence.cpp b/sentence.cpp index 5f1b03a..a9b40c2 100644 --- a/sentence.cpp +++ b/sentence.cpp
@@ -9,6 +9,19 @@ sentence::sentence(
9 database_(database), 9 database_(database),
10 rng_(rng) 10 rng_(rng)
11{ 11{
12 verbly::filter blacklist;
13
14 for (std::string word : {
15 "raped", "Negro"
16 })
17 {
18 blacklist |= (verbly::form::text == word);
19 }
20
21 badWords_ = !blacklist;
22
23 // Blacklist ethnic slurs
24 badWords_ &= !(verbly::word::usageDomains %= (verbly::notion::wnid == 106718862));
12} 25}
13 26
14std::string sentence::generate() const 27std::string sentence::generate() const
@@ -51,54 +64,18 @@ std::string sentence::generate() const
51 form << secondSyn; 64 form << secondSyn;
52 } 65 }
53 66
54 // Attempt to compile the form, restarting if a bad word is generated. 67 // Compile the form.
55 std::set<std::string> badWords = {"raped"}; 68 verbly::token tok = verbly::token::capitalize(
69 verbly::token::casing::title_case, form);
56 70
57 verbly::token tok = form; 71 while (!tok.isComplete())
58 std::list<std::string> words;
59 for (;;)
60 { 72 {
61 // Compile the form. 73 visit(tok);
62 while (!tok.isComplete())
63 {
64 visit(tok);
65 }
66
67 std::string compiled = tok.compile();
68 words = verbly::split<std::list<std::string>>(compiled, " ");
69
70 // Ensure that there are no bad words in the output.
71 if (!std::any_of(std::begin(words), std::end(words), [&badWords] (const std::string& word) {
72 std::string canonWord;
73
74 for (char ch : word)
75 {
76 if (std::isalpha(ch))
77 {
78 canonWord.push_back(std::tolower(ch));
79 }
80 }
81
82 return (badWords.count(canonWord) == 1);
83 })) {
84 break;
85 } else {
86 std::cout << "Bad word generated." << std::endl;
87 }
88 } 74 }
89 75
90 // Put the form into title case. 76 std::string compiled = tok.compile();
91 for (std::string& word : words)
92 {
93 if ((word[0] == '"') && (word.length() > 1))
94 {
95 word[1] = std::toupper(word[1]);
96 } else {
97 word[0] = std::toupper(word[0]);
98 }
99 }
100 77
101 return verbly::implode(std::begin(words), std::end(words), " "); 78 return compiled;
102} 79}
103 80
104bool sentence::chooseSelrestr(std::set<std::string> selrestrs, std::set<std::string> choices) const 81bool sentence::chooseSelrestr(std::set<std::string> selrestrs, std::set<std::string> choices) const
@@ -111,7 +88,7 @@ bool sentence::chooseSelrestr(std::set<std::string> selrestrs, std::set<std::str
111 validChoices++; 88 validChoices++;
112 } 89 }
113 } 90 }
114 91
115 return std::bernoulli_distribution(static_cast<double>(validChoices)/static_cast<double>(selrestrs.size()))(rng_); 92 return std::bernoulli_distribution(static_cast<double>(validChoices)/static_cast<double>(selrestrs.size()))(rng_);
116} 93}
117 94
@@ -131,7 +108,7 @@ verbly::word sentence::generateStandardNoun(
131 //&& (verbly::form::complexity == 1) 108 //&& (verbly::form::complexity == 1)
132 // && (verbly::word::tagCount >= tagdist(rng_)) // Favor more common words 109 // && (verbly::word::tagCount >= tagdist(rng_)) // Favor more common words
133 && (verbly::word::tagCount >= 1) 110 && (verbly::word::tagCount >= 1)
134 && !(verbly::word::usageDomains %= (verbly::notion::wnid == 106718862)); // Blacklist ethnic slurs 111 && badWords_;
135 112
136 // Only use selection restrictions for a first attempt. 113 // Only use selection restrictions for a first attempt.
137 if (trySelection) 114 if (trySelection)
@@ -248,7 +225,7 @@ verbly::word sentence::generateStandardNoun(
248 selection += (verbly::notion::wnid == 103670849); // line 225 selection += (verbly::notion::wnid == 103670849); // line
249 } 226 }
250 } 227 }
251 228
252 if (selection.compact().getType() != verbly::filter::type::empty) 229 if (selection.compact().getType() != verbly::filter::type::empty)
253 { 230 {
254 condition &= (verbly::notion::fullHypernyms %= std::move(selection)); 231 condition &= (verbly::notion::fullHypernyms %= std::move(selection));
@@ -281,18 +258,7 @@ verbly::token sentence::generateStandardNounPhrase(
281 bool definite) const 258 bool definite) const
282{ 259{
283 verbly::token utter; 260 verbly::token utter;
284 verbly::word sounder = noun; 261 bool indefiniteArticle = false;
285 verbly::word descript;
286
287 if (std::bernoulli_distribution(1.0/8.0)(rng_))
288 {
289 std::geometric_distribution<int> tagdist(0.2);
290 descript = database_.words(
291 (verbly::word::tagCount >= tagdist(rng_))
292 && (verbly::notion::partOfSpeech == verbly::part_of_speech::adjective)).first();
293
294 sounder = descript;
295 }
296 262
297 if ((std::bernoulli_distribution(1.0/3.0)(rng_)) && (definite)) 263 if ((std::bernoulli_distribution(1.0/3.0)(rng_)) && (definite))
298 { 264 {
@@ -307,18 +273,18 @@ verbly::token sentence::generateStandardNounPhrase(
307 { 273 {
308 utter << "your"; 274 utter << "your";
309 } else if (!plural) { 275 } else if (!plural) {
310 if (sounder.getBaseForm().startsWithVowelSound()) 276 indefiniteArticle = true;
311 {
312 utter << "an";
313 } else {
314 utter << "a";
315 }
316 } 277 }
317 } 278 }
318 279
319 if (descript.isValid()) 280 if (std::bernoulli_distribution(1.0/8.0)(rng_))
320 { 281 {
321 utter << descript; 282 std::geometric_distribution<int> tagdist(0.2);
283
284 utter << database_.words(
285 (verbly::word::tagCount >= tagdist(rng_))
286 && (verbly::notion::partOfSpeech == verbly::part_of_speech::adjective)
287 && badWords_).first();
322 } 288 }
323 289
324 if (plural && noun.hasInflection(verbly::inflection::plural)) 290 if (plural && noun.hasInflection(verbly::inflection::plural))
@@ -328,7 +294,12 @@ verbly::token sentence::generateStandardNounPhrase(
328 utter << noun; 294 utter << noun;
329 } 295 }
330 296
331 return utter; 297 if (indefiniteArticle)
298 {
299 return verbly::token::indefiniteArticle(utter);
300 } else {
301 return utter;
302 }
332} 303}
333 304
334verbly::token sentence::generateClause( 305verbly::token sentence::generateClause(
@@ -360,7 +331,8 @@ verbly::token sentence::generateClause(
360 331
361 verbly::filter verbCondition = 332 verbly::filter verbCondition =
362 (verbly::notion::partOfSpeech == verbly::part_of_speech::verb) 333 (verbly::notion::partOfSpeech == verbly::part_of_speech::verb)
363 && frameCondition; 334 && frameCondition
335 && badWords_;
364 336
365 if (it.hasSynrestr("participle_phrase")) 337 if (it.hasSynrestr("participle_phrase"))
366 { 338 {
@@ -501,20 +473,15 @@ verbly::token sentence::generateClause(
501 utter << generateClause(sentence); 473 utter << generateClause(sentence);
502 } else if (part.nounHasSynrestr("quotation")) 474 } else if (part.nounHasSynrestr("quotation"))
503 { 475 {
504 verbly::token sentence(std::set<std::string>({"participle_phrase"})); 476 utter << verbly::token::quote("\"", "\"",
505 while (!sentence.isComplete()) 477 verbly::token(std::set<std::string>({"past_participle"})));
506 {
507 visit(sentence);
508 }
509
510 utter << ("\"" + sentence.compile() + "\"");
511 } else { 478 } else {
512 if (part.nounHasSynrestr("genitive")) 479 if (part.nounHasSynrestr("genitive"))
513 { 480 {
514 verbly::word noun = generateStandardNoun("Passive", {"animate"}); 481 verbly::word noun = generateStandardNoun("Passive", {"animate"});
515 verbly::token owner = generateStandardNounPhrase(noun, "Passive", false, true); 482 verbly::token owner = generateStandardNounPhrase(noun, "Passive", false, true);
516 std::string ownerStr = owner.compile() + "'s"; 483
517 utter << ownerStr; 484 utter << verbly::token::punctuation("'s", owner);
518 } 485 }
519 486
520 verbly::word noun = generateStandardNoun(part.getNounRole(), part.getNounSelrestrs()); 487 verbly::word noun = generateStandardNoun(part.getNounRole(), part.getNounSelrestrs());
@@ -669,7 +636,8 @@ void sentence::visit(verbly::token& it) const
669 std::geometric_distribution<int> tagdist(0.2); 636 std::geometric_distribution<int> tagdist(0.2);
670 phrase << database_.words( 637 phrase << database_.words(
671 (verbly::word::tagCount >= tagdist(rng_)) 638 (verbly::word::tagCount >= tagdist(rng_))
672 && (verbly::notion::partOfSpeech == verbly::part_of_speech::adjective)).first(); 639 && (verbly::notion::partOfSpeech == verbly::part_of_speech::adjective)
640 && badWords_).first();
673 } 641 }
674 642
675 it = phrase; 643 it = phrase;
@@ -680,7 +648,7 @@ void sentence::visit(verbly::token& it) const
680 it = database_.words( 648 it = database_.words(
681 (verbly::notion::partOfSpeech == verbly::part_of_speech::adverb) 649 (verbly::notion::partOfSpeech == verbly::part_of_speech::adverb)
682 && (verbly::word::tagCount >= tagdist(rng_)) 650 && (verbly::word::tagCount >= tagdist(rng_))
683 ).first(); 651 && badWords_).first();
684 } else if (it.hasSynrestr("participle_phrase")) 652 } else if (it.hasSynrestr("participle_phrase"))
685 { 653 {
686 if (std::bernoulli_distribution(1.0/2.0)(rng_)) 654 if (std::bernoulli_distribution(1.0/2.0)(rng_))
@@ -688,11 +656,15 @@ void sentence::visit(verbly::token& it) const
688 it = verbly::token( 656 it = verbly::token(
689 database_.words( 657 database_.words(
690 (verbly::notion::partOfSpeech == verbly::part_of_speech::verb) 658 (verbly::notion::partOfSpeech == verbly::part_of_speech::verb)
691 && (verbly::word::forms(verbly::inflection::ing_form))).first(), 659 && (verbly::word::forms(verbly::inflection::ing_form))
660 && badWords_).first(),
692 verbly::inflection::ing_form); 661 verbly::inflection::ing_form);
693 } else { 662 } else {
694 it = generateClause(it); 663 it = generateClause(it);
695 } 664 }
665 } else if (it.hasSynrestr("past_participle"))
666 {
667 it = generateClause(it);
696 } else { 668 } else {
697 it = "*the reality of the situation*"; 669 it = "*the reality of the situation*";
698 } 670 }
@@ -700,6 +672,13 @@ void sentence::visit(verbly::token& it) const
700 break; 672 break;
701 } 673 }
702 674
675 case verbly::token::type::transform:
676 {
677 visit(it.getInnerToken());
678
679 break;
680 }
681
703 case verbly::token::type::word: 682 case verbly::token::type::word:
704 case verbly::token::type::literal: 683 case verbly::token::type::literal:
705 case verbly::token::type::part: 684 case verbly::token::type::part: