Updated verbly (transform tokens)

author: Kelly Rauchenberger <fefferburbia@gmail.com> 2017-11-08 11:38:23 -0500
committer: Kelly Rauchenberger <fefferburbia@gmail.com> 2017-11-08 11:38:23 -0500
commit: 8bcea38de87292681d32eff488c954e37dda6bbe (patch)
tree: d87c727da19acd0e0ccf65113875f1c590cf7dd5 /sentence.cpp
parent: 3d5c1c5d35e3934faffeb63790a8fd494518a979 (diff)
download: advice-8bcea38de87292681d32eff488c954e37dda6bbe.tar.gz
advice-8bcea38de87292681d32eff488c954e37dda6bbe.tar.bz2
advice-8bcea38de87292681d32eff488c954e37dda6bbe.zip
1 files changed, 59 insertions, 80 deletions
diff --git a/sentence.cpp b/sentence.cpp
index 5f1b03a..a9b40c2 100644
--- a/sentence.cpp
+++ b/sentence.cpp

@@ -9,6 +9,19 @@ sentence::sentence(
    database_(database),
    rng_(rng)
 {
+  verbly::filter blacklist;
+  for (std::string word : {
+    "raped", "Negro"
+  })
+  {
+    blacklist |= (verbly::form::text == word);
+  }
+  badWords_ = !blacklist;
+   // Blacklist ethnic slurs
+  badWords_ &= !(verbly::word::usageDomains %= (verbly::notion::wnid == 106718862));
 }
 std::string sentence::generate() const
@@ -51,54 +64,18 @@ std::string sentence::generate() const
    form << secondSyn;
  }
-  // Attempt to compile the form, restarting if a bad word is generated.
+  // Compile the form.
-  std::set<std::string> badWords = {"raped"};
+  verbly::token tok = verbly::token::capitalize(
+    verbly::token::casing::title_case, form);
-  verbly::token tok = form;
+  while (!tok.isComplete())
-  std::list<std::string> words;
-  for (;;)
  {
-    // Compile the form.
+    visit(tok);
-    while (!tok.isComplete())
-    {
-      visit(tok);
-    }
-    std::string compiled = tok.compile();
-    words = verbly::split<std::list<std::string>>(compiled, " ");
-    // Ensure that there are no bad words in the output.
-    if (!std::any_of(std::begin(words), std::end(words), [&badWords] (const std::string& word) {
-      std::string canonWord;
-      for (char ch : word)
-      {
-        if (std::isalpha(ch))
-        {
-          canonWord.push_back(std::tolower(ch));
-        }
-      }
-      return (badWords.count(canonWord) == 1);
-    })) {
-      break;
-    } else {
-      std::cout << "Bad word generated." << std::endl;
-    }
  }
-  // Put the form into title case.
+  std::string compiled = tok.compile();
-  for (std::string& word : words)
-  {
-    if ((word[0] == '"') && (word.length() > 1))
-    {
-      word[1] = std::toupper(word[1]);
-    } else {
-      word[0] = std::toupper(word[0]);
-    }
-  }
-  return verbly::implode(std::begin(words), std::end(words), " ");
+  return compiled;
 }
 bool sentence::chooseSelrestr(std::set<std::string> selrestrs, std::set<std::string> choices) const
@@ -111,7 +88,7 @@ bool sentence::chooseSelrestr(std::set<std::string> selrestrs, std::set<std::str
      validChoices++;
    }
  }
-  
  return std::bernoulli_distribution(static_cast<double>(validChoices)/static_cast<double>(selrestrs.size()))(rng_);
 }
@@ -131,7 +108,7 @@ verbly::word sentence::generateStandardNoun(
      //&& (verbly::form::complexity == 1)
     // && (verbly::word::tagCount >= tagdist(rng_)) // Favor more common words
      && (verbly::word::tagCount >= 1)
-      && !(verbly::word::usageDomains %= (verbly::notion::wnid == 106718862)); // Blacklist ethnic slurs
+      && badWords_;
    // Only use selection restrictions for a first attempt.
    if (trySelection)
@@ -248,7 +225,7 @@ verbly::word sentence::generateStandardNoun(
          selection += (verbly::notion::wnid == 103670849); // line
        }
      }
-      
      if (selection.compact().getType() != verbly::filter::type::empty)
      {
        condition &= (verbly::notion::fullHypernyms %= std::move(selection));
@@ -281,18 +258,7 @@ verbly::token sentence::generateStandardNounPhrase(
  bool definite) const
 {
  verbly::token utter;
-  verbly::word sounder = noun;
+  bool indefiniteArticle = false;
-  verbly::word descript;
-  if (std::bernoulli_distribution(1.0/8.0)(rng_))
-  {
-    std::geometric_distribution<int> tagdist(0.2);
-    descript = database_.words(
-      (verbly::word::tagCount >= tagdist(rng_))
-      && (verbly::notion::partOfSpeech == verbly::part_of_speech::adjective)).first();
-    sounder = descript;
-  }
  if ((std::bernoulli_distribution(1.0/3.0)(rng_)) && (definite))
  {
@@ -307,18 +273,18 @@ verbly::token sentence::generateStandardNounPhrase(
    {
      utter << "your";
    } else if (!plural) {
-      if (sounder.getBaseForm().startsWithVowelSound())
+      indefiniteArticle = true;
-      {
-        utter << "an";
-      } else {
-        utter << "a";
-      }
    }
  }
-  if (descript.isValid())
+  if (std::bernoulli_distribution(1.0/8.0)(rng_))
  {
-    utter << descript;
+    std::geometric_distribution<int> tagdist(0.2);
+    utter << database_.words(
+      (verbly::word::tagCount >= tagdist(rng_))
+      && (verbly::notion::partOfSpeech == verbly::part_of_speech::adjective)
+      && badWords_).first();
  }
  if (plural && noun.hasInflection(verbly::inflection::plural))
@@ -328,7 +294,12 @@ verbly::token sentence::generateStandardNounPhrase(
    utter << noun;
  }
-  return utter;
+  if (indefiniteArticle)
+  {
+    return verbly::token::indefiniteArticle(utter);
+  } else {
+    return utter;
+  }
 }
 verbly::token sentence::generateClause(
@@ -360,7 +331,8 @@ verbly::token sentence::generateClause(
  verbly::filter verbCondition =
    (verbly::notion::partOfSpeech == verbly::part_of_speech::verb)
-    && frameCondition;
+    && frameCondition
+    && badWords_;
  if (it.hasSynrestr("participle_phrase"))
  {
@@ -501,20 +473,15 @@ verbly::token sentence::generateClause(
          utter << generateClause(sentence);
        } else if (part.nounHasSynrestr("quotation"))
        {
-          verbly::token sentence(std::set<std::string>({"participle_phrase"}));
+          utter << verbly::token::quote("\"", "\"",
-          while (!sentence.isComplete())
+            verbly::token(std::set<std::string>({"past_participle"})));
-          {
-            visit(sentence);
-          }
-          utter << ("\"" + sentence.compile() + "\"");
        } else {
          if (part.nounHasSynrestr("genitive"))
          {
            verbly::word noun = generateStandardNoun("Passive", {"animate"});
            verbly::token owner = generateStandardNounPhrase(noun, "Passive", false, true);
-            std::string ownerStr = owner.compile() + "'s";
-            utter << ownerStr;
+            utter << verbly::token::punctuation("'s", owner);
          }
          verbly::word noun = generateStandardNoun(part.getNounRole(), part.getNounSelrestrs());
@@ -669,7 +636,8 @@ void sentence::visit(verbly::token& it) const
          std::geometric_distribution<int> tagdist(0.2);
          phrase << database_.words(
            (verbly::word::tagCount >= tagdist(rng_))
-            && (verbly::notion::partOfSpeech == verbly::part_of_speech::adjective)).first();
+            && (verbly::notion::partOfSpeech == verbly::part_of_speech::adjective)
+            && badWords_).first();
        }
        it = phrase;
@@ -680,7 +648,7 @@ void sentence::visit(verbly::token& it) const
        it = database_.words(
          (verbly::notion::partOfSpeech == verbly::part_of_speech::adverb)
          && (verbly::word::tagCount >= tagdist(rng_))
-          ).first();
+          && badWords_).first();
      } else if (it.hasSynrestr("participle_phrase"))
      {
        if (std::bernoulli_distribution(1.0/2.0)(rng_))
@@ -688,11 +656,15 @@ void sentence::visit(verbly::token& it) const
          it = verbly::token(
            database_.words(
              (verbly::notion::partOfSpeech == verbly::part_of_speech::verb)
-              && (verbly::word::forms(verbly::inflection::ing_form))).first(),
+              && (verbly::word::forms(verbly::inflection::ing_form))
+              && badWords_).first(),
            verbly::inflection::ing_form);
        } else {
          it = generateClause(it);
        }
+      } else if (it.hasSynrestr("past_participle"))
+      {
+        it = generateClause(it);
      } else {
        it = "*the reality of the situation*";
      }
@@ -700,6 +672,13 @@ void sentence::visit(verbly::token& it) const
      break;
    }
+    case verbly::token::type::transform:
+    {
+      visit(it.getInnerToken());
+      break;
+    }
    case verbly::token::type::word:
    case verbly::token::type::literal:
    case verbly::token::type::part:
author	Kelly Rauchenberger <fefferburbia@gmail.com>	2017-11-08 11:38:23 -0500
committer	Kelly Rauchenberger <fefferburbia@gmail.com>	2017-11-08 11:38:23 -0500
commit	8bcea38de87292681d32eff488c954e37dda6bbe (patch)
tree	d87c727da19acd0e0ccf65113875f1c590cf7dd5 /sentence.cpp
parent	3d5c1c5d35e3934faffeb63790a8fd494518a979 (diff)
download	advice-8bcea38de87292681d32eff488c954e37dda6bbe.tar.gz advice-8bcea38de87292681d32eff488c954e37dda6bbe.tar.bz2 advice-8bcea38de87292681d32eff488c954e37dda6bbe.zip

diff --git a/sentence.cpp b/sentence.cpp index 5f1b03a..a9b40c2 100644 --- a/sentence.cpp +++ b/sentence.cpp
@@ -9,6 +9,19 @@ sentence::sentence(
9	database_(database),	9	database_(database),
10	rng_(rng)	10	rng_(rng)
11	{	11	{
		12	verbly::filter blacklist;
		13
		14	for (std::string word : {
		15	"raped", "Negro"
		16	})
		17	{
		18	blacklist |= (verbly::form::text == word);
		19	}
		20
		21	badWords_ = !blacklist;
		22
		23	// Blacklist ethnic slurs
		24	badWords_ &= !(verbly::word::usageDomains %= (verbly::notion::wnid == 106718862));
12	}	25	}
13		26
14	std::string sentence::generate() const	27	std::string sentence::generate() const
@@ -51,54 +64,18 @@ std::string sentence::generate() const
51	form << secondSyn;	64	form << secondSyn;
52	}	65	}
53		66
54	// Attempt to compile the form, restarting if a bad word is generated.	67	// Compile the form.
55	std::set<std::string> badWords = {"raped"};	68	verbly::token tok = verbly::token::capitalize(
		69	verbly::token::casing::title_case, form);
56		70
57	verbly::token tok = form;	71	while (!tok.isComplete())
58	std::list<std::string> words;
59	for (;;)
60	{	72	{
61	// Compile the form.	73	visit(tok);
62	while (!tok.isComplete())
63	{
64	visit(tok);
65	}
66
67	std::string compiled = tok.compile();
68	words = verbly::split<std::list<std::string>>(compiled, " ");
69
70	// Ensure that there are no bad words in the output.
71	if (!std::any_of(std::begin(words), std::end(words), [&badWords] (const std::string& word) {
72	std::string canonWord;
73
74	for (char ch : word)
75	{
76	if (std::isalpha(ch))
77	{
78	canonWord.push_back(std::tolower(ch));
79	}
80	}
81
82	return (badWords.count(canonWord) == 1);
83	})) {
84	break;
85	} else {
86	std::cout << "Bad word generated." << std::endl;
87	}
88	}	74	}
89		75
90	// Put the form into title case.	76	std::string compiled = tok.compile();
91	for (std::string& word : words)
92	{
93	if ((word[0] == '"') && (word.length() > 1))
94	{
95	word[1] = std::toupper(word[1]);
96	} else {
97	word[0] = std::toupper(word[0]);
98	}
99	}
100		77
101	return verbly::implode(std::begin(words), std::end(words), " ");	78	return compiled;
102	}	79	}
103		80
104	bool sentence::chooseSelrestr(std::set<std::string> selrestrs, std::set<std::string> choices) const	81	bool sentence::chooseSelrestr(std::set<std::string> selrestrs, std::set<std::string> choices) const
@@ -111,7 +88,7 @@ bool sentence::chooseSelrestr(std::set<std::string> selrestrs, std::set<std::str
111	validChoices++;	88	validChoices++;
112	}	89	}
113	}	90	}
114		91
115	return std::bernoulli_distribution(static_cast<double>(validChoices)/static_cast<double>(selrestrs.size()))(rng_);	92	return std::bernoulli_distribution(static_cast<double>(validChoices)/static_cast<double>(selrestrs.size()))(rng_);
116	}	93	}
117		94
@@ -131,7 +108,7 @@ verbly::word sentence::generateStandardNoun(
131	//&& (verbly::form::complexity == 1)	108	//&& (verbly::form::complexity == 1)
132	// && (verbly::word::tagCount >= tagdist(rng_)) // Favor more common words	109	// && (verbly::word::tagCount >= tagdist(rng_)) // Favor more common words
133	&& (verbly::word::tagCount >= 1)	110	&& (verbly::word::tagCount >= 1)
134	&& !(verbly::word::usageDomains %= (verbly::notion::wnid == 106718862)); // Blacklist ethnic slurs	111	&& badWords_;
135		112
136	// Only use selection restrictions for a first attempt.	113	// Only use selection restrictions for a first attempt.
137	if (trySelection)	114	if (trySelection)
@@ -248,7 +225,7 @@ verbly::word sentence::generateStandardNoun(
248	selection += (verbly::notion::wnid == 103670849); // line	225	selection += (verbly::notion::wnid == 103670849); // line
249	}	226	}
250	}	227	}
251		228
252	if (selection.compact().getType() != verbly::filter::type::empty)	229	if (selection.compact().getType() != verbly::filter::type::empty)
253	{	230	{
254	condition &= (verbly::notion::fullHypernyms %= std::move(selection));	231	condition &= (verbly::notion::fullHypernyms %= std::move(selection));
@@ -281,18 +258,7 @@ verbly::token sentence::generateStandardNounPhrase(
281	bool definite) const	258	bool definite) const
282	{	259	{
283	verbly::token utter;	260	verbly::token utter;
284	verbly::word sounder = noun;	261	bool indefiniteArticle = false;
285	verbly::word descript;
286
287	if (std::bernoulli_distribution(1.0/8.0)(rng_))
288	{
289	std::geometric_distribution<int> tagdist(0.2);
290	descript = database_.words(
291	(verbly::word::tagCount >= tagdist(rng_))
292	&& (verbly::notion::partOfSpeech == verbly::part_of_speech::adjective)).first();
293
294	sounder = descript;
295	}
296		262
297	if ((std::bernoulli_distribution(1.0/3.0)(rng_)) && (definite))	263	if ((std::bernoulli_distribution(1.0/3.0)(rng_)) && (definite))
298	{	264	{
@@ -307,18 +273,18 @@ verbly::token sentence::generateStandardNounPhrase(
307	{	273	{
308	utter << "your";	274	utter << "your";
309	} else if (!plural) {	275	} else if (!plural) {
310	if (sounder.getBaseForm().startsWithVowelSound())	276	indefiniteArticle = true;
311	{
312	utter << "an";
313	} else {
314	utter << "a";
315	}
316	}	277	}
317	}	278	}
318		279
319	if (descript.isValid())	280	if (std::bernoulli_distribution(1.0/8.0)(rng_))
320	{	281	{
321	utter << descript;	282	std::geometric_distribution<int> tagdist(0.2);
		283
		284	utter << database_.words(
		285	(verbly::word::tagCount >= tagdist(rng_))
		286	&& (verbly::notion::partOfSpeech == verbly::part_of_speech::adjective)
		287	&& badWords_).first();
322	}	288	}
323		289
324	if (plural && noun.hasInflection(verbly::inflection::plural))	290	if (plural && noun.hasInflection(verbly::inflection::plural))
@@ -328,7 +294,12 @@ verbly::token sentence::generateStandardNounPhrase(
328	utter << noun;	294	utter << noun;
329	}	295	}
330		296
331	return utter;	297	if (indefiniteArticle)
		298	{
		299	return verbly::token::indefiniteArticle(utter);
		300	} else {
		301	return utter;
		302	}
332	}	303	}
333		304
334	verbly::token sentence::generateClause(	305	verbly::token sentence::generateClause(
@@ -360,7 +331,8 @@ verbly::token sentence::generateClause(
360		331
361	verbly::filter verbCondition =	332	verbly::filter verbCondition =
362	(verbly::notion::partOfSpeech == verbly::part_of_speech::verb)	333	(verbly::notion::partOfSpeech == verbly::part_of_speech::verb)
363	&& frameCondition;	334	&& frameCondition
		335	&& badWords_;
364		336
365	if (it.hasSynrestr("participle_phrase"))	337	if (it.hasSynrestr("participle_phrase"))
366	{	338	{
@@ -501,20 +473,15 @@ verbly::token sentence::generateClause(
501	utter << generateClause(sentence);	473	utter << generateClause(sentence);
502	} else if (part.nounHasSynrestr("quotation"))	474	} else if (part.nounHasSynrestr("quotation"))
503	{	475	{
504	verbly::token sentence(std::set<std::string>({"participle_phrase"}));	476	utter << verbly::token::quote("\"", "\"",
505	while (!sentence.isComplete())	477	verbly::token(std::set<std::string>({"past_participle"})));
506	{
507	visit(sentence);
508	}
509
510	utter << ("\"" + sentence.compile() + "\"");
511	} else {	478	} else {
512	if (part.nounHasSynrestr("genitive"))	479	if (part.nounHasSynrestr("genitive"))
513	{	480	{
514	verbly::word noun = generateStandardNoun("Passive", {"animate"});	481	verbly::word noun = generateStandardNoun("Passive", {"animate"});
515	verbly::token owner = generateStandardNounPhrase(noun, "Passive", false, true);	482	verbly::token owner = generateStandardNounPhrase(noun, "Passive", false, true);
516	std::string ownerStr = owner.compile() + "'s";	483
517	utter << ownerStr;	484	utter << verbly::token::punctuation("'s", owner);
518	}	485	}
519		486
520	verbly::word noun = generateStandardNoun(part.getNounRole(), part.getNounSelrestrs());	487	verbly::word noun = generateStandardNoun(part.getNounRole(), part.getNounSelrestrs());
@@ -669,7 +636,8 @@ void sentence::visit(verbly::token& it) const
669	std::geometric_distribution<int> tagdist(0.2);	636	std::geometric_distribution<int> tagdist(0.2);
670	phrase << database_.words(	637	phrase << database_.words(
671	(verbly::word::tagCount >= tagdist(rng_))	638	(verbly::word::tagCount >= tagdist(rng_))
672	&& (verbly::notion::partOfSpeech == verbly::part_of_speech::adjective)).first();	639	&& (verbly::notion::partOfSpeech == verbly::part_of_speech::adjective)
		640	&& badWords_).first();
673	}	641	}
674		642
675	it = phrase;	643	it = phrase;
@@ -680,7 +648,7 @@ void sentence::visit(verbly::token& it) const
680	it = database_.words(	648	it = database_.words(
681	(verbly::notion::partOfSpeech == verbly::part_of_speech::adverb)	649	(verbly::notion::partOfSpeech == verbly::part_of_speech::adverb)
682	&& (verbly::word::tagCount >= tagdist(rng_))	650	&& (verbly::word::tagCount >= tagdist(rng_))
683	).first();	651	&& badWords_).first();
684	} else if (it.hasSynrestr("participle_phrase"))	652	} else if (it.hasSynrestr("participle_phrase"))
685	{	653	{
686	if (std::bernoulli_distribution(1.0/2.0)(rng_))	654	if (std::bernoulli_distribution(1.0/2.0)(rng_))
@@ -688,11 +656,15 @@ void sentence::visit(verbly::token& it) const
688	it = verbly::token(	656	it = verbly::token(
689	database_.words(	657	database_.words(
690	(verbly::notion::partOfSpeech == verbly::part_of_speech::verb)	658	(verbly::notion::partOfSpeech == verbly::part_of_speech::verb)
691	&& (verbly::word::forms(verbly::inflection::ing_form))).first(),	659	&& (verbly::word::forms(verbly::inflection::ing_form))
		660	&& badWords_).first(),
692	verbly::inflection::ing_form);	661	verbly::inflection::ing_form);
693	} else {	662	} else {
694	it = generateClause(it);	663	it = generateClause(it);
695	}	664	}
		665	} else if (it.hasSynrestr("past_participle"))
		666	{
		667	it = generateClause(it);
696	} else {	668	} else {
697	it = "*the reality of the situation*";	669	it = "*the reality of the situation*";
698	}	670	}
@@ -700,6 +672,13 @@ void sentence::visit(verbly::token& it) const
700	break;	672	break;
701	}	673	}
702		674
		675	case verbly::token::type::transform:
		676	{
		677	visit(it.getInnerToken());
		678
		679	break;
		680	}
		681
703	case verbly::token::type::word:	682	case verbly::token::type::word:
704	case verbly::token::type::literal:	683	case verbly::token::type::literal:
705	case verbly::token::type::part:	684	case verbly::token::type::part: