2 files changed, 342 insertions, 66 deletions
diff --git a/data.txt b/data.txt
index f25657f..dd476ef 100644
--- a/data.txt
+++ b/data.txt

@@ -28,6 +28,251 @@ you're a piece of {WORD}
 what the {WORD}
 what the {WORD}ing {WORD}
 kindly catch the 9am train to {Word}sville
+If you look up "{WORD}" in the dictionary, there's a picture of you underneath!
+I never want to see your {WORD}ing {WORD} again
 INSULT,END
-you piece of {WORD}
-\ No newline at end of file
+you piece of {WORD}
+WORD
+{STARTSONANT}{VOWEL}{ENDSONANT}
+{WORD2}
+VOWEL
+a
+e
+i
+o
+u
+a
+e
+i
+o
+u
+a
+e
+i
+o
+u
+a
+e
+i
+o
+u
+ae
+ai
+au
+ea
+ee
+ei
+ie
+io
+oi
+ou
+ui
+uu
+STARTSONANT
+b
+c
+d
+f
+g
+h
+j
+k
+l
+m
+n
+p
+r
+s
+t
+b
+c
+d
+f
+g
+h
+j
+k
+l
+m
+n
+p
+q
+r
+s
+t
+v
+w
+x
+z
+b
+c
+d
+f
+g
+h
+j
+k
+l
+m
+n
+p
+q
+r
+s
+t
+v
+w
+x
+z
+bh
+bl
+br
+ch
+cl
+cr
+dr
+dw
+fl
+fr
+gl
+gr
+kl
+kn
+kr
+ph
+pl
+pr
+pt
+rh
+sc
+sh
+sk
+sl
+sm
+sn
+sp
+sq
+sr
+st
+sw
+th
+tr
+tw
+wh
+wr
+zh
+ENDSONANT
+b
+d
+f
+g
+h
+k
+l
+m
+n
+p
+r
+t
+b
+d
+f
+g
+h
+j
+k
+l
+m
+n
+p
+r
+t
+v
+w
+x
+z
+b
+d
+f
+g
+h
+j
+k
+l
+m
+n
+p
+r
+t
+v
+w
+x
+z
+bf
+bh
+bk
+ch
+ck
+dk
+dp
+dt
+ff
+fh
+fk
+fp
+ft
+gf
+gh
+gk
+hk
+lb
+ld
+lf
+lg
+lh
+lk
+lm
+ln
+lp
+lt
+mf
+mk
+mn
+mp
+nd
+nf
+ng
+nk
+np
+nt
+pf
+ph
+pk
+pt
+rb
+rd
+rf
+rg
+rk
+rm
+rn
+rp
+rt
+sk
+sp
+st
+wd
+wf
+wg
+wk
+wl
+wm
+wn
+wp
+wt
+zk
+\ No newline at end of file
diff --git a/patterner.cpp b/patterner.cpp
index af844cf..1deffb8 100644
--- a/patterner.cpp
+++ b/patterner.cpp

@@ -47,92 +47,123 @@ patterner::patterner(
 std::string patterner::generate()
 {
  std::string action = "{MAIN}";
-  int tknloc;
-  while ((tknloc = action.find("{")) != std::string::npos)
-  {
-    std::string token = action.substr(tknloc+1, action.find("}")-tknloc-1);
-    std::string modifier;
-    int modloc;
-    if ((modloc = token.find(":")) != std::string::npos)
-    {
-      modifier = token.substr(modloc+1);
-      token = token.substr(0, modloc);
-    }
-    std::string canontkn;
+  verbly::filter slurBlacklist =
-    std::transform(std::begin(token), std::end(token),
+    (verbly::word::usageDomains %= (
-        std::back_inserter(canontkn), [] (char ch) {
+      (verbly::notion::wnid == 106718862) // ethnic slur
-      return std::toupper(ch);
+      || (verbly::notion::wnid == 106717170) // disparagement (other slurs)
-    });
+      || (verbly::notion::wnid == 107124340))); // obscenity (other profanity)
-    std::string result;
+  while (action == "{MAIN}")
-    if (canontkn == "WORD")
+  {
-    {
+    int tknloc;
-      result = data_.words(
+    while ((tknloc = action.find("{")) != std::string::npos)
-        (verbly::word::forms(verbly::inflection::base) %=
-          (verbly::form::complexity == 1)
-            && (verbly::form::length == 4)
-            && (verbly::form::proper == false)
-            && (verbly::pronunciation::numOfSyllables == 1))
-        && !(verbly::word::usageDomains %=
-          (verbly::notion::wnid == 106718862))) // Blacklist ethnic slurs
-        .first().getBaseForm().getText();
-    } else if (canontkn == "\\N")
    {
-      result = "\n";
+      std::string token = action.substr(tknloc+1, action.find("}")-tknloc-1);
-    } else {
+      std::string modifier;
-      auto group = groups_[canontkn];
+      int modloc;
-      std::uniform_int_distribution<int> groupdist(0, group.size()-1);
+      if ((modloc = token.find(":")) != std::string::npos)
-      int groupind = groupdist(rng_);
+      {
-      result = group[groupind];
+        modifier = token.substr(modloc+1);
-    }
+        token = token.substr(0, modloc);
+      }
-    if (modifier == "indefinite")
+      std::string canontkn;
-    {
+      std::transform(std::begin(token), std::end(token),
-      if ((result.length() > 1) && (isupper(result[0])) && (isupper(result[1])))
+          std::back_inserter(canontkn), [] (char ch) {
+        return std::toupper(ch);
+      });
+      std::string result;
+      if (canontkn == "WORD2")
      {
-        result = "an " + result;
+        result = data_.words(
-      } else if ((result[0] == 'a') || (result[0] == 'e') || (result[0] == 'i') || (result[0] == 'o') || (result[0] == 'u'))
+          (verbly::notion::partOfSpeech == verbly::part_of_speech::noun)
+          && (verbly::word::forms(verbly::inflection::base) %=
+            (verbly::form::complexity == 1)
+              && (verbly::form::length == 4)
+              && (verbly::form::proper == false)
+              && (verbly::pronunciation::numOfSyllables == 1))
+          && !slurBlacklist)
+          .first().getBaseForm().getText();
+      } else if (canontkn == "\\N")
      {
-        result = "an " + result;
+        result = "\n";
      } else {
-        result = "a " + result;
+        auto group = groups_[canontkn];
+        std::uniform_int_distribution<int> groupdist(0, group.size()-1);
+        int groupind = groupdist(rng_);
+        result = group[groupind];
      }
-    }
-    std::string finalresult;
+      if (modifier == "indefinite")
-    if (islower(token[0]))
-    {
-      std::transform(std::begin(result), std::end(result), std::back_inserter(finalresult), [] (char ch) {
-        return std::tolower(ch);
-      });
-    } else if (isupper(token[0]) && !isupper(token[1]))
-    {
-      auto words = verbly::split<std::list<std::string>>(result, " ");
-      for (auto& word : words)
      {
-        if (word[0] == '{')
+        if ((result.length() > 1) && (isupper(result[0])) && (isupper(result[1])))
        {
-          word[1] = std::toupper(word[1]);
+          result = "an " + result;
+        } else if ((result[0] == 'a') || (result[0] == 'e') || (result[0] == 'i') || (result[0] == 'o') || (result[0] == 'u'))
+        {
+          result = "an " + result;
+        } else {
+          result = "a " + result;
+        }
+      }
-          for (int k=2; k<word.length(); k++)
+      std::string finalresult;
+      if (islower(token[0]))
+      {
+        std::transform(std::begin(result), std::end(result), std::back_inserter(finalresult), [] (char ch) {
+          return std::tolower(ch);
+        });
+      } else if (isupper(token[0]) && !isupper(token[1]))
+      {
+        auto words = verbly::split<std::list<std::string>>(result, " ");
+        for (auto& word : words)
+        {
+          if (word[0] == '{')
          {
-            if (std::isalpha(word[k]))
+            word[1] = std::toupper(word[1]);
+            for (int k=2; k<word.length(); k++)
            {
-              word[k] = std::tolower(word[k]);
+              if (std::isalpha(word[k]))
+              {
+                word[k] = std::tolower(word[k]);
+              }
            }
+          } else {
+            word[0] = std::toupper(word[0]);
          }
-        } else {
-          word[0] = std::toupper(word[0]);
        }
+        finalresult = verbly::implode(std::begin(words), std::end(words), " ");
+      } else {
+        finalresult = result;
      }
-      finalresult = verbly::implode(std::begin(words), std::end(words), " ");
+      action.replace(tknloc, action.find("}")-tknloc+1, finalresult);
-    } else {
-      finalresult = result;
    }
-    action.replace(tknloc, action.find("}")-tknloc+1, finalresult);
+    std::string canonical;
+    std::transform(std::begin(action), std::end(action),
+      std::back_inserter(canonical), [] (char ch)
+    {
+      return std::tolower(ch);
+    });
+    std::list<std::string> words =
+      verbly::split<std::list<std::string>>(canonical, " ");
+    for (std::string word : words)
+    {
+      if (!data_.forms(
+        (verbly::form::text == word)
+        && slurBlacklist).all().empty())
+      {
+        action = "{MAIN}";
+        break;
+      }
+    }
  }
  return action;

diff --git a/data.txt b/data.txt index f25657f..dd476ef 100644 --- a/data.txt +++ b/data.txt
@@ -28,6 +28,251 @@ you're a piece of {WORD}
28	what the {WORD}	28	what the {WORD}
29	what the {WORD}ing {WORD}	29	what the {WORD}ing {WORD}
30	kindly catch the 9am train to {Word}sville	30	kindly catch the 9am train to {Word}sville
		31	If you look up "{WORD}" in the dictionary, there's a picture of you underneath!
		32	I never want to see your {WORD}ing {WORD} again
31		33
32	INSULT,END	34	INSULT,END
33	you piece of {WORD} \ No newline at end of file	35	you piece of {WORD}
		36
		37	WORD
		38	{STARTSONANT}{VOWEL}{ENDSONANT}
		39	{WORD2}
		40
		41	VOWEL
		42	a
		43	e
		44	i
		45	o
		46	u
		47	a
		48	e
		49	i
		50	o
		51	u
		52	a
		53	e
		54	i
		55	o
		56	u
		57	a
		58	e
		59	i
		60	o
		61	u
		62	ae
		63	ai
		64	au
		65	ea
		66	ee
		67	ei
		68	ie
		69	io
		70	oi
		71	ou
		72	ui
		73	uu
		74
		75	STARTSONANT
		76	b
		77	c
		78	d
		79	f
		80	g
		81	h
		82	j
		83	k
		84	l
		85	m
		86	n
		87	p
		88	r
		89	s
		90	t
		91	b
		92	c
		93	d
		94	f
		95	g
		96	h
		97	j
		98	k
		99	l
		100	m
		101	n
		102	p
		103	q
		104	r
		105	s
		106	t
		107	v
		108	w
		109	x
		110	z
		111	b
		112	c
		113	d
		114	f
		115	g
		116	h
		117	j
		118	k
		119	l
		120	m
		121	n
		122	p
		123	q
		124	r
		125	s
		126	t
		127	v
		128	w
		129	x
		130	z
		131	bh
		132	bl
		133	br
		134	ch
		135	cl
		136	cr
		137	dr
		138	dw
		139	fl
		140	fr
		141	gl
		142	gr
		143	kl
		144	kn
		145	kr
		146	ph
		147	pl
		148	pr
		149	pt
		150	rh
		151	sc
		152	sh
		153	sk
		154	sl
		155	sm
		156	sn
		157	sp
		158	sq
		159	sr
		160	st
		161	sw
		162	th
		163	tr
		164	tw
		165	wh
		166	wr
		167	zh
		168
		169	ENDSONANT
		170	b
		171	d
		172	f
		173	g
		174	h
		175	k
		176	l
		177	m
		178	n
		179	p
		180	r
		181	t
		182	b
		183	d
		184	f
		185	g
		186	h
		187	j
		188	k
		189	l
		190	m
		191	n
		192	p
		193	r
		194	t
		195	v
		196	w
		197	x
		198	z
		199	b
		200	d
		201	f
		202	g
		203	h
		204	j
		205	k
		206	l
		207	m
		208	n
		209	p
		210	r
		211	t
		212	v
		213	w
		214	x
		215	z
		216	bf
		217	bh
		218	bk
		219	ch
		220	ck
		221	dk
		222	dp
		223	dt
		224	ff
		225	fh
		226	fk
		227	fp
		228	ft
		229	gf
		230	gh
		231	gk
		232	hk
		233	lb
		234	ld
		235	lf
		236	lg
		237	lh
		238	lk
		239	lm
		240	ln
		241	lp
		242	lt
		243	mf
		244	mk
		245	mn
		246	mp
		247	nd
		248	nf
		249	ng
		250	nk
		251	np
		252	nt
		253	pf
		254	ph
		255	pk
		256	pt
		257	rb
		258	rd
		259	rf
		260	rg
		261	rk
		262	rm
		263	rn
		264	rp
		265	rt
		266	sk
		267	sp
		268	st
		269	wd
		270	wf
		271	wg
		272	wk
		273	wl
		274	wm
		275	wn
		276	wp
		277	wt
		278	zk \ No newline at end of file


diff --git a/patterner.cpp b/patterner.cpp index af844cf..1deffb8 100644 --- a/patterner.cpp +++ b/patterner.cpp
@@ -47,92 +47,123 @@ patterner::patterner(
47	std::string patterner::generate()	47	std::string patterner::generate()
48	{	48	{
49	std::string action = "{MAIN}";	49	std::string action = "{MAIN}";
50	int tknloc;
51	while ((tknloc = action.find("{")) != std::string::npos)
52	{
53	std::string token = action.substr(tknloc+1, action.find("}")-tknloc-1);
54	std::string modifier;
55	int modloc;
56	if ((modloc = token.find(":")) != std::string::npos)
57	{
58	modifier = token.substr(modloc+1);
59	token = token.substr(0, modloc);
60	}
61		50
62	std::string canontkn;	51	verbly::filter slurBlacklist =
63	std::transform(std::begin(token), std::end(token),	52	(verbly::word::usageDomains %= (
64	std::back_inserter(canontkn), [] (char ch) {	53	(verbly::notion::wnid == 106718862) // ethnic slur
65	return std::toupper(ch);	54	\|\| (verbly::notion::wnid == 106717170) // disparagement (other slurs)
66	});	55	\|\| (verbly::notion::wnid == 107124340))); // obscenity (other profanity)
67		56
68	std::string result;	57	while (action == "{MAIN}")
69	if (canontkn == "WORD")	58	{
70	{	59	int tknloc;
71	result = data_.words(	60	while ((tknloc = action.find("{")) != std::string::npos)
72	(verbly::word::forms(verbly::inflection::base) %=
73	(verbly::form::complexity == 1)
74	&& (verbly::form::length == 4)
75	&& (verbly::form::proper == false)
76	&& (verbly::pronunciation::numOfSyllables == 1))
77	&& !(verbly::word::usageDomains %=
78	(verbly::notion::wnid == 106718862))) // Blacklist ethnic slurs
79	.first().getBaseForm().getText();
80	} else if (canontkn == "\\N")
81	{	61	{
82	result = "\n";	62	std::string token = action.substr(tknloc+1, action.find("}")-tknloc-1);
83	} else {	63	std::string modifier;
84	auto group = groups_[canontkn];	64	int modloc;
85	std::uniform_int_distribution<int> groupdist(0, group.size()-1);	65	if ((modloc = token.find(":")) != std::string::npos)
86	int groupind = groupdist(rng_);	66	{
87	result = group[groupind];	67	modifier = token.substr(modloc+1);
88	}	68	token = token.substr(0, modloc);
		69	}
89		70
90	if (modifier == "indefinite")	71	std::string canontkn;
91	{	72	std::transform(std::begin(token), std::end(token),
92	if ((result.length() > 1) && (isupper(result[0])) && (isupper(result[1])))	73	std::back_inserter(canontkn), [] (char ch) {
		74	return std::toupper(ch);
		75	});
		76
		77	std::string result;
		78	if (canontkn == "WORD2")
93	{	79	{
94	result = "an " + result;	80	result = data_.words(
95	} else if ((result[0] == 'a') \|\| (result[0] == 'e') \|\| (result[0] == 'i') \|\| (result[0] == 'o') \|\| (result[0] == 'u'))	81	(verbly::notion::partOfSpeech == verbly::part_of_speech::noun)
		82	&& (verbly::word::forms(verbly::inflection::base) %=
		83	(verbly::form::complexity == 1)
		84	&& (verbly::form::length == 4)
		85	&& (verbly::form::proper == false)
		86	&& (verbly::pronunciation::numOfSyllables == 1))
		87	&& !slurBlacklist)
		88	.first().getBaseForm().getText();
		89	} else if (canontkn == "\\N")
96	{	90	{
97	result = "an " + result;	91	result = "\n";
98	} else {	92	} else {
99	result = "a " + result;	93	auto group = groups_[canontkn];
		94	std::uniform_int_distribution<int> groupdist(0, group.size()-1);
		95	int groupind = groupdist(rng_);
		96	result = group[groupind];
100	}	97	}
101	}
102		98
103	std::string finalresult;	99	if (modifier == "indefinite")
104	if (islower(token[0]))
105	{
106	std::transform(std::begin(result), std::end(result), std::back_inserter(finalresult), [] (char ch) {
107	return std::tolower(ch);
108	});
109	} else if (isupper(token[0]) && !isupper(token[1]))
110	{
111	auto words = verbly::split<std::list<std::string>>(result, " ");
112	for (auto& word : words)
113	{	100	{
114	if (word[0] == '{')	101	if ((result.length() > 1) && (isupper(result[0])) && (isupper(result[1])))
115	{	102	{
116	word[1] = std::toupper(word[1]);	103	result = "an " + result;
		104	} else if ((result[0] == 'a') \|\| (result[0] == 'e') \|\| (result[0] == 'i') \|\| (result[0] == 'o') \|\| (result[0] == 'u'))
		105	{
		106	result = "an " + result;
		107	} else {
		108	result = "a " + result;
		109	}
		110	}
117		111
118	for (int k=2; k<word.length(); k++)	112	std::string finalresult;
		113	if (islower(token[0]))
		114	{
		115	std::transform(std::begin(result), std::end(result), std::back_inserter(finalresult), [] (char ch) {
		116	return std::tolower(ch);
		117	});
		118	} else if (isupper(token[0]) && !isupper(token[1]))
		119	{
		120	auto words = verbly::split<std::list<std::string>>(result, " ");
		121	for (auto& word : words)
		122	{
		123	if (word[0] == '{')
119	{	124	{
120	if (std::isalpha(word[k]))	125	word[1] = std::toupper(word[1]);
		126
		127	for (int k=2; k<word.length(); k++)
121	{	128	{
122	word[k] = std::tolower(word[k]);	129	if (std::isalpha(word[k]))
		130	{
		131	word[k] = std::tolower(word[k]);
		132	}
123	}	133	}
		134	} else {
		135	word[0] = std::toupper(word[0]);
124	}	136	}
125	} else {
126	word[0] = std::toupper(word[0]);
127	}	137	}
		138
		139	finalresult = verbly::implode(std::begin(words), std::end(words), " ");
		140	} else {
		141	finalresult = result;
128	}	142	}
129		143
130	finalresult = verbly::implode(std::begin(words), std::end(words), " ");	144	action.replace(tknloc, action.find("}")-tknloc+1, finalresult);
131	} else {
132	finalresult = result;
133	}	145	}
134		146
135	action.replace(tknloc, action.find("}")-tknloc+1, finalresult);	147	std::string canonical;
		148	std::transform(std::begin(action), std::end(action),
		149	std::back_inserter(canonical), [] (char ch)
		150	{
		151	return std::tolower(ch);
		152	});
		153
		154	std::list<std::string> words =
		155	verbly::split<std::list<std::string>>(canonical, " ");
		156
		157	for (std::string word : words)
		158	{
		159	if (!data_.forms(
		160	(verbly::form::text == word)
		161	&& slurBlacklist).all().empty())
		162	{
		163	action = "{MAIN}";
		164	break;
		165	}
		166	}
136	}	167	}
137		168
138	return action;	169	return action;