summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--data.txt247
-rw-r--r--patterner.cpp161
2 files changed, 342 insertions, 66 deletions
diff --git a/data.txt b/data.txt index f25657f..dd476ef 100644 --- a/data.txt +++ b/data.txt
@@ -28,6 +28,251 @@ you're a piece of {WORD}
28what the {WORD} 28what the {WORD}
29what the {WORD}ing {WORD} 29what the {WORD}ing {WORD}
30kindly catch the 9am train to {Word}sville 30kindly catch the 9am train to {Word}sville
31If you look up "{WORD}" in the dictionary, there's a picture of you underneath!
32I never want to see your {WORD}ing {WORD} again
31 33
32INSULT,END 34INSULT,END
33you piece of {WORD} \ No newline at end of file 35you piece of {WORD}
36
37WORD
38{STARTSONANT}{VOWEL}{ENDSONANT}
39{WORD2}
40
41VOWEL
42a
43e
44i
45o
46u
47a
48e
49i
50o
51u
52a
53e
54i
55o
56u
57a
58e
59i
60o
61u
62ae
63ai
64au
65ea
66ee
67ei
68ie
69io
70oi
71ou
72ui
73uu
74
75STARTSONANT
76b
77c
78d
79f
80g
81h
82j
83k
84l
85m
86n
87p
88r
89s
90t
91b
92c
93d
94f
95g
96h
97j
98k
99l
100m
101n
102p
103q
104r
105s
106t
107v
108w
109x
110z
111b
112c
113d
114f
115g
116h
117j
118k
119l
120m
121n
122p
123q
124r
125s
126t
127v
128w
129x
130z
131bh
132bl
133br
134ch
135cl
136cr
137dr
138dw
139fl
140fr
141gl
142gr
143kl
144kn
145kr
146ph
147pl
148pr
149pt
150rh
151sc
152sh
153sk
154sl
155sm
156sn
157sp
158sq
159sr
160st
161sw
162th
163tr
164tw
165wh
166wr
167zh
168
169ENDSONANT
170b
171d
172f
173g
174h
175k
176l
177m
178n
179p
180r
181t
182b
183d
184f
185g
186h
187j
188k
189l
190m
191n
192p
193r
194t
195v
196w
197x
198z
199b
200d
201f
202g
203h
204j
205k
206l
207m
208n
209p
210r
211t
212v
213w
214x
215z
216bf
217bh
218bk
219ch
220ck
221dk
222dp
223dt
224ff
225fh
226fk
227fp
228ft
229gf
230gh
231gk
232hk
233lb
234ld
235lf
236lg
237lh
238lk
239lm
240ln
241lp
242lt
243mf
244mk
245mn
246mp
247nd
248nf
249ng
250nk
251np
252nt
253pf
254ph
255pk
256pt
257rb
258rd
259rf
260rg
261rk
262rm
263rn
264rp
265rt
266sk
267sp
268st
269wd
270wf
271wg
272wk
273wl
274wm
275wn
276wp
277wt
278zk \ No newline at end of file
diff --git a/patterner.cpp b/patterner.cpp index af844cf..1deffb8 100644 --- a/patterner.cpp +++ b/patterner.cpp
@@ -47,92 +47,123 @@ patterner::patterner(
47std::string patterner::generate() 47std::string patterner::generate()
48{ 48{
49 std::string action = "{MAIN}"; 49 std::string action = "{MAIN}";
50 int tknloc;
51 while ((tknloc = action.find("{")) != std::string::npos)
52 {
53 std::string token = action.substr(tknloc+1, action.find("}")-tknloc-1);
54 std::string modifier;
55 int modloc;
56 if ((modloc = token.find(":")) != std::string::npos)
57 {
58 modifier = token.substr(modloc+1);
59 token = token.substr(0, modloc);
60 }
61 50
62 std::string canontkn; 51 verbly::filter slurBlacklist =
63 std::transform(std::begin(token), std::end(token), 52 (verbly::word::usageDomains %= (
64 std::back_inserter(canontkn), [] (char ch) { 53 (verbly::notion::wnid == 106718862) // ethnic slur
65 return std::toupper(ch); 54 || (verbly::notion::wnid == 106717170) // disparagement (other slurs)
66 }); 55 || (verbly::notion::wnid == 107124340))); // obscenity (other profanity)
67 56
68 std::string result; 57 while (action == "{MAIN}")
69 if (canontkn == "WORD") 58 {
70 { 59 int tknloc;
71 result = data_.words( 60 while ((tknloc = action.find("{")) != std::string::npos)
72 (verbly::word::forms(verbly::inflection::base) %=
73 (verbly::form::complexity == 1)
74 && (verbly::form::length == 4)
75 && (verbly::form::proper == false)
76 && (verbly::pronunciation::numOfSyllables == 1))
77 && !(verbly::word::usageDomains %=
78 (verbly::notion::wnid == 106718862))) // Blacklist ethnic slurs
79 .first().getBaseForm().getText();
80 } else if (canontkn == "\\N")
81 { 61 {
82 result = "\n"; 62 std::string token = action.substr(tknloc+1, action.find("}")-tknloc-1);
83 } else { 63 std::string modifier;
84 auto group = groups_[canontkn]; 64 int modloc;
85 std::uniform_int_distribution<int> groupdist(0, group.size()-1); 65 if ((modloc = token.find(":")) != std::string::npos)
86 int groupind = groupdist(rng_); 66 {
87 result = group[groupind]; 67 modifier = token.substr(modloc+1);
88 } 68 token = token.substr(0, modloc);
69 }
89 70
90 if (modifier == "indefinite") 71 std::string canontkn;
91 { 72 std::transform(std::begin(token), std::end(token),
92 if ((result.length() > 1) && (isupper(result[0])) && (isupper(result[1]))) 73 std::back_inserter(canontkn), [] (char ch) {
74 return std::toupper(ch);
75 });
76
77 std::string result;
78 if (canontkn == "WORD2")
93 { 79 {
94 result = "an " + result; 80 result = data_.words(
95 } else if ((result[0] == 'a') || (result[0] == 'e') || (result[0] == 'i') || (result[0] == 'o') || (result[0] == 'u')) 81 (verbly::notion::partOfSpeech == verbly::part_of_speech::noun)
82 && (verbly::word::forms(verbly::inflection::base) %=
83 (verbly::form::complexity == 1)
84 && (verbly::form::length == 4)
85 && (verbly::form::proper == false)
86 && (verbly::pronunciation::numOfSyllables == 1))
87 && !slurBlacklist)
88 .first().getBaseForm().getText();
89 } else if (canontkn == "\\N")
96 { 90 {
97 result = "an " + result; 91 result = "\n";
98 } else { 92 } else {
99 result = "a " + result; 93 auto group = groups_[canontkn];
94 std::uniform_int_distribution<int> groupdist(0, group.size()-1);
95 int groupind = groupdist(rng_);
96 result = group[groupind];
100 } 97 }
101 }
102 98
103 std::string finalresult; 99 if (modifier == "indefinite")
104 if (islower(token[0]))
105 {
106 std::transform(std::begin(result), std::end(result), std::back_inserter(finalresult), [] (char ch) {
107 return std::tolower(ch);
108 });
109 } else if (isupper(token[0]) && !isupper(token[1]))
110 {
111 auto words = verbly::split<std::list<std::string>>(result, " ");
112 for (auto& word : words)
113 { 100 {
114 if (word[0] == '{') 101 if ((result.length() > 1) && (isupper(result[0])) && (isupper(result[1])))
115 { 102 {
116 word[1] = std::toupper(word[1]); 103 result = "an " + result;
104 } else if ((result[0] == 'a') || (result[0] == 'e') || (result[0] == 'i') || (result[0] == 'o') || (result[0] == 'u'))
105 {
106 result = "an " + result;
107 } else {
108 result = "a " + result;
109 }
110 }
117 111
118 for (int k=2; k<word.length(); k++) 112 std::string finalresult;
113 if (islower(token[0]))
114 {
115 std::transform(std::begin(result), std::end(result), std::back_inserter(finalresult), [] (char ch) {
116 return std::tolower(ch);
117 });
118 } else if (isupper(token[0]) && !isupper(token[1]))
119 {
120 auto words = verbly::split<std::list<std::string>>(result, " ");
121 for (auto& word : words)
122 {
123 if (word[0] == '{')
119 { 124 {
120 if (std::isalpha(word[k])) 125 word[1] = std::toupper(word[1]);
126
127 for (int k=2; k<word.length(); k++)
121 { 128 {
122 word[k] = std::tolower(word[k]); 129 if (std::isalpha(word[k]))
130 {
131 word[k] = std::tolower(word[k]);
132 }
123 } 133 }
134 } else {
135 word[0] = std::toupper(word[0]);
124 } 136 }
125 } else {
126 word[0] = std::toupper(word[0]);
127 } 137 }
138
139 finalresult = verbly::implode(std::begin(words), std::end(words), " ");
140 } else {
141 finalresult = result;
128 } 142 }
129 143
130 finalresult = verbly::implode(std::begin(words), std::end(words), " "); 144 action.replace(tknloc, action.find("}")-tknloc+1, finalresult);
131 } else {
132 finalresult = result;
133 } 145 }
134 146
135 action.replace(tknloc, action.find("}")-tknloc+1, finalresult); 147 std::string canonical;
148 std::transform(std::begin(action), std::end(action),
149 std::back_inserter(canonical), [] (char ch)
150 {
151 return std::tolower(ch);
152 });
153
154 std::list<std::string> words =
155 verbly::split<std::list<std::string>>(canonical, " ");
156
157 for (std::string word : words)
158 {
159 if (!data_.forms(
160 (verbly::form::text == word)
161 && slurBlacklist).all().empty())
162 {
163 action = "{MAIN}";
164 break;
165 }
166 }
136 } 167 }
137 168
138 return action; 169 return action;