-rw-r--r--  data.txt       | 247
-rw-r--r--  patterner.cpp  | 161
2 files changed, 342 insertions(+), 66 deletions(-)
diff --git a/data.txt b/data.txt
index f25657f..dd476ef 100644
--- a/data.txt
+++ b/data.txt
@@ -28,6 +28,251 @@ you're a piece of {WORD}
 what the {WORD}
 what the {WORD}ing {WORD}
 kindly catch the 9am train to {Word}sville
+If you look up "{WORD}" in the dictionary, there's a picture of you underneath!
+I never want to see your {WORD}ing {WORD} again
 
 INSULT,END
-you piece of {WORD}
\ No newline at end of file
+you piece of {WORD}
+
+WORD
+{STARTSONANT}{VOWEL}{ENDSONANT}
+{WORD2}
+
+VOWEL
+a
+e
+i
+o
+u
+a
+e
+i
+o
+u
+a
+e
+i
+o
+u
+a
+e
+i
+o
+u
+ae
+ai
+au
+ea
+ee
+ei
+ie
+io
+oi
+ou
+ui
+uu
+
+STARTSONANT
+b
+c
+d
+f
+g
+h
+j
+k
+l
+m
+n
+p
+r
+s
+t
+b
+c
+d
+f
+g
+h
+j
+k
+l
+m
+n
+p
+q
+r
+s
+t
+v
+w
+x
+z
+b
+c
+d
+f
+g
+h
+j
+k
+l
+m
+n
+p
+q
+r
+s
+t
+v
+w
+x
+z
+bh
+bl
+br
+ch
+cl
+cr
+dr
+dw
+fl
+fr
+gl
+gr
+kl
+kn
+kr
+ph
+pl
+pr
+pt
+rh
+sc
+sh
+sk
+sl
+sm
+sn
+sp
+sq
+sr
+st
+sw
+th
+tr
+tw
+wh
+wr
+zh
+
+ENDSONANT
+b
+d
+f
+g
+h
+k
+l
+m
+n
+p
+r
+t
+b
+d
+f
+g
+h
+j
+k
+l
+m
+n
+p
+r
+t
+v
+w
+x
+z
+b
+d
+f
+g
+h
+j
+k
+l
+m
+n
+p
+r
+t
+v
+w
+x
+z
+bf
+bh
+bk
+ch
+ck
+dk
+dp
+dt
+ff
+fh
+fk
+fp
+ft
+gf
+gh
+gk
+hk
+lb
+ld
+lf
+lg
+lh
+lk
+lm
+ln
+lp
+lt
+mf
+mk
+mn
+mp
+nd
+nf
+ng
+nk
+np
+nt
+pf
+ph
+pk
+pt
+rb
+rd
+rf
+rg
+rk
+rm
+rn
+rp
+rt
+sk
+sp
+st
+wd
+wf
+wg
+wk
+wl
+wm
+wn
+wp
+wt
+zk
\ No newline at end of file
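The data.txt additions above define a small substitution grammar: each blank-line-separated block begins with a group name, the lines that follow are the group's entries (repeated entries weight the random choice), and {NAME} tokens inside an entry expand recursively, so WORD now resolves either to a made-up {STARTSONANT}{VOWEL}{ENDSONANT} syllable or to a real dictionary word via {WORD2}. The sketch below is a minimal, self-contained illustration of that expansion, not the parser in patterner.cpp; parseGroups(), expand(), and the trimmed-down data string are illustrative, and group flags such as ",END" and modifiers such as ":indefinite" are left out.

```cpp
// Minimal sketch (illustrative only): load newline-delimited groups in the
// style of data.txt and expand {GROUP} tokens by random substitution.
#include <iostream>
#include <map>
#include <random>
#include <sstream>
#include <string>
#include <vector>

using grammar = std::map<std::string, std::vector<std::string>>;

// Parse "NAME\nentry\nentry\n\nNAME2\n..." into named groups. The first
// non-blank line after a blank line names the next group.
grammar parseGroups(const std::string& text)
{
  grammar groups;
  std::istringstream in(text);
  std::string line, current;
  while (std::getline(in, line))
  {
    if (line.empty()) { current.clear(); continue; }
    if (current.empty()) { current = line; continue; }
    groups[current].push_back(line);
  }
  return groups;
}

// Replace each {GROUP} token with a random entry from that group until no
// tokens remain. Assumes well-formed "{...}" tokens; duplicate entries in a
// group simply make that entry more likely.
std::string expand(std::string pattern, const grammar& groups, std::mt19937& rng)
{
  size_t open;
  while ((open = pattern.find('{')) != std::string::npos)
  {
    size_t close = pattern.find('}', open);
    std::string token = pattern.substr(open + 1, close - open - 1);
    const auto& group = groups.at(token);
    std::uniform_int_distribution<size_t> dist(0, group.size() - 1);
    pattern.replace(open, close - open + 1, group[dist(rng)]);
  }
  return pattern;
}

int main()
{
  // A hand-picked subset of the groups added in this commit; {WORD2} (the
  // verbly dictionary lookup) is outside the scope of this sketch.
  const std::string data =
    "WORD\n{STARTSONANT}{VOWEL}{ENDSONANT}\n\n"
    "VOWEL\na\ne\ni\no\nu\n\n"
    "STARTSONANT\nb\nsk\nbr\n\n"
    "ENDSONANT\nrt\nzk\nng\n";

  std::mt19937 rng(std::random_device{}());
  grammar groups = parseGroups(data);
  std::cout << expand("{WORD}", groups, rng) << "\n";
}
```

Running it prints a pronounceable nonsense word such as "bazk" or "skong", which is exactly the role the new WORD group plays in the insult templates.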
diff --git a/patterner.cpp b/patterner.cpp
index af844cf..1deffb8 100644
--- a/patterner.cpp
+++ b/patterner.cpp
@@ -47,92 +47,123 @@ patterner::patterner(
 std::string patterner::generate()
 {
   std::string action = "{MAIN}";
-  int tknloc;
-  while ((tknloc = action.find("{")) != std::string::npos)
-  {
-    std::string token = action.substr(tknloc+1, action.find("}")-tknloc-1);
-    std::string modifier;
-    int modloc;
-    if ((modloc = token.find(":")) != std::string::npos)
-    {
-      modifier = token.substr(modloc+1);
-      token = token.substr(0, modloc);
-    }
 
-    std::string canontkn;
-    std::transform(std::begin(token), std::end(token),
-      std::back_inserter(canontkn), [] (char ch) {
-        return std::toupper(ch);
-      });
+  verbly::filter slurBlacklist =
+    (verbly::word::usageDomains %= (
+      (verbly::notion::wnid == 106718862) // ethnic slur
+      || (verbly::notion::wnid == 106717170) // disparagement (other slurs)
+      || (verbly::notion::wnid == 107124340))); // obscenity (other profanity)
 
-    std::string result;
-    if (canontkn == "WORD")
-    {
-      result = data_.words(
-        (verbly::word::forms(verbly::inflection::base) %=
-          (verbly::form::complexity == 1)
-          && (verbly::form::length == 4)
-          && (verbly::form::proper == false)
-          && (verbly::pronunciation::numOfSyllables == 1))
-        && !(verbly::word::usageDomains %=
-          (verbly::notion::wnid == 106718862))) // Blacklist ethnic slurs
-        .first().getBaseForm().getText();
-    } else if (canontkn == "\\N")
-    {
-      result = "\n";
-    } else {
-      auto group = groups_[canontkn];
-      std::uniform_int_distribution<int> groupdist(0, group.size()-1);
-      int groupind = groupdist(rng_);
-      result = group[groupind];
-    }
+  while (action == "{MAIN}")
+  {
+    int tknloc;
+    while ((tknloc = action.find("{")) != std::string::npos)
+    {
+      std::string token = action.substr(tknloc+1, action.find("}")-tknloc-1);
+      std::string modifier;
+      int modloc;
+      if ((modloc = token.find(":")) != std::string::npos)
+      {
+        modifier = token.substr(modloc+1);
+        token = token.substr(0, modloc);
+      }
 
-    if (modifier == "indefinite")
-    {
-      if ((result.length() > 1) && (isupper(result[0])) && (isupper(result[1])))
-      {
-        result = "an " + result;
-      } else if ((result[0] == 'a') || (result[0] == 'e') || (result[0] == 'i') || (result[0] == 'o') || (result[0] == 'u'))
-      {
-        result = "an " + result;
-      } else {
-        result = "a " + result;
-      }
-    }
+      std::string canontkn;
+      std::transform(std::begin(token), std::end(token),
+        std::back_inserter(canontkn), [] (char ch) {
+          return std::toupper(ch);
+        });
+
+      std::string result;
+      if (canontkn == "WORD2")
+      {
+        result = data_.words(
+          (verbly::notion::partOfSpeech == verbly::part_of_speech::noun)
+          && (verbly::word::forms(verbly::inflection::base) %=
+            (verbly::form::complexity == 1)
+            && (verbly::form::length == 4)
+            && (verbly::form::proper == false)
+            && (verbly::pronunciation::numOfSyllables == 1))
+          && !slurBlacklist)
+          .first().getBaseForm().getText();
+      } else if (canontkn == "\\N")
+      {
+        result = "\n";
+      } else {
+        auto group = groups_[canontkn];
+        std::uniform_int_distribution<int> groupdist(0, group.size()-1);
+        int groupind = groupdist(rng_);
+        result = group[groupind];
+      }
 
-    std::string finalresult;
-    if (islower(token[0]))
-    {
-      std::transform(std::begin(result), std::end(result), std::back_inserter(finalresult), [] (char ch) {
-        return std::tolower(ch);
-      });
-    } else if (isupper(token[0]) && !isupper(token[1]))
-    {
-      auto words = verbly::split<std::list<std::string>>(result, " ");
-      for (auto& word : words)
-      {
-        if (word[0] == '{')
-        {
-          word[1] = std::toupper(word[1]);
+      if (modifier == "indefinite")
+      {
+        if ((result.length() > 1) && (isupper(result[0])) && (isupper(result[1])))
+        {
+          result = "an " + result;
+        } else if ((result[0] == 'a') || (result[0] == 'e') || (result[0] == 'i') || (result[0] == 'o') || (result[0] == 'u'))
+        {
+          result = "an " + result;
+        } else {
+          result = "a " + result;
+        }
+      }
 
-          for (int k=2; k<word.length(); k++)
-          {
-            if (std::isalpha(word[k]))
-            {
-              word[k] = std::tolower(word[k]);
-            }
-          }
-        } else {
-          word[0] = std::toupper(word[0]);
-        }
-      }
+      std::string finalresult;
+      if (islower(token[0]))
+      {
+        std::transform(std::begin(result), std::end(result), std::back_inserter(finalresult), [] (char ch) {
+          return std::tolower(ch);
+        });
+      } else if (isupper(token[0]) && !isupper(token[1]))
+      {
+        auto words = verbly::split<std::list<std::string>>(result, " ");
+        for (auto& word : words)
+        {
+          if (word[0] == '{')
+          {
+            word[1] = std::toupper(word[1]);
+
+            for (int k=2; k<word.length(); k++)
+            {
+              if (std::isalpha(word[k]))
+              {
+                word[k] = std::tolower(word[k]);
+              }
+            }
+          } else {
+            word[0] = std::toupper(word[0]);
+          }
+        }
+
+        finalresult = verbly::implode(std::begin(words), std::end(words), " ");
+      } else {
+        finalresult = result;
+      }
 
-      finalresult = verbly::implode(std::begin(words), std::end(words), " ");
-    } else {
-      finalresult = result;
-    }
+      action.replace(tknloc, action.find("}")-tknloc+1, finalresult);
+    }
 
-    action.replace(tknloc, action.find("}")-tknloc+1, finalresult);
+    std::string canonical;
+    std::transform(std::begin(action), std::end(action),
+      std::back_inserter(canonical), [] (char ch)
+      {
+        return std::tolower(ch);
+      });
+
+    std::list<std::string> words =
+      verbly::split<std::list<std::string>>(canonical, " ");
+
+    for (std::string word : words)
+    {
+      if (!data_.forms(
+        (verbly::form::text == word)
+        && slurBlacklist).all().empty())
+      {
+        action = "{MAIN}";
+        break;
+      }
+    }
   }
 
   return action;
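Taken together, the patterner.cpp changes generalize the old single ethnic-slur exclusion into a reusable slurBlacklist filter (now also applied to the {WORD2} dictionary lookups) and wrap the whole expansion in a while (action == "{MAIN}") loop: once the template is fully expanded, the result is lowercased, split on spaces, and each word is checked against the same blacklist via data_.forms(); any hit resets action to "{MAIN}" so the outer loop regenerates from scratch. The sketch below shows only that regenerate-until-clean pattern in isolation; the std::set blacklist, generateOnce(), and the sample strings are hypothetical stand-ins for the verbly filter and the template expansion, not the project's code.

```cpp
// Minimal sketch of the retry loop introduced in generate(): produce a
// candidate, scan every word of the lowercased output against a blacklist,
// and start over if anything matches.
#include <algorithm>
#include <cctype>
#include <iostream>
#include <iterator>
#include <random>
#include <set>
#include <sstream>
#include <string>
#include <vector>

namespace {

// Stand-in for the verbly usage-domain filter used by the real code.
const std::set<std::string> blacklist = {"badword", "slur"};

// Stand-in for one full expansion of "{MAIN}".
std::string generateOnce(std::mt19937& rng)
{
  const std::vector<std::string> samples = {
    "What the Badword", "Kindly catch the 9am train to Blorpsville"};
  std::uniform_int_distribution<size_t> dist(0, samples.size() - 1);
  return samples[dist(rng)];
}

// Mirrors the post-pass in the new generate(): lowercase, split on
// whitespace, reject the whole output if any word is blacklisted.
bool containsBlacklistedWord(const std::string& text)
{
  std::string lower;
  std::transform(text.begin(), text.end(), std::back_inserter(lower),
                 [](unsigned char ch) { return static_cast<char>(std::tolower(ch)); });
  std::istringstream words(lower);
  std::string word;
  while (words >> word)
  {
    if (blacklist.count(word)) return true;
  }
  return false;
}

}  // namespace

int main()
{
  std::mt19937 rng(std::random_device{}());
  std::string output;
  do {
    output = generateOnce(rng);               // analogous to expanding "{MAIN}"
  } while (containsBlacklistedWord(output));  // regenerate until it passes
  std::cout << output << "\n";
}
```

As in the diff, a failed check throws away the entire output and triggers another full expansion rather than trying to patch the offending word, which keeps the check independent of how the string was assembled.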