diff options
| author | Kelly Rauchenberger <fefferburbia@gmail.com> | 2017-10-16 14:00:34 -0400 |
|---|---|---|
| committer | Kelly Rauchenberger <fefferburbia@gmail.com> | 2017-10-16 14:00:34 -0400 |
| commit | 58cdbee6f08bd5d4f371d8eb9739d74332213703 (patch) | |
| tree | d468ab60bbf1a494533d4740c5e03ee0b2e85b6e | |
| parent | f0849f537677a440ae138c5176ad86eab99db1a2 (diff) | |
| download | insult-58cdbee6f08bd5d4f371d8eb9739d74332213703.tar.gz insult-58cdbee6f08bd5d4f371d8eb9739d74332213703.tar.bz2 insult-58cdbee6f08bd5d4f371d8eb9739d74332213703.zip | |
Added in made-up words
| -rw-r--r-- | data.txt | 247 | ||||
| -rw-r--r-- | patterner.cpp | 161 |
2 files changed, 342 insertions, 66 deletions
| diff --git a/data.txt b/data.txt index f25657f..dd476ef 100644 --- a/data.txt +++ b/data.txt | |||
| @@ -28,6 +28,251 @@ you're a piece of {WORD} | |||
| 28 | what the {WORD} | 28 | what the {WORD} |
| 29 | what the {WORD}ing {WORD} | 29 | what the {WORD}ing {WORD} |
| 30 | kindly catch the 9am train to {Word}sville | 30 | kindly catch the 9am train to {Word}sville |
| 31 | If you look up "{WORD}" in the dictionary, there's a picture of you underneath! | ||
| 32 | I never want to see your {WORD}ing {WORD} again | ||
| 31 | 33 | ||
| 32 | INSULT,END | 34 | INSULT,END |
| 33 | you piece of {WORD} \ No newline at end of file | 35 | you piece of {WORD} |
| 36 | |||
| 37 | WORD | ||
| 38 | {STARTSONANT}{VOWEL}{ENDSONANT} | ||
| 39 | {WORD2} | ||
| 40 | |||
| 41 | VOWEL | ||
| 42 | a | ||
| 43 | e | ||
| 44 | i | ||
| 45 | o | ||
| 46 | u | ||
| 47 | a | ||
| 48 | e | ||
| 49 | i | ||
| 50 | o | ||
| 51 | u | ||
| 52 | a | ||
| 53 | e | ||
| 54 | i | ||
| 55 | o | ||
| 56 | u | ||
| 57 | a | ||
| 58 | e | ||
| 59 | i | ||
| 60 | o | ||
| 61 | u | ||
| 62 | ae | ||
| 63 | ai | ||
| 64 | au | ||
| 65 | ea | ||
| 66 | ee | ||
| 67 | ei | ||
| 68 | ie | ||
| 69 | io | ||
| 70 | oi | ||
| 71 | ou | ||
| 72 | ui | ||
| 73 | uu | ||
| 74 | |||
| 75 | STARTSONANT | ||
| 76 | b | ||
| 77 | c | ||
| 78 | d | ||
| 79 | f | ||
| 80 | g | ||
| 81 | h | ||
| 82 | j | ||
| 83 | k | ||
| 84 | l | ||
| 85 | m | ||
| 86 | n | ||
| 87 | p | ||
| 88 | r | ||
| 89 | s | ||
| 90 | t | ||
| 91 | b | ||
| 92 | c | ||
| 93 | d | ||
| 94 | f | ||
| 95 | g | ||
| 96 | h | ||
| 97 | j | ||
| 98 | k | ||
| 99 | l | ||
| 100 | m | ||
| 101 | n | ||
| 102 | p | ||
| 103 | q | ||
| 104 | r | ||
| 105 | s | ||
| 106 | t | ||
| 107 | v | ||
| 108 | w | ||
| 109 | x | ||
| 110 | z | ||
| 111 | b | ||
| 112 | c | ||
| 113 | d | ||
| 114 | f | ||
| 115 | g | ||
| 116 | h | ||
| 117 | j | ||
| 118 | k | ||
| 119 | l | ||
| 120 | m | ||
| 121 | n | ||
| 122 | p | ||
| 123 | q | ||
| 124 | r | ||
| 125 | s | ||
| 126 | t | ||
| 127 | v | ||
| 128 | w | ||
| 129 | x | ||
| 130 | z | ||
| 131 | bh | ||
| 132 | bl | ||
| 133 | br | ||
| 134 | ch | ||
| 135 | cl | ||
| 136 | cr | ||
| 137 | dr | ||
| 138 | dw | ||
| 139 | fl | ||
| 140 | fr | ||
| 141 | gl | ||
| 142 | gr | ||
| 143 | kl | ||
| 144 | kn | ||
| 145 | kr | ||
| 146 | ph | ||
| 147 | pl | ||
| 148 | pr | ||
| 149 | pt | ||
| 150 | rh | ||
| 151 | sc | ||
| 152 | sh | ||
| 153 | sk | ||
| 154 | sl | ||
| 155 | sm | ||
| 156 | sn | ||
| 157 | sp | ||
| 158 | sq | ||
| 159 | sr | ||
| 160 | st | ||
| 161 | sw | ||
| 162 | th | ||
| 163 | tr | ||
| 164 | tw | ||
| 165 | wh | ||
| 166 | wr | ||
| 167 | zh | ||
| 168 | |||
| 169 | ENDSONANT | ||
| 170 | b | ||
| 171 | d | ||
| 172 | f | ||
| 173 | g | ||
| 174 | h | ||
| 175 | k | ||
| 176 | l | ||
| 177 | m | ||
| 178 | n | ||
| 179 | p | ||
| 180 | r | ||
| 181 | t | ||
| 182 | b | ||
| 183 | d | ||
| 184 | f | ||
| 185 | g | ||
| 186 | h | ||
| 187 | j | ||
| 188 | k | ||
| 189 | l | ||
| 190 | m | ||
| 191 | n | ||
| 192 | p | ||
| 193 | r | ||
| 194 | t | ||
| 195 | v | ||
| 196 | w | ||
| 197 | x | ||
| 198 | z | ||
| 199 | b | ||
| 200 | d | ||
| 201 | f | ||
| 202 | g | ||
| 203 | h | ||
| 204 | j | ||
| 205 | k | ||
| 206 | l | ||
| 207 | m | ||
| 208 | n | ||
| 209 | p | ||
| 210 | r | ||
| 211 | t | ||
| 212 | v | ||
| 213 | w | ||
| 214 | x | ||
| 215 | z | ||
| 216 | bf | ||
| 217 | bh | ||
| 218 | bk | ||
| 219 | ch | ||
| 220 | ck | ||
| 221 | dk | ||
| 222 | dp | ||
| 223 | dt | ||
| 224 | ff | ||
| 225 | fh | ||
| 226 | fk | ||
| 227 | fp | ||
| 228 | ft | ||
| 229 | gf | ||
| 230 | gh | ||
| 231 | gk | ||
| 232 | hk | ||
| 233 | lb | ||
| 234 | ld | ||
| 235 | lf | ||
| 236 | lg | ||
| 237 | lh | ||
| 238 | lk | ||
| 239 | lm | ||
| 240 | ln | ||
| 241 | lp | ||
| 242 | lt | ||
| 243 | mf | ||
| 244 | mk | ||
| 245 | mn | ||
| 246 | mp | ||
| 247 | nd | ||
| 248 | nf | ||
| 249 | ng | ||
| 250 | nk | ||
| 251 | np | ||
| 252 | nt | ||
| 253 | pf | ||
| 254 | ph | ||
| 255 | pk | ||
| 256 | pt | ||
| 257 | rb | ||
| 258 | rd | ||
| 259 | rf | ||
| 260 | rg | ||
| 261 | rk | ||
| 262 | rm | ||
| 263 | rn | ||
| 264 | rp | ||
| 265 | rt | ||
| 266 | sk | ||
| 267 | sp | ||
| 268 | st | ||
| 269 | wd | ||
| 270 | wf | ||
| 271 | wg | ||
| 272 | wk | ||
| 273 | wl | ||
| 274 | wm | ||
| 275 | wn | ||
| 276 | wp | ||
| 277 | wt | ||
| 278 | zk \ No newline at end of file | ||
| diff --git a/patterner.cpp b/patterner.cpp index af844cf..1deffb8 100644 --- a/patterner.cpp +++ b/patterner.cpp | |||
| @@ -47,92 +47,123 @@ patterner::patterner( | |||
| 47 | std::string patterner::generate() | 47 | std::string patterner::generate() |
| 48 | { | 48 | { |
| 49 | std::string action = "{MAIN}"; | 49 | std::string action = "{MAIN}"; |
| 50 | int tknloc; | ||
| 51 | while ((tknloc = action.find("{")) != std::string::npos) | ||
| 52 | { | ||
| 53 | std::string token = action.substr(tknloc+1, action.find("}")-tknloc-1); | ||
| 54 | std::string modifier; | ||
| 55 | int modloc; | ||
| 56 | if ((modloc = token.find(":")) != std::string::npos) | ||
| 57 | { | ||
| 58 | modifier = token.substr(modloc+1); | ||
| 59 | token = token.substr(0, modloc); | ||
| 60 | } | ||
| 61 | 50 | ||
| 62 | std::string canontkn; | 51 | verbly::filter slurBlacklist = |
| 63 | std::transform(std::begin(token), std::end(token), | 52 | (verbly::word::usageDomains %= ( |
| 64 | std::back_inserter(canontkn), [] (char ch) { | 53 | (verbly::notion::wnid == 106718862) // ethnic slur |
| 65 | return std::toupper(ch); | 54 | || (verbly::notion::wnid == 106717170) // disparagement (other slurs) |
| 66 | }); | 55 | || (verbly::notion::wnid == 107124340))); // obscenity (other profanity) |
| 67 | 56 | ||
| 68 | std::string result; | 57 | while (action == "{MAIN}") |
| 69 | if (canontkn == "WORD") | 58 | { |
| 70 | { | 59 | int tknloc; |
| 71 | result = data_.words( | 60 | while ((tknloc = action.find("{")) != std::string::npos) |
| 72 | (verbly::word::forms(verbly::inflection::base) %= | ||
| 73 | (verbly::form::complexity == 1) | ||
| 74 | && (verbly::form::length == 4) | ||
| 75 | && (verbly::form::proper == false) | ||
| 76 | && (verbly::pronunciation::numOfSyllables == 1)) | ||
| 77 | && !(verbly::word::usageDomains %= | ||
| 78 | (verbly::notion::wnid == 106718862))) // Blacklist ethnic slurs | ||
| 79 | .first().getBaseForm().getText(); | ||
| 80 | } else if (canontkn == "\\N") | ||
| 81 | { | 61 | { |
| 82 | result = "\n"; | 62 | std::string token = action.substr(tknloc+1, action.find("}")-tknloc-1); |
| 83 | } else { | 63 | std::string modifier; |
| 84 | auto group = groups_[canontkn]; | 64 | int modloc; |
| 85 | std::uniform_int_distribution<int> groupdist(0, group.size()-1); | 65 | if ((modloc = token.find(":")) != std::string::npos) |
| 86 | int groupind = groupdist(rng_); | 66 | { |
| 87 | result = group[groupind]; | 67 | modifier = token.substr(modloc+1); |
| 88 | } | 68 | token = token.substr(0, modloc); |
| 69 | } | ||
| 89 | 70 | ||
| 90 | if (modifier == "indefinite") | 71 | std::string canontkn; |
| 91 | { | 72 | std::transform(std::begin(token), std::end(token), |
| 92 | if ((result.length() > 1) && (isupper(result[0])) && (isupper(result[1]))) | 73 | std::back_inserter(canontkn), [] (char ch) { |
| 74 | return std::toupper(ch); | ||
| 75 | }); | ||
| 76 | |||
| 77 | std::string result; | ||
| 78 | if (canontkn == "WORD2") | ||
| 93 | { | 79 | { |
| 94 | result = "an " + result; | 80 | result = data_.words( |
| 95 | } else if ((result[0] == 'a') || (result[0] == 'e') || (result[0] == 'i') || (result[0] == 'o') || (result[0] == 'u')) | 81 | (verbly::notion::partOfSpeech == verbly::part_of_speech::noun) |
| 82 | && (verbly::word::forms(verbly::inflection::base) %= | ||
| 83 | (verbly::form::complexity == 1) | ||
| 84 | && (verbly::form::length == 4) | ||
| 85 | && (verbly::form::proper == false) | ||
| 86 | && (verbly::pronunciation::numOfSyllables == 1)) | ||
| 87 | && !slurBlacklist) | ||
| 88 | .first().getBaseForm().getText(); | ||
| 89 | } else if (canontkn == "\\N") | ||
| 96 | { | 90 | { |
| 97 | result = "an " + result; | 91 | result = "\n"; |
| 98 | } else { | 92 | } else { |
| 99 | result = "a " + result; | 93 | auto group = groups_[canontkn]; |
| 94 | std::uniform_int_distribution<int> groupdist(0, group.size()-1); | ||
| 95 | int groupind = groupdist(rng_); | ||
| 96 | result = group[groupind]; | ||
| 100 | } | 97 | } |
| 101 | } | ||
| 102 | 98 | ||
| 103 | std::string finalresult; | 99 | if (modifier == "indefinite") |
| 104 | if (islower(token[0])) | ||
| 105 | { | ||
| 106 | std::transform(std::begin(result), std::end(result), std::back_inserter(finalresult), [] (char ch) { | ||
| 107 | return std::tolower(ch); | ||
| 108 | }); | ||
| 109 | } else if (isupper(token[0]) && !isupper(token[1])) | ||
| 110 | { | ||
| 111 | auto words = verbly::split<std::list<std::string>>(result, " "); | ||
| 112 | for (auto& word : words) | ||
| 113 | { | 100 | { |
| 114 | if (word[0] == '{') | 101 | if ((result.length() > 1) && (isupper(result[0])) && (isupper(result[1]))) |
| 115 | { | 102 | { |
| 116 | word[1] = std::toupper(word[1]); | 103 | result = "an " + result; |
| 104 | } else if ((result[0] == 'a') || (result[0] == 'e') || (result[0] == 'i') || (result[0] == 'o') || (result[0] == 'u')) | ||
| 105 | { | ||
| 106 | result = "an " + result; | ||
| 107 | } else { | ||
| 108 | result = "a " + result; | ||
| 109 | } | ||
| 110 | } | ||
| 117 | 111 | ||
| 118 | for (int k=2; k<word.length(); k++) | 112 | std::string finalresult; |
| 113 | if (islower(token[0])) | ||
| 114 | { | ||
| 115 | std::transform(std::begin(result), std::end(result), std::back_inserter(finalresult), [] (char ch) { | ||
| 116 | return std::tolower(ch); | ||
| 117 | }); | ||
| 118 | } else if (isupper(token[0]) && !isupper(token[1])) | ||
| 119 | { | ||
| 120 | auto words = verbly::split<std::list<std::string>>(result, " "); | ||
| 121 | for (auto& word : words) | ||
| 122 | { | ||
| 123 | if (word[0] == '{') | ||
| 119 | { | 124 | { |
| 120 | if (std::isalpha(word[k])) | 125 | word[1] = std::toupper(word[1]); |
| 126 | |||
| 127 | for (int k=2; k<word.length(); k++) | ||
| 121 | { | 128 | { |
| 122 | word[k] = std::tolower(word[k]); | 129 | if (std::isalpha(word[k])) |
| 130 | { | ||
| 131 | word[k] = std::tolower(word[k]); | ||
| 132 | } | ||
| 123 | } | 133 | } |
| 134 | } else { | ||
| 135 | word[0] = std::toupper(word[0]); | ||
| 124 | } | 136 | } |
| 125 | } else { | ||
| 126 | word[0] = std::toupper(word[0]); | ||
| 127 | } | 137 | } |
| 138 | |||
| 139 | finalresult = verbly::implode(std::begin(words), std::end(words), " "); | ||
| 140 | } else { | ||
| 141 | finalresult = result; | ||
| 128 | } | 142 | } |
| 129 | 143 | ||
| 130 | finalresult = verbly::implode(std::begin(words), std::end(words), " "); | 144 | action.replace(tknloc, action.find("}")-tknloc+1, finalresult); |
| 131 | } else { | ||
| 132 | finalresult = result; | ||
| 133 | } | 145 | } |
| 134 | 146 | ||
| 135 | action.replace(tknloc, action.find("}")-tknloc+1, finalresult); | 147 | std::string canonical; |
| 148 | std::transform(std::begin(action), std::end(action), | ||
| 149 | std::back_inserter(canonical), [] (char ch) | ||
| 150 | { | ||
| 151 | return std::tolower(ch); | ||
| 152 | }); | ||
| 153 | |||
| 154 | std::list<std::string> words = | ||
| 155 | verbly::split<std::list<std::string>>(canonical, " "); | ||
| 156 | |||
| 157 | for (std::string word : words) | ||
| 158 | { | ||
| 159 | if (!data_.forms( | ||
| 160 | (verbly::form::text == word) | ||
| 161 | && slurBlacklist).all().empty()) | ||
| 162 | { | ||
| 163 | action = "{MAIN}"; | ||
| 164 | break; | ||
| 165 | } | ||
| 166 | } | ||
| 136 | } | 167 | } |
| 137 | 168 | ||
| 138 | return action; | 169 | return action; |
