diff options
| author | Kelly Rauchenberger <fefferburbia@gmail.com> | 2017-10-16 14:00:34 -0400 |
|---|---|---|
| committer | Kelly Rauchenberger <fefferburbia@gmail.com> | 2017-10-16 14:00:34 -0400 |
| commit | 58cdbee6f08bd5d4f371d8eb9739d74332213703 (patch) | |
| tree | d468ab60bbf1a494533d4740c5e03ee0b2e85b6e /patterner.cpp | |
| parent | f0849f537677a440ae138c5176ad86eab99db1a2 (diff) | |
| download | insult-58cdbee6f08bd5d4f371d8eb9739d74332213703.tar.gz insult-58cdbee6f08bd5d4f371d8eb9739d74332213703.tar.bz2 insult-58cdbee6f08bd5d4f371d8eb9739d74332213703.zip | |
Added in made-up words
Diffstat (limited to 'patterner.cpp')
| -rw-r--r-- | patterner.cpp | 161 |
1 file changed, 96 insertions, 65 deletions
| diff --git a/patterner.cpp b/patterner.cpp index af844cf..1deffb8 100644 --- a/patterner.cpp +++ b/patterner.cpp | |||
| @@ -47,92 +47,123 @@ patterner::patterner( | |||
| 47 | std::string patterner::generate() | 47 | std::string patterner::generate() |
| 48 | { | 48 | { |
| 49 | std::string action = "{MAIN}"; | 49 | std::string action = "{MAIN}"; |
| 50 | int tknloc; | ||
| 51 | while ((tknloc = action.find("{")) != std::string::npos) | ||
| 52 | { | ||
| 53 | std::string token = action.substr(tknloc+1, action.find("}")-tknloc-1); | ||
| 54 | std::string modifier; | ||
| 55 | int modloc; | ||
| 56 | if ((modloc = token.find(":")) != std::string::npos) | ||
| 57 | { | ||
| 58 | modifier = token.substr(modloc+1); | ||
| 59 | token = token.substr(0, modloc); | ||
| 60 | } | ||
| 61 | 50 | ||
| 62 | std::string canontkn; | 51 | verbly::filter slurBlacklist = |
| 63 | std::transform(std::begin(token), std::end(token), | 52 | (verbly::word::usageDomains %= ( |
| 64 | std::back_inserter(canontkn), [] (char ch) { | 53 | (verbly::notion::wnid == 106718862) // ethnic slur |
| 65 | return std::toupper(ch); | 54 | || (verbly::notion::wnid == 106717170) // disparagement (other slurs) |
| 66 | }); | 55 | || (verbly::notion::wnid == 107124340))); // obscenity (other profanity) |
| 67 | 56 | ||
| 68 | std::string result; | 57 | while (action == "{MAIN}") |
| 69 | if (canontkn == "WORD") | 58 | { |
| 70 | { | 59 | int tknloc; |
| 71 | result = data_.words( | 60 | while ((tknloc = action.find("{")) != std::string::npos) |
| 72 | (verbly::word::forms(verbly::inflection::base) %= | ||
| 73 | (verbly::form::complexity == 1) | ||
| 74 | && (verbly::form::length == 4) | ||
| 75 | && (verbly::form::proper == false) | ||
| 76 | && (verbly::pronunciation::numOfSyllables == 1)) | ||
| 77 | && !(verbly::word::usageDomains %= | ||
| 78 | (verbly::notion::wnid == 106718862))) // Blacklist ethnic slurs | ||
| 79 | .first().getBaseForm().getText(); | ||
| 80 | } else if (canontkn == "\\N") | ||
| 81 | { | 61 | { |
| 82 | result = "\n"; | 62 | std::string token = action.substr(tknloc+1, action.find("}")-tknloc-1); |
| 83 | } else { | 63 | std::string modifier; |
| 84 | auto group = groups_[canontkn]; | 64 | int modloc; |
| 85 | std::uniform_int_distribution<int> groupdist(0, group.size()-1); | 65 | if ((modloc = token.find(":")) != std::string::npos) |
| 86 | int groupind = groupdist(rng_); | 66 | { |
| 87 | result = group[groupind]; | 67 | modifier = token.substr(modloc+1); |
| 88 | } | 68 | token = token.substr(0, modloc); |
| 69 | } | ||
| 89 | 70 | ||
| 90 | if (modifier == "indefinite") | 71 | std::string canontkn; |
| 91 | { | 72 | std::transform(std::begin(token), std::end(token), |
| 92 | if ((result.length() > 1) && (isupper(result[0])) && (isupper(result[1]))) | 73 | std::back_inserter(canontkn), [] (char ch) { |
| 74 | return std::toupper(ch); | ||
| 75 | }); | ||
| 76 | |||
| 77 | std::string result; | ||
| 78 | if (canontkn == "WORD2") | ||
| 93 | { | 79 | { |
| 94 | result = "an " + result; | 80 | result = data_.words( |
| 95 | } else if ((result[0] == 'a') || (result[0] == 'e') || (result[0] == 'i') || (result[0] == 'o') || (result[0] == 'u')) | 81 | (verbly::notion::partOfSpeech == verbly::part_of_speech::noun) |
| 82 | && (verbly::word::forms(verbly::inflection::base) %= | ||
| 83 | (verbly::form::complexity == 1) | ||
| 84 | && (verbly::form::length == 4) | ||
| 85 | && (verbly::form::proper == false) | ||
| 86 | && (verbly::pronunciation::numOfSyllables == 1)) | ||
| 87 | && !slurBlacklist) | ||
| 88 | .first().getBaseForm().getText(); | ||
| 89 | } else if (canontkn == "\\N") | ||
| 96 | { | 90 | { |
| 97 | result = "an " + result; | 91 | result = "\n"; |
| 98 | } else { | 92 | } else { |
| 99 | result = "a " + result; | 93 | auto group = groups_[canontkn]; |
| 94 | std::uniform_int_distribution<int> groupdist(0, group.size()-1); | ||
| 95 | int groupind = groupdist(rng_); | ||
| 96 | result = group[groupind]; | ||
| 100 | } | 97 | } |
| 101 | } | ||
| 102 | 98 | ||
| 103 | std::string finalresult; | 99 | if (modifier == "indefinite") |
| 104 | if (islower(token[0])) | ||
| 105 | { | ||
| 106 | std::transform(std::begin(result), std::end(result), std::back_inserter(finalresult), [] (char ch) { | ||
| 107 | return std::tolower(ch); | ||
| 108 | }); | ||
| 109 | } else if (isupper(token[0]) && !isupper(token[1])) | ||
| 110 | { | ||
| 111 | auto words = verbly::split<std::list<std::string>>(result, " "); | ||
| 112 | for (auto& word : words) | ||
| 113 | { | 100 | { |
| 114 | if (word[0] == '{') | 101 | if ((result.length() > 1) && (isupper(result[0])) && (isupper(result[1]))) |
| 115 | { | 102 | { |
| 116 | word[1] = std::toupper(word[1]); | 103 | result = "an " + result; |
| 104 | } else if ((result[0] == 'a') || (result[0] == 'e') || (result[0] == 'i') || (result[0] == 'o') || (result[0] == 'u')) | ||
| 105 | { | ||
| 106 | result = "an " + result; | ||
| 107 | } else { | ||
| 108 | result = "a " + result; | ||
| 109 | } | ||
| 110 | } | ||
| 117 | 111 | ||
| 118 | for (int k=2; k<word.length(); k++) | 112 | std::string finalresult; |
| 113 | if (islower(token[0])) | ||
| 114 | { | ||
| 115 | std::transform(std::begin(result), std::end(result), std::back_inserter(finalresult), [] (char ch) { | ||
| 116 | return std::tolower(ch); | ||
| 117 | }); | ||
| 118 | } else if (isupper(token[0]) && !isupper(token[1])) | ||
| 119 | { | ||
| 120 | auto words = verbly::split<std::list<std::string>>(result, " "); | ||
| 121 | for (auto& word : words) | ||
| 122 | { | ||
| 123 | if (word[0] == '{') | ||
| 119 | { | 124 | { |
| 120 | if (std::isalpha(word[k])) | 125 | word[1] = std::toupper(word[1]); |
| 126 | |||
| 127 | for (int k=2; k<word.length(); k++) | ||
| 121 | { | 128 | { |
| 122 | word[k] = std::tolower(word[k]); | 129 | if (std::isalpha(word[k])) |
| 130 | { | ||
| 131 | word[k] = std::tolower(word[k]); | ||
| 132 | } | ||
| 123 | } | 133 | } |
| 134 | } else { | ||
| 135 | word[0] = std::toupper(word[0]); | ||
| 124 | } | 136 | } |
| 125 | } else { | ||
| 126 | word[0] = std::toupper(word[0]); | ||
| 127 | } | 137 | } |
| 138 | |||
| 139 | finalresult = verbly::implode(std::begin(words), std::end(words), " "); | ||
| 140 | } else { | ||
| 141 | finalresult = result; | ||
| 128 | } | 142 | } |
| 129 | 143 | ||
| 130 | finalresult = verbly::implode(std::begin(words), std::end(words), " "); | 144 | action.replace(tknloc, action.find("}")-tknloc+1, finalresult); |
| 131 | } else { | ||
| 132 | finalresult = result; | ||
| 133 | } | 145 | } |
| 134 | 146 | ||
| 135 | action.replace(tknloc, action.find("}")-tknloc+1, finalresult); | 147 | std::string canonical; |
| 148 | std::transform(std::begin(action), std::end(action), | ||
| 149 | std::back_inserter(canonical), [] (char ch) | ||
| 150 | { | ||
| 151 | return std::tolower(ch); | ||
| 152 | }); | ||
| 153 | |||
| 154 | std::list<std::string> words = | ||
| 155 | verbly::split<std::list<std::string>>(canonical, " "); | ||
| 156 | |||
| 157 | for (std::string word : words) | ||
| 158 | { | ||
| 159 | if (!data_.forms( | ||
| 160 | (verbly::form::text == word) | ||
| 161 | && slurBlacklist).all().empty()) | ||
| 162 | { | ||
| 163 | action = "{MAIN}"; | ||
| 164 | break; | ||
| 165 | } | ||
| 166 | } | ||
| 136 | } | 167 | } |
| 137 | 168 | ||
| 138 | return action; | 169 | return action; |
