From 58cdbee6f08bd5d4f371d8eb9739d74332213703 Mon Sep 17 00:00:00 2001 From: Kelly Rauchenberger Date: Mon, 16 Oct 2017 14:00:34 -0400 Subject: Added in made-up words --- patterner.cpp | 161 ++++++++++++++++++++++++++++++++++------------------------ 1 file changed, 96 insertions(+), 65 deletions(-) (limited to 'patterner.cpp') diff --git a/patterner.cpp b/patterner.cpp index af844cf..1deffb8 100644 --- a/patterner.cpp +++ b/patterner.cpp @@ -47,92 +47,123 @@ patterner::patterner( std::string patterner::generate() { std::string action = "{MAIN}"; - int tknloc; - while ((tknloc = action.find("{")) != std::string::npos) - { - std::string token = action.substr(tknloc+1, action.find("}")-tknloc-1); - std::string modifier; - int modloc; - if ((modloc = token.find(":")) != std::string::npos) - { - modifier = token.substr(modloc+1); - token = token.substr(0, modloc); - } - std::string canontkn; - std::transform(std::begin(token), std::end(token), - std::back_inserter(canontkn), [] (char ch) { - return std::toupper(ch); - }); + verbly::filter slurBlacklist = + (verbly::word::usageDomains %= ( + (verbly::notion::wnid == 106718862) // ethnic slur + || (verbly::notion::wnid == 106717170) // disparagement (other slurs) + || (verbly::notion::wnid == 107124340))); // obscenity (other profanity) - std::string result; - if (canontkn == "WORD") - { - result = data_.words( - (verbly::word::forms(verbly::inflection::base) %= - (verbly::form::complexity == 1) - && (verbly::form::length == 4) - && (verbly::form::proper == false) - && (verbly::pronunciation::numOfSyllables == 1)) - && !(verbly::word::usageDomains %= - (verbly::notion::wnid == 106718862))) // Blacklist ethnic slurs - .first().getBaseForm().getText(); - } else if (canontkn == "\\N") + while (action == "{MAIN}") + { + int tknloc; + while ((tknloc = action.find("{")) != std::string::npos) { - result = "\n"; - } else { - auto group = groups_[canontkn]; - std::uniform_int_distribution groupdist(0, group.size()-1); - int groupind = groupdist(rng_); - result = group[groupind]; - } + std::string token = action.substr(tknloc+1, action.find("}")-tknloc-1); + std::string modifier; + int modloc; + if ((modloc = token.find(":")) != std::string::npos) + { + modifier = token.substr(modloc+1); + token = token.substr(0, modloc); + } - if (modifier == "indefinite") - { - if ((result.length() > 1) && (isupper(result[0])) && (isupper(result[1]))) + std::string canontkn; + std::transform(std::begin(token), std::end(token), + std::back_inserter(canontkn), [] (char ch) { + return std::toupper(ch); + }); + + std::string result; + if (canontkn == "WORD2") { - result = "an " + result; - } else if ((result[0] == 'a') || (result[0] == 'e') || (result[0] == 'i') || (result[0] == 'o') || (result[0] == 'u')) + result = data_.words( + (verbly::notion::partOfSpeech == verbly::part_of_speech::noun) + && (verbly::word::forms(verbly::inflection::base) %= + (verbly::form::complexity == 1) + && (verbly::form::length == 4) + && (verbly::form::proper == false) + && (verbly::pronunciation::numOfSyllables == 1)) + && !slurBlacklist) + .first().getBaseForm().getText(); + } else if (canontkn == "\\N") { - result = "an " + result; + result = "\n"; } else { - result = "a " + result; + auto group = groups_[canontkn]; + std::uniform_int_distribution groupdist(0, group.size()-1); + int groupind = groupdist(rng_); + result = group[groupind]; } - } - std::string finalresult; - if (islower(token[0])) - { - std::transform(std::begin(result), std::end(result), std::back_inserter(finalresult), [] (char ch) { - return std::tolower(ch); - }); - } else if (isupper(token[0]) && !isupper(token[1])) - { - auto words = verbly::split>(result, " "); - for (auto& word : words) + if (modifier == "indefinite") { - if (word[0] == '{') + if ((result.length() > 1) && (isupper(result[0])) && (isupper(result[1]))) { - word[1] = std::toupper(word[1]); + result = "an " + result; + } else if ((result[0] == 'a') || (result[0] == 'e') || (result[0] == 'i') || (result[0] == 'o') || (result[0] == 'u')) + { + result = "an " + result; + } else { + result = "a " + result; + } + } - for (int k=2; k>(result, " "); + for (auto& word : words) + { + if (word[0] == '{') { - if (std::isalpha(word[k])) + word[1] = std::toupper(word[1]); + + for (int k=2; k words = + verbly::split>(canonical, " "); + + for (std::string word : words) + { + if (!data_.forms( + (verbly::form::text == word) + && slurBlacklist).all().empty()) + { + action = "{MAIN}"; + break; + } + } } return action; -- cgit 1.4.1