From 58cdbee6f08bd5d4f371d8eb9739d74332213703 Mon Sep 17 00:00:00 2001 From: Kelly Rauchenberger Date: Mon, 16 Oct 2017 14:00:34 -0400 Subject: Added in made-up words --- data.txt | 247 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++- patterner.cpp | 161 ++++++++++++++++++++++---------------- 2 files changed, 342 insertions(+), 66 deletions(-) diff --git a/data.txt b/data.txt index f25657f..dd476ef 100644 --- a/data.txt +++ b/data.txt @@ -28,6 +28,251 @@ you're a piece of {WORD} what the {WORD} what the {WORD}ing {WORD} kindly catch the 9am train to {Word}sville +If you look up "{WORD}" in the dictionary, there's a picture of you underneath! +I never want to see your {WORD}ing {WORD} again INSULT,END -you piece of {WORD} \ No newline at end of file +you piece of {WORD} + +WORD +{STARTSONANT}{VOWEL}{ENDSONANT} +{WORD2} + +VOWEL +a +e +i +o +u +a +e +i +o +u +a +e +i +o +u +a +e +i +o +u +ae +ai +au +ea +ee +ei +ie +io +oi +ou +ui +uu + +STARTSONANT +b +c +d +f +g +h +j +k +l +m +n +p +r +s +t +b +c +d +f +g +h +j +k +l +m +n +p +q +r +s +t +v +w +x +z +b +c +d +f +g +h +j +k +l +m +n +p +q +r +s +t +v +w +x +z +bh +bl +br +ch +cl +cr +dr +dw +fl +fr +gl +gr +kl +kn +kr +ph +pl +pr +pt +rh +sc +sh +sk +sl +sm +sn +sp +sq +sr +st +sw +th +tr +tw +wh +wr +zh + +ENDSONANT +b +d +f +g +h +k +l +m +n +p +r +t +b +d +f +g +h +j +k +l +m +n +p +r +t +v +w +x +z +b +d +f +g +h +j +k +l +m +n +p +r +t +v +w +x +z +bf +bh +bk +ch +ck +dk +dp +dt +ff +fh +fk +fp +ft +gf +gh +gk +hk +lb +ld +lf +lg +lh +lk +lm +ln +lp +lt +mf +mk +mn +mp +nd +nf +ng +nk +np +nt +pf +ph +pk +pt +rb +rd +rf +rg +rk +rm +rn +rp +rt +sk +sp +st +wd +wf +wg +wk +wl +wm +wn +wp +wt +zk \ No newline at end of file diff --git a/patterner.cpp b/patterner.cpp index af844cf..1deffb8 100644 --- a/patterner.cpp +++ b/patterner.cpp @@ -47,92 +47,123 @@ patterner::patterner( std::string patterner::generate() { std::string action = "{MAIN}"; - int tknloc; - while ((tknloc = action.find("{")) != std::string::npos) - { - std::string token = action.substr(tknloc+1, action.find("}")-tknloc-1); - std::string modifier; - int modloc; - if ((modloc = token.find(":")) != std::string::npos) - { - modifier = token.substr(modloc+1); - token = token.substr(0, modloc); - } - std::string canontkn; - std::transform(std::begin(token), std::end(token), - std::back_inserter(canontkn), [] (char ch) { - return std::toupper(ch); - }); + verbly::filter slurBlacklist = + (verbly::word::usageDomains %= ( + (verbly::notion::wnid == 106718862) // ethnic slur + || (verbly::notion::wnid == 106717170) // disparagement (other slurs) + || (verbly::notion::wnid == 107124340))); // obscenity (other profanity) - std::string result; - if (canontkn == "WORD") - { - result = data_.words( - (verbly::word::forms(verbly::inflection::base) %= - (verbly::form::complexity == 1) - && (verbly::form::length == 4) - && (verbly::form::proper == false) - && (verbly::pronunciation::numOfSyllables == 1)) - && !(verbly::word::usageDomains %= - (verbly::notion::wnid == 106718862))) // Blacklist ethnic slurs - .first().getBaseForm().getText(); - } else if (canontkn == "\\N") + while (action == "{MAIN}") + { + int tknloc; + while ((tknloc = action.find("{")) != std::string::npos) { - result = "\n"; - } else { - auto group = groups_[canontkn]; - std::uniform_int_distribution groupdist(0, group.size()-1); - int groupind = groupdist(rng_); - result = group[groupind]; - } + std::string token = action.substr(tknloc+1, action.find("}")-tknloc-1); + std::string modifier; + int modloc; + if ((modloc = token.find(":")) != std::string::npos) + { + modifier = token.substr(modloc+1); + token = token.substr(0, modloc); + } - if (modifier == "indefinite") - { - if ((result.length() > 1) && (isupper(result[0])) && (isupper(result[1]))) + std::string canontkn; + std::transform(std::begin(token), std::end(token), + std::back_inserter(canontkn), [] (char ch) { + return std::toupper(ch); + }); + + std::string result; + if (canontkn == "WORD2") { - result = "an " + result; - } else if ((result[0] == 'a') || (result[0] == 'e') || (result[0] == 'i') || (result[0] == 'o') || (result[0] == 'u')) + result = data_.words( + (verbly::notion::partOfSpeech == verbly::part_of_speech::noun) + && (verbly::word::forms(verbly::inflection::base) %= + (verbly::form::complexity == 1) + && (verbly::form::length == 4) + && (verbly::form::proper == false) + && (verbly::pronunciation::numOfSyllables == 1)) + && !slurBlacklist) + .first().getBaseForm().getText(); + } else if (canontkn == "\\N") { - result = "an " + result; + result = "\n"; } else { - result = "a " + result; + auto group = groups_[canontkn]; + std::uniform_int_distribution groupdist(0, group.size()-1); + int groupind = groupdist(rng_); + result = group[groupind]; } - } - std::string finalresult; - if (islower(token[0])) - { - std::transform(std::begin(result), std::end(result), std::back_inserter(finalresult), [] (char ch) { - return std::tolower(ch); - }); - } else if (isupper(token[0]) && !isupper(token[1])) - { - auto words = verbly::split>(result, " "); - for (auto& word : words) + if (modifier == "indefinite") { - if (word[0] == '{') + if ((result.length() > 1) && (isupper(result[0])) && (isupper(result[1]))) { - word[1] = std::toupper(word[1]); + result = "an " + result; + } else if ((result[0] == 'a') || (result[0] == 'e') || (result[0] == 'i') || (result[0] == 'o') || (result[0] == 'u')) + { + result = "an " + result; + } else { + result = "a " + result; + } + } - for (int k=2; k>(result, " "); + for (auto& word : words) + { + if (word[0] == '{') { - if (std::isalpha(word[k])) + word[1] = std::toupper(word[1]); + + for (int k=2; k words = + verbly::split>(canonical, " "); + + for (std::string word : words) + { + if (!data_.forms( + (verbly::form::text == word) + && slurBlacklist).all().empty()) + { + action = "{MAIN}"; + break; + } + } } return action; -- cgit 1.4.1