From ffc9edbc9fbeb65fd32c1fa24584010a83a6de31 Mon Sep 17 00:00:00 2001 From: Kelly Rauchenberger Date: Fri, 24 Feb 2017 11:15:12 -0500 Subject: Created bot --- sentence.cpp | 668 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 668 insertions(+) create mode 100644 sentence.cpp (limited to 'sentence.cpp') diff --git a/sentence.cpp b/sentence.cpp new file mode 100644 index 0000000..3dabe58 --- /dev/null +++ b/sentence.cpp @@ -0,0 +1,668 @@ +#include "sentence.h" +#include +#include +#include + +sentence::sentence( + const verbly::database& database, + std::mt19937& rng) : + database_(database), + rng_(rng) +{ +} + +std::string sentence::generate() const +{ + // Generate the form that the sentence should take. + std::vector actions { + {"like", verbly::token(std::set({"infinitive_phrase", "subjectless"}))}, + {"have", verbly::token(std::set({"gerund_phrase", "subjectless"}))} + }; + + verbly::token form = actions[ + std::uniform_int_distribution(0, actions.size()-1)(rng_)]; + + // Compile the form. + while (!form.isComplete()) + { + visit(form); + } + + std::string compiled = form.compile(); + std::list words = + verbly::split>(compiled, " "); + + verbly::token cased; + for (std::string& word : words) + { + if (std::bernoulli_distribution(1.0/2.0)(rng_)) + { + cased << verbly::token::capitalize(verbly::token::casing::all_caps, word); + } else { + cased << word; + } + } + + return cased.compile(); +} + +bool sentence::chooseSelrestr(std::set selrestrs, std::set choices) const +{ + int validChoices = 0; + for (const std::string& choice : choices) + { + if (selrestrs.count(choice)) + { + validChoices++; + } + } + + return std::bernoulli_distribution(static_cast(validChoices)/static_cast(selrestrs.size()))(rng_); +} + +verbly::word sentence::generateStandardNoun( + std::string role, + std::set selrestrs) const +{ + std::geometric_distribution tagdist(0.5); // 0.06 + std::vector result; + bool trySelection = true; + + while (result.empty()) + { + verbly::filter condition = + (verbly::notion::partOfSpeech == verbly::part_of_speech::noun) + && (verbly::form::proper == false) + //&& (verbly::form::complexity == 1) + // && (verbly::word::tagCount >= tagdist(rng_)) // Favor more common words + && (verbly::word::tagCount >= 1) + && !(verbly::word::usageDomains %= (verbly::notion::wnid == 106718862)); // Blacklist ethnic slurs + + // Only use selection restrictions for a first attempt. + if (trySelection) + { + verbly::filter selection(true); + + for (const std::string& selrestr : selrestrs) + { + if (selrestr == "concrete") + { + selection += (verbly::notion::wnid == 100001930); // physical entity + } else if (selrestr == "time") + { + selection += (verbly::notion::wnid == 100028270); // time + } else if (selrestr == "state") + { + selection += (verbly::notion::wnid == 100024720); // state + } else if (selrestr == "abstract") + { + selection += (verbly::notion::wnid == 100002137); // abstract entity + } else if (selrestr == "scalar") + { + selection += (verbly::notion::wnid == 103835412); // number + } else if (selrestr == "currency") + { + selection += (verbly::notion::wnid == 105050379); // currency + } else if (selrestr == "location") + { + selection += (verbly::notion::wnid == 100027167); // location + } else if (selrestr == "organization") + { + selection += (verbly::notion::wnid == 100237078); // organization + } else if (selrestr == "int_control") + { + selection += (verbly::notion::wnid == 100007347); // causal agent + } else if (selrestr == "natural") + { + selection += (verbly::notion::wnid == 100019128); // natural object + } else if (selrestr == "phys_obj") + { + selection += (verbly::notion::wnid == 100002684); // physical object + } else if (selrestr == "solid") + { + selection += (verbly::notion::wnid == 113860793); // solid + } else if (selrestr == "shape") + { + selection += (verbly::notion::wnid == 100027807); // shape + } else if (selrestr == "substance") + { + selection += (verbly::notion::wnid == 100019613); // substance + } else if (selrestr == "idea") + { + selection += (verbly::notion::wnid == 105803379); // idea + } else if (selrestr == "sound") + { + selection += (verbly::notion::wnid == 107111047); // sound + } else if (selrestr == "communication") + { + selection += (verbly::notion::wnid == 100033020); // communication + } else if (selrestr == "region") + { + selection += (verbly::notion::wnid == 105221895); // region + } else if (selrestr == "place") + { + selection += (verbly::notion::wnid == 100586262); // place + } else if (selrestr == "machine") + { + selection += (verbly::notion::wnid == 102958343); // machine + } else if (selrestr == "animate") + { + selection += (verbly::notion::wnid == 100004258); // animate thing + } else if (selrestr == "plant") + { + selection += (verbly::notion::wnid == 103956922); // plant + } else if (selrestr == "comestible") + { + selection += (verbly::notion::wnid == 100021265); // food + } else if (selrestr == "artifact") + { + selection += (verbly::notion::wnid == 100021939); // artifact + } else if (selrestr == "vehicle") + { + selection += (verbly::notion::wnid == 104524313); // vehicle + } else if (selrestr == "human") + { + selection += (verbly::notion::wnid == 100007846); // person + } else if (selrestr == "animal") + { + selection += (verbly::notion::wnid == 100015388); // animal + } else if (selrestr == "body_part") + { + selection += (verbly::notion::wnid == 105220461); // body part + } else if (selrestr == "garment") + { + selection += (verbly::notion::wnid == 103051540); // clothing + } else if (selrestr == "tool") + { + selection += (verbly::notion::wnid == 104451818); // tool + } else if ((selrestr == "concrete_inanimate") || (selrestr == "inanimate")) + { + selection += (verbly::notion::wnid == 100021939); // artifact + selection += (verbly::notion::wnid == 100019128); // natural object + } else if (selrestr == "non_region_location") + { + selection += (verbly::notion::wnid == 102913152); // building + } else if (selrestr == "non_solid_food") + { + selection += (verbly::notion::wnid == 107881800); // beverage + } else if (selrestr == "solid_food") + { + selection += (verbly::notion::wnid == 107555863); // solid food + } else if (selrestr == "slinky") + { + selection += (verbly::notion::wnid == 103670849); // line + } + } + + if (selection.compact().getType() != verbly::filter::type::empty) + { + condition &= (verbly::notion::fullHypernyms %= std::move(selection)); + } else if (role == "Attribute") + { + condition &= (verbly::notion::fullHypernyms %= (verbly::notion::wnid == 100024264)); // attribute + } else if (role == "Instrument") + { + condition &= (verbly::notion::fullHypernyms %= (verbly::notion::wnid == 104451818)); // tool + } else if (role == "Agent") + { + condition &= (verbly::notion::fullHypernyms %= (verbly::notion::wnid == 100007347)); // causal agent + } + + trySelection = false; + } else { + std::cout << "Selection failed" << std::endl; + } + + result = database_.words(condition).all(); + } + + return result.front(); +} + +verbly::token sentence::generateStandardNounPhrase( + const verbly::word& noun, + std::string role, + bool plural, + bool definite) const +{ + verbly::token utter; + verbly::word sounder = noun; + verbly::word descript; + + /*if (std::bernoulli_distribution(1.0/8.0)(rng_)) + { + std::geometric_distribution tagdist(0.2); + descript = database_.words( + (verbly::word::tagCount >= tagdist(rng_)) + && (verbly::notion::partOfSpeech == verbly::part_of_speech::adjective)).first(); + + sounder = descript; + }*/ + + if ((std::bernoulli_distribution(1.0/3.0)(rng_)) && (definite)) + { + utter << "the"; + + if (std::bernoulli_distribution(1.0/2.0)(rng_)) + { + plural = true; + } + } else { + if ((role != "Theme") && (role != "Attribute") && std::bernoulli_distribution(1.0/2.0)(rng_)) + { + utter << "your"; + } else if (!plural) { + if (sounder.getBaseForm().startsWithVowelSound()) + { + utter << "an"; + } else { + utter << "a"; + } + } + } + + if (descript.isValid()) + { + utter << descript; + } + + if (plural && noun.hasInflection(verbly::inflection::plural)) + { + utter << verbly::token(noun, verbly::inflection::plural); + } else { + utter << noun; + } + + return utter; +} + +verbly::token sentence::generateClause( + const verbly::token& it) const +{ + verbly::token utter; + std::geometric_distribution tagdist(0.07); + std::vector verbDataset; + + verbly::filter frameCondition = + (verbly::frame::length >= 2) + && (verbly::frame::parts(0) %= ( + (verbly::part::type == verbly::part_type::noun_phrase) + && (verbly::part::role == "Agent")) + && (verbly::frame::parts(1) %= + (verbly::part::type == verbly::part_type::verb)) + && !(verbly::frame::parts() %= ( + verbly::part::synrestrs %= "adjp"))); + + if (it.hasSynrestr("experiencer")) + { + frameCondition &= + (verbly::frame::parts(2) %= + (verbly::part::type == verbly::part_type::noun_phrase) + && !(verbly::part::synrestrs %= "genitive") + && ((verbly::part::role == "Patient") + || (verbly::part::role == "Experiencer"))); + } + + verbly::filter verbCondition = + (verbly::notion::partOfSpeech == verbly::part_of_speech::verb) + && frameCondition; + + if (it.hasSynrestr("participle_phrase")) + { + verbCondition &= (verbly::word::forms(verbly::inflection::ing_form)); + } else if (it.hasSynrestr("progressive")) + { + verbCondition &= (verbly::word::forms(verbly::inflection::s_form)); + } else if (it.hasSynrestr("past_participle")) + { + verbCondition &= (verbly::word::forms(verbly::inflection::past_participle)); + } + + // Because of the tag distribution, it's possible (albeit extremely unlikely) + // for the verb query to fail, so we loop until it succeeds. + while (verbDataset.empty()) + { + verbDataset = database_.words( + verbCondition + && (verbly::word::tagCount >= tagdist(rng_)) + ).all(); + } + + verbly::word verb = verbDataset.front(); + verbly::frame frame = database_.frames(frameCondition && verb).first(); + std::list parts(std::begin(frame.getParts()), std::end(frame.getParts())); + + if (it.hasSynrestr("experiencer")) + { + // Ignore the direct object. + parts.erase(std::next(parts.begin(), 2)); + } + + if (it.hasSynrestr("subjectless")) + { + // Ignore the subject. + parts.pop_front(); + } + + for (const verbly::part& part : parts) + { + switch (part.getType()) + { + case verbly::part_type::noun_phrase: + { + std::cout << "NP: "; + for (auto& s : part.getNounSynrestrs()) + { + std::cout << s << " "; + } + std::cout << std::endl; + + if (chooseSelrestr(part.getNounSelrestrs(), {"currency"})) + { + int lead = std::uniform_int_distribution(1,9)(rng_); + int tail = std::uniform_int_distribution(0,6)(rng_); + std::string tailStr(tail, '0'); + + utter << ("$" + std::to_string(lead) + tailStr); + } else if (part.nounHasSynrestr("adjp")) + { + utter << std::set({"adjective_phrase"}); + } else if ((part.nounHasSynrestr("be_sc_ing")) + || (part.nounHasSynrestr("ac_ing")) + || (part.nounHasSynrestr("sc_ing")) + || (part.nounHasSynrestr("np_omit_ing")) + || (part.nounHasSynrestr("oc_ing"))) + { + utter << std::set({"participle_phrase", "subjectless"}); + } else if ((part.nounHasSynrestr("poss_ing")) + || (part.nounHasSynrestr("possing")) + || (part.nounHasSynrestr("pos_ing"))) + { + utter << "your"; + utter << std::set({"participle_phrase", "subjectless"}); + } else if (part.nounHasSynrestr("adv_loc")) + { + if (std::bernoulli_distribution(1.0/2.0)(rng_)) + { + utter << "here"; + } else { + utter << "there"; + } + } else if (part.nounHasSynrestr("refl")) + { + utter << "yourself"; + } else if ((part.nounHasSynrestr("sc_to_inf")) + || (part.nounHasSynrestr("ac_to_inf")) + || (part.nounHasSynrestr("vc_to_inf")) + || (part.nounHasSynrestr("rs_to_inf")) + || (part.nounHasSynrestr("oc_to_inf"))) + { + utter << std::set({"infinitive_phrase", "subjectless"}); + } else if (part.nounHasSynrestr("oc_bare_inf")) + { + utter << std::set({"infinitive_phrase", "bare", "subjectless"}); + } else if (part.nounHasSynrestr("wh_comp")) + { + utter << "whether"; + + verbly::token sentence(std::set({"progressive"})); + utter << generateClause(sentence); + } else if (part.nounHasSynrestr("that_comp")) + { + utter << "that"; + utter << "they"; + + verbly::token sentence(std::set({"subjectless"})); + utter << generateClause(sentence); + } else if (part.nounHasSynrestr("what_extract")) + { + utter << "what"; + + verbly::token sentence(std::set({"progressive", "experiencer"})); + utter << generateClause(sentence); + } else if (part.nounHasSynrestr("how_extract")) + { + utter << "how"; + + verbly::token sentence(std::set({"progressive"})); + utter << generateClause(sentence); + } else if (part.nounHasSynrestr("wh_inf")) + { + utter << "how"; + + verbly::token sentence(std::set({"infinitive_phrase", "subjectless"})); + utter << generateClause(sentence); + } else if (part.nounHasSynrestr("what_inf")) + { + utter << "what"; + + verbly::token sentence(std::set({"infinitive_phrase", "subjectless", "experiencer"})); + utter << generateClause(sentence); + } else if (part.nounHasSynrestr("wheth_inf")) + { + utter << "whether"; + + verbly::token sentence(std::set({"infinitive_phrase", "subjectless"})); + utter << generateClause(sentence); + } else if (part.nounHasSynrestr("quotation")) + { + verbly::token sentence(std::set({"participle_phrase"})); + while (!sentence.isComplete()) + { + visit(sentence); + } + + utter << ("\"" + sentence.compile() + "\""); + } else { + if (part.nounHasSynrestr("genitive")) + { + verbly::word noun = generateStandardNoun("Passive", {"animate"}); + verbly::token owner = generateStandardNounPhrase(noun, "Passive", false, true); + std::string ownerStr = owner.compile() + "'s"; + utter << ownerStr; + } + + verbly::word noun = generateStandardNoun(part.getNounRole(), part.getNounSelrestrs()); + + bool plural = part.nounHasSynrestr("plural") || chooseSelrestr(part.getNounSelrestrs(), {"group", "plural"}); + + utter << generateStandardNounPhrase( + noun, + part.getNounRole(), + plural, + part.nounHasSynrestr("definite")); + + if (part.nounHasSynrestr("acc_ing") || part.nounHasSynrestr("ac_ing")) + { + utter << std::set({"participle_phrase", "subjectless"}); + } + } + + break; + } + + case verbly::part_type::verb: + { + std::cout << "V: " << verb.getBaseForm().getText() << std::endl; + + if (it.hasSynrestr("progressive")) + { + utter << verbly::token(verb, verbly::inflection::s_form); + } else if (it.hasSynrestr("past_participle")) + { + utter << verbly::token(verb, verbly::inflection::past_participle); + } else if (it.hasSynrestr("infinitive_phrase")) + { + if (!it.hasSynrestr("bare")) + { + utter << "to"; + } + + utter << verb; + } else if (it.hasSynrestr("participle_phrase")) + { + utter << verbly::token(verb, verbly::inflection::ing_form); + } else if (it.hasSynrestr("gerund_phrase")) + { + utter << verbly::token(verb, verbly::inflection::past_participle); + } else { + utter << verb; + } + + break; + } + + case verbly::part_type::preposition: + { + std::cout << "PREP" << std::endl; + + if (part.isPrepositionLiteral()) + { + int choiceIndex = std::uniform_int_distribution(0, part.getPrepositionChoices().size()-1)(rng_); + utter << part.getPrepositionChoices()[choiceIndex]; + } else { + verbly::filter pgf(true); + for (const std::string& choice : part.getPrepositionChoices()) + { + pgf += (verbly::notion::prepositionGroups == choice); + } + + utter << database_.words(pgf && (verbly::notion::partOfSpeech == verbly::part_of_speech::preposition)).first(); + } + + break; + } + + case verbly::part_type::adjective: + { + std::cout << "ADJ" << std::endl; + + utter << std::set({"adjective_phrase"}); + + break; + } + + case verbly::part_type::adverb: + { + std::cout << "ADV" << std::endl; + + utter << std::set({"adverb_phrase"}); + + break; + } + + case verbly::part_type::literal: + { + std::cout << "LIT" << std::endl; + + utter << part.getLiteralValue(); + + break; + } + + case verbly::part_type::invalid: + { + // Nope + + break; + } + } + } + + if ((parts.size() == 1) && (std::bernoulli_distribution(1.0/4.0)(rng_))) + { + utter << std::set({"adverb_phrase"}); + } + + return utter; +} + +void sentence::visit(verbly::token& it) const +{ + switch (it.getType()) + { + case verbly::token::type::utterance: + { + for (verbly::token& token : it) + { + if (!token.isComplete()) + { + visit(token); + + break; + } + } + + break; + } + + case verbly::token::type::fillin: + { + if (it.hasSynrestr("infinitive_phrase") || it.hasSynrestr("gerund_phrase")) + { + it = generateClause(it); + } else if (it.hasSynrestr("adjective_phrase")) + { + verbly::token phrase; + + if (std::bernoulli_distribution(1.0/6.0)(rng_)) + { + phrase << std::set({"adverb_phrase"}); + } + + if (std::bernoulli_distribution(1.0/4.0)(rng_)) + { + phrase << std::set({"participle_phrase", "subjectless"}); + } else { + std::geometric_distribution tagdist(0.2); + phrase << database_.words( + (verbly::word::tagCount >= tagdist(rng_)) + && (verbly::notion::partOfSpeech == verbly::part_of_speech::adjective)).first(); + } + + it = phrase; + } else if (it.hasSynrestr("adverb_phrase")) + { + std::geometric_distribution tagdist(1.0/23.0); + + it = database_.words( + (verbly::notion::partOfSpeech == verbly::part_of_speech::adverb) + && (verbly::word::tagCount >= tagdist(rng_)) + ).first(); + } else if (it.hasSynrestr("participle_phrase")) + { + if (std::bernoulli_distribution(1.0/2.0)(rng_)) + { + it = verbly::token( + database_.words( + (verbly::notion::partOfSpeech == verbly::part_of_speech::verb) + && (verbly::word::forms(verbly::inflection::ing_form))).first(), + verbly::inflection::ing_form); + } else { + it = generateClause(it); + } + } else { + it = "*the reality of the situation*"; + } + + break; + } + + case verbly::token::type::transform: + { + visit(it.getInnerToken()); + + break; + } + + case verbly::token::type::word: + case verbly::token::type::literal: + case verbly::token::type::part: + { + // Nope + + break; + } + } +} + -- cgit 1.4.1