From 8c2e7c646f2a549ea9b4db831d8088f57b3287ae Mon Sep 17 00:00:00 2001 From: Kelly Rauchenberger Date: Sat, 4 Feb 2017 10:32:55 -0500 Subject: Updated verbly (new API) Notably, the bot should not be able to use ethnic slurs now. sentence.cpp is basically just copied from advice. --- CMakeLists.txt | 2 +- furries.cpp | 460 ++------------------------------------ sentence.cpp | 682 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ sentence.h | 39 ++++ vendor/verbly | 2 +- 5 files changed, 739 insertions(+), 446 deletions(-) create mode 100644 sentence.cpp create mode 100644 sentence.h diff --git a/CMakeLists.txt b/CMakeLists.txt index e6a8aa7..cbc9e68 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,7 +6,7 @@ add_subdirectory(vendor/verbly) add_subdirectory(vendor/yaml-cpp EXCLUDE_FROM_ALL) include_directories(vendor/libtwittercpp/src vendor/verbly/lib vendor/yaml-cpp/include) -add_executable(furries furries.cpp) +add_executable(furries furries.cpp sentence.cpp) set_property(TARGET furries PROPERTY CXX_STANDARD 11) set_property(TARGET furries PROPERTY CXX_STANDARD_REQUIRED ON) target_link_libraries(furries verbly twitter++ yaml-cpp) diff --git a/furries.cpp b/furries.cpp index de2fa02..3f9c76d 100644 --- a/furries.cpp +++ b/furries.cpp @@ -6,429 +6,7 @@ #include #include #include - -class fill_blanks { - private: - verbly::data& database; - - public: - fill_blanks(verbly::data& database) : database(database) - { - - } - - verbly::filter parse_selrestrs(verbly::frame::selrestr selrestr) - { - switch (selrestr.get_type()) - { - case verbly::frame::selrestr::type::empty: - { - return verbly::filter{}; - } - - case verbly::frame::selrestr::type::singleton: - { - verbly::noun n; - - if (selrestr.get_restriction() == "concrete") - { - n = database.nouns().with_singular_form("physical entity").limit(1).run().front(); - } else if (selrestr.get_restriction() == "time") - { - n = database.nouns().with_singular_form("time").limit(1).run().front(); - } else if (selrestr.get_restriction() == "state") - { - n = database.nouns().with_singular_form("state").limit(1).run().front(); - } else if (selrestr.get_restriction() == "abstract") - { - n = database.nouns().with_singular_form("abstract entity").limit(1).run().front(); - } else if (selrestr.get_restriction() == "time") - { - n = database.nouns().with_singular_form("time").limit(1).run().front(); - } else if (selrestr.get_restriction() == "scalar") - { - n = database.nouns().with_singular_form("number").limit(1).run().front(); - } else if (selrestr.get_restriction() == "currency") - { - auto nn2 = database.nouns().with_singular_form("currency").limit(2).run(); - std::vector nn(std::begin(nn2), std::end(nn2)); - n = nn[1]; - } else if (selrestr.get_restriction() == "location") - { - n = database.nouns().with_singular_form("location").limit(1).run().front(); - } else if (selrestr.get_restriction() == "organization") - { - n = database.nouns().with_singular_form("organization").limit(1).run().front(); - } else if (selrestr.get_restriction() == "int_control") - { - n = database.nouns().with_singular_form("causal agent").limit(1).run().front(); - } else if (selrestr.get_restriction() == "natural") - { - n = database.nouns().with_singular_form("natural object").limit(1).run().front(); - } else if (selrestr.get_restriction() == "phys_obj") - { - n = database.nouns().with_singular_form("physical object").limit(1).run().front(); - } else if (selrestr.get_restriction() == "solid") - { - n = database.nouns().with_singular_form("solid").limit(1).run().front(); - } else if (selrestr.get_restriction() == "shape") - { - n = database.nouns().with_singular_form("shape").limit(1).run().front(); - } else if (selrestr.get_restriction() == "substance") - { - n = database.nouns().with_singular_form("substance").limit(1).run().front(); - } else if (selrestr.get_restriction() == "idea") - { - n = database.nouns().with_singular_form("idea").limit(1).run().front(); - } else if (selrestr.get_restriction() == "sound") - { - auto nn2 = database.nouns().with_singular_form("sound").limit(4).run(); - std::vector nn(std::begin(nn2), std::end(nn2)); - n = nn[3]; - } else if (selrestr.get_restriction() == "communication") - { - n = database.nouns().with_singular_form("communication").limit(1).run().front(); - } else if (selrestr.get_restriction() == "region") - { - n = database.nouns().with_singular_form("region").limit(1).run().front(); - } else if (selrestr.get_restriction() == "place") - { - n = database.nouns().with_singular_form("place").limit(1).run().front(); - } else if (selrestr.get_restriction() == "machine") - { - n = database.nouns().with_singular_form("machine").limit(1).run().front(); - } else if (selrestr.get_restriction() == "animate") - { - n = database.nouns().with_singular_form("animate being").limit(1).run().front(); - } else if (selrestr.get_restriction() == "plant") - { - auto nn2 = database.nouns().with_singular_form("plant").limit(2).run(); - std::vector nn(std::begin(nn2), std::end(nn2)); - n = nn[1]; - } else if (selrestr.get_restriction() == "comestible") - { - n = database.nouns().with_singular_form("food").limit(1).run().front(); - } else if (selrestr.get_restriction() == "artifact") - { - n = database.nouns().with_singular_form("artifact").limit(1).run().front(); - } else if (selrestr.get_restriction() == "vehicle") - { - n = database.nouns().with_singular_form("vehicle").limit(1).run().front(); - } else if (selrestr.get_restriction() == "human") - { - n = database.nouns().with_singular_form("person").limit(1).run().front(); - } else if (selrestr.get_restriction() == "animal") - { - n = database.nouns().with_singular_form("animal").limit(1).run().front(); - } else if (selrestr.get_restriction() == "body_part") - { - n = database.nouns().with_singular_form("body part").limit(1).run().front(); - } else if (selrestr.get_restriction() == "garment") - { - n = database.nouns().with_singular_form("clothing").limit(1).run().front(); - } else if (selrestr.get_restriction() == "tool") - { - n = database.nouns().with_singular_form("tool").limit(1).run().front(); - } else { - return verbly::filter{}; - } - - return verbly::filter{n, !selrestr.get_pos()}; - } - - case verbly::frame::selrestr::type::group: - { - verbly::filter ret; - ret.set_orlogic(selrestr.get_orlogic()); - - std::transform(std::begin(selrestr), std::end(selrestr), std::back_inserter(ret), [&] (verbly::frame::selrestr sr) { - return parse_selrestrs(sr); - }); - - return ret; - } - } - } - - template - void visit(verbly::token& it, RNG&& rng) - { - switch (it.get_type()) - { - case verbly::token::type::utterance: - { - for (auto& tkn : it) - { - if (!tkn.is_complete()) - { - visit(tkn, rng); - - break; - } - } - - break; - } - - case verbly::token::type::fillin: - { - switch (it.get_fillin_type()) - { - case verbly::token::fillin_type::participle_phrase: - { - for (;;) - { - verbly::verb v = database.verbs().has_frames().random().limit(1).run().front(); - auto frames = v.frames().run(); - std::vector filtered; - std::remove_copy_if(std::begin(frames), std::end(frames), std::back_inserter(filtered), [] (verbly::frame& f) { - if (f.parts().size() < 2) - { - return true; - } - - if (f.parts()[0].get_type() != verbly::frame::part::type::noun_phrase) - { - return true; - } - - if (f.parts()[0].get_role() != "Agent") - { - return true; - } - - if (f.parts()[1].get_type() != verbly::frame::part::type::verb) - { - return true; - } - - return false; - }); - - if (filtered.empty()) - { - continue; - } - - int fr_i = std::uniform_int_distribution(0, filtered.size()-1)(rng); - verbly::frame fr = filtered[fr_i]; - verbly::token utter; - for (auto part : fr.parts()) - { - switch (part.get_type()) - { - case verbly::frame::part::type::noun_phrase: - { - if (part.get_role() == "Agent") - { - continue; - } - - if (part.get_synrestrs().count("adjp") == 1) - { - utter << verbly::token{verbly::token::fillin_type::adjective_phrase}; - - continue; - } else if ((part.get_synrestrs().count("be_sc_ing") == 1) - || (part.get_synrestrs().count("ac_ing") == 1) - || (part.get_synrestrs().count("sc_ing") == 1) - || (part.get_synrestrs().count("np_omit_ing") == 1) - || (part.get_synrestrs().count("oc_ing") == 1)) - { - utter << verbly::token{verbly::token::fillin_type::participle_phrase}; - - continue; - } else if ((part.get_synrestrs().count("poss_ing") == 1) - || (part.get_synrestrs().count("possing") == 1) - || (part.get_synrestrs().count("pos_ing") == 1)) - { - utter << verbly::token{"their"}; - utter << verbly::token{verbly::token::fillin_type::participle_phrase}; - - continue; - } else if (part.get_synrestrs().count("genitive") == 1) - { - utter << verbly::token{"their"}; - - continue; - } else if (part.get_synrestrs().count("adv_loc") == 1) - { - if (std::bernoulli_distribution(1.0/2.0)(rng)) - { - utter << verbly::token{"here"}; - } else { - utter << verbly::token{"there"}; - } - - continue; - } else if (part.get_synrestrs().count("refl") == 1) - { - utter << verbly::token{"themselves"}; - - continue; - } else if ((part.get_synrestrs().count("sc_to_inf") == 1) - || (part.get_synrestrs().count("ac_to_inf") == 1) - || (part.get_synrestrs().count("vc_to_inf") == 1) - || (part.get_synrestrs().count("rs_to_inf") == 1) - || (part.get_synrestrs().count("oc_to_inf") == 1)) - { - utter << verbly::token{verbly::token::fillin_type::infinitive_phrase}; - - continue; - } else if (part.get_synrestrs().count("oc_bare_inf") == 1) - { - verbly::token tkn{verbly::token::fillin_type::infinitive_phrase}; - tkn.set_extra(1); - - utter << tkn; - - continue; - } - - auto selrestrs = fr.roles()[part.get_role()]; - auto query = database.nouns().limit(1).random().is_not_proper().full_hyponym_of(parse_selrestrs(selrestrs)); - verbly::noun n = query.run().front(); - if ((std::bernoulli_distribution(1.0/2.0)(rng)) && (part.get_synrestrs().count("definite") == 0)) - { - utter << verbly::token{"the"}; - } else { - if (n.starts_with_vowel_sound()) - { - utter << verbly::token{"an"}; - } else { - utter << verbly::token{"a"}; - } - } - - if (part.get_synrestrs().count("plural") == 1) - { - utter << verbly::token{n, verbly::token::noun_inflection::plural}; - } else { - utter << verbly::token{n}; - } - - if (part.get_synrestrs().count("acc_ing") == 1) - { - utter << verbly::token{verbly::token::fillin_type::participle_phrase}; - } - - break; - } - - case verbly::frame::part::type::verb: - { - utter << verbly::token{v, verbly::token::verb_inflection::ing_form}; - - break; - } - - case verbly::frame::part::type::literal_preposition: - { - int ch_i = std::uniform_int_distribution(0, part.get_choices().size()-1)(rng); - utter << verbly::token{part.get_choices()[ch_i]}; - - break; - } - - case verbly::frame::part::type::selection_preposition: - { - auto query = database.prepositions(); - for (auto preprestr : part.get_preprestrs()) - { - query.in_group(preprestr); - } - utter << verbly::token{query.random().limit(1).run().front()}; - - break; - } - - case verbly::frame::part::type::adjective: - { - utter << verbly::token{verbly::token::fillin_type::adjective_phrase}; - - break; - } - - case verbly::frame::part::type::adverb: - { - utter << verbly::token{verbly::token::fillin_type::adverb_phrase}; - - break; - } - - case verbly::frame::part::type::literal: - { - utter << verbly::token{part.get_literal()}; - - break; - } - } - } - - it = utter; - - break; - } - - break; - } - - case verbly::token::fillin_type::adjective_phrase: - { - verbly::token phrase; - - if (std::bernoulli_distribution(1.0/4.0)(rng)) - { - phrase << verbly::token{verbly::token::fillin_type::adverb_phrase}; - } - - if (std::bernoulli_distribution(1.0/2.0)(rng)) - { - phrase << verbly::token{verbly::token::fillin_type::participle_phrase}; - } else { - phrase << verbly::token{database.adjectives().random().limit(1).run().front()}; - } - - it = phrase; - - break; - } - - case verbly::token::fillin_type::adverb_phrase: - { - it = verbly::token{database.adverbs().random().limit(1).run().front()}; - - break; - } - - case verbly::token::fillin_type::infinitive_phrase: - { - verbly::token utter; - if (it.get_extra() != 1) - { - utter << verbly::token{"to"}; - } - - utter << verbly::token{database.verbs().random().limit(1).run().front()}; - - it = utter; - - break; - } - - default: - { - it = verbly::token{"*the reality of the situation*"}; - - break; - } - } - - break; - } - } - } -}; +#include "sentence.h" int main(int argc, char** argv) { @@ -437,51 +15,45 @@ int main(int argc, char** argv) std::cout << "usage: furries [configfile]" << std::endl; return -1; } - + std::string configfile(argv[1]); YAML::Node config = YAML::LoadFile(configfile); std::random_device random_device; std::mt19937 random_engine{random_device()}; - + twitter::auth auth; auth.setConsumerKey(config["consumer_key"].as()); auth.setConsumerSecret(config["consumer_secret"].as()); auth.setAccessKey(config["access_key"].as()); auth.setAccessSecret(config["access_secret"].as()); - + twitter::client client(auth); - verbly::data database {config["verbly_datafile"].as()}; - + verbly::database database(config["verbly_datafile"].as()); + sentence generator(database, random_engine); + for (;;) { std::cout << "Generating tweet..." << std::endl; - fill_blanks yeah {database}; - verbly::token action{ - {"the furries are"}, - {verbly::token::fillin_type::adjective_phrase} - }; - while (!action.is_complete()) - { - yeah.visit(action, random_engine); - } - - std::string result = action.compile(); + std::string result = generator.generate(); result.resize(140); + std::cout << result << std::endl; + try { client.updateStatus(result); - + std::cout << "Tweeted!" << std::endl; + std::cout << "Waiting..." << std::endl; + + std::this_thread::sleep_for(std::chrono::hours(1)); + + std::cout << std::endl; } catch (const twitter::twitter_error& e) { std::cout << "Twitter error: " << e.what() << std::endl; } - - std::cout << "Waiting..." << std::endl; - - std::this_thread::sleep_for(std::chrono::hours(1)); } } diff --git a/sentence.cpp b/sentence.cpp new file mode 100644 index 0000000..fdf883e --- /dev/null +++ b/sentence.cpp @@ -0,0 +1,682 @@ +#include "sentence.h" +#include +#include +#include + +sentence::sentence( + const verbly::database& database, + std::mt19937& rng) : + database_(database), + rng_(rng) +{ +} + +std::string sentence::generate() const +{ + // Generate the form that the title should take. + verbly::token form; + form << "the" << "furries" << "are"; + + std::set synrestrs {"adjective_phrase"}; + form << synrestrs; + + // Compile the form. + while (!form.isComplete()) + { + visit(form); + } + + return form.compile(); +} + +verbly::filter sentence::parseSelrestrs( + verbly::selrestr selrestr) const +{ + switch (selrestr.getType()) + { + case verbly::selrestr::type::empty: + { + return {}; + } + + case verbly::selrestr::type::singleton: + { + verbly::filter result; + + if (selrestr.getRestriction() == "concrete") + { + result = (verbly::notion::wnid == 100001930); // physical entity + } else if (selrestr.getRestriction() == "time") + { + result = (verbly::notion::wnid == 100028270); // time + } else if (selrestr.getRestriction() == "state") + { + result = (verbly::notion::wnid == 100024720); // state + } else if (selrestr.getRestriction() == "abstract") + { + result = (verbly::notion::wnid == 100002137); // abstract entity + } else if (selrestr.getRestriction() == "scalar") + { + result = (verbly::notion::wnid == 103835412); // number + } else if (selrestr.getRestriction() == "currency") + { + result = (verbly::notion::wnid == 105050379); // currency + } else if (selrestr.getRestriction() == "location") + { + result = (verbly::notion::wnid == 100027167); // location + } else if (selrestr.getRestriction() == "organization") + { + result = (verbly::notion::wnid == 100237078); // organization + } else if (selrestr.getRestriction() == "int_control") + { + result = (verbly::notion::wnid == 100007347); // causal agent + } else if (selrestr.getRestriction() == "natural") + { + result = (verbly::notion::wnid == 100019128); // natural object + } else if (selrestr.getRestriction() == "phys_obj") + { + result = (verbly::notion::wnid == 100002684); // physical object + } else if (selrestr.getRestriction() == "solid") + { + result = (verbly::notion::wnid == 113860793); // solid + } else if (selrestr.getRestriction() == "shape") + { + result = (verbly::notion::wnid == 100027807); // shape + } else if (selrestr.getRestriction() == "substance") + { + result = (verbly::notion::wnid == 100019613); // substance + } else if (selrestr.getRestriction() == "idea") + { + result = (verbly::notion::wnid == 105803379); // idea + } else if (selrestr.getRestriction() == "sound") + { + result = (verbly::notion::wnid == 107111047); // sound + } else if (selrestr.getRestriction() == "communication") + { + result = (verbly::notion::wnid == 100033020); // communication + } else if (selrestr.getRestriction() == "region") + { + result = (verbly::notion::wnid == 105221895); // region + } else if (selrestr.getRestriction() == "place") + { + result = (verbly::notion::wnid == 100586262); // place + } else if (selrestr.getRestriction() == "machine") + { + result = (verbly::notion::wnid == 102958343); // machine + } else if (selrestr.getRestriction() == "animate") + { + result = (verbly::notion::wnid == 100004258); // animate thing + } else if (selrestr.getRestriction() == "plant") + { + result = (verbly::notion::wnid == 103956922); // plant + } else if (selrestr.getRestriction() == "comestible") + { + result = (verbly::notion::wnid == 100021265); // food + } else if (selrestr.getRestriction() == "artifact") + { + result = (verbly::notion::wnid == 100021939); // artifact + } else if (selrestr.getRestriction() == "vehicle") + { + result = (verbly::notion::wnid == 104524313); // vehicle + } else if (selrestr.getRestriction() == "human") + { + result = (verbly::notion::wnid == 100007846); // person + } else if (selrestr.getRestriction() == "animal") + { + result = (verbly::notion::wnid == 100015388); // animal + } else if (selrestr.getRestriction() == "body_part") + { + result = (verbly::notion::wnid == 105220461); // body part + } else if (selrestr.getRestriction() == "garment") + { + result = (verbly::notion::wnid == 103051540); // clothing + } else if (selrestr.getRestriction() == "tool") + { + result = (verbly::notion::wnid == 104451818); // tool + } else { + return {}; + } + + std::cout << selrestr.getRestriction() << " (" << selrestr.getPos() << ")" << std::endl; + + if (selrestr.getPos()) + { + return (verbly::notion::fullHypernyms %= result); + } else { + return !(verbly::notion::fullHypernyms %= result); + } + } + + case verbly::selrestr::type::group: + { + std::cout << "or: " << selrestr.getOrlogic() << std::endl; + verbly::filter ret(selrestr.getOrlogic()); + + for (const verbly::selrestr& child : selrestr) + { + ret += parseSelrestrs(child); + } + + return ret; + } + } +} + +bool sentence::requiresSelrestr( + std::string restriction, + verbly::selrestr selrestr) const +{ + switch (selrestr.getType()) + { + case verbly::selrestr::type::empty: + { + return false; + } + + case verbly::selrestr::type::singleton: + { + if (selrestr.getRestriction() == restriction) + { + return selrestr.getPos(); + } else { + return false; + } + } + + case verbly::selrestr::type::group: + { + if (selrestr.getOrlogic()) + { + return std::all_of(std::begin(selrestr), std::end(selrestr), [=] (const verbly::selrestr& s) { + return requiresSelrestr(restriction, s); + }); + } else { + return std::any_of(std::begin(selrestr), std::end(selrestr), [=] (const verbly::selrestr& s) { + return requiresSelrestr(restriction, s); + }); + } + } + } +} + +verbly::word sentence::generateStandardNoun( + std::string role, + verbly::selrestr selrestrs) const +{ + std::geometric_distribution tagdist(0.5); // 0.06 + std::vector result; + bool trySelection = true; + + while (result.empty()) + { + verbly::filter condition = + (verbly::notion::partOfSpeech == verbly::part_of_speech::noun) + && (verbly::form::proper == false) + && (verbly::word::tagCount >= 1) + && !(verbly::word::usageDomains %= (verbly::notion::wnid == 106718862)); // Blacklist ethnic slurs + + // Only use selection restrictions for a first attempt. + if (trySelection) + { + verbly::filter selrestrCondition = parseSelrestrs(selrestrs).compact(); + + if (selrestrCondition.getType() != verbly::filter::type::empty) + { + condition &= std::move(selrestrCondition); + } else if (role == "Attribute") + { + condition &= (verbly::notion::fullHypernyms %= (verbly::notion::wnid == 100024264)); // attribute + } else if (role == "Instrument") + { + condition &= (verbly::notion::fullHypernyms %= (verbly::notion::wnid == 104451818)); // tool + } else if (role == "Agent") + { + condition &= (verbly::notion::fullHypernyms %= (verbly::notion::wnid == 100007347)); // causal agent + } + + trySelection = false; + } else { + std::cout << "Selection failed" << std::endl; + } + + result = database_.words(condition).all(); + } + + return result.front(); +} + +verbly::token sentence::generateStandardNounPhrase( + const verbly::word& noun, + std::string role, + bool plural, + bool definite) const +{ + verbly::token utter; + verbly::word sounder = noun; + verbly::word descript; + + if (std::bernoulli_distribution(1.0/8.0)(rng_)) + { + std::geometric_distribution tagdist(0.2); + descript = database_.words( + (verbly::word::tagCount >= tagdist(rng_)) + && (verbly::notion::partOfSpeech == verbly::part_of_speech::adjective)).first(); + + sounder = descript; + } + + if ((std::bernoulli_distribution(1.0/3.0)(rng_)) && (definite)) + { + utter << "the"; + + if (std::bernoulli_distribution(1.0/2.0)(rng_)) + { + plural = true; + } + } else { + if ((role != "Theme") && (role != "Attribute") && std::bernoulli_distribution(1.0/2.0)(rng_)) + { + utter << "your"; + } else if (!plural) { + if (sounder.getLemma().getBaseForm().startsWithVowelSound()) + { + utter << "an"; + } else { + utter << "a"; + } + } + } + + if (descript) + { + utter << descript; + } + + if (plural && noun.getLemma().hasInflection(verbly::inflection::plural)) + { + utter << verbly::token(noun, verbly::inflection::plural); + } else { + utter << noun; + } + + return utter; +} + +verbly::token sentence::generateClause( + const verbly::token& it) const +{ + verbly::token utter; + std::geometric_distribution tagdist(0.07); + std::vector verbDataset; + + verbly::filter frameCondition = + (verbly::frame::length >= 2) + && (verbly::frame::parts(0) %= ( + (verbly::part::type == verbly::part_type::noun_phrase) + && (verbly::part::role == "Agent")) + && (verbly::frame::parts(1) %= + (verbly::part::type == verbly::part_type::verb)) + && !(verbly::frame::parts() %= ( + verbly::part::synrestrs %= "adjp"))); + + if (it.hasSynrestr("experiencer")) + { + frameCondition &= + (verbly::frame::parts(2) %= + (verbly::part::type == verbly::part_type::noun_phrase) + && !(verbly::part::synrestrs %= "genitive") + && ((verbly::part::role == "Patient") + || (verbly::part::role == "Experiencer"))); + } + + verbly::filter verbCondition = + (verbly::notion::partOfSpeech == verbly::part_of_speech::verb) + && frameCondition; + + if (it.hasSynrestr("participle_phrase")) + { + verbCondition &= (verbly::lemma::forms(verbly::inflection::ing_form)); + } else if (it.hasSynrestr("progressive")) + { + verbCondition &= (verbly::lemma::forms(verbly::inflection::s_form)); + } else if (it.hasSynrestr("past_participle")) + { + verbCondition &= (verbly::lemma::forms(verbly::inflection::past_participle)); + } + + // Because of the tag distribution, it's possible (albeit extremely unlikely) + // for the verb query to fail, so we loop until it succeeds. + while (verbDataset.empty()) + { + verbDataset = database_.words( + verbCondition + && (verbly::word::tagCount >= tagdist(rng_)) + ).all(); + } + + verbly::word verb = verbDataset.front(); + verbly::frame frame = database_.frames(frameCondition && verb).first(); + std::list parts(std::begin(frame.getParts()), std::end(frame.getParts())); + + if (it.hasSynrestr("experiencer")) + { + // Ignore the direct object. + parts.erase(std::next(parts.begin(), 2)); + } + + if (it.hasSynrestr("subjectless")) + { + // Ignore the subject. + parts.pop_front(); + } + + for (const verbly::part& part : parts) + { + switch (part.getType()) + { + case verbly::part_type::noun_phrase: + { + std::cout << "NP: "; + for (auto& s : part.getNounSynrestrs()) + { + std::cout << s << " "; + } + std::cout << std::endl; + + if (requiresSelrestr("currency", part.getNounSelrestrs())) + { + int lead = std::uniform_int_distribution(1,9)(rng_); + int tail = std::uniform_int_distribution(0,6)(rng_); + std::string tailStr(tail, '0'); + + utter << ("$" + std::to_string(lead) + tailStr); + } else if (part.nounHasSynrestr("adjp")) + { + utter << std::set({"adjective_phrase"}); + } else if ((part.nounHasSynrestr("be_sc_ing")) + || (part.nounHasSynrestr("ac_ing")) + || (part.nounHasSynrestr("sc_ing")) + || (part.nounHasSynrestr("np_omit_ing")) + || (part.nounHasSynrestr("oc_ing"))) + { + utter << std::set({"participle_phrase", "subjectless"}); + } else if ((part.nounHasSynrestr("poss_ing")) + || (part.nounHasSynrestr("possing")) + || (part.nounHasSynrestr("pos_ing"))) + { + utter << "your"; + utter << std::set({"participle_phrase", "subjectless"}); + } else if (part.nounHasSynrestr("genitive")) + { + utter << "your"; + } else if (part.nounHasSynrestr("adv_loc")) + { + if (std::bernoulli_distribution(1.0/2.0)(rng_)) + { + utter << "here"; + } else { + utter << "there"; + } + } else if (part.nounHasSynrestr("refl")) + { + utter << "yourself"; + } else if ((part.nounHasSynrestr("sc_to_inf")) + || (part.nounHasSynrestr("ac_to_inf")) + || (part.nounHasSynrestr("vc_to_inf")) + || (part.nounHasSynrestr("rs_to_inf")) + || (part.nounHasSynrestr("oc_to_inf"))) + { + utter << std::set({"infinitive_phrase", "subjectless"}); + } else if (part.nounHasSynrestr("oc_bare_inf")) + { + utter << std::set({"infinitive_phrase", "bare", "subjectless"}); + } else if (part.nounHasSynrestr("wh_comp")) + { + utter << "whether"; + + verbly::token sentence(std::set({"progressive"})); + utter << generateClause(sentence); + } else if (part.nounHasSynrestr("that_comp")) + { + utter << "that"; + utter << "they"; + + verbly::token sentence(std::set({"subjectless"})); + utter << generateClause(sentence); + } else if (part.nounHasSynrestr("what_extract")) + { + utter << "what"; + + verbly::token sentence(std::set({"progressive", "experiencer"})); + utter << generateClause(sentence); + } else if (part.nounHasSynrestr("how_extract")) + { + utter << "how"; + + verbly::token sentence(std::set({"progressive"})); + utter << generateClause(sentence); + } else if (part.nounHasSynrestr("wh_inf")) + { + utter << "how"; + + verbly::token sentence(std::set({"infinitive_phrase", "subjectless"})); + utter << generateClause(sentence); + } else if (part.nounHasSynrestr("what_inf")) + { + utter << "what"; + + verbly::token sentence(std::set({"infinitive_phrase", "subjectless", "experiencer"})); + utter << generateClause(sentence); + } else if (part.nounHasSynrestr("wheth_inf")) + { + utter << "whether"; + + verbly::token sentence(std::set({"infinitive_phrase", "subjectless"})); + utter << generateClause(sentence); + } else if (part.nounHasSynrestr("quotation")) + { + verbly::token sentence(std::set({"participle_phrase"})); + while (!sentence.isComplete()) + { + visit(sentence); + } + + utter << ("\"" + sentence.compile() + "\""); + } else { + verbly::word noun = generateStandardNoun(part.getNounRole(), part.getNounSelrestrs()); + + bool plural = part.nounHasSynrestr("plural"); + if (!plural) + { + plural = requiresSelrestr("plural", part.getNounSelrestrs()); + } + + utter << generateStandardNounPhrase( + noun, + part.getNounRole(), + plural, + part.nounHasSynrestr("definite")); + + if (part.nounHasSynrestr("acc_ing") || part.nounHasSynrestr("ac_ing")) + { + utter << std::set({"participle_phrase", "subjectless"}); + } + } + + break; + } + + case verbly::part_type::verb: + { + std::cout << "V: " << verb.getBaseForm() << std::endl; + + if (it.hasSynrestr("progressive")) + { + utter << verbly::token(verb, verbly::inflection::s_form); + } else if (it.hasSynrestr("past_participle")) + { + utter << verbly::token(verb, verbly::inflection::past_participle); + } else if (it.hasSynrestr("infinitive_phrase")) + { + if (!it.hasSynrestr("bare")) + { + utter << "to"; + } + + utter << verb; + } else if (it.hasSynrestr("participle_phrase")) + { + utter << verbly::token(verb, verbly::inflection::ing_form); + } else { + utter << verb; + } + + break; + } + + case verbly::part_type::preposition: + { + std::cout << "PREP" << std::endl; + + if (part.isPrepositionLiteral()) + { + int choiceIndex = std::uniform_int_distribution(0, part.getPrepositionChoices().size()-1)(rng_); + utter << part.getPrepositionChoices()[choiceIndex]; + } else { + verbly::filter pgf(true); + for (const std::string& choice : part.getPrepositionChoices()) + { + pgf += (verbly::notion::prepositionGroups == choice); + } + + utter << database_.words(pgf && (verbly::notion::partOfSpeech == verbly::part_of_speech::preposition)).first(); + } + + break; + } + + case verbly::part_type::adjective: + { + std::cout << "ADJ" << std::endl; + + utter << std::set({"adjective_phrase"}); + + break; + } + + case verbly::part_type::adverb: + { + std::cout << "ADV" << std::endl; + + utter << std::set({"adverb_phrase"}); + + break; + } + + case verbly::part_type::literal: + { + std::cout << "LIT" << std::endl; + + utter << part.getLiteralValue(); + + break; + } + + case verbly::part_type::invalid: + { + // Nope + + break; + } + } + } + + if ((parts.size() == 1) && (std::bernoulli_distribution(1.0/4.0)(rng_))) + { + utter << std::set({"adverb_phrase"}); + } + + return utter; +} + +void sentence::visit(verbly::token& it) const +{ + switch (it.getType()) + { + case verbly::token::type::utterance: + { + for (verbly::token& token : it) + { + if (!token.isComplete()) + { + visit(token); + + break; + } + } + + break; + } + + case verbly::token::type::fillin: + { + if (it.hasSynrestr("infinitive_phrase")) + { + it = generateClause(it); + } else if (it.hasSynrestr("adjective_phrase")) + { + verbly::token phrase; + + if (std::bernoulli_distribution(1.0/2.0)(rng_)) + { + phrase << std::set({"participle_phrase", "subjectless"}); + } else { + if (std::bernoulli_distribution(1.0/6.0)(rng_)) + { + phrase << std::set({"adverb_phrase"}); + } + + std::geometric_distribution tagdist(0.2); + phrase << database_.words( + (verbly::word::tagCount >= tagdist(rng_)) + && (verbly::notion::partOfSpeech == verbly::part_of_speech::adjective)).first(); + } + + it = phrase; + } else if (it.hasSynrestr("adverb_phrase")) + { + std::geometric_distribution tagdist(1.0/23.0); + + it = database_.words( + (verbly::notion::partOfSpeech == verbly::part_of_speech::adverb) + && (verbly::word::tagCount >= tagdist(rng_)) + ).first(); + } else if (it.hasSynrestr("participle_phrase")) + { + if (std::bernoulli_distribution(1.0/2.0)(rng_)) + { + it = verbly::token( + database_.words( + (verbly::notion::partOfSpeech == verbly::part_of_speech::verb) + && (verbly::lemma::forms(verbly::inflection::ing_form))).first(), + verbly::inflection::ing_form); + } else { + it = generateClause(it); + } + } else { + it = "*the reality of the situation*"; + } + + break; + } + + case verbly::token::type::word: + case verbly::token::type::literal: + case verbly::token::type::part: + { + // Nope + + break; + } + } +} diff --git a/sentence.h b/sentence.h new file mode 100644 index 0000000..abf6860 --- /dev/null +++ b/sentence.h @@ -0,0 +1,39 @@ +#ifndef SENTENCE_H_81987F60 +#define SENTENCE_H_81987F60 + +#include +#include +#include + +class sentence { +public: + + sentence( + const verbly::database& database, + std::mt19937& rng); + + std::string generate() const; + +private: + + verbly::filter parseSelrestrs(verbly::selrestr selrestr) const; + + bool requiresSelrestr(std::string restriction, verbly::selrestr selrestr) const; + + verbly::word generateStandardNoun(std::string role, verbly::selrestr selrestrs) const; + + verbly::token generateStandardNounPhrase( + const verbly::word& noun, + std::string role, + bool plural, + bool definite) const; + + verbly::token generateClause(const verbly::token& it) const; + + void visit(verbly::token& it) const; + + const verbly::database& database_; + std::mt19937& rng_; +}; + +#endif /* end of include guard: SENTENCE_H_81987F60 */ diff --git a/vendor/verbly b/vendor/verbly index 1f898f3..bea3673 160000 --- a/vendor/verbly +++ b/vendor/verbly @@ -1 +1 @@ -Subproject commit 1f898f3bd66c29672275c2c884b17ba662ced626 +Subproject commit bea3673ae1b3d19585dec56e96dbcd8a56b96e6d -- cgit 1.4.1