From ffc9edbc9fbeb65fd32c1fa24584010a83a6de31 Mon Sep 17 00:00:00 2001
From: Kelly Rauchenberger
Date: Fri, 24 Feb 2017 11:15:12 -0500
Subject: Created bot

---
 .gitmodules          |   6 +
 CMakeLists.txt       |  13 +
 sentence.cpp         | 706 +++++++++++++++++++++++++++++++++++++++++++++++++++
 sentence.h           |  43 +++
 support.cpp          |  81 ++++++
 vendor/libtwittercpp |   1 +
 vendor/verbly        |   1 +
 vendor/yaml-cpp      |   1 +
 8 files changed, 852 insertions(+)
 create mode 100644 .gitmodules
 create mode 100644 CMakeLists.txt
 create mode 100644 sentence.cpp
 create mode 100644 sentence.h
 create mode 100644 support.cpp
 create mode 160000 vendor/libtwittercpp
 create mode 160000 vendor/verbly
 create mode 160000 vendor/yaml-cpp

diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 0000000..5c2582e
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,6 @@
+[submodule "vendor/libtwittercpp"]
+	path = vendor/libtwittercpp
+	url = git@github.com:hatkirby/libtwittercpp
+[submodule "vendor/yaml-cpp"]
+	path = vendor/yaml-cpp
+	url = git@github.com:jbeder/yaml-cpp
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000..add6b55
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,13 @@
+cmake_minimum_required (VERSION 3.1)
+project (support)
+
+add_subdirectory(vendor/verbly)
+add_subdirectory(vendor/libtwittercpp)
+add_subdirectory(vendor/yaml-cpp EXCLUDE_FROM_ALL)
+
+include_directories(vendor/verbly/lib vendor/libtwittercpp/src vendor/yaml-cpp/include)
+add_executable(support sentence.cpp support.cpp)
+set_property(TARGET support PROPERTY CXX_STANDARD 11)
+set_property(TARGET support PROPERTY CXX_STANDARD_REQUIRED ON)
+target_link_libraries(support verbly yaml-cpp twitter++)
+
diff --git a/sentence.cpp b/sentence.cpp
new file mode 100644
index 0000000..3dabe58
--- /dev/null
+++ b/sentence.cpp
@@ -0,0 +1,706 @@
+#include "sentence.h"
+#include <iostream>
+#include <list>
+#include <set>
+
+// Binds the generator to a verbly database and a random engine. Both are held
+// by reference, so they must outlive this object.
+sentence::sentence(
+  const verbly::database& database,
+  std::mt19937& rng) :
+    database_(database),
+    rng_(rng)
+{
+}
+
+// Produces one randomly cased phrase. The starting form is either "like"
+// followed by an infinitive_phrase fill-in or "have" followed by a
+// gerund_phrase fill-in (both subjectless); fill-ins are expanded through
+// visit() until the token is complete, and each space-separated word of the
+// compiled text is then put in all caps with probability 1/2.
+std::string sentence::generate() const
+{
+  // Generate the form that the sentence should take.
+  std::vector<verbly::token> actions {
+    {"like", verbly::token(std::set<std::string>({"infinitive_phrase", "subjectless"}))},
+    {"have", verbly::token(std::set<std::string>({"gerund_phrase", "subjectless"}))}
+  };
+
+  verbly::token form = actions[
+    std::uniform_int_distribution<int>(0, actions.size()-1)(rng_)];
+
+  // Compile the form.
+  while (!form.isComplete())
+  {
+    visit(form);
+  }
+
+  std::string compiled = form.compile();
+  std::list<std::string> words =
+    verbly::split<std::list<std::string>>(compiled, " ");
+
+  verbly::token cased;
+  for (std::string& word : words)
+  {
+    if (std::bernoulli_distribution(1.0/2.0)(rng_))
+    {
+      cased << verbly::token::capitalize(verbly::token::casing::all_caps, word);
+    } else {
+      cased << word;
+    }
+  }
+
+  return cased.compile();
+}
+
+// Returns true with probability (number of entries of choices that appear in
+// selrestrs) / (size of selrestrs). An empty selrestrs yields false, since
+// 0/0 is NaN and not a valid std::bernoulli_distribution parameter.
+bool sentence::chooseSelrestr(std::set<std::string> selrestrs, std::set<std::string> choices) const
+{
+  if (selrestrs.empty())
+  {
+    return false;
+  }
+
+  int validChoices = 0;
+  for (const std::string& choice : choices)
+  {
+    if (selrestrs.count(choice))
+    {
+      validChoices++;
+    }
+  }
+
+  return std::bernoulli_distribution(static_cast<double>(validChoices)/static_cast<double>(selrestrs.size()))(rng_);
+}
+
+// Queries the database for a common noun (not proper, tagCount >= 1, not in
+// the blacklisted usage domain). The first attempt narrows the query to
+// hypernyms matching selrestrs, or to a role-based default when no known
+// selrestr applies; if that yields nothing, the query is repeated without
+// the narrowing. Returns the first word of the result set.
+verbly::word sentence::generateStandardNoun(
+  std::string role,
+  std::set<std::string> selrestrs) const
+{
+  std::geometric_distribution<int> tagdist(0.5); // 0.06
+  std::vector<verbly::word> result;
+  bool trySelection = true;
+
+  // NOTE(review): if the unnarrowed query also comes back empty, this loop
+  // repeats the same query indefinitely.
+  while (result.empty())
+  {
+    verbly::filter condition =
+      (verbly::notion::partOfSpeech == verbly::part_of_speech::noun)
+      && (verbly::form::proper == false)
+      //&& (verbly::form::complexity == 1)
+      // && (verbly::word::tagCount >= tagdist(rng_)) // Favor more common words
+      && (verbly::word::tagCount >= 1)
+      && !(verbly::word::usageDomains %= (verbly::notion::wnid == 106718862)); // Blacklist ethnic slurs
+
+    // Only use selection restrictions for a first attempt.
+    if (trySelection)
+    {
+      verbly::filter selection(true);
+
+      for (const std::string& selrestr : selrestrs)
+      {
+        if (selrestr == "concrete")
+        {
+          selection += (verbly::notion::wnid == 100001930); // physical entity
+        } else if (selrestr == "time")
+        {
+          selection += (verbly::notion::wnid == 100028270); // time
+        } else if (selrestr == "state")
+        {
+          selection += (verbly::notion::wnid == 100024720); // state
+        } else if (selrestr == "abstract")
+        {
+          selection += (verbly::notion::wnid == 100002137); // abstract entity
+        } else if (selrestr == "scalar")
+        {
+          selection += (verbly::notion::wnid == 103835412); // number
+        } else if (selrestr == "currency")
+        {
+          selection += (verbly::notion::wnid == 105050379); // currency
+        } else if (selrestr == "location")
+        {
+          selection += (verbly::notion::wnid == 100027167); // location
+        } else if (selrestr == "organization")
+        {
+          selection += (verbly::notion::wnid == 100237078); // organization
+        } else if (selrestr == "int_control")
+        {
+          selection += (verbly::notion::wnid == 100007347); // causal agent
+        } else if (selrestr == "natural")
+        {
+          selection += (verbly::notion::wnid == 100019128); // natural object
+        } else if (selrestr == "phys_obj")
+        {
+          selection += (verbly::notion::wnid == 100002684); // physical object
+        } else if (selrestr == "solid")
+        {
+          selection += (verbly::notion::wnid == 113860793); // solid
+        } else if (selrestr == "shape")
+        {
+          selection += (verbly::notion::wnid == 100027807); // shape
+        } else if (selrestr == "substance")
+        {
+          selection += (verbly::notion::wnid == 100019613); // substance
+        } else if (selrestr == "idea")
+        {
+          selection += (verbly::notion::wnid == 105803379); // idea
+        } else if (selrestr == "sound")
+        {
+          selection += (verbly::notion::wnid == 107111047); // sound
+        } else if (selrestr == "communication")
+        {
+          selection += (verbly::notion::wnid == 100033020); // communication
+        } else if (selrestr == "region")
+        {
+          selection += (verbly::notion::wnid == 105221895); // region
+        } else if (selrestr == "place")
+        {
+          selection += (verbly::notion::wnid == 100586262); // place
+        } else if (selrestr == "machine")
+        {
+          selection += (verbly::notion::wnid == 102958343); // machine
+        } else if (selrestr == "animate")
+        {
+          selection += (verbly::notion::wnid == 100004258); // animate thing
+        } else if (selrestr == "plant")
+        {
+          selection += (verbly::notion::wnid == 103956922); // plant
+        } else if (selrestr == "comestible")
+        {
+          selection += (verbly::notion::wnid == 100021265); // food
+        } else if (selrestr == "artifact")
+        {
+          selection += (verbly::notion::wnid == 100021939); // artifact
+        } else if (selrestr == "vehicle")
+        {
+          selection += (verbly::notion::wnid == 104524313); // vehicle
+        } else if (selrestr == "human")
+        {
+          selection += (verbly::notion::wnid == 100007846); // person
+        } else if (selrestr == "animal")
+        {
+          selection += (verbly::notion::wnid == 100015388); // animal
+        } else if (selrestr == "body_part")
+        {
+          selection += (verbly::notion::wnid == 105220461); // body part
+        } else if (selrestr == "garment")
+        {
+          selection += (verbly::notion::wnid == 103051540); // clothing
+        } else if (selrestr == "tool")
+        {
+          selection += (verbly::notion::wnid == 104451818); // tool
+        } else if ((selrestr == "concrete_inanimate") || (selrestr == "inanimate"))
+        {
+          selection += (verbly::notion::wnid == 100021939); // artifact
+          selection += (verbly::notion::wnid == 100019128); // natural object
+        } else if (selrestr == "non_region_location")
+        {
+          selection += (verbly::notion::wnid == 102913152); // building
+        } else if (selrestr == "non_solid_food")
+        {
+          selection += (verbly::notion::wnid == 107881800); // beverage
+        } else if (selrestr == "solid_food")
+        {
+          selection += (verbly::notion::wnid == 107555863); // solid food
+        } else if (selrestr == "slinky")
+        {
+          selection += (verbly::notion::wnid == 103670849); // line
+        }
+      }
+
+      if (selection.compact().getType() != verbly::filter::type::empty)
+      {
+        condition &= (verbly::notion::fullHypernyms %= std::move(selection));
+      } else if (role == "Attribute")
+      {
+        condition &= (verbly::notion::fullHypernyms %= (verbly::notion::wnid == 100024264)); // attribute
+      } else if (role == "Instrument")
+      {
+        condition &= (verbly::notion::fullHypernyms %= (verbly::notion::wnid == 104451818)); // tool
+      } else if (role == "Agent")
+      {
+        condition &= (verbly::notion::fullHypernyms %= (verbly::notion::wnid == 100007347)); // causal agent
+      }
+
+      trySelection = false;
+    } else {
+      std::cout << "Selection failed" << std::endl;
+    }
+
+    result = database_.words(condition).all();
+  }
+
+  return result.front();
+}
+
+// Wraps noun in a determiner: "the" (1/3 chance when definite, which also
+// forces plural half the time), otherwise "your" (1/2 chance unless role is
+// "Theme" or "Attribute"), otherwise "a"/"an" for singular nouns. The plural
+// inflection is used only when requested and the noun has one.
+verbly::token sentence::generateStandardNounPhrase(
+  const verbly::word& noun,
+  std::string role,
+  bool plural,
+  bool definite) const
+{
+  verbly::token utter;
+  verbly::word sounder = noun;
+  verbly::word descript;
+
+  /*if (std::bernoulli_distribution(1.0/8.0)(rng_))
+  {
+    std::geometric_distribution<int> tagdist(0.2);
+    descript = database_.words(
+      (verbly::word::tagCount >= tagdist(rng_))
+      && (verbly::notion::partOfSpeech == verbly::part_of_speech::adjective)).first();
+
+    sounder = descript;
+  }*/
+
+  if ((std::bernoulli_distribution(1.0/3.0)(rng_)) && (definite))
+  {
+    utter << "the";
+
+    if (std::bernoulli_distribution(1.0/2.0)(rng_))
+    {
+      plural = true;
+    }
+  } else {
+    if ((role != "Theme") && (role != "Attribute") && std::bernoulli_distribution(1.0/2.0)(rng_))
+    {
+      utter << "your";
+    } else if (!plural) {
+      if (sounder.getBaseForm().startsWithVowelSound())
+      {
+        utter << "an";
+      } else {
+        utter << "a";
+      }
+    }
+  }
+
+  if (descript.isValid())
+  {
+    utter << descript;
+  }
+
+  if (plural && noun.hasInflection(verbly::inflection::plural))
+  {
+    utter << verbly::token(noun, verbly::inflection::plural);
+  } else {
+    utter << noun;
+  }
+
+  return utter;
+}
+
+// Builds a clause for the fill-in token `it`: picks a verb whose frame starts
+// with an Agent noun phrase followed by the verb, then emits a token for each
+// frame part. The synrestrs on `it` select the verb inflection and whether
+// the subject ("subjectless") or direct object ("experiencer") is dropped.
+verbly::token sentence::generateClause(
+  const verbly::token& it) const
+{
+  verbly::token utter;
+  std::geometric_distribution<int> tagdist(0.07);
+  std::vector<verbly::word> verbDataset;
+
+  // NOTE(review): the parenthesis opened before parts(0) is only closed at
+  // the end of the expression, so the parts(1) and "adjp" terms sit inside
+  // it -- confirm this grouping is intended.
+  verbly::filter frameCondition =
+    (verbly::frame::length >= 2)
+    && (verbly::frame::parts(0) %= (
+      (verbly::part::type == verbly::part_type::noun_phrase)
+      && (verbly::part::role == "Agent"))
+    && (verbly::frame::parts(1) %=
+      (verbly::part::type == verbly::part_type::verb))
+    && !(verbly::frame::parts() %= (
+      verbly::part::synrestrs %= "adjp")));
+
+  if (it.hasSynrestr("experiencer"))
+  {
+    frameCondition &=
+      (verbly::frame::parts(2) %=
+        (verbly::part::type == verbly::part_type::noun_phrase)
+        && !(verbly::part::synrestrs %= "genitive")
+        && ((verbly::part::role == "Patient")
+          || (verbly::part::role == "Experiencer")));
+  }
+
+  verbly::filter verbCondition =
+    (verbly::notion::partOfSpeech == verbly::part_of_speech::verb)
+    && frameCondition;
+
+  if (it.hasSynrestr("participle_phrase"))
+  {
+    verbCondition &= (verbly::word::forms(verbly::inflection::ing_form));
+  } else if (it.hasSynrestr("progressive"))
+  {
+    verbCondition &= (verbly::word::forms(verbly::inflection::s_form));
+  } else if (it.hasSynrestr("past_participle"))
+  {
+    verbCondition &= (verbly::word::forms(verbly::inflection::past_participle));
+  }
+
+  // Because of the tag distribution, it's possible (albeit extremely unlikely)
+  // for the verb query to fail, so we loop until it succeeds.
+  while (verbDataset.empty())
+  {
+    verbDataset = database_.words(
+      verbCondition
+      && (verbly::word::tagCount >= tagdist(rng_))
+    ).all();
+  }
+
+  verbly::word verb = verbDataset.front();
+  verbly::frame frame = database_.frames(frameCondition && verb).first();
+  std::list<verbly::part> parts(std::begin(frame.getParts()), std::end(frame.getParts()));
+
+  if (it.hasSynrestr("experiencer"))
+  {
+    // Ignore the direct object.
+    parts.erase(std::next(parts.begin(), 2));
+  }
+
+  if (it.hasSynrestr("subjectless"))
+  {
+    // Ignore the subject.
+    parts.pop_front();
+  }
+
+  for (const verbly::part& part : parts)
+  {
+    switch (part.getType())
+    {
+      case verbly::part_type::noun_phrase:
+      {
+        std::cout << "NP: ";
+        for (auto& s : part.getNounSynrestrs())
+        {
+          std::cout << s << " ";
+        }
+        std::cout << std::endl;
+
+        if (chooseSelrestr(part.getNounSelrestrs(), {"currency"}))
+        {
+          int lead = std::uniform_int_distribution<int>(1,9)(rng_);
+          int tail = std::uniform_int_distribution<int>(0,6)(rng_);
+          std::string tailStr(tail, '0');
+
+          utter << ("$" + std::to_string(lead) + tailStr);
+        } else if (part.nounHasSynrestr("adjp"))
+        {
+          utter << std::set<std::string>({"adjective_phrase"});
+        } else if ((part.nounHasSynrestr("be_sc_ing"))
+          || (part.nounHasSynrestr("ac_ing"))
+          || (part.nounHasSynrestr("sc_ing"))
+          || (part.nounHasSynrestr("np_omit_ing"))
+          || (part.nounHasSynrestr("oc_ing")))
+        {
+          utter << std::set<std::string>({"participle_phrase", "subjectless"});
+        } else if ((part.nounHasSynrestr("poss_ing"))
+          || (part.nounHasSynrestr("possing"))
+          || (part.nounHasSynrestr("pos_ing")))
+        {
+          utter << "your";
+          utter << std::set<std::string>({"participle_phrase", "subjectless"});
+        } else if (part.nounHasSynrestr("adv_loc"))
+        {
+          if (std::bernoulli_distribution(1.0/2.0)(rng_))
+          {
+            utter << "here";
+          } else {
+            utter << "there";
+          }
+        } else if (part.nounHasSynrestr("refl"))
+        {
+          utter << "yourself";
+        } else if ((part.nounHasSynrestr("sc_to_inf"))
+          || (part.nounHasSynrestr("ac_to_inf"))
+          || (part.nounHasSynrestr("vc_to_inf"))
+          || (part.nounHasSynrestr("rs_to_inf"))
+          || (part.nounHasSynrestr("oc_to_inf")))
+        {
+          utter << std::set<std::string>({"infinitive_phrase", "subjectless"});
+        } else if (part.nounHasSynrestr("oc_bare_inf"))
+        {
+          utter << std::set<std::string>({"infinitive_phrase", "bare", "subjectless"});
+        } else if (part.nounHasSynrestr("wh_comp"))
+        {
+          utter << "whether";
+
+          verbly::token sentence(std::set<std::string>({"progressive"}));
+          utter << generateClause(sentence);
+        } else if (part.nounHasSynrestr("that_comp"))
+        {
+          utter << "that";
+          utter << "they";
+
+          verbly::token sentence(std::set<std::string>({"subjectless"}));
+          utter << generateClause(sentence);
+        } else if (part.nounHasSynrestr("what_extract"))
+        {
+          utter << "what";
+
+          verbly::token sentence(std::set<std::string>({"progressive", "experiencer"}));
+          utter << generateClause(sentence);
+        } else if (part.nounHasSynrestr("how_extract"))
+        {
+          utter << "how";
+
+          verbly::token sentence(std::set<std::string>({"progressive"}));
+          utter << generateClause(sentence);
+        } else if (part.nounHasSynrestr("wh_inf"))
+        {
+          utter << "how";
+
+          verbly::token sentence(std::set<std::string>({"infinitive_phrase", "subjectless"}));
+          utter << generateClause(sentence);
+        } else if (part.nounHasSynrestr("what_inf"))
+        {
+          utter << "what";
+
+          verbly::token sentence(std::set<std::string>({"infinitive_phrase", "subjectless", "experiencer"}));
+          utter << generateClause(sentence);
+        } else if (part.nounHasSynrestr("wheth_inf"))
+        {
+          utter << "whether";
+
+          verbly::token sentence(std::set<std::string>({"infinitive_phrase", "subjectless"}));
+          utter << generateClause(sentence);
+        } else if (part.nounHasSynrestr("quotation"))
+        {
+          verbly::token sentence(std::set<std::string>({"participle_phrase"}));
+          while (!sentence.isComplete())
+          {
+            visit(sentence);
+          }
+
+          utter << ("\"" + sentence.compile() + "\"");
+        } else {
+          if (part.nounHasSynrestr("genitive"))
+          {
+            verbly::word noun = generateStandardNoun("Passive", {"animate"});
+            verbly::token owner = generateStandardNounPhrase(noun, "Passive", false, true);
+            std::string ownerStr = owner.compile() + "'s";
+            utter << ownerStr;
+          }
+
+          verbly::word noun = generateStandardNoun(part.getNounRole(), part.getNounSelrestrs());
+
+          bool plural = part.nounHasSynrestr("plural") || chooseSelrestr(part.getNounSelrestrs(), {"group", "plural"});
+
+          utter << generateStandardNounPhrase(
+            noun,
+            part.getNounRole(),
+            plural,
+            part.nounHasSynrestr("definite"));
+
+          if (part.nounHasSynrestr("acc_ing") || part.nounHasSynrestr("ac_ing"))
+          {
+            utter << std::set<std::string>({"participle_phrase", "subjectless"});
+          }
+        }
+
+        break;
+      }
+
+      case verbly::part_type::verb:
+      {
+        std::cout << "V: " << verb.getBaseForm().getText() << std::endl;
+
+        if (it.hasSynrestr("progressive"))
+        {
+          utter << verbly::token(verb, verbly::inflection::s_form);
+        } else if (it.hasSynrestr("past_participle"))
+        {
+          utter << verbly::token(verb, verbly::inflection::past_participle);
+        } else if (it.hasSynrestr("infinitive_phrase"))
+        {
+          if (!it.hasSynrestr("bare"))
+          {
+            utter << "to";
+          }
+
+          utter << verb;
+        } else if (it.hasSynrestr("participle_phrase"))
+        {
+          utter << verbly::token(verb, verbly::inflection::ing_form);
+        } else if (it.hasSynrestr("gerund_phrase"))
+        {
+          // "gerund_phrase" is the fill-in that follows "have" in generate();
+          // it is rendered as a past participle here.
+          utter << verbly::token(verb, verbly::inflection::past_participle);
+        } else {
+          utter << verb;
+        }
+
+        break;
+      }
+
+      case verbly::part_type::preposition:
+      {
+        std::cout << "PREP" << std::endl;
+
+        if (part.isPrepositionLiteral())
+        {
+          int choiceIndex = std::uniform_int_distribution<int>(0, part.getPrepositionChoices().size()-1)(rng_);
+          utter << part.getPrepositionChoices()[choiceIndex];
+        } else {
+          verbly::filter pgf(true);
+          for (const std::string& choice : part.getPrepositionChoices())
+          {
+            pgf += (verbly::notion::prepositionGroups == choice);
+          }
+
+          utter << database_.words(pgf && (verbly::notion::partOfSpeech == verbly::part_of_speech::preposition)).first();
+        }
+
+        break;
+      }
+
+      case verbly::part_type::adjective:
+      {
+        std::cout << "ADJ" << std::endl;
+
+        utter << std::set<std::string>({"adjective_phrase"});
+
+        break;
+      }
+
+      case verbly::part_type::adverb:
+      {
+        std::cout << "ADV" << std::endl;
+
+        utter << std::set<std::string>({"adverb_phrase"});
+
+        break;
+      }
+
+      case verbly::part_type::literal:
+      {
+        std::cout << "LIT" << std::endl;
+
+        utter << part.getLiteralValue();
+
+        break;
+      }
+
+      case verbly::part_type::invalid:
+      {
+        // Nope
+
+        break;
+      }
+    }
+  }
+
+  if ((parts.size() == 1) && (std::bernoulli_distribution(1.0/4.0)(rng_)))
+  {
+    utter << std::set<std::string>({"adverb_phrase"});
+  }
+
+  return utter;
+}
+
+// Performs one expansion step on `it`: an utterance recurses into its first
+// incomplete child, a fill-in is replaced according to its synrestr, and a
+// transform forwards to its inner token. Other token types are left as is.
+void sentence::visit(verbly::token& it) const
+{
+  switch (it.getType())
+  {
+    case verbly::token::type::utterance:
+    {
+      for (verbly::token& token : it)
+      {
+        if (!token.isComplete())
+        {
+          visit(token);
+
+          break;
+        }
+      }
+
+      break;
+    }
+
+    case verbly::token::type::fillin:
+    {
+      if (it.hasSynrestr("infinitive_phrase") || it.hasSynrestr("gerund_phrase"))
+      {
+        it = generateClause(it);
+      } else if (it.hasSynrestr("adjective_phrase"))
+      {
+        verbly::token phrase;
+
+        if (std::bernoulli_distribution(1.0/6.0)(rng_))
+        {
+          phrase << std::set<std::string>({"adverb_phrase"});
+        }
+
+        if (std::bernoulli_distribution(1.0/4.0)(rng_))
+        {
+          phrase << std::set<std::string>({"participle_phrase", "subjectless"});
+        } else {
+          std::geometric_distribution<int> tagdist(0.2);
+          phrase << database_.words(
+            (verbly::word::tagCount >= tagdist(rng_))
+            && (verbly::notion::partOfSpeech == verbly::part_of_speech::adjective)).first();
+        }
+
+        it = phrase;
+      } else if (it.hasSynrestr("adverb_phrase"))
+      {
+        std::geometric_distribution<int> tagdist(1.0/23.0);
+
+        it = database_.words(
+          (verbly::notion::partOfSpeech == verbly::part_of_speech::adverb)
+          && (verbly::word::tagCount >= tagdist(rng_))
+        ).first();
+      } else if (it.hasSynrestr("participle_phrase"))
+      {
+        if (std::bernoulli_distribution(1.0/2.0)(rng_))
+        {
+          it = verbly::token(
+            database_.words(
+              (verbly::notion::partOfSpeech == verbly::part_of_speech::verb)
+              && (verbly::word::forms(verbly::inflection::ing_form))).first(),
+            verbly::inflection::ing_form);
+        } else {
+          it = generateClause(it);
+        }
+      } else {
+        it = "*the reality of the situation*";
+      }
+
+      break;
+    }
+
+    case verbly::token::type::transform:
+    {
+      visit(it.getInnerToken());
+
+      break;
+    }
+
+    case verbly::token::type::word:
+    case verbly::token::type::literal:
+    case verbly::token::type::part:
+    {
+      // Nope
+
+      break;
+    }
+  }
+}
+
diff --git a/sentence.h b/sentence.h
new file mode 100644
index 0000000..e3f2a03
--- /dev/null
+++ b/sentence.h
@@ -0,0 +1,43 @@
+#ifndef SENTENCE_H_81987F60
+#define SENTENCE_H_81987F60
+
+// NOTE(review): the verbly header name is assumed -- confirm against vendor/.
+#include <random>
+#include <set>
+#include <string>
+#include <verbly.h>
+
+// Generates randomly worded phrases from a verbly database. The database and
+// random engine are held by reference and must outlive the generator.
+class sentence {
+public:
+
+  sentence(
+    const verbly::database& database,
+    std::mt19937& rng);
+
+  // Returns one freshly generated phrase.
+  std::string generate() const;
+
+private:
+
+  bool chooseSelrestr(std::set<std::string> selrestrs, std::set<std::string> choices) const;
+
+  verbly::word generateStandardNoun(std::string role, std::set<std::string> selrestrs) const;
+
+  verbly::token generateStandardNounPhrase(
+    const verbly::word& noun,
+    std::string role,
+    bool plural,
+    bool definite) const;
+
+  verbly::token generateClause(const verbly::token& it) const;
+
+  void visit(verbly::token& it) const;
+
+  const verbly::database& database_;
+  std::mt19937& rng_;
+};
+
+#endif /* end of include guard: SENTENCE_H_81987F60 */
+
diff --git a/support.cpp b/support.cpp
new file mode 100644
index 0000000..8dc6c4e
--- /dev/null
+++ b/support.cpp
@@ -0,0 +1,81 @@
+// NOTE(review): third-party header names are assumed -- confirm against vendor/.
+#include <yaml-cpp/yaml.h>
+#include <iostream>
+#include <random>
+#include <verbly.h>
+#include <twitter.h>
+#include <chrono>
+#include <thread>
+#include "sentence.h"
+
+// Entry point. Reads Twitter credentials and the verbly datafile path from the
+// YAML file named by argv[1], then attempts to tweet one generated status per
+// hour. Candidates longer than 140 characters are discarded and regenerated.
+int main(int argc, char** argv)
+{
+  if (argc != 2)
+  {
+    std::cout << "usage: support [configfile]" << std::endl;
+    return -1;
+  }
+
+  std::string configfile(argv[1]);
+  YAML::Node config = YAML::LoadFile(configfile);
+
+  twitter::auth auth;
+  auth.setConsumerKey(config["consumer_key"].as<std::string>());
+  auth.setConsumerSecret(config["consumer_secret"].as<std::string>());
+  auth.setAccessKey(config["access_key"].as<std::string>());
+  auth.setAccessSecret(config["access_secret"].as<std::string>());
+
+  twitter::client client(auth);
+
+  std::random_device randomDevice;
+  std::mt19937 rng{randomDevice()};
+
+  verbly::database database(config["verbly_datafile"].as<std::string>());
+  sentence generator(database, rng);
+
+  for (;;)
+  {
+    verbly::word adjective = database.words(
+      (verbly::notion::partOfSpeech == verbly::part_of_speech::adjective)
+      && (verbly::word::antiPertainyms %=
+        (verbly::word::forms(verbly::inflection::plural)))).first();
+
+    verbly::word noun = database.words(
+      (verbly::notion::partOfSpeech == verbly::part_of_speech::noun)
+      && (verbly::word::pertainyms %= adjective)
+      && (verbly::word::forms(verbly::inflection::plural))).first();
+
+    verbly::token action = {
+      "RT if you ARE",
+      verbly::token::punctuation(",", adjective),
+      "if you SUPPORT",
+      verbly::token::punctuation(",",
+        verbly::token(noun, verbly::inflection::plural)),
+      "or if you",
+      generator.generate()};
+
+    std::string result = action.compile();
+    if (result.length() <= 140)
+    {
+      std::cout << result << std::endl;
+
+      try
+      {
+        client.updateStatus(result);
+
+        std::cout << "Tweeted!" << std::endl;
+      } catch (const twitter::twitter_error& e)
+      {
+        std::cout << "Twitter error: " << e.what() << std::endl;
+      }
+
+      std::this_thread::sleep_for(std::chrono::hours(1));
+
+      std::cout << std::endl;
+    }
+  }
+}
+
diff --git a/vendor/libtwittercpp b/vendor/libtwittercpp
new file mode 160000
index 0000000..df90612
--- /dev/null
+++ b/vendor/libtwittercpp
@@ -0,0 +1 @@
+Subproject commit df906121dd862c0f704e44f28ee079158c431c41
diff --git a/vendor/verbly b/vendor/verbly
new file mode 160000
index 0000000..59eab84
--- /dev/null
+++ b/vendor/verbly
@@ -0,0 +1 @@
+Subproject commit 59eab842de02b2b2ba8bf53e2214b558457e6356
diff --git a/vendor/yaml-cpp b/vendor/yaml-cpp
new file mode 160000
index 0000000..bedb28f
--- /dev/null
+++ b/vendor/yaml-cpp
@@ -0,0 +1 @@
+Subproject commit bedb28fdb4fd52d97e02f6cb946cae631037089e
--
cgit 1.4.1