From 18742d79e1de863889521c492e938491489316fe Mon Sep 17 00:00:00 2001 From: Kelly Rauchenberger Date: Fri, 3 Feb 2017 13:56:19 -0500 Subject: Created bot --- .gitmodules | 9 + CMakeLists.txt | 17 ++ advice.cpp | 359 ++++++++++++++++++++++++ advice.h | 46 ++++ coolvetica.ttf | Bin 0 -> 135916 bytes main.cpp | 33 +++ sentence.cpp | 754 +++++++++++++++++++++++++++++++++++++++++++++++++++ sentence.h | 39 +++ vendor/libtwittercpp | 1 + vendor/verbly | 1 + vendor/yaml-cpp | 1 + 11 files changed, 1260 insertions(+) create mode 100644 .gitmodules create mode 100644 CMakeLists.txt create mode 100644 advice.cpp create mode 100644 advice.h create mode 100755 coolvetica.ttf create mode 100644 main.cpp create mode 100644 sentence.cpp create mode 100644 sentence.h create mode 160000 vendor/libtwittercpp create mode 160000 vendor/verbly create mode 160000 vendor/yaml-cpp diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..79dde9f --- /dev/null +++ b/.gitmodules @@ -0,0 +1,9 @@ +[submodule "vendor/verbly"] + path = vendor/verbly + url = https://github.com/hatkirby/verbly +[submodule "vendor/libtwittercpp"] + path = vendor/libtwittercpp + url = https://github.com/hatkirby/libtwittercpp +[submodule "vendor/yaml-cpp"] + path = vendor/yaml-cpp + url = https://github.com/jbeder/yaml-cpp diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..5fc053d --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,17 @@ +cmake_minimum_required (VERSION 3.1) +project (advice) + +set(CMAKE_BUILD_TYPE Debug) + +find_package(PkgConfig) +pkg_check_modules(GraphicsMagick GraphicsMagick++ REQUIRED) + +add_subdirectory(vendor/verbly) +add_subdirectory(vendor/libtwittercpp) +add_subdirectory(vendor/yaml-cpp EXCLUDE_FROM_ALL) + +include_directories(vendor/verbly/lib vendor/libtwittercpp/src vendor/libtwittercpp/vendor/curlcpp/include ${GraphicsMagick_INCLUDE_DIRS} vendor/yaml-cpp/include) +add_executable(advice main.cpp advice.cpp sentence.cpp) +set_property(TARGET advice PROPERTY CXX_STANDARD 11) +set_property(TARGET advice PROPERTY CXX_STANDARD_REQUIRED ON) +target_link_libraries(advice verbly twitter++ ${GraphicsMagick_LIBRARIES} yaml-cpp) diff --git a/advice.cpp b/advice.cpp new file mode 100644 index 0000000..320f719 --- /dev/null +++ b/advice.cpp @@ -0,0 +1,359 @@ +#include "advice.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +advice::advice( + std::string configFile, + std::mt19937& rng) : + rng_(rng) +{ + // Load the config file. + YAML::Node config = YAML::LoadFile(configFile); + + // Set up the Twitter client. + twitter::auth auth; + auth.setConsumerKey(config["consumer_key"].as()); + auth.setConsumerSecret(config["consumer_secret"].as()); + auth.setAccessKey(config["access_key"].as()); + auth.setAccessSecret(config["access_secret"].as()); + + client_ = std::unique_ptr(new twitter::client(auth)); + + // Set up the verbly database. + database_ = std::unique_ptr(new verbly::database(config["verbly_datafile"].as())); + + // Set up the sentence generator. + generator_ = std::unique_ptr(new sentence(*database_, rng_)); +} + +verbly::word advice::generateImageNoun() const +{ + verbly::filter whitelist = + (verbly::notion::wnid == 109287968) // Geological formations + || (verbly::notion::wnid == 109208496) // Asterisms (collections of stars) + || (verbly::notion::wnid == 109239740) // Celestial bodies + || (verbly::notion::wnid == 109277686) // Exterrestrial objects (comets and meteroids) + || (verbly::notion::wnid == 109403211) // Radiators (supposedly natural radiators but actually these are just pictures of radiators) + || (verbly::notion::wnid == 109416076) // Rocks + || (verbly::notion::wnid == 105442131) // Chromosomes + || (verbly::notion::wnid == 100324978) // Tightrope walking + || (verbly::notion::wnid == 100326094) // Rock climbing + || (verbly::notion::wnid == 100433458) // Contact sports + || (verbly::notion::wnid == 100433802) // Gymnastics + || (verbly::notion::wnid == 100439826) // Track and field + || (verbly::notion::wnid == 100440747) // Skiing + || (verbly::notion::wnid == 100441824) // Water sport + || (verbly::notion::wnid == 100445351) // Rowing + || (verbly::notion::wnid == 100446980) // Archery + // TODO: add more sports + || (verbly::notion::wnid == 100021939) // Artifacts + || (verbly::notion::wnid == 101471682) // Vertebrates + ; + + verbly::filter blacklist = + (verbly::notion::wnid == 106883725) // swastika + || (verbly::notion::wnid == 104416901) // tetraskele + || (verbly::notion::wnid == 102512053) // fish + || (verbly::notion::wnid == 103575691) // instrument of execution + ; + + verbly::query pictureQuery = database_->words( + (verbly::notion::fullHypernyms %= whitelist) + && !(verbly::notion::fullHypernyms %= blacklist) + && (verbly::notion::partOfSpeech == verbly::part_of_speech::noun) + && (verbly::notion::numOfImages >= 1)); + + return pictureQuery.first(); +} + +Magick::Image advice::getImageForNoun(verbly::word pictured) const +{ + // Accept string from Google Chrome + std::string accept = "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8"; + curl::curl_header headers; + headers.add(accept); + + int backoff = 0; + + std::cout << "Generating noun..." << std::endl; + std::cout << "Noun: " << pictured.getBaseForm() << std::endl; + std::cout << "Getting URLs..." << std::endl; + + std::string lstdata; + while (lstdata.empty()) + { + std::ostringstream lstbuf; + curl::curl_ios lstios(lstbuf); + curl::curl_easy lsthandle(lstios); + std::string lsturl = pictured.getNotion().getImageNetUrl(); + lsthandle.add(lsturl.c_str()); + + try + { + lsthandle.perform(); + } catch (const curl::curl_easy_exception& e) + { + e.print_traceback(); + + backoff++; + std::cout << "Waiting for " << backoff << " seconds..." << std::endl; + + std::this_thread::sleep_for(std::chrono::seconds(backoff)); + + continue; + } + + backoff = 0; + + if (lsthandle.get_info().get() != 200) + { + throw could_not_get_images(); + } + + std::cout << "Got URLs." << std::endl; + lstdata = lstbuf.str(); + } + + std::vector lstvec = verbly::split>(lstdata, "\r\n"); + if (lstvec.empty()) + { + throw could_not_get_images(); + } + + std::shuffle(std::begin(lstvec), std::end(lstvec), rng_); + + std::deque urls; + for (std::string& url : lstvec) + { + urls.push_back(url); + } + + bool found = false; + Magick::Blob img; + Magick::Image pic; + + while (!found && !urls.empty()) + { + std::string url = urls.front(); + urls.pop_front(); + + std::ostringstream imgbuf; + curl::curl_ios imgios(imgbuf); + curl::curl_easy imghandle(imgios); + + imghandle.add(headers.get()); + imghandle.add(url.c_str()); + imghandle.add(30); + + try + { + imghandle.perform(); + } catch (curl::curl_easy_exception error) { + error.print_traceback(); + + continue; + } + + if (imghandle.get_info().get() != 200) + { + continue; + } + + std::string content_type = imghandle.get_info().get(); + if (content_type.substr(0, 6) != "image/") + { + continue; + } + + std::string imgstr = imgbuf.str(); + img = Magick::Blob(imgstr.c_str(), imgstr.length()); + pic.read(img); + if (pic.rows() == 0) + { + continue; + } + + // Too small! + if (pic.columns() < 400) + { + continue; + } + + std::cout << url << std::endl; + found = true; + } + + if (!found) + { + throw could_not_get_images(); + } + + return pic; +} + +Magick::Image advice::layoutImage(Magick::Image pic, std::string title) const +{ + // Want a 16:9 aspect + int idealwidth = pic.rows()*(16.0/9.0); + if (idealwidth > pic.columns()) + { + // If the image is narrower than the ideal width, use full width. + int newheight = pic.columns()*(9.0/16.0); + + // Just take a slice out of the middle of the image. + int cropy = ((double)(pic.rows() - newheight))/2.0; + + pic.crop(Magick::Geometry(pic.columns(), newheight, 0, cropy)); + } else { + // If the image is wider than the ideal width, use full height. + // Just take a slice out of the middle of the image. + int cropx = ((double)(pic.columns() - idealwidth))/2.0; + + pic.crop(Magick::Geometry(idealwidth, pic.rows(), cropx, 0)); + } + + pic.zoom(Magick::Geometry(400, 225)); + + // Layout the text. + std::list words = verbly::split>(title, " "); + std::vector lines; + std::list cur; + Magick::TypeMetric metric; + pic.fontPointsize(20); + pic.font("@coolvetica.ttf"); + + while (!words.empty()) + { + cur.push_back(words.front()); + + std::string prefixText = verbly::implode(std::begin(cur), std::end(cur), " "); + pic.fontTypeMetrics(prefixText, &metric); + + if (metric.textWidth() > 380) + { + if (cur.size() == 1) + { + words.pop_front(); + } else { + cur.pop_back(); + } + + prefixText = verbly::implode(std::begin(cur), std::end(cur), " "); + lines.push_back(prefixText); + cur.clear(); + } else { + words.pop_front(); + } + } + + if (!cur.empty()) + { + std::string prefixText = verbly::implode(std::begin(cur), std::end(cur), " "); + lines.push_back(prefixText); + } + + int lineHeight = metric.textHeight()-2; + int blockHeight = lineHeight * lines.size() + 18; + std::cout << "line " << lineHeight << "; block " << blockHeight << std::endl; + + std::list drawList; + drawList.push_back(Magick::DrawableFillColor("black")); + drawList.push_back(Magick::DrawableFillOpacity(0.5)); + drawList.push_back(Magick::DrawableStrokeColor("transparent")); + drawList.push_back(Magick::DrawableRectangle(0, 225-blockHeight-20, 400, 255)); // 0, 225-60, 400, 255 + pic.draw(drawList); + + drawList.clear(); + drawList.push_back(Magick::DrawableFont("@coolvetica.ttf")); + drawList.push_back(Magick::DrawableFillColor("white")); + drawList.push_back(Magick::DrawablePointSize(14)); + drawList.push_back(Magick::DrawableText(10, 225-blockHeight+4, "How to")); // 10, 255-62-4 + pic.draw(drawList); + + for (int i=0; igetConfiguration().getCharactersReservedPerMedia(); + if (title.length() > tweetLim) + { + tweetText = title.substr(0, tweetLim - 1) + "…"; + } else { + tweetText = title; + } + + long media_id = client_->uploadMedia("image/png", (const char*) outputimg.data(), outputimg.length()); + client_->updateStatus(tweetText, {media_id}); +} + +void advice::run() const +{ + for (;;) + { + try + { + // Pick a noun to use for the picture. + verbly::word pictured = generateImageNoun(); + + // Find an image of the picked noun. + Magick::Image pic = getImageForNoun(pictured); + + // Generate the image text. + std::string title = generator_->generate(); + + // Layout the image. + Magick::Image output = layoutImage(std::move(pic), title); + + // Tweet the image. + sendTweet(std::move(output), title); + + std::cout << "Done!" << std::endl << "Waiting..." << std::endl << std::endl; + + // Wait. + std::this_thread::sleep_for(std::chrono::hours(1)); + } catch (const could_not_get_images& ex) + { + std::cout << ex.what() << std::endl; + } catch (const Magick::ErrorImage& ex) + { + std::cout << "Image error: " << ex.what() << std::endl; + } catch (const twitter::twitter_error& ex) + { + std::cout << "Twitter error: " << ex.what() << std::endl; + } + } +} diff --git a/advice.h b/advice.h new file mode 100644 index 0000000..33dc531 --- /dev/null +++ b/advice.h @@ -0,0 +1,46 @@ +#ifndef ADVICE_H_5934AC1B +#define ADVICE_H_5934AC1B + +#include +#include +#include +#include +#include +#include +#include +#include "sentence.h" + +class advice { +public: + + advice( + std::string configFile, + std::mt19937& rng); + + void run() const; + +private: + + class could_not_get_images : public std::runtime_error { + public: + + could_not_get_images() : std::runtime_error("Could not get images for noun") + { + } + }; + + verbly::word generateImageNoun() const; + + Magick::Image getImageForNoun(verbly::word pictured) const; + + Magick::Image layoutImage(Magick::Image bg, std::string title) const; + + void sendTweet(Magick::Image pic, std::string title) const; + + std::mt19937& rng_; + std::unique_ptr database_; + std::unique_ptr generator_; + std::unique_ptr client_; +}; + +#endif /* end of include guard: ADVICE_H_5934AC1B */ diff --git a/coolvetica.ttf b/coolvetica.ttf new file mode 100755 index 0000000..410ca31 Binary files /dev/null and b/coolvetica.ttf differ diff --git a/main.cpp b/main.cpp new file mode 100644 index 0000000..3661105 --- /dev/null +++ b/main.cpp @@ -0,0 +1,33 @@ +#include "advice.h" + +int main(int argc, char** argv) +{ + Magick::InitializeMagick(nullptr); + + std::random_device random_device; + std::mt19937 random_engine{random_device()}; + + if (argc != 2) + { + std::cout << "usage: advice [configfile]" << std::endl; + return -1; + } + + std::string configfile(argv[1]); + + try + { + advice bot(configfile, random_engine); + + try + { + bot.run(); + } catch (const std::exception& ex) + { + std::cout << "Error running bot: " << ex.what() << std::endl; + } + } catch (const std::exception& ex) + { + std::cout << "Error initializing bot: " << ex.what() << std::endl; + } +} diff --git a/sentence.cpp b/sentence.cpp new file mode 100644 index 0000000..421aaf6 --- /dev/null +++ b/sentence.cpp @@ -0,0 +1,754 @@ +#include "sentence.h" +#include +#include +#include + +sentence::sentence( + const verbly::database& database, + std::mt19937& rng) : + database_(database), + rng_(rng) +{ +} + +std::string sentence::generate() const +{ + // Generate the form that the title should take. + verbly::token form; + std::set synrestrs {"infinitive_phrase", "bare", "subjectless"}; + std::set secondSyn {"participle_phrase", "subjectless"}; + std::set adjSyn {"adjective_phrase"}; + + if (std::bernoulli_distribution(1.0/6.0)(rng_)) + { + form << "not"; + } + + if (std::bernoulli_distribution(1.0/6.0)(rng_)) + { + form << "be"; + form << adjSyn; + } else { + if (std::bernoulli_distribution(1.0/6.0)(rng_)) + { + form << "get"; + synrestrs.insert("experiencer"); + synrestrs.insert("past_participle"); + } + + form << synrestrs; + } + + if (std::bernoulli_distribution(1.0/5.0)(rng_)) + { + if (std::bernoulli_distribution(1.0/4.0)(rng_)) + { + form << "without"; + } else { + form << "while"; + } + + form << secondSyn; + } + + // Attempt to compile the form, restarting if a bad word is generated. + std::set badWords = {"raped"}; + + verbly::token tok = form; + std::list words; + for (;;) + { + // Compile the form. + while (!tok.isComplete()) + { + visit(tok); + } + + std::string compiled = tok.compile(); + words = verbly::split>(compiled, " "); + + // Ensure that there are no bad words in the output. + if (!std::any_of(std::begin(words), std::end(words), [&badWords] (const std::string& word) { + std::string canonWord; + + for (char ch : word) + { + if (std::isalpha(ch)) + { + canonWord.push_back(std::tolower(ch)); + } + } + + return (badWords.count(canonWord) == 1); + })) { + break; + } else { + std::cout << "Bad word generated." << std::endl; + } + } + + // Put the form into title case. + for (std::string& word : words) + { + if ((word[0] == '"') && (word.length() > 1)) + { + word[1] = std::toupper(word[1]); + } else { + word[0] = std::toupper(word[0]); + } + } + + return verbly::implode(std::begin(words), std::end(words), " "); +} + +verbly::filter sentence::parseSelrestrs( + verbly::selrestr selrestr) const +{ + switch (selrestr.getType()) + { + case verbly::selrestr::type::empty: + { + return {}; + } + + case verbly::selrestr::type::singleton: + { + verbly::filter result; + + if (selrestr.getRestriction() == "concrete") + { + result = (verbly::notion::wnid == 100001930); // physical entity + } else if (selrestr.getRestriction() == "time") + { + result = (verbly::notion::wnid == 100028270); // time + } else if (selrestr.getRestriction() == "state") + { + result = (verbly::notion::wnid == 100024720); // state + } else if (selrestr.getRestriction() == "abstract") + { + result = (verbly::notion::wnid == 100002137); // abstract entity + } else if (selrestr.getRestriction() == "scalar") + { + result = (verbly::notion::wnid == 103835412); // number + } else if (selrestr.getRestriction() == "currency") + { + result = (verbly::notion::wnid == 105050379); // currency + } else if (selrestr.getRestriction() == "location") + { + result = (verbly::notion::wnid == 100027167); // location + } else if (selrestr.getRestriction() == "organization") + { + result = (verbly::notion::wnid == 100237078); // organization + } else if (selrestr.getRestriction() == "int_control") + { + result = (verbly::notion::wnid == 100007347); // causal agent + } else if (selrestr.getRestriction() == "natural") + { + result = (verbly::notion::wnid == 100019128); // natural object + } else if (selrestr.getRestriction() == "phys_obj") + { + result = (verbly::notion::wnid == 100002684); // physical object + } else if (selrestr.getRestriction() == "solid") + { + result = (verbly::notion::wnid == 113860793); // solid + } else if (selrestr.getRestriction() == "shape") + { + result = (verbly::notion::wnid == 100027807); // shape + } else if (selrestr.getRestriction() == "substance") + { + result = (verbly::notion::wnid == 100019613); // substance + } else if (selrestr.getRestriction() == "idea") + { + result = (verbly::notion::wnid == 105803379); // idea + } else if (selrestr.getRestriction() == "sound") + { + result = (verbly::notion::wnid == 107111047); // sound + } else if (selrestr.getRestriction() == "communication") + { + result = (verbly::notion::wnid == 100033020); // communication + } else if (selrestr.getRestriction() == "region") + { + result = (verbly::notion::wnid == 105221895); // region + } else if (selrestr.getRestriction() == "place") + { + result = (verbly::notion::wnid == 100586262); // place + } else if (selrestr.getRestriction() == "machine") + { + result = (verbly::notion::wnid == 102958343); // machine + } else if (selrestr.getRestriction() == "animate") + { + result = (verbly::notion::wnid == 100004258); // animate thing + } else if (selrestr.getRestriction() == "plant") + { + result = (verbly::notion::wnid == 103956922); // plant + } else if (selrestr.getRestriction() == "comestible") + { + result = (verbly::notion::wnid == 100021265); // food + } else if (selrestr.getRestriction() == "artifact") + { + result = (verbly::notion::wnid == 100021939); // artifact + } else if (selrestr.getRestriction() == "vehicle") + { + result = (verbly::notion::wnid == 104524313); // vehicle + } else if (selrestr.getRestriction() == "human") + { + result = (verbly::notion::wnid == 100007846); // person + } else if (selrestr.getRestriction() == "animal") + { + result = (verbly::notion::wnid == 100015388); // animal + } else if (selrestr.getRestriction() == "body_part") + { + result = (verbly::notion::wnid == 105220461); // body part + } else if (selrestr.getRestriction() == "garment") + { + result = (verbly::notion::wnid == 103051540); // clothing + } else if (selrestr.getRestriction() == "tool") + { + result = (verbly::notion::wnid == 104451818); // tool + } else { + return {}; + } + + std::cout << selrestr.getRestriction() << " (" << selrestr.getPos() << ")" << std::endl; + + if (selrestr.getPos()) + { + return (verbly::notion::fullHypernyms %= result); + } else { + return !(verbly::notion::fullHypernyms %= result); + } + } + + case verbly::selrestr::type::group: + { + std::cout << "or: " << selrestr.getOrlogic() << std::endl; + verbly::filter ret(selrestr.getOrlogic()); + + for (const verbly::selrestr& child : selrestr) + { + ret += parseSelrestrs(child); + } + + return ret; + } + } +} + +bool sentence::requiresSelrestr( + std::string restriction, + verbly::selrestr selrestr) const +{ + switch (selrestr.getType()) + { + case verbly::selrestr::type::empty: + { + return false; + } + + case verbly::selrestr::type::singleton: + { + if (selrestr.getRestriction() == restriction) + { + return selrestr.getPos(); + } else { + return false; + } + } + + case verbly::selrestr::type::group: + { + if (selrestr.getOrlogic()) + { + return std::all_of(std::begin(selrestr), std::end(selrestr), [=] (const verbly::selrestr& s) { + return requiresSelrestr(restriction, s); + }); + } else { + return std::any_of(std::begin(selrestr), std::end(selrestr), [=] (const verbly::selrestr& s) { + return requiresSelrestr(restriction, s); + }); + } + } + } +} + +verbly::word sentence::generateStandardNoun( + std::string role, + verbly::selrestr selrestrs) const +{ + std::geometric_distribution tagdist(0.5); // 0.06 + std::vector result; + bool trySelection = true; + + while (result.empty()) + { + verbly::filter condition = + (verbly::notion::partOfSpeech == verbly::part_of_speech::noun) + && (verbly::form::proper == false) + //&& (verbly::form::complexity == 1) + // && (verbly::word::tagCount >= tagdist(rng_)) // Favor more common words + && (verbly::word::tagCount >= 1) + && !(verbly::word::usageDomains %= (verbly::notion::wnid == 106718862)); // Blacklist ethnic slurs + + // Only use selection restrictions for a first attempt. + if (trySelection) + { + verbly::filter selrestrCondition = parseSelrestrs(selrestrs).compact(); + + if (selrestrCondition.getType() != verbly::filter::type::empty) + { + condition &= std::move(selrestrCondition); + } else if (role == "Attribute") + { + condition &= (verbly::notion::fullHypernyms %= (verbly::notion::wnid == 100024264)); // attribute + } else if (role == "Instrument") + { + condition &= (verbly::notion::fullHypernyms %= (verbly::notion::wnid == 104451818)); // tool + } else if (role == "Agent") + { + condition &= (verbly::notion::fullHypernyms %= (verbly::notion::wnid == 100007347)); // causal agent + } + + trySelection = false; + } else { + std::cout << "Selection failed" << std::endl; + } + + result = database_.words(condition).all(); + } + + return result.front(); +} + +verbly::token sentence::generateStandardNounPhrase( + const verbly::word& noun, + std::string role, + bool plural, + bool definite) const +{ + verbly::token utter; + verbly::word sounder = noun; + verbly::word descript; + + if (std::bernoulli_distribution(1.0/8.0)(rng_)) + { + std::geometric_distribution tagdist(0.2); + descript = database_.words( + (verbly::word::tagCount >= tagdist(rng_)) + && (verbly::notion::partOfSpeech == verbly::part_of_speech::adjective)).first(); + + sounder = descript; + } + + if ((std::bernoulli_distribution(1.0/3.0)(rng_)) && (definite)) + { + utter << "the"; + + if (std::bernoulli_distribution(1.0/2.0)(rng_)) + { + plural = true; + } + } else { + if ((role != "Theme") && (role != "Attribute") && std::bernoulli_distribution(1.0/2.0)(rng_)) + { + utter << "your"; + } else if (!plural) { + if (sounder.getLemma().getBaseForm().startsWithVowelSound()) + { + utter << "an"; + } else { + utter << "a"; + } + } + } + + if (descript) + { + utter << descript; + } + + if (plural && noun.getLemma().hasInflection(verbly::inflection::plural)) + { + utter << verbly::token(noun, verbly::inflection::plural); + } else { + utter << noun; + } + + return utter; +} + +verbly::token sentence::generateClause( + const verbly::token& it) const +{ + verbly::token utter; + std::geometric_distribution tagdist(0.07); + std::vector verbDataset; + + verbly::filter frameCondition = + (verbly::frame::length >= 2) + && (verbly::frame::part(0) %= ( + (verbly::part::type == verbly::part_type::noun_phrase) + && (verbly::part::role == "Agent")) + && !(verbly::frame::part() %= ( + verbly::part::synrestr %= "adjp"))); + + if (it.hasSynrestr("experiencer")) + { + frameCondition &= + (verbly::frame::part(2) %= + (verbly::part::type == verbly::part_type::noun_phrase) + && !(verbly::part::synrestr %= "genitive") + && ((verbly::part::role == "Patient") + || (verbly::part::role == "Experiencer"))); + } + + verbly::filter verbCondition = + (verbly::notion::partOfSpeech == verbly::part_of_speech::verb) + && frameCondition; + + if (it.hasSynrestr("participle_phrase")) + { + verbCondition &= (verbly::lemma::form(verbly::inflection::ing_form)); + } else if (it.hasSynrestr("progressive")) + { + verbCondition &= (verbly::lemma::form(verbly::inflection::s_form)); + } else if (it.hasSynrestr("past_participle")) + { + verbCondition &= (verbly::lemma::form(verbly::inflection::past_participle)); + } + + // Because of the tag distribution, it's possible (albeit extremely unlikely) + // for the verb query to fail, so we loop until it succeeds. + while (verbDataset.empty()) + { + verbDataset = database_.words( + verbCondition + && (verbly::word::tagCount >= tagdist(rng_)) + ).all(); + } + + verbly::word verb = verbDataset.front(); + verbly::frame frame = database_.frames(frameCondition && verb).first(); + std::list parts(std::begin(frame.getParts()), std::end(frame.getParts())); + + if (it.hasSynrestr("experiencer")) + { + // Ignore the direct object. + parts.erase(std::next(parts.begin(), 2)); + } + + if (it.hasSynrestr("subjectless")) + { + // Ignore the subject. + parts.pop_front(); + } + + for (const verbly::part& part : parts) + { + switch (part.getType()) + { + case verbly::part_type::noun_phrase: + { + std::cout << "NP: "; + for (auto& s : part.getNounSynrestrs()) + { + std::cout << s << " "; + } + std::cout << std::endl; + + if (requiresSelrestr("currency", part.getNounSelrestrs())) + { + int lead = std::uniform_int_distribution(1,9)(rng_); + int tail = std::uniform_int_distribution(0,6)(rng_); + std::string tailStr(tail, '0'); + + utter << ("$" + std::to_string(lead) + tailStr); + } else if (part.nounHasSynrestr("adjp")) + { + utter << std::set({"adjective_phrase"}); + } else if ((part.nounHasSynrestr("be_sc_ing")) + || (part.nounHasSynrestr("ac_ing")) + || (part.nounHasSynrestr("sc_ing")) + || (part.nounHasSynrestr("np_omit_ing")) + || (part.nounHasSynrestr("oc_ing"))) + { + utter << std::set({"participle_phrase", "subjectless"}); + } else if ((part.nounHasSynrestr("poss_ing")) + || (part.nounHasSynrestr("possing")) + || (part.nounHasSynrestr("pos_ing"))) + { + utter << "your"; + utter << std::set({"participle_phrase", "subjectless"}); + } else if (part.nounHasSynrestr("genitive")) + { + utter << "your"; + } else if (part.nounHasSynrestr("adv_loc")) + { + if (std::bernoulli_distribution(1.0/2.0)(rng_)) + { + utter << "here"; + } else { + utter << "there"; + } + } else if (part.nounHasSynrestr("refl")) + { + utter << "yourself"; + } else if ((part.nounHasSynrestr("sc_to_inf")) + || (part.nounHasSynrestr("ac_to_inf")) + || (part.nounHasSynrestr("vc_to_inf")) + || (part.nounHasSynrestr("rs_to_inf")) + || (part.nounHasSynrestr("oc_to_inf"))) + { + utter << std::set({"infinitive_phrase", "subjectless"}); + } else if (part.nounHasSynrestr("oc_bare_inf")) + { + utter << std::set({"infinitive_phrase", "bare", "subjectless"}); + } else if (part.nounHasSynrestr("wh_comp")) + { + utter << "whether"; + + verbly::token sentence(std::set({"progressive"})); + utter << generateClause(sentence); + } else if (part.nounHasSynrestr("that_comp")) + { + utter << "that"; + utter << "they"; + + verbly::token sentence(std::set({"subjectless"})); + utter << generateClause(sentence); + } else if (part.nounHasSynrestr("what_extract")) + { + utter << "what"; + + verbly::token sentence(std::set({"progressive", "experiencer"})); + utter << generateClause(sentence); + } else if (part.nounHasSynrestr("how_extract")) + { + utter << "how"; + + verbly::token sentence(std::set({"progressive"})); + utter << generateClause(sentence); + } else if (part.nounHasSynrestr("wh_inf")) + { + utter << "how"; + + verbly::token sentence(std::set({"infinitive_phrase", "subjectless"})); + utter << generateClause(sentence); + } else if (part.nounHasSynrestr("what_inf")) + { + utter << "what"; + + verbly::token sentence(std::set({"infinitive_phrase", "subjectless", "experiencer"})); + utter << generateClause(sentence); + } else if (part.nounHasSynrestr("wheth_inf")) + { + utter << "whether"; + + verbly::token sentence(std::set({"infinitive_phrase", "subjectless"})); + utter << generateClause(sentence); + } else if (part.nounHasSynrestr("quotation")) + { + verbly::token sentence(std::set({"participle_phrase"})); + while (!sentence.isComplete()) + { + visit(sentence); + } + + utter << ("\"" + sentence.compile() + "\""); + } else { + verbly::word noun = generateStandardNoun(part.getNounRole(), part.getNounSelrestrs()); + + bool plural = part.nounHasSynrestr("plural"); + if (!plural) + { + plural = requiresSelrestr("plural", part.getNounSelrestrs()); + } + + utter << generateStandardNounPhrase( + noun, + part.getNounRole(), + plural, + part.nounHasSynrestr("definite")); + + if (part.nounHasSynrestr("acc_ing") || part.nounHasSynrestr("ac_ing")) + { + utter << std::set({"participle_phrase", "subjectless"}); + } + } + + break; + } + + case verbly::part_type::verb: + { + std::cout << "V: " << verb.getBaseForm() << std::endl; + + if (it.hasSynrestr("progressive")) + { + utter << verbly::token(verb, verbly::inflection::s_form); + } else if (it.hasSynrestr("past_participle")) + { + utter << verbly::token(verb, verbly::inflection::past_participle); + } else if (it.hasSynrestr("infinitive_phrase")) + { + if (!it.hasSynrestr("bare")) + { + utter << "to"; + } + + utter << verb; + } else if (it.hasSynrestr("participle_phrase")) + { + utter << verbly::token(verb, verbly::inflection::ing_form); + } else { + utter << verb; + } + + break; + } + + case verbly::part_type::preposition: + { + std::cout << "PREP" << std::endl; + + if (part.isPrepositionLiteral()) + { + int choiceIndex = std::uniform_int_distribution(0, part.getPrepositionChoices().size()-1)(rng_); + utter << part.getPrepositionChoices()[choiceIndex]; + } else { + verbly::filter pgf(true); + for (const std::string& choice : part.getPrepositionChoices()) + { + pgf += (verbly::notion::prepositionGroup == choice); + } + + utter << database_.words(pgf && (verbly::notion::partOfSpeech == verbly::part_of_speech::preposition)).first(); + } + + break; + } + + case verbly::part_type::adjective: + { + std::cout << "ADJ" << std::endl; + + utter << std::set({"adjective_phrase"}); + + break; + } + + case verbly::part_type::adverb: + { + std::cout << "ADV" << std::endl; + + utter << std::set({"adverb_phrase"}); + + break; + } + + case verbly::part_type::literal: + { + std::cout << "LIT" << std::endl; + + utter << part.getLiteralValue(); + + break; + } + + case verbly::part_type::invalid: + { + // Nope + + break; + } + } + } + + if ((parts.size() == 1) && (std::bernoulli_distribution(1.0/4.0)(rng_))) + { + utter << std::set({"adverb_phrase"}); + } + + return utter; +} + +void sentence::visit(verbly::token& it) const +{ + switch (it.getType()) + { + case verbly::token::type::utterance: + { + for (verbly::token& token : it) + { + if (!token.isComplete()) + { + visit(token); + + break; + } + } + + break; + } + + case verbly::token::type::fillin: + { + if (it.hasSynrestr("infinitive_phrase")) + { + it = generateClause(it); + } else if (it.hasSynrestr("adjective_phrase")) + { + verbly::token phrase; + + if (std::bernoulli_distribution(1.0/6.0)(rng_)) + { + phrase << std::set({"adverb_phrase"}); + } + + if (std::bernoulli_distribution(1.0/4.0)(rng_)) + { + phrase << std::set({"participle_phrase", "subjectless"}); + } else { + std::geometric_distribution tagdist(0.2); + phrase << database_.words( + (verbly::word::tagCount >= tagdist(rng_)) + && (verbly::notion::partOfSpeech == verbly::part_of_speech::adjective)).first(); + } + + it = phrase; + } else if (it.hasSynrestr("adverb_phrase")) + { + std::geometric_distribution tagdist(1.0/23.0); + + it = database_.words( + (verbly::notion::partOfSpeech == verbly::part_of_speech::adverb) + && (verbly::word::tagCount >= tagdist(rng_)) + ).first(); + } else if (it.hasSynrestr("participle_phrase")) + { + if (std::bernoulli_distribution(1.0/2.0)(rng_)) + { + it = verbly::token( + database_.words( + (verbly::notion::partOfSpeech == verbly::part_of_speech::verb) + && (verbly::lemma::form(verbly::inflection::ing_form))).first(), + verbly::inflection::ing_form); + } else { + it = generateClause(it); + } + } else { + it = "*the reality of the situation*"; + } + + break; + } + + case verbly::token::type::word: + case verbly::token::type::literal: + case verbly::token::type::part: + { + // Nope + + break; + } + } +} diff --git a/sentence.h b/sentence.h new file mode 100644 index 0000000..abf6860 --- /dev/null +++ b/sentence.h @@ -0,0 +1,39 @@ +#ifndef SENTENCE_H_81987F60 +#define SENTENCE_H_81987F60 + +#include +#include +#include + +class sentence { +public: + + sentence( + const verbly::database& database, + std::mt19937& rng); + + std::string generate() const; + +private: + + verbly::filter parseSelrestrs(verbly::selrestr selrestr) const; + + bool requiresSelrestr(std::string restriction, verbly::selrestr selrestr) const; + + verbly::word generateStandardNoun(std::string role, verbly::selrestr selrestrs) const; + + verbly::token generateStandardNounPhrase( + const verbly::word& noun, + std::string role, + bool plural, + bool definite) const; + + verbly::token generateClause(const verbly::token& it) const; + + void visit(verbly::token& it) const; + + const verbly::database& database_; + std::mt19937& rng_; +}; + +#endif /* end of include guard: SENTENCE_H_81987F60 */ diff --git a/vendor/libtwittercpp b/vendor/libtwittercpp new file mode 160000 index 0000000..df90612 --- /dev/null +++ b/vendor/libtwittercpp @@ -0,0 +1 @@ +Subproject commit df906121dd862c0f704e44f28ee079158c431c41 diff --git a/vendor/verbly b/vendor/verbly new file mode 160000 index 0000000..5caeb00 --- /dev/null +++ b/vendor/verbly @@ -0,0 +1 @@ +Subproject commit 5caeb000b00ff7833c3b3c44893d4beffc0afb82 diff --git a/vendor/yaml-cpp b/vendor/yaml-cpp new file mode 160000 index 0000000..bedb28f --- /dev/null +++ b/vendor/yaml-cpp @@ -0,0 +1 @@ +Subproject commit bedb28fdb4fd52d97e02f6cb946cae631037089e -- cgit 1.4.1