diff options
| author | Kelly Rauchenberger <fefferburbia@gmail.com> | 2017-02-03 13:56:19 -0500 |
|---|---|---|
| committer | Kelly Rauchenberger <fefferburbia@gmail.com> | 2017-02-03 13:56:19 -0500 |
| commit | 18742d79e1de863889521c492e938491489316fe (patch) | |
| tree | 9930ef168c81c01e1c947b47d697d494aa7ca0f2 | |
| download | advice-18742d79e1de863889521c492e938491489316fe.tar.gz advice-18742d79e1de863889521c492e938491489316fe.tar.bz2 advice-18742d79e1de863889521c492e938491489316fe.zip | |
Created bot
| -rw-r--r-- | .gitmodules | 9 | ||||
| -rw-r--r-- | CMakeLists.txt | 17 | ||||
| -rw-r--r-- | advice.cpp | 359 | ||||
| -rw-r--r-- | advice.h | 46 | ||||
| -rwxr-xr-x | coolvetica.ttf | bin | 0 -> 135916 bytes | |||
| -rw-r--r-- | main.cpp | 33 | ||||
| -rw-r--r-- | sentence.cpp | 754 | ||||
| -rw-r--r-- | sentence.h | 39 | ||||
| m--------- | vendor/libtwittercpp | 0 | ||||
| m--------- | vendor/verbly | 0 | ||||
| m--------- | vendor/yaml-cpp | 0 |
11 files changed, 1257 insertions, 0 deletions
| diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..79dde9f --- /dev/null +++ b/.gitmodules | |||
| @@ -0,0 +1,9 @@ | |||
| 1 | [submodule "vendor/verbly"] | ||
| 2 | path = vendor/verbly | ||
| 3 | url = https://github.com/hatkirby/verbly | ||
| 4 | [submodule "vendor/libtwittercpp"] | ||
| 5 | path = vendor/libtwittercpp | ||
| 6 | url = https://github.com/hatkirby/libtwittercpp | ||
| 7 | [submodule "vendor/yaml-cpp"] | ||
| 8 | path = vendor/yaml-cpp | ||
| 9 | url = https://github.com/jbeder/yaml-cpp | ||
| diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..5fc053d --- /dev/null +++ b/CMakeLists.txt | |||
| @@ -0,0 +1,17 @@ | |||
cmake_minimum_required (VERSION 3.1)
project (advice)

# Default to a Debug build, but respect a build type selected by the user
# (e.g. -DCMAKE_BUILD_TYPE=Release) instead of unconditionally overriding it.
if(NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE Debug)
endif()

# GraphicsMagick++ is located through pkg-config, so pkg-config itself is
# mandatory; fail at the find_package call if it is missing.
find_package(PkgConfig REQUIRED)
pkg_check_modules(GraphicsMagick GraphicsMagick++ REQUIRED)

# Vendored dependencies (git submodules).
add_subdirectory(vendor/verbly)
add_subdirectory(vendor/libtwittercpp)
add_subdirectory(vendor/yaml-cpp EXCLUDE_FROM_ALL)

include_directories(vendor/verbly/lib vendor/libtwittercpp/src vendor/libtwittercpp/vendor/curlcpp/include ${GraphicsMagick_INCLUDE_DIRS} vendor/yaml-cpp/include)
add_executable(advice main.cpp advice.cpp sentence.cpp)
set_property(TARGET advice PROPERTY CXX_STANDARD 11)
set_property(TARGET advice PROPERTY CXX_STANDARD_REQUIRED ON)
target_link_libraries(advice verbly twitter++ ${GraphicsMagick_LIBRARIES} yaml-cpp)
| diff --git a/advice.cpp b/advice.cpp new file mode 100644 index 0000000..320f719 --- /dev/null +++ b/advice.cpp | |||
| @@ -0,0 +1,359 @@ | |||
| 1 | #include "advice.h" | ||
| 2 | #include <algorithm> | ||
| 3 | #include <iostream> | ||
| 4 | #include <vector> | ||
| 5 | #include <deque> | ||
| 6 | #include <curl_easy.h> | ||
| 7 | #include <curl_header.h> | ||
| 8 | #include <sstream> | ||
| 9 | #include <list> | ||
| 10 | #include <chrono> | ||
| 11 | #include <thread> | ||
| 12 | #include <yaml-cpp/yaml.h> | ||
| 13 | |||
| 14 | advice::advice( | ||
| 15 | std::string configFile, | ||
| 16 | std::mt19937& rng) : | ||
| 17 | rng_(rng) | ||
| 18 | { | ||
| 19 | // Load the config file. | ||
| 20 | YAML::Node config = YAML::LoadFile(configFile); | ||
| 21 | |||
| 22 | // Set up the Twitter client. | ||
| 23 | twitter::auth auth; | ||
| 24 | auth.setConsumerKey(config["consumer_key"].as<std::string>()); | ||
| 25 | auth.setConsumerSecret(config["consumer_secret"].as<std::string>()); | ||
| 26 | auth.setAccessKey(config["access_key"].as<std::string>()); | ||
| 27 | auth.setAccessSecret(config["access_secret"].as<std::string>()); | ||
| 28 | |||
| 29 | client_ = std::unique_ptr<twitter::client>(new twitter::client(auth)); | ||
| 30 | |||
| 31 | // Set up the verbly database. | ||
| 32 | database_ = std::unique_ptr<verbly::database>(new verbly::database(config["verbly_datafile"].as<std::string>())); | ||
| 33 | |||
| 34 | // Set up the sentence generator. | ||
| 35 | generator_ = std::unique_ptr<sentence>(new sentence(*database_, rng_)); | ||
| 36 | } | ||
| 37 | |||
| 38 | verbly::word advice::generateImageNoun() const | ||
| 39 | { | ||
| 40 | verbly::filter whitelist = | ||
| 41 | (verbly::notion::wnid == 109287968) // Geological formations | ||
| 42 | || (verbly::notion::wnid == 109208496) // Asterisms (collections of stars) | ||
| 43 | || (verbly::notion::wnid == 109239740) // Celestial bodies | ||
| 44 | || (verbly::notion::wnid == 109277686) // Exterrestrial objects (comets and meteroids) | ||
| 45 | || (verbly::notion::wnid == 109403211) // Radiators (supposedly natural radiators but actually these are just pictures of radiators) | ||
| 46 | || (verbly::notion::wnid == 109416076) // Rocks | ||
| 47 | || (verbly::notion::wnid == 105442131) // Chromosomes | ||
| 48 | || (verbly::notion::wnid == 100324978) // Tightrope walking | ||
| 49 | || (verbly::notion::wnid == 100326094) // Rock climbing | ||
| 50 | || (verbly::notion::wnid == 100433458) // Contact sports | ||
| 51 | || (verbly::notion::wnid == 100433802) // Gymnastics | ||
| 52 | || (verbly::notion::wnid == 100439826) // Track and field | ||
| 53 | || (verbly::notion::wnid == 100440747) // Skiing | ||
| 54 | || (verbly::notion::wnid == 100441824) // Water sport | ||
| 55 | || (verbly::notion::wnid == 100445351) // Rowing | ||
| 56 | || (verbly::notion::wnid == 100446980) // Archery | ||
| 57 | // TODO: add more sports | ||
| 58 | || (verbly::notion::wnid == 100021939) // Artifacts | ||
| 59 | || (verbly::notion::wnid == 101471682) // Vertebrates | ||
| 60 | ; | ||
| 61 | |||
| 62 | verbly::filter blacklist = | ||
| 63 | (verbly::notion::wnid == 106883725) // swastika | ||
| 64 | || (verbly::notion::wnid == 104416901) // tetraskele | ||
| 65 | || (verbly::notion::wnid == 102512053) // fish | ||
| 66 | || (verbly::notion::wnid == 103575691) // instrument of execution | ||
| 67 | ; | ||
| 68 | |||
| 69 | verbly::query<verbly::word> pictureQuery = database_->words( | ||
| 70 | (verbly::notion::fullHypernyms %= whitelist) | ||
| 71 | && !(verbly::notion::fullHypernyms %= blacklist) | ||
| 72 | && (verbly::notion::partOfSpeech == verbly::part_of_speech::noun) | ||
| 73 | && (verbly::notion::numOfImages >= 1)); | ||
| 74 | |||
| 75 | return pictureQuery.first(); | ||
| 76 | } | ||
| 77 | |||
| 78 | Magick::Image advice::getImageForNoun(verbly::word pictured) const | ||
| 79 | { | ||
| 80 | // Accept string from Google Chrome | ||
| 81 | std::string accept = "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8"; | ||
| 82 | curl::curl_header headers; | ||
| 83 | headers.add(accept); | ||
| 84 | |||
| 85 | int backoff = 0; | ||
| 86 | |||
| 87 | std::cout << "Generating noun..." << std::endl; | ||
| 88 | std::cout << "Noun: " << pictured.getBaseForm() << std::endl; | ||
| 89 | std::cout << "Getting URLs..." << std::endl; | ||
| 90 | |||
| 91 | std::string lstdata; | ||
| 92 | while (lstdata.empty()) | ||
| 93 | { | ||
| 94 | std::ostringstream lstbuf; | ||
| 95 | curl::curl_ios<std::ostringstream> lstios(lstbuf); | ||
| 96 | curl::curl_easy lsthandle(lstios); | ||
| 97 | std::string lsturl = pictured.getNotion().getImageNetUrl(); | ||
| 98 | lsthandle.add<CURLOPT_URL>(lsturl.c_str()); | ||
| 99 | |||
| 100 | try | ||
| 101 | { | ||
| 102 | lsthandle.perform(); | ||
| 103 | } catch (const curl::curl_easy_exception& e) | ||
| 104 | { | ||
| 105 | e.print_traceback(); | ||
| 106 | |||
| 107 | backoff++; | ||
| 108 | std::cout << "Waiting for " << backoff << " seconds..." << std::endl; | ||
| 109 | |||
| 110 | std::this_thread::sleep_for(std::chrono::seconds(backoff)); | ||
| 111 | |||
| 112 | continue; | ||
| 113 | } | ||
| 114 | |||
| 115 | backoff = 0; | ||
| 116 | |||
| 117 | if (lsthandle.get_info<CURLINFO_RESPONSE_CODE>().get() != 200) | ||
| 118 | { | ||
| 119 | throw could_not_get_images(); | ||
| 120 | } | ||
| 121 | |||
| 122 | std::cout << "Got URLs." << std::endl; | ||
| 123 | lstdata = lstbuf.str(); | ||
| 124 | } | ||
| 125 | |||
| 126 | std::vector<std::string> lstvec = verbly::split<std::vector<std::string>>(lstdata, "\r\n"); | ||
| 127 | if (lstvec.empty()) | ||
| 128 | { | ||
| 129 | throw could_not_get_images(); | ||
| 130 | } | ||
| 131 | |||
| 132 | std::shuffle(std::begin(lstvec), std::end(lstvec), rng_); | ||
| 133 | |||
| 134 | std::deque<std::string> urls; | ||
| 135 | for (std::string& url : lstvec) | ||
| 136 | { | ||
| 137 | urls.push_back(url); | ||
| 138 | } | ||
| 139 | |||
| 140 | bool found = false; | ||
| 141 | Magick::Blob img; | ||
| 142 | Magick::Image pic; | ||
| 143 | |||
| 144 | while (!found && !urls.empty()) | ||
| 145 | { | ||
| 146 | std::string url = urls.front(); | ||
| 147 | urls.pop_front(); | ||
| 148 | |||
| 149 | std::ostringstream imgbuf; | ||
| 150 | curl::curl_ios<std::ostringstream> imgios(imgbuf); | ||
| 151 | curl::curl_easy imghandle(imgios); | ||
| 152 | |||
| 153 | imghandle.add<CURLOPT_HTTPHEADER>(headers.get()); | ||
| 154 | imghandle.add<CURLOPT_URL>(url.c_str()); | ||
| 155 | imghandle.add<CURLOPT_CONNECTTIMEOUT>(30); | ||
| 156 | |||
| 157 | try | ||
| 158 | { | ||
| 159 | imghandle.perform(); | ||
| 160 | } catch (curl::curl_easy_exception error) { | ||
| 161 | error.print_traceback(); | ||
| 162 | |||
| 163 | continue; | ||
| 164 | } | ||
| 165 | |||
| 166 | if (imghandle.get_info<CURLINFO_RESPONSE_CODE>().get() != 200) | ||
| 167 | { | ||
| 168 | continue; | ||
| 169 | } | ||
| 170 | |||
| 171 | std::string content_type = imghandle.get_info<CURLINFO_CONTENT_TYPE>().get(); | ||
| 172 | if (content_type.substr(0, 6) != "image/") | ||
| 173 | { | ||
| 174 | continue; | ||
| 175 | } | ||
| 176 | |||
| 177 | std::string imgstr = imgbuf.str(); | ||
| 178 | img = Magick::Blob(imgstr.c_str(), imgstr.length()); | ||
| 179 | pic.read(img); | ||
| 180 | if (pic.rows() == 0) | ||
| 181 | { | ||
| 182 | continue; | ||
| 183 | } | ||
| 184 | |||
| 185 | // Too small! | ||
| 186 | if (pic.columns() < 400) | ||
| 187 | { | ||
| 188 | continue; | ||
| 189 | } | ||
| 190 | |||
| 191 | std::cout << url << std::endl; | ||
| 192 | found = true; | ||
| 193 | } | ||
| 194 | |||
| 195 | if (!found) | ||
| 196 | { | ||
| 197 | throw could_not_get_images(); | ||
| 198 | } | ||
| 199 | |||
| 200 | return pic; | ||
| 201 | } | ||
| 202 | |||
| 203 | Magick::Image advice::layoutImage(Magick::Image pic, std::string title) const | ||
| 204 | { | ||
| 205 | // Want a 16:9 aspect | ||
| 206 | int idealwidth = pic.rows()*(16.0/9.0); | ||
| 207 | if (idealwidth > pic.columns()) | ||
| 208 | { | ||
| 209 | // If the image is narrower than the ideal width, use full width. | ||
| 210 | int newheight = pic.columns()*(9.0/16.0); | ||
| 211 | |||
| 212 | // Just take a slice out of the middle of the image. | ||
| 213 | int cropy = ((double)(pic.rows() - newheight))/2.0; | ||
| 214 | |||
| 215 | pic.crop(Magick::Geometry(pic.columns(), newheight, 0, cropy)); | ||
| 216 | } else { | ||
| 217 | // If the image is wider than the ideal width, use full height. | ||
| 218 | // Just take a slice out of the middle of the image. | ||
| 219 | int cropx = ((double)(pic.columns() - idealwidth))/2.0; | ||
| 220 | |||
| 221 | pic.crop(Magick::Geometry(idealwidth, pic.rows(), cropx, 0)); | ||
| 222 | } | ||
| 223 | |||
| 224 | pic.zoom(Magick::Geometry(400, 225)); | ||
| 225 | |||
| 226 | // Layout the text. | ||
| 227 | std::list<std::string> words = verbly::split<std::list<std::string>>(title, " "); | ||
| 228 | std::vector<std::string> lines; | ||
| 229 | std::list<std::string> cur; | ||
| 230 | Magick::TypeMetric metric; | ||
| 231 | pic.fontPointsize(20); | ||
| 232 | pic.font("@coolvetica.ttf"); | ||
| 233 | |||
| 234 | while (!words.empty()) | ||
| 235 | { | ||
| 236 | cur.push_back(words.front()); | ||
| 237 | |||
| 238 | std::string prefixText = verbly::implode(std::begin(cur), std::end(cur), " "); | ||
| 239 | pic.fontTypeMetrics(prefixText, &metric); | ||
| 240 | |||
| 241 | if (metric.textWidth() > 380) | ||
| 242 | { | ||
| 243 | if (cur.size() == 1) | ||
| 244 | { | ||
| 245 | words.pop_front(); | ||
| 246 | } else { | ||
| 247 | cur.pop_back(); | ||
| 248 | } | ||
| 249 | |||
| 250 | prefixText = verbly::implode(std::begin(cur), std::end(cur), " "); | ||
| 251 | lines.push_back(prefixText); | ||
| 252 | cur.clear(); | ||
| 253 | } else { | ||
| 254 | words.pop_front(); | ||
| 255 | } | ||
| 256 | } | ||
| 257 | |||
| 258 | if (!cur.empty()) | ||
| 259 | { | ||
| 260 | std::string prefixText = verbly::implode(std::begin(cur), std::end(cur), " "); | ||
| 261 | lines.push_back(prefixText); | ||
| 262 | } | ||
| 263 | |||
| 264 | int lineHeight = metric.textHeight()-2; | ||
| 265 | int blockHeight = lineHeight * lines.size() + 18; | ||
| 266 | std::cout << "line " << lineHeight << "; block " << blockHeight << std::endl; | ||
| 267 | |||
| 268 | std::list<Magick::Drawable> drawList; | ||
| 269 | drawList.push_back(Magick::DrawableFillColor("black")); | ||
| 270 | drawList.push_back(Magick::DrawableFillOpacity(0.5)); | ||
| 271 | drawList.push_back(Magick::DrawableStrokeColor("transparent")); | ||
| 272 | drawList.push_back(Magick::DrawableRectangle(0, 225-blockHeight-20, 400, 255)); // 0, 225-60, 400, 255 | ||
| 273 | pic.draw(drawList); | ||
| 274 | |||
| 275 | drawList.clear(); | ||
| 276 | drawList.push_back(Magick::DrawableFont("@coolvetica.ttf")); | ||
| 277 | drawList.push_back(Magick::DrawableFillColor("white")); | ||
| 278 | drawList.push_back(Magick::DrawablePointSize(14)); | ||
| 279 | drawList.push_back(Magick::DrawableText(10, 225-blockHeight+4, "How to")); // 10, 255-62-4 | ||
| 280 | pic.draw(drawList); | ||
| 281 | |||
| 282 | for (int i=0; i<lines.size(); i++) | ||
| 283 | { | ||
| 284 | drawList.clear(); | ||
| 285 | drawList.push_back(Magick::DrawableFont("@coolvetica.ttf")); | ||
| 286 | drawList.push_back(Magick::DrawableFillColor("white")); | ||
| 287 | drawList.push_back(Magick::DrawablePointSize(20)); | ||
| 288 | drawList.push_back(Magick::DrawableText(10, 255-blockHeight+(i*lineHeight)-4, lines[i])); // 10, 255-20-25 | ||
| 289 | pic.draw(drawList); | ||
| 290 | } | ||
| 291 | |||
| 292 | return pic; | ||
| 293 | } | ||
| 294 | |||
| 295 | void advice::sendTweet(Magick::Image pic, std::string title) const | ||
| 296 | { | ||
| 297 | Magick::Blob outputimg; | ||
| 298 | |||
| 299 | try | ||
| 300 | { | ||
| 301 | pic.magick("png"); | ||
| 302 | pic.write(&outputimg); | ||
| 303 | } catch (const Magick::WarningCoder& e) | ||
| 304 | { | ||
| 305 | // Ignore | ||
| 306 | } | ||
| 307 | |||
| 308 | std::cout << "Generated image!" << std::endl << "Tweeting..." << std::endl; | ||
| 309 | |||
| 310 | std::string tweetText; | ||
| 311 | size_t tweetLim = 140 - client_->getConfiguration().getCharactersReservedPerMedia(); | ||
| 312 | if (title.length() > tweetLim) | ||
| 313 | { | ||
| 314 | tweetText = title.substr(0, tweetLim - 1) + "…"; | ||
| 315 | } else { | ||
| 316 | tweetText = title; | ||
| 317 | } | ||
| 318 | |||
| 319 | long media_id = client_->uploadMedia("image/png", (const char*) outputimg.data(), outputimg.length()); | ||
| 320 | client_->updateStatus(tweetText, {media_id}); | ||
| 321 | } | ||
| 322 | |||
| 323 | void advice::run() const | ||
| 324 | { | ||
| 325 | for (;;) | ||
| 326 | { | ||
| 327 | try | ||
| 328 | { | ||
| 329 | // Pick a noun to use for the picture. | ||
| 330 | verbly::word pictured = generateImageNoun(); | ||
| 331 | |||
| 332 | // Find an image of the picked noun. | ||
| 333 | Magick::Image pic = getImageForNoun(pictured); | ||
| 334 | |||
| 335 | // Generate the image text. | ||
| 336 | std::string title = generator_->generate(); | ||
| 337 | |||
| 338 | // Layout the image. | ||
| 339 | Magick::Image output = layoutImage(std::move(pic), title); | ||
| 340 | |||
| 341 | // Tweet the image. | ||
| 342 | sendTweet(std::move(output), title); | ||
| 343 | |||
| 344 | std::cout << "Done!" << std::endl << "Waiting..." << std::endl << std::endl; | ||
| 345 | |||
| 346 | // Wait. | ||
| 347 | std::this_thread::sleep_for(std::chrono::hours(1)); | ||
| 348 | } catch (const could_not_get_images& ex) | ||
| 349 | { | ||
| 350 | std::cout << ex.what() << std::endl; | ||
| 351 | } catch (const Magick::ErrorImage& ex) | ||
| 352 | { | ||
| 353 | std::cout << "Image error: " << ex.what() << std::endl; | ||
| 354 | } catch (const twitter::twitter_error& ex) | ||
| 355 | { | ||
| 356 | std::cout << "Twitter error: " << ex.what() << std::endl; | ||
| 357 | } | ||
| 358 | } | ||
| 359 | } | ||
| diff --git a/advice.h b/advice.h new file mode 100644 index 0000000..33dc531 --- /dev/null +++ b/advice.h | |||
| @@ -0,0 +1,46 @@ | |||
| 1 | #ifndef ADVICE_H_5934AC1B | ||
| 2 | #define ADVICE_H_5934AC1B | ||
| 3 | |||
| 4 | #include <random> | ||
| 5 | #include <twitter.h> | ||
| 6 | #include <verbly.h> | ||
| 7 | #include <string> | ||
| 8 | #include <memory> | ||
| 9 | #include <Magick++.h> | ||
| 10 | #include <stdexcept> | ||
| 11 | #include "sentence.h" | ||
| 12 | |||
| 13 | class advice { | ||
| 14 | public: | ||
| 15 | |||
| 16 | advice( | ||
| 17 | std::string configFile, | ||
| 18 | std::mt19937& rng); | ||
| 19 | |||
| 20 | void run() const; | ||
| 21 | |||
| 22 | private: | ||
| 23 | |||
| 24 | class could_not_get_images : public std::runtime_error { | ||
| 25 | public: | ||
| 26 | |||
| 27 | could_not_get_images() : std::runtime_error("Could not get images for noun") | ||
| 28 | { | ||
| 29 | } | ||
| 30 | }; | ||
| 31 | |||
| 32 | verbly::word generateImageNoun() const; | ||
| 33 | |||
| 34 | Magick::Image getImageForNoun(verbly::word pictured) const; | ||
| 35 | |||
| 36 | Magick::Image layoutImage(Magick::Image bg, std::string title) const; | ||
| 37 | |||
| 38 | void sendTweet(Magick::Image pic, std::string title) const; | ||
| 39 | |||
| 40 | std::mt19937& rng_; | ||
| 41 | std::unique_ptr<verbly::database> database_; | ||
| 42 | std::unique_ptr<sentence> generator_; | ||
| 43 | std::unique_ptr<twitter::client> client_; | ||
| 44 | }; | ||
| 45 | |||
| 46 | #endif /* end of include guard: ADVICE_H_5934AC1B */ | ||
| diff --git a/coolvetica.ttf b/coolvetica.ttf new file mode 100755 index 0000000..410ca31 --- /dev/null +++ b/coolvetica.ttf | |||
| Binary files differ | |||
| diff --git a/main.cpp b/main.cpp new file mode 100644 index 0000000..3661105 --- /dev/null +++ b/main.cpp | |||
| @@ -0,0 +1,33 @@ | |||
| 1 | #include "advice.h" | ||
| 2 | |||
| 3 | int main(int argc, char** argv) | ||
| 4 | { | ||
| 5 | Magick::InitializeMagick(nullptr); | ||
| 6 | |||
| 7 | std::random_device random_device; | ||
| 8 | std::mt19937 random_engine{random_device()}; | ||
| 9 | |||
| 10 | if (argc != 2) | ||
| 11 | { | ||
| 12 | std::cout << "usage: advice [configfile]" << std::endl; | ||
| 13 | return -1; | ||
| 14 | } | ||
| 15 | |||
| 16 | std::string configfile(argv[1]); | ||
| 17 | |||
| 18 | try | ||
| 19 | { | ||
| 20 | advice bot(configfile, random_engine); | ||
| 21 | |||
| 22 | try | ||
| 23 | { | ||
| 24 | bot.run(); | ||
| 25 | } catch (const std::exception& ex) | ||
| 26 | { | ||
| 27 | std::cout << "Error running bot: " << ex.what() << std::endl; | ||
| 28 | } | ||
| 29 | } catch (const std::exception& ex) | ||
| 30 | { | ||
| 31 | std::cout << "Error initializing bot: " << ex.what() << std::endl; | ||
| 32 | } | ||
| 33 | } | ||
| diff --git a/sentence.cpp b/sentence.cpp new file mode 100644 index 0000000..421aaf6 --- /dev/null +++ b/sentence.cpp | |||
| @@ -0,0 +1,754 @@ | |||
| 1 | #include "sentence.h" | ||
| 2 | #include <algorithm> | ||
| 3 | #include <list> | ||
| 4 | #include <set> | ||
| 5 | |||
| 6 | sentence::sentence( | ||
| 7 | const verbly::database& database, | ||
| 8 | std::mt19937& rng) : | ||
| 9 | database_(database), | ||
| 10 | rng_(rng) | ||
| 11 | { | ||
| 12 | } | ||
| 13 | |||
| 14 | std::string sentence::generate() const | ||
| 15 | { | ||
| 16 | // Generate the form that the title should take. | ||
| 17 | verbly::token form; | ||
| 18 | std::set<std::string> synrestrs {"infinitive_phrase", "bare", "subjectless"}; | ||
| 19 | std::set<std::string> secondSyn {"participle_phrase", "subjectless"}; | ||
| 20 | std::set<std::string> adjSyn {"adjective_phrase"}; | ||
| 21 | |||
| 22 | if (std::bernoulli_distribution(1.0/6.0)(rng_)) | ||
| 23 | { | ||
| 24 | form << "not"; | ||
| 25 | } | ||
| 26 | |||
| 27 | if (std::bernoulli_distribution(1.0/6.0)(rng_)) | ||
| 28 | { | ||
| 29 | form << "be"; | ||
| 30 | form << adjSyn; | ||
| 31 | } else { | ||
| 32 | if (std::bernoulli_distribution(1.0/6.0)(rng_)) | ||
| 33 | { | ||
| 34 | form << "get"; | ||
| 35 | synrestrs.insert("experiencer"); | ||
| 36 | synrestrs.insert("past_participle"); | ||
| 37 | } | ||
| 38 | |||
| 39 | form << synrestrs; | ||
| 40 | } | ||
| 41 | |||
| 42 | if (std::bernoulli_distribution(1.0/5.0)(rng_)) | ||
| 43 | { | ||
| 44 | if (std::bernoulli_distribution(1.0/4.0)(rng_)) | ||
| 45 | { | ||
| 46 | form << "without"; | ||
| 47 | } else { | ||
| 48 | form << "while"; | ||
| 49 | } | ||
| 50 | |||
| 51 | form << secondSyn; | ||
| 52 | } | ||
| 53 | |||
| 54 | // Attempt to compile the form, restarting if a bad word is generated. | ||
| 55 | std::set<std::string> badWords = {"raped"}; | ||
| 56 | |||
| 57 | verbly::token tok = form; | ||
| 58 | std::list<std::string> words; | ||
| 59 | for (;;) | ||
| 60 | { | ||
| 61 | // Compile the form. | ||
| 62 | while (!tok.isComplete()) | ||
| 63 | { | ||
| 64 | visit(tok); | ||
| 65 | } | ||
| 66 | |||
| 67 | std::string compiled = tok.compile(); | ||
| 68 | words = verbly::split<std::list<std::string>>(compiled, " "); | ||
| 69 | |||
| 70 | // Ensure that there are no bad words in the output. | ||
| 71 | if (!std::any_of(std::begin(words), std::end(words), [&badWords] (const std::string& word) { | ||
| 72 | std::string canonWord; | ||
| 73 | |||
| 74 | for (char ch : word) | ||
| 75 | { | ||
| 76 | if (std::isalpha(ch)) | ||
| 77 | { | ||
| 78 | canonWord.push_back(std::tolower(ch)); | ||
| 79 | } | ||
| 80 | } | ||
| 81 | |||
| 82 | return (badWords.count(canonWord) == 1); | ||
| 83 | })) { | ||
| 84 | break; | ||
| 85 | } else { | ||
| 86 | std::cout << "Bad word generated." << std::endl; | ||
| 87 | } | ||
| 88 | } | ||
| 89 | |||
| 90 | // Put the form into title case. | ||
| 91 | for (std::string& word : words) | ||
| 92 | { | ||
| 93 | if ((word[0] == '"') && (word.length() > 1)) | ||
| 94 | { | ||
| 95 | word[1] = std::toupper(word[1]); | ||
| 96 | } else { | ||
| 97 | word[0] = std::toupper(word[0]); | ||
| 98 | } | ||
| 99 | } | ||
| 100 | |||
| 101 | return verbly::implode(std::begin(words), std::end(words), " "); | ||
| 102 | } | ||
| 103 | |||
| 104 | verbly::filter sentence::parseSelrestrs( | ||
| 105 | verbly::selrestr selrestr) const | ||
| 106 | { | ||
| 107 | switch (selrestr.getType()) | ||
| 108 | { | ||
| 109 | case verbly::selrestr::type::empty: | ||
| 110 | { | ||
| 111 | return {}; | ||
| 112 | } | ||
| 113 | |||
| 114 | case verbly::selrestr::type::singleton: | ||
| 115 | { | ||
| 116 | verbly::filter result; | ||
| 117 | |||
| 118 | if (selrestr.getRestriction() == "concrete") | ||
| 119 | { | ||
| 120 | result = (verbly::notion::wnid == 100001930); // physical entity | ||
| 121 | } else if (selrestr.getRestriction() == "time") | ||
| 122 | { | ||
| 123 | result = (verbly::notion::wnid == 100028270); // time | ||
| 124 | } else if (selrestr.getRestriction() == "state") | ||
| 125 | { | ||
| 126 | result = (verbly::notion::wnid == 100024720); // state | ||
| 127 | } else if (selrestr.getRestriction() == "abstract") | ||
| 128 | { | ||
| 129 | result = (verbly::notion::wnid == 100002137); // abstract entity | ||
| 130 | } else if (selrestr.getRestriction() == "scalar") | ||
| 131 | { | ||
| 132 | result = (verbly::notion::wnid == 103835412); // number | ||
| 133 | } else if (selrestr.getRestriction() == "currency") | ||
| 134 | { | ||
| 135 | result = (verbly::notion::wnid == 105050379); // currency | ||
| 136 | } else if (selrestr.getRestriction() == "location") | ||
| 137 | { | ||
| 138 | result = (verbly::notion::wnid == 100027167); // location | ||
| 139 | } else if (selrestr.getRestriction() == "organization") | ||
| 140 | { | ||
| 141 | result = (verbly::notion::wnid == 100237078); // organization | ||
| 142 | } else if (selrestr.getRestriction() == "int_control") | ||
| 143 | { | ||
| 144 | result = (verbly::notion::wnid == 100007347); // causal agent | ||
| 145 | } else if (selrestr.getRestriction() == "natural") | ||
| 146 | { | ||
| 147 | result = (verbly::notion::wnid == 100019128); // natural object | ||
| 148 | } else if (selrestr.getRestriction() == "phys_obj") | ||
| 149 | { | ||
| 150 | result = (verbly::notion::wnid == 100002684); // physical object | ||
| 151 | } else if (selrestr.getRestriction() == "solid") | ||
| 152 | { | ||
| 153 | result = (verbly::notion::wnid == 113860793); // solid | ||
| 154 | } else if (selrestr.getRestriction() == "shape") | ||
| 155 | { | ||
| 156 | result = (verbly::notion::wnid == 100027807); // shape | ||
| 157 | } else if (selrestr.getRestriction() == "substance") | ||
| 158 | { | ||
| 159 | result = (verbly::notion::wnid == 100019613); // substance | ||
| 160 | } else if (selrestr.getRestriction() == "idea") | ||
| 161 | { | ||
| 162 | result = (verbly::notion::wnid == 105803379); // idea | ||
| 163 | } else if (selrestr.getRestriction() == "sound") | ||
| 164 | { | ||
| 165 | result = (verbly::notion::wnid == 107111047); // sound | ||
| 166 | } else if (selrestr.getRestriction() == "communication") | ||
| 167 | { | ||
| 168 | result = (verbly::notion::wnid == 100033020); // communication | ||
| 169 | } else if (selrestr.getRestriction() == "region") | ||
| 170 | { | ||
| 171 | result = (verbly::notion::wnid == 105221895); // region | ||
| 172 | } else if (selrestr.getRestriction() == "place") | ||
| 173 | { | ||
| 174 | result = (verbly::notion::wnid == 100586262); // place | ||
| 175 | } else if (selrestr.getRestriction() == "machine") | ||
| 176 | { | ||
| 177 | result = (verbly::notion::wnid == 102958343); // machine | ||
| 178 | } else if (selrestr.getRestriction() == "animate") | ||
| 179 | { | ||
| 180 | result = (verbly::notion::wnid == 100004258); // animate thing | ||
| 181 | } else if (selrestr.getRestriction() == "plant") | ||
| 182 | { | ||
| 183 | result = (verbly::notion::wnid == 103956922); // plant | ||
| 184 | } else if (selrestr.getRestriction() == "comestible") | ||
| 185 | { | ||
| 186 | result = (verbly::notion::wnid == 100021265); // food | ||
| 187 | } else if (selrestr.getRestriction() == "artifact") | ||
| 188 | { | ||
| 189 | result = (verbly::notion::wnid == 100021939); // artifact | ||
| 190 | } else if (selrestr.getRestriction() == "vehicle") | ||
| 191 | { | ||
| 192 | result = (verbly::notion::wnid == 104524313); // vehicle | ||
| 193 | } else if (selrestr.getRestriction() == "human") | ||
| 194 | { | ||
| 195 | result = (verbly::notion::wnid == 100007846); // person | ||
| 196 | } else if (selrestr.getRestriction() == "animal") | ||
| 197 | { | ||
| 198 | result = (verbly::notion::wnid == 100015388); // animal | ||
| 199 | } else if (selrestr.getRestriction() == "body_part") | ||
| 200 | { | ||
| 201 | result = (verbly::notion::wnid == 105220461); // body part | ||
| 202 | } else if (selrestr.getRestriction() == "garment") | ||
| 203 | { | ||
| 204 | result = (verbly::notion::wnid == 103051540); // clothing | ||
| 205 | } else if (selrestr.getRestriction() == "tool") | ||
| 206 | { | ||
| 207 | result = (verbly::notion::wnid == 104451818); // tool | ||
| 208 | } else { | ||
| 209 | return {}; | ||
| 210 | } | ||
| 211 | |||
| 212 | std::cout << selrestr.getRestriction() << " (" << selrestr.getPos() << ")" << std::endl; | ||
| 213 | |||
| 214 | if (selrestr.getPos()) | ||
| 215 | { | ||
| 216 | return (verbly::notion::fullHypernyms %= result); | ||
| 217 | } else { | ||
| 218 | return !(verbly::notion::fullHypernyms %= result); | ||
| 219 | } | ||
| 220 | } | ||
| 221 | |||
| 222 | case verbly::selrestr::type::group: | ||
| 223 | { | ||
| 224 | std::cout << "or: " << selrestr.getOrlogic() << std::endl; | ||
| 225 | verbly::filter ret(selrestr.getOrlogic()); | ||
| 226 | |||
| 227 | for (const verbly::selrestr& child : selrestr) | ||
| 228 | { | ||
| 229 | ret += parseSelrestrs(child); | ||
| 230 | } | ||
| 231 | |||
| 232 | return ret; | ||
| 233 | } | ||
| 234 | } | ||
| 235 | } | ||
| 236 | |||
| 237 | bool sentence::requiresSelrestr( | ||
| 238 | std::string restriction, | ||
| 239 | verbly::selrestr selrestr) const | ||
| 240 | { | ||
| 241 | switch (selrestr.getType()) | ||
| 242 | { | ||
| 243 | case verbly::selrestr::type::empty: | ||
| 244 | { | ||
| 245 | return false; | ||
| 246 | } | ||
| 247 | |||
| 248 | case verbly::selrestr::type::singleton: | ||
| 249 | { | ||
| 250 | if (selrestr.getRestriction() == restriction) | ||
| 251 | { | ||
| 252 | return selrestr.getPos(); | ||
| 253 | } else { | ||
| 254 | return false; | ||
| 255 | } | ||
| 256 | } | ||
| 257 | |||
| 258 | case verbly::selrestr::type::group: | ||
| 259 | { | ||
| 260 | if (selrestr.getOrlogic()) | ||
| 261 | { | ||
| 262 | return std::all_of(std::begin(selrestr), std::end(selrestr), [=] (const verbly::selrestr& s) { | ||
| 263 | return requiresSelrestr(restriction, s); | ||
| 264 | }); | ||
| 265 | } else { | ||
| 266 | return std::any_of(std::begin(selrestr), std::end(selrestr), [=] (const verbly::selrestr& s) { | ||
| 267 | return requiresSelrestr(restriction, s); | ||
| 268 | }); | ||
| 269 | } | ||
| 270 | } | ||
| 271 | } | ||
| 272 | } | ||
| 273 | |||
| 274 | verbly::word sentence::generateStandardNoun( | ||
| 275 | std::string role, | ||
| 276 | verbly::selrestr selrestrs) const | ||
| 277 | { | ||
| 278 | std::geometric_distribution<int> tagdist(0.5); // 0.06 | ||
| 279 | std::vector<verbly::word> result; | ||
| 280 | bool trySelection = true; | ||
| 281 | |||
| 282 | while (result.empty()) | ||
| 283 | { | ||
| 284 | verbly::filter condition = | ||
| 285 | (verbly::notion::partOfSpeech == verbly::part_of_speech::noun) | ||
| 286 | && (verbly::form::proper == false) | ||
| 287 | //&& (verbly::form::complexity == 1) | ||
| 288 | // && (verbly::word::tagCount >= tagdist(rng_)) // Favor more common words | ||
| 289 | && (verbly::word::tagCount >= 1) | ||
| 290 | && !(verbly::word::usageDomains %= (verbly::notion::wnid == 106718862)); // Blacklist ethnic slurs | ||
| 291 | |||
| 292 | // Only use selection restrictions for a first attempt. | ||
| 293 | if (trySelection) | ||
| 294 | { | ||
| 295 | verbly::filter selrestrCondition = parseSelrestrs(selrestrs).compact(); | ||
| 296 | |||
| 297 | if (selrestrCondition.getType() != verbly::filter::type::empty) | ||
| 298 | { | ||
| 299 | condition &= std::move(selrestrCondition); | ||
| 300 | } else if (role == "Attribute") | ||
| 301 | { | ||
| 302 | condition &= (verbly::notion::fullHypernyms %= (verbly::notion::wnid == 100024264)); // attribute | ||
| 303 | } else if (role == "Instrument") | ||
| 304 | { | ||
| 305 | condition &= (verbly::notion::fullHypernyms %= (verbly::notion::wnid == 104451818)); // tool | ||
| 306 | } else if (role == "Agent") | ||
| 307 | { | ||
| 308 | condition &= (verbly::notion::fullHypernyms %= (verbly::notion::wnid == 100007347)); // causal agent | ||
| 309 | } | ||
| 310 | |||
| 311 | trySelection = false; | ||
| 312 | } else { | ||
| 313 | std::cout << "Selection failed" << std::endl; | ||
| 314 | } | ||
| 315 | |||
| 316 | result = database_.words(condition).all(); | ||
| 317 | } | ||
| 318 | |||
| 319 | return result.front(); | ||
| 320 | } | ||
| 321 | |||
| 322 | verbly::token sentence::generateStandardNounPhrase( | ||
| 323 | const verbly::word& noun, | ||
| 324 | std::string role, | ||
| 325 | bool plural, | ||
| 326 | bool definite) const | ||
| 327 | { | ||
| 328 | verbly::token utter; | ||
| 329 | verbly::word sounder = noun; | ||
| 330 | verbly::word descript; | ||
| 331 | |||
| 332 | if (std::bernoulli_distribution(1.0/8.0)(rng_)) | ||
| 333 | { | ||
| 334 | std::geometric_distribution<int> tagdist(0.2); | ||
| 335 | descript = database_.words( | ||
| 336 | (verbly::word::tagCount >= tagdist(rng_)) | ||
| 337 | && (verbly::notion::partOfSpeech == verbly::part_of_speech::adjective)).first(); | ||
| 338 | |||
| 339 | sounder = descript; | ||
| 340 | } | ||
| 341 | |||
| 342 | if ((std::bernoulli_distribution(1.0/3.0)(rng_)) && (definite)) | ||
| 343 | { | ||
| 344 | utter << "the"; | ||
| 345 | |||
| 346 | if (std::bernoulli_distribution(1.0/2.0)(rng_)) | ||
| 347 | { | ||
| 348 | plural = true; | ||
| 349 | } | ||
| 350 | } else { | ||
| 351 | if ((role != "Theme") && (role != "Attribute") && std::bernoulli_distribution(1.0/2.0)(rng_)) | ||
| 352 | { | ||
| 353 | utter << "your"; | ||
| 354 | } else if (!plural) { | ||
| 355 | if (sounder.getLemma().getBaseForm().startsWithVowelSound()) | ||
| 356 | { | ||
| 357 | utter << "an"; | ||
| 358 | } else { | ||
| 359 | utter << "a"; | ||
| 360 | } | ||
| 361 | } | ||
| 362 | } | ||
| 363 | |||
| 364 | if (descript) | ||
| 365 | { | ||
| 366 | utter << descript; | ||
| 367 | } | ||
| 368 | |||
| 369 | if (plural && noun.getLemma().hasInflection(verbly::inflection::plural)) | ||
| 370 | { | ||
| 371 | utter << verbly::token(noun, verbly::inflection::plural); | ||
| 372 | } else { | ||
| 373 | utter << noun; | ||
| 374 | } | ||
| 375 | |||
| 376 | return utter; | ||
| 377 | } | ||
| 378 | |||
| 379 | verbly::token sentence::generateClause( | ||
| 380 | const verbly::token& it) const | ||
| 381 | { | ||
| 382 | verbly::token utter; | ||
| 383 | std::geometric_distribution<int> tagdist(0.07); | ||
| 384 | std::vector<verbly::word> verbDataset; | ||
| 385 | |||
| 386 | verbly::filter frameCondition = | ||
| 387 | (verbly::frame::length >= 2) | ||
| 388 | && (verbly::frame::part(0) %= ( | ||
| 389 | (verbly::part::type == verbly::part_type::noun_phrase) | ||
| 390 | && (verbly::part::role == "Agent")) | ||
| 391 | && !(verbly::frame::part() %= ( | ||
| 392 | verbly::part::synrestr %= "adjp"))); | ||
| 393 | |||
| 394 | if (it.hasSynrestr("experiencer")) | ||
| 395 | { | ||
| 396 | frameCondition &= | ||
| 397 | (verbly::frame::part(2) %= | ||
| 398 | (verbly::part::type == verbly::part_type::noun_phrase) | ||
| 399 | && !(verbly::part::synrestr %= "genitive") | ||
| 400 | && ((verbly::part::role == "Patient") | ||
| 401 | || (verbly::part::role == "Experiencer"))); | ||
| 402 | } | ||
| 403 | |||
| 404 | verbly::filter verbCondition = | ||
| 405 | (verbly::notion::partOfSpeech == verbly::part_of_speech::verb) | ||
| 406 | && frameCondition; | ||
| 407 | |||
| 408 | if (it.hasSynrestr("participle_phrase")) | ||
| 409 | { | ||
| 410 | verbCondition &= (verbly::lemma::form(verbly::inflection::ing_form)); | ||
| 411 | } else if (it.hasSynrestr("progressive")) | ||
| 412 | { | ||
| 413 | verbCondition &= (verbly::lemma::form(verbly::inflection::s_form)); | ||
| 414 | } else if (it.hasSynrestr("past_participle")) | ||
| 415 | { | ||
| 416 | verbCondition &= (verbly::lemma::form(verbly::inflection::past_participle)); | ||
| 417 | } | ||
| 418 | |||
| 419 | // Because of the tag distribution, it's possible (albeit extremely unlikely) | ||
| 420 | // for the verb query to fail, so we loop until it succeeds. | ||
| 421 | while (verbDataset.empty()) | ||
| 422 | { | ||
| 423 | verbDataset = database_.words( | ||
| 424 | verbCondition | ||
| 425 | && (verbly::word::tagCount >= tagdist(rng_)) | ||
| 426 | ).all(); | ||
| 427 | } | ||
| 428 | |||
| 429 | verbly::word verb = verbDataset.front(); | ||
| 430 | verbly::frame frame = database_.frames(frameCondition && verb).first(); | ||
| 431 | std::list<verbly::part> parts(std::begin(frame.getParts()), std::end(frame.getParts())); | ||
| 432 | |||
| 433 | if (it.hasSynrestr("experiencer")) | ||
| 434 | { | ||
| 435 | // Ignore the direct object. | ||
| 436 | parts.erase(std::next(parts.begin(), 2)); | ||
| 437 | } | ||
| 438 | |||
| 439 | if (it.hasSynrestr("subjectless")) | ||
| 440 | { | ||
| 441 | // Ignore the subject. | ||
| 442 | parts.pop_front(); | ||
| 443 | } | ||
| 444 | |||
| 445 | for (const verbly::part& part : parts) | ||
| 446 | { | ||
| 447 | switch (part.getType()) | ||
| 448 | { | ||
| 449 | case verbly::part_type::noun_phrase: | ||
| 450 | { | ||
| 451 | std::cout << "NP: "; | ||
| 452 | for (auto& s : part.getNounSynrestrs()) | ||
| 453 | { | ||
| 454 | std::cout << s << " "; | ||
| 455 | } | ||
| 456 | std::cout << std::endl; | ||
| 457 | |||
| 458 | if (requiresSelrestr("currency", part.getNounSelrestrs())) | ||
| 459 | { | ||
| 460 | int lead = std::uniform_int_distribution<int>(1,9)(rng_); | ||
| 461 | int tail = std::uniform_int_distribution<int>(0,6)(rng_); | ||
| 462 | std::string tailStr(tail, '0'); | ||
| 463 | |||
| 464 | utter << ("$" + std::to_string(lead) + tailStr); | ||
| 465 | } else if (part.nounHasSynrestr("adjp")) | ||
| 466 | { | ||
| 467 | utter << std::set<std::string>({"adjective_phrase"}); | ||
| 468 | } else if ((part.nounHasSynrestr("be_sc_ing")) | ||
| 469 | || (part.nounHasSynrestr("ac_ing")) | ||
| 470 | || (part.nounHasSynrestr("sc_ing")) | ||
| 471 | || (part.nounHasSynrestr("np_omit_ing")) | ||
| 472 | || (part.nounHasSynrestr("oc_ing"))) | ||
| 473 | { | ||
| 474 | utter << std::set<std::string>({"participle_phrase", "subjectless"}); | ||
| 475 | } else if ((part.nounHasSynrestr("poss_ing")) | ||
| 476 | || (part.nounHasSynrestr("possing")) | ||
| 477 | || (part.nounHasSynrestr("pos_ing"))) | ||
| 478 | { | ||
| 479 | utter << "your"; | ||
| 480 | utter << std::set<std::string>({"participle_phrase", "subjectless"}); | ||
| 481 | } else if (part.nounHasSynrestr("genitive")) | ||
| 482 | { | ||
| 483 | utter << "your"; | ||
| 484 | } else if (part.nounHasSynrestr("adv_loc")) | ||
| 485 | { | ||
| 486 | if (std::bernoulli_distribution(1.0/2.0)(rng_)) | ||
| 487 | { | ||
| 488 | utter << "here"; | ||
| 489 | } else { | ||
| 490 | utter << "there"; | ||
| 491 | } | ||
| 492 | } else if (part.nounHasSynrestr("refl")) | ||
| 493 | { | ||
| 494 | utter << "yourself"; | ||
| 495 | } else if ((part.nounHasSynrestr("sc_to_inf")) | ||
| 496 | || (part.nounHasSynrestr("ac_to_inf")) | ||
| 497 | || (part.nounHasSynrestr("vc_to_inf")) | ||
| 498 | || (part.nounHasSynrestr("rs_to_inf")) | ||
| 499 | || (part.nounHasSynrestr("oc_to_inf"))) | ||
| 500 | { | ||
| 501 | utter << std::set<std::string>({"infinitive_phrase", "subjectless"}); | ||
| 502 | } else if (part.nounHasSynrestr("oc_bare_inf")) | ||
| 503 | { | ||
| 504 | utter << std::set<std::string>({"infinitive_phrase", "bare", "subjectless"}); | ||
| 505 | } else if (part.nounHasSynrestr("wh_comp")) | ||
| 506 | { | ||
| 507 | utter << "whether"; | ||
| 508 | |||
| 509 | verbly::token sentence(std::set<std::string>({"progressive"})); | ||
| 510 | utter << generateClause(sentence); | ||
| 511 | } else if (part.nounHasSynrestr("that_comp")) | ||
| 512 | { | ||
| 513 | utter << "that"; | ||
| 514 | utter << "they"; | ||
| 515 | |||
| 516 | verbly::token sentence(std::set<std::string>({"subjectless"})); | ||
| 517 | utter << generateClause(sentence); | ||
| 518 | } else if (part.nounHasSynrestr("what_extract")) | ||
| 519 | { | ||
| 520 | utter << "what"; | ||
| 521 | |||
| 522 | verbly::token sentence(std::set<std::string>({"progressive", "experiencer"})); | ||
| 523 | utter << generateClause(sentence); | ||
| 524 | } else if (part.nounHasSynrestr("how_extract")) | ||
| 525 | { | ||
| 526 | utter << "how"; | ||
| 527 | |||
| 528 | verbly::token sentence(std::set<std::string>({"progressive"})); | ||
| 529 | utter << generateClause(sentence); | ||
| 530 | } else if (part.nounHasSynrestr("wh_inf")) | ||
| 531 | { | ||
| 532 | utter << "how"; | ||
| 533 | |||
| 534 | verbly::token sentence(std::set<std::string>({"infinitive_phrase", "subjectless"})); | ||
| 535 | utter << generateClause(sentence); | ||
| 536 | } else if (part.nounHasSynrestr("what_inf")) | ||
| 537 | { | ||
| 538 | utter << "what"; | ||
| 539 | |||
| 540 | verbly::token sentence(std::set<std::string>({"infinitive_phrase", "subjectless", "experiencer"})); | ||
| 541 | utter << generateClause(sentence); | ||
| 542 | } else if (part.nounHasSynrestr("wheth_inf")) | ||
| 543 | { | ||
| 544 | utter << "whether"; | ||
| 545 | |||
| 546 | verbly::token sentence(std::set<std::string>({"infinitive_phrase", "subjectless"})); | ||
| 547 | utter << generateClause(sentence); | ||
| 548 | } else if (part.nounHasSynrestr("quotation")) | ||
| 549 | { | ||
| 550 | verbly::token sentence(std::set<std::string>({"participle_phrase"})); | ||
| 551 | while (!sentence.isComplete()) | ||
| 552 | { | ||
| 553 | visit(sentence); | ||
| 554 | } | ||
| 555 | |||
| 556 | utter << ("\"" + sentence.compile() + "\""); | ||
| 557 | } else { | ||
| 558 | verbly::word noun = generateStandardNoun(part.getNounRole(), part.getNounSelrestrs()); | ||
| 559 | |||
| 560 | bool plural = part.nounHasSynrestr("plural"); | ||
| 561 | if (!plural) | ||
| 562 | { | ||
| 563 | plural = requiresSelrestr("plural", part.getNounSelrestrs()); | ||
| 564 | } | ||
| 565 | |||
| 566 | utter << generateStandardNounPhrase( | ||
| 567 | noun, | ||
| 568 | part.getNounRole(), | ||
| 569 | plural, | ||
| 570 | part.nounHasSynrestr("definite")); | ||
| 571 | |||
| 572 | if (part.nounHasSynrestr("acc_ing") || part.nounHasSynrestr("ac_ing")) | ||
| 573 | { | ||
| 574 | utter << std::set<std::string>({"participle_phrase", "subjectless"}); | ||
| 575 | } | ||
| 576 | } | ||
| 577 | |||
| 578 | break; | ||
| 579 | } | ||
| 580 | |||
| 581 | case verbly::part_type::verb: | ||
| 582 | { | ||
| 583 | std::cout << "V: " << verb.getBaseForm() << std::endl; | ||
| 584 | |||
| 585 | if (it.hasSynrestr("progressive")) | ||
| 586 | { | ||
| 587 | utter << verbly::token(verb, verbly::inflection::s_form); | ||
| 588 | } else if (it.hasSynrestr("past_participle")) | ||
| 589 | { | ||
| 590 | utter << verbly::token(verb, verbly::inflection::past_participle); | ||
| 591 | } else if (it.hasSynrestr("infinitive_phrase")) | ||
| 592 | { | ||
| 593 | if (!it.hasSynrestr("bare")) | ||
| 594 | { | ||
| 595 | utter << "to"; | ||
| 596 | } | ||
| 597 | |||
| 598 | utter << verb; | ||
| 599 | } else if (it.hasSynrestr("participle_phrase")) | ||
| 600 | { | ||
| 601 | utter << verbly::token(verb, verbly::inflection::ing_form); | ||
| 602 | } else { | ||
| 603 | utter << verb; | ||
| 604 | } | ||
| 605 | |||
| 606 | break; | ||
| 607 | } | ||
| 608 | |||
| 609 | case verbly::part_type::preposition: | ||
| 610 | { | ||
| 611 | std::cout << "PREP" << std::endl; | ||
| 612 | |||
| 613 | if (part.isPrepositionLiteral()) | ||
| 614 | { | ||
| 615 | int choiceIndex = std::uniform_int_distribution<int>(0, part.getPrepositionChoices().size()-1)(rng_); | ||
| 616 | utter << part.getPrepositionChoices()[choiceIndex]; | ||
| 617 | } else { | ||
| 618 | verbly::filter pgf(true); | ||
| 619 | for (const std::string& choice : part.getPrepositionChoices()) | ||
| 620 | { | ||
| 621 | pgf += (verbly::notion::prepositionGroup == choice); | ||
| 622 | } | ||
| 623 | |||
| 624 | utter << database_.words(pgf && (verbly::notion::partOfSpeech == verbly::part_of_speech::preposition)).first(); | ||
| 625 | } | ||
| 626 | |||
| 627 | break; | ||
| 628 | } | ||
| 629 | |||
| 630 | case verbly::part_type::adjective: | ||
| 631 | { | ||
| 632 | std::cout << "ADJ" << std::endl; | ||
| 633 | |||
| 634 | utter << std::set<std::string>({"adjective_phrase"}); | ||
| 635 | |||
| 636 | break; | ||
| 637 | } | ||
| 638 | |||
| 639 | case verbly::part_type::adverb: | ||
| 640 | { | ||
| 641 | std::cout << "ADV" << std::endl; | ||
| 642 | |||
| 643 | utter << std::set<std::string>({"adverb_phrase"}); | ||
| 644 | |||
| 645 | break; | ||
| 646 | } | ||
| 647 | |||
| 648 | case verbly::part_type::literal: | ||
| 649 | { | ||
| 650 | std::cout << "LIT" << std::endl; | ||
| 651 | |||
| 652 | utter << part.getLiteralValue(); | ||
| 653 | |||
| 654 | break; | ||
| 655 | } | ||
| 656 | |||
| 657 | case verbly::part_type::invalid: | ||
| 658 | { | ||
| 659 | // Nope | ||
| 660 | |||
| 661 | break; | ||
| 662 | } | ||
| 663 | } | ||
| 664 | } | ||
| 665 | |||
| 666 | if ((parts.size() == 1) && (std::bernoulli_distribution(1.0/4.0)(rng_))) | ||
| 667 | { | ||
| 668 | utter << std::set<std::string>({"adverb_phrase"}); | ||
| 669 | } | ||
| 670 | |||
| 671 | return utter; | ||
| 672 | } | ||
| 673 | |||
| 674 | void sentence::visit(verbly::token& it) const | ||
| 675 | { | ||
| 676 | switch (it.getType()) | ||
| 677 | { | ||
| 678 | case verbly::token::type::utterance: | ||
| 679 | { | ||
| 680 | for (verbly::token& token : it) | ||
| 681 | { | ||
| 682 | if (!token.isComplete()) | ||
| 683 | { | ||
| 684 | visit(token); | ||
| 685 | |||
| 686 | break; | ||
| 687 | } | ||
| 688 | } | ||
| 689 | |||
| 690 | break; | ||
| 691 | } | ||
| 692 | |||
| 693 | case verbly::token::type::fillin: | ||
| 694 | { | ||
| 695 | if (it.hasSynrestr("infinitive_phrase")) | ||
| 696 | { | ||
| 697 | it = generateClause(it); | ||
| 698 | } else if (it.hasSynrestr("adjective_phrase")) | ||
| 699 | { | ||
| 700 | verbly::token phrase; | ||
| 701 | |||
| 702 | if (std::bernoulli_distribution(1.0/6.0)(rng_)) | ||
| 703 | { | ||
| 704 | phrase << std::set<std::string>({"adverb_phrase"}); | ||
| 705 | } | ||
| 706 | |||
| 707 | if (std::bernoulli_distribution(1.0/4.0)(rng_)) | ||
| 708 | { | ||
| 709 | phrase << std::set<std::string>({"participle_phrase", "subjectless"}); | ||
| 710 | } else { | ||
| 711 | std::geometric_distribution<int> tagdist(0.2); | ||
| 712 | phrase << database_.words( | ||
| 713 | (verbly::word::tagCount >= tagdist(rng_)) | ||
| 714 | && (verbly::notion::partOfSpeech == verbly::part_of_speech::adjective)).first(); | ||
| 715 | } | ||
| 716 | |||
| 717 | it = phrase; | ||
| 718 | } else if (it.hasSynrestr("adverb_phrase")) | ||
| 719 | { | ||
| 720 | std::geometric_distribution<int> tagdist(1.0/23.0); | ||
| 721 | |||
| 722 | it = database_.words( | ||
| 723 | (verbly::notion::partOfSpeech == verbly::part_of_speech::adverb) | ||
| 724 | && (verbly::word::tagCount >= tagdist(rng_)) | ||
| 725 | ).first(); | ||
| 726 | } else if (it.hasSynrestr("participle_phrase")) | ||
| 727 | { | ||
| 728 | if (std::bernoulli_distribution(1.0/2.0)(rng_)) | ||
| 729 | { | ||
| 730 | it = verbly::token( | ||
| 731 | database_.words( | ||
| 732 | (verbly::notion::partOfSpeech == verbly::part_of_speech::verb) | ||
| 733 | && (verbly::lemma::form(verbly::inflection::ing_form))).first(), | ||
| 734 | verbly::inflection::ing_form); | ||
| 735 | } else { | ||
| 736 | it = generateClause(it); | ||
| 737 | } | ||
| 738 | } else { | ||
| 739 | it = "*the reality of the situation*"; | ||
| 740 | } | ||
| 741 | |||
| 742 | break; | ||
| 743 | } | ||
| 744 | |||
| 745 | case verbly::token::type::word: | ||
| 746 | case verbly::token::type::literal: | ||
| 747 | case verbly::token::type::part: | ||
| 748 | { | ||
| 749 | // Nope | ||
| 750 | |||
| 751 | break; | ||
| 752 | } | ||
| 753 | } | ||
| 754 | } | ||
| diff --git a/sentence.h b/sentence.h new file mode 100644 index 0000000..abf6860 --- /dev/null +++ b/sentence.h | |||
| @@ -0,0 +1,39 @@ | |||
| 1 | #ifndef SENTENCE_H_81987F60 | ||
| 2 | #define SENTENCE_H_81987F60 | ||
| 3 | |||
| 4 | #include <verbly.h> | ||
| 5 | #include <random> | ||
| 6 | #include <string> | ||
| 7 | |||
| 8 | class sentence { | ||
| 9 | public: | ||
| 10 | |||
| 11 | sentence( | ||
| 12 | const verbly::database& database, | ||
| 13 | std::mt19937& rng); | ||
| 14 | |||
| 15 | std::string generate() const; | ||
| 16 | |||
| 17 | private: | ||
| 18 | |||
| 19 | verbly::filter parseSelrestrs(verbly::selrestr selrestr) const; | ||
| 20 | |||
| 21 | bool requiresSelrestr(std::string restriction, verbly::selrestr selrestr) const; | ||
| 22 | |||
| 23 | verbly::word generateStandardNoun(std::string role, verbly::selrestr selrestrs) const; | ||
| 24 | |||
| 25 | verbly::token generateStandardNounPhrase( | ||
| 26 | const verbly::word& noun, | ||
| 27 | std::string role, | ||
| 28 | bool plural, | ||
| 29 | bool definite) const; | ||
| 30 | |||
| 31 | verbly::token generateClause(const verbly::token& it) const; | ||
| 32 | |||
| 33 | void visit(verbly::token& it) const; | ||
| 34 | |||
| 35 | const verbly::database& database_; | ||
| 36 | std::mt19937& rng_; | ||
| 37 | }; | ||
| 38 | |||
| 39 | #endif /* end of include guard: SENTENCE_H_81987F60 */ | ||
| diff --git a/vendor/libtwittercpp b/vendor/libtwittercpp new file mode 160000 | |||
| Subproject df906121dd862c0f704e44f28ee079158c431c4 | |||
| diff --git a/vendor/verbly b/vendor/verbly new file mode 160000 | |||
| Subproject 5caeb000b00ff7833c3b3c44893d4beffc0afb8 | |||
| diff --git a/vendor/yaml-cpp b/vendor/yaml-cpp new file mode 160000 | |||
| Subproject bedb28fdb4fd52d97e02f6cb946cae631037089 | |||
