summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--.gitmodules9
-rw-r--r--CMakeLists.txt17
-rw-r--r--advice.cpp359
-rw-r--r--advice.h46
-rwxr-xr-xcoolvetica.ttfbin0 -> 135916 bytes
-rw-r--r--main.cpp33
-rw-r--r--sentence.cpp754
-rw-r--r--sentence.h39
m---------vendor/libtwittercpp0
m---------vendor/verbly0
m---------vendor/yaml-cpp0
11 files changed, 1257 insertions, 0 deletions
diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..79dde9f --- /dev/null +++ b/.gitmodules
@@ -0,0 +1,9 @@
1[submodule "vendor/verbly"]
2 path = vendor/verbly
3 url = https://github.com/hatkirby/verbly
4[submodule "vendor/libtwittercpp"]
5 path = vendor/libtwittercpp
6 url = https://github.com/hatkirby/libtwittercpp
7[submodule "vendor/yaml-cpp"]
8 path = vendor/yaml-cpp
9 url = https://github.com/jbeder/yaml-cpp
diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..5fc053d --- /dev/null +++ b/CMakeLists.txt
@@ -0,0 +1,17 @@
# Build script for the "advice" executable.
1cmake_minimum_required (VERSION 3.1)
2project (advice)
3
# NOTE(review): the build type is forced to Debug for every configuration.
4set(CMAKE_BUILD_TYPE Debug)
5
# GraphicsMagick++ is located through pkg-config.
6find_package(PkgConfig)
7pkg_check_modules(GraphicsMagick GraphicsMagick++ REQUIRED)
8
# Vendored dependencies (git submodules under vendor/).
9add_subdirectory(vendor/verbly)
10add_subdirectory(vendor/libtwittercpp)
11add_subdirectory(vendor/yaml-cpp EXCLUDE_FROM_ALL)
12
13include_directories(vendor/verbly/lib vendor/libtwittercpp/src vendor/libtwittercpp/vendor/curlcpp/include ${GraphicsMagick_INCLUDE_DIRS} vendor/yaml-cpp/include)
14add_executable(advice main.cpp advice.cpp sentence.cpp)
# The sources are compiled as C++11, with no fallback to an older standard.
15set_property(TARGET advice PROPERTY CXX_STANDARD 11)
16set_property(TARGET advice PROPERTY CXX_STANDARD_REQUIRED ON)
17target_link_libraries(advice verbly twitter++ ${GraphicsMagick_LIBRARIES} yaml-cpp)
diff --git a/advice.cpp b/advice.cpp new file mode 100644 index 0000000..320f719 --- /dev/null +++ b/advice.cpp
@@ -0,0 +1,359 @@
1#include "advice.h"
2#include <algorithm>
3#include <iostream>
4#include <vector>
5#include <deque>
6#include <curl_easy.h>
7#include <curl_header.h>
8#include <sstream>
9#include <list>
10#include <chrono>
11#include <thread>
12#include <yaml-cpp/yaml.h>
13
14advice::advice(
15 std::string configFile,
16 std::mt19937& rng) :
17 rng_(rng)
18{
19 // Load the config file.
20 YAML::Node config = YAML::LoadFile(configFile);
21
22 // Set up the Twitter client.
23 twitter::auth auth;
24 auth.setConsumerKey(config["consumer_key"].as<std::string>());
25 auth.setConsumerSecret(config["consumer_secret"].as<std::string>());
26 auth.setAccessKey(config["access_key"].as<std::string>());
27 auth.setAccessSecret(config["access_secret"].as<std::string>());
28
29 client_ = std::unique_ptr<twitter::client>(new twitter::client(auth));
30
31 // Set up the verbly database.
32 database_ = std::unique_ptr<verbly::database>(new verbly::database(config["verbly_datafile"].as<std::string>()));
33
34 // Set up the sentence generator.
35 generator_ = std::unique_ptr<sentence>(new sentence(*database_, rng_));
36}
37
38verbly::word advice::generateImageNoun() const
39{
40 verbly::filter whitelist =
41 (verbly::notion::wnid == 109287968) // Geological formations
42 || (verbly::notion::wnid == 109208496) // Asterisms (collections of stars)
43 || (verbly::notion::wnid == 109239740) // Celestial bodies
44 || (verbly::notion::wnid == 109277686) // Exterrestrial objects (comets and meteroids)
45 || (verbly::notion::wnid == 109403211) // Radiators (supposedly natural radiators but actually these are just pictures of radiators)
46 || (verbly::notion::wnid == 109416076) // Rocks
47 || (verbly::notion::wnid == 105442131) // Chromosomes
48 || (verbly::notion::wnid == 100324978) // Tightrope walking
49 || (verbly::notion::wnid == 100326094) // Rock climbing
50 || (verbly::notion::wnid == 100433458) // Contact sports
51 || (verbly::notion::wnid == 100433802) // Gymnastics
52 || (verbly::notion::wnid == 100439826) // Track and field
53 || (verbly::notion::wnid == 100440747) // Skiing
54 || (verbly::notion::wnid == 100441824) // Water sport
55 || (verbly::notion::wnid == 100445351) // Rowing
56 || (verbly::notion::wnid == 100446980) // Archery
57 // TODO: add more sports
58 || (verbly::notion::wnid == 100021939) // Artifacts
59 || (verbly::notion::wnid == 101471682) // Vertebrates
60 ;
61
62 verbly::filter blacklist =
63 (verbly::notion::wnid == 106883725) // swastika
64 || (verbly::notion::wnid == 104416901) // tetraskele
65 || (verbly::notion::wnid == 102512053) // fish
66 || (verbly::notion::wnid == 103575691) // instrument of execution
67 ;
68
69 verbly::query<verbly::word> pictureQuery = database_->words(
70 (verbly::notion::fullHypernyms %= whitelist)
71 && !(verbly::notion::fullHypernyms %= blacklist)
72 && (verbly::notion::partOfSpeech == verbly::part_of_speech::noun)
73 && (verbly::notion::numOfImages >= 1));
74
75 return pictureQuery.first();
76}
77
78Magick::Image advice::getImageForNoun(verbly::word pictured) const
79{
80 // Accept string from Google Chrome
81 std::string accept = "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8";
82 curl::curl_header headers;
83 headers.add(accept);
84
85 int backoff = 0;
86
87 std::cout << "Generating noun..." << std::endl;
88 std::cout << "Noun: " << pictured.getBaseForm() << std::endl;
89 std::cout << "Getting URLs..." << std::endl;
90
91 std::string lstdata;
92 while (lstdata.empty())
93 {
94 std::ostringstream lstbuf;
95 curl::curl_ios<std::ostringstream> lstios(lstbuf);
96 curl::curl_easy lsthandle(lstios);
97 std::string lsturl = pictured.getNotion().getImageNetUrl();
98 lsthandle.add<CURLOPT_URL>(lsturl.c_str());
99
100 try
101 {
102 lsthandle.perform();
103 } catch (const curl::curl_easy_exception& e)
104 {
105 e.print_traceback();
106
107 backoff++;
108 std::cout << "Waiting for " << backoff << " seconds..." << std::endl;
109
110 std::this_thread::sleep_for(std::chrono::seconds(backoff));
111
112 continue;
113 }
114
115 backoff = 0;
116
117 if (lsthandle.get_info<CURLINFO_RESPONSE_CODE>().get() != 200)
118 {
119 throw could_not_get_images();
120 }
121
122 std::cout << "Got URLs." << std::endl;
123 lstdata = lstbuf.str();
124 }
125
126 std::vector<std::string> lstvec = verbly::split<std::vector<std::string>>(lstdata, "\r\n");
127 if (lstvec.empty())
128 {
129 throw could_not_get_images();
130 }
131
132 std::shuffle(std::begin(lstvec), std::end(lstvec), rng_);
133
134 std::deque<std::string> urls;
135 for (std::string& url : lstvec)
136 {
137 urls.push_back(url);
138 }
139
140 bool found = false;
141 Magick::Blob img;
142 Magick::Image pic;
143
144 while (!found && !urls.empty())
145 {
146 std::string url = urls.front();
147 urls.pop_front();
148
149 std::ostringstream imgbuf;
150 curl::curl_ios<std::ostringstream> imgios(imgbuf);
151 curl::curl_easy imghandle(imgios);
152
153 imghandle.add<CURLOPT_HTTPHEADER>(headers.get());
154 imghandle.add<CURLOPT_URL>(url.c_str());
155 imghandle.add<CURLOPT_CONNECTTIMEOUT>(30);
156
157 try
158 {
159 imghandle.perform();
160 } catch (curl::curl_easy_exception error) {
161 error.print_traceback();
162
163 continue;
164 }
165
166 if (imghandle.get_info<CURLINFO_RESPONSE_CODE>().get() != 200)
167 {
168 continue;
169 }
170
171 std::string content_type = imghandle.get_info<CURLINFO_CONTENT_TYPE>().get();
172 if (content_type.substr(0, 6) != "image/")
173 {
174 continue;
175 }
176
177 std::string imgstr = imgbuf.str();
178 img = Magick::Blob(imgstr.c_str(), imgstr.length());
179 pic.read(img);
180 if (pic.rows() == 0)
181 {
182 continue;
183 }
184
185 // Too small!
186 if (pic.columns() < 400)
187 {
188 continue;
189 }
190
191 std::cout << url << std::endl;
192 found = true;
193 }
194
195 if (!found)
196 {
197 throw could_not_get_images();
198 }
199
200 return pic;
201}
202
203Magick::Image advice::layoutImage(Magick::Image pic, std::string title) const
204{
205 // Want a 16:9 aspect
206 int idealwidth = pic.rows()*(16.0/9.0);
207 if (idealwidth > pic.columns())
208 {
209 // If the image is narrower than the ideal width, use full width.
210 int newheight = pic.columns()*(9.0/16.0);
211
212 // Just take a slice out of the middle of the image.
213 int cropy = ((double)(pic.rows() - newheight))/2.0;
214
215 pic.crop(Magick::Geometry(pic.columns(), newheight, 0, cropy));
216 } else {
217 // If the image is wider than the ideal width, use full height.
218 // Just take a slice out of the middle of the image.
219 int cropx = ((double)(pic.columns() - idealwidth))/2.0;
220
221 pic.crop(Magick::Geometry(idealwidth, pic.rows(), cropx, 0));
222 }
223
224 pic.zoom(Magick::Geometry(400, 225));
225
226 // Layout the text.
227 std::list<std::string> words = verbly::split<std::list<std::string>>(title, " ");
228 std::vector<std::string> lines;
229 std::list<std::string> cur;
230 Magick::TypeMetric metric;
231 pic.fontPointsize(20);
232 pic.font("@coolvetica.ttf");
233
234 while (!words.empty())
235 {
236 cur.push_back(words.front());
237
238 std::string prefixText = verbly::implode(std::begin(cur), std::end(cur), " ");
239 pic.fontTypeMetrics(prefixText, &metric);
240
241 if (metric.textWidth() > 380)
242 {
243 if (cur.size() == 1)
244 {
245 words.pop_front();
246 } else {
247 cur.pop_back();
248 }
249
250 prefixText = verbly::implode(std::begin(cur), std::end(cur), " ");
251 lines.push_back(prefixText);
252 cur.clear();
253 } else {
254 words.pop_front();
255 }
256 }
257
258 if (!cur.empty())
259 {
260 std::string prefixText = verbly::implode(std::begin(cur), std::end(cur), " ");
261 lines.push_back(prefixText);
262 }
263
264 int lineHeight = metric.textHeight()-2;
265 int blockHeight = lineHeight * lines.size() + 18;
266 std::cout << "line " << lineHeight << "; block " << blockHeight << std::endl;
267
268 std::list<Magick::Drawable> drawList;
269 drawList.push_back(Magick::DrawableFillColor("black"));
270 drawList.push_back(Magick::DrawableFillOpacity(0.5));
271 drawList.push_back(Magick::DrawableStrokeColor("transparent"));
272 drawList.push_back(Magick::DrawableRectangle(0, 225-blockHeight-20, 400, 255)); // 0, 225-60, 400, 255
273 pic.draw(drawList);
274
275 drawList.clear();
276 drawList.push_back(Magick::DrawableFont("@coolvetica.ttf"));
277 drawList.push_back(Magick::DrawableFillColor("white"));
278 drawList.push_back(Magick::DrawablePointSize(14));
279 drawList.push_back(Magick::DrawableText(10, 225-blockHeight+4, "How to")); // 10, 255-62-4
280 pic.draw(drawList);
281
282 for (int i=0; i<lines.size(); i++)
283 {
284 drawList.clear();
285 drawList.push_back(Magick::DrawableFont("@coolvetica.ttf"));
286 drawList.push_back(Magick::DrawableFillColor("white"));
287 drawList.push_back(Magick::DrawablePointSize(20));
288 drawList.push_back(Magick::DrawableText(10, 255-blockHeight+(i*lineHeight)-4, lines[i])); // 10, 255-20-25
289 pic.draw(drawList);
290 }
291
292 return pic;
293}
294
295void advice::sendTweet(Magick::Image pic, std::string title) const
296{
297 Magick::Blob outputimg;
298
299 try
300 {
301 pic.magick("png");
302 pic.write(&outputimg);
303 } catch (const Magick::WarningCoder& e)
304 {
305 // Ignore
306 }
307
308 std::cout << "Generated image!" << std::endl << "Tweeting..." << std::endl;
309
310 std::string tweetText;
311 size_t tweetLim = 140 - client_->getConfiguration().getCharactersReservedPerMedia();
312 if (title.length() > tweetLim)
313 {
314 tweetText = title.substr(0, tweetLim - 1) + "…";
315 } else {
316 tweetText = title;
317 }
318
319 long media_id = client_->uploadMedia("image/png", (const char*) outputimg.data(), outputimg.length());
320 client_->updateStatus(tweetText, {media_id});
321}
322
323void advice::run() const
324{
325 for (;;)
326 {
327 try
328 {
329 // Pick a noun to use for the picture.
330 verbly::word pictured = generateImageNoun();
331
332 // Find an image of the picked noun.
333 Magick::Image pic = getImageForNoun(pictured);
334
335 // Generate the image text.
336 std::string title = generator_->generate();
337
338 // Layout the image.
339 Magick::Image output = layoutImage(std::move(pic), title);
340
341 // Tweet the image.
342 sendTweet(std::move(output), title);
343
344 std::cout << "Done!" << std::endl << "Waiting..." << std::endl << std::endl;
345
346 // Wait.
347 std::this_thread::sleep_for(std::chrono::hours(1));
348 } catch (const could_not_get_images& ex)
349 {
350 std::cout << ex.what() << std::endl;
351 } catch (const Magick::ErrorImage& ex)
352 {
353 std::cout << "Image error: " << ex.what() << std::endl;
354 } catch (const twitter::twitter_error& ex)
355 {
356 std::cout << "Twitter error: " << ex.what() << std::endl;
357 }
358 }
359}
diff --git a/advice.h b/advice.h new file mode 100644 index 0000000..33dc531 --- /dev/null +++ b/advice.h
@@ -0,0 +1,46 @@
1#ifndef ADVICE_H_5934AC1B
2#define ADVICE_H_5934AC1B
3
4#include <random>
5#include <twitter.h>
6#include <verbly.h>
7#include <string>
8#include <memory>
9#include <Magick++.h>
10#include <stdexcept>
11#include "sentence.h"
12
// Twitter bot that repeatedly picks a noun, downloads a picture of it,
// overlays a generated "How to ..." title and posts the result.
13class advice {
14public:
15
 // Reads the YAML file at configFile and sets up the Twitter client, the
 // verbly database and the sentence generator. rng is stored by reference.
16 advice(
17 std::string configFile,
18 std::mt19937& rng);
19
 // Runs the posting loop.
20 void run() const;
21
22private:
23
 // Thrown when no usable image could be obtained for a noun.
24 class could_not_get_images : public std::runtime_error {
25 public:
26
27 could_not_get_images() : std::runtime_error("Could not get images for noun")
28 {
29 }
30 };
31
 // Picks the noun to be pictured.
32 verbly::word generateImageNoun() const;
33
 // Downloads an image for the noun; may throw could_not_get_images.
34 Magick::Image getImageForNoun(verbly::word pictured) const;
35
 // Crops and scales the picture and draws the title over it.
36 Magick::Image layoutImage(Magick::Image bg, std::string title) const;
37
 // Uploads the picture and posts it with the title as status text.
38 void sendTweet(Magick::Image pic, std::string title) const;
39
 // Random engine owned by the caller.
40 std::mt19937& rng_;
 // Declaration order matters: generator_ is constructed from *database_, and
 // members are destroyed in reverse order, so generator_ goes away first.
41 std::unique_ptr<verbly::database> database_;
42 std::unique_ptr<sentence> generator_;
43 std::unique_ptr<twitter::client> client_;
44};
45
46#endif /* end of include guard: ADVICE_H_5934AC1B */
diff --git a/coolvetica.ttf b/coolvetica.ttf new file mode 100755 index 0000000..410ca31 --- /dev/null +++ b/coolvetica.ttf
Binary files differ
diff --git a/main.cpp b/main.cpp new file mode 100644 index 0000000..3661105 --- /dev/null +++ b/main.cpp
@@ -0,0 +1,33 @@
1#include "advice.h"
2
3int main(int argc, char** argv)
4{
5 Magick::InitializeMagick(nullptr);
6
7 std::random_device random_device;
8 std::mt19937 random_engine{random_device()};
9
10 if (argc != 2)
11 {
12 std::cout << "usage: advice [configfile]" << std::endl;
13 return -1;
14 }
15
16 std::string configfile(argv[1]);
17
18 try
19 {
20 advice bot(configfile, random_engine);
21
22 try
23 {
24 bot.run();
25 } catch (const std::exception& ex)
26 {
27 std::cout << "Error running bot: " << ex.what() << std::endl;
28 }
29 } catch (const std::exception& ex)
30 {
31 std::cout << "Error initializing bot: " << ex.what() << std::endl;
32 }
33}
diff --git a/sentence.cpp b/sentence.cpp new file mode 100644 index 0000000..421aaf6 --- /dev/null +++ b/sentence.cpp
@@ -0,0 +1,754 @@
1#include "sentence.h"
2#include <algorithm>
3#include <list>
4#include <set>
5
6sentence::sentence(
7 const verbly::database& database,
8 std::mt19937& rng) :
9 database_(database),
10 rng_(rng)
11{
12}
13
14std::string sentence::generate() const
15{
16 // Generate the form that the title should take.
17 verbly::token form;
18 std::set<std::string> synrestrs {"infinitive_phrase", "bare", "subjectless"};
19 std::set<std::string> secondSyn {"participle_phrase", "subjectless"};
20 std::set<std::string> adjSyn {"adjective_phrase"};
21
22 if (std::bernoulli_distribution(1.0/6.0)(rng_))
23 {
24 form << "not";
25 }
26
27 if (std::bernoulli_distribution(1.0/6.0)(rng_))
28 {
29 form << "be";
30 form << adjSyn;
31 } else {
32 if (std::bernoulli_distribution(1.0/6.0)(rng_))
33 {
34 form << "get";
35 synrestrs.insert("experiencer");
36 synrestrs.insert("past_participle");
37 }
38
39 form << synrestrs;
40 }
41
42 if (std::bernoulli_distribution(1.0/5.0)(rng_))
43 {
44 if (std::bernoulli_distribution(1.0/4.0)(rng_))
45 {
46 form << "without";
47 } else {
48 form << "while";
49 }
50
51 form << secondSyn;
52 }
53
54 // Attempt to compile the form, restarting if a bad word is generated.
55 std::set<std::string> badWords = {"raped"};
56
57 verbly::token tok = form;
58 std::list<std::string> words;
59 for (;;)
60 {
61 // Compile the form.
62 while (!tok.isComplete())
63 {
64 visit(tok);
65 }
66
67 std::string compiled = tok.compile();
68 words = verbly::split<std::list<std::string>>(compiled, " ");
69
70 // Ensure that there are no bad words in the output.
71 if (!std::any_of(std::begin(words), std::end(words), [&badWords] (const std::string& word) {
72 std::string canonWord;
73
74 for (char ch : word)
75 {
76 if (std::isalpha(ch))
77 {
78 canonWord.push_back(std::tolower(ch));
79 }
80 }
81
82 return (badWords.count(canonWord) == 1);
83 })) {
84 break;
85 } else {
86 std::cout << "Bad word generated." << std::endl;
87 }
88 }
89
90 // Put the form into title case.
91 for (std::string& word : words)
92 {
93 if ((word[0] == '"') && (word.length() > 1))
94 {
95 word[1] = std::toupper(word[1]);
96 } else {
97 word[0] = std::toupper(word[0]);
98 }
99 }
100
101 return verbly::implode(std::begin(words), std::end(words), " ");
102}
103
104verbly::filter sentence::parseSelrestrs(
105 verbly::selrestr selrestr) const
106{
107 switch (selrestr.getType())
108 {
109 case verbly::selrestr::type::empty:
110 {
111 return {};
112 }
113
114 case verbly::selrestr::type::singleton:
115 {
116 verbly::filter result;
117
118 if (selrestr.getRestriction() == "concrete")
119 {
120 result = (verbly::notion::wnid == 100001930); // physical entity
121 } else if (selrestr.getRestriction() == "time")
122 {
123 result = (verbly::notion::wnid == 100028270); // time
124 } else if (selrestr.getRestriction() == "state")
125 {
126 result = (verbly::notion::wnid == 100024720); // state
127 } else if (selrestr.getRestriction() == "abstract")
128 {
129 result = (verbly::notion::wnid == 100002137); // abstract entity
130 } else if (selrestr.getRestriction() == "scalar")
131 {
132 result = (verbly::notion::wnid == 103835412); // number
133 } else if (selrestr.getRestriction() == "currency")
134 {
135 result = (verbly::notion::wnid == 105050379); // currency
136 } else if (selrestr.getRestriction() == "location")
137 {
138 result = (verbly::notion::wnid == 100027167); // location
139 } else if (selrestr.getRestriction() == "organization")
140 {
141 result = (verbly::notion::wnid == 100237078); // organization
142 } else if (selrestr.getRestriction() == "int_control")
143 {
144 result = (verbly::notion::wnid == 100007347); // causal agent
145 } else if (selrestr.getRestriction() == "natural")
146 {
147 result = (verbly::notion::wnid == 100019128); // natural object
148 } else if (selrestr.getRestriction() == "phys_obj")
149 {
150 result = (verbly::notion::wnid == 100002684); // physical object
151 } else if (selrestr.getRestriction() == "solid")
152 {
153 result = (verbly::notion::wnid == 113860793); // solid
154 } else if (selrestr.getRestriction() == "shape")
155 {
156 result = (verbly::notion::wnid == 100027807); // shape
157 } else if (selrestr.getRestriction() == "substance")
158 {
159 result = (verbly::notion::wnid == 100019613); // substance
160 } else if (selrestr.getRestriction() == "idea")
161 {
162 result = (verbly::notion::wnid == 105803379); // idea
163 } else if (selrestr.getRestriction() == "sound")
164 {
165 result = (verbly::notion::wnid == 107111047); // sound
166 } else if (selrestr.getRestriction() == "communication")
167 {
168 result = (verbly::notion::wnid == 100033020); // communication
169 } else if (selrestr.getRestriction() == "region")
170 {
171 result = (verbly::notion::wnid == 105221895); // region
172 } else if (selrestr.getRestriction() == "place")
173 {
174 result = (verbly::notion::wnid == 100586262); // place
175 } else if (selrestr.getRestriction() == "machine")
176 {
177 result = (verbly::notion::wnid == 102958343); // machine
178 } else if (selrestr.getRestriction() == "animate")
179 {
180 result = (verbly::notion::wnid == 100004258); // animate thing
181 } else if (selrestr.getRestriction() == "plant")
182 {
183 result = (verbly::notion::wnid == 103956922); // plant
184 } else if (selrestr.getRestriction() == "comestible")
185 {
186 result = (verbly::notion::wnid == 100021265); // food
187 } else if (selrestr.getRestriction() == "artifact")
188 {
189 result = (verbly::notion::wnid == 100021939); // artifact
190 } else if (selrestr.getRestriction() == "vehicle")
191 {
192 result = (verbly::notion::wnid == 104524313); // vehicle
193 } else if (selrestr.getRestriction() == "human")
194 {
195 result = (verbly::notion::wnid == 100007846); // person
196 } else if (selrestr.getRestriction() == "animal")
197 {
198 result = (verbly::notion::wnid == 100015388); // animal
199 } else if (selrestr.getRestriction() == "body_part")
200 {
201 result = (verbly::notion::wnid == 105220461); // body part
202 } else if (selrestr.getRestriction() == "garment")
203 {
204 result = (verbly::notion::wnid == 103051540); // clothing
205 } else if (selrestr.getRestriction() == "tool")
206 {
207 result = (verbly::notion::wnid == 104451818); // tool
208 } else {
209 return {};
210 }
211
212 std::cout << selrestr.getRestriction() << " (" << selrestr.getPos() << ")" << std::endl;
213
214 if (selrestr.getPos())
215 {
216 return (verbly::notion::fullHypernyms %= result);
217 } else {
218 return !(verbly::notion::fullHypernyms %= result);
219 }
220 }
221
222 case verbly::selrestr::type::group:
223 {
224 std::cout << "or: " << selrestr.getOrlogic() << std::endl;
225 verbly::filter ret(selrestr.getOrlogic());
226
227 for (const verbly::selrestr& child : selrestr)
228 {
229 ret += parseSelrestrs(child);
230 }
231
232 return ret;
233 }
234 }
235}
236
237bool sentence::requiresSelrestr(
238 std::string restriction,
239 verbly::selrestr selrestr) const
240{
241 switch (selrestr.getType())
242 {
243 case verbly::selrestr::type::empty:
244 {
245 return false;
246 }
247
248 case verbly::selrestr::type::singleton:
249 {
250 if (selrestr.getRestriction() == restriction)
251 {
252 return selrestr.getPos();
253 } else {
254 return false;
255 }
256 }
257
258 case verbly::selrestr::type::group:
259 {
260 if (selrestr.getOrlogic())
261 {
262 return std::all_of(std::begin(selrestr), std::end(selrestr), [=] (const verbly::selrestr& s) {
263 return requiresSelrestr(restriction, s);
264 });
265 } else {
266 return std::any_of(std::begin(selrestr), std::end(selrestr), [=] (const verbly::selrestr& s) {
267 return requiresSelrestr(restriction, s);
268 });
269 }
270 }
271 }
272}
273
274verbly::word sentence::generateStandardNoun(
275 std::string role,
276 verbly::selrestr selrestrs) const
277{
278 std::geometric_distribution<int> tagdist(0.5); // 0.06
279 std::vector<verbly::word> result;
280 bool trySelection = true;
281
282 while (result.empty())
283 {
284 verbly::filter condition =
285 (verbly::notion::partOfSpeech == verbly::part_of_speech::noun)
286 && (verbly::form::proper == false)
287 //&& (verbly::form::complexity == 1)
288 // && (verbly::word::tagCount >= tagdist(rng_)) // Favor more common words
289 && (verbly::word::tagCount >= 1)
290 && !(verbly::word::usageDomains %= (verbly::notion::wnid == 106718862)); // Blacklist ethnic slurs
291
292 // Only use selection restrictions for a first attempt.
293 if (trySelection)
294 {
295 verbly::filter selrestrCondition = parseSelrestrs(selrestrs).compact();
296
297 if (selrestrCondition.getType() != verbly::filter::type::empty)
298 {
299 condition &= std::move(selrestrCondition);
300 } else if (role == "Attribute")
301 {
302 condition &= (verbly::notion::fullHypernyms %= (verbly::notion::wnid == 100024264)); // attribute
303 } else if (role == "Instrument")
304 {
305 condition &= (verbly::notion::fullHypernyms %= (verbly::notion::wnid == 104451818)); // tool
306 } else if (role == "Agent")
307 {
308 condition &= (verbly::notion::fullHypernyms %= (verbly::notion::wnid == 100007347)); // causal agent
309 }
310
311 trySelection = false;
312 } else {
313 std::cout << "Selection failed" << std::endl;
314 }
315
316 result = database_.words(condition).all();
317 }
318
319 return result.front();
320}
321
322verbly::token sentence::generateStandardNounPhrase(
323 const verbly::word& noun,
324 std::string role,
325 bool plural,
326 bool definite) const
327{
328 verbly::token utter;
329 verbly::word sounder = noun;
330 verbly::word descript;
331
332 if (std::bernoulli_distribution(1.0/8.0)(rng_))
333 {
334 std::geometric_distribution<int> tagdist(0.2);
335 descript = database_.words(
336 (verbly::word::tagCount >= tagdist(rng_))
337 && (verbly::notion::partOfSpeech == verbly::part_of_speech::adjective)).first();
338
339 sounder = descript;
340 }
341
342 if ((std::bernoulli_distribution(1.0/3.0)(rng_)) && (definite))
343 {
344 utter << "the";
345
346 if (std::bernoulli_distribution(1.0/2.0)(rng_))
347 {
348 plural = true;
349 }
350 } else {
351 if ((role != "Theme") && (role != "Attribute") && std::bernoulli_distribution(1.0/2.0)(rng_))
352 {
353 utter << "your";
354 } else if (!plural) {
355 if (sounder.getLemma().getBaseForm().startsWithVowelSound())
356 {
357 utter << "an";
358 } else {
359 utter << "a";
360 }
361 }
362 }
363
364 if (descript)
365 {
366 utter << descript;
367 }
368
369 if (plural && noun.getLemma().hasInflection(verbly::inflection::plural))
370 {
371 utter << verbly::token(noun, verbly::inflection::plural);
372 } else {
373 utter << noun;
374 }
375
376 return utter;
377}
378
// Builds a clause around a randomly chosen verb and one of its frames.
//
// The syntactic restrictions carried by `it` steer the result:
// "experiencer" additionally constrains part 2 of the frame and drops it from
// the output, "subjectless" drops the first part, and "progressive",
// "past_participle", "infinitive_phrase" (optionally "bare") and
// "participle_phrase" select the verb form. Each remaining frame part is
// turned into tokens; fill-in tokens are left for visit() to expand later.
379verbly::token sentence::generateClause(
380 const verbly::token& it) const
381{
382 verbly::token utter;
383 std::geometric_distribution<int> tagdist(0.07);
384 std::vector<verbly::word> verbDataset;
385
 // Frames must have at least two parts, start with an Agent noun phrase and
 // contain no part carrying the "adjp" restriction.
386 verbly::filter frameCondition =
387 (verbly::frame::length >= 2)
388 && (verbly::frame::part(0) %= (
389 (verbly::part::type == verbly::part_type::noun_phrase)
390 && (verbly::part::role == "Agent"))
391 && !(verbly::frame::part() %= (
392 verbly::part::synrestr %= "adjp")));
393
394 if (it.hasSynrestr("experiencer"))
395 {
 // Part 2 must be a non-genitive Patient or Experiencer noun phrase.
396 frameCondition &=
397 (verbly::frame::part(2) %=
398 (verbly::part::type == verbly::part_type::noun_phrase)
399 && !(verbly::part::synrestr %= "genitive")
400 && ((verbly::part::role == "Patient")
401 || (verbly::part::role == "Experiencer")));
402 }
403
404 verbly::filter verbCondition =
405 (verbly::notion::partOfSpeech == verbly::part_of_speech::verb)
406 && frameCondition;
407
 // Require the verb to have the inflection that will be emitted below.
 // NOTE(review): "progressive" is mapped to the s_form here and in the verb
 // case further down — confirm that this naming is intended.
408 if (it.hasSynrestr("participle_phrase"))
409 {
410 verbCondition &= (verbly::lemma::form(verbly::inflection::ing_form));
411 } else if (it.hasSynrestr("progressive"))
412 {
413 verbCondition &= (verbly::lemma::form(verbly::inflection::s_form));
414 } else if (it.hasSynrestr("past_participle"))
415 {
416 verbCondition &= (verbly::lemma::form(verbly::inflection::past_participle));
417 }
418
419 // Because of the tag distribution, it's possible (albeit extremely unlikely)
420 // for the verb query to fail, so we loop until it succeeds.
421 while (verbDataset.empty())
422 {
423 verbDataset = database_.words(
424 verbCondition
425 && (verbly::word::tagCount >= tagdist(rng_))
426 ).all();
427 }
428
429 verbly::word verb = verbDataset.front();
430 verbly::frame frame = database_.frames(frameCondition && verb).first();
 // NOTE(review): getParts() is called twice to form one iterator range; this
 // assumes it returns a reference to the same container — confirm.
431 std::list<verbly::part> parts(std::begin(frame.getParts()), std::end(frame.getParts()));
432
433 if (it.hasSynrestr("experiencer"))
434 {
435 // Ignore the direct object.
436 parts.erase(std::next(parts.begin(), 2));
437 }
438
439 if (it.hasSynrestr("subjectless"))
440 {
441 // Ignore the subject.
442 parts.pop_front();
443 }
444
 // Emit tokens for every remaining part of the frame.
445 for (const verbly::part& part : parts)
446 {
447 switch (part.getType())
448 {
449 case verbly::part_type::noun_phrase:
450 {
451 std::cout << "NP: ";
452 for (auto& s : part.getNounSynrestrs())
453 {
454 std::cout << s << " ";
455 }
456 std::cout << std::endl;
457
 // Special noun phrase kinds are handled first; the final else branch
 // produces an ordinary noun phrase.
458 if (requiresSelrestr("currency", part.getNounSelrestrs()))
459 {
 // A dollar amount: one leading digit 1-9 followed by up to six zeros.
460 int lead = std::uniform_int_distribution<int>(1,9)(rng_);
461 int tail = std::uniform_int_distribution<int>(0,6)(rng_);
462 std::string tailStr(tail, '0');
463
464 utter << ("$" + std::to_string(lead) + tailStr);
465 } else if (part.nounHasSynrestr("adjp"))
466 {
467 utter << std::set<std::string>({"adjective_phrase"});
468 } else if ((part.nounHasSynrestr("be_sc_ing"))
469 || (part.nounHasSynrestr("ac_ing"))
470 || (part.nounHasSynrestr("sc_ing"))
471 || (part.nounHasSynrestr("np_omit_ing"))
472 || (part.nounHasSynrestr("oc_ing")))
473 {
474 utter << std::set<std::string>({"participle_phrase", "subjectless"});
475 } else if ((part.nounHasSynrestr("poss_ing"))
476 || (part.nounHasSynrestr("possing"))
477 || (part.nounHasSynrestr("pos_ing")))
478 {
479 utter << "your";
480 utter << std::set<std::string>({"participle_phrase", "subjectless"});
481 } else if (part.nounHasSynrestr("genitive"))
482 {
483 utter << "your";
484 } else if (part.nounHasSynrestr("adv_loc"))
485 {
486 if (std::bernoulli_distribution(1.0/2.0)(rng_))
487 {
488 utter << "here";
489 } else {
490 utter << "there";
491 }
492 } else if (part.nounHasSynrestr("refl"))
493 {
494 utter << "yourself";
495 } else if ((part.nounHasSynrestr("sc_to_inf"))
496 || (part.nounHasSynrestr("ac_to_inf"))
497 || (part.nounHasSynrestr("vc_to_inf"))
498 || (part.nounHasSynrestr("rs_to_inf"))
499 || (part.nounHasSynrestr("oc_to_inf")))
500 {
501 utter << std::set<std::string>({"infinitive_phrase", "subjectless"});
502 } else if (part.nounHasSynrestr("oc_bare_inf"))
503 {
504 utter << std::set<std::string>({"infinitive_phrase", "bare", "subjectless"});
505 } else if (part.nounHasSynrestr("wh_comp"))
506 {
507 utter << "whether";
508
509 verbly::token sentence(std::set<std::string>({"progressive"}));
510 utter << generateClause(sentence);
511 } else if (part.nounHasSynrestr("that_comp"))
512 {
513 utter << "that";
514 utter << "they";
515
516 verbly::token sentence(std::set<std::string>({"subjectless"}));
517 utter << generateClause(sentence);
518 } else if (part.nounHasSynrestr("what_extract"))
519 {
520 utter << "what";
521
522 verbly::token sentence(std::set<std::string>({"progressive", "experiencer"}));
523 utter << generateClause(sentence);
524 } else if (part.nounHasSynrestr("how_extract"))
525 {
526 utter << "how";
527
528 verbly::token sentence(std::set<std::string>({"progressive"}));
529 utter << generateClause(sentence);
530 } else if (part.nounHasSynrestr("wh_inf"))
531 {
532 utter << "how";
533
534 verbly::token sentence(std::set<std::string>({"infinitive_phrase", "subjectless"}));
535 utter << generateClause(sentence);
536 } else if (part.nounHasSynrestr("what_inf"))
537 {
538 utter << "what";
539
540 verbly::token sentence(std::set<std::string>({"infinitive_phrase", "subjectless", "experiencer"}));
541 utter << generateClause(sentence);
542 } else if (part.nounHasSynrestr("wheth_inf"))
543 {
544 utter << "whether";
545
546 verbly::token sentence(std::set<std::string>({"infinitive_phrase", "subjectless"}));
547 utter << generateClause(sentence);
548 } else if (part.nounHasSynrestr("quotation"))
549 {
 // Quotations are expanded right away so they can be wrapped in quotes.
550 verbly::token sentence(std::set<std::string>({"participle_phrase"}));
551 while (!sentence.isComplete())
552 {
553 visit(sentence);
554 }
555
556 utter << ("\"" + sentence.compile() + "\"");
557 } else {
558 verbly::word noun = generateStandardNoun(part.getNounRole(), part.getNounSelrestrs());
559
560 bool plural = part.nounHasSynrestr("plural");
561 if (!plural)
562 {
563 plural = requiresSelrestr("plural", part.getNounSelrestrs());
564 }
565
566 utter << generateStandardNounPhrase(
567 noun,
568 part.getNounRole(),
569 plural,
570 part.nounHasSynrestr("definite"));
571
 // NOTE(review): parts with "ac_ing" are already consumed by an earlier
 // branch of this chain, so only "acc_ing" appears able to match here.
572 if (part.nounHasSynrestr("acc_ing") || part.nounHasSynrestr("ac_ing"))
573 {
574 utter << std::set<std::string>({"participle_phrase", "subjectless"});
575 }
576 }
577
578 break;
579 }
580
581 case verbly::part_type::verb:
582 {
583 std::cout << "V: " << verb.getBaseForm() << std::endl;
584
 // Choose the verb form requested by the restrictions on `it`.
585 if (it.hasSynrestr("progressive"))
586 {
587 utter << verbly::token(verb, verbly::inflection::s_form);
588 } else if (it.hasSynrestr("past_participle"))
589 {
590 utter << verbly::token(verb, verbly::inflection::past_participle);
591 } else if (it.hasSynrestr("infinitive_phrase"))
592 {
593 if (!it.hasSynrestr("bare"))
594 {
595 utter << "to";
596 }
597
598 utter << verb;
599 } else if (it.hasSynrestr("participle_phrase"))
600 {
601 utter << verbly::token(verb, verbly::inflection::ing_form);
602 } else {
603 utter << verb;
604 }
605
606 break;
607 }
608
609 case verbly::part_type::preposition:
610 {
611 std::cout << "PREP" << std::endl;
612
613 if (part.isPrepositionLiteral())
614 {
 // NOTE(review): assumes the list of choices is non-empty; with an
 // empty list size()-1 would not be a valid upper bound — confirm.
615 int choiceIndex = std::uniform_int_distribution<int>(0, part.getPrepositionChoices().size()-1)(rng_);
616 utter << part.getPrepositionChoices()[choiceIndex];
617 } else {
 // Otherwise the choices name preposition groups; pick a preposition
 // from the combined filter.
618 verbly::filter pgf(true);
619 for (const std::string& choice : part.getPrepositionChoices())
620 {
621 pgf += (verbly::notion::prepositionGroup == choice);
622 }
623
624 utter << database_.words(pgf && (verbly::notion::partOfSpeech == verbly::part_of_speech::preposition)).first();
625 }
626
627 break;
628 }
629
630 case verbly::part_type::adjective:
631 {
632 std::cout << "ADJ" << std::endl;
633
634 utter << std::set<std::string>({"adjective_phrase"});
635
636 break;
637 }
638
639 case verbly::part_type::adverb:
640 {
641 std::cout << "ADV" << std::endl;
642
643 utter << std::set<std::string>({"adverb_phrase"});
644
645 break;
646 }
647
648 case verbly::part_type::literal:
649 {
650 std::cout << "LIT" << std::endl;
651
652 utter << part.getLiteralValue();
653
654 break;
655 }
656
657 case verbly::part_type::invalid:
658 {
659 // Nope
660
661 break;
662 }
663 }
664 }
665
 // A clause that consists of a single part sometimes gets an adverb phrase.
666 if ((parts.size() == 1) && (std::bernoulli_distribution(1.0/4.0)(rng_)))
667 {
668 utter << std::set<std::string>({"adverb_phrase"});
669 }
670
671 return utter;
672}
673
674void sentence::visit(verbly::token& it) const
675{
676 switch (it.getType())
677 {
678 case verbly::token::type::utterance:
679 {
680 for (verbly::token& token : it)
681 {
682 if (!token.isComplete())
683 {
684 visit(token);
685
686 break;
687 }
688 }
689
690 break;
691 }
692
693 case verbly::token::type::fillin:
694 {
695 if (it.hasSynrestr("infinitive_phrase"))
696 {
697 it = generateClause(it);
698 } else if (it.hasSynrestr("adjective_phrase"))
699 {
700 verbly::token phrase;
701
702 if (std::bernoulli_distribution(1.0/6.0)(rng_))
703 {
704 phrase << std::set<std::string>({"adverb_phrase"});
705 }
706
707 if (std::bernoulli_distribution(1.0/4.0)(rng_))
708 {
709 phrase << std::set<std::string>({"participle_phrase", "subjectless"});
710 } else {
711 std::geometric_distribution<int> tagdist(0.2);
712 phrase << database_.words(
713 (verbly::word::tagCount >= tagdist(rng_))
714 && (verbly::notion::partOfSpeech == verbly::part_of_speech::adjective)).first();
715 }
716
717 it = phrase;
718 } else if (it.hasSynrestr("adverb_phrase"))
719 {
720 std::geometric_distribution<int> tagdist(1.0/23.0);
721
722 it = database_.words(
723 (verbly::notion::partOfSpeech == verbly::part_of_speech::adverb)
724 && (verbly::word::tagCount >= tagdist(rng_))
725 ).first();
726 } else if (it.hasSynrestr("participle_phrase"))
727 {
728 if (std::bernoulli_distribution(1.0/2.0)(rng_))
729 {
730 it = verbly::token(
731 database_.words(
732 (verbly::notion::partOfSpeech == verbly::part_of_speech::verb)
733 && (verbly::lemma::form(verbly::inflection::ing_form))).first(),
734 verbly::inflection::ing_form);
735 } else {
736 it = generateClause(it);
737 }
738 } else {
739 it = "*the reality of the situation*";
740 }
741
742 break;
743 }
744
745 case verbly::token::type::word:
746 case verbly::token::type::literal:
747 case verbly::token::type::part:
748 {
749 // Nope
750
751 break;
752 }
753 }
754}
diff --git a/sentence.h b/sentence.h new file mode 100644 index 0000000..abf6860 --- /dev/null +++ b/sentence.h
@@ -0,0 +1,39 @@
1#ifndef SENTENCE_H_81987F60
2#define SENTENCE_H_81987F60
3
4#include <verbly.h>
5#include <random>
6#include <string>
7
// Generates random titles from a verbly database.
8class sentence {
9public:
10
 // Both arguments are stored by reference (see database_ and rng_ below).
11 sentence(
12 const verbly::database& database,
13 std::mt19937& rng);
14
 // Returns one generated title.
15 std::string generate() const;
16
17private:
18
 // Converts a selectional restriction tree into a word filter.
19 verbly::filter parseSelrestrs(verbly::selrestr selrestr) const;
20
 // Tells whether the restriction tree positively demands `restriction`.
21 bool requiresSelrestr(std::string restriction, verbly::selrestr selrestr) const;
22
 // Picks a noun for a frame slot with the given role and restrictions.
23 verbly::word generateStandardNoun(std::string role, verbly::selrestr selrestrs) const;
24
 // Wraps a noun in a determiner and, sometimes, an adjective.
25 verbly::token generateStandardNounPhrase(
26 const verbly::word& noun,
27 std::string role,
28 bool plural,
29 bool definite) const;
30
 // Builds a clause according to the syntactic restrictions on `it`.
31 verbly::token generateClause(const verbly::token& it) const;
32
 // Performs one expansion step on `it`.
33 void visit(verbly::token& it) const;
34
 // Borrowed, not owned.
35 const verbly::database& database_;
36 std::mt19937& rng_;
37};
38
39#endif /* end of include guard: SENTENCE_H_81987F60 */
diff --git a/vendor/libtwittercpp b/vendor/libtwittercpp new file mode 160000
Subproject df906121dd862c0f704e44f28ee079158c431c4
diff --git a/vendor/verbly b/vendor/verbly new file mode 160000
Subproject 5caeb000b00ff7833c3b3c44893d4beffc0afb8
diff --git a/vendor/yaml-cpp b/vendor/yaml-cpp new file mode 160000
Subproject bedb28fdb4fd52d97e02f6cb946cae631037089