diff options
author | Kelly Rauchenberger <fefferburbia@gmail.com> | 2016-03-24 23:16:07 -0400 |
---|---|---|
committer | Kelly Rauchenberger <fefferburbia@gmail.com> | 2016-03-24 23:16:07 -0400 |
commit | eef5de613c75661e5d94baa086f6f2ddc26c7ed0 (patch) | |
tree | 180230f6a245c5bca94d894273f5d2b93ded3f04 /lib/adverb_query.cpp | |
parent | d5ee4e39e5b5b3b8daa85cd972802195ad35e965 (diff) | |
download | verbly-eef5de613c75661e5d94baa086f6f2ddc26c7ed0.tar.gz verbly-eef5de613c75661e5d94baa086f6f2ddc26c7ed0.tar.bz2 verbly-eef5de613c75661e5d94baa086f6f2ddc26c7ed0.zip |
Added verb frames
In addition: - Added prepositions. - Rewrote a lot of the query interface. It now, for a lot of relationships, supports nested AND, OR, and NOT logic. - Rewrote the token class. It is now a union-like class instead of being polymorphic, which means smart pointers are no longer necessary. - Querying with regards to word derivation has been temporarily removed. - Sentinel values are now supported for all word types. - The VerbNet data retrieved from http://verbs.colorado.edu/~mpalmer/projects/verbnet/downloads.html was found to not be perfectly satisfactory in some regards, especially regarding adjective phrases. A patch file is now included in the repository describing the changes made to the VerbNet v3.2 download for the canonical verbly datafile.
Diffstat (limited to 'lib/adverb_query.cpp')
-rw-r--r-- | lib/adverb_query.cpp | 514 |
1 files changed, 514 insertions, 0 deletions
diff --git a/lib/adverb_query.cpp b/lib/adverb_query.cpp new file mode 100644 index 0000000..639f16f --- /dev/null +++ b/lib/adverb_query.cpp | |||
@@ -0,0 +1,514 @@ | |||
1 | #include "verbly.h" | ||
2 | |||
3 | namespace verbly { | ||
4 | |||
5 | adverb_query::adverb_query(const data& _data) : _data(_data) | ||
6 | { | ||
7 | |||
8 | } | ||
9 | |||
10 | adverb_query& adverb_query::limit(int _limit) | ||
11 | { | ||
12 | if ((_limit > 0) || (_limit == unlimited)) | ||
13 | { | ||
14 | this->_limit = _limit; | ||
15 | } | ||
16 | |||
17 | return *this; | ||
18 | } | ||
19 | |||
20 | adverb_query& adverb_query::random() | ||
21 | { | ||
22 | this->_random = true; | ||
23 | |||
24 | return *this; | ||
25 | } | ||
26 | |||
27 | adverb_query& adverb_query::except(const adverb& _word) | ||
28 | { | ||
29 | _except.push_back(_word); | ||
30 | |||
31 | return *this; | ||
32 | } | ||
33 | |||
34 | adverb_query& adverb_query::rhymes_with(const word& _word) | ||
35 | { | ||
36 | for (auto rhyme : _word.rhyme_phonemes()) | ||
37 | { | ||
38 | _rhymes.push_back(rhyme); | ||
39 | } | ||
40 | |||
41 | if (dynamic_cast<const adverb*>(&_word) != nullptr) | ||
42 | { | ||
43 | _except.push_back(dynamic_cast<const adverb&>(_word)); | ||
44 | } | ||
45 | |||
46 | return *this; | ||
47 | } | ||
48 | |||
49 | adverb_query& adverb_query::has_pronunciation() | ||
50 | { | ||
51 | this->_has_prn = true; | ||
52 | |||
53 | return *this; | ||
54 | } | ||
55 | |||
56 | adverb_query& adverb_query::requires_comparative_form() | ||
57 | { | ||
58 | _requires_comparative_form = true; | ||
59 | |||
60 | return *this; | ||
61 | } | ||
62 | |||
63 | adverb_query& adverb_query::requires_superlative_form() | ||
64 | { | ||
65 | _requires_superlative_form = true; | ||
66 | |||
67 | return *this; | ||
68 | } | ||
69 | |||
70 | adverb_query& adverb_query::has_antonyms() | ||
71 | { | ||
72 | _has_antonyms = true; | ||
73 | |||
74 | return *this; | ||
75 | } | ||
76 | |||
77 | adverb_query& adverb_query::antonym_of(filter<adverb> _f) | ||
78 | { | ||
79 | _f.clean(); | ||
80 | _antonym_of = _f; | ||
81 | |||
82 | return *this; | ||
83 | } | ||
84 | |||
85 | adverb_query& adverb_query::has_synonyms() | ||
86 | { | ||
87 | _has_synonyms = true; | ||
88 | |||
89 | return *this; | ||
90 | } | ||
91 | |||
92 | adverb_query& adverb_query::synonym_of(filter<adverb> _f) | ||
93 | { | ||
94 | _f.clean(); | ||
95 | _synonym_of = _f; | ||
96 | |||
97 | return *this; | ||
98 | } | ||
99 | |||
100 | adverb_query& adverb_query::is_mannernymic() | ||
101 | { | ||
102 | _is_mannernymic = true; | ||
103 | |||
104 | return *this; | ||
105 | } | ||
106 | |||
107 | adverb_query& adverb_query::mannernym_of(filter<adjective> _f) | ||
108 | { | ||
109 | _f.clean(); | ||
110 | _mannernym_of = _f; | ||
111 | |||
112 | return *this; | ||
113 | } | ||
114 | /* | ||
115 | adverb_query& adverb_query::derived_from(const word& _w) | ||
116 | { | ||
117 | if (dynamic_cast<const adjective*>(&_w) != nullptr) | ||
118 | { | ||
119 | _derived_from_adjective.push_back(dynamic_cast<const adjective&>(_w)); | ||
120 | } else if (dynamic_cast<const adverb*>(&_w) != nullptr) | ||
121 | { | ||
122 | _derived_from_adverb.push_back(dynamic_cast<const adverb&>(_w)); | ||
123 | } else if (dynamic_cast<const noun*>(&_w) != nullptr) | ||
124 | { | ||
125 | _derived_from_noun.push_back(dynamic_cast<const noun&>(_w)); | ||
126 | } | ||
127 | |||
128 | return *this; | ||
129 | } | ||
130 | |||
131 | adverb_query& adverb_query::not_derived_from(const word& _w) | ||
132 | { | ||
133 | if (dynamic_cast<const adjective*>(&_w) != nullptr) | ||
134 | { | ||
135 | _not_derived_from_adjective.push_back(dynamic_cast<const adjective&>(_w)); | ||
136 | } else if (dynamic_cast<const adverb*>(&_w) != nullptr) | ||
137 | { | ||
138 | _not_derived_from_adverb.push_back(dynamic_cast<const adverb&>(_w)); | ||
139 | } else if (dynamic_cast<const noun*>(&_w) != nullptr) | ||
140 | { | ||
141 | _not_derived_from_noun.push_back(dynamic_cast<const noun&>(_w)); | ||
142 | } | ||
143 | |||
144 | return *this; | ||
145 | } | ||
146 | */ | ||
147 | std::list<adverb> adverb_query::run() const | ||
148 | { | ||
149 | std::stringstream construct; | ||
150 | construct << "SELECT adverb_id, base_form, comparative, superlative FROM adverbs"; | ||
151 | std::list<std::string> conditions; | ||
152 | |||
153 | if (_has_prn) | ||
154 | { | ||
155 | conditions.push_back("adverb_id IN (SELECT adverb_id FROM adverb_pronunciations)"); | ||
156 | } | ||
157 | |||
158 | if (!_rhymes.empty()) | ||
159 | { | ||
160 | std::list<std::string> clauses(_rhymes.size(), "pronunciation LIKE @RHMPRN"); | ||
161 | std::string cond = "adverb_id IN (SELECT adverb_id FROM adverb_pronunciations WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
162 | conditions.push_back(cond); | ||
163 | } | ||
164 | |||
165 | for (auto except : _except) | ||
166 | { | ||
167 | conditions.push_back("adverb_id != @EXCID"); | ||
168 | } | ||
169 | |||
170 | if (_requires_comparative_form) | ||
171 | { | ||
172 | conditions.push_back("comparative IS NOT NULL"); | ||
173 | } | ||
174 | |||
175 | if (_requires_superlative_form) | ||
176 | { | ||
177 | conditions.push_back("superlative IS NOT NULL"); | ||
178 | } | ||
179 | |||
180 | if (_has_antonyms) | ||
181 | { | ||
182 | conditions.push_back("adverb_id IN (SELECT adverb_2_id FROM adverb_antonymy)"); | ||
183 | } | ||
184 | |||
185 | if (!_antonym_of.empty()) | ||
186 | { | ||
187 | std::stringstream cond; | ||
188 | if (_antonym_of.get_notlogic()) | ||
189 | { | ||
190 | cond << "adverb_id NOT IN"; | ||
191 | } else { | ||
192 | cond << "adverb_id IN"; | ||
193 | } | ||
194 | |||
195 | cond << "(SELECT adverb_2_id FROM adverb_antonymy WHERE "; | ||
196 | |||
197 | std::function<std::string (filter<adverb>, bool)> recur = [&] (filter<adverb> f, bool notlogic) -> std::string { | ||
198 | switch (f.get_type()) | ||
199 | { | ||
200 | case filter<adverb>::type::singleton: | ||
201 | { | ||
202 | if (notlogic == f.get_notlogic()) | ||
203 | { | ||
204 | return "adverb_1_id = @ANTID"; | ||
205 | } else { | ||
206 | return "adverb_1_id != @ANTID"; | ||
207 | } | ||
208 | } | ||
209 | |||
210 | case filter<adverb>::type::group: | ||
211 | { | ||
212 | bool truelogic = notlogic != f.get_notlogic(); | ||
213 | |||
214 | std::list<std::string> clauses; | ||
215 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<adverb> f2) { | ||
216 | return recur(f2, truelogic); | ||
217 | }); | ||
218 | |||
219 | if (truelogic == f.get_orlogic()) | ||
220 | { | ||
221 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
222 | } else { | ||
223 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
224 | } | ||
225 | } | ||
226 | } | ||
227 | }; | ||
228 | |||
229 | cond << recur(_antonym_of, _antonym_of.get_notlogic()); | ||
230 | cond << ")"; | ||
231 | conditions.push_back(cond.str()); | ||
232 | } | ||
233 | |||
234 | if (_has_synonyms) | ||
235 | { | ||
236 | conditions.push_back("adverb_id IN (SELECT adverb_2_id FROM adverb_synonymy)"); | ||
237 | } | ||
238 | |||
239 | if (!_synonym_of.empty()) | ||
240 | { | ||
241 | std::stringstream cond; | ||
242 | if (_antonym_of.get_notlogic()) | ||
243 | { | ||
244 | cond << "adverb_id NOT IN"; | ||
245 | } else { | ||
246 | cond << "adverb_id IN"; | ||
247 | } | ||
248 | |||
249 | cond << "(SELECT adverb_2_id FROM adverb_synonymy WHERE "; | ||
250 | |||
251 | std::function<std::string (filter<adverb>, bool)> recur = [&] (filter<adverb> f, bool notlogic) -> std::string { | ||
252 | switch (f.get_type()) | ||
253 | { | ||
254 | case filter<adverb>::type::singleton: | ||
255 | { | ||
256 | if (notlogic == f.get_notlogic()) | ||
257 | { | ||
258 | return "adverb_1_id = @SYNID"; | ||
259 | } else { | ||
260 | return "adverb_1_id != @SYNID"; | ||
261 | } | ||
262 | } | ||
263 | |||
264 | case filter<adverb>::type::group: | ||
265 | { | ||
266 | bool truelogic = notlogic != f.get_notlogic(); | ||
267 | |||
268 | std::list<std::string> clauses; | ||
269 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<adverb> f2) { | ||
270 | return recur(f2, truelogic); | ||
271 | }); | ||
272 | |||
273 | if (truelogic == f.get_orlogic()) | ||
274 | { | ||
275 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
276 | } else { | ||
277 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
278 | } | ||
279 | } | ||
280 | } | ||
281 | }; | ||
282 | |||
283 | cond << recur(_synonym_of, _synonym_of.get_notlogic()); | ||
284 | cond << ")"; | ||
285 | conditions.push_back(cond.str()); | ||
286 | } | ||
287 | |||
288 | if (_is_mannernymic) | ||
289 | { | ||
290 | conditions.push_back("adverb_id IN (SELECT mannernym_id FROM mannernymy)"); | ||
291 | } | ||
292 | |||
293 | if (!_mannernym_of.empty()) | ||
294 | { | ||
295 | std::stringstream cond; | ||
296 | if (_antonym_of.get_notlogic()) | ||
297 | { | ||
298 | cond << "adverb_id NOT IN"; | ||
299 | } else { | ||
300 | cond << "adverb_id IN"; | ||
301 | } | ||
302 | |||
303 | cond << "(SELECT mannernym_id FROM mannernymy WHERE "; | ||
304 | |||
305 | std::function<std::string (filter<adjective>, bool)> recur = [&] (filter<adjective> f, bool notlogic) -> std::string { | ||
306 | switch (f.get_type()) | ||
307 | { | ||
308 | case filter<adjective>::type::singleton: | ||
309 | { | ||
310 | if (notlogic == f.get_notlogic()) | ||
311 | { | ||
312 | return "adjective_id = @AMANID"; | ||
313 | } else { | ||
314 | return "adjective_id != @AMANID"; | ||
315 | } | ||
316 | } | ||
317 | |||
318 | case filter<adjective>::type::group: | ||
319 | { | ||
320 | bool truelogic = notlogic != f.get_notlogic(); | ||
321 | |||
322 | std::list<std::string> clauses; | ||
323 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<adjective> f2) { | ||
324 | return recur(f2, truelogic); | ||
325 | }); | ||
326 | |||
327 | if (truelogic == f.get_orlogic()) | ||
328 | { | ||
329 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
330 | } else { | ||
331 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
332 | } | ||
333 | } | ||
334 | } | ||
335 | }; | ||
336 | |||
337 | cond << recur(_mannernym_of, _mannernym_of.get_notlogic()); | ||
338 | cond << ")"; | ||
339 | conditions.push_back(cond.str()); | ||
340 | } | ||
341 | |||
342 | /* if (!_derived_from_adjective.empty()) | ||
343 | { | ||
344 | std::list<std::string> clauses(_derived_from_adjective.size(), "adjective_id = @DERADJ"); | ||
345 | std::string cond = "adverb_id IN (SELECT adverb_id FROM adjective_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
346 | conditions.push_back(cond); | ||
347 | } | ||
348 | |||
349 | if (!_not_derived_from_adjective.empty()) | ||
350 | { | ||
351 | std::list<std::string> clauses(_not_derived_from_adjective.size(), "adjective_id = @NDERADJ"); | ||
352 | std::string cond = "adverb_id NOT IN (SELECT adverb_id FROM adjective_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
353 | conditions.push_back(cond); | ||
354 | } | ||
355 | |||
356 | if (!_derived_from_adverb.empty()) | ||
357 | { | ||
358 | std::list<std::string> clauses(_derived_from_adverb.size(), "adverb_2_id = @DERADV"); | ||
359 | std::string cond = "adverb_id IN (SELECT adverb_1_id FROM adverb_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
360 | conditions.push_back(cond); | ||
361 | } | ||
362 | |||
363 | if (!_not_derived_from_adverb.empty()) | ||
364 | { | ||
365 | std::list<std::string> clauses(_not_derived_from_adverb.size(), "adverb_2_id = @NDERADV"); | ||
366 | std::string cond = "adverb_id NOT IN (SELECT adverb_1_id FROM adverb_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
367 | conditions.push_back(cond); | ||
368 | } | ||
369 | |||
370 | if (!_derived_from_noun.empty()) | ||
371 | { | ||
372 | std::list<std::string> clauses(_derived_from_noun.size(), "noun_id = @DERN"); | ||
373 | std::string cond = "adverb_id IN (SELECT adverb_id FROM noun_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
374 | conditions.push_back(cond); | ||
375 | } | ||
376 | |||
377 | if (!_not_derived_from_noun.empty()) | ||
378 | { | ||
379 | std::list<std::string> clauses(_not_derived_from_noun.size(), "noun_id = @NDERN"); | ||
380 | std::string cond = "adverb_id NOT IN (SELECT adverb_id FROM noun_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
381 | conditions.push_back(cond); | ||
382 | }*/ | ||
383 | |||
384 | if (!conditions.empty()) | ||
385 | { | ||
386 | construct << " WHERE "; | ||
387 | construct << verbly::implode(std::begin(conditions), std::end(conditions), " AND "); | ||
388 | } | ||
389 | |||
390 | if (_random) | ||
391 | { | ||
392 | construct << " ORDER BY RANDOM()"; | ||
393 | } | ||
394 | |||
395 | if (_limit != unlimited) | ||
396 | { | ||
397 | construct << " LIMIT " << _limit; | ||
398 | } | ||
399 | |||
400 | sqlite3_stmt* ppstmt; | ||
401 | std::string query = construct.str(); | ||
402 | if (sqlite3_prepare_v2(_data.ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
403 | { | ||
404 | throw std::runtime_error(sqlite3_errmsg(_data.ppdb)); | ||
405 | } | ||
406 | |||
407 | if (!_rhymes.empty()) | ||
408 | { | ||
409 | int i = 0; | ||
410 | for (auto rhyme : _rhymes) | ||
411 | { | ||
412 | std::string rhymer = "%" + rhyme; | ||
413 | sqlite3_bind_text(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@RHMPRN"), rhymer.c_str(), rhymer.length(), SQLITE_STATIC); | ||
414 | |||
415 | i++; | ||
416 | } | ||
417 | } | ||
418 | |||
419 | for (auto except : _except) | ||
420 | { | ||
421 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@EXCID"), except._id); | ||
422 | } | ||
423 | |||
424 | for (auto antonym : _antonym_of.inorder_flatten()) | ||
425 | { | ||
426 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@ANTID"), antonym._id); | ||
427 | } | ||
428 | |||
429 | for (auto synonym : _synonym_of.inorder_flatten()) | ||
430 | { | ||
431 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@SYNID"), synonym._id); | ||
432 | } | ||
433 | |||
434 | for (auto adj : _mannernym_of.inorder_flatten()) | ||
435 | { | ||
436 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@AMANID"), adj._id); | ||
437 | } | ||
438 | /* | ||
439 | for (auto adj : _derived_from_adjective) | ||
440 | { | ||
441 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@DERADJ"), adj._id); | ||
442 | } | ||
443 | |||
444 | for (auto adj : _not_derived_from_adjective) | ||
445 | { | ||
446 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@NDERADJ"), adj._id); | ||
447 | } | ||
448 | |||
449 | for (auto adv : _derived_from_adverb) | ||
450 | { | ||
451 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@DERADV"), adv._id); | ||
452 | } | ||
453 | |||
454 | for (auto adv : _not_derived_from_adverb) | ||
455 | { | ||
456 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@NDERADV"), adv._id); | ||
457 | } | ||
458 | |||
459 | for (auto n : _derived_from_noun) | ||
460 | { | ||
461 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@DERN"), n._id); | ||
462 | } | ||
463 | |||
464 | for (auto n : _not_derived_from_noun) | ||
465 | { | ||
466 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@NDERN"), n._id); | ||
467 | }*/ | ||
468 | |||
469 | std::list<adverb> output; | ||
470 | while (sqlite3_step(ppstmt) == SQLITE_ROW) | ||
471 | { | ||
472 | adverb tnc {_data, sqlite3_column_int(ppstmt, 0)}; | ||
473 | tnc._base_form = std::string(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 1))); | ||
474 | |||
475 | if (sqlite3_column_type(ppstmt, 2) != SQLITE_NULL) | ||
476 | { | ||
477 | tnc._comparative_form = std::string(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 2))); | ||
478 | } | ||
479 | |||
480 | if (sqlite3_column_type(ppstmt, 3) != SQLITE_NULL) | ||
481 | { | ||
482 | tnc._superlative_form = std::string(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 3))); | ||
483 | } | ||
484 | |||
485 | output.push_back(tnc); | ||
486 | } | ||
487 | |||
488 | sqlite3_finalize(ppstmt); | ||
489 | |||
490 | for (auto& adverb : output) | ||
491 | { | ||
492 | query = "SELECT pronunciation FROM adverb_pronunciations WHERE adverb_id = ?"; | ||
493 | if (sqlite3_prepare_v2(_data.ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
494 | { | ||
495 | throw std::runtime_error(sqlite3_errmsg(_data.ppdb)); | ||
496 | } | ||
497 | |||
498 | sqlite3_bind_int(ppstmt, 1, adverb._id); | ||
499 | |||
500 | while (sqlite3_step(ppstmt) == SQLITE_ROW) | ||
501 | { | ||
502 | std::string pronunciation(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 0))); | ||
503 | auto phonemes = verbly::split<std::list<std::string>>(pronunciation, " "); | ||
504 | |||
505 | adverb.pronunciations.push_back(phonemes); | ||
506 | } | ||
507 | |||
508 | sqlite3_finalize(ppstmt); | ||
509 | } | ||
510 | |||
511 | return output; | ||
512 | } | ||
513 | |||
514 | }; | ||