summary refs log tree commit diff stats
path: root/lib/verb_query.cpp
diff options
context:
space:
mode:
authorKelly Rauchenberger <fefferburbia@gmail.com>2017-01-16 18:02:50 -0500
committerKelly Rauchenberger <fefferburbia@gmail.com>2017-01-16 18:02:50 -0500
commit6746da6edd7d9d50efe374eabbb79a3cac882d81 (patch)
treeff20917e08b08d36b9541c1371106596e7bec442 /lib/verb_query.cpp
parent4af7e55733098ca42f75a4ffaca1b0f6bab4dd36 (diff)
downloadverbly-6746da6edd7d9d50efe374eabbb79a3cac882d81.tar.gz
verbly-6746da6edd7d9d50efe374eabbb79a3cac882d81.tar.bz2
verbly-6746da6edd7d9d50efe374eabbb79a3cac882d81.zip
Started structural rewrite
The new object structure was designed to build on the existing WordNet
structure, while also adding in all of the data that we get from other sources.
More information about this can be found on the project wiki.

The generator has already been completely rewritten to generate a
datafile that uses the new structure. In addition, a number of indexes
are created, which does double the size of the datafile, but also allows
for much faster lookups. Finally, the new generator is written modularly
and is a lot more readable than the old one.

The verbly interface to the new object structure has mostly been
completed, but has not been tested fully. There is a completely new
search API which utilizes a lot of operator overloading; documentation
on how to use it should go up at some point.

Token processing and verb frames are currently unimplemented. The source for
these has been left in the repository for now.
Diffstat (limited to 'lib/verb_query.cpp')
-rw-r--r--lib/verb_query.cpp315
1 files changed, 0 insertions, 315 deletions
diff --git a/lib/verb_query.cpp b/lib/verb_query.cpp deleted file mode 100644 index 4e6c253..0000000 --- a/lib/verb_query.cpp +++ /dev/null
@@ -1,315 +0,0 @@
1#include "verbly.h"
2
3namespace verbly {
4
5 verb_query::verb_query(const data& _data) : _data(_data)
6 {
7
8 }
9
10 verb_query& verb_query::limit(int _limit)
11 {
12 if ((_limit > 0) || (_limit == unlimited))
13 {
14 this->_limit = _limit;
15 }
16
17 return *this;
18 }
19
20 verb_query& verb_query::random()
21 {
22 this->_random = true;
23
24 return *this;
25 }
26
27 verb_query& verb_query::except(const verb& _word)
28 {
29 _except.push_back(_word);
30
31 return *this;
32 }
33
34 verb_query& verb_query::rhymes_with(const word& _word)
35 {
36 for (auto rhyme : _word.get_rhymes())
37 {
38 _rhymes.push_back(rhyme);
39 }
40
41 if (dynamic_cast<const verb*>(&_word) != nullptr)
42 {
43 _except.push_back(dynamic_cast<const verb&>(_word));
44 }
45
46 return *this;
47 }
48
49 verb_query& verb_query::rhymes_with(rhyme _r)
50 {
51 _rhymes.push_back(_r);
52
53 return *this;
54 }
55
56 verb_query& verb_query::has_pronunciation()
57 {
58 this->_has_prn = true;
59
60 return *this;
61 }
62
63 verb_query& verb_query::has_rhyming_noun()
64 {
65 _has_rhyming_noun = true;
66
67 return *this;
68 }
69
70 verb_query& verb_query::has_rhyming_adjective()
71 {
72 _has_rhyming_adjective = true;
73
74 return *this;
75 }
76
77 verb_query& verb_query::has_rhyming_adverb()
78 {
79 _has_rhyming_adverb = true;
80
81 return *this;
82 }
83
84 verb_query& verb_query::has_rhyming_verb()
85 {
86 _has_rhyming_verb = true;
87
88 return *this;
89 }
90
91 verb_query& verb_query::with_stress(filter<std::vector<bool>> _arg)
92 {
93 _stress = _arg;
94
95 return *this;
96 }
97
98 verb_query& verb_query::has_frames()
99 {
100 this->_has_frames = true;
101
102 return *this;
103 }
104
105 std::list<verb> verb_query::run() const
106 {
107 std::stringstream construct;
108 construct << "SELECT verb_id, infinitive, past_tense, past_participle, ing_form, s_form FROM verbs";
109 std::list<std::string> conditions;
110 std::list<binding> bindings;
111
112 if (_has_prn)
113 {
114 conditions.push_back("verb_id IN (SELECT verb_id FROM verb_pronunciations)");
115 }
116
117 if (!_rhymes.empty())
118 {
119 std::list<std::string> clauses(_rhymes.size(), "(prerhyme != ? AND rhyme = ?)");
120 std::string cond = "verb_id IN (SELECT verb_id FROM verb_pronunciations WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
121 conditions.push_back(cond);
122
123 for (auto rhy : _rhymes)
124 {
125 bindings.emplace_back(rhy.get_prerhyme());
126 bindings.emplace_back(rhy.get_rhyme());
127 }
128 }
129
130 if (_has_rhyming_noun)
131 {
132 conditions.push_back("verb_id IN (SELECT a.verb_id FROM verbs AS a INNER JOIN verb_pronunciations AS curp ON curp.noun_id = a.adverb_id INNER JOIN noun_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)");
133 }
134
135 if (_has_rhyming_adjective)
136 {
137 conditions.push_back("verb_id IN (SELECT a.verb_id FROM verbs AS a INNER JOIN verb_pronunciations AS curp ON curp.noun_id = a.adverb_id INNER JOIN adjective_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)");
138 }
139
140 if (_has_rhyming_adverb)
141 {
142 conditions.push_back("verb_id IN (SELECT a.verb_id FROM verbs AS a INNER JOIN verb_pronunciations AS curp ON curp.noun_id = a.adverb_id INNER JOIN adverb_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)");
143 }
144
145 if (_has_rhyming_verb)
146 {
147 conditions.push_back("verb_id IN (SELECT a.verb_id FROM verbs AS a INNER JOIN verb_pronunciations AS curp ON curp.noun_id = a.adverb_id INNER JOIN verb_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme AND rhmp.verb_id != curp.verb_id)");
148 }
149
150 if (!_stress.empty())
151 {
152 std::stringstream cond;
153 if (_stress.get_notlogic())
154 {
155 cond << "verb_id NOT IN";
156 } else {
157 cond << "verb_id IN";
158 }
159
160 cond << "(SELECT verb_id FROM verb_pronunciations WHERE ";
161
162 std::function<std::string (filter<std::vector<bool>>, bool)> recur = [&] (filter<std::vector<bool>> f, bool notlogic) -> std::string {
163 switch (f.get_type())
164 {
165 case filter<std::vector<bool>>::type::singleton:
166 {
167 std::ostringstream _val;
168 for (auto syl : f.get_elem())
169 {
170 if (syl)
171 {
172 _val << "1";
173 } else {
174 _val << "0";
175 }
176 }
177
178 bindings.emplace_back(_val.str());
179
180 if (notlogic == f.get_notlogic())
181 {
182 return "stress = ?";
183 } else {
184 return "stress != ?";
185 }
186 }
187
188 case filter<std::vector<bool>>::type::group:
189 {
190 bool truelogic = notlogic != f.get_notlogic();
191
192 std::list<std::string> clauses;
193 std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<std::vector<bool>> f2) {
194 return recur(f2, truelogic);
195 });
196
197 if (truelogic == f.get_orlogic())
198 {
199 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")";
200 } else {
201 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
202 }
203 }
204 }
205 };
206
207 cond << recur(_stress, _stress.get_notlogic());
208 cond << ")";
209 conditions.push_back(cond.str());
210 }
211
212 for (auto except : _except)
213 {
214 conditions.push_back("verb_id != ?");
215 bindings.emplace_back(except._id);
216 }
217
218 if (!_has_frames)
219 {
220 conditions.push_back("verb_id IN (SELECT verb_id FROM verb_groups)");
221 }
222
223 if (!conditions.empty())
224 {
225 construct << " WHERE ";
226 construct << verbly::implode(std::begin(conditions), std::end(conditions), " AND ");
227 }
228
229 if (_random)
230 {
231 construct << " ORDER BY RANDOM()";
232 }
233
234 if (_limit != unlimited)
235 {
236 construct << " LIMIT " << _limit;
237 }
238
239 sqlite3_stmt* ppstmt;
240 std::string query = construct.str();
241 if (sqlite3_prepare_v2(_data.ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
242 {
243 throw std::runtime_error(sqlite3_errmsg(_data.ppdb));
244 }
245
246 int i = 1;
247 for (auto& binding : bindings)
248 {
249 switch (binding.get_type())
250 {
251 case binding::type::integer:
252 {
253 sqlite3_bind_int(ppstmt, i, binding.get_integer());
254
255 break;
256 }
257
258 case binding::type::string:
259 {
260 sqlite3_bind_text(ppstmt, i, binding.get_string().c_str(), binding.get_string().length(), SQLITE_TRANSIENT);
261
262 break;
263 }
264 }
265
266 i++;
267 }
268
269 std::list<verb> output;
270 while (sqlite3_step(ppstmt) == SQLITE_ROW)
271 {
272 verb tnc {_data, sqlite3_column_int(ppstmt, 0)};
273 tnc._infinitive = std::string(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 1)));
274 tnc._past_tense = std::string(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 2)));
275 tnc._past_participle = std::string(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 3)));
276 tnc._ing_form = std::string(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 4)));
277 tnc._s_form = std::string(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 5)));
278
279 output.push_back(tnc);
280 }
281
282 sqlite3_finalize(ppstmt);
283
284 for (auto& verb : output)
285 {
286 query = "SELECT pronunciation, prerhyme, rhyme FROM verb_pronunciations WHERE verb_id = ?";
287 if (sqlite3_prepare_v2(_data.ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
288 {
289 throw std::runtime_error(sqlite3_errmsg(_data.ppdb));
290 }
291
292 sqlite3_bind_int(ppstmt, 1, verb._id);
293
294 while (sqlite3_step(ppstmt) == SQLITE_ROW)
295 {
296 std::string pronunciation(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 0)));
297 auto phonemes = verbly::split<std::list<std::string>>(pronunciation, " ");
298
299 verb.pronunciations.push_back(phonemes);
300
301 if ((sqlite3_column_type(ppstmt, 1) != SQLITE_NULL) && (sqlite3_column_type(ppstmt, 2) != SQLITE_NULL))
302 {
303 std::string prerhyme(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 1)));
304 std::string rhyming(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 2)));
305 verb.rhymes.emplace_back(prerhyme, rhyming);
306 }
307 }
308
309 sqlite3_finalize(ppstmt);
310 }
311
312 return output;
313 }
314
315};