diff options
author | Kelly Rauchenberger <fefferburbia@gmail.com> | 2017-01-16 18:02:50 -0500 |
---|---|---|
committer | Kelly Rauchenberger <fefferburbia@gmail.com> | 2017-01-16 18:02:50 -0500 |
commit | 6746da6edd7d9d50efe374eabbb79a3cac882d81 (patch) | |
tree | ff20917e08b08d36b9541c1371106596e7bec442 /lib/verb_query.cpp | |
parent | 4af7e55733098ca42f75a4ffaca1b0f6bab4dd36 (diff) | |
download | verbly-6746da6edd7d9d50efe374eabbb79a3cac882d81.tar.gz verbly-6746da6edd7d9d50efe374eabbb79a3cac882d81.tar.bz2 verbly-6746da6edd7d9d50efe374eabbb79a3cac882d81.zip |
Started structural rewrite
The new object structure was designed to build on the existing WordNet structure, while also adding in all of the data that we get from other sources. More information about this can be found on the project wiki. The generator has already been completely rewritten to generate a datafile that uses the new structure. In addition, a number of indexes are created, which does double the size of the datafile, but also allows for much faster lookups. Finally, the new generator is written modularly and is a lot more readable than the old one. The verbly interface to the new object structure has mostly been completed, but has not been tested fully. There is a completely new search API which utilizes a lot of operator overloading; documentation on how to use it should go up at some point. Token processing and verb frames are currently unimplemented. Source for these has been left in the repository for now.
Diffstat (limited to 'lib/verb_query.cpp')
-rw-r--r-- | lib/verb_query.cpp | 315 |
1 files changed, 0 insertions, 315 deletions
diff --git a/lib/verb_query.cpp b/lib/verb_query.cpp deleted file mode 100644 index 4e6c253..0000000 --- a/lib/verb_query.cpp +++ /dev/null | |||
@@ -1,315 +0,0 @@ | |||
1 | #include "verbly.h" | ||
2 | |||
3 | namespace verbly { | ||
4 | |||
5 | verb_query::verb_query(const data& _data) : _data(_data) | ||
6 | { | ||
7 | |||
8 | } | ||
9 | |||
10 | verb_query& verb_query::limit(int _limit) | ||
11 | { | ||
12 | if ((_limit > 0) || (_limit == unlimited)) | ||
13 | { | ||
14 | this->_limit = _limit; | ||
15 | } | ||
16 | |||
17 | return *this; | ||
18 | } | ||
19 | |||
20 | verb_query& verb_query::random() | ||
21 | { | ||
22 | this->_random = true; | ||
23 | |||
24 | return *this; | ||
25 | } | ||
26 | |||
27 | verb_query& verb_query::except(const verb& _word) | ||
28 | { | ||
29 | _except.push_back(_word); | ||
30 | |||
31 | return *this; | ||
32 | } | ||
33 | |||
34 | verb_query& verb_query::rhymes_with(const word& _word) | ||
35 | { | ||
36 | for (auto rhyme : _word.get_rhymes()) | ||
37 | { | ||
38 | _rhymes.push_back(rhyme); | ||
39 | } | ||
40 | |||
41 | if (dynamic_cast<const verb*>(&_word) != nullptr) | ||
42 | { | ||
43 | _except.push_back(dynamic_cast<const verb&>(_word)); | ||
44 | } | ||
45 | |||
46 | return *this; | ||
47 | } | ||
48 | |||
49 | verb_query& verb_query::rhymes_with(rhyme _r) | ||
50 | { | ||
51 | _rhymes.push_back(_r); | ||
52 | |||
53 | return *this; | ||
54 | } | ||
55 | |||
56 | verb_query& verb_query::has_pronunciation() | ||
57 | { | ||
58 | this->_has_prn = true; | ||
59 | |||
60 | return *this; | ||
61 | } | ||
62 | |||
63 | verb_query& verb_query::has_rhyming_noun() | ||
64 | { | ||
65 | _has_rhyming_noun = true; | ||
66 | |||
67 | return *this; | ||
68 | } | ||
69 | |||
70 | verb_query& verb_query::has_rhyming_adjective() | ||
71 | { | ||
72 | _has_rhyming_adjective = true; | ||
73 | |||
74 | return *this; | ||
75 | } | ||
76 | |||
77 | verb_query& verb_query::has_rhyming_adverb() | ||
78 | { | ||
79 | _has_rhyming_adverb = true; | ||
80 | |||
81 | return *this; | ||
82 | } | ||
83 | |||
84 | verb_query& verb_query::has_rhyming_verb() | ||
85 | { | ||
86 | _has_rhyming_verb = true; | ||
87 | |||
88 | return *this; | ||
89 | } | ||
90 | |||
91 | verb_query& verb_query::with_stress(filter<std::vector<bool>> _arg) | ||
92 | { | ||
93 | _stress = _arg; | ||
94 | |||
95 | return *this; | ||
96 | } | ||
97 | |||
98 | verb_query& verb_query::has_frames() | ||
99 | { | ||
100 | this->_has_frames = true; | ||
101 | |||
102 | return *this; | ||
103 | } | ||
104 | |||
105 | std::list<verb> verb_query::run() const | ||
106 | { | ||
107 | std::stringstream construct; | ||
108 | construct << "SELECT verb_id, infinitive, past_tense, past_participle, ing_form, s_form FROM verbs"; | ||
109 | std::list<std::string> conditions; | ||
110 | std::list<binding> bindings; | ||
111 | |||
112 | if (_has_prn) | ||
113 | { | ||
114 | conditions.push_back("verb_id IN (SELECT verb_id FROM verb_pronunciations)"); | ||
115 | } | ||
116 | |||
117 | if (!_rhymes.empty()) | ||
118 | { | ||
119 | std::list<std::string> clauses(_rhymes.size(), "(prerhyme != ? AND rhyme = ?)"); | ||
120 | std::string cond = "verb_id IN (SELECT verb_id FROM verb_pronunciations WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
121 | conditions.push_back(cond); | ||
122 | |||
123 | for (auto rhy : _rhymes) | ||
124 | { | ||
125 | bindings.emplace_back(rhy.get_prerhyme()); | ||
126 | bindings.emplace_back(rhy.get_rhyme()); | ||
127 | } | ||
128 | } | ||
129 | |||
130 | if (_has_rhyming_noun) | ||
131 | { | ||
132 | conditions.push_back("verb_id IN (SELECT a.verb_id FROM verbs AS a INNER JOIN verb_pronunciations AS curp ON curp.noun_id = a.adverb_id INNER JOIN noun_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)"); | ||
133 | } | ||
134 | |||
135 | if (_has_rhyming_adjective) | ||
136 | { | ||
137 | conditions.push_back("verb_id IN (SELECT a.verb_id FROM verbs AS a INNER JOIN verb_pronunciations AS curp ON curp.noun_id = a.adverb_id INNER JOIN adjective_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)"); | ||
138 | } | ||
139 | |||
140 | if (_has_rhyming_adverb) | ||
141 | { | ||
142 | conditions.push_back("verb_id IN (SELECT a.verb_id FROM verbs AS a INNER JOIN verb_pronunciations AS curp ON curp.noun_id = a.adverb_id INNER JOIN adverb_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)"); | ||
143 | } | ||
144 | |||
145 | if (_has_rhyming_verb) | ||
146 | { | ||
147 | conditions.push_back("verb_id IN (SELECT a.verb_id FROM verbs AS a INNER JOIN verb_pronunciations AS curp ON curp.noun_id = a.adverb_id INNER JOIN verb_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme AND rhmp.verb_id != curp.verb_id)"); | ||
148 | } | ||
149 | |||
150 | if (!_stress.empty()) | ||
151 | { | ||
152 | std::stringstream cond; | ||
153 | if (_stress.get_notlogic()) | ||
154 | { | ||
155 | cond << "verb_id NOT IN"; | ||
156 | } else { | ||
157 | cond << "verb_id IN"; | ||
158 | } | ||
159 | |||
160 | cond << "(SELECT verb_id FROM verb_pronunciations WHERE "; | ||
161 | |||
162 | std::function<std::string (filter<std::vector<bool>>, bool)> recur = [&] (filter<std::vector<bool>> f, bool notlogic) -> std::string { | ||
163 | switch (f.get_type()) | ||
164 | { | ||
165 | case filter<std::vector<bool>>::type::singleton: | ||
166 | { | ||
167 | std::ostringstream _val; | ||
168 | for (auto syl : f.get_elem()) | ||
169 | { | ||
170 | if (syl) | ||
171 | { | ||
172 | _val << "1"; | ||
173 | } else { | ||
174 | _val << "0"; | ||
175 | } | ||
176 | } | ||
177 | |||
178 | bindings.emplace_back(_val.str()); | ||
179 | |||
180 | if (notlogic == f.get_notlogic()) | ||
181 | { | ||
182 | return "stress = ?"; | ||
183 | } else { | ||
184 | return "stress != ?"; | ||
185 | } | ||
186 | } | ||
187 | |||
188 | case filter<std::vector<bool>>::type::group: | ||
189 | { | ||
190 | bool truelogic = notlogic != f.get_notlogic(); | ||
191 | |||
192 | std::list<std::string> clauses; | ||
193 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<std::vector<bool>> f2) { | ||
194 | return recur(f2, truelogic); | ||
195 | }); | ||
196 | |||
197 | if (truelogic == f.get_orlogic()) | ||
198 | { | ||
199 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
200 | } else { | ||
201 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
202 | } | ||
203 | } | ||
204 | } | ||
205 | }; | ||
206 | |||
207 | cond << recur(_stress, _stress.get_notlogic()); | ||
208 | cond << ")"; | ||
209 | conditions.push_back(cond.str()); | ||
210 | } | ||
211 | |||
212 | for (auto except : _except) | ||
213 | { | ||
214 | conditions.push_back("verb_id != ?"); | ||
215 | bindings.emplace_back(except._id); | ||
216 | } | ||
217 | |||
218 | if (!_has_frames) | ||
219 | { | ||
220 | conditions.push_back("verb_id IN (SELECT verb_id FROM verb_groups)"); | ||
221 | } | ||
222 | |||
223 | if (!conditions.empty()) | ||
224 | { | ||
225 | construct << " WHERE "; | ||
226 | construct << verbly::implode(std::begin(conditions), std::end(conditions), " AND "); | ||
227 | } | ||
228 | |||
229 | if (_random) | ||
230 | { | ||
231 | construct << " ORDER BY RANDOM()"; | ||
232 | } | ||
233 | |||
234 | if (_limit != unlimited) | ||
235 | { | ||
236 | construct << " LIMIT " << _limit; | ||
237 | } | ||
238 | |||
239 | sqlite3_stmt* ppstmt; | ||
240 | std::string query = construct.str(); | ||
241 | if (sqlite3_prepare_v2(_data.ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
242 | { | ||
243 | throw std::runtime_error(sqlite3_errmsg(_data.ppdb)); | ||
244 | } | ||
245 | |||
246 | int i = 1; | ||
247 | for (auto& binding : bindings) | ||
248 | { | ||
249 | switch (binding.get_type()) | ||
250 | { | ||
251 | case binding::type::integer: | ||
252 | { | ||
253 | sqlite3_bind_int(ppstmt, i, binding.get_integer()); | ||
254 | |||
255 | break; | ||
256 | } | ||
257 | |||
258 | case binding::type::string: | ||
259 | { | ||
260 | sqlite3_bind_text(ppstmt, i, binding.get_string().c_str(), binding.get_string().length(), SQLITE_TRANSIENT); | ||
261 | |||
262 | break; | ||
263 | } | ||
264 | } | ||
265 | |||
266 | i++; | ||
267 | } | ||
268 | |||
269 | std::list<verb> output; | ||
270 | while (sqlite3_step(ppstmt) == SQLITE_ROW) | ||
271 | { | ||
272 | verb tnc {_data, sqlite3_column_int(ppstmt, 0)}; | ||
273 | tnc._infinitive = std::string(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 1))); | ||
274 | tnc._past_tense = std::string(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 2))); | ||
275 | tnc._past_participle = std::string(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 3))); | ||
276 | tnc._ing_form = std::string(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 4))); | ||
277 | tnc._s_form = std::string(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 5))); | ||
278 | |||
279 | output.push_back(tnc); | ||
280 | } | ||
281 | |||
282 | sqlite3_finalize(ppstmt); | ||
283 | |||
284 | for (auto& verb : output) | ||
285 | { | ||
286 | query = "SELECT pronunciation, prerhyme, rhyme FROM verb_pronunciations WHERE verb_id = ?"; | ||
287 | if (sqlite3_prepare_v2(_data.ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
288 | { | ||
289 | throw std::runtime_error(sqlite3_errmsg(_data.ppdb)); | ||
290 | } | ||
291 | |||
292 | sqlite3_bind_int(ppstmt, 1, verb._id); | ||
293 | |||
294 | while (sqlite3_step(ppstmt) == SQLITE_ROW) | ||
295 | { | ||
296 | std::string pronunciation(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 0))); | ||
297 | auto phonemes = verbly::split<std::list<std::string>>(pronunciation, " "); | ||
298 | |||
299 | verb.pronunciations.push_back(phonemes); | ||
300 | |||
301 | if ((sqlite3_column_type(ppstmt, 1) != SQLITE_NULL) && (sqlite3_column_type(ppstmt, 2) != SQLITE_NULL)) | ||
302 | { | ||
303 | std::string prerhyme(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 1))); | ||
304 | std::string rhyming(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 2))); | ||
305 | verb.rhymes.emplace_back(prerhyme, rhyming); | ||
306 | } | ||
307 | } | ||
308 | |||
309 | sqlite3_finalize(ppstmt); | ||
310 | } | ||
311 | |||
312 | return output; | ||
313 | } | ||
314 | |||
315 | }; | ||