diff options
Diffstat (limited to 'verbly')
-rw-r--r-- | verbly/CMakeLists.txt | 9 | ||||
-rw-r--r-- | verbly/LICENSE | 3 | ||||
-rw-r--r-- | verbly/generator/CMakeLists.txt | 12 | ||||
-rw-r--r-- | verbly/generator/generator.cpp | 1663 | ||||
-rw-r--r-- | verbly/generator/progress.h | 50 | ||||
-rw-r--r-- | verbly/generator/schema.sql | 252 | ||||
-rw-r--r-- | verbly/lib/adjective.cpp (renamed from verbly/adjective.cpp) | 104 | ||||
-rw-r--r-- | verbly/lib/adjective.h (renamed from verbly/adjective.h) | 10 | ||||
-rw-r--r-- | verbly/lib/adverb.cpp (renamed from verbly/adverb.cpp) | 104 | ||||
-rw-r--r-- | verbly/lib/adverb.h (renamed from verbly/adverb.h) | 13 | ||||
-rw-r--r-- | verbly/lib/c++14.h (renamed from verbly/c++14.h) | 0 | ||||
-rw-r--r-- | verbly/lib/data.cpp (renamed from verbly/data.cpp) | 0 | ||||
-rw-r--r-- | verbly/lib/data.h (renamed from verbly/data.h) | 0 | ||||
-rw-r--r-- | verbly/lib/noun.cpp (renamed from verbly/noun.cpp) | 116 | ||||
-rw-r--r-- | verbly/lib/noun.h (renamed from verbly/noun.h) | 12 | ||||
-rw-r--r-- | verbly/lib/token.cpp (renamed from verbly/token.cpp) | 0 | ||||
-rw-r--r-- | verbly/lib/token.h (renamed from verbly/token.h) | 0 | ||||
-rw-r--r-- | verbly/lib/util.h (renamed from verbly/util.h) | 0 | ||||
-rw-r--r-- | verbly/lib/verb.cpp (renamed from verbly/verb.cpp) | 0 | ||||
-rw-r--r-- | verbly/lib/verb.h (renamed from verbly/verb.h) | 0 | ||||
-rw-r--r-- | verbly/lib/verbly.h (renamed from verbly/verbly.h) | 0 | ||||
-rw-r--r-- | verbly/lib/word.cpp (renamed from verbly/word.cpp) | 0 | ||||
-rw-r--r-- | verbly/lib/word.h (renamed from verbly/word.h) | 0 |
23 files changed, 2348 insertions, 0 deletions
diff --git a/verbly/CMakeLists.txt b/verbly/CMakeLists.txt new file mode 100644 index 0000000..5a3e526 --- /dev/null +++ b/verbly/CMakeLists.txt | |||
@@ -0,0 +1,9 @@ | |||
1 | cmake_minimum_required (VERSION 2.6) | ||
2 | project (verbly) | ||
3 | |||
4 | find_package(PkgConfig) | ||
5 | pkg_check_modules(sqlite3 sqlite3 REQUIRED) | ||
6 | |||
7 | add_library(verbly lib/data.cpp lib/adjective.cpp lib/noun.cpp lib/verb.cpp lib/adverb.cpp lib/token.cpp lib/word.cpp) | ||
8 | set_property(TARGET verbly PROPERTY CXX_STANDARD 11) | ||
9 | set_property(TARGET verbly PROPERTY CXX_STANDARD_REQUIRED ON) | ||
diff --git a/verbly/LICENSE b/verbly/LICENSE new file mode 100644 index 0000000..4c4b690 --- /dev/null +++ b/verbly/LICENSE | |||
@@ -0,0 +1,3 @@ | |||
1 | WordNet Release 3.0 | ||
2 | |||
3 | This software and database is being provided to you, the LICENSEE, by Princeton University under the following license. By obtaining, using and/or copying this software and database, you agree that you have read, understood, and will comply with these terms and conditions.: Permission to use, copy, modify and distribute this software and database and its documentation for any purpose and without fee or royalty is hereby granted, provided that you agree to comply with the following copyright notice and statements, including the disclaimer, and that the same appear on ALL copies of the software, database and documentation, including modifications that you make for internal use or for distribution. WordNet 3.0 Copyright 2006 by Princeton University. All rights reserved. THIS SOFTWARE AND DATABASE IS PROVIDED "AS IS" AND PRINCETON UNIVERSITY MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PRINCETON UNIVERSITY MAKES NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE, DATABASE OR DOCUMENTATION WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS. The name of Princeton University or Princeton may not be used in advertising or publicity pertaining to distribution of the software and/or database. Title to copyright in this software, database and any associated documentation shall at all times remain with Princeton University and LICENSEE agrees to preserve same. \ No newline at end of file | ||
diff --git a/verbly/generator/CMakeLists.txt b/verbly/generator/CMakeLists.txt new file mode 100644 index 0000000..bbc3c4f --- /dev/null +++ b/verbly/generator/CMakeLists.txt | |||
@@ -0,0 +1,12 @@ | |||
1 | cmake_minimum_required (VERSION 2.6) | ||
2 | project (generator) | ||
3 | |||
4 | find_package(PkgConfig) | ||
5 | pkg_check_modules(sqlite3 sqlite3 REQUIRED) | ||
6 | find_package(libxml2 REQUIRED) | ||
7 | |||
8 | include_directories(${sqlite3_INCLUDE_DIR} ${LIBXML2_INCLUDE_DIR}) | ||
9 | add_executable(generator generator.cpp) | ||
10 | set_property(TARGET generator PROPERTY CXX_STANDARD 11) | ||
11 | set_property(TARGET generator PROPERTY CXX_STANDARD_REQUIRED ON) | ||
12 | target_link_libraries(generator ${sqlite3_LIBRARIES} ${LIBXML2_LIBRARIES}) | ||
diff --git a/verbly/generator/generator.cpp b/verbly/generator/generator.cpp new file mode 100644 index 0000000..faef5f7 --- /dev/null +++ b/verbly/generator/generator.cpp | |||
@@ -0,0 +1,1663 @@ | |||
1 | #include <libxml/parser.h> | ||
2 | #include <iostream> | ||
3 | #include <dirent.h> | ||
4 | #include <set> | ||
5 | #include <map> | ||
6 | #include <string> | ||
7 | #include <vector> | ||
8 | #include <fstream> | ||
9 | #include <sqlite3.h> | ||
10 | #include <sstream> | ||
11 | #include <regex> | ||
12 | #include <list> | ||
13 | #include "progress.h" | ||
14 | |||
15 | struct verb { | ||
16 | std::string infinitive; | ||
17 | std::string past_tense; | ||
18 | std::string past_participle; | ||
19 | std::string ing_form; | ||
20 | std::string s_form; | ||
21 | }; | ||
22 | |||
23 | struct adjective { | ||
24 | std::string base; | ||
25 | std::string comparative; | ||
26 | std::string superlative; | ||
27 | }; | ||
28 | |||
29 | struct noun { | ||
30 | std::string singular; | ||
31 | std::string plural; | ||
32 | }; | ||
33 | |||
34 | struct group { | ||
35 | std::string id; | ||
36 | std::set<std::string> members; | ||
37 | }; | ||
38 | |||
39 | std::map<std::string, group> groups; | ||
40 | std::map<std::string, verb> verbs; | ||
41 | std::map<std::string, adjective> adjectives; | ||
42 | std::map<std::string, noun> nouns; | ||
43 | std::map<int, std::map<int, int>> wn; | ||
44 | std::map<std::string, std::set<std::string>> pronunciations; | ||
45 | |||
46 | void print_usage() | ||
47 | { | ||
48 | std::cout << "Verbly Datafile Generator" << std::endl; | ||
49 | std::cout << "-------------------------" << std::endl; | ||
50 | std::cout << "Requires exactly six arguments." << std::endl; | ||
51 | std::cout << "1. The path to a VerbNet data directory." << std::endl; | ||
52 | std::cout << "2. The path to a SemLink vnpbMappings file." << std::endl; | ||
53 | std::cout << "3. The path to an AGID infl.txt file." << std::endl; | ||
54 | std::cout << "4. The path to a WordNet prolog data directory." << std::endl; | ||
55 | std::cout << "5. The path to a CMUDICT pronunciation file." << std::endl; | ||
56 | std::cout << "6. Datafile output path." << std::endl; | ||
57 | |||
58 | exit(1); | ||
59 | } | ||
60 | |||
61 | void db_error(sqlite3* ppdb, std::string) | ||
62 | { | ||
63 | std::cout << "Error writing to output database: " << sqlite3_errmsg(ppdb) << std::endl; | ||
64 | sqlite3_close_v2(ppdb); | ||
65 | print_usage(); | ||
66 | } | ||
67 | |||
68 | /* | ||
69 | void parse_group(xmlNodePtr top, std::string filename) | ||
70 | { | ||
71 | xmlChar* key = xmlGetProp(top, (xmlChar*) "ID"); | ||
72 | if (key == 0) | ||
73 | { | ||
74 | std::cout << "Bad VerbNet file format: " << filename << std::endl; | ||
75 | print_usage(); | ||
76 | } | ||
77 | std::string vnid = key; | ||
78 | vnid = vnid.substr(vnid.find_first_of("-")+1); | ||
79 | xmlFree(key); | ||
80 | |||
81 | group g; | ||
82 | g.id = vnid; | ||
83 | |||
84 | for (xmlNodePtr node = top->xmlChildrenNode; node != nullptr; node = node->next) | ||
85 | { | ||
86 | if (!xmlStrcmp(node->name, (const xmlChar*) "MEMBERS")) | ||
87 | { | ||
88 | for (xmlNodePtr member = node->xmlChildrenNode; member != nullptr; member = member->next) | ||
89 | { | ||
90 | if (!xmlStrcmp(member->name, (const xmlChar*) "MEMBER")) | ||
91 | { | ||
92 | key = xmlGetProp(member, (xmlChar*) "name"); | ||
93 | g.members.insert(key); | ||
94 | xmlFree(key); | ||
95 | } | ||
96 | } | ||
97 | } else if (!xmlStrcmp(node->name, (const xmlChar*) "FRAMES")) | ||
98 | { | ||
99 | for (xmlNodePtr frame = node->xmlChildrenNode; frame != nullptr; frame = frame->next) | ||
100 | { | ||
101 | if (!xmlStrcmp(frame->name, (const xmlChar*) "FRAME")) | ||
102 | { | ||
103 | for (xmlNodePtr framenode = frame->xmlChildrenNode; framenode != nullptr; framenode = framenode->next) | ||
104 | { | ||
105 | |||
106 | } | ||
107 | } | ||
108 | } | ||
109 | } | ||
110 | } | ||
111 | }*/ | ||
112 | |||
113 | int main(int argc, char** argv) | ||
114 | { | ||
115 | if (argc != 7) | ||
116 | { | ||
117 | print_usage(); | ||
118 | } | ||
119 | |||
120 | /*DIR* dir; | ||
121 | if ((dir = opendir(argv[1])) == nullptr) | ||
122 | { | ||
123 | std::cout << "Invalid VerbNet data directory." << std::endl; | ||
124 | |||
125 | print_usage(); | ||
126 | } | ||
127 | |||
128 | struct dirent* ent; | ||
129 | while ((ent = readdir(dir)) != nullptr) | ||
130 | { | ||
131 | std::string filename(argv[1]); | ||
132 | if (filename.back() != '/') | ||
133 | { | ||
134 | filename += '/'; | ||
135 | } | ||
136 | |||
137 | filename += ent->d_name; | ||
138 | //std::cout << ent->d_name << std::endl; | ||
139 | |||
140 | if (filename.rfind(".xml") != filename.size() - 4) | ||
141 | { | ||
142 | continue; | ||
143 | } | ||
144 | |||
145 | xmlDocPtr doc = xmlParseFile(filename.c_str()); | ||
146 | if (doc == nullptr) | ||
147 | { | ||
148 | std::cout << "Error opening " << filename << std::endl; | ||
149 | print_usage(); | ||
150 | } | ||
151 | |||
152 | xmlNodePtr top = xmlDocGetRootElement(doc); | ||
153 | if ((top == nullptr) || (xmlStrcmp(top->name, (xmlChar*) "VNCLASS"))) | ||
154 | { | ||
155 | std::cout << "Bad VerbNet file format: " << filename << std::endl; | ||
156 | print_usage(); | ||
157 | } | ||
158 | |||
159 | parse_group(top, filename); | ||
160 | } | ||
161 | |||
162 | closedir(dir);*/ | ||
163 | |||
164 | // Get verbs from AGID | ||
165 | std::cout << "Reading inflections..." << std::endl; | ||
166 | |||
167 | std::ifstream agidfile(argv[3]); | ||
168 | if (!agidfile.is_open()) | ||
169 | { | ||
170 | std::cout << "Could not open AGID file: " << argv[3] << std::endl; | ||
171 | print_usage(); | ||
172 | } | ||
173 | |||
174 | for (;;) | ||
175 | { | ||
176 | std::string line; | ||
177 | if (!getline(agidfile, line)) | ||
178 | { | ||
179 | break; | ||
180 | } | ||
181 | |||
182 | if (line.back() == '\r') | ||
183 | { | ||
184 | line.pop_back(); | ||
185 | } | ||
186 | |||
187 | int divider = line.find_first_of(" "); | ||
188 | std::string word = line.substr(0, divider); | ||
189 | line = line.substr(divider+1); | ||
190 | char type = line[0]; | ||
191 | |||
192 | if (line[1] == '?') | ||
193 | { | ||
194 | line.erase(0, 4); | ||
195 | } else { | ||
196 | line.erase(0, 3); | ||
197 | } | ||
198 | |||
199 | std::vector<std::string> forms; | ||
200 | while (!line.empty()) | ||
201 | { | ||
202 | std::string inflection; | ||
203 | if ((divider = line.find(" | ")) != std::string::npos) | ||
204 | { | ||
205 | inflection = line.substr(0, divider); | ||
206 | line = line.substr(divider + 3); | ||
207 | } else { | ||
208 | inflection = line; | ||
209 | line = ""; | ||
210 | } | ||
211 | |||
212 | if ((divider = inflection.find_first_of(",?")) != std::string::npos) | ||
213 | { | ||
214 | inflection = inflection.substr(0, divider); | ||
215 | } | ||
216 | |||
217 | forms.push_back(inflection); | ||
218 | } | ||
219 | |||
220 | switch (type) | ||
221 | { | ||
222 | case 'V': | ||
223 | { | ||
224 | verb v; | ||
225 | v.infinitive = word; | ||
226 | if (forms.size() == 4) | ||
227 | { | ||
228 | v.past_tense = forms[0]; | ||
229 | v.past_participle = forms[1]; | ||
230 | v.ing_form = forms[2]; | ||
231 | v.s_form = forms[3]; | ||
232 | } else if (forms.size() == 3) | ||
233 | { | ||
234 | v.past_tense = forms[0]; | ||
235 | v.past_participle = forms[0]; | ||
236 | v.ing_form = forms[1]; | ||
237 | v.s_form = forms[2]; | ||
238 | } else if (forms.size() == 8) | ||
239 | { | ||
240 | // As of AGID 2014.08.11, this is only "to be" | ||
241 | v.past_tense = forms[0]; | ||
242 | v.past_participle = forms[2]; | ||
243 | v.ing_form = forms[3]; | ||
244 | v.s_form = forms[4]; | ||
245 | } else { | ||
246 | // Words that don't fit the cases above as of AGID 2014.08.11: | ||
247 | // - may and shall do not conjugate the way we want them to | ||
248 | // - methinks only has a past tense and is an outlier | ||
249 | // - wit has five forms, and is archaic/obscure enough that we can ignore it for now | ||
250 | std::cout << "Ignoring verb \"" << word << "\" due to non-standard number of forms." << std::endl; | ||
251 | } | ||
252 | |||
253 | verbs[word] = v; | ||
254 | |||
255 | break; | ||
256 | } | ||
257 | |||
258 | case 'A': | ||
259 | { | ||
260 | adjective adj; | ||
261 | adj.base = word; | ||
262 | if (forms.size() == 2) | ||
263 | { | ||
264 | adj.comparative = forms[0]; | ||
265 | adj.superlative = forms[1]; | ||
266 | } else { | ||
267 | // As of AGID 2014.08.11, this is only "only", which has only the form "onliest" | ||
268 | std::cout << "Ignoring adjective/adverb \"" << word << "\" due to non-standard number of forms." << std::endl; | ||
269 | } | ||
270 | |||
271 | adjectives[word] = adj; | ||
272 | |||
273 | break; | ||
274 | } | ||
275 | |||
276 | case 'N': | ||
277 | { | ||
278 | noun n; | ||
279 | n.singular = word; | ||
280 | if (forms.size() == 1) | ||
281 | { | ||
282 | n.plural = forms[0]; | ||
283 | } else { | ||
284 | // As of AGID 2014.08.11, this is non-existent. | ||
285 | std::cout << "Ignoring noun \"" << word << "\" due to non-standard number of forms." << std::endl; | ||
286 | } | ||
287 | |||
288 | nouns[word] = n; | ||
289 | |||
290 | break; | ||
291 | } | ||
292 | } | ||
293 | } | ||
294 | |||
295 | // Pronounciations | ||
296 | std::cout << "Reading pronunciations..." << std::endl; | ||
297 | |||
298 | std::ifstream pronfile(argv[5]); | ||
299 | if (!pronfile.is_open()) | ||
300 | { | ||
301 | std::cout << "Could not open CMUDICT file: " << argv[5] << std::endl; | ||
302 | print_usage(); | ||
303 | } | ||
304 | |||
305 | for (;;) | ||
306 | { | ||
307 | std::string line; | ||
308 | if (!getline(pronfile, line)) | ||
309 | { | ||
310 | break; | ||
311 | } | ||
312 | |||
313 | if (line.back() == '\r') | ||
314 | { | ||
315 | line.pop_back(); | ||
316 | } | ||
317 | |||
318 | std::regex phoneme("([A-Z][^ \\(]*)(?:\\(\\d+\\))? ([A-Z 0-9]+)"); | ||
319 | std::smatch phoneme_data; | ||
320 | if (std::regex_search(line, phoneme_data, phoneme)) | ||
321 | { | ||
322 | std::string canonical(phoneme_data[1]); | ||
323 | std::transform(std::begin(canonical), std::end(canonical), std::begin(canonical), ::tolower); | ||
324 | |||
325 | pronunciations[canonical].insert(phoneme_data[2]); | ||
326 | } | ||
327 | } | ||
328 | |||
329 | // Start writing output | ||
330 | std::cout << "Writing schema..." << std::endl; | ||
331 | |||
332 | sqlite3* ppdb; | ||
333 | if (sqlite3_open_v2(argv[6], &ppdb, SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE, NULL) != SQLITE_OK) | ||
334 | { | ||
335 | std::cout << "Error opening output datafile: " << sqlite3_errmsg(ppdb) << std::endl; | ||
336 | print_usage(); | ||
337 | } | ||
338 | |||
339 | std::ifstream schemafile("schema.sql"); | ||
340 | if (!schemafile.is_open()) | ||
341 | { | ||
342 | std::cout << "Could not find schema file" << std::endl; | ||
343 | print_usage(); | ||
344 | } | ||
345 | |||
346 | std::stringstream schemabuilder; | ||
347 | for (;;) | ||
348 | { | ||
349 | std::string line; | ||
350 | if (!getline(schemafile, line)) | ||
351 | { | ||
352 | break; | ||
353 | } | ||
354 | |||
355 | if (line.back() == '\r') | ||
356 | { | ||
357 | line.pop_back(); | ||
358 | } | ||
359 | |||
360 | schemabuilder << line << std::endl; | ||
361 | } | ||
362 | |||
363 | std::string schema = schemabuilder.str(); | ||
364 | while (!schema.empty()) | ||
365 | { | ||
366 | std::string query; | ||
367 | int divider = schema.find(";"); | ||
368 | if (divider != std::string::npos) | ||
369 | { | ||
370 | query = schema.substr(0, divider+1); | ||
371 | schema = schema.substr(divider+2); | ||
372 | } else { | ||
373 | break; | ||
374 | } | ||
375 | |||
376 | sqlite3_stmt* schmstmt; | ||
377 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &schmstmt, NULL) != SQLITE_OK) | ||
378 | { | ||
379 | db_error(ppdb, query); | ||
380 | } | ||
381 | |||
382 | if (sqlite3_step(schmstmt) != SQLITE_DONE) | ||
383 | { | ||
384 | db_error(ppdb, query); | ||
385 | } | ||
386 | |||
387 | sqlite3_finalize(schmstmt); | ||
388 | } | ||
389 | |||
390 | { | ||
391 | progress ppgs("Writing verbs...", verbs.size()); | ||
392 | for (auto& mapping : verbs) | ||
393 | { | ||
394 | sqlite3_stmt* ppstmt; | ||
395 | std::string query("INSERT INTO verbs (infinitive, past_tense, past_participle, ing_form, s_form) VALUES (?, ?, ?, ?, ?)"); | ||
396 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
397 | { | ||
398 | db_error(ppdb, query); | ||
399 | } | ||
400 | |||
401 | sqlite3_bind_text(ppstmt, 1, mapping.second.infinitive.c_str(), mapping.second.infinitive.length(), SQLITE_STATIC); | ||
402 | sqlite3_bind_text(ppstmt, 2, mapping.second.past_tense.c_str(), mapping.second.past_tense.length(), SQLITE_STATIC); | ||
403 | sqlite3_bind_text(ppstmt, 3, mapping.second.past_participle.c_str(), mapping.second.past_participle.length(), SQLITE_STATIC); | ||
404 | sqlite3_bind_text(ppstmt, 4, mapping.second.ing_form.c_str(), mapping.second.ing_form.length(), SQLITE_STATIC); | ||
405 | sqlite3_bind_text(ppstmt, 5, mapping.second.s_form.c_str(), mapping.second.s_form.length(), SQLITE_STATIC); | ||
406 | |||
407 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | ||
408 | { | ||
409 | db_error(ppdb, query); | ||
410 | } | ||
411 | |||
412 | sqlite3_finalize(ppstmt); | ||
413 | |||
414 | std::string canonical(mapping.second.infinitive); | ||
415 | std::transform(std::begin(canonical), std::end(canonical), std::begin(canonical), ::tolower); | ||
416 | if (pronunciations.count(canonical) == 1) | ||
417 | { | ||
418 | query = "SELECT last_insert_rowid()"; | ||
419 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
420 | { | ||
421 | db_error(ppdb, query); | ||
422 | } | ||
423 | |||
424 | if (sqlite3_step(ppstmt) != SQLITE_ROW) | ||
425 | { | ||
426 | db_error(ppdb, query); | ||
427 | } | ||
428 | |||
429 | int rowid = sqlite3_column_int(ppstmt, 0); | ||
430 | |||
431 | sqlite3_finalize(ppstmt); | ||
432 | |||
433 | for (auto pronunciation : pronunciations[canonical]) | ||
434 | { | ||
435 | query = "INSERT INTO verb_pronunciations (verb_id, pronunciation) VALUES (?, ?)"; | ||
436 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
437 | { | ||
438 | db_error(ppdb, query); | ||
439 | } | ||
440 | |||
441 | sqlite3_bind_int(ppstmt, 1, rowid); | ||
442 | sqlite3_bind_text(ppstmt, 2, pronunciation.c_str(), pronunciation.length(), SQLITE_STATIC); | ||
443 | |||
444 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | ||
445 | { | ||
446 | db_error(ppdb, query); | ||
447 | } | ||
448 | |||
449 | sqlite3_finalize(ppstmt); | ||
450 | } | ||
451 | } | ||
452 | |||
453 | ppgs.update(); | ||
454 | } | ||
455 | } | ||
456 | |||
457 | // Get nouns/adjectives/adverbs from WordNet | ||
458 | // Useful relations: | ||
459 | // - s: master list | ||
460 | // - ant: antonymy (e.g. happy/sad, sad/happy, happiness/sadness) | ||
461 | // - at: variation (e.g. a measurement can be standard or nonstandard) | ||
462 | // - der: derivation (e.g. happy/happily, happily/happy) | ||
463 | // - hyp: hypernymy/hyponymy (e.g. color/red, color/blue) | ||
464 | // - ins: instantiation (do we need this? let's see) | ||
465 | // - mm: member meronymy/holonymy (e.g. family/mother, family/child) | ||
466 | // - mp: part meronymy/holonymy (e.g. wheel/spoke, wheel/tire) | ||
467 | // - ms: substance meronymy/holonymy (e.g. tire/rubber, doorstop/rubber) | ||
468 | // - per: pertainymy (e.g. something that is Alaskan pertains to Alaska) | ||
469 | // mannernymy (e.g. something done quickly is done in a manner that is quick) | ||
470 | // - sa: specification (e.g. inaccurate (general) can mean imprecise or incorrect (specific)) | ||
471 | // - sim: synonymy (e.g. cheerful/happy, happy/cheerful) | ||
472 | // - syntax: positioning flags for some adjectives | ||
473 | std::string wnpref {argv[4]}; | ||
474 | if (wnpref.back() != '/') | ||
475 | { | ||
476 | wnpref += '/'; | ||
477 | } | ||
478 | |||
479 | // s table | ||
480 | { | ||
481 | std::ifstream wnsfile(wnpref + "wn_s.pl"); | ||
482 | if (!wnsfile.is_open()) | ||
483 | { | ||
484 | std::cout << "Invalid WordNet data directory." << std::endl; | ||
485 | print_usage(); | ||
486 | } | ||
487 | |||
488 | std::list<std::string> lines; | ||
489 | for (;;) | ||
490 | { | ||
491 | std::string line; | ||
492 | if (!getline(wnsfile, line)) | ||
493 | { | ||
494 | break; | ||
495 | } | ||
496 | |||
497 | if (line.back() == '\r') | ||
498 | { | ||
499 | line.pop_back(); | ||
500 | } | ||
501 | |||
502 | lines.push_back(line); | ||
503 | } | ||
504 | |||
505 | progress ppgs("Writing nouns, adjectives, and adverbs...", lines.size()); | ||
506 | for (auto line : lines) | ||
507 | { | ||
508 | ppgs.update(); | ||
509 | |||
510 | std::regex relation("^s\\(([134]\\d{8}),(\\d+),'([\\w ]+)',"); | ||
511 | std::smatch relation_data; | ||
512 | if (!std::regex_search(line, relation_data, relation)) | ||
513 | { | ||
514 | continue; | ||
515 | } | ||
516 | |||
517 | int synset_id = stoi(relation_data[1]); | ||
518 | int wnum = stoi(relation_data[2]); | ||
519 | std::string word = relation_data[3]; | ||
520 | |||
521 | std::string query; | ||
522 | switch (synset_id / 100000000) | ||
523 | { | ||
524 | case 1: // Noun | ||
525 | { | ||
526 | if (nouns.count(word) == 1) | ||
527 | { | ||
528 | query = "INSERT INTO nouns (singular, plural) VALUES (?, ?)"; | ||
529 | } else { | ||
530 | query = "INSERT INTO nouns (singular) VALUES (?)"; | ||
531 | } | ||
532 | |||
533 | break; | ||
534 | } | ||
535 | |||
536 | case 2: // Verb | ||
537 | { | ||
538 | // Ignore | ||
539 | |||
540 | break; | ||
541 | } | ||
542 | |||
543 | case 3: // Adjective | ||
544 | { | ||
545 | if (adjectives.count(word) == 1) | ||
546 | { | ||
547 | query = "INSERT INTO adjectives (base_form, comparative, superlative) VALUES (?, ?, ?)"; | ||
548 | } else { | ||
549 | query = "INSERT INTO adjectives (base_form) VALUES (?)"; | ||
550 | } | ||
551 | |||
552 | break; | ||
553 | } | ||
554 | |||
555 | case 4: // Adverb | ||
556 | { | ||
557 | if (adjectives.count(word) == 1) | ||
558 | { | ||
559 | query = "INSERT INTO adverbs (base_form, comparative, superlative) VALUES (?, ?, ?)"; | ||
560 | } else { | ||
561 | query = "INSERT INTO adverbs (base_form) VALUES (?)"; | ||
562 | } | ||
563 | |||
564 | break; | ||
565 | } | ||
566 | } | ||
567 | |||
568 | sqlite3_stmt* ppstmt; | ||
569 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
570 | { | ||
571 | db_error(ppdb, query); | ||
572 | } | ||
573 | |||
574 | sqlite3_bind_text(ppstmt, 1, word.c_str(), word.length(), SQLITE_STATIC); | ||
575 | switch (synset_id / 100000000) | ||
576 | { | ||
577 | case 1: // Noun | ||
578 | { | ||
579 | if (nouns.count(word) == 1) | ||
580 | { | ||
581 | sqlite3_bind_text(ppstmt, 2, nouns[word].plural.c_str(), nouns[word].plural.length(), SQLITE_STATIC); | ||
582 | } | ||
583 | |||
584 | break; | ||
585 | } | ||
586 | |||
587 | case 3: // Adjective | ||
588 | case 4: // Adverb | ||
589 | { | ||
590 | if (adjectives.count(word) == 1) | ||
591 | { | ||
592 | sqlite3_bind_text(ppstmt, 2, adjectives[word].comparative.c_str(), adjectives[word].comparative.length(), SQLITE_STATIC); | ||
593 | sqlite3_bind_text(ppstmt, 3, adjectives[word].superlative.c_str(), adjectives[word].superlative.length(), SQLITE_STATIC); | ||
594 | } | ||
595 | |||
596 | break; | ||
597 | } | ||
598 | } | ||
599 | |||
600 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | ||
601 | { | ||
602 | db_error(ppdb, query); | ||
603 | } | ||
604 | |||
605 | sqlite3_finalize(ppstmt); | ||
606 | |||
607 | query = "SELECT last_insert_rowid()"; | ||
608 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
609 | { | ||
610 | db_error(ppdb, query); | ||
611 | } | ||
612 | |||
613 | if (sqlite3_step(ppstmt) != SQLITE_ROW) | ||
614 | { | ||
615 | db_error(ppdb, query); | ||
616 | } | ||
617 | |||
618 | int rowid = sqlite3_column_int(ppstmt, 0); | ||
619 | wn[synset_id][wnum] = rowid; | ||
620 | |||
621 | sqlite3_finalize(ppstmt); | ||
622 | |||
623 | std::string canonical(word); | ||
624 | std::transform(std::begin(canonical), std::end(canonical), std::begin(canonical), ::tolower); | ||
625 | if (pronunciations.count(canonical) == 1) | ||
626 | { | ||
627 | for (auto pronunciation : pronunciations[canonical]) | ||
628 | { | ||
629 | switch (synset_id / 100000000) | ||
630 | { | ||
631 | case 1: // Noun | ||
632 | { | ||
633 | query = "INSERT INTO noun_pronunciations (noun_id, pronunciation) VALUES (?, ?)"; | ||
634 | |||
635 | break; | ||
636 | } | ||
637 | |||
638 | case 3: // Adjective | ||
639 | { | ||
640 | query = "INSERT INTO adjective_pronunciations (adjective_id, pronunciation) VALUES (?, ?)"; | ||
641 | |||
642 | break; | ||
643 | } | ||
644 | |||
645 | case 4: // Adverb | ||
646 | { | ||
647 | query = "INSERT INTO adverb_pronunciations (adverb_id, pronunciation) VALUES (?, ?)"; | ||
648 | |||
649 | break; | ||
650 | } | ||
651 | } | ||
652 | |||
653 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
654 | { | ||
655 | db_error(ppdb, query); | ||
656 | } | ||
657 | |||
658 | sqlite3_bind_int(ppstmt, 1, rowid); | ||
659 | sqlite3_bind_text(ppstmt, 2, pronunciation.c_str(), pronunciation.length(), SQLITE_STATIC); | ||
660 | |||
661 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | ||
662 | { | ||
663 | db_error(ppdb, query); | ||
664 | } | ||
665 | |||
666 | sqlite3_finalize(ppstmt); | ||
667 | } | ||
668 | } | ||
669 | } | ||
670 | } | ||
671 | |||
672 | // While we're working on s | ||
673 | { | ||
674 | progress ppgs("Writing word synonyms...", wn.size()); | ||
675 | for (auto sense : wn) | ||
676 | { | ||
677 | ppgs.update(); | ||
678 | |||
679 | for (auto word1 : sense.second) | ||
680 | { | ||
681 | for (auto word2 : sense.second) | ||
682 | { | ||
683 | if (word1 != word2) | ||
684 | { | ||
685 | std::string query; | ||
686 | switch (sense.first / 100000000) | ||
687 | { | ||
688 | case 1: // Noun | ||
689 | { | ||
690 | query = "INSERT INTO noun_synonymy (noun_1_id, noun_2_id) VALUES (?, ?)"; | ||
691 | |||
692 | break; | ||
693 | } | ||
694 | |||
695 | case 2: // Verb | ||
696 | { | ||
697 | // Ignore | ||
698 | |||
699 | break; | ||
700 | } | ||
701 | |||
702 | case 3: // Adjective | ||
703 | { | ||
704 | query = "INSERT INTO adjective_synonymy (adjective_1_id, adjective_2_id) VALUES (?, ?)"; | ||
705 | |||
706 | break; | ||
707 | } | ||
708 | |||
709 | case 4: // Adverb | ||
710 | { | ||
711 | query = "INSERT INTO adverb_synonymy (adverb_1_id, adverb_2_id) VALUES (?, ?)"; | ||
712 | |||
713 | break; | ||
714 | } | ||
715 | } | ||
716 | |||
717 | sqlite3_stmt* ppstmt; | ||
718 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
719 | { | ||
720 | db_error(ppdb, query); | ||
721 | } | ||
722 | |||
723 | sqlite3_bind_int(ppstmt, 1, word1.second); | ||
724 | sqlite3_bind_int(ppstmt, 2, word2.second); | ||
725 | |||
726 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | ||
727 | { | ||
728 | db_error(ppdb, query); | ||
729 | } | ||
730 | |||
731 | sqlite3_finalize(ppstmt); | ||
732 | } | ||
733 | } | ||
734 | } | ||
735 | } | ||
736 | } | ||
737 | |||
738 | // ant table | ||
739 | { | ||
740 | std::ifstream wnantfile(wnpref + "wn_ant.pl"); | ||
741 | if (!wnantfile.is_open()) | ||
742 | { | ||
743 | std::cout << "Invalid WordNet data directory." << std::endl; | ||
744 | print_usage(); | ||
745 | } | ||
746 | |||
747 | std::list<std::string> lines; | ||
748 | for (;;) | ||
749 | { | ||
750 | std::string line; | ||
751 | if (!getline(wnantfile, line)) | ||
752 | { | ||
753 | break; | ||
754 | } | ||
755 | |||
756 | if (line.back() == '\r') | ||
757 | { | ||
758 | line.pop_back(); | ||
759 | } | ||
760 | |||
761 | lines.push_back(line); | ||
762 | } | ||
763 | |||
764 | progress ppgs("Writing antonyms...", lines.size()); | ||
765 | for (auto line : lines) | ||
766 | { | ||
767 | ppgs.update(); | ||
768 | |||
769 | std::regex relation("^ant\\(([134]\\d{8}),(\\d+),([134]\\d{8}),(\\d+)\\)\\."); | ||
770 | std::smatch relation_data; | ||
771 | if (!std::regex_search(line, relation_data, relation)) | ||
772 | { | ||
773 | continue; | ||
774 | } | ||
775 | |||
776 | int synset_id_1 = stoi(relation_data[1]); | ||
777 | int wnum_1 = stoi(relation_data[2]); | ||
778 | int synset_id_2 = stoi(relation_data[3]); | ||
779 | int wnum_2 = stoi(relation_data[4]); | ||
780 | |||
781 | std::string query; | ||
782 | switch (synset_id_1 / 100000000) | ||
783 | { | ||
784 | case 1: // Noun | ||
785 | { | ||
786 | query = "INSERT INTO noun_antonymy (noun_1_id, noun_2_id) VALUES (?, ?)"; | ||
787 | |||
788 | break; | ||
789 | } | ||
790 | |||
791 | case 2: // Verb | ||
792 | { | ||
793 | // Ignore | ||
794 | |||
795 | break; | ||
796 | } | ||
797 | |||
798 | case 3: // Adjective | ||
799 | { | ||
800 | query = "INSERT INTO adjective_antonymy (adjective_1_id, adjective_2_id) VALUES (?, ?)"; | ||
801 | |||
802 | break; | ||
803 | } | ||
804 | |||
805 | case 4: // Adverb | ||
806 | { | ||
807 | query = "INSERT INTO adverb_antonymy (adverb_1_id, adverb_2_id) VALUES (?, ?)"; | ||
808 | |||
809 | break; | ||
810 | } | ||
811 | } | ||
812 | |||
813 | sqlite3_stmt* ppstmt; | ||
814 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
815 | { | ||
816 | db_error(ppdb, query); | ||
817 | } | ||
818 | |||
819 | sqlite3_bind_int(ppstmt, 1, wn[synset_id_1][wnum_1]); | ||
820 | sqlite3_bind_int(ppstmt, 2, wn[synset_id_2][wnum_2]); | ||
821 | |||
822 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | ||
823 | { | ||
824 | db_error(ppdb, query); | ||
825 | } | ||
826 | |||
827 | sqlite3_finalize(ppstmt); | ||
828 | } | ||
829 | } | ||
830 | |||
831 | // at table | ||
832 | { | ||
833 | std::ifstream wnatfile(wnpref + "wn_at.pl"); | ||
834 | if (!wnatfile.is_open()) | ||
835 | { | ||
836 | std::cout << "Invalid WordNet data directory." << std::endl; | ||
837 | print_usage(); | ||
838 | } | ||
839 | |||
840 | std::list<std::string> lines; | ||
841 | for (;;) | ||
842 | { | ||
843 | std::string line; | ||
844 | if (!getline(wnatfile, line)) | ||
845 | { | ||
846 | break; | ||
847 | } | ||
848 | |||
849 | if (line.back() == '\r') | ||
850 | { | ||
851 | line.pop_back(); | ||
852 | } | ||
853 | |||
854 | lines.push_back(line); | ||
855 | } | ||
856 | |||
857 | progress ppgs("Writing variations...", lines.size()); | ||
858 | for (auto line : lines) | ||
859 | { | ||
860 | ppgs.update(); | ||
861 | |||
862 | std::regex relation("^at\\((1\\d{8}),(3\\d{8})\\)\\."); | ||
863 | std::smatch relation_data; | ||
864 | if (!std::regex_search(line, relation_data, relation)) | ||
865 | { | ||
866 | continue; | ||
867 | } | ||
868 | |||
869 | int synset_id_1 = stoi(relation_data[1]); | ||
870 | int synset_id_2 = stoi(relation_data[2]); | ||
871 | std::string query("INSERT INTO variation (noun_id, adjective_id) VALUES (?, ?)"); | ||
872 | |||
873 | for (auto mapping1 : wn[synset_id_1]) | ||
874 | { | ||
875 | for (auto mapping2 : wn[synset_id_2]) | ||
876 | { | ||
877 | sqlite3_stmt* ppstmt; | ||
878 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK) | ||
879 | { | ||
880 | db_error(ppdb, query); | ||
881 | } | ||
882 | |||
883 | sqlite3_bind_int(ppstmt, 1, mapping1.second); | ||
884 | sqlite3_bind_int(ppstmt, 2, mapping2.second); | ||
885 | |||
886 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | ||
887 | { | ||
888 | db_error(ppdb, query); | ||
889 | } | ||
890 | |||
891 | sqlite3_finalize(ppstmt); | ||
892 | } | ||
893 | } | ||
894 | } | ||
895 | } | ||
896 | |||
897 | // der table | ||
898 | { | ||
899 | std::ifstream wnderfile(wnpref + "wn_der.pl"); | ||
900 | if (!wnderfile.is_open()) | ||
901 | { | ||
902 | std::cout << "Invalid WordNet data directory." << std::endl; | ||
903 | print_usage(); | ||
904 | } | ||
905 | |||
906 | std::list<std::string> lines; | ||
907 | for (;;) | ||
908 | { | ||
909 | std::string line; | ||
910 | if (!getline(wnderfile, line)) | ||
911 | { | ||
912 | break; | ||
913 | } | ||
914 | |||
915 | if (line.back() == '\r') | ||
916 | { | ||
917 | line.pop_back(); | ||
918 | } | ||
919 | |||
920 | lines.push_back(line); | ||
921 | } | ||
922 | |||
923 | progress ppgs("Writing morphological derivation...", lines.size()); | ||
924 | for (auto line : lines) | ||
925 | { | ||
926 | ppgs.update(); | ||
927 | |||
928 | std::regex relation("^der\\(([134]\\d{8}),(\\d+),([134]\\d{8}),(\\d+)\\)\\."); | ||
929 | std::smatch relation_data; | ||
930 | if (!std::regex_search(line, relation_data, relation)) | ||
931 | { | ||
932 | continue; | ||
933 | } | ||
934 | |||
935 | int synset_id_1 = stoi(relation_data[1]); | ||
936 | int wnum_1 = stoi(relation_data[2]); | ||
937 | int synset_id_2 = stoi(relation_data[3]); | ||
938 | int wnum_2 = stoi(relation_data[4]); | ||
939 | std::string query; | ||
940 | switch (synset_id_1 / 100000000) | ||
941 | { | ||
942 | case 1: // Noun | ||
943 | { | ||
944 | switch (synset_id_2 / 100000000) | ||
945 | { | ||
946 | case 1: // Noun | ||
947 | { | ||
948 | query = "INSERT INTO noun_noun_derivation (noun_1_id, noun_2_id) VALUES (?, ?)"; | ||
949 | break; | ||
950 | } | ||
951 | |||
952 | case 3: // Adjective | ||
953 | { | ||
954 | query = "INSERT INTO noun_adjective_derivation (noun_id, adjective_id) VALUES (?, ?)"; | ||
955 | break; | ||
956 | } | ||
957 | |||
958 | case 4: // Adverb | ||
959 | { | ||
960 | query = "INSERT INTO noun_adverb_derivation (noun_id, adverb_id) VALUES (?, ?)"; | ||
961 | break; | ||
962 | } | ||
963 | } | ||
964 | |||
965 | break; | ||
966 | } | ||
967 | |||
968 | case 3: // Adjective | ||
969 | { | ||
970 | switch (synset_id_2 / 100000000) | ||
971 | { | ||
972 | case 1: // Noun | ||
973 | { | ||
974 | query = "INSERT INTO noun_adjective_derivation (adjective_id, noun_id) VALUES (?, ?)"; | ||
975 | break; | ||
976 | } | ||
977 | |||
978 | case 3: // Adjective | ||
979 | { | ||
980 | query = "INSERT INTO adjective_adjective_derivation (adjective_id, adjective_id) VALUES (?, ?)"; | ||
981 | break; | ||
982 | } | ||
983 | |||
984 | case 4: // Adverb | ||
985 | { | ||
986 | query = "INSERT INTO adjective_adverb_derivation (adjective_id, adverb_id) VALUES (?, ?)"; | ||
987 | break; | ||
988 | } | ||
989 | } | ||
990 | |||
991 | break; | ||
992 | } | ||
993 | |||
994 | case 4: // Adverb | ||
995 | { | ||
996 | switch (synset_id_2 / 100000000) | ||
997 | { | ||
998 | case 1: // Noun | ||
999 | { | ||
1000 | query = "INSERT INTO noun_adverb_derivation (adverb_id, noun_id) VALUES (?, ?)"; | ||
1001 | break; | ||
1002 | } | ||
1003 | |||
1004 | case 3: // Adjective | ||
1005 | { | ||
1006 | query = "INSERT INTO adjective_adverb_derivation (adverb_id, adjective_id) VALUES (?, ?)"; | ||
1007 | break; | ||
1008 | } | ||
1009 | |||
1010 | case 4: // Adverb | ||
1011 | { | ||
1012 | query = "INSERT INTO adverb_adverb_derivation (adverb_1_id, adverb_2_id) VALUES (?, ?)"; | ||
1013 | break; | ||
1014 | } | ||
1015 | } | ||
1016 | |||
1017 | break; | ||
1018 | } | ||
1019 | } | ||
1020 | |||
1021 | sqlite3_stmt* ppstmt; | ||
1022 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK) | ||
1023 | { | ||
1024 | db_error(ppdb, query); | ||
1025 | } | ||
1026 | |||
1027 | sqlite3_bind_int(ppstmt, 1, wn[synset_id_1][wnum_1]); | ||
1028 | sqlite3_bind_int(ppstmt, 2, wn[synset_id_2][wnum_2]); | ||
1029 | |||
1030 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | ||
1031 | { | ||
1032 | db_error(ppdb, query); | ||
1033 | } | ||
1034 | |||
1035 | sqlite3_finalize(ppstmt); | ||
1036 | } | ||
1037 | } | ||
1038 | |||
1039 | // hyp table | ||
1040 | { | ||
1041 | std::ifstream wnhypfile(wnpref + "wn_hyp.pl"); | ||
1042 | if (!wnhypfile.is_open()) | ||
1043 | { | ||
1044 | std::cout << "Invalid WordNet data directory." << std::endl; | ||
1045 | print_usage(); | ||
1046 | } | ||
1047 | |||
1048 | std::list<std::string> lines; | ||
1049 | for (;;) | ||
1050 | { | ||
1051 | std::string line; | ||
1052 | if (!getline(wnhypfile, line)) | ||
1053 | { | ||
1054 | break; | ||
1055 | } | ||
1056 | |||
1057 | if (line.back() == '\r') | ||
1058 | { | ||
1059 | line.pop_back(); | ||
1060 | } | ||
1061 | |||
1062 | lines.push_back(line); | ||
1063 | } | ||
1064 | |||
1065 | progress ppgs("Writing hypernyms...", lines.size()); | ||
1066 | for (auto line : lines) | ||
1067 | { | ||
1068 | ppgs.update(); | ||
1069 | |||
1070 | std::regex relation("^hyp\\((1\\d{8}),(1\\d{8})\\)\\."); | ||
1071 | std::smatch relation_data; | ||
1072 | if (!std::regex_search(line, relation_data, relation)) | ||
1073 | { | ||
1074 | continue; | ||
1075 | } | ||
1076 | |||
1077 | int synset_id_1 = stoi(relation_data[1]); | ||
1078 | int synset_id_2 = stoi(relation_data[2]); | ||
1079 | std::string query("INSERT INTO hypernymy (hyponym_id, hypernym_id) VALUES (?, ?)"); | ||
1080 | |||
1081 | for (auto mapping1 : wn[synset_id_1]) | ||
1082 | { | ||
1083 | for (auto mapping2 : wn[synset_id_2]) | ||
1084 | { | ||
1085 | sqlite3_stmt* ppstmt; | ||
1086 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK) | ||
1087 | { | ||
1088 | db_error(ppdb, query); | ||
1089 | } | ||
1090 | |||
1091 | sqlite3_bind_int(ppstmt, 1, mapping1.second); | ||
1092 | sqlite3_bind_int(ppstmt, 2, mapping2.second); | ||
1093 | |||
1094 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | ||
1095 | { | ||
1096 | db_error(ppdb, query); | ||
1097 | } | ||
1098 | |||
1099 | sqlite3_finalize(ppstmt); | ||
1100 | } | ||
1101 | } | ||
1102 | } | ||
1103 | } | ||
1104 | |||
1105 | // ins table | ||
1106 | { | ||
1107 | std::ifstream wninsfile(wnpref + "wn_ins.pl"); | ||
1108 | if (!wninsfile.is_open()) | ||
1109 | { | ||
1110 | std::cout << "Invalid WordNet data directory." << std::endl; | ||
1111 | print_usage(); | ||
1112 | } | ||
1113 | |||
1114 | std::list<std::string> lines; | ||
1115 | for (;;) | ||
1116 | { | ||
1117 | std::string line; | ||
1118 | if (!getline(wninsfile, line)) | ||
1119 | { | ||
1120 | break; | ||
1121 | } | ||
1122 | |||
1123 | if (line.back() == '\r') | ||
1124 | { | ||
1125 | line.pop_back(); | ||
1126 | } | ||
1127 | |||
1128 | lines.push_back(line); | ||
1129 | } | ||
1130 | |||
1131 | progress ppgs("Writing instantiations...", lines.size()); | ||
1132 | for (auto line : lines) | ||
1133 | { | ||
1134 | ppgs.update(); | ||
1135 | |||
1136 | std::regex relation("^ins\\((1\\d{8}),(1\\d{8})\\)\\."); | ||
1137 | std::smatch relation_data; | ||
1138 | if (!std::regex_search(line, relation_data, relation)) | ||
1139 | { | ||
1140 | continue; | ||
1141 | } | ||
1142 | |||
1143 | int synset_id_1 = stoi(relation_data[1]); | ||
1144 | int synset_id_2 = stoi(relation_data[2]); | ||
1145 | std::string query("INSERT INTO instantiation (instance_id, class_id) VALUES (?, ?)"); | ||
1146 | |||
1147 | for (auto mapping1 : wn[synset_id_1]) | ||
1148 | { | ||
1149 | for (auto mapping2 : wn[synset_id_2]) | ||
1150 | { | ||
1151 | sqlite3_stmt* ppstmt; | ||
1152 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK) | ||
1153 | { | ||
1154 | db_error(ppdb, query); | ||
1155 | } | ||
1156 | |||
1157 | sqlite3_bind_int(ppstmt, 1, mapping1.second); | ||
1158 | sqlite3_bind_int(ppstmt, 2, mapping2.second); | ||
1159 | |||
1160 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | ||
1161 | { | ||
1162 | db_error(ppdb, query); | ||
1163 | } | ||
1164 | |||
1165 | sqlite3_finalize(ppstmt); | ||
1166 | } | ||
1167 | } | ||
1168 | } | ||
1169 | } | ||
1170 | |||
1171 | // mm table | ||
1172 | { | ||
1173 | std::ifstream wnmmfile(wnpref + "wn_mm.pl"); | ||
1174 | if (!wnmmfile.is_open()) | ||
1175 | { | ||
1176 | std::cout << "Invalid WordNet data directory." << std::endl; | ||
1177 | print_usage(); | ||
1178 | } | ||
1179 | |||
1180 | std::list<std::string> lines; | ||
1181 | for (;;) | ||
1182 | { | ||
1183 | std::string line; | ||
1184 | if (!getline(wnmmfile, line)) | ||
1185 | { | ||
1186 | break; | ||
1187 | } | ||
1188 | |||
1189 | if (line.back() == '\r') | ||
1190 | { | ||
1191 | line.pop_back(); | ||
1192 | } | ||
1193 | |||
1194 | lines.push_back(line); | ||
1195 | } | ||
1196 | |||
1197 | progress ppgs("Writing member meronyms...", lines.size()); | ||
1198 | for (auto line : lines) | ||
1199 | { | ||
1200 | ppgs.update(); | ||
1201 | |||
1202 | std::regex relation("^mm\\((1\\d{8}),(1\\d{8})\\)\\."); | ||
1203 | std::smatch relation_data; | ||
1204 | if (!std::regex_search(line, relation_data, relation)) | ||
1205 | { | ||
1206 | continue; | ||
1207 | } | ||
1208 | |||
1209 | int synset_id_1 = stoi(relation_data[1]); | ||
1210 | int synset_id_2 = stoi(relation_data[2]); | ||
1211 | std::string query("INSERT INTO member_meronymy (holonym_id, meronym_id) VALUES (?, ?)"); | ||
1212 | |||
1213 | for (auto mapping1 : wn[synset_id_1]) | ||
1214 | { | ||
1215 | for (auto mapping2 : wn[synset_id_2]) | ||
1216 | { | ||
1217 | sqlite3_stmt* ppstmt; | ||
1218 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK) | ||
1219 | { | ||
1220 | db_error(ppdb, query); | ||
1221 | } | ||
1222 | |||
1223 | sqlite3_bind_int(ppstmt, 1, mapping1.second); | ||
1224 | sqlite3_bind_int(ppstmt, 2, mapping2.second); | ||
1225 | |||
1226 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | ||
1227 | { | ||
1228 | db_error(ppdb, query); | ||
1229 | } | ||
1230 | |||
1231 | sqlite3_finalize(ppstmt); | ||
1232 | } | ||
1233 | } | ||
1234 | } | ||
1235 | } | ||
1236 | |||
1237 | // ms table | ||
1238 | { | ||
1239 | std::ifstream wnmsfile(wnpref + "wn_ms.pl"); | ||
1240 | if (!wnmsfile.is_open()) | ||
1241 | { | ||
1242 | std::cout << "Invalid WordNet data directory." << std::endl; | ||
1243 | print_usage(); | ||
1244 | } | ||
1245 | |||
1246 | std::list<std::string> lines; | ||
1247 | for (;;) | ||
1248 | { | ||
1249 | std::string line; | ||
1250 | if (!getline(wnmsfile, line)) | ||
1251 | { | ||
1252 | break; | ||
1253 | } | ||
1254 | |||
1255 | if (line.back() == '\r') | ||
1256 | { | ||
1257 | line.pop_back(); | ||
1258 | } | ||
1259 | |||
1260 | lines.push_back(line); | ||
1261 | } | ||
1262 | |||
1263 | progress ppgs("Writing substance meronyms...", lines.size()); | ||
1264 | for (auto line : lines) | ||
1265 | { | ||
1266 | ppgs.update(); | ||
1267 | |||
1268 | std::regex relation("^ms\\((1\\d{8}),(1\\d{8})\\)\\."); | ||
1269 | std::smatch relation_data; | ||
1270 | if (!std::regex_search(line, relation_data, relation)) | ||
1271 | { | ||
1272 | continue; | ||
1273 | } | ||
1274 | |||
1275 | int synset_id_1 = stoi(relation_data[1]); | ||
1276 | int synset_id_2 = stoi(relation_data[2]); | ||
1277 | std::string query("INSERT INTO substance_meronymy (holonym_id, meronym_id) VALUES (?, ?)"); | ||
1278 | |||
1279 | for (auto mapping1 : wn[synset_id_1]) | ||
1280 | { | ||
1281 | for (auto mapping2 : wn[synset_id_2]) | ||
1282 | { | ||
1283 | sqlite3_stmt* ppstmt; | ||
1284 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK) | ||
1285 | { | ||
1286 | db_error(ppdb, query); | ||
1287 | } | ||
1288 | |||
1289 | sqlite3_bind_int(ppstmt, 1, mapping1.second); | ||
1290 | sqlite3_bind_int(ppstmt, 2, mapping2.second); | ||
1291 | |||
1292 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | ||
1293 | { | ||
1294 | db_error(ppdb, query); | ||
1295 | } | ||
1296 | |||
1297 | sqlite3_finalize(ppstmt); | ||
1298 | } | ||
1299 | } | ||
1300 | } | ||
1301 | } | ||
1302 | |||
1303 | // mm table | ||
1304 | { | ||
1305 | std::ifstream wnmpfile(wnpref + "wn_mp.pl"); | ||
1306 | if (!wnmpfile.is_open()) | ||
1307 | { | ||
1308 | std::cout << "Invalid WordNet data directory." << std::endl; | ||
1309 | print_usage(); | ||
1310 | } | ||
1311 | |||
1312 | std::list<std::string> lines; | ||
1313 | for (;;) | ||
1314 | { | ||
1315 | std::string line; | ||
1316 | if (!getline(wnmpfile, line)) | ||
1317 | { | ||
1318 | break; | ||
1319 | } | ||
1320 | |||
1321 | if (line.back() == '\r') | ||
1322 | { | ||
1323 | line.pop_back(); | ||
1324 | } | ||
1325 | |||
1326 | lines.push_back(line); | ||
1327 | } | ||
1328 | |||
1329 | progress ppgs("Writing part meronyms...", lines.size()); | ||
1330 | for (auto line : lines) | ||
1331 | { | ||
1332 | ppgs.update(); | ||
1333 | |||
1334 | std::regex relation("^mp\\((1\\d{8}),(1\\d{8})\\)\\."); | ||
1335 | std::smatch relation_data; | ||
1336 | if (!std::regex_search(line, relation_data, relation)) | ||
1337 | { | ||
1338 | continue; | ||
1339 | } | ||
1340 | |||
1341 | int synset_id_1 = stoi(relation_data[1]); | ||
1342 | int synset_id_2 = stoi(relation_data[2]); | ||
1343 | std::string query("INSERT INTO part_meronymy (holonym_id, meronym_id) VALUES (?, ?)"); | ||
1344 | |||
1345 | for (auto mapping1 : wn[synset_id_1]) | ||
1346 | { | ||
1347 | for (auto mapping2 : wn[synset_id_2]) | ||
1348 | { | ||
1349 | sqlite3_stmt* ppstmt; | ||
1350 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK) | ||
1351 | { | ||
1352 | db_error(ppdb, query); | ||
1353 | } | ||
1354 | |||
1355 | sqlite3_bind_int(ppstmt, 1, mapping1.second); | ||
1356 | sqlite3_bind_int(ppstmt, 2, mapping2.second); | ||
1357 | |||
1358 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | ||
1359 | { | ||
1360 | db_error(ppdb, query); | ||
1361 | } | ||
1362 | |||
1363 | sqlite3_finalize(ppstmt); | ||
1364 | } | ||
1365 | } | ||
1366 | } | ||
1367 | } | ||
1368 | |||
1369 | // per table | ||
1370 | { | ||
1371 | std::ifstream wnperfile(wnpref + "wn_per.pl"); | ||
1372 | if (!wnperfile.is_open()) | ||
1373 | { | ||
1374 | std::cout << "Invalid WordNet data directory." << std::endl; | ||
1375 | print_usage(); | ||
1376 | } | ||
1377 | |||
1378 | std::list<std::string> lines; | ||
1379 | for (;;) | ||
1380 | { | ||
1381 | std::string line; | ||
1382 | if (!getline(wnperfile, line)) | ||
1383 | { | ||
1384 | break; | ||
1385 | } | ||
1386 | |||
1387 | if (line.back() == '\r') | ||
1388 | { | ||
1389 | line.pop_back(); | ||
1390 | } | ||
1391 | |||
1392 | lines.push_back(line); | ||
1393 | } | ||
1394 | |||
1395 | progress ppgs("Writing pertainyms and mannernyms...", lines.size()); | ||
1396 | for (auto line : lines) | ||
1397 | { | ||
1398 | ppgs.update(); | ||
1399 | |||
1400 | std::regex relation("^per\\(([34]\\d{8}),(\\d+),([13]\\d{8}),(\\d+)\\)\\."); | ||
1401 | std::smatch relation_data; | ||
1402 | if (!std::regex_search(line, relation_data, relation)) | ||
1403 | { | ||
1404 | continue; | ||
1405 | } | ||
1406 | |||
1407 | int synset_id_1 = stoi(relation_data[1]); | ||
1408 | int wnum_1 = stoi(relation_data[2]); | ||
1409 | int synset_id_2 = stoi(relation_data[3]); | ||
1410 | int wnum_2 = stoi(relation_data[4]); | ||
1411 | std::string query; | ||
1412 | switch (synset_id_1 / 100000000) | ||
1413 | { | ||
1414 | case 3: // Adjective | ||
1415 | { | ||
1416 | // This is a pertainym, the second word should be a noun | ||
1417 | // Technically it can be an adjective but we're ignoring that | ||
1418 | if (synset_id_2 / 100000000 != 1) | ||
1419 | { | ||
1420 | continue; | ||
1421 | } | ||
1422 | |||
1423 | query = "INSERT INTO pertainymy (pertainym_id, noun_id) VALUES (?, ?)"; | ||
1424 | |||
1425 | break; | ||
1426 | } | ||
1427 | |||
1428 | case 4: // Adverb | ||
1429 | { | ||
1430 | // This is a mannernym, the second word should be an adjective | ||
1431 | if (synset_id_2 / 100000000 != 3) | ||
1432 | { | ||
1433 | continue; | ||
1434 | } | ||
1435 | |||
1436 | query = "INSERT INTO mannernymy (mannernym_id, adjective_id) VALUES (?, ?)"; | ||
1437 | |||
1438 | break; | ||
1439 | } | ||
1440 | } | ||
1441 | |||
1442 | sqlite3_stmt* ppstmt; | ||
1443 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
1444 | { | ||
1445 | db_error(ppdb, query); | ||
1446 | } | ||
1447 | |||
1448 | sqlite3_bind_int(ppstmt, 1, wn[synset_id_1][wnum_1]); | ||
1449 | sqlite3_bind_int(ppstmt, 2, wn[synset_id_2][wnum_2]); | ||
1450 | |||
1451 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | ||
1452 | { | ||
1453 | db_error(ppdb, query); | ||
1454 | } | ||
1455 | |||
1456 | sqlite3_finalize(ppstmt); | ||
1457 | } | ||
1458 | } | ||
1459 | |||
1460 | // sa table | ||
1461 | { | ||
1462 | std::ifstream wnsafile(wnpref + "wn_sa.pl"); | ||
1463 | if (!wnsafile.is_open()) | ||
1464 | { | ||
1465 | std::cout << "Invalid WordNet data directory." << std::endl; | ||
1466 | print_usage(); | ||
1467 | } | ||
1468 | |||
1469 | std::list<std::string> lines; | ||
1470 | for (;;) | ||
1471 | { | ||
1472 | std::string line; | ||
1473 | if (!getline(wnsafile, line)) | ||
1474 | { | ||
1475 | break; | ||
1476 | } | ||
1477 | |||
1478 | if (line.back() == '\r') | ||
1479 | { | ||
1480 | line.pop_back(); | ||
1481 | } | ||
1482 | |||
1483 | lines.push_back(line); | ||
1484 | } | ||
1485 | |||
1486 | progress ppgs("Writing specifications...", lines.size()); | ||
1487 | for (auto line : lines) | ||
1488 | { | ||
1489 | ppgs.update(); | ||
1490 | |||
1491 | std::regex relation("^per\\((3\\d{8}),(\\d+),(3\\d{8}),(\\d+)\\)\\."); | ||
1492 | std::smatch relation_data; | ||
1493 | if (!std::regex_search(line, relation_data, relation)) | ||
1494 | { | ||
1495 | continue; | ||
1496 | } | ||
1497 | |||
1498 | int synset_id_1 = stoi(relation_data[1]); | ||
1499 | int wnum_1 = stoi(relation_data[2]); | ||
1500 | int synset_id_2 = stoi(relation_data[3]); | ||
1501 | int wnum_2 = stoi(relation_data[4]); | ||
1502 | std::string query("INSERT INTO specification (general_id, specific_id) VALUES (?, ?)"); | ||
1503 | |||
1504 | sqlite3_stmt* ppstmt; | ||
1505 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
1506 | { | ||
1507 | db_error(ppdb, query); | ||
1508 | } | ||
1509 | |||
1510 | sqlite3_bind_int(ppstmt, 1, wn[synset_id_1][wnum_1]); | ||
1511 | sqlite3_bind_int(ppstmt, 2, wn[synset_id_2][wnum_2]); | ||
1512 | |||
1513 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | ||
1514 | { | ||
1515 | db_error(ppdb, query); | ||
1516 | } | ||
1517 | |||
1518 | sqlite3_finalize(ppstmt); | ||
1519 | } | ||
1520 | } | ||
1521 | |||
1522 | // sim table | ||
1523 | { | ||
1524 | std::ifstream wnsimfile(wnpref + "wn_sim.pl"); | ||
1525 | if (!wnsimfile.is_open()) | ||
1526 | { | ||
1527 | std::cout << "Invalid WordNet data directory." << std::endl; | ||
1528 | print_usage(); | ||
1529 | } | ||
1530 | |||
1531 | std::list<std::string> lines; | ||
1532 | for (;;) | ||
1533 | { | ||
1534 | std::string line; | ||
1535 | if (!getline(wnsimfile, line)) | ||
1536 | { | ||
1537 | break; | ||
1538 | } | ||
1539 | |||
1540 | if (line.back() == '\r') | ||
1541 | { | ||
1542 | line.pop_back(); | ||
1543 | } | ||
1544 | |||
1545 | lines.push_back(line); | ||
1546 | } | ||
1547 | |||
1548 | progress ppgs("Writing sense synonyms...", lines.size()); | ||
1549 | for (auto line : lines) | ||
1550 | { | ||
1551 | ppgs.update(); | ||
1552 | |||
1553 | std::regex relation("^sim\\((3\\d{8}),(3\\d{8})\\)\\."); | ||
1554 | std::smatch relation_data; | ||
1555 | if (!std::regex_search(line, relation_data, relation)) | ||
1556 | { | ||
1557 | continue; | ||
1558 | } | ||
1559 | |||
1560 | int synset_id_1 = stoi(relation_data[1]); | ||
1561 | int synset_id_2 = stoi(relation_data[2]); | ||
1562 | std::string query("INSERT INTO adjective_synonymy (adjective_1_id, adjective_2_id) VALUES (?, ?)"); | ||
1563 | |||
1564 | for (auto mapping1 : wn[synset_id_1]) | ||
1565 | { | ||
1566 | for (auto mapping2 : wn[synset_id_2]) | ||
1567 | { | ||
1568 | sqlite3_stmt* ppstmt; | ||
1569 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK) | ||
1570 | { | ||
1571 | db_error(ppdb, query); | ||
1572 | } | ||
1573 | |||
1574 | sqlite3_bind_int(ppstmt, 1, mapping1.second); | ||
1575 | sqlite3_bind_int(ppstmt, 2, mapping2.second); | ||
1576 | |||
1577 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | ||
1578 | { | ||
1579 | db_error(ppdb, query); | ||
1580 | } | ||
1581 | |||
1582 | sqlite3_reset(ppstmt); | ||
1583 | sqlite3_clear_bindings(ppstmt); | ||
1584 | |||
1585 | sqlite3_bind_int(ppstmt, 1, mapping2.second); | ||
1586 | sqlite3_bind_int(ppstmt, 2, mapping1.second); | ||
1587 | |||
1588 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | ||
1589 | { | ||
1590 | db_error(ppdb, query); | ||
1591 | } | ||
1592 | |||
1593 | sqlite3_finalize(ppstmt); | ||
1594 | } | ||
1595 | } | ||
1596 | } | ||
1597 | } | ||
1598 | |||
1599 | // syntax table | ||
1600 | { | ||
1601 | std::ifstream wnsyntaxfile(wnpref + "wn_syntax.pl"); | ||
1602 | if (!wnsyntaxfile.is_open()) | ||
1603 | { | ||
1604 | std::cout << "Invalid WordNet data directory." << std::endl; | ||
1605 | print_usage(); | ||
1606 | } | ||
1607 | |||
1608 | std::list<std::string> lines; | ||
1609 | for (;;) | ||
1610 | { | ||
1611 | std::string line; | ||
1612 | if (!getline(wnsyntaxfile, line)) | ||
1613 | { | ||
1614 | break; | ||
1615 | } | ||
1616 | |||
1617 | if (line.back() == '\r') | ||
1618 | { | ||
1619 | line.pop_back(); | ||
1620 | } | ||
1621 | |||
1622 | lines.push_back(line); | ||
1623 | } | ||
1624 | |||
1625 | progress ppgs("Writing adjective syntax markers...", lines.size()); | ||
1626 | for (auto line : lines) | ||
1627 | { | ||
1628 | ppgs.update(); | ||
1629 | |||
1630 | std::regex relation("^syntax\\((3\\d{8}),(\\d+),([ipa])p?\\)\\."); | ||
1631 | std::smatch relation_data; | ||
1632 | if (!std::regex_search(line, relation_data, relation)) | ||
1633 | { | ||
1634 | continue; | ||
1635 | } | ||
1636 | |||
1637 | int synset_id = stoi(relation_data[1]); | ||
1638 | int wnum = stoi(relation_data[2]); | ||
1639 | std::string syn = relation_data[3]; | ||
1640 | std::string query("UPDATE adjectives SET position = ? WHERE adjective_id = ?"); | ||
1641 | |||
1642 | sqlite3_stmt* ppstmt; | ||
1643 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK) | ||
1644 | { | ||
1645 | db_error(ppdb, query); | ||
1646 | } | ||
1647 | |||
1648 | sqlite3_bind_text(ppstmt, 1, syn.c_str(), 1, SQLITE_STATIC); | ||
1649 | sqlite3_bind_int(ppstmt, 2, wn[synset_id][wnum]); | ||
1650 | |||
1651 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | ||
1652 | { | ||
1653 | db_error(ppdb, query); | ||
1654 | } | ||
1655 | |||
1656 | sqlite3_finalize(ppstmt); | ||
1657 | } | ||
1658 | } | ||
1659 | |||
1660 | sqlite3_close_v2(ppdb); | ||
1661 | |||
1662 | std::cout << "Done." << std::endl; | ||
1663 | } | ||
diff --git a/verbly/generator/progress.h b/verbly/generator/progress.h new file mode 100644 index 0000000..81f07a3 --- /dev/null +++ b/verbly/generator/progress.h | |||
@@ -0,0 +1,50 @@ | |||
1 | #ifndef PROGRESS_H_A34EF856 | ||
2 | #define PROGRESS_H_A34EF856 | ||
3 | |||
4 | #include <string> | ||
5 | |||
6 | class progress { | ||
7 | private: | ||
8 | std::string message; | ||
9 | int total; | ||
10 | int cur = 0; | ||
11 | int lprint = 0; | ||
12 | |||
13 | public: | ||
14 | progress(std::string message, int total) : message(message), total(total) | ||
15 | { | ||
16 | std::cout << message << " 0%" << std::flush; | ||
17 | } | ||
18 | |||
19 | void update(int val) | ||
20 | { | ||
21 | if (val <= total) | ||
22 | { | ||
23 | cur = val; | ||
24 | } else { | ||
25 | cur = total; | ||
26 | } | ||
27 | |||
28 | int pp = cur * 100 / total; | ||
29 | if (pp != lprint) | ||
30 | { | ||
31 | lprint = pp; | ||
32 | |||
33 | std::cout << "\b\b\b\b" << std::right; | ||
34 | std::cout.width(3); | ||
35 | std::cout << pp << "%" << std::flush; | ||
36 | } | ||
37 | } | ||
38 | |||
39 | void update() | ||
40 | { | ||
41 | update(cur+1); | ||
42 | } | ||
43 | |||
44 | ~progress() | ||
45 | { | ||
46 | std::cout << "\b\b\b\b100%" << std::endl; | ||
47 | } | ||
48 | }; | ||
49 | |||
50 | #endif /* end of include guard: PROGRESS_H_A34EF856 */ | ||
diff --git a/verbly/generator/schema.sql b/verbly/generator/schema.sql new file mode 100644 index 0000000..b4efe0a --- /dev/null +++ b/verbly/generator/schema.sql | |||
@@ -0,0 +1,252 @@ | |||
1 | DROP TABLE IF EXISTS `verbs`; | ||
2 | CREATE TABLE `verbs` ( | ||
3 | `verb_id` INTEGER PRIMARY KEY, | ||
4 | `infinitive` VARCHAR(32) NOT NULL, | ||
5 | `past_tense` VARCHAR(32) NOT NULL, | ||
6 | `past_participle` VARCHAR(32) NOT NULL, | ||
7 | `ing_form` VARCHAR(32) NOT NULL, | ||
8 | `s_form` VARCHAR(32) NOT NULL | ||
9 | ); | ||
10 | |||
11 | DROP TABLE IF EXISTS `groups`; | ||
12 | CREATE TABLE `groups` ( | ||
13 | `group_id` INTEGER PRIMARY KEY, | ||
14 | `parent_id` INTEGER, | ||
15 | FOREIGN KEY (`parent_id`) REFERENCES `groups`(`group_id`) | ||
16 | ); | ||
17 | |||
18 | DROP TABLE IF EXISTS `frames`; | ||
19 | CREATE TABLE `frames` ( | ||
20 | `frame_id` INTEGER PRIMARY KEY, | ||
21 | `group_id` INTEGER NOT NULL, | ||
22 | `data` BLOB NOT NULL, | ||
23 | FOREIGN KEY (`group_id`) REFERENCES `groups`(`group_id`) | ||
24 | ); | ||
25 | |||
26 | DROP TABLE IF EXISTS `verb_groups`; | ||
27 | CREATE TABLE `verb_groups` ( | ||
28 | `verb_id` INTEGER NOT NULL, | ||
29 | `group_id` INTEGER NOT NULL, | ||
30 | FOREIGN KEY (`verb_id`) REFERENCES `verbs`(`verb_id`), | ||
31 | FOREIGN KEY (`group_id`) REFERENCES `groups`(`group_id`) | ||
32 | ); | ||
33 | |||
34 | DROP TABLE IF EXISTS `adjectives`; | ||
35 | CREATE TABLE `adjectives` ( | ||
36 | `adjective_id` INTEGER PRIMARY KEY, | ||
37 | `base_form` VARCHAR(32) NOT NULL, | ||
38 | `comparative` VARCHAR(32), | ||
39 | `superlative` VARCHAR(32), | ||
40 | `position` CHAR(1) | ||
41 | ); | ||
42 | |||
43 | DROP TABLE IF EXISTS `adverbs`; | ||
44 | CREATE TABLE `adverbs` ( | ||
45 | `adverb_id` INTEGER PRIMARY KEY, | ||
46 | `base_form` VARCHAR(32) NOT NULL, | ||
47 | `comparative` VARCHAR(32), | ||
48 | `superlative` VARCHAR(32) | ||
49 | ); | ||
50 | |||
51 | DROP TABLE IF EXISTS `nouns`; | ||
52 | CREATE TABLE `nouns` ( | ||
53 | `noun_id` INTEGER PRIMARY KEY, | ||
54 | `singular` VARCHAR(32) NOT NULL, | ||
55 | `plural` VARCHAR(32) | ||
56 | ); | ||
57 | |||
58 | DROP TABLE IF EXISTS `hypernymy`; | ||
59 | CREATE TABLE `hypernymy` ( | ||
60 | `hypernym_id` INTEGER NOT NULL, | ||
61 | `hyponym_id` INTEGER NOT NULL, | ||
62 | FOREIGN KEY (`hypernym_id`) REFERENCES `nouns`(`noun_id`), | ||
63 | FOREIGN KEY (`hyponym_id`) REFERENCES `nouns`(`noun_id`) | ||
64 | ); | ||
65 | |||
66 | DROP TABLE IF EXISTS `instantiation`; | ||
67 | CREATE TABLE `instantiation` ( | ||
68 | `class_id` INTEGER NOT NULL, | ||
69 | `instance_id` INTEGER NOT NULL, | ||
70 | FOREIGN KEY (`class_id`) REFERENCES `nouns`(`noun_id`), | ||
71 | FOREIGN KEY (`instance_id`) REFERENCES `nouns`(`noun_id`) | ||
72 | ); | ||
73 | |||
74 | DROP TABLE IF EXISTS `member_meronymy`; | ||
75 | CREATE TABLE `member_meronymy` ( | ||
76 | `meronym_id` INTEGER NOT NULL, | ||
77 | `holonym_id` INTEGER NOT NULL, | ||
78 | FOREIGN KEY (`meronym_id`) REFERENCES `nouns`(`noun_id`), | ||
79 | FOREIGN KEY (`holonym_id`) REFERENCES `nouns`(`noun_id`) | ||
80 | ); | ||
81 | |||
82 | DROP TABLE IF EXISTS `part_meronymy`; | ||
83 | CREATE TABLE `part_meronymy` ( | ||
84 | `meronym_id` INTEGER NOT NULL, | ||
85 | `holonym_id` INTEGER NOT NULL, | ||
86 | FOREIGN KEY (`meronym_id`) REFERENCES `nouns`(`noun_id`), | ||
87 | FOREIGN KEY (`holonym_id`) REFERENCES `nouns`(`noun_id`) | ||
88 | ); | ||
89 | |||
90 | DROP TABLE IF EXISTS `substance_meronymy`; | ||
91 | CREATE TABLE `substance_meronymy` ( | ||
92 | `meronym_id` INTEGER NOT NULL, | ||
93 | `holonym_id` INTEGER NOT NULL, | ||
94 | FOREIGN KEY (`meronym_id`) REFERENCES `nouns`(`noun_id`), | ||
95 | FOREIGN KEY (`holonym_id`) REFERENCES `nouns`(`noun_id`) | ||
96 | ); | ||
97 | |||
98 | DROP TABLE IF EXISTS `variation`; | ||
99 | CREATE TABLE `variation` ( | ||
100 | `noun_id` INTEGER NOT NULL, | ||
101 | `adjective_id` INTEGER NOT NULL, | ||
102 | FOREIGN KEY (`noun_id`) REFERENCES `nouns`(`noun_id`), | ||
103 | FOREIGN KEY (`adjective_id`) REFERENCES `adjectives`(`adjective_id`) | ||
104 | ); | ||
105 | |||
106 | DROP TABLE IF EXISTS `noun_antonymy`; | ||
107 | CREATE TABLE `noun_antonymy` ( | ||
108 | `noun_1_id` INTEGER NOT NULL, | ||
109 | `noun_2_id` INTEGER NOT NULL, | ||
110 | FOREIGN KEY (`noun_1_id`) REFERENCES `nouns`(`noun_id`), | ||
111 | FOREIGN KEY (`noun_2_id`) REFERENCES `nouns`(`noun_id`) | ||
112 | ); | ||
113 | |||
114 | DROP TABLE IF EXISTS `adjective_antonymy`; | ||
115 | CREATE TABLE `adjective_antonymy` ( | ||
116 | `adjective_1_id` INTEGER NOT NULL, | ||
117 | `adjective_2_id` INTEGER NOT NULL, | ||
118 | FOREIGN KEY (`adjective_1_id`) REFERENCES `adjectives`(`adjective_id`), | ||
119 | FOREIGN KEY (`adjective_2_id`) REFERENCES `adjectives`(`adjective_id`) | ||
120 | ); | ||
121 | |||
122 | DROP TABLE IF EXISTS `adverb_antonymy`; | ||
123 | CREATE TABLE `adverb_antonymy` ( | ||
124 | `adverb_1_id` INTEGER NOT NULL, | ||
125 | `adverb_2_id` INTEGER NOT NULL, | ||
126 | FOREIGN KEY (`adverb_1_id`) REFERENCES `adverbs`(`adverb_id`), | ||
127 | FOREIGN KEY (`adverb_2_id`) REFERENCES `adverbs`(`adverb_id`) | ||
128 | ); | ||
129 | |||
130 | DROP TABLE IF EXISTS `specification`; | ||
131 | CREATE TABLE `specification` ( | ||
132 | `general_id` INTEGER NOT NULL, | ||
133 | `specific_id` INTEGER NOT NULL, | ||
134 | FOREIGN KEY (`general_id`) REFERENCES `adjectives`(`adjective_id`), | ||
135 | FOREIGN KEY (`specific_id`) REFERENCES `adjectives`(`adjective_id`) | ||
136 | ); | ||
137 | |||
138 | DROP TABLE IF EXISTS `pertainymy`; | ||
139 | CREATE TABLE `pertainymy` ( | ||
140 | `noun_id` INTEGER NOT NULL, | ||
141 | `pertainym_id` INTEGER NOT NULL, | ||
142 | FOREIGN KEY (`noun_id`) REFERENCES `nouns`(`noun_id`), | ||
143 | FOREIGN KEY (`pertainym_id`) REFERENCES `adjectives`(`adjective_id`) | ||
144 | ); | ||
145 | |||
146 | DROP TABLE IF EXISTS `mannernymy`; | ||
147 | CREATE TABLE `mannernymy` ( | ||
148 | `adjective_id` INTEGER NOT NULL, | ||
149 | `mannernym_id` INTEGER NOT NULL, | ||
150 | FOREIGN KEY (`adjective_id`) REFERENCES `adjectives`(`adjective_id`), | ||
151 | FOREIGN KEY (`mannernym_id`) REFERENCES `adverbs`(`adverb_id`) | ||
152 | ); | ||
153 | |||
154 | DROP TABLE IF EXISTS `noun_synonymy`; | ||
155 | CREATE TABLE `noun_synonymy` ( | ||
156 | `noun_1_id` INTEGER NOT NULL, | ||
157 | `noun_2_id` INTEGER NOT NULL, | ||
158 | FOREIGN KEY (`noun_1_id`) REFERENCES `nouns`(`nouns_id`), | ||
159 | FOREIGN KEY (`noun_2_id`) REFERENCES `nouns`(`nouns_id`) | ||
160 | ); | ||
161 | |||
162 | DROP TABLE IF EXISTS `adjective_synonymy`; | ||
163 | CREATE TABLE `adjective_synonymy` ( | ||
164 | `adjective_1_id` INTEGER NOT NULL, | ||
165 | `adjective_2_id` INTEGER NOT NULL, | ||
166 | FOREIGN KEY (`adjective_1_id`) REFERENCES `adjectives`(`adjective_id`), | ||
167 | FOREIGN KEY (`adjective_2_id`) REFERENCES `adjectives`(`adjective_id`) | ||
168 | ); | ||
169 | |||
170 | DROP TABLE IF EXISTS `adverb_synonymy`; | ||
171 | CREATE TABLE `adverb_synonymy` ( | ||
172 | `adverb_1_id` INTEGER NOT NULL, | ||
173 | `adverb_2_id` INTEGER NOT NULL, | ||
174 | FOREIGN KEY (`adverb_1_id`) REFERENCES `adverbs`(`adverb_id`), | ||
175 | FOREIGN KEY (`adverb_2_id`) REFERENCES `adverbs`(`adverb_id`) | ||
176 | ); | ||
177 | |||
178 | DROP TABLE IF EXISTS `noun_pronunciations`; | ||
179 | CREATE TABLE `noun_pronunciations` ( | ||
180 | `noun_id` INTEGER NOT NULL, | ||
181 | `pronunciation` VARCHAR(64) NOT NULL, | ||
182 | FOREIGN KEY (`noun_id`) REFERENCES `nouns`(`noun_id`) | ||
183 | ); | ||
184 | |||
185 | DROP TABLE IF EXISTS `verb_pronunciations`; | ||
186 | CREATE TABLE `verb_pronunciations` ( | ||
187 | `verb_id` INTEGER NOT NULL, | ||
188 | `pronunciation` VARCHAR(64) NOT NULL, | ||
189 | FOREIGN KEY (`verb_id`) REFERENCES `verbs`(`verb_id`) | ||
190 | ); | ||
191 | |||
192 | DROP TABLE IF EXISTS `adjective_pronunciations`; | ||
193 | CREATE TABLE `adjective_pronunciations` ( | ||
194 | `adjective_id` INTEGER NOT NULL, | ||
195 | `pronunciation` VARCHAR(64) NOT NULL, | ||
196 | FOREIGN KEY (`adjective_id`) REFERENCES `adjectives`(`adjective_id`) | ||
197 | ); | ||
198 | |||
199 | DROP TABLE IF EXISTS `adverb_pronunciations`; | ||
200 | CREATE TABLE `adverb_pronunciations` ( | ||
201 | `adverb_id` INTEGER NOT NULL, | ||
202 | `pronunciation` VARCHAR(64) NOT NULL, | ||
203 | FOREIGN KEY (`adverb_id`) REFERENCES `adverbs`(`adverb_id`) | ||
204 | ); | ||
205 | |||
206 | DROP TABLE IF EXISTS `noun_noun_derivation`; | ||
207 | CREATE TABLE `noun_noun_derivation` ( | ||
208 | `noun_1_id` INTEGER NOT NULL, | ||
209 | `noun_2_id` INTEGER NOT NULL, | ||
210 | FOREIGN KEY (`noun_1_id`) REFERENCES `nouns`(`noun_id`), | ||
211 | FOREIGN KEY (`noun_2_id`) REFERENCES `nouns`(`noun_id`) | ||
212 | ); | ||
213 | |||
214 | DROP TABLE IF EXISTS `noun_adjective_derivation`; | ||
215 | CREATE TABLE `noun_adjective_derivation` ( | ||
216 | `noun_id` INTEGER NOT NULL, | ||
217 | `adjective_id` INTEGER NOT NULL, | ||
218 | FOREIGN KEY (`noun_id`) REFERENCES `nouns`(`noun_id`), | ||
219 | FOREIGN KEY (`adjective_id`) REFERENCES `adjectives`(`adjective_id`) | ||
220 | ); | ||
221 | |||
222 | DROP TABLE IF EXISTS `noun_adverb_derivation`; | ||
223 | CREATE TABLE `noun_adverb_derivation` ( | ||
224 | `noun_id` INTEGER NOT NULL, | ||
225 | `adverb_id` INTEGER NOT NULL, | ||
226 | FOREIGN KEY (`noun_id`) REFERENCES `nouns`(`noun_id`), | ||
227 | FOREIGN KEY (`adverb_id`) REFERENCES `adverbs`(`adverb_id`) | ||
228 | ); | ||
229 | |||
230 | DROP TABLE IF EXISTS `adjective_adjective_derivation`; | ||
231 | CREATE TABLE `adjective_adjective_derivation` ( | ||
232 | `adjective_1_id` INTEGER NOT NULL, | ||
233 | `adjective_2_id` INTEGER NOT NULL, | ||
234 | FOREIGN KEY (`adjective_1_id`) REFERENCES `adjectives`(`adjective_id`), | ||
235 | FOREIGN KEY (`adjective_2_id`) REFERENCES `adjectives`(`adjective_id`) | ||
236 | ); | ||
237 | |||
238 | DROP TABLE IF EXISTS `adjective_adverb_derivation`; | ||
239 | CREATE TABLE `adjective_adverb_derivation` ( | ||
240 | `adjective_id` INTEGER NOT NULL, | ||
241 | `adverb_id` INTEGER NOT NULL, | ||
242 | FOREIGN KEY (`adjective_id`) REFERENCES `adjectives`(`adjective_id`), | ||
243 | FOREIGN KEY (`adverb_id`) REFERENCES `adverbs`(`adjective_id`) | ||
244 | ); | ||
245 | |||
246 | DROP TABLE IF EXISTS `adverb_adverb_derivation`; | ||
247 | CREATE TABLE `adverb_adverb_derivation` ( | ||
248 | `adverb_1_id` INTEGER NOT NULL, | ||
249 | `adverb_2_id` INTEGER NOT NULL, | ||
250 | FOREIGN KEY (`adverb_1_id`) REFERENCES `adverbs`(`adverb_id`), | ||
251 | FOREIGN KEY (`adverb_2_id`) REFERENCES `adverbs`(`adverb_id`) | ||
252 | ); | ||
diff --git a/verbly/adjective.cpp b/verbly/lib/adjective.cpp index 0f4087f..b2b53e4 100644 --- a/verbly/adjective.cpp +++ b/verbly/lib/adjective.cpp | |||
@@ -261,6 +261,38 @@ namespace verbly { | |||
261 | return *this; | 261 | return *this; |
262 | } | 262 | } |
263 | 263 | ||
264 | adjective_query& adjective_query::derived_from(const word& _w) | ||
265 | { | ||
266 | if (dynamic_cast<const adjective*>(&_w) != nullptr) | ||
267 | { | ||
268 | _derived_from_adjective.push_back(dynamic_cast<const adjective&>(_w)); | ||
269 | } else if (dynamic_cast<const adverb*>(&_w) != nullptr) | ||
270 | { | ||
271 | _derived_from_adverb.push_back(dynamic_cast<const adverb&>(_w)); | ||
272 | } else if (dynamic_cast<const noun*>(&_w) != nullptr) | ||
273 | { | ||
274 | _derived_from_noun.push_back(dynamic_cast<const noun&>(_w)); | ||
275 | } | ||
276 | |||
277 | return *this; | ||
278 | } | ||
279 | |||
280 | adjective_query& adjective_query::not_derived_from(const word& _w) | ||
281 | { | ||
282 | if (dynamic_cast<const adjective*>(&_w) != nullptr) | ||
283 | { | ||
284 | _not_derived_from_adjective.push_back(dynamic_cast<const adjective&>(_w)); | ||
285 | } else if (dynamic_cast<const adverb*>(&_w) != nullptr) | ||
286 | { | ||
287 | _not_derived_from_adverb.push_back(dynamic_cast<const adverb&>(_w)); | ||
288 | } else if (dynamic_cast<const noun*>(&_w) != nullptr) | ||
289 | { | ||
290 | _not_derived_from_noun.push_back(dynamic_cast<const noun&>(_w)); | ||
291 | } | ||
292 | |||
293 | return *this; | ||
294 | } | ||
295 | |||
264 | std::list<adjective> adjective_query::run() const | 296 | std::list<adjective> adjective_query::run() const |
265 | { | 297 | { |
266 | std::stringstream construct; | 298 | std::stringstream construct; |
@@ -423,6 +455,48 @@ namespace verbly { | |||
423 | conditions.push_back(cond); | 455 | conditions.push_back(cond); |
424 | } | 456 | } |
425 | 457 | ||
458 | if (!_derived_from_adjective.empty()) | ||
459 | { | ||
460 | std::list<std::string> clauses(_derived_from_adjective.size(), "adjective_2_id = @DERADJ"); | ||
461 | std::string cond = "adjective_id IN (SELECT adjective_1_id FROM adjective_adjective_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
462 | conditions.push_back(cond); | ||
463 | } | ||
464 | |||
465 | if (!_not_derived_from_adjective.empty()) | ||
466 | { | ||
467 | std::list<std::string> clauses(_not_derived_from_adjective.size(), "adjective_2_id = @NDERADJ"); | ||
468 | std::string cond = "adjective_id NOT IN (SELECT adjective_1_id FROM adjective_adjective_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
469 | conditions.push_back(cond); | ||
470 | } | ||
471 | |||
472 | if (!_derived_from_adverb.empty()) | ||
473 | { | ||
474 | std::list<std::string> clauses(_derived_from_adverb.size(), "adverb_id = @DERADV"); | ||
475 | std::string cond = "adjective_id IN (SELECT adjective_id FROM adjective_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
476 | conditions.push_back(cond); | ||
477 | } | ||
478 | |||
479 | if (!_not_derived_from_adverb.empty()) | ||
480 | { | ||
481 | std::list<std::string> clauses(_not_derived_from_adverb.size(), "adverb_id = @NDERADV"); | ||
482 | std::string cond = "adjective_id NOT IN (SELECT adjective_id FROM adjective_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
483 | conditions.push_back(cond); | ||
484 | } | ||
485 | |||
486 | if (!_derived_from_noun.empty()) | ||
487 | { | ||
488 | std::list<std::string> clauses(_derived_from_noun.size(), "noun_id = @DERN"); | ||
489 | std::string cond = "adjective_id IN (SELECT adjective_id FROM noun_adjective_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
490 | conditions.push_back(cond); | ||
491 | } | ||
492 | |||
493 | if (!_not_derived_from_noun.empty()) | ||
494 | { | ||
495 | std::list<std::string> clauses(_not_derived_from_noun.size(), "noun_id = @NDERN"); | ||
496 | std::string cond = "adjective_id NOT IN (SELECT adjective_id FROM noun_adjective_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
497 | conditions.push_back(cond); | ||
498 | } | ||
499 | |||
426 | if (!conditions.empty()) | 500 | if (!conditions.empty()) |
427 | { | 501 | { |
428 | construct << " WHERE "; | 502 | construct << " WHERE "; |
@@ -522,6 +596,36 @@ namespace verbly { | |||
522 | { | 596 | { |
523 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@MANID"), mannernym._id); | 597 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@MANID"), mannernym._id); |
524 | } | 598 | } |
599 | |||
600 | for (auto adj : _derived_from_adjective) | ||
601 | { | ||
602 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@DERADJ"), adj._id); | ||
603 | } | ||
604 | |||
605 | for (auto adj : _not_derived_from_adjective) | ||
606 | { | ||
607 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@NDERADJ"), adj._id); | ||
608 | } | ||
609 | |||
610 | for (auto adv : _derived_from_adverb) | ||
611 | { | ||
612 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@DERADV"), adv._id); | ||
613 | } | ||
614 | |||
615 | for (auto adv : _not_derived_from_adverb) | ||
616 | { | ||
617 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@NDERADV"), adv._id); | ||
618 | } | ||
619 | |||
620 | for (auto n : _derived_from_noun) | ||
621 | { | ||
622 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@DERN"), n._id); | ||
623 | } | ||
624 | |||
625 | for (auto n : _not_derived_from_noun) | ||
626 | { | ||
627 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@NDERN"), n._id); | ||
628 | } | ||
525 | 629 | ||
526 | std::list<adjective> output; | 630 | std::list<adjective> output; |
527 | while (sqlite3_step(ppstmt) == SQLITE_ROW) | 631 | while (sqlite3_step(ppstmt) == SQLITE_ROW) |
diff --git a/verbly/adjective.h b/verbly/lib/adjective.h index 4927d59..3dcab9b 100644 --- a/verbly/adjective.h +++ b/verbly/lib/adjective.h | |||
@@ -85,6 +85,9 @@ namespace verbly { | |||
85 | adjective_query& is_mannernymic(bool _is_mannernymic); | 85 | adjective_query& is_mannernymic(bool _is_mannernymic); |
86 | adjective_query& anti_mannernym_of(const adverb& _adv); | 86 | adjective_query& anti_mannernym_of(const adverb& _adv); |
87 | 87 | ||
88 | adjective_query& derived_from(const word& _w); | ||
89 | adjective_query& not_derived_from(const word& _w); | ||
90 | |||
88 | std::list<adjective> run() const; | 91 | std::list<adjective> run() const; |
89 | 92 | ||
90 | const static int unlimited = -1; | 93 | const static int unlimited = -1; |
@@ -126,6 +129,13 @@ namespace verbly { | |||
126 | 129 | ||
127 | bool _is_mannernymic = false; | 130 | bool _is_mannernymic = false; |
128 | std::list<adverb> _anti_mannernym_of; | 131 | std::list<adverb> _anti_mannernym_of; |
132 | |||
133 | std::list<adjective> _derived_from_adjective; | ||
134 | std::list<adjective> _not_derived_from_adjective; | ||
135 | std::list<adverb> _derived_from_adverb; | ||
136 | std::list<adverb> _not_derived_from_adverb; | ||
137 | std::list<noun> _derived_from_noun; | ||
138 | std::list<noun> _not_derived_from_noun; | ||
129 | }; | 139 | }; |
130 | 140 | ||
131 | }; | 141 | }; |
diff --git a/verbly/adverb.cpp b/verbly/lib/adverb.cpp index 9bb5a0d..8fcddad 100644 --- a/verbly/adverb.cpp +++ b/verbly/lib/adverb.cpp | |||
@@ -168,6 +168,38 @@ namespace verbly { | |||
168 | return *this; | 168 | return *this; |
169 | } | 169 | } |
170 | 170 | ||
171 | adverb_query& adverb_query::derived_from(const word& _w) | ||
172 | { | ||
173 | if (dynamic_cast<const adjective*>(&_w) != nullptr) | ||
174 | { | ||
175 | _derived_from_adjective.push_back(dynamic_cast<const adjective&>(_w)); | ||
176 | } else if (dynamic_cast<const adverb*>(&_w) != nullptr) | ||
177 | { | ||
178 | _derived_from_adverb.push_back(dynamic_cast<const adverb&>(_w)); | ||
179 | } else if (dynamic_cast<const noun*>(&_w) != nullptr) | ||
180 | { | ||
181 | _derived_from_noun.push_back(dynamic_cast<const noun&>(_w)); | ||
182 | } | ||
183 | |||
184 | return *this; | ||
185 | } | ||
186 | |||
187 | adverb_query& adverb_query::not_derived_from(const word& _w) | ||
188 | { | ||
189 | if (dynamic_cast<const adjective*>(&_w) != nullptr) | ||
190 | { | ||
191 | _not_derived_from_adjective.push_back(dynamic_cast<const adjective&>(_w)); | ||
192 | } else if (dynamic_cast<const adverb*>(&_w) != nullptr) | ||
193 | { | ||
194 | _not_derived_from_adverb.push_back(dynamic_cast<const adverb&>(_w)); | ||
195 | } else if (dynamic_cast<const noun*>(&_w) != nullptr) | ||
196 | { | ||
197 | _not_derived_from_noun.push_back(dynamic_cast<const noun&>(_w)); | ||
198 | } | ||
199 | |||
200 | return *this; | ||
201 | } | ||
202 | |||
171 | std::list<adverb> adverb_query::run() const | 203 | std::list<adverb> adverb_query::run() const |
172 | { | 204 | { |
173 | std::stringstream construct; | 205 | std::stringstream construct; |
@@ -251,6 +283,48 @@ namespace verbly { | |||
251 | conditions.push_back(cond); | 283 | conditions.push_back(cond); |
252 | } | 284 | } |
253 | 285 | ||
286 | if (!_derived_from_adjective.empty()) | ||
287 | { | ||
288 | std::list<std::string> clauses(_derived_from_adjective.size(), "adjective_id = @DERADJ"); | ||
289 | std::string cond = "adverb_id IN (SELECT adverb_id FROM adjective_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
290 | conditions.push_back(cond); | ||
291 | } | ||
292 | |||
293 | if (!_not_derived_from_adjective.empty()) | ||
294 | { | ||
295 | std::list<std::string> clauses(_not_derived_from_adjective.size(), "adjective_id = @NDERADJ"); | ||
296 | std::string cond = "adverb_id NOT IN (SELECT adverb_id FROM adjective_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
297 | conditions.push_back(cond); | ||
298 | } | ||
299 | |||
300 | if (!_derived_from_adverb.empty()) | ||
301 | { | ||
302 | std::list<std::string> clauses(_derived_from_adverb.size(), "adverb_2_id = @DERADV"); | ||
303 | std::string cond = "adverb_id IN (SELECT adverb_1_id FROM adverb_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
304 | conditions.push_back(cond); | ||
305 | } | ||
306 | |||
307 | if (!_not_derived_from_adverb.empty()) | ||
308 | { | ||
309 | std::list<std::string> clauses(_not_derived_from_adverb.size(), "adverb_2_id = @NDERADV"); | ||
310 | std::string cond = "adverb_id NOT IN (SELECT adverb_1_id FROM adverb_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
311 | conditions.push_back(cond); | ||
312 | } | ||
313 | |||
314 | if (!_derived_from_noun.empty()) | ||
315 | { | ||
316 | std::list<std::string> clauses(_derived_from_noun.size(), "noun_id = @DERN"); | ||
317 | std::string cond = "adverb_id IN (SELECT adverb_id FROM noun_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
318 | conditions.push_back(cond); | ||
319 | } | ||
320 | |||
321 | if (!_not_derived_from_noun.empty()) | ||
322 | { | ||
323 | std::list<std::string> clauses(_not_derived_from_noun.size(), "noun_id = @NDERN"); | ||
324 | std::string cond = "adverb_id NOT IN (SELECT adverb_id FROM noun_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
325 | conditions.push_back(cond); | ||
326 | } | ||
327 | |||
254 | if (!conditions.empty()) | 328 | if (!conditions.empty()) |
255 | { | 329 | { |
256 | construct << " WHERE "; | 330 | construct << " WHERE "; |
@@ -315,6 +389,36 @@ namespace verbly { | |||
315 | { | 389 | { |
316 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@AMANID"), adj._id); | 390 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@AMANID"), adj._id); |
317 | } | 391 | } |
392 | |||
393 | for (auto adj : _derived_from_adjective) | ||
394 | { | ||
395 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@DERADJ"), adj._id); | ||
396 | } | ||
397 | |||
398 | for (auto adj : _not_derived_from_adjective) | ||
399 | { | ||
400 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@NDERADJ"), adj._id); | ||
401 | } | ||
402 | |||
403 | for (auto adv : _derived_from_adverb) | ||
404 | { | ||
405 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@DERADV"), adv._id); | ||
406 | } | ||
407 | |||
408 | for (auto adv : _not_derived_from_adverb) | ||
409 | { | ||
410 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@NDERADV"), adv._id); | ||
411 | } | ||
412 | |||
413 | for (auto n : _derived_from_noun) | ||
414 | { | ||
415 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@DERN"), n._id); | ||
416 | } | ||
417 | |||
418 | for (auto n : _not_derived_from_noun) | ||
419 | { | ||
420 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@NDERN"), n._id); | ||
421 | } | ||
318 | 422 | ||
319 | std::list<adverb> output; | 423 | std::list<adverb> output; |
320 | while (sqlite3_step(ppstmt) == SQLITE_ROW) | 424 | while (sqlite3_step(ppstmt) == SQLITE_ROW) |
diff --git a/verbly/adverb.h b/verbly/lib/adverb.h index 42c3492..65e3c5c 100644 --- a/verbly/adverb.h +++ b/verbly/lib/adverb.h | |||
@@ -24,6 +24,9 @@ namespace verbly { | |||
24 | adverb_query antonyms() const; | 24 | adverb_query antonyms() const; |
25 | adverb_query synonyms() const; | 25 | adverb_query synonyms() const; |
26 | adjective_query anti_mannernyms() const; | 26 | adjective_query anti_mannernyms() const; |
27 | |||
28 | adverb_query& derived_from(const word& _w); | ||
29 | adverb_query& not_derived_from(const word& _w); | ||
27 | }; | 30 | }; |
28 | 31 | ||
29 | class adverb_query { | 32 | class adverb_query { |
@@ -50,6 +53,9 @@ namespace verbly { | |||
50 | adverb_query& is_mannernymic(bool _arg); | 53 | adverb_query& is_mannernymic(bool _arg); |
51 | adverb_query& mannernym_of(const adjective& _adj); | 54 | adverb_query& mannernym_of(const adjective& _adj); |
52 | 55 | ||
56 | adverb_query& derived_from(const word& _w); | ||
57 | adverb_query& not_derived_from(const word& _w); | ||
58 | |||
53 | std::list<adverb> run() const; | 59 | std::list<adverb> run() const; |
54 | 60 | ||
55 | const static int unlimited = -1; | 61 | const static int unlimited = -1; |
@@ -75,6 +81,13 @@ namespace verbly { | |||
75 | 81 | ||
76 | bool _is_mannernymic = false; | 82 | bool _is_mannernymic = false; |
77 | std::list<adjective> _mannernym_of; | 83 | std::list<adjective> _mannernym_of; |
84 | |||
85 | std::list<adjective> _derived_from_adjective; | ||
86 | std::list<adjective> _not_derived_from_adjective; | ||
87 | std::list<adverb> _derived_from_adverb; | ||
88 | std::list<adverb> _not_derived_from_adverb; | ||
89 | std::list<noun> _derived_from_noun; | ||
90 | std::list<noun> _not_derived_from_noun; | ||
78 | }; | 91 | }; |
79 | 92 | ||
80 | }; | 93 | }; |
diff --git a/verbly/c++14.h b/verbly/lib/c++14.h index b3efbe2..b3efbe2 100644 --- a/verbly/c++14.h +++ b/verbly/lib/c++14.h | |||
diff --git a/verbly/data.cpp b/verbly/lib/data.cpp index 57a8850..57a8850 100644 --- a/verbly/data.cpp +++ b/verbly/lib/data.cpp | |||
diff --git a/verbly/data.h b/verbly/lib/data.h index 37092d7..37092d7 100644 --- a/verbly/data.h +++ b/verbly/lib/data.h | |||
diff --git a/verbly/noun.cpp b/verbly/lib/noun.cpp index 9336a1c..43fda2e 100644 --- a/verbly/noun.cpp +++ b/verbly/lib/noun.cpp | |||
@@ -323,6 +323,13 @@ namespace verbly { | |||
323 | return *this; | 323 | return *this; |
324 | } | 324 | } |
325 | 325 | ||
326 | noun_query& noun_query::is_not_proper(bool _arg) | ||
327 | { | ||
328 | _is_not_proper = _arg; | ||
329 | |||
330 | return *this; | ||
331 | } | ||
332 | |||
326 | noun_query& noun_query::instance_of(const noun& _noun) | 333 | noun_query& noun_query::instance_of(const noun& _noun) |
327 | { | 334 | { |
328 | _instance_of.push_back(_noun); | 335 | _instance_of.push_back(_noun); |
@@ -428,6 +435,38 @@ namespace verbly { | |||
428 | return *this; | 435 | return *this; |
429 | } | 436 | } |
430 | 437 | ||
438 | noun_query& noun_query::derived_from(const word& _w) | ||
439 | { | ||
440 | if (dynamic_cast<const adjective*>(&_w) != nullptr) | ||
441 | { | ||
442 | _derived_from_adjective.push_back(dynamic_cast<const adjective&>(_w)); | ||
443 | } else if (dynamic_cast<const adverb*>(&_w) != nullptr) | ||
444 | { | ||
445 | _derived_from_adverb.push_back(dynamic_cast<const adverb&>(_w)); | ||
446 | } else if (dynamic_cast<const noun*>(&_w) != nullptr) | ||
447 | { | ||
448 | _derived_from_noun.push_back(dynamic_cast<const noun&>(_w)); | ||
449 | } | ||
450 | |||
451 | return *this; | ||
452 | } | ||
453 | |||
454 | noun_query& noun_query::not_derived_from(const word& _w) | ||
455 | { | ||
456 | if (dynamic_cast<const adjective*>(&_w) != nullptr) | ||
457 | { | ||
458 | _not_derived_from_adjective.push_back(dynamic_cast<const adjective&>(_w)); | ||
459 | } else if (dynamic_cast<const adverb*>(&_w) != nullptr) | ||
460 | { | ||
461 | _not_derived_from_adverb.push_back(dynamic_cast<const adverb&>(_w)); | ||
462 | } else if (dynamic_cast<const noun*>(&_w) != nullptr) | ||
463 | { | ||
464 | _not_derived_from_noun.push_back(dynamic_cast<const noun&>(_w)); | ||
465 | } | ||
466 | |||
467 | return *this; | ||
468 | } | ||
469 | |||
431 | std::list<noun> noun_query::run() const | 470 | std::list<noun> noun_query::run() const |
432 | { | 471 | { |
433 | std::stringstream construct; | 472 | std::stringstream construct; |
@@ -608,6 +647,11 @@ namespace verbly { | |||
608 | conditions.push_back("noun_id IN (SELECT instance_id FROM instantiation)"); | 647 | conditions.push_back("noun_id IN (SELECT instance_id FROM instantiation)"); |
609 | } | 648 | } |
610 | 649 | ||
650 | if (_is_not_proper) | ||
651 | { | ||
652 | conditions.push_back("noun_id NOT IN (SELECT instance_id FROM instantiation)"); | ||
653 | } | ||
654 | |||
611 | if (!_instance_of.empty()) | 655 | if (!_instance_of.empty()) |
612 | { | 656 | { |
613 | std::list<std::string> clauses(_instance_of.size(), "class_id = @CLSID"); | 657 | std::list<std::string> clauses(_instance_of.size(), "class_id = @CLSID"); |
@@ -703,6 +747,48 @@ namespace verbly { | |||
703 | conditions.push_back(cond); | 747 | conditions.push_back(cond); |
704 | } | 748 | } |
705 | 749 | ||
750 | if (!_derived_from_adjective.empty()) | ||
751 | { | ||
752 | std::list<std::string> clauses(_derived_from_adjective.size(), "adjective_id = @DERADJ"); | ||
753 | std::string cond = "noun_id IN (SELECT noun_id FROM noun_adjective_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
754 | conditions.push_back(cond); | ||
755 | } | ||
756 | |||
757 | if (!_not_derived_from_adjective.empty()) | ||
758 | { | ||
759 | std::list<std::string> clauses(_not_derived_from_adjective.size(), "adjective_id = @NDERADJ"); | ||
760 | std::string cond = "noun_id NOT IN (SELECT noun_id FROM noun_adjective_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
761 | conditions.push_back(cond); | ||
762 | } | ||
763 | |||
764 | if (!_derived_from_adverb.empty()) | ||
765 | { | ||
766 | std::list<std::string> clauses(_derived_from_adverb.size(), "adverb_id = @DERADV"); | ||
767 | std::string cond = "noun_id IN (SELECT noun_id FROM noun_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
768 | conditions.push_back(cond); | ||
769 | } | ||
770 | |||
771 | if (!_not_derived_from_adverb.empty()) | ||
772 | { | ||
773 | std::list<std::string> clauses(_not_derived_from_adverb.size(), "adverb_id = @NDERADV"); | ||
774 | std::string cond = "noun_id NOT IN (SELECT noun_id FROM noun_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
775 | conditions.push_back(cond); | ||
776 | } | ||
777 | |||
778 | if (!_derived_from_noun.empty()) | ||
779 | { | ||
780 | std::list<std::string> clauses(_derived_from_noun.size(), "noun_2_id = @DERN"); | ||
781 | std::string cond = "noun_id IN (SELECT noun_1_id FROM noun_noun_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
782 | conditions.push_back(cond); | ||
783 | } | ||
784 | |||
785 | if (!_not_derived_from_noun.empty()) | ||
786 | { | ||
787 | std::list<std::string> clauses(_not_derived_from_noun.size(), "noun_2_id = @NDERN"); | ||
788 | std::string cond = "noun_id NOT IN (SELECT noun_1_id FROM noun_noun_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
789 | conditions.push_back(cond); | ||
790 | } | ||
791 | |||
706 | if (!conditions.empty()) | 792 | if (!conditions.empty()) |
707 | { | 793 | { |
708 | construct << " WHERE "; | 794 | construct << " WHERE "; |
@@ -872,6 +958,36 @@ namespace verbly { | |||
872 | { | 958 | { |
873 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@VALID"), value._id); | 959 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@VALID"), value._id); |
874 | } | 960 | } |
961 | |||
962 | for (auto adj : _derived_from_adjective) | ||
963 | { | ||
964 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@DERADJ"), adj._id); | ||
965 | } | ||
966 | |||
967 | for (auto adj : _not_derived_from_adjective) | ||
968 | { | ||
969 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@NDERADJ"), adj._id); | ||
970 | } | ||
971 | |||
972 | for (auto adv : _derived_from_adverb) | ||
973 | { | ||
974 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@DERADV"), adv._id); | ||
975 | } | ||
976 | |||
977 | for (auto adv : _not_derived_from_adverb) | ||
978 | { | ||
979 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@NDERADV"), adv._id); | ||
980 | } | ||
981 | |||
982 | for (auto n : _derived_from_noun) | ||
983 | { | ||
984 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@DERN"), n._id); | ||
985 | } | ||
986 | |||
987 | for (auto n : _not_derived_from_noun) | ||
988 | { | ||
989 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@NDERN"), n._id); | ||
990 | } | ||
875 | 991 | ||
876 | std::list<noun> output; | 992 | std::list<noun> output; |
877 | while (sqlite3_step(ppstmt) == SQLITE_ROW) | 993 | while (sqlite3_step(ppstmt) == SQLITE_ROW) |
diff --git a/verbly/noun.h b/verbly/lib/noun.h index f5ba256..da76866 100644 --- a/verbly/noun.h +++ b/verbly/lib/noun.h | |||
@@ -78,6 +78,7 @@ namespace verbly { | |||
78 | noun_query& not_member_holonym_of(const noun& _noun); | 78 | noun_query& not_member_holonym_of(const noun& _noun); |
79 | 79 | ||
80 | noun_query& is_proper(bool _arg); | 80 | noun_query& is_proper(bool _arg); |
81 | noun_query& is_not_proper(bool _arg); | ||
81 | noun_query& instance_of(const noun& _noun); | 82 | noun_query& instance_of(const noun& _noun); |
82 | noun_query& not_instance_of(const noun& _noun); | 83 | noun_query& not_instance_of(const noun& _noun); |
83 | 84 | ||
@@ -99,6 +100,9 @@ namespace verbly { | |||
99 | noun_query& is_attribute(bool _arg); | 100 | noun_query& is_attribute(bool _arg); |
100 | noun_query& attribute_of(const adjective& _adj); | 101 | noun_query& attribute_of(const adjective& _adj); |
101 | 102 | ||
103 | noun_query& derived_from(const word& _w); | ||
104 | noun_query& not_derived_from(const word& _w); | ||
105 | |||
102 | std::list<noun> run() const; | 106 | std::list<noun> run() const; |
103 | 107 | ||
104 | const static int unlimited = -1; | 108 | const static int unlimited = -1; |
@@ -144,6 +148,7 @@ namespace verbly { | |||
144 | std::list<noun> _not_member_holonym_of; | 148 | std::list<noun> _not_member_holonym_of; |
145 | 149 | ||
146 | bool _is_proper = false; | 150 | bool _is_proper = false; |
151 | bool _is_not_proper = false; | ||
147 | std::list<noun> _instance_of; | 152 | std::list<noun> _instance_of; |
148 | std::list<noun> _not_instance_of; | 153 | std::list<noun> _not_instance_of; |
149 | 154 | ||
@@ -164,6 +169,13 @@ namespace verbly { | |||
164 | 169 | ||
165 | bool _is_attribute = false; | 170 | bool _is_attribute = false; |
166 | std::list<adjective> _attribute_of; | 171 | std::list<adjective> _attribute_of; |
172 | |||
173 | std::list<adjective> _derived_from_adjective; | ||
174 | std::list<adjective> _not_derived_from_adjective; | ||
175 | std::list<adverb> _derived_from_adverb; | ||
176 | std::list<adverb> _not_derived_from_adverb; | ||
177 | std::list<noun> _derived_from_noun; | ||
178 | std::list<noun> _not_derived_from_noun; | ||
167 | }; | 179 | }; |
168 | 180 | ||
169 | }; | 181 | }; |
diff --git a/verbly/token.cpp b/verbly/lib/token.cpp index aa8f50e..aa8f50e 100644 --- a/verbly/token.cpp +++ b/verbly/lib/token.cpp | |||
diff --git a/verbly/token.h b/verbly/lib/token.h index 44d99cb..44d99cb 100644 --- a/verbly/token.h +++ b/verbly/lib/token.h | |||
diff --git a/verbly/util.h b/verbly/lib/util.h index 815b47c..815b47c 100644 --- a/verbly/util.h +++ b/verbly/lib/util.h | |||
diff --git a/verbly/verb.cpp b/verbly/lib/verb.cpp index 23f7c92..23f7c92 100644 --- a/verbly/verb.cpp +++ b/verbly/lib/verb.cpp | |||
diff --git a/verbly/verb.h b/verbly/lib/verb.h index 7cc87e2..7cc87e2 100644 --- a/verbly/verb.h +++ b/verbly/lib/verb.h | |||
diff --git a/verbly/verbly.h b/verbly/lib/verbly.h index b9f5367..b9f5367 100644 --- a/verbly/verbly.h +++ b/verbly/lib/verbly.h | |||
diff --git a/verbly/word.cpp b/verbly/lib/word.cpp index c50e7d3..c50e7d3 100644 --- a/verbly/word.cpp +++ b/verbly/lib/word.cpp | |||
diff --git a/verbly/word.h b/verbly/lib/word.h index 23ddb2b..23ddb2b 100644 --- a/verbly/word.h +++ b/verbly/lib/word.h | |||