about summary refs log tree commit diff stats
path: root/verbly
diff options
context:
space:
mode:
author Kelly Rauchenberger <fefferburbia@gmail.com> 2016-03-16 21:35:35 -0400
committer Kelly Rauchenberger <fefferburbia@gmail.com> 2016-03-16 21:35:35 -0400
commit 8b1333d0e6e2b9a5014bdbff2987d899f5413fee (patch)
tree ab5c864b61cab267ad3118e8cbe637c151448aa5 /verbly
parent 3aceae8ab1eb5992110ea57a9479bbc3177feb21 (diff)
downloadfurries-8b1333d0e6e2b9a5014bdbff2987d899f5413fee.tar.gz
furries-8b1333d0e6e2b9a5014bdbff2987d899f5413fee.tar.bz2
furries-8b1333d0e6e2b9a5014bdbff2987d899f5413fee.zip
Added word derivational relationships (kind of eh at the moment) and moved verbly into its own directory
Diffstat (limited to 'verbly')
-rw-r--r--verbly/CMakeLists.txt9
-rw-r--r--verbly/LICENSE3
-rw-r--r--verbly/generator/CMakeLists.txt12
-rw-r--r--verbly/generator/generator.cpp1663
-rw-r--r--verbly/generator/progress.h50
-rw-r--r--verbly/generator/schema.sql252
-rw-r--r--verbly/lib/adjective.cpp (renamed from verbly/adjective.cpp)104
-rw-r--r--verbly/lib/adjective.h (renamed from verbly/adjective.h)10
-rw-r--r--verbly/lib/adverb.cpp (renamed from verbly/adverb.cpp)104
-rw-r--r--verbly/lib/adverb.h (renamed from verbly/adverb.h)13
-rw-r--r--verbly/lib/c++14.h (renamed from verbly/c++14.h)0
-rw-r--r--verbly/lib/data.cpp (renamed from verbly/data.cpp)0
-rw-r--r--verbly/lib/data.h (renamed from verbly/data.h)0
-rw-r--r--verbly/lib/noun.cpp (renamed from verbly/noun.cpp)116
-rw-r--r--verbly/lib/noun.h (renamed from verbly/noun.h)12
-rw-r--r--verbly/lib/token.cpp (renamed from verbly/token.cpp)0
-rw-r--r--verbly/lib/token.h (renamed from verbly/token.h)0
-rw-r--r--verbly/lib/util.h (renamed from verbly/util.h)0
-rw-r--r--verbly/lib/verb.cpp (renamed from verbly/verb.cpp)0
-rw-r--r--verbly/lib/verb.h (renamed from verbly/verb.h)0
-rw-r--r--verbly/lib/verbly.h (renamed from verbly/verbly.h)0
-rw-r--r--verbly/lib/word.cpp (renamed from verbly/word.cpp)0
-rw-r--r--verbly/lib/word.h (renamed from verbly/word.h)0
23 files changed, 2348 insertions, 0 deletions
diff --git a/verbly/CMakeLists.txt b/verbly/CMakeLists.txt new file mode 100644 index 0000000..5a3e526 --- /dev/null +++ b/verbly/CMakeLists.txt
@@ -0,0 +1,9 @@
# Build script for the verbly static library.
#
# The CXX_STANDARD / CXX_STANDARD_REQUIRED target properties used below were
# introduced in CMake 3.1; older versions silently ignore them, so require 3.1.
cmake_minimum_required (VERSION 3.1)
project (verbly)

# pkg_check_modules() is provided by the PkgConfig module, so it must be found.
find_package(PkgConfig REQUIRED)
pkg_check_modules(sqlite3 REQUIRED sqlite3)

# Make the flags discovered through pkg-config actually reach the compiler and
# linker; previously the lookup result was never used.
# NOTE(review): assumes the library sources include <sqlite3.h> - confirm.
include_directories(${sqlite3_INCLUDE_DIRS})
link_directories(${sqlite3_LIBRARY_DIRS})

add_library(verbly lib/data.cpp lib/adjective.cpp lib/noun.cpp lib/verb.cpp lib/adverb.cpp lib/token.cpp lib/word.cpp)
set_property(TARGET verbly PROPERTY CXX_STANDARD 11)
set_property(TARGET verbly PROPERTY CXX_STANDARD_REQUIRED ON)
target_link_libraries(verbly ${sqlite3_LIBRARIES})
diff --git a/verbly/LICENSE b/verbly/LICENSE new file mode 100644 index 0000000..4c4b690 --- /dev/null +++ b/verbly/LICENSE
@@ -0,0 +1,3 @@
1WordNet Release 3.0
2
3This software and database is being provided to you, the LICENSEE, by Princeton University under the following license. By obtaining, using and/or copying this software and database, you agree that you have read, understood, and will comply with these terms and conditions.: Permission to use, copy, modify and distribute this software and database and its documentation for any purpose and without fee or royalty is hereby granted, provided that you agree to comply with the following copyright notice and statements, including the disclaimer, and that the same appear on ALL copies of the software, database and documentation, including modifications that you make for internal use or for distribution. WordNet 3.0 Copyright 2006 by Princeton University. All rights reserved. THIS SOFTWARE AND DATABASE IS PROVIDED "AS IS" AND PRINCETON UNIVERSITY MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PRINCETON UNIVERSITY MAKES NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE, DATABASE OR DOCUMENTATION WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS. The name of Princeton University or Princeton may not be used in advertising or publicity pertaining to distribution of the software and/or database. Title to copyright in this software, database and any associated documentation shall at all times remain with Princeton University and LICENSEE agrees to preserve same. \ No newline at end of file
diff --git a/verbly/generator/CMakeLists.txt b/verbly/generator/CMakeLists.txt new file mode 100644 index 0000000..bbc3c4f --- /dev/null +++ b/verbly/generator/CMakeLists.txt
@@ -0,0 +1,12 @@
# Build script for the verbly datafile generator executable.
#
# The CXX_STANDARD / CXX_STANDARD_REQUIRED target properties used below were
# introduced in CMake 3.1; older versions silently ignore them, so require 3.1.
cmake_minimum_required (VERSION 3.1)
project (generator)

# pkg_check_modules() is provided by the PkgConfig module, so it must be found.
find_package(PkgConfig REQUIRED)
pkg_check_modules(sqlite3 REQUIRED sqlite3)

# The bundled find module is FindLibXml2.cmake; the package name is
# case-sensitive on case-sensitive file systems.
find_package(LibXml2 REQUIRED)

# pkg_check_modules() exports <prefix>_INCLUDE_DIRS (plural), not _INCLUDE_DIR.
include_directories(${sqlite3_INCLUDE_DIRS} ${LIBXML2_INCLUDE_DIR})
# Must precede add_executable() so the search path applies to this target.
link_directories(${sqlite3_LIBRARY_DIRS})

add_executable(generator generator.cpp)
set_property(TARGET generator PROPERTY CXX_STANDARD 11)
set_property(TARGET generator PROPERTY CXX_STANDARD_REQUIRED ON)
target_link_libraries(generator ${sqlite3_LIBRARIES} ${LIBXML2_LIBRARIES})
diff --git a/verbly/generator/generator.cpp b/verbly/generator/generator.cpp new file mode 100644 index 0000000..faef5f7 --- /dev/null +++ b/verbly/generator/generator.cpp
@@ -0,0 +1,1663 @@
1#include <libxml/parser.h>
2#include <iostream>
3#include <dirent.h>
4#include <set>
5#include <map>
6#include <string>
7#include <vector>
8#include <fstream>
9#include <sqlite3.h>
10#include <sstream>
11#include <regex>
12#include <list>
13#include "progress.h"
14
/// Inflected forms of one verb, as read from a single AGID entry.
/// Forms that the entry did not supply are left as empty strings.
struct verb {
  /// Headword of the AGID entry.
  std::string infinitive;

  /// Simple past form (e.g. written to the `past_tense` column).
  std::string past_tense;

  /// Past participle; equals the past tense when AGID lists only three forms.
  std::string past_participle;

  /// Present participle ("-ing") form.
  std::string ing_form;

  /// Third-person singular present ("-s") form.
  std::string s_form;
};
22
/// Degrees of comparison for one AGID 'A' entry. The generator consults the
/// same records when writing both the adjectives and the adverbs tables.
struct adjective {
  /// Headword of the AGID entry (positive degree).
  std::string base;

  /// Comparative form; empty when the entry did not list exactly two forms.
  std::string comparative;

  /// Superlative form; empty when the entry did not list exactly two forms.
  std::string superlative;
};
28
/// Number forms for one AGID 'N' entry.
struct noun {
  /// Headword of the AGID entry.
  std::string singular;

  /// Plural form; empty when the entry did not list exactly one form.
  std::string plural;
};
33
/// A named collection of member words.
/// NOTE(review): the only code that fills this in is the currently
/// commented-out parse_group(), where `id` is taken from a VerbNet class ID
/// and `members` from its MEMBER elements - confirm before relying on it.
struct group {
  /// Identifier of the group.
  std::string id;

  /// Distinct member names, kept in sorted order by std::set.
  std::set<std::string> members;
};
38
39std::map<std::string, group> groups;
40std::map<std::string, verb> verbs;
41std::map<std::string, adjective> adjectives;
42std::map<std::string, noun> nouns;
43std::map<int, std::map<int, int>> wn;
44std::map<std::string, std::set<std::string>> pronunciations;
45
/**
 * Writes the command-line help text to standard output, one line at a time,
 * and then terminates the process with exit status 1. Never returns.
 */
void print_usage()
{
  static const char* const usage_lines[] = {
    "Verbly Datafile Generator",
    "-------------------------",
    "Requires exactly six arguments.",
    "1. The path to a VerbNet data directory.",
    "2. The path to a SemLink vnpbMappings file.",
    "3. The path to an AGID infl.txt file.",
    "4. The path to a WordNet prolog data directory.",
    "5. The path to a CMUDICT pronunciation file.",
    "6. Datafile output path."
  };

  for (const char* text : usage_lines)
  {
    std::cout << text << std::endl;
  }

  exit(1);
}
60
61void db_error(sqlite3* ppdb, std::string)
62{
63 std::cout << "Error writing to output database: " << sqlite3_errmsg(ppdb) << std::endl;
64 sqlite3_close_v2(ppdb);
65 print_usage();
66}
67
68/*
69void parse_group(xmlNodePtr top, std::string filename)
70{
71 xmlChar* key = xmlGetProp(top, (xmlChar*) "ID");
72 if (key == 0)
73 {
74 std::cout << "Bad VerbNet file format: " << filename << std::endl;
75 print_usage();
76 }
77 std::string vnid = key;
78 vnid = vnid.substr(vnid.find_first_of("-")+1);
79 xmlFree(key);
80
81 group g;
82 g.id = vnid;
83
84 for (xmlNodePtr node = top->xmlChildrenNode; node != nullptr; node = node->next)
85 {
86 if (!xmlStrcmp(node->name, (const xmlChar*) "MEMBERS"))
87 {
88 for (xmlNodePtr member = node->xmlChildrenNode; member != nullptr; member = member->next)
89 {
90 if (!xmlStrcmp(member->name, (const xmlChar*) "MEMBER"))
91 {
92 key = xmlGetProp(member, (xmlChar*) "name");
93 g.members.insert(key);
94 xmlFree(key);
95 }
96 }
97 } else if (!xmlStrcmp(node->name, (const xmlChar*) "FRAMES"))
98 {
99 for (xmlNodePtr frame = node->xmlChildrenNode; frame != nullptr; frame = frame->next)
100 {
101 if (!xmlStrcmp(frame->name, (const xmlChar*) "FRAME"))
102 {
103 for (xmlNodePtr framenode = frame->xmlChildrenNode; framenode != nullptr; framenode = framenode->next)
104 {
105
106 }
107 }
108 }
109 }
110 }
111}*/
112
113int main(int argc, char** argv)
114{
115 if (argc != 7)
116 {
117 print_usage();
118 }
119
120 /*DIR* dir;
121 if ((dir = opendir(argv[1])) == nullptr)
122 {
123 std::cout << "Invalid VerbNet data directory." << std::endl;
124
125 print_usage();
126 }
127
128 struct dirent* ent;
129 while ((ent = readdir(dir)) != nullptr)
130 {
131 std::string filename(argv[1]);
132 if (filename.back() != '/')
133 {
134 filename += '/';
135 }
136
137 filename += ent->d_name;
138 //std::cout << ent->d_name << std::endl;
139
140 if (filename.rfind(".xml") != filename.size() - 4)
141 {
142 continue;
143 }
144
145 xmlDocPtr doc = xmlParseFile(filename.c_str());
146 if (doc == nullptr)
147 {
148 std::cout << "Error opening " << filename << std::endl;
149 print_usage();
150 }
151
152 xmlNodePtr top = xmlDocGetRootElement(doc);
153 if ((top == nullptr) || (xmlStrcmp(top->name, (xmlChar*) "VNCLASS")))
154 {
155 std::cout << "Bad VerbNet file format: " << filename << std::endl;
156 print_usage();
157 }
158
159 parse_group(top, filename);
160 }
161
162 closedir(dir);*/
163
164 // Get verbs from AGID
165 std::cout << "Reading inflections..." << std::endl;
166
167 std::ifstream agidfile(argv[3]);
168 if (!agidfile.is_open())
169 {
170 std::cout << "Could not open AGID file: " << argv[3] << std::endl;
171 print_usage();
172 }
173
174 for (;;)
175 {
176 std::string line;
177 if (!getline(agidfile, line))
178 {
179 break;
180 }
181
182 if (line.back() == '\r')
183 {
184 line.pop_back();
185 }
186
187 int divider = line.find_first_of(" ");
188 std::string word = line.substr(0, divider);
189 line = line.substr(divider+1);
190 char type = line[0];
191
192 if (line[1] == '?')
193 {
194 line.erase(0, 4);
195 } else {
196 line.erase(0, 3);
197 }
198
199 std::vector<std::string> forms;
200 while (!line.empty())
201 {
202 std::string inflection;
203 if ((divider = line.find(" | ")) != std::string::npos)
204 {
205 inflection = line.substr(0, divider);
206 line = line.substr(divider + 3);
207 } else {
208 inflection = line;
209 line = "";
210 }
211
212 if ((divider = inflection.find_first_of(",?")) != std::string::npos)
213 {
214 inflection = inflection.substr(0, divider);
215 }
216
217 forms.push_back(inflection);
218 }
219
220 switch (type)
221 {
222 case 'V':
223 {
224 verb v;
225 v.infinitive = word;
226 if (forms.size() == 4)
227 {
228 v.past_tense = forms[0];
229 v.past_participle = forms[1];
230 v.ing_form = forms[2];
231 v.s_form = forms[3];
232 } else if (forms.size() == 3)
233 {
234 v.past_tense = forms[0];
235 v.past_participle = forms[0];
236 v.ing_form = forms[1];
237 v.s_form = forms[2];
238 } else if (forms.size() == 8)
239 {
240 // As of AGID 2014.08.11, this is only "to be"
241 v.past_tense = forms[0];
242 v.past_participle = forms[2];
243 v.ing_form = forms[3];
244 v.s_form = forms[4];
245 } else {
246 // Words that don't fit the cases above as of AGID 2014.08.11:
247 // - may and shall do not conjugate the way we want them to
248 // - methinks only has a past tense and is an outlier
249 // - wit has five forms, and is archaic/obscure enough that we can ignore it for now
250 std::cout << "Ignoring verb \"" << word << "\" due to non-standard number of forms." << std::endl;
251 }
252
253 verbs[word] = v;
254
255 break;
256 }
257
258 case 'A':
259 {
260 adjective adj;
261 adj.base = word;
262 if (forms.size() == 2)
263 {
264 adj.comparative = forms[0];
265 adj.superlative = forms[1];
266 } else {
267 // As of AGID 2014.08.11, this is only "only", which has only the form "onliest"
268 std::cout << "Ignoring adjective/adverb \"" << word << "\" due to non-standard number of forms." << std::endl;
269 }
270
271 adjectives[word] = adj;
272
273 break;
274 }
275
276 case 'N':
277 {
278 noun n;
279 n.singular = word;
280 if (forms.size() == 1)
281 {
282 n.plural = forms[0];
283 } else {
284 // As of AGID 2014.08.11, this is non-existent.
285 std::cout << "Ignoring noun \"" << word << "\" due to non-standard number of forms." << std::endl;
286 }
287
288 nouns[word] = n;
289
290 break;
291 }
292 }
293 }
294
295 // Pronunciations
296 std::cout << "Reading pronunciations..." << std::endl;
297
298 std::ifstream pronfile(argv[5]);
299 if (!pronfile.is_open())
300 {
301 std::cout << "Could not open CMUDICT file: " << argv[5] << std::endl;
302 print_usage();
303 }
304
305 for (;;)
306 {
307 std::string line;
308 if (!getline(pronfile, line))
309 {
310 break;
311 }
312
313 if (line.back() == '\r')
314 {
315 line.pop_back();
316 }
317
318 std::regex phoneme("([A-Z][^ \\(]*)(?:\\(\\d+\\))? ([A-Z 0-9]+)");
319 std::smatch phoneme_data;
320 if (std::regex_search(line, phoneme_data, phoneme))
321 {
322 std::string canonical(phoneme_data[1]);
323 std::transform(std::begin(canonical), std::end(canonical), std::begin(canonical), ::tolower);
324
325 pronunciations[canonical].insert(phoneme_data[2]);
326 }
327 }
328
329 // Start writing output
330 std::cout << "Writing schema..." << std::endl;
331
332 sqlite3* ppdb;
333 if (sqlite3_open_v2(argv[6], &ppdb, SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE, NULL) != SQLITE_OK)
334 {
335 std::cout << "Error opening output datafile: " << sqlite3_errmsg(ppdb) << std::endl;
336 print_usage();
337 }
338
339 std::ifstream schemafile("schema.sql");
340 if (!schemafile.is_open())
341 {
342 std::cout << "Could not find schema file" << std::endl;
343 print_usage();
344 }
345
346 std::stringstream schemabuilder;
347 for (;;)
348 {
349 std::string line;
350 if (!getline(schemafile, line))
351 {
352 break;
353 }
354
355 if (line.back() == '\r')
356 {
357 line.pop_back();
358 }
359
360 schemabuilder << line << std::endl;
361 }
362
363 std::string schema = schemabuilder.str();
364 while (!schema.empty())
365 {
366 std::string query;
367 int divider = schema.find(";");
368 if (divider != std::string::npos)
369 {
370 query = schema.substr(0, divider+1);
371 schema = schema.substr(divider+2);
372 } else {
373 break;
374 }
375
376 sqlite3_stmt* schmstmt;
377 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &schmstmt, NULL) != SQLITE_OK)
378 {
379 db_error(ppdb, query);
380 }
381
382 if (sqlite3_step(schmstmt) != SQLITE_DONE)
383 {
384 db_error(ppdb, query);
385 }
386
387 sqlite3_finalize(schmstmt);
388 }
389
390 {
391 progress ppgs("Writing verbs...", verbs.size());
392 for (auto& mapping : verbs)
393 {
394 sqlite3_stmt* ppstmt;
395 std::string query("INSERT INTO verbs (infinitive, past_tense, past_participle, ing_form, s_form) VALUES (?, ?, ?, ?, ?)");
396 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
397 {
398 db_error(ppdb, query);
399 }
400
401 sqlite3_bind_text(ppstmt, 1, mapping.second.infinitive.c_str(), mapping.second.infinitive.length(), SQLITE_STATIC);
402 sqlite3_bind_text(ppstmt, 2, mapping.second.past_tense.c_str(), mapping.second.past_tense.length(), SQLITE_STATIC);
403 sqlite3_bind_text(ppstmt, 3, mapping.second.past_participle.c_str(), mapping.second.past_participle.length(), SQLITE_STATIC);
404 sqlite3_bind_text(ppstmt, 4, mapping.second.ing_form.c_str(), mapping.second.ing_form.length(), SQLITE_STATIC);
405 sqlite3_bind_text(ppstmt, 5, mapping.second.s_form.c_str(), mapping.second.s_form.length(), SQLITE_STATIC);
406
407 if (sqlite3_step(ppstmt) != SQLITE_DONE)
408 {
409 db_error(ppdb, query);
410 }
411
412 sqlite3_finalize(ppstmt);
413
414 std::string canonical(mapping.second.infinitive);
415 std::transform(std::begin(canonical), std::end(canonical), std::begin(canonical), ::tolower);
416 if (pronunciations.count(canonical) == 1)
417 {
418 query = "SELECT last_insert_rowid()";
419 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
420 {
421 db_error(ppdb, query);
422 }
423
424 if (sqlite3_step(ppstmt) != SQLITE_ROW)
425 {
426 db_error(ppdb, query);
427 }
428
429 int rowid = sqlite3_column_int(ppstmt, 0);
430
431 sqlite3_finalize(ppstmt);
432
433 for (auto pronunciation : pronunciations[canonical])
434 {
435 query = "INSERT INTO verb_pronunciations (verb_id, pronunciation) VALUES (?, ?)";
436 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
437 {
438 db_error(ppdb, query);
439 }
440
441 sqlite3_bind_int(ppstmt, 1, rowid);
442 sqlite3_bind_text(ppstmt, 2, pronunciation.c_str(), pronunciation.length(), SQLITE_STATIC);
443
444 if (sqlite3_step(ppstmt) != SQLITE_DONE)
445 {
446 db_error(ppdb, query);
447 }
448
449 sqlite3_finalize(ppstmt);
450 }
451 }
452
453 ppgs.update();
454 }
455 }
456
457 // Get nouns/adjectives/adverbs from WordNet
458 // Useful relations:
459 // - s: master list
460 // - ant: antonymy (e.g. happy/sad, sad/happy, happiness/sadness)
461 // - at: variation (e.g. a measurement can be standard or nonstandard)
462 // - der: derivation (e.g. happy/happily, happily/happy)
463 // - hyp: hypernymy/hyponymy (e.g. color/red, color/blue)
464 // - ins: instantiation (do we need this? let's see)
465 // - mm: member meronymy/holonymy (e.g. family/mother, family/child)
466 // - mp: part meronymy/holonymy (e.g. wheel/spoke, wheel/tire)
467 // - ms: substance meronymy/holonymy (e.g. tire/rubber, doorstop/rubber)
468 // - per: pertainymy (e.g. something that is Alaskan pertains to Alaska)
469 // mannernymy (e.g. something done quickly is done in a manner that is quick)
470 // - sa: specification (e.g. inaccurate (general) can mean imprecise or incorrect (specific))
471 // - sim: synonymy (e.g. cheerful/happy, happy/cheerful)
472 // - syntax: positioning flags for some adjectives
473 std::string wnpref {argv[4]};
474 if (wnpref.back() != '/')
475 {
476 wnpref += '/';
477 }
478
479 // s table
480 {
481 std::ifstream wnsfile(wnpref + "wn_s.pl");
482 if (!wnsfile.is_open())
483 {
484 std::cout << "Invalid WordNet data directory." << std::endl;
485 print_usage();
486 }
487
488 std::list<std::string> lines;
489 for (;;)
490 {
491 std::string line;
492 if (!getline(wnsfile, line))
493 {
494 break;
495 }
496
497 if (line.back() == '\r')
498 {
499 line.pop_back();
500 }
501
502 lines.push_back(line);
503 }
504
505 progress ppgs("Writing nouns, adjectives, and adverbs...", lines.size());
506 for (auto line : lines)
507 {
508 ppgs.update();
509
510 std::regex relation("^s\\(([134]\\d{8}),(\\d+),'([\\w ]+)',");
511 std::smatch relation_data;
512 if (!std::regex_search(line, relation_data, relation))
513 {
514 continue;
515 }
516
517 int synset_id = stoi(relation_data[1]);
518 int wnum = stoi(relation_data[2]);
519 std::string word = relation_data[3];
520
521 std::string query;
522 switch (synset_id / 100000000)
523 {
524 case 1: // Noun
525 {
526 if (nouns.count(word) == 1)
527 {
528 query = "INSERT INTO nouns (singular, plural) VALUES (?, ?)";
529 } else {
530 query = "INSERT INTO nouns (singular) VALUES (?)";
531 }
532
533 break;
534 }
535
536 case 2: // Verb
537 {
538 // Ignore
539
540 break;
541 }
542
543 case 3: // Adjective
544 {
545 if (adjectives.count(word) == 1)
546 {
547 query = "INSERT INTO adjectives (base_form, comparative, superlative) VALUES (?, ?, ?)";
548 } else {
549 query = "INSERT INTO adjectives (base_form) VALUES (?)";
550 }
551
552 break;
553 }
554
555 case 4: // Adverb
556 {
557 if (adjectives.count(word) == 1)
558 {
559 query = "INSERT INTO adverbs (base_form, comparative, superlative) VALUES (?, ?, ?)";
560 } else {
561 query = "INSERT INTO adverbs (base_form) VALUES (?)";
562 }
563
564 break;
565 }
566 }
567
568 sqlite3_stmt* ppstmt;
569 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
570 {
571 db_error(ppdb, query);
572 }
573
574 sqlite3_bind_text(ppstmt, 1, word.c_str(), word.length(), SQLITE_STATIC);
575 switch (synset_id / 100000000)
576 {
577 case 1: // Noun
578 {
579 if (nouns.count(word) == 1)
580 {
581 sqlite3_bind_text(ppstmt, 2, nouns[word].plural.c_str(), nouns[word].plural.length(), SQLITE_STATIC);
582 }
583
584 break;
585 }
586
587 case 3: // Adjective
588 case 4: // Adverb
589 {
590 if (adjectives.count(word) == 1)
591 {
592 sqlite3_bind_text(ppstmt, 2, adjectives[word].comparative.c_str(), adjectives[word].comparative.length(), SQLITE_STATIC);
593 sqlite3_bind_text(ppstmt, 3, adjectives[word].superlative.c_str(), adjectives[word].superlative.length(), SQLITE_STATIC);
594 }
595
596 break;
597 }
598 }
599
600 if (sqlite3_step(ppstmt) != SQLITE_DONE)
601 {
602 db_error(ppdb, query);
603 }
604
605 sqlite3_finalize(ppstmt);
606
607 query = "SELECT last_insert_rowid()";
608 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
609 {
610 db_error(ppdb, query);
611 }
612
613 if (sqlite3_step(ppstmt) != SQLITE_ROW)
614 {
615 db_error(ppdb, query);
616 }
617
618 int rowid = sqlite3_column_int(ppstmt, 0);
619 wn[synset_id][wnum] = rowid;
620
621 sqlite3_finalize(ppstmt);
622
623 std::string canonical(word);
624 std::transform(std::begin(canonical), std::end(canonical), std::begin(canonical), ::tolower);
625 if (pronunciations.count(canonical) == 1)
626 {
627 for (auto pronunciation : pronunciations[canonical])
628 {
629 switch (synset_id / 100000000)
630 {
631 case 1: // Noun
632 {
633 query = "INSERT INTO noun_pronunciations (noun_id, pronunciation) VALUES (?, ?)";
634
635 break;
636 }
637
638 case 3: // Adjective
639 {
640 query = "INSERT INTO adjective_pronunciations (adjective_id, pronunciation) VALUES (?, ?)";
641
642 break;
643 }
644
645 case 4: // Adverb
646 {
647 query = "INSERT INTO adverb_pronunciations (adverb_id, pronunciation) VALUES (?, ?)";
648
649 break;
650 }
651 }
652
653 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
654 {
655 db_error(ppdb, query);
656 }
657
658 sqlite3_bind_int(ppstmt, 1, rowid);
659 sqlite3_bind_text(ppstmt, 2, pronunciation.c_str(), pronunciation.length(), SQLITE_STATIC);
660
661 if (sqlite3_step(ppstmt) != SQLITE_DONE)
662 {
663 db_error(ppdb, query);
664 }
665
666 sqlite3_finalize(ppstmt);
667 }
668 }
669 }
670 }
671
672 // While we're working on s
673 {
674 progress ppgs("Writing word synonyms...", wn.size());
675 for (auto sense : wn)
676 {
677 ppgs.update();
678
679 for (auto word1 : sense.second)
680 {
681 for (auto word2 : sense.second)
682 {
683 if (word1 != word2)
684 {
685 std::string query;
686 switch (sense.first / 100000000)
687 {
688 case 1: // Noun
689 {
690 query = "INSERT INTO noun_synonymy (noun_1_id, noun_2_id) VALUES (?, ?)";
691
692 break;
693 }
694
695 case 2: // Verb
696 {
697 // Ignore
698
699 break;
700 }
701
702 case 3: // Adjective
703 {
704 query = "INSERT INTO adjective_synonymy (adjective_1_id, adjective_2_id) VALUES (?, ?)";
705
706 break;
707 }
708
709 case 4: // Adverb
710 {
711 query = "INSERT INTO adverb_synonymy (adverb_1_id, adverb_2_id) VALUES (?, ?)";
712
713 break;
714 }
715 }
716
717 sqlite3_stmt* ppstmt;
718 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
719 {
720 db_error(ppdb, query);
721 }
722
723 sqlite3_bind_int(ppstmt, 1, word1.second);
724 sqlite3_bind_int(ppstmt, 2, word2.second);
725
726 if (sqlite3_step(ppstmt) != SQLITE_DONE)
727 {
728 db_error(ppdb, query);
729 }
730
731 sqlite3_finalize(ppstmt);
732 }
733 }
734 }
735 }
736 }
737
738 // ant table
739 {
740 std::ifstream wnantfile(wnpref + "wn_ant.pl");
741 if (!wnantfile.is_open())
742 {
743 std::cout << "Invalid WordNet data directory." << std::endl;
744 print_usage();
745 }
746
747 std::list<std::string> lines;
748 for (;;)
749 {
750 std::string line;
751 if (!getline(wnantfile, line))
752 {
753 break;
754 }
755
756 if (line.back() == '\r')
757 {
758 line.pop_back();
759 }
760
761 lines.push_back(line);
762 }
763
764 progress ppgs("Writing antonyms...", lines.size());
765 for (auto line : lines)
766 {
767 ppgs.update();
768
769 std::regex relation("^ant\\(([134]\\d{8}),(\\d+),([134]\\d{8}),(\\d+)\\)\\.");
770 std::smatch relation_data;
771 if (!std::regex_search(line, relation_data, relation))
772 {
773 continue;
774 }
775
776 int synset_id_1 = stoi(relation_data[1]);
777 int wnum_1 = stoi(relation_data[2]);
778 int synset_id_2 = stoi(relation_data[3]);
779 int wnum_2 = stoi(relation_data[4]);
780
781 std::string query;
782 switch (synset_id_1 / 100000000)
783 {
784 case 1: // Noun
785 {
786 query = "INSERT INTO noun_antonymy (noun_1_id, noun_2_id) VALUES (?, ?)";
787
788 break;
789 }
790
791 case 2: // Verb
792 {
793 // Ignore
794
795 break;
796 }
797
798 case 3: // Adjective
799 {
800 query = "INSERT INTO adjective_antonymy (adjective_1_id, adjective_2_id) VALUES (?, ?)";
801
802 break;
803 }
804
805 case 4: // Adverb
806 {
807 query = "INSERT INTO adverb_antonymy (adverb_1_id, adverb_2_id) VALUES (?, ?)";
808
809 break;
810 }
811 }
812
813 sqlite3_stmt* ppstmt;
814 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
815 {
816 db_error(ppdb, query);
817 }
818
819 sqlite3_bind_int(ppstmt, 1, wn[synset_id_1][wnum_1]);
820 sqlite3_bind_int(ppstmt, 2, wn[synset_id_2][wnum_2]);
821
822 if (sqlite3_step(ppstmt) != SQLITE_DONE)
823 {
824 db_error(ppdb, query);
825 }
826
827 sqlite3_finalize(ppstmt);
828 }
829 }
830
831 // at table
832 {
833 std::ifstream wnatfile(wnpref + "wn_at.pl");
834 if (!wnatfile.is_open())
835 {
836 std::cout << "Invalid WordNet data directory." << std::endl;
837 print_usage();
838 }
839
840 std::list<std::string> lines;
841 for (;;)
842 {
843 std::string line;
844 if (!getline(wnatfile, line))
845 {
846 break;
847 }
848
849 if (line.back() == '\r')
850 {
851 line.pop_back();
852 }
853
854 lines.push_back(line);
855 }
856
857 progress ppgs("Writing variations...", lines.size());
858 for (auto line : lines)
859 {
860 ppgs.update();
861
862 std::regex relation("^at\\((1\\d{8}),(3\\d{8})\\)\\.");
863 std::smatch relation_data;
864 if (!std::regex_search(line, relation_data, relation))
865 {
866 continue;
867 }
868
869 int synset_id_1 = stoi(relation_data[1]);
870 int synset_id_2 = stoi(relation_data[2]);
871 std::string query("INSERT INTO variation (noun_id, adjective_id) VALUES (?, ?)");
872
873 for (auto mapping1 : wn[synset_id_1])
874 {
875 for (auto mapping2 : wn[synset_id_2])
876 {
877 sqlite3_stmt* ppstmt;
878 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK)
879 {
880 db_error(ppdb, query);
881 }
882
883 sqlite3_bind_int(ppstmt, 1, mapping1.second);
884 sqlite3_bind_int(ppstmt, 2, mapping2.second);
885
886 if (sqlite3_step(ppstmt) != SQLITE_DONE)
887 {
888 db_error(ppdb, query);
889 }
890
891 sqlite3_finalize(ppstmt);
892 }
893 }
894 }
895 }
896
897 // der table
898 {
899 std::ifstream wnderfile(wnpref + "wn_der.pl");
900 if (!wnderfile.is_open())
901 {
902 std::cout << "Invalid WordNet data directory." << std::endl;
903 print_usage();
904 }
905
906 std::list<std::string> lines;
907 for (;;)
908 {
909 std::string line;
910 if (!getline(wnderfile, line))
911 {
912 break;
913 }
914
915 if (line.back() == '\r')
916 {
917 line.pop_back();
918 }
919
920 lines.push_back(line);
921 }
922
923 progress ppgs("Writing morphological derivation...", lines.size());
924 for (auto line : lines)
925 {
926 ppgs.update();
927
928 std::regex relation("^der\\(([134]\\d{8}),(\\d+),([134]\\d{8}),(\\d+)\\)\\.");
929 std::smatch relation_data;
930 if (!std::regex_search(line, relation_data, relation))
931 {
932 continue;
933 }
934
935 int synset_id_1 = stoi(relation_data[1]);
936 int wnum_1 = stoi(relation_data[2]);
937 int synset_id_2 = stoi(relation_data[3]);
938 int wnum_2 = stoi(relation_data[4]);
939 std::string query;
940 switch (synset_id_1 / 100000000)
941 {
942 case 1: // Noun
943 {
944 switch (synset_id_2 / 100000000)
945 {
946 case 1: // Noun
947 {
948 query = "INSERT INTO noun_noun_derivation (noun_1_id, noun_2_id) VALUES (?, ?)";
949 break;
950 }
951
952 case 3: // Adjective
953 {
954 query = "INSERT INTO noun_adjective_derivation (noun_id, adjective_id) VALUES (?, ?)";
955 break;
956 }
957
958 case 4: // Adverb
959 {
960 query = "INSERT INTO noun_adverb_derivation (noun_id, adverb_id) VALUES (?, ?)";
961 break;
962 }
963 }
964
965 break;
966 }
967
968 case 3: // Adjective
969 {
970 switch (synset_id_2 / 100000000)
971 {
972 case 1: // Noun
973 {
974 query = "INSERT INTO noun_adjective_derivation (adjective_id, noun_id) VALUES (?, ?)";
975 break;
976 }
977
978 case 3: // Adjective
979 {
980 query = "INSERT INTO adjective_adjective_derivation (adjective_id, adjective_id) VALUES (?, ?)";
981 break;
982 }
983
984 case 4: // Adverb
985 {
986 query = "INSERT INTO adjective_adverb_derivation (adjective_id, adverb_id) VALUES (?, ?)";
987 break;
988 }
989 }
990
991 break;
992 }
993
994 case 4: // Adverb
995 {
996 switch (synset_id_2 / 100000000)
997 {
998 case 1: // Noun
999 {
1000 query = "INSERT INTO noun_adverb_derivation (adverb_id, noun_id) VALUES (?, ?)";
1001 break;
1002 }
1003
1004 case 3: // Adjective
1005 {
1006 query = "INSERT INTO adjective_adverb_derivation (adverb_id, adjective_id) VALUES (?, ?)";
1007 break;
1008 }
1009
1010 case 4: // Adverb
1011 {
1012 query = "INSERT INTO adverb_adverb_derivation (adverb_1_id, adverb_2_id) VALUES (?, ?)";
1013 break;
1014 }
1015 }
1016
1017 break;
1018 }
1019 }
1020
1021 sqlite3_stmt* ppstmt;
1022 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK)
1023 {
1024 db_error(ppdb, query);
1025 }
1026
1027 sqlite3_bind_int(ppstmt, 1, wn[synset_id_1][wnum_1]);
1028 sqlite3_bind_int(ppstmt, 2, wn[synset_id_2][wnum_2]);
1029
1030 if (sqlite3_step(ppstmt) != SQLITE_DONE)
1031 {
1032 db_error(ppdb, query);
1033 }
1034
1035 sqlite3_finalize(ppstmt);
1036 }
1037 }
1038
1039 // hyp table
1040 {
1041 std::ifstream wnhypfile(wnpref + "wn_hyp.pl");
1042 if (!wnhypfile.is_open())
1043 {
1044 std::cout << "Invalid WordNet data directory." << std::endl;
1045 print_usage();
1046 }
1047
1048 std::list<std::string> lines;
1049 for (;;)
1050 {
1051 std::string line;
1052 if (!getline(wnhypfile, line))
1053 {
1054 break;
1055 }
1056
1057 if (line.back() == '\r')
1058 {
1059 line.pop_back();
1060 }
1061
1062 lines.push_back(line);
1063 }
1064
1065 progress ppgs("Writing hypernyms...", lines.size());
1066 for (auto line : lines)
1067 {
1068 ppgs.update();
1069
1070 std::regex relation("^hyp\\((1\\d{8}),(1\\d{8})\\)\\.");
1071 std::smatch relation_data;
1072 if (!std::regex_search(line, relation_data, relation))
1073 {
1074 continue;
1075 }
1076
1077 int synset_id_1 = stoi(relation_data[1]);
1078 int synset_id_2 = stoi(relation_data[2]);
1079 std::string query("INSERT INTO hypernymy (hyponym_id, hypernym_id) VALUES (?, ?)");
1080
1081 for (auto mapping1 : wn[synset_id_1])
1082 {
1083 for (auto mapping2 : wn[synset_id_2])
1084 {
1085 sqlite3_stmt* ppstmt;
1086 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK)
1087 {
1088 db_error(ppdb, query);
1089 }
1090
1091 sqlite3_bind_int(ppstmt, 1, mapping1.second);
1092 sqlite3_bind_int(ppstmt, 2, mapping2.second);
1093
1094 if (sqlite3_step(ppstmt) != SQLITE_DONE)
1095 {
1096 db_error(ppdb, query);
1097 }
1098
1099 sqlite3_finalize(ppstmt);
1100 }
1101 }
1102 }
1103 }
1104
1105 // ins table
1106 {
1107 std::ifstream wninsfile(wnpref + "wn_ins.pl");
1108 if (!wninsfile.is_open())
1109 {
1110 std::cout << "Invalid WordNet data directory." << std::endl;
1111 print_usage();
1112 }
1113
1114 std::list<std::string> lines;
1115 for (;;)
1116 {
1117 std::string line;
1118 if (!getline(wninsfile, line))
1119 {
1120 break;
1121 }
1122
1123 if (line.back() == '\r')
1124 {
1125 line.pop_back();
1126 }
1127
1128 lines.push_back(line);
1129 }
1130
1131 progress ppgs("Writing instantiations...", lines.size());
1132 for (auto line : lines)
1133 {
1134 ppgs.update();
1135
1136 std::regex relation("^ins\\((1\\d{8}),(1\\d{8})\\)\\.");
1137 std::smatch relation_data;
1138 if (!std::regex_search(line, relation_data, relation))
1139 {
1140 continue;
1141 }
1142
1143 int synset_id_1 = stoi(relation_data[1]);
1144 int synset_id_2 = stoi(relation_data[2]);
1145 std::string query("INSERT INTO instantiation (instance_id, class_id) VALUES (?, ?)");
1146
1147 for (auto mapping1 : wn[synset_id_1])
1148 {
1149 for (auto mapping2 : wn[synset_id_2])
1150 {
1151 sqlite3_stmt* ppstmt;
1152 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK)
1153 {
1154 db_error(ppdb, query);
1155 }
1156
1157 sqlite3_bind_int(ppstmt, 1, mapping1.second);
1158 sqlite3_bind_int(ppstmt, 2, mapping2.second);
1159
1160 if (sqlite3_step(ppstmt) != SQLITE_DONE)
1161 {
1162 db_error(ppdb, query);
1163 }
1164
1165 sqlite3_finalize(ppstmt);
1166 }
1167 }
1168 }
1169 }
1170
1171 // mm table
1172 {
1173 std::ifstream wnmmfile(wnpref + "wn_mm.pl");
1174 if (!wnmmfile.is_open())
1175 {
1176 std::cout << "Invalid WordNet data directory." << std::endl;
1177 print_usage();
1178 }
1179
1180 std::list<std::string> lines;
1181 for (;;)
1182 {
1183 std::string line;
1184 if (!getline(wnmmfile, line))
1185 {
1186 break;
1187 }
1188
1189 if (line.back() == '\r')
1190 {
1191 line.pop_back();
1192 }
1193
1194 lines.push_back(line);
1195 }
1196
1197 progress ppgs("Writing member meronyms...", lines.size());
1198 for (auto line : lines)
1199 {
1200 ppgs.update();
1201
1202 std::regex relation("^mm\\((1\\d{8}),(1\\d{8})\\)\\.");
1203 std::smatch relation_data;
1204 if (!std::regex_search(line, relation_data, relation))
1205 {
1206 continue;
1207 }
1208
1209 int synset_id_1 = stoi(relation_data[1]);
1210 int synset_id_2 = stoi(relation_data[2]);
1211 std::string query("INSERT INTO member_meronymy (holonym_id, meronym_id) VALUES (?, ?)");
1212
1213 for (auto mapping1 : wn[synset_id_1])
1214 {
1215 for (auto mapping2 : wn[synset_id_2])
1216 {
1217 sqlite3_stmt* ppstmt;
1218 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK)
1219 {
1220 db_error(ppdb, query);
1221 }
1222
1223 sqlite3_bind_int(ppstmt, 1, mapping1.second);
1224 sqlite3_bind_int(ppstmt, 2, mapping2.second);
1225
1226 if (sqlite3_step(ppstmt) != SQLITE_DONE)
1227 {
1228 db_error(ppdb, query);
1229 }
1230
1231 sqlite3_finalize(ppstmt);
1232 }
1233 }
1234 }
1235 }
1236
1237 // ms table
1238 {
1239 std::ifstream wnmsfile(wnpref + "wn_ms.pl");
1240 if (!wnmsfile.is_open())
1241 {
1242 std::cout << "Invalid WordNet data directory." << std::endl;
1243 print_usage();
1244 }
1245
1246 std::list<std::string> lines;
1247 for (;;)
1248 {
1249 std::string line;
1250 if (!getline(wnmsfile, line))
1251 {
1252 break;
1253 }
1254
1255 if (line.back() == '\r')
1256 {
1257 line.pop_back();
1258 }
1259
1260 lines.push_back(line);
1261 }
1262
1263 progress ppgs("Writing substance meronyms...", lines.size());
1264 for (auto line : lines)
1265 {
1266 ppgs.update();
1267
1268 std::regex relation("^ms\\((1\\d{8}),(1\\d{8})\\)\\.");
1269 std::smatch relation_data;
1270 if (!std::regex_search(line, relation_data, relation))
1271 {
1272 continue;
1273 }
1274
1275 int synset_id_1 = stoi(relation_data[1]);
1276 int synset_id_2 = stoi(relation_data[2]);
1277 std::string query("INSERT INTO substance_meronymy (holonym_id, meronym_id) VALUES (?, ?)");
1278
1279 for (auto mapping1 : wn[synset_id_1])
1280 {
1281 for (auto mapping2 : wn[synset_id_2])
1282 {
1283 sqlite3_stmt* ppstmt;
1284 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK)
1285 {
1286 db_error(ppdb, query);
1287 }
1288
1289 sqlite3_bind_int(ppstmt, 1, mapping1.second);
1290 sqlite3_bind_int(ppstmt, 2, mapping2.second);
1291
1292 if (sqlite3_step(ppstmt) != SQLITE_DONE)
1293 {
1294 db_error(ppdb, query);
1295 }
1296
1297 sqlite3_finalize(ppstmt);
1298 }
1299 }
1300 }
1301 }
1302
1303 // mp table
1304 {
1305 std::ifstream wnmpfile(wnpref + "wn_mp.pl");
1306 if (!wnmpfile.is_open())
1307 {
1308 std::cout << "Invalid WordNet data directory." << std::endl;
1309 print_usage();
1310 }
1311
1312 std::list<std::string> lines;
1313 for (;;)
1314 {
1315 std::string line;
1316 if (!getline(wnmpfile, line))
1317 {
1318 break;
1319 }
1320
1321 if (line.back() == '\r')
1322 {
1323 line.pop_back();
1324 }
1325
1326 lines.push_back(line);
1327 }
1328
1329 progress ppgs("Writing part meronyms...", lines.size());
1330 for (auto line : lines)
1331 {
1332 ppgs.update();
1333
1334 std::regex relation("^mp\\((1\\d{8}),(1\\d{8})\\)\\.");
1335 std::smatch relation_data;
1336 if (!std::regex_search(line, relation_data, relation))
1337 {
1338 continue;
1339 }
1340
1341 int synset_id_1 = stoi(relation_data[1]);
1342 int synset_id_2 = stoi(relation_data[2]);
1343 std::string query("INSERT INTO part_meronymy (holonym_id, meronym_id) VALUES (?, ?)");
1344
1345 for (auto mapping1 : wn[synset_id_1])
1346 {
1347 for (auto mapping2 : wn[synset_id_2])
1348 {
1349 sqlite3_stmt* ppstmt;
1350 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK)
1351 {
1352 db_error(ppdb, query);
1353 }
1354
1355 sqlite3_bind_int(ppstmt, 1, mapping1.second);
1356 sqlite3_bind_int(ppstmt, 2, mapping2.second);
1357
1358 if (sqlite3_step(ppstmt) != SQLITE_DONE)
1359 {
1360 db_error(ppdb, query);
1361 }
1362
1363 sqlite3_finalize(ppstmt);
1364 }
1365 }
1366 }
1367 }
1368
1369 // per table
1370 {
1371 std::ifstream wnperfile(wnpref + "wn_per.pl");
1372 if (!wnperfile.is_open())
1373 {
1374 std::cout << "Invalid WordNet data directory." << std::endl;
1375 print_usage();
1376 }
1377
1378 std::list<std::string> lines;
1379 for (;;)
1380 {
1381 std::string line;
1382 if (!getline(wnperfile, line))
1383 {
1384 break;
1385 }
1386
1387 if (line.back() == '\r')
1388 {
1389 line.pop_back();
1390 }
1391
1392 lines.push_back(line);
1393 }
1394
1395 progress ppgs("Writing pertainyms and mannernyms...", lines.size());
1396 for (auto line : lines)
1397 {
1398 ppgs.update();
1399
1400 std::regex relation("^per\\(([34]\\d{8}),(\\d+),([13]\\d{8}),(\\d+)\\)\\.");
1401 std::smatch relation_data;
1402 if (!std::regex_search(line, relation_data, relation))
1403 {
1404 continue;
1405 }
1406
1407 int synset_id_1 = stoi(relation_data[1]);
1408 int wnum_1 = stoi(relation_data[2]);
1409 int synset_id_2 = stoi(relation_data[3]);
1410 int wnum_2 = stoi(relation_data[4]);
1411 std::string query;
1412 switch (synset_id_1 / 100000000)
1413 {
1414 case 3: // Adjective
1415 {
1416 // This is a pertainym, the second word should be a noun
1417 // Technically it can be an adjective but we're ignoring that
1418 if (synset_id_2 / 100000000 != 1)
1419 {
1420 continue;
1421 }
1422
1423 query = "INSERT INTO pertainymy (pertainym_id, noun_id) VALUES (?, ?)";
1424
1425 break;
1426 }
1427
1428 case 4: // Adverb
1429 {
1430 // This is a mannernym, the second word should be an adjective
1431 if (synset_id_2 / 100000000 != 3)
1432 {
1433 continue;
1434 }
1435
1436 query = "INSERT INTO mannernymy (mannernym_id, adjective_id) VALUES (?, ?)";
1437
1438 break;
1439 }
1440 }
1441
1442 sqlite3_stmt* ppstmt;
1443 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
1444 {
1445 db_error(ppdb, query);
1446 }
1447
1448 sqlite3_bind_int(ppstmt, 1, wn[synset_id_1][wnum_1]);
1449 sqlite3_bind_int(ppstmt, 2, wn[synset_id_2][wnum_2]);
1450
1451 if (sqlite3_step(ppstmt) != SQLITE_DONE)
1452 {
1453 db_error(ppdb, query);
1454 }
1455
1456 sqlite3_finalize(ppstmt);
1457 }
1458 }
1459
1460 // sa table
1461 {
1462 std::ifstream wnsafile(wnpref + "wn_sa.pl");
1463 if (!wnsafile.is_open())
1464 {
1465 std::cout << "Invalid WordNet data directory." << std::endl;
1466 print_usage();
1467 }
1468
1469 std::list<std::string> lines;
1470 for (;;)
1471 {
1472 std::string line;
1473 if (!getline(wnsafile, line))
1474 {
1475 break;
1476 }
1477
1478 if (line.back() == '\r')
1479 {
1480 line.pop_back();
1481 }
1482
1483 lines.push_back(line);
1484 }
1485
1486 progress ppgs("Writing specifications...", lines.size());
1487 for (auto line : lines)
1488 {
1489 ppgs.update();
1490
1491 std::regex relation("^per\\((3\\d{8}),(\\d+),(3\\d{8}),(\\d+)\\)\\.");
1492 std::smatch relation_data;
1493 if (!std::regex_search(line, relation_data, relation))
1494 {
1495 continue;
1496 }
1497
1498 int synset_id_1 = stoi(relation_data[1]);
1499 int wnum_1 = stoi(relation_data[2]);
1500 int synset_id_2 = stoi(relation_data[3]);
1501 int wnum_2 = stoi(relation_data[4]);
1502 std::string query("INSERT INTO specification (general_id, specific_id) VALUES (?, ?)");
1503
1504 sqlite3_stmt* ppstmt;
1505 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
1506 {
1507 db_error(ppdb, query);
1508 }
1509
1510 sqlite3_bind_int(ppstmt, 1, wn[synset_id_1][wnum_1]);
1511 sqlite3_bind_int(ppstmt, 2, wn[synset_id_2][wnum_2]);
1512
1513 if (sqlite3_step(ppstmt) != SQLITE_DONE)
1514 {
1515 db_error(ppdb, query);
1516 }
1517
1518 sqlite3_finalize(ppstmt);
1519 }
1520 }
1521
1522 // sim table
1523 {
1524 std::ifstream wnsimfile(wnpref + "wn_sim.pl");
1525 if (!wnsimfile.is_open())
1526 {
1527 std::cout << "Invalid WordNet data directory." << std::endl;
1528 print_usage();
1529 }
1530
1531 std::list<std::string> lines;
1532 for (;;)
1533 {
1534 std::string line;
1535 if (!getline(wnsimfile, line))
1536 {
1537 break;
1538 }
1539
1540 if (line.back() == '\r')
1541 {
1542 line.pop_back();
1543 }
1544
1545 lines.push_back(line);
1546 }
1547
1548 progress ppgs("Writing sense synonyms...", lines.size());
1549 for (auto line : lines)
1550 {
1551 ppgs.update();
1552
1553 std::regex relation("^sim\\((3\\d{8}),(3\\d{8})\\)\\.");
1554 std::smatch relation_data;
1555 if (!std::regex_search(line, relation_data, relation))
1556 {
1557 continue;
1558 }
1559
1560 int synset_id_1 = stoi(relation_data[1]);
1561 int synset_id_2 = stoi(relation_data[2]);
1562 std::string query("INSERT INTO adjective_synonymy (adjective_1_id, adjective_2_id) VALUES (?, ?)");
1563
1564 for (auto mapping1 : wn[synset_id_1])
1565 {
1566 for (auto mapping2 : wn[synset_id_2])
1567 {
1568 sqlite3_stmt* ppstmt;
1569 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK)
1570 {
1571 db_error(ppdb, query);
1572 }
1573
1574 sqlite3_bind_int(ppstmt, 1, mapping1.second);
1575 sqlite3_bind_int(ppstmt, 2, mapping2.second);
1576
1577 if (sqlite3_step(ppstmt) != SQLITE_DONE)
1578 {
1579 db_error(ppdb, query);
1580 }
1581
1582 sqlite3_reset(ppstmt);
1583 sqlite3_clear_bindings(ppstmt);
1584
1585 sqlite3_bind_int(ppstmt, 1, mapping2.second);
1586 sqlite3_bind_int(ppstmt, 2, mapping1.second);
1587
1588 if (sqlite3_step(ppstmt) != SQLITE_DONE)
1589 {
1590 db_error(ppdb, query);
1591 }
1592
1593 sqlite3_finalize(ppstmt);
1594 }
1595 }
1596 }
1597 }
1598
1599 // syntax table
1600 {
1601 std::ifstream wnsyntaxfile(wnpref + "wn_syntax.pl");
1602 if (!wnsyntaxfile.is_open())
1603 {
1604 std::cout << "Invalid WordNet data directory." << std::endl;
1605 print_usage();
1606 }
1607
1608 std::list<std::string> lines;
1609 for (;;)
1610 {
1611 std::string line;
1612 if (!getline(wnsyntaxfile, line))
1613 {
1614 break;
1615 }
1616
1617 if (line.back() == '\r')
1618 {
1619 line.pop_back();
1620 }
1621
1622 lines.push_back(line);
1623 }
1624
1625 progress ppgs("Writing adjective syntax markers...", lines.size());
1626 for (auto line : lines)
1627 {
1628 ppgs.update();
1629
1630 std::regex relation("^syntax\\((3\\d{8}),(\\d+),([ipa])p?\\)\\.");
1631 std::smatch relation_data;
1632 if (!std::regex_search(line, relation_data, relation))
1633 {
1634 continue;
1635 }
1636
1637 int synset_id = stoi(relation_data[1]);
1638 int wnum = stoi(relation_data[2]);
1639 std::string syn = relation_data[3];
1640 std::string query("UPDATE adjectives SET position = ? WHERE adjective_id = ?");
1641
1642 sqlite3_stmt* ppstmt;
1643 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK)
1644 {
1645 db_error(ppdb, query);
1646 }
1647
1648 sqlite3_bind_text(ppstmt, 1, syn.c_str(), 1, SQLITE_STATIC);
1649 sqlite3_bind_int(ppstmt, 2, wn[synset_id][wnum]);
1650
1651 if (sqlite3_step(ppstmt) != SQLITE_DONE)
1652 {
1653 db_error(ppdb, query);
1654 }
1655
1656 sqlite3_finalize(ppstmt);
1657 }
1658 }
1659
1660 sqlite3_close_v2(ppdb);
1661
1662 std::cout << "Done." << std::endl;
1663}
diff --git a/verbly/generator/progress.h b/verbly/generator/progress.h new file mode 100644 index 0000000..81f07a3 --- /dev/null +++ b/verbly/generator/progress.h
@@ -0,0 +1,50 @@
#ifndef PROGRESS_H_A34EF856
#define PROGRESS_H_A34EF856

#include <iostream>
#include <string>
#include <utility>

/**
 * Console progress indicator.
 *
 * Construction prints the message followed by " 0%". Each update() rewrites
 * the trailing four characters in place (using backspaces) whenever the whole
 * percentage changes. Destruction overwrites them with "100%" and ends the
 * line.
 */
class progress {
  private:
    std::string message; // text printed in front of the percentage
    int total;           // number of steps that make up 100%
    int cur = 0;         // steps completed so far, never above total
    int lprint = 0;      // last percentage written to the terminal

  public:
    /**
     * @param message label printed before the percentage
     * @param total   number of steps expected; may be zero
     */
    progress(std::string message, int total) : message(std::move(message)), total(total)
    {
      // The parameter has been moved from, so print the member.
      std::cout << this->message << " 0%" << std::flush;
    }

    /**
     * Sets the number of completed steps and redraws the percentage if it
     * changed.
     *
     * @param val completed step count; values above total are clamped
     */
    void update(int val)
    {
      cur = (val <= total) ? val : total;

      // With nothing to count there is no ratio to compute; report completion
      // instead of dividing by zero. The multiplication is widened so that
      // very large step counts cannot overflow int.
      const int pp = (total > 0)
        ? static_cast<int>(static_cast<long long>(cur) * 100 / total)
        : 100;

      if (pp != lprint)
      {
        lprint = pp;

        // Erase the previous "NNN%" and print the new one right-aligned in
        // the same four columns.
        std::cout << "\b\b\b\b" << std::right;
        std::cout.width(3);
        std::cout << pp << "%" << std::flush;
      }
    }

    /** Advances the indicator by one step. */
    void update()
    {
      update(cur+1);
    }

    ~progress()
    {
      std::cout << "\b\b\b\b100%" << std::endl;
    }
};

#endif /* end of include guard: PROGRESS_H_A34EF856 */
diff --git a/verbly/generator/schema.sql b/verbly/generator/schema.sql new file mode 100644 index 0000000..b4efe0a --- /dev/null +++ b/verbly/generator/schema.sql
@@ -0,0 +1,252 @@
1DROP TABLE IF EXISTS `verbs`;
2CREATE TABLE `verbs` (
3 `verb_id` INTEGER PRIMARY KEY,
4 `infinitive` VARCHAR(32) NOT NULL,
5 `past_tense` VARCHAR(32) NOT NULL,
6 `past_participle` VARCHAR(32) NOT NULL,
7 `ing_form` VARCHAR(32) NOT NULL,
8 `s_form` VARCHAR(32) NOT NULL
9);
10
11DROP TABLE IF EXISTS `groups`;
12CREATE TABLE `groups` (
13 `group_id` INTEGER PRIMARY KEY,
14 `parent_id` INTEGER,
15 FOREIGN KEY (`parent_id`) REFERENCES `groups`(`group_id`)
16);
17
18DROP TABLE IF EXISTS `frames`;
19CREATE TABLE `frames` (
20 `frame_id` INTEGER PRIMARY KEY,
21 `group_id` INTEGER NOT NULL,
22 `data` BLOB NOT NULL,
23 FOREIGN KEY (`group_id`) REFERENCES `groups`(`group_id`)
24);
25
26DROP TABLE IF EXISTS `verb_groups`;
27CREATE TABLE `verb_groups` (
28 `verb_id` INTEGER NOT NULL,
29 `group_id` INTEGER NOT NULL,
30 FOREIGN KEY (`verb_id`) REFERENCES `verbs`(`verb_id`),
31 FOREIGN KEY (`group_id`) REFERENCES `groups`(`group_id`)
32);
33
34DROP TABLE IF EXISTS `adjectives`;
35CREATE TABLE `adjectives` (
36 `adjective_id` INTEGER PRIMARY KEY,
37 `base_form` VARCHAR(32) NOT NULL,
38 `comparative` VARCHAR(32),
39 `superlative` VARCHAR(32),
40 `position` CHAR(1)
41);
42
43DROP TABLE IF EXISTS `adverbs`;
44CREATE TABLE `adverbs` (
45 `adverb_id` INTEGER PRIMARY KEY,
46 `base_form` VARCHAR(32) NOT NULL,
47 `comparative` VARCHAR(32),
48 `superlative` VARCHAR(32)
49);
50
51DROP TABLE IF EXISTS `nouns`;
52CREATE TABLE `nouns` (
53 `noun_id` INTEGER PRIMARY KEY,
54 `singular` VARCHAR(32) NOT NULL,
55 `plural` VARCHAR(32)
56);
57
58DROP TABLE IF EXISTS `hypernymy`;
59CREATE TABLE `hypernymy` (
60 `hypernym_id` INTEGER NOT NULL,
61 `hyponym_id` INTEGER NOT NULL,
62 FOREIGN KEY (`hypernym_id`) REFERENCES `nouns`(`noun_id`),
63 FOREIGN KEY (`hyponym_id`) REFERENCES `nouns`(`noun_id`)
64);
65
66DROP TABLE IF EXISTS `instantiation`;
67CREATE TABLE `instantiation` (
68 `class_id` INTEGER NOT NULL,
69 `instance_id` INTEGER NOT NULL,
70 FOREIGN KEY (`class_id`) REFERENCES `nouns`(`noun_id`),
71 FOREIGN KEY (`instance_id`) REFERENCES `nouns`(`noun_id`)
72);
73
74DROP TABLE IF EXISTS `member_meronymy`;
75CREATE TABLE `member_meronymy` (
76 `meronym_id` INTEGER NOT NULL,
77 `holonym_id` INTEGER NOT NULL,
78 FOREIGN KEY (`meronym_id`) REFERENCES `nouns`(`noun_id`),
79 FOREIGN KEY (`holonym_id`) REFERENCES `nouns`(`noun_id`)
80);
81
82DROP TABLE IF EXISTS `part_meronymy`;
83CREATE TABLE `part_meronymy` (
84 `meronym_id` INTEGER NOT NULL,
85 `holonym_id` INTEGER NOT NULL,
86 FOREIGN KEY (`meronym_id`) REFERENCES `nouns`(`noun_id`),
87 FOREIGN KEY (`holonym_id`) REFERENCES `nouns`(`noun_id`)
88);
89
90DROP TABLE IF EXISTS `substance_meronymy`;
91CREATE TABLE `substance_meronymy` (
92 `meronym_id` INTEGER NOT NULL,
93 `holonym_id` INTEGER NOT NULL,
94 FOREIGN KEY (`meronym_id`) REFERENCES `nouns`(`noun_id`),
95 FOREIGN KEY (`holonym_id`) REFERENCES `nouns`(`noun_id`)
96);
97
98DROP TABLE IF EXISTS `variation`;
99CREATE TABLE `variation` (
100 `noun_id` INTEGER NOT NULL,
101 `adjective_id` INTEGER NOT NULL,
102 FOREIGN KEY (`noun_id`) REFERENCES `nouns`(`noun_id`),
103 FOREIGN KEY (`adjective_id`) REFERENCES `adjectives`(`adjective_id`)
104);
105
106DROP TABLE IF EXISTS `noun_antonymy`;
107CREATE TABLE `noun_antonymy` (
108 `noun_1_id` INTEGER NOT NULL,
109 `noun_2_id` INTEGER NOT NULL,
110 FOREIGN KEY (`noun_1_id`) REFERENCES `nouns`(`noun_id`),
111 FOREIGN KEY (`noun_2_id`) REFERENCES `nouns`(`noun_id`)
112);
113
114DROP TABLE IF EXISTS `adjective_antonymy`;
115CREATE TABLE `adjective_antonymy` (
116 `adjective_1_id` INTEGER NOT NULL,
117 `adjective_2_id` INTEGER NOT NULL,
118 FOREIGN KEY (`adjective_1_id`) REFERENCES `adjectives`(`adjective_id`),
119 FOREIGN KEY (`adjective_2_id`) REFERENCES `adjectives`(`adjective_id`)
120);
121
122DROP TABLE IF EXISTS `adverb_antonymy`;
123CREATE TABLE `adverb_antonymy` (
124 `adverb_1_id` INTEGER NOT NULL,
125 `adverb_2_id` INTEGER NOT NULL,
126 FOREIGN KEY (`adverb_1_id`) REFERENCES `adverbs`(`adverb_id`),
127 FOREIGN KEY (`adverb_2_id`) REFERENCES `adverbs`(`adverb_id`)
128);
129
130DROP TABLE IF EXISTS `specification`;
131CREATE TABLE `specification` (
132 `general_id` INTEGER NOT NULL,
133 `specific_id` INTEGER NOT NULL,
134 FOREIGN KEY (`general_id`) REFERENCES `adjectives`(`adjective_id`),
135 FOREIGN KEY (`specific_id`) REFERENCES `adjectives`(`adjective_id`)
136);
137
138DROP TABLE IF EXISTS `pertainymy`;
139CREATE TABLE `pertainymy` (
140 `noun_id` INTEGER NOT NULL,
141 `pertainym_id` INTEGER NOT NULL,
142 FOREIGN KEY (`noun_id`) REFERENCES `nouns`(`noun_id`),
143 FOREIGN KEY (`pertainym_id`) REFERENCES `adjectives`(`adjective_id`)
144);
145
146DROP TABLE IF EXISTS `mannernymy`;
147CREATE TABLE `mannernymy` (
148 `adjective_id` INTEGER NOT NULL,
149 `mannernym_id` INTEGER NOT NULL,
150 FOREIGN KEY (`adjective_id`) REFERENCES `adjectives`(`adjective_id`),
151 FOREIGN KEY (`mannernym_id`) REFERENCES `adverbs`(`adverb_id`)
152);
153
-- Pairs of nouns related by synonymy.
-- Both columns point at the primary key of `nouns`, which is `noun_id`;
-- the table has no `nouns_id` column.
DROP TABLE IF EXISTS `noun_synonymy`;
CREATE TABLE `noun_synonymy` (
  `noun_1_id` INTEGER NOT NULL,
  `noun_2_id` INTEGER NOT NULL,
  FOREIGN KEY (`noun_1_id`) REFERENCES `nouns`(`noun_id`),
  FOREIGN KEY (`noun_2_id`) REFERENCES `nouns`(`noun_id`)
);
161
162DROP TABLE IF EXISTS `adjective_synonymy`;
163CREATE TABLE `adjective_synonymy` (
164 `adjective_1_id` INTEGER NOT NULL,
165 `adjective_2_id` INTEGER NOT NULL,
166 FOREIGN KEY (`adjective_1_id`) REFERENCES `adjectives`(`adjective_id`),
167 FOREIGN KEY (`adjective_2_id`) REFERENCES `adjectives`(`adjective_id`)
168);
169
170DROP TABLE IF EXISTS `adverb_synonymy`;
171CREATE TABLE `adverb_synonymy` (
172 `adverb_1_id` INTEGER NOT NULL,
173 `adverb_2_id` INTEGER NOT NULL,
174 FOREIGN KEY (`adverb_1_id`) REFERENCES `adverbs`(`adverb_id`),
175 FOREIGN KEY (`adverb_2_id`) REFERENCES `adverbs`(`adverb_id`)
176);
177
178DROP TABLE IF EXISTS `noun_pronunciations`;
179CREATE TABLE `noun_pronunciations` (
180 `noun_id` INTEGER NOT NULL,
181 `pronunciation` VARCHAR(64) NOT NULL,
182 FOREIGN KEY (`noun_id`) REFERENCES `nouns`(`noun_id`)
183);
184
185DROP TABLE IF EXISTS `verb_pronunciations`;
186CREATE TABLE `verb_pronunciations` (
187 `verb_id` INTEGER NOT NULL,
188 `pronunciation` VARCHAR(64) NOT NULL,
189 FOREIGN KEY (`verb_id`) REFERENCES `verbs`(`verb_id`)
190);
191
192DROP TABLE IF EXISTS `adjective_pronunciations`;
193CREATE TABLE `adjective_pronunciations` (
194 `adjective_id` INTEGER NOT NULL,
195 `pronunciation` VARCHAR(64) NOT NULL,
196 FOREIGN KEY (`adjective_id`) REFERENCES `adjectives`(`adjective_id`)
197);
198
199DROP TABLE IF EXISTS `adverb_pronunciations`;
200CREATE TABLE `adverb_pronunciations` (
201 `adverb_id` INTEGER NOT NULL,
202 `pronunciation` VARCHAR(64) NOT NULL,
203 FOREIGN KEY (`adverb_id`) REFERENCES `adverbs`(`adverb_id`)
204);
205
206DROP TABLE IF EXISTS `noun_noun_derivation`;
207CREATE TABLE `noun_noun_derivation` (
208 `noun_1_id` INTEGER NOT NULL,
209 `noun_2_id` INTEGER NOT NULL,
210 FOREIGN KEY (`noun_1_id`) REFERENCES `nouns`(`noun_id`),
211 FOREIGN KEY (`noun_2_id`) REFERENCES `nouns`(`noun_id`)
212);
213
214DROP TABLE IF EXISTS `noun_adjective_derivation`;
215CREATE TABLE `noun_adjective_derivation` (
216 `noun_id` INTEGER NOT NULL,
217 `adjective_id` INTEGER NOT NULL,
218 FOREIGN KEY (`noun_id`) REFERENCES `nouns`(`noun_id`),
219 FOREIGN KEY (`adjective_id`) REFERENCES `adjectives`(`adjective_id`)
220);
221
222DROP TABLE IF EXISTS `noun_adverb_derivation`;
223CREATE TABLE `noun_adverb_derivation` (
224 `noun_id` INTEGER NOT NULL,
225 `adverb_id` INTEGER NOT NULL,
226 FOREIGN KEY (`noun_id`) REFERENCES `nouns`(`noun_id`),
227 FOREIGN KEY (`adverb_id`) REFERENCES `adverbs`(`adverb_id`)
228);
229
230DROP TABLE IF EXISTS `adjective_adjective_derivation`;
231CREATE TABLE `adjective_adjective_derivation` (
232 `adjective_1_id` INTEGER NOT NULL,
233 `adjective_2_id` INTEGER NOT NULL,
234 FOREIGN KEY (`adjective_1_id`) REFERENCES `adjectives`(`adjective_id`),
235 FOREIGN KEY (`adjective_2_id`) REFERENCES `adjectives`(`adjective_id`)
236);
237
-- Derivational links between an adjective and an adverb.
-- The adverb side references the primary key of `adverbs`, which is
-- `adverb_id`; that table has no `adjective_id` column.
DROP TABLE IF EXISTS `adjective_adverb_derivation`;
CREATE TABLE `adjective_adverb_derivation` (
  `adjective_id` INTEGER NOT NULL,
  `adverb_id` INTEGER NOT NULL,
  FOREIGN KEY (`adjective_id`) REFERENCES `adjectives`(`adjective_id`),
  FOREIGN KEY (`adverb_id`) REFERENCES `adverbs`(`adverb_id`)
);
245
246DROP TABLE IF EXISTS `adverb_adverb_derivation`;
247CREATE TABLE `adverb_adverb_derivation` (
248 `adverb_1_id` INTEGER NOT NULL,
249 `adverb_2_id` INTEGER NOT NULL,
250 FOREIGN KEY (`adverb_1_id`) REFERENCES `adverbs`(`adverb_id`),
251 FOREIGN KEY (`adverb_2_id`) REFERENCES `adverbs`(`adverb_id`)
252);
diff --git a/verbly/adjective.cpp b/verbly/lib/adjective.cpp index 0f4087f..b2b53e4 100644 --- a/verbly/adjective.cpp +++ b/verbly/lib/adjective.cpp
@@ -261,6 +261,38 @@ namespace verbly {
261 return *this; 261 return *this;
262 } 262 }
263 263
264 adjective_query& adjective_query::derived_from(const word& _w)
265 {
266 if (dynamic_cast<const adjective*>(&_w) != nullptr)
267 {
268 _derived_from_adjective.push_back(dynamic_cast<const adjective&>(_w));
269 } else if (dynamic_cast<const adverb*>(&_w) != nullptr)
270 {
271 _derived_from_adverb.push_back(dynamic_cast<const adverb&>(_w));
272 } else if (dynamic_cast<const noun*>(&_w) != nullptr)
273 {
274 _derived_from_noun.push_back(dynamic_cast<const noun&>(_w));
275 }
276
277 return *this;
278 }
279
280 adjective_query& adjective_query::not_derived_from(const word& _w)
281 {
282 if (dynamic_cast<const adjective*>(&_w) != nullptr)
283 {
284 _not_derived_from_adjective.push_back(dynamic_cast<const adjective&>(_w));
285 } else if (dynamic_cast<const adverb*>(&_w) != nullptr)
286 {
287 _not_derived_from_adverb.push_back(dynamic_cast<const adverb&>(_w));
288 } else if (dynamic_cast<const noun*>(&_w) != nullptr)
289 {
290 _not_derived_from_noun.push_back(dynamic_cast<const noun&>(_w));
291 }
292
293 return *this;
294 }
295
264 std::list<adjective> adjective_query::run() const 296 std::list<adjective> adjective_query::run() const
265 { 297 {
266 std::stringstream construct; 298 std::stringstream construct;
@@ -423,6 +455,48 @@ namespace verbly {
423 conditions.push_back(cond); 455 conditions.push_back(cond);
424 } 456 }
425 457
458 if (!_derived_from_adjective.empty())
459 {
460 std::list<std::string> clauses(_derived_from_adjective.size(), "adjective_2_id = @DERADJ");
461 std::string cond = "adjective_id IN (SELECT adjective_1_id FROM adjective_adjective_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
462 conditions.push_back(cond);
463 }
464
465 if (!_not_derived_from_adjective.empty())
466 {
467 std::list<std::string> clauses(_not_derived_from_adjective.size(), "adjective_2_id = @NDERADJ");
468 std::string cond = "adjective_id NOT IN (SELECT adjective_1_id FROM adjective_adjective_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
469 conditions.push_back(cond);
470 }
471
472 if (!_derived_from_adverb.empty())
473 {
474 std::list<std::string> clauses(_derived_from_adverb.size(), "adverb_id = @DERADV");
475 std::string cond = "adjective_id IN (SELECT adjective_id FROM adjective_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
476 conditions.push_back(cond);
477 }
478
479 if (!_not_derived_from_adverb.empty())
480 {
481 std::list<std::string> clauses(_not_derived_from_adverb.size(), "adverb_id = @NDERADV");
482 std::string cond = "adjective_id NOT IN (SELECT adjective_id FROM adjective_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
483 conditions.push_back(cond);
484 }
485
486 if (!_derived_from_noun.empty())
487 {
488 std::list<std::string> clauses(_derived_from_noun.size(), "noun_id = @DERN");
489 std::string cond = "adjective_id IN (SELECT adjective_id FROM noun_adjective_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
490 conditions.push_back(cond);
491 }
492
493 if (!_not_derived_from_noun.empty())
494 {
495 std::list<std::string> clauses(_not_derived_from_noun.size(), "noun_id = @NDERN");
496 std::string cond = "adjective_id NOT IN (SELECT adjective_id FROM noun_adjective_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
497 conditions.push_back(cond);
498 }
499
426 if (!conditions.empty()) 500 if (!conditions.empty())
427 { 501 {
428 construct << " WHERE "; 502 construct << " WHERE ";
@@ -522,6 +596,36 @@ namespace verbly {
522 { 596 {
523 sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@MANID"), mannernym._id); 597 sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@MANID"), mannernym._id);
524 } 598 }
599
600 for (auto adj : _derived_from_adjective)
601 {
602 sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@DERADJ"), adj._id);
603 }
604
605 for (auto adj : _not_derived_from_adjective)
606 {
607 sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@NDERADJ"), adj._id);
608 }
609
610 for (auto adv : _derived_from_adverb)
611 {
612 sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@DERADV"), adv._id);
613 }
614
615 for (auto adv : _not_derived_from_adverb)
616 {
617 sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@NDERADV"), adv._id);
618 }
619
620 for (auto n : _derived_from_noun)
621 {
622 sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@DERN"), n._id);
623 }
624
625 for (auto n : _not_derived_from_noun)
626 {
627 sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@NDERN"), n._id);
628 }
525 629
526 std::list<adjective> output; 630 std::list<adjective> output;
527 while (sqlite3_step(ppstmt) == SQLITE_ROW) 631 while (sqlite3_step(ppstmt) == SQLITE_ROW)
diff --git a/verbly/adjective.h b/verbly/lib/adjective.h index 4927d59..3dcab9b 100644 --- a/verbly/adjective.h +++ b/verbly/lib/adjective.h
@@ -85,6 +85,9 @@ namespace verbly {
85 adjective_query& is_mannernymic(bool _is_mannernymic); 85 adjective_query& is_mannernymic(bool _is_mannernymic);
86 adjective_query& anti_mannernym_of(const adverb& _adv); 86 adjective_query& anti_mannernym_of(const adverb& _adv);
87 87
88 adjective_query& derived_from(const word& _w);
89 adjective_query& not_derived_from(const word& _w);
90
88 std::list<adjective> run() const; 91 std::list<adjective> run() const;
89 92
90 const static int unlimited = -1; 93 const static int unlimited = -1;
@@ -126,6 +129,13 @@ namespace verbly {
126 129
127 bool _is_mannernymic = false; 130 bool _is_mannernymic = false;
128 std::list<adverb> _anti_mannernym_of; 131 std::list<adverb> _anti_mannernym_of;
132
133 std::list<adjective> _derived_from_adjective;
134 std::list<adjective> _not_derived_from_adjective;
135 std::list<adverb> _derived_from_adverb;
136 std::list<adverb> _not_derived_from_adverb;
137 std::list<noun> _derived_from_noun;
138 std::list<noun> _not_derived_from_noun;
129 }; 139 };
130 140
131}; 141};
diff --git a/verbly/adverb.cpp b/verbly/lib/adverb.cpp index 9bb5a0d..8fcddad 100644 --- a/verbly/adverb.cpp +++ b/verbly/lib/adverb.cpp
@@ -168,6 +168,38 @@ namespace verbly {
168 return *this; 168 return *this;
169 } 169 }
170 170
171 adverb_query& adverb_query::derived_from(const word& _w)
172 {
173 if (dynamic_cast<const adjective*>(&_w) != nullptr)
174 {
175 _derived_from_adjective.push_back(dynamic_cast<const adjective&>(_w));
176 } else if (dynamic_cast<const adverb*>(&_w) != nullptr)
177 {
178 _derived_from_adverb.push_back(dynamic_cast<const adverb&>(_w));
179 } else if (dynamic_cast<const noun*>(&_w) != nullptr)
180 {
181 _derived_from_noun.push_back(dynamic_cast<const noun&>(_w));
182 }
183
184 return *this;
185 }
186
187 adverb_query& adverb_query::not_derived_from(const word& _w)
188 {
189 if (dynamic_cast<const adjective*>(&_w) != nullptr)
190 {
191 _not_derived_from_adjective.push_back(dynamic_cast<const adjective&>(_w));
192 } else if (dynamic_cast<const adverb*>(&_w) != nullptr)
193 {
194 _not_derived_from_adverb.push_back(dynamic_cast<const adverb&>(_w));
195 } else if (dynamic_cast<const noun*>(&_w) != nullptr)
196 {
197 _not_derived_from_noun.push_back(dynamic_cast<const noun&>(_w));
198 }
199
200 return *this;
201 }
202
171 std::list<adverb> adverb_query::run() const 203 std::list<adverb> adverb_query::run() const
172 { 204 {
173 std::stringstream construct; 205 std::stringstream construct;
@@ -251,6 +283,48 @@ namespace verbly {
251 conditions.push_back(cond); 283 conditions.push_back(cond);
252 } 284 }
253 285
286 if (!_derived_from_adjective.empty())
287 {
288 std::list<std::string> clauses(_derived_from_adjective.size(), "adjective_id = @DERADJ");
289 std::string cond = "adverb_id IN (SELECT adverb_id FROM adjective_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
290 conditions.push_back(cond);
291 }
292
293 if (!_not_derived_from_adjective.empty())
294 {
295 std::list<std::string> clauses(_not_derived_from_adjective.size(), "adjective_id = @NDERADJ");
296 std::string cond = "adverb_id NOT IN (SELECT adverb_id FROM adjective_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
297 conditions.push_back(cond);
298 }
299
300 if (!_derived_from_adverb.empty())
301 {
302 std::list<std::string> clauses(_derived_from_adverb.size(), "adverb_2_id = @DERADV");
303 std::string cond = "adverb_id IN (SELECT adverb_1_id FROM adverb_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
304 conditions.push_back(cond);
305 }
306
307 if (!_not_derived_from_adverb.empty())
308 {
309 std::list<std::string> clauses(_not_derived_from_adverb.size(), "adverb_2_id = @NDERADV");
310 std::string cond = "adverb_id NOT IN (SELECT adverb_1_id FROM adverb_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
311 conditions.push_back(cond);
312 }
313
314 if (!_derived_from_noun.empty())
315 {
316 std::list<std::string> clauses(_derived_from_noun.size(), "noun_id = @DERN");
317 std::string cond = "adverb_id IN (SELECT adverb_id FROM noun_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
318 conditions.push_back(cond);
319 }
320
321 if (!_not_derived_from_noun.empty())
322 {
323 std::list<std::string> clauses(_not_derived_from_noun.size(), "noun_id = @NDERN");
324 std::string cond = "adverb_id NOT IN (SELECT adverb_id FROM noun_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
325 conditions.push_back(cond);
326 }
327
254 if (!conditions.empty()) 328 if (!conditions.empty())
255 { 329 {
256 construct << " WHERE "; 330 construct << " WHERE ";
@@ -315,6 +389,36 @@ namespace verbly {
315 { 389 {
316 sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@AMANID"), adj._id); 390 sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@AMANID"), adj._id);
317 } 391 }
392
393 for (auto adj : _derived_from_adjective)
394 {
395 sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@DERADJ"), adj._id);
396 }
397
398 for (auto adj : _not_derived_from_adjective)
399 {
400 sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@NDERADJ"), adj._id);
401 }
402
403 for (auto adv : _derived_from_adverb)
404 {
405 sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@DERADV"), adv._id);
406 }
407
408 for (auto adv : _not_derived_from_adverb)
409 {
410 sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@NDERADV"), adv._id);
411 }
412
413 for (auto n : _derived_from_noun)
414 {
415 sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@DERN"), n._id);
416 }
417
418 for (auto n : _not_derived_from_noun)
419 {
420 sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@NDERN"), n._id);
421 }
318 422
319 std::list<adverb> output; 423 std::list<adverb> output;
320 while (sqlite3_step(ppstmt) == SQLITE_ROW) 424 while (sqlite3_step(ppstmt) == SQLITE_ROW)
diff --git a/verbly/adverb.h b/verbly/lib/adverb.h index 42c3492..65e3c5c 100644 --- a/verbly/adverb.h +++ b/verbly/lib/adverb.h
@@ -24,6 +24,9 @@ namespace verbly {
24 adverb_query antonyms() const; 24 adverb_query antonyms() const;
25 adverb_query synonyms() const; 25 adverb_query synonyms() const;
26 adjective_query anti_mannernyms() const; 26 adjective_query anti_mannernyms() const;
27
28 adverb_query& derived_from(const word& _w);
29 adverb_query& not_derived_from(const word& _w);
27 }; 30 };
28 31
29 class adverb_query { 32 class adverb_query {
@@ -50,6 +53,9 @@ namespace verbly {
50 adverb_query& is_mannernymic(bool _arg); 53 adverb_query& is_mannernymic(bool _arg);
51 adverb_query& mannernym_of(const adjective& _adj); 54 adverb_query& mannernym_of(const adjective& _adj);
52 55
56 adverb_query& derived_from(const word& _w);
57 adverb_query& not_derived_from(const word& _w);
58
53 std::list<adverb> run() const; 59 std::list<adverb> run() const;
54 60
55 const static int unlimited = -1; 61 const static int unlimited = -1;
@@ -75,6 +81,13 @@ namespace verbly {
75 81
76 bool _is_mannernymic = false; 82 bool _is_mannernymic = false;
77 std::list<adjective> _mannernym_of; 83 std::list<adjective> _mannernym_of;
84
85 std::list<adjective> _derived_from_adjective;
86 std::list<adjective> _not_derived_from_adjective;
87 std::list<adverb> _derived_from_adverb;
88 std::list<adverb> _not_derived_from_adverb;
89 std::list<noun> _derived_from_noun;
90 std::list<noun> _not_derived_from_noun;
78 }; 91 };
79 92
80}; 93};
diff --git a/verbly/c++14.h b/verbly/lib/c++14.h index b3efbe2..b3efbe2 100644 --- a/verbly/c++14.h +++ b/verbly/lib/c++14.h
diff --git a/verbly/data.cpp b/verbly/lib/data.cpp index 57a8850..57a8850 100644 --- a/verbly/data.cpp +++ b/verbly/lib/data.cpp
diff --git a/verbly/data.h b/verbly/lib/data.h index 37092d7..37092d7 100644 --- a/verbly/data.h +++ b/verbly/lib/data.h
diff --git a/verbly/noun.cpp b/verbly/lib/noun.cpp index 9336a1c..43fda2e 100644 --- a/verbly/noun.cpp +++ b/verbly/lib/noun.cpp
@@ -323,6 +323,13 @@ namespace verbly {
323 return *this; 323 return *this;
324 } 324 }
325 325
326 noun_query& noun_query::is_not_proper(bool _arg)
327 {
328 _is_not_proper = _arg;
329
330 return *this;
331 }
332
326 noun_query& noun_query::instance_of(const noun& _noun) 333 noun_query& noun_query::instance_of(const noun& _noun)
327 { 334 {
328 _instance_of.push_back(_noun); 335 _instance_of.push_back(_noun);
@@ -428,6 +435,38 @@ namespace verbly {
428 return *this; 435 return *this;
429 } 436 }
430 437
438 noun_query& noun_query::derived_from(const word& _w)
439 {
440 if (dynamic_cast<const adjective*>(&_w) != nullptr)
441 {
442 _derived_from_adjective.push_back(dynamic_cast<const adjective&>(_w));
443 } else if (dynamic_cast<const adverb*>(&_w) != nullptr)
444 {
445 _derived_from_adverb.push_back(dynamic_cast<const adverb&>(_w));
446 } else if (dynamic_cast<const noun*>(&_w) != nullptr)
447 {
448 _derived_from_noun.push_back(dynamic_cast<const noun&>(_w));
449 }
450
451 return *this;
452 }
453
454 noun_query& noun_query::not_derived_from(const word& _w)
455 {
456 if (dynamic_cast<const adjective*>(&_w) != nullptr)
457 {
458 _not_derived_from_adjective.push_back(dynamic_cast<const adjective&>(_w));
459 } else if (dynamic_cast<const adverb*>(&_w) != nullptr)
460 {
461 _not_derived_from_adverb.push_back(dynamic_cast<const adverb&>(_w));
462 } else if (dynamic_cast<const noun*>(&_w) != nullptr)
463 {
464 _not_derived_from_noun.push_back(dynamic_cast<const noun&>(_w));
465 }
466
467 return *this;
468 }
469
431 std::list<noun> noun_query::run() const 470 std::list<noun> noun_query::run() const
432 { 471 {
433 std::stringstream construct; 472 std::stringstream construct;
@@ -608,6 +647,11 @@ namespace verbly {
608 conditions.push_back("noun_id IN (SELECT instance_id FROM instantiation)"); 647 conditions.push_back("noun_id IN (SELECT instance_id FROM instantiation)");
609 } 648 }
610 649
650 if (_is_not_proper)
651 {
652 conditions.push_back("noun_id NOT IN (SELECT instance_id FROM instantiation)");
653 }
654
611 if (!_instance_of.empty()) 655 if (!_instance_of.empty())
612 { 656 {
613 std::list<std::string> clauses(_instance_of.size(), "class_id = @CLSID"); 657 std::list<std::string> clauses(_instance_of.size(), "class_id = @CLSID");
@@ -703,6 +747,48 @@ namespace verbly {
703 conditions.push_back(cond); 747 conditions.push_back(cond);
704 } 748 }
705 749
750 if (!_derived_from_adjective.empty())
751 {
752 std::list<std::string> clauses(_derived_from_adjective.size(), "adjective_id = @DERADJ");
753 std::string cond = "noun_id IN (SELECT noun_id FROM noun_adjective_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
754 conditions.push_back(cond);
755 }
756
757 if (!_not_derived_from_adjective.empty())
758 {
759 std::list<std::string> clauses(_not_derived_from_adjective.size(), "adjective_id = @NDERADJ");
760 std::string cond = "noun_id NOT IN (SELECT noun_id FROM noun_adjective_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
761 conditions.push_back(cond);
762 }
763
764 if (!_derived_from_adverb.empty())
765 {
766 std::list<std::string> clauses(_derived_from_adverb.size(), "adverb_id = @DERADV");
767 std::string cond = "noun_id IN (SELECT noun_id FROM noun_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
768 conditions.push_back(cond);
769 }
770
771 if (!_not_derived_from_adverb.empty())
772 {
773 std::list<std::string> clauses(_not_derived_from_adverb.size(), "adverb_id = @NDERADV");
774 std::string cond = "noun_id NOT IN (SELECT noun_id FROM noun_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
775 conditions.push_back(cond);
776 }
777
778 if (!_derived_from_noun.empty())
779 {
780 std::list<std::string> clauses(_derived_from_noun.size(), "noun_2_id = @DERN");
781 std::string cond = "noun_id IN (SELECT noun_1_id FROM noun_noun_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
782 conditions.push_back(cond);
783 }
784
785 if (!_not_derived_from_noun.empty())
786 {
787 std::list<std::string> clauses(_not_derived_from_noun.size(), "noun_2_id = @NDERN");
788 std::string cond = "noun_id NOT IN (SELECT noun_1_id FROM noun_noun_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
789 conditions.push_back(cond);
790 }
791
706 if (!conditions.empty()) 792 if (!conditions.empty())
707 { 793 {
708 construct << " WHERE "; 794 construct << " WHERE ";
@@ -872,6 +958,36 @@ namespace verbly {
872 { 958 {
873 sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@VALID"), value._id); 959 sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@VALID"), value._id);
874 } 960 }
961
962 for (auto adj : _derived_from_adjective)
963 {
964 sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@DERADJ"), adj._id);
965 }
966
967 for (auto adj : _not_derived_from_adjective)
968 {
969 sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@NDERADJ"), adj._id);
970 }
971
972 for (auto adv : _derived_from_adverb)
973 {
974 sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@DERADV"), adv._id);
975 }
976
977 for (auto adv : _not_derived_from_adverb)
978 {
979 sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@NDERADV"), adv._id);
980 }
981
982 for (auto n : _derived_from_noun)
983 {
984 sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@DERN"), n._id);
985 }
986
987 for (auto n : _not_derived_from_noun)
988 {
989 sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@NDERN"), n._id);
990 }
875 991
876 std::list<noun> output; 992 std::list<noun> output;
877 while (sqlite3_step(ppstmt) == SQLITE_ROW) 993 while (sqlite3_step(ppstmt) == SQLITE_ROW)
diff --git a/verbly/noun.h b/verbly/lib/noun.h index f5ba256..da76866 100644 --- a/verbly/noun.h +++ b/verbly/lib/noun.h
@@ -78,6 +78,7 @@ namespace verbly {
78 noun_query& not_member_holonym_of(const noun& _noun); 78 noun_query& not_member_holonym_of(const noun& _noun);
79 79
80 noun_query& is_proper(bool _arg); 80 noun_query& is_proper(bool _arg);
81 noun_query& is_not_proper(bool _arg);
81 noun_query& instance_of(const noun& _noun); 82 noun_query& instance_of(const noun& _noun);
82 noun_query& not_instance_of(const noun& _noun); 83 noun_query& not_instance_of(const noun& _noun);
83 84
@@ -99,6 +100,9 @@ namespace verbly {
99 noun_query& is_attribute(bool _arg); 100 noun_query& is_attribute(bool _arg);
100 noun_query& attribute_of(const adjective& _adj); 101 noun_query& attribute_of(const adjective& _adj);
101 102
103 noun_query& derived_from(const word& _w);
104 noun_query& not_derived_from(const word& _w);
105
102 std::list<noun> run() const; 106 std::list<noun> run() const;
103 107
104 const static int unlimited = -1; 108 const static int unlimited = -1;
@@ -144,6 +148,7 @@ namespace verbly {
144 std::list<noun> _not_member_holonym_of; 148 std::list<noun> _not_member_holonym_of;
145 149
146 bool _is_proper = false; 150 bool _is_proper = false;
151 bool _is_not_proper = false;
147 std::list<noun> _instance_of; 152 std::list<noun> _instance_of;
148 std::list<noun> _not_instance_of; 153 std::list<noun> _not_instance_of;
149 154
@@ -164,6 +169,13 @@ namespace verbly {
164 169
165 bool _is_attribute = false; 170 bool _is_attribute = false;
166 std::list<adjective> _attribute_of; 171 std::list<adjective> _attribute_of;
172
173 std::list<adjective> _derived_from_adjective;
174 std::list<adjective> _not_derived_from_adjective;
175 std::list<adverb> _derived_from_adverb;
176 std::list<adverb> _not_derived_from_adverb;
177 std::list<noun> _derived_from_noun;
178 std::list<noun> _not_derived_from_noun;
167 }; 179 };
168 180
169}; 181};
diff --git a/verbly/token.cpp b/verbly/lib/token.cpp index aa8f50e..aa8f50e 100644 --- a/verbly/token.cpp +++ b/verbly/lib/token.cpp
diff --git a/verbly/token.h b/verbly/lib/token.h index 44d99cb..44d99cb 100644 --- a/verbly/token.h +++ b/verbly/lib/token.h
diff --git a/verbly/util.h b/verbly/lib/util.h index 815b47c..815b47c 100644 --- a/verbly/util.h +++ b/verbly/lib/util.h
diff --git a/verbly/verb.cpp b/verbly/lib/verb.cpp index 23f7c92..23f7c92 100644 --- a/verbly/verb.cpp +++ b/verbly/lib/verb.cpp
diff --git a/verbly/verb.h b/verbly/lib/verb.h index 7cc87e2..7cc87e2 100644 --- a/verbly/verb.h +++ b/verbly/lib/verb.h
diff --git a/verbly/verbly.h b/verbly/lib/verbly.h index b9f5367..b9f5367 100644 --- a/verbly/verbly.h +++ b/verbly/lib/verbly.h
diff --git a/verbly/word.cpp b/verbly/lib/word.cpp index c50e7d3..c50e7d3 100644 --- a/verbly/word.cpp +++ b/verbly/lib/word.cpp
diff --git a/verbly/word.h b/verbly/lib/word.h index 23ddb2b..23ddb2b 100644 --- a/verbly/word.h +++ b/verbly/lib/word.h