path: root/verbly/generator
Diffstat (limited to 'verbly/generator')
-rw-r--r--  verbly/generator/CMakeLists.txt |   12
-rw-r--r--  verbly/generator/generator.cpp  | 1663
-rw-r--r--  verbly/generator/progress.h     |   50
-rw-r--r--  verbly/generator/schema.sql     |  252
4 files changed, 1977 insertions, 0 deletions
diff --git a/verbly/generator/CMakeLists.txt b/verbly/generator/CMakeLists.txt
new file mode 100644
index 0000000..bbc3c4f
--- /dev/null
+++ b/verbly/generator/CMakeLists.txt
@@ -0,0 +1,12 @@
1cmake_minimum_required (VERSION 2.6)
2project (generator)
3
4find_package(PkgConfig)
5pkg_check_modules(sqlite3 sqlite3 REQUIRED)
6find_package(LibXml2 REQUIRED)
7
8include_directories(${sqlite3_INCLUDE_DIRS} ${LIBXML2_INCLUDE_DIR})
9add_executable(generator generator.cpp)
10set_property(TARGET generator PROPERTY CXX_STANDARD 11)
11set_property(TARGET generator PROPERTY CXX_STANDARD_REQUIRED ON)
12target_link_libraries(generator ${sqlite3_LIBRARIES} ${LIBXML2_LIBRARIES})
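# Sketch of how this file is typically consumed; the out-of-source build layout
# below is an assumption, not something the project mandates:
#
#   mkdir build && cd build
#   cmake /path/to/verbly/generator
#   make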
diff --git a/verbly/generator/generator.cpp b/verbly/generator/generator.cpp
new file mode 100644
index 0000000..faef5f7
--- /dev/null
+++ b/verbly/generator/generator.cpp
@@ -0,0 +1,1663 @@
1#include <libxml/parser.h>
2#include <iostream>
3#include <dirent.h>
4#include <set>
5#include <map>
6#include <string>
7#include <vector>
8#include <fstream>
9#include <sqlite3.h>
10#include <sstream>
11#include <regex>
12#include <list>
13#include <algorithm>
14#include "progress.h"
15struct verb {
16 std::string infinitive;
17 std::string past_tense;
18 std::string past_participle;
19 std::string ing_form;
20 std::string s_form;
21};
22
23struct adjective {
24 std::string base;
25 std::string comparative;
26 std::string superlative;
27};
28
29struct noun {
30 std::string singular;
31 std::string plural;
32};
33
34struct group {
35 std::string id;
36 std::set<std::string> members;
37};
38
39std::map<std::string, group> groups;
40std::map<std::string, verb> verbs;
41std::map<std::string, adjective> adjectives;
42std::map<std::string, noun> nouns;
43std::map<int, std::map<int, int>> wn;
44std::map<std::string, std::set<std::string>> pronunciations;
45
46void print_usage()
47{
48 std::cout << "Verbly Datafile Generator" << std::endl;
49 std::cout << "-------------------------" << std::endl;
50 std::cout << "Requires exactly six arguments." << std::endl;
51 std::cout << "1. The path to a VerbNet data directory." << std::endl;
52 std::cout << "2. The path to a SemLink vnpbMappings file." << std::endl;
53 std::cout << "3. The path to an AGID infl.txt file." << std::endl;
54 std::cout << "4. The path to a WordNet prolog data directory." << std::endl;
55 std::cout << "5. The path to a CMUDICT pronunciation file." << std::endl;
56 std::cout << "6. Datafile output path." << std::endl;
57
58 exit(1);
59}
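// A hypothetical invocation matching the argument order documented above; the
// paths are illustrative placeholders, not files shipped with this repository:
//
//   ./generator ~/data/verbnet/ ~/data/semlink/vnpbMappings ~/data/agid/infl.txt \
//     ~/data/wordnet/prolog/ ~/data/cmudict/cmudict.0.7a verbly.sqlite3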
60
61void db_error(sqlite3* ppdb, std::string query)
62{
63 std::cout << "Error writing to output database: " << sqlite3_errmsg(ppdb) << " (query: " << query << ")" << std::endl;
64 sqlite3_close_v2(ppdb);
65 print_usage();
66}
67
68/*
69void parse_group(xmlNodePtr top, std::string filename)
70{
71 xmlChar* key = xmlGetProp(top, (xmlChar*) "ID");
72 if (key == 0)
73 {
74 std::cout << "Bad VerbNet file format: " << filename << std::endl;
75 print_usage();
76 }
77 std::string vnid = key;
78 vnid = vnid.substr(vnid.find_first_of("-")+1);
79 xmlFree(key);
80
81 group g;
82 g.id = vnid;
83
84 for (xmlNodePtr node = top->xmlChildrenNode; node != nullptr; node = node->next)
85 {
86 if (!xmlStrcmp(node->name, (const xmlChar*) "MEMBERS"))
87 {
88 for (xmlNodePtr member = node->xmlChildrenNode; member != nullptr; member = member->next)
89 {
90 if (!xmlStrcmp(member->name, (const xmlChar*) "MEMBER"))
91 {
92 key = xmlGetProp(member, (xmlChar*) "name");
93 g.members.insert(key);
94 xmlFree(key);
95 }
96 }
97 } else if (!xmlStrcmp(node->name, (const xmlChar*) "FRAMES"))
98 {
99 for (xmlNodePtr frame = node->xmlChildrenNode; frame != nullptr; frame = frame->next)
100 {
101 if (!xmlStrcmp(frame->name, (const xmlChar*) "FRAME"))
102 {
103 for (xmlNodePtr framenode = frame->xmlChildrenNode; framenode != nullptr; framenode = framenode->next)
104 {
105
106 }
107 }
108 }
109 }
110 }
111}*/
112
113int main(int argc, char** argv)
114{
115 if (argc != 7)
116 {
117 print_usage();
118 }
119
120 /*DIR* dir;
121 if ((dir = opendir(argv[1])) == nullptr)
122 {
123 std::cout << "Invalid VerbNet data directory." << std::endl;
124
125 print_usage();
126 }
127
128 struct dirent* ent;
129 while ((ent = readdir(dir)) != nullptr)
130 {
131 std::string filename(argv[1]);
132 if (filename.back() != '/')
133 {
134 filename += '/';
135 }
136
137 filename += ent->d_name;
138 //std::cout << ent->d_name << std::endl;
139
140 if (filename.rfind(".xml") != filename.size() - 4)
141 {
142 continue;
143 }
144
145 xmlDocPtr doc = xmlParseFile(filename.c_str());
146 if (doc == nullptr)
147 {
148 std::cout << "Error opening " << filename << std::endl;
149 print_usage();
150 }
151
152 xmlNodePtr top = xmlDocGetRootElement(doc);
153 if ((top == nullptr) || (xmlStrcmp(top->name, (xmlChar*) "VNCLASS")))
154 {
155 std::cout << "Bad VerbNet file format: " << filename << std::endl;
156 print_usage();
157 }
158
159 parse_group(top, filename);
160 }
161
162 closedir(dir);*/
163
164 // Get verbs from AGID
165 std::cout << "Reading inflections..." << std::endl;
166
167 std::ifstream agidfile(argv[3]);
168 if (!agidfile.is_open())
169 {
170 std::cout << "Could not open AGID file: " << argv[3] << std::endl;
171 print_usage();
172 }
173
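  // The loop below assumes AGID infl.txt entries of roughly this shape
  // (these sample lines are illustrative, not quoted from the data file):
  //
  //   abide V: abode, abided | abode, abided | abiding | abides
  //   good A: better | best
  //   mouse N: mice
  //
  // i.e. a headword, a part-of-speech letter (optionally followed by '?'),
  // a colon, and inflected forms separated by " | "; within each form only
  // the text before the first ',' or '?' is kept.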
174 for (;;)
175 {
176 std::string line;
177 if (!getline(agidfile, line))
178 {
179 break;
180 }
181
182 if (!line.empty() && line.back() == '\r')
183 {
184 line.pop_back();
185 }
186
187 std::string::size_type divider = line.find_first_of(" ");
188 std::string word = line.substr(0, divider);
189 line = line.substr(divider+1);
190 char type = line[0];
191
192 if (line[1] == '?')
193 {
194 line.erase(0, 4);
195 } else {
196 line.erase(0, 3);
197 }
198
199 std::vector<std::string> forms;
200 while (!line.empty())
201 {
202 std::string inflection;
203 if ((divider = line.find(" | ")) != std::string::npos)
204 {
205 inflection = line.substr(0, divider);
206 line = line.substr(divider + 3);
207 } else {
208 inflection = line;
209 line = "";
210 }
211
212 if ((divider = inflection.find_first_of(",?")) != std::string::npos)
213 {
214 inflection = inflection.substr(0, divider);
215 }
216
217 forms.push_back(inflection);
218 }
219
220 switch (type)
221 {
222 case 'V':
223 {
224 verb v;
225 v.infinitive = word;
226 if (forms.size() == 4)
227 {
228 v.past_tense = forms[0];
229 v.past_participle = forms[1];
230 v.ing_form = forms[2];
231 v.s_form = forms[3];
232 } else if (forms.size() == 3)
233 {
234 v.past_tense = forms[0];
235 v.past_participle = forms[0];
236 v.ing_form = forms[1];
237 v.s_form = forms[2];
238 } else if (forms.size() == 8)
239 {
240 // As of AGID 2014.08.11, this is only "to be"
241 v.past_tense = forms[0];
242 v.past_participle = forms[2];
243 v.ing_form = forms[3];
244 v.s_form = forms[4];
245 } else {
246 // Words that don't fit the cases above as of AGID 2014.08.11:
247 // - may and shall do not conjugate the way we want them to
248 // - methinks only has a past tense and is an outlier
249 // - wit has five forms, and is archaic/obscure enough that we can ignore it for now
250 std::cout << "Ignoring verb \"" << word << "\" due to non-standard number of forms." << std::endl;
251 }
252
253 verbs[word] = v;
254
255 break;
256 }
257
258 case 'A':
259 {
260 adjective adj;
261 adj.base = word;
262 if (forms.size() == 2)
263 {
264 adj.comparative = forms[0];
265 adj.superlative = forms[1];
266 } else {
267 // As of AGID 2014.08.11, this is only "only", which has only the form "onliest"
268 std::cout << "Ignoring adjective/adverb \"" << word << "\" due to non-standard number of forms." << std::endl;
269 }
270
271 adjectives[word] = adj;
272
273 break;
274 }
275
276 case 'N':
277 {
278 noun n;
279 n.singular = word;
280 if (forms.size() == 1)
281 {
282 n.plural = forms[0];
283 } else {
284 // As of AGID 2014.08.11, this is non-existent.
285 std::cout << "Ignoring noun \"" << word << "\" due to non-standard number of forms." << std::endl;
286 }
287
288 nouns[word] = n;
289
290 break;
291 }
292 }
293 }
294
295 // Pronunciations
296 std::cout << "Reading pronunciations..." << std::endl;
297
298 std::ifstream pronfile(argv[5]);
299 if (!pronfile.is_open())
300 {
301 std::cout << "Could not open CMUDICT file: " << argv[5] << std::endl;
302 print_usage();
303 }
304
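  // The regex below targets CMUDICT-style entries: a word, one or more spaces,
  // then its ARPABET phonemes, with "(n)" suffixes marking alternate
  // pronunciations. Sample lines (illustrative):
  //
  //   HELLO  HH AH0 L OW1
  //   READ  R IY1 D
  //   READ(1)  R EH1 D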
305 for (;;)
306 {
307 std::string line;
308 if (!getline(pronfile, line))
309 {
310 break;
311 }
312
313 if (!line.empty() && line.back() == '\r')
314 {
315 line.pop_back();
316 }
317
318 std::regex phoneme("([A-Z][^ \\(]*)(?:\\(\\d+\\))? +([A-Z][A-Z 0-9]*)");
319 std::smatch phoneme_data;
320 if (std::regex_search(line, phoneme_data, phoneme))
321 {
322 std::string canonical(phoneme_data[1]);
323 std::transform(std::begin(canonical), std::end(canonical), std::begin(canonical), ::tolower);
324
325 pronunciations[canonical].insert(phoneme_data[2]);
326 }
327 }
328
329 // Start writing output
330 std::cout << "Writing schema..." << std::endl;
331
332 sqlite3* ppdb;
333 if (sqlite3_open_v2(argv[6], &ppdb, SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE, NULL) != SQLITE_OK)
334 {
335 std::cout << "Error opening output datafile: " << sqlite3_errmsg(ppdb) << std::endl;
336 print_usage();
337 }
338
339 std::ifstream schemafile("schema.sql");
340 if (!schemafile.is_open())
341 {
342 std::cout << "Could not find schema file" << std::endl;
343 print_usage();
344 }
345
346 std::stringstream schemabuilder;
347 for (;;)
348 {
349 std::string line;
350 if (!getline(schemafile, line))
351 {
352 break;
353 }
354
355 if (!line.empty() && line.back() == '\r')
356 {
357 line.pop_back();
358 }
359
360 schemabuilder << line << std::endl;
361 }
362
363 std::string schema = schemabuilder.str();
364 while (!schema.empty())
365 {
366 std::string query;
367 std::string::size_type divider = schema.find(";");
368 if (divider != std::string::npos)
369 {
370 query = schema.substr(0, divider+1);
371 schema = schema.substr(divider+2);
372 } else {
373 break;
374 }
375
376 sqlite3_stmt* schmstmt;
377 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &schmstmt, NULL) != SQLITE_OK)
378 {
379 db_error(ppdb, query);
380 }
381
382 if (sqlite3_step(schmstmt) != SQLITE_DONE)
383 {
384 db_error(ppdb, query);
385 }
386
387 sqlite3_finalize(schmstmt);
388 }
389
390 {
391 progress ppgs("Writing verbs...", verbs.size());
392 for (auto& mapping : verbs)
393 {
394 sqlite3_stmt* ppstmt;
395 std::string query("INSERT INTO verbs (infinitive, past_tense, past_participle, ing_form, s_form) VALUES (?, ?, ?, ?, ?)");
396 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
397 {
398 db_error(ppdb, query);
399 }
400
401 sqlite3_bind_text(ppstmt, 1, mapping.second.infinitive.c_str(), mapping.second.infinitive.length(), SQLITE_STATIC);
402 sqlite3_bind_text(ppstmt, 2, mapping.second.past_tense.c_str(), mapping.second.past_tense.length(), SQLITE_STATIC);
403 sqlite3_bind_text(ppstmt, 3, mapping.second.past_participle.c_str(), mapping.second.past_participle.length(), SQLITE_STATIC);
404 sqlite3_bind_text(ppstmt, 4, mapping.second.ing_form.c_str(), mapping.second.ing_form.length(), SQLITE_STATIC);
405 sqlite3_bind_text(ppstmt, 5, mapping.second.s_form.c_str(), mapping.second.s_form.length(), SQLITE_STATIC);
406
407 if (sqlite3_step(ppstmt) != SQLITE_DONE)
408 {
409 db_error(ppdb, query);
410 }
411
412 sqlite3_finalize(ppstmt);
413
414 std::string canonical(mapping.second.infinitive);
415 std::transform(std::begin(canonical), std::end(canonical), std::begin(canonical), ::tolower);
416 if (pronunciations.count(canonical) == 1)
417 {
418 query = "SELECT last_insert_rowid()";
419 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
420 {
421 db_error(ppdb, query);
422 }
423
424 if (sqlite3_step(ppstmt) != SQLITE_ROW)
425 {
426 db_error(ppdb, query);
427 }
428
429 int rowid = sqlite3_column_int(ppstmt, 0);
430
431 sqlite3_finalize(ppstmt);
432
433 for (auto pronunciation : pronunciations[canonical])
434 {
435 query = "INSERT INTO verb_pronunciations (verb_id, pronunciation) VALUES (?, ?)";
436 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
437 {
438 db_error(ppdb, query);
439 }
440
441 sqlite3_bind_int(ppstmt, 1, rowid);
442 sqlite3_bind_text(ppstmt, 2, pronunciation.c_str(), pronunciation.length(), SQLITE_STATIC);
443
444 if (sqlite3_step(ppstmt) != SQLITE_DONE)
445 {
446 db_error(ppdb, query);
447 }
448
449 sqlite3_finalize(ppstmt);
450 }
451 }
452
453 ppgs.update();
454 }
455 }
456
457 // Get nouns/adjectives/adverbs from WordNet
458 // Useful relations:
459 // - s: master list
460 // - ant: antonymy (e.g. happy/sad, sad/happy, happiness/sadness)
461 // - at: variation (e.g. a measurement can be standard or nonstandard)
462 // - der: derivation (e.g. happy/happily, happily/happy)
463 // - hyp: hypernymy/hyponymy (e.g. color/red, color/blue)
464 // - ins: instantiation (do we need this? let's see)
465 // - mm: member meronymy/holonymy (e.g. family/mother, family/child)
466 // - mp: part meronymy/holonymy (e.g. wheel/spoke, wheel/tire)
467 // - ms: substance meronymy/holonymy (e.g. tire/rubber, doorstop/rubber)
468 // - per: pertainymy (e.g. something that is Alaskan pertains to Alaska)
469 // mannernymy (e.g. something done quickly is done in a manner that is quick)
470 // - sa: specification (e.g. inaccurate (general) can mean imprecise or incorrect (specific))
471 // - sim: synonymy (e.g. cheerful/happy, happy/cheerful)
472 // - syntax: positioning flags for some adjectives
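  // The wn_*.pl files parsed below are Prolog fact lists; the regexes expect
  // lines of roughly this shape (synset ids and words here are illustrative).
  // The leading digit of a synset id encodes its part of speech, which is why
  // the code divides by 100000000: 1 = noun, 2 = verb, 3 = adjective, 4 = adverb.
  //
  //   s(101234567,1,'dog',n,1,42).
  //   ant(301234567,2,301234568,1).
  //   hyp(101234567,101234568).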
473 std::string wnpref {argv[4]};
474 if (wnpref.back() != '/')
475 {
476 wnpref += '/';
477 }
478
479 // s table
480 {
481 std::ifstream wnsfile(wnpref + "wn_s.pl");
482 if (!wnsfile.is_open())
483 {
484 std::cout << "Invalid WordNet data directory." << std::endl;
485 print_usage();
486 }
487
488 std::list<std::string> lines;
489 for (;;)
490 {
491 std::string line;
492 if (!getline(wnsfile, line))
493 {
494 break;
495 }
496
497 if (!line.empty() && line.back() == '\r')
498 {
499 line.pop_back();
500 }
501
502 lines.push_back(line);
503 }
504
505 progress ppgs("Writing nouns, adjectives, and adverbs...", lines.size());
506 for (auto line : lines)
507 {
508 ppgs.update();
509
510 std::regex relation("^s\\(([134]\\d{8}),(\\d+),'([\\w ]+)',");
511 std::smatch relation_data;
512 if (!std::regex_search(line, relation_data, relation))
513 {
514 continue;
515 }
516
517 int synset_id = stoi(relation_data[1]);
518 int wnum = stoi(relation_data[2]);
519 std::string word = relation_data[3];
520
521 std::string query;
522 switch (synset_id / 100000000)
523 {
524 case 1: // Noun
525 {
526 if (nouns.count(word) == 1)
527 {
528 query = "INSERT INTO nouns (singular, plural) VALUES (?, ?)";
529 } else {
530 query = "INSERT INTO nouns (singular) VALUES (?)";
531 }
532
533 break;
534 }
535
536 case 2: // Verb
537 {
538 // Ignore
539
540 break;
541 }
542
543 case 3: // Adjective
544 {
545 if (adjectives.count(word) == 1)
546 {
547 query = "INSERT INTO adjectives (base_form, comparative, superlative) VALUES (?, ?, ?)";
548 } else {
549 query = "INSERT INTO adjectives (base_form) VALUES (?)";
550 }
551
552 break;
553 }
554
555 case 4: // Adverb
556 {
557 if (adjectives.count(word) == 1)
558 {
559 query = "INSERT INTO adverbs (base_form, comparative, superlative) VALUES (?, ?, ?)";
560 } else {
561 query = "INSERT INTO adverbs (base_form) VALUES (?)";
562 }
563
564 break;
565 }
566 }
567
568 sqlite3_stmt* ppstmt;
569 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
570 {
571 db_error(ppdb, query);
572 }
573
574 sqlite3_bind_text(ppstmt, 1, word.c_str(), word.length(), SQLITE_STATIC);
575 switch (synset_id / 100000000)
576 {
577 case 1: // Noun
578 {
579 if (nouns.count(word) == 1)
580 {
581 sqlite3_bind_text(ppstmt, 2, nouns[word].plural.c_str(), nouns[word].plural.length(), SQLITE_STATIC);
582 }
583
584 break;
585 }
586
587 case 3: // Adjective
588 case 4: // Adverb
589 {
590 if (adjectives.count(word) == 1)
591 {
592 sqlite3_bind_text(ppstmt, 2, adjectives[word].comparative.c_str(), adjectives[word].comparative.length(), SQLITE_STATIC);
593 sqlite3_bind_text(ppstmt, 3, adjectives[word].superlative.c_str(), adjectives[word].superlative.length(), SQLITE_STATIC);
594 }
595
596 break;
597 }
598 }
599
600 if (sqlite3_step(ppstmt) != SQLITE_DONE)
601 {
602 db_error(ppdb, query);
603 }
604
605 sqlite3_finalize(ppstmt);
606
607 query = "SELECT last_insert_rowid()";
608 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
609 {
610 db_error(ppdb, query);
611 }
612
613 if (sqlite3_step(ppstmt) != SQLITE_ROW)
614 {
615 db_error(ppdb, query);
616 }
617
618 int rowid = sqlite3_column_int(ppstmt, 0);
619 wn[synset_id][wnum] = rowid;
620
621 sqlite3_finalize(ppstmt);
622
623 std::string canonical(word);
624 std::transform(std::begin(canonical), std::end(canonical), std::begin(canonical), ::tolower);
625 if (pronunciations.count(canonical) == 1)
626 {
627 for (auto pronunciation : pronunciations[canonical])
628 {
629 switch (synset_id / 100000000)
630 {
631 case 1: // Noun
632 {
633 query = "INSERT INTO noun_pronunciations (noun_id, pronunciation) VALUES (?, ?)";
634
635 break;
636 }
637
638 case 3: // Adjective
639 {
640 query = "INSERT INTO adjective_pronunciations (adjective_id, pronunciation) VALUES (?, ?)";
641
642 break;
643 }
644
645 case 4: // Adverb
646 {
647 query = "INSERT INTO adverb_pronunciations (adverb_id, pronunciation) VALUES (?, ?)";
648
649 break;
650 }
651 }
652
653 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
654 {
655 db_error(ppdb, query);
656 }
657
658 sqlite3_bind_int(ppstmt, 1, rowid);
659 sqlite3_bind_text(ppstmt, 2, pronunciation.c_str(), pronunciation.length(), SQLITE_STATIC);
660
661 if (sqlite3_step(ppstmt) != SQLITE_DONE)
662 {
663 db_error(ppdb, query);
664 }
665
666 sqlite3_finalize(ppstmt);
667 }
668 }
669 }
670 }
671
672 // While we're working on s
673 {
674 progress ppgs("Writing word synonyms...", wn.size());
675 for (auto sense : wn)
676 {
677 ppgs.update();
678
679 for (auto word1 : sense.second)
680 {
681 for (auto word2 : sense.second)
682 {
683 if (word1 != word2)
684 {
685 std::string query;
686 switch (sense.first / 100000000)
687 {
688 case 1: // Noun
689 {
690 query = "INSERT INTO noun_synonymy (noun_1_id, noun_2_id) VALUES (?, ?)";
691
692 break;
693 }
694
695 case 2: // Verb
696 {
697 // Ignore
698
699 break;
700 }
701
702 case 3: // Adjective
703 {
704 query = "INSERT INTO adjective_synonymy (adjective_1_id, adjective_2_id) VALUES (?, ?)";
705
706 break;
707 }
708
709 case 4: // Adverb
710 {
711 query = "INSERT INTO adverb_synonymy (adverb_1_id, adverb_2_id) VALUES (?, ?)";
712
713 break;
714 }
715 }
716
717 sqlite3_stmt* ppstmt;
718 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
719 {
720 db_error(ppdb, query);
721 }
722
723 sqlite3_bind_int(ppstmt, 1, word1.second);
724 sqlite3_bind_int(ppstmt, 2, word2.second);
725
726 if (sqlite3_step(ppstmt) != SQLITE_DONE)
727 {
728 db_error(ppdb, query);
729 }
730
731 sqlite3_finalize(ppstmt);
732 }
733 }
734 }
735 }
736 }
737
738 // ant table
739 {
740 std::ifstream wnantfile(wnpref + "wn_ant.pl");
741 if (!wnantfile.is_open())
742 {
743 std::cout << "Invalid WordNet data directory." << std::endl;
744 print_usage();
745 }
746
747 std::list<std::string> lines;
748 for (;;)
749 {
750 std::string line;
751 if (!getline(wnantfile, line))
752 {
753 break;
754 }
755
756 if (!line.empty() && line.back() == '\r')
757 {
758 line.pop_back();
759 }
760
761 lines.push_back(line);
762 }
763
764 progress ppgs("Writing antonyms...", lines.size());
765 for (auto line : lines)
766 {
767 ppgs.update();
768
769 std::regex relation("^ant\\(([134]\\d{8}),(\\d+),([134]\\d{8}),(\\d+)\\)\\.");
770 std::smatch relation_data;
771 if (!std::regex_search(line, relation_data, relation))
772 {
773 continue;
774 }
775
776 int synset_id_1 = stoi(relation_data[1]);
777 int wnum_1 = stoi(relation_data[2]);
778 int synset_id_2 = stoi(relation_data[3]);
779 int wnum_2 = stoi(relation_data[4]);
780
781 std::string query;
782 switch (synset_id_1 / 100000000)
783 {
784 case 1: // Noun
785 {
786 query = "INSERT INTO noun_antonymy (noun_1_id, noun_2_id) VALUES (?, ?)";
787
788 break;
789 }
790
791 case 2: // Verb
792 {
793 // Ignore
794
795 break;
796 }
797
798 case 3: // Adjective
799 {
800 query = "INSERT INTO adjective_antonymy (adjective_1_id, adjective_2_id) VALUES (?, ?)";
801
802 break;
803 }
804
805 case 4: // Adverb
806 {
807 query = "INSERT INTO adverb_antonymy (adverb_1_id, adverb_2_id) VALUES (?, ?)";
808
809 break;
810 }
811 }
812
813 sqlite3_stmt* ppstmt;
814 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
815 {
816 db_error(ppdb, query);
817 }
818
819 sqlite3_bind_int(ppstmt, 1, wn[synset_id_1][wnum_1]);
820 sqlite3_bind_int(ppstmt, 2, wn[synset_id_2][wnum_2]);
821
822 if (sqlite3_step(ppstmt) != SQLITE_DONE)
823 {
824 db_error(ppdb, query);
825 }
826
827 sqlite3_finalize(ppstmt);
828 }
829 }
830
831 // at table
832 {
833 std::ifstream wnatfile(wnpref + "wn_at.pl");
834 if (!wnatfile.is_open())
835 {
836 std::cout << "Invalid WordNet data directory." << std::endl;
837 print_usage();
838 }
839
840 std::list<std::string> lines;
841 for (;;)
842 {
843 std::string line;
844 if (!getline(wnatfile, line))
845 {
846 break;
847 }
848
849 if (!line.empty() && line.back() == '\r')
850 {
851 line.pop_back();
852 }
853
854 lines.push_back(line);
855 }
856
857 progress ppgs("Writing variations...", lines.size());
858 for (auto line : lines)
859 {
860 ppgs.update();
861
862 std::regex relation("^at\\((1\\d{8}),(3\\d{8})\\)\\.");
863 std::smatch relation_data;
864 if (!std::regex_search(line, relation_data, relation))
865 {
866 continue;
867 }
868
869 int synset_id_1 = stoi(relation_data[1]);
870 int synset_id_2 = stoi(relation_data[2]);
871 std::string query("INSERT INTO variation (noun_id, adjective_id) VALUES (?, ?)");
872
873 for (auto mapping1 : wn[synset_id_1])
874 {
875 for (auto mapping2 : wn[synset_id_2])
876 {
877 sqlite3_stmt* ppstmt;
878 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK)
879 {
880 db_error(ppdb, query);
881 }
882
883 sqlite3_bind_int(ppstmt, 1, mapping1.second);
884 sqlite3_bind_int(ppstmt, 2, mapping2.second);
885
886 if (sqlite3_step(ppstmt) != SQLITE_DONE)
887 {
888 db_error(ppdb, query);
889 }
890
891 sqlite3_finalize(ppstmt);
892 }
893 }
894 }
895 }
896
897 // der table
898 {
899 std::ifstream wnderfile(wnpref + "wn_der.pl");
900 if (!wnderfile.is_open())
901 {
902 std::cout << "Invalid WordNet data directory." << std::endl;
903 print_usage();
904 }
905
906 std::list<std::string> lines;
907 for (;;)
908 {
909 std::string line;
910 if (!getline(wnderfile, line))
911 {
912 break;
913 }
914
915 if (!line.empty() && line.back() == '\r')
916 {
917 line.pop_back();
918 }
919
920 lines.push_back(line);
921 }
922
923 progress ppgs("Writing morphological derivation...", lines.size());
924 for (auto line : lines)
925 {
926 ppgs.update();
927
928 std::regex relation("^der\\(([134]\\d{8}),(\\d+),([134]\\d{8}),(\\d+)\\)\\.");
929 std::smatch relation_data;
930 if (!std::regex_search(line, relation_data, relation))
931 {
932 continue;
933 }
934
935 int synset_id_1 = stoi(relation_data[1]);
936 int wnum_1 = stoi(relation_data[2]);
937 int synset_id_2 = stoi(relation_data[3]);
938 int wnum_2 = stoi(relation_data[4]);
939 std::string query;
940 switch (synset_id_1 / 100000000)
941 {
942 case 1: // Noun
943 {
944 switch (synset_id_2 / 100000000)
945 {
946 case 1: // Noun
947 {
948 query = "INSERT INTO noun_noun_derivation (noun_1_id, noun_2_id) VALUES (?, ?)";
949 break;
950 }
951
952 case 3: // Adjective
953 {
954 query = "INSERT INTO noun_adjective_derivation (noun_id, adjective_id) VALUES (?, ?)";
955 break;
956 }
957
958 case 4: // Adverb
959 {
960 query = "INSERT INTO noun_adverb_derivation (noun_id, adverb_id) VALUES (?, ?)";
961 break;
962 }
963 }
964
965 break;
966 }
967
968 case 3: // Adjective
969 {
970 switch (synset_id_2 / 100000000)
971 {
972 case 1: // Noun
973 {
974 query = "INSERT INTO noun_adjective_derivation (adjective_id, noun_id) VALUES (?, ?)";
975 break;
976 }
977
978 case 3: // Adjective
979 {
980 query = "INSERT INTO adjective_adjective_derivation (adjective_id, adjective_id) VALUES (?, ?)";
981 break;
982 }
983
984 case 4: // Adverb
985 {
986 query = "INSERT INTO adjective_adverb_derivation (adjective_id, adverb_id) VALUES (?, ?)";
987 break;
988 }
989 }
990
991 break;
992 }
993
994 case 4: // Adverb
995 {
996 switch (synset_id_2 / 100000000)
997 {
998 case 1: // Noun
999 {
1000 query = "INSERT INTO noun_adverb_derivation (adverb_id, noun_id) VALUES (?, ?)";
1001 break;
1002 }
1003
1004 case 3: // Adjective
1005 {
1006 query = "INSERT INTO adjective_adverb_derivation (adverb_id, adjective_id) VALUES (?, ?)";
1007 break;
1008 }
1009
1010 case 4: // Adverb
1011 {
1012 query = "INSERT INTO adverb_adverb_derivation (adverb_1_id, adverb_2_id) VALUES (?, ?)";
1013 break;
1014 }
1015 }
1016
1017 break;
1018 }
1019 }
1020
1021 sqlite3_stmt* ppstmt;
1022 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK)
1023 {
1024 db_error(ppdb, query);
1025 }
1026
1027 sqlite3_bind_int(ppstmt, 1, wn[synset_id_1][wnum_1]);
1028 sqlite3_bind_int(ppstmt, 2, wn[synset_id_2][wnum_2]);
1029
1030 if (sqlite3_step(ppstmt) != SQLITE_DONE)
1031 {
1032 db_error(ppdb, query);
1033 }
1034
1035 sqlite3_finalize(ppstmt);
1036 }
1037 }
1038
1039 // hyp table
1040 {
1041 std::ifstream wnhypfile(wnpref + "wn_hyp.pl");
1042 if (!wnhypfile.is_open())
1043 {
1044 std::cout << "Invalid WordNet data directory." << std::endl;
1045 print_usage();
1046 }
1047
1048 std::list<std::string> lines;
1049 for (;;)
1050 {
1051 std::string line;
1052 if (!getline(wnhypfile, line))
1053 {
1054 break;
1055 }
1056
1057 if (!line.empty() && line.back() == '\r')
1058 {
1059 line.pop_back();
1060 }
1061
1062 lines.push_back(line);
1063 }
1064
1065 progress ppgs("Writing hypernyms...", lines.size());
1066 for (auto line : lines)
1067 {
1068 ppgs.update();
1069
1070 std::regex relation("^hyp\\((1\\d{8}),(1\\d{8})\\)\\.");
1071 std::smatch relation_data;
1072 if (!std::regex_search(line, relation_data, relation))
1073 {
1074 continue;
1075 }
1076
1077 int synset_id_1 = stoi(relation_data[1]);
1078 int synset_id_2 = stoi(relation_data[2]);
1079 std::string query("INSERT INTO hypernymy (hyponym_id, hypernym_id) VALUES (?, ?)");
1080
1081 for (auto mapping1 : wn[synset_id_1])
1082 {
1083 for (auto mapping2 : wn[synset_id_2])
1084 {
1085 sqlite3_stmt* ppstmt;
1086 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK)
1087 {
1088 db_error(ppdb, query);
1089 }
1090
1091 sqlite3_bind_int(ppstmt, 1, mapping1.second);
1092 sqlite3_bind_int(ppstmt, 2, mapping2.second);
1093
1094 if (sqlite3_step(ppstmt) != SQLITE_DONE)
1095 {
1096 db_error(ppdb, query);
1097 }
1098
1099 sqlite3_finalize(ppstmt);
1100 }
1101 }
1102 }
1103 }
1104
1105 // ins table
1106 {
1107 std::ifstream wninsfile(wnpref + "wn_ins.pl");
1108 if (!wninsfile.is_open())
1109 {
1110 std::cout << "Invalid WordNet data directory." << std::endl;
1111 print_usage();
1112 }
1113
1114 std::list<std::string> lines;
1115 for (;;)
1116 {
1117 std::string line;
1118 if (!getline(wninsfile, line))
1119 {
1120 break;
1121 }
1122
1123 if (!line.empty() && line.back() == '\r')
1124 {
1125 line.pop_back();
1126 }
1127
1128 lines.push_back(line);
1129 }
1130
1131 progress ppgs("Writing instantiations...", lines.size());
1132 for (auto line : lines)
1133 {
1134 ppgs.update();
1135
1136 std::regex relation("^ins\\((1\\d{8}),(1\\d{8})\\)\\.");
1137 std::smatch relation_data;
1138 if (!std::regex_search(line, relation_data, relation))
1139 {
1140 continue;
1141 }
1142
1143 int synset_id_1 = stoi(relation_data[1]);
1144 int synset_id_2 = stoi(relation_data[2]);
1145 std::string query("INSERT INTO instantiation (instance_id, class_id) VALUES (?, ?)");
1146
1147 for (auto mapping1 : wn[synset_id_1])
1148 {
1149 for (auto mapping2 : wn[synset_id_2])
1150 {
1151 sqlite3_stmt* ppstmt;
1152 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK)
1153 {
1154 db_error(ppdb, query);
1155 }
1156
1157 sqlite3_bind_int(ppstmt, 1, mapping1.second);
1158 sqlite3_bind_int(ppstmt, 2, mapping2.second);
1159
1160 if (sqlite3_step(ppstmt) != SQLITE_DONE)
1161 {
1162 db_error(ppdb, query);
1163 }
1164
1165 sqlite3_finalize(ppstmt);
1166 }
1167 }
1168 }
1169 }
1170
1171 // mm table
1172 {
1173 std::ifstream wnmmfile(wnpref + "wn_mm.pl");
1174 if (!wnmmfile.is_open())
1175 {
1176 std::cout << "Invalid WordNet data directory." << std::endl;
1177 print_usage();
1178 }
1179
1180 std::list<std::string> lines;
1181 for (;;)
1182 {
1183 std::string line;
1184 if (!getline(wnmmfile, line))
1185 {
1186 break;
1187 }
1188
1189 if (!line.empty() && line.back() == '\r')
1190 {
1191 line.pop_back();
1192 }
1193
1194 lines.push_back(line);
1195 }
1196
1197 progress ppgs("Writing member meronyms...", lines.size());
1198 for (auto line : lines)
1199 {
1200 ppgs.update();
1201
1202 std::regex relation("^mm\\((1\\d{8}),(1\\d{8})\\)\\.");
1203 std::smatch relation_data;
1204 if (!std::regex_search(line, relation_data, relation))
1205 {
1206 continue;
1207 }
1208
1209 int synset_id_1 = stoi(relation_data[1]);
1210 int synset_id_2 = stoi(relation_data[2]);
1211 std::string query("INSERT INTO member_meronymy (holonym_id, meronym_id) VALUES (?, ?)");
1212
1213 for (auto mapping1 : wn[synset_id_1])
1214 {
1215 for (auto mapping2 : wn[synset_id_2])
1216 {
1217 sqlite3_stmt* ppstmt;
1218 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK)
1219 {
1220 db_error(ppdb, query);
1221 }
1222
1223 sqlite3_bind_int(ppstmt, 1, mapping1.second);
1224 sqlite3_bind_int(ppstmt, 2, mapping2.second);
1225
1226 if (sqlite3_step(ppstmt) != SQLITE_DONE)
1227 {
1228 db_error(ppdb, query);
1229 }
1230
1231 sqlite3_finalize(ppstmt);
1232 }
1233 }
1234 }
1235 }
1236
1237 // ms table
1238 {
1239 std::ifstream wnmsfile(wnpref + "wn_ms.pl");
1240 if (!wnmsfile.is_open())
1241 {
1242 std::cout << "Invalid WordNet data directory." << std::endl;
1243 print_usage();
1244 }
1245
1246 std::list<std::string> lines;
1247 for (;;)
1248 {
1249 std::string line;
1250 if (!getline(wnmsfile, line))
1251 {
1252 break;
1253 }
1254
1255 if (!line.empty() && line.back() == '\r')
1256 {
1257 line.pop_back();
1258 }
1259
1260 lines.push_back(line);
1261 }
1262
1263 progress ppgs("Writing substance meronyms...", lines.size());
1264 for (auto line : lines)
1265 {
1266 ppgs.update();
1267
1268 std::regex relation("^ms\\((1\\d{8}),(1\\d{8})\\)\\.");
1269 std::smatch relation_data;
1270 if (!std::regex_search(line, relation_data, relation))
1271 {
1272 continue;
1273 }
1274
1275 int synset_id_1 = stoi(relation_data[1]);
1276 int synset_id_2 = stoi(relation_data[2]);
1277 std::string query("INSERT INTO substance_meronymy (holonym_id, meronym_id) VALUES (?, ?)");
1278
1279 for (auto mapping1 : wn[synset_id_1])
1280 {
1281 for (auto mapping2 : wn[synset_id_2])
1282 {
1283 sqlite3_stmt* ppstmt;
1284 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK)
1285 {
1286 db_error(ppdb, query);
1287 }
1288
1289 sqlite3_bind_int(ppstmt, 1, mapping1.second);
1290 sqlite3_bind_int(ppstmt, 2, mapping2.second);
1291
1292 if (sqlite3_step(ppstmt) != SQLITE_DONE)
1293 {
1294 db_error(ppdb, query);
1295 }
1296
1297 sqlite3_finalize(ppstmt);
1298 }
1299 }
1300 }
1301 }
1302
1303 // mp table
1304 {
1305 std::ifstream wnmpfile(wnpref + "wn_mp.pl");
1306 if (!wnmpfile.is_open())
1307 {
1308 std::cout << "Invalid WordNet data directory." << std::endl;
1309 print_usage();
1310 }
1311
1312 std::list<std::string> lines;
1313 for (;;)
1314 {
1315 std::string line;
1316 if (!getline(wnmpfile, line))
1317 {
1318 break;
1319 }
1320
1321 if (!line.empty() && line.back() == '\r')
1322 {
1323 line.pop_back();
1324 }
1325
1326 lines.push_back(line);
1327 }
1328
1329 progress ppgs("Writing part meronyms...", lines.size());
1330 for (auto line : lines)
1331 {
1332 ppgs.update();
1333
1334 std::regex relation("^mp\\((1\\d{8}),(1\\d{8})\\)\\.");
1335 std::smatch relation_data;
1336 if (!std::regex_search(line, relation_data, relation))
1337 {
1338 continue;
1339 }
1340
1341 int synset_id_1 = stoi(relation_data[1]);
1342 int synset_id_2 = stoi(relation_data[2]);
1343 std::string query("INSERT INTO part_meronymy (holonym_id, meronym_id) VALUES (?, ?)");
1344
1345 for (auto mapping1 : wn[synset_id_1])
1346 {
1347 for (auto mapping2 : wn[synset_id_2])
1348 {
1349 sqlite3_stmt* ppstmt;
1350 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK)
1351 {
1352 db_error(ppdb, query);
1353 }
1354
1355 sqlite3_bind_int(ppstmt, 1, mapping1.second);
1356 sqlite3_bind_int(ppstmt, 2, mapping2.second);
1357
1358 if (sqlite3_step(ppstmt) != SQLITE_DONE)
1359 {
1360 db_error(ppdb, query);
1361 }
1362
1363 sqlite3_finalize(ppstmt);
1364 }
1365 }
1366 }
1367 }
1368
1369 // per table
1370 {
1371 std::ifstream wnperfile(wnpref + "wn_per.pl");
1372 if (!wnperfile.is_open())
1373 {
1374 std::cout << "Invalid WordNet data directory." << std::endl;
1375 print_usage();
1376 }
1377
1378 std::list<std::string> lines;
1379 for (;;)
1380 {
1381 std::string line;
1382 if (!getline(wnperfile, line))
1383 {
1384 break;
1385 }
1386
1387 if (!line.empty() && line.back() == '\r')
1388 {
1389 line.pop_back();
1390 }
1391
1392 lines.push_back(line);
1393 }
1394
1395 progress ppgs("Writing pertainyms and mannernyms...", lines.size());
1396 for (auto line : lines)
1397 {
1398 ppgs.update();
1399
1400 std::regex relation("^per\\(([34]\\d{8}),(\\d+),([13]\\d{8}),(\\d+)\\)\\.");
1401 std::smatch relation_data;
1402 if (!std::regex_search(line, relation_data, relation))
1403 {
1404 continue;
1405 }
1406
1407 int synset_id_1 = stoi(relation_data[1]);
1408 int wnum_1 = stoi(relation_data[2]);
1409 int synset_id_2 = stoi(relation_data[3]);
1410 int wnum_2 = stoi(relation_data[4]);
1411 std::string query;
1412 switch (synset_id_1 / 100000000)
1413 {
1414 case 3: // Adjective
1415 {
1416 // This is a pertainym, the second word should be a noun
1417 // Technically it can be an adjective but we're ignoring that
1418 if (synset_id_2 / 100000000 != 1)
1419 {
1420 continue;
1421 }
1422
1423 query = "INSERT INTO pertainymy (pertainym_id, noun_id) VALUES (?, ?)";
1424
1425 break;
1426 }
1427
1428 case 4: // Adverb
1429 {
1430 // This is a mannernym, the second word should be an adjective
1431 if (synset_id_2 / 100000000 != 3)
1432 {
1433 continue;
1434 }
1435
1436 query = "INSERT INTO mannernymy (mannernym_id, adjective_id) VALUES (?, ?)";
1437
1438 break;
1439 }
1440 }
1441
1442 sqlite3_stmt* ppstmt;
1443 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
1444 {
1445 db_error(ppdb, query);
1446 }
1447
1448 sqlite3_bind_int(ppstmt, 1, wn[synset_id_1][wnum_1]);
1449 sqlite3_bind_int(ppstmt, 2, wn[synset_id_2][wnum_2]);
1450
1451 if (sqlite3_step(ppstmt) != SQLITE_DONE)
1452 {
1453 db_error(ppdb, query);
1454 }
1455
1456 sqlite3_finalize(ppstmt);
1457 }
1458 }
1459
1460 // sa table
1461 {
1462 std::ifstream wnsafile(wnpref + "wn_sa.pl");
1463 if (!wnsafile.is_open())
1464 {
1465 std::cout << "Invalid WordNet data directory." << std::endl;
1466 print_usage();
1467 }
1468
1469 std::list<std::string> lines;
1470 for (;;)
1471 {
1472 std::string line;
1473 if (!getline(wnsafile, line))
1474 {
1475 break;
1476 }
1477
1478 if (!line.empty() && line.back() == '\r')
1479 {
1480 line.pop_back();
1481 }
1482
1483 lines.push_back(line);
1484 }
1485
1486 progress ppgs("Writing specifications...", lines.size());
1487 for (auto line : lines)
1488 {
1489 ppgs.update();
1490
1491 std::regex relation("^per\\((3\\d{8}),(\\d+),(3\\d{8}),(\\d+)\\)\\.");
1492 std::smatch relation_data;
1493 if (!std::regex_search(line, relation_data, relation))
1494 {
1495 continue;
1496 }
1497
1498 int synset_id_1 = stoi(relation_data[1]);
1499 int wnum_1 = stoi(relation_data[2]);
1500 int synset_id_2 = stoi(relation_data[3]);
1501 int wnum_2 = stoi(relation_data[4]);
1502 std::string query("INSERT INTO specification (general_id, specific_id) VALUES (?, ?)");
1503
1504 sqlite3_stmt* ppstmt;
1505 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
1506 {
1507 db_error(ppdb, query);
1508 }
1509
1510 sqlite3_bind_int(ppstmt, 1, wn[synset_id_1][wnum_1]);
1511 sqlite3_bind_int(ppstmt, 2, wn[synset_id_2][wnum_2]);
1512
1513 if (sqlite3_step(ppstmt) != SQLITE_DONE)
1514 {
1515 db_error(ppdb, query);
1516 }
1517
1518 sqlite3_finalize(ppstmt);
1519 }
1520 }
1521
1522 // sim table
1523 {
1524 std::ifstream wnsimfile(wnpref + "wn_sim.pl");
1525 if (!wnsimfile.is_open())
1526 {
1527 std::cout << "Invalid WordNet data directory." << std::endl;
1528 print_usage();
1529 }
1530
1531 std::list<std::string> lines;
1532 for (;;)
1533 {
1534 std::string line;
1535 if (!getline(wnsimfile, line))
1536 {
1537 break;
1538 }
1539
1540 if (!line.empty() && line.back() == '\r')
1541 {
1542 line.pop_back();
1543 }
1544
1545 lines.push_back(line);
1546 }
1547
1548 progress ppgs("Writing sense synonyms...", lines.size());
1549 for (auto line : lines)
1550 {
1551 ppgs.update();
1552
1553 std::regex relation("^sim\\((3\\d{8}),(3\\d{8})\\)\\.");
1554 std::smatch relation_data;
1555 if (!std::regex_search(line, relation_data, relation))
1556 {
1557 continue;
1558 }
1559
1560 int synset_id_1 = stoi(relation_data[1]);
1561 int synset_id_2 = stoi(relation_data[2]);
1562 std::string query("INSERT INTO adjective_synonymy (adjective_1_id, adjective_2_id) VALUES (?, ?)");
1563
1564 for (auto mapping1 : wn[synset_id_1])
1565 {
1566 for (auto mapping2 : wn[synset_id_2])
1567 {
1568 sqlite3_stmt* ppstmt;
1569 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK)
1570 {
1571 db_error(ppdb, query);
1572 }
1573
1574 sqlite3_bind_int(ppstmt, 1, mapping1.second);
1575 sqlite3_bind_int(ppstmt, 2, mapping2.second);
1576
1577 if (sqlite3_step(ppstmt) != SQLITE_DONE)
1578 {
1579 db_error(ppdb, query);
1580 }
1581
1582 sqlite3_reset(ppstmt);
1583 sqlite3_clear_bindings(ppstmt);
1584
1585 sqlite3_bind_int(ppstmt, 1, mapping2.second);
1586 sqlite3_bind_int(ppstmt, 2, mapping1.second);
1587
1588 if (sqlite3_step(ppstmt) != SQLITE_DONE)
1589 {
1590 db_error(ppdb, query);
1591 }
1592
1593 sqlite3_finalize(ppstmt);
1594 }
1595 }
1596 }
1597 }
1598
1599 // syntax table
1600 {
1601 std::ifstream wnsyntaxfile(wnpref + "wn_syntax.pl");
1602 if (!wnsyntaxfile.is_open())
1603 {
1604 std::cout << "Invalid WordNet data directory." << std::endl;
1605 print_usage();
1606 }
1607
1608 std::list<std::string> lines;
1609 for (;;)
1610 {
1611 std::string line;
1612 if (!getline(wnsyntaxfile, line))
1613 {
1614 break;
1615 }
1616
1617 if (!line.empty() && line.back() == '\r')
1618 {
1619 line.pop_back();
1620 }
1621
1622 lines.push_back(line);
1623 }
1624
1625 progress ppgs("Writing adjective syntax markers...", lines.size());
1626 for (auto line : lines)
1627 {
1628 ppgs.update();
1629
1630 std::regex relation("^syntax\\((3\\d{8}),(\\d+),([ipa])p?\\)\\.");
1631 std::smatch relation_data;
1632 if (!std::regex_search(line, relation_data, relation))
1633 {
1634 continue;
1635 }
1636
1637 int synset_id = stoi(relation_data[1]);
1638 int wnum = stoi(relation_data[2]);
1639 std::string syn = relation_data[3];
1640 std::string query("UPDATE adjectives SET position = ? WHERE adjective_id = ?");
1641
1642 sqlite3_stmt* ppstmt;
1643 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK)
1644 {
1645 db_error(ppdb, query);
1646 }
1647
1648 sqlite3_bind_text(ppstmt, 1, syn.c_str(), 1, SQLITE_STATIC);
1649 sqlite3_bind_int(ppstmt, 2, wn[synset_id][wnum]);
1650
1651 if (sqlite3_step(ppstmt) != SQLITE_DONE)
1652 {
1653 db_error(ppdb, query);
1654 }
1655
1656 sqlite3_finalize(ppstmt);
1657 }
1658 }
1659
1660 sqlite3_close_v2(ppdb);
1661
1662 std::cout << "Done." << std::endl;
1663}
diff --git a/verbly/generator/progress.h b/verbly/generator/progress.h
new file mode 100644
index 0000000..81f07a3
--- /dev/null
+++ b/verbly/generator/progress.h
@@ -0,0 +1,50 @@
1#ifndef PROGRESS_H_A34EF856
2#define PROGRESS_H_A34EF856
3
4#include <string>
5#include <iostream>
6class progress {
7 private:
8 std::string message;
9 int total;
10 int cur = 0;
11 int lprint = 0;
12
13 public:
14 progress(std::string message, int total) : message(message), total(total)
15 {
16 std::cout << message << " 0%" << std::flush;
17 }
18
19 void update(int val)
20 {
21 if (val <= total)
22 {
23 cur = val;
24 } else {
25 cur = total;
26 }
27
28 int pp = cur * 100 / total;
29 if (pp != lprint)
30 {
31 lprint = pp;
32
33 std::cout << "\b\b\b\b" << std::right;
34 std::cout.width(3);
35 std::cout << pp << "%" << std::flush;
36 }
37 }
38
39 void update()
40 {
41 update(cur+1);
42 }
43
44 ~progress()
45 {
46 std::cout << "\b\b\b\b100%" << std::endl;
47 }
48};
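// Typical usage, mirroring generator.cpp: construct with a label and a total,
// call update() once per processed item, and let the destructor print "100%".
//
//   {
//     progress ppgs("Writing verbs...", verbs.size());
//     for (auto& mapping : verbs) { /* ... */ ppgs.update(); }
//   }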
49
50#endif /* end of include guard: PROGRESS_H_A34EF856 */
diff --git a/verbly/generator/schema.sql b/verbly/generator/schema.sql
new file mode 100644
index 0000000..b4efe0a
--- /dev/null
+++ b/verbly/generator/schema.sql
@@ -0,0 +1,252 @@
1DROP TABLE IF EXISTS `verbs`;
2CREATE TABLE `verbs` (
3 `verb_id` INTEGER PRIMARY KEY,
4 `infinitive` VARCHAR(32) NOT NULL,
5 `past_tense` VARCHAR(32) NOT NULL,
6 `past_participle` VARCHAR(32) NOT NULL,
7 `ing_form` VARCHAR(32) NOT NULL,
8 `s_form` VARCHAR(32) NOT NULL
9);
10
11DROP TABLE IF EXISTS `groups`;
12CREATE TABLE `groups` (
13 `group_id` INTEGER PRIMARY KEY,
14 `parent_id` INTEGER,
15 FOREIGN KEY (`parent_id`) REFERENCES `groups`(`group_id`)
16);
17
18DROP TABLE IF EXISTS `frames`;
19CREATE TABLE `frames` (
20 `frame_id` INTEGER PRIMARY KEY,
21 `group_id` INTEGER NOT NULL,
22 `data` BLOB NOT NULL,
23 FOREIGN KEY (`group_id`) REFERENCES `groups`(`group_id`)
24);
25
26DROP TABLE IF EXISTS `verb_groups`;
27CREATE TABLE `verb_groups` (
28 `verb_id` INTEGER NOT NULL,
29 `group_id` INTEGER NOT NULL,
30 FOREIGN KEY (`verb_id`) REFERENCES `verbs`(`verb_id`),
31 FOREIGN KEY (`group_id`) REFERENCES `groups`(`group_id`)
32);
33
34DROP TABLE IF EXISTS `adjectives`;
35CREATE TABLE `adjectives` (
36 `adjective_id` INTEGER PRIMARY KEY,
37 `base_form` VARCHAR(32) NOT NULL,
38 `comparative` VARCHAR(32),
39 `superlative` VARCHAR(32),
40 `position` CHAR(1)
41);
42
43DROP TABLE IF EXISTS `adverbs`;
44CREATE TABLE `adverbs` (
45 `adverb_id` INTEGER PRIMARY KEY,
46 `base_form` VARCHAR(32) NOT NULL,
47 `comparative` VARCHAR(32),
48 `superlative` VARCHAR(32)
49);
50
51DROP TABLE IF EXISTS `nouns`;
52CREATE TABLE `nouns` (
53 `noun_id` INTEGER PRIMARY KEY,
54 `singular` VARCHAR(32) NOT NULL,
55 `plural` VARCHAR(32)
56);
57
58DROP TABLE IF EXISTS `hypernymy`;
59CREATE TABLE `hypernymy` (
60 `hypernym_id` INTEGER NOT NULL,
61 `hyponym_id` INTEGER NOT NULL,
62 FOREIGN KEY (`hypernym_id`) REFERENCES `nouns`(`noun_id`),
63 FOREIGN KEY (`hyponym_id`) REFERENCES `nouns`(`noun_id`)
64);
65
66DROP TABLE IF EXISTS `instantiation`;
67CREATE TABLE `instantiation` (
68 `class_id` INTEGER NOT NULL,
69 `instance_id` INTEGER NOT NULL,
70 FOREIGN KEY (`class_id`) REFERENCES `nouns`(`noun_id`),
71 FOREIGN KEY (`instance_id`) REFERENCES `nouns`(`noun_id`)
72);
73
74DROP TABLE IF EXISTS `member_meronymy`;
75CREATE TABLE `member_meronymy` (
76 `meronym_id` INTEGER NOT NULL,
77 `holonym_id` INTEGER NOT NULL,
78 FOREIGN KEY (`meronym_id`) REFERENCES `nouns`(`noun_id`),
79 FOREIGN KEY (`holonym_id`) REFERENCES `nouns`(`noun_id`)
80);
81
82DROP TABLE IF EXISTS `part_meronymy`;
83CREATE TABLE `part_meronymy` (
84 `meronym_id` INTEGER NOT NULL,
85 `holonym_id` INTEGER NOT NULL,
86 FOREIGN KEY (`meronym_id`) REFERENCES `nouns`(`noun_id`),
87 FOREIGN KEY (`holonym_id`) REFERENCES `nouns`(`noun_id`)
88);
89
90DROP TABLE IF EXISTS `substance_meronymy`;
91CREATE TABLE `substance_meronymy` (
92 `meronym_id` INTEGER NOT NULL,
93 `holonym_id` INTEGER NOT NULL,
94 FOREIGN KEY (`meronym_id`) REFERENCES `nouns`(`noun_id`),
95 FOREIGN KEY (`holonym_id`) REFERENCES `nouns`(`noun_id`)
96);
97
98DROP TABLE IF EXISTS `variation`;
99CREATE TABLE `variation` (
100 `noun_id` INTEGER NOT NULL,
101 `adjective_id` INTEGER NOT NULL,
102 FOREIGN KEY (`noun_id`) REFERENCES `nouns`(`noun_id`),
103 FOREIGN KEY (`adjective_id`) REFERENCES `adjectives`(`adjective_id`)
104);
105
106DROP TABLE IF EXISTS `noun_antonymy`;
107CREATE TABLE `noun_antonymy` (
108 `noun_1_id` INTEGER NOT NULL,
109 `noun_2_id` INTEGER NOT NULL,
110 FOREIGN KEY (`noun_1_id`) REFERENCES `nouns`(`noun_id`),
111 FOREIGN KEY (`noun_2_id`) REFERENCES `nouns`(`noun_id`)
112);
113
114DROP TABLE IF EXISTS `adjective_antonymy`;
115CREATE TABLE `adjective_antonymy` (
116 `adjective_1_id` INTEGER NOT NULL,
117 `adjective_2_id` INTEGER NOT NULL,
118 FOREIGN KEY (`adjective_1_id`) REFERENCES `adjectives`(`adjective_id`),
119 FOREIGN KEY (`adjective_2_id`) REFERENCES `adjectives`(`adjective_id`)
120);
121
122DROP TABLE IF EXISTS `adverb_antonymy`;
123CREATE TABLE `adverb_antonymy` (
124 `adverb_1_id` INTEGER NOT NULL,
125 `adverb_2_id` INTEGER NOT NULL,
126 FOREIGN KEY (`adverb_1_id`) REFERENCES `adverbs`(`adverb_id`),
127 FOREIGN KEY (`adverb_2_id`) REFERENCES `adverbs`(`adverb_id`)
128);
129
130DROP TABLE IF EXISTS `specification`;
131CREATE TABLE `specification` (
132 `general_id` INTEGER NOT NULL,
133 `specific_id` INTEGER NOT NULL,
134 FOREIGN KEY (`general_id`) REFERENCES `adjectives`(`adjective_id`),
135 FOREIGN KEY (`specific_id`) REFERENCES `adjectives`(`adjective_id`)
136);
137
138DROP TABLE IF EXISTS `pertainymy`;
139CREATE TABLE `pertainymy` (
140 `noun_id` INTEGER NOT NULL,
141 `pertainym_id` INTEGER NOT NULL,
142 FOREIGN KEY (`noun_id`) REFERENCES `nouns`(`noun_id`),
143 FOREIGN KEY (`pertainym_id`) REFERENCES `adjectives`(`adjective_id`)
144);
145
146DROP TABLE IF EXISTS `mannernymy`;
147CREATE TABLE `mannernymy` (
148 `adjective_id` INTEGER NOT NULL,
149 `mannernym_id` INTEGER NOT NULL,
150 FOREIGN KEY (`adjective_id`) REFERENCES `adjectives`(`adjective_id`),
151 FOREIGN KEY (`mannernym_id`) REFERENCES `adverbs`(`adverb_id`)
152);
153
154DROP TABLE IF EXISTS `noun_synonymy`;
155CREATE TABLE `noun_synonymy` (
156 `noun_1_id` INTEGER NOT NULL,
157 `noun_2_id` INTEGER NOT NULL,
158 FOREIGN KEY (`noun_1_id`) REFERENCES `nouns`(`noun_id`),
159 FOREIGN KEY (`noun_2_id`) REFERENCES `nouns`(`noun_id`)
160);
161
162DROP TABLE IF EXISTS `adjective_synonymy`;
163CREATE TABLE `adjective_synonymy` (
164 `adjective_1_id` INTEGER NOT NULL,
165 `adjective_2_id` INTEGER NOT NULL,
166 FOREIGN KEY (`adjective_1_id`) REFERENCES `adjectives`(`adjective_id`),
167 FOREIGN KEY (`adjective_2_id`) REFERENCES `adjectives`(`adjective_id`)
168);
169
170DROP TABLE IF EXISTS `adverb_synonymy`;
171CREATE TABLE `adverb_synonymy` (
172 `adverb_1_id` INTEGER NOT NULL,
173 `adverb_2_id` INTEGER NOT NULL,
174 FOREIGN KEY (`adverb_1_id`) REFERENCES `adverbs`(`adverb_id`),
175 FOREIGN KEY (`adverb_2_id`) REFERENCES `adverbs`(`adverb_id`)
176);
177
178DROP TABLE IF EXISTS `noun_pronunciations`;
179CREATE TABLE `noun_pronunciations` (
180 `noun_id` INTEGER NOT NULL,
181 `pronunciation` VARCHAR(64) NOT NULL,
182 FOREIGN KEY (`noun_id`) REFERENCES `nouns`(`noun_id`)
183);
184
185DROP TABLE IF EXISTS `verb_pronunciations`;
186CREATE TABLE `verb_pronunciations` (
187 `verb_id` INTEGER NOT NULL,
188 `pronunciation` VARCHAR(64) NOT NULL,
189 FOREIGN KEY (`verb_id`) REFERENCES `verbs`(`verb_id`)
190);
191
192DROP TABLE IF EXISTS `adjective_pronunciations`;
193CREATE TABLE `adjective_pronunciations` (
194 `adjective_id` INTEGER NOT NULL,
195 `pronunciation` VARCHAR(64) NOT NULL,
196 FOREIGN KEY (`adjective_id`) REFERENCES `adjectives`(`adjective_id`)
197);
198
199DROP TABLE IF EXISTS `adverb_pronunciations`;
200CREATE TABLE `adverb_pronunciations` (
201 `adverb_id` INTEGER NOT NULL,
202 `pronunciation` VARCHAR(64) NOT NULL,
203 FOREIGN KEY (`adverb_id`) REFERENCES `adverbs`(`adverb_id`)
204);
205
206DROP TABLE IF EXISTS `noun_noun_derivation`;
207CREATE TABLE `noun_noun_derivation` (
208 `noun_1_id` INTEGER NOT NULL,
209 `noun_2_id` INTEGER NOT NULL,
210 FOREIGN KEY (`noun_1_id`) REFERENCES `nouns`(`noun_id`),
211 FOREIGN KEY (`noun_2_id`) REFERENCES `nouns`(`noun_id`)
212);
213
214DROP TABLE IF EXISTS `noun_adjective_derivation`;
215CREATE TABLE `noun_adjective_derivation` (
216 `noun_id` INTEGER NOT NULL,
217 `adjective_id` INTEGER NOT NULL,
218 FOREIGN KEY (`noun_id`) REFERENCES `nouns`(`noun_id`),
219 FOREIGN KEY (`adjective_id`) REFERENCES `adjectives`(`adjective_id`)
220);
221
222DROP TABLE IF EXISTS `noun_adverb_derivation`;
223CREATE TABLE `noun_adverb_derivation` (
224 `noun_id` INTEGER NOT NULL,
225 `adverb_id` INTEGER NOT NULL,
226 FOREIGN KEY (`noun_id`) REFERENCES `nouns`(`noun_id`),
227 FOREIGN KEY (`adverb_id`) REFERENCES `adverbs`(`adverb_id`)
228);
229
230DROP TABLE IF EXISTS `adjective_adjective_derivation`;
231CREATE TABLE `adjective_adjective_derivation` (
232 `adjective_1_id` INTEGER NOT NULL,
233 `adjective_2_id` INTEGER NOT NULL,
234 FOREIGN KEY (`adjective_1_id`) REFERENCES `adjectives`(`adjective_id`),
235 FOREIGN KEY (`adjective_2_id`) REFERENCES `adjectives`(`adjective_id`)
236);
237
238DROP TABLE IF EXISTS `adjective_adverb_derivation`;
239CREATE TABLE `adjective_adverb_derivation` (
240 `adjective_id` INTEGER NOT NULL,
241 `adverb_id` INTEGER NOT NULL,
242 FOREIGN KEY (`adjective_id`) REFERENCES `adjectives`(`adjective_id`),
243 FOREIGN KEY (`adverb_id`) REFERENCES `adverbs`(`adverb_id`)
244);
245
246DROP TABLE IF EXISTS `adverb_adverb_derivation`;
247CREATE TABLE `adverb_adverb_derivation` (
248 `adverb_1_id` INTEGER NOT NULL,
249 `adverb_2_id` INTEGER NOT NULL,
250 FOREIGN KEY (`adverb_1_id`) REFERENCES `adverbs`(`adverb_id`),
251 FOREIGN KEY (`adverb_2_id`) REFERENCES `adverbs`(`adverb_id`)
252);