about summary refs log tree commit diff stats
path: root/generator.cpp
diff options
context:
space:
mode:
author Kelly Rauchenberger <fefferburbia@gmail.com> 2016-03-16 11:27:16 -0400
committer Kelly Rauchenberger <fefferburbia@gmail.com> 2016-03-16 11:27:16 -0400
commit 3aceae8ab1eb5992110ea57a9479bbc3177feb21 (patch)
tree 13167a266805344efb7bb1d900486f782c23285e /generator.cpp
parent e1be2716746e75cf6ed37e86461a7f580a964564 (diff)
download furries-3aceae8ab1eb5992110ea57a9479bbc3177feb21.tar.gz
furries-3aceae8ab1eb5992110ea57a9479bbc3177feb21.tar.bz2
furries-3aceae8ab1eb5992110ea57a9479bbc3177feb21.zip
Added more inflections, word relationships, and pronunciations
Nouns, adjectives, and adverbs now have inflected forms. A large number of WordNet word relationships (all noun-noun relationships, plus synonymy and antonymy for all word types except verbs) have been added. Additionally, CMUDICT is now being used to store word pronunciations for rhyming purposes. Verbly is now also a compiled library rather than being header-only due to the complexity of the query interface.
Diffstat (limited to 'generator.cpp')
-rw-r--r-- generator.cpp 1303
1 files changed, 1186 insertions, 117 deletions
diff --git a/generator.cpp b/generator.cpp
index c389963..305d121 100644
--- a/generator.cpp
+++ b/generator.cpp
@@ -9,6 +9,8 @@
9#include <sqlite3.h> 9#include <sqlite3.h>
10#include <sstream> 10#include <sstream>
11#include <regex> 11#include <regex>
12#include <list>
13#include "progress.h"
12 14
13struct verb { 15struct verb {
14 std::string infinitive; 16 std::string infinitive;
@@ -18,6 +20,17 @@ struct verb {
18 std::string s_form; 20 std::string s_form;
19}; 21};
20 22
23struct adjective {
24 std::string base;
25 std::string comparative;
26 std::string superlative;
27};
28
29struct noun {
30 std::string singular;
31 std::string plural;
32};
33
21struct group { 34struct group {
22 std::string id; 35 std::string id;
23 std::set<std::string> members; 36 std::set<std::string> members;
@@ -25,21 +38,33 @@ struct group {
25 38
26std::map<std::string, group> groups; 39std::map<std::string, group> groups;
27std::map<std::string, verb> verbs; 40std::map<std::string, verb> verbs;
41std::map<std::string, adjective> adjectives;
42std::map<std::string, noun> nouns;
28std::map<int, std::map<int, int>> wn; 43std::map<int, std::map<int, int>> wn;
44std::map<std::string, std::set<std::string>> pronunciations;
29 45
30void print_usage() 46void print_usage()
31{ 47{
32 std::cout << "Verbly Datafile Generator" << std::endl; 48 std::cout << "Verbly Datafile Generator" << std::endl;
33 std::cout << "-------------------------" << std::endl; 49 std::cout << "-------------------------" << std::endl;
34 std::cout << "Requires exactly four arguments." << std::endl; 50 std::cout << "Requires exactly six arguments." << std::endl;
35 std::cout << "1. The path to a VerbNet data directory." << std::endl; 51 std::cout << "1. The path to a VerbNet data directory." << std::endl;
36 std::cout << "2. The path to a SemLink vnpbMappings file." << std::endl; 52 std::cout << "2. The path to a SemLink vnpbMappings file." << std::endl;
37 std::cout << "3. The path to an AGID infl.txt file." << std::endl; 53 std::cout << "3. The path to an AGID infl.txt file." << std::endl;
38 std::cout << "4. The path to a WordNet prolog data directory." << std::endl; 54 std::cout << "4. The path to a WordNet prolog data directory." << std::endl;
39 std::cout << "5. Datafile output path." << std::endl; 55 std::cout << "5. The path to a CMUDICT pronunciation file." << std::endl;
56 std::cout << "6. Datafile output path." << std::endl;
40 57
41 exit(1); 58 exit(1);
42} 59}
60
61void db_error(sqlite3* ppdb, std::string)
62{
63 std::cout << "Error writing to output database: " << sqlite3_errmsg(ppdb) << std::endl;
64 sqlite3_close_v2(ppdb);
65 print_usage();
66}
67
43/* 68/*
44void parse_group(xmlNodePtr top, std::string filename) 69void parse_group(xmlNodePtr top, std::string filename)
45{ 70{
@@ -87,7 +112,7 @@ void parse_group(xmlNodePtr top, std::string filename)
87 112
88int main(int argc, char** argv) 113int main(int argc, char** argv)
89{ 114{
90 if (argc != 6) 115 if (argc != 7)
91 { 116 {
92 print_usage(); 117 print_usage();
93 } 118 }
@@ -137,7 +162,7 @@ int main(int argc, char** argv)
137 closedir(dir);*/ 162 closedir(dir);*/
138 163
139 // Get verbs from AGID 164 // Get verbs from AGID
140 std::cout << "Reading verb inflection..." << std::endl; 165 std::cout << "Reading inflections..." << std::endl;
141 166
142 std::ifstream agidfile(argv[3]); 167 std::ifstream agidfile(argv[3]);
143 if (!agidfile.is_open()) 168 if (!agidfile.is_open())
@@ -162,11 +187,7 @@ int main(int argc, char** argv)
162 int divider = line.find_first_of(" "); 187 int divider = line.find_first_of(" ");
163 std::string word = line.substr(0, divider); 188 std::string word = line.substr(0, divider);
164 line = line.substr(divider+1); 189 line = line.substr(divider+1);
165 190 char type = line[0];
166 if (line[0] != 'V')
167 {
168 continue;
169 }
170 191
171 if (line[1] == '?') 192 if (line[1] == '?')
172 { 193 {
@@ -174,7 +195,7 @@ int main(int argc, char** argv)
174 } else { 195 } else {
175 line.erase(0, 3); 196 line.erase(0, 3);
176 } 197 }
177 198
178 std::vector<std::string> forms; 199 std::vector<std::string> forms;
179 while (!line.empty()) 200 while (!line.empty())
180 { 201 {
@@ -187,52 +208,129 @@ int main(int argc, char** argv)
187 inflection = line; 208 inflection = line;
188 line = ""; 209 line = "";
189 } 210 }
190 211
191 if ((divider = inflection.find_first_of(",?")) != std::string::npos) 212 if ((divider = inflection.find_first_of(",?")) != std::string::npos)
192 { 213 {
193 inflection = inflection.substr(0, divider); 214 inflection = inflection.substr(0, divider);
194 } 215 }
195 216
196 forms.push_back(inflection); 217 forms.push_back(inflection);
197 } 218 }
198 219
199 verb v; 220 switch (type)
200 v.infinitive = word;
201 if (forms.size() == 4)
202 { 221 {
203 v.past_tense = forms[0]; 222 case 'V':
204 v.past_participle = forms[1]; 223 {
205 v.ing_form = forms[2]; 224 verb v;
206 v.s_form = forms[3]; 225 v.infinitive = word;
207 } else if (forms.size() == 3) 226 if (forms.size() == 4)
227 {
228 v.past_tense = forms[0];
229 v.past_participle = forms[1];
230 v.ing_form = forms[2];
231 v.s_form = forms[3];
232 } else if (forms.size() == 3)
233 {
234 v.past_tense = forms[0];
235 v.past_participle = forms[0];
236 v.ing_form = forms[1];
237 v.s_form = forms[2];
238 } else if (forms.size() == 8)
239 {
240 // As of AGID 2014.08.11, this is only "to be"
241 v.past_tense = forms[0];
242 v.past_participle = forms[2];
243 v.ing_form = forms[3];
244 v.s_form = forms[4];
245 } else {
246 // Words that don't fit the cases above as of AGID 2014.08.11:
247 // - may and shall do not conjugate the way we want them to
248 // - methinks only has a past tense and is an outlier
249 // - wit has five forms, and is archaic/obscure enough that we can ignore it for now
250 std::cout << "Ignoring verb \"" << word << "\" due to non-standard number of forms." << std::endl;
251 }
252
253 verbs[word] = v;
254
255 break;
256 }
257
258 case 'A':
259 {
260 adjective adj;
261 adj.base = word;
262 if (forms.size() == 2)
263 {
264 adj.comparative = forms[0];
265 adj.superlative = forms[1];
266 } else {
267 // As of AGID 2014.08.11, this is only "only", which has only the form "onliest"
268 std::cout << "Ignoring adjective/adverb \"" << word << "\" due to non-standard number of forms." << std::endl;
269 }
270
271 adjectives[word] = adj;
272
273 break;
274 }
275
276 case 'N':
277 {
278 noun n;
279 n.singular = word;
280 if (forms.size() == 1)
281 {
282 n.plural = forms[0];
283 } else {
284 // As of AGID 2014.08.11, this is non-existent.
285 std::cout << "Ignoring noun \"" << word << "\" due to non-standard number of forms." << std::endl;
286 }
287
288 nouns[word] = n;
289
290 break;
291 }
292 }
293 }
294
295 // Pronunciations
296 std::cout << "Reading pronunciations..." << std::endl;
297
298 std::ifstream pronfile(argv[5]);
299 if (!pronfile.is_open())
300 {
301 std::cout << "Could not open CMUDICT file: " << argv[5] << std::endl;
302 print_usage();
303 }
304
305 for (;;)
306 {
307 std::string line;
308 if (!getline(pronfile, line))
208 { 309 {
209 v.past_tense = forms[0]; 310 break;
210 v.past_participle = forms[0]; 311 }
211 v.ing_form = forms[1]; 312
212 v.s_form = forms[2]; 313 if (line.back() == '\r')
213 } else if (forms.size() == 8)
214 { 314 {
215 // As of AGID 2014.08.11, this is only "to be" 315 line.pop_back();
216 v.past_tense = forms[0];
217 v.past_participle = forms[2];
218 v.ing_form = forms[3];
219 v.s_form = forms[4];
220 } else {
221 // Words that don't fit the cases above as of AGID 2014.08.11:
222 // - may and shall do not conjugate the way we want them to
223 // - methinks only has a past tense and is an outlier
224 // - wit has five forms, and is archaic/obscure enough that we can ignore it for now
225 std::cout << "Ignoring verb \"" << word << "\" due to non-standard number of forms." << std::endl;
226 } 316 }
227 317
228 verbs[word] = v; 318 std::regex phoneme("([A-Z][^ \\(]*)(?:\\(\\d+\\))? ([A-Z 0-9]+)");
319 std::smatch phoneme_data;
320 if (std::regex_search(line, phoneme_data, phoneme))
321 {
322 std::string canonical(phoneme_data[1]);
323 std::transform(std::begin(canonical), std::end(canonical), std::begin(canonical), ::tolower);
324
325 pronunciations[canonical].insert(phoneme_data[2]);
326 }
229 } 327 }
230 328
231 // Start writing output 329 // Start writing output
232 std::cout << "Writing output..." << std::endl; 330 std::cout << "Writing schema..." << std::endl;
233 331
234 sqlite3* ppdb; 332 sqlite3* ppdb;
235 if (sqlite3_open_v2(argv[5], &ppdb, SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE, NULL) != SQLITE_OK) 333 if (sqlite3_open_v2(argv[6], &ppdb, SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE, NULL) != SQLITE_OK)
236 { 334 {
237 std::cout << "Error opening output datafile: " << sqlite3_errmsg(ppdb) << std::endl; 335 std::cout << "Error opening output datafile: " << sqlite3_errmsg(ppdb) << std::endl;
238 print_usage(); 336 print_usage();
@@ -278,47 +376,82 @@ int main(int argc, char** argv)
278 sqlite3_stmt* schmstmt; 376 sqlite3_stmt* schmstmt;
279 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &schmstmt, NULL) != SQLITE_OK) 377 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &schmstmt, NULL) != SQLITE_OK)
280 { 378 {
281 std::cout << "Error writing to output database: " << sqlite3_errmsg(ppdb) << std::endl; 379 db_error(ppdb, query);
282 sqlite3_close_v2(ppdb);
283 print_usage();
284 } 380 }
285 381
286 if (sqlite3_step(schmstmt) != SQLITE_DONE) 382 if (sqlite3_step(schmstmt) != SQLITE_DONE)
287 { 383 {
288 std::cout << "Error writing to output database: " << sqlite3_errmsg(ppdb) << std::endl; 384 db_error(ppdb, query);
289 sqlite3_close_v2(ppdb);
290 print_usage();
291 } 385 }
292 386
293 sqlite3_finalize(schmstmt); 387 sqlite3_finalize(schmstmt);
294 } 388 }
295 389
296 std::cout << "Writing verbs..." << std::endl;
297 for (auto& mapping : verbs)
298 { 390 {
299 sqlite3_stmt* ppstmt; 391 progress ppgs("Writing verbs...", verbs.size());
300 std::string query("INSERT INTO verbs (infinitive, past_tense, past_participle, ing_form, s_form) VALUES (?, ?, ?, ?, ?)"); 392 for (auto& mapping : verbs)
301 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
302 { 393 {
303 std::cout << "Error writing to output database: " << sqlite3_errmsg(ppdb) << std::endl; 394 sqlite3_stmt* ppstmt;
304 sqlite3_close_v2(ppdb); 395 std::string query("INSERT INTO verbs (infinitive, past_tense, past_participle, ing_form, s_form) VALUES (?, ?, ?, ?, ?)");
305 print_usage(); 396 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
306 } 397 {
398 db_error(ppdb, query);
399 }
307 400
308 sqlite3_bind_text(ppstmt, 1, mapping.second.infinitive.c_str(), mapping.second.infinitive.length(), SQLITE_STATIC); 401 sqlite3_bind_text(ppstmt, 1, mapping.second.infinitive.c_str(), mapping.second.infinitive.length(), SQLITE_STATIC);
309 sqlite3_bind_text(ppstmt, 2, mapping.second.past_tense.c_str(), mapping.second.past_tense.length(), SQLITE_STATIC); 402 sqlite3_bind_text(ppstmt, 2, mapping.second.past_tense.c_str(), mapping.second.past_tense.length(), SQLITE_STATIC);
310 sqlite3_bind_text(ppstmt, 3, mapping.second.past_participle.c_str(), mapping.second.past_participle.length(), SQLITE_STATIC); 403 sqlite3_bind_text(ppstmt, 3, mapping.second.past_participle.c_str(), mapping.second.past_participle.length(), SQLITE_STATIC);
311 sqlite3_bind_text(ppstmt, 4, mapping.second.ing_form.c_str(), mapping.second.ing_form.length(), SQLITE_STATIC); 404 sqlite3_bind_text(ppstmt, 4, mapping.second.ing_form.c_str(), mapping.second.ing_form.length(), SQLITE_STATIC);
312 sqlite3_bind_text(ppstmt, 5, mapping.second.s_form.c_str(), mapping.second.s_form.length(), SQLITE_STATIC); 405 sqlite3_bind_text(ppstmt, 5, mapping.second.s_form.c_str(), mapping.second.s_form.length(), SQLITE_STATIC);
313 406
314 if (sqlite3_step(ppstmt) != SQLITE_DONE) 407 if (sqlite3_step(ppstmt) != SQLITE_DONE)
315 { 408 {
316 std::cout << "Error writing to output database: " << sqlite3_errmsg(ppdb) << std::endl; 409 db_error(ppdb, query);
317 sqlite3_close_v2(ppdb); 410 }
318 print_usage();
319 }
320 411
321 sqlite3_finalize(ppstmt); 412 sqlite3_finalize(ppstmt);
413
414 std::string canonical(mapping.second.infinitive);
415 std::transform(std::begin(canonical), std::end(canonical), std::begin(canonical), ::tolower);
416 if (pronunciations.count(canonical) == 1)
417 {
418 query = "SELECT last_insert_rowid()";
419 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
420 {
421 db_error(ppdb, query);
422 }
423
424 if (sqlite3_step(ppstmt) != SQLITE_ROW)
425 {
426 db_error(ppdb, query);
427 }
428
429 int rowid = sqlite3_column_int(ppstmt, 0);
430
431 sqlite3_finalize(ppstmt);
432
433 for (auto pronunciation : pronunciations[canonical])
434 {
435 query = "INSERT INTO verb_pronunciations (verb_id, pronunciation) VALUES (?, ?)";
436 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
437 {
438 db_error(ppdb, query);
439 }
440
441 sqlite3_bind_int(ppstmt, 1, rowid);
442 sqlite3_bind_text(ppstmt, 2, pronunciation.c_str(), pronunciation.length(), SQLITE_STATIC);
443
444 if (sqlite3_step(ppstmt) != SQLITE_DONE)
445 {
446 db_error(ppdb, query);
447 }
448
449 sqlite3_finalize(ppstmt);
450 }
451 }
452
453 ppgs.update();
454 }
322 } 455 }
323 456
324 // Get nouns/adjectives/adverbs from WordNet 457 // Get nouns/adjectives/adverbs from WordNet
@@ -342,110 +475,1046 @@ int main(int argc, char** argv)
342 wnpref += '/'; 475 wnpref += '/';
343 } 476 }
344 477
345 std::cout << "Reading words from WordNet..." << std::endl; 478 // s table
346 std::ifstream wnsfile(wnpref + "wn_s.pl");
347 if (!wnsfile.is_open())
348 { 479 {
349 std::cout << "Invalid WordNet data directory." << std::endl; 480 std::ifstream wnsfile(wnpref + "wn_s.pl");
350 print_usage(); 481 if (!wnsfile.is_open())
482 {
483 std::cout << "Invalid WordNet data directory." << std::endl;
484 print_usage();
485 }
486
487 std::list<std::string> lines;
488 for (;;)
489 {
490 std::string line;
491 if (!getline(wnsfile, line))
492 {
493 break;
494 }
495
496 if (line.back() == '\r')
497 {
498 line.pop_back();
499 }
500
501 lines.push_back(line);
502 }
503
504 progress ppgs("Writing nouns, adjectives, and adverbs...", lines.size());
505 for (auto line : lines)
506 {
507 ppgs.update();
508
509 std::regex relation("^s\\(([134]\\d{8}),(\\d+),'([\\w ]+)',");
510 std::smatch relation_data;
511 if (!std::regex_search(line, relation_data, relation))
512 {
513 continue;
514 }
515
516 int synset_id = stoi(relation_data[1]);
517 int wnum = stoi(relation_data[2]);
518 std::string word = relation_data[3];
519
520 std::string query;
521 switch (synset_id / 100000000)
522 {
523 case 1: // Noun
524 {
525 if (nouns.count(word) == 1)
526 {
527 query = "INSERT INTO nouns (singular, plural) VALUES (?, ?)";
528 } else {
529 query = "INSERT INTO nouns (singular) VALUES (?)";
530 }
531
532 break;
533 }
534
535 case 2: // Verb
536 {
537 // Ignore
538
539 break;
540 }
541
542 case 3: // Adjective
543 {
544 if (adjectives.count(word) == 1)
545 {
546 query = "INSERT INTO adjectives (base_form, comparative, superlative) VALUES (?, ?, ?)";
547 } else {
548 query = "INSERT INTO adjectives (base_form) VALUES (?)";
549 }
550
551 break;
552 }
553
554 case 4: // Adverb
555 {
556 if (adjectives.count(word) == 1)
557 {
558 query = "INSERT INTO adverbs (base_form, comparative, superlative) VALUES (?, ?, ?)";
559 } else {
560 query = "INSERT INTO adverbs (base_form) VALUES (?)";
561 }
562
563 break;
564 }
565 }
566
567 sqlite3_stmt* ppstmt;
568 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
569 {
570 db_error(ppdb, query);
571 }
572
573 sqlite3_bind_text(ppstmt, 1, word.c_str(), word.length(), SQLITE_STATIC);
574 switch (synset_id / 100000000)
575 {
576 case 1: // Noun
577 {
578 if (nouns.count(word) == 1)
579 {
580 sqlite3_bind_text(ppstmt, 2, nouns[word].plural.c_str(), nouns[word].plural.length(), SQLITE_STATIC);
581 }
582
583 break;
584 }
585
586 case 3: // Adjective
587 case 4: // Adverb
588 {
589 if (adjectives.count(word) == 1)
590 {
591 sqlite3_bind_text(ppstmt, 2, adjectives[word].comparative.c_str(), adjectives[word].comparative.length(), SQLITE_STATIC);
592 sqlite3_bind_text(ppstmt, 3, adjectives[word].superlative.c_str(), adjectives[word].superlative.length(), SQLITE_STATIC);
593 }
594
595 break;
596 }
597 }
598
599 if (sqlite3_step(ppstmt) != SQLITE_DONE)
600 {
601 db_error(ppdb, query);
602 }
603
604 sqlite3_finalize(ppstmt);
605
606 query = "SELECT last_insert_rowid()";
607 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
608 {
609 db_error(ppdb, query);
610 }
611
612 if (sqlite3_step(ppstmt) != SQLITE_ROW)
613 {
614 db_error(ppdb, query);
615 }
616
617 int rowid = sqlite3_column_int(ppstmt, 0);
618 wn[synset_id][wnum] = rowid;
619
620 sqlite3_finalize(ppstmt);
621
622 std::string canonical(word);
623 std::transform(std::begin(canonical), std::end(canonical), std::begin(canonical), ::tolower);
624 if (pronunciations.count(canonical) == 1)
625 {
626 for (auto pronunciation : pronunciations[canonical])
627 {
628 switch (synset_id / 100000000)
629 {
630 case 1: // Noun
631 {
632 query = "INSERT INTO noun_pronunciations (noun_id, pronunciation) VALUES (?, ?)";
633
634 break;
635 }
636
637 case 3: // Adjective
638 {
639 query = "INSERT INTO adjective_pronunciations (adjective_id, pronunciation) VALUES (?, ?)";
640
641 break;
642 }
643
644 case 4: // Adverb
645 {
646 query = "INSERT INTO adverb_pronunciations (adverb_id, pronunciation) VALUES (?, ?)";
647
648 break;
649 }
650 }
651
652 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
653 {
654 db_error(ppdb, query);
655 }
656
657 sqlite3_bind_int(ppstmt, 1, rowid);
658 sqlite3_bind_text(ppstmt, 2, pronunciation.c_str(), pronunciation.length(), SQLITE_STATIC);
659
660 if (sqlite3_step(ppstmt) != SQLITE_DONE)
661 {
662 db_error(ppdb, query);
663 }
664
665 sqlite3_finalize(ppstmt);
666 }
667 }
668 }
351 } 669 }
352 670
353 for (;;) 671 // While we're working on s
354 { 672 {
355 std::string line; 673 progress ppgs("Writing word synonyms...", wn.size());
356 if (!getline(wnsfile, line)) 674 for (auto sense : wn)
357 { 675 {
358 break; 676 ppgs.update();
677
678 for (auto word1 : sense.second)
679 {
680 for (auto word2 : sense.second)
681 {
682 if (word1 != word2)
683 {
684 std::string query;
685 switch (sense.first / 100000000)
686 {
687 case 1: // Noun
688 {
689 query = "INSERT INTO noun_synonymy (noun_1_id, noun_2_id) VALUES (?, ?)";
690
691 break;
692 }
693
694 case 2: // Verb
695 {
696 // Ignore
697
698 break;
699 }
700
701 case 3: // Adjective
702 {
703 query = "INSERT INTO adjective_synonymy (adjective_1_id, adjective_2_id) VALUES (?, ?)";
704
705 break;
706 }
707
708 case 4: // Adverb
709 {
710 query = "INSERT INTO adverb_synonymy (adverb_1_id, adverb_2_id) VALUES (?, ?)";
711
712 break;
713 }
714 }
715
716 sqlite3_stmt* ppstmt;
717 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
718 {
719 db_error(ppdb, query);
720 }
721
722 sqlite3_bind_int(ppstmt, 1, word1.second);
723 sqlite3_bind_int(ppstmt, 2, word2.second);
724
725 if (sqlite3_step(ppstmt) != SQLITE_DONE)
726 {
727 db_error(ppdb, query);
728 }
729
730 sqlite3_finalize(ppstmt);
731 }
732 }
733 }
734 }
735 }
736
737 // ant table
738 {
739 std::ifstream wnantfile(wnpref + "wn_ant.pl");
740 if (!wnantfile.is_open())
741 {
742 std::cout << "Invalid WordNet data directory." << std::endl;
743 print_usage();
359 } 744 }
745
746 std::list<std::string> lines;
747 for (;;)
748 {
749 std::string line;
750 if (!getline(wnantfile, line))
751 {
752 break;
753 }
360 754
361 if (line.back() == '\r') 755 if (line.back() == '\r')
756 {
757 line.pop_back();
758 }
759
760 lines.push_back(line);
761 }
762
763 progress ppgs("Writing antonyms...", lines.size());
764 for (auto line : lines)
362 { 765 {
363 line.pop_back(); 766 ppgs.update();
767
768 std::regex relation("^ant\\(([134]\\d{8}),(\\d+),([134]\\d{8}),(\\d+)\\)\\.");
769 std::smatch relation_data;
770 if (!std::regex_search(line, relation_data, relation))
771 {
772 continue;
773 }
774
775 int synset_id_1 = stoi(relation_data[1]);
776 int wnum_1 = stoi(relation_data[2]);
777 int synset_id_2 = stoi(relation_data[3]);
778 int wnum_2 = stoi(relation_data[4]);
779
780 std::string query;
781 switch (synset_id_1 / 100000000)
782 {
783 case 1: // Noun
784 {
785 query = "INSERT INTO noun_antonymy (noun_1_id, noun_2_id) VALUES (?, ?)";
786
787 break;
788 }
789
790 case 2: // Verb
791 {
792 // Ignore
793
794 break;
795 }
796
797 case 3: // Adjective
798 {
799 query = "INSERT INTO adjective_antonymy (adjective_1_id, adjective_2_id) VALUES (?, ?)";
800
801 break;
802 }
803
804 case 4: // Adverb
805 {
806 query = "INSERT INTO adverb_antonymy (adverb_1_id, adverb_2_id) VALUES (?, ?)";
807
808 break;
809 }
810 }
811
812 sqlite3_stmt* ppstmt;
813 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
814 {
815 db_error(ppdb, query);
816 }
817
818 sqlite3_bind_int(ppstmt, 1, wn[synset_id_1][wnum_1]);
819 sqlite3_bind_int(ppstmt, 2, wn[synset_id_2][wnum_2]);
820
821 if (sqlite3_step(ppstmt) != SQLITE_DONE)
822 {
823 db_error(ppdb, query);
824 }
825
826 sqlite3_finalize(ppstmt);
827 }
828 }
829
830 // at table
831 {
832 std::ifstream wnatfile(wnpref + "wn_at.pl");
833 if (!wnatfile.is_open())
834 {
835 std::cout << "Invalid WordNet data directory." << std::endl;
836 print_usage();
364 } 837 }
838
839 std::list<std::string> lines;
840 for (;;)
841 {
842 std::string line;
843 if (!getline(wnatfile, line))
844 {
845 break;
846 }
365 847
366 std::regex relation("^s\\(([134]\\d{8}),(\\d+),'([\\w ]+)',"); 848 if (line.back() == '\r')
367 std::smatch relation_data; 849 {
368 if (!std::regex_search(line, relation_data, relation)) 850 line.pop_back();
851 }
852
853 lines.push_back(line);
854 }
855
856 progress ppgs("Writing variations...", lines.size());
857 for (auto line : lines)
369 { 858 {
370 continue; 859 ppgs.update();
860
861 std::regex relation("^at\\((1\\d{8}),(3\\d{8})\\)\\.");
862 std::smatch relation_data;
863 if (!std::regex_search(line, relation_data, relation))
864 {
865 continue;
866 }
867
868 int synset_id_1 = stoi(relation_data[1]);
869 int synset_id_2 = stoi(relation_data[2]);
870 std::string query("INSERT INTO variation (noun_id, adjective_id) VALUES (?, ?)");
871
872 for (auto mapping1 : wn[synset_id_1])
873 {
874 for (auto mapping2 : wn[synset_id_2])
875 {
876 sqlite3_stmt* ppstmt;
877 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK)
878 {
879 db_error(ppdb, query);
880 }
881
882 sqlite3_bind_int(ppstmt, 1, mapping1.second);
883 sqlite3_bind_int(ppstmt, 2, mapping2.second);
884
885 if (sqlite3_step(ppstmt) != SQLITE_DONE)
886 {
887 db_error(ppdb, query);
888 }
889
890 sqlite3_finalize(ppstmt);
891 }
892 }
371 } 893 }
894 }
895
896 // hyp table
897 {
898 std::ifstream wnhypfile(wnpref + "wn_hyp.pl");
899 if (!wnhypfile.is_open())
900 {
901 std::cout << "Invalid WordNet data directory." << std::endl;
902 print_usage();
903 }
904
905 std::list<std::string> lines;
906 for (;;)
907 {
908 std::string line;
909 if (!getline(wnhypfile, line))
910 {
911 break;
912 }
372 913
373 int synset_id = stoi(relation_data[1]); 914 if (line.back() == '\r')
374 int wnum = stoi(relation_data[2]); 915 {
375 std::string word = relation_data[3]; 916 line.pop_back();
917 }
918
919 lines.push_back(line);
920 }
376 921
377 std::string query; 922 progress ppgs("Writing hypernyms...", lines.size());
378 switch (synset_id / 100000000) 923 for (auto line : lines)
379 { 924 {
380 case 1: // Noun 925 ppgs.update();
926
927 std::regex relation("^hyp\\((1\\d{8}),(1\\d{8})\\)\\.");
928 std::smatch relation_data;
929 if (!std::regex_search(line, relation_data, relation))
930 {
931 continue;
932 }
933
934 int synset_id_1 = stoi(relation_data[1]);
935 int synset_id_2 = stoi(relation_data[2]);
936 std::string query("INSERT INTO hypernymy (hyponym_id, hypernym_id) VALUES (?, ?)");
937
938 for (auto mapping1 : wn[synset_id_1])
939 {
940 for (auto mapping2 : wn[synset_id_2])
941 {
942 sqlite3_stmt* ppstmt;
943 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK)
944 {
945 db_error(ppdb, query);
946 }
947
948 sqlite3_bind_int(ppstmt, 1, mapping1.second);
949 sqlite3_bind_int(ppstmt, 2, mapping2.second);
950
951 if (sqlite3_step(ppstmt) != SQLITE_DONE)
952 {
953 db_error(ppdb, query);
954 }
955
956 sqlite3_finalize(ppstmt);
957 }
958 }
959 }
960 }
961
962 // ins table
963 {
964 std::ifstream wninsfile(wnpref + "wn_ins.pl");
965 if (!wninsfile.is_open())
966 {
967 std::cout << "Invalid WordNet data directory." << std::endl;
968 print_usage();
969 }
970
971 std::list<std::string> lines;
972 for (;;)
973 {
974 std::string line;
975 if (!getline(wninsfile, line))
381 { 976 {
382 query = "INSERT INTO nouns (form) VALUES (?)";
383
384 break; 977 break;
385 } 978 }
979
980 if (line.back() == '\r')
981 {
982 line.pop_back();
983 }
386 984
387 case 2: // Verb 985 lines.push_back(line);
986 }
987
988 progress ppgs("Writing instantiations...", lines.size());
989 for (auto line : lines)
990 {
991 ppgs.update();
992
993 std::regex relation("^ins\\((1\\d{8}),(1\\d{8})\\)\\.");
994 std::smatch relation_data;
995 if (!std::regex_search(line, relation_data, relation))
996 {
997 continue;
998 }
999
1000 int synset_id_1 = stoi(relation_data[1]);
1001 int synset_id_2 = stoi(relation_data[2]);
1002 std::string query("INSERT INTO instantiation (instance_id, class_id) VALUES (?, ?)");
1003
1004 for (auto mapping1 : wn[synset_id_1])
1005 {
1006 for (auto mapping2 : wn[synset_id_2])
1007 {
1008 sqlite3_stmt* ppstmt;
1009 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK)
1010 {
1011 db_error(ppdb, query);
1012 }
1013
1014 sqlite3_bind_int(ppstmt, 1, mapping1.second);
1015 sqlite3_bind_int(ppstmt, 2, mapping2.second);
1016
1017 if (sqlite3_step(ppstmt) != SQLITE_DONE)
1018 {
1019 db_error(ppdb, query);
1020 }
1021
1022 sqlite3_finalize(ppstmt);
1023 }
1024 }
1025 }
1026 }
1027
1028 // mm table
1029 {
1030 std::ifstream wnmmfile(wnpref + "wn_mm.pl");
1031 if (!wnmmfile.is_open())
1032 {
1033 std::cout << "Invalid WordNet data directory." << std::endl;
1034 print_usage();
1035 }
1036
1037 std::list<std::string> lines;
1038 for (;;)
1039 {
1040 std::string line;
1041 if (!getline(wnmmfile, line))
388 { 1042 {
389 // Ignore
390
391 break; 1043 break;
392 } 1044 }
1045
1046 if (line.back() == '\r')
1047 {
1048 line.pop_back();
1049 }
1050
1051 lines.push_back(line);
1052 }
1053
1054 progress ppgs("Writing member meronyms...", lines.size());
1055 for (auto line : lines)
1056 {
1057 ppgs.update();
393 1058
394 case 3: // Adjective 1059 std::regex relation("^mm\\((1\\d{8}),(1\\d{8})\\)\\.");
1060 std::smatch relation_data;
1061 if (!std::regex_search(line, relation_data, relation))
1062 {
1063 continue;
1064 }
1065
1066 int synset_id_1 = stoi(relation_data[1]);
1067 int synset_id_2 = stoi(relation_data[2]);
1068 std::string query("INSERT INTO member_meronymy (holonym_id, meronym_id) VALUES (?, ?)");
1069
1070 for (auto mapping1 : wn[synset_id_1])
1071 {
1072 for (auto mapping2 : wn[synset_id_2])
1073 {
1074 sqlite3_stmt* ppstmt;
1075 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK)
1076 {
1077 db_error(ppdb, query);
1078 }
1079
1080 sqlite3_bind_int(ppstmt, 1, mapping1.second);
1081 sqlite3_bind_int(ppstmt, 2, mapping2.second);
1082
1083 if (sqlite3_step(ppstmt) != SQLITE_DONE)
1084 {
1085 db_error(ppdb, query);
1086 }
1087
1088 sqlite3_finalize(ppstmt);
1089 }
1090 }
1091 }
1092 }
1093
1094 // ms table
1095 {
1096 std::ifstream wnmsfile(wnpref + "wn_ms.pl");
1097 if (!wnmsfile.is_open())
1098 {
1099 std::cout << "Invalid WordNet data directory." << std::endl;
1100 print_usage();
1101 }
1102
1103 std::list<std::string> lines;
1104 for (;;)
1105 {
1106 std::string line;
1107 if (!getline(wnmsfile, line))
395 { 1108 {
396 query = "INSERT INTO adjectives (form) VALUES (?)";
397
398 break; 1109 break;
399 } 1110 }
1111
1112 if (line.back() == '\r')
1113 {
1114 line.pop_back();
1115 }
400 1116
401 case 4: // Adverb 1117 lines.push_back(line);
1118 }
1119
1120 progress ppgs("Writing substance meronyms...", lines.size());
1121 for (auto line : lines)
1122 {
1123 ppgs.update();
1124
1125 std::regex relation("^ms\\((1\\d{8}),(1\\d{8})\\)\\.");
1126 std::smatch relation_data;
1127 if (!std::regex_search(line, relation_data, relation))
1128 {
1129 continue;
1130 }
1131
1132 int synset_id_1 = stoi(relation_data[1]);
1133 int synset_id_2 = stoi(relation_data[2]);
1134 std::string query("INSERT INTO substance_meronymy (holonym_id, meronym_id) VALUES (?, ?)");
1135
1136 for (auto mapping1 : wn[synset_id_1])
1137 {
1138 for (auto mapping2 : wn[synset_id_2])
1139 {
1140 sqlite3_stmt* ppstmt;
1141 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK)
1142 {
1143 db_error(ppdb, query);
1144 }
1145
1146 sqlite3_bind_int(ppstmt, 1, mapping1.second);
1147 sqlite3_bind_int(ppstmt, 2, mapping2.second);
1148
1149 if (sqlite3_step(ppstmt) != SQLITE_DONE)
1150 {
1151 db_error(ppdb, query);
1152 }
1153
1154 sqlite3_finalize(ppstmt);
1155 }
1156 }
1157 }
1158 }
1159
1160 // mp table
1161 {
1162 std::ifstream wnmpfile(wnpref + "wn_mp.pl");
1163 if (!wnmpfile.is_open())
1164 {
1165 std::cout << "Invalid WordNet data directory." << std::endl;
1166 print_usage();
1167 }
1168
1169 std::list<std::string> lines;
1170 for (;;)
1171 {
1172 std::string line;
1173 if (!getline(wnmpfile, line))
402 { 1174 {
403 query = "INSERT INTO adverbs (form) VALUES (?)";
404
405 break; 1175 break;
406 } 1176 }
1177
1178 if (line.back() == '\r')
1179 {
1180 line.pop_back();
1181 }
1182
1183 lines.push_back(line);
407 } 1184 }
408 1185
409 sqlite3_stmt* ppstmt; 1186 progress ppgs("Writing part meronyms...", lines.size());
410 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) 1187 for (auto line : lines)
1188 {
1189 ppgs.update();
1190
1191 std::regex relation("^mp\\((1\\d{8}),(1\\d{8})\\)\\.");
1192 std::smatch relation_data;
1193 if (!std::regex_search(line, relation_data, relation))
1194 {
1195 continue;
1196 }
1197
1198 int synset_id_1 = stoi(relation_data[1]);
1199 int synset_id_2 = stoi(relation_data[2]);
1200 std::string query("INSERT INTO part_meronymy (holonym_id, meronym_id) VALUES (?, ?)");
1201
1202 for (auto mapping1 : wn[synset_id_1])
1203 {
1204 for (auto mapping2 : wn[synset_id_2])
1205 {
1206 sqlite3_stmt* ppstmt;
1207 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK)
1208 {
1209 db_error(ppdb, query);
1210 }
1211
1212 sqlite3_bind_int(ppstmt, 1, mapping1.second);
1213 sqlite3_bind_int(ppstmt, 2, mapping2.second);
1214
1215 if (sqlite3_step(ppstmt) != SQLITE_DONE)
1216 {
1217 db_error(ppdb, query);
1218 }
1219
1220 sqlite3_finalize(ppstmt);
1221 }
1222 }
1223 }
1224 }
1225
1226 // per table
1227 {
1228 std::ifstream wnperfile(wnpref + "wn_per.pl");
1229 if (!wnperfile.is_open())
411 { 1230 {
412 std::cout << "Error writing to output database: " << sqlite3_errmsg(ppdb) << std::endl; 1231 std::cout << "Invalid WordNet data directory." << std::endl;
413 sqlite3_close_v2(ppdb);
414 print_usage(); 1232 print_usage();
415 } 1233 }
1234
1235 std::list<std::string> lines;
1236 for (;;)
1237 {
1238 std::string line;
1239 if (!getline(wnperfile, line))
1240 {
1241 break;
1242 }
1243
1244 if (line.back() == '\r')
1245 {
1246 line.pop_back();
1247 }
1248
1249 lines.push_back(line);
1250 }
1251
1252 progress ppgs("Writing pertainyms and mannernyms...", lines.size());
1253 for (auto line : lines)
1254 {
1255 ppgs.update();
1256
1257 std::regex relation("^per\\(([34]\\d{8}),(\\d+),([13]\\d{8}),(\\d+)\\)\\.");
1258 std::smatch relation_data;
1259 if (!std::regex_search(line, relation_data, relation))
1260 {
1261 continue;
1262 }
1263
1264 int synset_id_1 = stoi(relation_data[1]);
1265 int wnum_1 = stoi(relation_data[2]);
1266 int synset_id_2 = stoi(relation_data[3]);
1267 int wnum_2 = stoi(relation_data[4]);
1268 std::string query;
1269 switch (synset_id_1 / 100000000)
1270 {
1271 case 3: // Adjective
1272 {
1273 // This is a pertainym, the second word should be a noun
1274 // Technically it can be an adjective but we're ignoring that
1275 if (synset_id_2 / 100000000 != 1)
1276 {
1277 continue;
1278 }
1279
1280 query = "INSERT INTO pertainymy (pertainym_id, noun_id) VALUES (?, ?)";
1281
1282 break;
1283 }
1284
1285 case 4: // Adverb
1286 {
1287 // This is a mannernym, the second word should be an adjective
1288 if (synset_id_2 / 100000000 != 3)
1289 {
1290 continue;
1291 }
1292
1293 query = "INSERT INTO mannernymy (mannernym_id, adjective_id) VALUES (?, ?)";
1294
1295 break;
1296 }
1297 }
1298
1299 sqlite3_stmt* ppstmt;
1300 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
1301 {
1302 db_error(ppdb, query);
1303 }
416 1304
417 sqlite3_bind_text(ppstmt, 1, word.c_str(), word.length(), SQLITE_STATIC); 1305 sqlite3_bind_int(ppstmt, 1, wn[synset_id_1][wnum_1]);
1306 sqlite3_bind_int(ppstmt, 2, wn[synset_id_2][wnum_2]);
418 1307
419 if (sqlite3_step(ppstmt) != SQLITE_DONE) 1308 if (sqlite3_step(ppstmt) != SQLITE_DONE)
1309 {
1310 db_error(ppdb, query);
1311 }
1312
1313 sqlite3_finalize(ppstmt);
1314 }
1315 }
1316
1317 // sa table
1318 {
1319 std::ifstream wnsafile(wnpref + "wn_sa.pl");
1320 if (!wnsafile.is_open())
420 { 1321 {
421 std::cout << "Error writing to output database: " << sqlite3_errmsg(ppdb) << std::endl; 1322 std::cout << "Invalid WordNet data directory." << std::endl;
422 sqlite3_close_v2(ppdb);
423 print_usage(); 1323 print_usage();
424 } 1324 }
1325
1326 std::list<std::string> lines;
1327 for (;;)
1328 {
1329 std::string line;
1330 if (!getline(wnsafile, line))
1331 {
1332 break;
1333 }
1334
1335 if (line.back() == '\r')
1336 {
1337 line.pop_back();
1338 }
1339
1340 lines.push_back(line);
1341 }
425 1342
426 sqlite3_finalize(ppstmt); 1343 progress ppgs("Writing specifications...", lines.size());
1344 for (auto line : lines)
1345 {
1346 ppgs.update();
1347
1348 std::regex relation("^per\\((3\\d{8}),(\\d+),(3\\d{8}),(\\d+)\\)\\.");
1349 std::smatch relation_data;
1350 if (!std::regex_search(line, relation_data, relation))
1351 {
1352 continue;
1353 }
1354
1355 int synset_id_1 = stoi(relation_data[1]);
1356 int wnum_1 = stoi(relation_data[2]);
1357 int synset_id_2 = stoi(relation_data[3]);
1358 int wnum_2 = stoi(relation_data[4]);
1359 std::string query("INSERT INTO specification (general_id, specific_id) VALUES (?, ?)");
1360
1361 sqlite3_stmt* ppstmt;
1362 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
1363 {
1364 db_error(ppdb, query);
1365 }
1366
1367 sqlite3_bind_int(ppstmt, 1, wn[synset_id_1][wnum_1]);
1368 sqlite3_bind_int(ppstmt, 2, wn[synset_id_2][wnum_2]);
427 1369
428 query = "SELECT last_insert_rowid()"; 1370 if (sqlite3_step(ppstmt) != SQLITE_DONE)
429 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) 1371 {
1372 db_error(ppdb, query);
1373 }
1374
1375 sqlite3_finalize(ppstmt);
1376 }
1377 }
1378 /*
1379 // sim table
1380 {
1381 std::ifstream wnsimfile(wnpref + "wn_sim.pl");
1382 if (!wnsimfile.is_open())
430 { 1383 {
431 std::cout << "Error writing to output database: " << sqlite3_errmsg(ppdb) << std::endl; 1384 std::cout << "Invalid WordNet data directory." << std::endl;
432 sqlite3_close_v2(ppdb);
433 print_usage(); 1385 print_usage();
434 } 1386 }
1387
1388 std::list<std::string> lines;
1389 for (;;)
1390 {
1391 std::string line;
1392 if (!getline(wnsimfile, line))
1393 {
1394 break;
1395 }
1396
1397 if (line.back() == '\r')
1398 {
1399 line.pop_back();
1400 }
1401
1402 lines.push_back(line);
1403 }
435 1404
436 if (sqlite3_step(ppstmt) != SQLITE_ROW) 1405 progress ppgs("Writing sense synonyms...", lines.size());
1406 for (auto line : lines)
1407 {
1408 ppgs.update();
1409
1410 std::regex relation("^sim\\((3\\d{8}),(3\\d{8})\\)\\.");
1411 std::smatch relation_data;
1412 if (!std::regex_search(line, relation_data, relation))
1413 {
1414 continue;
1415 }
1416
1417 int synset_id_1 = stoi(relation_data[1]);
1418 int synset_id_2 = stoi(relation_data[2]);
1419 std::string query("INSERT INTO adjective_synonymy (adjective_1_id, adjective_2_id) VALUES (?, ?)");
1420
1421 for (auto mapping1 : wn[synset_id_1])
1422 {
1423 for (auto mapping2 : wn[synset_id_2])
1424 {
1425 sqlite3_stmt* ppstmt;
1426 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK)
1427 {
1428 db_error(ppdb, query);
1429 }
1430
1431 sqlite3_bind_int(ppstmt, 1, mapping1.second);
1432 sqlite3_bind_int(ppstmt, 2, mapping2.second);
1433
1434 if (sqlite3_step(ppstmt) != SQLITE_DONE)
1435 {
1436 db_error(ppdb, query);
1437 }
1438
1439 sqlite3_reset(ppstmt);
1440 sqlite3_clear_bindings(ppstmt);
1441
1442 sqlite3_bind_int(ppstmt, 1, mapping2.second);
1443 sqlite3_bind_int(ppstmt, 2, mapping1.second);
1444
1445 if (sqlite3_step(ppstmt) != SQLITE_DONE)
1446 {
1447 db_error(ppdb, query);
1448 }
1449
1450 sqlite3_finalize(ppstmt);
1451 }
1452 }
1453 }
1454 }
1455 */
1456 // syntax table
1457 {
1458 std::ifstream wnsyntaxfile(wnpref + "wn_syntax.pl");
1459 if (!wnsyntaxfile.is_open())
437 { 1460 {
438 std::cout << "Error writing to output database: " << sqlite3_errmsg(ppdb) << std::endl; 1461 std::cout << "Invalid WordNet data directory." << std::endl;
439 sqlite3_close_v2(ppdb);
440 print_usage(); 1462 print_usage();
441 } 1463 }
1464
1465 std::list<std::string> lines;
1466 for (;;)
1467 {
1468 std::string line;
1469 if (!getline(wnsyntaxfile, line))
1470 {
1471 break;
1472 }
442 1473
443 wn[synset_id][wnum] = sqlite3_column_int(ppstmt, 0); 1474 if (line.back() == '\r')
1475 {
1476 line.pop_back();
1477 }
1478
1479 lines.push_back(line);
1480 }
444 1481
445 sqlite3_finalize(ppstmt); 1482 progress ppgs("Writing adjective syntax markers...", lines.size());
1483 for (auto line : lines)
1484 {
1485 ppgs.update();
1486
1487 std::regex relation("^syntax\\((3\\d{8}),(\\d+),([ipa])p?\\)\\.");
1488 std::smatch relation_data;
1489 if (!std::regex_search(line, relation_data, relation))
1490 {
1491 continue;
1492 }
1493
1494 int synset_id = stoi(relation_data[1]);
1495 int wnum = stoi(relation_data[2]);
1496 std::string syn = relation_data[3];
1497 std::string query("UPDATE adjectives SET position = ? WHERE adjective_id = ?");
1498
1499 sqlite3_stmt* ppstmt;
1500 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK)
1501 {
1502 db_error(ppdb, query);
1503 }
1504
1505 sqlite3_bind_text(ppstmt, 1, syn.c_str(), 1, SQLITE_STATIC);
1506 sqlite3_bind_int(ppstmt, 2, wn[synset_id][wnum]);
1507
1508 if (sqlite3_step(ppstmt) != SQLITE_DONE)
1509 {
1510 db_error(ppdb, query);
1511 }
1512
1513 sqlite3_finalize(ppstmt);
1514 }
446 } 1515 }
447 1516
448 sqlite3_close_v2(ppdb); 1517 sqlite3_close_v2(ppdb);
449 1518
450 std::cout << "Done." << std::endl; 1519 std::cout << "Done." << std::endl;
451} \ No newline at end of file 1520}