author     Kelly Rauchenberger <fefferburbia@gmail.com>    2018-03-31 23:05:02 -0400
committer  Kelly Rauchenberger <fefferburbia@gmail.com>    2018-03-31 23:05:02 -0400
commit     75e947fa0021547f460496d1c3aef5b61af4c669 (patch)
tree       64559a5329b3e5983ffdfe7ee2ad65c7c938e98d /generator
parent     3554df2e34e63364eea3a7998e0dfb0e6be65ca4 (diff)
download   verbly-75e947fa0021547f460496d1c3aef5b61af4c669.tar.gz
           verbly-75e947fa0021547f460496d1c3aef5b61af4c669.tar.bz2
           verbly-75e947fa0021547f460496d1c3aef5b61af4c669.zip
Migrated generator to hkutil
Diffstat (limited to 'generator')
-rw-r--r--  generator/CMakeLists.txt     |   8
-rw-r--r--  generator/form.cpp           |  35
-rw-r--r--  generator/form.h             |   3
-rw-r--r--  generator/frame.cpp          |   2
-rw-r--r--  generator/frame.h            |  10
-rw-r--r--  generator/generator.cpp      | 275
-rw-r--r--  generator/generator.h        |   2
-rw-r--r--  generator/group.cpp          |  59
-rw-r--r--  generator/group.h            |   4
-rw-r--r--  generator/lemma.cpp          |  17
-rw-r--r--  generator/lemma.h            |   4
-rw-r--r--  generator/notion.cpp         |  18
-rw-r--r--  generator/notion.h           |  22
-rw-r--r--  generator/progress.h         |  56
-rw-r--r--  generator/pronunciation.cpp  |  55
-rw-r--r--  generator/pronunciation.h    |   7
-rw-r--r--  generator/word.cpp           |   9
-rw-r--r--  generator/word.h             |   4
18 files changed, 328 insertions, 262 deletions
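The heart of this change is visible in nearly every file below: the generator's hand-rolled database and field classes are dropped in favor of hkutil's hatkirby::database, whose insertIntoTable() takes a table name and a brace-initialized list of column/value pairs. A minimal sketch of the before/after call shape, based only on the call sites in the diffs below (hkutil's own headers are not part of this commit):

#include <hkutil/database.h>
#include "form.h"

// Before this commit, rows were assembled as std::list<field> and moved in:
//
//   std::list<field> fields;
//   fields.emplace_back("form_id", arg.getId());
//   fields.emplace_back("form", arg.getText());
//   db.insertIntoTable("forms", std::move(fields));
//
// After, the columns are passed inline as brace-initialized pairs. This
// sketch mirrors the call shape in form.cpp below; it is not a description
// of hkutil's full API.
void serializeFormSketch(hatkirby::database& db,
                         const verbly::generator::form& arg)
{
  db.insertIntoTable(
    "forms",
    {
      { "form_id", arg.getId() },
      { "form", arg.getText() }
    });
}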
diff --git a/generator/CMakeLists.txt b/generator/CMakeLists.txt
index 95a11b5..8c070d2 100644
--- a/generator/CMakeLists.txt
+++ b/generator/CMakeLists.txt
@@ -5,8 +5,12 @@ find_package(PkgConfig)
 pkg_check_modules(sqlite3 sqlite3 REQUIRED)
 find_package(libxml2 REQUIRED)
 
-include_directories(${sqlite3_INCLUDE_DIR} ${LIBXML2_INCLUDE_DIR})
-add_executable(generator notion.cpp word.cpp lemma.cpp form.cpp pronunciation.cpp group.cpp frame.cpp part.cpp database.cpp field.cpp generator.cpp main.cpp)
+include_directories(
+  ${sqlite3_INCLUDE_DIR}
+  ${LIBXML2_INCLUDE_DIR}
+  ../vendor/hkutil)
+
+add_executable(generator notion.cpp word.cpp lemma.cpp form.cpp pronunciation.cpp group.cpp frame.cpp part.cpp generator.cpp main.cpp)
 set_property(TARGET generator PROPERTY CXX_STANDARD 11)
 set_property(TARGET generator PROPERTY CXX_STANDARD_REQUIRED ON)
 target_link_libraries(generator ${sqlite3_LIBRARIES} ${LIBXML2_LIBRARIES})
diff --git a/generator/form.cpp b/generator/form.cpp
index f616344..c66820c 100644
--- a/generator/form.cpp
+++ b/generator/form.cpp
@@ -1,8 +1,7 @@
 #include "form.h"
 #include <algorithm>
 #include <list>
-#include "database.h"
-#include "field.h"
+#include <cctype>
 #include "pronunciation.h"
 
 namespace verbly {
@@ -14,7 +13,7 @@ namespace verbly {
     id_(nextId_++),
     text_(text),
     complexity_(std::count(std::begin(text), std::end(text), ' ') + 1),
-    proper_(std::any_of(std::begin(text), std::end(text), std::isupper)),
+    proper_(std::any_of(std::begin(text), std::end(text), ::isupper)),
     length_(text.length())
   {
   }
@@ -24,28 +23,30 @@ namespace verbly {
       pronunciations_.insert(&p);
     }
 
-    database& operator<<(database& db, const form& arg)
+    hatkirby::database& operator<<(hatkirby::database& db, const form& arg)
     {
       // Serialize the form first.
       {
-        std::list<field> fields;
-        fields.emplace_back("form_id", arg.getId());
-        fields.emplace_back("form", arg.getText());
-        fields.emplace_back("complexity", arg.getComplexity());
-        fields.emplace_back("proper", arg.isProper());
-        fields.emplace_back("length", arg.getLength());
-
-        db.insertIntoTable("forms", std::move(fields));
+        db.insertIntoTable(
+          "forms",
+          {
+            { "form_id", arg.getId() },
+            { "form", arg.getText() },
+            { "complexity", arg.getComplexity() },
+            { "proper", arg.isProper() },
+            { "length", arg.getLength() }
+          });
       }
 
       // Then, serialize the form/pronunciation relationship.
       for (const pronunciation* p : arg.getPronunciations())
       {
-        std::list<field> fields;
-        fields.emplace_back("form_id", arg.getId());
-        fields.emplace_back("pronunciation_id", p->getId());
-
-        db.insertIntoTable("forms_pronunciations", std::move(fields));
+        db.insertIntoTable(
+          "forms_pronunciations",
+          {
+            { "form_id", arg.getId() },
+            { "pronunciation_id", p->getId() }
+          });
       }
 
       return db;
diff --git a/generator/form.h b/generator/form.h
index 37fd3cc..f3dd779 100644
--- a/generator/form.h
+++ b/generator/form.h
@@ -3,6 +3,7 @@
 
 #include <string>
 #include <set>
+#include <hkutil/database.h>
 
 namespace verbly {
   namespace generator {
@@ -68,7 +69,7 @@ namespace verbly {
 
     // Serializer
 
-    database& operator<<(database& db, const form& arg);
+    hatkirby::database& operator<<(hatkirby::database& db, const form& arg);
 
   };
 };
diff --git a/generator/frame.cpp b/generator/frame.cpp
index 4e4ac5f..60cedc4 100644
--- a/generator/frame.cpp
+++ b/generator/frame.cpp
@@ -1,6 +1,4 @@
 #include "frame.h"
-#include "database.h"
-#include "field.h"
 
 namespace verbly {
   namespace generator {
diff --git a/generator/frame.h b/generator/frame.h
index d26d500..3e15b39 100644
--- a/generator/frame.h
+++ b/generator/frame.h
@@ -8,8 +8,6 @@
8namespace verbly { 8namespace verbly {
9 namespace generator { 9 namespace generator {
10 10
11 class database;
12
13 class frame { 11 class frame {
14 public: 12 public:
15 13
@@ -20,9 +18,9 @@ namespace verbly {
20 // Constructor 18 // Constructor
21 19
22 frame(); 20 frame();
23 21
24 // Duplication 22 // Duplication
25 23
26 static frame duplicate(const frame& other); 24 static frame duplicate(const frame& other);
27 25
28 // Mutators 26 // Mutators
@@ -35,12 +33,12 @@ namespace verbly {
35 { 33 {
36 return id_; 34 return id_;
37 } 35 }
38 36
39 int getLength() const 37 int getLength() const
40 { 38 {
41 return parts_.size(); 39 return parts_.size();
42 } 40 }
43 41
44 const part& operator[](int index) const 42 const part& operator[](int index) const
45 { 43 {
46 return parts_.at(index); 44 return parts_.at(index);
diff --git a/generator/generator.cpp b/generator/generator.cpp
index e34ca69..785ec87 100644
--- a/generator/generator.cpp
+++ b/generator/generator.cpp
@@ -1,16 +1,14 @@
1#include "generator.h" 1#include "generator.h"
2#include <cassert>
3#include <stdexcept> 2#include <stdexcept>
4#include <iostream> 3#include <iostream>
5#include <regex> 4#include <regex>
6#include <dirent.h> 5#include <dirent.h>
7#include <fstream> 6#include <fstream>
8#include "../lib/enums.h" 7#include <hkutil/string.h>
9#include "progress.h" 8#include <hkutil/progress.h>
10#include "role.h" 9#include "role.h"
11#include "part.h" 10#include "part.h"
12#include "field.h" 11#include "../lib/enums.h"
13#include "../lib/util.h"
14#include "../lib/version.h" 12#include "../lib/version.h"
15 13
16namespace verbly { 14namespace verbly {
@@ -28,7 +26,7 @@ namespace verbly {
28 wordNetPath_(wordNetPath), 26 wordNetPath_(wordNetPath),
29 cmudictPath_(cmudictPath), 27 cmudictPath_(cmudictPath),
30 imageNetPath_(imageNetPath), 28 imageNetPath_(imageNetPath),
31 db_(outputPath) 29 db_(outputPath, hatkirby::dbmode::create)
32 { 30 {
33 // Ensure VerbNet directory exists 31 // Ensure VerbNet directory exists
34 DIR* dir; 32 DIR* dir;
@@ -53,7 +51,8 @@ namespace verbly {
53 51
54 // Ensure WordNet tables exist 52 // Ensure WordNet tables exist
55 for (std::string table : { 53 for (std::string table : {
56 "s", "sk", "ant", "at", "cls", "hyp", "ins", "mm", "mp", "ms", "per", "sa", "sim", "syntax" 54 "s", "sk", "ant", "at", "cls", "hyp", "ins", "mm", "mp", "ms", "per",
55 "sa", "sim", "syntax"
57 }) 56 })
58 { 57 {
59 if (!std::ifstream(wordNetPath_ + "wn_" + table + ".pl")) 58 if (!std::ifstream(wordNetPath_ + "wn_" + table + ".pl"))
@@ -166,13 +165,15 @@ namespace verbly {
166 void generator::readWordNetSynsets() 165 void generator::readWordNetSynsets()
167 { 166 {
168 std::list<std::string> lines(readFile(wordNetPath_ + "wn_s.pl")); 167 std::list<std::string> lines(readFile(wordNetPath_ + "wn_s.pl"));
169 progress ppgs("Reading synsets from WordNet...", lines.size()); 168 hatkirby::progress ppgs("Reading synsets from WordNet...", lines.size());
170 169
171 for (std::string line : lines) 170 for (std::string line : lines)
172 { 171 {
173 ppgs.update(); 172 ppgs.update();
174 173
175 std::regex relation("^s\\(([1234]\\d{8}),(\\d+),'(.+)',\\w,\\d+,(\\d+)\\)\\.$"); 174 std::regex relation(
175 "^s\\(([1234]\\d{8}),(\\d+),'(.+)',\\w,\\d+,(\\d+)\\)\\.$");
176
176 std::smatch relation_data; 177 std::smatch relation_data;
177 if (!std::regex_search(line, relation_data, relation)) 178 if (!std::regex_search(line, relation_data, relation))
178 { 179 {
@@ -206,7 +207,10 @@ namespace verbly {
206 void generator::readAdjectivePositioning() 207 void generator::readAdjectivePositioning()
207 { 208 {
208 std::list<std::string> lines(readFile(wordNetPath_ + "wn_syntax.pl")); 209 std::list<std::string> lines(readFile(wordNetPath_ + "wn_syntax.pl"));
209 progress ppgs("Reading adjective positionings from WordNet...", lines.size()); 210
211 hatkirby::progress ppgs(
212 "Reading adjective positionings from WordNet...",
213 lines.size());
210 214
211 for (std::string line : lines) 215 for (std::string line : lines)
212 { 216 {
@@ -279,7 +283,10 @@ namespace verbly {
279 void generator::readWordNetSenseKeys() 283 void generator::readWordNetSenseKeys()
280 { 284 {
281 std::list<std::string> lines(readFile(wordNetPath_ + "wn_sk.pl")); 285 std::list<std::string> lines(readFile(wordNetPath_ + "wn_sk.pl"));
282 progress ppgs("Reading sense keys from WordNet...", lines.size()); 286
287 hatkirby::progress ppgs(
288 "Reading sense keys from WordNet...",
289 lines.size());
283 290
284 for (std::string line : lines) 291 for (std::string line : lines)
285 { 292 {
@@ -350,7 +357,8 @@ namespace verbly {
350 } 357 }
351 358
352 xmlNodePtr top = xmlDocGetRootElement(doc); 359 xmlNodePtr top = xmlDocGetRootElement(doc);
353 if ((top == nullptr) || (xmlStrcmp(top->name, reinterpret_cast<const xmlChar*>("VNCLASS")))) 360 if ((top == nullptr) ||
361 (xmlStrcmp(top->name, reinterpret_cast<const xmlChar*>("VNCLASS"))))
354 { 362 {
355 throw std::logic_error("Bad VerbNet file format: " + filename); 363 throw std::logic_error("Bad VerbNet file format: " + filename);
356 } 364 }
@@ -360,7 +368,8 @@ namespace verbly {
360 createGroup(top); 368 createGroup(top);
361 } catch (const std::exception& e) 369 } catch (const std::exception& e)
362 { 370 {
363 std::throw_with_nested(std::logic_error("Error parsing VerbNet file: " + filename)); 371 std::throw_with_nested(
372 std::logic_error("Error parsing VerbNet file: " + filename));
364 } 373 }
365 } 374 }
366 375
@@ -370,7 +379,7 @@ namespace verbly {
370 void generator::readAgidInflections() 379 void generator::readAgidInflections()
371 { 380 {
372 std::list<std::string> lines(readFile(agidPath_)); 381 std::list<std::string> lines(readFile(agidPath_));
373 progress ppgs("Reading inflections from AGID...", lines.size()); 382 hatkirby::progress ppgs("Reading inflections from AGID...", lines.size());
374 383
375 for (std::string line : lines) 384 for (std::string line : lines)
376 { 385 {
@@ -395,12 +404,17 @@ namespace verbly {
395 404
396 lemma& curLemma = lookupOrCreateLemma(infinitive); 405 lemma& curLemma = lookupOrCreateLemma(infinitive);
397 406
407 auto inflWordList =
408 hatkirby::split<std::list<std::string>>(line, " | ");
409
398 std::vector<std::list<std::string>> agidForms; 410 std::vector<std::list<std::string>> agidForms;
399 for (std::string inflForms : split<std::list<std::string>>(line, " | ")) 411 for (std::string inflForms : inflWordList)
400 { 412 {
401 std::list<std::string> forms; 413 auto inflFormList =
414 hatkirby::split<std::list<std::string>>(std::move(inflForms), ", ");
402 415
403 for (std::string inflForm : split<std::list<std::string>>(std::move(inflForms), ", ")) 416 std::list<std::string> forms;
417 for (std::string inflForm : inflFormList)
404 { 418 {
405 int sympos = inflForm.find_first_of("~<!? "); 419 int sympos = inflForm.find_first_of("~<!? ");
406 if (sympos != std::string::npos) 420 if (sympos != std::string::npos)
@@ -443,7 +457,8 @@ namespace verbly {
443 // - may and shall do not conjugate the way we want them to 457 // - may and shall do not conjugate the way we want them to
444 // - methinks only has a past tense and is an outlier 458 // - methinks only has a past tense and is an outlier
445 // - wit has five forms, and is archaic/obscure enough that we can ignore it for now 459 // - wit has five forms, and is archaic/obscure enough that we can ignore it for now
446 std::cout << " Ignoring verb \"" << infinitive << "\" due to non-standard number of forms." << std::endl; 460 std::cout << " Ignoring verb \"" << infinitive
461 << "\" due to non-standard number of forms." << std::endl;
447 } 462 }
448 463
449 // For verbs in particular, we sometimes create a notion and a word 464 // For verbs in particular, we sometimes create a notion and a word
@@ -452,9 +467,13 @@ namespace verbly {
452 // that this verb appears in the AGID data but not in either WordNet 467 // that this verb appears in the AGID data but not in either WordNet
453 // or VerbNet. 468 // or VerbNet.
454 if (!wordsByBaseForm_.count(infinitive) 469 if (!wordsByBaseForm_.count(infinitive)
455 || !std::any_of(std::begin(wordsByBaseForm_.at(infinitive)), std::end(wordsByBaseForm_.at(infinitive)), [] (word* w) { 470 || !std::any_of(
456 return w->getNotion().getPartOfSpeech() == part_of_speech::verb; 471 std::begin(wordsByBaseForm_.at(infinitive)),
457 })) 472 std::end(wordsByBaseForm_.at(infinitive)),
473 [] (word* w) {
474 return (w->getNotion().getPartOfSpeech() ==
475 part_of_speech::verb);
476 }))
458 { 477 {
459 notion& n = createNotion(part_of_speech::verb); 478 notion& n = createNotion(part_of_speech::verb);
460 createWord(n, curLemma); 479 createWord(n, curLemma);
@@ -471,7 +490,8 @@ namespace verbly {
471 mappedForms[inflection::superlative] = agidForms[1]; 490 mappedForms[inflection::superlative] = agidForms[1];
472 } else { 491 } else {
473 // As of AGID 2014.08.11, this is only "only", which has only the form "onliest" 492 // As of AGID 2014.08.11, this is only "only", which has only the form "onliest"
474 std::cout << " Ignoring adjective/adverb \"" << infinitive << "\" due to non-standard number of forms." << std::endl; 493 std::cout << " Ignoring adjective/adverb \"" << infinitive
494 << "\" due to non-standard number of forms." << std::endl;
475 } 495 }
476 496
477 break; 497 break;
@@ -484,7 +504,8 @@ namespace verbly {
484 mappedForms[inflection::plural] = agidForms[0]; 504 mappedForms[inflection::plural] = agidForms[0];
485 } else { 505 } else {
486 // As of AGID 2014.08.11, this is non-existent. 506 // As of AGID 2014.08.11, this is non-existent.
487 std::cout << " Ignoring noun \"" << infinitive << "\" due to non-standard number of forms." << std::endl; 507 std::cout << " Ignoring noun \"" << infinitive
508 << "\" due to non-standard number of forms." << std::endl;
488 } 509 }
489 510
490 break; 511 break;
@@ -496,7 +517,9 @@ namespace verbly {
496 { 517 {
497 for (std::string infl : std::move(mapping.second)) 518 for (std::string infl : std::move(mapping.second))
498 { 519 {
499 curLemma.addInflection(mapping.first, lookupOrCreateForm(std::move(infl))); 520 curLemma.addInflection(
521 mapping.first,
522 lookupOrCreateForm(std::move(infl)));
500 } 523 }
501 } 524 }
502 } 525 }
@@ -505,7 +528,7 @@ namespace verbly {
505 void generator::readPrepositions() 528 void generator::readPrepositions()
506 { 529 {
507 std::list<std::string> lines(readFile("prepositions.txt")); 530 std::list<std::string> lines(readFile("prepositions.txt"));
508 progress ppgs("Reading prepositions...", lines.size()); 531 hatkirby::progress ppgs("Reading prepositions...", lines.size());
509 532
510 for (std::string line : lines) 533 for (std::string line : lines)
511 { 534 {
@@ -515,7 +538,9 @@ namespace verbly {
515 std::smatch relation_data; 538 std::smatch relation_data;
516 std::regex_search(line, relation_data, relation); 539 std::regex_search(line, relation_data, relation);
517 std::string prep = relation_data[1]; 540 std::string prep = relation_data[1];
518 auto groups = split<std::list<std::string>>(relation_data[2], ", "); 541
542 auto groups =
543 hatkirby::split<std::list<std::string>>(relation_data[2], ", ");
519 544
520 notion& n = createNotion(part_of_speech::preposition); 545 notion& n = createNotion(part_of_speech::preposition);
521 lemma& l = lookupOrCreateLemma(prep); 546 lemma& l = lookupOrCreateLemma(prep);
@@ -528,7 +553,10 @@ namespace verbly {
528 void generator::readCmudictPronunciations() 553 void generator::readCmudictPronunciations()
529 { 554 {
530 std::list<std::string> lines(readFile(cmudictPath_)); 555 std::list<std::string> lines(readFile(cmudictPath_));
531 progress ppgs("Reading pronunciations from CMUDICT...", lines.size()); 556
557 hatkirby::progress ppgs(
558 "Reading pronunciations from CMUDICT...",
559 lines.size());
532 560
533 for (std::string line : lines) 561 for (std::string line : lines)
534 { 562 {
@@ -538,8 +566,7 @@ namespace verbly {
538 std::smatch phoneme_data; 566 std::smatch phoneme_data;
539 if (std::regex_search(line, phoneme_data, phoneme)) 567 if (std::regex_search(line, phoneme_data, phoneme))
540 { 568 {
541 std::string canonical(phoneme_data[1]); 569 std::string canonical = hatkirby::lowercase(phoneme_data[1]);
542 std::transform(std::begin(canonical), std::end(canonical), std::begin(canonical), ::tolower);
543 570
544 if (!formByText_.count(canonical)) 571 if (!formByText_.count(canonical))
545 { 572 {
@@ -575,13 +602,14 @@ namespace verbly {
575 } 602 }
576 603
577 std::string schema = schemaBuilder.str(); 604 std::string schema = schemaBuilder.str();
578 auto queries = split<std::list<std::string>>(schema, ";"); 605 auto queries = hatkirby::split<std::list<std::string>>(schema, ";");
579 progress ppgs("Writing database schema...", queries.size()); 606
607 hatkirby::progress ppgs("Writing database schema...", queries.size());
580 for (std::string query : queries) 608 for (std::string query : queries)
581 { 609 {
582 if (!queries.empty()) 610 if (!queries.empty())
583 { 611 {
584 db_.runQuery(query); 612 db_.execute(query);
585 } 613 }
586 614
587 ppgs.update(); 615 ppgs.update();
@@ -590,10 +618,6 @@ namespace verbly {
590 618
591 void generator::writeVersion() 619 void generator::writeVersion()
592 { 620 {
593 std::list<field> fields;
594 fields.emplace_back("major", DATABASE_MAJOR_VERSION);
595 fields.emplace_back("minor", DATABASE_MINOR_VERSION);
596
597 db_.insertIntoTable( 621 db_.insertIntoTable(
598 "version", 622 "version",
599 { 623 {
@@ -605,7 +629,7 @@ namespace verbly {
605 void generator::dumpObjects() 629 void generator::dumpObjects()
606 { 630 {
607 { 631 {
608 progress ppgs("Writing notions...", notions_.size()); 632 hatkirby::progress ppgs("Writing notions...", notions_.size());
609 633
610 for (notion& n : notions_) 634 for (notion& n : notions_)
611 { 635 {
@@ -616,7 +640,7 @@ namespace verbly {
616 } 640 }
617 641
618 { 642 {
619 progress ppgs("Writing words...", words_.size()); 643 hatkirby::progress ppgs("Writing words...", words_.size());
620 644
621 for (word& w : words_) 645 for (word& w : words_)
622 { 646 {
@@ -627,7 +651,7 @@ namespace verbly {
627 } 651 }
628 652
629 { 653 {
630 progress ppgs("Writing lemmas...", lemmas_.size()); 654 hatkirby::progress ppgs("Writing lemmas...", lemmas_.size());
631 655
632 for (lemma& l : lemmas_) 656 for (lemma& l : lemmas_)
633 { 657 {
@@ -638,7 +662,7 @@ namespace verbly {
638 } 662 }
639 663
640 { 664 {
641 progress ppgs("Writing forms...", forms_.size()); 665 hatkirby::progress ppgs("Writing forms...", forms_.size());
642 666
643 for (form& f : forms_) 667 for (form& f : forms_)
644 { 668 {
@@ -649,7 +673,7 @@ namespace verbly {
649 } 673 }
650 674
651 { 675 {
652 progress ppgs("Writing pronunciations...", pronunciations_.size()); 676 hatkirby::progress ppgs("Writing pronunciations...", pronunciations_.size());
653 677
654 for (pronunciation& p : pronunciations_) 678 for (pronunciation& p : pronunciations_)
655 { 679 {
@@ -660,7 +684,7 @@ namespace verbly {
660 } 684 }
661 685
662 { 686 {
663 progress ppgs("Writing verb frames...", groups_.size()); 687 hatkirby::progress ppgs("Writing verb frames...", groups_.size());
664 688
665 for (group& g : groups_) 689 for (group& g : groups_)
666 { 690 {
@@ -674,22 +698,30 @@ namespace verbly {
674 void generator::readWordNetAntonymy() 698 void generator::readWordNetAntonymy()
675 { 699 {
676 std::list<std::string> lines(readFile(wordNetPath_ + "wn_ant.pl")); 700 std::list<std::string> lines(readFile(wordNetPath_ + "wn_ant.pl"));
677 progress ppgs("Writing antonyms...", lines.size()); 701 hatkirby::progress ppgs("Writing antonyms...", lines.size());
678 for (auto line : lines) 702 for (auto line : lines)
679 { 703 {
680 ppgs.update(); 704 ppgs.update();
681 705
682 std::regex relation("^ant\\(([134]\\d{8}),(\\d+),([134]\\d{8}),(\\d+)\\)\\."); 706 std::regex relation(
707 "^ant\\(([134]\\d{8}),(\\d+),([134]\\d{8}),(\\d+)\\)\\.");
708
683 std::smatch relation_data; 709 std::smatch relation_data;
684 if (!std::regex_search(line, relation_data, relation)) 710 if (!std::regex_search(line, relation_data, relation))
685 { 711 {
686 continue; 712 continue;
687 } 713 }
688 714
689 std::pair<int, int> lookup1(std::stoi(relation_data[1]), std::stoi(relation_data[2])); 715 std::pair<int, int> lookup1(
690 std::pair<int, int> lookup2(std::stoi(relation_data[3]), std::stoi(relation_data[4])); 716 std::stoi(relation_data[1]),
717 std::stoi(relation_data[2]));
718
719 std::pair<int, int> lookup2(
720 std::stoi(relation_data[3]),
721 std::stoi(relation_data[4]));
691 722
692 if (wordByWnidAndWnum_.count(lookup1) && wordByWnidAndWnum_.count(lookup2)) 723 if (wordByWnidAndWnum_.count(lookup1) &&
724 wordByWnidAndWnum_.count(lookup2))
693 { 725 {
694 word& word1 = *wordByWnidAndWnum_.at(lookup1); 726 word& word1 = *wordByWnidAndWnum_.at(lookup1);
695 word& word2 = *wordByWnidAndWnum_.at(lookup2); 727 word& word2 = *wordByWnidAndWnum_.at(lookup2);
@@ -707,7 +739,7 @@ namespace verbly {
707 void generator::readWordNetVariation() 739 void generator::readWordNetVariation()
708 { 740 {
709 std::list<std::string> lines(readFile(wordNetPath_ + "wn_at.pl")); 741 std::list<std::string> lines(readFile(wordNetPath_ + "wn_at.pl"));
710 progress ppgs("Writing variation...", lines.size()); 742 hatkirby::progress ppgs("Writing variation...", lines.size());
711 for (auto line : lines) 743 for (auto line : lines)
712 { 744 {
713 ppgs.update(); 745 ppgs.update();
@@ -730,7 +762,7 @@ namespace verbly {
730 db_.insertIntoTable( 762 db_.insertIntoTable(
731 "variation", 763 "variation",
732 { 764 {
733 { "noun_id", notion1.getId() } 765 { "noun_id", notion1.getId() },
734 { "adjective_id", notion2.getId() } 766 { "adjective_id", notion2.getId() }
735 }); 767 });
736 } 768 }
@@ -740,20 +772,32 @@ namespace verbly {
740 void generator::readWordNetClasses() 772 void generator::readWordNetClasses()
741 { 773 {
742 std::list<std::string> lines(readFile(wordNetPath_ + "wn_cls.pl")); 774 std::list<std::string> lines(readFile(wordNetPath_ + "wn_cls.pl"));
743 progress ppgs("Writing usage, topicality, and regionality...", lines.size()); 775
776 hatkirby::progress ppgs(
777 "Writing usage, topicality, and regionality...",
778 lines.size());
779
744 for (auto line : lines) 780 for (auto line : lines)
745 { 781 {
746 ppgs.update(); 782 ppgs.update();
747 783
748 std::regex relation("^cls\\(([134]\\d{8}),(\\d+),(1\\d{8}),(\\d+),([tur])\\)\\."); 784 std::regex relation(
785 "^cls\\(([134]\\d{8}),(\\d+),(1\\d{8}),(\\d+),([tur])\\)\\.");
786
749 std::smatch relation_data; 787 std::smatch relation_data;
750 if (!std::regex_search(line, relation_data, relation)) 788 if (!std::regex_search(line, relation_data, relation))
751 { 789 {
752 continue; 790 continue;
753 } 791 }
754 792
755 std::pair<int, int> lookup1(std::stoi(relation_data[1]), std::stoi(relation_data[2])); 793 std::pair<int, int> lookup1(
756 std::pair<int, int> lookup2(std::stoi(relation_data[3]), std::stoi(relation_data[4])); 794 std::stoi(relation_data[1]),
795 std::stoi(relation_data[2]));
796
797 std::pair<int, int> lookup2(
798 std::stoi(relation_data[3]),
799 std::stoi(relation_data[4]));
800
757 std::string class_type = relation_data[5]; 801 std::string class_type = relation_data[5];
758 802
759 std::string table_name; 803 std::string table_name;
@@ -773,18 +817,30 @@ namespace verbly {
773 817
774 if ((lookup1.second == 0) && (wordsByWnid_.count(lookup1.first))) 818 if ((lookup1.second == 0) && (wordsByWnid_.count(lookup1.first)))
775 { 819 {
776 std::transform(std::begin(wordsByWnid_.at(lookup1.first)), std::end(wordsByWnid_.at(lookup1.first)), std::back_inserter(leftJoin), [] (word* w) { 820 auto& wordSet = wordsByWnid_.at(lookup1.first);
777 return w->getId(); 821
778 }); 822 std::transform(
823 std::begin(wordSet),
824 std::end(wordSet),
825 std::back_inserter(leftJoin),
826 [] (word* w) {
827 return w->getId();
828 });
779 } else if (wordByWnidAndWnum_.count(lookup1)) { 829 } else if (wordByWnidAndWnum_.count(lookup1)) {
780 leftJoin.push_back(wordByWnidAndWnum_.at(lookup1)->getId()); 830 leftJoin.push_back(wordByWnidAndWnum_.at(lookup1)->getId());
781 } 831 }
782 832
783 if ((lookup2.second == 0) && (wordsByWnid_.count(lookup2.first))) 833 if ((lookup2.second == 0) && (wordsByWnid_.count(lookup2.first)))
784 { 834 {
785 std::transform(std::begin(wordsByWnid_.at(lookup2.first)), std::end(wordsByWnid_.at(lookup2.first)), std::back_inserter(rightJoin), [] (word* w) { 835 auto& wordSet = wordsByWnid_.at(lookup2.first);
786 return w->getId(); 836
787 }); 837 std::transform(
838 std::begin(wordSet),
839 std::end(wordSet),
840 std::back_inserter(rightJoin),
841 [] (word* w) {
842 return w->getId();
843 });
788 } else if (wordByWnidAndWnum_.count(lookup2)) { 844 } else if (wordByWnidAndWnum_.count(lookup2)) {
789 rightJoin.push_back(wordByWnidAndWnum_.at(lookup2)->getId()); 845 rightJoin.push_back(wordByWnidAndWnum_.at(lookup2)->getId());
790 } 846 }
@@ -807,7 +863,7 @@ namespace verbly {
807 void generator::readWordNetCausality() 863 void generator::readWordNetCausality()
808 { 864 {
809 std::list<std::string> lines(readFile(wordNetPath_ + "wn_cs.pl")); 865 std::list<std::string> lines(readFile(wordNetPath_ + "wn_cs.pl"));
810 progress ppgs("Writing causality...", lines.size()); 866 hatkirby::progress ppgs("Writing causality...", lines.size());
811 for (auto line : lines) 867 for (auto line : lines)
812 { 868 {
813 ppgs.update(); 869 ppgs.update();
@@ -840,7 +896,7 @@ namespace verbly {
840 void generator::readWordNetEntailment() 896 void generator::readWordNetEntailment()
841 { 897 {
842 std::list<std::string> lines(readFile(wordNetPath_ + "wn_ent.pl")); 898 std::list<std::string> lines(readFile(wordNetPath_ + "wn_ent.pl"));
843 progress ppgs("Writing entailment...", lines.size()); 899 hatkirby::progress ppgs("Writing entailment...", lines.size());
844 for (auto line : lines) 900 for (auto line : lines)
845 { 901 {
846 ppgs.update(); 902 ppgs.update();
@@ -873,7 +929,7 @@ namespace verbly {
873 void generator::readWordNetHypernymy() 929 void generator::readWordNetHypernymy()
874 { 930 {
875 std::list<std::string> lines(readFile(wordNetPath_ + "wn_hyp.pl")); 931 std::list<std::string> lines(readFile(wordNetPath_ + "wn_hyp.pl"));
876 progress ppgs("Writing hypernymy...", lines.size()); 932 hatkirby::progress ppgs("Writing hypernymy...", lines.size());
877 for (auto line : lines) 933 for (auto line : lines)
878 { 934 {
879 ppgs.update(); 935 ppgs.update();
@@ -906,7 +962,7 @@ namespace verbly {
906 void generator::readWordNetInstantiation() 962 void generator::readWordNetInstantiation()
907 { 963 {
908 std::list<std::string> lines(readFile(wordNetPath_ + "wn_ins.pl")); 964 std::list<std::string> lines(readFile(wordNetPath_ + "wn_ins.pl"));
909 progress ppgs("Writing instantiation...", lines.size()); 965 hatkirby::progress ppgs("Writing instantiation...", lines.size());
910 for (auto line : lines) 966 for (auto line : lines)
911 { 967 {
912 ppgs.update(); 968 ppgs.update();
@@ -939,7 +995,7 @@ namespace verbly {
939 void generator::readWordNetMemberMeronymy() 995 void generator::readWordNetMemberMeronymy()
940 { 996 {
941 std::list<std::string> lines(readFile(wordNetPath_ + "wn_mm.pl")); 997 std::list<std::string> lines(readFile(wordNetPath_ + "wn_mm.pl"));
942 progress ppgs("Writing member meronymy...", lines.size()); 998 hatkirby::progress ppgs("Writing member meronymy...", lines.size());
943 for (auto line : lines) 999 for (auto line : lines)
944 { 1000 {
945 ppgs.update(); 1001 ppgs.update();
@@ -972,7 +1028,7 @@ namespace verbly {
972 void generator::readWordNetPartMeronymy() 1028 void generator::readWordNetPartMeronymy()
973 { 1029 {
974 std::list<std::string> lines(readFile(wordNetPath_ + "wn_mp.pl")); 1030 std::list<std::string> lines(readFile(wordNetPath_ + "wn_mp.pl"));
975 progress ppgs("Writing part meronymy...", lines.size()); 1031 hatkirby::progress ppgs("Writing part meronymy...", lines.size());
976 for (auto line : lines) 1032 for (auto line : lines)
977 { 1033 {
978 ppgs.update(); 1034 ppgs.update();
@@ -1005,7 +1061,7 @@ namespace verbly {
1005 void generator::readWordNetSubstanceMeronymy() 1061 void generator::readWordNetSubstanceMeronymy()
1006 { 1062 {
1007 std::list<std::string> lines(readFile(wordNetPath_ + "wn_ms.pl")); 1063 std::list<std::string> lines(readFile(wordNetPath_ + "wn_ms.pl"));
1008 progress ppgs("Writing substance meronymy...", lines.size()); 1064 hatkirby::progress ppgs("Writing substance meronymy...", lines.size());
1009 for (auto line : lines) 1065 for (auto line : lines)
1010 { 1066 {
1011 ppgs.update(); 1067 ppgs.update();
@@ -1038,27 +1094,40 @@ namespace verbly {
1038 void generator::readWordNetPertainymy() 1094 void generator::readWordNetPertainymy()
1039 { 1095 {
1040 std::list<std::string> lines(readFile(wordNetPath_ + "wn_per.pl")); 1096 std::list<std::string> lines(readFile(wordNetPath_ + "wn_per.pl"));
1041 progress ppgs("Writing pertainymy and mannernymy...", lines.size()); 1097
1098 hatkirby::progress ppgs(
1099 "Writing pertainymy and mannernymy...",
1100 lines.size());
1101
1042 for (auto line : lines) 1102 for (auto line : lines)
1043 { 1103 {
1044 ppgs.update(); 1104 ppgs.update();
1045 1105
1046 std::regex relation("^per\\(([34]\\d{8}),(\\d+),([13]\\d{8}),(\\d+)\\)\\."); 1106 std::regex relation(
1107 "^per\\(([34]\\d{8}),(\\d+),([13]\\d{8}),(\\d+)\\)\\.");
1108
1047 std::smatch relation_data; 1109 std::smatch relation_data;
1048 if (!std::regex_search(line, relation_data, relation)) 1110 if (!std::regex_search(line, relation_data, relation))
1049 { 1111 {
1050 continue; 1112 continue;
1051 } 1113 }
1052 1114
1053 std::pair<int, int> lookup1(std::stoi(relation_data[1]), std::stoi(relation_data[2])); 1115 std::pair<int, int> lookup1(
1054 std::pair<int, int> lookup2(std::stoi(relation_data[3]), std::stoi(relation_data[4])); 1116 std::stoi(relation_data[1]),
1117 std::stoi(relation_data[2]));
1055 1118
1056 if (wordByWnidAndWnum_.count(lookup1) && wordByWnidAndWnum_.count(lookup2)) 1119 std::pair<int, int> lookup2(
1120 std::stoi(relation_data[3]),
1121 std::stoi(relation_data[4]));
1122
1123 if (wordByWnidAndWnum_.count(lookup1) &&
1124 wordByWnidAndWnum_.count(lookup2))
1057 { 1125 {
1058 word& word1 = *wordByWnidAndWnum_.at(lookup1); 1126 word& word1 = *wordByWnidAndWnum_.at(lookup1);
1059 word& word2 = *wordByWnidAndWnum_.at(lookup2); 1127 word& word2 = *wordByWnidAndWnum_.at(lookup2);
1060 1128
1061 if (word1.getNotion().getPartOfSpeech() == part_of_speech::adjective) 1129 if (word1.getNotion().getPartOfSpeech() ==
1130 part_of_speech::adjective)
1062 { 1131 {
1063 db_.insertIntoTable( 1132 db_.insertIntoTable(
1064 "pertainymy", 1133 "pertainymy",
@@ -1066,7 +1135,8 @@ namespace verbly {
1066 { "pertainym_id", word1.getId() }, 1135 { "pertainym_id", word1.getId() },
1067 { "noun_id", word2.getId() } 1136 { "noun_id", word2.getId() }
1068 }); 1137 });
1069 } else if (word1.getNotion().getPartOfSpeech() == part_of_speech::adverb) 1138 } else if (word1.getNotion().getPartOfSpeech() ==
1139 part_of_speech::adverb)
1070 { 1140 {
1071 db_.insertIntoTable( 1141 db_.insertIntoTable(
1072 "mannernymy", 1142 "mannernymy",
@@ -1082,7 +1152,7 @@ namespace verbly {
1082 void generator::readWordNetSpecification() 1152 void generator::readWordNetSpecification()
1083 { 1153 {
1084 std::list<std::string> lines(readFile(wordNetPath_ + "wn_sa.pl")); 1154 std::list<std::string> lines(readFile(wordNetPath_ + "wn_sa.pl"));
1085 progress ppgs("Writing specifications...", lines.size()); 1155 hatkirby::progress ppgs("Writing specifications...", lines.size());
1086 for (auto line : lines) 1156 for (auto line : lines)
1087 { 1157 {
1088 ppgs.update(); 1158 ppgs.update();
@@ -1094,10 +1164,17 @@ namespace verbly {
1094 continue; 1164 continue;
1095 } 1165 }
1096 1166
1097 std::pair<int, int> lookup1(std::stoi(relation_data[1]), std::stoi(relation_data[2])); 1167 std::pair<int, int> lookup1(
1098 std::pair<int, int> lookup2(std::stoi(relation_data[3]), std::stoi(relation_data[4])); 1168 std::stoi(relation_data[1]),
1169 std::stoi(relation_data[2]));
1170
1171 std::pair<int, int> lookup2(
1172 std::stoi(relation_data[3]),
1173 std::stoi(relation_data[4]));
1099 1174
1100 if (wordByWnidAndWnum_.count(lookup1) && wordByWnidAndWnum_.count(lookup2)) 1175
1176 if (wordByWnidAndWnum_.count(lookup1) &&
1177 wordByWnidAndWnum_.count(lookup2))
1101 { 1178 {
1102 word& word1 = *wordByWnidAndWnum_.at(lookup1); 1179 word& word1 = *wordByWnidAndWnum_.at(lookup1);
1103 word& word2 = *wordByWnidAndWnum_.at(lookup2); 1180 word& word2 = *wordByWnidAndWnum_.at(lookup2);
@@ -1115,7 +1192,7 @@ namespace verbly {
1115 void generator::readWordNetSimilarity() 1192 void generator::readWordNetSimilarity()
1116 { 1193 {
1117 std::list<std::string> lines(readFile(wordNetPath_ + "wn_sim.pl")); 1194 std::list<std::string> lines(readFile(wordNetPath_ + "wn_sim.pl"));
1118 progress ppgs("Writing adjective similarity...", lines.size()); 1195 hatkirby::progress ppgs("Writing adjective similarity...", lines.size());
1119 for (auto line : lines) 1196 for (auto line : lines)
1120 { 1197 {
1121 ppgs.update(); 1198 ppgs.update();
@@ -1149,7 +1226,7 @@ namespace verbly {
1149 { 1226 {
1150 std::cout << "Analyzing data..." << std::endl; 1227 std::cout << "Analyzing data..." << std::endl;
1151 1228
1152 db_.runQuery("ANALYZE"); 1229 db_.execute("ANALYZE");
1153 } 1230 }
1154 1231
1155 std::list<std::string> generator::readFile(std::string path) 1232 std::list<std::string> generator::readFile(std::string path)
@@ -1183,7 +1260,8 @@ namespace verbly {
1183 case 2: return part_of_speech::verb; 1260 case 2: return part_of_speech::verb;
1184 case 3: return part_of_speech::adjective; 1261 case 3: return part_of_speech::adjective;
1185 case 4: return part_of_speech::adverb; 1262 case 4: return part_of_speech::adverb;
1186 default: throw std::domain_error("Invalid WordNet synset ID: " + std::to_string(wnid)); 1263 default: throw std::domain_error(
1264 "Invalid WordNet synset ID: " + std::to_string(wnid));
1187 } 1265 }
1188 } 1266 }
1189 1267
@@ -1296,20 +1374,30 @@ namespace verbly {
1296 std::string wnSenses(reinterpret_cast<const char*>(key)); 1374 std::string wnSenses(reinterpret_cast<const char*>(key));
1297 xmlFree(key); 1375 xmlFree(key);
1298 1376
1299 auto wnSenseKeys = split<std::list<std::string>>(wnSenses, " "); 1377 auto wnSenseKeys =
1378 hatkirby::split<std::list<std::string>>(wnSenses, " ");
1379
1300 if (!wnSenseKeys.empty()) 1380 if (!wnSenseKeys.empty())
1301 { 1381 {
1302 std::list<std::string> tempKeys; 1382 std::list<std::string> tempKeys;
1303 1383
1304 std::transform(std::begin(wnSenseKeys), std::end(wnSenseKeys), std::back_inserter(tempKeys), [] (std::string sense) { 1384 std::transform(
1305 return sense + "::"; 1385 std::begin(wnSenseKeys),
1306 }); 1386 std::end(wnSenseKeys),
1387 std::back_inserter(tempKeys),
1388 [] (std::string sense) {
1389 return sense + "::";
1390 });
1307 1391
1308 std::list<std::string> filteredKeys; 1392 std::list<std::string> filteredKeys;
1309 1393
1310 std::remove_copy_if(std::begin(tempKeys), std::end(tempKeys), std::back_inserter(filteredKeys), [&] (std::string sense) { 1394 std::remove_copy_if(
1311 return !wnSenseKeys_.count(sense); 1395 std::begin(tempKeys),
1312 }); 1396 std::end(tempKeys),
1397 std::back_inserter(filteredKeys),
1398 [&] (std::string sense) {
1399 return !wnSenseKeys_.count(sense);
1400 });
1313 1401
1314 wnSenseKeys = std::move(filteredKeys); 1402 wnSenseKeys = std::move(filteredKeys);
1315 } 1403 }
@@ -1431,10 +1519,15 @@ namespace verbly {
1431 std::string choicesStr = reinterpret_cast<const char*>(key); 1519 std::string choicesStr = reinterpret_cast<const char*>(key);
1432 xmlFree(key); 1520 xmlFree(key);
1433 1521
1434 for (std::string choice : split<std::list<std::string>>(choicesStr, " ")) 1522 auto choices =
1523 hatkirby::split<std::list<std::string>>(
1524 choicesStr, " ");
1525
1526 for (std::string choice : choices)
1435 { 1527 {
1436 int chloc; 1528 int chloc;
1437 while ((chloc = choice.find_first_of("_")) != std::string::npos) 1529 while ((chloc = choice.find_first_of("_"))
1530 != std::string::npos)
1438 { 1531 {
1439 choice.replace(chloc, 1, " "); 1532 choice.replace(chloc, 1, " ");
1440 } 1533 }
@@ -1444,7 +1537,9 @@ namespace verbly {
1444 } else { 1537 } else {
1445 partLiteral = false; 1538 partLiteral = false;
1446 1539
1447 for (xmlNodePtr npnode = syntaxnode->xmlChildrenNode; npnode != nullptr; npnode = npnode->next) 1540 for (xmlNodePtr npnode = syntaxnode->xmlChildrenNode;
1541 npnode != nullptr;
1542 npnode = npnode->next)
1448 { 1543 {
1449 if (!xmlStrcmp(npnode->name, reinterpret_cast<const xmlChar*>("SELRESTRS"))) 1544 if (!xmlStrcmp(npnode->name, reinterpret_cast<const xmlChar*>("SELRESTRS")))
1450 { 1545 {
diff --git a/generator/generator.h b/generator/generator.h
index 52073bc..cd99f88 100644
--- a/generator/generator.h
+++ b/generator/generator.h
@@ -6,7 +6,7 @@
 #include <list>
 #include <set>
 #include <libxml/parser.h>
-#include "database.h"
+#include <hkutil/database.h>
 #include "notion.h"
 #include "word.h"
 #include "lemma.h"
diff --git a/generator/group.cpp b/generator/group.cpp
index 5b23578..1ffb9d9 100644
--- a/generator/group.cpp
+++ b/generator/group.cpp
@@ -1,10 +1,8 @@
1#include "group.h" 1#include "group.h"
2#include <stdexcept> 2#include <stdexcept>
3#include <list> 3#include <list>
4#include "database.h" 4#include <hkutil/string.h>
5#include "field.h"
6#include "frame.h" 5#include "frame.h"
7#include "../lib/util.h"
8 6
9namespace verbly { 7namespace verbly {
10 namespace generator { 8 namespace generator {
@@ -50,28 +48,26 @@ namespace verbly {
50 return roles_.at(name); 48 return roles_.at(name);
51 } 49 }
52 50
53 database& operator<<(database& db, const group& arg) 51 hatkirby::database& operator<<(hatkirby::database& db, const group& arg)
54 { 52 {
55 // Serialize each frame 53 // Serialize each frame
56 for (const frame& f : arg.getFrames()) 54 for (const frame& f : arg.getFrames())
57 { 55 {
58 // First, serialize the group/frame relationship 56 // First, serialize the group/frame relationship
59 { 57 db.insertIntoTable(
60 std::list<field> fields; 58 "frames",
61 59 {
62 fields.emplace_back("frame_id", f.getId()); 60 { "frame_id", f.getId() },
63 fields.emplace_back("group_id", arg.getId()); 61 { "group_id", arg.getId() },
64 fields.emplace_back("length", f.getLength()); 62 { "length", f.getLength() }
65 63 });
66 db.insertIntoTable("frames", std::move(fields));
67 }
68 64
69 // Then, serialize the frame parts in the context of the group 65 // Then, serialize the frame parts in the context of the group
70 for (int partIndex = 0; partIndex < f.getLength(); partIndex++) 66 for (int partIndex = 0; partIndex < f.getLength(); partIndex++)
71 { 67 {
72 const part& p = f[partIndex]; 68 const part& p = f[partIndex];
73 69
74 std::list<field> fields; 70 std::list<hatkirby::column> fields;
75 fields.emplace_back("part_id", p.getId()); 71 fields.emplace_back("part_id", p.getId());
76 fields.emplace_back("frame_id", f.getId()); 72 fields.emplace_back("frame_id", f.getId());
77 fields.emplace_back("part_index", partIndex); 73 fields.emplace_back("part_index", partIndex);
@@ -92,23 +88,23 @@ namespace verbly {
92 88
93 for (const std::string& s : partSelrestrs) 89 for (const std::string& s : partSelrestrs)
94 { 90 {
95 std::list<field> selrestrFields; 91 db.insertIntoTable(
96 92 "selrestrs",
97 selrestrFields.emplace_back("part_id", p.getId()); 93 {
98 selrestrFields.emplace_back("selrestr", s); 94 { "part_id", p.getId() },
99 95 { "selrestr", s }
100 db.insertIntoTable("selrestrs", std::move(selrestrFields)); 96 });
101 } 97 }
102 98
103 // Short interlude to serialize the synrestrs 99 // Short interlude to serialize the synrestrs
104 for (const std::string& s : p.getNounSynrestrs()) 100 for (const std::string& s : p.getNounSynrestrs())
105 { 101 {
106 std::list<field> synrestrFields; 102 db.insertIntoTable(
107 103 "synrestrs",
108 synrestrFields.emplace_back("part_id", p.getId()); 104 {
109 synrestrFields.emplace_back("synrestr", s); 105 { "part_id", p.getId() },
110 106 { "synrestr", s }
111 db.insertIntoTable("synrestrs", std::move(synrestrFields)); 107 });
112 } 108 }
113 109
114 break; 110 break;
@@ -117,10 +113,17 @@ namespace verbly {
117 case part::type::preposition: 113 case part::type::preposition:
118 { 114 {
119 std::set<std::string> setChoices = p.getPrepositionChoices(); 115 std::set<std::string> setChoices = p.getPrepositionChoices();
120 std::string serializedChoices = implode(std::begin(setChoices), std::end(setChoices), ","); 116
117 std::string serializedChoices =
118 hatkirby::implode(
119 std::begin(setChoices),
120 std::end(setChoices),
121 ",");
121 122
122 fields.emplace_back("prepositions", std::move(serializedChoices)); 123 fields.emplace_back("prepositions", std::move(serializedChoices));
123 fields.emplace_back("preposition_literality", p.isPrepositionLiteral() ? 1 : 0); 124
125 fields.emplace_back("preposition_literality",
126 p.isPrepositionLiteral() ? 1 : 0);
124 127
125 break; 128 break;
126 } 129 }
diff --git a/generator/group.h b/generator/group.h
index a7f3a17..f912920 100644
--- a/generator/group.h
+++ b/generator/group.h
@@ -6,13 +6,13 @@
6#include <string> 6#include <string>
7#include <cassert> 7#include <cassert>
8#include <list> 8#include <list>
9#include <hkutil/database.h>
9#include "role.h" 10#include "role.h"
10 11
11namespace verbly { 12namespace verbly {
12 namespace generator { 13 namespace generator {
13 14
14 class frame; 15 class frame;
15 class database;
16 16
17 class group { 17 class group {
18 public: 18 public:
@@ -67,7 +67,7 @@ namespace verbly {
67 67
68 // Serializer 68 // Serializer
69 69
70 database& operator<<(database& db, const group& arg); 70 hatkirby::database& operator<<(hatkirby::database& db, const group& arg);
71 71
72 }; 72 };
73}; 73};
diff --git a/generator/lemma.cpp b/generator/lemma.cpp
index e66b153..33ab037 100644
--- a/generator/lemma.cpp
+++ b/generator/lemma.cpp
@@ -1,8 +1,6 @@
1#include "lemma.h" 1#include "lemma.h"
2#include <list> 2#include <list>
3#include <cassert> 3#include <cassert>
4#include "field.h"
5#include "database.h"
6#include "form.h" 4#include "form.h"
7 5
8namespace verbly { 6namespace verbly {
@@ -35,7 +33,7 @@ namespace verbly {
35 } 33 }
36 } 34 }
37 35
38 database& operator<<(database& db, const lemma& arg) 36 hatkirby::database& operator<<(hatkirby::database& db, const lemma& arg)
39 { 37 {
40 for (inflection type : { 38 for (inflection type : {
41 inflection::base, 39 inflection::base,
@@ -49,12 +47,13 @@ namespace verbly {
49 { 47 {
50 for (const form* f : arg.getInflections(type)) 48 for (const form* f : arg.getInflections(type))
51 { 49 {
52 std::list<field> fields; 50 db.insertIntoTable(
53 fields.emplace_back("lemma_id", arg.getId()); 51 "lemmas_forms",
54 fields.emplace_back("form_id", f->getId()); 52 {
55 fields.emplace_back("category", static_cast<int>(type)); 53 { "lemma_id", arg.getId() },
56 54 { "form_id", f->getId() },
57 db.insertIntoTable("lemmas_forms", std::move(fields)); 55 { "category", static_cast<int>(type) }
56 });
58 } 57 }
59 } 58 }
60 59
diff --git a/generator/lemma.h b/generator/lemma.h
index f68667f..f7d5491 100644
--- a/generator/lemma.h
+++ b/generator/lemma.h
@@ -4,12 +4,12 @@
4#include <string> 4#include <string>
5#include <map> 5#include <map>
6#include <set> 6#include <set>
7#include <hkutil/database.h>
7#include "../lib/enums.h" 8#include "../lib/enums.h"
8 9
9namespace verbly { 10namespace verbly {
10 namespace generator { 11 namespace generator {
11 12
12 class database;
13 class form; 13 class form;
14 14
15 class lemma { 15 class lemma {
@@ -50,7 +50,7 @@ namespace verbly {
50 50
51 // Serializer 51 // Serializer
52 52
53 database& operator<<(database& db, const lemma& arg); 53 hatkirby::database& operator<<(hatkirby::database& db, const lemma& arg);
54 54
55 }; 55 };
56}; 56};
diff --git a/generator/notion.cpp b/generator/notion.cpp
index 35ba7b1..f388767 100644
--- a/generator/notion.cpp
+++ b/generator/notion.cpp
@@ -1,8 +1,4 @@
1#include "notion.h" 1#include "notion.h"
2#include <string>
3#include <list>
4#include "database.h"
5#include "field.h"
6 2
7namespace verbly { 3namespace verbly {
8 namespace generator { 4 namespace generator {
@@ -28,21 +24,25 @@ namespace verbly {
28 24
29 void notion::incrementNumOfImages() 25 void notion::incrementNumOfImages()
30 { 26 {
31 // Calling code should always call hasWnid and check that the notion is a noun first. 27 if (!hasWnid_ || (partOfSpeech_ != part_of_speech::noun))
32 assert(hasWnid_ && (partOfSpeech_ == part_of_speech::noun)); 28 {
29 throw std::domain_error("Notion is not a noun with wnid");
30 }
33 31
34 numOfImages_++; 32 numOfImages_++;
35 } 33 }
36 34
37 void notion::setPrepositionGroups(std::list<std::string> groups) 35 void notion::setPrepositionGroups(std::list<std::string> groups)
38 { 36 {
39 // Calling code should always check that the notion is a preposition first. 37 if (partOfSpeech_ != part_of_speech::preposition)
40 assert(partOfSpeech_ == part_of_speech::preposition); 38 {
39 throw std::domain_error("Notion is not a preposition");
40 }
41 41
42 prepositionGroups_ = groups; 42 prepositionGroups_ = groups;
43 } 43 }
44 44
45 database& operator<<(database& db, const notion& arg) 45 hatkirby::database& operator<<(hatkirby::database& db, const notion& arg)
46 { 46 {
47 // First, serialize the notion 47 // First, serialize the notion
48 { 48 {
diff --git a/generator/notion.h b/generator/notion.h
index 817e66a..3bff916 100644
--- a/generator/notion.h
+++ b/generator/notion.h
@@ -1,9 +1,9 @@
1#ifndef NOTION_H_221DE2BC 1#ifndef NOTION_H_221DE2BC
2#define NOTION_H_221DE2BC 2#define NOTION_H_221DE2BC
3 3
4#include <cassert>
5#include <list> 4#include <list>
6#include <string> 5#include <string>
6#include <hkutil/database.h>
7#include "../lib/enums.h" 7#include "../lib/enums.h"
8 8
9namespace verbly { 9namespace verbly {
@@ -43,24 +43,30 @@ namespace verbly {
43 43
44 int getWnid() const 44 int getWnid() const
45 { 45 {
46 // Calling code should always call hasWnid first. 46 if (!hasWnid_)
47 assert(hasWnid_); 47 {
48 throw std::domain_error("Notion does not have wnid");
49 }
48 50
49 return wnid_; 51 return wnid_;
50 } 52 }
51 53
52 int getNumOfImages() const 54 int getNumOfImages() const
53 { 55 {
54 // Calling code should always call hasWnid and check that the notion is a noun first. 56 if (!hasWnid_ || (partOfSpeech_ != part_of_speech::noun))
55 assert(hasWnid_ && (partOfSpeech_ == part_of_speech::noun)); 57 {
58 throw std::domain_error("Notion is not a noun with wnid");
59 }
56 60
57 return numOfImages_; 61 return numOfImages_;
58 } 62 }
59 63
60 std::list<std::string> getPrepositionGroups() const 64 std::list<std::string> getPrepositionGroups() const
61 { 65 {
62 // Calling code should always check that the notion is a preposition first. 66 if (partOfSpeech_ != part_of_speech::preposition)
63 assert(partOfSpeech_ == part_of_speech::preposition); 67 {
68 throw std::domain_error("Notion is not a preposition");
69 }
64 70
65 return prepositionGroups_; 71 return prepositionGroups_;
66 } 72 }
@@ -81,7 +87,7 @@ namespace verbly {
81 87
82 // Serializer 88 // Serializer
83 89
84 database& operator<<(database& db, const notion& arg); 90 hatkirby::database& operator<<(hatkirby::database& db, const notion& arg);
85 91
86 }; 92 };
87}; 93};
diff --git a/generator/progress.h b/generator/progress.h
deleted file mode 100644
index 76cde48..0000000
--- a/generator/progress.h
+++ /dev/null
@@ -1,56 +0,0 @@
-#ifndef PROGRESS_H_A34EF856
-#define PROGRESS_H_A34EF856
-
-#include <string>
-
-namespace verbly {
-  namespace generator {
-
-    class progress {
-      private:
-        std::string message;
-        int total;
-        int cur = 0;
-        int lprint = 0;
-
-      public:
-        progress(std::string message, int total) : message(message), total(total)
-        {
-          std::cout << message << " 0%" << std::flush;
-        }
-
-        void update(int val)
-        {
-          if (val <= total)
-          {
-            cur = val;
-          } else {
-            cur = total;
-          }
-
-          int pp = cur * 100 / total;
-          if (pp != lprint)
-          {
-            lprint = pp;
-
-            std::cout << "\b\b\b\b" << std::right;
-            std::cout.width(3);
-            std::cout << pp << "%" << std::flush;
-          }
-        }
-
-        void update()
-        {
-          update(cur+1);
-        }
-
-        ~progress()
-        {
-          std::cout << "\b\b\b\b100%" << std::endl;
-        }
-    };
-
-  };
-};
-
-#endif /* end of include guard: PROGRESS_H_A34EF856 */
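The generator's hand-rolled progress bar is deleted because hkutil ships an equivalent. Judging purely by the call sites in generator.cpp above (construct with a message and a total, then call update() once per item), usage looks roughly like this; anything beyond those two members is an assumption:

#include <hkutil/progress.h>
#include <list>
#include <string>

void readLinesSketch(const std::list<std::string>& lines)
{
  hatkirby::progress ppgs("Reading lines...", lines.size());

  for (const std::string& line : lines)
  {
    // Advance the bar by one step per processed line, as the reader
    // functions above do.
    ppgs.update();
  }
}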
diff --git a/generator/pronunciation.cpp b/generator/pronunciation.cpp
index eb07607..3075d42 100644
--- a/generator/pronunciation.cpp
+++ b/generator/pronunciation.cpp
@@ -3,9 +3,7 @@
3#include <algorithm> 3#include <algorithm>
4#include <cctype> 4#include <cctype>
5#include <iterator> 5#include <iterator>
6#include "database.h" 6#include <hkutil/string.h>
7#include "field.h"
8#include "../lib/util.h"
9 7
10namespace verbly { 8namespace verbly {
11 namespace generator { 9 namespace generator {
@@ -16,28 +14,45 @@ namespace verbly {
16 id_(nextId_++), 14 id_(nextId_++),
17 phonemes_(phonemes) 15 phonemes_(phonemes)
18 { 16 {
19 auto phonemeList = split<std::list<std::string>>(phonemes, " "); 17 auto phonemeList =
20 18 hatkirby::split<std::list<std::string>>(phonemes, " ");
21 auto rhymeStart = std::find_if(std::begin(phonemeList), std::end(phonemeList), [] (std::string phoneme) { 19
22 return phoneme.find("1") != std::string::npos; 20 std::list<std::string>::iterator rhymeStart =
23 }); 21 std::find_if(
22 std::begin(phonemeList),
23 std::end(phonemeList),
24 [] (std::string phoneme) {
25 return phoneme.find("1") != std::string::npos;
26 });
24 27
25 // Rhyme detection 28 // Rhyme detection
26 if (rhymeStart != std::end(phonemeList)) 29 if (rhymeStart != std::end(phonemeList))
27 { 30 {
28 std::list<std::string> rhymePhonemes; 31 std::list<std::string> rhymePhonemes;
29 32
30 std::transform(rhymeStart, std::end(phonemeList), std::back_inserter(rhymePhonemes), [] (std::string phoneme) { 33 std::transform(
31 std::string naked; 34 rhymeStart,
32 35 std::end(phonemeList),
33 std::remove_copy_if(std::begin(phoneme), std::end(phoneme), std::back_inserter(naked), [] (char ch) { 36 std::back_inserter(rhymePhonemes),
34 return std::isdigit(ch); 37 [] (std::string phoneme) {
38 std::string naked;
39
40 std::remove_copy_if(
41 std::begin(phoneme),
42 std::end(phoneme),
43 std::back_inserter(naked),
44 [] (char ch) {
45 return std::isdigit(ch);
46 });
47
48 return naked;
35 }); 49 });
36 50
37 return naked; 51 rhyme_ =
38 }); 52 hatkirby::implode(
39 53 std::begin(rhymePhonemes),
40 rhyme_ = implode(std::begin(rhymePhonemes), std::end(rhymePhonemes), " "); 54 std::end(rhymePhonemes),
55 " ");
41 56
42 if (rhymeStart != std::begin(phonemeList)) 57 if (rhymeStart != std::begin(phonemeList))
43 { 58 {
@@ -63,9 +78,11 @@ namespace verbly {
63 } 78 }
64 } 79 }
65 80
66 database& operator<<(database& db, const pronunciation& arg) 81 hatkirby::database& operator<<(
82 hatkirby::database& db,
83 const pronunciation& arg)
67 { 84 {
68 std::list<field> fields; 85 std::list<hatkirby::column> fields;
69 86
70 fields.emplace_back("pronunciation_id", arg.getId()); 87 fields.emplace_back("pronunciation_id", arg.getId());
71 fields.emplace_back("phonemes", arg.getPhonemes()); 88 fields.emplace_back("phonemes", arg.getPhonemes());
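For reference, the rhyme detection that is being re-indented above works like this: find the first phoneme carrying primary stress (a "1" digit), strip the stress digits from it and from every phoneme after it, and join the result with spaces. A standalone restatement for illustration only, using the same hkutil helpers; for the CMUDICT entry "K AE1 T" it yields "AE T":

#include <algorithm>
#include <cctype>
#include <iterator>
#include <list>
#include <string>
#include <hkutil/string.h>

// Simplified restatement of the rhyme detection in the constructor above.
std::string rhymeOf(const std::string& phonemes)
{
  auto phonemeList = hatkirby::split<std::list<std::string>>(phonemes, " ");

  // The rhyme begins at the first phoneme with primary stress.
  auto rhymeStart = std::find_if(
    std::begin(phonemeList),
    std::end(phonemeList),
    [] (const std::string& phoneme) {
      return phoneme.find("1") != std::string::npos;
    });

  if (rhymeStart == std::end(phonemeList))
  {
    return "";
  }

  std::list<std::string> rhymePhonemes;

  // Copy the remaining phonemes with their stress digits removed.
  std::transform(
    rhymeStart,
    std::end(phonemeList),
    std::back_inserter(rhymePhonemes),
    [] (const std::string& phoneme) {
      std::string naked;

      std::remove_copy_if(
        std::begin(phoneme),
        std::end(phoneme),
        std::back_inserter(naked),
        [] (char ch) {
          return std::isdigit(static_cast<unsigned char>(ch));
        });

      return naked;
    });

  return hatkirby::implode(
    std::begin(rhymePhonemes), std::end(rhymePhonemes), " ");
}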
diff --git a/generator/pronunciation.h b/generator/pronunciation.h
index 81be6c4..163e55e 100644
--- a/generator/pronunciation.h
+++ b/generator/pronunciation.h
@@ -3,12 +3,11 @@
3 3
4#include <string> 4#include <string>
5#include <cassert> 5#include <cassert>
6#include <hkutil/database.h>
6 7
7namespace verbly { 8namespace verbly {
8 namespace generator { 9 namespace generator {
9 10
10 class database;
11
12 class pronunciation { 11 class pronunciation {
13 public: 12 public:
14 13
@@ -74,7 +73,9 @@ namespace verbly {
74 73
75 // Serializer 74 // Serializer
76 75
77 database& operator<<(database& db, const pronunciation& arg); 76 hatkirby::database& operator<<(
77 hatkirby::database& db,
78 const pronunciation& arg);
78 79
79 }; 80 };
80}; 81};
diff --git a/generator/word.cpp b/generator/word.cpp
index b3fc490..360cd6a 100644
--- a/generator/word.cpp
+++ b/generator/word.cpp
@@ -1,10 +1,8 @@
1#include "word.h" 1#include "word.h"
2#include <list> 2#include <list>
3#include <string> 3#include <string>
4#include "database.h"
5#include "notion.h" 4#include "notion.h"
6#include "lemma.h" 5#include "lemma.h"
7#include "field.h"
8#include "group.h" 6#include "group.h"
9 7
10namespace verbly { 8namespace verbly {
@@ -43,9 +41,9 @@ namespace verbly {
43 verbGroup_ = &verbGroup; 41 verbGroup_ = &verbGroup;
44 } 42 }
45 43
46 database& operator<<(database& db, const word& arg) 44 hatkirby::database& operator<<(hatkirby::database& db, const word& arg)
47 { 45 {
48 std::list<field> fields; 46 std::list<hatkirby::column> fields;
49 47
50 fields.emplace_back("word_id", arg.getId()); 48 fields.emplace_back("word_id", arg.getId());
51 fields.emplace_back("notion_id", arg.getNotion().getId()); 49 fields.emplace_back("notion_id", arg.getNotion().getId());
@@ -59,7 +57,8 @@ namespace verbly {
59 if ((arg.getNotion().getPartOfSpeech() == part_of_speech::adjective) 57 if ((arg.getNotion().getPartOfSpeech() == part_of_speech::adjective)
60 && (arg.getAdjectivePosition() != positioning::undefined)) 58 && (arg.getAdjectivePosition() != positioning::undefined))
61 { 59 {
62 fields.emplace_back("position", static_cast<int>(arg.getAdjectivePosition())); 60 fields.emplace_back("position",
61 static_cast<int>(arg.getAdjectivePosition()));
63 } 62 }
64 63
65 if ((arg.getNotion().getPartOfSpeech() == part_of_speech::verb) 64 if ((arg.getNotion().getPartOfSpeech() == part_of_speech::verb)
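Where a row's columns are conditional, as in word.cpp above, the serializer still assembles an explicit list, now typed std::list<hatkirby::column>, and moves it into insertIntoTable() at the end. The closing call and the "words" table name below are assumptions for illustration; they are not visible in the hunk above:

// Fragment, as it would sit inside word.cpp's operator<< above.
std::list<hatkirby::column> fields;

fields.emplace_back("word_id", arg.getId());
fields.emplace_back("notion_id", arg.getNotion().getId());

if ((arg.getNotion().getPartOfSpeech() == part_of_speech::adjective)
  && (arg.getAdjectivePosition() != positioning::undefined))
{
  fields.emplace_back("position",
    static_cast<int>(arg.getAdjectivePosition()));
}

// Hypothetical completion; the real table name is not shown in this hunk.
db.insertIntoTable("words", std::move(fields));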
diff --git a/generator/word.h b/generator/word.h
index a994ec3..2e469d4 100644
--- a/generator/word.h
+++ b/generator/word.h
@@ -2,6 +2,7 @@
2#define WORD_H_91F99D46 2#define WORD_H_91F99D46
3 3
4#include <cassert> 4#include <cassert>
5#include <hkutil/database.h>
5#include "../lib/enums.h" 6#include "../lib/enums.h"
6 7
7namespace verbly { 8namespace verbly {
@@ -9,7 +10,6 @@ namespace verbly {
9 10
10 class notion; 11 class notion;
11 class lemma; 12 class lemma;
12 class database;
13 class group; 13 class group;
14 14
15 class word { 15 class word {
@@ -102,7 +102,7 @@ namespace verbly {
102 102
103 // Serializer 103 // Serializer
104 104
105 database& operator<<(database& db, const word& arg); 105 hatkirby::database& operator<<(hatkirby::database& db, const word& arg);
106 106
107 }; 107 };
108}; 108};