diff options
Diffstat (limited to 'generator')
| -rw-r--r-- | generator/CMakeLists.txt | 8 | ||||
| -rw-r--r-- | generator/form.cpp | 35 | ||||
| -rw-r--r-- | generator/form.h | 3 | ||||
| -rw-r--r-- | generator/frame.cpp | 2 | ||||
| -rw-r--r-- | generator/frame.h | 10 | ||||
| -rw-r--r-- | generator/generator.cpp | 275 | ||||
| -rw-r--r-- | generator/generator.h | 2 | ||||
| -rw-r--r-- | generator/group.cpp | 59 | ||||
| -rw-r--r-- | generator/group.h | 4 | ||||
| -rw-r--r-- | generator/lemma.cpp | 17 | ||||
| -rw-r--r-- | generator/lemma.h | 4 | ||||
| -rw-r--r-- | generator/notion.cpp | 18 | ||||
| -rw-r--r-- | generator/notion.h | 22 | ||||
| -rw-r--r-- | generator/progress.h | 56 | ||||
| -rw-r--r-- | generator/pronunciation.cpp | 55 | ||||
| -rw-r--r-- | generator/pronunciation.h | 7 | ||||
| -rw-r--r-- | generator/word.cpp | 9 | ||||
| -rw-r--r-- | generator/word.h | 4 |
18 files changed, 328 insertions, 262 deletions
| diff --git a/generator/CMakeLists.txt b/generator/CMakeLists.txt index 95a11b5..8c070d2 100644 --- a/generator/CMakeLists.txt +++ b/generator/CMakeLists.txt | |||
| @@ -5,8 +5,12 @@ find_package(PkgConfig) | |||
| 5 | pkg_check_modules(sqlite3 sqlite3 REQUIRED) | 5 | pkg_check_modules(sqlite3 sqlite3 REQUIRED) |
| 6 | find_package(libxml2 REQUIRED) | 6 | find_package(libxml2 REQUIRED) |
| 7 | 7 | ||
| 8 | include_directories(${sqlite3_INCLUDE_DIR} ${LIBXML2_INCLUDE_DIR}) | 8 | include_directories( |
| 9 | add_executable(generator notion.cpp word.cpp lemma.cpp form.cpp pronunciation.cpp group.cpp frame.cpp part.cpp database.cpp field.cpp generator.cpp main.cpp) | 9 | ${sqlite3_INCLUDE_DIR} |
| 10 | ${LIBXML2_INCLUDE_DIR} | ||
| 11 | ../vendor/hkutil) | ||
| 12 | |||
| 13 | add_executable(generator notion.cpp word.cpp lemma.cpp form.cpp pronunciation.cpp group.cpp frame.cpp part.cpp generator.cpp main.cpp) | ||
| 10 | set_property(TARGET generator PROPERTY CXX_STANDARD 11) | 14 | set_property(TARGET generator PROPERTY CXX_STANDARD 11) |
| 11 | set_property(TARGET generator PROPERTY CXX_STANDARD_REQUIRED ON) | 15 | set_property(TARGET generator PROPERTY CXX_STANDARD_REQUIRED ON) |
| 12 | target_link_libraries(generator ${sqlite3_LIBRARIES} ${LIBXML2_LIBRARIES}) | 16 | target_link_libraries(generator ${sqlite3_LIBRARIES} ${LIBXML2_LIBRARIES}) |
| diff --git a/generator/form.cpp b/generator/form.cpp index f616344..c66820c 100644 --- a/generator/form.cpp +++ b/generator/form.cpp | |||
| @@ -1,8 +1,7 @@ | |||
| 1 | #include "form.h" | 1 | #include "form.h" |
| 2 | #include <algorithm> | 2 | #include <algorithm> |
| 3 | #include <list> | 3 | #include <list> |
| 4 | #include "database.h" | 4 | #include <cctype> |
| 5 | #include "field.h" | ||
| 6 | #include "pronunciation.h" | 5 | #include "pronunciation.h" |
| 7 | 6 | ||
| 8 | namespace verbly { | 7 | namespace verbly { |
| @@ -14,7 +13,7 @@ namespace verbly { | |||
| 14 | id_(nextId_++), | 13 | id_(nextId_++), |
| 15 | text_(text), | 14 | text_(text), |
| 16 | complexity_(std::count(std::begin(text), std::end(text), ' ') + 1), | 15 | complexity_(std::count(std::begin(text), std::end(text), ' ') + 1), |
| 17 | proper_(std::any_of(std::begin(text), std::end(text), std::isupper)), | 16 | proper_(std::any_of(std::begin(text), std::end(text), ::isupper)), |
| 18 | length_(text.length()) | 17 | length_(text.length()) |
| 19 | { | 18 | { |
| 20 | } | 19 | } |
| @@ -24,28 +23,30 @@ namespace verbly { | |||
| 24 | pronunciations_.insert(&p); | 23 | pronunciations_.insert(&p); |
| 25 | } | 24 | } |
| 26 | 25 | ||
| 27 | database& operator<<(database& db, const form& arg) | 26 | hatkirby::database& operator<<(hatkirby::database& db, const form& arg) |
| 28 | { | 27 | { |
| 29 | // Serialize the form first. | 28 | // Serialize the form first. |
| 30 | { | 29 | { |
| 31 | std::list<field> fields; | 30 | db.insertIntoTable( |
| 32 | fields.emplace_back("form_id", arg.getId()); | 31 | "forms", |
| 33 | fields.emplace_back("form", arg.getText()); | 32 | { |
| 34 | fields.emplace_back("complexity", arg.getComplexity()); | 33 | { "form_id", arg.getId() }, |
| 35 | fields.emplace_back("proper", arg.isProper()); | 34 | { "form", arg.getText() }, |
| 36 | fields.emplace_back("length", arg.getLength()); | 35 | { "complexity", arg.getComplexity() }, |
| 37 | 36 | { "proper", arg.isProper() }, | |
| 38 | db.insertIntoTable("forms", std::move(fields)); | 37 | { "length", arg.getLength() } |
| 38 | }); | ||
| 39 | } | 39 | } |
| 40 | 40 | ||
| 41 | // Then, serialize the form/pronunciation relationship. | 41 | // Then, serialize the form/pronunciation relationship. |
| 42 | for (const pronunciation* p : arg.getPronunciations()) | 42 | for (const pronunciation* p : arg.getPronunciations()) |
| 43 | { | 43 | { |
| 44 | std::list<field> fields; | 44 | db.insertIntoTable( |
| 45 | fields.emplace_back("form_id", arg.getId()); | 45 | "forms_pronunciations", |
| 46 | fields.emplace_back("pronunciation_id", p->getId()); | 46 | { |
| 47 | 47 | { "form_id", arg.getId() }, | |
| 48 | db.insertIntoTable("forms_pronunciations", std::move(fields)); | 48 | { "pronunciation_id", p->getId() } |
| 49 | }); | ||
| 49 | } | 50 | } |
| 50 | 51 | ||
| 51 | return db; | 52 | return db; |
| diff --git a/generator/form.h b/generator/form.h index 37fd3cc..f3dd779 100644 --- a/generator/form.h +++ b/generator/form.h | |||
| @@ -3,6 +3,7 @@ | |||
| 3 | 3 | ||
| 4 | #include <string> | 4 | #include <string> |
| 5 | #include <set> | 5 | #include <set> |
| 6 | #include <hkutil/database.h> | ||
| 6 | 7 | ||
| 7 | namespace verbly { | 8 | namespace verbly { |
| 8 | namespace generator { | 9 | namespace generator { |
| @@ -68,7 +69,7 @@ namespace verbly { | |||
| 68 | 69 | ||
| 69 | // Serializer | 70 | // Serializer |
| 70 | 71 | ||
| 71 | database& operator<<(database& db, const form& arg); | 72 | hatkirby::database& operator<<(hatkirby::database& db, const form& arg); |
| 72 | 73 | ||
| 73 | }; | 74 | }; |
| 74 | }; | 75 | }; |
| diff --git a/generator/frame.cpp b/generator/frame.cpp index 4e4ac5f..60cedc4 100644 --- a/generator/frame.cpp +++ b/generator/frame.cpp | |||
| @@ -1,6 +1,4 @@ | |||
| 1 | #include "frame.h" | 1 | #include "frame.h" |
| 2 | #include "database.h" | ||
| 3 | #include "field.h" | ||
| 4 | 2 | ||
| 5 | namespace verbly { | 3 | namespace verbly { |
| 6 | namespace generator { | 4 | namespace generator { |
| diff --git a/generator/frame.h b/generator/frame.h index d26d500..3e15b39 100644 --- a/generator/frame.h +++ b/generator/frame.h | |||
| @@ -8,8 +8,6 @@ | |||
| 8 | namespace verbly { | 8 | namespace verbly { |
| 9 | namespace generator { | 9 | namespace generator { |
| 10 | 10 | ||
| 11 | class database; | ||
| 12 | |||
| 13 | class frame { | 11 | class frame { |
| 14 | public: | 12 | public: |
| 15 | 13 | ||
| @@ -20,9 +18,9 @@ namespace verbly { | |||
| 20 | // Constructor | 18 | // Constructor |
| 21 | 19 | ||
| 22 | frame(); | 20 | frame(); |
| 23 | 21 | ||
| 24 | // Duplication | 22 | // Duplication |
| 25 | 23 | ||
| 26 | static frame duplicate(const frame& other); | 24 | static frame duplicate(const frame& other); |
| 27 | 25 | ||
| 28 | // Mutators | 26 | // Mutators |
| @@ -35,12 +33,12 @@ namespace verbly { | |||
| 35 | { | 33 | { |
| 36 | return id_; | 34 | return id_; |
| 37 | } | 35 | } |
| 38 | 36 | ||
| 39 | int getLength() const | 37 | int getLength() const |
| 40 | { | 38 | { |
| 41 | return parts_.size(); | 39 | return parts_.size(); |
| 42 | } | 40 | } |
| 43 | 41 | ||
| 44 | const part& operator[](int index) const | 42 | const part& operator[](int index) const |
| 45 | { | 43 | { |
| 46 | return parts_.at(index); | 44 | return parts_.at(index); |
| diff --git a/generator/generator.cpp b/generator/generator.cpp index e34ca69..785ec87 100644 --- a/generator/generator.cpp +++ b/generator/generator.cpp | |||
| @@ -1,16 +1,14 @@ | |||
| 1 | #include "generator.h" | 1 | #include "generator.h" |
| 2 | #include <cassert> | ||
| 3 | #include <stdexcept> | 2 | #include <stdexcept> |
| 4 | #include <iostream> | 3 | #include <iostream> |
| 5 | #include <regex> | 4 | #include <regex> |
| 6 | #include <dirent.h> | 5 | #include <dirent.h> |
| 7 | #include <fstream> | 6 | #include <fstream> |
| 8 | #include "../lib/enums.h" | 7 | #include <hkutil/string.h> |
| 9 | #include "progress.h" | 8 | #include <hkutil/progress.h> |
| 10 | #include "role.h" | 9 | #include "role.h" |
| 11 | #include "part.h" | 10 | #include "part.h" |
| 12 | #include "field.h" | 11 | #include "../lib/enums.h" |
| 13 | #include "../lib/util.h" | ||
| 14 | #include "../lib/version.h" | 12 | #include "../lib/version.h" |
| 15 | 13 | ||
| 16 | namespace verbly { | 14 | namespace verbly { |
| @@ -28,7 +26,7 @@ namespace verbly { | |||
| 28 | wordNetPath_(wordNetPath), | 26 | wordNetPath_(wordNetPath), |
| 29 | cmudictPath_(cmudictPath), | 27 | cmudictPath_(cmudictPath), |
| 30 | imageNetPath_(imageNetPath), | 28 | imageNetPath_(imageNetPath), |
| 31 | db_(outputPath) | 29 | db_(outputPath, hatkirby::dbmode::create) |
| 32 | { | 30 | { |
| 33 | // Ensure VerbNet directory exists | 31 | // Ensure VerbNet directory exists |
| 34 | DIR* dir; | 32 | DIR* dir; |
| @@ -53,7 +51,8 @@ namespace verbly { | |||
| 53 | 51 | ||
| 54 | // Ensure WordNet tables exist | 52 | // Ensure WordNet tables exist |
| 55 | for (std::string table : { | 53 | for (std::string table : { |
| 56 | "s", "sk", "ant", "at", "cls", "hyp", "ins", "mm", "mp", "ms", "per", "sa", "sim", "syntax" | 54 | "s", "sk", "ant", "at", "cls", "hyp", "ins", "mm", "mp", "ms", "per", |
| 55 | "sa", "sim", "syntax" | ||
| 57 | }) | 56 | }) |
| 58 | { | 57 | { |
| 59 | if (!std::ifstream(wordNetPath_ + "wn_" + table + ".pl")) | 58 | if (!std::ifstream(wordNetPath_ + "wn_" + table + ".pl")) |
| @@ -166,13 +165,15 @@ namespace verbly { | |||
| 166 | void generator::readWordNetSynsets() | 165 | void generator::readWordNetSynsets() |
| 167 | { | 166 | { |
| 168 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_s.pl")); | 167 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_s.pl")); |
| 169 | progress ppgs("Reading synsets from WordNet...", lines.size()); | 168 | hatkirby::progress ppgs("Reading synsets from WordNet...", lines.size()); |
| 170 | 169 | ||
| 171 | for (std::string line : lines) | 170 | for (std::string line : lines) |
| 172 | { | 171 | { |
| 173 | ppgs.update(); | 172 | ppgs.update(); |
| 174 | 173 | ||
| 175 | std::regex relation("^s\\(([1234]\\d{8}),(\\d+),'(.+)',\\w,\\d+,(\\d+)\\)\\.$"); | 174 | std::regex relation( |
| 175 | "^s\\(([1234]\\d{8}),(\\d+),'(.+)',\\w,\\d+,(\\d+)\\)\\.$"); | ||
| 176 | |||
| 176 | std::smatch relation_data; | 177 | std::smatch relation_data; |
| 177 | if (!std::regex_search(line, relation_data, relation)) | 178 | if (!std::regex_search(line, relation_data, relation)) |
| 178 | { | 179 | { |
| @@ -206,7 +207,10 @@ namespace verbly { | |||
| 206 | void generator::readAdjectivePositioning() | 207 | void generator::readAdjectivePositioning() |
| 207 | { | 208 | { |
| 208 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_syntax.pl")); | 209 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_syntax.pl")); |
| 209 | progress ppgs("Reading adjective positionings from WordNet...", lines.size()); | 210 | |
| 211 | hatkirby::progress ppgs( | ||
| 212 | "Reading adjective positionings from WordNet...", | ||
| 213 | lines.size()); | ||
| 210 | 214 | ||
| 211 | for (std::string line : lines) | 215 | for (std::string line : lines) |
| 212 | { | 216 | { |
| @@ -279,7 +283,10 @@ namespace verbly { | |||
| 279 | void generator::readWordNetSenseKeys() | 283 | void generator::readWordNetSenseKeys() |
| 280 | { | 284 | { |
| 281 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_sk.pl")); | 285 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_sk.pl")); |
| 282 | progress ppgs("Reading sense keys from WordNet...", lines.size()); | 286 | |
| 287 | hatkirby::progress ppgs( | ||
| 288 | "Reading sense keys from WordNet...", | ||
| 289 | lines.size()); | ||
| 283 | 290 | ||
| 284 | for (std::string line : lines) | 291 | for (std::string line : lines) |
| 285 | { | 292 | { |
| @@ -350,7 +357,8 @@ namespace verbly { | |||
| 350 | } | 357 | } |
| 351 | 358 | ||
| 352 | xmlNodePtr top = xmlDocGetRootElement(doc); | 359 | xmlNodePtr top = xmlDocGetRootElement(doc); |
| 353 | if ((top == nullptr) || (xmlStrcmp(top->name, reinterpret_cast<const xmlChar*>("VNCLASS")))) | 360 | if ((top == nullptr) || |
| 361 | (xmlStrcmp(top->name, reinterpret_cast<const xmlChar*>("VNCLASS")))) | ||
| 354 | { | 362 | { |
| 355 | throw std::logic_error("Bad VerbNet file format: " + filename); | 363 | throw std::logic_error("Bad VerbNet file format: " + filename); |
| 356 | } | 364 | } |
| @@ -360,7 +368,8 @@ namespace verbly { | |||
| 360 | createGroup(top); | 368 | createGroup(top); |
| 361 | } catch (const std::exception& e) | 369 | } catch (const std::exception& e) |
| 362 | { | 370 | { |
| 363 | std::throw_with_nested(std::logic_error("Error parsing VerbNet file: " + filename)); | 371 | std::throw_with_nested( |
| 372 | std::logic_error("Error parsing VerbNet file: " + filename)); | ||
| 364 | } | 373 | } |
| 365 | } | 374 | } |
| 366 | 375 | ||
| @@ -370,7 +379,7 @@ namespace verbly { | |||
| 370 | void generator::readAgidInflections() | 379 | void generator::readAgidInflections() |
| 371 | { | 380 | { |
| 372 | std::list<std::string> lines(readFile(agidPath_)); | 381 | std::list<std::string> lines(readFile(agidPath_)); |
| 373 | progress ppgs("Reading inflections from AGID...", lines.size()); | 382 | hatkirby::progress ppgs("Reading inflections from AGID...", lines.size()); |
| 374 | 383 | ||
| 375 | for (std::string line : lines) | 384 | for (std::string line : lines) |
| 376 | { | 385 | { |
| @@ -395,12 +404,17 @@ namespace verbly { | |||
| 395 | 404 | ||
| 396 | lemma& curLemma = lookupOrCreateLemma(infinitive); | 405 | lemma& curLemma = lookupOrCreateLemma(infinitive); |
| 397 | 406 | ||
| 407 | auto inflWordList = | ||
| 408 | hatkirby::split<std::list<std::string>>(line, " | "); | ||
| 409 | |||
| 398 | std::vector<std::list<std::string>> agidForms; | 410 | std::vector<std::list<std::string>> agidForms; |
| 399 | for (std::string inflForms : split<std::list<std::string>>(line, " | ")) | 411 | for (std::string inflForms : inflWordList) |
| 400 | { | 412 | { |
| 401 | std::list<std::string> forms; | 413 | auto inflFormList = |
| 414 | hatkirby::split<std::list<std::string>>(std::move(inflForms), ", "); | ||
| 402 | 415 | ||
| 403 | for (std::string inflForm : split<std::list<std::string>>(std::move(inflForms), ", ")) | 416 | std::list<std::string> forms; |
| 417 | for (std::string inflForm : inflFormList) | ||
| 404 | { | 418 | { |
| 405 | int sympos = inflForm.find_first_of("~<!? "); | 419 | int sympos = inflForm.find_first_of("~<!? "); |
| 406 | if (sympos != std::string::npos) | 420 | if (sympos != std::string::npos) |
| @@ -443,7 +457,8 @@ namespace verbly { | |||
| 443 | // - may and shall do not conjugate the way we want them to | 457 | // - may and shall do not conjugate the way we want them to |
| 444 | // - methinks only has a past tense and is an outlier | 458 | // - methinks only has a past tense and is an outlier |
| 445 | // - wit has five forms, and is archaic/obscure enough that we can ignore it for now | 459 | // - wit has five forms, and is archaic/obscure enough that we can ignore it for now |
| 446 | std::cout << " Ignoring verb \"" << infinitive << "\" due to non-standard number of forms." << std::endl; | 460 | std::cout << " Ignoring verb \"" << infinitive |
| 461 | << "\" due to non-standard number of forms." << std::endl; | ||
| 447 | } | 462 | } |
| 448 | 463 | ||
| 449 | // For verbs in particular, we sometimes create a notion and a word | 464 | // For verbs in particular, we sometimes create a notion and a word |
| @@ -452,9 +467,13 @@ namespace verbly { | |||
| 452 | // that this verb appears in the AGID data but not in either WordNet | 467 | // that this verb appears in the AGID data but not in either WordNet |
| 453 | // or VerbNet. | 468 | // or VerbNet. |
| 454 | if (!wordsByBaseForm_.count(infinitive) | 469 | if (!wordsByBaseForm_.count(infinitive) |
| 455 | || !std::any_of(std::begin(wordsByBaseForm_.at(infinitive)), std::end(wordsByBaseForm_.at(infinitive)), [] (word* w) { | 470 | || !std::any_of( |
| 456 | return w->getNotion().getPartOfSpeech() == part_of_speech::verb; | 471 | std::begin(wordsByBaseForm_.at(infinitive)), |
| 457 | })) | 472 | std::end(wordsByBaseForm_.at(infinitive)), |
| 473 | [] (word* w) { | ||
| 474 | return (w->getNotion().getPartOfSpeech() == | ||
| 475 | part_of_speech::verb); | ||
| 476 | })) | ||
| 458 | { | 477 | { |
| 459 | notion& n = createNotion(part_of_speech::verb); | 478 | notion& n = createNotion(part_of_speech::verb); |
| 460 | createWord(n, curLemma); | 479 | createWord(n, curLemma); |
| @@ -471,7 +490,8 @@ namespace verbly { | |||
| 471 | mappedForms[inflection::superlative] = agidForms[1]; | 490 | mappedForms[inflection::superlative] = agidForms[1]; |
| 472 | } else { | 491 | } else { |
| 473 | // As of AGID 2014.08.11, this is only "only", which has only the form "onliest" | 492 | // As of AGID 2014.08.11, this is only "only", which has only the form "onliest" |
| 474 | std::cout << " Ignoring adjective/adverb \"" << infinitive << "\" due to non-standard number of forms." << std::endl; | 493 | std::cout << " Ignoring adjective/adverb \"" << infinitive |
| 494 | << "\" due to non-standard number of forms." << std::endl; | ||
| 475 | } | 495 | } |
| 476 | 496 | ||
| 477 | break; | 497 | break; |
| @@ -484,7 +504,8 @@ namespace verbly { | |||
| 484 | mappedForms[inflection::plural] = agidForms[0]; | 504 | mappedForms[inflection::plural] = agidForms[0]; |
| 485 | } else { | 505 | } else { |
| 486 | // As of AGID 2014.08.11, this is non-existent. | 506 | // As of AGID 2014.08.11, this is non-existent. |
| 487 | std::cout << " Ignoring noun \"" << infinitive << "\" due to non-standard number of forms." << std::endl; | 507 | std::cout << " Ignoring noun \"" << infinitive |
| 508 | << "\" due to non-standard number of forms." << std::endl; | ||
| 488 | } | 509 | } |
| 489 | 510 | ||
| 490 | break; | 511 | break; |
| @@ -496,7 +517,9 @@ namespace verbly { | |||
| 496 | { | 517 | { |
| 497 | for (std::string infl : std::move(mapping.second)) | 518 | for (std::string infl : std::move(mapping.second)) |
| 498 | { | 519 | { |
| 499 | curLemma.addInflection(mapping.first, lookupOrCreateForm(std::move(infl))); | 520 | curLemma.addInflection( |
| 521 | mapping.first, | ||
| 522 | lookupOrCreateForm(std::move(infl))); | ||
| 500 | } | 523 | } |
| 501 | } | 524 | } |
| 502 | } | 525 | } |
| @@ -505,7 +528,7 @@ namespace verbly { | |||
| 505 | void generator::readPrepositions() | 528 | void generator::readPrepositions() |
| 506 | { | 529 | { |
| 507 | std::list<std::string> lines(readFile("prepositions.txt")); | 530 | std::list<std::string> lines(readFile("prepositions.txt")); |
| 508 | progress ppgs("Reading prepositions...", lines.size()); | 531 | hatkirby::progress ppgs("Reading prepositions...", lines.size()); |
| 509 | 532 | ||
| 510 | for (std::string line : lines) | 533 | for (std::string line : lines) |
| 511 | { | 534 | { |
| @@ -515,7 +538,9 @@ namespace verbly { | |||
| 515 | std::smatch relation_data; | 538 | std::smatch relation_data; |
| 516 | std::regex_search(line, relation_data, relation); | 539 | std::regex_search(line, relation_data, relation); |
| 517 | std::string prep = relation_data[1]; | 540 | std::string prep = relation_data[1]; |
| 518 | auto groups = split<std::list<std::string>>(relation_data[2], ", "); | 541 | |
| 542 | auto groups = | ||
| 543 | hatkirby::split<std::list<std::string>>(relation_data[2], ", "); | ||
| 519 | 544 | ||
| 520 | notion& n = createNotion(part_of_speech::preposition); | 545 | notion& n = createNotion(part_of_speech::preposition); |
| 521 | lemma& l = lookupOrCreateLemma(prep); | 546 | lemma& l = lookupOrCreateLemma(prep); |
| @@ -528,7 +553,10 @@ namespace verbly { | |||
| 528 | void generator::readCmudictPronunciations() | 553 | void generator::readCmudictPronunciations() |
| 529 | { | 554 | { |
| 530 | std::list<std::string> lines(readFile(cmudictPath_)); | 555 | std::list<std::string> lines(readFile(cmudictPath_)); |
| 531 | progress ppgs("Reading pronunciations from CMUDICT...", lines.size()); | 556 | |
| 557 | hatkirby::progress ppgs( | ||
| 558 | "Reading pronunciations from CMUDICT...", | ||
| 559 | lines.size()); | ||
| 532 | 560 | ||
| 533 | for (std::string line : lines) | 561 | for (std::string line : lines) |
| 534 | { | 562 | { |
| @@ -538,8 +566,7 @@ namespace verbly { | |||
| 538 | std::smatch phoneme_data; | 566 | std::smatch phoneme_data; |
| 539 | if (std::regex_search(line, phoneme_data, phoneme)) | 567 | if (std::regex_search(line, phoneme_data, phoneme)) |
| 540 | { | 568 | { |
| 541 | std::string canonical(phoneme_data[1]); | 569 | std::string canonical = hatkirby::lowercase(phoneme_data[1]); |
| 542 | std::transform(std::begin(canonical), std::end(canonical), std::begin(canonical), ::tolower); | ||
| 543 | 570 | ||
| 544 | if (!formByText_.count(canonical)) | 571 | if (!formByText_.count(canonical)) |
| 545 | { | 572 | { |
| @@ -575,13 +602,14 @@ namespace verbly { | |||
| 575 | } | 602 | } |
| 576 | 603 | ||
| 577 | std::string schema = schemaBuilder.str(); | 604 | std::string schema = schemaBuilder.str(); |
| 578 | auto queries = split<std::list<std::string>>(schema, ";"); | 605 | auto queries = hatkirby::split<std::list<std::string>>(schema, ";"); |
| 579 | progress ppgs("Writing database schema...", queries.size()); | 606 | |
| 607 | hatkirby::progress ppgs("Writing database schema...", queries.size()); | ||
| 580 | for (std::string query : queries) | 608 | for (std::string query : queries) |
| 581 | { | 609 | { |
| 582 | if (!queries.empty()) | 610 | if (!queries.empty()) |
| 583 | { | 611 | { |
| 584 | db_.runQuery(query); | 612 | db_.execute(query); |
| 585 | } | 613 | } |
| 586 | 614 | ||
| 587 | ppgs.update(); | 615 | ppgs.update(); |
| @@ -590,10 +618,6 @@ namespace verbly { | |||
| 590 | 618 | ||
| 591 | void generator::writeVersion() | 619 | void generator::writeVersion() |
| 592 | { | 620 | { |
| 593 | std::list<field> fields; | ||
| 594 | fields.emplace_back("major", DATABASE_MAJOR_VERSION); | ||
| 595 | fields.emplace_back("minor", DATABASE_MINOR_VERSION); | ||
| 596 | |||
| 597 | db_.insertIntoTable( | 621 | db_.insertIntoTable( |
| 598 | "version", | 622 | "version", |
| 599 | { | 623 | { |
| @@ -605,7 +629,7 @@ namespace verbly { | |||
| 605 | void generator::dumpObjects() | 629 | void generator::dumpObjects() |
| 606 | { | 630 | { |
| 607 | { | 631 | { |
| 608 | progress ppgs("Writing notions...", notions_.size()); | 632 | hatkirby::progress ppgs("Writing notions...", notions_.size()); |
| 609 | 633 | ||
| 610 | for (notion& n : notions_) | 634 | for (notion& n : notions_) |
| 611 | { | 635 | { |
| @@ -616,7 +640,7 @@ namespace verbly { | |||
| 616 | } | 640 | } |
| 617 | 641 | ||
| 618 | { | 642 | { |
| 619 | progress ppgs("Writing words...", words_.size()); | 643 | hatkirby::progress ppgs("Writing words...", words_.size()); |
| 620 | 644 | ||
| 621 | for (word& w : words_) | 645 | for (word& w : words_) |
| 622 | { | 646 | { |
| @@ -627,7 +651,7 @@ namespace verbly { | |||
| 627 | } | 651 | } |
| 628 | 652 | ||
| 629 | { | 653 | { |
| 630 | progress ppgs("Writing lemmas...", lemmas_.size()); | 654 | hatkirby::progress ppgs("Writing lemmas...", lemmas_.size()); |
| 631 | 655 | ||
| 632 | for (lemma& l : lemmas_) | 656 | for (lemma& l : lemmas_) |
| 633 | { | 657 | { |
| @@ -638,7 +662,7 @@ namespace verbly { | |||
| 638 | } | 662 | } |
| 639 | 663 | ||
| 640 | { | 664 | { |
| 641 | progress ppgs("Writing forms...", forms_.size()); | 665 | hatkirby::progress ppgs("Writing forms...", forms_.size()); |
| 642 | 666 | ||
| 643 | for (form& f : forms_) | 667 | for (form& f : forms_) |
| 644 | { | 668 | { |
| @@ -649,7 +673,7 @@ namespace verbly { | |||
| 649 | } | 673 | } |
| 650 | 674 | ||
| 651 | { | 675 | { |
| 652 | progress ppgs("Writing pronunciations...", pronunciations_.size()); | 676 | hatkirby::progress ppgs("Writing pronunciations...", pronunciations_.size()); |
| 653 | 677 | ||
| 654 | for (pronunciation& p : pronunciations_) | 678 | for (pronunciation& p : pronunciations_) |
| 655 | { | 679 | { |
| @@ -660,7 +684,7 @@ namespace verbly { | |||
| 660 | } | 684 | } |
| 661 | 685 | ||
| 662 | { | 686 | { |
| 663 | progress ppgs("Writing verb frames...", groups_.size()); | 687 | hatkirby::progress ppgs("Writing verb frames...", groups_.size()); |
| 664 | 688 | ||
| 665 | for (group& g : groups_) | 689 | for (group& g : groups_) |
| 666 | { | 690 | { |
| @@ -674,22 +698,30 @@ namespace verbly { | |||
| 674 | void generator::readWordNetAntonymy() | 698 | void generator::readWordNetAntonymy() |
| 675 | { | 699 | { |
| 676 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_ant.pl")); | 700 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_ant.pl")); |
| 677 | progress ppgs("Writing antonyms...", lines.size()); | 701 | hatkirby::progress ppgs("Writing antonyms...", lines.size()); |
| 678 | for (auto line : lines) | 702 | for (auto line : lines) |
| 679 | { | 703 | { |
| 680 | ppgs.update(); | 704 | ppgs.update(); |
| 681 | 705 | ||
| 682 | std::regex relation("^ant\\(([134]\\d{8}),(\\d+),([134]\\d{8}),(\\d+)\\)\\."); | 706 | std::regex relation( |
| 707 | "^ant\\(([134]\\d{8}),(\\d+),([134]\\d{8}),(\\d+)\\)\\."); | ||
| 708 | |||
| 683 | std::smatch relation_data; | 709 | std::smatch relation_data; |
| 684 | if (!std::regex_search(line, relation_data, relation)) | 710 | if (!std::regex_search(line, relation_data, relation)) |
| 685 | { | 711 | { |
| 686 | continue; | 712 | continue; |
| 687 | } | 713 | } |
| 688 | 714 | ||
| 689 | std::pair<int, int> lookup1(std::stoi(relation_data[1]), std::stoi(relation_data[2])); | 715 | std::pair<int, int> lookup1( |
| 690 | std::pair<int, int> lookup2(std::stoi(relation_data[3]), std::stoi(relation_data[4])); | 716 | std::stoi(relation_data[1]), |
| 717 | std::stoi(relation_data[2])); | ||
| 718 | |||
| 719 | std::pair<int, int> lookup2( | ||
| 720 | std::stoi(relation_data[3]), | ||
| 721 | std::stoi(relation_data[4])); | ||
| 691 | 722 | ||
| 692 | if (wordByWnidAndWnum_.count(lookup1) && wordByWnidAndWnum_.count(lookup2)) | 723 | if (wordByWnidAndWnum_.count(lookup1) && |
| 724 | wordByWnidAndWnum_.count(lookup2)) | ||
| 693 | { | 725 | { |
| 694 | word& word1 = *wordByWnidAndWnum_.at(lookup1); | 726 | word& word1 = *wordByWnidAndWnum_.at(lookup1); |
| 695 | word& word2 = *wordByWnidAndWnum_.at(lookup2); | 727 | word& word2 = *wordByWnidAndWnum_.at(lookup2); |
| @@ -707,7 +739,7 @@ namespace verbly { | |||
| 707 | void generator::readWordNetVariation() | 739 | void generator::readWordNetVariation() |
| 708 | { | 740 | { |
| 709 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_at.pl")); | 741 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_at.pl")); |
| 710 | progress ppgs("Writing variation...", lines.size()); | 742 | hatkirby::progress ppgs("Writing variation...", lines.size()); |
| 711 | for (auto line : lines) | 743 | for (auto line : lines) |
| 712 | { | 744 | { |
| 713 | ppgs.update(); | 745 | ppgs.update(); |
| @@ -730,7 +762,7 @@ namespace verbly { | |||
| 730 | db_.insertIntoTable( | 762 | db_.insertIntoTable( |
| 731 | "variation", | 763 | "variation", |
| 732 | { | 764 | { |
| 733 | { "noun_id", notion1.getId() } | 765 | { "noun_id", notion1.getId() }, |
| 734 | { "adjective_id", notion2.getId() } | 766 | { "adjective_id", notion2.getId() } |
| 735 | }); | 767 | }); |
| 736 | } | 768 | } |
| @@ -740,20 +772,32 @@ namespace verbly { | |||
| 740 | void generator::readWordNetClasses() | 772 | void generator::readWordNetClasses() |
| 741 | { | 773 | { |
| 742 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_cls.pl")); | 774 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_cls.pl")); |
| 743 | progress ppgs("Writing usage, topicality, and regionality...", lines.size()); | 775 | |
| 776 | hatkirby::progress ppgs( | ||
| 777 | "Writing usage, topicality, and regionality...", | ||
| 778 | lines.size()); | ||
| 779 | |||
| 744 | for (auto line : lines) | 780 | for (auto line : lines) |
| 745 | { | 781 | { |
| 746 | ppgs.update(); | 782 | ppgs.update(); |
| 747 | 783 | ||
| 748 | std::regex relation("^cls\\(([134]\\d{8}),(\\d+),(1\\d{8}),(\\d+),([tur])\\)\\."); | 784 | std::regex relation( |
| 785 | "^cls\\(([134]\\d{8}),(\\d+),(1\\d{8}),(\\d+),([tur])\\)\\."); | ||
| 786 | |||
| 749 | std::smatch relation_data; | 787 | std::smatch relation_data; |
| 750 | if (!std::regex_search(line, relation_data, relation)) | 788 | if (!std::regex_search(line, relation_data, relation)) |
| 751 | { | 789 | { |
| 752 | continue; | 790 | continue; |
| 753 | } | 791 | } |
| 754 | 792 | ||
| 755 | std::pair<int, int> lookup1(std::stoi(relation_data[1]), std::stoi(relation_data[2])); | 793 | std::pair<int, int> lookup1( |
| 756 | std::pair<int, int> lookup2(std::stoi(relation_data[3]), std::stoi(relation_data[4])); | 794 | std::stoi(relation_data[1]), |
| 795 | std::stoi(relation_data[2])); | ||
| 796 | |||
| 797 | std::pair<int, int> lookup2( | ||
| 798 | std::stoi(relation_data[3]), | ||
| 799 | std::stoi(relation_data[4])); | ||
| 800 | |||
| 757 | std::string class_type = relation_data[5]; | 801 | std::string class_type = relation_data[5]; |
| 758 | 802 | ||
| 759 | std::string table_name; | 803 | std::string table_name; |
| @@ -773,18 +817,30 @@ namespace verbly { | |||
| 773 | 817 | ||
| 774 | if ((lookup1.second == 0) && (wordsByWnid_.count(lookup1.first))) | 818 | if ((lookup1.second == 0) && (wordsByWnid_.count(lookup1.first))) |
| 775 | { | 819 | { |
| 776 | std::transform(std::begin(wordsByWnid_.at(lookup1.first)), std::end(wordsByWnid_.at(lookup1.first)), std::back_inserter(leftJoin), [] (word* w) { | 820 | auto& wordSet = wordsByWnid_.at(lookup1.first); |
| 777 | return w->getId(); | 821 | |
| 778 | }); | 822 | std::transform( |
| 823 | std::begin(wordSet), | ||
| 824 | std::end(wordSet), | ||
| 825 | std::back_inserter(leftJoin), | ||
| 826 | [] (word* w) { | ||
| 827 | return w->getId(); | ||
| 828 | }); | ||
| 779 | } else if (wordByWnidAndWnum_.count(lookup1)) { | 829 | } else if (wordByWnidAndWnum_.count(lookup1)) { |
| 780 | leftJoin.push_back(wordByWnidAndWnum_.at(lookup1)->getId()); | 830 | leftJoin.push_back(wordByWnidAndWnum_.at(lookup1)->getId()); |
| 781 | } | 831 | } |
| 782 | 832 | ||
| 783 | if ((lookup2.second == 0) && (wordsByWnid_.count(lookup2.first))) | 833 | if ((lookup2.second == 0) && (wordsByWnid_.count(lookup2.first))) |
| 784 | { | 834 | { |
| 785 | std::transform(std::begin(wordsByWnid_.at(lookup2.first)), std::end(wordsByWnid_.at(lookup2.first)), std::back_inserter(rightJoin), [] (word* w) { | 835 | auto& wordSet = wordsByWnid_.at(lookup2.first); |
| 786 | return w->getId(); | 836 | |
| 787 | }); | 837 | std::transform( |
| 838 | std::begin(wordSet), | ||
| 839 | std::end(wordSet), | ||
| 840 | std::back_inserter(rightJoin), | ||
| 841 | [] (word* w) { | ||
| 842 | return w->getId(); | ||
| 843 | }); | ||
| 788 | } else if (wordByWnidAndWnum_.count(lookup2)) { | 844 | } else if (wordByWnidAndWnum_.count(lookup2)) { |
| 789 | rightJoin.push_back(wordByWnidAndWnum_.at(lookup2)->getId()); | 845 | rightJoin.push_back(wordByWnidAndWnum_.at(lookup2)->getId()); |
| 790 | } | 846 | } |
| @@ -807,7 +863,7 @@ namespace verbly { | |||
| 807 | void generator::readWordNetCausality() | 863 | void generator::readWordNetCausality() |
| 808 | { | 864 | { |
| 809 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_cs.pl")); | 865 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_cs.pl")); |
| 810 | progress ppgs("Writing causality...", lines.size()); | 866 | hatkirby::progress ppgs("Writing causality...", lines.size()); |
| 811 | for (auto line : lines) | 867 | for (auto line : lines) |
| 812 | { | 868 | { |
| 813 | ppgs.update(); | 869 | ppgs.update(); |
| @@ -840,7 +896,7 @@ namespace verbly { | |||
| 840 | void generator::readWordNetEntailment() | 896 | void generator::readWordNetEntailment() |
| 841 | { | 897 | { |
| 842 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_ent.pl")); | 898 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_ent.pl")); |
| 843 | progress ppgs("Writing entailment...", lines.size()); | 899 | hatkirby::progress ppgs("Writing entailment...", lines.size()); |
| 844 | for (auto line : lines) | 900 | for (auto line : lines) |
| 845 | { | 901 | { |
| 846 | ppgs.update(); | 902 | ppgs.update(); |
| @@ -873,7 +929,7 @@ namespace verbly { | |||
| 873 | void generator::readWordNetHypernymy() | 929 | void generator::readWordNetHypernymy() |
| 874 | { | 930 | { |
| 875 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_hyp.pl")); | 931 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_hyp.pl")); |
| 876 | progress ppgs("Writing hypernymy...", lines.size()); | 932 | hatkirby::progress ppgs("Writing hypernymy...", lines.size()); |
| 877 | for (auto line : lines) | 933 | for (auto line : lines) |
| 878 | { | 934 | { |
| 879 | ppgs.update(); | 935 | ppgs.update(); |
| @@ -906,7 +962,7 @@ namespace verbly { | |||
| 906 | void generator::readWordNetInstantiation() | 962 | void generator::readWordNetInstantiation() |
| 907 | { | 963 | { |
| 908 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_ins.pl")); | 964 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_ins.pl")); |
| 909 | progress ppgs("Writing instantiation...", lines.size()); | 965 | hatkirby::progress ppgs("Writing instantiation...", lines.size()); |
| 910 | for (auto line : lines) | 966 | for (auto line : lines) |
| 911 | { | 967 | { |
| 912 | ppgs.update(); | 968 | ppgs.update(); |
| @@ -939,7 +995,7 @@ namespace verbly { | |||
| 939 | void generator::readWordNetMemberMeronymy() | 995 | void generator::readWordNetMemberMeronymy() |
| 940 | { | 996 | { |
| 941 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_mm.pl")); | 997 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_mm.pl")); |
| 942 | progress ppgs("Writing member meronymy...", lines.size()); | 998 | hatkirby::progress ppgs("Writing member meronymy...", lines.size()); |
| 943 | for (auto line : lines) | 999 | for (auto line : lines) |
| 944 | { | 1000 | { |
| 945 | ppgs.update(); | 1001 | ppgs.update(); |
| @@ -972,7 +1028,7 @@ namespace verbly { | |||
| 972 | void generator::readWordNetPartMeronymy() | 1028 | void generator::readWordNetPartMeronymy() |
| 973 | { | 1029 | { |
| 974 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_mp.pl")); | 1030 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_mp.pl")); |
| 975 | progress ppgs("Writing part meronymy...", lines.size()); | 1031 | hatkirby::progress ppgs("Writing part meronymy...", lines.size()); |
| 976 | for (auto line : lines) | 1032 | for (auto line : lines) |
| 977 | { | 1033 | { |
| 978 | ppgs.update(); | 1034 | ppgs.update(); |
| @@ -1005,7 +1061,7 @@ namespace verbly { | |||
| 1005 | void generator::readWordNetSubstanceMeronymy() | 1061 | void generator::readWordNetSubstanceMeronymy() |
| 1006 | { | 1062 | { |
| 1007 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_ms.pl")); | 1063 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_ms.pl")); |
| 1008 | progress ppgs("Writing substance meronymy...", lines.size()); | 1064 | hatkirby::progress ppgs("Writing substance meronymy...", lines.size()); |
| 1009 | for (auto line : lines) | 1065 | for (auto line : lines) |
| 1010 | { | 1066 | { |
| 1011 | ppgs.update(); | 1067 | ppgs.update(); |
| @@ -1038,27 +1094,40 @@ namespace verbly { | |||
| 1038 | void generator::readWordNetPertainymy() | 1094 | void generator::readWordNetPertainymy() |
| 1039 | { | 1095 | { |
| 1040 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_per.pl")); | 1096 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_per.pl")); |
| 1041 | progress ppgs("Writing pertainymy and mannernymy...", lines.size()); | 1097 | |
| 1098 | hatkirby::progress ppgs( | ||
| 1099 | "Writing pertainymy and mannernymy...", | ||
| 1100 | lines.size()); | ||
| 1101 | |||
| 1042 | for (auto line : lines) | 1102 | for (auto line : lines) |
| 1043 | { | 1103 | { |
| 1044 | ppgs.update(); | 1104 | ppgs.update(); |
| 1045 | 1105 | ||
| 1046 | std::regex relation("^per\\(([34]\\d{8}),(\\d+),([13]\\d{8}),(\\d+)\\)\\."); | 1106 | std::regex relation( |
| 1107 | "^per\\(([34]\\d{8}),(\\d+),([13]\\d{8}),(\\d+)\\)\\."); | ||
| 1108 | |||
| 1047 | std::smatch relation_data; | 1109 | std::smatch relation_data; |
| 1048 | if (!std::regex_search(line, relation_data, relation)) | 1110 | if (!std::regex_search(line, relation_data, relation)) |
| 1049 | { | 1111 | { |
| 1050 | continue; | 1112 | continue; |
| 1051 | } | 1113 | } |
| 1052 | 1114 | ||
| 1053 | std::pair<int, int> lookup1(std::stoi(relation_data[1]), std::stoi(relation_data[2])); | 1115 | std::pair<int, int> lookup1( |
| 1054 | std::pair<int, int> lookup2(std::stoi(relation_data[3]), std::stoi(relation_data[4])); | 1116 | std::stoi(relation_data[1]), |
| 1117 | std::stoi(relation_data[2])); | ||
| 1055 | 1118 | ||
| 1056 | if (wordByWnidAndWnum_.count(lookup1) && wordByWnidAndWnum_.count(lookup2)) | 1119 | std::pair<int, int> lookup2( |
| 1120 | std::stoi(relation_data[3]), | ||
| 1121 | std::stoi(relation_data[4])); | ||
| 1122 | |||
| 1123 | if (wordByWnidAndWnum_.count(lookup1) && | ||
| 1124 | wordByWnidAndWnum_.count(lookup2)) | ||
| 1057 | { | 1125 | { |
| 1058 | word& word1 = *wordByWnidAndWnum_.at(lookup1); | 1126 | word& word1 = *wordByWnidAndWnum_.at(lookup1); |
| 1059 | word& word2 = *wordByWnidAndWnum_.at(lookup2); | 1127 | word& word2 = *wordByWnidAndWnum_.at(lookup2); |
| 1060 | 1128 | ||
| 1061 | if (word1.getNotion().getPartOfSpeech() == part_of_speech::adjective) | 1129 | if (word1.getNotion().getPartOfSpeech() == |
| 1130 | part_of_speech::adjective) | ||
| 1062 | { | 1131 | { |
| 1063 | db_.insertIntoTable( | 1132 | db_.insertIntoTable( |
| 1064 | "pertainymy", | 1133 | "pertainymy", |
| @@ -1066,7 +1135,8 @@ namespace verbly { | |||
| 1066 | { "pertainym_id", word1.getId() }, | 1135 | { "pertainym_id", word1.getId() }, |
| 1067 | { "noun_id", word2.getId() } | 1136 | { "noun_id", word2.getId() } |
| 1068 | }); | 1137 | }); |
| 1069 | } else if (word1.getNotion().getPartOfSpeech() == part_of_speech::adverb) | 1138 | } else if (word1.getNotion().getPartOfSpeech() == |
| 1139 | part_of_speech::adverb) | ||
| 1070 | { | 1140 | { |
| 1071 | db_.insertIntoTable( | 1141 | db_.insertIntoTable( |
| 1072 | "mannernymy", | 1142 | "mannernymy", |
| @@ -1082,7 +1152,7 @@ namespace verbly { | |||
| 1082 | void generator::readWordNetSpecification() | 1152 | void generator::readWordNetSpecification() |
| 1083 | { | 1153 | { |
| 1084 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_sa.pl")); | 1154 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_sa.pl")); |
| 1085 | progress ppgs("Writing specifications...", lines.size()); | 1155 | hatkirby::progress ppgs("Writing specifications...", lines.size()); |
| 1086 | for (auto line : lines) | 1156 | for (auto line : lines) |
| 1087 | { | 1157 | { |
| 1088 | ppgs.update(); | 1158 | ppgs.update(); |
| @@ -1094,10 +1164,17 @@ namespace verbly { | |||
| 1094 | continue; | 1164 | continue; |
| 1095 | } | 1165 | } |
| 1096 | 1166 | ||
| 1097 | std::pair<int, int> lookup1(std::stoi(relation_data[1]), std::stoi(relation_data[2])); | 1167 | std::pair<int, int> lookup1( |
| 1098 | std::pair<int, int> lookup2(std::stoi(relation_data[3]), std::stoi(relation_data[4])); | 1168 | std::stoi(relation_data[1]), |
| 1169 | std::stoi(relation_data[2])); | ||
| 1170 | |||
| 1171 | std::pair<int, int> lookup2( | ||
| 1172 | std::stoi(relation_data[3]), | ||
| 1173 | std::stoi(relation_data[4])); | ||
| 1099 | 1174 | ||
| 1100 | if (wordByWnidAndWnum_.count(lookup1) && wordByWnidAndWnum_.count(lookup2)) | 1175 | |
| 1176 | if (wordByWnidAndWnum_.count(lookup1) && | ||
| 1177 | wordByWnidAndWnum_.count(lookup2)) | ||
| 1101 | { | 1178 | { |
| 1102 | word& word1 = *wordByWnidAndWnum_.at(lookup1); | 1179 | word& word1 = *wordByWnidAndWnum_.at(lookup1); |
| 1103 | word& word2 = *wordByWnidAndWnum_.at(lookup2); | 1180 | word& word2 = *wordByWnidAndWnum_.at(lookup2); |
| @@ -1115,7 +1192,7 @@ namespace verbly { | |||
| 1115 | void generator::readWordNetSimilarity() | 1192 | void generator::readWordNetSimilarity() |
| 1116 | { | 1193 | { |
| 1117 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_sim.pl")); | 1194 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_sim.pl")); |
| 1118 | progress ppgs("Writing adjective similarity...", lines.size()); | 1195 | hatkirby::progress ppgs("Writing adjective similarity...", lines.size()); |
| 1119 | for (auto line : lines) | 1196 | for (auto line : lines) |
| 1120 | { | 1197 | { |
| 1121 | ppgs.update(); | 1198 | ppgs.update(); |
| @@ -1149,7 +1226,7 @@ namespace verbly { | |||
| 1149 | { | 1226 | { |
| 1150 | std::cout << "Analyzing data..." << std::endl; | 1227 | std::cout << "Analyzing data..." << std::endl; |
| 1151 | 1228 | ||
| 1152 | db_.runQuery("ANALYZE"); | 1229 | db_.execute("ANALYZE"); |
| 1153 | } | 1230 | } |
| 1154 | 1231 | ||
| 1155 | std::list<std::string> generator::readFile(std::string path) | 1232 | std::list<std::string> generator::readFile(std::string path) |
| @@ -1183,7 +1260,8 @@ namespace verbly { | |||
| 1183 | case 2: return part_of_speech::verb; | 1260 | case 2: return part_of_speech::verb; |
| 1184 | case 3: return part_of_speech::adjective; | 1261 | case 3: return part_of_speech::adjective; |
| 1185 | case 4: return part_of_speech::adverb; | 1262 | case 4: return part_of_speech::adverb; |
| 1186 | default: throw std::domain_error("Invalid WordNet synset ID: " + std::to_string(wnid)); | 1263 | default: throw std::domain_error( |
| 1264 | "Invalid WordNet synset ID: " + std::to_string(wnid)); | ||
| 1187 | } | 1265 | } |
| 1188 | } | 1266 | } |
| 1189 | 1267 | ||
| @@ -1296,20 +1374,30 @@ namespace verbly { | |||
| 1296 | std::string wnSenses(reinterpret_cast<const char*>(key)); | 1374 | std::string wnSenses(reinterpret_cast<const char*>(key)); |
| 1297 | xmlFree(key); | 1375 | xmlFree(key); |
| 1298 | 1376 | ||
| 1299 | auto wnSenseKeys = split<std::list<std::string>>(wnSenses, " "); | 1377 | auto wnSenseKeys = |
| 1378 | hatkirby::split<std::list<std::string>>(wnSenses, " "); | ||
| 1379 | |||
| 1300 | if (!wnSenseKeys.empty()) | 1380 | if (!wnSenseKeys.empty()) |
| 1301 | { | 1381 | { |
| 1302 | std::list<std::string> tempKeys; | 1382 | std::list<std::string> tempKeys; |
| 1303 | 1383 | ||
| 1304 | std::transform(std::begin(wnSenseKeys), std::end(wnSenseKeys), std::back_inserter(tempKeys), [] (std::string sense) { | 1384 | std::transform( |
| 1305 | return sense + "::"; | 1385 | std::begin(wnSenseKeys), |
| 1306 | }); | 1386 | std::end(wnSenseKeys), |
| 1387 | std::back_inserter(tempKeys), | ||
| 1388 | [] (std::string sense) { | ||
| 1389 | return sense + "::"; | ||
| 1390 | }); | ||
| 1307 | 1391 | ||
| 1308 | std::list<std::string> filteredKeys; | 1392 | std::list<std::string> filteredKeys; |
| 1309 | 1393 | ||
| 1310 | std::remove_copy_if(std::begin(tempKeys), std::end(tempKeys), std::back_inserter(filteredKeys), [&] (std::string sense) { | 1394 | std::remove_copy_if( |
| 1311 | return !wnSenseKeys_.count(sense); | 1395 | std::begin(tempKeys), |
| 1312 | }); | 1396 | std::end(tempKeys), |
| 1397 | std::back_inserter(filteredKeys), | ||
| 1398 | [&] (std::string sense) { | ||
| 1399 | return !wnSenseKeys_.count(sense); | ||
| 1400 | }); | ||
| 1313 | 1401 | ||
| 1314 | wnSenseKeys = std::move(filteredKeys); | 1402 | wnSenseKeys = std::move(filteredKeys); |
| 1315 | } | 1403 | } |
| @@ -1431,10 +1519,15 @@ namespace verbly { | |||
| 1431 | std::string choicesStr = reinterpret_cast<const char*>(key); | 1519 | std::string choicesStr = reinterpret_cast<const char*>(key); |
| 1432 | xmlFree(key); | 1520 | xmlFree(key); |
| 1433 | 1521 | ||
| 1434 | for (std::string choice : split<std::list<std::string>>(choicesStr, " ")) | 1522 | auto choices = |
| 1523 | hatkirby::split<std::list<std::string>>( | ||
| 1524 | choicesStr, " "); | ||
| 1525 | |||
| 1526 | for (std::string choice : choices) | ||
| 1435 | { | 1527 | { |
| 1436 | int chloc; | 1528 | int chloc; |
| 1437 | while ((chloc = choice.find_first_of("_")) != std::string::npos) | 1529 | while ((chloc = choice.find_first_of("_")) |
| 1530 | != std::string::npos) | ||
| 1438 | { | 1531 | { |
| 1439 | choice.replace(chloc, 1, " "); | 1532 | choice.replace(chloc, 1, " "); |
| 1440 | } | 1533 | } |
| @@ -1444,7 +1537,9 @@ namespace verbly { | |||
| 1444 | } else { | 1537 | } else { |
| 1445 | partLiteral = false; | 1538 | partLiteral = false; |
| 1446 | 1539 | ||
| 1447 | for (xmlNodePtr npnode = syntaxnode->xmlChildrenNode; npnode != nullptr; npnode = npnode->next) | 1540 | for (xmlNodePtr npnode = syntaxnode->xmlChildrenNode; |
| 1541 | npnode != nullptr; | ||
| 1542 | npnode = npnode->next) | ||
| 1448 | { | 1543 | { |
| 1449 | if (!xmlStrcmp(npnode->name, reinterpret_cast<const xmlChar*>("SELRESTRS"))) | 1544 | if (!xmlStrcmp(npnode->name, reinterpret_cast<const xmlChar*>("SELRESTRS"))) |
| 1450 | { | 1545 | { |
| diff --git a/generator/generator.h b/generator/generator.h index 52073bc..cd99f88 100644 --- a/generator/generator.h +++ b/generator/generator.h | |||
| @@ -6,7 +6,7 @@ | |||
| 6 | #include <list> | 6 | #include <list> |
| 7 | #include <set> | 7 | #include <set> |
| 8 | #include <libxml/parser.h> | 8 | #include <libxml/parser.h> |
| 9 | #include "database.h" | 9 | #include <hkutil/database.h> |
| 10 | #include "notion.h" | 10 | #include "notion.h" |
| 11 | #include "word.h" | 11 | #include "word.h" |
| 12 | #include "lemma.h" | 12 | #include "lemma.h" |
| diff --git a/generator/group.cpp b/generator/group.cpp index 5b23578..1ffb9d9 100644 --- a/generator/group.cpp +++ b/generator/group.cpp | |||
| @@ -1,10 +1,8 @@ | |||
| 1 | #include "group.h" | 1 | #include "group.h" |
| 2 | #include <stdexcept> | 2 | #include <stdexcept> |
| 3 | #include <list> | 3 | #include <list> |
| 4 | #include "database.h" | 4 | #include <hkutil/string.h> |
| 5 | #include "field.h" | ||
| 6 | #include "frame.h" | 5 | #include "frame.h" |
| 7 | #include "../lib/util.h" | ||
| 8 | 6 | ||
| 9 | namespace verbly { | 7 | namespace verbly { |
| 10 | namespace generator { | 8 | namespace generator { |
| @@ -50,28 +48,26 @@ namespace verbly { | |||
| 50 | return roles_.at(name); | 48 | return roles_.at(name); |
| 51 | } | 49 | } |
| 52 | 50 | ||
| 53 | database& operator<<(database& db, const group& arg) | 51 | hatkirby::database& operator<<(hatkirby::database& db, const group& arg) |
| 54 | { | 52 | { |
| 55 | // Serialize each frame | 53 | // Serialize each frame |
| 56 | for (const frame& f : arg.getFrames()) | 54 | for (const frame& f : arg.getFrames()) |
| 57 | { | 55 | { |
| 58 | // First, serialize the group/frame relationship | 56 | // First, serialize the group/frame relationship |
| 59 | { | 57 | db.insertIntoTable( |
| 60 | std::list<field> fields; | 58 | "frames", |
| 61 | 59 | { | |
| 62 | fields.emplace_back("frame_id", f.getId()); | 60 | { "frame_id", f.getId() }, |
| 63 | fields.emplace_back("group_id", arg.getId()); | 61 | { "group_id", arg.getId() }, |
| 64 | fields.emplace_back("length", f.getLength()); | 62 | { "length", f.getLength() } |
| 65 | 63 | }); | |
| 66 | db.insertIntoTable("frames", std::move(fields)); | ||
| 67 | } | ||
| 68 | 64 | ||
| 69 | // Then, serialize the frame parts in the context of the group | 65 | // Then, serialize the frame parts in the context of the group |
| 70 | for (int partIndex = 0; partIndex < f.getLength(); partIndex++) | 66 | for (int partIndex = 0; partIndex < f.getLength(); partIndex++) |
| 71 | { | 67 | { |
| 72 | const part& p = f[partIndex]; | 68 | const part& p = f[partIndex]; |
| 73 | 69 | ||
| 74 | std::list<field> fields; | 70 | std::list<hatkirby::column> fields; |
| 75 | fields.emplace_back("part_id", p.getId()); | 71 | fields.emplace_back("part_id", p.getId()); |
| 76 | fields.emplace_back("frame_id", f.getId()); | 72 | fields.emplace_back("frame_id", f.getId()); |
| 77 | fields.emplace_back("part_index", partIndex); | 73 | fields.emplace_back("part_index", partIndex); |
| @@ -92,23 +88,23 @@ namespace verbly { | |||
| 92 | 88 | ||
| 93 | for (const std::string& s : partSelrestrs) | 89 | for (const std::string& s : partSelrestrs) |
| 94 | { | 90 | { |
| 95 | std::list<field> selrestrFields; | 91 | db.insertIntoTable( |
| 96 | 92 | "selrestrs", | |
| 97 | selrestrFields.emplace_back("part_id", p.getId()); | 93 | { |
| 98 | selrestrFields.emplace_back("selrestr", s); | 94 | { "part_id", p.getId() }, |
| 99 | 95 | { "selrestr", s } | |
| 100 | db.insertIntoTable("selrestrs", std::move(selrestrFields)); | 96 | }); |
| 101 | } | 97 | } |
| 102 | 98 | ||
| 103 | // Short interlude to serialize the synrestrs | 99 | // Short interlude to serialize the synrestrs |
| 104 | for (const std::string& s : p.getNounSynrestrs()) | 100 | for (const std::string& s : p.getNounSynrestrs()) |
| 105 | { | 101 | { |
| 106 | std::list<field> synrestrFields; | 102 | db.insertIntoTable( |
| 107 | 103 | "synrestrs", | |
| 108 | synrestrFields.emplace_back("part_id", p.getId()); | 104 | { |
| 109 | synrestrFields.emplace_back("synrestr", s); | 105 | { "part_id", p.getId() }, |
| 110 | 106 | { "synrestr", s } | |
| 111 | db.insertIntoTable("synrestrs", std::move(synrestrFields)); | 107 | }); |
| 112 | } | 108 | } |
| 113 | 109 | ||
| 114 | break; | 110 | break; |
| @@ -117,10 +113,17 @@ namespace verbly { | |||
| 117 | case part::type::preposition: | 113 | case part::type::preposition: |
| 118 | { | 114 | { |
| 119 | std::set<std::string> setChoices = p.getPrepositionChoices(); | 115 | std::set<std::string> setChoices = p.getPrepositionChoices(); |
| 120 | std::string serializedChoices = implode(std::begin(setChoices), std::end(setChoices), ","); | 116 | |
| 117 | std::string serializedChoices = | ||
| 118 | hatkirby::implode( | ||
| 119 | std::begin(setChoices), | ||
| 120 | std::end(setChoices), | ||
| 121 | ","); | ||
| 121 | 122 | ||
| 122 | fields.emplace_back("prepositions", std::move(serializedChoices)); | 123 | fields.emplace_back("prepositions", std::move(serializedChoices)); |
| 123 | fields.emplace_back("preposition_literality", p.isPrepositionLiteral() ? 1 : 0); | 124 | |
| 125 | fields.emplace_back("preposition_literality", | ||
| 126 | p.isPrepositionLiteral() ? 1 : 0); | ||
| 124 | 127 | ||
| 125 | break; | 128 | break; |
| 126 | } | 129 | } |
| diff --git a/generator/group.h b/generator/group.h index a7f3a17..f912920 100644 --- a/generator/group.h +++ b/generator/group.h | |||
| @@ -6,13 +6,13 @@ | |||
| 6 | #include <string> | 6 | #include <string> |
| 7 | #include <cassert> | 7 | #include <cassert> |
| 8 | #include <list> | 8 | #include <list> |
| 9 | #include <hkutil/database.h> | ||
| 9 | #include "role.h" | 10 | #include "role.h" |
| 10 | 11 | ||
| 11 | namespace verbly { | 12 | namespace verbly { |
| 12 | namespace generator { | 13 | namespace generator { |
| 13 | 14 | ||
| 14 | class frame; | 15 | class frame; |
| 15 | class database; | ||
| 16 | 16 | ||
| 17 | class group { | 17 | class group { |
| 18 | public: | 18 | public: |
| @@ -67,7 +67,7 @@ namespace verbly { | |||
| 67 | 67 | ||
| 68 | // Serializer | 68 | // Serializer |
| 69 | 69 | ||
| 70 | database& operator<<(database& db, const group& arg); | 70 | hatkirby::database& operator<<(hatkirby::database& db, const group& arg); |
| 71 | 71 | ||
| 72 | }; | 72 | }; |
| 73 | }; | 73 | }; |
| diff --git a/generator/lemma.cpp b/generator/lemma.cpp index e66b153..33ab037 100644 --- a/generator/lemma.cpp +++ b/generator/lemma.cpp | |||
| @@ -1,8 +1,6 @@ | |||
| 1 | #include "lemma.h" | 1 | #include "lemma.h" |
| 2 | #include <list> | 2 | #include <list> |
| 3 | #include <cassert> | 3 | #include <cassert> |
| 4 | #include "field.h" | ||
| 5 | #include "database.h" | ||
| 6 | #include "form.h" | 4 | #include "form.h" |
| 7 | 5 | ||
| 8 | namespace verbly { | 6 | namespace verbly { |
| @@ -35,7 +33,7 @@ namespace verbly { | |||
| 35 | } | 33 | } |
| 36 | } | 34 | } |
| 37 | 35 | ||
| 38 | database& operator<<(database& db, const lemma& arg) | 36 | hatkirby::database& operator<<(hatkirby::database& db, const lemma& arg) |
| 39 | { | 37 | { |
| 40 | for (inflection type : { | 38 | for (inflection type : { |
| 41 | inflection::base, | 39 | inflection::base, |
| @@ -49,12 +47,13 @@ namespace verbly { | |||
| 49 | { | 47 | { |
| 50 | for (const form* f : arg.getInflections(type)) | 48 | for (const form* f : arg.getInflections(type)) |
| 51 | { | 49 | { |
| 52 | std::list<field> fields; | 50 | db.insertIntoTable( |
| 53 | fields.emplace_back("lemma_id", arg.getId()); | 51 | "lemmas_forms", |
| 54 | fields.emplace_back("form_id", f->getId()); | 52 | { |
| 55 | fields.emplace_back("category", static_cast<int>(type)); | 53 | { "lemma_id", arg.getId() }, |
| 56 | 54 | { "form_id", f->getId() }, | |
| 57 | db.insertIntoTable("lemmas_forms", std::move(fields)); | 55 | { "category", static_cast<int>(type) } |
| 56 | }); | ||
| 58 | } | 57 | } |
| 59 | } | 58 | } |
| 60 | 59 | ||
| diff --git a/generator/lemma.h b/generator/lemma.h index f68667f..f7d5491 100644 --- a/generator/lemma.h +++ b/generator/lemma.h | |||
| @@ -4,12 +4,12 @@ | |||
| 4 | #include <string> | 4 | #include <string> |
| 5 | #include <map> | 5 | #include <map> |
| 6 | #include <set> | 6 | #include <set> |
| 7 | #include <hkutil/database.h> | ||
| 7 | #include "../lib/enums.h" | 8 | #include "../lib/enums.h" |
| 8 | 9 | ||
| 9 | namespace verbly { | 10 | namespace verbly { |
| 10 | namespace generator { | 11 | namespace generator { |
| 11 | 12 | ||
| 12 | class database; | ||
| 13 | class form; | 13 | class form; |
| 14 | 14 | ||
| 15 | class lemma { | 15 | class lemma { |
| @@ -50,7 +50,7 @@ namespace verbly { | |||
| 50 | 50 | ||
| 51 | // Serializer | 51 | // Serializer |
| 52 | 52 | ||
| 53 | database& operator<<(database& db, const lemma& arg); | 53 | hatkirby::database& operator<<(hatkirby::database& db, const lemma& arg); |
| 54 | 54 | ||
| 55 | }; | 55 | }; |
| 56 | }; | 56 | }; |
| diff --git a/generator/notion.cpp b/generator/notion.cpp index 35ba7b1..f388767 100644 --- a/generator/notion.cpp +++ b/generator/notion.cpp | |||
| @@ -1,8 +1,4 @@ | |||
| 1 | #include "notion.h" | 1 | #include "notion.h" |
| 2 | #include <string> | ||
| 3 | #include <list> | ||
| 4 | #include "database.h" | ||
| 5 | #include "field.h" | ||
| 6 | 2 | ||
| 7 | namespace verbly { | 3 | namespace verbly { |
| 8 | namespace generator { | 4 | namespace generator { |
| @@ -28,21 +24,25 @@ namespace verbly { | |||
| 28 | 24 | ||
| 29 | void notion::incrementNumOfImages() | 25 | void notion::incrementNumOfImages() |
| 30 | { | 26 | { |
| 31 | // Calling code should always call hasWnid and check that the notion is a noun first. | 27 | if (!hasWnid_ || (partOfSpeech_ != part_of_speech::noun)) |
| 32 | assert(hasWnid_ && (partOfSpeech_ == part_of_speech::noun)); | 28 | { |
| 29 | throw std::domain_error("Notion is not a noun with wnid"); | ||
| 30 | } | ||
| 33 | 31 | ||
| 34 | numOfImages_++; | 32 | numOfImages_++; |
| 35 | } | 33 | } |
| 36 | 34 | ||
| 37 | void notion::setPrepositionGroups(std::list<std::string> groups) | 35 | void notion::setPrepositionGroups(std::list<std::string> groups) |
| 38 | { | 36 | { |
| 39 | // Calling code should always check that the notion is a preposition first. | 37 | if (partOfSpeech_ != part_of_speech::preposition) |
| 40 | assert(partOfSpeech_ == part_of_speech::preposition); | 38 | { |
| 39 | throw std::domain_error("Notion is not a preposition"); | ||
| 40 | } | ||
| 41 | 41 | ||
| 42 | prepositionGroups_ = groups; | 42 | prepositionGroups_ = groups; |
| 43 | } | 43 | } |
| 44 | 44 | ||
| 45 | database& operator<<(database& db, const notion& arg) | 45 | hatkirby::database& operator<<(hatkirby::database& db, const notion& arg) |
| 46 | { | 46 | { |
| 47 | // First, serialize the notion | 47 | // First, serialize the notion |
| 48 | { | 48 | { |
| diff --git a/generator/notion.h b/generator/notion.h index 817e66a..3bff916 100644 --- a/generator/notion.h +++ b/generator/notion.h | |||
| @@ -1,9 +1,9 @@ | |||
| 1 | #ifndef NOTION_H_221DE2BC | 1 | #ifndef NOTION_H_221DE2BC |
| 2 | #define NOTION_H_221DE2BC | 2 | #define NOTION_H_221DE2BC |
| 3 | 3 | ||
| 4 | #include <cassert> | ||
| 5 | #include <list> | 4 | #include <list> |
| 6 | #include <string> | 5 | #include <string> |
| 6 | #include <hkutil/database.h> | ||
| 7 | #include "../lib/enums.h" | 7 | #include "../lib/enums.h" |
| 8 | 8 | ||
| 9 | namespace verbly { | 9 | namespace verbly { |
| @@ -43,24 +43,30 @@ namespace verbly { | |||
| 43 | 43 | ||
| 44 | int getWnid() const | 44 | int getWnid() const |
| 45 | { | 45 | { |
| 46 | // Calling code should always call hasWnid first. | 46 | if (!hasWnid_) |
| 47 | assert(hasWnid_); | 47 | { |
| 48 | throw std::domain_error("Notion does not have wnid"); | ||
| 49 | } | ||
| 48 | 50 | ||
| 49 | return wnid_; | 51 | return wnid_; |
| 50 | } | 52 | } |
| 51 | 53 | ||
| 52 | int getNumOfImages() const | 54 | int getNumOfImages() const |
| 53 | { | 55 | { |
| 54 | // Calling code should always call hasWnid and check that the notion is a noun first. | 56 | if (!hasWnid_ || (partOfSpeech_ != part_of_speech::noun)) |
| 55 | assert(hasWnid_ && (partOfSpeech_ == part_of_speech::noun)); | 57 | { |
| 58 | throw std::domain_error("Notion is not a noun with wnid"); | ||
| 59 | } | ||
| 56 | 60 | ||
| 57 | return numOfImages_; | 61 | return numOfImages_; |
| 58 | } | 62 | } |
| 59 | 63 | ||
| 60 | std::list<std::string> getPrepositionGroups() const | 64 | std::list<std::string> getPrepositionGroups() const |
| 61 | { | 65 | { |
| 62 | // Calling code should always check that the notion is a preposition first. | 66 | if (partOfSpeech_ != part_of_speech::preposition) |
| 63 | assert(partOfSpeech_ == part_of_speech::preposition); | 67 | { |
| 68 | throw std::domain_error("Notion is not a preposition"); | ||
| 69 | } | ||
| 64 | 70 | ||
| 65 | return prepositionGroups_; | 71 | return prepositionGroups_; |
| 66 | } | 72 | } |
| @@ -81,7 +87,7 @@ namespace verbly { | |||
| 81 | 87 | ||
| 82 | // Serializer | 88 | // Serializer |
| 83 | 89 | ||
| 84 | database& operator<<(database& db, const notion& arg); | 90 | hatkirby::database& operator<<(hatkirby::database& db, const notion& arg); |
| 85 | 91 | ||
| 86 | }; | 92 | }; |
| 87 | }; | 93 | }; |
| diff --git a/generator/progress.h b/generator/progress.h deleted file mode 100644 index 76cde48..0000000 --- a/generator/progress.h +++ /dev/null | |||
| @@ -1,56 +0,0 @@ | |||
| 1 | #ifndef PROGRESS_H_A34EF856 | ||
| 2 | #define PROGRESS_H_A34EF856 | ||
| 3 | |||
| 4 | #include <string> | ||
| 5 | |||
| 6 | namespace verbly { | ||
| 7 | namespace generator { | ||
| 8 | |||
| 9 | class progress { | ||
| 10 | private: | ||
| 11 | std::string message; | ||
| 12 | int total; | ||
| 13 | int cur = 0; | ||
| 14 | int lprint = 0; | ||
| 15 | |||
| 16 | public: | ||
| 17 | progress(std::string message, int total) : message(message), total(total) | ||
| 18 | { | ||
| 19 | std::cout << message << " 0%" << std::flush; | ||
| 20 | } | ||
| 21 | |||
| 22 | void update(int val) | ||
| 23 | { | ||
| 24 | if (val <= total) | ||
| 25 | { | ||
| 26 | cur = val; | ||
| 27 | } else { | ||
| 28 | cur = total; | ||
| 29 | } | ||
| 30 | |||
| 31 | int pp = cur * 100 / total; | ||
| 32 | if (pp != lprint) | ||
| 33 | { | ||
| 34 | lprint = pp; | ||
| 35 | |||
| 36 | std::cout << "\b\b\b\b" << std::right; | ||
| 37 | std::cout.width(3); | ||
| 38 | std::cout << pp << "%" << std::flush; | ||
| 39 | } | ||
| 40 | } | ||
| 41 | |||
| 42 | void update() | ||
| 43 | { | ||
| 44 | update(cur+1); | ||
| 45 | } | ||
| 46 | |||
| 47 | ~progress() | ||
| 48 | { | ||
| 49 | std::cout << "\b\b\b\b100%" << std::endl; | ||
| 50 | } | ||
| 51 | }; | ||
| 52 | |||
| 53 | }; | ||
| 54 | }; | ||
| 55 | |||
| 56 | #endif /* end of include guard: PROGRESS_H_A34EF856 */ | ||
| diff --git a/generator/pronunciation.cpp b/generator/pronunciation.cpp index eb07607..3075d42 100644 --- a/generator/pronunciation.cpp +++ b/generator/pronunciation.cpp | |||
| @@ -3,9 +3,7 @@ | |||
| 3 | #include <algorithm> | 3 | #include <algorithm> |
| 4 | #include <cctype> | 4 | #include <cctype> |
| 5 | #include <iterator> | 5 | #include <iterator> |
| 6 | #include "database.h" | 6 | #include <hkutil/string.h> |
| 7 | #include "field.h" | ||
| 8 | #include "../lib/util.h" | ||
| 9 | 7 | ||
| 10 | namespace verbly { | 8 | namespace verbly { |
| 11 | namespace generator { | 9 | namespace generator { |
| @@ -16,28 +14,45 @@ namespace verbly { | |||
| 16 | id_(nextId_++), | 14 | id_(nextId_++), |
| 17 | phonemes_(phonemes) | 15 | phonemes_(phonemes) |
| 18 | { | 16 | { |
| 19 | auto phonemeList = split<std::list<std::string>>(phonemes, " "); | 17 | auto phonemeList = |
| 20 | 18 | hatkirby::split<std::list<std::string>>(phonemes, " "); | |
| 21 | auto rhymeStart = std::find_if(std::begin(phonemeList), std::end(phonemeList), [] (std::string phoneme) { | 19 | |
| 22 | return phoneme.find("1") != std::string::npos; | 20 | std::list<std::string>::iterator rhymeStart = |
| 23 | }); | 21 | std::find_if( |
| 22 | std::begin(phonemeList), | ||
| 23 | std::end(phonemeList), | ||
| 24 | [] (std::string phoneme) { | ||
| 25 | return phoneme.find("1") != std::string::npos; | ||
| 26 | }); | ||
| 24 | 27 | ||
| 25 | // Rhyme detection | 28 | // Rhyme detection |
| 26 | if (rhymeStart != std::end(phonemeList)) | 29 | if (rhymeStart != std::end(phonemeList)) |
| 27 | { | 30 | { |
| 28 | std::list<std::string> rhymePhonemes; | 31 | std::list<std::string> rhymePhonemes; |
| 29 | 32 | ||
| 30 | std::transform(rhymeStart, std::end(phonemeList), std::back_inserter(rhymePhonemes), [] (std::string phoneme) { | 33 | std::transform( |
| 31 | std::string naked; | 34 | rhymeStart, |
| 32 | 35 | std::end(phonemeList), | |
| 33 | std::remove_copy_if(std::begin(phoneme), std::end(phoneme), std::back_inserter(naked), [] (char ch) { | 36 | std::back_inserter(rhymePhonemes), |
| 34 | return std::isdigit(ch); | 37 | [] (std::string phoneme) { |
| 38 | std::string naked; | ||
| 39 | |||
| 40 | std::remove_copy_if( | ||
| 41 | std::begin(phoneme), | ||
| 42 | std::end(phoneme), | ||
| 43 | std::back_inserter(naked), | ||
| 44 | [] (char ch) { | ||
| 45 | return std::isdigit(ch); | ||
| 46 | }); | ||
| 47 | |||
| 48 | return naked; | ||
| 35 | }); | 49 | }); |
| 36 | 50 | ||
| 37 | return naked; | 51 | rhyme_ = |
| 38 | }); | 52 | hatkirby::implode( |
| 39 | 53 | std::begin(rhymePhonemes), | |
| 40 | rhyme_ = implode(std::begin(rhymePhonemes), std::end(rhymePhonemes), " "); | 54 | std::end(rhymePhonemes), |
| 55 | " "); | ||
| 41 | 56 | ||
| 42 | if (rhymeStart != std::begin(phonemeList)) | 57 | if (rhymeStart != std::begin(phonemeList)) |
| 43 | { | 58 | { |
| @@ -63,9 +78,11 @@ namespace verbly { | |||
| 63 | } | 78 | } |
| 64 | } | 79 | } |
| 65 | 80 | ||
| 66 | database& operator<<(database& db, const pronunciation& arg) | 81 | hatkirby::database& operator<<( |
| 82 | hatkirby::database& db, | ||
| 83 | const pronunciation& arg) | ||
| 67 | { | 84 | { |
| 68 | std::list<field> fields; | 85 | std::list<hatkirby::column> fields; |
| 69 | 86 | ||
| 70 | fields.emplace_back("pronunciation_id", arg.getId()); | 87 | fields.emplace_back("pronunciation_id", arg.getId()); |
| 71 | fields.emplace_back("phonemes", arg.getPhonemes()); | 88 | fields.emplace_back("phonemes", arg.getPhonemes()); |
| diff --git a/generator/pronunciation.h b/generator/pronunciation.h index 81be6c4..163e55e 100644 --- a/generator/pronunciation.h +++ b/generator/pronunciation.h | |||
| @@ -3,12 +3,11 @@ | |||
| 3 | 3 | ||
| 4 | #include <string> | 4 | #include <string> |
| 5 | #include <cassert> | 5 | #include <cassert> |
| 6 | #include <hkutil/database.h> | ||
| 6 | 7 | ||
| 7 | namespace verbly { | 8 | namespace verbly { |
| 8 | namespace generator { | 9 | namespace generator { |
| 9 | 10 | ||
| 10 | class database; | ||
| 11 | |||
| 12 | class pronunciation { | 11 | class pronunciation { |
| 13 | public: | 12 | public: |
| 14 | 13 | ||
| @@ -74,7 +73,9 @@ namespace verbly { | |||
| 74 | 73 | ||
| 75 | // Serializer | 74 | // Serializer |
| 76 | 75 | ||
| 77 | database& operator<<(database& db, const pronunciation& arg); | 76 | hatkirby::database& operator<<( |
| 77 | hatkirby::database& db, | ||
| 78 | const pronunciation& arg); | ||
| 78 | 79 | ||
| 79 | }; | 80 | }; |
| 80 | }; | 81 | }; |
| diff --git a/generator/word.cpp b/generator/word.cpp index b3fc490..360cd6a 100644 --- a/generator/word.cpp +++ b/generator/word.cpp | |||
| @@ -1,10 +1,8 @@ | |||
| 1 | #include "word.h" | 1 | #include "word.h" |
| 2 | #include <list> | 2 | #include <list> |
| 3 | #include <string> | 3 | #include <string> |
| 4 | #include "database.h" | ||
| 5 | #include "notion.h" | 4 | #include "notion.h" |
| 6 | #include "lemma.h" | 5 | #include "lemma.h" |
| 7 | #include "field.h" | ||
| 8 | #include "group.h" | 6 | #include "group.h" |
| 9 | 7 | ||
| 10 | namespace verbly { | 8 | namespace verbly { |
| @@ -43,9 +41,9 @@ namespace verbly { | |||
| 43 | verbGroup_ = &verbGroup; | 41 | verbGroup_ = &verbGroup; |
| 44 | } | 42 | } |
| 45 | 43 | ||
| 46 | database& operator<<(database& db, const word& arg) | 44 | hatkirby::database& operator<<(hatkirby::database& db, const word& arg) |
| 47 | { | 45 | { |
| 48 | std::list<field> fields; | 46 | std::list<hatkirby::column> fields; |
| 49 | 47 | ||
| 50 | fields.emplace_back("word_id", arg.getId()); | 48 | fields.emplace_back("word_id", arg.getId()); |
| 51 | fields.emplace_back("notion_id", arg.getNotion().getId()); | 49 | fields.emplace_back("notion_id", arg.getNotion().getId()); |
| @@ -59,7 +57,8 @@ namespace verbly { | |||
| 59 | if ((arg.getNotion().getPartOfSpeech() == part_of_speech::adjective) | 57 | if ((arg.getNotion().getPartOfSpeech() == part_of_speech::adjective) |
| 60 | && (arg.getAdjectivePosition() != positioning::undefined)) | 58 | && (arg.getAdjectivePosition() != positioning::undefined)) |
| 61 | { | 59 | { |
| 62 | fields.emplace_back("position", static_cast<int>(arg.getAdjectivePosition())); | 60 | fields.emplace_back("position", |
| 61 | static_cast<int>(arg.getAdjectivePosition())); | ||
| 63 | } | 62 | } |
| 64 | 63 | ||
| 65 | if ((arg.getNotion().getPartOfSpeech() == part_of_speech::verb) | 64 | if ((arg.getNotion().getPartOfSpeech() == part_of_speech::verb) |
| diff --git a/generator/word.h b/generator/word.h index a994ec3..2e469d4 100644 --- a/generator/word.h +++ b/generator/word.h | |||
| @@ -2,6 +2,7 @@ | |||
| 2 | #define WORD_H_91F99D46 | 2 | #define WORD_H_91F99D46 |
| 3 | 3 | ||
| 4 | #include <cassert> | 4 | #include <cassert> |
| 5 | #include <hkutil/database.h> | ||
| 5 | #include "../lib/enums.h" | 6 | #include "../lib/enums.h" |
| 6 | 7 | ||
| 7 | namespace verbly { | 8 | namespace verbly { |
| @@ -9,7 +10,6 @@ namespace verbly { | |||
| 9 | 10 | ||
| 10 | class notion; | 11 | class notion; |
| 11 | class lemma; | 12 | class lemma; |
| 12 | class database; | ||
| 13 | class group; | 13 | class group; |
| 14 | 14 | ||
| 15 | class word { | 15 | class word { |
| @@ -102,7 +102,7 @@ namespace verbly { | |||
| 102 | 102 | ||
| 103 | // Serializer | 103 | // Serializer |
| 104 | 104 | ||
| 105 | database& operator<<(database& db, const word& arg); | 105 | hatkirby::database& operator<<(hatkirby::database& db, const word& arg); |
| 106 | 106 | ||
| 107 | }; | 107 | }; |
| 108 | }; | 108 | }; |
