From 75e947fa0021547f460496d1c3aef5b61af4c669 Mon Sep 17 00:00:00 2001 From: Kelly Rauchenberger Date: Sat, 31 Mar 2018 23:05:02 -0400 Subject: Migrated generator to hkutil --- .gitmodules | 3 + generator/CMakeLists.txt | 8 +- generator/form.cpp | 35 +++--- generator/form.h | 3 +- generator/frame.cpp | 2 - generator/frame.h | 10 +- generator/generator.cpp | 275 +++++++++++++++++++++++++++++--------------- generator/generator.h | 2 +- generator/group.cpp | 59 +++++----- generator/group.h | 4 +- generator/lemma.cpp | 17 ++- generator/lemma.h | 4 +- generator/notion.cpp | 18 +-- generator/notion.h | 22 ++-- generator/progress.h | 56 --------- generator/pronunciation.cpp | 55 ++++++--- generator/pronunciation.h | 7 +- generator/word.cpp | 9 +- generator/word.h | 4 +- vendor/hkutil | 1 + 20 files changed, 332 insertions(+), 262 deletions(-) delete mode 100644 generator/progress.h create mode 160000 vendor/hkutil diff --git a/.gitmodules b/.gitmodules index e69de29..2ed8c31 100644 --- a/.gitmodules +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "vendor/hkutil"] + path = vendor/hkutil + url = git@github.com:hatkirby/hkutil diff --git a/generator/CMakeLists.txt b/generator/CMakeLists.txt index 95a11b5..8c070d2 100644 --- a/generator/CMakeLists.txt +++ b/generator/CMakeLists.txt @@ -5,8 +5,12 @@ find_package(PkgConfig) pkg_check_modules(sqlite3 sqlite3 REQUIRED) find_package(libxml2 REQUIRED) -include_directories(${sqlite3_INCLUDE_DIR} ${LIBXML2_INCLUDE_DIR}) -add_executable(generator notion.cpp word.cpp lemma.cpp form.cpp pronunciation.cpp group.cpp frame.cpp part.cpp database.cpp field.cpp generator.cpp main.cpp) +include_directories( + ${sqlite3_INCLUDE_DIR} + ${LIBXML2_INCLUDE_DIR} + ../vendor/hkutil) + +add_executable(generator notion.cpp word.cpp lemma.cpp form.cpp pronunciation.cpp group.cpp frame.cpp part.cpp generator.cpp main.cpp) set_property(TARGET generator PROPERTY CXX_STANDARD 11) set_property(TARGET generator PROPERTY CXX_STANDARD_REQUIRED ON) target_link_libraries(generator ${sqlite3_LIBRARIES} ${LIBXML2_LIBRARIES}) diff --git a/generator/form.cpp b/generator/form.cpp index f616344..c66820c 100644 --- a/generator/form.cpp +++ b/generator/form.cpp @@ -1,8 +1,7 @@ #include "form.h" #include #include -#include "database.h" -#include "field.h" +#include #include "pronunciation.h" namespace verbly { @@ -14,7 +13,7 @@ namespace verbly { id_(nextId_++), text_(text), complexity_(std::count(std::begin(text), std::end(text), ' ') + 1), - proper_(std::any_of(std::begin(text), std::end(text), std::isupper)), + proper_(std::any_of(std::begin(text), std::end(text), ::isupper)), length_(text.length()) { } @@ -24,28 +23,30 @@ namespace verbly { pronunciations_.insert(&p); } - database& operator<<(database& db, const form& arg) + hatkirby::database& operator<<(hatkirby::database& db, const form& arg) { // Serialize the form first. { - std::list fields; - fields.emplace_back("form_id", arg.getId()); - fields.emplace_back("form", arg.getText()); - fields.emplace_back("complexity", arg.getComplexity()); - fields.emplace_back("proper", arg.isProper()); - fields.emplace_back("length", arg.getLength()); - - db.insertIntoTable("forms", std::move(fields)); + db.insertIntoTable( + "forms", + { + { "form_id", arg.getId() }, + { "form", arg.getText() }, + { "complexity", arg.getComplexity() }, + { "proper", arg.isProper() }, + { "length", arg.getLength() } + }); } // Then, serialize the form/pronunciation relationship. for (const pronunciation* p : arg.getPronunciations()) { - std::list fields; - fields.emplace_back("form_id", arg.getId()); - fields.emplace_back("pronunciation_id", p->getId()); - - db.insertIntoTable("forms_pronunciations", std::move(fields)); + db.insertIntoTable( + "forms_pronunciations", + { + { "form_id", arg.getId() }, + { "pronunciation_id", p->getId() } + }); } return db; diff --git a/generator/form.h b/generator/form.h index 37fd3cc..f3dd779 100644 --- a/generator/form.h +++ b/generator/form.h @@ -3,6 +3,7 @@ #include #include +#include namespace verbly { namespace generator { @@ -68,7 +69,7 @@ namespace verbly { // Serializer - database& operator<<(database& db, const form& arg); + hatkirby::database& operator<<(hatkirby::database& db, const form& arg); }; }; diff --git a/generator/frame.cpp b/generator/frame.cpp index 4e4ac5f..60cedc4 100644 --- a/generator/frame.cpp +++ b/generator/frame.cpp @@ -1,6 +1,4 @@ #include "frame.h" -#include "database.h" -#include "field.h" namespace verbly { namespace generator { diff --git a/generator/frame.h b/generator/frame.h index d26d500..3e15b39 100644 --- a/generator/frame.h +++ b/generator/frame.h @@ -8,8 +8,6 @@ namespace verbly { namespace generator { - class database; - class frame { public: @@ -20,9 +18,9 @@ namespace verbly { // Constructor frame(); - + // Duplication - + static frame duplicate(const frame& other); // Mutators @@ -35,12 +33,12 @@ namespace verbly { { return id_; } - + int getLength() const { return parts_.size(); } - + const part& operator[](int index) const { return parts_.at(index); diff --git a/generator/generator.cpp b/generator/generator.cpp index e34ca69..785ec87 100644 --- a/generator/generator.cpp +++ b/generator/generator.cpp @@ -1,16 +1,14 @@ #include "generator.h" -#include #include #include #include #include #include -#include "../lib/enums.h" -#include "progress.h" +#include +#include #include "role.h" #include "part.h" -#include "field.h" -#include "../lib/util.h" +#include "../lib/enums.h" #include "../lib/version.h" namespace verbly { @@ -28,7 +26,7 @@ namespace verbly { wordNetPath_(wordNetPath), cmudictPath_(cmudictPath), imageNetPath_(imageNetPath), - db_(outputPath) + db_(outputPath, hatkirby::dbmode::create) { // Ensure VerbNet directory exists DIR* dir; @@ -53,7 +51,8 @@ namespace verbly { // Ensure WordNet tables exist for (std::string table : { - "s", "sk", "ant", "at", "cls", "hyp", "ins", "mm", "mp", "ms", "per", "sa", "sim", "syntax" + "s", "sk", "ant", "at", "cls", "hyp", "ins", "mm", "mp", "ms", "per", + "sa", "sim", "syntax" }) { if (!std::ifstream(wordNetPath_ + "wn_" + table + ".pl")) @@ -166,13 +165,15 @@ namespace verbly { void generator::readWordNetSynsets() { std::list lines(readFile(wordNetPath_ + "wn_s.pl")); - progress ppgs("Reading synsets from WordNet...", lines.size()); + hatkirby::progress ppgs("Reading synsets from WordNet...", lines.size()); for (std::string line : lines) { ppgs.update(); - std::regex relation("^s\\(([1234]\\d{8}),(\\d+),'(.+)',\\w,\\d+,(\\d+)\\)\\.$"); + std::regex relation( + "^s\\(([1234]\\d{8}),(\\d+),'(.+)',\\w,\\d+,(\\d+)\\)\\.$"); + std::smatch relation_data; if (!std::regex_search(line, relation_data, relation)) { @@ -206,7 +207,10 @@ namespace verbly { void generator::readAdjectivePositioning() { std::list lines(readFile(wordNetPath_ + "wn_syntax.pl")); - progress ppgs("Reading adjective positionings from WordNet...", lines.size()); + + hatkirby::progress ppgs( + "Reading adjective positionings from WordNet...", + lines.size()); for (std::string line : lines) { @@ -279,7 +283,10 @@ namespace verbly { void generator::readWordNetSenseKeys() { std::list lines(readFile(wordNetPath_ + "wn_sk.pl")); - progress ppgs("Reading sense keys from WordNet...", lines.size()); + + hatkirby::progress ppgs( + "Reading sense keys from WordNet...", + lines.size()); for (std::string line : lines) { @@ -350,7 +357,8 @@ namespace verbly { } xmlNodePtr top = xmlDocGetRootElement(doc); - if ((top == nullptr) || (xmlStrcmp(top->name, reinterpret_cast("VNCLASS")))) + if ((top == nullptr) || + (xmlStrcmp(top->name, reinterpret_cast("VNCLASS")))) { throw std::logic_error("Bad VerbNet file format: " + filename); } @@ -360,7 +368,8 @@ namespace verbly { createGroup(top); } catch (const std::exception& e) { - std::throw_with_nested(std::logic_error("Error parsing VerbNet file: " + filename)); + std::throw_with_nested( + std::logic_error("Error parsing VerbNet file: " + filename)); } } @@ -370,7 +379,7 @@ namespace verbly { void generator::readAgidInflections() { std::list lines(readFile(agidPath_)); - progress ppgs("Reading inflections from AGID...", lines.size()); + hatkirby::progress ppgs("Reading inflections from AGID...", lines.size()); for (std::string line : lines) { @@ -395,12 +404,17 @@ namespace verbly { lemma& curLemma = lookupOrCreateLemma(infinitive); + auto inflWordList = + hatkirby::split>(line, " | "); + std::vector> agidForms; - for (std::string inflForms : split>(line, " | ")) + for (std::string inflForms : inflWordList) { - std::list forms; + auto inflFormList = + hatkirby::split>(std::move(inflForms), ", "); - for (std::string inflForm : split>(std::move(inflForms), ", ")) + std::list forms; + for (std::string inflForm : inflFormList) { int sympos = inflForm.find_first_of("~getNotion().getPartOfSpeech() == part_of_speech::verb; - })) + || !std::any_of( + std::begin(wordsByBaseForm_.at(infinitive)), + std::end(wordsByBaseForm_.at(infinitive)), + [] (word* w) { + return (w->getNotion().getPartOfSpeech() == + part_of_speech::verb); + })) { notion& n = createNotion(part_of_speech::verb); createWord(n, curLemma); @@ -471,7 +490,8 @@ namespace verbly { mappedForms[inflection::superlative] = agidForms[1]; } else { // As of AGID 2014.08.11, this is only "only", which has only the form "onliest" - std::cout << " Ignoring adjective/adverb \"" << infinitive << "\" due to non-standard number of forms." << std::endl; + std::cout << " Ignoring adjective/adverb \"" << infinitive + << "\" due to non-standard number of forms." << std::endl; } break; @@ -484,7 +504,8 @@ namespace verbly { mappedForms[inflection::plural] = agidForms[0]; } else { // As of AGID 2014.08.11, this is non-existent. - std::cout << " Ignoring noun \"" << infinitive << "\" due to non-standard number of forms." << std::endl; + std::cout << " Ignoring noun \"" << infinitive + << "\" due to non-standard number of forms." << std::endl; } break; @@ -496,7 +517,9 @@ namespace verbly { { for (std::string infl : std::move(mapping.second)) { - curLemma.addInflection(mapping.first, lookupOrCreateForm(std::move(infl))); + curLemma.addInflection( + mapping.first, + lookupOrCreateForm(std::move(infl))); } } } @@ -505,7 +528,7 @@ namespace verbly { void generator::readPrepositions() { std::list lines(readFile("prepositions.txt")); - progress ppgs("Reading prepositions...", lines.size()); + hatkirby::progress ppgs("Reading prepositions...", lines.size()); for (std::string line : lines) { @@ -515,7 +538,9 @@ namespace verbly { std::smatch relation_data; std::regex_search(line, relation_data, relation); std::string prep = relation_data[1]; - auto groups = split>(relation_data[2], ", "); + + auto groups = + hatkirby::split>(relation_data[2], ", "); notion& n = createNotion(part_of_speech::preposition); lemma& l = lookupOrCreateLemma(prep); @@ -528,7 +553,10 @@ namespace verbly { void generator::readCmudictPronunciations() { std::list lines(readFile(cmudictPath_)); - progress ppgs("Reading pronunciations from CMUDICT...", lines.size()); + + hatkirby::progress ppgs( + "Reading pronunciations from CMUDICT...", + lines.size()); for (std::string line : lines) { @@ -538,8 +566,7 @@ namespace verbly { std::smatch phoneme_data; if (std::regex_search(line, phoneme_data, phoneme)) { - std::string canonical(phoneme_data[1]); - std::transform(std::begin(canonical), std::end(canonical), std::begin(canonical), ::tolower); + std::string canonical = hatkirby::lowercase(phoneme_data[1]); if (!formByText_.count(canonical)) { @@ -575,13 +602,14 @@ namespace verbly { } std::string schema = schemaBuilder.str(); - auto queries = split>(schema, ";"); - progress ppgs("Writing database schema...", queries.size()); + auto queries = hatkirby::split>(schema, ";"); + + hatkirby::progress ppgs("Writing database schema...", queries.size()); for (std::string query : queries) { if (!queries.empty()) { - db_.runQuery(query); + db_.execute(query); } ppgs.update(); @@ -590,10 +618,6 @@ namespace verbly { void generator::writeVersion() { - std::list fields; - fields.emplace_back("major", DATABASE_MAJOR_VERSION); - fields.emplace_back("minor", DATABASE_MINOR_VERSION); - db_.insertIntoTable( "version", { @@ -605,7 +629,7 @@ namespace verbly { void generator::dumpObjects() { { - progress ppgs("Writing notions...", notions_.size()); + hatkirby::progress ppgs("Writing notions...", notions_.size()); for (notion& n : notions_) { @@ -616,7 +640,7 @@ namespace verbly { } { - progress ppgs("Writing words...", words_.size()); + hatkirby::progress ppgs("Writing words...", words_.size()); for (word& w : words_) { @@ -627,7 +651,7 @@ namespace verbly { } { - progress ppgs("Writing lemmas...", lemmas_.size()); + hatkirby::progress ppgs("Writing lemmas...", lemmas_.size()); for (lemma& l : lemmas_) { @@ -638,7 +662,7 @@ namespace verbly { } { - progress ppgs("Writing forms...", forms_.size()); + hatkirby::progress ppgs("Writing forms...", forms_.size()); for (form& f : forms_) { @@ -649,7 +673,7 @@ namespace verbly { } { - progress ppgs("Writing pronunciations...", pronunciations_.size()); + hatkirby::progress ppgs("Writing pronunciations...", pronunciations_.size()); for (pronunciation& p : pronunciations_) { @@ -660,7 +684,7 @@ namespace verbly { } { - progress ppgs("Writing verb frames...", groups_.size()); + hatkirby::progress ppgs("Writing verb frames...", groups_.size()); for (group& g : groups_) { @@ -674,22 +698,30 @@ namespace verbly { void generator::readWordNetAntonymy() { std::list lines(readFile(wordNetPath_ + "wn_ant.pl")); - progress ppgs("Writing antonyms...", lines.size()); + hatkirby::progress ppgs("Writing antonyms...", lines.size()); for (auto line : lines) { ppgs.update(); - std::regex relation("^ant\\(([134]\\d{8}),(\\d+),([134]\\d{8}),(\\d+)\\)\\."); + std::regex relation( + "^ant\\(([134]\\d{8}),(\\d+),([134]\\d{8}),(\\d+)\\)\\."); + std::smatch relation_data; if (!std::regex_search(line, relation_data, relation)) { continue; } - std::pair lookup1(std::stoi(relation_data[1]), std::stoi(relation_data[2])); - std::pair lookup2(std::stoi(relation_data[3]), std::stoi(relation_data[4])); + std::pair lookup1( + std::stoi(relation_data[1]), + std::stoi(relation_data[2])); + + std::pair lookup2( + std::stoi(relation_data[3]), + std::stoi(relation_data[4])); - if (wordByWnidAndWnum_.count(lookup1) && wordByWnidAndWnum_.count(lookup2)) + if (wordByWnidAndWnum_.count(lookup1) && + wordByWnidAndWnum_.count(lookup2)) { word& word1 = *wordByWnidAndWnum_.at(lookup1); word& word2 = *wordByWnidAndWnum_.at(lookup2); @@ -707,7 +739,7 @@ namespace verbly { void generator::readWordNetVariation() { std::list lines(readFile(wordNetPath_ + "wn_at.pl")); - progress ppgs("Writing variation...", lines.size()); + hatkirby::progress ppgs("Writing variation...", lines.size()); for (auto line : lines) { ppgs.update(); @@ -730,7 +762,7 @@ namespace verbly { db_.insertIntoTable( "variation", { - { "noun_id", notion1.getId() } + { "noun_id", notion1.getId() }, { "adjective_id", notion2.getId() } }); } @@ -740,20 +772,32 @@ namespace verbly { void generator::readWordNetClasses() { std::list lines(readFile(wordNetPath_ + "wn_cls.pl")); - progress ppgs("Writing usage, topicality, and regionality...", lines.size()); + + hatkirby::progress ppgs( + "Writing usage, topicality, and regionality...", + lines.size()); + for (auto line : lines) { ppgs.update(); - std::regex relation("^cls\\(([134]\\d{8}),(\\d+),(1\\d{8}),(\\d+),([tur])\\)\\."); + std::regex relation( + "^cls\\(([134]\\d{8}),(\\d+),(1\\d{8}),(\\d+),([tur])\\)\\."); + std::smatch relation_data; if (!std::regex_search(line, relation_data, relation)) { continue; } - std::pair lookup1(std::stoi(relation_data[1]), std::stoi(relation_data[2])); - std::pair lookup2(std::stoi(relation_data[3]), std::stoi(relation_data[4])); + std::pair lookup1( + std::stoi(relation_data[1]), + std::stoi(relation_data[2])); + + std::pair lookup2( + std::stoi(relation_data[3]), + std::stoi(relation_data[4])); + std::string class_type = relation_data[5]; std::string table_name; @@ -773,18 +817,30 @@ namespace verbly { if ((lookup1.second == 0) && (wordsByWnid_.count(lookup1.first))) { - std::transform(std::begin(wordsByWnid_.at(lookup1.first)), std::end(wordsByWnid_.at(lookup1.first)), std::back_inserter(leftJoin), [] (word* w) { - return w->getId(); - }); + auto& wordSet = wordsByWnid_.at(lookup1.first); + + std::transform( + std::begin(wordSet), + std::end(wordSet), + std::back_inserter(leftJoin), + [] (word* w) { + return w->getId(); + }); } else if (wordByWnidAndWnum_.count(lookup1)) { leftJoin.push_back(wordByWnidAndWnum_.at(lookup1)->getId()); } if ((lookup2.second == 0) && (wordsByWnid_.count(lookup2.first))) { - std::transform(std::begin(wordsByWnid_.at(lookup2.first)), std::end(wordsByWnid_.at(lookup2.first)), std::back_inserter(rightJoin), [] (word* w) { - return w->getId(); - }); + auto& wordSet = wordsByWnid_.at(lookup2.first); + + std::transform( + std::begin(wordSet), + std::end(wordSet), + std::back_inserter(rightJoin), + [] (word* w) { + return w->getId(); + }); } else if (wordByWnidAndWnum_.count(lookup2)) { rightJoin.push_back(wordByWnidAndWnum_.at(lookup2)->getId()); } @@ -807,7 +863,7 @@ namespace verbly { void generator::readWordNetCausality() { std::list lines(readFile(wordNetPath_ + "wn_cs.pl")); - progress ppgs("Writing causality...", lines.size()); + hatkirby::progress ppgs("Writing causality...", lines.size()); for (auto line : lines) { ppgs.update(); @@ -840,7 +896,7 @@ namespace verbly { void generator::readWordNetEntailment() { std::list lines(readFile(wordNetPath_ + "wn_ent.pl")); - progress ppgs("Writing entailment...", lines.size()); + hatkirby::progress ppgs("Writing entailment...", lines.size()); for (auto line : lines) { ppgs.update(); @@ -873,7 +929,7 @@ namespace verbly { void generator::readWordNetHypernymy() { std::list lines(readFile(wordNetPath_ + "wn_hyp.pl")); - progress ppgs("Writing hypernymy...", lines.size()); + hatkirby::progress ppgs("Writing hypernymy...", lines.size()); for (auto line : lines) { ppgs.update(); @@ -906,7 +962,7 @@ namespace verbly { void generator::readWordNetInstantiation() { std::list lines(readFile(wordNetPath_ + "wn_ins.pl")); - progress ppgs("Writing instantiation...", lines.size()); + hatkirby::progress ppgs("Writing instantiation...", lines.size()); for (auto line : lines) { ppgs.update(); @@ -939,7 +995,7 @@ namespace verbly { void generator::readWordNetMemberMeronymy() { std::list lines(readFile(wordNetPath_ + "wn_mm.pl")); - progress ppgs("Writing member meronymy...", lines.size()); + hatkirby::progress ppgs("Writing member meronymy...", lines.size()); for (auto line : lines) { ppgs.update(); @@ -972,7 +1028,7 @@ namespace verbly { void generator::readWordNetPartMeronymy() { std::list lines(readFile(wordNetPath_ + "wn_mp.pl")); - progress ppgs("Writing part meronymy...", lines.size()); + hatkirby::progress ppgs("Writing part meronymy...", lines.size()); for (auto line : lines) { ppgs.update(); @@ -1005,7 +1061,7 @@ namespace verbly { void generator::readWordNetSubstanceMeronymy() { std::list lines(readFile(wordNetPath_ + "wn_ms.pl")); - progress ppgs("Writing substance meronymy...", lines.size()); + hatkirby::progress ppgs("Writing substance meronymy...", lines.size()); for (auto line : lines) { ppgs.update(); @@ -1038,27 +1094,40 @@ namespace verbly { void generator::readWordNetPertainymy() { std::list lines(readFile(wordNetPath_ + "wn_per.pl")); - progress ppgs("Writing pertainymy and mannernymy...", lines.size()); + + hatkirby::progress ppgs( + "Writing pertainymy and mannernymy...", + lines.size()); + for (auto line : lines) { ppgs.update(); - std::regex relation("^per\\(([34]\\d{8}),(\\d+),([13]\\d{8}),(\\d+)\\)\\."); + std::regex relation( + "^per\\(([34]\\d{8}),(\\d+),([13]\\d{8}),(\\d+)\\)\\."); + std::smatch relation_data; if (!std::regex_search(line, relation_data, relation)) { continue; } - std::pair lookup1(std::stoi(relation_data[1]), std::stoi(relation_data[2])); - std::pair lookup2(std::stoi(relation_data[3]), std::stoi(relation_data[4])); + std::pair lookup1( + std::stoi(relation_data[1]), + std::stoi(relation_data[2])); - if (wordByWnidAndWnum_.count(lookup1) && wordByWnidAndWnum_.count(lookup2)) + std::pair lookup2( + std::stoi(relation_data[3]), + std::stoi(relation_data[4])); + + if (wordByWnidAndWnum_.count(lookup1) && + wordByWnidAndWnum_.count(lookup2)) { word& word1 = *wordByWnidAndWnum_.at(lookup1); word& word2 = *wordByWnidAndWnum_.at(lookup2); - if (word1.getNotion().getPartOfSpeech() == part_of_speech::adjective) + if (word1.getNotion().getPartOfSpeech() == + part_of_speech::adjective) { db_.insertIntoTable( "pertainymy", @@ -1066,7 +1135,8 @@ namespace verbly { { "pertainym_id", word1.getId() }, { "noun_id", word2.getId() } }); - } else if (word1.getNotion().getPartOfSpeech() == part_of_speech::adverb) + } else if (word1.getNotion().getPartOfSpeech() == + part_of_speech::adverb) { db_.insertIntoTable( "mannernymy", @@ -1082,7 +1152,7 @@ namespace verbly { void generator::readWordNetSpecification() { std::list lines(readFile(wordNetPath_ + "wn_sa.pl")); - progress ppgs("Writing specifications...", lines.size()); + hatkirby::progress ppgs("Writing specifications...", lines.size()); for (auto line : lines) { ppgs.update(); @@ -1094,10 +1164,17 @@ namespace verbly { continue; } - std::pair lookup1(std::stoi(relation_data[1]), std::stoi(relation_data[2])); - std::pair lookup2(std::stoi(relation_data[3]), std::stoi(relation_data[4])); + std::pair lookup1( + std::stoi(relation_data[1]), + std::stoi(relation_data[2])); + + std::pair lookup2( + std::stoi(relation_data[3]), + std::stoi(relation_data[4])); - if (wordByWnidAndWnum_.count(lookup1) && wordByWnidAndWnum_.count(lookup2)) + + if (wordByWnidAndWnum_.count(lookup1) && + wordByWnidAndWnum_.count(lookup2)) { word& word1 = *wordByWnidAndWnum_.at(lookup1); word& word2 = *wordByWnidAndWnum_.at(lookup2); @@ -1115,7 +1192,7 @@ namespace verbly { void generator::readWordNetSimilarity() { std::list lines(readFile(wordNetPath_ + "wn_sim.pl")); - progress ppgs("Writing adjective similarity...", lines.size()); + hatkirby::progress ppgs("Writing adjective similarity...", lines.size()); for (auto line : lines) { ppgs.update(); @@ -1149,7 +1226,7 @@ namespace verbly { { std::cout << "Analyzing data..." << std::endl; - db_.runQuery("ANALYZE"); + db_.execute("ANALYZE"); } std::list generator::readFile(std::string path) @@ -1183,7 +1260,8 @@ namespace verbly { case 2: return part_of_speech::verb; case 3: return part_of_speech::adjective; case 4: return part_of_speech::adverb; - default: throw std::domain_error("Invalid WordNet synset ID: " + std::to_string(wnid)); + default: throw std::domain_error( + "Invalid WordNet synset ID: " + std::to_string(wnid)); } } @@ -1296,20 +1374,30 @@ namespace verbly { std::string wnSenses(reinterpret_cast(key)); xmlFree(key); - auto wnSenseKeys = split>(wnSenses, " "); + auto wnSenseKeys = + hatkirby::split>(wnSenses, " "); + if (!wnSenseKeys.empty()) { std::list tempKeys; - std::transform(std::begin(wnSenseKeys), std::end(wnSenseKeys), std::back_inserter(tempKeys), [] (std::string sense) { - return sense + "::"; - }); + std::transform( + std::begin(wnSenseKeys), + std::end(wnSenseKeys), + std::back_inserter(tempKeys), + [] (std::string sense) { + return sense + "::"; + }); std::list filteredKeys; - std::remove_copy_if(std::begin(tempKeys), std::end(tempKeys), std::back_inserter(filteredKeys), [&] (std::string sense) { - return !wnSenseKeys_.count(sense); - }); + std::remove_copy_if( + std::begin(tempKeys), + std::end(tempKeys), + std::back_inserter(filteredKeys), + [&] (std::string sense) { + return !wnSenseKeys_.count(sense); + }); wnSenseKeys = std::move(filteredKeys); } @@ -1431,10 +1519,15 @@ namespace verbly { std::string choicesStr = reinterpret_cast(key); xmlFree(key); - for (std::string choice : split>(choicesStr, " ")) + auto choices = + hatkirby::split>( + choicesStr, " "); + + for (std::string choice : choices) { int chloc; - while ((chloc = choice.find_first_of("_")) != std::string::npos) + while ((chloc = choice.find_first_of("_")) + != std::string::npos) { choice.replace(chloc, 1, " "); } @@ -1444,7 +1537,9 @@ namespace verbly { } else { partLiteral = false; - for (xmlNodePtr npnode = syntaxnode->xmlChildrenNode; npnode != nullptr; npnode = npnode->next) + for (xmlNodePtr npnode = syntaxnode->xmlChildrenNode; + npnode != nullptr; + npnode = npnode->next) { if (!xmlStrcmp(npnode->name, reinterpret_cast("SELRESTRS"))) { diff --git a/generator/generator.h b/generator/generator.h index 52073bc..cd99f88 100644 --- a/generator/generator.h +++ b/generator/generator.h @@ -6,7 +6,7 @@ #include #include #include -#include "database.h" +#include #include "notion.h" #include "word.h" #include "lemma.h" diff --git a/generator/group.cpp b/generator/group.cpp index 5b23578..1ffb9d9 100644 --- a/generator/group.cpp +++ b/generator/group.cpp @@ -1,10 +1,8 @@ #include "group.h" #include #include -#include "database.h" -#include "field.h" +#include #include "frame.h" -#include "../lib/util.h" namespace verbly { namespace generator { @@ -50,28 +48,26 @@ namespace verbly { return roles_.at(name); } - database& operator<<(database& db, const group& arg) + hatkirby::database& operator<<(hatkirby::database& db, const group& arg) { // Serialize each frame for (const frame& f : arg.getFrames()) { // First, serialize the group/frame relationship - { - std::list fields; - - fields.emplace_back("frame_id", f.getId()); - fields.emplace_back("group_id", arg.getId()); - fields.emplace_back("length", f.getLength()); - - db.insertIntoTable("frames", std::move(fields)); - } + db.insertIntoTable( + "frames", + { + { "frame_id", f.getId() }, + { "group_id", arg.getId() }, + { "length", f.getLength() } + }); // Then, serialize the frame parts in the context of the group for (int partIndex = 0; partIndex < f.getLength(); partIndex++) { const part& p = f[partIndex]; - std::list fields; + std::list fields; fields.emplace_back("part_id", p.getId()); fields.emplace_back("frame_id", f.getId()); fields.emplace_back("part_index", partIndex); @@ -92,23 +88,23 @@ namespace verbly { for (const std::string& s : partSelrestrs) { - std::list selrestrFields; - - selrestrFields.emplace_back("part_id", p.getId()); - selrestrFields.emplace_back("selrestr", s); - - db.insertIntoTable("selrestrs", std::move(selrestrFields)); + db.insertIntoTable( + "selrestrs", + { + { "part_id", p.getId() }, + { "selrestr", s } + }); } // Short interlude to serialize the synrestrs for (const std::string& s : p.getNounSynrestrs()) { - std::list synrestrFields; - - synrestrFields.emplace_back("part_id", p.getId()); - synrestrFields.emplace_back("synrestr", s); - - db.insertIntoTable("synrestrs", std::move(synrestrFields)); + db.insertIntoTable( + "synrestrs", + { + { "part_id", p.getId() }, + { "synrestr", s } + }); } break; @@ -117,10 +113,17 @@ namespace verbly { case part::type::preposition: { std::set setChoices = p.getPrepositionChoices(); - std::string serializedChoices = implode(std::begin(setChoices), std::end(setChoices), ","); + + std::string serializedChoices = + hatkirby::implode( + std::begin(setChoices), + std::end(setChoices), + ","); fields.emplace_back("prepositions", std::move(serializedChoices)); - fields.emplace_back("preposition_literality", p.isPrepositionLiteral() ? 1 : 0); + + fields.emplace_back("preposition_literality", + p.isPrepositionLiteral() ? 1 : 0); break; } diff --git a/generator/group.h b/generator/group.h index a7f3a17..f912920 100644 --- a/generator/group.h +++ b/generator/group.h @@ -6,13 +6,13 @@ #include #include #include +#include #include "role.h" namespace verbly { namespace generator { class frame; - class database; class group { public: @@ -67,7 +67,7 @@ namespace verbly { // Serializer - database& operator<<(database& db, const group& arg); + hatkirby::database& operator<<(hatkirby::database& db, const group& arg); }; }; diff --git a/generator/lemma.cpp b/generator/lemma.cpp index e66b153..33ab037 100644 --- a/generator/lemma.cpp +++ b/generator/lemma.cpp @@ -1,8 +1,6 @@ #include "lemma.h" #include #include -#include "field.h" -#include "database.h" #include "form.h" namespace verbly { @@ -35,7 +33,7 @@ namespace verbly { } } - database& operator<<(database& db, const lemma& arg) + hatkirby::database& operator<<(hatkirby::database& db, const lemma& arg) { for (inflection type : { inflection::base, @@ -49,12 +47,13 @@ namespace verbly { { for (const form* f : arg.getInflections(type)) { - std::list fields; - fields.emplace_back("lemma_id", arg.getId()); - fields.emplace_back("form_id", f->getId()); - fields.emplace_back("category", static_cast(type)); - - db.insertIntoTable("lemmas_forms", std::move(fields)); + db.insertIntoTable( + "lemmas_forms", + { + { "lemma_id", arg.getId() }, + { "form_id", f->getId() }, + { "category", static_cast(type) } + }); } } diff --git a/generator/lemma.h b/generator/lemma.h index f68667f..f7d5491 100644 --- a/generator/lemma.h +++ b/generator/lemma.h @@ -4,12 +4,12 @@ #include #include #include +#include #include "../lib/enums.h" namespace verbly { namespace generator { - class database; class form; class lemma { @@ -50,7 +50,7 @@ namespace verbly { // Serializer - database& operator<<(database& db, const lemma& arg); + hatkirby::database& operator<<(hatkirby::database& db, const lemma& arg); }; }; diff --git a/generator/notion.cpp b/generator/notion.cpp index 35ba7b1..f388767 100644 --- a/generator/notion.cpp +++ b/generator/notion.cpp @@ -1,8 +1,4 @@ #include "notion.h" -#include -#include -#include "database.h" -#include "field.h" namespace verbly { namespace generator { @@ -28,21 +24,25 @@ namespace verbly { void notion::incrementNumOfImages() { - // Calling code should always call hasWnid and check that the notion is a noun first. - assert(hasWnid_ && (partOfSpeech_ == part_of_speech::noun)); + if (!hasWnid_ || (partOfSpeech_ != part_of_speech::noun)) + { + throw std::domain_error("Notion is not a noun with wnid"); + } numOfImages_++; } void notion::setPrepositionGroups(std::list groups) { - // Calling code should always check that the notion is a preposition first. - assert(partOfSpeech_ == part_of_speech::preposition); + if (partOfSpeech_ != part_of_speech::preposition) + { + throw std::domain_error("Notion is not a preposition"); + } prepositionGroups_ = groups; } - database& operator<<(database& db, const notion& arg) + hatkirby::database& operator<<(hatkirby::database& db, const notion& arg) { // First, serialize the notion { diff --git a/generator/notion.h b/generator/notion.h index 817e66a..3bff916 100644 --- a/generator/notion.h +++ b/generator/notion.h @@ -1,9 +1,9 @@ #ifndef NOTION_H_221DE2BC #define NOTION_H_221DE2BC -#include #include #include +#include #include "../lib/enums.h" namespace verbly { @@ -43,24 +43,30 @@ namespace verbly { int getWnid() const { - // Calling code should always call hasWnid first. - assert(hasWnid_); + if (!hasWnid_) + { + throw std::domain_error("Notion does not have wnid"); + } return wnid_; } int getNumOfImages() const { - // Calling code should always call hasWnid and check that the notion is a noun first. - assert(hasWnid_ && (partOfSpeech_ == part_of_speech::noun)); + if (!hasWnid_ || (partOfSpeech_ != part_of_speech::noun)) + { + throw std::domain_error("Notion is not a noun with wnid"); + } return numOfImages_; } std::list getPrepositionGroups() const { - // Calling code should always check that the notion is a preposition first. - assert(partOfSpeech_ == part_of_speech::preposition); + if (partOfSpeech_ != part_of_speech::preposition) + { + throw std::domain_error("Notion is not a preposition"); + } return prepositionGroups_; } @@ -81,7 +87,7 @@ namespace verbly { // Serializer - database& operator<<(database& db, const notion& arg); + hatkirby::database& operator<<(hatkirby::database& db, const notion& arg); }; }; diff --git a/generator/progress.h b/generator/progress.h deleted file mode 100644 index 76cde48..0000000 --- a/generator/progress.h +++ /dev/null @@ -1,56 +0,0 @@ -#ifndef PROGRESS_H_A34EF856 -#define PROGRESS_H_A34EF856 - -#include - -namespace verbly { - namespace generator { - - class progress { - private: - std::string message; - int total; - int cur = 0; - int lprint = 0; - - public: - progress(std::string message, int total) : message(message), total(total) - { - std::cout << message << " 0%" << std::flush; - } - - void update(int val) - { - if (val <= total) - { - cur = val; - } else { - cur = total; - } - - int pp = cur * 100 / total; - if (pp != lprint) - { - lprint = pp; - - std::cout << "\b\b\b\b" << std::right; - std::cout.width(3); - std::cout << pp << "%" << std::flush; - } - } - - void update() - { - update(cur+1); - } - - ~progress() - { - std::cout << "\b\b\b\b100%" << std::endl; - } - }; - - }; -}; - -#endif /* end of include guard: PROGRESS_H_A34EF856 */ diff --git a/generator/pronunciation.cpp b/generator/pronunciation.cpp index eb07607..3075d42 100644 --- a/generator/pronunciation.cpp +++ b/generator/pronunciation.cpp @@ -3,9 +3,7 @@ #include #include #include -#include "database.h" -#include "field.h" -#include "../lib/util.h" +#include namespace verbly { namespace generator { @@ -16,28 +14,45 @@ namespace verbly { id_(nextId_++), phonemes_(phonemes) { - auto phonemeList = split>(phonemes, " "); - - auto rhymeStart = std::find_if(std::begin(phonemeList), std::end(phonemeList), [] (std::string phoneme) { - return phoneme.find("1") != std::string::npos; - }); + auto phonemeList = + hatkirby::split>(phonemes, " "); + + std::list::iterator rhymeStart = + std::find_if( + std::begin(phonemeList), + std::end(phonemeList), + [] (std::string phoneme) { + return phoneme.find("1") != std::string::npos; + }); // Rhyme detection if (rhymeStart != std::end(phonemeList)) { std::list rhymePhonemes; - std::transform(rhymeStart, std::end(phonemeList), std::back_inserter(rhymePhonemes), [] (std::string phoneme) { - std::string naked; - - std::remove_copy_if(std::begin(phoneme), std::end(phoneme), std::back_inserter(naked), [] (char ch) { - return std::isdigit(ch); + std::transform( + rhymeStart, + std::end(phonemeList), + std::back_inserter(rhymePhonemes), + [] (std::string phoneme) { + std::string naked; + + std::remove_copy_if( + std::begin(phoneme), + std::end(phoneme), + std::back_inserter(naked), + [] (char ch) { + return std::isdigit(ch); + }); + + return naked; }); - return naked; - }); - - rhyme_ = implode(std::begin(rhymePhonemes), std::end(rhymePhonemes), " "); + rhyme_ = + hatkirby::implode( + std::begin(rhymePhonemes), + std::end(rhymePhonemes), + " "); if (rhymeStart != std::begin(phonemeList)) { @@ -63,9 +78,11 @@ namespace verbly { } } - database& operator<<(database& db, const pronunciation& arg) + hatkirby::database& operator<<( + hatkirby::database& db, + const pronunciation& arg) { - std::list fields; + std::list fields; fields.emplace_back("pronunciation_id", arg.getId()); fields.emplace_back("phonemes", arg.getPhonemes()); diff --git a/generator/pronunciation.h b/generator/pronunciation.h index 81be6c4..163e55e 100644 --- a/generator/pronunciation.h +++ b/generator/pronunciation.h @@ -3,12 +3,11 @@ #include #include +#include namespace verbly { namespace generator { - class database; - class pronunciation { public: @@ -74,7 +73,9 @@ namespace verbly { // Serializer - database& operator<<(database& db, const pronunciation& arg); + hatkirby::database& operator<<( + hatkirby::database& db, + const pronunciation& arg); }; }; diff --git a/generator/word.cpp b/generator/word.cpp index b3fc490..360cd6a 100644 --- a/generator/word.cpp +++ b/generator/word.cpp @@ -1,10 +1,8 @@ #include "word.h" #include #include -#include "database.h" #include "notion.h" #include "lemma.h" -#include "field.h" #include "group.h" namespace verbly { @@ -43,9 +41,9 @@ namespace verbly { verbGroup_ = &verbGroup; } - database& operator<<(database& db, const word& arg) + hatkirby::database& operator<<(hatkirby::database& db, const word& arg) { - std::list fields; + std::list fields; fields.emplace_back("word_id", arg.getId()); fields.emplace_back("notion_id", arg.getNotion().getId()); @@ -59,7 +57,8 @@ namespace verbly { if ((arg.getNotion().getPartOfSpeech() == part_of_speech::adjective) && (arg.getAdjectivePosition() != positioning::undefined)) { - fields.emplace_back("position", static_cast(arg.getAdjectivePosition())); + fields.emplace_back("position", + static_cast(arg.getAdjectivePosition())); } if ((arg.getNotion().getPartOfSpeech() == part_of_speech::verb) diff --git a/generator/word.h b/generator/word.h index a994ec3..2e469d4 100644 --- a/generator/word.h +++ b/generator/word.h @@ -2,6 +2,7 @@ #define WORD_H_91F99D46 #include +#include #include "../lib/enums.h" namespace verbly { @@ -9,7 +10,6 @@ namespace verbly { class notion; class lemma; - class database; class group; class word { @@ -102,7 +102,7 @@ namespace verbly { // Serializer - database& operator<<(database& db, const word& arg); + hatkirby::database& operator<<(hatkirby::database& db, const word& arg); }; }; diff --git a/vendor/hkutil b/vendor/hkutil new file mode 160000 index 0000000..eb30ce1 --- /dev/null +++ b/vendor/hkutil @@ -0,0 +1 @@ +Subproject commit eb30ce13012108fe38709cdf0732aa8b2ec2d52e -- cgit 1.4.1