From 6746da6edd7d9d50efe374eabbb79a3cac882d81 Mon Sep 17 00:00:00 2001 From: Kelly Rauchenberger Date: Mon, 16 Jan 2017 18:02:50 -0500 Subject: Started structural rewrite The new object structure was designed to build on the existing WordNet structure, while also adding in all of the data that we get from other sources. More information about this can be found on the project wiki. The generator has already been completely rewritten to generate a datafile that uses the new structure. In addition, a number of indexes are created, which does double the size of the datafile, but also allows for much faster lookups. Finally, the new generator is written modularly and is a lot more readable than the old one. The verbly interface to the new object structure has mostly been completed, but has not been tested fully. There is a completely new search API which utilizes a lot of operator overloading; documentation on how to use it should go up at some point. Token processing and verb frames are currently unimplemented. Source for these has been left in the repository for now. 
--- CMakeLists.txt | 4 +- generator/CMakeLists.txt | 6 +- generator/database.cpp | 173 +++ generator/database.h | 73 + generator/field.cpp | 193 +++ generator/field.h | 76 + generator/form.cpp | 53 + generator/form.h | 71 + generator/frame.cpp | 83 ++ generator/frame.h | 59 + generator/generator.cpp | 3227 ++++++++++++++++--------------------------- generator/generator.h | 151 ++ generator/group.cpp | 119 ++ generator/group.h | 80 ++ generator/lemma.cpp | 65 + generator/lemma.h | 58 + generator/main.cpp | 40 + generator/notion.cpp | 85 ++ generator/notion.h | 91 ++ generator/part.cpp | 336 +++++ generator/part.h | 114 ++ generator/progress.h | 78 +- generator/pronunciation.cpp | 87 ++ generator/pronunciation.h | 82 ++ generator/role.h | 35 + generator/schema.sql | 352 ++--- generator/selrestr.cpp | 288 ++++ generator/selrestr.h | 88 ++ generator/word.cpp | 77 ++ generator/word.h | 110 ++ lib/adjective.cpp | 113 -- lib/adjective.h | 51 - lib/adjective_query.cpp | 1072 -------------- lib/adjective_query.h | 112 -- lib/adverb.cpp | 71 - lib/adverb.h | 35 - lib/adverb_query.cpp | 758 ---------- lib/adverb_query.h | 86 -- lib/binding.cpp | 180 +++ lib/binding.h | 70 + lib/data.cpp | 177 --- lib/data.h | 380 ----- lib/database.cpp | 79 ++ lib/database.h | 73 + lib/enums.h | 45 + lib/field.cpp | 91 ++ lib/field.h | 306 ++++ lib/filter.cpp | 1365 ++++++++++++++++++ lib/filter.h | 143 ++ lib/form.cpp | 53 + lib/form.h | 149 ++ lib/frame.cpp | 317 +---- lib/frame.h | 178 +-- lib/group.cpp | 43 + lib/group.h | 87 ++ lib/lemma.cpp | 69 + lib/lemma.h | 120 ++ lib/notion.cpp | 94 ++ lib/notion.h | 200 +++ lib/noun.cpp | 221 --- lib/noun.h | 55 - lib/noun_query.cpp | 2013 --------------------------- lib/noun_query.h | 180 --- lib/preposition.cpp | 107 -- lib/preposition.h | 38 - lib/pronunciation.cpp | 69 + lib/pronunciation.h | 163 +++ lib/query.h | 123 ++ lib/statement.cpp | 806 +++++++++++ lib/statement.h | 272 ++++ lib/util.h | 24 +- lib/verb.cpp | 64 - lib/verb.h | 34 - 
lib/verb_query.cpp | 315 ----- lib/verb_query.h | 45 - lib/verbly.h | 36 +- lib/word.cpp | 120 +- lib/word.h | 193 ++- 78 files changed, 9012 insertions(+), 8737 deletions(-) create mode 100644 generator/database.cpp create mode 100644 generator/database.h create mode 100644 generator/field.cpp create mode 100644 generator/field.h create mode 100644 generator/form.cpp create mode 100644 generator/form.h create mode 100644 generator/frame.cpp create mode 100644 generator/frame.h create mode 100644 generator/generator.h create mode 100644 generator/group.cpp create mode 100644 generator/group.h create mode 100644 generator/lemma.cpp create mode 100644 generator/lemma.h create mode 100644 generator/main.cpp create mode 100644 generator/notion.cpp create mode 100644 generator/notion.h create mode 100644 generator/part.cpp create mode 100644 generator/part.h create mode 100644 generator/pronunciation.cpp create mode 100644 generator/pronunciation.h create mode 100644 generator/role.h create mode 100644 generator/selrestr.cpp create mode 100644 generator/selrestr.h create mode 100644 generator/word.cpp create mode 100644 generator/word.h delete mode 100644 lib/adjective.cpp delete mode 100644 lib/adjective.h delete mode 100644 lib/adjective_query.cpp delete mode 100644 lib/adjective_query.h delete mode 100644 lib/adverb.cpp delete mode 100644 lib/adverb.h delete mode 100644 lib/adverb_query.cpp delete mode 100644 lib/adverb_query.h create mode 100644 lib/binding.cpp create mode 100644 lib/binding.h delete mode 100644 lib/data.cpp delete mode 100644 lib/data.h create mode 100644 lib/database.cpp create mode 100644 lib/database.h create mode 100644 lib/enums.h create mode 100644 lib/field.cpp create mode 100644 lib/field.h create mode 100644 lib/filter.cpp create mode 100644 lib/filter.h create mode 100644 lib/form.cpp create mode 100644 lib/form.h create mode 100644 lib/group.cpp create mode 100644 lib/group.h create mode 100644 lib/lemma.cpp create mode 100644 
lib/lemma.h create mode 100644 lib/notion.cpp create mode 100644 lib/notion.h delete mode 100644 lib/noun.cpp delete mode 100644 lib/noun.h delete mode 100644 lib/noun_query.cpp delete mode 100644 lib/noun_query.h delete mode 100644 lib/preposition.cpp delete mode 100644 lib/preposition.h create mode 100644 lib/pronunciation.cpp create mode 100644 lib/pronunciation.h create mode 100644 lib/query.h create mode 100644 lib/statement.cpp create mode 100644 lib/statement.h delete mode 100644 lib/verb.cpp delete mode 100644 lib/verb.h delete mode 100644 lib/verb_query.cpp delete mode 100644 lib/verb_query.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 9c39d99..61fcce2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,8 +4,10 @@ project (verbly) find_package(PkgConfig) pkg_check_modules(sqlite3 sqlite3>=3.8.3 REQUIRED) +set(CMAKE_BUILD_TYPE Debug) + include_directories(vendor/json) -add_library(verbly lib/data.cpp lib/adjective.cpp lib/noun.cpp lib/verb.cpp lib/adverb.cpp lib/token.cpp lib/word.cpp lib/frame.cpp lib/preposition.cpp lib/adjective_query.cpp lib/adverb_query.cpp lib/noun_query.cpp lib/verb_query.cpp lib/frame_query.cpp) +add_library(verbly lib/filter.cpp lib/field.cpp lib/notion.cpp lib/word.cpp lib/group.cpp lib/frame.cpp lib/lemma.cpp lib/form.cpp lib/pronunciation.cpp lib/statement.cpp lib/binding.cpp lib/database.cpp) set_property(TARGET verbly PROPERTY CXX_STANDARD 11) set_property(TARGET verbly PROPERTY CXX_STANDARD_REQUIRED ON) target_link_libraries(verbly ${sqlite3_LIBRARIES}) diff --git a/generator/CMakeLists.txt b/generator/CMakeLists.txt index 552526d..4f78eb8 100644 --- a/generator/CMakeLists.txt +++ b/generator/CMakeLists.txt @@ -1,12 +1,12 @@ -cmake_minimum_required (VERSION 2.6) +cmake_minimum_required (VERSION 3.1) project (generator) find_package(PkgConfig) pkg_check_modules(sqlite3 sqlite3 REQUIRED) find_package(libxml2 REQUIRED) -include_directories(${sqlite3_INCLUDE_DIR} ${LIBXML2_INCLUDE_DIR} ../vendor/json/src) 
-add_executable(generator generator.cpp) +include_directories(${sqlite3_INCLUDE_DIR} ${LIBXML2_INCLUDE_DIR} ../vendor/json) +add_executable(generator notion.cpp word.cpp lemma.cpp form.cpp pronunciation.cpp group.cpp frame.cpp part.cpp selrestr.cpp database.cpp field.cpp generator.cpp main.cpp) set_property(TARGET generator PROPERTY CXX_STANDARD 11) set_property(TARGET generator PROPERTY CXX_STANDARD_REQUIRED ON) target_link_libraries(generator ${sqlite3_LIBRARIES} ${LIBXML2_LIBRARIES}) diff --git a/generator/database.cpp b/generator/database.cpp new file mode 100644 index 0000000..c7e4cfa --- /dev/null +++ b/generator/database.cpp @@ -0,0 +1,173 @@ +#include "database.h" +#include +#include +#include +#include +#include +#include +#include "field.h" +#include "../lib/util.h" + +namespace verbly { + namespace generator { + + sqlite3_error::sqlite3_error( + const std::string& what, + const std::string& db_err) : + what_(what + " (" + db_err + ")"), + db_err_(db_err) + { + } + + const char* sqlite3_error::what() const noexcept + { + return what_.c_str(); + } + + const char* sqlite3_error::db_err() const noexcept + { + return db_err_.c_str(); + } + + database::database(std::string path) + { + // If there is already a file at this path, overwrite it. + if (std::ifstream(path)) + { + if (std::remove(path.c_str())) + { + throw std::logic_error("Could not overwrite file at path"); + } + } + + if (sqlite3_open_v2(path.c_str(), &ppdb_, SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE, NULL) != SQLITE_OK) + { + // We still have to free the resources allocated. In the event that + // allocation failed, ppdb will be null and sqlite3_close_v2 will just + // ignore it. 
+ std::string errmsg(sqlite3_errmsg(ppdb_)); + sqlite3_close_v2(ppdb_); + + throw sqlite3_error("Could not create output datafile", errmsg); + } + } + + database::database(database&& other) : database() + { + swap(*this, other); + } + + database& database::operator=(database&& other) + { + swap(*this, other); + + return *this; + } + + void swap(database& first, database& second) + { + std::swap(first.ppdb_, second.ppdb_); + } + + database::~database() + { + sqlite3_close_v2(ppdb_); + } + + void database::runQuery(std::string query) + { + // This can only happen when doing bad things with move semantics. + assert(ppdb_ != nullptr); + + sqlite3_stmt* ppstmt; + + if (sqlite3_prepare_v2(ppdb_, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) + { + throw sqlite3_error("Error writing to database", sqlite3_errmsg(ppdb_)); + } + + int result = sqlite3_step(ppstmt); + sqlite3_finalize(ppstmt); + + if (result != SQLITE_DONE) + { + throw sqlite3_error("Error writing to database", sqlite3_errmsg(ppdb_)); + } + } + + void database::insertIntoTable(std::string table, std::list fields) + { + // This can only happen when doing bad things with move semantics. + assert(ppdb_ != nullptr); + + // This shouldn't happen. 
+ assert(!fields.empty()); + + std::list fieldNames; + std::list qs; + for (field& f : fields) + { + fieldNames.push_back(f.getName()); + qs.push_back("?"); + } + + std::ostringstream query; + query << "INSERT INTO "; + query << table; + query << " ("; + query << implode(std::begin(fieldNames), std::end(fieldNames), ", "); + query << ") VALUES ("; + query << implode(std::begin(qs), std::end(qs), ", "); + query << ")"; + + std::string query_str = query.str(); + + sqlite3_stmt* ppstmt; + + if (sqlite3_prepare_v2(ppdb_, query_str.c_str(), query_str.length(), &ppstmt, NULL) != SQLITE_OK) + { + throw sqlite3_error("Error writing to database", sqlite3_errmsg(ppdb_)); + } + + int i = 1; + for (field& f : fields) + { + switch (f.getType()) + { + case field::type::integer: + { + sqlite3_bind_int(ppstmt, i, f.getInteger()); + + break; + } + + case field::type::string: + { + sqlite3_bind_text(ppstmt, i, f.getString().c_str(), f.getString().length(), SQLITE_TRANSIENT); + + break; + } + + case field::type::invalid: + { + // Fields can only be invalid when doing bad things with move semantics. 
+ assert(false); + + break; + } + } + + i++; + } + + int result = sqlite3_step(ppstmt); + sqlite3_finalize(ppstmt); + + if (result != SQLITE_DONE) + { + throw sqlite3_error("Error writing to database", sqlite3_errmsg(ppdb_)); + } + } + + }; +}; diff --git a/generator/database.h b/generator/database.h new file mode 100644 index 0000000..15cdff5 --- /dev/null +++ b/generator/database.h @@ -0,0 +1,73 @@ +#ifndef DATABASE_H_0B0A47D2 +#define DATABASE_H_0B0A47D2 + +#include +#include +#include + +struct sqlite3; + +namespace verbly { + namespace generator { + + class field; + + class sqlite3_error : public std::exception { + public: + + sqlite3_error(const std::string& what, const std::string& db_err); + + const char* what() const noexcept override; + const char* db_err() const noexcept; + + private: + std::string what_; + std::string db_err_; + + }; + + class database { + public: + + // Constructor + + explicit database(std::string path); + + // Disable copying + + database(const database& other) = delete; + database& operator=(const database& other) = delete; + + // Move constructor and move assignment + + database(database&& other); + database& operator=(database&& other); + + // Swap + + friend void swap(database& first, database& second); + + // Destructor + + ~database(); + + // Actions + + void runQuery(std::string query); + + void insertIntoTable(std::string table, std::list fields); + + private: + + database() + { + } + + sqlite3* ppdb_ = nullptr; + + }; + + }; +}; + +#endif /* end of include guard: DATABASE_H_0B0A47D2 */ diff --git a/generator/field.cpp b/generator/field.cpp new file mode 100644 index 0000000..84b2f91 --- /dev/null +++ b/generator/field.cpp @@ -0,0 +1,193 @@ +#include "field.h" +#include +#include + +namespace verbly { + namespace generator { + + field::field(const field& other) + { + type_ = other.type_; + name_ = other.name_; + + switch (type_) + { + case type::integer: + { + integer_ = other.integer_; + + break; + } + + case type::string: + 
{ + new(&string_) std::string(other.string_); + + break; + } + + case type::invalid: + { + break; + } + } + } + + field::field(field&& other) : field() + { + swap(*this, other); + } + + field& field::operator=(field other) + { + swap(*this, other); + + return *this; + } + + void swap(field& first, field& second) + { + using type = field::type; + + type tempType = first.type_; + std::string tempName = std::move(first.name_); + int tempInteger; + std::string tempString; + + switch (first.type_) + { + case type::integer: + { + tempInteger = first.integer_; + + break; + } + + case type::string: + { + tempString = std::move(first.string_); // save first's payload (was a self-move of tempString) + + break; + } + + case type::invalid: + { + break; + } + } + + first.~field(); + + first.type_ = second.type_; + first.name_ = std::move(second.name_); + + switch (second.type_) + { + case type::integer: + { + first.integer_ = second.integer_; + + break; + } + + case type::string: + { + new(&first.string_) std::string(std::move(second.string_)); + + break; + } + + case type::invalid: + { + break; + } + } + + second.~field(); + + second.type_ = tempType; + second.name_ = std::move(tempName); + + switch (tempType) + { + case type::integer: + { + second.integer_ = tempInteger; + + break; + } + + case type::string: + { + new(&second.string_) std::string(std::move(tempString)); + + break; + } + + case type::invalid: + { + break; + } + } + } + + field::~field() + { + switch (type_) + { + case type::string: + { + using string_type = std::string; + string_.~string_type(); + + break; + } + + case type::integer: + case type::invalid: + { + break; + } + } + } + + field::field( + std::string name, + int arg) : + type_(type::integer), + name_(name), + integer_(arg) + { + } + + int field::getInteger() const + { + if (type_ != type::integer) + { + throw std::domain_error("field::getInteger called on non-integer field"); + } + + return integer_; + } + + field::field( + std::string name, + std::string arg) : + type_(type::string), + name_(name) + { + 
new(&string_) std::string(arg); + } + + std::string field::getString() const + { + if (type_ != type::string) + { + throw std::domain_error("field::getString called on non-string field"); + } + + return string_; + } + + }; +}; diff --git a/generator/field.h b/generator/field.h new file mode 100644 index 0000000..1fbabfc --- /dev/null +++ b/generator/field.h @@ -0,0 +1,76 @@ +#ifndef BINDING_H_CAE0B18E +#define BINDING_H_CAE0B18E + +#include + +namespace verbly { + namespace generator { + + class field { + public: + enum class type { + invalid, + integer, + string + }; + + // Copy and move constructors + + field(const field& other); + field(field&& other); + + // Assignment + + field& operator=(field other); + + // Swap + + friend void swap(field& first, field& second); + + // Destructor + + ~field(); + + // Generic accessors + + type getType() const + { + return type_; + } + + std::string getName() const + { + return name_; + } + + // Integer + + field(std::string name, int arg); + + int getInteger() const; + + // String + + field(std::string name, std::string arg); + + std::string getString() const; + + private: + + field() + { + } + + union { + int integer_; + std::string string_; + }; + + type type_ = type::invalid; + std::string name_; + }; + + }; +}; + +#endif /* end of include guard: BINDING_H_CAE0B18E */ diff --git a/generator/form.cpp b/generator/form.cpp new file mode 100644 index 0000000..6be9d47 --- /dev/null +++ b/generator/form.cpp @@ -0,0 +1,53 @@ +#include "form.h" +#include +#include +#include "database.h" +#include "field.h" +#include "pronunciation.h" + +namespace verbly { + namespace generator { + + int form::nextId_ = 0; + + form::form(std::string text) : + id_(nextId_++), + text_(text), + complexity_(std::count(std::begin(text), std::end(text), ' ') + 1), + proper_(std::any_of(std::begin(text), std::end(text), [] (unsigned char ch) { return std::isupper(ch) != 0; })) + { + } + + void form::addPronunciation(const pronunciation& p) + { + pronunciations_.insert(&p); + } + + database& 
operator<<(database& db, const form& arg) + { + // Serialize the form first. + { + std::list fields; + fields.emplace_back("form_id", arg.getId()); + fields.emplace_back("form", arg.getText()); + fields.emplace_back("complexity", arg.getComplexity()); + fields.emplace_back("proper", arg.isProper()); + + db.insertIntoTable("forms", std::move(fields)); + } + + // Then, serialize the form/pronunciation relationship. + for (const pronunciation* p : arg.getPronunciations()) + { + std::list fields; + fields.emplace_back("form_id", arg.getId()); + fields.emplace_back("pronunciation_id", p->getId()); + + db.insertIntoTable("forms_pronunciations", std::move(fields)); + } + + return db; + } + + }; +}; diff --git a/generator/form.h b/generator/form.h new file mode 100644 index 0000000..5576035 --- /dev/null +++ b/generator/form.h @@ -0,0 +1,71 @@ +#ifndef FORM_H_7EFBC970 +#define FORM_H_7EFBC970 + +#include +#include + +namespace verbly { + namespace generator { + + class pronunciation; + class database; + + class form { + public: + + // Constructor + + explicit form(std::string text); + + // Mutators + + void addPronunciation(const pronunciation& p); + + // Accessors + + int getId() const + { + return id_; + } + + std::string getText() const + { + return text_; + } + + int getComplexity() const + { + return complexity_; + } + + bool isProper() const + { + return proper_; + } + + std::set getPronunciations() const + { + return pronunciations_; + } + + private: + + static int nextId_; + + const int id_; + const std::string text_; + const int complexity_; + const bool proper_; + + std::set pronunciations_; + + }; + + // Serializer + + database& operator<<(database& db, const form& arg); + + }; +}; + +#endif /* end of include guard: FORM_H_7EFBC970 */ diff --git a/generator/frame.cpp b/generator/frame.cpp new file mode 100644 index 0000000..9f0653f --- /dev/null +++ b/generator/frame.cpp @@ -0,0 +1,83 @@ +#include "frame.h" +#include "database.h" +#include "field.h" + +namespace 
verbly { + namespace generator { + + int frame::nextId_ = 0; + + frame::frame() : id_(nextId_++) + { + } + + void frame::push_back(part fp) + { + parts_.push_back(std::move(fp)); + } + + database& operator<<(database& db, const frame& arg) + { + std::list fields; + fields.emplace_back("frame_id", arg.getId()); + + nlohmann::json jsonParts; + for (const part& p : arg) + { + nlohmann::json jsonPart; + jsonPart["type"] = static_cast(p.getType()); + + switch (p.getType()) + { + case part::type::noun_phrase: + { + jsonPart["role"] = p.getNounRole(); + jsonPart["selrestrs"] = p.getNounSelrestrs().toJson(); + jsonPart["synrestrs"] = p.getNounSynrestrs(); + + break; + } + + case part::type::preposition: + { + jsonPart["choices"] = p.getPrepositionChoices(); + jsonPart["literal"] = p.isPrepositionLiteral(); + + break; + } + + case part::type::literal: + { + jsonPart["value"] = p.getLiteralValue(); + + break; + } + + case part::type::verb: + case part::type::adjective: + case part::type::adverb: + { + break; + } + + case part::type::invalid: + { + // Invalid parts should not be serialized. 
+ assert(false); + + break; + } + } + + jsonParts.emplace_back(std::move(jsonPart)); + } + + fields.emplace_back("data", jsonParts.dump()); + + db.insertIntoTable("frames", std::move(fields)); + + return db; + } + + }; +}; diff --git a/generator/frame.h b/generator/frame.h new file mode 100644 index 0000000..411ce6c --- /dev/null +++ b/generator/frame.h @@ -0,0 +1,59 @@ +#ifndef FRAME_H_26770FF1 +#define FRAME_H_26770FF1 + +#include +#include "part.h" + +namespace verbly { + namespace generator { + + class database; + + class frame { + public: + + // Aliases + + using const_iterator = std::list::const_iterator; + + // Constructor + + frame(); + + // Mutators + + void push_back(part fp); + + // Accessors + + int getId() const + { + return id_; + } + + const_iterator begin() const + { + return std::begin(parts_); + } + + const_iterator end() const + { + return std::end(parts_); + } + + private: + + static int nextId_; + + const int id_; + + std::list parts_; + + }; + + database& operator<<(database& db, const frame& arg); + + }; +}; + +#endif /* end of include guard: FRAME_H_26770FF1 */ diff --git a/generator/generator.cpp b/generator/generator.cpp index 6a16467..d88cb31 100644 --- a/generator/generator.cpp +++ b/generator/generator.cpp @@ -1,2320 +1,1477 @@ -#include +#include "generator.h" +#include +#include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "progress.h" -#include "../lib/util.h" - -using json = nlohmann::json; - -struct verb_t { - std::string infinitive; - std::string past_tense; - std::string past_participle; - std::string ing_form; - std::string s_form; - int id; -}; - -struct adjective_t { - std::string base; - std::string comparative; - std::string superlative; -}; - -struct noun_t { - std::string singular; - std::string plural; -}; - -struct selrestr_t { - enum class type_t { - singleton, - andlogic, - orlogic, - empty - }; - type_t type; - std::string 
restriction; - bool pos; - std::list subordinates; -}; - -struct framepart_t { - enum class type_t { - np, - v, - pp, - adj, - adv, - lex - }; - type_t type; - std::string role; - selrestr_t selrestrs; - std::set preprestrs; - std::set synrestrs; - std::list choices; - std::string lexval; -}; - -struct group_t { - std::string id; - std::string parent; - std::set members; - std::map roles; - std::list> frames; -}; - -struct pronunciation_t { - std::string phonemes; - std::string prerhyme; - std::string rhyme; - int syllables = 0; - std::string stress; - - bool operator<(const pronunciation_t& other) const - { - return phonemes < other.phonemes; - } -}; - -std::map groups; -std::map verbs; -std::map adjectives; -std::map nouns; -std::map> wn; -std::map images; -std::map> pronunciations; - -void print_usage() -{ - std::cout << "Verbly Datafile Generator" << std::endl; - std::cout << "-------------------------" << std::endl; - std::cout << "Requires exactly six arguments." << std::endl; - std::cout << "1. The path to a VerbNet data directory." << std::endl; - std::cout << "2. The path to an AGID infl.txt file." << std::endl; - std::cout << "3. The path to a WordNet prolog data directory." << std::endl; - std::cout << "4. The path to a CMUDICT pronunciation file." << std::endl; - std::cout << "5. The path to an ImageNet urls.txt file." << std::endl; - std::cout << "6. Datafile output path." 
<< std::endl; - - exit(1); -} - -void db_error(sqlite3* ppdb, std::string query) -{ - std::cout << "Error writing to output database: " << sqlite3_errmsg(ppdb) << std::endl; - std::cout << query << std::endl; - sqlite3_close_v2(ppdb); - print_usage(); -} - -json export_selrestrs(selrestr_t r) -{ - if (r.type == selrestr_t::type_t::empty) - { - return {}; - } else if (r.type == selrestr_t::type_t::singleton) - { - json result; - result["type"] = r.restriction; - result["pos"] = r.pos; - return result; - } else { - json result; - if (r.type == selrestr_t::type_t::andlogic) - { - result["logic"] = "and"; - } else { - result["logic"] = "or"; - } - - std::list outlist; - std::transform(std::begin(r.subordinates), std::end(r.subordinates), std::back_inserter(outlist), &export_selrestrs); - result["children"] = outlist; - - return result; - } -} - -selrestr_t parse_selrestrs(xmlNodePtr top, std::string filename) -{ - selrestr_t r; - xmlChar* key; - - if (!xmlStrcmp(top->name, (const xmlChar*) "SELRESTRS")) - { - if (xmlChildElementCount(top) == 0) - { - r.type = selrestr_t::type_t::empty; - } else if (xmlChildElementCount(top) == 1) - { - r = parse_selrestrs(xmlFirstElementChild(top), filename); - } else { - r.type = selrestr_t::type_t::andlogic; - - if (xmlHasProp(top, (const xmlChar*) "logic")) - { - key = xmlGetProp(top, (const xmlChar*) "logic"); - if (!xmlStrcmp(key, (const xmlChar*) "or")) - { - r.type = selrestr_t::type_t::orlogic; - } - xmlFree(key); - } - - for (xmlNodePtr selrestr = top->xmlChildrenNode; selrestr != nullptr; selrestr = selrestr->next) - { - if (!xmlStrcmp(selrestr->name, (const xmlChar*) "SELRESTRS") || !xmlStrcmp(selrestr->name, (const xmlChar*) "SELRESTR")) - { - r.subordinates.push_back(parse_selrestrs(selrestr, filename)); - } - } - } - } else if (!xmlStrcmp(top->name, (const xmlChar*) "SELRESTR")) - { - r.type = selrestr_t::type_t::singleton; - - key = xmlGetProp(top, (xmlChar*) "Value"); - r.pos = (std::string((const char*)key) == "+"); - 
xmlFree(key); - - key = xmlGetProp(top, (xmlChar*) "type"); - r.restriction = (const char*) key; - xmlFree(key); - } else { - // Invalid - std::cout << "Bad VerbNet file format: " << filename << std::endl; - print_usage(); - } - - return r; -} - -group_t& parse_group(xmlNodePtr top, std::string filename) -{ - xmlChar* key = xmlGetProp(top, (xmlChar*) "ID"); - if (key == 0) - { - std::cout << "Bad VerbNet file format: " << filename << std::endl; - print_usage(); - } - std::string vnid = (const char*)key; - vnid = vnid.substr(vnid.find_first_of("-")+1); - xmlFree(key); - - group_t g; - g.id = vnid; - - for (xmlNodePtr node = top->xmlChildrenNode; node != nullptr; node = node->next) - { - if (!xmlStrcmp(node->name, (const xmlChar*) "SUBCLASSES")) - { - for (xmlNodePtr subclass = node->xmlChildrenNode; subclass != nullptr; subclass = subclass->next) - { - if (!xmlStrcmp(subclass->name, (const xmlChar*) "VNSUBCLASS")) - { - auto& sg = parse_group(subclass, filename); - sg.parent = vnid; - - for (auto member : sg.members) - { - g.members.insert(member); - } - - // The schema requires that subclasses appear after role definitions, so we can do this now - for (auto role : g.roles) - { - if (sg.roles.count(role.first) == 0) - { - sg.roles[role.first] = role.second; - } - } - } - } - } else if (!xmlStrcmp(node->name, (const xmlChar*) "MEMBERS")) - { - for (xmlNodePtr member = node->xmlChildrenNode; member != nullptr; member = member->next) - { - if (!xmlStrcmp(member->name, (const xmlChar*) "MEMBER")) - { - key = xmlGetProp(member, (xmlChar*) "name"); - g.members.insert((const char*)key); - xmlFree(key); - } - } - } else if (!xmlStrcmp(node->name, (const xmlChar*) "THEMROLES")) - { - for (xmlNodePtr role = node->xmlChildrenNode; role != nullptr; role = role->next) - { - if (!xmlStrcmp(role->name, (const xmlChar*) "THEMROLE")) - { - selrestr_t r; - r.type = selrestr_t::type_t::empty; - - key = xmlGetProp(role, (const xmlChar*) "type"); - std::string type = (const char*)key; - 
xmlFree(key); - - for (xmlNodePtr rolenode = role->xmlChildrenNode; rolenode != nullptr; rolenode = rolenode->next) - { - if (!xmlStrcmp(rolenode->name, (const xmlChar*) "SELRESTRS")) - { - r = parse_selrestrs(rolenode, filename); - } - } - - g.roles[type] = r; - } - } - } else if (!xmlStrcmp(node->name, (const xmlChar*) "FRAMES")) - { - for (xmlNodePtr frame = node->xmlChildrenNode; frame != nullptr; frame = frame->next) - { - if (!xmlStrcmp(frame->name, (const xmlChar*) "FRAME")) - { - std::list f; - - for (xmlNodePtr framenode = frame->xmlChildrenNode; framenode != nullptr; framenode = framenode->next) - { - if (!xmlStrcmp(framenode->name, (const xmlChar*) "SYNTAX")) - { - for (xmlNodePtr syntaxnode = framenode->xmlChildrenNode; syntaxnode != nullptr; syntaxnode = syntaxnode->next) - { - framepart_t fp; - - if (!xmlStrcmp(syntaxnode->name, (const xmlChar*) "NP")) - { - fp.type = framepart_t::type_t::np; - - key = xmlGetProp(syntaxnode, (xmlChar*) "value"); - fp.role = (const char*)key; - xmlFree(key); - - fp.selrestrs.type = selrestr_t::type_t::empty; - - for (xmlNodePtr npnode = syntaxnode->xmlChildrenNode; npnode != nullptr; npnode = npnode->next) - { - if (!xmlStrcmp(npnode->name, (const xmlChar*) "SYNRESTRS")) - { - for (xmlNodePtr synrestr = npnode->xmlChildrenNode; synrestr != nullptr; synrestr = synrestr->next) - { - if (!xmlStrcmp(synrestr->name, (const xmlChar*) "SYNRESTR")) - { - key = xmlGetProp(synrestr, (xmlChar*) "type"); - fp.synrestrs.insert(std::string((const char*)key)); - xmlFree(key); - } - } - } - - if (!xmlStrcmp(npnode->name, (const xmlChar*) "SELRESTRS")) - { - fp.selrestrs = parse_selrestrs(npnode, filename); - } - } - } else if (!xmlStrcmp(syntaxnode->name, (xmlChar*) "VERB")) - { - fp.type = framepart_t::type_t::v; - } else if (!xmlStrcmp(syntaxnode->name, (xmlChar*) "PREP")) - { - fp.type = framepart_t::type_t::pp; - - if (xmlHasProp(syntaxnode, (xmlChar*) "value")) - { - key = xmlGetProp(syntaxnode, (xmlChar*) "value"); - std::string 
choices = (const char*)key; - xmlFree(key); - - fp.choices = verbly::split>(choices, " "); - } - - for (xmlNodePtr npnode = syntaxnode->xmlChildrenNode; npnode != nullptr; npnode = npnode->next) - { - if (!xmlStrcmp(npnode->name, (const xmlChar*) "SELRESTRS")) - { - for (xmlNodePtr synrestr = npnode->xmlChildrenNode; synrestr != nullptr; synrestr = synrestr->next) - { - if (!xmlStrcmp(synrestr->name, (const xmlChar*) "SELRESTR")) - { - key = xmlGetProp(synrestr, (xmlChar*) "type"); - fp.preprestrs.insert(std::string((const char*)key)); - xmlFree(key); - } - } - } - } - } else if (!xmlStrcmp(syntaxnode->name, (xmlChar*) "ADJ")) - { - fp.type = framepart_t::type_t::adj; - } else if (!xmlStrcmp(syntaxnode->name, (xmlChar*) "ADV")) - { - fp.type = framepart_t::type_t::adv; - } else if (!xmlStrcmp(syntaxnode->name, (xmlChar*) "LEX")) - { - fp.type = framepart_t::type_t::lex; - - key = xmlGetProp(syntaxnode, (xmlChar*) "value"); - fp.lexval = (const char*)key; - xmlFree(key); - } else { - continue; - } - - f.push_back(fp); - } - - g.frames.push_back(f); - } - } - } - } - } - } - - groups[vnid] = g; - - return groups[vnid]; -} - -int main(int argc, char** argv) -{ - if (argc != 7) - { - print_usage(); - } - - // VerbNet data - std::cout << "Reading verb frames..." << std::endl; - - DIR* dir; - if ((dir = opendir(argv[1])) == nullptr) - { - std::cout << "Invalid VerbNet data directory." 
<< std::endl; - - print_usage(); - } - - struct dirent* ent; - while ((ent = readdir(dir)) != nullptr) - { - std::string filename(argv[1]); - if (filename.back() != '/') - { - filename += '/'; - } - - filename += ent->d_name; - //std::cout << ent->d_name << std::endl; - - if (filename.rfind(".xml") != filename.size() - 4) - { - continue; - } - - xmlDocPtr doc = xmlParseFile(filename.c_str()); - if (doc == nullptr) - { - std::cout << "Error opening " << filename << std::endl; - print_usage(); - } - - xmlNodePtr top = xmlDocGetRootElement(doc); - if ((top == nullptr) || (xmlStrcmp(top->name, (xmlChar*) "VNCLASS"))) - { - std::cout << "Bad VerbNet file format: " << filename << std::endl; - print_usage(); - } - - parse_group(top, filename); - } - - closedir(dir); - - // Get verbs from AGID - std::cout << "Reading inflections..." << std::endl; - - std::ifstream agidfile(argv[2]); - if (!agidfile.is_open()) - { - std::cout << "Could not open AGID file: " << argv[2] << std::endl; - print_usage(); - } - - for (;;) - { - std::string line; - if (!getline(agidfile, line)) - { - break; - } - - if (line.back() == '\r') - { - line.pop_back(); - } - - int divider = line.find_first_of(" "); - std::string word = line.substr(0, divider); - line = line.substr(divider+1); - char type = line[0]; - - if (line[1] == '?') - { - line.erase(0, 4); - } else { - line.erase(0, 3); - } - - std::vector forms; - while (!line.empty()) - { - std::string inflection; - if ((divider = line.find(" | ")) != std::string::npos) - { - inflection = line.substr(0, divider); - line = line.substr(divider + 3); - } else { - inflection = line; - line = ""; - } - - if ((divider = inflection.find_first_of(",?")) != std::string::npos) - { - inflection = inflection.substr(0, divider); - } - - forms.push_back(inflection); - } - - switch (type) - { - case 'V': - { - verb_t v; - v.infinitive = word; - if (forms.size() == 4) - { - v.past_tense = forms[0]; - v.past_participle = forms[1]; - v.ing_form = forms[2]; - 
v.s_form = forms[3]; - } else if (forms.size() == 3) - { - v.past_tense = forms[0]; - v.past_participle = forms[0]; - v.ing_form = forms[1]; - v.s_form = forms[2]; - } else if (forms.size() == 8) - { - // As of AGID 2014.08.11, this is only "to be" - v.past_tense = forms[0]; - v.past_participle = forms[2]; - v.ing_form = forms[3]; - v.s_form = forms[4]; - } else { - // Words that don't fit the cases above as of AGID 2014.08.11: - // - may and shall do not conjugate the way we want them to - // - methinks only has a past tense and is an outlier - // - wit has five forms, and is archaic/obscure enough that we can ignore it for now - std::cout << "Ignoring verb \"" << word << "\" due to non-standard number of forms." << std::endl; - } - - verbs[word] = v; - - break; - } - - case 'A': - { - adjective_t adj; - adj.base = word; - if (forms.size() == 2) - { - adj.comparative = forms[0]; - adj.superlative = forms[1]; - } else { - // As of AGID 2014.08.11, this is only "only", which has only the form "onliest" - std::cout << "Ignoring adjective/adverb \"" << word << "\" due to non-standard number of forms." << std::endl; - } - - adjectives[word] = adj; - - break; - } - - case 'N': - { - noun_t n; - n.singular = word; - if (forms.size() == 1) - { - n.plural = forms[0]; - } else { - // As of AGID 2014.08.11, this is non-existent. - std::cout << "Ignoring noun \"" << word << "\" due to non-standard number of forms." << std::endl; - } - - nouns[word] = n; - - break; - } - } - } - - // Pronounciations - std::cout << "Reading pronunciations..." << std::endl; - - std::ifstream pronfile(argv[4]); - if (!pronfile.is_open()) - { - std::cout << "Could not open CMUDICT file: " << argv[4] << std::endl; - print_usage(); - } - - for (;;) - { - std::string line; - if (!getline(pronfile, line)) - { - break; - } - - if (line.back() == '\r') - { - line.pop_back(); - } - - std::regex phoneme("([A-Z][^ \\(]*)(?:\\(\\d+\\))? 
([A-Z 0-9]+)"); - std::smatch phoneme_data; - if (std::regex_search(line, phoneme_data, phoneme)) - { - std::string canonical(phoneme_data[1]); - std::transform(std::begin(canonical), std::end(canonical), std::begin(canonical), ::tolower); - - std::string phonemes = phoneme_data[2]; - auto phoneme_set = verbly::split>(phonemes, " "); - auto phemstrt = std::find_if(std::begin(phoneme_set), std::end(phoneme_set), [] (std::string phoneme) { - return phoneme.find("1") != std::string::npos; - }); - - pronunciation_t p; - p.phonemes = phonemes; - - // Rhyme detection - if (phemstrt != std::end(phoneme_set)) - { - std::stringstream rhymer; - for (auto it = phemstrt; it != std::end(phoneme_set); it++) - { - std::string naked; - std::remove_copy_if(std::begin(*it), std::end(*it), std::back_inserter(naked), [] (char ch) { - return isdigit(ch); - }); - - if (it != phemstrt) - { - rhymer << " "; - } - - rhymer << naked; - } - - p.rhyme = rhymer.str(); - - if (phemstrt != std::begin(phoneme_set)) - { - phemstrt--; - p.prerhyme = *phemstrt; - } else { - p.prerhyme = ""; - } - } else { - p.prerhyme = ""; - p.rhyme = ""; - } - - // Syllable/stress - for (auto phm : phoneme_set) - { - if (isdigit(phm.back())) - { - // It's a vowel! - p.syllables++; - - if (phm.back() == '1') - { - p.stress.push_back('1'); - } else { - p.stress.push_back('0'); - } - } - } - - pronunciations[canonical].insert(p); - } - } - - // Images - std::cout << "Reading images..." << std::endl; - - std::ifstream imagefile(argv[5]); - if (!imagefile.is_open()) - { - std::cout << "Could not open ImageNet file: " << argv[5] << std::endl; - print_usage(); - } - - for (;;) - { - std::string line; - if (!getline(imagefile, line)) - { - break; - } - - if (line.back() == '\r') - { - line.pop_back(); - } - - std::string wnid_s = line.substr(1, 8); - int wnid = stoi(wnid_s) + 100000000; - images[wnid]++; - } - - imagefile.close(); - - // Start writing output - std::cout << "Writing schema..." 
<< std::endl; - - sqlite3* ppdb; - if (sqlite3_open_v2(argv[6], &ppdb, SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE, NULL) != SQLITE_OK) - { - std::cout << "Error opening output datafile: " << sqlite3_errmsg(ppdb) << std::endl; - print_usage(); - } - - std::ifstream schemafile("schema.sql"); - if (!schemafile.is_open()) - { - std::cout << "Could not find schema file" << std::endl; - print_usage(); - } - - std::stringstream schemabuilder; - for (;;) - { - std::string line; - if (!getline(schemafile, line)) - { - break; - } - - if (line.back() == '\r') - { - line.pop_back(); - } - - schemabuilder << line << std::endl; - } - - std::string schema = schemabuilder.str(); - while (!schema.empty()) - { - std::string query; - int divider = schema.find(";"); - if (divider != std::string::npos) - { - query = schema.substr(0, divider+1); - schema = schema.substr(divider+2); - } else { - break; - } - - sqlite3_stmt* schmstmt; - if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &schmstmt, NULL) != SQLITE_OK) - { - db_error(ppdb, query); - } - - if (sqlite3_step(schmstmt) != SQLITE_DONE) - { - db_error(ppdb, query); - } - - sqlite3_finalize(schmstmt); - } - - std::cout << "Writing prepositions..." 
<< std::endl; - std::ifstream prepfile("prepositions.txt"); - if (!prepfile.is_open()) - { - std::cout << "Could not find prepositions file" << std::endl; - print_usage(); - } - - for (;;) - { - std::string line; - if (!getline(prepfile, line)) - { - break; - } - - if (line.back() == '\r') - { - line.pop_back(); - } - - std::regex relation("^([^:]+): (.+)"); - std::smatch relation_data; - std::regex_search(line, relation_data, relation); - std::string prep = relation_data[1]; - std::list groups = verbly::split>(relation_data[2], ", "); - - std::string query("INSERT INTO prepositions (form) VALUES (?)"); - sqlite3_stmt* ppstmt; - - if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) - { - db_error(ppdb, query); - } - - sqlite3_bind_text(ppstmt, 1, prep.c_str(), prep.length(), SQLITE_TRANSIENT); - - if (sqlite3_step(ppstmt) != SQLITE_DONE) - { - db_error(ppdb, query); - } - - sqlite3_finalize(ppstmt); - - query = "SELECT last_insert_rowid()"; - if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) - { - db_error(ppdb, query); - } - - if (sqlite3_step(ppstmt) != SQLITE_ROW) - { - db_error(ppdb, query); - } - - int rowid = sqlite3_column_int(ppstmt, 0); - sqlite3_finalize(ppstmt); - - for (auto group : groups) - { - query = "INSERT INTO preposition_groups (preposition_id, groupname) VALUES (?, ?)"; - if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) - { - db_error(ppdb, query); - } - - sqlite3_bind_int(ppstmt, 1, rowid); - sqlite3_bind_text(ppstmt, 2, group.c_str(), group.length(), SQLITE_TRANSIENT); - - if (sqlite3_step(ppstmt) != SQLITE_DONE) - { - db_error(ppdb, query); - } - - sqlite3_finalize(ppstmt); - } - } - - - { - progress ppgs("Writing verbs...", verbs.size()); - for (auto& mapping : verbs) - { - sqlite3_stmt* ppstmt; - std::string query("INSERT INTO verbs (infinitive, past_tense, past_participle, ing_form, s_form) VALUES (?, ?, ?, ?, ?)"); - if 
(sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) - { - db_error(ppdb, query); - } - - sqlite3_bind_text(ppstmt, 1, mapping.second.infinitive.c_str(), mapping.second.infinitive.length(), SQLITE_TRANSIENT); - sqlite3_bind_text(ppstmt, 2, mapping.second.past_tense.c_str(), mapping.second.past_tense.length(), SQLITE_TRANSIENT); - sqlite3_bind_text(ppstmt, 3, mapping.second.past_participle.c_str(), mapping.second.past_participle.length(), SQLITE_TRANSIENT); - sqlite3_bind_text(ppstmt, 4, mapping.second.ing_form.c_str(), mapping.second.ing_form.length(), SQLITE_TRANSIENT); - sqlite3_bind_text(ppstmt, 5, mapping.second.s_form.c_str(), mapping.second.s_form.length(), SQLITE_TRANSIENT); - - if (sqlite3_step(ppstmt) != SQLITE_DONE) - { - db_error(ppdb, query); - } - - sqlite3_finalize(ppstmt); - - std::string canonical(mapping.second.infinitive); - std::transform(std::begin(canonical), std::end(canonical), std::begin(canonical), ::tolower); - if (pronunciations.count(canonical) == 1) - { - query = "SELECT last_insert_rowid()"; - if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) - { - db_error(ppdb, query); - } - - if (sqlite3_step(ppstmt) != SQLITE_ROW) - { - db_error(ppdb, query); - } - - int rowid = sqlite3_column_int(ppstmt, 0); - - sqlite3_finalize(ppstmt); - - mapping.second.id = rowid; - - for (auto pronunciation : pronunciations[canonical]) - { - if (!pronunciation.rhyme.empty()) - { - query = "INSERT INTO verb_pronunciations (verb_id, pronunciation, syllables, stress, prerhyme, rhyme) VALUES (?, ?, ?, ?, ?, ?)"; - } else { - query = "INSERT INTO verb_pronunciations (verb_id, pronunciation, syllables, stress) VALUES (?, ?, ?, ?)"; - } - - if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) - { - db_error(ppdb, query); - } - - sqlite3_bind_int(ppstmt, 1, rowid); - sqlite3_bind_text(ppstmt, 2, pronunciation.phonemes.c_str(), pronunciation.phonemes.length(), 
SQLITE_TRANSIENT); - sqlite3_bind_int(ppstmt, 3, pronunciation.syllables); - sqlite3_bind_text(ppstmt, 4, pronunciation.stress.c_str(), pronunciation.stress.length(), SQLITE_TRANSIENT); - - if (!pronunciation.rhyme.empty()) - { - sqlite3_bind_text(ppstmt, 5, pronunciation.prerhyme.c_str(), pronunciation.prerhyme.length(), SQLITE_TRANSIENT); - sqlite3_bind_text(ppstmt, 6, pronunciation.rhyme.c_str(), pronunciation.rhyme.length(), SQLITE_TRANSIENT); - } - - if (sqlite3_step(ppstmt) != SQLITE_DONE) - { - db_error(ppdb, query); - } - - sqlite3_finalize(ppstmt); - } - } - - ppgs.update(); - } - } - - { - progress ppgs("Writing verb frames...", groups.size()); - for (auto& mapping : groups) +#include +#include +#include +#include "enums.h" +#include "progress.h" +#include "selrestr.h" +#include "role.h" +#include "part.h" +#include "field.h" +#include "../lib/util.h" + +namespace verbly { + namespace generator { + + generator::generator( + std::string verbNetPath, + std::string agidPath, + std::string wordNetPath, + std::string cmudictPath, + std::string imageNetPath, + std::string outputPath) : + verbNetPath_(verbNetPath), + agidPath_(agidPath), + wordNetPath_(wordNetPath), + cmudictPath_(cmudictPath), + imageNetPath_(imageNetPath), + db_(outputPath) { - std::list roledatal; - std::transform(std::begin(mapping.second.roles), std::end(mapping.second.roles), std::back_inserter(roledatal), [] (std::pair r) { - json role; - role["type"] = r.first; - role["selrestrs"] = export_selrestrs(r.second); - - return role; - }); - - json roledata(roledatal); - std::string rdm = roledata.dump(); - - sqlite3_stmt* ppstmt; - std::string query("INSERT INTO groups (data) VALUES (?)"); - if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) - { - db_error(ppdb, query); - } - - sqlite3_bind_blob(ppstmt, 1, rdm.c_str(), rdm.size(), SQLITE_TRANSIENT); - - if (sqlite3_step(ppstmt) != SQLITE_DONE) + // Ensure VerbNet directory exists + DIR* dir; + if ((dir = 
opendir(verbNetPath_.c_str())) == nullptr) { - db_error(ppdb, query); + throw std::invalid_argument("Invalid VerbNet data directory"); } - sqlite3_finalize(ppstmt); + closedir(dir); - query = "SELECT last_insert_rowid()"; - if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) + // Ensure AGID infl.txt exists + if (!std::ifstream(agidPath_)) { - db_error(ppdb, query); + throw std::invalid_argument("AGID infl.txt file not found"); } - if (sqlite3_step(ppstmt) != SQLITE_ROW) + // Add directory separator to WordNet path + if ((wordNetPath_.back() != '/') && (wordNetPath_.back() != '\\')) { - db_error(ppdb, query); + wordNetPath_ += '/'; } - int gid = sqlite3_column_int(ppstmt, 0); - sqlite3_finalize(ppstmt); - - for (auto frame : mapping.second.frames) + // Ensure WordNet tables exist + for (std::string table : { + "s", "sk", "ant", "at", "cls", "hyp", "ins", "mm", "mp", "ms", "per", "sa", "sim", "syntax" + }) { - std::list fdatap; - std::transform(std::begin(frame), std::end(frame), std::back_inserter(fdatap), [] (framepart_t& fp) { - json part; - - switch (fp.type) - { - case framepart_t::type_t::np: - { - part["type"] = "np"; - part["role"] = fp.role; - part["selrestrs"] = export_selrestrs(fp.selrestrs); - part["synrestrs"] = fp.synrestrs; - - break; - } - - case framepart_t::type_t::pp: - { - part["type"] = "pp"; - part["values"] = fp.choices; - part["preprestrs"] = fp.preprestrs; - - break; - } - - case framepart_t::type_t::v: - { - part["type"] = "v"; - - break; - } - - case framepart_t::type_t::adj: - { - part["type"] = "adj"; - - break; - } - - case framepart_t::type_t::adv: - { - part["type"] = "adv"; - - break; - } - - case framepart_t::type_t::lex: - { - part["type"] = "lex"; - part["value"] = fp.lexval; - - break; - } - } - - return part; - }); - - json fdata(fdatap); - std::string marshall = fdata.dump(); - - query = "INSERT INTO frames (group_id, data) VALUES (?, ?)"; - if (sqlite3_prepare_v2(ppdb, query.c_str(), 
query.length(), &ppstmt, NULL) != SQLITE_OK) - { - db_error(ppdb, query); - } - - sqlite3_bind_int(ppstmt, 1, gid); - sqlite3_bind_blob(ppstmt, 2, marshall.c_str(), marshall.length(), SQLITE_TRANSIENT); - - if (sqlite3_step(ppstmt) != SQLITE_DONE) + if (!std::ifstream(wordNetPath_ + "wn_" + table + ".pl")) { - db_error(ppdb, query); + throw std::invalid_argument("WordNet " + table + " table not found"); } - - sqlite3_finalize(ppstmt); } - for (auto member : mapping.second.members) + // Ensure CMUDICT file exists + if (!std::ifstream(cmudictPath_)) { - if (verbs.count(member) == 1) - { - auto& v = verbs[member]; - - query = "INSERT INTO verb_groups (verb_id, group_id) VALUES (?, ?)"; - if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) - { - db_error(ppdb, query); - } - - sqlite3_bind_int(ppstmt, 1, v.id); - sqlite3_bind_int(ppstmt, 2, gid); - - if (sqlite3_step(ppstmt) != SQLITE_DONE) - { - db_error(ppdb, query); - } - - sqlite3_finalize(ppstmt); - } + throw std::invalid_argument("CMUDICT file not found"); } - ppgs.update(); - } - } - - // Get nouns/adjectives/adverbs from WordNet - // Useful relations: - // - s: master list - // - ant: antonymy (e.g. happy/sad, sad/happy, happiness/sadness) - // - at: variation (e.g. a measurement can be standard or nonstandard) - // - der: derivation (e.g. happy/happily, happily/happy) - // - hyp: hypernymy/hyponymy (e.g. color/red, color/blue) - // - ins: instantiation (do we need this? let's see) - // - mm: member meronymy/holonymy (e.g. family/mother, family/child) - // - mp: part meronymy/holonymy (e.g. wheel/spoke, wheel/tire) - // - ms: substance meronymy/holonymy (e.g. tire/rubber, doorstop/rubber) - // - per: pertainymy (e.g. something that is Alaskan pertains to Alaska) - // mannernymy (e.g. something done quickly is done in a manner that is quick) - // - sa: specification (e.g. inaccurate (general) can mean imprecise or incorrect (specific)) - // - sim: synonymy (e.g. 
cheerful/happy, happy/cheerful) - // - syntax: positioning flags for some adjectives - std::string wnpref {argv[3]}; - if (wnpref.back() != '/') - { - wnpref += '/'; - } - - // s table - { - std::ifstream wnsfile(wnpref + "wn_s.pl"); - if (!wnsfile.is_open()) - { - std::cout << "Invalid WordNet data directory." << std::endl; - print_usage(); - } - - std::list lines; - for (;;) - { - std::string line; - if (!getline(wnsfile, line)) + // Ensure ImageNet urls.txt exists + if (!std::ifstream(imageNetPath_)) { - break; + throw std::invalid_argument("ImageNet urls.txt file not found"); } + } - if (line.back() == '\r') - { - line.pop_back(); - } + void generator::run() + { + // Create notions, words, lemmas, and forms from WordNet synsets + readWordNetSynsets(); + + // Reads adjective positioning WordNet data + readAdjectivePositioning(); + + // Counts the number of URLs ImageNet has per notion + readImageNetUrls(); + + // Creates a word by WordNet sense key lookup table + readWordNetSenseKeys(); + + // Creates groups and frames from VerbNet data + readVerbNet(); + + // Creates forms and inflections from AGID. To reduce the amount of forms + // created, we do this after most lemmas that need inflecting have been + // created through other means, and then only generate forms for + // inflections of already-existing lemmas. The exception to this regards + // verb lemmas. If a verb lemma in AGID either does not exist yet, or does + // exist but is not related to any words that are related to verb notions, + // then a notion and a word is generated and the form generation proceeds + // as usual. + readAgidInflections(); + + // Reads in prepositions and the is_a relationship + readPrepositions(); + + // Creates pronunciations from CMUDICT. To reduce the amount of + // pronunciations created, we do this after all forms have been created, + // and then only generate pronunciations for already-exisiting forms. 
+ readCmudictPronunciations(); + + // Writes the database schema + writeSchema(); + + // Dumps data to the database + dumpObjects(); + + // Populates the antonymy relationship from WordNet + readWordNetAntonymy(); + + // Populates the variation relationship from WordNet + readWordNetVariation(); + + // Populates the usage, topicality, and regionality relationships from + // WordNet + readWordNetClasses(); + + // Populates the causality relationship from WordNet + readWordNetCausality(); + + // Populates the entailment relationship from WordNet + readWordNetEntailment(); + + // Populates the hypernymy relationship from WordNet + readWordNetHypernymy(); + + // Populates the instantiation relationship from WordNet + readWordNetInstantiation(); + + // Populates the member meronymy relationship from WordNet + readWordNetMemberMeronymy(); + + // Populates the part meronymy relationship from WordNet + readWordNetPartMeronymy(); + + // Populates the substance meronymy relationship from WordNet + readWordNetSubstanceMeronymy(); + + // Populates the pertainymy and mannernymy relationships from WordNet + readWordNetPertainymy(); + + // Populates the specification relationship from WordNet + readWordNetSpecification(); + + // Populates the adjective similarity relationship from WordNet + readWordNetSimilarity(); + + + + + + + - lines.push_back(line); } - progress ppgs("Writing nouns, adjectives, and adverbs...", lines.size()); - for (auto line : lines) + void generator::readWordNetSynsets() { - ppgs.update(); + std::list lines(readFile(wordNetPath_ + "wn_s.pl")); + progress ppgs("Reading synsets from WordNet...", lines.size()); - std::regex relation("^s\\(([134]\\d{8}),(\\d+),'(.+)',\\w,\\d+,\\d+\\)\\.$"); - std::smatch relation_data; - if (!std::regex_search(line, relation_data, relation)) - { - continue; - } - - int synset_id = stoi(relation_data[1]); - int wnum = stoi(relation_data[2]); - std::string word = relation_data[3]; - size_t word_it; - while ((word_it = 
word.find("''")) != std::string::npos) - { - word.erase(word_it, 1); - } - - std::string query; - switch (synset_id / 100000000) + for (std::string line : lines) { - case 1: // Noun - { - if (nouns.count(word) == 1) - { - query = "INSERT INTO nouns (singular, proper, complexity, images, wnid, plural) VALUES (?, ?, ?, ?, ?, ?)"; - } else { - query = "INSERT INTO nouns (singular, proper, complexity, images, wnid) VALUES (?, ?, ?, ?, ?)"; - } + ppgs.update(); - break; - } - - case 2: // Verb + std::regex relation("^s\\(([1234]\\d{8}),(\\d+),'(.+)',\\w,\\d+,(\\d+)\\)\\.$"); + std::smatch relation_data; + if (!std::regex_search(line, relation_data, relation)) { - // Ignore - - break; + continue; } - - case 3: // Adjective + + int synset_id = std::stoi(relation_data[1]); + int wnum = std::stoi(relation_data[2]); + std::string text = relation_data[3]; + int tag_count = std::stoi(relation_data[4]); + size_t word_it; + while ((word_it = text.find("''")) != std::string::npos) { - if (adjectives.count(word) == 1) - { - query = "INSERT INTO adjectives (base_form, complexity, comparative, superlative) VALUES (?, ?, ?, ?)"; - } else { - query = "INSERT INTO adjectives (base_form, complexity) VALUES (?, ?)"; - } - - break; + text.erase(word_it, 1); } - - case 4: // Adverb - { - if (adjectives.count(word) == 1) - { - query = "INSERT INTO adverbs (base_form, complexity, comparative, superlative) VALUES (?, ?, ?, ?)"; - } else { - query = "INSERT INTO adverbs (base_form, complexity) VALUES (?, ?)"; - } - break; + // The WordNet data does contain duplicates, so we need to check that we + // haven't already created this word. 
+ std::pair lookup(synset_id, wnum); + if (!wordByWnidAndWnum_.count(lookup)) + { + notion& synset = lookupOrCreateNotion(synset_id); + lemma& lex = lookupOrCreateLemma(text); + word& entry = createWord(synset, lex, tag_count); + + wordByWnidAndWnum_[lookup] = &entry; } } + } - sqlite3_stmt* ppstmt; - if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) - { - db_error(ppdb, query); - } - - sqlite3_bind_text(ppstmt, 1, word.c_str(), word.length(), SQLITE_TRANSIENT); - switch (synset_id / 100000000) + void generator::readAdjectivePositioning() + { + std::list lines(readFile(wordNetPath_ + "wn_syntax.pl")); + progress ppgs("Reading adjective positionings from WordNet...", lines.size()); + + for (std::string line : lines) { - case 1: // Noun + ppgs.update(); + + std::regex relation("^syntax\\((3\\d{8}),(\\d+),([ipa])p?\\)\\."); + std::smatch relation_data; + if (!std::regex_search(line, relation_data, relation)) { - sqlite3_bind_int(ppstmt, 2, (std::any_of(std::begin(word), std::end(word), [] (char ch) { - return isupper(ch); - }) ? 
1 : 0)); - - sqlite3_bind_int(ppstmt, 3, verbly::split>(word, " ").size()); - sqlite3_bind_int(ppstmt, 4, images[synset_id]); - sqlite3_bind_int(ppstmt, 5, synset_id); - - if (nouns.count(word) == 1) - { - sqlite3_bind_text(ppstmt, 6, nouns[word].plural.c_str(), nouns[word].plural.length(), SQLITE_TRANSIENT); - } - - break; + continue; } + + int synset_id = stoi(relation_data[1]); + int wnum = stoi(relation_data[2]); + std::string adjpos_str = relation_data[3]; - case 3: // Adjective - case 4: // Adverb + std::pair lookup(synset_id, wnum); + if (wordByWnidAndWnum_.count(lookup)) { - sqlite3_bind_int(ppstmt, 2, verbly::split>(word, " ").size()); + word& adj = *wordByWnidAndWnum_.at(lookup); - if (adjectives.count(word) == 1) + if (adjpos_str == "p") + { + adj.setAdjectivePosition(positioning::predicate); + } else if (adjpos_str == "a") { - sqlite3_bind_text(ppstmt, 3, adjectives[word].comparative.c_str(), adjectives[word].comparative.length(), SQLITE_TRANSIENT); - sqlite3_bind_text(ppstmt, 4, adjectives[word].superlative.c_str(), adjectives[word].superlative.length(), SQLITE_TRANSIENT); + adj.setAdjectivePosition(positioning::attributive); + } else if (adjpos_str == "i") + { + adj.setAdjectivePosition(positioning::postnominal); + } else { + // Can't happen because of how we specified the regex. + assert(false); } - - break; } } + } - if (sqlite3_step(ppstmt) != SQLITE_DONE) - { - db_error(ppdb, query); - } - - sqlite3_finalize(ppstmt); - - query = "SELECT last_insert_rowid()"; - if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) + void generator::readImageNetUrls() + { + // The ImageNet datafile is so large that it is unreasonable and + // unnecessary to read it into memory; instead, we will parse each line as + // we read it. This has the caveat that we cannot display a progress bar. + std::cout << "Reading image counts from ImageNet..." 
<< std::endl; + + std::ifstream file(imageNetPath_); + if (!file) { - db_error(ppdb, query); + throw std::invalid_argument("Could not find file " + imageNetPath_); } - - if (sqlite3_step(ppstmt) != SQLITE_ROW) + + std::string line; + while (std::getline(file, line)) { - db_error(ppdb, query); + if (line.back() == '\r') + { + line.pop_back(); + } + + std::string wnid_s = line.substr(1, 8); + int wnid = stoi(wnid_s) + 100000000; + if (notionByWnid_.count(wnid)) + { + // We know that this notion has a wnid and is a noun. + notionByWnid_.at(wnid)->incrementNumOfImages(); + } } + } - int rowid = sqlite3_column_int(ppstmt, 0); - wn[synset_id][wnum] = rowid; - - sqlite3_finalize(ppstmt); + void generator::readWordNetSenseKeys() + { + std::list lines(readFile(wordNetPath_ + "wn_sk.pl")); + progress ppgs("Reading sense keys from WordNet...", lines.size()); - std::string canonical(word); - std::transform(std::begin(canonical), std::end(canonical), std::begin(canonical), ::tolower); - if (pronunciations.count(canonical) == 1) + for (std::string line : lines) { - for (auto pronunciation : pronunciations[canonical]) - { - switch (synset_id / 100000000) - { - case 1: // Noun - { - if (!pronunciation.rhyme.empty()) - { - query = "INSERT INTO noun_pronunciations (noun_id, pronunciation, syllables, stress, prerhyme, rhyme) VALUES (?, ?, ?, ?, ?, ?)"; - } else { - query = "INSERT INTO noun_pronunciations (noun_id, pronunciation, syllables, stress) VALUES (?, ?, ?, ?)"; - } - - break; - } - - case 3: // Adjective - { - if (!pronunciation.rhyme.empty()) - { - query = "INSERT INTO adjective_pronunciations (adjective_id, pronunciation, syllables, stress, prerhyme, rhyme) VALUES (?, ?, ?, ?, ?, ?)"; - } else { - query = "INSERT INTO adjective_pronunciations (adjective_id, pronunciation, syllables, stress) VALUES (?, ?, ?, ?)"; - } - - break; - } - - case 4: // Adverb - { - if (!pronunciation.rhyme.empty()) - { - query = "INSERT INTO adverb_pronunciations (adverb_id, pronunciation, 
syllables, stress, prerhyme, rhyme) VALUES (?, ?, ?, ?, ?, ?)"; - } else { - query = "INSERT INTO adverb_pronunciations (adverb_id, pronunciation, syllables, stress) VALUES (?, ?, ?, ?)"; - } - - break; - } - } - - if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) - { - db_error(ppdb, query); - } + ppgs.update(); - sqlite3_bind_int(ppstmt, 1, rowid); - sqlite3_bind_text(ppstmt, 2, pronunciation.phonemes.c_str(), pronunciation.phonemes.length(), SQLITE_TRANSIENT); - sqlite3_bind_int(ppstmt, 3, pronunciation.syllables); - sqlite3_bind_text(ppstmt, 4, pronunciation.stress.c_str(), pronunciation.stress.length(), SQLITE_TRANSIENT); - - if (!pronunciation.rhyme.empty()) - { - sqlite3_bind_text(ppstmt, 5, pronunciation.prerhyme.c_str(), pronunciation.prerhyme.length(), SQLITE_TRANSIENT); - sqlite3_bind_text(ppstmt, 6, pronunciation.rhyme.c_str(), pronunciation.rhyme.length(), SQLITE_TRANSIENT); - } + // We only actually need to lookup verbs by sense key so we'll just + // ignore everything that isn't a verb. + std::regex relation("^sk\\((2\\d{8}),(\\d+),'(.+)'\\)\\.$"); + std::smatch relation_data; + if (!std::regex_search(line, relation_data, relation)) + { + continue; + } + + int synset_id = stoi(relation_data[1]); + int wnum = stoi(relation_data[2]); + std::string sense_key = relation_data[3]; - if (sqlite3_step(ppstmt) != SQLITE_DONE) + // We are treating this mapping as injective, which is not entirely + // accurate. First, the WordNet table contains duplicate rows, so those + // need to be ignored. More importantly, a small number of sense keys + // (one for each letter of the Latin alphabet, plus 9 other words) each + // map to two different words in the same synset which differ only by + // capitalization. Luckily, none of these exceptions are verbs, so we + // can pretend that the mapping is injective. 
+ if (!wnSenseKeys_.count(sense_key)) + { + std::pair lookup(synset_id, wnum); + if (wordByWnidAndWnum_.count(lookup)) { - db_error(ppdb, query); + wnSenseKeys_[sense_key] = wordByWnidAndWnum_.at(lookup); } - - sqlite3_finalize(ppstmt); } } } - } - - // While we're working on s - { - progress ppgs("Writing word synonyms...", wn.size()); - for (auto sense : wn) + + void generator::readVerbNet() { - ppgs.update(); - - for (auto word1 : sense.second) + std::cout << "Reading frames from VerbNet..." << std::endl; + + DIR* dir; + if ((dir = opendir(verbNetPath_.c_str())) == nullptr) + { + throw std::invalid_argument("Invalid VerbNet data directory"); + } + + struct dirent* ent; + while ((ent = readdir(dir)) != nullptr) { - for (auto word2 : sense.second) + std::string filename(verbNetPath_); + + if (filename.back() != '/') { - if (word1 != word2) - { - std::string query; - switch (sense.first / 100000000) - { - case 1: // Noun - { - query = "INSERT INTO noun_synonymy (noun_1_id, noun_2_id) VALUES (?, ?)"; - - break; - } - - case 2: // Verb - { - // Ignore - - break; - } - - case 3: // Adjective - { - query = "INSERT INTO adjective_synonymy (adjective_1_id, adjective_2_id) VALUES (?, ?)"; - - break; - } - - case 4: // Adverb - { - query = "INSERT INTO adverb_synonymy (adverb_1_id, adverb_2_id) VALUES (?, ?)"; - - break; - } - } - - sqlite3_stmt* ppstmt; - if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) - { - db_error(ppdb, query); - } + filename += '/'; + } - sqlite3_bind_int(ppstmt, 1, word1.second); - sqlite3_bind_int(ppstmt, 2, word2.second); + filename += ent->d_name; - if (sqlite3_step(ppstmt) != SQLITE_DONE) - { - db_error(ppdb, query); - } + if (filename.rfind(".xml") != filename.size() - 4) + { + continue; + } - sqlite3_finalize(ppstmt); - } + xmlDocPtr doc = xmlParseFile(filename.c_str()); + if (doc == nullptr) + { + throw std::logic_error("Error opening " + filename); } - } - } - } - - // ant table - { - std::ifstream 
wnantfile(wnpref + "wn_ant.pl"); - if (!wnantfile.is_open()) - { - std::cout << "Invalid WordNet data directory." << std::endl; - print_usage(); - } - - std::list lines; - for (;;) - { - std::string line; - if (!getline(wnantfile, line)) - { - break; - } - if (line.back() == '\r') - { - line.pop_back(); + xmlNodePtr top = xmlDocGetRootElement(doc); + if ((top == nullptr) || (xmlStrcmp(top->name, reinterpret_cast("VNCLASS")))) + { + throw std::logic_error("Bad VerbNet file format: " + filename); + } + + try + { + createGroup(top); + } catch (const std::exception& e) + { + std::throw_with_nested(std::logic_error("Error parsing VerbNet file: " + filename)); + } } - - lines.push_back(line); + + closedir(dir); } - progress ppgs("Writing antonyms...", lines.size()); - for (auto line : lines) + void generator::readAgidInflections() { - ppgs.update(); + std::list lines(readFile(agidPath_)); + progress ppgs("Reading inflections from AGID...", lines.size()); - std::regex relation("^ant\\(([134]\\d{8}),(\\d+),([134]\\d{8}),(\\d+)\\)\\."); - std::smatch relation_data; - if (!std::regex_search(line, relation_data, relation)) + for (std::string line : lines) { - continue; - } - - int synset_id_1 = stoi(relation_data[1]); - int wnum_1 = stoi(relation_data[2]); - int synset_id_2 = stoi(relation_data[3]); - int wnum_2 = stoi(relation_data[4]); + ppgs.update(); + + int divider = line.find_first_of(" "); + std::string infinitive = line.substr(0, divider); + line = line.substr(divider+1); + char type = line[0]; - std::string query; - switch (synset_id_1 / 100000000) - { - case 1: // Noun + if (line[1] == '?') { - query = "INSERT INTO noun_antonymy (noun_1_id, noun_2_id) VALUES (?, ?)"; - - break; + line.erase(0, 4); + } else { + line.erase(0, 3); } - - case 2: // Verb + + if (!lemmaByBaseForm_.count(infinitive) && (type != 'V')) { - // Ignore + continue; + } - break; + lemma& curLemma = lookupOrCreateLemma(infinitive); + + auto forms = split>(line, " | "); + for (std::string& inflForm 
: forms) + { + int sympos = inflForm.find_first_of(",?"); + if (sympos != std::string::npos) + { + inflForm = inflForm.substr(0, sympos); + } } - - case 3: // Adjective + + switch (type) { - query = "INSERT INTO adjective_antonymy (adjective_1_id, adjective_2_id) VALUES (?, ?)"; + case 'V': + { + if (forms.size() == 4) + { + curLemma.addInflection(inflection::past_tense, lookupOrCreateForm(forms[0])); + curLemma.addInflection(inflection::past_participle, lookupOrCreateForm(forms[1])); + curLemma.addInflection(inflection::ing_form, lookupOrCreateForm(forms[2])); + curLemma.addInflection(inflection::s_form, lookupOrCreateForm(forms[3])); + } else if (forms.size() == 3) + { + curLemma.addInflection(inflection::past_tense, lookupOrCreateForm(forms[0])); + curLemma.addInflection(inflection::past_participle, lookupOrCreateForm(forms[0])); + curLemma.addInflection(inflection::ing_form, lookupOrCreateForm(forms[1])); + curLemma.addInflection(inflection::s_form, lookupOrCreateForm(forms[2])); + } else if (forms.size() == 8) + { + // As of AGID 2014.08.11, this is only "to be" + curLemma.addInflection(inflection::past_tense, lookupOrCreateForm(forms[0])); + curLemma.addInflection(inflection::past_participle, lookupOrCreateForm(forms[2])); + curLemma.addInflection(inflection::ing_form, lookupOrCreateForm(forms[3])); + curLemma.addInflection(inflection::s_form, lookupOrCreateForm(forms[4])); + } else { + // Words that don't fit the cases above as of AGID 2014.08.11: + // - may and shall do not conjugate the way we want them to + // - methinks only has a past tense and is an outlier + // - wit has five forms, and is archaic/obscure enough that we can ignore it for now + std::cout << " Ignoring verb \"" << infinitive << "\" due to non-standard number of forms." << std::endl; + } + + // For verbs in particular, we sometimes create a notion and a word + // from inflection data. Specifically, if there are not yet any + // verbs existing that have the same infinitive form. 
"Yet" means + // that this verb appears in the AGID data but not in either WordNet + // or VerbNet. + if (!wordsByBaseForm_.count(infinitive) + || !std::any_of(std::begin(wordsByBaseForm_.at(infinitive)), std::end(wordsByBaseForm_.at(infinitive)), [] (word* w) { + return w->getNotion().getPartOfSpeech() == part_of_speech::verb; + })) + { + notion& n = createNotion(part_of_speech::verb); + createWord(n, curLemma); + } - break; - } + break; + } - case 4: // Adverb - { - query = "INSERT INTO adverb_antonymy (adverb_1_id, adverb_2_id) VALUES (?, ?)"; + case 'A': + { + if (forms.size() == 2) + { + curLemma.addInflection(inflection::comparative, lookupOrCreateForm(forms[0])); + curLemma.addInflection(inflection::superlative, lookupOrCreateForm(forms[1])); + } else { + // As of AGID 2014.08.11, this is only "only", which has only the form "onliest" + std::cout << " Ignoring adjective/adverb \"" << infinitive << "\" due to non-standard number of forms." << std::endl; + } + + break; + } + + case 'N': + { + if (forms.size() == 1) + { + curLemma.addInflection(inflection::plural, lookupOrCreateForm(forms[0])); + } else { + // As of AGID 2014.08.11, this is non-existent. + std::cout << " Ignoring noun \"" << infinitive << "\" due to non-standard number of forms." << std::endl; + } - break; + break; + } } } - - sqlite3_stmt* ppstmt; - if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) - { - db_error(ppdb, query); - } - - sqlite3_bind_int(ppstmt, 1, wn[synset_id_1][wnum_1]); - sqlite3_bind_int(ppstmt, 2, wn[synset_id_2][wnum_2]); - - if (sqlite3_step(ppstmt) != SQLITE_DONE) - { - db_error(ppdb, query); - } - - sqlite3_finalize(ppstmt); } - } - - // at table - { - std::ifstream wnatfile(wnpref + "wn_at.pl"); - if (!wnatfile.is_open()) - { - std::cout << "Invalid WordNet data directory." 
<< std::endl; - print_usage(); - } - - std::list lines; - for (;;) - { - std::string line; - if (!getline(wnatfile, line)) - { - break; - } - if (line.back() == '\r') + void generator::readPrepositions() + { + std::list lines(readFile("prepositions.txt")); + progress ppgs("Reading prepositions...", lines.size()); + + for (std::string line : lines) { - line.pop_back(); + ppgs.update(); + + std::regex relation("^([^:]+): (.+)"); + std::smatch relation_data; + std::regex_search(line, relation_data, relation); + std::string prep = relation_data[1]; + auto groups = split>(relation_data[2], ", "); + + notion& n = createNotion(part_of_speech::preposition); + lemma& l = lookupOrCreateLemma(prep); + word& w = createWord(n, l); + + n.setPrepositionGroups(groups); } - - lines.push_back(line); } - progress ppgs("Writing variations...", lines.size()); - for (auto line : lines) + void generator::readCmudictPronunciations() { - ppgs.update(); - - std::regex relation("^at\\((1\\d{8}),(3\\d{8})\\)\\."); - std::smatch relation_data; - if (!std::regex_search(line, relation_data, relation)) - { - continue; - } - - int synset_id_1 = stoi(relation_data[1]); - int synset_id_2 = stoi(relation_data[2]); - std::string query("INSERT INTO variation (noun_id, adjective_id) VALUES (?, ?)"); + std::list lines(readFile(cmudictPath_)); + progress ppgs("Reading pronunciations from CMUDICT...", lines.size()); - for (auto mapping1 : wn[synset_id_1]) + for (std::string line : lines) { - for (auto mapping2 : wn[synset_id_2]) + ppgs.update(); + + std::regex phoneme("([A-Z][^ \\(]*)(?:\\(\\d+\\))? 
([A-Z 0-9]+)"); + std::smatch phoneme_data; + if (std::regex_search(line, phoneme_data, phoneme)) { - sqlite3_stmt* ppstmt; - if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK) - { - db_error(ppdb, query); - } - - sqlite3_bind_int(ppstmt, 1, mapping1.second); - sqlite3_bind_int(ppstmt, 2, mapping2.second); + std::string canonical(phoneme_data[1]); + std::transform(std::begin(canonical), std::end(canonical), std::begin(canonical), ::tolower); - if (sqlite3_step(ppstmt) != SQLITE_DONE) + if (!formByText_.count(canonical)) { - db_error(ppdb, query); + continue; } - sqlite3_finalize(ppstmt); + std::string phonemes = phoneme_data[2]; + pronunciations_.emplace_back(phonemes); + pronunciation& p = pronunciations_.back(); + formByText_.at(canonical)->addPronunciation(p); } } } - } - - // der table - { - std::ifstream wnderfile(wnpref + "wn_der.pl"); - if (!wnderfile.is_open()) - { - std::cout << "Invalid WordNet data directory." << std::endl; - print_usage(); - } - std::list lines; - for (;;) + void generator::writeSchema() { - std::string line; - if (!getline(wnderfile, line)) + std::ifstream file("schema.sql"); + if (!file) { - break; + throw std::invalid_argument("Could not find database schema"); } - - if (line.back() == '\r') + + std::ostringstream schemaBuilder; + std::string line; + while (std::getline(file, line)) { - line.pop_back(); + if (line.back() == '\r') + { + line.pop_back(); + } + + schemaBuilder << line; } - lines.push_back(line); + std::string schema = schemaBuilder.str(); + auto queries = split>(schema, ";"); + progress ppgs("Writing database schema...", queries.size()); + for (std::string query : queries) + { + if (!queries.empty()) + { + db_.runQuery(query); + } + + ppgs.update(); + } } - progress ppgs("Writing morphological derivation...", lines.size()); - for (auto line : lines) + void generator::dumpObjects() { - ppgs.update(); - - std::regex relation("^der\\(([134]\\d{8}),(\\d+),([134]\\d{8}),(\\d+)\\)\\."); - 
std::smatch relation_data; - if (!std::regex_search(line, relation_data, relation)) { - continue; + progress ppgs("Writing notions...", notions_.size()); + + for (notion& n : notions_) + { + db_ << n; + + ppgs.update(); + } } - int synset_id_1 = stoi(relation_data[1]); - int wnum_1 = stoi(relation_data[2]); - int synset_id_2 = stoi(relation_data[3]); - int wnum_2 = stoi(relation_data[4]); - std::string query; - switch (synset_id_1 / 100000000) { - case 1: // Noun + progress ppgs("Writing words...", words_.size()); + + for (word& w : words_) { - switch (synset_id_2 / 100000000) - { - case 1: // Noun - { - query = "INSERT INTO noun_noun_derivation (noun_1_id, noun_2_id) VALUES (?, ?)"; - break; - } - - case 3: // Adjective - { - query = "INSERT INTO noun_adjective_derivation (noun_id, adjective_id) VALUES (?, ?)"; - break; - } - - case 4: // Adverb - { - query = "INSERT INTO noun_adverb_derivation (noun_id, adverb_id) VALUES (?, ?)"; - break; - } - } + db_ << w; - break; + ppgs.update(); } + } + + { + progress ppgs("Writing lemmas...", lemmas_.size()); - case 3: // Adjective + for (lemma& l : lemmas_) { - switch (synset_id_2 / 100000000) - { - case 1: // Noun - { - query = "INSERT INTO noun_adjective_derivation (adjective_id, noun_id) VALUES (?, ?)"; - break; - } - - case 3: // Adjective - { - query = "INSERT INTO adjective_adjective_derivation (adjective_id, adjective_id) VALUES (?, ?)"; - break; - } - - case 4: // Adverb - { - query = "INSERT INTO adjective_adverb_derivation (adjective_id, adverb_id) VALUES (?, ?)"; - break; - } - } + db_ << l; - break; + ppgs.update(); } + } + + { + progress ppgs("Writing forms...", forms_.size()); - case 4: // Adverb + for (form& f : forms_) { - switch (synset_id_2 / 100000000) - { - case 1: // Noun - { - query = "INSERT INTO noun_adverb_derivation (adverb_id, noun_id) VALUES (?, ?)"; - break; - } - - case 3: // Adjective - { - query = "INSERT INTO adjective_adverb_derivation (adverb_id, adjective_id) VALUES (?, ?)"; - break; - } 
- - case 4: // Adverb - { - query = "INSERT INTO adverb_adverb_derivation (adverb_1_id, adverb_2_id) VALUES (?, ?)"; - break; - } - } + db_ << f; - break; + ppgs.update(); } } - sqlite3_stmt* ppstmt; - if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK) { - db_error(ppdb, query); + progress ppgs("Writing pronunciations...", pronunciations_.size()); + + for (pronunciation& p : pronunciations_) + { + db_ << p; + + ppgs.update(); + } } - sqlite3_bind_int(ppstmt, 1, wn[synset_id_1][wnum_1]); - sqlite3_bind_int(ppstmt, 2, wn[synset_id_2][wnum_2]); - - if (sqlite3_step(ppstmt) != SQLITE_DONE) { - db_error(ppdb, query); + progress ppgs("Writing verb groups...", groups_.size()); + + for (group& g : groups_) + { + db_ << g; + + ppgs.update(); + } } - sqlite3_finalize(ppstmt); - } - } - - // hyp table - { - std::ifstream wnhypfile(wnpref + "wn_hyp.pl"); - if (!wnhypfile.is_open()) - { - std::cout << "Invalid WordNet data directory." << std::endl; - print_usage(); - } - - std::list lines; - for (;;) - { - std::string line; - if (!getline(wnhypfile, line)) { - break; - } - - if (line.back() == '\r') - { - line.pop_back(); + progress ppgs("Writing verb frames...", frames_.size()); + + for (frame& f : frames_) + { + db_ << f; + + ppgs.update(); + } } - - lines.push_back(line); } - progress ppgs("Writing hypernyms...", lines.size()); - for (auto line : lines) + void generator::readWordNetAntonymy() { - ppgs.update(); - - std::regex relation("^hyp\\((1\\d{8}),(1\\d{8})\\)\\."); - std::smatch relation_data; - if (!std::regex_search(line, relation_data, relation)) + std::list lines(readFile(wordNetPath_ + "wn_ant.pl")); + progress ppgs("Writing antonyms...", lines.size()); + for (auto line : lines) { - continue; - } + ppgs.update(); - int synset_id_1 = stoi(relation_data[1]); - int synset_id_2 = stoi(relation_data[2]); - std::string query("INSERT INTO hypernymy (hyponym_id, hypernym_id) VALUES (?, ?)"); - - for (auto mapping1 : wn[synset_id_1]) - { 
- for (auto mapping2 : wn[synset_id_2]) + std::regex relation("^ant\\(([134]\\d{8}),(\\d+),([134]\\d{8}),(\\d+)\\)\\."); + std::smatch relation_data; + if (!std::regex_search(line, relation_data, relation)) { - sqlite3_stmt* ppstmt; - if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK) - { - db_error(ppdb, query); - } - - sqlite3_bind_int(ppstmt, 1, mapping1.second); - sqlite3_bind_int(ppstmt, 2, mapping2.second); + continue; + } + + std::pair lookup1(std::stoi(relation_data[1]), std::stoi(relation_data[2])); + std::pair lookup2(std::stoi(relation_data[3]), std::stoi(relation_data[4])); + + if (wordByWnidAndWnum_.count(lookup1) && wordByWnidAndWnum_.count(lookup2)) + { + word& word1 = *wordByWnidAndWnum_.at(lookup1); + word& word2 = *wordByWnidAndWnum_.at(lookup2); - if (sqlite3_step(ppstmt) != SQLITE_DONE) - { - db_error(ppdb, query); - } + std::list fields; + fields.emplace_back("antonym_1_id", word1.getId()); + fields.emplace_back("antonym_2_id", word2.getId()); - sqlite3_finalize(ppstmt); + db_.insertIntoTable("antonymy", std::move(fields)); } } } - } - - // ins table - { - std::ifstream wninsfile(wnpref + "wn_ins.pl"); - if (!wninsfile.is_open()) - { - std::cout << "Invalid WordNet data directory." 
<< std::endl; - print_usage(); - } - - std::list lines; - for (;;) - { - std::string line; - if (!getline(wninsfile, line)) - { - break; - } - if (line.back() == '\r') + void generator::readWordNetVariation() + { + std::list lines(readFile(wordNetPath_ + "wn_at.pl")); + progress ppgs("Writing variation...", lines.size()); + for (auto line : lines) { - line.pop_back(); - } + ppgs.update(); - lines.push_back(line); + std::regex relation("^at\\((1\\d{8}),(3\\d{8})\\)\\."); + std::smatch relation_data; + if (!std::regex_search(line, relation_data, relation)) + { + continue; + } + + int lookup1 = std::stoi(relation_data[1]); + int lookup2 = std::stoi(relation_data[2]); + + if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2)) + { + notion& notion1 = *notionByWnid_.at(lookup1); + notion& notion2 = *notionByWnid_.at(lookup2); + + std::list fields; + fields.emplace_back("noun_id", notion1.getId()); + fields.emplace_back("adjective_id", notion2.getId()); + + db_.insertIntoTable("variation", std::move(fields)); + } + } } - progress ppgs("Writing instantiations...", lines.size()); - for (auto line : lines) + void generator::readWordNetClasses() { - ppgs.update(); - - std::regex relation("^ins\\((1\\d{8}),(1\\d{8})\\)\\."); - std::smatch relation_data; - if (!std::regex_search(line, relation_data, relation)) + std::list lines(readFile(wordNetPath_ + "wn_cls.pl")); + progress ppgs("Writing usage, topicality, and regionality...", lines.size()); + for (auto line : lines) { - continue; - } + ppgs.update(); - int synset_id_1 = stoi(relation_data[1]); - int synset_id_2 = stoi(relation_data[2]); - std::string query("INSERT INTO instantiation (instance_id, class_id) VALUES (?, ?)"); - - for (auto mapping1 : wn[synset_id_1]) - { - for (auto mapping2 : wn[synset_id_2]) + std::regex relation("^cls\\(([134]\\d{8}),(\\d+),(1\\d{8}),(\\d+),([tur])\\)\\."); + std::smatch relation_data; + if (!std::regex_search(line, relation_data, relation)) + { + continue; + } + + std::pair 
lookup1(std::stoi(relation_data[1]), std::stoi(relation_data[2])); + std::pair lookup2(std::stoi(relation_data[3]), std::stoi(relation_data[4])); + std::string class_type = relation_data[5]; + + std::string table_name; + if (class_type == "t") + { + table_name += "topicality"; + } else if (class_type == "u") + { + table_name += "usage"; + } else if (class_type == "r") + { + table_name += "regionality"; + } + + std::list leftJoin; + std::list rightJoin; + + if ((lookup1.second == 0) && (wordsByWnid_.count(lookup1.first))) + { + std::transform(std::begin(wordsByWnid_.at(lookup1.first)), std::end(wordsByWnid_.at(lookup1.first)), std::back_inserter(leftJoin), [] (word* w) { + return w->getId(); + }); + } else if (wordByWnidAndWnum_.count(lookup1)) { + leftJoin.push_back(wordByWnidAndWnum_.at(lookup1)->getId()); + } + + if ((lookup2.second == 0) && (wordsByWnid_.count(lookup2.first))) + { + std::transform(std::begin(wordsByWnid_.at(lookup2.first)), std::end(wordsByWnid_.at(lookup2.first)), std::back_inserter(rightJoin), [] (word* w) { + return w->getId(); + }); + } else if (wordByWnidAndWnum_.count(lookup2)) { + rightJoin.push_back(wordByWnidAndWnum_.at(lookup2)->getId()); + } + + for (int word1 : leftJoin) { - sqlite3_stmt* ppstmt; - if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK) + for (int word2 : rightJoin) { - db_error(ppdb, query); - } - - sqlite3_bind_int(ppstmt, 1, mapping1.second); - sqlite3_bind_int(ppstmt, 2, mapping2.second); + std::list fields; + fields.emplace_back("term_id", word1); + fields.emplace_back("domain_id", word2); - if (sqlite3_step(ppstmt) != SQLITE_DONE) - { - db_error(ppdb, query); + db_.insertIntoTable(table_name, std::move(fields)); } - - sqlite3_finalize(ppstmt); } } } - } - - // mm table - { - std::ifstream wnmmfile(wnpref + "wn_mm.pl"); - if (!wnmmfile.is_open()) - { - std::cout << "Invalid WordNet data directory." 
<< std::endl; - print_usage(); - } - - std::list lines; - for (;;) - { - std::string line; - if (!getline(wnmmfile, line)) - { - break; - } - if (line.back() == '\r') + void generator::readWordNetCausality() + { + std::list lines(readFile(wordNetPath_ + "wn_cs.pl")); + progress ppgs("Writing causality...", lines.size()); + for (auto line : lines) { - line.pop_back(); - } + ppgs.update(); - lines.push_back(line); + std::regex relation("^cs\\((2\\d{8}),(2\\d{8})\\)\\."); + std::smatch relation_data; + if (!std::regex_search(line, relation_data, relation)) + { + continue; + } + + int lookup1 = std::stoi(relation_data[1]); + int lookup2 = std::stoi(relation_data[2]); + + if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2)) + { + notion& notion1 = *notionByWnid_.at(lookup1); + notion& notion2 = *notionByWnid_.at(lookup2); + + std::list fields; + fields.emplace_back("effect_id", notion1.getId()); + fields.emplace_back("cause_id", notion2.getId()); + + db_.insertIntoTable("causality", std::move(fields)); + } + } } - progress ppgs("Writing member meronyms...", lines.size()); - for (auto line : lines) + void generator::readWordNetEntailment() { - ppgs.update(); - - std::regex relation("^mm\\((1\\d{8}),(1\\d{8})\\)\\."); - std::smatch relation_data; - if (!std::regex_search(line, relation_data, relation)) + std::list lines(readFile(wordNetPath_ + "wn_ent.pl")); + progress ppgs("Writing entailment...", lines.size()); + for (auto line : lines) { - continue; - } - - int synset_id_1 = stoi(relation_data[1]); - int synset_id_2 = stoi(relation_data[2]); - std::string query("INSERT INTO member_meronymy (holonym_id, meronym_id) VALUES (?, ?)"); + ppgs.update(); - for (auto mapping1 : wn[synset_id_1]) - { - for (auto mapping2 : wn[synset_id_2]) + std::regex relation("^ent\\((2\\d{8}),(2\\d{8})\\)\\."); + std::smatch relation_data; + if (!std::regex_search(line, relation_data, relation)) { - sqlite3_stmt* ppstmt; - if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), 
&ppstmt, NULL) != SQLITE_OK) - { - db_error(ppdb, query); - } - - sqlite3_bind_int(ppstmt, 1, mapping1.second); - sqlite3_bind_int(ppstmt, 2, mapping2.second); + continue; + } + + int lookup1 = std::stoi(relation_data[1]); + int lookup2 = std::stoi(relation_data[2]); + + if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2)) + { + notion& notion1 = *notionByWnid_.at(lookup1); + notion& notion2 = *notionByWnid_.at(lookup2); - if (sqlite3_step(ppstmt) != SQLITE_DONE) - { - db_error(ppdb, query); - } + std::list fields; + fields.emplace_back("given_id", notion1.getId()); + fields.emplace_back("entailment_id", notion2.getId()); - sqlite3_finalize(ppstmt); + db_.insertIntoTable("entailment", std::move(fields)); } } } - } - - // ms table - { - std::ifstream wnmsfile(wnpref + "wn_ms.pl"); - if (!wnmsfile.is_open()) - { - std::cout << "Invalid WordNet data directory." << std::endl; - print_usage(); - } - - std::list lines; - for (;;) + + void generator::readWordNetHypernymy() { - std::string line; - if (!getline(wnmsfile, line)) + std::list lines(readFile(wordNetPath_ + "wn_hyp.pl")); + progress ppgs("Writing hypernymy...", lines.size()); + for (auto line : lines) { - break; + ppgs.update(); + + std::regex relation("^hyp\\(([12]\\d{8}),([12]\\d{8})\\)\\."); + std::smatch relation_data; + if (!std::regex_search(line, relation_data, relation)) + { + continue; + } + + int lookup1 = std::stoi(relation_data[1]); + int lookup2 = std::stoi(relation_data[2]); + + if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2)) + { + notion& notion1 = *notionByWnid_.at(lookup1); + notion& notion2 = *notionByWnid_.at(lookup2); + + std::list fields; + fields.emplace_back("hyponym_id", notion1.getId()); + fields.emplace_back("hypernym_id", notion2.getId()); + + db_.insertIntoTable("hypernymy", std::move(fields)); + } } + } - if (line.back() == '\r') + void generator::readWordNetInstantiation() + { + std::list lines(readFile(wordNetPath_ + "wn_ins.pl")); + progress 
ppgs("Writing instantiation...", lines.size()); + for (auto line : lines) { - line.pop_back(); + ppgs.update(); + + std::regex relation("^ins\\((1\\d{8}),(1\\d{8})\\)\\."); + std::smatch relation_data; + if (!std::regex_search(line, relation_data, relation)) + { + continue; + } + + int lookup1 = std::stoi(relation_data[1]); + int lookup2 = std::stoi(relation_data[2]); + + if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2)) + { + notion& notion1 = *notionByWnid_.at(lookup1); + notion& notion2 = *notionByWnid_.at(lookup2); + + std::list fields; + fields.emplace_back("instance_id", notion1.getId()); + fields.emplace_back("class_id", notion2.getId()); + + db_.insertIntoTable("instantiation", std::move(fields)); + } } - - lines.push_back(line); } - progress ppgs("Writing substance meronyms...", lines.size()); - for (auto line : lines) + void generator::readWordNetMemberMeronymy() { - ppgs.update(); - - std::regex relation("^ms\\((1\\d{8}),(1\\d{8})\\)\\."); - std::smatch relation_data; - if (!std::regex_search(line, relation_data, relation)) + std::list lines(readFile(wordNetPath_ + "wn_mm.pl")); + progress ppgs("Writing member meronymy...", lines.size()); + for (auto line : lines) { - continue; - } - - int synset_id_1 = stoi(relation_data[1]); - int synset_id_2 = stoi(relation_data[2]); - std::string query("INSERT INTO substance_meronymy (holonym_id, meronym_id) VALUES (?, ?)"); + ppgs.update(); - for (auto mapping1 : wn[synset_id_1]) - { - for (auto mapping2 : wn[synset_id_2]) + std::regex relation("^mm\\((1\\d{8}),(1\\d{8})\\)\\."); + std::smatch relation_data; + if (!std::regex_search(line, relation_data, relation)) { - sqlite3_stmt* ppstmt; - if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK) - { - db_error(ppdb, query); - } - - sqlite3_bind_int(ppstmt, 1, mapping1.second); - sqlite3_bind_int(ppstmt, 2, mapping2.second); + continue; + } + + int lookup1 = std::stoi(relation_data[1]); + int lookup2 = 
std::stoi(relation_data[2]); + + if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2)) + { + notion& notion1 = *notionByWnid_.at(lookup1); + notion& notion2 = *notionByWnid_.at(lookup2); - if (sqlite3_step(ppstmt) != SQLITE_DONE) - { - db_error(ppdb, query); - } + std::list fields; + fields.emplace_back("holonym_id", notion1.getId()); + fields.emplace_back("meronym_id", notion2.getId()); - sqlite3_finalize(ppstmt); + db_.insertIntoTable("member_meronymy", std::move(fields)); } } } - } - - // mm table - { - std::ifstream wnmpfile(wnpref + "wn_mp.pl"); - if (!wnmpfile.is_open()) - { - std::cout << "Invalid WordNet data directory." << std::endl; - print_usage(); - } - - std::list lines; - for (;;) + + void generator::readWordNetPartMeronymy() { - std::string line; - if (!getline(wnmpfile, line)) + std::list lines(readFile(wordNetPath_ + "wn_mp.pl")); + progress ppgs("Writing part meronymy...", lines.size()); + for (auto line : lines) { - break; + ppgs.update(); + + std::regex relation("^mp\\((1\\d{8}),(1\\d{8})\\)\\."); + std::smatch relation_data; + if (!std::regex_search(line, relation_data, relation)) + { + continue; + } + + int lookup1 = std::stoi(relation_data[1]); + int lookup2 = std::stoi(relation_data[2]); + + if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2)) + { + notion& notion1 = *notionByWnid_.at(lookup1); + notion& notion2 = *notionByWnid_.at(lookup2); + + std::list fields; + fields.emplace_back("holonym_id", notion1.getId()); + fields.emplace_back("meronym_id", notion2.getId()); + + db_.insertIntoTable("part_meronymy", std::move(fields)); + } } + } - if (line.back() == '\r') + void generator::readWordNetSubstanceMeronymy() + { + std::list lines(readFile(wordNetPath_ + "wn_ms.pl")); + progress ppgs("Writing substance meronymy...", lines.size()); + for (auto line : lines) { - line.pop_back(); - } + ppgs.update(); - lines.push_back(line); + std::regex relation("^ms\\((1\\d{8}),(1\\d{8})\\)\\."); + std::smatch relation_data; + if 
(!std::regex_search(line, relation_data, relation)) + { + continue; + } + + int lookup1 = std::stoi(relation_data[1]); + int lookup2 = std::stoi(relation_data[2]); + + if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2)) + { + notion& notion1 = *notionByWnid_.at(lookup1); + notion& notion2 = *notionByWnid_.at(lookup2); + + std::list fields; + fields.emplace_back("holonym_id", notion1.getId()); + fields.emplace_back("meronym_id", notion2.getId()); + + db_.insertIntoTable("substance_meronymy", std::move(fields)); + } + } } - progress ppgs("Writing part meronyms...", lines.size()); - for (auto line : lines) + void generator::readWordNetPertainymy() { - ppgs.update(); - - std::regex relation("^mp\\((1\\d{8}),(1\\d{8})\\)\\."); - std::smatch relation_data; - if (!std::regex_search(line, relation_data, relation)) + std::list lines(readFile(wordNetPath_ + "wn_per.pl")); + progress ppgs("Writing pertainymy and mannernymy...", lines.size()); + for (auto line : lines) { - continue; - } - - int synset_id_1 = stoi(relation_data[1]); - int synset_id_2 = stoi(relation_data[2]); - std::string query("INSERT INTO part_meronymy (holonym_id, meronym_id) VALUES (?, ?)"); + ppgs.update(); - for (auto mapping1 : wn[synset_id_1]) - { - for (auto mapping2 : wn[synset_id_2]) + std::regex relation("^per\\(([34]\\d{8}),(\\d+),([13]\\d{8}),(\\d+)\\)\\."); + std::smatch relation_data; + if (!std::regex_search(line, relation_data, relation)) { - sqlite3_stmt* ppstmt; - if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK) - { - db_error(ppdb, query); - } + continue; + } + + std::pair lookup1(std::stoi(relation_data[1]), std::stoi(relation_data[2])); + std::pair lookup2(std::stoi(relation_data[3]), std::stoi(relation_data[4])); + + if (wordByWnidAndWnum_.count(lookup1) && wordByWnidAndWnum_.count(lookup2)) + { + word& word1 = *wordByWnidAndWnum_.at(lookup1); + word& word2 = *wordByWnidAndWnum_.at(lookup2); - sqlite3_bind_int(ppstmt, 1, 
mapping1.second); - sqlite3_bind_int(ppstmt, 2, mapping2.second); + if (word1.getNotion().getPartOfSpeech() == part_of_speech::adjective) + { + std::list fields; + fields.emplace_back("pertainym_id", word1.getId()); + fields.emplace_back("noun_id", word2.getId()); - if (sqlite3_step(ppstmt) != SQLITE_DONE) + db_.insertIntoTable("pertainymy", std::move(fields)); + } else if (word1.getNotion().getPartOfSpeech() == part_of_speech::adverb) { - db_error(ppdb, query); - } + std::list fields; + fields.emplace_back("mannernym_id", word1.getId()); + fields.emplace_back("adjective_id", word2.getId()); - sqlite3_finalize(ppstmt); + db_.insertIntoTable("mannernymy", std::move(fields)); + } } } } - } - - // per table - { - std::ifstream wnperfile(wnpref + "wn_per.pl"); - if (!wnperfile.is_open()) - { - std::cout << "Invalid WordNet data directory." << std::endl; - print_usage(); - } - - std::list lines; - for (;;) - { - std::string line; - if (!getline(wnperfile, line)) - { - break; - } - if (line.back() == '\r') + void generator::readWordNetSpecification() + { + std::list lines(readFile(wordNetPath_ + "wn_sa.pl")); + progress ppgs("Writing specifications...", lines.size()); + for (auto line : lines) { - line.pop_back(); + ppgs.update(); + + std::regex relation("^sa\\((23\\d{8}),(\\d+),(23\\d{8}),(\\d+)\\)\\."); + std::smatch relation_data; + if (!std::regex_search(line, relation_data, relation)) + { + continue; + } + + std::pair lookup1(std::stoi(relation_data[1]), std::stoi(relation_data[2])); + std::pair lookup2(std::stoi(relation_data[3]), std::stoi(relation_data[4])); + + if (wordByWnidAndWnum_.count(lookup1) && wordByWnidAndWnum_.count(lookup2)) + { + word& word1 = *wordByWnidAndWnum_.at(lookup1); + word& word2 = *wordByWnidAndWnum_.at(lookup2); + + std::list fields; + fields.emplace_back("general_id", word1.getId()); + fields.emplace_back("specific_id", word2.getId()); + + db_.insertIntoTable("specification", std::move(fields)); + } } - - lines.push_back(line); } - 
progress ppgs("Writing pertainyms and mannernyms...", lines.size()); - for (auto line : lines) + void generator::readWordNetSimilarity() { - ppgs.update(); - - std::regex relation("^per\\(([34]\\d{8}),(\\d+),([13]\\d{8}),(\\d+)\\)\\."); - std::smatch relation_data; - if (!std::regex_search(line, relation_data, relation)) + std::list lines(readFile(wordNetPath_ + "wn_sim.pl")); + progress ppgs("Writing adjective similarity...", lines.size()); + for (auto line : lines) { - continue; - } + ppgs.update(); - int synset_id_1 = stoi(relation_data[1]); - int wnum_1 = stoi(relation_data[2]); - int synset_id_2 = stoi(relation_data[3]); - int wnum_2 = stoi(relation_data[4]); - std::string query; - switch (synset_id_1 / 100000000) - { - case 3: // Adjective + std::regex relation("^sim\\((3\\d{8}),(3\\d{8})\\)\\."); + std::smatch relation_data; + if (!std::regex_search(line, relation_data, relation)) { - // This is a pertainym, the second word should be a noun - // Technically it can be an adjective but we're ignoring that - if (synset_id_2 / 100000000 != 1) - { - continue; - } - - query = "INSERT INTO pertainymy (pertainym_id, noun_id) VALUES (?, ?)"; - - break; + continue; } + + int lookup1 = std::stoi(relation_data[1]); + int lookup2 = std::stoi(relation_data[2]); - case 4: // Adverb + if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2)) { - // This is a mannernym, the second word should be an adjective - if (synset_id_2 / 100000000 != 3) - { - continue; - } + notion& notion1 = *notionByWnid_.at(lookup1); + notion& notion2 = *notionByWnid_.at(lookup2); - query = "INSERT INTO mannernymy (mannernym_id, adjective_id) VALUES (?, ?)"; + std::list fields; + fields.emplace_back("adjective_1_id", notion1.getId()); + fields.emplace_back("adjective_2_id", notion2.getId()); - break; + db_.insertIntoTable("similarity", std::move(fields)); } } - - sqlite3_stmt* ppstmt; - if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) - { - 
db_error(ppdb, query); - } - - sqlite3_bind_int(ppstmt, 1, wn[synset_id_1][wnum_1]); - sqlite3_bind_int(ppstmt, 2, wn[synset_id_2][wnum_2]); + } - if (sqlite3_step(ppstmt) != SQLITE_DONE) + std::list generator::readFile(std::string path) + { + std::ifstream file(path); + if (!file) { - db_error(ppdb, query); + throw std::invalid_argument("Could not find file " + path); } - - sqlite3_finalize(ppstmt); - } - } - // sa table - { - std::ifstream wnsafile(wnpref + "wn_sa.pl"); - if (!wnsafile.is_open()) - { - std::cout << "Invalid WordNet data directory." << std::endl; - print_usage(); - } - - std::list lines; - for (;;) - { + std::list lines; std::string line; - if (!getline(wnsafile, line)) - { - break; - } - - if (line.back() == '\r') + while (std::getline(file, line)) { - line.pop_back(); + if (line.back() == '\r') + { + line.pop_back(); + } + + lines.push_back(line); } - lines.push_back(line); + return lines; } - progress ppgs("Writing specifications...", lines.size()); - for (auto line : lines) + part_of_speech generator::partOfSpeechByWnid(int wnid) { - ppgs.update(); - - std::regex relation("^per\\((3\\d{8}),(\\d+),(3\\d{8}),(\\d+)\\)\\."); - std::smatch relation_data; - if (!std::regex_search(line, relation_data, relation)) - { - continue; - } - - int synset_id_1 = stoi(relation_data[1]); - int wnum_1 = stoi(relation_data[2]); - int synset_id_2 = stoi(relation_data[3]); - int wnum_2 = stoi(relation_data[4]); - std::string query("INSERT INTO specification (general_id, specific_id) VALUES (?, ?)"); - - sqlite3_stmt* ppstmt; - if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) + switch (wnid / 100000000) { - db_error(ppdb, query); + case 1: return part_of_speech::noun; + case 2: return part_of_speech::verb; + case 3: return part_of_speech::adjective; + case 4: return part_of_speech::adverb; + default: throw std::domain_error("Invalid WordNet synset ID: " + std::to_string(wnid)); } + } - sqlite3_bind_int(ppstmt, 1, 
wn[synset_id_1][wnum_1]); - sqlite3_bind_int(ppstmt, 2, wn[synset_id_2][wnum_2]); + notion& generator::createNotion(part_of_speech partOfSpeech) + { + notions_.emplace_back(partOfSpeech); + + return notions_.back(); + } - if (sqlite3_step(ppstmt) != SQLITE_DONE) + notion& generator::lookupOrCreateNotion(int wnid) + { + if (!notionByWnid_.count(wnid)) { - db_error(ppdb, query); + notions_.emplace_back(partOfSpeechByWnid(wnid), wnid); + notionByWnid_[wnid] = ¬ions_.back(); } - - sqlite3_finalize(ppstmt); - } - } - - // sim table - { - std::ifstream wnsimfile(wnpref + "wn_sim.pl"); - if (!wnsimfile.is_open()) - { - std::cout << "Invalid WordNet data directory." << std::endl; - print_usage(); + + return *notionByWnid_.at(wnid); } - - std::list lines; - for (;;) + + lemma& generator::lookupOrCreateLemma(std::string base_form) { - std::string line; - if (!getline(wnsimfile, line)) + if (!lemmaByBaseForm_.count(base_form)) { - break; + lemmas_.emplace_back(lookupOrCreateForm(base_form)); + lemmaByBaseForm_[base_form] = &lemmas_.back(); } + + return *lemmaByBaseForm_.at(base_form); + } - if (line.back() == '\r') + form& generator::lookupOrCreateForm(std::string text) + { + if (!formByText_.count(text)) { - line.pop_back(); + forms_.emplace_back(text); + formByText_[text] = &forms_.back(); } - lines.push_back(line); + return *formByText_[text]; } - progress ppgs("Writing sense synonyms...", lines.size()); - for (auto line : lines) + template word& generator::createWord(Args&&... 
args) { - ppgs.update(); + words_.emplace_back(std::forward(args)...); + word& w = words_.back(); - std::regex relation("^sim\\((3\\d{8}),(3\\d{8})\\)\\."); - std::smatch relation_data; - if (!std::regex_search(line, relation_data, relation)) + wordsByBaseForm_[w.getLemma().getBaseForm().getText()].insert(&w); + + if (w.getNotion().hasWnid()) { - continue; + wordsByWnid_[w.getNotion().getWnid()].insert(&w); } - int synset_id_1 = stoi(relation_data[1]); - int synset_id_2 = stoi(relation_data[2]); - std::string query("INSERT INTO adjective_synonymy (adjective_1_id, adjective_2_id) VALUES (?, ?)"); + return w; + } + + group& generator::createGroup(xmlNodePtr top) + { + groups_.emplace_back(); + group& grp = groups_.back(); - for (auto mapping1 : wn[synset_id_1]) + xmlChar* key; + + for (xmlNodePtr node = top->xmlChildrenNode; node != nullptr; node = node->next) { - for (auto mapping2 : wn[synset_id_2]) + if (!xmlStrcmp(node->name, reinterpret_cast("SUBCLASSES"))) { - sqlite3_stmt* ppstmt; - if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK) + for (xmlNodePtr subclass = node->xmlChildrenNode; subclass != nullptr; subclass = subclass->next) { - db_error(ppdb, query); + if (!xmlStrcmp(subclass->name, reinterpret_cast("VNSUBCLASS"))) + { + try + { + group& subgrp = createGroup(subclass); + subgrp.setParent(grp); + } catch (const std::exception& e) + { + key = xmlGetProp(subclass, reinterpret_cast("ID")); + + if (key == nullptr) + { + std::throw_with_nested(std::logic_error("Error parsing IDless subgroup")); + } else { + std::string subgroupId(reinterpret_cast(key)); + xmlFree(key); + + std::throw_with_nested(std::logic_error("Error parsing subgroup " + subgroupId)); + } + } + } } - - sqlite3_bind_int(ppstmt, 1, mapping1.second); - sqlite3_bind_int(ppstmt, 2, mapping2.second); - - if (sqlite3_step(ppstmt) != SQLITE_DONE) + } else if (!xmlStrcmp(node->name, reinterpret_cast("MEMBERS"))) + { + for (xmlNodePtr member = 
node->xmlChildrenNode; member != nullptr; member = member->next) { - db_error(ppdb, query); + if (!xmlStrcmp(member->name, reinterpret_cast("MEMBER"))) + { + key = xmlGetProp(member, reinterpret_cast("wn")); + std::string wnSenses(reinterpret_cast(key)); + xmlFree(key); + + auto wnSenseKeys = split>(wnSenses, " "); + if (!wnSenseKeys.empty()) + { + std::list tempKeys; + + std::transform(std::begin(wnSenseKeys), std::end(wnSenseKeys), std::back_inserter(tempKeys), [] (std::string sense) { + return sense + "::"; + }); + + std::list filteredKeys; + + std::remove_copy_if(std::begin(tempKeys), std::end(tempKeys), std::back_inserter(filteredKeys), [&] (std::string sense) { + return !wnSenseKeys_.count(sense); + }); + + wnSenseKeys = std::move(filteredKeys); + } + + if (!wnSenseKeys.empty()) + { + for (std::string sense : wnSenseKeys) + { + word& wordSense = *wnSenseKeys_[sense]; + wordSense.setVerbGroup(grp); + } + } else { + key = xmlGetProp(member, reinterpret_cast("name")); + std::string memberName(reinterpret_cast(key)); + xmlFree(key); + + notion& n = createNotion(part_of_speech::verb); + lemma& l = lookupOrCreateLemma(memberName); + word& w = createWord(n, l); + + w.setVerbGroup(grp); + } + } } - - sqlite3_reset(ppstmt); - sqlite3_clear_bindings(ppstmt); - - sqlite3_bind_int(ppstmt, 1, mapping2.second); - sqlite3_bind_int(ppstmt, 2, mapping1.second); - - if (sqlite3_step(ppstmt) != SQLITE_DONE) + } else if (!xmlStrcmp(node->name, reinterpret_cast("THEMROLES"))) + { + for (xmlNodePtr roletopnode = node->xmlChildrenNode; roletopnode != nullptr; roletopnode = roletopnode->next) { - db_error(ppdb, query); + if (!xmlStrcmp(roletopnode->name, reinterpret_cast("THEMROLE"))) + { + role r; + + key = xmlGetProp(roletopnode, reinterpret_cast("type")); + std::string roleName = reinterpret_cast(key); + xmlFree(key); + + for (xmlNodePtr rolenode = roletopnode->xmlChildrenNode; rolenode != nullptr; rolenode = rolenode->next) + { + if (!xmlStrcmp(rolenode->name, 
reinterpret_cast("SELRESTRS"))) + { + r.setSelrestrs(parseSelrestr(rolenode)); + } + } + + grp.addRole(roleName, std::move(r)); + } } + } else if (!xmlStrcmp(node->name, reinterpret_cast("FRAMES"))) + { + for (xmlNodePtr frametopnode = node->xmlChildrenNode; frametopnode != nullptr; frametopnode = frametopnode->next) + { + if (!xmlStrcmp(frametopnode->name, reinterpret_cast("FRAME"))) + { + frames_.emplace_back(); + frame& fr = frames_.back(); - sqlite3_finalize(ppstmt); + for (xmlNodePtr framenode = frametopnode->xmlChildrenNode; framenode != nullptr; framenode = framenode->next) + { + if (!xmlStrcmp(framenode->name, reinterpret_cast("SYNTAX"))) + { + for (xmlNodePtr syntaxnode = framenode->xmlChildrenNode; syntaxnode != nullptr; syntaxnode = syntaxnode->next) + { + if (!xmlStrcmp(syntaxnode->name, reinterpret_cast("NP"))) + { + key = xmlGetProp(syntaxnode, reinterpret_cast("value")); + std::string partRole = reinterpret_cast(key); + xmlFree(key); + + selrestr partSelrestrs; + std::set partSynrestrs; + + for (xmlNodePtr npnode = syntaxnode->xmlChildrenNode; npnode != nullptr; npnode = npnode->next) + { + if (!xmlStrcmp(npnode->name, reinterpret_cast("SYNRESTRS"))) + { + for (xmlNodePtr synrestr = npnode->xmlChildrenNode; synrestr != nullptr; synrestr = synrestr->next) + { + if (!xmlStrcmp(synrestr->name, reinterpret_cast("SYNRESTR"))) + { + key = xmlGetProp(synrestr, reinterpret_cast("type")); + partSynrestrs.insert(reinterpret_cast(key)); + xmlFree(key); + } + } + } + + if (!xmlStrcmp(npnode->name, reinterpret_cast("SELRESTRS"))) + { + partSelrestrs = parseSelrestr(npnode); + } + } + + fr.push_back(part::createNounPhrase(std::move(partRole), std::move(partSelrestrs), std::move(partSynrestrs))); + } else if (!xmlStrcmp(syntaxnode->name, reinterpret_cast("VERB"))) + { + fr.push_back(part::createVerb()); + } else if (!xmlStrcmp(syntaxnode->name, reinterpret_cast("PREP"))) + { + std::set partChoices; + bool partLiteral; + + if (xmlHasProp(syntaxnode, 
reinterpret_cast("value"))) + { + partLiteral = true; + + key = xmlGetProp(syntaxnode, reinterpret_cast("value")); + std::string choicesStr = reinterpret_cast(key); + xmlFree(key); + + split(choicesStr, " ", std::inserter(partChoices, std::end(partChoices))); + } else { + partLiteral = false; + + for (xmlNodePtr npnode = syntaxnode->xmlChildrenNode; npnode != nullptr; npnode = npnode->next) + { + if (!xmlStrcmp(npnode->name, reinterpret_cast("SELRESTRS"))) + { + for (xmlNodePtr synrestr = npnode->xmlChildrenNode; synrestr != nullptr; synrestr = synrestr->next) + { + if (!xmlStrcmp(synrestr->name, reinterpret_cast("SELRESTR"))) + { + key = xmlGetProp(synrestr, reinterpret_cast("type")); + partChoices.insert(reinterpret_cast(key)); + xmlFree(key); + } + } + } + } + } + + fr.push_back(part::createPreposition(std::move(partChoices), partLiteral)); + } else if (!xmlStrcmp(syntaxnode->name, reinterpret_cast("ADJ"))) + { + fr.push_back(part::createAdjective()); + } else if (!xmlStrcmp(syntaxnode->name, reinterpret_cast("ADV"))) + { + fr.push_back(part::createAdverb()); + } else if (!xmlStrcmp(syntaxnode->name, reinterpret_cast("LEX"))) + { + key = xmlGetProp(syntaxnode, reinterpret_cast("value")); + std::string literalValue = reinterpret_cast(key); + xmlFree(key); + + fr.push_back(part::createLiteral(literalValue)); + } else { + continue; + } + } + + grp.addFrame(fr); + } + } + } + } } } - } - } - - // syntax table - { - std::ifstream wnsyntaxfile(wnpref + "wn_syntax.pl"); - if (!wnsyntaxfile.is_open()) - { - std::cout << "Invalid WordNet data directory." 
<< std::endl; - print_usage(); - } - std::list lines; - for (;;) - { - std::string line; - if (!getline(wnsyntaxfile, line)) - { - break; - } - - if (line.back() == '\r') - { - line.pop_back(); - } - - lines.push_back(line); + return grp; } - progress ppgs("Writing adjective syntax markers...", lines.size()); - for (auto line : lines) + selrestr generator::parseSelrestr(xmlNodePtr top) { - ppgs.update(); - - std::regex relation("^syntax\\((3\\d{8}),(\\d+),([ipa])p?\\)\\."); - std::smatch relation_data; - if (!std::regex_search(line, relation_data, relation)) - { - continue; - } - - int synset_id = stoi(relation_data[1]); - int wnum = stoi(relation_data[2]); - std::string syn = relation_data[3]; - std::string query("UPDATE adjectives SET position = ? WHERE adjective_id = ?"); - - sqlite3_stmt* ppstmt; - if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK) + xmlChar* key; + + if (!xmlStrcmp(top->name, reinterpret_cast("SELRESTRS"))) { - db_error(ppdb, query); - } - - sqlite3_bind_text(ppstmt, 1, syn.c_str(), 1, SQLITE_TRANSIENT); - sqlite3_bind_int(ppstmt, 2, wn[synset_id][wnum]); - - if (sqlite3_step(ppstmt) != SQLITE_DONE) + if (xmlChildElementCount(top) == 0) + { + return {}; + } else if (xmlChildElementCount(top) == 1) + { + return parseSelrestr(xmlFirstElementChild(top)); + } else { + bool orlogic = false; + if (xmlHasProp(top, reinterpret_cast("logic"))) + { + key = xmlGetProp(top, reinterpret_cast("logic")); + if (!xmlStrcmp(key, reinterpret_cast("or"))) + { + orlogic = true; + } + + xmlFree(key); + } + + std::list children; + for (xmlNodePtr selrestr = top->xmlChildrenNode; selrestr != nullptr; selrestr = selrestr->next) + { + if (!xmlStrcmp(selrestr->name, reinterpret_cast("SELRESTRS")) + || !xmlStrcmp(selrestr->name, reinterpret_cast("SELRESTR"))) + { + children.push_back(parseSelrestr(selrestr)); + } + } + + return selrestr(children, orlogic); + } + } else if (!xmlStrcmp(top->name, reinterpret_cast("SELRESTR"))) { - 
db_error(ppdb, query); + key = xmlGetProp(top, reinterpret_cast("Value")); + bool selPos = (std::string(reinterpret_cast(key)) == "+"); + xmlFree(key); + + key = xmlGetProp(top, reinterpret_cast("type")); + std::string selRestriction = reinterpret_cast(key); + xmlFree(key); + + return selrestr(selRestriction, selPos); + } else { + throw std::logic_error("Badly formatted selrestr"); } - - sqlite3_finalize(ppstmt); } - } - - sqlite3_close_v2(ppdb); - - std::cout << "Done." << std::endl; -} + + }; +}; diff --git a/generator/generator.h b/generator/generator.h new file mode 100644 index 0000000..e2a7404 --- /dev/null +++ b/generator/generator.h @@ -0,0 +1,151 @@ +#ifndef GENERATOR_H_5B61CBC5 +#define GENERATOR_H_5B61CBC5 + +#include +#include +#include +#include +#include +#include "database.h" +#include "notion.h" +#include "word.h" +#include "lemma.h" +#include "form.h" +#include "pronunciation.h" +#include "group.h" +#include "frame.h" + +namespace verbly { + namespace generator { + + enum class part_of_speech; + class selrestr; + + class generator { + public: + + // Constructor + + generator( + std::string verbNetPath, + std::string agidPath, + std::string wordNetPath, + std::string cmudictPath, + std::string imageNetPath, + std::string outputPath); + + // Action + + void run(); + + private: + + // Subroutines + + void readWordNetSynsets(); + + void readAdjectivePositioning(); + + void readImageNetUrls(); + + void readWordNetSenseKeys(); + + void readVerbNet(); + + void readAgidInflections(); + + void readPrepositions(); + + void readCmudictPronunciations(); + + void writeSchema(); + + void dumpObjects(); + + void readWordNetAntonymy(); + + void readWordNetVariation(); + + void readWordNetClasses(); + + void readWordNetCausality(); + + void readWordNetEntailment(); + + void readWordNetHypernymy(); + + void readWordNetInstantiation(); + + void readWordNetMemberMeronymy(); + + void readWordNetPartMeronymy(); + + void readWordNetSubstanceMeronymy(); + + void 
readWordNetPertainymy(); + + void readWordNetSpecification(); + + void readWordNetSimilarity(); + + // Helpers + + std::list readFile(std::string path); + + inline part_of_speech partOfSpeechByWnid(int wnid); + + notion& createNotion(part_of_speech partOfSpeech); + + notion& lookupOrCreateNotion(int wnid); + + lemma& lookupOrCreateLemma(std::string base_form); + + form& lookupOrCreateForm(std::string text); + + template word& createWord(Args&&... args); + + group& createGroup(xmlNodePtr top); + + selrestr parseSelrestr(xmlNodePtr top); + + // Input + + std::string verbNetPath_; + std::string agidPath_; + std::string wordNetPath_; + std::string cmudictPath_; + std::string imageNetPath_; + + // Output + + database db_; + + // Data + + std::list notions_; + std::list words_; + std::list lemmas_; + std::list
forms_; + std::list pronunciations_; + std::list frames_; + std::list groups_; + + // Indexes + + std::map notionByWnid_; + std::map> wordsByWnid_; + std::map, word*> wordByWnidAndWnum_; + std::map> wordsByBaseForm_; + std::map lemmaByBaseForm_; + std::map formByText_; + + // Caches + + std::map wnSenseKeys_; + + }; + + }; +}; + +#endif /* end of include guard: GENERATOR_H_5B61CBC5 */ diff --git a/generator/group.cpp b/generator/group.cpp new file mode 100644 index 0000000..7cbd4c8 --- /dev/null +++ b/generator/group.cpp @@ -0,0 +1,119 @@ +#include "group.h" +#include +#include +#include +#include "database.h" +#include "field.h" +#include "frame.h" + +namespace verbly { + namespace generator { + + int group::nextId_ = 0; + + group::group() : id_(nextId_++) + { + } + + void group::setParent(const group& parent) + { + // Adding a group to itself is nonsensical. + assert(&parent != this); + + parent_ = &parent; + } + + void group::addRole(std::string name, role r) + { + roleNames_.insert(name); + roles_[name] = std::move(r); + } + + void group::addFrame(const frame& f) + { + frames_.insert(&f); + } + + std::set group::getRoles() const + { + std::set fullRoles = roleNames_; + + if (hasParent()) + { + for (std::string name : getParent().getRoles()) + { + fullRoles.insert(name); + } + } + + return fullRoles; + } + + const role& group::getRole(std::string name) const + { + if (roles_.count(name)) + { + return roles_.at(name); + } else if (hasParent()) + { + return getParent().getRole(name); + } else { + throw std::invalid_argument("Specified role not found in verb group"); + } + } + + std::set group::getFrames() const + { + std::set fullFrames = frames_; + + if (hasParent()) + { + for (const frame* f : getParent().getFrames()) + { + fullFrames.insert(f); + } + } + + return fullFrames; + } + + database& operator<<(database& db, const group& arg) + { + // Serialize the group first + { + std::list fields; + fields.emplace_back("group_id", arg.getId()); + + nlohmann::json 
jsonRoles; + for (std::string name : arg.getRoles()) + { + const role& r = arg.getRole(name); + + nlohmann::json jsonRole; + jsonRole["type"] = name; + jsonRole["selrestrs"] = r.getSelrestrs().toJson(); + + jsonRoles.emplace_back(std::move(jsonRole)); + } + + fields.emplace_back("data", jsonRoles.dump()); + + db.insertIntoTable("groups", std::move(fields)); + } + + // Then, serialize the group/frame relationship + for (const frame* f : arg.getFrames()) + { + std::list fields; + + fields.emplace_back("group_id", arg.getId()); + fields.emplace_back("frame_id", f->getId()); + + db.insertIntoTable("groups_frames", std::move(fields)); + } + + return db; + } + + }; +}; diff --git a/generator/group.h b/generator/group.h new file mode 100644 index 0000000..efb8c5d --- /dev/null +++ b/generator/group.h @@ -0,0 +1,80 @@ +#ifndef GROUP_H_EDAFB5DC +#define GROUP_H_EDAFB5DC + +#include +#include +#include +#include +#include "role.h" + +namespace verbly { + namespace generator { + + class frame; + class database; + + class group { + public: + + // Constructor + + group(); + + // Mutators + + void setParent(const group& parent); + + void addRole(std::string name, role r); + + void addFrame(const frame& f); + + // Accessors + + int getId() const + { + return id_; + } + + bool hasParent() const + { + return (parent_ != nullptr); + } + + const group& getParent() const + { + // Calling code should always call hasParent first + assert(parent_ != nullptr); + + return *parent_; + } + + std::set getRoles() const; + + const role& getRole(std::string name) const; + + std::set getFrames() const; + + private: + + static int nextId_; + + const int id_; + + const group* parent_ = nullptr; + std::map roles_; + std::set frames_; + + // Caches + + std::set roleNames_; + + }; + + // Serializer + + database& operator<<(database& db, const group& arg); + + }; +}; + +#endif /* end of include guard: GROUP_H_EDAFB5DC */ diff --git a/generator/lemma.cpp b/generator/lemma.cpp new file mode 100644 index 
0000000..e66b153 --- /dev/null +++ b/generator/lemma.cpp @@ -0,0 +1,65 @@ +#include "lemma.h" +#include +#include +#include "field.h" +#include "database.h" +#include "form.h" + +namespace verbly { + namespace generator { + + int lemma::nextId_ = 0; + + lemma::lemma(const form& baseForm) : + id_(nextId_++), + baseForm_(baseForm) + { + inflections_[inflection::base] = {&baseForm}; + } + + void lemma::addInflection(inflection type, const form& f) + { + // There can only be one base form. + assert(type != inflection::base); + + inflections_[type].insert(&f); + } + + std::set lemma::getInflections(inflection type) const + { + if (inflections_.count(type)) + { + return inflections_.at(type); + } else { + return {}; + } + } + + database& operator<<(database& db, const lemma& arg) + { + for (inflection type : { + inflection::base, + inflection::plural, + inflection::comparative, + inflection::superlative, + inflection::past_tense, + inflection::past_participle, + inflection::ing_form, + inflection::s_form}) + { + for (const form* f : arg.getInflections(type)) + { + std::list fields; + fields.emplace_back("lemma_id", arg.getId()); + fields.emplace_back("form_id", f->getId()); + fields.emplace_back("category", static_cast(type)); + + db.insertIntoTable("lemmas_forms", std::move(fields)); + } + } + + return db; + } + + }; +}; diff --git a/generator/lemma.h b/generator/lemma.h new file mode 100644 index 0000000..6452e08 --- /dev/null +++ b/generator/lemma.h @@ -0,0 +1,58 @@ +#ifndef LEMMA_H_D73105A7 +#define LEMMA_H_D73105A7 + +#include +#include +#include +#include "enums.h" + +namespace verbly { + namespace generator { + + class database; + class form; + + class lemma { + public: + + // Constructors + + explicit lemma(const form& baseForm); + + // Mutators + + void addInflection(inflection type, const form& f); + + // Accessors + + int getId() const + { + return id_; + } + + const form& getBaseForm() const + { + return baseForm_; + } + + std::set getInflections(inflection 
type) const; + + private: + + static int nextId_; + + const int id_; + const form& baseForm_; + + std::map> inflections_; + + }; + + // Serializer + + database& operator<<(database& db, const lemma& arg); + + }; +}; + +#endif /* end of include guard: LEMMA_H_D73105A7 */ diff --git a/generator/main.cpp b/generator/main.cpp new file mode 100644 index 0000000..827c963 --- /dev/null +++ b/generator/main.cpp @@ -0,0 +1,40 @@ +#include +#include +#include "generator.h" + +void printUsage() +{ + std::cout << "usage: generator verbnet agid wordnet cmudict imagenet output" << std::endl; + std::cout << "verbnet :: path to a VerbNet data directory" << std::endl; + std::cout << "agid :: path to an AGID infl.txt file" << std::endl; + std::cout << "wordnet :: path to a WordNet prolog data directory" << std::endl; + std::cout << "cmudict :: path to a CMUDICT pronunciation file" << std::endl; + std::cout << "imagenet :: path to an ImageNet urls.txt file" << std::endl; + std::cout << "output :: datafile output path" << std::endl; +} + +int main(int argc, char** argv) +{ + if (argc == 7) + { + try + { + verbly::generator::generator app(argv[1], argv[2], argv[3], argv[4], argv[5], argv[6]); + + try + { + app.run(); + } catch (const std::exception& e) + { + std::cout << e.what() << std::endl; + } + } catch (const std::exception& e) + { + std::cout << e.what() << std::endl; + printUsage(); + } + } else { + std::cout << "verbly datafile generator" << std::endl; + printUsage(); + } +} diff --git a/generator/notion.cpp b/generator/notion.cpp new file mode 100644 index 0000000..290d982 --- /dev/null +++ b/generator/notion.cpp @@ -0,0 +1,85 @@ +#include "notion.h" +#include +#include +#include "database.h" +#include "field.h" + +namespace verbly { + namespace generator { + + int notion::nextId_ = 0; + + notion::notion( + part_of_speech partOfSpeech) : + id_(nextId_++), + partOfSpeech_(partOfSpeech) + { + } + + notion::notion( + part_of_speech partOfSpeech, + int wnid) : + id_(nextId_++), + 
partOfSpeech_(partOfSpeech), + wnid_(wnid), + hasWnid_(true) + { + } + + void notion::incrementNumOfImages() + { + // Calling code should always call hasWnid and check that the notion is a noun first. + assert(hasWnid_ && (partOfSpeech_ == part_of_speech::noun)); + + numOfImages_++; + } + + void notion::setPrepositionGroups(std::list groups) + { + // Calling code should always check that the notion is a preposition first. + assert(partOfSpeech_ == part_of_speech::preposition); + + prepositionGroups_ = groups; + } + + database& operator<<(database& db, const notion& arg) + { + // First, serialize the notion + { + std::list fields; + + fields.emplace_back("notion_id", arg.getId()); + fields.emplace_back("part_of_speech", static_cast(arg.getPartOfSpeech())); + + if (arg.hasWnid()) + { + fields.emplace_back("wnid", arg.getWnid()); + + if (arg.getPartOfSpeech() == part_of_speech::noun) + { + fields.emplace_back("images", arg.getNumOfImages()); + } + } + + db.insertIntoTable("notions", std::move(fields)); + } + + // Next, serialize the is_a relationship if this is a preposition + if (arg.getPartOfSpeech() == part_of_speech::preposition) + { + for (std::string group : arg.getPrepositionGroups()) + { + std::list fields; + + fields.emplace_back("notion_id", arg.getId()); + fields.emplace_back("groupname", group); + + db.insertIntoTable("is_a", std::move(fields)); + } + } + + return db; + } + + }; +}; diff --git a/generator/notion.h b/generator/notion.h new file mode 100644 index 0000000..76210de --- /dev/null +++ b/generator/notion.h @@ -0,0 +1,91 @@ +#ifndef NOTION_H_221DE2BC +#define NOTION_H_221DE2BC + +#include +#include +#include +#include "enums.h" + +namespace verbly { + namespace generator { + + class database; + + class notion { + public: + + // Constructors + + explicit notion(part_of_speech partOfSpeech); + + notion(part_of_speech partOfSpeech, int wnid); + + // Mutators + + void incrementNumOfImages(); + + void setPrepositionGroups(std::list groups); + + // 
Accessors + + int getId() const + { + return id_; + } + + part_of_speech getPartOfSpeech() const + { + return partOfSpeech_; + } + + bool hasWnid() const + { + return hasWnid_; + } + + int getWnid() const + { + // Calling code should always call hasWnid first. + assert(hasWnid_); + + return wnid_; + } + + int getNumOfImages() const + { + // Calling code should always call hasWnid and check that the notion is a noun first. + assert(hasWnid_ && (partOfSpeech_ == part_of_speech::noun)); + + return numOfImages_; + } + + std::list getPrepositionGroups() const + { + // Calling code should always check that the notion is a preposition first. + assert(partOfSpeech_ == part_of_speech::preposition); + + return prepositionGroups_; + } + + private: + + static int nextId_; + + const int id_; + const part_of_speech partOfSpeech_; + const int wnid_ = 0; + const bool hasWnid_ = false; + + int numOfImages_ = 0; + std::list prepositionGroups_; + + }; + + // Serializer + + database& operator<<(database& db, const notion& arg); + + }; +}; + +#endif /* end of include guard: NOTION_H_221DE2BC */ diff --git a/generator/part.cpp b/generator/part.cpp new file mode 100644 index 0000000..dbd4e11 --- /dev/null +++ b/generator/part.cpp @@ -0,0 +1,336 @@ +#include "part.h" +#include +#include "selrestr.h" + +namespace verbly { + namespace generator { + + part part::createNounPhrase(std::string role, selrestr selrestrs, std::set synrestrs) + { + part p(type::noun_phrase); + + new(&p.noun_phrase_.role) std::string(std::move(role)); + new(&p.noun_phrase_.selrestrs) selrestr(std::move(selrestrs)); + new(&p.noun_phrase_.synrestrs) std::set(std::move(synrestrs)); + + return p; + } + + part part::createVerb() + { + return part(type::verb); + } + + part part::createPreposition(std::set choices, bool literal) + { + part p(type::preposition); + + new(&p.preposition_.choices) std::set(std::move(choices)); + p.preposition_.literal = literal; + + return p; + } + + part part::createAdjective() + { + return 
part(type::adjective); + } + + part part::createAdverb() + { + return part(type::adverb); + } + + part part::createLiteral(std::string value) + { + part p(type::literal); + + new(&p.literal_) std::string(std::move(value)); + + return p; + } + + part::part(const part& other) + { + type_ = other.type_; + + switch (type_) + { + case type::noun_phrase: + { + new(&noun_phrase_.role) std::string(other.noun_phrase_.role); + new(&noun_phrase_.selrestrs) selrestr(other.noun_phrase_.selrestrs); + new(&noun_phrase_.synrestrs) std::set(other.noun_phrase_.synrestrs); + + break; + } + + case type::preposition: + { + new(&preposition_.choices) std::set(other.preposition_.choices); + preposition_.literal = other.preposition_.literal; + + break; + } + + case type::literal: + { + new(&literal_) std::string(other.literal_); + + break; + } + + case type::verb: + case type::adjective: + case type::adverb: + case type::invalid: + { + break; + } + } + } + + part::part(part&& other) : part() + { + swap(*this, other); + } + + part& part::operator=(part other) + { + swap(*this, other); + + return *this; + } + + void swap(part& first, part& second) + { + using type = part::type; + + type tempType = first.type_; + std::string tempRole; + selrestr tempSelrestrs; + std::set tempSynrestrs; + std::set tempChoices; + bool tempPrepLiteral; + std::string tempLiteralValue; + + switch (tempType) + { + case type::noun_phrase: + { + tempRole = std::move(first.noun_phrase_.role); + tempSelrestrs = std::move(first.noun_phrase_.selrestrs); + tempSynrestrs = std::move(first.noun_phrase_.synrestrs); + + break; + } + + case type::preposition: + { + tempChoices = std::move(first.preposition_.choices); + tempPrepLiteral = first.preposition_.literal; + + break; + } + + case type::literal: + { + tempLiteralValue = std::move(first.literal_); + + break; + } + + case type::verb: + case type::adjective: + case type::adverb: + case type::invalid: + { + break; + } + } + + first.~part(); + + first.type_ = second.type_; 
+ + switch (first.type_) + { + case type::noun_phrase: + { + new(&first.noun_phrase_.role) std::string(std::move(second.noun_phrase_.role)); + new(&first.noun_phrase_.selrestrs) selrestr(std::move(second.noun_phrase_.selrestrs)); + new(&first.noun_phrase_.synrestrs) std::set(std::move(second.noun_phrase_.synrestrs)); + + break; + } + + case type::preposition: + { + new(&first.preposition_.choices) std::set(std::move(second.preposition_.choices)); + first.preposition_.literal = second.preposition_.literal; + + break; + } + + case type::literal: + { + new(&first.literal_) std::string(std::move(second.literal_)); + + break; + } + + case type::verb: + case type::adjective: + case type::adverb: + case type::invalid: + { + break; + } + } + + second.~part(); + + second.type_ = tempType; + + switch (second.type_) + { + case type::noun_phrase: + { + new(&second.noun_phrase_.role) std::string(std::move(tempRole)); + new(&second.noun_phrase_.selrestrs) selrestr(std::move(tempSelrestrs)); + new(&second.noun_phrase_.synrestrs) std::set(std::move(tempSynrestrs)); + + break; + } + + case type::preposition: + { + new(&second.preposition_.choices) std::set(std::move(tempChoices)); + second.preposition_.literal = tempPrepLiteral; + + break; + } + + case type::literal: + { + new(&second.literal_) std::string(std::move(tempLiteralValue)); + + break; + } + + case type::verb: + case type::adjective: + case type::adverb: + case type::invalid: + { + break; + } + } + } + + part::~part() + { + switch (type_) + { + case type::noun_phrase: + { + using string_type = std::string; + using set_type = std::set; + + noun_phrase_.role.~string_type(); + noun_phrase_.selrestrs.~selrestr(); + noun_phrase_.synrestrs.~set_type(); + + break; + } + + case type::preposition: + { + using set_type = std::set; + + preposition_.choices.~set_type(); + + break; + } + + case type::literal: + { + using string_type = std::string; + + literal_.~string_type(); + + break; + } + + case type::verb: + case 
type::adjective: + case type::adverb: + case type::invalid: + { + break; + } + } + } + + std::string part::getNounRole() const + { + if (type_ == type::noun_phrase) + { + return noun_phrase_.role; + } else { + throw std::domain_error("part::getNounRole is only valid for noun phrase parts"); + } + } + + selrestr part::getNounSelrestrs() const + { + if (type_ == type::noun_phrase) + { + return noun_phrase_.selrestrs; + } else { + throw std::domain_error("part::getNounSelrestrs is only valid for noun phrase parts"); + } + } + + std::set part::getNounSynrestrs() const + { + if (type_ == type::noun_phrase) + { + return noun_phrase_.synrestrs; + } else { + throw std::domain_error("part::getNounSynrestrs is only valid for noun phrase parts"); + } + } + + std::set part::getPrepositionChoices() const + { + if (type_ == type::preposition) + { + return preposition_.choices; + } else { + throw std::domain_error("part::getPrepositionChoices is only valid for preposition parts"); + } + } + + bool part::isPrepositionLiteral() const + { + if (type_ == type::preposition) + { + return preposition_.literal; + } else { + throw std::domain_error("part::isPrepositionLiteral is only valid for preposition parts"); + } + } + + std::string part::getLiteralValue() const + { + if (type_ == type::literal) + { + return literal_; + } else { + throw std::domain_error("part::getLiteralValue is only valid for literal parts"); + } + } + + }; +}; diff --git a/generator/part.h b/generator/part.h new file mode 100644 index 0000000..d044630 --- /dev/null +++ b/generator/part.h @@ -0,0 +1,114 @@ +#ifndef PART_H_FB54F361 +#define PART_H_FB54F361 + +#include +#include +#include "selrestr.h" + +namespace verbly { + namespace generator { + + class part { + public: + enum class type { + invalid = -1, + noun_phrase = 0, + verb = 1, + preposition = 2, + adjective = 3, + adverb = 4, + literal = 5 + }; + + // Static factories + + static part createNounPhrase(std::string role, selrestr selrestrs, std::set 
synrestrs); + + static part createVerb(); + + static part createPreposition(std::set choices, bool literal); + + static part createAdjective(); + + static part createAdverb(); + + static part createLiteral(std::string value); + + // Copy and move constructors + + part(const part& other); + + part(part&& other); + + // Assignment + + part& operator=(part other); + + // Swap + + friend void swap(part& first, part& second); + + // Destructor + + ~part(); + + // General accessors + + type getType() const + { + return type_; + } + + // Noun phrase accessors + + std::string getNounRole() const; + + selrestr getNounSelrestrs() const; + + std::set getNounSynrestrs() const; + + // Preposition accessors + + std::set getPrepositionChoices() const; + + bool isPrepositionLiteral() const; + + // Literal accessors + + std::string getLiteralValue() const; + + private: + + // Private constructors + + part() + { + } + + part(type t) : type_(t) + { + } + + // Data + + union { + struct { + std::string role; + selrestr selrestrs; + std::set synrestrs; + } noun_phrase_; + struct { + std::set choices; + bool literal; + } preposition_; + std::string literal_; + }; + + type type_ = type::invalid; + + }; + + }; +}; + +#endif /* end of include guard: PART_H_FB54F361 */ diff --git a/generator/progress.h b/generator/progress.h index 81f07a3..fcb680d 100644 --- a/generator/progress.h +++ b/generator/progress.h @@ -3,48 +3,54 @@ #include -class progress { - private: - std::string message; - int total; - int cur = 0; - int lprint = 0; +namespace verbly { + namespace generator { - public: - progress(std::string message, int total) : message(message), total(total) - { - std::cout << message << " 0%" << std::flush; - } + class progress { + private: + std::string message; + int total; + int cur = 0; + int lprint = 0; - void update(int val) - { - if (val <= total) - { - cur = val; - } else { - cur = total; - } + public: + progress(std::string message, int total) : message(message), total(total) + { + 
std::cout << message << " 0%" << std::flush; + } + + void update(int val) + { + if (val <= total) + { + cur = val; + } else { + cur = total; + } - int pp = cur * 100 / total; - if (pp != lprint) - { - lprint = pp; + int pp = cur * 100 / total; + if (pp != lprint) + { + lprint = pp; - std::cout << "\b\b\b\b" << std::right; - std::cout.width(3); - std::cout << pp << "%" << std::flush; - } - } + std::cout << "\b\b\b\b" << std::right; + std::cout.width(3); + std::cout << pp << "%" << std::flush; + } + } + + void update() + { + update(cur+1); + } - void update() - { - update(cur+1); - } + ~progress() + { + std::cout << "\b\b\b\b100%" << std::endl; + } + }; - ~progress() - { - std::cout << "\b\b\b\b100%" << std::endl; - } + }; }; #endif /* end of include guard: PROGRESS_H_A34EF856 */ diff --git a/generator/pronunciation.cpp b/generator/pronunciation.cpp new file mode 100644 index 0000000..eb07607 --- /dev/null +++ b/generator/pronunciation.cpp @@ -0,0 +1,87 @@ +#include "pronunciation.h" +#include +#include +#include +#include +#include "database.h" +#include "field.h" +#include "../lib/util.h" + +namespace verbly { + namespace generator { + + int pronunciation::nextId_ = 0; + + pronunciation::pronunciation(std::string phonemes) : + id_(nextId_++), + phonemes_(phonemes) + { + auto phonemeList = split>(phonemes, " "); + + auto rhymeStart = std::find_if(std::begin(phonemeList), std::end(phonemeList), [] (std::string phoneme) { + return phoneme.find("1") != std::string::npos; + }); + + // Rhyme detection + if (rhymeStart != std::end(phonemeList)) + { + std::list rhymePhonemes; + + std::transform(rhymeStart, std::end(phonemeList), std::back_inserter(rhymePhonemes), [] (std::string phoneme) { + std::string naked; + + std::remove_copy_if(std::begin(phoneme), std::end(phoneme), std::back_inserter(naked), [] (char ch) { + return std::isdigit(ch); + }); + + return naked; + }); + + rhyme_ = implode(std::begin(rhymePhonemes), std::end(rhymePhonemes), " "); + + if (rhymeStart != 
std::begin(phonemeList)) + { + prerhyme_ = *std::prev(rhymeStart); + } + } + + // Syllable/stress + for (std::string phoneme : phonemeList) + { + if (std::isdigit(phoneme.back())) + { + // It's a vowel! + syllables_++; + + if (phoneme.back() == '1') + { + stress_.push_back('1'); + } else { + stress_.push_back('0'); + } + } + } + } + + database& operator<<(database& db, const pronunciation& arg) + { + std::list fields; + + fields.emplace_back("pronunciation_id", arg.getId()); + fields.emplace_back("phonemes", arg.getPhonemes()); + fields.emplace_back("syllables", arg.getSyllables()); + fields.emplace_back("stress", arg.getStress()); + + if (arg.hasRhyme()) + { + fields.emplace_back("rhyme", arg.getRhymePhonemes()); + fields.emplace_back("prerhyme", arg.getPrerhyme()); + } + + db.insertIntoTable("pronunciations", std::move(fields)); + + return db; + } + + }; +}; diff --git a/generator/pronunciation.h b/generator/pronunciation.h new file mode 100644 index 0000000..81be6c4 --- /dev/null +++ b/generator/pronunciation.h @@ -0,0 +1,82 @@ +#ifndef PRONUNCIATION_H_584A08DD +#define PRONUNCIATION_H_584A08DD + +#include +#include + +namespace verbly { + namespace generator { + + class database; + + class pronunciation { + public: + + // Constructor + + explicit pronunciation(std::string phonemes); + + // Accessors + + int getId() const + { + return id_; + } + + std::string getPhonemes() const + { + return phonemes_; + } + + bool hasRhyme() const + { + return !rhyme_.empty(); + } + + std::string getRhymePhonemes() const + { + // Calling code should always call hasRhyme first. + assert(!rhyme_.empty()); + + return rhyme_; + } + + std::string getPrerhyme() const + { + // Calling code should always call hasRhyme first. 
+ assert(!rhyme_.empty()); + + return prerhyme_; + } + + int getSyllables() const + { + return syllables_; + } + + std::string getStress() const + { + return stress_; + } + + private: + + static int nextId_; + + const int id_; + const std::string phonemes_; + std::string rhyme_; + std::string prerhyme_; + int syllables_ = 0; + std::string stress_; + + }; + + // Serializer + + database& operator<<(database& db, const pronunciation& arg); + + }; +}; + +#endif /* end of include guard: PRONUNCIATION_H_584A08DD */ diff --git a/generator/role.h b/generator/role.h new file mode 100644 index 0000000..5fa68b8 --- /dev/null +++ b/generator/role.h @@ -0,0 +1,35 @@ +#ifndef ROLE_H_249F9A9C +#define ROLE_H_249F9A9C + +#include "selrestr.h" + +namespace verbly { + namespace generator { + + class role { + public: + + // Mutators + + void setSelrestrs(selrestr selrestrs) + { + selrestrs_ = selrestrs; + } + + // Accessors + + const selrestr& getSelrestrs() const + { + return selrestrs_; + } + + private: + + selrestr selrestrs_; + + }; + + }; +}; + +#endif /* end of include guard: ROLE_H_249F9A9C */ diff --git a/generator/schema.sql b/generator/schema.sql index 410b536..c3e54d8 100644 --- a/generator/schema.sql +++ b/generator/schema.sql @@ -1,286 +1,204 @@ -DROP TABLE IF EXISTS `verbs`; -CREATE TABLE `verbs` ( - `verb_id` INTEGER PRIMARY KEY, - `infinitive` VARCHAR(32) NOT NULL, - `past_tense` VARCHAR(32) NOT NULL, - `past_participle` VARCHAR(32) NOT NULL, - `ing_form` VARCHAR(32) NOT NULL, - `s_form` VARCHAR(32) NOT NULL +CREATE TABLE `notions` ( + `notion_id` INTEGER PRIMARY KEY, + `part_of_speech` SMALLINT NOT NULL, + `wnid` INTEGER, + `images` INTEGER ); -DROP TABLE IF EXISTS `groups`; -CREATE TABLE `groups` ( - `group_id` INTEGER PRIMARY KEY, - `data` BLOB NOT NULL -); - -DROP TABLE IF EXISTS `frames`; -CREATE TABLE `frames` ( - `frame_id` INTEGER PRIMARY KEY, - `group_id` INTEGER NOT NULL, - `data` BLOB NOT NULL, - FOREIGN KEY (`group_id`) REFERENCES `groups`(`group_id`) -); 
+CREATE UNIQUE INDEX `notion_by_wnid` ON `notions`(`wnid`); -DROP TABLE IF EXISTS `verb_groups`; -CREATE TABLE `verb_groups` ( - `verb_id` INTEGER NOT NULL, - `group_id` INTEGER NOT NULL, - FOREIGN KEY (`verb_id`) REFERENCES `verbs`(`verb_id`), - FOREIGN KEY (`group_id`) REFERENCES `groups`(`group_id`) -); - -DROP TABLE IF EXISTS `adjectives`; -CREATE TABLE `adjectives` ( - `adjective_id` INTEGER PRIMARY KEY, - `base_form` VARCHAR(32) NOT NULL, - `comparative` VARCHAR(32), - `superlative` VARCHAR(32), - `position` CHAR(1), - `complexity` INTEGER NOT NULL -); - -DROP TABLE IF EXISTS `adverbs`; -CREATE TABLE `adverbs` ( - `adverb_id` INTEGER PRIMARY KEY, - `base_form` VARCHAR(32) NOT NULL, - `comparative` VARCHAR(32), - `superlative` VARCHAR(32), - `complexity` INTEGER NOT NULL -); - -DROP TABLE IF EXISTS `nouns`; -CREATE TABLE `nouns` ( - `noun_id` INTEGER PRIMARY KEY, - `singular` VARCHAR(32) NOT NULL, - `plural` VARCHAR(32), - `proper` INTEGER(1) NOT NULL, - `complexity` INTEGER NOT NULL, - `images` INTEGER NOT NULL, - `wnid` INTEGER NOT NULL -); - -DROP TABLE IF EXISTS `hypernymy`; CREATE TABLE `hypernymy` ( `hypernym_id` INTEGER NOT NULL, - `hyponym_id` INTEGER NOT NULL, - FOREIGN KEY (`hypernym_id`) REFERENCES `nouns`(`noun_id`), - FOREIGN KEY (`hyponym_id`) REFERENCES `nouns`(`noun_id`) + `hyponym_id` INTEGER NOT NULL ); -DROP TABLE IF EXISTS `instantiation`; +CREATE INDEX `hyponym_of` ON `hypernymy`(`hypernym_id`); +CREATE INDEX `hypernym_of` ON `hypernymy`(`hyponym_id`); + CREATE TABLE `instantiation` ( `class_id` INTEGER NOT NULL, - `instance_id` INTEGER NOT NULL, - FOREIGN KEY (`class_id`) REFERENCES `nouns`(`noun_id`), - FOREIGN KEY (`instance_id`) REFERENCES `nouns`(`noun_id`) + `instance_id` INTEGER NOT NULL ); -DROP TABLE IF EXISTS `member_meronymy`; +CREATE INDEX `instance_of` ON `instantiation`(`class_id`); +CREATE INDEX `class_of` ON `instantiation`(`instance_id`); + CREATE TABLE `member_meronymy` ( `meronym_id` INTEGER NOT NULL, - `holonym_id` 
INTEGER NOT NULL, - FOREIGN KEY (`meronym_id`) REFERENCES `nouns`(`noun_id`), - FOREIGN KEY (`holonym_id`) REFERENCES `nouns`(`noun_id`) + `holonym_id` INTEGER NOT NULL ); -DROP TABLE IF EXISTS `part_meronymy`; +CREATE INDEX `member_holonym_of` ON `member_meronymy`(`meronym_id`); +CREATE INDEX `member_meronym_of` ON `member_meronymy`(`holonym_id`); + CREATE TABLE `part_meronymy` ( `meronym_id` INTEGER NOT NULL, - `holonym_id` INTEGER NOT NULL, - FOREIGN KEY (`meronym_id`) REFERENCES `nouns`(`noun_id`), - FOREIGN KEY (`holonym_id`) REFERENCES `nouns`(`noun_id`) + `holonym_id` INTEGER NOT NULL ); -DROP TABLE IF EXISTS `substance_meronymy`; +CREATE INDEX `part_holonym_of` ON `part_meronymy`(`meronym_id`); +CREATE INDEX `part_meronym_of` ON `part_meronymy`(`holonym_id`); + CREATE TABLE `substance_meronymy` ( `meronym_id` INTEGER NOT NULL, - `holonym_id` INTEGER NOT NULL, - FOREIGN KEY (`meronym_id`) REFERENCES `nouns`(`noun_id`), - FOREIGN KEY (`holonym_id`) REFERENCES `nouns`(`noun_id`) + `holonym_id` INTEGER NOT NULL ); -DROP TABLE IF EXISTS `variation`; +CREATE INDEX `substance_holonym_of` ON `substance_meronymy`(`meronym_id`); +CREATE INDEX `substance_meronym_of` ON `substance_meronymy`(`holonym_id`); + CREATE TABLE `variation` ( `noun_id` INTEGER NOT NULL, - `adjective_id` INTEGER NOT NULL, - FOREIGN KEY (`noun_id`) REFERENCES `nouns`(`noun_id`), - FOREIGN KEY (`adjective_id`) REFERENCES `adjectives`(`adjective_id`) + `adjective_id` INTEGER NOT NULL ); -DROP TABLE IF EXISTS `noun_antonymy`; -CREATE TABLE `noun_antonymy` ( - `noun_1_id` INTEGER NOT NULL, - `noun_2_id` INTEGER NOT NULL, - FOREIGN KEY (`noun_1_id`) REFERENCES `nouns`(`noun_id`), - FOREIGN KEY (`noun_2_id`) REFERENCES `nouns`(`noun_id`) -); +CREATE INDEX `variant_of` ON `variation`(`noun_id`); +CREATE INDEX `attribute_of` ON `variation`(`adjective_id`); -DROP TABLE IF EXISTS `adjective_antonymy`; -CREATE TABLE `adjective_antonymy` ( +CREATE TABLE `similarity` ( `adjective_1_id` INTEGER NOT NULL, - 
`adjective_2_id` INTEGER NOT NULL, - FOREIGN KEY (`adjective_1_id`) REFERENCES `adjectives`(`adjective_id`), - FOREIGN KEY (`adjective_2_id`) REFERENCES `adjectives`(`adjective_id`) + `adjective_2_id` INTEGER NOT NULL +); + +CREATE INDEX `similar_to` ON `similarity`(`adjective_1_id`); + +CREATE TABLE `is_a` ( + `notion_id` INTEGER NOT NULL, + `groupname` VARCHAR(32) NOT NULL ); -DROP TABLE IF EXISTS `adverb_antonymy`; -CREATE TABLE `adverb_antonymy` ( - `adverb_1_id` INTEGER NOT NULL, - `adverb_2_id` INTEGER NOT NULL, - FOREIGN KEY (`adverb_1_id`) REFERENCES `adverbs`(`adverb_id`), - FOREIGN KEY (`adverb_2_id`) REFERENCES `adverbs`(`adverb_id`) +CREATE TABLE `entailment` ( + `given_id` INTEGER NOT NULL, + `entailment_id` INTEGER NOT NULL +); + +CREATE INDEX `entailment_of` ON `entailment`(`given_id`); +CREATE INDEX `entailed_by` ON `entailment`(`entailment_id`); + +CREATE TABLE `causality` ( + `cause_id` INTEGER NOT NULL, + `effect_id` INTEGER NOT NULL +); + +CREATE INDEX `effect_of` ON `causality`(`cause_id`); +CREATE INDEX `cause_of` ON `causality`(`effect_id`); + +CREATE TABLE `words` ( + `word_id` INTEGER PRIMARY KEY, + `notion_id` INTEGER NOT NULL, + `lemma_id` INTEGER NOT NULL, + `tag_count` INTEGER, + `position` SMALLINT, + `group_id` INTEGER +); + +CREATE INDEX `notion_words` ON `words`(`notion_id`); +CREATE INDEX `lemma_words` ON `words`(`lemma_id`); +CREATE INDEX `group_words` ON `words`(`group_id`); + +CREATE TABLE `antonymy` ( + `antonym_1_id` INTEGER NOT NULL, + `antonym_2_id` INTEGER NOT NULL ); -DROP TABLE IF EXISTS `specification`; +CREATE INDEX `antonym_of` ON `antonymy`(`antonym_1_id`); + CREATE TABLE `specification` ( `general_id` INTEGER NOT NULL, - `specific_id` INTEGER NOT NULL, - FOREIGN KEY (`general_id`) REFERENCES `adjectives`(`adjective_id`), - FOREIGN KEY (`specific_id`) REFERENCES `adjectives`(`adjective_id`) + `specific_id` INTEGER NOT NULL ); -DROP TABLE IF EXISTS `pertainymy`; +CREATE INDEX `specification_of` ON 
`specification`(`general_id`); +CREATE INDEX `generalization_of` ON `specification`(`specific_id`); + CREATE TABLE `pertainymy` ( `noun_id` INTEGER NOT NULL, - `pertainym_id` INTEGER NOT NULL, - FOREIGN KEY (`noun_id`) REFERENCES `nouns`(`noun_id`), - FOREIGN KEY (`pertainym_id`) REFERENCES `adjectives`(`adjective_id`) + `pertainym_id` INTEGER NOT NULL ); -DROP TABLE IF EXISTS `mannernymy`; +CREATE INDEX `pertainym_of` ON `pertainymy`(`noun_id`); +CREATE INDEX `anti_pertainym_of` ON `pertainymy`(`pertainym_id`); + CREATE TABLE `mannernymy` ( `adjective_id` INTEGER NOT NULL, - `mannernym_id` INTEGER NOT NULL, - FOREIGN KEY (`adjective_id`) REFERENCES `adjectives`(`adjective_id`), - FOREIGN KEY (`mannernym_id`) REFERENCES `adverbs`(`adverb_id`) + `mannernym_id` INTEGER NOT NULL ); -DROP TABLE IF EXISTS `noun_synonymy`; -CREATE TABLE `noun_synonymy` ( - `noun_1_id` INTEGER NOT NULL, - `noun_2_id` INTEGER NOT NULL, - FOREIGN KEY (`noun_1_id`) REFERENCES `nouns`(`nouns_id`), - FOREIGN KEY (`noun_2_id`) REFERENCES `nouns`(`nouns_id`) -); +CREATE INDEX `mannernym_of` ON `mannernymy`(`adjective_id`); +CREATE INDEX `anti_mannernym_of` ON `mannernymy`(`mannernym_id`); -DROP TABLE IF EXISTS `adjective_synonymy`; -CREATE TABLE `adjective_synonymy` ( - `adjective_1_id` INTEGER NOT NULL, - `adjective_2_id` INTEGER NOT NULL, - FOREIGN KEY (`adjective_1_id`) REFERENCES `adjectives`(`adjective_id`), - FOREIGN KEY (`adjective_2_id`) REFERENCES `adjectives`(`adjective_id`) +CREATE TABLE `usage` ( + `domain_id` INTEGER NOT NULL, + `term_id` INTEGER NOT NULL ); -DROP TABLE IF EXISTS `adverb_synonymy`; -CREATE TABLE `adverb_synonymy` ( - `adverb_1_id` INTEGER NOT NULL, - `adverb_2_id` INTEGER NOT NULL, - FOREIGN KEY (`adverb_1_id`) REFERENCES `adverbs`(`adverb_id`), - FOREIGN KEY (`adverb_2_id`) REFERENCES `adverbs`(`adverb_id`) -); +CREATE INDEX `usage_term_of` ON `usage`(`domain_id`); +CREATE INDEX `usage_domain_of` ON `usage`(`term_id`); -DROP TABLE IF EXISTS `noun_pronunciations`; 
-CREATE TABLE `noun_pronunciations` ( - `noun_id` INTEGER NOT NULL, - `pronunciation` VARCHAR(64) NOT NULL, - `prerhyme` VARCHAR(8), - `rhyme` VARCHAR(64), - `syllables` INT NOT NULL, - `stress` VARCHAR(64) NOT NULL, - FOREIGN KEY (`noun_id`) REFERENCES `nouns`(`noun_id`) +CREATE TABLE `topicality` ( + `domain_id` INTEGER NOT NULL, + `term_id` INTEGER NOT NULL ); -DROP TABLE IF EXISTS `verb_pronunciations`; -CREATE TABLE `verb_pronunciations` ( - `verb_id` INTEGER NOT NULL, - `pronunciation` VARCHAR(64) NOT NULL, - `prerhyme` VARCHAR(8), - `rhyme` VARCHAR(64), - `syllables` INT NOT NULL, - `stress` VARCHAR(64) NOT NULL, - FOREIGN KEY (`verb_id`) REFERENCES `verbs`(`verb_id`) -); +CREATE INDEX `topical_term_of` ON `topicality`(`domain_id`); +CREATE INDEX `topical_domain_of` ON `topicality`(`term_id`); -DROP TABLE IF EXISTS `adjective_pronunciations`; -CREATE TABLE `adjective_pronunciations` ( - `adjective_id` INTEGER NOT NULL, - `pronunciation` VARCHAR(64) NOT NULL, - `prerhyme` VARCHAR(8), - `rhyme` VARCHAR(64), - `syllables` INT NOT NULL, - `stress` VARCHAR(64) NOT NULL, - FOREIGN KEY (`adjective_id`) REFERENCES `adjectives`(`adjective_id`) +CREATE TABLE `regionality` ( + `domain_id` INTEGER NOT NULL, + `term_id` INTEGER NOT NULL ); -DROP TABLE IF EXISTS `adverb_pronunciations`; -CREATE TABLE `adverb_pronunciations` ( - `adverb_id` INTEGER NOT NULL, - `pronunciation` VARCHAR(64) NOT NULL, - `prerhyme` VARCHAR(8), - `rhyme` VARCHAR(64), - `syllables` INT NOT NULL, - `stress` VARCHAR(64) NOT NULL, - FOREIGN KEY (`adverb_id`) REFERENCES `adverbs`(`adverb_id`) -); +CREATE INDEX `regional_term_of` ON `regionality`(`domain_id`); +CREATE INDEX `regional_domain_of` ON `regionality`(`term_id`); -DROP TABLE IF EXISTS `noun_noun_derivation`; -CREATE TABLE `noun_noun_derivation` ( - `noun_1_id` INTEGER NOT NULL, - `noun_2_id` INTEGER NOT NULL, - FOREIGN KEY (`noun_1_id`) REFERENCES `nouns`(`noun_id`), - FOREIGN KEY (`noun_2_id`) REFERENCES `nouns`(`noun_id`) +CREATE TABLE 
`forms` ( + `form_id` INTEGER PRIMARY KEY, + `form` VARCHAR(32) NOT NULL, + `complexity` SMALLINT NOT NULL, + `proper` SMALLINT NOT NULL ); -DROP TABLE IF EXISTS `noun_adjective_derivation`; -CREATE TABLE `noun_adjective_derivation` ( - `noun_id` INTEGER NOT NULL, - `adjective_id` INTEGER NOT NULL, - FOREIGN KEY (`noun_id`) REFERENCES `nouns`(`noun_id`), - FOREIGN KEY (`adjective_id`) REFERENCES `adjectives`(`adjective_id`) -); +CREATE UNIQUE INDEX `form_by_string` ON `forms`(`form`); -DROP TABLE IF EXISTS `noun_adverb_derivation`; -CREATE TABLE `noun_adverb_derivation` ( - `noun_id` INTEGER NOT NULL, - `adverb_id` INTEGER NOT NULL, - FOREIGN KEY (`noun_id`) REFERENCES `nouns`(`noun_id`), - FOREIGN KEY (`adverb_id`) REFERENCES `adverbs`(`adverb_id`) +CREATE TABLE `lemmas_forms` ( + `lemma_id` INTEGER NOT NULL, + `form_id` INTEGER NOT NULL, + `category` SMALLINT NOT NULL ); -DROP TABLE IF EXISTS `adjective_adjective_derivation`; -CREATE TABLE `adjective_adjective_derivation` ( - `adjective_1_id` INTEGER NOT NULL, - `adjective_2_id` INTEGER NOT NULL, - FOREIGN KEY (`adjective_1_id`) REFERENCES `adjectives`(`adjective_id`), - FOREIGN KEY (`adjective_2_id`) REFERENCES `adjectives`(`adjective_id`) +CREATE INDEX `form_of` ON `lemmas_forms`(`lemma_id`); +CREATE INDEX `lemma_of` ON `lemmas_forms`(`form_id`); + +CREATE TABLE `pronunciations` ( + `pronunciation_id` INTEGER PRIMARY KEY, + `phonemes` VARCHAR(64) NOT NULL, + `prerhyme` VARCHAR(8), + `rhyme` VARCHAR(64), + `syllables` INTEGER NOT NULL, + `stress` VARCHAR(64) NOT NULL ); -DROP TABLE IF EXISTS `adjective_adverb_derivation`; -CREATE TABLE `adjective_adverb_derivation` ( - `adjective_id` INTEGER NOT NULL, - `adverb_id` INTEGER NOT NULL, - FOREIGN KEY (`adjective_id`) REFERENCES `adjectives`(`adjective_id`), - FOREIGN KEY (`adverb_id`) REFERENCES `adverbs`(`adjective_id`) +CREATE TABLE `forms_pronunciations` ( + `form_id` INTEGER NOT NULL, + `pronunciation_id` INTEGER NOT NULL ); -DROP TABLE IF EXISTS 
`adverb_adverb_derivation`; -CREATE TABLE `adverb_adverb_derivation` ( - `adverb_1_id` INTEGER NOT NULL, - `adverb_2_id` INTEGER NOT NULL, - FOREIGN KEY (`adverb_1_id`) REFERENCES `adverbs`(`adverb_id`), - FOREIGN KEY (`adverb_2_id`) REFERENCES `adverbs`(`adverb_id`) +CREATE INDEX `pronunciation_of` ON `forms_pronunciations`(`form_id`); +CREATE INDEX `spelling_of` ON `forms_pronunciations`(`pronunciation_id`); + +CREATE TABLE `groups` ( + `group_id` INTEGER PRIMARY KEY, + `data` BLOB NOT NULL ); -DROP TABLE IF EXISTS `prepositions`; -CREATE TABLE `prepositions` ( - `preposition_id` INTEGER PRIMARY KEY, - `form` VARCHAR(32) NOT NULL +CREATE TABLE `frames` ( + `frame_id` INTEGER PRIMARY KEY, + `data` BLOB NOT NULL ); -DROP TABLE IF EXISTS `preposition_groups`; -CREATE TABLE `preposition_groups` ( - `preposition_id` INTEGER NOT NULL, - `groupname` VARCHAR(32) NOT NULL, - FOREIGN KEY (`preposition_id`) REFERENCES `prepositions`(`preposition_id`) +CREATE TABLE `groups_frames` ( + `group_id` INTEGER NOT NULL, + `frame_id` INTEGER NOT NULL ); + +CREATE INDEX `frames_in` ON `groups_frames`(`group_id`); diff --git a/generator/selrestr.cpp b/generator/selrestr.cpp new file mode 100644 index 0000000..8bdd3f6 --- /dev/null +++ b/generator/selrestr.cpp @@ -0,0 +1,288 @@ +#include "selrestr.h" + +namespace verbly { + namespace generator { + + selrestr::selrestr(const selrestr& other) + { + type_ = other.type_; + + switch (type_) + { + case type::singleton: + { + singleton_.pos = other.singleton_.pos; + new(&singleton_.restriction) std::string(other.singleton_.restriction); + + break; + } + + case type::group: + { + new(&group_.children) std::list(other.group_.children); + group_.orlogic = other.group_.orlogic; + + break; + } + + case type::empty: + { + break; + } + } + } + + selrestr::selrestr(selrestr&& other) : selrestr() + { + swap(*this, other); + } + + selrestr& selrestr::operator=(selrestr other) + { + swap(*this, other); + + return *this; + } + + void swap(selrestr& 
first, selrestr& second) + { + using type = selrestr::type; + + type tempType = first.type_; + int tempPos; + std::string tempRestriction; + std::list tempChildren; + bool tempOrlogic; + + switch (tempType) + { + case type::singleton: + { + tempPos = first.singleton_.pos; + tempRestriction = std::move(first.singleton_.restriction); + + break; + } + + case type::group: + { + tempChildren = std::move(first.group_.children); + tempOrlogic = first.group_.orlogic; + + break; + } + + case type::empty: + { + break; + } + } + + first.~selrestr(); + + first.type_ = second.type_; + + switch (first.type_) + { + case type::singleton: + { + first.singleton_.pos = second.singleton_.pos; + new(&first.singleton_.restriction) std::string(std::move(second.singleton_.restriction)); + + break; + } + + case type::group: + { + new(&first.group_.children) std::list(std::move(second.group_.children)); + first.group_.orlogic = second.group_.orlogic; + + break; + } + + case type::empty: + { + break; + } + } + + second.~selrestr(); + + second.type_ = tempType; + + switch (second.type_) + { + case type::singleton: + { + second.singleton_.pos = tempPos; + new(&second.singleton_.restriction) std::string(std::move(tempRestriction)); + + break; + } + + case type::group: + { + new(&second.group_.children) std::list(std::move(tempChildren)); + second.group_.orlogic = tempOrlogic; + + break; + } + + case type::empty: + { + break; + } + } + } + + selrestr::~selrestr() + { + switch (type_) + { + case type::singleton: + { + using string_type = std::string; + singleton_.restriction.~string_type(); + + break; + } + + case type::group: + { + using list_type = std::list; + group_.children.~list_type(); + + break; + } + + case type::empty: + { + break; + } + } + } + + selrestr::selrestr() : type_(type::empty) + { + } + + selrestr::selrestr( + std::string restriction, + bool pos) : + type_(type::singleton) + { + new(&singleton_.restriction) std::string(std::move(restriction)); + singleton_.pos = pos; + } + + 
std::string selrestr::getRestriction() const + { + if (type_ == type::singleton) + { + return singleton_.restriction; + } else { + throw std::domain_error("Only singleton selrestrs have restrictions"); + } + } + + bool selrestr::getPos() const + { + if (type_ == type::singleton) + { + return singleton_.pos; + } else { + throw std::domain_error("Only singleton selrestrs have positivity flags"); + } + } + + selrestr::selrestr( + std::list children, + bool orlogic) : + type_(type::group) + { + new(&group_.children) std::list(std::move(children)); + group_.orlogic = orlogic; + } + + std::list selrestr::getChildren() const + { + if (type_ == type::group) + { + return group_.children; + } else { + throw std::domain_error("Only group selrestrs have children"); + } + } + + std::list::const_iterator selrestr::begin() const + { + if (type_ == type::group) + { + return std::begin(group_.children); + } else { + throw std::domain_error("Only group selrestrs have children"); + } + } + + std::list::const_iterator selrestr::end() const + { + if (type_ == type::group) + { + return std::end(group_.children); + } else { + throw std::domain_error("Only group selrestrs have children"); + } + } + + bool selrestr::getOrlogic() const + { + if (type_ == type::group) + { + return group_.orlogic; + } else { + throw std::domain_error("Only group selrestrs have logic"); + } + } + + nlohmann::json selrestr::toJson() const + { + switch (type_) + { + case type::empty: + { + return {}; + } + + case type::singleton: + { + return { + {"type", singleton_.restriction}, + {"pos", singleton_.pos} + }; + } + + case type::group: + { + std::string logic; + if (group_.orlogic) + { + logic = "or"; + } else { + logic = "and"; + } + + std::list children; + std::transform(std::begin(group_.children), std::end(group_.children), std::back_inserter(children), [] (const selrestr& child) { + return child.toJson(); + }); + + return { + {"logic", logic}, + {"children", children} + }; + } + } + } + + }; +}; diff --git 
a/generator/selrestr.h b/generator/selrestr.h new file mode 100644 index 0000000..5000970 --- /dev/null +++ b/generator/selrestr.h @@ -0,0 +1,88 @@ +#ifndef SELRESTR_H_50652FB7 +#define SELRESTR_H_50652FB7 + +#include +#include +#include + +namespace verbly { + namespace generator { + + class selrestr { + public: + enum class type { + empty, + singleton, + group + }; + + // Copy and move constructors + + selrestr(const selrestr& other); + selrestr(selrestr&& other); + + // Assignment + + selrestr& operator=(selrestr other); + + // Swap + + friend void swap(selrestr& first, selrestr& second); + + // Destructor + + ~selrestr(); + + // Generic accessors + + type getType() const + { + return type_; + } + + // Empty + + selrestr(); + + // Singleton + + selrestr(std::string restriction, bool pos); + + std::string getRestriction() const; + + bool getPos() const; + + // Group + + selrestr(std::list children, bool orlogic); + + std::list getChildren() const; + + std::list::const_iterator begin() const; + + std::list::const_iterator end() const; + + bool getOrlogic() const; + + // Helpers + + nlohmann::json toJson() const; + + private: + union { + struct { + bool pos; + std::string restriction; + } singleton_; + struct { + std::list children; + bool orlogic; + } group_; + }; + type type_; + }; + + }; +}; + +#endif /* end of include guard: SELRESTR_H_50652FB7 */ diff --git a/generator/word.cpp b/generator/word.cpp new file mode 100644 index 0000000..8ba3ce2 --- /dev/null +++ b/generator/word.cpp @@ -0,0 +1,77 @@ +#include "word.h" +#include +#include +#include "database.h" +#include "notion.h" +#include "lemma.h" +#include "field.h" +#include "group.h" + +namespace verbly { + namespace generator { + + int word::nextId_ = 0; + + word::word( + notion& n, + lemma& l) : + id_(nextId_++), + notion_(n), + lemma_(l) + { + } + + word::word( + notion& n, + lemma& l, + int tagCount) : + id_(nextId_++), + notion_(n), + lemma_(l), + tagCount_(tagCount), + hasTagCount_(true) + { + } + + 
void word::setAdjectivePosition(positioning adjectivePosition) + { + adjectivePosition_ = adjectivePosition; + } + + void word::setVerbGroup(const group& verbGroup) + { + verbGroup_ = &verbGroup; + } + + database& operator<<(database& db, const word& arg) + { + std::list fields; + + fields.emplace_back("word_id", arg.getId()); + fields.emplace_back("notion_id", arg.getNotion().getId()); + fields.emplace_back("lemma_id", arg.getLemma().getId()); + + if (arg.hasTagCount()) + { + fields.emplace_back("tag_count", arg.getTagCount()); + } + + if ((arg.getNotion().getPartOfSpeech() == part_of_speech::adjective) + && (arg.getAdjectivePosition() != positioning::undefined)) + { + fields.emplace_back("position", static_cast(arg.getAdjectivePosition())); + } + + if ((arg.getNotion().getPartOfSpeech() == part_of_speech::verb) + && (arg.hasVerbGroup())) + { + fields.emplace_back("group_id", arg.getVerbGroup().getId()); + } + + db.insertIntoTable("words", std::move(fields)); + + return db; + } + + }; +}; diff --git a/generator/word.h b/generator/word.h new file mode 100644 index 0000000..bfed586 --- /dev/null +++ b/generator/word.h @@ -0,0 +1,110 @@ +#ifndef WORD_H_91F99D46 +#define WORD_H_91F99D46 + +#include +#include "enums.h" + +namespace verbly { + namespace generator { + + class notion; + class lemma; + class database; + class group; + + class word { + public: + + // Constructors + + word(notion& n, lemma& l); + + word(notion& n, lemma& l, int tagCount); + + // Mutators + + void setAdjectivePosition(positioning adjectivePosition); + + void setVerbGroup(const group& verbGroup); + + // Accessors + + int getId() const + { + return id_; + } + + notion& getNotion() + { + return notion_; + } + + const notion& getNotion() const + { + return notion_; + } + + lemma& getLemma() + { + return lemma_; + } + + const lemma& getLemma() const + { + return lemma_; + } + + bool hasTagCount() const + { + return hasTagCount_; + } + + int getTagCount() const + { + // Calling code should always 
call hasTagCount first. + assert(hasTagCount_); + + return tagCount_; + } + + positioning getAdjectivePosition() const + { + return adjectivePosition_; + } + + bool hasVerbGroup() const + { + return (verbGroup_ != nullptr); + } + + const group& getVerbGroup() const + { + // Calling code should always call hasVerbGroup first. + assert(verbGroup_ != nullptr); + + return *verbGroup_; + } + + private: + + static int nextId_; + + const int id_; + notion& notion_; + lemma& lemma_; + const int tagCount_ = 0; + const bool hasTagCount_ = false; + + positioning adjectivePosition_ = positioning::undefined; + const group* verbGroup_ = nullptr; + + }; + + // Serializer + + database& operator<<(database& db, const word& arg); + + }; +}; + +#endif /* end of include guard: WORD_H_91F99D46 */ diff --git a/lib/adjective.cpp b/lib/adjective.cpp deleted file mode 100644 index ba8254a..0000000 --- a/lib/adjective.cpp +++ /dev/null @@ -1,113 +0,0 @@ -#include "verbly.h" - -namespace verbly { - - adjective::adjective() - { - - } - - adjective::adjective(const data& _data, int _id) : word(_data, _id) - { - - } - - std::string adjective::base_form() const - { - assert(_valid == true); - - return _base_form; - } - - std::string adjective::comparative_form() const - { - assert(_valid == true); - - return _comparative_form; - } - - std::string adjective::superlative_form() const - { - assert(_valid == true); - - return _superlative_form; - } - - adjective::positioning adjective::position() const - { - assert(_valid == true); - - return _position; - } - - bool adjective::has_comparative_form() const - { - assert(_valid == true); - - return !_comparative_form.empty(); - } - - bool adjective::has_superlative_form() const - { - assert(_valid == true); - - return !_superlative_form.empty(); - } - - bool adjective::has_position() const - { - assert(_valid == true); - - return _position != adjective::positioning::undefined; - } - - adjective_query adjective::antonyms() const - { - assert(_valid == 
true); - - return _data->adjectives().antonym_of(*this); - } - - adjective_query adjective::synonyms() const - { - assert(_valid == true); - - return _data->adjectives().synonym_of(*this); - } - - adjective_query adjective::generalizations() const - { - assert(_valid == true); - - return _data->adjectives().generalization_of(*this); - } - - adjective_query adjective::specifications() const - { - assert(_valid == true); - - return _data->adjectives().specification_of(*this); - } - - noun_query adjective::anti_pertainyms() const - { - assert(_valid == true); - - return _data->nouns().anti_pertainym_of(*this); - } - - adverb_query adjective::mannernyms() const - { - assert(_valid == true); - - return _data->adverbs().mannernym_of(*this); - } - - noun_query adjective::attributes() const - { - assert(_valid == true); - - return _data->nouns().attribute_of(*this); - } - -}; diff --git a/lib/adjective.h b/lib/adjective.h deleted file mode 100644 index a6eb293..0000000 --- a/lib/adjective.h +++ /dev/null @@ -1,51 +0,0 @@ -#ifndef ADJECTIVE_H_87B3FB75 -#define ADJECTIVE_H_87B3FB75 - -namespace verbly { - - class adjective_query; - class adverb_query; - class noun_query; - - class adjective : public word { - public: - enum class positioning { - undefined, - predicate, - attributive, - postnominal - }; - - private: - std::string _base_form; - std::string _comparative_form; - std::string _superlative_form; - positioning _position = positioning::undefined; - - friend class adjective_query; - - public: - adjective(); - adjective(const data& _data, int _id); - - std::string base_form() const; - std::string comparative_form() const; - std::string superlative_form() const; - positioning position() const; - - bool has_comparative_form() const; - bool has_superlative_form() const; - bool has_position() const; - - adjective_query antonyms() const; - adjective_query synonyms() const; - adjective_query generalizations() const; - adjective_query specifications() const; - noun_query 
anti_pertainyms() const; - adverb_query mannernyms() const; - noun_query attributes() const; - }; - -}; - -#endif /* end of include guard: ADJECTIVE_H_87B3FB75 */ diff --git a/lib/adjective_query.cpp b/lib/adjective_query.cpp deleted file mode 100644 index 90ccef4..0000000 --- a/lib/adjective_query.cpp +++ /dev/null @@ -1,1072 +0,0 @@ -#include "verbly.h" - -namespace verbly { - - adjective_query::adjective_query(const data& _data) : _data(_data) - { - - } - - adjective_query& adjective_query::limit(int _limit) - { - if ((_limit > 0) || (_limit == unlimited)) - { - this->_limit = _limit; - } - - return *this; - } - - adjective_query& adjective_query::random() - { - this->_random = true; - - return *this; - } - - adjective_query& adjective_query::except(const adjective& _word) - { - _except.push_back(_word); - - return *this; - } - - adjective_query& adjective_query::rhymes_with(const word& _word) - { - for (auto rhyme : _word.get_rhymes()) - { - _rhymes.push_back(rhyme); - } - - if (dynamic_cast(&_word) != nullptr) - { - _except.push_back(dynamic_cast(_word)); - } - - return *this; - } - - adjective_query& adjective_query::rhymes_with(rhyme _r) - { - _rhymes.push_back(_r); - - return *this; - } - - adjective_query& adjective_query::has_pronunciation() - { - this->_has_prn = true; - - return *this; - } - - adjective_query& adjective_query::has_rhyming_noun() - { - _has_rhyming_noun = true; - - return *this; - } - - adjective_query& adjective_query::has_rhyming_adjective() - { - _has_rhyming_adjective = true; - - return *this; - } - - adjective_query& adjective_query::has_rhyming_adverb() - { - _has_rhyming_adverb = true; - - return *this; - } - - adjective_query& adjective_query::has_rhyming_verb() - { - _has_rhyming_verb = true; - - return *this; - } - - adjective_query& adjective_query::with_stress(filter> _arg) - { - _stress = _arg; - - return *this; - } - - adjective_query& adjective_query::with_prefix(filter _f) - { - _f.clean(); - _with_prefix = _f; - - return 
*this; - } - - adjective_query& adjective_query::with_suffix(filter _f) - { - _f.clean(); - _with_suffix = _f; - - return *this; - } - - adjective_query& adjective_query::with_complexity(int _arg) - { - _with_complexity = _arg; - - return *this; - } - - adjective_query& adjective_query::requires_comparative_form() - { - _requires_comparative_form = true; - - return *this; - } - - adjective_query& adjective_query::requires_superlative_form() - { - _requires_superlative_form = true; - - return *this; - } - - adjective_query& adjective_query::position(adjective::positioning pos) - { - _position = pos; - - return *this; - } - - adjective_query& adjective_query::is_variant() - { - this->_is_variant = true; - - return *this; - } - - adjective_query& adjective_query::variant_of(filter _f) - { - _f.clean(); - _variant_of = _f; - - return *this; - } - - adjective_query& adjective_query::has_antonyms() - { - this->_is_antonymic = true; - - return *this; - } - - adjective_query& adjective_query::antonym_of(filter _f) - { - _f.clean(); - _antonym_of = _f; - - return *this; - } - - adjective_query& adjective_query::has_synonyms() - { - this->_is_synonymic = true; - - return *this; - } - - adjective_query& adjective_query::synonym_of(filter _f) - { - _f.clean(); - _synonym_of = _f; - - return *this; - } - - adjective_query& adjective_query::is_generalization() - { - this->_is_generalization = true; - - return *this; - } - - adjective_query& adjective_query::generalization_of(filter _f) - { - _f.clean(); - _generalization_of = _f; - - return *this; - } - - adjective_query& adjective_query::is_specification() - { - this->_is_specification = true; - - return *this; - } - - adjective_query& adjective_query::specification_of(filter _f) - { - _f.clean(); - _specification_of = _f; - - return *this; - } - - adjective_query& adjective_query::is_pertainymic() - { - this->_is_pertainymic = true; - - return *this; - } - - adjective_query& adjective_query::pertainym_of(filter _f) - { - 
_f.clean(); - _pertainym_of = _f; - - return *this; - } - - adjective_query& adjective_query::is_mannernymic() - { - this->_is_mannernymic = true; - - return *this; - } - - adjective_query& adjective_query::anti_mannernym_of(filter _f) - { - _f.clean(); - _anti_mannernym_of = _f; - - return *this; - } - /* - adjective_query& adjective_query::derived_from(const word& _w) - { - if (dynamic_cast(&_w) != nullptr) - { - _derived_from_adjective.push_back(dynamic_cast(_w)); - } else if (dynamic_cast(&_w) != nullptr) - { - _derived_from_adverb.push_back(dynamic_cast(_w)); - } else if (dynamic_cast(&_w) != nullptr) - { - _derived_from_noun.push_back(dynamic_cast(_w)); - } - - return *this; - } - - adjective_query& adjective_query::not_derived_from(const word& _w) - { - if (dynamic_cast(&_w) != nullptr) - { - _not_derived_from_adjective.push_back(dynamic_cast(_w)); - } else if (dynamic_cast(&_w) != nullptr) - { - _not_derived_from_adverb.push_back(dynamic_cast(_w)); - } else if (dynamic_cast(&_w) != nullptr) - { - _not_derived_from_noun.push_back(dynamic_cast(_w)); - } - - return *this; - } - */ - std::list adjective_query::run() const - { - std::stringstream construct; - construct << "SELECT adjective_id, base_form, comparative, superlative, position FROM adjectives"; - std::list conditions; - std::list bindings; - - if (_has_prn) - { - conditions.push_back("adjective_id IN (SELECT adjective_id FROM adjective_pronunciations)"); - } - - if (!_rhymes.empty()) - { - std::list clauses(_rhymes.size(), "(prerhyme != ? 
AND rhyme = ?)"); - std::string cond = "adjective_id IN (SELECT adjective_id FROM adjective_pronunciations WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; - conditions.push_back(cond); - - for (auto rhy : _rhymes) - { - bindings.emplace_back(rhy.get_prerhyme()); - bindings.emplace_back(rhy.get_rhyme()); - } - } - - if (_has_rhyming_noun) - { - conditions.push_back("adjective_id IN (SELECT a.adjective_id FROM adjectives AS a INNER JOIN adjective_pronunciations AS curp ON curp.adjective_id = a.adjective_id INNER JOIN noun_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)"); - } - - if (_has_rhyming_adjective) - { - conditions.push_back("adjective_id IN (SELECT a.adjective_id FROM adjectives AS a INNER JOIN adjective_pronunciations AS curp ON curp.adjective_id = a.adjective_id INNER JOIN adjective_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme AND rhmp.adjective_id != curp.adjective_id)"); - } - - if (_has_rhyming_adverb) - { - conditions.push_back("adjective_id IN (SELECT a.adjective_id FROM adjectives AS a INNER JOIN adjective_pronunciations AS curp ON curp.adjective_id = a.adjective_id INNER JOIN adverb_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)"); - } - - if (_has_rhyming_verb) - { - conditions.push_back("adjective_id IN (SELECT a.adjective_id FROM adjectives AS a INNER JOIN adjective_pronunciations AS curp ON curp.adjective_id = a.adjective_id INNER JOIN verb_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)"); - } - - for (auto except : _except) - { - conditions.push_back("adjective_id != ?"); - bindings.emplace_back(except._id); - } - - if (_requires_comparative_form) - { - conditions.push_back("comparative IS NOT NULL"); - } - - if (_requires_superlative_form) - { - conditions.push_back("superlative IS NOT NULL"); - } - - switch (_position) - { - case 
adjective::positioning::predicate: conditions.push_back("position = 'p'"); break; - case adjective::positioning::attributive: conditions.push_back("position = 'a'"); break; - case adjective::positioning::postnominal: conditions.push_back("position = 'i'"); break; - case adjective::positioning::undefined: break; - } - - if (!_stress.empty()) - { - std::stringstream cond; - if (_stress.get_notlogic()) - { - cond << "adjective_id NOT IN"; - } else { - cond << "adjective_id IN"; - } - - cond << "(SELECT adjective_id FROM adjective_pronunciations WHERE "; - - std::function>, bool)> recur = [&] (filter> f, bool notlogic) -> std::string { - switch (f.get_type()) - { - case filter>::type::singleton: - { - std::ostringstream _val; - for (auto syl : f.get_elem()) - { - if (syl) - { - _val << "1"; - } else { - _val << "0"; - } - } - - bindings.emplace_back(_val.str()); - - if (notlogic == f.get_notlogic()) - { - return "stress = ?"; - } else { - return "stress != ?"; - } - } - - case filter>::type::group: - { - bool truelogic = notlogic != f.get_notlogic(); - - std::list clauses; - std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter> f2) { - return recur(f2, truelogic); - }); - - if (truelogic == f.get_orlogic()) - { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; - } else { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; - } - } - } - }; - - cond << recur(_stress, _stress.get_notlogic()); - cond << ")"; - conditions.push_back(cond.str()); - } - - if (!_with_prefix.empty()) - { - std::function, bool)> recur = [&] (filter f, bool notlogic) -> std::string { - switch (f.get_type()) - { - case filter::type::singleton: - { - bindings.emplace_back(f.get_elem() + "%"); - - if (notlogic == f.get_notlogic()) - { - return "base_form LIKE ?"; - } else { - return "base_form NOT LIKE ?"; - } - } - - case filter::type::group: - { - bool truelogic = notlogic != f.get_notlogic(); - - 
std::list clauses; - std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter f2) { - return recur(f2, truelogic); - }); - - if (truelogic == f.get_orlogic()) - { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; - } else { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; - } - } - } - }; - - conditions.push_back(recur(_with_prefix, false)); - } - - if (!_with_suffix.empty()) - { - std::function, bool)> recur = [&] (filter f, bool notlogic) -> std::string { - switch (f.get_type()) - { - case filter::type::singleton: - { - bindings.emplace_back("%" + f.get_elem()); - - if (notlogic == f.get_notlogic()) - { - return "base_form LIKE ?"; - } else { - return "base_form NOT LIKE ?"; - } - } - - case filter::type::group: - { - bool truelogic = notlogic != f.get_notlogic(); - - std::list clauses; - std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter f2) { - return recur(f2, truelogic); - }); - - if (truelogic == f.get_orlogic()) - { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; - } else { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; - } - } - } - }; - - conditions.push_back(recur(_with_suffix, false)); - } - - if (_with_complexity != unlimited) - { - conditions.push_back("complexity = ?"); - bindings.emplace_back(_with_complexity); - } - - if (_is_variant) - { - conditions.push_back("adjective_id IN (SELECT adjective_id FROM variation)"); - } - - if (!_variant_of.empty()) - { - std::stringstream cond; - if (_variant_of.get_notlogic()) - { - cond << "adjective_id NOT IN"; - } else { - cond << "adjective_id IN"; - } - - cond << "(SELECT adjective_id FROM variation WHERE "; - - std::function, bool)> recur = [&] (filter f, bool notlogic) -> std::string { - switch (f.get_type()) - { - case filter::type::singleton: - { - bindings.emplace_back(f.get_elem()._id); - - if 
(notlogic == f.get_notlogic()) - { - return "noun_id = ?"; - } else { - return "noun_id != ?"; - } - } - - case filter::type::group: - { - bool truelogic = notlogic != f.get_notlogic(); - - std::list clauses; - std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter f2) { - return recur(f2, truelogic); - }); - - if (truelogic == f.get_orlogic()) - { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; - } else { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; - } - } - } - }; - - cond << recur(_variant_of, _variant_of.get_notlogic()); - cond << ")"; - conditions.push_back(cond.str()); - } - - if (_is_antonymic) - { - conditions.push_back("adjective_id IN (SELECT adjective_2_id FROM adjective_antonymy)"); - } - - if (!_antonym_of.empty()) - { - std::stringstream cond; - if (_antonym_of.get_notlogic()) - { - cond << "adjective_id NOT IN"; - } else { - cond << "adjective_id IN"; - } - - cond << "(SELECT adjective_2_id FROM adjective_antonymy WHERE "; - - std::function, bool)> recur = [&] (filter f, bool notlogic) -> std::string { - switch (f.get_type()) - { - case filter::type::singleton: - { - bindings.emplace_back(f.get_elem()._id); - - if (notlogic == f.get_notlogic()) - { - return "adjective_1_id = ?"; - } else { - return "adjective_1_id != ?"; - } - } - - case filter::type::group: - { - bool truelogic = notlogic != f.get_notlogic(); - - std::list clauses; - std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter f2) { - return recur(f2, truelogic); - }); - - if (truelogic == f.get_orlogic()) - { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; - } else { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; - } - } - } - }; - - cond << recur(_antonym_of, _antonym_of.get_notlogic()); - cond << ")"; - conditions.push_back(cond.str()); - } - - if (_is_synonymic) - { - 
conditions.push_back("adjective_id IN (SELECT adjective_2_id FROM adjective_synonymy)"); - } - - if (!_synonym_of.empty()) - { - std::stringstream cond; - if (_synonym_of.get_notlogic()) - { - cond << "adjective_id NOT IN"; - } else { - cond << "adjective_id IN"; - } - - cond << "(SELECT adjective_2_id FROM adjective_synonymy WHERE "; - - std::function, bool)> recur = [&] (filter f, bool notlogic) -> std::string { - switch (f.get_type()) - { - case filter::type::singleton: - { - bindings.emplace_back(f.get_elem()._id); - - if (notlogic == f.get_notlogic()) - { - return "adjective_1_id = ?"; - } else { - return "adjective_1_id != ?"; - } - } - - case filter::type::group: - { - bool truelogic = notlogic != f.get_notlogic(); - - std::list clauses; - std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter f2) { - return recur(f2, truelogic); - }); - - if (truelogic == f.get_orlogic()) - { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; - } else { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; - } - } - } - }; - - cond << recur(_synonym_of, _synonym_of.get_notlogic()); - cond << ")"; - conditions.push_back(cond.str()); - } - - if (_is_generalization) - { - conditions.push_back("adjective_id IN (SELECT general_id FROM specification)"); - } - - if (!_generalization_of.empty()) - { - std::stringstream cond; - if (_generalization_of.get_notlogic()) - { - cond << "adjective_id NOT IN"; - } else { - cond << "adjective_id IN"; - } - - cond << "(SELECT general_id FROM specification WHERE "; - - std::function, bool)> recur = [&] (filter f, bool notlogic) -> std::string { - switch (f.get_type()) - { - case filter::type::singleton: - { - bindings.emplace_back(f.get_elem()._id); - - if (notlogic == f.get_notlogic()) - { - return "specific_id = ?"; - } else { - return "specific_id != ?"; - } - } - - case filter::type::group: - { - bool truelogic = notlogic != f.get_notlogic(); - 
- std::list clauses; - std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter f2) { - return recur(f2, truelogic); - }); - - if (truelogic == f.get_orlogic()) - { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; - } else { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; - } - } - } - }; - - cond << recur(_generalization_of, _generalization_of.get_notlogic()); - cond << ")"; - conditions.push_back(cond.str()); - } - - if (_is_specification) - { - conditions.push_back("adjective_id IN (SELECT specific_id FROM specification)"); - } - - if (!_specification_of.empty()) - { - std::stringstream cond; - if (_specification_of.get_notlogic()) - { - cond << "adjective_id NOT IN"; - } else { - cond << "adjective_id IN"; - } - - cond << "(SELECT specific_id FROM specification WHERE "; - - std::function, bool)> recur = [&] (filter f, bool notlogic) -> std::string { - switch (f.get_type()) - { - case filter::type::singleton: - { - bindings.emplace_back(f.get_elem()._id); - - if (notlogic == f.get_notlogic()) - { - return "general_id = ?"; - } else { - return "general_id != ?"; - } - } - - case filter::type::group: - { - bool truelogic = notlogic != f.get_notlogic(); - - std::list clauses; - std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter f2) { - return recur(f2, truelogic); - }); - - if (truelogic == f.get_orlogic()) - { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; - } else { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; - } - } - } - }; - - cond << recur(_specification_of, _specification_of.get_notlogic()); - cond << ")"; - conditions.push_back(cond.str()); - } - - if (_is_pertainymic) - { - conditions.push_back("adjective_id IN (SELECT pertainym_id FROM pertainymy)"); - } - - if (!_pertainym_of.empty()) - { - std::stringstream cond; - if 
(_pertainym_of.get_notlogic()) - { - cond << "adjective_id NOT IN"; - } else { - cond << "adjective_id IN"; - } - - cond << "(SELECT pertainym_id FROM pertainymy WHERE "; - - std::function, bool)> recur = [&] (filter f, bool notlogic) -> std::string { - switch (f.get_type()) - { - case filter::type::singleton: - { - bindings.emplace_back(f.get_elem()._id); - - if (notlogic == f.get_notlogic()) - { - return "noun_id = ?"; - } else { - return "noun_id != ?"; - } - } - - case filter::type::group: - { - bool truelogic = notlogic != f.get_notlogic(); - - std::list clauses; - std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter f2) { - return recur(f2, truelogic); - }); - - if (truelogic == f.get_orlogic()) - { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; - } else { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; - } - } - } - }; - - cond << recur(_pertainym_of, _pertainym_of.get_notlogic()); - cond << ")"; - conditions.push_back(cond.str()); - } - - if (_is_mannernymic) - { - conditions.push_back("adjective_id IN (SELECT adjective_id FROM mannernymy)"); - } - - if (!_anti_mannernym_of.empty()) - { - std::stringstream cond; - if (_anti_mannernym_of.get_notlogic()) - { - cond << "adjective_id NOT IN"; - } else { - cond << "adjective_id IN"; - } - - cond << "(SELECT adjective_id FROM mannernymy WHERE "; - - std::function, bool)> recur = [&] (filter f, bool notlogic) -> std::string { - switch (f.get_type()) - { - case filter::type::singleton: - { - bindings.emplace_back(f.get_elem()._id); - - if (notlogic == f.get_notlogic()) - { - return "mannernym_id = ?"; - } else { - return "mannernym_id != ?"; - } - } - - case filter::type::group: - { - bool truelogic = notlogic != f.get_notlogic(); - - std::list clauses; - std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter f2) { - return recur(f2, truelogic); - }); - - if (truelogic == 
f.get_orlogic()) - { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; - } else { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; - } - } - } - }; - - cond << recur(_anti_mannernym_of, _anti_mannernym_of.get_notlogic()); - cond << ")"; - conditions.push_back(cond.str()); - } -/* - if (!_derived_from_adjective.empty()) - { - std::list clauses(_derived_from_adjective.size(), "adjective_2_id = @DERADJ"); - std::string cond = "adjective_id IN (SELECT adjective_1_id FROM adjective_adjective_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; - conditions.push_back(cond); - } - - if (!_not_derived_from_adjective.empty()) - { - std::list clauses(_not_derived_from_adjective.size(), "adjective_2_id = @NDERADJ"); - std::string cond = "adjective_id NOT IN (SELECT adjective_1_id FROM adjective_adjective_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; - conditions.push_back(cond); - } - - if (!_derived_from_adverb.empty()) - { - std::list clauses(_derived_from_adverb.size(), "adverb_id = @DERADV"); - std::string cond = "adjective_id IN (SELECT adjective_id FROM adjective_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; - conditions.push_back(cond); - } - - if (!_not_derived_from_adverb.empty()) - { - std::list clauses(_not_derived_from_adverb.size(), "adverb_id = @NDERADV"); - std::string cond = "adjective_id NOT IN (SELECT adjective_id FROM adjective_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; - conditions.push_back(cond); - } - - if (!_derived_from_noun.empty()) - { - std::list clauses(_derived_from_noun.size(), "noun_id = @DERN"); - std::string cond = "adjective_id IN (SELECT adjective_id FROM noun_adjective_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; - 
conditions.push_back(cond); - } - - if (!_not_derived_from_noun.empty()) - { - std::list clauses(_not_derived_from_noun.size(), "noun_id = @NDERN"); - std::string cond = "adjective_id NOT IN (SELECT adjective_id FROM noun_adjective_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; - conditions.push_back(cond); - }*/ - - if (!conditions.empty()) - { - construct << " WHERE "; - construct << verbly::implode(std::begin(conditions), std::end(conditions), " AND "); - } - - if (_random) - { - construct << " ORDER BY RANDOM()"; - } - - if (_limit != unlimited) - { - construct << " LIMIT " << _limit; - } - - sqlite3_stmt* ppstmt; - std::string query = construct.str(); - if (sqlite3_prepare_v2(_data.ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) - { - throw std::runtime_error(sqlite3_errmsg(_data.ppdb)); - } - - int i = 1; - for (auto& binding : bindings) - { - switch (binding.get_type()) - { - case binding::type::integer: - { - sqlite3_bind_int(ppstmt, i, binding.get_integer()); - - break; - } - - case binding::type::string: - { - sqlite3_bind_text(ppstmt, i, binding.get_string().c_str(), binding.get_string().length(), SQLITE_TRANSIENT); - - break; - } - } - - i++; - } - - /* - for (auto adj : _derived_from_adjective) - { - sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@DERADJ"), adj._id); - } - - for (auto adj : _not_derived_from_adjective) - { - sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@NDERADJ"), adj._id); - } - - for (auto adv : _derived_from_adverb) - { - sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@DERADV"), adv._id); - } - - for (auto adv : _not_derived_from_adverb) - { - sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@NDERADV"), adv._id); - } - - for (auto n : _derived_from_noun) - { - sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@DERN"), n._id); - } - - for (auto n : _not_derived_from_noun) - { - 
sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@NDERN"), n._id); - } -*/ - std::list output; - while (sqlite3_step(ppstmt) == SQLITE_ROW) - { - adjective tnc {_data, sqlite3_column_int(ppstmt, 0)}; - tnc._base_form = std::string(reinterpret_cast(sqlite3_column_text(ppstmt, 1))); - - if (sqlite3_column_type(ppstmt, 2) != SQLITE_NULL) - { - tnc._comparative_form = std::string(reinterpret_cast(sqlite3_column_text(ppstmt, 2))); - } - - if (sqlite3_column_type(ppstmt, 3) != SQLITE_NULL) - { - tnc._superlative_form = std::string(reinterpret_cast(sqlite3_column_text(ppstmt, 3))); - } - - if (sqlite3_column_type(ppstmt, 4) != SQLITE_NULL) - { - std::string adjpos(reinterpret_cast(sqlite3_column_text(ppstmt, 4))); - if (adjpos == "p") - { - tnc._position = adjective::positioning::predicate; - } else if (adjpos == "a") - { - tnc._position = adjective::positioning::attributive; - } else if (adjpos == "i") - { - tnc._position = adjective::positioning::postnominal; - } - } - - output.push_back(tnc); - } - - sqlite3_finalize(ppstmt); - - for (auto& adjective : output) - { - query = "SELECT pronunciation, prerhyme, rhyme FROM adjective_pronunciations WHERE adjective_id = ?"; - if (sqlite3_prepare_v2(_data.ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) - { - throw std::runtime_error(sqlite3_errmsg(_data.ppdb)); - } - - sqlite3_bind_int(ppstmt, 1, adjective._id); - - while (sqlite3_step(ppstmt) == SQLITE_ROW) - { - std::string pronunciation(reinterpret_cast(sqlite3_column_text(ppstmt, 0))); - auto phonemes = verbly::split>(pronunciation, " "); - - adjective.pronunciations.push_back(phonemes); - - if ((sqlite3_column_type(ppstmt, 1) != SQLITE_NULL) && (sqlite3_column_type(ppstmt, 2) != SQLITE_NULL)) - { - std::string prerhyme(reinterpret_cast(sqlite3_column_text(ppstmt, 1))); - std::string rhyming(reinterpret_cast(sqlite3_column_text(ppstmt, 2))); - adjective.rhymes.emplace_back(prerhyme, rhyming); - } - } - - sqlite3_finalize(ppstmt); - } - - 
return output; - } - -}; diff --git a/lib/adjective_query.h b/lib/adjective_query.h deleted file mode 100644 index e6a6609..0000000 --- a/lib/adjective_query.h +++ /dev/null @@ -1,112 +0,0 @@ -#ifndef ADJECTIVE_QUERY_H_05E590FD -#define ADJECTIVE_QUERY_H_05E590FD - -namespace verbly { - - class adjective_query { - public: - adjective_query(const data& _data); - - adjective_query& limit(int _limit); - adjective_query& random(); - adjective_query& except(const adjective& _word); - adjective_query& rhymes_with(const word& _word); - adjective_query& rhymes_with(rhyme _r); - adjective_query& has_pronunciation(); - adjective_query& has_rhyming_noun(); - adjective_query& has_rhyming_adjective(); - adjective_query& has_rhyming_adverb(); - adjective_query& has_rhyming_verb(); - adjective_query& with_stress(filter> _arg); - - adjective_query& requires_comparative_form(); - adjective_query& requires_superlative_form(); - adjective_query& position(adjective::positioning pos); - - adjective_query& with_prefix(filter _f); - adjective_query& with_suffix(filter _f); - - adjective_query& with_complexity(int _arg); - - adjective_query& is_variant(); - adjective_query& variant_of(filter _f); - - adjective_query& has_antonyms(); - adjective_query& antonym_of(filter _f); - - adjective_query& has_synonyms(); - adjective_query& synonym_of(filter _f); - - adjective_query& is_generalization(); - adjective_query& generalization_of(filter _f); - - adjective_query& is_specification(); - adjective_query& specification_of(filter _f); - - adjective_query& is_pertainymic(); - adjective_query& pertainym_of(filter _f); - - adjective_query& is_mannernymic(); - adjective_query& anti_mannernym_of(filter _f); - -/* adjective_query& derived_from(const word& _w); - adjective_query& not_derived_from(const word& _w);*/ - - std::list run() const; - - const static int unlimited = -1; - - protected: - const data& _data; - int _limit = unlimited; - bool _random = false; - std::list _rhymes; - std::list 
_except; - bool _has_prn = false; - bool _has_rhyming_noun = false; - bool _has_rhyming_adjective = false; - bool _has_rhyming_adverb = false; - bool _has_rhyming_verb = false; - filter> _stress; - - bool _requires_comparative_form = false; - bool _requires_superlative_form = false; - adjective::positioning _position = adjective::positioning::undefined; - - filter _with_prefix; - filter _with_suffix; - - int _with_complexity = unlimited; - - bool _is_variant = false; - filter _variant_of; - - bool _is_antonymic = false; - filter _antonym_of; - - bool _is_synonymic = false; - filter _synonym_of; - - bool _is_generalization = false; - filter _generalization_of; - - bool _is_specification = false; - filter _specification_of; - - bool _is_pertainymic = false; - filter _pertainym_of; - - bool _is_mannernymic = false; - filter _anti_mannernym_of; - -/* std::list _derived_from_adjective; - std::list _not_derived_from_adjective; - std::list _derived_from_adverb; - std::list _not_derived_from_adverb; - std::list _derived_from_noun; - std::list _not_derived_from_noun;*/ - }; - -}; - -#endif /* end of include guard: ADJECTIVE_QUERY_H_05E590FD */ diff --git a/lib/adverb.cpp b/lib/adverb.cpp deleted file mode 100644 index 442574e..0000000 --- a/lib/adverb.cpp +++ /dev/null @@ -1,71 +0,0 @@ -#include "verbly.h" - -namespace verbly { - - adverb::adverb() - { - - } - - adverb::adverb(const data& _data, int _id) : word(_data, _id) - { - - } - - std::string adverb::base_form() const - { - assert(_valid == true); - - return _base_form; - } - - std::string adverb::comparative_form() const - { - assert(_valid == true); - - return _comparative_form; - } - - std::string adverb::superlative_form() const - { - assert(_valid == true); - - return _superlative_form; - } - - bool adverb::has_comparative_form() const - { - assert(_valid == true); - - return !_comparative_form.empty(); - } - - bool adverb::has_superlative_form() const - { - assert(_valid == true); - - return 
!_superlative_form.empty(); - } - - adverb_query adverb::antonyms() const - { - assert(_valid == true); - - return _data->adverbs().antonym_of(*this); - } - - adverb_query adverb::synonyms() const - { - assert(_valid == true); - - return _data->adverbs().synonym_of(*this); - } - - adjective_query adverb::anti_mannernyms() const - { - assert(_valid == true); - - return _data->adjectives().anti_mannernym_of(*this); - } - -}; diff --git a/lib/adverb.h b/lib/adverb.h deleted file mode 100644 index 56d4e28..0000000 --- a/lib/adverb.h +++ /dev/null @@ -1,35 +0,0 @@ -#ifndef ADVERB_H_86F8302F -#define ADVERB_H_86F8302F - -namespace verbly { - - class adverb : public word { - private: - std::string _base_form; - std::string _comparative_form; - std::string _superlative_form; - - friend class adverb_query; - - public: - adverb(); - adverb(const data& _data, int _id); - - std::string base_form() const; - std::string comparative_form() const; - std::string superlative_form() const; - - bool has_comparative_form() const; - bool has_superlative_form() const; - - adverb_query antonyms() const; - adverb_query synonyms() const; - adjective_query anti_mannernyms() const; - - adverb_query& derived_from(const word& _w); - adverb_query& not_derived_from(const word& _w); - }; - -}; - -#endif /* end of include guard: ADVERB_H_86F8302F */ diff --git a/lib/adverb_query.cpp b/lib/adverb_query.cpp deleted file mode 100644 index 3e62bb7..0000000 --- a/lib/adverb_query.cpp +++ /dev/null @@ -1,758 +0,0 @@ -#include "verbly.h" - -namespace verbly { - - adverb_query::adverb_query(const data& _data) : _data(_data) - { - - } - - adverb_query& adverb_query::limit(int _limit) - { - if ((_limit > 0) || (_limit == unlimited)) - { - this->_limit = _limit; - } - - return *this; - } - - adverb_query& adverb_query::random() - { - this->_random = true; - - return *this; - } - - adverb_query& adverb_query::except(const adverb& _word) - { - _except.push_back(_word); - - return *this; - } - - adverb_query& 
adverb_query::rhymes_with(const word& _word) - { - for (auto rhyme : _word.get_rhymes()) - { - _rhymes.push_back(rhyme); - } - - if (dynamic_cast(&_word) != nullptr) - { - _except.push_back(dynamic_cast(_word)); - } - - return *this; - } - - adverb_query& adverb_query::rhymes_with(rhyme _r) - { - _rhymes.push_back(_r); - - return *this; - } - - adverb_query& adverb_query::has_pronunciation() - { - this->_has_prn = true; - - return *this; - } - - adverb_query& adverb_query::has_rhyming_noun() - { - _has_rhyming_noun = true; - - return *this; - } - - adverb_query& adverb_query::has_rhyming_adjective() - { - _has_rhyming_adjective = true; - - return *this; - } - - adverb_query& adverb_query::has_rhyming_adverb() - { - _has_rhyming_adverb = true; - - return *this; - } - - adverb_query& adverb_query::has_rhyming_verb() - { - _has_rhyming_verb = true; - - return *this; - } - - adverb_query& adverb_query::requires_comparative_form() - { - _requires_comparative_form = true; - - return *this; - } - - adverb_query& adverb_query::requires_superlative_form() - { - _requires_superlative_form = true; - - return *this; - } - - adverb_query& adverb_query::with_stress(filter> _arg) - { - _stress = _arg; - - return *this; - } - - adverb_query& adverb_query::with_prefix(filter _f) - { - _f.clean(); - _with_prefix = _f; - - return *this; - } - - adverb_query& adverb_query::with_suffix(filter _f) - { - _f.clean(); - _with_suffix = _f; - - return *this; - } - - adverb_query& adverb_query::with_complexity(int _arg) - { - _with_complexity = _arg; - - return *this; - } - - adverb_query& adverb_query::has_antonyms() - { - _has_antonyms = true; - - return *this; - } - - adverb_query& adverb_query::antonym_of(filter _f) - { - _f.clean(); - _antonym_of = _f; - - return *this; - } - - adverb_query& adverb_query::has_synonyms() - { - _has_synonyms = true; - - return *this; - } - - adverb_query& adverb_query::synonym_of(filter _f) - { - _f.clean(); - _synonym_of = _f; - - return *this; - } - - 
adverb_query& adverb_query::is_mannernymic() - { - _is_mannernymic = true; - - return *this; - } - - adverb_query& adverb_query::mannernym_of(filter _f) - { - _f.clean(); - _mannernym_of = _f; - - return *this; - } - /* - adverb_query& adverb_query::derived_from(const word& _w) - { - if (dynamic_cast(&_w) != nullptr) - { - _derived_from_adjective.push_back(dynamic_cast(_w)); - } else if (dynamic_cast(&_w) != nullptr) - { - _derived_from_adverb.push_back(dynamic_cast(_w)); - } else if (dynamic_cast(&_w) != nullptr) - { - _derived_from_noun.push_back(dynamic_cast(_w)); - } - - return *this; - } - - adverb_query& adverb_query::not_derived_from(const word& _w) - { - if (dynamic_cast(&_w) != nullptr) - { - _not_derived_from_adjective.push_back(dynamic_cast(_w)); - } else if (dynamic_cast(&_w) != nullptr) - { - _not_derived_from_adverb.push_back(dynamic_cast(_w)); - } else if (dynamic_cast(&_w) != nullptr) - { - _not_derived_from_noun.push_back(dynamic_cast(_w)); - } - - return *this; - } - */ - std::list adverb_query::run() const - { - std::stringstream construct; - construct << "SELECT adverb_id, base_form, comparative, superlative FROM adverbs"; - std::list conditions; - std::list bindings; - - if (_has_prn) - { - conditions.push_back("adverb_id IN (SELECT adverb_id FROM adverb_pronunciations)"); - } - - if (!_rhymes.empty()) - { - std::list clauses(_rhymes.size(), "(prerhyme != ? 
AND rhyme = ?)"); - std::string cond = "adverb_id IN (SELECT adverb_id FROM adverb_pronunciations WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; - conditions.push_back(cond); - - for (auto rhy : _rhymes) - { - bindings.emplace_back(rhy.get_prerhyme()); - bindings.emplace_back(rhy.get_rhyme()); - } - } - - if (_has_rhyming_noun) - { - conditions.push_back("adverb_id IN (SELECT a.adverb_id FROM adverbs AS a INNER JOIN adverb_pronunciations AS curp ON curp.noun_id = a.adverb_id INNER JOIN noun_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)"); - } - - if (_has_rhyming_adjective) - { - conditions.push_back("adverb_id IN (SELECT a.adverb_id FROM adverbs AS a INNER JOIN adverb_pronunciations AS curp ON curp.noun_id = a.adverb_id INNER JOIN adjective_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)"); - } - - if (_has_rhyming_adverb) - { - conditions.push_back("adverb_id IN (SELECT a.adverb_id FROM adverbs AS a INNER JOIN adverb_pronunciations AS curp ON curp.noun_id = a.adverb_id INNER JOIN adverb_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme AND rhmp.adverb_id != curp.adverb_id)"); - } - - if (_has_rhyming_verb) - { - conditions.push_back("adverb_id IN (SELECT a.adverb_id FROM adverbs AS a INNER JOIN adverb_pronunciations AS curp ON curp.noun_id = a.adverb_id INNER JOIN verb_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)"); - } - - for (auto except : _except) - { - conditions.push_back("adverb_id != ?"); - bindings.emplace_back(except._id); - } - - if (_requires_comparative_form) - { - conditions.push_back("comparative IS NOT NULL"); - } - - if (_requires_superlative_form) - { - conditions.push_back("superlative IS NOT NULL"); - } - - if (!_stress.empty()) - { - std::stringstream cond; - if (_stress.get_notlogic()) - { - cond << "adverb_id NOT IN"; - } else { - cond << "adverb_id IN"; - 
} - - cond << "(SELECT adverb_id FROM adverb_pronunciations WHERE "; - - std::function>, bool)> recur = [&] (filter> f, bool notlogic) -> std::string { - switch (f.get_type()) - { - case filter>::type::singleton: - { - std::ostringstream _val; - for (auto syl : f.get_elem()) - { - if (syl) - { - _val << "1"; - } else { - _val << "0"; - } - } - - bindings.emplace_back(_val.str()); - - if (notlogic == f.get_notlogic()) - { - return "stress = ?"; - } else { - return "stress != ?"; - } - } - - case filter>::type::group: - { - bool truelogic = notlogic != f.get_notlogic(); - - std::list clauses; - std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter> f2) { - return recur(f2, truelogic); - }); - - if (truelogic == f.get_orlogic()) - { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; - } else { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; - } - } - } - }; - - cond << recur(_stress, _stress.get_notlogic()); - cond << ")"; - conditions.push_back(cond.str()); - } - - if (!_with_prefix.empty()) - { - std::function, bool)> recur = [&] (filter f, bool notlogic) -> std::string { - switch (f.get_type()) - { - case filter::type::singleton: - { - bindings.emplace_back(f.get_elem() + "%"); - - if (notlogic == f.get_notlogic()) - { - return "base_form LIKE ?"; - } else { - return "base_form NOT LIKE ?"; - } - } - - case filter::type::group: - { - bool truelogic = notlogic != f.get_notlogic(); - - std::list clauses; - std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter f2) { - return recur(f2, truelogic); - }); - - if (truelogic == f.get_orlogic()) - { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; - } else { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; - } - } - } - }; - - conditions.push_back(recur(_with_prefix, false)); - } - - if (!_with_suffix.empty()) - { - 
std::function, bool)> recur = [&] (filter f, bool notlogic) -> std::string { - switch (f.get_type()) - { - case filter::type::singleton: - { - bindings.emplace_back("%" + f.get_elem()); - - if (notlogic == f.get_notlogic()) - { - return "base_form LIKE ?"; - } else { - return "base_form NOT LIKE ?"; - } - } - - case filter::type::group: - { - bool truelogic = notlogic != f.get_notlogic(); - - std::list clauses; - std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter f2) { - return recur(f2, truelogic); - }); - - if (truelogic == f.get_orlogic()) - { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; - } else { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; - } - } - } - }; - - conditions.push_back(recur(_with_suffix, false)); - } - - if (_with_complexity != unlimited) - { - conditions.push_back("complexity = ?"); - bindings.emplace_back(_with_complexity); - } - - if (_has_antonyms) - { - conditions.push_back("adverb_id IN (SELECT adverb_2_id FROM adverb_antonymy)"); - } - - if (!_antonym_of.empty()) - { - std::stringstream cond; - if (_antonym_of.get_notlogic()) - { - cond << "adverb_id NOT IN"; - } else { - cond << "adverb_id IN"; - } - - cond << "(SELECT adverb_2_id FROM adverb_antonymy WHERE "; - - std::function, bool)> recur = [&] (filter f, bool notlogic) -> std::string { - switch (f.get_type()) - { - case filter::type::singleton: - { - bindings.emplace_back(f.get_elem()._id); - - if (notlogic == f.get_notlogic()) - { - return "adverb_1_id = ?"; - } else { - return "adverb_1_id != ?"; - } - } - - case filter::type::group: - { - bool truelogic = notlogic != f.get_notlogic(); - - std::list clauses; - std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter f2) { - return recur(f2, truelogic); - }); - - if (truelogic == f.get_orlogic()) - { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; - } 
else { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; - } - } - } - }; - - cond << recur(_antonym_of, _antonym_of.get_notlogic()); - cond << ")"; - conditions.push_back(cond.str()); - } - - if (_has_synonyms) - { - conditions.push_back("adverb_id IN (SELECT adverb_2_id FROM adverb_synonymy)"); - } - - if (!_synonym_of.empty()) - { - std::stringstream cond; - if (_antonym_of.get_notlogic()) - { - cond << "adverb_id NOT IN"; - } else { - cond << "adverb_id IN"; - } - - cond << "(SELECT adverb_2_id FROM adverb_synonymy WHERE "; - - std::function, bool)> recur = [&] (filter f, bool notlogic) -> std::string { - switch (f.get_type()) - { - case filter::type::singleton: - { - bindings.emplace_back(f.get_elem()._id); - - if (notlogic == f.get_notlogic()) - { - return "adverb_1_id = ?"; - } else { - return "adverb_1_id != ?"; - } - } - - case filter::type::group: - { - bool truelogic = notlogic != f.get_notlogic(); - - std::list clauses; - std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter f2) { - return recur(f2, truelogic); - }); - - if (truelogic == f.get_orlogic()) - { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; - } else { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; - } - } - } - }; - - cond << recur(_synonym_of, _synonym_of.get_notlogic()); - cond << ")"; - conditions.push_back(cond.str()); - } - - if (_is_mannernymic) - { - conditions.push_back("adverb_id IN (SELECT mannernym_id FROM mannernymy)"); - } - - if (!_mannernym_of.empty()) - { - std::stringstream cond; - if (_antonym_of.get_notlogic()) - { - cond << "adverb_id NOT IN"; - } else { - cond << "adverb_id IN"; - } - - cond << "(SELECT mannernym_id FROM mannernymy WHERE "; - - std::function, bool)> recur = [&] (filter f, bool notlogic) -> std::string { - switch (f.get_type()) - { - case filter::type::singleton: - { - bindings.emplace_back(f.get_elem()._id); - 
- if (notlogic == f.get_notlogic()) - { - return "adjective_id = ?"; - } else { - return "adjective_id != ?"; - } - } - - case filter::type::group: - { - bool truelogic = notlogic != f.get_notlogic(); - - std::list clauses; - std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter f2) { - return recur(f2, truelogic); - }); - - if (truelogic == f.get_orlogic()) - { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; - } else { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; - } - } - } - }; - - cond << recur(_mannernym_of, _mannernym_of.get_notlogic()); - cond << ")"; - conditions.push_back(cond.str()); - } - -/* if (!_derived_from_adjective.empty()) - { - std::list clauses(_derived_from_adjective.size(), "adjective_id = @DERADJ"); - std::string cond = "adverb_id IN (SELECT adverb_id FROM adjective_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; - conditions.push_back(cond); - } - - if (!_not_derived_from_adjective.empty()) - { - std::list clauses(_not_derived_from_adjective.size(), "adjective_id = @NDERADJ"); - std::string cond = "adverb_id NOT IN (SELECT adverb_id FROM adjective_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; - conditions.push_back(cond); - } - - if (!_derived_from_adverb.empty()) - { - std::list clauses(_derived_from_adverb.size(), "adverb_2_id = @DERADV"); - std::string cond = "adverb_id IN (SELECT adverb_1_id FROM adverb_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; - conditions.push_back(cond); - } - - if (!_not_derived_from_adverb.empty()) - { - std::list clauses(_not_derived_from_adverb.size(), "adverb_2_id = @NDERADV"); - std::string cond = "adverb_id NOT IN (SELECT adverb_1_id FROM adverb_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; - 
conditions.push_back(cond); - } - - if (!_derived_from_noun.empty()) - { - std::list clauses(_derived_from_noun.size(), "noun_id = @DERN"); - std::string cond = "adverb_id IN (SELECT adverb_id FROM noun_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; - conditions.push_back(cond); - } - - if (!_not_derived_from_noun.empty()) - { - std::list clauses(_not_derived_from_noun.size(), "noun_id = @NDERN"); - std::string cond = "adverb_id NOT IN (SELECT adverb_id FROM noun_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; - conditions.push_back(cond); - }*/ - - if (!conditions.empty()) - { - construct << " WHERE "; - construct << verbly::implode(std::begin(conditions), std::end(conditions), " AND "); - } - - if (_random) - { - construct << " ORDER BY RANDOM()"; - } - - if (_limit != unlimited) - { - construct << " LIMIT " << _limit; - } - - sqlite3_stmt* ppstmt; - std::string query = construct.str(); - if (sqlite3_prepare_v2(_data.ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) - { - throw std::runtime_error(sqlite3_errmsg(_data.ppdb)); - } - - int i = 1; - for (auto& binding : bindings) - { - switch (binding.get_type()) - { - case binding::type::integer: - { - sqlite3_bind_int(ppstmt, i, binding.get_integer()); - - break; - } - - case binding::type::string: - { - sqlite3_bind_text(ppstmt, i, binding.get_string().c_str(), binding.get_string().length(), SQLITE_TRANSIENT); - - break; - } - } - - i++; - } - - /* - for (auto adj : _derived_from_adjective) - { - sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@DERADJ"), adj._id); - } - - for (auto adj : _not_derived_from_adjective) - { - sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@NDERADJ"), adj._id); - } - - for (auto adv : _derived_from_adverb) - { - sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@DERADV"), adv._id); - } - - for (auto adv : 
_not_derived_from_adverb) - { - sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@NDERADV"), adv._id); - } - - for (auto n : _derived_from_noun) - { - sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@DERN"), n._id); - } - - for (auto n : _not_derived_from_noun) - { - sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@NDERN"), n._id); - }*/ - - std::list output; - while (sqlite3_step(ppstmt) == SQLITE_ROW) - { - adverb tnc {_data, sqlite3_column_int(ppstmt, 0)}; - tnc._base_form = std::string(reinterpret_cast(sqlite3_column_text(ppstmt, 1))); - - if (sqlite3_column_type(ppstmt, 2) != SQLITE_NULL) - { - tnc._comparative_form = std::string(reinterpret_cast(sqlite3_column_text(ppstmt, 2))); - } - - if (sqlite3_column_type(ppstmt, 3) != SQLITE_NULL) - { - tnc._superlative_form = std::string(reinterpret_cast(sqlite3_column_text(ppstmt, 3))); - } - - output.push_back(tnc); - } - - sqlite3_finalize(ppstmt); - - for (auto& adverb : output) - { - query = "SELECT pronunciation, prerhyme, rhyme FROM adverb_pronunciations WHERE adverb_id = ?"; - if (sqlite3_prepare_v2(_data.ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) - { - throw std::runtime_error(sqlite3_errmsg(_data.ppdb)); - } - - sqlite3_bind_int(ppstmt, 1, adverb._id); - - while (sqlite3_step(ppstmt) == SQLITE_ROW) - { - std::string pronunciation(reinterpret_cast(sqlite3_column_text(ppstmt, 0))); - auto phonemes = verbly::split>(pronunciation, " "); - - adverb.pronunciations.push_back(phonemes); - - if ((sqlite3_column_type(ppstmt, 1) != SQLITE_NULL) && (sqlite3_column_type(ppstmt, 2) != SQLITE_NULL)) - { - std::string prerhyme(reinterpret_cast(sqlite3_column_text(ppstmt, 1))); - std::string rhyming(reinterpret_cast(sqlite3_column_text(ppstmt, 2))); - adverb.rhymes.emplace_back(prerhyme, rhyming); - } - } - - sqlite3_finalize(ppstmt); - } - - return output; - } - -}; diff --git a/lib/adverb_query.h b/lib/adverb_query.h deleted file mode 100644 index 
30e7400..0000000 --- a/lib/adverb_query.h +++ /dev/null @@ -1,86 +0,0 @@ -#ifndef ADVERB_QUERY_H_CA13CCDD -#define ADVERB_QUERY_H_CA13CCDD - -namespace verbly { - - class adverb_query { - public: - adverb_query(const data& _data); - - adverb_query& limit(int _limit); - adverb_query& random(); - adverb_query& except(const adverb& _word); - adverb_query& rhymes_with(const word& _word); - adverb_query& rhymes_with(rhyme _r); - adverb_query& has_pronunciation(); - adverb_query& has_rhyming_noun(); - adverb_query& has_rhyming_adjective(); - adverb_query& has_rhyming_adverb(); - adverb_query& has_rhyming_verb(); - adverb_query& with_stress(filter> _arg); - - adverb_query& requires_comparative_form(); - adverb_query& requires_superlative_form(); - - adverb_query& with_prefix(filter _f); - adverb_query& with_suffix(filter _f); - - adverb_query& with_complexity(int _arg); - - adverb_query& has_antonyms(); - adverb_query& antonym_of(filter _f); - - adverb_query& has_synonyms(); - adverb_query& synonym_of(filter _f); - - adverb_query& is_mannernymic(); - adverb_query& mannernym_of(filter _f); - -/* adverb_query& derived_from(const word& _w); - adverb_query& not_derived_from(const word& _w);*/ - - std::list run() const; - - const static int unlimited = -1; - - private: - const data& _data; - int _limit = unlimited; - bool _random = false; - std::list _rhymes; - std::list _except; - bool _has_prn = false; - bool _has_rhyming_noun = false; - bool _has_rhyming_adjective = false; - bool _has_rhyming_adverb = false; - bool _has_rhyming_verb = false; - filter> _stress; - - bool _requires_comparative_form = false; - bool _requires_superlative_form = false; - - filter _with_prefix; - filter _with_suffix; - - int _with_complexity = unlimited; - - bool _has_antonyms = false; - filter _antonym_of; - - bool _has_synonyms = false; - filter _synonym_of; - - bool _is_mannernymic = false; - filter _mannernym_of; - -/* std::list _derived_from_adjective; - std::list _not_derived_from_adjective; 
- std::list _derived_from_adverb; - std::list _not_derived_from_adverb; - std::list _derived_from_noun; - std::list _not_derived_from_noun;*/ - }; - -}; - -#endif /* end of include guard: ADVERB_QUERY_H_CA13CCDD */ diff --git a/lib/binding.cpp b/lib/binding.cpp new file mode 100644 index 0000000..349cd6f --- /dev/null +++ b/lib/binding.cpp @@ -0,0 +1,180 @@ +#include "binding.h" +#include +#include + +namespace verbly { + + binding::binding(const binding& other) + { + type_ = other.type_; + + switch (type_) + { + case type::integer: + { + integer_ = other.integer_; + + break; + } + + case type::string: + { + new(&string_) std::string(other.string_); + + break; + } + + case type::invalid: + { + break; + } + } + } + + binding::binding(binding&& other) : binding() + { + swap(*this, other); + } + + binding& binding::operator=(binding other) + { + swap(*this, other); + + return *this; + } + + void swap(binding& first, binding& second) + { + using type = binding::type; + + type tempType = first.type_; + int tempInteger; + std::string tempString; + + switch (first.type_) + { + case type::integer: + { + tempInteger = first.integer_; + + break; + } + + case type::string: + { + tempString = std::move(first.string_); // stash first's string before first is destroyed below + + break; + } + + case type::invalid: + { + break; + } + } + + first.~binding(); + + first.type_ = second.type_; + + switch (second.type_) + { + case type::integer: + { + first.integer_ = second.integer_; + + break; + } + + case type::string: + { + new(&first.string_) std::string(std::move(second.string_)); + + break; + } + + case type::invalid: + { + break; + } + } + + second.~binding(); + + second.type_ = tempType; + + switch (tempType) + { + case type::integer: + { + second.integer_ = tempInteger; + + break; + } + + case type::string: + { + new(&second.string_) std::string(std::move(tempString)); + + break; + } + + case type::invalid: + { + break; + } + } + } + + binding::~binding() + { + switch (type_) + { + case type::string: + { + using string_type = 
std::string; + string_.~string_type(); + + break; + } + + case type::integer: + case type::invalid: + { + break; + } + } + } + + binding::binding(int arg) : + type_(type::integer), + integer_(arg) + { + } + + int binding::getInteger() const + { + if (type_ != type::integer) + { + throw std::domain_error("binding::getInteger called on non-integer binding"); + } + + return integer_; + } + + binding::binding(std::string arg) : type_(type::string) + { + new(&string_) std::string(arg); + } + + std::string binding::getString() const + { + if (type_ != type::string) + { + throw std::domain_error("binding::getString called on non-string binding"); + } + + return string_; + } + +}; diff --git a/lib/binding.h b/lib/binding.h new file mode 100644 index 0000000..7fbe20e --- /dev/null +++ b/lib/binding.h @@ -0,0 +1,70 @@ +#ifndef BINDING_H_CAE0B18E +#define BINDING_H_CAE0B18E + +#include + +namespace verbly { + + class binding { + public: + enum class type { + invalid, + integer, + string + }; + + // Default constructor + + binding() + { + } + + // Copy and move constructors + + binding(const binding& other); + binding(binding&& other); + + // Assignment + + binding& operator=(binding other); + + // Swap + + friend void swap(binding& first, binding& second); + + // Destructor + + ~binding(); + + // Generic accessors + + type getType() const + { + return type_; + } + + // Integer + + binding(int arg); + + int getInteger() const; + + // String + + binding(std::string arg); + + std::string getString() const; + + private: + + union { + int integer_; + std::string string_; + }; + + type type_ = type::invalid; + }; + +}; + +#endif /* end of include guard: BINDING_H_CAE0B18E */ diff --git a/lib/data.cpp b/lib/data.cpp deleted file mode 100644 index db42487..0000000 --- a/lib/data.cpp +++ /dev/null @@ -1,177 +0,0 @@ -#include "verbly.h" - -namespace verbly { - - data::data(std::string datafile) - { - if (sqlite3_open_v2(datafile.c_str(), &ppdb, SQLITE_OPEN_READONLY, NULL) != SQLITE_OK) 
- { - throw std::invalid_argument(sqlite3_errmsg(ppdb)); - } - } - - data::data(data&& other) - { - ppdb = other.ppdb; - } - - data& data::operator=(data&& other) - { - ppdb = other.ppdb; - - return *this; - } - - data::~data() - { - sqlite3_close_v2(ppdb); - } - - verb_query data::verbs() const - { - return verb_query(*this); - } - - adjective_query data::adjectives() const - { - return adjective_query(*this); - } - - adverb_query data::adverbs() const - { - return adverb_query(*this); - } - - noun_query data::nouns() const - { - return noun_query(*this); - } - - frame_query data::frames() const - { - return frame_query(*this); - } - - preposition_query data::prepositions() const - { - return preposition_query(*this); - } - - binding::type binding::get_type() const - { - return _type; - } - - binding::binding(const binding& other) - { - _type = other._type; - - switch (_type) - { - case type::integer: - { - _integer = other._integer; - - break; - } - - case type::string: - { - new(&_string) std::string(other._string); - - break; - } - } - } - - binding::~binding() - { - switch (_type) - { - case type::string: - { - using string_type = std::string; - _string.~string_type(); - - break; - } - } - } - - binding& binding::operator=(const binding& other) - { - this->~binding(); - - _type = other._type; - - switch (_type) - { - case type::integer: - { - _integer = other._integer; - - break; - } - - case type::string: - { - new(&_string) std::string(other._string); - - break; - } - } - - return *this; - } - - binding::binding(int _arg) - { - _type = type::integer; - _integer = _arg; - } - - int binding::get_integer() const - { - assert(_type == type::integer); - - return _integer; - } - - void binding::set_integer(int _arg) - { - *this = binding(_arg); - } - - binding& binding::operator=(int _arg) - { - *this = binding(_arg); - - return *this; - } - - binding::binding(std::string _arg) - { - _type = type::string; - new(&_string) std::string(_arg); - } - - std::string 
binding::get_string() const - { - assert(_type == type::string); - - return _string; - } - - void binding::set_string(std::string _arg) - { - *this = binding(_arg); - } - - binding& binding::operator=(std::string _arg) - { - *this = binding(_arg); - - return *this; - } - -}; diff --git a/lib/data.h b/lib/data.h deleted file mode 100644 index b8b12b9..0000000 --- a/lib/data.h +++ /dev/null @@ -1,380 +0,0 @@ -#ifndef DATA_H_C4AEC3DD -#define DATA_H_C4AEC3DD - -#include - -namespace verbly { - - class data; - class word; - class adjective; - class noun; - class verb; - class adverb; - class frame; - class adjective_query; - class adverb_query; - class noun_query; - class verb_query; - class frame_query; - class preposition_query; - - class data { - private: - sqlite3* ppdb; - - friend class adjective_query; - friend class noun_query; - friend class verb_query; - friend class adverb_query; - friend class frame_query; - friend class preposition_query; - - public: - data(std::string datafile); - - data(const data& other) = delete; - data& operator=(const data& other) = delete; - - data(data&& other); - data& operator=(data&& other); - - ~data(); - - verb_query verbs() const; - adjective_query adjectives() const; - adverb_query adverbs() const; - noun_query nouns() const; - frame_query frames() const; - preposition_query prepositions() const; - - }; - - template - class filter { - public: - enum class type { - singleton, - group - }; - - typedef filter value_type; - - type get_type() const - { - return _type; - } - - filter(const filter& other) - { - _type = other._type; - _notlogic = other._notlogic; - - switch (_type) - { - case type::singleton: - { - new(&_singleton.elem) T(other._singleton.elem); - - break; - } - - case type::group: - { - new(&_group.elems) std::list>(other._group.elems); - _group.orlogic = other._group.orlogic; - - break; - } - } - } - - filter& operator=(const filter& other) - { - this->~filter(); - - _type = other._type; - _notlogic = 
other._notlogic; - - switch (_type) - { - case type::singleton: - { - new(&_singleton.elem) T(other._singleton.elem); - - break; - } - - case type::group: - { - new(&_group.elems) std::list>(other._group.elems); - _group.orlogic = other._group.orlogic; - - break; - } - } - - return *this; - } - - ~filter() - { - switch (_type) - { - case type::singleton: - { - _singleton.elem.~T(); - - break; - } - - case type::group: - { - using list_type = std::list>; - _group.elems.~list_type(); - - break; - } - } - } - - bool get_notlogic() const - { - return _notlogic; - } - - void set_notlogic(bool _nl) - { - _notlogic = _nl; - } - - std::list inorder_flatten() const - { - std::list result; - - if (_type == type::singleton) - { - result.push_back(_singleton.elem); - } else if (_type == type::group) - { - for (auto elem : _group.elems) - { - auto l = elem.inorder_flatten(); - result.insert(std::end(result), std::begin(l), std::end(l)); - } - } - - return result; - } - - std::set uniq_flatten() const - { - std::set result; - - if (_type == type::singleton) - { - result.insert(_singleton.elem); - } else if (_type == type::group) - { - for (auto elem : _group.elems) - { - auto l = elem.uniq_flatten(); - result.insert(std::begin(l), std::end(l)); - } - } - - return result; - } - - void clean() - { - if (_type == type::group) - { - std::list>::iterator> toremove; - for (auto it = _group.elems.begin(); it != _group.elems.end(); it++) - { - it->clean(); - - if (it->get_type() == type::group) - { - if (it->_group.elems.size() == 0) - { - toremove.push_back(it); - } else if (it->_group.elems.size() == 1) - { - bool truelogic = it->_notlogic != it->_group.elems.front()._notlogic; - filter e = it->_group.elems.front(); - *it = e; - it->_notlogic = truelogic; - } - } - } - - for (auto rem : toremove) - { - _group.elems.erase(rem); - } - - if (_group.elems.size() == 1) - { - bool truelogic = _notlogic != _group.elems.front()._notlogic; - filter e = _group.elems.front(); - *this = e; - 
_notlogic = truelogic; - } - } - } - - // Singleton - filter(T _elem, bool _notlogic = false) : _type(type::singleton) - { - new(&_singleton.elem) T(_elem); - this->_notlogic = _notlogic; - } - - filter& operator=(T _elem) - { - *this = filter{_elem}; - - return *this; - } - - T get_elem() const - { - assert(_type == type::singleton); - - return _singleton.elem; - } - - void set_elem(T _elem) - { - assert(_type == type::singleton); - - _singleton.elem = _elem; - } - - // Group - typedef typename std::list>::iterator iterator; - - filter() : _type(type::group) - { - new(&_group.elems) std::list>(); - _group.orlogic = false; - } - - filter(std::initializer_list> _init) : _type(type::group) - { - new(&_group.elems) std::list>(_init); - _group.orlogic = false; - } - - iterator begin() - { - assert(_type == type::group); - - return _group.elems.begin(); - } - - iterator end() - { - assert(_type == type::group); - - return _group.elems.end(); - } - - filter& operator<<(filter _elem) - { - assert(_type == type::group); - - _group.elems.push_back(_elem); - - return *this; - } - - void push_back(filter _elem) - { - assert(_type == type::group); - - _group.elems.push_back(_elem); - } - - bool get_orlogic() const - { - assert(_type == type::group); - - return _group.orlogic; - } - - void set_orlogic(bool _ol) - { - assert(_type == type::group); - - _group.orlogic = _ol; - } - - bool empty() const - { - if (_type == type::group) - { - return _group.elems.empty(); - } else { - return false; - } - } - - int size() const - { - assert(_type == type::group); - - return _group.elems.size(); - } - - private: - type _type; - bool _notlogic = false; - union { - struct { - T elem; - } _singleton; - struct { - std::list> elems; - bool orlogic; - } _group; - }; - }; - - class binding { - public: - enum class type { - integer, - string - }; - - type get_type() const; - binding(const binding& other); - ~binding(); - binding& operator=(const binding& other); - - // Integer - binding(int 
_arg); - int get_integer() const; - void set_integer(int _arg); - binding& operator=(int _arg); - - // String - binding(std::string _arg); - std::string get_string() const; - void set_string(std::string _arg); - binding& operator=(std::string _arg); - - private: - union { - int _integer; - std::string _string; - }; - type _type; - }; - -}; - -#endif /* end of include guard: DATA_H_C4AEC3DD */ diff --git a/lib/database.cpp b/lib/database.cpp new file mode 100644 index 0000000..351b93d --- /dev/null +++ b/lib/database.cpp @@ -0,0 +1,79 @@ +#include "database.h" +#include +#include +#include "query.h" + +namespace verbly { + + database::database(std::string path) + { + if (sqlite3_open_v2(path.c_str(), &ppdb_, SQLITE_OPEN_READONLY, NULL) != SQLITE_OK) + { + // We still have to free the resources allocated. In the event that + // allocation failed, ppdb will be null and sqlite3_close_v2 will just + // ignore it. + std::string errmsg(sqlite3_errmsg(ppdb_)); + sqlite3_close_v2(ppdb_); + + throw database_error("Could not open verbly datafile", errmsg); + } + } + + database::database(database&& other) : database() + { + swap(*this, other); + } + + database& database::operator=(database&& other) + { + swap(*this, other); + + return *this; + } + + void swap(database& first, database& second) + { + std::swap(first.ppdb_, second.ppdb_); + } + + database::~database() + { + sqlite3_close_v2(ppdb_); + } + + query database::notions(filter where, bool random, int limit) const + { + return query(*this, ppdb_, std::move(where), random, limit); + } + + query database::words(filter where, bool random, int limit) const + { + return query(*this, ppdb_, std::move(where), random, limit); + } + + query database::groups(filter where, bool random, int limit) const + { + return query(*this, ppdb_, std::move(where), random, limit); + } + + query database::frames(filter where, bool random, int limit) const + { + return query(*this, ppdb_, std::move(where), random, limit); + } + + query 
database::lemmas(filter where, bool random, int limit) const + { + return query(*this, ppdb_, std::move(where), random, limit); + } + + query database::forms(filter where, bool random, int limit) const + { + return query(*this, ppdb_, std::move(where), random, limit); + } + + query database::pronunciations(filter where, bool random, int limit) const + { + return query(*this, ppdb_, std::move(where), random, limit); + } + +}; diff --git a/lib/database.h b/lib/database.h new file mode 100644 index 0000000..d68c40b --- /dev/null +++ b/lib/database.h @@ -0,0 +1,73 @@ +#ifndef DATABASE_H_0B0A47D2 +#define DATABASE_H_0B0A47D2 + +#include +#include +#include +#include "notion.h" +#include "word.h" +#include "group.h" +#include "frame.h" +#include "lemma.h" +#include "form.h" +#include "pronunciation.h" + +struct sqlite3; + +namespace verbly { + + template + class query; + + class database { + public: + + // Constructor + + explicit database(std::string path); + + // Disable copying + + database(const database& other) = delete; + database& operator=(const database& other) = delete; + + // Move constructor and move assignment + + database(database&& other); + database& operator=(database&& other); + + // Swap + + friend void swap(database& first, database& second); + + // Destructor + + ~database(); + + // Queries + + query notions(filter where, bool random = true, int limit = 1) const; + + query words(filter where, bool random = true, int limit = 1) const; + + query groups(filter where, bool random = true, int limit = 1) const; + + query frames(filter where, bool random = true, int limit = 1) const; + + query lemmas(filter where, bool random = true, int limit = 1) const; + + query forms(filter where, bool random = true, int limit = 1) const; + + query pronunciations(filter where, bool random = true, int limit = 1) const; + + private: + + database() = default; + + sqlite3* ppdb_ = nullptr; + + }; + +}; + +#endif /* end of include guard: DATABASE_H_0B0A47D2 */ diff --git 
a/lib/enums.h b/lib/enums.h new file mode 100644 index 0000000..b37be7b --- /dev/null +++ b/lib/enums.h @@ -0,0 +1,45 @@ +#ifndef ENUMS_H_260BA847 +#define ENUMS_H_260BA847 + +namespace verbly { + + enum class part_of_speech { + noun = 0, + adjective = 1, + adverb = 2, + verb = 3, + preposition = 4 + }; + + enum class positioning { + undefined = -1, + predicate = 0, + attributive = 1, + postnominal = 2 + }; + + enum class inflection { + base = 0, + plural = 1, + comparative = 2, + superlative = 3, + past_tense = 4, + past_participle = 5, + ing_form = 6, + s_form = 7 + }; + + enum class object { + undefined = -1, + notion = 0, + word = 1, + group = 2, + frame = 3, + lemma = 4, + form = 5, + pronunciation = 6 + }; + +}; + +#endif /* end of include guard: ENUMS_H_260BA847 */ diff --git a/lib/field.cpp b/lib/field.cpp new file mode 100644 index 0000000..d7adbb3 --- /dev/null +++ b/lib/field.cpp @@ -0,0 +1,91 @@ +#include "field.h" +#include "filter.h" + +namespace verbly { + + filter field::operator==(int value) const + { + return filter(*this, filter::comparison::int_equals, value); + } + + filter field::operator!=(int value) const + { + return filter(*this, filter::comparison::int_does_not_equal, value); + } + + filter field::operator<(int value) const + { + return filter(*this, filter::comparison::int_is_less_than, value); + } + + filter field::operator<=(int value) const + { + return filter(*this, filter::comparison::int_is_at_most, value); + } + + filter field::operator>(int value) const + { + return filter(*this, filter::comparison::int_is_greater_than, value); + } + + filter field::operator>=(int value) const + { + return filter(*this, filter::comparison::int_is_at_least, value); + } + + filter field::operator==(part_of_speech value) const + { + return filter(*this, filter::comparison::int_equals, static_cast(value)); + } + + filter field::operator==(positioning value) const + { + return filter(*this, filter::comparison::int_equals, static_cast(value)); + } + + 
filter field::operator==(inflection value) const // compares the enum by its integer value
+  {
+    return filter(*this, filter::comparison::int_equals, static_cast<int>(value));
+  }
+
+  filter field::operator==(bool value) const // boolean_equals filter on this field
+  {
+    return filter(*this, filter::comparison::boolean_equals, value);
+  }
+
+  filter field::operator==(std::string value) const // string_equals filter on this field
+  {
+    return filter(*this, filter::comparison::string_equals, std::move(value));
+  }
+
+  filter field::operator!=(std::string value) const // string_does_not_equal filter on this field
+  {
+    return filter(*this, filter::comparison::string_does_not_equal, std::move(value));
+  }
+
+  filter field::operator%=(std::string value) const // string_is_like filter on this field
+  {
+    return filter(*this, filter::comparison::string_is_like, std::move(value));
+  }
+
+  field::operator filter() const // is_not_null filter on this field
+  {
+    return filter(*this, filter::comparison::is_not_null);
+  }
+
+  filter field::operator!() const // is_null filter on this field
+  {
+    return filter(*this, filter::comparison::is_null);
+  }
+
+  filter field::operator%=(filter joinCondition) const // join filter; comparison depends on the join type
+  {
+    if (type_ == type::hierarchal_join)
+    {
+      return filter(*this, filter::comparison::hierarchally_matches, std::move(joinCondition));
+    } else {
+      return filter(*this, filter::comparison::matches, std::move(joinCondition));
+    }
+  }
+
+};
diff --git a/lib/field.h b/lib/field.h
new file mode 100644
index 0000000..30c62be
--- /dev/null
+++ b/lib/field.h
@@ -0,0 +1,306 @@
+#ifndef FIELD_H_43258321
+#define FIELD_H_43258321
+
+#include "enums.h"
+#include <stdexcept>
+#include <tuple>
+
+namespace verbly {
+
+  class filter;
+
+  class field { // describes one column or join of an object; also the entry point for building filters
+  public:
+    enum class type {
+      undefined,
+      string,
+      integer,
+      boolean,
+      join,
+      join_through,
+      hierarchal_join
+    };
+
+    // Default constructor
+
+    field() // every member keeps its in-class default (undefined / null / false)
+    {
+    }
+
+    // Static factories
+
+    static field stringField( // string column of an object
+      object obj,
+      const char* name,
+      bool nullable = false)
+    {
+      return field(obj, type::string, name, nullable);
+    }
+
+    static field stringField( // string column of a named table; object is left undefined
+      const char* table,
+      const char* name,
+      bool nullable = false)
+    {
+      return field(object::undefined, type::string, name, nullable, table);
+    }
+
+    static field integerField( // integer column of an object
+      object obj,
+      const char* name,
+      bool nullable = false)
+    {
+      return field(obj, type::integer, name, nullable);
+    }
+
+    static field integerField( // integer column of a named table; object is left undefined
+      const char* table,
+      const char* name,
+      bool nullable = false)
+    {
+      return field(object::undefined, type::integer, name, nullable, table);
+    }
+
+    static field booleanField( // boolean column of an object
+      object obj,
+      const char* name,
+      bool nullable = false)
+    {
+      return field(obj, type::boolean, name, nullable);
+    }
+
+    static field booleanField( // boolean column of a named table; object is left undefined
+      const char* table,
+      const char* name,
+      bool nullable = false)
+    {
+      return field(object::undefined, type::boolean, name, nullable, table);
+    }
+
+    static field joinField( // join from obj to another object; no table is recorded
+      object obj,
+      const char* name,
+      object joinWith,
+      bool nullable = false)
+    {
+      return field(obj, type::join, name, nullable, 0, joinWith);
+    }
+
+    static field joinField( // join from obj to a named table; join object stays undefined
+      object obj,
+      const char* name,
+      const char* table,
+      bool nullable = false)
+    {
+      return field(obj, type::join, name, nullable, table);
+    }
+
+    static field joinThrough( // join column defaults to name, foreign join column to foreignColumn
+      object obj,
+      const char* name,
+      object joinWith,
+      const char* joinTable,
+      const char* foreignColumn)
+    {
+      return field(obj, type::join_through, name, true, joinTable, joinWith, foreignColumn, name, foreignColumn);
+    }
+
+    static field joinThrough( // join through joinTable with explicit join columns; always nullable
+      object obj,
+      const char* name,
+      object joinWith,
+      const char* joinTable,
+      const char* foreignColumn,
+      const char* joinColumn,
+      const char* foreignJoinColumn)
+    {
+      return field(obj, type::join_through, name, true, joinTable, joinWith, foreignColumn, joinColumn, foreignJoinColumn);
+    }
+
+    static field selfJoin( // join_through from obj back to obj; foreign column is name
+      object obj,
+      const char* name,
+      const char* joinTable,
+      const char* joinColumn,
+      const char* foreignJoinColumn)
+    {
+      return field(obj, type::join_through, name, true, joinTable, obj, name, joinColumn, foreignJoinColumn);
+    }
+
+    static field hierarchalSelfJoin( // same arguments as selfJoin, but typed hierarchal_join
+      object obj,
+      const char* name,
+      const char* joinTable,
+      const char* joinColumn,
+      const char* foreignJoinColumn)
+    {
+      return field(obj, type::hierarchal_join, name, true, joinTable, obj, name, joinColumn, foreignJoinColumn);
+    }
+
+    // Accessors
+
+    object getObject() const
+    {
+      return object_;
+    }
+
+    type getType() const
+    {
+      return type_;
+    }
+
+    bool isJoin() const // true for join, join_through and hierarchal_join
+    {
+      return ((type_ == type::join) || (type_ == type::join_through) || (type_ == type::hierarchal_join));
+    }
+
+    const char* getColumn() const
+    {
+      return column_;
+    }
+
+    bool isNullable() const
+    {
+      return nullable_;
+    }
+
+    bool hasTable() const
+    {
+      return (table_ != 0);
+    }
+
+    const char* getTable() const // may be null; check hasTable() first
+    {
+      return table_;
+    }
+
+    // Joins
+
+    object getJoinObject() const // throws std::domain_error unless type is join or join_through
+    {
+      // We ignore hierarchal joins because they are always self joins.
+      return ((type_ == type::join) || (type_ == type::join_through))
+        ? joinObject_
+        : throw std::domain_error("Non-join fields don't have join objects");
+    }
+
+    // Many-to-many joins
+
+    const char* getForeignColumn() const // throws std::domain_error unless type is join_through
+    {
+      // We ignore hierarchal joins because they are always self joins.
+      return (type_ == type::join_through)
+        ? foreignColumn_
+        : throw std::domain_error("Only many-to-many join fields have a foreign column");
+    }
+
+    const char* getJoinColumn() const // throws std::domain_error unless join_through or hierarchal_join
+    {
+      return ((type_ == type::join_through) || (type_ == type::hierarchal_join))
+        ? joinColumn_
+        : throw std::domain_error("Only many-to-many join fields have a join column");
+    }
+
+    const char* getForeignJoinColumn() const // throws std::domain_error unless join_through or hierarchal_join
+    {
+      return ((type_ == type::join_through) || (type_ == type::hierarchal_join))
+        ? foreignJoinColumn_
+        : throw std::domain_error("Only many-to-many join fields have a foreign join column");
+    }
+
+    // Ordering
+
+    bool operator<(const field& other) const // NOTE: const char* members are compared by address, not by text
+    {
+      // For the most part, (object, column) uniquely identifies fields.
+      // However, there do exist a number of relationships from an object to
+      // itself, such as notion hypernymy/hyponymy. Hypernymy and hyponymy have
+      // the same object (notion), the same column (notion_id), and the same
+      // table (hypernymy); however, they have different join columns.
+      return std::tie(object_, column_, table_, joinColumn_) < std::tie(other.object_, other.column_, other.table_, other.joinColumn_);
+    }
+
+    // Equality
+
+    bool operator==(const field& other) const // NOTE: const char* members are compared by address, not by text
+    {
+      // For the most part, (object, column) uniquely identifies fields.
+      // However, there do exist a number of relationships from an object to
+      // itself, such as notion hypernymy/hyponymy. Hypernymy and hyponymy have
+      // the same object (notion), the same column (notion_id), and the same
+      // table (hypernymy); however, they have different join columns.
+      return std::tie(object_, column_, table_, joinColumn_) == std::tie(other.object_, other.column_, other.table_, other.joinColumn_);
+    }
+
+    // Filter construction
+
+    filter operator==(int value) const; // Integer equality
+    filter operator!=(int value) const; // Integer inequality
+    filter operator<(int value) const; // Integer is less than
+    filter operator<=(int value) const; // Integer is at most
+    filter operator>(int value) const; // Integer is greater than
+    filter operator>=(int value) const; // Integer is at least
+
+    filter operator==(part_of_speech value) const; // Part of speech equality
+    filter operator==(positioning value) const; // Adjective positioning equality
+    filter operator==(inflection value) const; // Inflection category equality
+
+    filter operator==(bool value) const; // Boolean equality
+
+    filter operator==(std::string value) const; // String equality
+    filter operator!=(std::string value) const; // String inequality
+    filter operator%=(std::string value) const; // String matching
+
+    operator filter() const; // Non-nullity
+    filter operator!() const; // Nullity
+
+    filter operator%=(filter joinCondition) const; // Join
+
+  private:
+
+    // Constructor
+
+    field(
+      object obj,
+      type datatype,
+      const char* column,
+      bool nullable = false,
+      const char* table = 0,
+      object joinObject = object::undefined,
+      const char* foreignColumn = 0,
+      const char* joinColumn = 0,
+      const char* foreignJoinColumn = 0) :
+        object_(obj),
+        type_(datatype),
+        column_(column),
+        table_(table), // initializers listed in member declaration order
+        nullable_(nullable),
+        joinObject_(joinObject),
+        foreignColumn_(foreignColumn),
+        joinColumn_(joinColumn),
+        foreignJoinColumn_(foreignJoinColumn)
+    {
+    }
+
+    // General
+    object object_ = object::undefined;
+    type type_ = type::undefined;
+    const char* column_ = 0;
+    const char* table_ = 0;
+
+    // Non-joins and belongs-to joins
+    bool nullable_ = false;
+
+    // Joins
+    object joinObject_ = object::undefined;
+
+    // Many-to-many joins
+    const char* foreignColumn_ = 0;
+    const char* joinColumn_ = 0;
+    const char* foreignJoinColumn_ = 0;
+
+  };
+
+};
+
+#endif /* end of include guard: FIELD_H_43258321 */
diff --git a/lib/filter.cpp b/lib/filter.cpp
new file mode 100644
index 0000000..959fa05
--- /dev/null
+++ b/lib/filter.cpp
@@ -0,0 +1,1365 @@
+#include "filter.h"
+#include <stdexcept> // NOTE(review): header name was missing here; inferred from std::domain_error/std::invalid_argument use - confirm
+#include <map> // NOTE(review): header name was missing here; inferred from the std::map used in normalize() - confirm
+#include "notion.h"
+#include "word.h"
+#include "group.h"
+#include "frame.h"
+#include "lemma.h"
+#include "form.h"
+#include "pronunciation.h"
+
+namespace verbly {
+
+  filter::filter(const filter& other) // deep copy: constructs only the members that are active for other's type
+  {
+    type_ = other.type_;
+
+    switch (type_)
+    {
+      case type::empty:
+      {
+        break;
+      }
+
+      case type::singleton:
+      {
+        new(&singleton_.filterField) field(other.singleton_.filterField);
+        singleton_.filterType = other.singleton_.filterType;
+
+        switch (singleton_.filterType)
+        {
+          case comparison::int_equals:
+          case comparison::int_does_not_equal:
+          case comparison::int_is_at_least:
+          case comparison::int_is_greater_than:
+          case comparison::int_is_at_most:
+          case comparison::int_is_less_than:
+          {
+            singleton_.intValue = other.singleton_.intValue;
+
+            break;
+          }
+
+          case comparison::boolean_equals:
+          {
+            singleton_.boolValue = other.singleton_.boolValue;
+
+            break;
+          }
+
+          case comparison::string_equals:
+          case 
comparison::string_does_not_equal:
+          case comparison::string_is_like:
+          case comparison::string_is_not_like:
+          {
+            new(&singleton_.stringValue) std::string(other.singleton_.stringValue);
+
+            break;
+          }
+
+          case comparison::is_null:
+          case comparison::is_not_null:
+          {
+            break;
+          }
+
+          case comparison::matches:
+          case comparison::does_not_match:
+          case comparison::hierarchally_matches:
+          case comparison::does_not_hierarchally_match:
+          {
+            new(&singleton_.join) std::unique_ptr<filter>(new filter(*other.singleton_.join)); // the join condition is copied, not shared
+
+            break;
+          }
+        }
+
+        break;
+      }
+
+      case type::group:
+      {
+        new(&group_.children) std::list<filter>(other.group_.children);
+        group_.orlogic = other.group_.orlogic;
+
+        break;
+      }
+    }
+  }
+
+  filter::filter(filter&& other) : filter() // default-construct, then take other's contents via swap
+  {
+    swap(*this, other);
+  }
+
+  filter& filter::operator=(filter other) // copy-and-swap: other is already a copy (or moved-in value)
+  {
+    swap(*this, other);
+
+    return *this;
+  }
+
+  void swap(filter& first, filter& second) // exchanges the two filters; active members are moved out, destroyed and rebuilt in place
+  {
+    using type = filter::type;
+    using comparison = filter::comparison;
+    if (&first == &second) return; // self-swap guard: the code below would rebuild first from its own destroyed members
+    type tempType = first.type_;
+    field tempField;
+    comparison tempComparison;
+    std::unique_ptr<filter> tempJoin;
+    std::string tempStringValue;
+    int tempIntValue;
+    bool tempBoolValue;
+    std::list<filter> tempChildren;
+    bool tempOrlogic;
+
+    switch (tempType) // step 1: move first's active members into the temporaries
+    {
+      case type::empty:
+      {
+        break;
+      }
+
+      case type::singleton:
+      {
+        tempField = std::move(first.singleton_.filterField);
+        tempComparison = first.singleton_.filterType;
+
+        switch (tempComparison)
+        {
+          case comparison::int_equals:
+          case comparison::int_does_not_equal:
+          case comparison::int_is_at_least:
+          case comparison::int_is_greater_than:
+          case comparison::int_is_at_most:
+          case comparison::int_is_less_than:
+          {
+            tempIntValue = first.singleton_.intValue;
+
+            break;
+          }
+
+          case comparison::boolean_equals:
+          {
+            tempBoolValue = first.singleton_.boolValue;
+
+            break;
+          }
+
+          case comparison::string_equals:
+          case comparison::string_does_not_equal:
+          case comparison::string_is_like:
+          case comparison::string_is_not_like:
+          {
+            tempStringValue = std::move(first.singleton_.stringValue);
+
+            break;
+          }
+
+          case comparison::is_null:
+          case comparison::is_not_null:
+          {
+            break;
+          }
+
+          case comparison::matches:
+          case comparison::does_not_match:
+          case comparison::hierarchally_matches:
+          case comparison::does_not_hierarchally_match:
+          {
+            tempJoin = std::move(first.singleton_.join);
+
+            break;
+          }
+        }
+
+        break;
+      }
+
+      case type::group:
+      {
+        tempChildren = std::move(first.group_.children);
+        tempOrlogic = first.group_.orlogic;
+
+        break;
+      }
+    }
+
+    first.~filter(); // step 2: destroy first's (moved-from) members, then rebuild first from second
+
+    first.type_ = second.type_;
+
+    switch (first.type_)
+    {
+      case type::empty:
+      {
+        break;
+      }
+
+      case type::singleton:
+      {
+        new(&first.singleton_.filterField) field(std::move(second.singleton_.filterField));
+        first.singleton_.filterType = second.singleton_.filterType;
+
+        switch (first.singleton_.filterType)
+        {
+          case comparison::int_equals:
+          case comparison::int_does_not_equal:
+          case comparison::int_is_at_least:
+          case comparison::int_is_greater_than:
+          case comparison::int_is_at_most:
+          case comparison::int_is_less_than:
+          {
+            first.singleton_.intValue = second.singleton_.intValue;
+
+            break;
+          }
+
+          case comparison::boolean_equals:
+          {
+            first.singleton_.boolValue = second.singleton_.boolValue;
+
+            break;
+          }
+
+          case comparison::string_equals:
+          case comparison::string_does_not_equal:
+          case comparison::string_is_like:
+          case comparison::string_is_not_like:
+          {
+            new(&first.singleton_.stringValue) std::string(std::move(second.singleton_.stringValue));
+
+            break;
+          }
+
+          case comparison::is_null:
+          case comparison::is_not_null:
+          {
+            break;
+          }
+
+          case comparison::matches:
+          case comparison::does_not_match:
+          case comparison::hierarchally_matches:
+          case comparison::does_not_hierarchally_match:
+          {
+            new(&first.singleton_.join) std::unique_ptr<filter>(std::move(second.singleton_.join));
+
+            break;
+          }
+        }
+
+        break;
+      }
+
+      case type::group:
+      {
+        new(&first.group_.children) std::list<filter>(std::move(second.group_.children));
+        first.group_.orlogic = second.group_.orlogic;
+
+        break;
+      }
+    }
+
+    second.~filter(); // step 3: destroy second's (moved-from) members, then rebuild second from the temporaries
+
+    second.type_ = tempType;
+
+    switch (second.type_)
+    {
+      case type::empty:
+      {
+        break;
+      }
+
+      case type::singleton:
+      {
+        new(&second.singleton_.filterField) field(std::move(tempField));
+        second.singleton_.filterType = tempComparison;
+
+        switch (second.singleton_.filterType)
+        {
+          case comparison::int_equals:
+          case comparison::int_does_not_equal:
+          case comparison::int_is_at_least:
+          case comparison::int_is_greater_than:
+          case comparison::int_is_at_most:
+          case comparison::int_is_less_than:
+          {
+            second.singleton_.intValue = tempIntValue;
+
+            break;
+          }
+
+          case comparison::boolean_equals:
+          {
+            second.singleton_.boolValue = tempBoolValue;
+
+            break;
+          }
+
+          case comparison::string_equals:
+          case comparison::string_does_not_equal:
+          case comparison::string_is_like:
+          case comparison::string_is_not_like:
+          {
+            new(&second.singleton_.stringValue) std::string(std::move(tempStringValue));
+
+            break;
+          }
+
+          case comparison::is_null:
+          case comparison::is_not_null:
+          {
+            break;
+          }
+
+          case comparison::matches:
+          case comparison::does_not_match:
+          case comparison::hierarchally_matches:
+          case comparison::does_not_hierarchally_match:
+          {
+            new(&second.singleton_.join) std::unique_ptr<filter>(std::move(tempJoin));
+
+            break;
+          }
+        }
+
+        break;
+      }
+
+      case type::group:
+      {
+        new(&second.group_.children) std::list<filter>(std::move(tempChildren));
+        second.group_.orlogic = tempOrlogic;
+
+        break;
+      }
+    }
+  }
+
+  filter::~filter() // destroys only the members that are active for the current type and comparison
+  {
+    switch (type_)
+    {
+      case type::empty:
+      {
+        break;
+      }
+
+      case type::singleton:
+      {
+        singleton_.filterField.~field();
+
+        switch (singleton_.filterType)
+        {
+          case comparison::int_equals:
+          case comparison::int_does_not_equal:
+          case comparison::int_is_at_least:
+          case comparison::int_is_greater_than:
+          case comparison::int_is_at_most:
+          case comparison::int_is_less_than:
+          case comparison::boolean_equals:
+          case comparison::is_null:
+          case 
comparison::is_not_null:
+          {
+            break;
+          }
+
+          case comparison::string_equals:
+          case comparison::string_does_not_equal:
+          case comparison::string_is_like:
+          case comparison::string_is_not_like:
+          {
+            using string_type = std::string; // alias so the destructor can be named explicitly
+
+            singleton_.stringValue.~string_type();
+
+            break;
+          }
+
+          case comparison::matches:
+          case comparison::does_not_match:
+          case comparison::hierarchally_matches:
+          case comparison::does_not_hierarchally_match:
+          {
+            using ptr_type = std::unique_ptr<filter>;
+
+            singleton_.join.~ptr_type();
+
+            break;
+          }
+        }
+
+        break;
+      }
+
+      case type::group:
+      {
+        using list_type = std::list<filter>;
+
+        group_.children.~list_type();
+
+        break;
+      }
+    }
+  }
+
+  filter::filter() // constructs no members here
+  {
+  }
+
+  filter::filter( // integer comparison; throws domain_error for a non-integer field, invalid_argument for a non-integer comparison
+    field filterField,
+    comparison filterType,
+    int filterValue) :
+      type_(type::singleton)
+  {
+    if (filterField.getType() == field::type::integer)
+    {
+      switch (filterType)
+      {
+        case comparison::int_equals:
+        case comparison::int_does_not_equal:
+        case comparison::int_is_at_least:
+        case comparison::int_is_greater_than:
+        case comparison::int_is_at_most:
+        case comparison::int_is_less_than:
+        {
+          new(&singleton_.filterField) field(std::move(filterField));
+          singleton_.filterType = filterType;
+          singleton_.intValue = filterValue;
+
+          break;
+        }
+
+        case comparison::boolean_equals:
+        case comparison::string_equals:
+        case comparison::string_does_not_equal:
+        case comparison::string_is_like:
+        case comparison::string_is_not_like:
+        case comparison::is_null:
+        case comparison::is_not_null:
+        case comparison::matches:
+        case comparison::does_not_match:
+        case comparison::hierarchally_matches:
+        case comparison::does_not_hierarchally_match:
+        {
+          throw std::invalid_argument("Invalid comparison for integer field");
+        }
+      }
+    } else {
+      throw std::domain_error("Cannot match a non-integer field against an integer value");
+    }
+  }
+
+  filter::filter( // string comparison; throws domain_error for a non-string field, invalid_argument for a non-string comparison
+    field filterField,
+    comparison filterType,
+    std::string filterValue) :
+      type_(type::singleton)
+  {
+    if (filterField.getType() == field::type::string)
+    {
+      switch (filterType)
+      {
+        case comparison::string_equals:
+        case comparison::string_does_not_equal:
+        case comparison::string_is_like:
+        case comparison::string_is_not_like:
+        {
+          new(&singleton_.filterField) field(std::move(filterField));
+          singleton_.filterType = filterType;
+          new(&singleton_.stringValue) std::string(std::move(filterValue));
+
+          break;
+        }
+
+        case comparison::int_equals:
+        case comparison::int_does_not_equal:
+        case comparison::int_is_at_least:
+        case comparison::int_is_greater_than:
+        case comparison::int_is_at_most:
+        case comparison::int_is_less_than:
+        case comparison::boolean_equals:
+        case comparison::is_null:
+        case comparison::is_not_null:
+        case comparison::matches:
+        case comparison::does_not_match:
+        case comparison::hierarchally_matches:
+        case comparison::does_not_hierarchally_match:
+        {
+          throw std::invalid_argument("Invalid comparison for string field");
+        }
+      }
+    } else {
+      throw std::domain_error("Cannot match a non-string field against an string value");
+    }
+  }
+
+  filter::filter( // boolean comparison; throws domain_error for a non-boolean field, invalid_argument for any other comparison
+    field filterField,
+    comparison filterType,
+    bool filterValue) :
+      type_(type::singleton)
+  {
+    if (filterField.getType() == field::type::boolean)
+    {
+      switch (filterType)
+      {
+        case comparison::boolean_equals:
+        {
+          new(&singleton_.filterField) field(std::move(filterField));
+          singleton_.filterType = filterType;
+          singleton_.boolValue = filterValue;
+
+          break;
+        }
+
+        case comparison::string_equals:
+        case comparison::string_does_not_equal:
+        case comparison::string_is_like:
+        case comparison::string_is_not_like:
+        case comparison::int_equals:
+        case comparison::int_does_not_equal:
+        case comparison::int_is_at_least:
+        case comparison::int_is_greater_than:
+        case comparison::int_is_at_most:
+        case comparison::int_is_less_than:
+        case comparison::is_null:
+        case comparison::is_not_null:
+        case comparison::matches:
+        case comparison::does_not_match:
+        case comparison::hierarchally_matches:
+        case comparison::does_not_hierarchally_match:
+        {
+          throw std::invalid_argument("Invalid comparison for boolean field");
+        }
+      }
+    } else {
+      throw std::domain_error("Cannot match a non-boolean field against a boolean value");
+    }
+  }
+
+  filter::filter( // nullity check; throws domain_error for a non-nullable field, invalid_argument for any other comparison
+    field filterField,
+    comparison filterType) :
+      type_(type::singleton)
+  {
+    if (filterField.isNullable())
+    {
+      switch (filterType)
+      {
+        case comparison::is_null:
+        case comparison::is_not_null:
+        {
+          new(&singleton_.filterField) field(std::move(filterField));
+          singleton_.filterType = filterType;
+
+          break;
+        }
+
+        case comparison::string_equals:
+        case comparison::string_does_not_equal:
+        case comparison::string_is_like:
+        case comparison::string_is_not_like:
+        case comparison::int_equals:
+        case comparison::int_does_not_equal:
+        case comparison::int_is_at_least:
+        case comparison::int_is_greater_than:
+        case comparison::int_is_at_most:
+        case comparison::int_is_less_than:
+        case comparison::boolean_equals:
+        case comparison::matches:
+        case comparison::does_not_match:
+        case comparison::hierarchally_matches:
+        case comparison::does_not_hierarchally_match:
+        {
+          throw std::invalid_argument("Incorrect constructor for given comparison");
+        }
+      }
+    } else {
+      throw std::domain_error("Cannot check nullity/non-nullity of non-nullable field");
+    }
+  }
+
+  filter::filter( // join filter; the condition is normalized before it is stored
+    field joinOn,
+    comparison filterType,
+    filter joinCondition) :
+      type_(type::singleton)
+  {
+    switch (joinOn.getType())
+    {
+      case field::type::join:
+      case field::type::join_through:
+      {
+        switch (filterType)
+        {
+          case comparison::matches:
+          case comparison::does_not_match:
+          {
+            new(&singleton_.filterField) field(std::move(joinOn));
+            singleton_.filterType = filterType;
+            new(&singleton_.join) std::unique_ptr<filter>(new filter(joinCondition.normalize(singleton_.filterField.getJoinObject()))); // normalized to the joined object
+
+            break;
+          }
+
+          case comparison::int_equals:
+          case comparison::int_does_not_equal:
+          case comparison::int_is_at_least:
+          case comparison::int_is_greater_than:
+          case comparison::int_is_at_most:
+          case comparison::int_is_less_than:
+          case comparison::boolean_equals:
+          case comparison::string_equals:
+          case comparison::string_does_not_equal:
+          case comparison::string_is_like:
+          case comparison::string_is_not_like:
+          case comparison::is_null:
+          case comparison::is_not_null:
+          case comparison::hierarchally_matches:
+          case comparison::does_not_hierarchally_match:
+          {
+            throw std::invalid_argument("Incorrect constructor for given comparison");
+          }
+        }
+
+        break;
+      }
+
+      case field::type::hierarchal_join:
+      {
+        switch (filterType)
+        {
+          case comparison::hierarchally_matches:
+          case comparison::does_not_hierarchally_match:
+          {
+            new(&singleton_.filterField) field(std::move(joinOn));
+            singleton_.filterType = filterType;
+            new(&singleton_.join) std::unique_ptr<filter>(new filter(joinCondition.normalize(singleton_.filterField.getObject()))); // normalized to the field's own object
+
+            break;
+          }
+
+          case comparison::int_equals:
+          case comparison::int_does_not_equal:
+          case comparison::int_is_at_least:
+          case comparison::int_is_greater_than:
+          case comparison::int_is_at_most:
+          case comparison::int_is_less_than:
+          case comparison::boolean_equals:
+          case comparison::string_equals:
+          case comparison::string_does_not_equal:
+          case comparison::string_is_like:
+          case comparison::string_is_not_like:
+          case comparison::is_null:
+          case comparison::is_not_null:
+          case comparison::matches:
+          case comparison::does_not_match:
+          {
+            throw std::invalid_argument("Incorrect constructor for given comparison");
+          }
+        }
+
+        break;
+      }
+
+      case field::type::undefined:
+      case field::type::string:
+      case field::type::integer:
+      case field::type::boolean:
+      {
+        throw std::domain_error("Matching field must be a join field");
+      }
+    }
+  }
+
+  field filter::getField() const // throws std::domain_error unless this is a singleton filter
+  {
+    if (type_ == type::singleton)
+    {
+      return singleton_.filterField;
+    } else {
+      throw std::domain_error("This filter does not have a field");
+    }
+  }
+
+  filter::comparison filter::getComparison() 
const
+  {
+    if (type_ == type::singleton)
+    {
+      return singleton_.filterType;
+    } else {
+      throw std::domain_error("This filter does not have a comparison");
+    }
+  }
+
+  filter filter::getJoinCondition() const // returns a copy of the join condition; throws std::domain_error for non-join filters
+  {
+    if (type_ == type::singleton)
+    {
+      switch (singleton_.filterType)
+      {
+        case comparison::matches:
+        case comparison::does_not_match:
+        case comparison::hierarchally_matches:
+        case comparison::does_not_hierarchally_match:
+        {
+          return *singleton_.join;
+        }
+
+        case comparison::string_equals:
+        case comparison::string_does_not_equal:
+        case comparison::string_is_like:
+        case comparison::string_is_not_like:
+        case comparison::int_equals:
+        case comparison::int_does_not_equal:
+        case comparison::int_is_at_least:
+        case comparison::int_is_greater_than:
+        case comparison::int_is_at_most:
+        case comparison::int_is_less_than:
+        case comparison::boolean_equals:
+        case comparison::is_null:
+        case comparison::is_not_null:
+        {
+          throw std::domain_error("This filter does not have a join condition");
+        }
+      }
+    } else {
+      throw std::domain_error("This filter does not have a join condition");
+    }
+  }
+
+  std::string filter::getStringArgument() const // throws std::domain_error unless this is a string comparison
+  {
+    if (type_ == type::singleton)
+    {
+      switch (singleton_.filterType)
+      {
+        case comparison::string_equals:
+        case comparison::string_does_not_equal:
+        case comparison::string_is_like:
+        case comparison::string_is_not_like:
+        {
+          return singleton_.stringValue;
+        }
+
+        case comparison::int_equals:
+        case comparison::int_does_not_equal:
+        case comparison::int_is_at_least:
+        case comparison::int_is_greater_than:
+        case comparison::int_is_at_most:
+        case comparison::int_is_less_than:
+        case comparison::boolean_equals:
+        case comparison::is_null:
+        case comparison::is_not_null:
+        case comparison::matches:
+        case comparison::does_not_match:
+        case comparison::hierarchally_matches:
+        case comparison::does_not_hierarchally_match:
+        {
+          throw std::domain_error("This filter does not have a string argument");
+        }
+      }
+    } else {
+      throw std::domain_error("This filter does not have a string argument");
+    }
+  }
+
+  int filter::getIntegerArgument() const // throws std::domain_error unless this is an integer comparison
+  {
+    if (type_ == type::singleton)
+    {
+      switch (singleton_.filterType)
+      {
+        case comparison::int_equals:
+        case comparison::int_does_not_equal:
+        case comparison::int_is_at_least:
+        case comparison::int_is_greater_than:
+        case comparison::int_is_at_most:
+        case comparison::int_is_less_than:
+        {
+          return singleton_.intValue;
+        }
+
+        case comparison::string_equals:
+        case comparison::string_does_not_equal:
+        case comparison::string_is_like:
+        case comparison::string_is_not_like:
+        case comparison::boolean_equals:
+        case comparison::is_null:
+        case comparison::is_not_null:
+        case comparison::matches:
+        case comparison::does_not_match:
+        case comparison::hierarchally_matches:
+        case comparison::does_not_hierarchally_match:
+        {
+          throw std::domain_error("This filter does not have an integer argument");
+        }
+      }
+    } else {
+      throw std::domain_error("This filter does not have an integer argument");
+    }
+  }
+
+  bool filter::getBooleanArgument() const // throws std::domain_error unless this is a boolean_equals singleton
+  {
+    if ((type_ == type::singleton) && (singleton_.filterType == comparison::boolean_equals))
+    {
+      return singleton_.boolValue;
+    } else {
+      throw std::domain_error("This filter does not have a boolean argument");
+    }
+  }
+
+  filter::filter(bool orlogic) : type_(type::group) // empty group filter with the given orlogic flag
+  {
+    new(&group_.children) std::list<filter>();
+    group_.orlogic = orlogic;
+  }
+
+  bool filter::getOrlogic() const // throws std::domain_error unless this is a group filter
+  {
+    if (type_ == type::group)
+    {
+      return group_.orlogic;
+    } else {
+      throw std::domain_error("This filter is not a group filter");
+    }
+  }
+
+  filter filter::operator+(filter condition) const // copy of this group with condition appended; throws like operator+=
+  {
+    filter result(*this);
+    result += std::move(condition);
+
+    return result;
+  }
+
+  filter& filter::operator+=(filter condition) // appends a child; throws std::domain_error unless this is a group filter
+  {
+    if (type_ == type::group)
+    {
+      group_.children.push_back(std::move(condition));
+
+      return *this;
+    } else {
+      throw std::domain_error("Children can only be added to group filters");
+    }
+  }
+
+  filter::const_iterator filter::begin() const // first child; throws std::domain_error unless this is a group filter
+  {
+    if (type_ == type::group)
+    {
+      return std::begin(group_.children);
+    } else {
+      throw std::domain_error("This filter has no children");
+    }
+  }
+
+  filter::const_iterator filter::end() const // past-the-end child; throws std::domain_error unless this is a group filter
+  {
+    if (type_ == type::group)
+    {
+      return std::end(group_.children);
+    } else {
+      throw std::domain_error("This filter has no children");
+    }
+  }
+
+  filter filter::operator!() const // logical negation: each comparison maps to its opposite, groups flip orlogic and negate children
+  {
+    switch (type_)
+    {
+      case type::empty:
+      {
+        return {};
+      }
+
+      case type::singleton:
+      {
+        switch (singleton_.filterType)
+        {
+          case comparison::int_equals:
+          {
+            return filter(singleton_.filterField, comparison::int_does_not_equal, singleton_.intValue);
+          }
+
+          case comparison::int_does_not_equal:
+          {
+            return filter(singleton_.filterField, comparison::int_equals, singleton_.intValue);
+          }
+
+          case comparison::int_is_at_least:
+          {
+            return filter(singleton_.filterField, comparison::int_is_less_than, singleton_.intValue);
+          }
+
+          case comparison::int_is_greater_than:
+          {
+            return filter(singleton_.filterField, comparison::int_is_at_most, singleton_.intValue);
+          }
+
+          case comparison::int_is_at_most:
+          {
+            return filter(singleton_.filterField, comparison::int_is_greater_than, singleton_.intValue);
+          }
+
+          case comparison::int_is_less_than:
+          {
+            return filter(singleton_.filterField, comparison::int_is_at_least, singleton_.intValue);
+          }
+
+          case comparison::boolean_equals:
+          {
+            return filter(singleton_.filterField, comparison::boolean_equals, !singleton_.boolValue); // same comparison, inverted value
+          }
+
+          case comparison::string_equals:
+          {
+            return filter(singleton_.filterField, comparison::string_does_not_equal, singleton_.stringValue);
+          }
+
+          case comparison::string_does_not_equal:
+          {
+            return filter(singleton_.filterField, comparison::string_equals, singleton_.stringValue);
+          }
+
+          case comparison::string_is_like:
+          {
+            return filter(singleton_.filterField, comparison::string_is_not_like, singleton_.stringValue);
+          }
+
+          case comparison::string_is_not_like:
+          {
+            return filter(singleton_.filterField, comparison::string_is_like, singleton_.stringValue);
+          }
+
+          case comparison::is_null:
+          {
+            return filter(singleton_.filterField, comparison::is_not_null);
+          }
+
+          case comparison::is_not_null:
+          {
+            return filter(singleton_.filterField, comparison::is_null);
+          }
+
+          case comparison::matches:
+          {
+            return filter(singleton_.filterField, comparison::does_not_match, *singleton_.join);
+          }
+
+          case comparison::does_not_match:
+          {
+            return filter(singleton_.filterField, comparison::matches, *singleton_.join);
+          }
+
+          case comparison::hierarchally_matches:
+          {
+            return filter(singleton_.filterField, comparison::does_not_hierarchally_match, *singleton_.join);
+          }
+
+          case comparison::does_not_hierarchally_match:
+          {
+            return filter(singleton_.filterField, comparison::hierarchally_matches, *singleton_.join);
+          }
+        }
+      }
+
+      case type::group:
+      {
+        filter result(!group_.orlogic); // De Morgan: flip and/or, then negate every child
+
+        for (const filter& child : group_.children)
+        {
+          result += !child;
+        }
+
+        return result;
+      }
+    }
+  }
+
+  filter& filter::operator&=(filter condition) // in-place form of operator&&
+  {
+    return (*this = (*this && std::move(condition)));
+  }
+
+  filter& filter::operator|=(filter condition) // in-place form of operator||
+  {
+    return (*this = (*this || std::move(condition)));
+  }
+
+  filter filter::operator&&(filter condition) const // conjunction; an existing and-group is extended rather than nested
+  {
+    switch (type_)
+    {
+      case type::empty:
+      {
+        return condition;
+      }
+
+      case type::singleton:
+      {
+        filter result(false);
+        result.group_.children.push_back(*this);
+        result.group_.children.push_back(std::move(condition));
+
+        return result;
+      }
+
+      case type::group:
+      {
+        if (group_.orlogic)
+        {
+          filter result(false);
+          result.group_.children.push_back(*this);
+          result.group_.children.push_back(std::move(condition));
+
+          return result;
+        } else {
+          filter result(*this);
+          result.group_.children.push_back(std::move(condition));
+
+          return result;
+        }
+      }
+    }
+  }
+
+  filter filter::operator||(filter condition) const // disjunction; an existing or-group is extended rather than nested
+  {
+    switch (type_)
+    {
+      case type::empty:
+      {
+        return condition;
+      }
+
+      case type::singleton:
+      {
+        filter result(true);
+        result.group_.children.push_back(*this);
+        result.group_.children.push_back(std::move(condition));
+
+        return result;
+      }
+
+      case type::group:
+      {
+        if (!group_.orlogic)
+        {
+          filter result(true);
+          result.group_.children.push_back(*this);
+          result.group_.children.push_back(std::move(condition));
+
+          return result;
+        } else {
+          filter result(*this);
+          result.group_.children.push_back(std::move(condition));
+
+          return result;
+        }
+      }
+    }
+  }
+
+  filter filter::normalize(object context) const
+  {
+    {
+      switch (type_)
+      {
+        case type::empty:
+        {
+          return *this;
+        }
+
+        case type::singleton:
+        {
+          // First, switch on the normalized context, and then switch on the
+          // current context. We recursively recontextualize by using the
+          // current filter as a subquery for a join such that the context of
+          // the subquery is one step closer to the context of the current
+          // filter, and then letting the filter constructor normalize the
+          // subquery.
+          switch (context)
+          {
+            case object::undefined:
+            {
+              // An undefined object indicates no participation in
+              // recontextualization.
+ return *this; + } + + case object::notion: + { + switch (singleton_.filterField.getObject()) + { + case object::undefined: + case object::notion: + { + return *this; + } + + case object::word: + case object::group: + case object::frame: + case object::lemma: + case object::form: + case object::pronunciation: + { + return (verbly::notion::word %= *this); + } + } + } + + case object::word: + { + switch (singleton_.filterField.getObject()) + { + case object::notion: + { + return (verbly::word::notion %= *this); + } + + case object::undefined: + case object::word: + { + return *this; + } + + case object::group: + case object::frame: + { + return (verbly::word::group %= *this); + } + + case object::lemma: + case object::form: + case object::pronunciation: + { + return (verbly::word::lemma %= *this); + } + } + + case object::group: + { + switch (singleton_.filterField.getObject()) + { + case object::undefined: + case object::group: + { + return *this; + } + + case object::notion: + case object::word: + case object::lemma: + case object::form: + case object::pronunciation: + { + return (verbly::group::word %= *this); + } + + case object::frame: + { + return (verbly::group::frame %= *this); + } + } + } + + case object::frame: + { + switch (singleton_.filterField.getObject()) + { + case object::undefined: + case object::frame: + { + return *this; + } + + case object::notion: + case object::word: + case object::group: + case object::lemma: + case object::form: + case object::pronunciation: + { + return (verbly::frame::group %= *this); + } + } + } + + case object::lemma: + { + switch (singleton_.filterField.getObject()) + { + case object::notion: + case object::word: + case object::group: + case object::frame: + { + return verbly::lemma::word %= *this; + } + + case object::undefined: + case object::lemma: + { + return *this; + } + + case object::form: + case object::pronunciation: + { + return (verbly::lemma::form(inflection::base) %= *this); + } + } + } + + case 
object::form: + { + switch (singleton_.filterField.getObject()) + { + case object::notion: + case object::word: + case object::group: + case object::frame: + case object::lemma: + { + return verbly::form::lemma(inflection::base) %= *this; + } + + case object::undefined: + case object::form: + { + return *this; + } + + case object::pronunciation: + { + return (verbly::form::pronunciation %= *this); + } + } + } + + case object::pronunciation: + { + switch (singleton_.filterField.getObject()) + { + case object::notion: + case object::word: + case object::group: + case object::frame: + case object::lemma: + case object::form: + { + return verbly::pronunciation::form %= *this; + } + + case object::undefined: + case object::pronunciation: + { + return *this; + } + } + } + } + } + } + + case type::group: + { + filter result(group_.orlogic); + std::map joins; + + for (const filter& child : group_.children) + { + filter normalized = child.normalize(context); + + // Notably, this does not attempt to merge hierarchal matches. 
+ switch (normalized.getType()) + { + case type::singleton: + { + switch (normalized.getComparison()) + { + case comparison::matches: + { + if (!joins.count(normalized.singleton_.filterField)) + { + joins[normalized.getField()] = filter(group_.orlogic); + } + + joins.at(normalized.getField()) += std::move(*normalized.singleton_.join); + + break; + } + + case comparison::does_not_match: + { + if (!joins.count(normalized.singleton_.filterField)) + { + joins[normalized.getField()] = filter(group_.orlogic); + } + + joins.at(normalized.getField()) += !*normalized.singleton_.join; + + break; + } + + case comparison::int_equals: + case comparison::int_does_not_equal: + case comparison::int_is_at_least: + case comparison::int_is_greater_than: + case comparison::int_is_at_most: + case comparison::int_is_less_than: + case comparison::boolean_equals: + case comparison::string_equals: + case comparison::string_does_not_equal: + case comparison::string_is_like: + case comparison::string_is_not_like: + case comparison::is_null: + case comparison::is_not_null: + case comparison::hierarchally_matches: + case comparison::does_not_hierarchally_match: + { + result += std::move(normalized); + + break; + } + } + + break; + } + + case type::group: + case type::empty: + { + result += std::move(normalized); + + break; + } + } + } + + for (auto& mapping : joins) + { + const field& joinOn = mapping.first; + filter& joinCondition = mapping.second; + + result += (joinOn %= joinCondition.normalize(joinOn.getJoinObject())); + } + + return result; + } + } + } + } + +}; diff --git a/lib/filter.h b/lib/filter.h new file mode 100644 index 0000000..d213d7a --- /dev/null +++ b/lib/filter.h @@ -0,0 +1,143 @@ +#ifndef FILTER_H_932BA9C6 +#define FILTER_H_932BA9C6 + +#include +#include +#include +#include "field.h" +#include "enums.h" + +namespace verbly { + + class filter { + public: + enum class type { + empty, + singleton, + group + }; + + enum class comparison { + int_equals, + int_does_not_equal, + 
int_is_at_least, + int_is_greater_than, + int_is_at_most, + int_is_less_than, + boolean_equals, + string_equals, + string_does_not_equal, + string_is_like, + string_is_not_like, + is_null, + is_not_null, + matches, + does_not_match, + hierarchally_matches, + does_not_hierarchally_match + }; + + // Copy and move constructors + + filter(const filter& other); + filter(filter&& other); + + // Assignment + + filter& operator=(filter other); + + // Swap + + friend void swap(filter& first, filter& second); + + // Destructor + + ~filter(); + + // Accessors + + type getType() const + { + return type_; + } + + // Empty + + filter(); + + // Singleton + + filter(field filterField, comparison filterType, int filterValue); + filter(field filterField, comparison filterType, std::string filterValue); + filter(field filterField, comparison filterType, bool filterValue); + filter(field filterField, comparison filterType); + filter(field joinOn, comparison filterType, filter joinCondition); + + field getField() const; + + comparison getComparison() const; + + filter getJoinCondition() const; + + std::string getStringArgument() const; + + int getIntegerArgument() const; + + bool getBooleanArgument() const; + + // Group + + explicit filter(bool orlogic); + + bool getOrlogic() const; + + filter operator+(filter condition) const; + + filter& operator+=(filter condition); + + using const_iterator = std::list::const_iterator; + + const_iterator begin() const; + + const_iterator end() const; + + // Negation + + filter operator!() const; + + // Groupifying + + filter operator&&(filter condition) const; + filter operator||(filter condition) const; + + filter& operator&=(filter condition); + filter& operator|=(filter condition); + + // Utility + + filter normalize(object context) const; + + private: + union { + struct { + field filterField; + comparison filterType; + union { + std::unique_ptr join; + std::string stringValue; + int intValue; + bool boolValue; + }; + } singleton_; + struct { + 
std::list children; + bool orlogic; + } group_; + }; + type type_ = type::empty; + + }; + +}; + +#endif /* end of include guard: FILTER_H_932BA9C6 */ diff --git a/lib/form.cpp b/lib/form.cpp new file mode 100644 index 0000000..8ba3bd7 --- /dev/null +++ b/lib/form.cpp @@ -0,0 +1,53 @@ +#include "form.h" +#include +#include "filter.h" +#include "pronunciation.h" +#include "database.h" +#include "query.h" + +namespace verbly { + + const object form::objectType = object::form; + + const std::list form::select = {"form_id", "form", "complexity", "proper"}; + + const field form::id = field::integerField(object::form, "form_id"); + const field form::text = field::stringField(object::form, "form"); + const field form::complexity = field::integerField(object::form, "complexity"); + const field form::proper = field::booleanField(object::form, "proper"); + + const field form::pronunciation = field::joinThrough(object::form, "form_id", object::pronunciation, "forms_pronunciations", "pronunciation_id"); + + const field form::lemmaJoin = field::joinField(object::form, "form_id", object::lemma); + const field form::inflectionCategory = field::integerField("lemmas_forms", "category"); + + form::form(const database& db, sqlite3_stmt* row) : db_(&db), valid_(true) + { + id_ = sqlite3_column_int(row, 0); + text_ = std::string(reinterpret_cast(sqlite3_column_text(row, 1))); + complexity_ = sqlite3_column_int(row, 2); + proper_ = (sqlite3_column_int(row, 3) == 1); + } + + filter operator%=(form::inflection_field check, filter joinCondition) + { + return (form::lemmaJoin %= (joinCondition && (form::inflectionCategory == check.getCategory()))); + } + + const std::vector& form::getPronunciations() const + { + if (!valid_) + { + throw std::domain_error("Bad access to uninitialized form"); + } + + if (!initializedPronunciations_) + { + pronunciations_ = db_->pronunciations(pronunciation::form %= *this, false, -1).all(); + initializedPronunciations_ = true; + } + + return pronunciations_; + 
} + +}; diff --git a/lib/form.h b/lib/form.h new file mode 100644 index 0000000..c6a1353 --- /dev/null +++ b/lib/form.h @@ -0,0 +1,149 @@ +#ifndef FORM_H_3A6C962C +#define FORM_H_3A6C962C + +#include +#include +#include +#include +#include "field.h" +#include "filter.h" + +struct sqlite3_stmt; + +namespace verbly { + + class pronunciation; + class database; + + class form { + public: + + // Default constructor + + form() = default; + + // Construct from database + + form(const database& db, sqlite3_stmt* row); + + // Accessors + + operator bool() const + { + return valid_; + } + + int getId() const + { + if (!valid_) + { + throw std::domain_error("Bad access to uninitialized form"); + } + + return id_; + } + + std::string getText() const + { + if (!valid_) + { + throw std::domain_error("Bad access to uninitialized form"); + } + + return text_; + } + + int getComplexity() const + { + if (!valid_) + { + throw std::domain_error("Bad access to uninitialized form"); + } + + return complexity_; + } + + bool isProper() const + { + if (!valid_) + { + throw std::domain_error("Bad access to uninitialized form"); + } + + return proper_; + } + + const std::vector& getPronunciations() const; + + // Type info + + static const object objectType; + + static const std::list select; + + // Query fields + + static const field id; + static const field text; + static const field complexity; + static const field proper; + + operator filter() const + { + if (!valid_) + { + throw std::domain_error("Bad access to uninitialized form"); + } + + return (id == id_); + } + + // Relationships to other objects + + static const field pronunciation; + + class inflection_field { + public: + + inflection_field(inflection category) : category_(category) + { + } + + const inflection getCategory() const + { + return category_; + } + + private: + + const inflection category_; + }; + + static const inflection_field lemma(inflection category) + { + return inflection_field(category); + } + + friend filter 
operator%=(form::inflection_field check, filter joinCondition); + + private: + bool valid_ = false; + + int id_; + std::string text_; + int complexity_ ; + bool proper_; + + const database* db_; + + mutable bool initializedPronunciations_ = false; + mutable std::vector pronunciations_; + + static const field lemmaJoin; + static const field inflectionCategory; + + }; + +}; + +#endif /* end of include guard: FORM_H_3A6C962C */ diff --git a/lib/frame.cpp b/lib/frame.cpp index ccec81b..bc3f842 100644 --- a/lib/frame.cpp +++ b/lib/frame.cpp @@ -1,320 +1,21 @@ -#include "verbly.h" +#include "frame.h" +#include namespace verbly { - frame::selrestr::type frame::selrestr::get_type() const - { - return _type; - } - - frame::selrestr::selrestr(const selrestr& other) - { - _type = other._type; - - switch (_type) - { - case frame::selrestr::type::singleton: - { - _singleton.pos = other._singleton.pos; - new(&_singleton.restriction) std::string(other._singleton.restriction); - - break; - } - - case frame::selrestr::type::group: - { - new(&_group.children) std::list(other._group.children); - _group.orlogic = other._group.orlogic; - - break; - } - - case frame::selrestr::type::empty: - { - // Nothing! - - break; - } - } - } - - frame::selrestr::~selrestr() - { - switch (_type) - { - case frame::selrestr::type::singleton: - { - using string_type = std::string; - _singleton.restriction.~string_type(); - - break; - } - - case frame::selrestr::type::group: - { - using list_type = std::list; - _group.children.~list_type(); - - break; - } - - case frame::selrestr::type::empty: - { - // Nothing! 
- - break; - } - } - } - - frame::selrestr& frame::selrestr::operator=(const selrestr& other) - { - this->~selrestr(); - - _type = other._type; - - switch (_type) - { - case frame::selrestr::type::singleton: - { - _singleton.pos = other._singleton.pos; - new(&_singleton.restriction) std::string(other._singleton.restriction); - - break; - } - - case frame::selrestr::type::group: - { - new(&_group.children) std::list(other._group.children); - _group.orlogic = other._group.orlogic; - - break; - } - - case frame::selrestr::type::empty: - { - // Nothing! - - break; - } - } - - return *this; - } - - frame::selrestr::selrestr() : _type(frame::selrestr::type::empty) - { - - } - - frame::selrestr::selrestr(std::string restriction, bool pos) : _type(frame::selrestr::type::singleton) - { - new(&_singleton.restriction) std::string(restriction); - _singleton.pos = pos; - } - - std::string frame::selrestr::get_restriction() const - { - assert(_type == frame::selrestr::type::singleton); - - return _singleton.restriction; - } - - bool frame::selrestr::get_pos() const - { - assert(_type == frame::selrestr::type::singleton); - - return _singleton.pos; - } - - frame::selrestr::selrestr(std::list children, bool orlogic) : _type(frame::selrestr::type::group) - { - new(&_group.children) std::list(children); - _group.orlogic = orlogic; - } - - std::list frame::selrestr::get_children() const - { - assert(_type == frame::selrestr::type::group); - - return _group.children; - } - - std::list::const_iterator frame::selrestr::begin() const - { - assert(_type == frame::selrestr::type::group); - - return _group.children.begin(); - } - - std::list::const_iterator frame::selrestr::end() const - { - assert(_type == frame::selrestr::type::group); - - return _group.children.end(); - } - - bool frame::selrestr::get_orlogic() const - { - assert(_type == frame::selrestr::type::group); - - return _group.orlogic; - } - - frame::part::type frame::part::get_type() const - { - return _type; - } - - 
frame::part::part() - { - - } + const object frame::objectType = object::frame; - frame::part::part(const part& other) - { - _type = other._type; - - switch (_type) - { - case frame::part::type::noun_phrase: - { - new(&_noun_phrase.role) std::string(other._noun_phrase.role); - new(&_noun_phrase.selrestrs) selrestr(other._noun_phrase.selrestrs); - new(&_noun_phrase.synrestrs) std::set(other._noun_phrase.synrestrs); - - break; - } - - case frame::part::type::literal_preposition: - { - new(&_literal_preposition.choices) std::vector(other._literal_preposition.choices); - - break; - } - - case frame::part::type::selection_preposition: - { - new(&_selection_preposition.preprestrs) std::vector(other._selection_preposition.preprestrs); - - break; - } - - case frame::part::type::literal: - { - new(&_literal.lexval) std::string(other._literal.lexval); - - break; - } - - default: - { - // Nothing! - - break; - } - } - } + const std::list frame::select = {"frame_id", "data"}; - frame::part::~part() - { - switch (_type) - { - case frame::part::type::noun_phrase: - { - using string_type = std::string; - using set_type = std::set; - - _noun_phrase.role.~string_type(); - _noun_phrase.selrestrs.~selrestr(); - _noun_phrase.synrestrs.~set_type(); - - break; - } - - case frame::part::type::literal_preposition: - { - using vector_type = std::vector; - _literal_preposition.choices.~vector_type(); - - break; - } - - case frame::part::type::selection_preposition: - { - using vector_type = std::vector; - _selection_preposition.preprestrs.~vector_type(); - - break; - } - - case frame::part::type::literal: - { - using string_type = std::string; - _literal.lexval.~string_type(); - - break; - } - - default: - { - // Nothing! 
- - break; - } - } - } + const field frame::id = field::integerField(object::frame, "frame_id"); - std::string frame::part::get_role() const - { - assert(_type == frame::part::type::noun_phrase); - - return _noun_phrase.role; - } + const field frame::group = field::joinThrough(object::frame, "frame_id", object::group, "groups_frames", "group_id"); - frame::selrestr frame::part::get_selrestrs() const + frame::frame(const database& db, sqlite3_stmt* row) : db_(&db), valid_(true) { - assert(_type == frame::part::type::noun_phrase); + id_ = sqlite3_column_int(row, 0); - return _noun_phrase.selrestrs; - } - - std::set frame::part::get_synrestrs() const - { - assert(_type == frame::part::type::noun_phrase); - - return _noun_phrase.synrestrs; - } - - std::vector frame::part::get_choices() const - { - assert(_type == frame::part::type::literal_preposition); - - return _literal_preposition.choices; - } - - std::vector frame::part::get_preprestrs() const - { - assert(_type == frame::part::type::selection_preposition); - - return _selection_preposition.preprestrs; - } - - std::string frame::part::get_literal() const - { - assert(_type == frame::part::type::literal); - - return _literal.lexval; - } - - std::vector frame::parts() const - { - return _parts; - } - - std::map frame::roles() const - { - return _roles; + // TODO: Initialize frame data from row. 
} }; diff --git a/lib/frame.h b/lib/frame.h index fa57e1b..68a4346 100644 --- a/lib/frame.h +++ b/lib/frame.h @@ -1,118 +1,78 @@ -#ifndef FRAME_H_9A5D90FE -#define FRAME_H_9A5D90FE +#ifndef FRAME_H_EA29065A +#define FRAME_H_EA29065A + +#include +#include +#include "field.h" +#include "filter.h" + +struct sqlite3_stmt; namespace verbly { - class frame_query; - + class database; + class frame { - public: - class selrestr { - public: - enum class type { - empty, - singleton, - group - }; - - type get_type() const; - selrestr(const selrestr& other); - ~selrestr(); - selrestr& operator=(const selrestr& other); - - // Empty - selrestr(); - - // Singleton - selrestr(std::string restriction, bool pos); - std::string get_restriction() const; - bool get_pos() const; - - // Group - selrestr(std::list children, bool orlogic); - std::list get_children() const; - std::list::const_iterator begin() const; - std::list::const_iterator end() const; - bool get_orlogic() const; - - private: - union { - struct { - bool pos; - std::string restriction; - } _singleton; - struct { - std::list children; - bool orlogic; - } _group; - }; - type _type; - }; + public: + + // Default constructor + + frame() = default; + + // Construct from database + + frame(const database& db, sqlite3_stmt* row); + + // Accessors + + operator bool() const + { + return valid_; + } + + int getId() const + { + if (!valid_) + { + throw std::domain_error("Bad access to uninitialized frame"); + } - class part { - public: - enum class type { - noun_phrase, - verb, - literal_preposition, - selection_preposition, - adjective, - adverb, - literal - }; - - type get_type() const; - part(const part& other); - ~part(); - - // Noun phrase - std::string get_role() const; - selrestr get_selrestrs() const; - std::set get_synrestrs() const; - - // Literal preposition - std::vector get_choices() const; - - // Selection preposition - std::vector get_preprestrs() const; - - // Literal - std::string get_literal() const; - - private: - 
friend class frame_query; - - part(); - - union { - struct { - std::string role; - selrestr selrestrs; - std::set synrestrs; - } _noun_phrase; - struct { - std::vector choices; - } _literal_preposition; - struct { - std::vector preprestrs; - } _selection_preposition; - struct { - std::string lexval; - } _literal; - }; - type _type; - }; + return id_; + } + + // Type info + + static const object objectType; + + static const std::list select; + + // Query fields + + static const field id; + + operator filter() const + { + if (!valid_) + { + throw std::domain_error("Bad access to uninitialized frame"); + } - std::vector parts() const; - std::map roles() const; - - private: - friend class frame_query; - - std::vector _parts; - std::map _roles; + return (id == id_); + } + + // Relationships to other objects + + static const field group; + + private: + bool valid_ = false; + + int id_; + + const database* db_; + }; - + }; -#endif /* end of include guard: FRAME_H_9A5D90FE */ +#endif /* end of include guard: FRAME_H_EA29065A */ diff --git a/lib/group.cpp b/lib/group.cpp new file mode 100644 index 0000000..8b6d985 --- /dev/null +++ b/lib/group.cpp @@ -0,0 +1,43 @@ +#include "group.h" +#include +#include "frame.h" +#include "database.h" +#include "query.h" + +namespace verbly { + + const object group::objectType = object::group; + + const std::list group::select = {"group_id", "data"}; + + const field group::id = field::integerField(object::group, "group_id"); + + const field group::frame = field::joinThrough(object::group, "group_id", object::frame, "groups_frames", "frame_id"); + const field group::word = field::joinField(object::group, "group_id", object::word); + + group::group(const database& db, sqlite3_stmt* row) : db_(&db), valid_(true) + { + id_ = sqlite3_column_int(row, 0); + + // TODO: Initialize role data from row. 
+ } + + const std::vector& group::getFrames() const + { + if (!valid_) + { + throw std::domain_error("Bad access to uninitialized group"); + } + + if (!initializedFrames_) + { + frames_ = db_->frames(frame::group %= *this, false, -1).all(); + + initializedFrames_ = true; + } + + return frames_; + } + +}; + diff --git a/lib/group.h b/lib/group.h new file mode 100644 index 0000000..dd53503 --- /dev/null +++ b/lib/group.h @@ -0,0 +1,87 @@ +#ifndef GROUP_H_BD6933C0 +#define GROUP_H_BD6933C0 + +#include +#include +#include +#include "field.h" +#include "filter.h" + +struct sqlite3_stmt; + +namespace verbly { + + class database; + class frame; + + class group { + public: + + // Default constructor + + group() = default; + + // Construct from database + + group(const database& db, sqlite3_stmt* row); + + // Accessors + + operator bool() const + { + return valid_; + } + + int getId() const + { + if (!valid_) + { + throw std::domain_error("Bad access to uninitialized group"); + } + + return id_; + } + + const std::vector& getFrames() const; + + // Type info + + static const object objectType; + + static const std::list select; + + // Query fields + + static const field id; + + operator filter() const + { + if (!valid_) + { + throw std::domain_error("Bad access to uninitialized group"); + } + + return (id == id_); + } + + // Relationships to other objects + + static const field frame; + + static const field word; + + private: + bool valid_ = false; + + int id_; + + const database* db_; + + mutable bool initializedFrames_ = false; + mutable std::vector frames_; + + }; + +}; + +#endif /* end of include guard: GROUP_H_BD6933C0 */ diff --git a/lib/lemma.cpp b/lib/lemma.cpp new file mode 100644 index 0000000..f9e9fcc --- /dev/null +++ b/lib/lemma.cpp @@ -0,0 +1,69 @@ +#include "lemma.h" +#include +#include "database.h" +#include "query.h" + +namespace verbly { + + const object lemma::objectType = object::lemma; + + const std::list lemma::select = {"lemma_id"}; + + const field 
lemma::id = field::integerField(object::lemma, "lemma_id"); + + const field lemma::word = field::joinField(object::lemma, "lemma_id", object::word); + + const field lemma::formJoin = field::joinField(object::lemma, "form_id", object::form); + const field lemma::inflectionCategory = field::integerField(object::lemma, "category"); + + filter operator%=(lemma::inflection_field check, filter joinCondition) + { + return (lemma::formJoin %= joinCondition) && (lemma::inflectionCategory == check.getCategory()); + } + + lemma::lemma(const database& db, sqlite3_stmt* row) : db_(&db), valid_(true) + { + id_ = sqlite3_column_int(row, 0); + } + + const form& lemma::getBaseForm() const + { + if (!valid_) + { + throw std::domain_error("Bad access to uninitialized lemma"); + } + + if (!forms_.count(inflection::base)) + { + initializeForm(inflection::base); + } + + return forms_.at(inflection::base).front(); + } + + bool lemma::hasInflection(inflection category) const + { + return !getInflections(category).empty(); + } + + const std::vector& lemma::getInflections(inflection category) const + { + if (!valid_) + { + throw std::domain_error("Bad access to uninitialized lemma"); + } + + if (!forms_.count(category)) + { + initializeForm(category); + } + + return forms_.at(category); + } + + void lemma::initializeForm(inflection infl) const + { + forms_[infl] = db_->forms(form::lemma(infl) %= *this, false, -1).all(); + } + +}; diff --git a/lib/lemma.h b/lib/lemma.h new file mode 100644 index 0000000..9a07f16 --- /dev/null +++ b/lib/lemma.h @@ -0,0 +1,120 @@ +#ifndef LEMMA_H_0A180D30 +#define LEMMA_H_0A180D30 + +#include +#include +#include +#include +#include "field.h" +#include "enums.h" +#include "filter.h" + +struct sqlite3_stmt; + +namespace verbly { + + class form; + class database; + + class lemma { + public: + + // Default constructor + + lemma() = default; + + // Construct from database + + lemma(const database& db, sqlite3_stmt* row); + + // Accessors + + operator bool() const + 
{ + return valid_; + } + + int getId() const + { + if (!valid_) + { + throw std::domain_error("Bad access to uninitialized lemma"); + } + + return id_; + } + + const form& getBaseForm() const; + + bool hasInflection(inflection category) const; + + const std::vector& getInflections(inflection category) const; + + // Type info + + static const object objectType; + + static const std::list select; + + // Query fields + + static const field id; + + operator filter() const + { + if (!valid_) + { + throw std::domain_error("Bad access to uninitialized lemma"); + } + + return (id == id_); + } + + // Relationships to other objects + + static const field word; + + class inflection_field { + public: + + inflection_field(inflection category) : category_(category) + { + } + + const inflection getCategory() const + { + return category_; + } + + private: + + const inflection category_; + }; + + static const inflection_field form(inflection category) + { + return inflection_field(category); + } + + friend filter operator%=(lemma::inflection_field check, filter joinCondition); + + private: + + void initializeForm(inflection category) const; + + bool valid_ = false; + + int id_; + + mutable std::map> forms_; + + const database* db_; + + static const field formJoin; + static const field inflectionCategory; + + }; + +}; + +#endif /* end of include guard: LEMMA_H_0A180D30 */ diff --git a/lib/notion.cpp b/lib/notion.cpp new file mode 100644 index 0000000..16794d3 --- /dev/null +++ b/lib/notion.cpp @@ -0,0 +1,94 @@ +#include "notion.h" +#include +#include + +namespace verbly { + + const object notion::objectType = object::notion; + + const std::list notion::select = {"notion_id", "part_of_speech", "wnid", "images"}; + + const field notion::id = field::integerField(object::notion, "notion_id"); + const field notion::partOfSpeech = field::integerField(object::notion, "part_of_speech"); + const field notion::wnid = field::integerField(object::notion, "wnid", true); + const field 
notion::numOfImages = field::integerField(object::notion, "images", true); + + const field notion::word = field::joinField(object::notion, "word_id", object::word); + + const field notion::hypernyms = field::selfJoin(object::notion, "notion_id", "hypernymy", "hyponym_id", "hypernym_id"); + const field notion::hyponyms = field::selfJoin(object::notion, "notion_id", "hypernymy", "hypernym_id", "hyponym_id"); + + const field notion::fullHypernyms = field::hierarchalSelfJoin(object::notion, "notion_id", "hypernymy", "hyponym_id", "hypernym_id"); + const field notion::fullHyponyms = field::hierarchalSelfJoin(object::notion, "notion_id", "hypernymy", "hypernym_id", "hyponym_id"); + + const field notion::instances = field::selfJoin(object::notion, "notion_id", "instantiation", "class_id", "instance_id"); + const field notion::classes = field::selfJoin(object::notion, "notion_id", "instantiation", "instance_id", "class_id"); + + const field notion::memberMeronyms = field::selfJoin(object::notion, "notion_id", "member_meronymy", "holonym_id", "meronym_id"); + const field notion::memberHolonyms = field::selfJoin(object::notion, "notion_id", "member_meronymy", "meronym_id", "holonym_id"); + + const field notion::fullMemberMeronyms = field::hierarchalSelfJoin(object::notion, "notion_id", "member_meronymy", "holonym_id", "meronym_id"); + const field notion::fullMemberHolonyms = field::hierarchalSelfJoin(object::notion, "notion_id", "member_meronymy", "meronym_id", "holonym_id"); + + const field notion::partMeronyms = field::selfJoin(object::notion, "notion_id", "part_meronymy", "holonym_id", "meronym_id"); + const field notion::partHolonyms = field::selfJoin(object::notion, "notion_id", "part_meronymy", "meronym_id", "holonym_id"); + + const field notion::fullPartMeronyms = field::hierarchalSelfJoin(object::notion, "notion_id", "part_meronymy", "holonym_id", "meronym_id"); + const field notion::fullPartHolonyms = field::hierarchalSelfJoin(object::notion, "notion_id", 
"part_meronymy", "meronym_id", "holonym_id"); + + const field notion::substanceMeronyms = field::selfJoin(object::notion, "notion_id", "substance_meronymy", "holonym_id", "meronym_id"); + const field notion::substanceHolonyms = field::selfJoin(object::notion, "notion_id", "substance_meronymy", "meronym_id", "holonym_id"); + + const field notion::fullSubstanceMeronyms = field::hierarchalSelfJoin(object::notion, "notion_id", "substance_meronymy", "holonym_id", "meronym_id"); + const field notion::fullSubstanceHolonyms = field::hierarchalSelfJoin(object::notion, "notion_id", "substance_meronymy", "meronym_id", "holonym_id"); + + const field notion::variants = field::selfJoin(object::notion, "notion_id", "variation", "noun_id", "adjective_id"); + const field notion::attributes = field::selfJoin(object::notion, "notion_id", "variation", "adjective_id", "noun_id"); + + const field notion::similarAdjectives = field::selfJoin(object::notion, "notion_id", "similarity", "adjective_2_id", "adjective_1_id"); + + const field notion::entails = field::selfJoin(object::notion, "notion_id", "entailment", "given_id", "entailment_id"); + const field notion::entailedBy = field::selfJoin(object::notion, "notion_id", "entailment", "entailment_id", "given_id"); + + const field notion::causes = field::selfJoin(object::notion, "notion_id", "causality", "effect_id", "cause_id"); + const field notion::effects = field::selfJoin(object::notion, "notion_id", "causality", "cause_id", "effect_id"); + + const notion::preposition_group_field notion::prepositionGroup = {}; /* out-of-class definition of the static member declared in notion.h */ + + const field notion::preposition_group_field::isA = field::joinField(object::notion, "notion_id", "is_a"); + const field notion::preposition_group_field::groupNameField = field::stringField("is_a", "groupname"); + + notion::notion(const database& db, sqlite3_stmt* row) : db_(&db), valid_(true) + { + id_ = sqlite3_column_int(row, 0); + partOfSpeech_ = static_cast(sqlite3_column_int(row, 1)); + + if (sqlite3_column_type(row, 2) != 
SQLITE_NULL) + { + hasWnid_ = true; + wnid_ = sqlite3_column_int(row, 2); + } + + if (sqlite3_column_type(row, 3) != SQLITE_NULL) + { + hasNumOfImages_ = true; + numOfImages_ = sqlite3_column_int(row, 3); + } + } + + std::string notion::getImageNetUrl() const + { + std::stringstream url; + url << "http://www.image-net.org/api/text/imagenet.synset.geturls?wnid=n"; + url.width(8); + url.fill('0'); + url << (getWnid() % 100000000); + return url.str(); + } + + filter notion::preposition_group_field::operator==(std::string groupName) const + { + return (isA %= (groupNameField == groupName)); + } + +}; diff --git a/lib/notion.h b/lib/notion.h new file mode 100644 index 0000000..a180d73 --- /dev/null +++ b/lib/notion.h @@ -0,0 +1,200 @@ +#ifndef NOTION_H_FD1C7646 +#define NOTION_H_FD1C7646 + +#include +#include +#include "field.h" +#include "filter.h" + +struct sqlite3_stmt; + +namespace verbly { + + class database; + + class notion { + public: + + // Default constructor + + notion() = default; + + // Construct from database + + notion(const database& db, sqlite3_stmt* row); + + // Accessors + + operator bool() const + { + return valid_; + } + + int getId() const + { + if (!valid_) + { + throw std::domain_error("Bad access to uninitialized notion"); + } + + return id_; + } + + part_of_speech getPartOfSpeech() const + { + if (!valid_) + { + throw std::domain_error("Bad access to uninitialized notion"); + } + + return partOfSpeech_; + } + + bool hasWnid() const + { + if (!valid_) + { + throw std::domain_error("Bad access to uninitialized notion"); + } + + return hasWnid_; + } + + int getWnid() const + { + if (!valid_) + { + throw std::domain_error("Bad access to uninitialized notion"); + } + + if (!hasWnid_) + { + throw std::domain_error("Notion has no wnid"); + } + + return wnid_; + } + + bool hasNumOfImages() const + { + if (!valid_) + { + throw std::domain_error("Bad access to uninitialized notion"); + } + + return hasNumOfImages_; + } + + int getNumOfImages() const + { 
+ if (!valid_) + { + throw std::domain_error("Bad access to uninitialized notion"); + } + + if (!hasNumOfImages_) + { + throw std::domain_error("Notion does not have a number of images"); + } + + return numOfImages_; + } + + // Convenience + + std::string getImageNetUrl() const; + + // Type info + + static const object objectType; + + static const std::list select; + + // Query fields + + static const field id; + static const field partOfSpeech; + static const field wnid; + static const field numOfImages; + + operator filter() const + { + return (id == id_); + } + + // Relationships with other objects + + static const field word; + + // Relationships with self + + static const field hypernyms; + static const field hyponyms; + + static const field fullHypernyms; + static const field fullHyponyms; + + static const field instances; + static const field classes; + + static const field memberMeronyms; + static const field memberHolonyms; + + static const field fullMemberMeronyms; + static const field fullMemberHolonyms; + + static const field partMeronyms; + static const field partHolonyms; + + static const field fullPartMeronyms; + static const field fullPartHolonyms; + + static const field substanceMeronyms; + static const field substanceHolonyms; + + static const field fullSubstanceMeronyms; + static const field fullSubstanceHolonyms; + + static const field variants; + static const field attributes; + + static const field similarAdjectives; + + static const field entails; + static const field entailedBy; + + static const field causes; + static const field effects; + + // Preposition group relationship + + class preposition_group_field { + public: + + filter operator==(std::string groupName) const; + + private: + + static const field isA; + static const field groupNameField; + }; + + static const preposition_group_field prepositionGroup; + + private: + bool valid_ = false; + + int id_; + part_of_speech partOfSpeech_; + bool hasWnid_ = false; + int wnid_; + bool 
hasNumOfImages_ = false; + int numOfImages_; + + const database* db_; + + }; + +}; + +#endif /* end of include guard: NOTION_H_FD1C7646 */ diff --git a/lib/noun.cpp b/lib/noun.cpp deleted file mode 100644 index d8b34c9..0000000 --- a/lib/noun.cpp +++ /dev/null @@ -1,221 +0,0 @@ -#include "verbly.h" -#include -#include - -namespace verbly { - - noun::noun() - { - - } - - noun::noun(const data& _data, int _id) : word(_data, _id) - { - - } - - std::string noun::base_form() const - { - assert(_valid == true); - - return _singular; - } - - std::string noun::singular_form() const - { - assert(_valid == true); - - return _singular; - } - - std::string noun::plural_form() const - { - assert(_valid == true); - - return _plural; - } - - int noun::wnid() const - { - assert(_valid == true); - - return _wnid; - } - - bool noun::has_plural_form() const - { - assert(_valid == true); - - return !_plural.empty(); - } - - noun_query noun::hypernyms() const - { - assert(_valid == true); - - return _data->nouns().hypernym_of(*this); - } - - noun_query noun::full_hypernyms() const - { - assert(_valid == true); - - return _data->nouns().full_hypernym_of(*this); - } - - noun_query noun::hyponyms() const - { - assert(_valid == true); - - return _data->nouns().hyponym_of(*this); - } - - noun_query noun::full_hyponyms() const - { - assert(_valid == true); - - return _data->nouns().full_hyponym_of(*this); - } - - noun_query noun::part_meronyms() const - { - assert(_valid == true); - - return _data->nouns().part_meronym_of(*this); - } - - noun_query noun::full_part_meronyms() const - { - assert(_valid == true); - - return _data->nouns().full_part_meronym_of(*this); - } - - noun_query noun::part_holonyms() const - { - assert(_valid == true); - - return _data->nouns().part_holonym_of(*this); - } - - noun_query noun::full_part_holonyms() const - { - assert(_valid == true); - - return _data->nouns().full_part_holonym_of(*this); - } - - noun_query noun::substance_meronyms() const - { - 
assert(_valid == true); - - return _data->nouns().substance_meronym_of(*this); - } - - noun_query noun::full_substance_meronyms() const - { - assert(_valid == true); - - return _data->nouns().full_substance_meronym_of(*this); - } - - noun_query noun::substance_holonyms() const - { - assert(_valid == true); - - return _data->nouns().substance_holonym_of(*this); - } - - noun_query noun::full_substance_holonyms() const - { - assert(_valid == true); - - return _data->nouns().full_substance_holonym_of(*this); - } - - noun_query noun::member_meronyms() const - { - assert(_valid == true); - - return _data->nouns().member_meronym_of(*this); - } - - noun_query noun::full_member_meronyms() const - { - assert(_valid == true); - - return _data->nouns().full_member_meronym_of(*this); - } - - noun_query noun::member_holonyms() const - { - assert(_valid == true); - - return _data->nouns().member_holonym_of(*this); - } - - noun_query noun::full_member_holonyms() const - { - assert(_valid == true); - - return _data->nouns().full_member_holonym_of(*this); - } - - noun_query noun::classes() const - { - assert(_valid == true); - - return _data->nouns().class_of(*this); - } - - noun_query noun::instances() const - { - assert(_valid == true); - - return _data->nouns().instance_of(*this); - } - - noun_query noun::synonyms() const - { - assert(_valid == true); - - return _data->nouns().synonym_of(*this); - } - - noun_query noun::antonyms() const - { - assert(_valid == true); - - return _data->nouns().antonym_of(*this); - } - - adjective_query noun::pertainyms() const - { - assert(_valid == true); - - return _data->adjectives().pertainym_of(*this); - } - - adjective_query noun::variations() const - { - assert(_valid == true); - - return _data->adjectives().variant_of(*this); - } - - std::string noun::imagenet_url() const - { - std::stringstream url; - url << "http://www.image-net.org/api/text/imagenet.synset.geturls?wnid=n"; - url.width(8); - url.fill('0'); - url << (_wnid % 100000000); - 
return url.str(); - } - - bool noun::operator<(const noun& other) const - { - return _id < other._id; - } - -}; diff --git a/lib/noun.h b/lib/noun.h deleted file mode 100644 index bd71e57..0000000 --- a/lib/noun.h +++ /dev/null @@ -1,55 +0,0 @@ -#ifndef NOUN_H_24A03C83 -#define NOUN_H_24A03C83 - -namespace verbly { - - class noun : public word { - private: - std::string _singular; - std::string _plural; - int _wnid; - - friend class noun_query; - - public: - noun(); - noun(const data& _data, int _id); - - std::string base_form() const; - std::string singular_form() const; - std::string plural_form() const; - int wnid() const; - - bool has_plural_form() const; - - noun_query hypernyms() const; - noun_query full_hypernyms() const; - noun_query hyponyms() const; - noun_query full_hyponyms() const; - noun_query part_meronyms() const; - noun_query full_part_meronyms() const; - noun_query part_holonyms() const; - noun_query full_part_holonyms() const; - noun_query substance_meronyms() const; - noun_query full_substance_meronyms() const; - noun_query substance_holonyms() const; - noun_query full_substance_holonyms() const; - noun_query member_meronyms() const; - noun_query full_member_meronyms() const; - noun_query member_holonyms() const; - noun_query full_member_holonyms() const; - noun_query classes() const; - noun_query instances() const; - noun_query synonyms() const; - noun_query antonyms() const; - adjective_query pertainyms() const; - adjective_query variations() const; - - std::string imagenet_url() const; - - bool operator<(const noun& other) const; - }; - -}; - -#endif /* end of include guard: NOUN_H_24A03C83 */ diff --git a/lib/noun_query.cpp b/lib/noun_query.cpp deleted file mode 100644 index 8648227..0000000 --- a/lib/noun_query.cpp +++ /dev/null @@ -1,2013 +0,0 @@ -#include "verbly.h" - -namespace verbly { - - noun_query::noun_query(const data& _data) : _data(_data) - { - - } - - noun_query& noun_query::limit(int _limit) - { - if ((_limit > 0) || (_limit == 
unlimited)) - { - this->_limit = _limit; - } - - return *this; - } - - noun_query& noun_query::random() - { - this->_random = true; - - return *this; - } - - noun_query& noun_query::except(const noun& _word) - { - _except.push_back(_word); - - return *this; - } - - noun_query& noun_query::rhymes_with(const word& _word) - { - for (auto rhyme : _word.get_rhymes()) - { - _rhymes.push_back(rhyme); - } - - if (dynamic_cast(&_word) != nullptr) - { - _except.push_back(dynamic_cast(_word)); - } - - return *this; - } - - noun_query& noun_query::rhymes_with(rhyme _r) - { - _rhymes.push_back(_r); - - return *this; - } - - noun_query& noun_query::has_pronunciation() - { - this->_has_prn = true; - - return *this; - } - - noun_query& noun_query::has_rhyming_noun() - { - _has_rhyming_noun = true; - - return *this; - } - - noun_query& noun_query::has_rhyming_adjective() - { - _has_rhyming_adjective = true; - - return *this; - } - - noun_query& noun_query::has_rhyming_adverb() - { - _has_rhyming_adverb = true; - - return *this; - } - - noun_query& noun_query::has_rhyming_verb() - { - _has_rhyming_verb = true; - - return *this; - } - - noun_query& noun_query::with_stress(filter> _arg) - { - _stress = _arg; - - return *this; - } - - noun_query& noun_query::with_singular_form(std::string _arg) - { - _with_singular_form.push_back(_arg); - - return *this; - } - - noun_query& noun_query::with_prefix(filter _f) - { - _f.clean(); - _with_prefix = _f; - - return *this; - } - - noun_query& noun_query::with_suffix(filter _f) - { - _f.clean(); - _with_suffix = _f; - - return *this; - } - - noun_query& noun_query::requires_plural_form() - { - _requires_plural_form = true; - - return *this; - } - - noun_query& noun_query::with_complexity(int _arg) - { - _with_complexity = _arg; - - return *this; - } - - noun_query& noun_query::is_hypernym() - { - _is_hypernym = true; - - return *this; - } - - noun_query& noun_query::hypernym_of(filter _f) - { - _f.clean(); - _hypernym_of = _f; - - return *this; 
- } - - noun_query& noun_query::full_hypernym_of(filter _f) - { - _f.clean(); - _full_hypernym_of = _f; - - return *this; - } - - noun_query& noun_query::is_hyponym() - { - _is_hyponym = true; - - return *this; - } - - noun_query& noun_query::hyponym_of(filter _f) - { - _f.clean(); - _hyponym_of = _f; - - return *this; - } - - noun_query& noun_query::full_hyponym_of(filter _f) - { - _f.clean(); - _full_hyponym_of = _f; - - return *this; - } - - noun_query& noun_query::is_part_meronym() - { - _is_part_meronym = true; - - return *this; - } - - noun_query& noun_query::part_meronym_of(filter _f) - { - _f.clean(); - _part_meronym_of = _f; - - return *this; - } - - noun_query& noun_query::full_part_meronym_of(filter _f) - { - _f.clean(); - _full_part_meronym_of = _f; - - return *this; - } - - noun_query& noun_query::is_part_holonym() - { - _is_part_holonym = true; - - return *this; - } - - noun_query& noun_query::part_holonym_of(filter _f) - { - _f.clean(); - _part_holonym_of = _f; - - return *this; - } - - noun_query& noun_query::full_part_holonym_of(filter _f) - { - _f.clean(); - _full_part_holonym_of = _f; - - return *this; - } - - noun_query& noun_query::is_substance_meronym() - { - _is_substance_meronym = true; - - return *this; - } - - noun_query& noun_query::substance_meronym_of(filter _f) - { - _f.clean(); - _substance_meronym_of = _f; - - return *this; - } - - noun_query& noun_query::full_substance_meronym_of(filter _f) - { - _f.clean(); - _full_substance_meronym_of = _f; - - return *this; - } - - noun_query& noun_query::is_substance_holonym() - { - _is_substance_holonym = true; - - return *this; - } - - noun_query& noun_query::substance_holonym_of(filter _f) - { - _f.clean(); - _substance_holonym_of = _f; - - return *this; - } - - noun_query& noun_query::full_substance_holonym_of(filter _f) - { - _f.clean(); - _full_substance_holonym_of = _f; - - return *this; - } - - noun_query& noun_query::is_member_meronym() - { - _is_member_meronym = true; - - return *this; 
- } - - noun_query& noun_query::member_meronym_of(filter _f) - { - _f.clean(); - _member_meronym_of = _f; - - return *this; - } - - noun_query& noun_query::full_member_meronym_of(filter _f) - { - _f.clean(); - _full_member_meronym_of = _f; - - return *this; - } - - noun_query& noun_query::is_member_holonym() - { - _is_member_holonym = true; - - return *this; - } - - noun_query& noun_query::member_holonym_of(filter _f) - { - _f.clean(); - _member_holonym_of = _f; - - return *this; - } - - noun_query& noun_query::full_member_holonym_of(filter _f) - { - _f.clean(); - _full_member_holonym_of = _f; - - return *this; - } - - noun_query& noun_query::is_proper() - { - _is_proper = true; - - return *this; - } - - noun_query& noun_query::is_not_proper() - { - _is_not_proper = true; - - return *this; - } - - noun_query& noun_query::is_instance() - { - _is_instance = true; - - return *this; - } - - noun_query& noun_query::instance_of(filter _f) - { - _f.clean(); - _instance_of = _f; - - return *this; - } - - noun_query& noun_query::is_class() - { - _is_class = true; - - return *this; - } - - noun_query& noun_query::class_of(filter _f) - { - _f.clean(); - _class_of = _f; - - return *this; - } - - noun_query& noun_query::has_synonyms() - { - _has_synonyms = true; - - return *this; - } - - noun_query& noun_query::synonym_of(filter _f) - { - _f.clean(); - _synonym_of = _f; - - return *this; - } - - noun_query& noun_query::has_antonyms() - { - _has_antonyms = true; - - return *this; - } - - noun_query& noun_query::antonym_of(filter _f) - { - _f.clean(); - _antonym_of = _f; - - return *this; - } - - noun_query& noun_query::has_pertainym() - { - _has_pertainym = true; - - return *this; - } - - noun_query& noun_query::anti_pertainym_of(filter _f) - { - _f.clean(); - _anti_pertainym_of = _f; - - return *this; - } - - noun_query& noun_query::is_attribute() - { - _is_attribute = true; - - return *this; - } - - noun_query& noun_query::attribute_of(filter _f) - { - _f.clean(); - 
_attribute_of = _f; - - return *this; - } - - noun_query& noun_query::at_least_n_images(int _arg) - { - _at_least_n_images = _arg; - - return *this; - } - - noun_query& noun_query::with_wnid(int _arg) - { - _with_wnid.insert(_arg); - - return *this; - } - - /* - noun_query& noun_query::derived_from(const word& _w) - { - if (dynamic_cast(&_w) != nullptr) - { - _derived_from_adjective.push_back(dynamic_cast(_w)); - } else if (dynamic_cast(&_w) != nullptr) - { - _derived_from_adverb.push_back(dynamic_cast(_w)); - } else if (dynamic_cast(&_w) != nullptr) - { - _derived_from_noun.push_back(dynamic_cast(_w)); - } - - return *this; - } - - noun_query& noun_query::not_derived_from(const word& _w) - { - if (dynamic_cast(&_w) != nullptr) - { - _not_derived_from_adjective.push_back(dynamic_cast(_w)); - } else if (dynamic_cast(&_w) != nullptr) - { - _not_derived_from_adverb.push_back(dynamic_cast(_w)); - } else if (dynamic_cast(&_w) != nullptr) - { - _not_derived_from_noun.push_back(dynamic_cast(_w)); - } - - return *this; - }*/ - - std::list noun_query::run() const - { - std::stringstream construct; - - if (!_full_hypernym_of.empty() || !_full_hyponym_of.empty() || !_full_part_meronym_of.empty() || !_full_part_holonym_of.empty() || !_full_substance_meronym_of.empty() || !_full_substance_holonym_of.empty() || !_full_member_meronym_of.empty() || !_full_member_holonym_of.empty()) - { - construct << "WITH RECURSIVE "; - - std::list ctes; - - for (auto hyponym : _full_hypernym_of.uniq_flatten()) - { - ctes.push_back("hypernym_tree_" + std::to_string(hyponym._id) + " AS (SELECT hypernym_id FROM hypernymy WHERE hyponym_id = " + std::to_string(hyponym._id) + " UNION SELECT h.hypernym_id FROM hypernym_tree_" + std::to_string(hyponym._id) + " AS t INNER JOIN hypernymy AS h ON t.hypernym_id = h.hyponym_id)"); - } - - for (auto hypernym : _full_hyponym_of.uniq_flatten()) - { - ctes.push_back("hyponym_tree_" + std::to_string(hypernym._id) + " AS (SELECT hyponym_id FROM hypernymy WHERE 
hypernym_id = " + std::to_string(hypernym._id) + " UNION SELECT h.hyponym_id FROM hyponym_tree_" + std::to_string(hypernym._id) + " AS t INNER JOIN hypernymy AS h ON t.hyponym_id = h.hypernym_id)"); - } - - for (auto holonym : _full_part_meronym_of.uniq_flatten()) - { - ctes.push_back("part_meronym_tree_" + std::to_string(holonym._id) + " AS (SELECT meronym_id FROM part_meronymy WHERE holonym_id = " + std::to_string(holonym._id) + " UNION SELECT h.meronym_id FROM part_meronym_tree_" + std::to_string(holonym._id) + " AS t INNER JOIN part_meronymy AS h ON t.meronym_id = h.holonym_id)"); - } - - for (auto meronym : _full_part_holonym_of.uniq_flatten()) - { - ctes.push_back("part_holonym_tree_" + std::to_string(meronym._id) + " AS (SELECT holonym_id FROM part_meronymy WHERE meronym_id = " + std::to_string(meronym._id) + " UNION SELECT h.holonym_id FROM part_holonym_tree_" + std::to_string(meronym._id) + " AS t INNER JOIN part_meronymy AS h ON t.holonym_id = h.meronym_id)"); - } - - for (auto holonym : _full_substance_meronym_of.uniq_flatten()) - { - ctes.push_back("substance_meronym_tree_" + std::to_string(holonym._id) + " AS (SELECT meronym_id FROM substance_meronymy WHERE holonym_id = " + std::to_string(holonym._id) + " UNION SELECT h.meronym_id FROM substance_meronym_tree_" + std::to_string(holonym._id) + " AS t INNER JOIN substance_meronymy AS h ON t.meronym_id = h.holonym_id)"); - } - - for (auto meronym : _full_substance_holonym_of.uniq_flatten()) - { - ctes.push_back("substance_holonym_tree_" + std::to_string(meronym._id) + " AS (SELECT holonym_id FROM substance_meronymy WHERE meronym_id = " + std::to_string(meronym._id) + " UNION SELECT h.holonym_id FROM substance_holonym_tree_" + std::to_string(meronym._id) + " AS t INNER JOIN substance_meronymy AS h ON t.holonym_id = h.meronym_id)"); - } - - for (auto holonym : _full_member_meronym_of.uniq_flatten()) - { - ctes.push_back("member_meronym_tree_" + std::to_string(holonym._id) + " AS (SELECT meronym_id FROM 
member_meronymy WHERE holonym_id = " + std::to_string(holonym._id) + " UNION SELECT h.meronym_id FROM member_meronym_tree_" + std::to_string(holonym._id) + " AS t INNER JOIN member_meronymy AS h ON t.meronym_id = h.holonym_id)"); - } - - for (auto meronym : _full_member_holonym_of.uniq_flatten()) - { - ctes.push_back("member_holonym_tree_" + std::to_string(meronym._id) + " AS (SELECT holonym_id FROM member_meronymy WHERE meronym_id = " + std::to_string(meronym._id) + " UNION SELECT h.holonym_id FROM member_holonym_tree_" + std::to_string(meronym._id) + " AS t INNER JOIN member_meronymy AS h ON t.holonym_id = h.meronym_id)"); - } - - construct << verbly::implode(std::begin(ctes), std::end(ctes), ", "); - construct << " "; - } - - construct << "SELECT noun_id, singular, plural, wnid FROM nouns"; - std::list conditions; - std::list bindings; - - if (_has_prn) - { - conditions.push_back("noun_id IN (SELECT noun_id FROM noun_pronunciations)"); - } - - if (!_rhymes.empty()) - { - std::list clauses(_rhymes.size(), "(prerhyme != ? 
AND rhyme = ?)"); - std::string cond = "noun_id IN (SELECT noun_id FROM noun_pronunciations WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; - conditions.push_back(cond); - - for (auto rhy : _rhymes) - { - bindings.emplace_back(rhy.get_prerhyme()); - bindings.emplace_back(rhy.get_rhyme()); - } - } - - if (_has_rhyming_noun) - { - conditions.push_back("noun_id IN (SELECT a.noun_id FROM nouns AS a INNER JOIN noun_pronunciations AS curp ON curp.noun_id = a.noun_id INNER JOIN noun_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme AND rhmp.noun_id != curp.noun_id)"); - } - - if (_has_rhyming_adjective) - { - conditions.push_back("noun_id IN (SELECT a.noun_id FROM nouns AS a INNER JOIN noun_pronunciations AS curp ON curp.noun_id = a.noun_id INNER JOIN adjective_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)"); - } - - if (_has_rhyming_adverb) - { - conditions.push_back("noun_id IN (SELECT a.noun_id FROM nouns AS a INNER JOIN noun_pronunciations AS curp ON curp.noun_id = a.noun_id INNER JOIN adverb_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)"); - } - - if (_has_rhyming_verb) - { - conditions.push_back("noun_id IN (SELECT a.noun_id FROM nouns AS a INNER JOIN noun_pronunciations AS curp ON curp.noun_id = a.noun_id INNER JOIN verb_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)"); - } - - if (!_stress.empty()) - { - std::stringstream cond; - if (_stress.get_notlogic()) - { - cond << "noun_id NOT IN"; - } else { - cond << "noun_id IN"; - } - - cond << "(SELECT noun_id FROM noun_pronunciations WHERE "; - - std::function>, bool)> recur = [&] (filter> f, bool notlogic) -> std::string { - switch (f.get_type()) - { - case filter>::type::singleton: - { - std::ostringstream _val; - for (auto syl : f.get_elem()) - { - if (syl) - { - _val << "1"; - } else { - _val << "0"; - } - } - - 
bindings.emplace_back(_val.str()); - - if (notlogic == f.get_notlogic()) - { - return "stress = ?"; - } else { - return "stress != ?"; - } - } - - case filter>::type::group: - { - bool truelogic = notlogic != f.get_notlogic(); - - std::list clauses; - std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter> f2) { - return recur(f2, truelogic); - }); - - if (truelogic == f.get_orlogic()) - { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; - } else { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; - } - } - } - }; - - cond << recur(_stress, _stress.get_notlogic()); - cond << ")"; - conditions.push_back(cond.str()); - } - - for (auto except : _except) - { - conditions.push_back("noun_id != ?"); - bindings.emplace_back(except._id); - } - - if (!_with_singular_form.empty()) - { - std::list clauses(_with_singular_form.size(), "singular = ?"); - std::string cond = "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; - conditions.push_back(cond); - - for (auto form : _with_singular_form) - { - bindings.emplace_back(form); - } - } - - if (_requires_plural_form) - { - conditions.push_back("plural IS NOT NULL"); - } - - if (!_with_prefix.empty()) - { - std::function, bool)> recur = [&] (filter f, bool notlogic) -> std::string { - switch (f.get_type()) - { - case filter::type::singleton: - { - bindings.emplace_back(f.get_elem() + "%"); - - if (notlogic == f.get_notlogic()) - { - return "singular LIKE ?"; - } else { - return "singular NOT LIKE ?"; - } - } - - case filter::type::group: - { - bool truelogic = notlogic != f.get_notlogic(); - - std::list clauses; - std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter f2) { - return recur(f2, truelogic); - }); - - if (truelogic == f.get_orlogic()) - { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; - } else { - return "(" + 
verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; - } - } - } - }; - - conditions.push_back(recur(_with_prefix, false)); - } - - if (!_with_suffix.empty()) - { - std::function, bool)> recur = [&] (filter f, bool notlogic) -> std::string { - switch (f.get_type()) - { - case filter::type::singleton: - { - bindings.emplace_back("%" + f.get_elem()); - - if (notlogic == f.get_notlogic()) - { - return "singular LIKE ?"; - } else { - return "singular NOT LIKE ?"; - } - } - - case filter::type::group: - { - bool truelogic = notlogic != f.get_notlogic(); - - std::list clauses; - std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter f2) { - return recur(f2, truelogic); - }); - - if (truelogic == f.get_orlogic()) - { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; - } else { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; - } - } - } - }; - - conditions.push_back(recur(_with_suffix, false)); - } - - if (_with_complexity != unlimited) - { - conditions.push_back("complexity = ?"); - bindings.emplace_back(_with_complexity); - } - - if (_is_hypernym) - { - conditions.push_back("noun_id IN (SELECT hypernym_id FROM hypernymy)"); - } - - if (!_hypernym_of.empty()) - { - std::stringstream cond; - if (_hypernym_of.get_notlogic()) - { - cond << "noun_id NOT IN"; - } else { - cond << "noun_id IN"; - } - - cond << "(SELECT hypernym_id FROM hypernymy WHERE "; - - std::function, bool)> recur = [&] (filter f, bool notlogic) -> std::string { - switch (f.get_type()) - { - case filter::type::singleton: - { - bindings.emplace_back(f.get_elem()._id); - - if (notlogic == f.get_notlogic()) - { - return "hyponym_id = ?"; - } else { - return "hyponym_id != ?"; - } - } - - case filter::type::group: - { - bool truelogic = notlogic != f.get_notlogic(); - - std::list clauses; - std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter f2) { - return 
recur(f2, truelogic); - }); - - if (truelogic == f.get_orlogic()) - { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; - } else { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; - } - } - } - }; - - cond << recur(_hypernym_of, _hypernym_of.get_notlogic()); - cond << ")"; - conditions.push_back(cond.str()); - } - - if (!_full_hypernym_of.empty()) - { - std::function, bool)> recur = [&] (filter f, bool notlogic) -> std::string { - switch (f.get_type()) - { - case filter::type::singleton: - { - if (notlogic == f.get_notlogic()) - { - return "noun_id IN (SELECT hypernym_id FROM hypernym_tree_" + std::to_string(f.get_elem()._id) + ")"; - } else { - return "noun_id NOT IN (SELECT hypernym_id FROM hypernym_tree_" + std::to_string(f.get_elem()._id) + ")"; - } - } - - case filter::type::group: - { - bool truelogic = notlogic != f.get_notlogic(); - - std::list clauses; - std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter f2) { - return recur(f2, truelogic); - }); - - if (truelogic == f.get_orlogic()) - { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; - } else { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; - } - } - } - }; - - conditions.push_back(recur(_full_hypernym_of, false)); - } - - if (!_full_hyponym_of.empty()) - { - std::function, bool)> recur = [&] (filter f, bool notlogic) -> std::string { - switch (f.get_type()) - { - case filter::type::singleton: - { - if (notlogic == f.get_notlogic()) - { - return "noun_id IN (SELECT hyponym_id FROM hyponym_tree_" + std::to_string(f.get_elem()._id) + ")"; - } else { - return "noun_id NOT IN (SELECT hyponym_id FROM hyponym_tree_" + std::to_string(f.get_elem()._id) + ")"; - } - } - - case filter::type::group: - { - bool truelogic = notlogic != f.get_notlogic(); - - std::list clauses; - std::transform(std::begin(f), std::end(f), 
std::back_inserter(clauses), [&] (filter f2) { - return recur(f2, truelogic); - }); - - if (truelogic == f.get_orlogic()) - { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; - } else { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; - } - } - } - }; - - conditions.push_back(recur(_full_hyponym_of, false)); - } - - if (_is_hyponym) - { - conditions.push_back("noun_id IN (SELECT hyponym_id FROM hypernymy)"); - } - - if (!_hyponym_of.empty()) - { - std::stringstream cond; - if (_hyponym_of.get_notlogic()) - { - cond << "noun_id NOT IN"; - } else { - cond << "noun_id IN"; - } - - cond << "(SELECT hyponym_id FROM hypernymy WHERE "; - - std::function, bool)> recur = [&] (filter f, bool notlogic) -> std::string { - switch (f.get_type()) - { - case filter::type::singleton: - { - bindings.emplace_back(f.get_elem()._id); - - if (notlogic == f.get_notlogic()) - { - return "hypernym_id = ?"; - } else { - return "hypernym_id != ?"; - } - } - - case filter::type::group: - { - bool truelogic = notlogic != f.get_notlogic(); - - std::list clauses; - std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter f2) { - return recur(f2, truelogic); - }); - - if (truelogic == f.get_orlogic()) - { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; - } else { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; - } - } - } - }; - - cond << recur(_hyponym_of, _hyponym_of.get_notlogic()); - cond << ")"; - conditions.push_back(cond.str()); - } - - if (_is_part_meronym) - { - conditions.push_back("noun_id IN (SELECT meronym_id FROM part_meronymy)"); - } - - if (!_part_meronym_of.empty()) - { - std::stringstream cond; - if (_part_meronym_of.get_notlogic()) - { - cond << "noun_id NOT IN"; - } else { - cond << "noun_id IN"; - } - - cond << "(SELECT meronym_id FROM part_meronymy WHERE "; - - std::function, bool)> recur = [&] 
(filter f, bool notlogic) -> std::string { - switch (f.get_type()) - { - case filter::type::singleton: - { - bindings.emplace_back(f.get_elem()._id); - - if (notlogic == f.get_notlogic()) - { - return "holonym_id = ?"; - } else { - return "holonym_id != ?"; - } - } - - case filter::type::group: - { - bool truelogic = notlogic != f.get_notlogic(); - - std::list clauses; - std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter f2) { - return recur(f2, truelogic); - }); - - if (truelogic == f.get_orlogic()) - { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; - } else { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; - } - } - } - }; - - cond << recur(_part_meronym_of, _part_meronym_of.get_notlogic()); - cond << ")"; - conditions.push_back(cond.str()); - } - - if (!_full_part_meronym_of.empty()) - { - std::function, bool)> recur = [&] (filter f, bool notlogic) -> std::string { - switch (f.get_type()) - { - case filter::type::singleton: - { - if (notlogic == f.get_notlogic()) - { - return "noun_id IN (SELECT meronym_id FROM part_meronym_tree_" + std::to_string(f.get_elem()._id) + ")"; - } else { - return "noun_id NOT IN (SELECT meronym_id FROM part_meronym_tree_" + std::to_string(f.get_elem()._id) + ")"; - } - } - - case filter::type::group: - { - bool truelogic = notlogic != f.get_notlogic(); - - std::list clauses; - std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter f2) { - return recur(f2, truelogic); - }); - - if (truelogic == f.get_orlogic()) - { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; - } else { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; - } - } - } - }; - - conditions.push_back(recur(_full_part_meronym_of, false)); - } - - if (_is_part_holonym) - { - conditions.push_back("noun_id IN (SELECT holonym_id FROM part_meronymy)"); - } - - if 
(!_part_holonym_of.empty()) - { - std::stringstream cond; - if (_part_holonym_of.get_notlogic()) - { - cond << "noun_id NOT IN"; - } else { - cond << "noun_id IN"; - } - - cond << "(SELECT holonym_id FROM part_meronymy WHERE "; - - std::function, bool)> recur = [&] (filter f, bool notlogic) -> std::string { - switch (f.get_type()) - { - case filter::type::singleton: - { - bindings.emplace_back(f.get_elem()._id); - - if (notlogic == f.get_notlogic()) - { - return "meronym_id = ?"; - } else { - return "meronym_id != ?"; - } - } - - case filter::type::group: - { - bool truelogic = notlogic != f.get_notlogic(); - - std::list clauses; - std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter f2) { - return recur(f2, truelogic); - }); - - if (truelogic == f.get_orlogic()) - { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; - } else { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; - } - } - } - }; - - cond << recur(_part_holonym_of, _part_holonym_of.get_notlogic()); - cond << ")"; - conditions.push_back(cond.str()); - } - - if (!_full_part_holonym_of.empty()) - { - std::function, bool)> recur = [&] (filter f, bool notlogic) -> std::string { - switch (f.get_type()) - { - case filter::type::singleton: - { - if (notlogic == f.get_notlogic()) - { - return "noun_id IN (SELECT holonym_id FROM part_holonym_tree_" + std::to_string(f.get_elem()._id) + ")"; - } else { - return "noun_id NOT IN (SELECT holonym_id FROM part_holonym_tree_" + std::to_string(f.get_elem()._id) + ")"; - } - } - - case filter::type::group: - { - bool truelogic = notlogic != f.get_notlogic(); - - std::list clauses; - std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter f2) { - return recur(f2, truelogic); - }); - - if (truelogic == f.get_orlogic()) - { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; - } else { - return "(" + 
verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; - } - } - } - }; - - conditions.push_back(recur(_full_part_holonym_of, false)); - } - - if (_is_substance_meronym) - { - conditions.push_back("noun_id IN (SELECT meronym_id FROM substance_meronymy)"); - } - - if (!_substance_meronym_of.empty()) - { - std::stringstream cond; - if (_substance_meronym_of.get_notlogic()) - { - cond << "noun_id NOT IN"; - } else { - cond << "noun_id IN"; - } - - cond << "(SELECT meronym_id FROM substance_meronymy WHERE "; - - std::function, bool)> recur = [&] (filter f, bool notlogic) -> std::string { - switch (f.get_type()) - { - case filter::type::singleton: - { - bindings.emplace_back(f.get_elem()._id); - - if (notlogic == f.get_notlogic()) - { - return "holonym_id = ?"; - } else { - return "holonym_id != ?"; - } - } - - case filter::type::group: - { - bool truelogic = notlogic != f.get_notlogic(); - - std::list clauses; - std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter f2) { - return recur(f2, truelogic); - }); - - if (truelogic == f.get_orlogic()) - { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; - } else { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; - } - } - } - }; - - cond << recur(_substance_meronym_of, _substance_meronym_of.get_notlogic()); - cond << ")"; - conditions.push_back(cond.str()); - } - - if (!_full_substance_meronym_of.empty()) - { - std::function, bool)> recur = [&] (filter f, bool notlogic) -> std::string { - switch (f.get_type()) - { - case filter::type::singleton: - { - if (notlogic == f.get_notlogic()) - { - return "noun_id IN (SELECT meronym_id FROM substance_meronym_tree_" + std::to_string(f.get_elem()._id) + ")"; - } else { - return "noun_id NOT IN (SELECT meronym_id FROM substance_meronym_tree_" + std::to_string(f.get_elem()._id) + ")"; - } - } - - case filter::type::group: - { - bool truelogic = notlogic != 
f.get_notlogic(); - - std::list clauses; - std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter f2) { - return recur(f2, truelogic); - }); - - if (truelogic == f.get_orlogic()) - { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; - } else { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; - } - } - } - }; - - conditions.push_back(recur(_full_substance_meronym_of, false)); - } - - if (_is_substance_holonym) - { - conditions.push_back("noun_id IN (SELECT holonym_id FROM substance_meronymy)"); - } - - if (!_substance_holonym_of.empty()) - { - std::stringstream cond; - if (_substance_holonym_of.get_notlogic()) - { - cond << "noun_id NOT IN"; - } else { - cond << "noun_id IN"; - } - - cond << "(SELECT holonym_id FROM substance_meronymy WHERE "; - - std::function, bool)> recur = [&] (filter f, bool notlogic) -> std::string { - switch (f.get_type()) - { - case filter::type::singleton: - { - bindings.emplace_back(f.get_elem()._id); - - if (notlogic == f.get_notlogic()) - { - return "meronym_id = ?"; - } else { - return "meronym_id != ?"; - } - } - - case filter::type::group: - { - bool truelogic = notlogic != f.get_notlogic(); - - std::list clauses; - std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter f2) { - return recur(f2, truelogic); - }); - - if (truelogic == f.get_orlogic()) - { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; - } else { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; - } - } - } - }; - - cond << recur(_substance_holonym_of, _substance_holonym_of.get_notlogic()); - cond << ")"; - conditions.push_back(cond.str()); - } - - if (!_full_substance_holonym_of.empty()) - { - std::function, bool)> recur = [&] (filter f, bool notlogic) -> std::string { - switch (f.get_type()) - { - case filter::type::singleton: - { - if (notlogic == 
f.get_notlogic()) - { - return "noun_id IN (SELECT holonym_id FROM substance_holonym_tree_" + std::to_string(f.get_elem()._id) + ")"; - } else { - return "noun_id NOT IN (SELECT holonym_id FROM substance_holonym_tree_" + std::to_string(f.get_elem()._id) + ")"; - } - } - - case filter::type::group: - { - bool truelogic = notlogic != f.get_notlogic(); - - std::list clauses; - std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter f2) { - return recur(f2, truelogic); - }); - - if (truelogic == f.get_orlogic()) - { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; - } else { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; - } - } - } - }; - - conditions.push_back(recur(_full_substance_holonym_of, false)); - } - - if (_is_member_meronym) - { - conditions.push_back("noun_id IN (SELECT meronym_id FROM member_meronymy)"); - } - - if (!_member_meronym_of.empty()) - { - std::stringstream cond; - if (_member_meronym_of.get_notlogic()) - { - cond << "noun_id NOT IN"; - } else { - cond << "noun_id IN"; - } - - cond << "(SELECT meronym_id FROM member_meronymy WHERE "; - - std::function, bool)> recur = [&] (filter f, bool notlogic) -> std::string { - switch (f.get_type()) - { - case filter::type::singleton: - { - bindings.emplace_back(f.get_elem()._id); - - if (notlogic == f.get_notlogic()) - { - return "holonym_id = ?"; - } else { - return "holonym_id != ?"; - } - } - - case filter::type::group: - { - bool truelogic = notlogic != f.get_notlogic(); - - std::list clauses; - std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter f2) { - return recur(f2, truelogic); - }); - - if (truelogic == f.get_orlogic()) - { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; - } else { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; - } - } - } - }; - - cond << recur(_member_meronym_of, 
_member_meronym_of.get_notlogic()); - cond << ")"; - conditions.push_back(cond.str()); - } - - if (!_full_member_meronym_of.empty()) - { - std::function, bool)> recur = [&] (filter f, bool notlogic) -> std::string { - switch (f.get_type()) - { - case filter::type::singleton: - { - if (notlogic == f.get_notlogic()) - { - return "noun_id IN (SELECT meronym_id FROM member_meronym_tree_" + std::to_string(f.get_elem()._id) + ")"; - } else { - return "noun_id NOT IN (SELECT meronym_id FROM member_meronym_tree_" + std::to_string(f.get_elem()._id) + ")"; - } - } - - case filter::type::group: - { - bool truelogic = notlogic != f.get_notlogic(); - - std::list clauses; - std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter f2) { - return recur(f2, truelogic); - }); - - if (truelogic == f.get_orlogic()) - { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; - } else { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; - } - } - } - }; - - conditions.push_back(recur(_full_member_meronym_of, false)); - } - - if (_is_member_holonym) - { - conditions.push_back("noun_id IN (SELECT holonym_id FROM member_meronym)"); - } - - if (!_member_holonym_of.empty()) - { - std::stringstream cond; - if (_member_holonym_of.get_notlogic()) - { - cond << "noun_id NOT IN"; - } else { - cond << "noun_id IN"; - } - - cond << "(SELECT holonym_id FROM member_meronymy WHERE "; - - std::function, bool)> recur = [&] (filter f, bool notlogic) -> std::string { - switch (f.get_type()) - { - case filter::type::singleton: - { - bindings.emplace_back(f.get_elem()._id); - - if (notlogic == f.get_notlogic()) - { - return "meronym_id = ?"; - } else { - return "meronym_id != ?"; - } - } - - case filter::type::group: - { - bool truelogic = notlogic != f.get_notlogic(); - - std::list clauses; - std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter f2) { - return recur(f2, truelogic); - 
}); - - if (truelogic == f.get_orlogic()) - { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; - } else { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; - } - } - } - }; - - cond << recur(_member_holonym_of, _member_holonym_of.get_notlogic()); - cond << ")"; - conditions.push_back(cond.str()); - } - - if (!_full_member_holonym_of.empty()) - { - std::function, bool)> recur = [&] (filter f, bool notlogic) -> std::string { - switch (f.get_type()) - { - case filter::type::singleton: - { - if (notlogic == f.get_notlogic()) - { - return "noun_id IN (SELECT holonym_id FROM member_holonym_tree_" + std::to_string(f.get_elem()._id) + ")"; - } else { - return "noun_id NOT IN (SELECT holonym_id FROM member_holonym_tree_" + std::to_string(f.get_elem()._id) + ")"; - } - } - - case filter::type::group: - { - bool truelogic = notlogic != f.get_notlogic(); - - std::list clauses; - std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter f2) { - return recur(f2, truelogic); - }); - - if (truelogic == f.get_orlogic()) - { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; - } else { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; - } - } - } - }; - - conditions.push_back(recur(_full_member_holonym_of, false)); - } - - if (_is_proper) - { - conditions.push_back("proper = 1"); - } - - if (_is_not_proper) - { - conditions.push_back("proper = 0"); - } - - if (_is_instance) - { - conditions.push_back("noun_id IN (SELECT instance_id FROM instantiation)"); - } - - if (!_instance_of.empty()) - { - std::stringstream cond; - if (_instance_of.get_notlogic()) - { - cond << "noun_id NOT IN"; - } else { - cond << "noun_id IN"; - } - - cond << "(SELECT instance_id FROM instantiation WHERE "; - - std::function, bool)> recur = [&] (filter f, bool notlogic) -> std::string { - switch (f.get_type()) - { - case 
filter::type::singleton: - { - bindings.emplace_back(f.get_elem()._id); - - if (notlogic == f.get_notlogic()) - { - return "class_id = ?"; - } else { - return "class_id != ?"; - } - } - - case filter::type::group: - { - bool truelogic = notlogic != f.get_notlogic(); - - std::list clauses; - std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter f2) { - return recur(f2, truelogic); - }); - - if (truelogic == f.get_orlogic()) - { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; - } else { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; - } - } - } - }; - - cond << recur(_instance_of, _instance_of.get_notlogic()); - cond << ")"; - conditions.push_back(cond.str()); - } - - if (_is_class) - { - conditions.push_back("noun_id IN (SELECT class_id FROM instantiation)"); - } - - if (!_class_of.empty()) - { - std::stringstream cond; - if (_class_of.get_notlogic()) - { - cond << "noun_id NOT IN"; - } else { - cond << "noun_id IN"; - } - - cond << "(SELECT class_id FROM instantiation WHERE "; - - std::function, bool)> recur = [&] (filter f, bool notlogic) -> std::string { - switch (f.get_type()) - { - case filter::type::singleton: - { - bindings.emplace_back(f.get_elem()._id); - - if (notlogic == f.get_notlogic()) - { - return "instance_id = ?"; - } else { - return "instance_id != ?"; - } - } - - case filter::type::group: - { - bool truelogic = notlogic != f.get_notlogic(); - - std::list clauses; - std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter f2) { - return recur(f2, truelogic); - }); - - if (truelogic == f.get_orlogic()) - { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; - } else { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; - } - } - } - }; - - cond << recur(_class_of, _class_of.get_notlogic()); - cond << ")"; - conditions.push_back(cond.str()); - } - - 
if (_has_synonyms) - { - conditions.push_back("noun_id IN (SELECT noun_2_id FROM noun_synonymy)"); - } - - if (!_synonym_of.empty()) - { - std::stringstream cond; - if (_synonym_of.get_notlogic()) - { - cond << "noun_id NOT IN"; - } else { - cond << "noun_id IN"; - } - - cond << "(SELECT noun_2_id FROM noun_synonymy WHERE "; - - std::function, bool)> recur = [&] (filter f, bool notlogic) -> std::string { - switch (f.get_type()) - { - case filter::type::singleton: - { - bindings.emplace_back(f.get_elem()._id); - - if (notlogic == f.get_notlogic()) - { - return "noun_1_id = ?"; - } else { - return "noun_1_id != ?"; - } - } - - case filter::type::group: - { - bool truelogic = notlogic != f.get_notlogic(); - - std::list clauses; - std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter f2) { - return recur(f2, truelogic); - }); - - if (truelogic == f.get_orlogic()) - { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; - } else { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; - } - } - } - }; - - cond << recur(_synonym_of, _synonym_of.get_notlogic()); - cond << ")"; - conditions.push_back(cond.str()); - } - - if (_has_antonyms) - { - conditions.push_back("noun_id IN (SELECT noun_2_id FROM noun_antonymy)"); - } - - if (!_antonym_of.empty()) - { - std::stringstream cond; - if (_antonym_of.get_notlogic()) - { - cond << "noun_id NOT IN"; - } else { - cond << "noun_id IN"; - } - - cond << "(SELECT noun_2_id FROM noun_antonymy WHERE "; - - std::function, bool)> recur = [&] (filter f, bool notlogic) -> std::string { - switch (f.get_type()) - { - case filter::type::singleton: - { - bindings.emplace_back(f.get_elem()._id); - - if (notlogic == f.get_notlogic()) - { - return "noun_1_id = ?"; - } else { - return "noun_1_id != ?"; - } - } - - case filter::type::group: - { - bool truelogic = notlogic != f.get_notlogic(); - - std::list clauses; - std::transform(std::begin(f), 
std::end(f), std::back_inserter(clauses), [&] (filter f2) { - return recur(f2, truelogic); - }); - - if (truelogic == f.get_orlogic()) - { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; - } else { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; - } - } - } - }; - - cond << recur(_antonym_of, _antonym_of.get_notlogic()); - cond << ")"; - conditions.push_back(cond.str()); - } - - if (_has_pertainym) - { - conditions.push_back("noun_id IN (SELECT noun_id FROM pertainymy)"); - } - - if (!_anti_pertainym_of.empty()) - { - std::stringstream cond; - if (_anti_pertainym_of.get_notlogic()) - { - cond << "noun_id NOT IN"; - } else { - cond << "noun_id IN"; - } - - cond << "(SELECT noun_id FROM pertainymy WHERE "; - - std::function, bool)> recur = [&] (filter f, bool notlogic) -> std::string { - switch (f.get_type()) - { - case filter::type::singleton: - { - bindings.emplace_back(f.get_elem()._id); - - if (notlogic == f.get_notlogic()) - { - return "pertainym_id = ?"; - } else { - return "pertainym_id != ?"; - } - } - - case filter::type::group: - { - bool truelogic = notlogic != f.get_notlogic(); - - std::list clauses; - std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter f2) { - return recur(f2, truelogic); - }); - - if (truelogic == f.get_orlogic()) - { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; - } else { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; - } - } - } - }; - - cond << recur(_anti_pertainym_of, _anti_pertainym_of.get_notlogic()); - cond << ")"; - conditions.push_back(cond.str()); - } - - if (_is_attribute) - { - conditions.push_back("noun_id IN (SELECT noun_id FROM variation)"); - } - - if (!_attribute_of.empty()) - { - std::stringstream cond; - if (_attribute_of.get_notlogic()) - { - cond << "noun_id NOT IN"; - } else { - cond << "noun_id IN"; - } - - cond << "(SELECT 
noun_id FROM variation WHERE "; - - std::function, bool)> recur = [&] (filter f, bool notlogic) -> std::string { - switch (f.get_type()) - { - case filter::type::singleton: - { - bindings.emplace_back(f.get_elem()._id); - - if (notlogic == f.get_notlogic()) - { - return "adjective_id = ?"; - } else { - return "adjective_id != ?"; - } - } - - case filter::type::group: - { - bool truelogic = notlogic != f.get_notlogic(); - - std::list clauses; - std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter f2) { - return recur(f2, truelogic); - }); - - if (truelogic == f.get_orlogic()) - { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; - } else { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; - } - } - } - }; - - cond << recur(_attribute_of, _attribute_of.get_notlogic()); - cond << ")"; - conditions.push_back(cond.str()); - } - - if (_at_least_n_images != unlimited) - { - conditions.push_back("images >= ?"); - bindings.emplace_back(_at_least_n_images); - } - - if (!_with_wnid.empty()) - { - std::vector clauses(_with_wnid.size(), "wnid = ?"); - std::string cond = verbly::implode(std::begin(clauses), std::end(clauses), " OR "); - conditions.push_back("(" + cond + ")"); - - for (auto wnid : _with_wnid) - { - bindings.emplace_back(wnid); - } - } - - /* - if (!_derived_from_adjective.empty()) - { - std::list clauses(_derived_from_adjective.size(), "adjective_id = @DERADJ"); - std::string cond = "noun_id IN (SELECT noun_id FROM noun_adjective_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; - conditions.push_back(cond); - } - - if (!_not_derived_from_adjective.empty()) - { - std::list clauses(_not_derived_from_adjective.size(), "adjective_id = @NDERADJ"); - std::string cond = "noun_id NOT IN (SELECT noun_id FROM noun_adjective_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; - 
conditions.push_back(cond); - } - - if (!_derived_from_adverb.empty()) - { - std::list clauses(_derived_from_adverb.size(), "adverb_id = @DERADV"); - std::string cond = "noun_id IN (SELECT noun_id FROM noun_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; - conditions.push_back(cond); - } - - if (!_not_derived_from_adverb.empty()) - { - std::list clauses(_not_derived_from_adverb.size(), "adverb_id = @NDERADV"); - std::string cond = "noun_id NOT IN (SELECT noun_id FROM noun_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; - conditions.push_back(cond); - } - - if (!_derived_from_noun.empty()) - { - std::list clauses(_derived_from_noun.size(), "noun_2_id = @DERN"); - std::string cond = "noun_id IN (SELECT noun_1_id FROM noun_noun_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; - conditions.push_back(cond); - } - - if (!_not_derived_from_noun.empty()) - { - std::list clauses(_not_derived_from_noun.size(), "noun_2_id = @NDERN"); - std::string cond = "noun_id NOT IN (SELECT noun_1_id FROM noun_noun_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; - conditions.push_back(cond); - } - */ - if (!conditions.empty()) - { - construct << " WHERE "; - construct << verbly::implode(std::begin(conditions), std::end(conditions), " AND "); - } - - if (_random) - { - construct << " ORDER BY RANDOM()"; - } - - if (_limit != unlimited) - { - construct << " LIMIT " << _limit; - } - - sqlite3_stmt* ppstmt; - std::string query = construct.str(); - if (sqlite3_prepare_v2(_data.ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) - { - throw std::runtime_error(sqlite3_errmsg(_data.ppdb)); - } - - int i = 1; - for (auto& binding : bindings) - { - switch (binding.get_type()) - { - case binding::type::integer: - { - sqlite3_bind_int(ppstmt, i, binding.get_integer()); - - break; - } - - case 
binding::type::string: - { - sqlite3_bind_text(ppstmt, i, binding.get_string().c_str(), binding.get_string().length(), SQLITE_TRANSIENT); - - break; - } - } - - i++; - } - - /* - for (auto adj : _derived_from_adjective) - { - sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@DERADJ"), adj._id); - } - - for (auto adj : _not_derived_from_adjective) - { - sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@NDERADJ"), adj._id); - } - - for (auto adv : _derived_from_adverb) - { - sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@DERADV"), adv._id); - } - - for (auto adv : _not_derived_from_adverb) - { - sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@NDERADV"), adv._id); - } - - for (auto n : _derived_from_noun) - { - sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@DERN"), n._id); - } - - for (auto n : _not_derived_from_noun) - { - sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@NDERN"), n._id); - } -*/ - std::list output; - while (sqlite3_step(ppstmt) == SQLITE_ROW) - { - noun tnc {_data, sqlite3_column_int(ppstmt, 0)}; - tnc._singular = std::string(reinterpret_cast(sqlite3_column_text(ppstmt, 1))); - - if (sqlite3_column_type(ppstmt, 2) != SQLITE_NULL) - { - tnc._plural = std::string(reinterpret_cast(sqlite3_column_text(ppstmt, 2))); - } - - tnc._wnid = sqlite3_column_int(ppstmt, 3); - - output.push_back(tnc); - } - - sqlite3_finalize(ppstmt); - - for (auto& noun : output) - { - query = "SELECT pronunciation, prerhyme, rhyme FROM noun_pronunciations WHERE noun_id = ?"; - if (sqlite3_prepare_v2(_data.ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) - { - throw std::runtime_error(sqlite3_errmsg(_data.ppdb)); - } - - sqlite3_bind_int(ppstmt, 1, noun._id); - - while (sqlite3_step(ppstmt) == SQLITE_ROW) - { - std::string pronunciation(reinterpret_cast(sqlite3_column_text(ppstmt, 0))); - auto phonemes = verbly::split>(pronunciation, " "); - - 
noun.pronunciations.push_back(phonemes); - - if ((sqlite3_column_type(ppstmt, 1) != SQLITE_NULL) && (sqlite3_column_type(ppstmt, 2) != SQLITE_NULL)) - { - std::string prerhyme(reinterpret_cast(sqlite3_column_text(ppstmt, 1))); - std::string rhyming(reinterpret_cast(sqlite3_column_text(ppstmt, 2))); - - noun.rhymes.emplace_back(prerhyme, rhyming); - } - } - - sqlite3_finalize(ppstmt); - } - - return output; - } - -}; diff --git a/lib/noun_query.h b/lib/noun_query.h deleted file mode 100644 index 74df260..0000000 --- a/lib/noun_query.h +++ /dev/null @@ -1,180 +0,0 @@ -#ifndef NOUN_QUERY_H_5DE51DD7 -#define NOUN_QUERY_H_5DE51DD7 - -namespace verbly { - - class noun_query { - public: - noun_query(const data& _data); - - noun_query& limit(int _limit); - noun_query& random(); - noun_query& except(const noun& _word); - noun_query& rhymes_with(const word& _word); - noun_query& rhymes_with(rhyme _r); - noun_query& has_pronunciation(); - noun_query& has_rhyming_noun(); - noun_query& has_rhyming_adjective(); - noun_query& has_rhyming_adverb(); - noun_query& has_rhyming_verb(); - noun_query& with_stress(filter> _arg); - - noun_query& with_singular_form(std::string _arg); - noun_query& with_prefix(filter _f); - noun_query& with_suffix(filter _f); - - noun_query& requires_plural_form(); - - noun_query& with_complexity(int _arg); - - noun_query& is_hypernym(); - noun_query& hypernym_of(filter _f); - noun_query& full_hypernym_of(filter _f); - - noun_query& is_hyponym(); - noun_query& hyponym_of(filter _f); - noun_query& full_hyponym_of(filter _f); - - noun_query& is_part_meronym(); - noun_query& part_meronym_of(filter _f); - noun_query& full_part_meronym_of(filter _f); - - noun_query& is_part_holonym(); - noun_query& part_holonym_of(filter _f); - noun_query& full_part_holonym_of(filter _f); - - noun_query& is_substance_meronym(); - noun_query& substance_meronym_of(filter _f); - noun_query& full_substance_meronym_of(filter _f); - - noun_query& is_substance_holonym(); - noun_query& 
substance_holonym_of(filter _f); - noun_query& full_substance_holonym_of(filter _f); - - noun_query& is_member_meronym(); - noun_query& member_meronym_of(filter _f); - noun_query& full_member_meronym_of(filter _f); - - noun_query& is_member_holonym(); - noun_query& member_holonym_of(filter _f); - noun_query& full_member_holonym_of(filter _f); - - noun_query& is_proper(); - noun_query& is_not_proper(); - - noun_query& is_instance(); - noun_query& instance_of(filter _f); - - noun_query& is_class(); - noun_query& class_of(filter _f); - - noun_query& has_synonyms(); - noun_query& synonym_of(filter _f); - - noun_query& has_antonyms(); - noun_query& antonym_of(filter _f); - - noun_query& has_pertainym(); - noun_query& anti_pertainym_of(filter _f); - - noun_query& is_attribute(); - noun_query& attribute_of(filter _f); - - noun_query& at_least_n_images(int _arg); - noun_query& with_wnid(int _arg); - -/* noun_query& derived_from(const word& _w); - noun_query& not_derived_from(const word& _w);*/ - - std::list run() const; - - const static int unlimited = -1; - - private: - const data& _data; - int _limit = unlimited; - bool _random = false; - std::list _rhymes; - std::list _except; - bool _has_prn = false; - bool _has_rhyming_noun = false; - bool _has_rhyming_adjective = false; - bool _has_rhyming_adverb = false; - bool _has_rhyming_verb = false; - filter> _stress; - - std::list _with_singular_form; - filter _with_prefix; - filter _with_suffix; - - int _with_complexity = unlimited; - - bool _requires_plural_form = false; - - bool _is_hypernym = false; - filter _hypernym_of; - filter _full_hypernym_of; - - bool _is_hyponym = false; - filter _hyponym_of; - filter _full_hyponym_of; - - bool _is_part_meronym = false; - filter _part_meronym_of; - filter _full_part_meronym_of; - - bool _is_substance_meronym = false; - filter _substance_meronym_of; - filter _full_substance_meronym_of; - - bool _is_member_meronym = false; - filter _member_meronym_of; - filter 
_full_member_meronym_of; - - bool _is_part_holonym = false; - filter _part_holonym_of; - filter _full_part_holonym_of; - - bool _is_substance_holonym = false; - filter _substance_holonym_of; - filter _full_substance_holonym_of; - - bool _is_member_holonym = false; - filter _member_holonym_of; - filter _full_member_holonym_of; - - bool _is_proper = false; - bool _is_not_proper = false; - - bool _is_instance = false; - filter _instance_of; - - bool _is_class = false; - filter _class_of; - - bool _has_synonyms = false; - filter _synonym_of; - - bool _has_antonyms = false; - filter _antonym_of; - - bool _has_pertainym = false; - filter _anti_pertainym_of; - - bool _is_attribute = false; - filter _attribute_of; - - int _at_least_n_images = unlimited; - std::set _with_wnid; - -/* std::list _derived_from_adjective; - std::list _not_derived_from_adjective; - std::list _derived_from_adverb; - std::list _not_derived_from_adverb; - std::list _derived_from_noun; - std::list _not_derived_from_noun;*/ - }; - -}; - -#endif /* end of include guard: NOUN_QUERY_H_5DE51DD7 */ diff --git a/lib/preposition.cpp b/lib/preposition.cpp deleted file mode 100644 index cea9165..0000000 --- a/lib/preposition.cpp +++ /dev/null @@ -1,107 +0,0 @@ -#include "verbly.h" - -namespace verbly { - - std::string preposition::get_form() const - { - return form; - } - - preposition_query::preposition_query(const data& _data) : _data(_data) - { - - } - - preposition_query& preposition_query::limit(int _limit) - { - this->_limit = _limit; - - return *this; - } - - preposition_query& preposition_query::random() - { - _random = true; - - return *this; - } - - preposition_query& preposition_query::in_group(std::string _arg) - { - _in_group.push_back(_arg); - - return *this; - } - - std::list preposition_query::run() const - { - std::stringstream construct; - construct << "SELECT form FROM prepositions"; - std::list bindings; - - if (!_in_group.empty()) - { - std::list clauses(_in_group.size(), "groupname = ?"); 
- construct << " WHERE preposition_id IN (SELECT preposition_id FROM preposition_groups WHERE "; - construct << verbly::implode(std::begin(clauses), std::end(clauses), " OR "); - construct << ")"; - - for (auto g : _in_group) - { - bindings.emplace_back(g); - } - } - - if (_random) - { - construct << " ORDER BY RANDOM()"; - } - - if (_limit != unlimited) - { - construct << " LIMIT " << _limit; - } - - sqlite3_stmt* ppstmt; - std::string query = construct.str(); - if (sqlite3_prepare_v2(_data.ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) - { - throw std::runtime_error(sqlite3_errmsg(_data.ppdb)); - } - - int i = 1; - for (auto& binding : bindings) - { - switch (binding.get_type()) - { - case binding::type::integer: - { - sqlite3_bind_int(ppstmt, i, binding.get_integer()); - - break; - } - - case binding::type::string: - { - sqlite3_bind_text(ppstmt, i, binding.get_string().c_str(), binding.get_string().length(), SQLITE_TRANSIENT); - - break; - } - } - - i++; - } - - std::list output; - while (sqlite3_step(ppstmt) == SQLITE_ROW) - { - preposition pp; - pp.form = std::string(reinterpret_cast(sqlite3_column_text(ppstmt, 0))); - - output.push_back(pp); - } - - return output; - } - -}; diff --git a/lib/preposition.h b/lib/preposition.h deleted file mode 100644 index 89f24fa..0000000 --- a/lib/preposition.h +++ /dev/null @@ -1,38 +0,0 @@ -#ifndef PREPOSITION_H_FF908021 -#define PREPOSITION_H_FF908021 - -namespace verbly { - - class preposition_query; - - class preposition { - public: - std::string get_form() const; - - private: - friend class preposition_query; - - std::string form; - }; - - class preposition_query { - public: - preposition_query(const data& _data); - - preposition_query& limit(int _limit); - preposition_query& random(); - preposition_query& in_group(std::string _arg); - - std::list run() const; - - const static int unlimited = -1; - private: - const data& _data; - int _limit = unlimited; - bool _random = false; - std::list _in_group; 
- }; - -}; - -#endif /* end of include guard: PREPOSITION_H_FF908021 */ diff --git a/lib/pronunciation.cpp b/lib/pronunciation.cpp new file mode 100644 index 0000000..f5b742f --- /dev/null +++ b/lib/pronunciation.cpp @@ -0,0 +1,69 @@ +#include "pronunciation.h" +#include +#include "form.h" +#include "lemma.h" +#include "word.h" +#include "util.h" + +namespace verbly { + + const object pronunciation::objectType = object::pronunciation; + + const std::list pronunciation::select = {"pronunciation_id", "phonemes", "syllables", "stress", "prerhyme", "rhyme"}; + + const field pronunciation::id = field::integerField(object::pronunciation, "pronunciation_id"); + const field pronunciation::numOfSyllables = field::integerField(object::pronunciation, "syllables"); + const field pronunciation::stress = field::stringField(object::pronunciation, "stress"); + + const field pronunciation::form = field::joinThrough(object::pronunciation, "pronunciation_id", object::form, "forms_pronunciations", "form_id"); + + const field pronunciation::prerhyme = field::stringField(object::pronunciation, "prerhyme", true); + const field pronunciation::rhyme = field::stringField(object::pronunciation, "rhyme", true); + + pronunciation::pronunciation(const database& db, sqlite3_stmt* row) : db_(&db), valid_(true) + { + id_ = sqlite3_column_int(row, 0); + + std::string phonemesStr(reinterpret_cast(sqlite3_column_text(row, 1))); + phonemes_ = split>(phonemesStr, " "); + + syllables_ = sqlite3_column_int(row, 2); + stress_ = std::string(reinterpret_cast(sqlite3_column_text(row, 3))); + + if (sqlite3_column_type(row, 5) != SQLITE_NULL) + { + hasRhyme_ = true; + + prerhyme_ = std::string(reinterpret_cast(sqlite3_column_text(row, 4))); + rhyme_ = std::string(reinterpret_cast(sqlite3_column_text(row, 5))); + } + } + + filter pronunciation::rhymesWith(const pronunciation& arg) + { + return (prerhyme != arg.getPrerhyme()) && (rhyme == arg.getRhyme()); + } + + /*filter pronunciation::rhymesWith(const class 
form& arg) + { + filter result; + + for (const pronunciation& p : arg.getPronunciations()) + { + result |= rhymesWith(p); + } + + return result; + } + + filter pronunciation::rhymesWith(const lemma& arg) + { + return rhymesWith(arg.getBaseForm()); + } + + filter pronunciation::rhymesWith(const word& arg) + { + return rhymesWith(arg.getLemma()); + }*/ + +}; diff --git a/lib/pronunciation.h b/lib/pronunciation.h new file mode 100644 index 0000000..c7a1d4d --- /dev/null +++ b/lib/pronunciation.h @@ -0,0 +1,163 @@ +#ifndef PRONUNCIATION_H_C68F86B0 +#define PRONUNCIATION_H_C68F86B0 + +#include +#include +#include +#include "field.h" +#include "filter.h" + +struct sqlite3_stmt; + +namespace verbly { + + class form; + class lemma; + class word; + class database; + + class pronunciation { + public: + + // Default constructor + + pronunciation() = default; + + // Construct from database + + pronunciation(const database& db, sqlite3_stmt* row); + + // Accessors + + operator bool() const + { + return valid_; + } + + int getId() const + { + if (!valid_) + { + throw std::domain_error("Bad access to uninitialized pronunciation"); + } + + return id_; + } + + const std::vector& getPhonemes() const + { + if (!valid_) + { + throw std::domain_error("Bad access to uninitialized pronunciation"); + } + + return phonemes_; + } + + int getSyllables() const + { + if (!valid_) + { + throw std::domain_error("Bad access to uninitialized pronunciation"); + } + + return syllables_; + } + + std::string getStress() const + { + if (!valid_) + { + throw std::domain_error("Bad access to uninitialized pronunciation"); + } + + return stress_; + } + + bool hasRhyme() const + { + if (!valid_) + { + throw std::domain_error("Bad access to uninitialized pronunciation"); + } + + return hasRhyme_; + } + + std::string getPrerhyme() const + { + if (!valid_) + { + throw std::domain_error("Bad access to uninitialized pronunciation"); + } + + if (!hasRhyme_) + { + throw std::domain_error("This pronunciation has 
no rhyme"); + } + + return prerhyme_; + } + + std::string getRhyme() const + { + if (!valid_) + { + throw std::domain_error("Bad access to uninitialized pronunciation"); + } + + if (!hasRhyme_) + { + throw std::domain_error("This pronunciation has no rhyme"); + } + + return rhyme_; + } + + // Type info + + static const object objectType; + + static const std::list select; + + // Query fields + + static const field id; + static const field numOfSyllables; + static const field stress; + + operator filter() const + { + return (id == id_); + } + + static filter rhymesWith(const pronunciation& arg); + static filter rhymesWith(const class form& arg); + static filter rhymesWith(const lemma& arg); + static filter rhymesWith(const word& arg); + + // Relationships to other objects + + static const field form; + + private: + bool valid_ = false; + + int id_; + std::vector phonemes_; + int syllables_; + std::string stress_; + bool hasRhyme_ = false; + std::string prerhyme_; + std::string rhyme_; + + const database* db_; + + static const field prerhyme; + static const field rhyme; + + }; + +}; + +#endif /* end of include guard: PRONUNCIATION_H_C68F86B0 */ diff --git a/lib/query.h b/lib/query.h new file mode 100644 index 0000000..e31be3d --- /dev/null +++ b/lib/query.h @@ -0,0 +1,123 @@ +#ifndef QUERY_H_7CC5284C +#define QUERY_H_7CC5284C + +#include +#include +#include +#include +#include +#include +#include "statement.h" +#include "binding.h" + +namespace verbly { + + class database_error : public std::logic_error { + public: + + database_error(std::string msg, std::string sqlMsg) : std::logic_error(msg + " (" + sqlMsg + ")") + { + } + }; + + template + class query { + public: + + query(const database& db, sqlite3* ppdb, filter queryFilter, bool random, int limit) : db_(&db) + { + statement stmt(Object::objectType, std::move(queryFilter)); + + std::string queryString = stmt.getQueryString(Object::select, random, limit); + std::list bindings = stmt.getBindings(); + + std::cout 
<< queryString << std::endl; + + if (sqlite3_prepare_v2(ppdb, queryString.c_str(), queryString.length(), &ppstmt_, NULL) != SQLITE_OK) + { + std::string errorMsg = sqlite3_errmsg(ppdb); + sqlite3_finalize(ppstmt_); + + throw database_error("Error preparing query", errorMsg); + } + + int i = 1; + for (const binding& value : bindings) + { + switch (value.getType()) + { + case binding::type::integer: + { + if (sqlite3_bind_int(ppstmt_, i, value.getInteger()) != SQLITE_OK) + { + std::string errorMsg = sqlite3_errmsg(ppdb); + sqlite3_finalize(ppstmt_); + + throw database_error("Error binding value to query", errorMsg); + } + + break; + } + + case binding::type::string: + { + if (sqlite3_bind_text(ppstmt_, i, value.getString().c_str(), value.getString().length(), SQLITE_TRANSIENT) != SQLITE_OK) + { + std::string errorMsg = sqlite3_errmsg(ppdb); + sqlite3_finalize(ppstmt_); + + throw database_error("Error binding value to query", errorMsg); + } + + break; + } + + case binding::type::invalid: + { /* A constructor that throws never reaches ~query(), so the prepared statement is released here, exactly as the other error paths above do. */ sqlite3_finalize(ppstmt_); + throw std::logic_error("Cannot use invalid bindings"); + } + } + + i++; + } + } + + ~query() + { + sqlite3_finalize(ppstmt_); + } + + std::vector all() const + { + std::vector result; + + while (sqlite3_step(ppstmt_) == SQLITE_ROW) + { + result.emplace_back(*db_, ppstmt_); + } + + sqlite3_reset(ppstmt_); + + return result; + } + + Object first() const + { + std::vector results = all(); + if (!results.empty()) + { + return results.front(); + } else { + throw std::logic_error("query returned empty dataset"); + } + } + + private: + const database* db_; + sqlite3_stmt* ppstmt_; + + }; + +}; + +#endif /* end of include guard: QUERY_H_7CC5284C */ diff --git a/lib/statement.cpp b/lib/statement.cpp new file mode 100644 index 0000000..52fa00d --- /dev/null +++ b/lib/statement.cpp @@ -0,0 +1,806 @@ +#include "statement.h" +#include +#include +#include "filter.h" +#include "util.h" +#include "notion.h" +#include "word.h" +#include "group.h" +#include "frame.h" +#include "lemma.h" +#include 
"form.h" +#include "pronunciation.h" + +namespace verbly { + + statement::statement( + object context, + filter queryFilter) : + statement(getTableForContext(context), queryFilter.normalize(context)) + { + } + + std::string statement::getQueryString(std::list select, bool random, int limit) const + { + std::stringstream queryStream; + + if (!withs_.empty()) + { + queryStream << "WITH RECURSIVE "; + + std::list ctes; + for (const with& cte : withs_) + { + std::stringstream cteStream; + cteStream << cte.getIdentifier(); + cteStream << " AS (SELECT "; + cteStream << cte.getTopTable(); + cteStream << ".* FROM "; + cteStream << cte.getTableForId(cte.getTopTable()); + cteStream << " AS "; + cteStream << cte.getTopTable(); + + for (const join& j : cte.getJoins()) + { + cteStream << " "; + cteStream << j; + } + + if (cte.getCondition().getType() != condition::type::empty) + { + cteStream << " WHERE "; + cteStream << cte.getCondition().toSql(); + } + + cteStream << " UNION SELECT l.* FROM "; + cteStream << cte.getIdentifier(); + cteStream << " AS t INNER JOIN "; + cteStream << cte.getField().getTable(); + cteStream << " AS j ON t."; + cteStream << cte.getField().getColumn(); + cteStream << " = j."; + cteStream << cte.getField().getForeignJoinColumn(); + cteStream << " INNER JOIN "; + cteStream << cte.getTableForId(cte.getTopTable()); + cteStream << " AS l ON j."; + cteStream << cte.getField().getJoinColumn(); + cteStream << " = l."; + cteStream << cte.getField().getColumn(); + cteStream << ")"; + + ctes.push_back(cteStream.str()); + } + + queryStream << implode(std::begin(ctes), std::end(ctes), ", "); + queryStream << " "; + } + + std::list realSelect; + for (std::string& s : select) + { + realSelect.push_back(topTable_ + "." 
+ s); + } + + queryStream << "SELECT "; + queryStream << implode(std::begin(realSelect), std::end(realSelect), ", "); + queryStream << " FROM "; + queryStream << tables_.at(topTable_); + queryStream << " AS "; + queryStream << topTable_; + + for (const join& j : joins_) + { + queryStream << " "; + queryStream << j; + } + + if (topCondition_.getType() != condition::type::empty) + { + queryStream << " WHERE "; + queryStream << topCondition_.toSql(); + } + + if (random) + { + queryStream << " ORDER BY RANDOM()"; + } + + if (limit > 0) + { + queryStream << " LIMIT "; + queryStream << limit; + } + + return queryStream.str(); + } + + std::list statement::getBindings() const + { + std::list result; + + for (const with& w : withs_) + { + for (binding value : w.getCondition().flattenBindings()) + { + result.push_back(std::move(value)); + } + } + + for (binding value : topCondition_.flattenBindings()) + { + result.push_back(std::move(value)); + } + + return result; + } + + statement::statement( + std::string tableName, + filter clause, + int nextTableId, + int nextWithId) : + nextTableId_(nextTableId), + nextWithId_(nextWithId), + topTable_(instantiateTable(std::move(tableName))), + topCondition_(parseFilter(std::move(clause))) + { + } + + statement::condition statement::parseFilter(filter clause) + { + switch (clause.getType()) + { + case filter::type::empty: + { + return {}; + } + + case filter::type::singleton: + { + switch (clause.getField().getType()) + { + case field::type::undefined: + { + return {}; + } + + case field::type::string: + case field::type::integer: + case field::type::boolean: + { + switch (clause.getComparison()) + { + case filter::comparison::is_null: + { + return condition(topTable_, clause.getField().getColumn(), true); + } + + case filter::comparison::is_not_null: + { + return condition(topTable_, clause.getField().getColumn(), false); + } + + case filter::comparison::int_equals: + { + return condition(topTable_, clause.getField().getColumn(), 
condition::comparison::equals, clause.getIntegerArgument()); + } + + case filter::comparison::int_does_not_equal: + { + return condition(topTable_, clause.getField().getColumn(), condition::comparison::does_not_equal, clause.getIntegerArgument()); + } + + case filter::comparison::int_is_at_least: + { + return condition(topTable_, clause.getField().getColumn(), condition::comparison::is_at_least, clause.getIntegerArgument()); + } + + case filter::comparison::int_is_greater_than: + { + return condition(topTable_, clause.getField().getColumn(), condition::comparison::is_greater_than, clause.getIntegerArgument()); + } + + case filter::comparison::int_is_at_most: + { + return condition(topTable_, clause.getField().getColumn(), condition::comparison::is_at_most, clause.getIntegerArgument()); + } + + case filter::comparison::int_is_less_than: + { + return condition(topTable_, clause.getField().getColumn(), condition::comparison::is_less_than, clause.getIntegerArgument()); + } + + case filter::comparison::boolean_equals: + { + return condition(topTable_, clause.getField().getColumn(), condition::comparison::equals, clause.getBooleanArgument() ? 
1 : 0); + } + + case filter::comparison::string_equals: + { + return condition(topTable_, clause.getField().getColumn(), condition::comparison::equals, clause.getStringArgument()); + } + + case filter::comparison::string_does_not_equal: + { + return condition(topTable_, clause.getField().getColumn(), condition::comparison::does_not_equal, clause.getStringArgument()); + } + + case filter::comparison::string_is_like: + { + return condition(topTable_, clause.getField().getColumn(), condition::comparison::is_like, clause.getStringArgument()); + } + + case filter::comparison::string_is_not_like: + { + return condition(topTable_, clause.getField().getColumn(), condition::comparison::is_not_like, clause.getStringArgument()); + } + + case filter::comparison::matches: + case filter::comparison::does_not_match: + case filter::comparison::hierarchally_matches: + case filter::comparison::does_not_hierarchally_match: + { + throw std::logic_error("Invalid comparison type for field"); + } + } + } + + case field::type::join: + { + std::string joinTableName; + if (clause.getField().hasTable()) + { + joinTableName = clause.getField().getTable(); + } else { + joinTableName = getTableForContext(clause.getField().getJoinObject()); + } + + statement joinStmt( + joinTableName, + clause.getJoinCondition().normalize(clause.getField().getJoinObject()), + nextTableId_, + nextWithId_); + + std::string joinTable = joinStmt.topTable_; + condition curCond = integrate(std::move(joinStmt)); + + bool outer = false; + if (clause.getComparison() == filter::comparison::does_not_match) + { + outer = true; + + curCond &= condition(joinTable, clause.getField().getColumn(), true); + } + + joins_.emplace_back(outer, joinTableName, topTable_, clause.getField().getColumn(), joinTable, clause.getField().getColumn()); + + return curCond; + } + + case field::type::join_through: + { + statement joinStmt( + getTableForContext(clause.getField().getJoinObject()), + 
clause.getJoinCondition().normalize(clause.getField().getJoinObject()), + nextTableId_, + nextWithId_); + + std::string joinTable = joinStmt.topTable_; + std::string throughTable = instantiateTable(clause.getField().getTable()); + condition curCond = integrate(std::move(joinStmt)); + + bool outer = false; + if (clause.getComparison() == filter::comparison::does_not_match) + { + outer = true; + + curCond &= condition(throughTable, clause.getField().getJoinColumn(), true); + } + + joins_.emplace_back(outer, clause.getField().getTable(), topTable_, clause.getField().getColumn(), throughTable, clause.getField().getJoinColumn()); + joins_.emplace_back(false, getTableForContext(clause.getField().getJoinObject()), throughTable, clause.getField().getForeignJoinColumn(), joinTable, clause.getField().getForeignColumn()); + + return curCond; + } + + case field::type::hierarchal_join: + { + std::string withName = std::string(clause.getField().getTable()) + "_tree_" + std::to_string(nextWithId_++); + std::string withInstName = instantiateTable(withName); + + bool outer = false; + if (clause.getComparison() == filter::comparison::does_not_hierarchally_match) + { + outer = true; + } + + joins_.emplace_back(outer, withName, topTable_, clause.getField().getColumn(), withInstName, clause.getField().getColumn()); + + statement withStmt( + getTableForContext(clause.getField().getObject()), + clause.getJoinCondition().normalize(clause.getField().getObject()), + nextTableId_, + nextWithId_); + + for (auto& w : withStmt.withs_) + { + withs_.push_back(std::move(w)); + } + + nextTableId_ = withStmt.nextTableId_; + nextWithId_ = withStmt.nextWithId_; + + withs_.emplace_back( + withName, + clause.getField(), + std::move(withStmt.tables_), + std::move(withStmt.topTable_), + std::move(withStmt.topCondition_), + std::move(withStmt.joins_)); + + if (clause.getComparison() == filter::comparison::does_not_hierarchally_match) + { + return condition(withInstName, clause.getField().getColumn(), 
true); + } else { + return {}; + } + } + } + } + + case filter::type::group: + { + condition grp(clause.getOrlogic()); + + for (const filter& child : clause) + { + condition newChild = parseFilter(child); + if (newChild.getType() != condition::type::empty) + { + grp += std::move(newChild); + } + } + + if (grp.getChildren().empty()) + { + grp = {}; + } + + return grp; + } + } + } + + std::string statement::instantiateTable(std::string name) + { + std::string identifier = name + "_" + std::to_string(nextTableId_++); + tables_[identifier] = name; + + return identifier; + } + + statement::condition statement::integrate(statement subStmt) + { + for (auto& mapping : subStmt.tables_) + { + tables_[mapping.first] = mapping.second; + } + + for (auto& j : subStmt.joins_) + { + joins_.push_back(j); + } + + for (auto& w : subStmt.withs_) + { + withs_.push_back(w); + } + + nextTableId_ = subStmt.nextTableId_; + nextWithId_ = subStmt.nextWithId_; + + return subStmt.topCondition_; + } + + std::ostream& operator<<(std::ostream& oss, const statement::join& j) + { + if (j.isOuterJoin()) + { + oss << "LEFT"; + } else { + oss << "INNER"; + } + + return oss + << " JOIN " + << j.getForeignTableName() + << " AS " + << j.getForeignTable() + << " ON " + << j.getForeignTable() + << "." + << j.getForeignColumn() + << " = " + << j.getJoinTable() + << "." 
+ << j.getJoinColumn(); + } + + statement::condition::condition(const condition& other) + { + type_ = other.type_; + + switch (type_) + { + case type::empty: + { + break; + } + + case type::singleton: + { + new(&singleton_.table_) std::string(other.singleton_.table_); + new(&singleton_.column_) std::string(other.singleton_.column_); + singleton_.comparison_ = other.singleton_.comparison_; + new(&singleton_.value_) binding(other.singleton_.value_); + + break; + } + + case type::group: + { + new(&group_.children_) std::list(other.group_.children_); + group_.orlogic_ = other.group_.orlogic_; + + break; + } + } + } + + statement::condition::condition(condition&& other) : condition() + { + swap(*this, other); + } + + statement::condition& statement::condition::operator=(condition other) + { + swap(*this, other); + + return *this; + } + + void swap(statement::condition& first, statement::condition& second) + { + using type = statement::condition::type; + using condition = statement::condition; + + type tempType = first.type_; + std::string tempTable; + std::string tempColumn; + condition::comparison tempComparison; + binding tempBinding; + std::list tempChildren; + bool tempOrlogic; + + switch (tempType) + { + case type::empty: + { + break; + } + + case type::singleton: + { + tempTable = std::move(first.singleton_.table_); + tempColumn = std::move(first.singleton_.column_); + tempComparison = first.singleton_.comparison_; + tempBinding = std::move(first.singleton_.value_); + + break; + } + + case type::group: + { + tempChildren = std::move(first.group_.children_); + tempOrlogic = first.group_.orlogic_; + + break; + } + } + + first.~condition(); + + first.type_ = second.type_; + + switch (first.type_) + { + case type::empty: + { + break; + } + + case type::singleton: + { + new(&first.singleton_.table_) std::string(std::move(second.singleton_.table_)); + new(&first.singleton_.column_) std::string(std::move(second.singleton_.column_)); + first.singleton_.comparison_ = 
second.singleton_.comparison_; + new(&first.singleton_.value_) binding(std::move(second.singleton_.value_)); + + break; + } + + case type::group: + { + new(&first.group_.children_) std::list(std::move(second.group_.children_)); + first.group_.orlogic_ = second.group_.orlogic_; + + break; + } + } + + second.~condition(); + + second.type_ = tempType; + + switch (second.type_) + { + case type::empty: + { + break; + } + + case type::singleton: + { + new(&second.singleton_.table_) std::string(std::move(tempTable)); + new(&second.singleton_.column_) std::string(std::move(tempColumn)); + second.singleton_.comparison_ = tempComparison; + new(&second.singleton_.value_) binding(std::move(tempBinding)); + + break; + } + + case type::group: + { + new(&second.group_.children_) std::list(std::move(tempChildren)); + second.group_.orlogic_ = tempOrlogic; + + break; + } + } + } + + statement::condition::~condition() + { + switch (type_) + { + case type::empty: + { + break; + } + + case type::singleton: + { + using string_type = std::string; + + singleton_.table_.~string_type(); + singleton_.column_.~string_type(); + singleton_.value_.~binding(); + + break; + } + + case type::group: + { + using list_type = std::list; + + group_.children_.~list_type(); + + break; + } + } + } + + statement::condition::condition() : type_(type::empty) + { + } + + statement::condition::condition( + std::string table, + std::string column, + bool isNull) : + type_(type::singleton) + { + new(&singleton_.table_) std::string(std::move(table)); + new(&singleton_.column_) std::string(std::move(column)); /* The copy constructor, swap() and the destructor copy, move and destroy value_ for every singleton, so it must be constructed even though a NULL test carries no value. */ new(&singleton_.value_) binding(); + + if (isNull) + { + singleton_.comparison_ = comparison::is_null; + } else { + singleton_.comparison_ = comparison::is_not_null; + } + } + + statement::condition::condition( + std::string table, + std::string column, + comparison comp, + binding value) : + type_(type::singleton) + { + new(&singleton_.table_) std::string(std::move(table)); + new(&singleton_.column_) std::string(std::move(column)); + 
singleton_.comparison_ = comp; + new(&singleton_.value_) binding(std::move(value)); + } + + std::string statement::condition::toSql() const + { + switch (type_) + { + case type::empty: + { + return ""; + } + + case type::singleton: + { + switch (singleton_.comparison_) + { + case comparison::equals: + { + return singleton_.table_ + "." + singleton_.column_ + " = ?"; + } + + case comparison::does_not_equal: + { + return singleton_.table_ + "." + singleton_.column_ + " != ?"; + } + + case comparison::is_greater_than: + { + return singleton_.table_ + "." + singleton_.column_ + " > ?"; + } + + case comparison::is_at_most: + { + return singleton_.table_ + "." + singleton_.column_ + " <= ?"; + } + + case comparison::is_less_than: + { + return singleton_.table_ + "." + singleton_.column_ + " < ?"; + } + + case comparison::is_at_least: + { + return singleton_.table_ + "." + singleton_.column_ + " >= ?"; + } + + case comparison::is_like: + { + return singleton_.table_ + "." + singleton_.column_ + " LIKE ?"; + } + + case comparison::is_not_like: + { + return singleton_.table_ + "." + singleton_.column_ + " NOT LIKE ?"; + } + + case comparison::is_not_null: + { + return singleton_.table_ + "." + singleton_.column_ + " IS NOT NULL"; + } + + case comparison::is_null: + { + return singleton_.table_ + "." + singleton_.column_ + " IS NULL"; + } + } + } + + case type::group: + { + std::list clauses; + for (const condition& cond : group_.children_) + { /* A nested group is parenthesized so that mixing AND and OR keeps the intended grouping; SQL binds AND tighter than OR. */ + clauses.push_back((cond.type_ == type::group) ? ("(" + cond.toSql() + ")") : cond.toSql()); + } + + return implode(std::begin(clauses), std::end(clauses), group_.orlogic_ ? 
" OR " : " AND "); + } + } + } + + std::list statement::condition::flattenBindings() const + { + switch (type_) + { + case type::empty: + { + return {}; + } + + case type::singleton: + { /* IS NULL and IS NOT NULL render no "?" placeholder in toSql(), so they must not contribute a binding. */ if ((singleton_.comparison_ == comparison::is_null) || (singleton_.comparison_ == comparison::is_not_null)) { return {}; } + return {singleton_.value_}; + } + + case type::group: + { + std::list bindings; + for (const condition& cond : group_.children_) + { + for (binding value : cond.flattenBindings()) + { + bindings.push_back(std::move(value)); + } + } + + return bindings; + } + } + } + + statement::condition::condition(bool orlogic) : type_(type::group) + { + new(&group_.children_) std::list(); + group_.orlogic_ = orlogic; + } + + statement::condition& statement::condition::operator+=(condition n) + { + if (type_ == type::group) + { + group_.children_.push_back(std::move(n)); + + return *this; + } else { + throw std::domain_error("Cannot add condition to non-group condition"); + } + } + + statement::condition& statement::condition::operator&=(condition n) + { + switch (type_) + { + case type::empty: + { + *this = std::move(n); + + break; + } + + case type::singleton: + { + condition grp(false); + grp += *this; + grp += std::move(n); + + *this = grp; + + break; + } + + case type::group: + { /* Appending to an OR group would make the operand one more alternative, so an OR group is first nested inside a fresh AND group. */ if (group_.orlogic_) { condition grp(false); grp += *this; grp += std::move(n); *this = grp; break; } + *this += std::move(n); + + break; + } + } + + return *this; + } + + const std::list& statement::condition::getChildren() const + { + if (type_ == type::group) + { + return group_.children_; + } else { + throw std::domain_error("Cannot get children of non-group condition"); + } + } + +}; diff --git a/lib/statement.h b/lib/statement.h new file mode 100644 index 0000000..a528d60 --- /dev/null +++ b/lib/statement.h @@ -0,0 +1,272 @@ +#ifndef STATEMENT_H_29F51659 +#define STATEMENT_H_29F51659 + +#include +#include +#include +#include +#include "binding.h" +#include "enums.h" +#include "field.h" +#include "filter.h" + +namespace verbly { + + class filter; + + class statement { + public: + + statement(object context, filter queryFilter); + + std::string getQueryString(std::list select, bool random, int limit) const; + 
+ std::list getBindings() const; + + private: + + class join { + public: + + join( + bool outer, + std::string foreignTableName, + std::string joinTable, + std::string joinColumn, + std::string foreignTable, + std::string foreignColumn) : + outer_(outer), + foreignTableName_(std::move(foreignTableName)), + joinTable_(std::move(joinTable)), + joinColumn_(std::move(joinColumn)), + foreignTable_(std::move(foreignTable)), + foreignColumn_(std::move(foreignColumn)) + { + } + + bool isOuterJoin() const + { + return outer_; + } + + const std::string& getForeignTableName() const + { + return foreignTableName_; + } + + const std::string& getJoinTable() const + { + return joinTable_; + } + + const std::string& getJoinColumn() const + { + return joinColumn_; + } + + const std::string& getForeignTable() const + { + return foreignTable_; + } + + const std::string& getForeignColumn() const + { + return foreignColumn_; + } + + private: + bool outer_ = false; + std::string foreignTableName_; + std::string joinTable_; + std::string joinColumn_; + std::string foreignTable_; + std::string foreignColumn_; + + }; + + friend std::ostream& operator<<(std::ostream& oss, const join& j); + + class condition { + public: + enum class type { + empty, + singleton, + group + }; + + enum class comparison { + equals, + does_not_equal, + is_greater_than, + is_at_most, + is_less_than, + is_at_least, + is_like, + is_not_like, + is_not_null, + is_null + }; + + // Copy and move constructors + + condition(const condition& other); + condition(condition&& other); + + // Assignment + + condition& operator=(condition other); + + // Swap + + friend void swap(condition& first, condition& second); + + // Destructor + + ~condition(); + + // Accessors + + type getType() const + { + return type_; + } + + // Empty + + condition(); + + // Singleton + + condition(std::string table, std::string column, bool isNull); + + condition(std::string table, std::string column, comparison comp, binding value); + + // Group + + 
explicit condition(bool orlogic); + + condition& operator+=(condition n); + + condition& operator&=(condition n); + + const std::list& getChildren() const; + + // Utility + + std::string toSql() const; + + std::list flattenBindings() const; + + private: + union { + struct { + std::string table_; + std::string column_; + comparison comparison_; + binding value_; + } singleton_; + struct { + std::list children_; + bool orlogic_; + } group_; + }; + type type_; + }; + + friend void swap(condition& first, condition& second); + + class with { + public: + + with( + std::string identifier, + field f, + std::map tables, + std::string topTable, + condition where, + std::list joins) : + identifier_(std::move(identifier)), + field_(f), + tables_(std::move(tables)), + topTable_(std::move(topTable)), + topCondition_(std::move(where)), + joins_(std::move(joins)) + { + } + + const std::string& getIdentifier() const + { + return identifier_; + } + + field getField() const + { + return field_; + } + + std::string getTableForId(std::string identifier) const + { + return tables_.at(identifier); + } + + const std::string& getTopTable() const + { + return topTable_; + } + + const condition& getCondition() const + { + return topCondition_; + } + + const std::list& getJoins() const + { + return joins_; + } + + private: + std::string identifier_; + field field_; + std::map tables_; + std::string topTable_; + condition topCondition_; + std::list joins_; + + }; + + static constexpr const char* getTableForContext(object context) + { + return (context == object::notion) ? "notions" + : (context == object::word) ? "words" + : (context == object::group) ? "groups" + : (context == object::frame) ? "frames" + : (context == object::lemma) ? "lemmas_forms" + : (context == object::form) ? "forms" + : (context == object::pronunciation) ? 
"pronunciations" + : throw std::domain_error("Provided context has no associated table"); + } + + static const std::list getSelectForContext(object context); + + statement(std::string tableName, filter clause, int nextTableId = 0, int nextWithId = 0); + + condition parseFilter(filter queryFilter); + + std::string instantiateTable(std::string name); + + condition integrate(statement subStmt); + + int nextTableId_; + int nextWithId_; + + std::map tables_; + std::string topTable_; + std::list joins_; + std::list withs_; + condition topCondition_; + + }; + +}; + +#endif /* end of include guard: STATEMENT_H_29F51659 */ diff --git a/lib/util.h b/lib/util.h index fb5fe67..b74b050 100644 --- a/lib/util.h +++ b/lib/util.h @@ -1,6 +1,10 @@ #ifndef UTIL_H_15DDCA2D #define UTIL_H_15DDCA2D +#include +#include +#include + namespace verbly { template @@ -21,25 +25,33 @@ namespace verbly { return result.str(); } - template - Container split(std::string input, std::string delimiter) + template + void split(std::string input, std::string delimiter, OutputIterator out) { - Container result; - while (!input.empty()) { int divider = input.find(delimiter); if (divider == std::string::npos) { - result.push_back(input); + *out = input; + out++; input = ""; } else { - result.push_back(input.substr(0, divider)); + *out = input.substr(0, divider); + out++; input = input.substr(divider+delimiter.length()); } } + } + + template + Container split(std::string input, std::string delimiter) + { + Container result; + + split(input, delimiter, std::back_inserter(result)); return result; } diff --git a/lib/verb.cpp b/lib/verb.cpp deleted file mode 100644 index 1f45d53..0000000 --- a/lib/verb.cpp +++ /dev/null @@ -1,64 +0,0 @@ -#include "verbly.h" - -namespace verbly { - - verb::verb() - { - - } - - verb::verb(const data& _data, int _id) : word(_data, _id) - { - - } - - std::string verb::base_form() const - { - assert(_valid == true); - - return _infinitive; - } - - std::string verb::infinitive_form() 
const - { - assert(_valid == true); - - return _infinitive; - } - - std::string verb::past_tense_form() const - { - assert(_valid == true); - - return _past_tense; - } - - std::string verb::past_participle_form() const - { - assert(_valid == true); - - return _past_participle; - } - - std::string verb::ing_form() const - { - assert(_valid == true); - - return _ing_form; - } - - std::string verb::s_form() const - { - assert(_valid == true); - - return _s_form; - } - - frame_query verb::frames() const - { - assert(_valid == true); - - return _data->frames().for_verb(*this); - } - -}; diff --git a/lib/verb.h b/lib/verb.h deleted file mode 100644 index 7a2486e..0000000 --- a/lib/verb.h +++ /dev/null @@ -1,34 +0,0 @@ -#ifndef VERB_H_BCC929AD -#define VERB_H_BCC929AD - -namespace verbly { - - class frame_query; - - class verb : public word { - private: - std::string _infinitive; - std::string _past_tense; - std::string _past_participle; - std::string _ing_form; - std::string _s_form; - - friend class verb_query; - - public: - verb(); - verb(const data& _data, int _id); - - std::string base_form() const; - std::string infinitive_form() const; - std::string past_tense_form() const; - std::string past_participle_form() const; - std::string ing_form() const; - std::string s_form() const; - - frame_query frames() const; - }; - -}; - -#endif /* end of include guard: VERB_H_BCC929AD */ diff --git a/lib/verb_query.cpp b/lib/verb_query.cpp deleted file mode 100644 index 4e6c253..0000000 --- a/lib/verb_query.cpp +++ /dev/null @@ -1,315 +0,0 @@ -#include "verbly.h" - -namespace verbly { - - verb_query::verb_query(const data& _data) : _data(_data) - { - - } - - verb_query& verb_query::limit(int _limit) - { - if ((_limit > 0) || (_limit == unlimited)) - { - this->_limit = _limit; - } - - return *this; - } - - verb_query& verb_query::random() - { - this->_random = true; - - return *this; - } - - verb_query& verb_query::except(const verb& _word) - { - _except.push_back(_word); - - 
return *this; - } - - verb_query& verb_query::rhymes_with(const word& _word) - { - for (auto rhyme : _word.get_rhymes()) - { - _rhymes.push_back(rhyme); - } - - if (dynamic_cast(&_word) != nullptr) - { - _except.push_back(dynamic_cast(_word)); - } - - return *this; - } - - verb_query& verb_query::rhymes_with(rhyme _r) - { - _rhymes.push_back(_r); - - return *this; - } - - verb_query& verb_query::has_pronunciation() - { - this->_has_prn = true; - - return *this; - } - - verb_query& verb_query::has_rhyming_noun() - { - _has_rhyming_noun = true; - - return *this; - } - - verb_query& verb_query::has_rhyming_adjective() - { - _has_rhyming_adjective = true; - - return *this; - } - - verb_query& verb_query::has_rhyming_adverb() - { - _has_rhyming_adverb = true; - - return *this; - } - - verb_query& verb_query::has_rhyming_verb() - { - _has_rhyming_verb = true; - - return *this; - } - - verb_query& verb_query::with_stress(filter> _arg) - { - _stress = _arg; - - return *this; - } - - verb_query& verb_query::has_frames() - { - this->_has_frames = true; - - return *this; - } - - std::list verb_query::run() const - { - std::stringstream construct; - construct << "SELECT verb_id, infinitive, past_tense, past_participle, ing_form, s_form FROM verbs"; - std::list conditions; - std::list bindings; - - if (_has_prn) - { - conditions.push_back("verb_id IN (SELECT verb_id FROM verb_pronunciations)"); - } - - if (!_rhymes.empty()) - { - std::list clauses(_rhymes.size(), "(prerhyme != ? 
AND rhyme = ?)"); - std::string cond = "verb_id IN (SELECT verb_id FROM verb_pronunciations WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; - conditions.push_back(cond); - - for (auto rhy : _rhymes) - { - bindings.emplace_back(rhy.get_prerhyme()); - bindings.emplace_back(rhy.get_rhyme()); - } - } - - if (_has_rhyming_noun) - { - conditions.push_back("verb_id IN (SELECT a.verb_id FROM verbs AS a INNER JOIN verb_pronunciations AS curp ON curp.noun_id = a.adverb_id INNER JOIN noun_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)"); - } - - if (_has_rhyming_adjective) - { - conditions.push_back("verb_id IN (SELECT a.verb_id FROM verbs AS a INNER JOIN verb_pronunciations AS curp ON curp.noun_id = a.adverb_id INNER JOIN adjective_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)"); - } - - if (_has_rhyming_adverb) - { - conditions.push_back("verb_id IN (SELECT a.verb_id FROM verbs AS a INNER JOIN verb_pronunciations AS curp ON curp.noun_id = a.adverb_id INNER JOIN adverb_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)"); - } - - if (_has_rhyming_verb) - { - conditions.push_back("verb_id IN (SELECT a.verb_id FROM verbs AS a INNER JOIN verb_pronunciations AS curp ON curp.noun_id = a.adverb_id INNER JOIN verb_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme AND rhmp.verb_id != curp.verb_id)"); - } - - if (!_stress.empty()) - { - std::stringstream cond; - if (_stress.get_notlogic()) - { - cond << "verb_id NOT IN"; - } else { - cond << "verb_id IN"; - } - - cond << "(SELECT verb_id FROM verb_pronunciations WHERE "; - - std::function>, bool)> recur = [&] (filter> f, bool notlogic) -> std::string { - switch (f.get_type()) - { - case filter>::type::singleton: - { - std::ostringstream _val; - for (auto syl : f.get_elem()) - { - if (syl) - { - _val << "1"; - } else { - _val << "0"; - } - } - - 
bindings.emplace_back(_val.str()); - - if (notlogic == f.get_notlogic()) - { - return "stress = ?"; - } else { - return "stress != ?"; - } - } - - case filter>::type::group: - { - bool truelogic = notlogic != f.get_notlogic(); - - std::list clauses; - std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter> f2) { - return recur(f2, truelogic); - }); - - if (truelogic == f.get_orlogic()) - { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; - } else { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; - } - } - } - }; - - cond << recur(_stress, _stress.get_notlogic()); - cond << ")"; - conditions.push_back(cond.str()); - } - - for (auto except : _except) - { - conditions.push_back("verb_id != ?"); - bindings.emplace_back(except._id); - } - - if (!_has_frames) - { - conditions.push_back("verb_id IN (SELECT verb_id FROM verb_groups)"); - } - - if (!conditions.empty()) - { - construct << " WHERE "; - construct << verbly::implode(std::begin(conditions), std::end(conditions), " AND "); - } - - if (_random) - { - construct << " ORDER BY RANDOM()"; - } - - if (_limit != unlimited) - { - construct << " LIMIT " << _limit; - } - - sqlite3_stmt* ppstmt; - std::string query = construct.str(); - if (sqlite3_prepare_v2(_data.ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) - { - throw std::runtime_error(sqlite3_errmsg(_data.ppdb)); - } - - int i = 1; - for (auto& binding : bindings) - { - switch (binding.get_type()) - { - case binding::type::integer: - { - sqlite3_bind_int(ppstmt, i, binding.get_integer()); - - break; - } - - case binding::type::string: - { - sqlite3_bind_text(ppstmt, i, binding.get_string().c_str(), binding.get_string().length(), SQLITE_TRANSIENT); - - break; - } - } - - i++; - } - - std::list output; - while (sqlite3_step(ppstmt) == SQLITE_ROW) - { - verb tnc {_data, sqlite3_column_int(ppstmt, 0)}; - tnc._infinitive = 
std::string(reinterpret_cast(sqlite3_column_text(ppstmt, 1))); - tnc._past_tense = std::string(reinterpret_cast(sqlite3_column_text(ppstmt, 2))); - tnc._past_participle = std::string(reinterpret_cast(sqlite3_column_text(ppstmt, 3))); - tnc._ing_form = std::string(reinterpret_cast(sqlite3_column_text(ppstmt, 4))); - tnc._s_form = std::string(reinterpret_cast(sqlite3_column_text(ppstmt, 5))); - - output.push_back(tnc); - } - - sqlite3_finalize(ppstmt); - - for (auto& verb : output) - { - query = "SELECT pronunciation, prerhyme, rhyme FROM verb_pronunciations WHERE verb_id = ?"; - if (sqlite3_prepare_v2(_data.ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) - { - throw std::runtime_error(sqlite3_errmsg(_data.ppdb)); - } - - sqlite3_bind_int(ppstmt, 1, verb._id); - - while (sqlite3_step(ppstmt) == SQLITE_ROW) - { - std::string pronunciation(reinterpret_cast(sqlite3_column_text(ppstmt, 0))); - auto phonemes = verbly::split>(pronunciation, " "); - - verb.pronunciations.push_back(phonemes); - - if ((sqlite3_column_type(ppstmt, 1) != SQLITE_NULL) && (sqlite3_column_type(ppstmt, 2) != SQLITE_NULL)) - { - std::string prerhyme(reinterpret_cast(sqlite3_column_text(ppstmt, 1))); - std::string rhyming(reinterpret_cast(sqlite3_column_text(ppstmt, 2))); - verb.rhymes.emplace_back(prerhyme, rhyming); - } - } - - sqlite3_finalize(ppstmt); - } - - return output; - } - -}; diff --git a/lib/verb_query.h b/lib/verb_query.h deleted file mode 100644 index 566ae37..0000000 --- a/lib/verb_query.h +++ /dev/null @@ -1,45 +0,0 @@ -#ifndef VERB_QUERY_H_34E5A679 -#define VERB_QUERY_H_34E5A679 - -namespace verbly { - - class verb_query { - public: - verb_query(const data& _data); - - verb_query& limit(int _limit); - verb_query& random(); - verb_query& except(const verb& _word); - verb_query& rhymes_with(const word& _word); - verb_query& rhymes_with(rhyme _r); - verb_query& has_pronunciation(); - verb_query& has_rhyming_noun(); - verb_query& has_rhyming_adjective(); - verb_query& 
has_rhyming_adverb(); - verb_query& has_rhyming_verb(); - verb_query& with_stress(filter> _arg); - - verb_query& has_frames(); - - std::list run() const; - - const static int unlimited = -1; - - private: - const data& _data; - int _limit = unlimited; - bool _random = false; - std::list _rhymes; - std::list _except; - bool _has_prn = false; - bool _has_frames = false; - bool _has_rhyming_noun = false; - bool _has_rhyming_adjective = false; - bool _has_rhyming_adverb = false; - bool _has_rhyming_verb = false; - filter> _stress; - }; - -}; - -#endif /* end of include guard: VERB_QUERY_H_34E5A679 */ diff --git a/lib/verbly.h b/lib/verbly.h index cfaf5bc..6dfc01a 100644 --- a/lib/verbly.h +++ b/lib/verbly.h @@ -1,35 +1,17 @@ #ifndef VERBLY_H_5B39CE50 #define VERBLY_H_5B39CE50 -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - #include "util.h" -#include "data.h" +#include "database.h" +#include "filter.h" +#include "field.h" +#include "query.h" +#include "notion.h" #include "word.h" -#include "verb.h" -#include "adverb.h" -#include "adjective.h" -#include "noun.h" +#include "group.h" #include "frame.h" -#include "preposition.h" -#include "token.h" -#include "noun_query.h" -#include "adverb_query.h" -#include "adjective_query.h" -#include "verb_query.h" -#include "frame_query.h" +#include "lemma.h" +#include "form.h" +#include "pronunciation.h" #endif /* end of include guard: VERBLY_H_5B39CE50 */ diff --git a/lib/word.cpp b/lib/word.cpp index 49e34a1..3edf2d2 100644 --- a/lib/word.cpp +++ b/lib/word.cpp @@ -1,60 +1,112 @@ -#include "verbly.h" -#include +#include "word.h" +#include +#include "form.h" +#include "util.h" +#include "database.h" +#include "query.h" namespace verbly { - rhyme::rhyme(std::string prerhyme, std::string phonemes) : _prerhyme(prerhyme), _rhyme(phonemes) - { - - } + const object word::objectType = object::word; - std::string rhyme::get_prerhyme() const - { - 
return _prerhyme; - } + const std::list word::select = {"word_id", "notion_id", "lemma_id", "tag_count", "position", "group_id"}; - std::string rhyme::get_rhyme() const - { - return _rhyme; - } + const field word::id = field::integerField(object::word, "word_id"); + const field word::tagCount = field::integerField(object::word, "tag_count", true); + const field word::adjectivePosition = field::integerField(object::word, "position", true); + + const field word::notion = field::joinField(object::word, "notion_id", object::notion); + const field word::lemma = field::joinField(object::word, "lemma_id", object::lemma); + const field word::group = field::joinField(object::word, "group_id", object::group, true); + + const field word::antonyms = field::selfJoin(object::word, "word_id", "antonymy", "antonym_2_id", "antonym_1_id"); + + const field word::specifications = field::selfJoin(object::word, "word_id", "specification", "general_id", "specific_id"); + const field word::generalizations = field::selfJoin(object::word, "word_id", "specification", "specific_id", "general_id"); - bool rhyme::operator==(const rhyme& other) const + const field word::pertainyms = field::selfJoin(object::word, "word_id", "pertainymy", "noun_id", "pertainym_id"); + const field word::antiPertainyms = field::selfJoin(object::word, "word_id", "pertainymy", "pertainym_id", "noun_id"); + + const field word::mannernyms = field::selfJoin(object::word, "word_id", "mannernymy", "adjective_id", "mannernym_id"); + const field word::antiMannernyms = field::selfJoin(object::word, "word_id", "mannernymy", "mannernym_id", "adjective_id"); + + const field word::usageTerms = field::selfJoin(object::word, "word_id", "usage", "domain_id", "term_id"); + const field word::usageDomains = field::selfJoin(object::word, "word_id", "usage", "term_id", "domain_id"); + + const field word::topicalTerms = field::selfJoin(object::word, "word_id", "topicality", "domain_id", "term_id"); + const field word::topicalDomains = 
field::selfJoin(object::word, "word_id", "topicality", "term_id", "domain_id"); + + const field word::regionalTerms = field::selfJoin(object::word, "word_id", "regionality", "domain_id", "term_id"); + const field word::regionalDomains = field::selfJoin(object::word, "word_id", "regionality", "term_id", "domain_id"); + + word::word(const database& db, sqlite3_stmt* row) : db_(&db), valid_(true) { - return std::tie(_prerhyme, _rhyme) == std::tie(other._prerhyme, other._rhyme); + id_ = sqlite3_column_int(row, 0); + notionId_ = sqlite3_column_int(row, 1); + lemmaId_ = sqlite3_column_int(row, 2); + + if (sqlite3_column_type(row, 3) != SQLITE_NULL) + { + hasTagCount_ = true; + tagCount_ = sqlite3_column_int(row, 3); + } + + if (sqlite3_column_type(row, 4) != SQLITE_NULL) + { + adjectivePosition_ = static_cast(sqlite3_column_int(row, 4)); + } + + if (sqlite3_column_type(row, 5) != SQLITE_NULL) + { + hasGroup_ = true; + groupId_ = sqlite3_column_int(row, 5); + } } - word::word() + const notion& word::getNotion() const { + if (!valid_) + { + throw std::domain_error("Bad access to uninitialized word"); + } + + if (!notion_) + { + notion_ = db_->notions(notion::id == notionId_).first(); + } + return notion_; } - word::word(const data& _data, int _id) : _data(&_data), _id(_id), _valid(true) + const lemma& word::getLemma() const { + if (!valid_) + { + throw std::domain_error("Bad access to uninitialized word"); + } + if (!lemma_) + { + lemma_ = db_->lemmas(lemma::id == lemmaId_).first(); + } + + return lemma_; } - std::list word::get_rhymes() const + std::string word::getBaseForm() const { - assert(_valid == true); - - return rhymes; + return getLemma().getBaseForm().getText(); } - bool word::starts_with_vowel_sound() const + std::list word::getInflections(inflection category) const { - assert(_valid == true); - - if (pronunciations.size() > 0) + std::list result; + for (const form& infl : getLemma().getInflections(category)) { - return std::any_of(std::begin(pronunciations), 
std::end(pronunciations), [] (std::list phonemes) { - return (phonemes.front().find_first_of("012") != std::string::npos); - }); - } else { - // If the word is not in CMUDICT, fall back to checking whether the first letter is a vowel - // Not perfect but will work in most cases - char ch = tolower(base_form().front()); - return (ch == 'a') || (ch == 'e') || (ch == 'i') || (ch == 'o') || (ch == 'u'); + result.push_back(infl.getText()); } + + return result; } }; diff --git a/lib/word.h b/lib/word.h index 08797a3..f71dad9 100644 --- a/lib/word.h +++ b/lib/word.h @@ -1,48 +1,173 @@ -#ifndef WORD_H_8FC89498 -#define WORD_H_8FC89498 +#ifndef WORD_H_DF91B1B4 +#define WORD_H_DF91B1B4 + +#include +#include +#include "field.h" +#include "filter.h" +#include "notion.h" +#include "lemma.h" +#include "group.h" + +struct sqlite3_stmt; namespace verbly { - class rhyme { - public: - rhyme(std::string prerhyme, std::string phonemes); + class database; + + class word { + public: + + // Default constructor + + word() = default; + + // Construct from database + + word(const database& db, sqlite3_stmt* row); + + // Accessors + + operator bool() const + { + return valid_; + } + + int getId() const + { + if (!valid_) + { + throw std::domain_error("Bad access to uninitialized word"); + } - std::string get_prerhyme() const; - std::string get_rhyme() const; + return id_; + } + + bool hasTagCount() const + { + if (!valid_) + { + throw std::domain_error("Bad access to uninitialized word"); + } - bool operator==(const rhyme& other) const; + return hasTagCount_; + } + + int getTagCount() const + { + if (!valid_) + { + throw std::domain_error("Bad access to uninitialized word"); + } - private: - std::string _prerhyme; - std::string _rhyme; - }; - - class word { - protected: - const data* _data; - int _id; - bool _valid = false; + if (!hasTagCount_) + { + throw std::domain_error("Word has no tag count"); + } - std::list> pronunciations; - std::list rhymes; + return tagCount_; + } + + bool 
hasAdjectivePositioning() const + { + if (!valid_) + { + throw std::domain_error("Bad access to uninitialized word"); + } - word(); - word(const data& _data, int _id); + return (adjectivePosition_ != positioning::undefined); + } + + positioning getAdjectivePosition() const + { + if (!valid_) + { + throw std::domain_error("Bad access to uninitialized word"); + } - friend class adjective_query; - friend class verb_query; - friend class noun_query; - friend class adverb_query; - friend class frame_query; - friend class preposition_query; - - public: - virtual std::string base_form() const = 0; + if (adjectivePosition_ == positioning::undefined) + { + throw std::domain_error("Word has no adjective position"); + } - std::list get_rhymes() const; - bool starts_with_vowel_sound() const; + return adjectivePosition_; + } + + const notion& getNotion() const; + + const lemma& getLemma() const; + + // Convenience accessors + + std::string getBaseForm() const; + + std::list getInflections(inflection infl) const; + + // Type info + + static const object objectType; + + static const std::list select; + + // Query fields + + static const field id; + static const field tagCount; + static const field adjectivePosition; + + operator filter() const + { + return (id == id_); + } + + // Relationships with other objects + + static const field notion; + static const field lemma; + static const field group; + + // Relationships with self + + static const field antonyms; + + static const field specifications; + static const field generalizations; + + static const field pertainyms; + static const field antiPertainyms; + + static const field mannernyms; + static const field antiMannernyms; + + static const field usageTerms; + static const field usageDomains; + + static const field topicalTerms; + static const field topicalDomains; + + static const field regionalTerms; + static const field regionalDomains; + + private: + bool valid_ = false; + + int id_; + bool hasTagCount_ = false; + int 
tagCount_; + positioning adjectivePosition_ = positioning::undefined; + int notionId_; + int lemmaId_; + bool hasGroup_ = false; + int groupId_; + + const database* db_; + + mutable class notion notion_; + mutable class lemma lemma_; + mutable class group group_; + }; }; -#endif /* end of include guard: WORD_H_8FC89498 */ +#endif /* end of include guard: WORD_H_DF91B1B4 */ -- cgit 1.4.1