diff options
78 files changed, 8971 insertions, 8696 deletions
| diff --git a/CMakeLists.txt b/CMakeLists.txt index 9c39d99..61fcce2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt | |||
| @@ -4,8 +4,10 @@ project (verbly) | |||
| 4 | find_package(PkgConfig) | 4 | find_package(PkgConfig) |
| 5 | pkg_check_modules(sqlite3 sqlite3>=3.8.3 REQUIRED) | 5 | pkg_check_modules(sqlite3 sqlite3>=3.8.3 REQUIRED) |
| 6 | 6 | ||
| 7 | set(CMAKE_BUILD_TYPE Debug) | ||
| 8 | |||
| 7 | include_directories(vendor/json) | 9 | include_directories(vendor/json) |
| 8 | add_library(verbly lib/data.cpp lib/adjective.cpp lib/noun.cpp lib/verb.cpp lib/adverb.cpp lib/token.cpp lib/word.cpp lib/frame.cpp lib/preposition.cpp lib/adjective_query.cpp lib/adverb_query.cpp lib/noun_query.cpp lib/verb_query.cpp lib/frame_query.cpp) | 10 | add_library(verbly lib/filter.cpp lib/field.cpp lib/notion.cpp lib/word.cpp lib/group.cpp lib/frame.cpp lib/lemma.cpp lib/form.cpp lib/pronunciation.cpp lib/statement.cpp lib/binding.cpp lib/database.cpp) |
| 9 | set_property(TARGET verbly PROPERTY CXX_STANDARD 11) | 11 | set_property(TARGET verbly PROPERTY CXX_STANDARD 11) |
| 10 | set_property(TARGET verbly PROPERTY CXX_STANDARD_REQUIRED ON) | 12 | set_property(TARGET verbly PROPERTY CXX_STANDARD_REQUIRED ON) |
| 11 | target_link_libraries(verbly ${sqlite3_LIBRARIES}) | 13 | target_link_libraries(verbly ${sqlite3_LIBRARIES}) |
| diff --git a/generator/CMakeLists.txt b/generator/CMakeLists.txt index 552526d..4f78eb8 100644 --- a/generator/CMakeLists.txt +++ b/generator/CMakeLists.txt | |||
| @@ -1,12 +1,12 @@ | |||
| 1 | cmake_minimum_required (VERSION 2.6) | 1 | cmake_minimum_required (VERSION 3.1) |
| 2 | project (generator) | 2 | project (generator) |
| 3 | 3 | ||
| 4 | find_package(PkgConfig) | 4 | find_package(PkgConfig) |
| 5 | pkg_check_modules(sqlite3 sqlite3 REQUIRED) | 5 | pkg_check_modules(sqlite3 sqlite3 REQUIRED) |
| 6 | find_package(libxml2 REQUIRED) | 6 | find_package(libxml2 REQUIRED) |
| 7 | 7 | ||
| 8 | include_directories(${sqlite3_INCLUDE_DIR} ${LIBXML2_INCLUDE_DIR} ../vendor/json/src) | 8 | include_directories(${sqlite3_INCLUDE_DIR} ${LIBXML2_INCLUDE_DIR} ../vendor/json) |
| 9 | add_executable(generator generator.cpp) | 9 | add_executable(generator notion.cpp word.cpp lemma.cpp form.cpp pronunciation.cpp group.cpp frame.cpp part.cpp selrestr.cpp database.cpp field.cpp generator.cpp main.cpp) |
| 10 | set_property(TARGET generator PROPERTY CXX_STANDARD 11) | 10 | set_property(TARGET generator PROPERTY CXX_STANDARD 11) |
| 11 | set_property(TARGET generator PROPERTY CXX_STANDARD_REQUIRED ON) | 11 | set_property(TARGET generator PROPERTY CXX_STANDARD_REQUIRED ON) |
| 12 | target_link_libraries(generator ${sqlite3_LIBRARIES} ${LIBXML2_LIBRARIES}) | 12 | target_link_libraries(generator ${sqlite3_LIBRARIES} ${LIBXML2_LIBRARIES}) |
| diff --git a/generator/database.cpp b/generator/database.cpp new file mode 100644 index 0000000..c7e4cfa --- /dev/null +++ b/generator/database.cpp | |||
| @@ -0,0 +1,173 @@ | |||
| 1 | #include "database.h" | ||
| 2 | #include <sqlite3.h> | ||
| 3 | #include <cassert> | ||
| 4 | #include <fstream> | ||
| 5 | #include <stdexcept> | ||
| 6 | #include <cstdio> | ||
| 7 | #include <sstream> | ||
| 8 | #include "field.h" | ||
| 9 | #include "../lib/util.h" | ||
| 10 | |||
| 11 | namespace verbly { | ||
| 12 | namespace generator { | ||
| 13 | |||
| 14 | sqlite3_error::sqlite3_error( | ||
| 15 | const std::string& what, | ||
| 16 | const std::string& db_err) : | ||
| 17 | what_(what + " (" + db_err + ")"), | ||
| 18 | db_err_(db_err) | ||
| 19 | { | ||
| 20 | } | ||
| 21 | |||
| 22 | const char* sqlite3_error::what() const noexcept | ||
| 23 | { | ||
| 24 | return what_.c_str(); | ||
| 25 | } | ||
| 26 | |||
| 27 | const char* sqlite3_error::db_err() const noexcept | ||
| 28 | { | ||
| 29 | return db_err_.c_str(); | ||
| 30 | } | ||
| 31 | |||
| 32 | database::database(std::string path) | ||
| 33 | { | ||
| 34 | // If there is already a file at this path, overwrite it. | ||
| 35 | if (std::ifstream(path)) | ||
| 36 | { | ||
| 37 | if (std::remove(path.c_str())) | ||
| 38 | { | ||
| 39 | throw std::logic_error("Could not overwrite file at path"); | ||
| 40 | } | ||
| 41 | } | ||
| 42 | |||
| 43 | if (sqlite3_open_v2(path.c_str(), &ppdb_, SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE, NULL) != SQLITE_OK) | ||
| 44 | { | ||
| 45 | // We still have to free the resources allocated. In the event that | ||
| 46 | // allocation failed, ppdb will be null and sqlite3_close_v2 will just | ||
| 47 | // ignore it. | ||
| 48 | std::string errmsg(sqlite3_errmsg(ppdb_)); | ||
| 49 | sqlite3_close_v2(ppdb_); | ||
| 50 | |||
| 51 | throw sqlite3_error("Could not create output datafile", errmsg); | ||
| 52 | } | ||
| 53 | } | ||
| 54 | |||
| 55 | database::database(database&& other) : database() | ||
| 56 | { | ||
| 57 | swap(*this, other); | ||
| 58 | } | ||
| 59 | |||
| 60 | database& database::operator=(database&& other) | ||
| 61 | { | ||
| 62 | swap(*this, other); | ||
| 63 | |||
| 64 | return *this; | ||
| 65 | } | ||
| 66 | |||
| 67 | void swap(database& first, database& second) | ||
| 68 | { | ||
| 69 | std::swap(first.ppdb_, second.ppdb_); | ||
| 70 | } | ||
| 71 | |||
| 72 | database::~database() | ||
| 73 | { | ||
| 74 | sqlite3_close_v2(ppdb_); | ||
| 75 | } | ||
| 76 | |||
| 77 | void database::runQuery(std::string query) | ||
| 78 | { | ||
| 79 | // This can only happen when doing bad things with move semantics. | ||
| 80 | assert(ppdb_ != nullptr); | ||
| 81 | |||
| 82 | sqlite3_stmt* ppstmt; | ||
| 83 | |||
| 84 | if (sqlite3_prepare_v2(ppdb_, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
| 85 | { | ||
| 86 | throw sqlite3_error("Error writing to database", sqlite3_errmsg(ppdb_)); | ||
| 87 | } | ||
| 88 | |||
| 89 | int result = sqlite3_step(ppstmt); | ||
| 90 | sqlite3_finalize(ppstmt); | ||
| 91 | |||
| 92 | if (result != SQLITE_DONE) | ||
| 93 | { | ||
| 94 | throw sqlite3_error("Error writing to database", sqlite3_errmsg(ppdb_)); | ||
| 95 | } | ||
| 96 | } | ||
| 97 | |||
| 98 | void database::insertIntoTable(std::string table, std::list<field> fields) | ||
| 99 | { | ||
| 100 | // This can only happen when doing bad things with move semantics. | ||
| 101 | assert(ppdb_ != nullptr); | ||
| 102 | |||
| 103 | // This shouldn't happen. | ||
| 104 | assert(!fields.empty()); | ||
| 105 | |||
| 106 | std::list<std::string> fieldNames; | ||
| 107 | std::list<std::string> qs; | ||
| 108 | for (field& f : fields) | ||
| 109 | { | ||
| 110 | fieldNames.push_back(f.getName()); | ||
| 111 | qs.push_back("?"); | ||
| 112 | } | ||
| 113 | |||
| 114 | std::ostringstream query; | ||
| 115 | query << "INSERT INTO "; | ||
| 116 | query << table; | ||
| 117 | query << " ("; | ||
| 118 | query << implode(std::begin(fieldNames), std::end(fieldNames), ", "); | ||
| 119 | query << ") VALUES ("; | ||
| 120 | query << implode(std::begin(qs), std::end(qs), ", "); | ||
| 121 | query << ")"; | ||
| 122 | |||
| 123 | std::string query_str = query.str(); | ||
| 124 | |||
| 125 | sqlite3_stmt* ppstmt; | ||
| 126 | |||
| 127 | if (sqlite3_prepare_v2(ppdb_, query_str.c_str(), query_str.length(), &ppstmt, NULL) != SQLITE_OK) | ||
| 128 | { | ||
| 129 | throw sqlite3_error("Error writing to database", sqlite3_errmsg(ppdb_)); | ||
| 130 | } | ||
| 131 | |||
| 132 | int i = 1; | ||
| 133 | for (field& f : fields) | ||
| 134 | { | ||
| 135 | switch (f.getType()) | ||
| 136 | { | ||
| 137 | case field::type::integer: | ||
| 138 | { | ||
| 139 | sqlite3_bind_int(ppstmt, i, f.getInteger()); | ||
| 140 | |||
| 141 | break; | ||
| 142 | } | ||
| 143 | |||
| 144 | case field::type::string: | ||
| 145 | { | ||
| 146 | sqlite3_bind_text(ppstmt, i, f.getString().c_str(), f.getString().length(), SQLITE_TRANSIENT); | ||
| 147 | |||
| 148 | break; | ||
| 149 | } | ||
| 150 | |||
| 151 | case field::type::invalid: | ||
| 152 | { | ||
| 153 | // Fields can only be invalid when doing bad things with move semantics. | ||
| 154 | assert(false); | ||
| 155 | |||
| 156 | break; | ||
| 157 | } | ||
| 158 | } | ||
| 159 | |||
| 160 | i++; | ||
| 161 | } | ||
| 162 | |||
| 163 | int result = sqlite3_step(ppstmt); | ||
| 164 | sqlite3_finalize(ppstmt); | ||
| 165 | |||
| 166 | if (result != SQLITE_DONE) | ||
| 167 | { | ||
| 168 | throw sqlite3_error("Error writing to database", sqlite3_errmsg(ppdb_)); | ||
| 169 | } | ||
| 170 | } | ||
| 171 | |||
| 172 | }; | ||
| 173 | }; | ||
| diff --git a/generator/database.h b/generator/database.h new file mode 100644 index 0000000..15cdff5 --- /dev/null +++ b/generator/database.h | |||
| @@ -0,0 +1,73 @@ | |||
| 1 | #ifndef DATABASE_H_0B0A47D2 | ||
| 2 | #define DATABASE_H_0B0A47D2 | ||
| 3 | |||
| 4 | #include <string> | ||
| 5 | #include <exception> | ||
| 6 | #include <list> | ||
| 7 | |||
| 8 | struct sqlite3; | ||
| 9 | |||
| 10 | namespace verbly { | ||
| 11 | namespace generator { | ||
| 12 | |||
| 13 | class field; | ||
| 14 | |||
// Exception describing a failed SQLite operation. It carries both a combined
// message (returned by what()) and the database's own error text (returned by
// db_err()).
class sqlite3_error : public std::exception {
public:

  // what:   description of the operation that failed.
  // db_err: error text reported by the database.
  sqlite3_error(const std::string& what, const std::string& db_err);

  // Combined message.
  const char* what() const noexcept override;

  // Database error text only.
  const char* db_err() const noexcept;

private:

  std::string what_;
  std::string db_err_;

};
| 28 | |||
| 29 | class database { | ||
| 30 | public: | ||
| 31 | |||
| 32 | // Constructor | ||
| 33 | |||
| 34 | explicit database(std::string path); | ||
| 35 | |||
| 36 | // Disable copying | ||
| 37 | |||
| 38 | database(const database& other) = delete; | ||
| 39 | database& operator=(const database& other) = delete; | ||
| 40 | |||
| 41 | // Move constructor and move assignment | ||
| 42 | |||
| 43 | database(database&& other); | ||
| 44 | database& operator=(database&& other); | ||
| 45 | |||
| 46 | // Swap | ||
| 47 | |||
| 48 | friend void swap(database& first, database& second); | ||
| 49 | |||
| 50 | // Destructor | ||
| 51 | |||
| 52 | ~database(); | ||
| 53 | |||
| 54 | // Actions | ||
| 55 | |||
| 56 | void runQuery(std::string query); | ||
| 57 | |||
| 58 | void insertIntoTable(std::string table, std::list<field> fields); | ||
| 59 | |||
| 60 | private: | ||
| 61 | |||
| 62 | database() | ||
| 63 | { | ||
| 64 | } | ||
| 65 | |||
| 66 | sqlite3* ppdb_ = nullptr; | ||
| 67 | |||
| 68 | }; | ||
| 69 | |||
| 70 | }; | ||
| 71 | }; | ||
| 72 | |||
| 73 | #endif /* end of include guard: DATABASE_H_0B0A47D2 */ | ||
| diff --git a/generator/field.cpp b/generator/field.cpp new file mode 100644 index 0000000..84b2f91 --- /dev/null +++ b/generator/field.cpp | |||
| @@ -0,0 +1,193 @@ | |||
| 1 | #include "field.h" | ||
| 2 | #include <stdexcept> | ||
| 3 | #include <utility> | ||
| 4 | |||
| 5 | namespace verbly { | ||
| 6 | namespace generator { | ||
| 7 | |||
| 8 | field::field(const field& other) | ||
| 9 | { | ||
| 10 | type_ = other.type_; | ||
| 11 | name_ = other.name_; | ||
| 12 | |||
| 13 | switch (type_) | ||
| 14 | { | ||
| 15 | case type::integer: | ||
| 16 | { | ||
| 17 | integer_ = other.integer_; | ||
| 18 | |||
| 19 | break; | ||
| 20 | } | ||
| 21 | |||
| 22 | case type::string: | ||
| 23 | { | ||
| 24 | new(&string_) std::string(other.string_); | ||
| 25 | |||
| 26 | break; | ||
| 27 | } | ||
| 28 | |||
| 29 | case type::invalid: | ||
| 30 | { | ||
| 31 | break; | ||
| 32 | } | ||
| 33 | } | ||
| 34 | } | ||
| 35 | |||
| 36 | field::field(field&& other) : field() | ||
| 37 | { | ||
| 38 | swap(*this, other); | ||
| 39 | } | ||
| 40 | |||
| 41 | field& field::operator=(field other) | ||
| 42 | { | ||
| 43 | swap(*this, other); | ||
| 44 | |||
| 45 | return *this; | ||
| 46 | } | ||
| 47 | |||
| 48 | void swap(field& first, field& second) | ||
| 49 | { | ||
| 50 | using type = field::type; | ||
| 51 | |||
| 52 | type tempType = first.type_; | ||
| 53 | std::string tempName = std::move(first.name_); | ||
| 54 | int tempInteger; | ||
| 55 | std::string tempString; | ||
| 56 | |||
| 57 | switch (first.type_) | ||
| 58 | { | ||
| 59 | case type::integer: | ||
| 60 | { | ||
| 61 | tempInteger = first.integer_; | ||
| 62 | |||
| 63 | break; | ||
| 64 | } | ||
| 65 | |||
| 66 | case type::string: | ||
| 67 | { | ||
| 68 | tempString = std::move(tempString); | ||
| 69 | |||
| 70 | break; | ||
| 71 | } | ||
| 72 | |||
| 73 | case type::invalid: | ||
| 74 | { | ||
| 75 | break; | ||
| 76 | } | ||
| 77 | } | ||
| 78 | |||
| 79 | first.~field(); | ||
| 80 | |||
| 81 | first.type_ = second.type_; | ||
| 82 | first.name_ = std::move(second.name_); | ||
| 83 | |||
| 84 | switch (second.type_) | ||
| 85 | { | ||
| 86 | case type::integer: | ||
| 87 | { | ||
| 88 | first.integer_ = second.integer_; | ||
| 89 | |||
| 90 | break; | ||
| 91 | } | ||
| 92 | |||
| 93 | case type::string: | ||
| 94 | { | ||
| 95 | new(&first.string_) std::string(std::move(second.string_)); | ||
| 96 | |||
| 97 | break; | ||
| 98 | } | ||
| 99 | |||
| 100 | case type::invalid: | ||
| 101 | { | ||
| 102 | break; | ||
| 103 | } | ||
| 104 | } | ||
| 105 | |||
| 106 | second.~field(); | ||
| 107 | |||
| 108 | second.type_ = tempType; | ||
| 109 | second.name_ = std::move(tempName); | ||
| 110 | |||
| 111 | switch (tempType) | ||
| 112 | { | ||
| 113 | case type::integer: | ||
| 114 | { | ||
| 115 | second.integer_ = tempInteger; | ||
| 116 | |||
| 117 | break; | ||
| 118 | } | ||
| 119 | |||
| 120 | case type::string: | ||
| 121 | { | ||
| 122 | new(&second.string_) std::string(std::move(tempString)); | ||
| 123 | |||
| 124 | break; | ||
| 125 | } | ||
| 126 | |||
| 127 | case type::invalid: | ||
| 128 | { | ||
| 129 | break; | ||
| 130 | } | ||
| 131 | } | ||
| 132 | } | ||
| 133 | |||
| 134 | field::~field() | ||
| 135 | { | ||
| 136 | switch (type_) | ||
| 137 | { | ||
| 138 | case type::string: | ||
| 139 | { | ||
| 140 | using string_type = std::string; | ||
| 141 | string_.~string_type(); | ||
| 142 | |||
| 143 | break; | ||
| 144 | } | ||
| 145 | |||
| 146 | case type::integer: | ||
| 147 | case type::invalid: | ||
| 148 | { | ||
| 149 | break; | ||
| 150 | } | ||
| 151 | } | ||
| 152 | } | ||
| 153 | |||
| 154 | field::field( | ||
| 155 | std::string name, | ||
| 156 | int arg) : | ||
| 157 | type_(type::integer), | ||
| 158 | name_(name), | ||
| 159 | integer_(arg) | ||
| 160 | { | ||
| 161 | } | ||
| 162 | |||
| 163 | int field::getInteger() const | ||
| 164 | { | ||
| 165 | if (type_ != type::integer) | ||
| 166 | { | ||
| 167 | throw std::domain_error("field::getInteger called on non-integer field"); | ||
| 168 | } | ||
| 169 | |||
| 170 | return integer_; | ||
| 171 | } | ||
| 172 | |||
| 173 | field::field( | ||
| 174 | std::string name, | ||
| 175 | std::string arg) : | ||
| 176 | type_(type::string), | ||
| 177 | name_(name) | ||
| 178 | { | ||
| 179 | new(&string_) std::string(arg); | ||
| 180 | } | ||
| 181 | |||
| 182 | std::string field::getString() const | ||
| 183 | { | ||
| 184 | if (type_ != type::string) | ||
| 185 | { | ||
| 186 | throw std::domain_error("field::getString called on non-string field"); | ||
| 187 | } | ||
| 188 | |||
| 189 | return string_; | ||
| 190 | } | ||
| 191 | |||
| 192 | }; | ||
| 193 | }; | ||
| diff --git a/generator/field.h b/generator/field.h new file mode 100644 index 0000000..1fbabfc --- /dev/null +++ b/generator/field.h | |||
| @@ -0,0 +1,76 @@ | |||
| 1 | #ifndef BINDING_H_CAE0B18E | ||
| 2 | #define BINDING_H_CAE0B18E | ||
| 3 | |||
| 4 | #include <string> | ||
| 5 | |||
| 6 | namespace verbly { | ||
| 7 | namespace generator { | ||
| 8 | |||
// A named value destined for one column of a database row. The value is either
// an integer or a string; the type tag records which one is stored.
class field {
public:

  // Kind of value held. A field is only invalid when it was produced by the
  // private default constructor, as happens to the source of a move.
  enum class type {
    invalid,
    integer,
    string
  };

  // Copying and moving.
  field(const field& other);
  field(field&& other);

  // Assignment from either a copy or a moved value.
  field& operator=(field other);

  // Exchanges the state of two fields.
  friend void swap(field& first, field& second);

  ~field();

  // Kind of value held by this field.
  type getType() const
  {
    return type_;
  }

  // Name of the field.
  std::string getName() const
  {
    return name_;
  }

  // Integer fields.
  field(std::string name, int arg);

  int getInteger() const;

  // String fields.
  field(std::string name, std::string arg);

  std::string getString() const;

private:

  // Produces an invalid, unnamed field. The body has to be user-provided:
  // the union below contains a std::string, so no constructor is generated
  // for it implicitly.
  field()
  {
  }

  // Payload; which member is active is recorded in type_.
  union {
    int integer_;
    std::string string_;
  };

  type type_ = type::invalid;
  std::string name_;
};
| 72 | |||
| 73 | }; | ||
| 74 | }; | ||
| 75 | |||
| 76 | #endif /* end of include guard: BINDING_H_CAE0B18E */ | ||
| diff --git a/generator/form.cpp b/generator/form.cpp new file mode 100644 index 0000000..6be9d47 --- /dev/null +++ b/generator/form.cpp | |||
| @@ -0,0 +1,53 @@ | |||
#include "form.h"
#include <algorithm>
#include <cctype>
#include <list>
#include "database.h"
#include "field.h"
#include "pronunciation.h"
| 7 | |||
| 8 | namespace verbly { | ||
| 9 | namespace generator { | ||
| 10 | |||
| 11 | int form::nextId_ = 0; | ||
| 12 | |||
| 13 | form::form(std::string text) : | ||
| 14 | id_(nextId_++), | ||
| 15 | text_(text), | ||
| 16 | complexity_(std::count(std::begin(text), std::end(text), ' ') + 1), | ||
| 17 | proper_(std::any_of(std::begin(text), std::end(text), std::isupper)) | ||
| 18 | { | ||
| 19 | } | ||
| 20 | |||
| 21 | void form::addPronunciation(const pronunciation& p) | ||
| 22 | { | ||
| 23 | pronunciations_.insert(&p); | ||
| 24 | } | ||
| 25 | |||
| 26 | database& operator<<(database& db, const form& arg) | ||
| 27 | { | ||
| 28 | // Serialize the form first. | ||
| 29 | { | ||
| 30 | std::list<field> fields; | ||
| 31 | fields.emplace_back("form_id", arg.getId()); | ||
| 32 | fields.emplace_back("form", arg.getText()); | ||
| 33 | fields.emplace_back("complexity", arg.getComplexity()); | ||
| 34 | fields.emplace_back("proper", arg.isProper()); | ||
| 35 | |||
| 36 | db.insertIntoTable("forms", std::move(fields)); | ||
| 37 | } | ||
| 38 | |||
| 39 | // Then, serialize the form/pronunciation relationship. | ||
| 40 | for (const pronunciation* p : arg.getPronunciations()) | ||
| 41 | { | ||
| 42 | std::list<field> fields; | ||
| 43 | fields.emplace_back("form_id", arg.getId()); | ||
| 44 | fields.emplace_back("pronunciation_id", p->getId()); | ||
| 45 | |||
| 46 | db.insertIntoTable("forms_pronunciations", std::move(fields)); | ||
| 47 | } | ||
| 48 | |||
| 49 | return db; | ||
| 50 | } | ||
| 51 | |||
| 52 | }; | ||
| 53 | }; | ||
| diff --git a/generator/form.h b/generator/form.h new file mode 100644 index 0000000..5576035 --- /dev/null +++ b/generator/form.h | |||
| @@ -0,0 +1,71 @@ | |||
| 1 | #ifndef FORM_H_7EFBC970 | ||
| 2 | #define FORM_H_7EFBC970 | ||
| 3 | |||
| 4 | #include <string> | ||
| 5 | #include <set> | ||
| 6 | |||
| 7 | namespace verbly { | ||
| 8 | namespace generator { | ||
| 9 | |||
| 10 | class pronunciation; | ||
| 11 | class database; | ||
| 12 | |||
| 13 | class form { | ||
| 14 | public: | ||
| 15 | |||
| 16 | // Constructor | ||
| 17 | |||
| 18 | explicit form(std::string text); | ||
| 19 | |||
| 20 | // Mutators | ||
| 21 | |||
| 22 | void addPronunciation(const pronunciation& p); | ||
| 23 | |||
| 24 | // Accessors | ||
| 25 | |||
| 26 | int getId() const | ||
| 27 | { | ||
| 28 | return id_; | ||
| 29 | } | ||
| 30 | |||
| 31 | std::string getText() const | ||
| 32 | { | ||
| 33 | return text_; | ||
| 34 | } | ||
| 35 | |||
| 36 | int getComplexity() const | ||
| 37 | { | ||
| 38 | return complexity_; | ||
| 39 | } | ||
| 40 | |||
| 41 | bool isProper() const | ||
| 42 | { | ||
| 43 | return proper_; | ||
| 44 | } | ||
| 45 | |||
| 46 | std::set<const pronunciation*> getPronunciations() const | ||
| 47 | { | ||
| 48 | return pronunciations_; | ||
| 49 | } | ||
| 50 | |||
| 51 | private: | ||
| 52 | |||
| 53 | static int nextId_; | ||
| 54 | |||
| 55 | const int id_; | ||
| 56 | const std::string text_; | ||
| 57 | const int complexity_; | ||
| 58 | const bool proper_; | ||
| 59 | |||
| 60 | std::set<const pronunciation*> pronunciations_; | ||
| 61 | |||
| 62 | }; | ||
| 63 | |||
| 64 | // Serializer | ||
| 65 | |||
| 66 | database& operator<<(database& db, const form& arg); | ||
| 67 | |||
| 68 | }; | ||
| 69 | }; | ||
| 70 | |||
| 71 | #endif /* end of include guard: FORM_H_7EFBC970 */ | ||
| diff --git a/generator/frame.cpp b/generator/frame.cpp new file mode 100644 index 0000000..9f0653f --- /dev/null +++ b/generator/frame.cpp | |||
| @@ -0,0 +1,83 @@ | |||
| 1 | #include "frame.h" | ||
| 2 | #include "database.h" | ||
| 3 | #include "field.h" | ||
| 4 | |||
| 5 | namespace verbly { | ||
| 6 | namespace generator { | ||
| 7 | |||
| 8 | int frame::nextId_ = 0; | ||
| 9 | |||
| 10 | frame::frame() : id_(nextId_++) | ||
| 11 | { | ||
| 12 | } | ||
| 13 | |||
| 14 | void frame::push_back(part fp) | ||
| 15 | { | ||
| 16 | parts_.push_back(std::move(fp)); | ||
| 17 | } | ||
| 18 | |||
| 19 | database& operator<<(database& db, const frame& arg) | ||
| 20 | { | ||
| 21 | std::list<field> fields; | ||
| 22 | fields.emplace_back("frame_id", arg.getId()); | ||
| 23 | |||
| 24 | nlohmann::json jsonParts; | ||
| 25 | for (const part& p : arg) | ||
| 26 | { | ||
| 27 | nlohmann::json jsonPart; | ||
| 28 | jsonPart["type"] = static_cast<int>(p.getType()); | ||
| 29 | |||
| 30 | switch (p.getType()) | ||
| 31 | { | ||
| 32 | case part::type::noun_phrase: | ||
| 33 | { | ||
| 34 | jsonPart["role"] = p.getNounRole(); | ||
| 35 | jsonPart["selrestrs"] = p.getNounSelrestrs().toJson(); | ||
| 36 | jsonPart["synrestrs"] = p.getNounSynrestrs(); | ||
| 37 | |||
| 38 | break; | ||
| 39 | } | ||
| 40 | |||
| 41 | case part::type::preposition: | ||
| 42 | { | ||
| 43 | jsonPart["choices"] = p.getPrepositionChoices(); | ||
| 44 | jsonPart["literal"] = p.isPrepositionLiteral(); | ||
| 45 | |||
| 46 | break; | ||
| 47 | } | ||
| 48 | |||
| 49 | case part::type::literal: | ||
| 50 | { | ||
| 51 | jsonPart["value"] = p.getLiteralValue(); | ||
| 52 | |||
| 53 | break; | ||
| 54 | } | ||
| 55 | |||
| 56 | case part::type::verb: | ||
| 57 | case part::type::adjective: | ||
| 58 | case part::type::adverb: | ||
| 59 | { | ||
| 60 | break; | ||
| 61 | } | ||
| 62 | |||
| 63 | case part::type::invalid: | ||
| 64 | { | ||
| 65 | // Invalid parts should not be serialized. | ||
| 66 | assert(false); | ||
| 67 | |||
| 68 | break; | ||
| 69 | } | ||
| 70 | } | ||
| 71 | |||
| 72 | jsonParts.emplace_back(std::move(jsonPart)); | ||
| 73 | } | ||
| 74 | |||
| 75 | fields.emplace_back("data", jsonParts.dump()); | ||
| 76 | |||
| 77 | db.insertIntoTable("frames", std::move(fields)); | ||
| 78 | |||
| 79 | return db; | ||
| 80 | } | ||
| 81 | |||
| 82 | }; | ||
| 83 | }; | ||
| diff --git a/generator/frame.h b/generator/frame.h new file mode 100644 index 0000000..411ce6c --- /dev/null +++ b/generator/frame.h | |||
| @@ -0,0 +1,59 @@ | |||
| 1 | #ifndef FRAME_H_26770FF1 | ||
| 2 | #define FRAME_H_26770FF1 | ||
| 3 | |||
| 4 | #include <list> | ||
| 5 | #include "part.h" | ||
| 6 | |||
| 7 | namespace verbly { | ||
| 8 | namespace generator { | ||
| 9 | |||
| 10 | class database; | ||
| 11 | |||
| 12 | class frame { | ||
| 13 | public: | ||
| 14 | |||
| 15 | // Aliases | ||
| 16 | |||
| 17 | using const_iterator = std::list<part>::const_iterator; | ||
| 18 | |||
| 19 | // Constructor | ||
| 20 | |||
| 21 | frame(); | ||
| 22 | |||
| 23 | // Mutators | ||
| 24 | |||
| 25 | void push_back(part fp); | ||
| 26 | |||
| 27 | // Accessors | ||
| 28 | |||
| 29 | int getId() const | ||
| 30 | { | ||
| 31 | return id_; | ||
| 32 | } | ||
| 33 | |||
| 34 | const_iterator begin() const | ||
| 35 | { | ||
| 36 | return std::begin(parts_); | ||
| 37 | } | ||
| 38 | |||
| 39 | const_iterator end() const | ||
| 40 | { | ||
| 41 | return std::end(parts_); | ||
| 42 | } | ||
| 43 | |||
| 44 | private: | ||
| 45 | |||
| 46 | static int nextId_; | ||
| 47 | |||
| 48 | const int id_; | ||
| 49 | |||
| 50 | std::list<part> parts_; | ||
| 51 | |||
| 52 | }; | ||
| 53 | |||
| 54 | database& operator<<(database& db, const frame& arg); | ||
| 55 | |||
| 56 | }; | ||
| 57 | }; | ||
| 58 | |||
| 59 | #endif /* end of include guard: FRAME_H_26770FF1 */ | ||
| diff --git a/generator/generator.cpp b/generator/generator.cpp index 6a16467..d88cb31 100644 --- a/generator/generator.cpp +++ b/generator/generator.cpp | |||
| @@ -1,2320 +1,1477 @@ | |||
| 1 | #include <libxml/parser.h> | 1 | #include "generator.h" |
| 2 | #include <cassert> | ||
| 3 | #include <stdexcept> | ||
| 2 | #include <iostream> | 4 | #include <iostream> |
| 5 | #include <regex> | ||
| 3 | #include <dirent.h> | 6 | #include <dirent.h> |
| 4 | #include <set> | ||
| 5 | #include <map> | ||
| 6 | #include <string> | ||
| 7 | #include <vector> | ||
| 8 | #include <fstream> | 7 | #include <fstream> |
| 9 | #include <sqlite3.h> | 8 | #include "enums.h" |
| 10 | #include <sstream> | ||
| 11 | #include <regex> | ||
| 12 | #include <list> | ||
| 13 | #include <algorithm> | ||
| 14 | #include <json.hpp> | ||
| 15 | #include "progress.h" | 9 | #include "progress.h" |
| 10 | #include "selrestr.h" | ||
| 11 | #include "role.h" | ||
| 12 | #include "part.h" | ||
| 13 | #include "field.h" | ||
| 16 | #include "../lib/util.h" | 14 | #include "../lib/util.h" |
| 17 | 15 | ||
| 18 | using json = nlohmann::json; | 16 | namespace verbly { |
| 19 | 17 | namespace generator { | |
| 20 | struct verb_t { | ||
| 21 | std::string infinitive; | ||
| 22 | std::string past_tense; | ||
| 23 | std::string past_participle; | ||
| 24 | std::string ing_form; | ||
| 25 | std::string s_form; | ||
| 26 | int id; | ||
| 27 | }; | ||
| 28 | |||
| 29 | struct adjective_t { | ||
| 30 | std::string base; | ||
| 31 | std::string comparative; | ||
| 32 | std::string superlative; | ||
| 33 | }; | ||
| 34 | |||
| 35 | struct noun_t { | ||
| 36 | std::string singular; | ||
| 37 | std::string plural; | ||
| 38 | }; | ||
| 39 | |||
| 40 | struct selrestr_t { | ||
| 41 | enum class type_t { | ||
| 42 | singleton, | ||
| 43 | andlogic, | ||
| 44 | orlogic, | ||
| 45 | empty | ||
| 46 | }; | ||
| 47 | type_t type; | ||
| 48 | std::string restriction; | ||
| 49 | bool pos; | ||
| 50 | std::list<selrestr_t> subordinates; | ||
| 51 | }; | ||
| 52 | |||
| 53 | struct framepart_t { | ||
| 54 | enum class type_t { | ||
| 55 | np, | ||
| 56 | v, | ||
| 57 | pp, | ||
| 58 | adj, | ||
| 59 | adv, | ||
| 60 | lex | ||
| 61 | }; | ||
| 62 | type_t type; | ||
| 63 | std::string role; | ||
| 64 | selrestr_t selrestrs; | ||
| 65 | std::set<std::string> preprestrs; | ||
| 66 | std::set<std::string> synrestrs; | ||
| 67 | std::list<std::string> choices; | ||
| 68 | std::string lexval; | ||
| 69 | }; | ||
| 70 | |||
| 71 | struct group_t { | ||
| 72 | std::string id; | ||
| 73 | std::string parent; | ||
| 74 | std::set<std::string> members; | ||
| 75 | std::map<std::string, selrestr_t> roles; | ||
| 76 | std::list<std::list<framepart_t>> frames; | ||
| 77 | }; | ||
| 78 | |||
| 79 | struct pronunciation_t { | ||
| 80 | std::string phonemes; | ||
| 81 | std::string prerhyme; | ||
| 82 | std::string rhyme; | ||
| 83 | int syllables = 0; | ||
| 84 | std::string stress; | ||
| 85 | |||
| 86 | bool operator<(const pronunciation_t& other) const | ||
| 87 | { | ||
| 88 | return phonemes < other.phonemes; | ||
| 89 | } | ||
| 90 | }; | ||
| 91 | |||
| 92 | std::map<std::string, group_t> groups; | ||
| 93 | std::map<std::string, verb_t> verbs; | ||
| 94 | std::map<std::string, adjective_t> adjectives; | ||
| 95 | std::map<std::string, noun_t> nouns; | ||
| 96 | std::map<int, std::map<int, int>> wn; | ||
| 97 | std::map<int, int> images; | ||
| 98 | std::map<std::string, std::set<pronunciation_t>> pronunciations; | ||
| 99 | |||
| 100 | void print_usage() | ||
| 101 | { | ||
| 102 | std::cout << "Verbly Datafile Generator" << std::endl; | ||
| 103 | std::cout << "-------------------------" << std::endl; | ||
| 104 | std::cout << "Requires exactly six arguments." << std::endl; | ||
| 105 | std::cout << "1. The path to a VerbNet data directory." << std::endl; | ||
| 106 | std::cout << "2. The path to an AGID infl.txt file." << std::endl; | ||
| 107 | std::cout << "3. The path to a WordNet prolog data directory." << std::endl; | ||
| 108 | std::cout << "4. The path to a CMUDICT pronunciation file." << std::endl; | ||
| 109 | std::cout << "5. The path to an ImageNet urls.txt file." << std::endl; | ||
| 110 | std::cout << "6. Datafile output path." << std::endl; | ||
| 111 | |||
| 112 | exit(1); | ||
| 113 | } | ||
| 114 | |||
| 115 | void db_error(sqlite3* ppdb, std::string query) | ||
| 116 | { | ||
| 117 | std::cout << "Error writing to output database: " << sqlite3_errmsg(ppdb) << std::endl; | ||
| 118 | std::cout << query << std::endl; | ||
| 119 | sqlite3_close_v2(ppdb); | ||
| 120 | print_usage(); | ||
| 121 | } | ||
| 122 | |||
| 123 | json export_selrestrs(selrestr_t r) | ||
| 124 | { | ||
| 125 | if (r.type == selrestr_t::type_t::empty) | ||
| 126 | { | ||
| 127 | return {}; | ||
| 128 | } else if (r.type == selrestr_t::type_t::singleton) | ||
| 129 | { | ||
| 130 | json result; | ||
| 131 | result["type"] = r.restriction; | ||
| 132 | result["pos"] = r.pos; | ||
| 133 | return result; | ||
| 134 | } else { | ||
| 135 | json result; | ||
| 136 | if (r.type == selrestr_t::type_t::andlogic) | ||
| 137 | { | ||
| 138 | result["logic"] = "and"; | ||
| 139 | } else { | ||
| 140 | result["logic"] = "or"; | ||
| 141 | } | ||
| 142 | |||
| 143 | std::list<json> outlist; | ||
| 144 | std::transform(std::begin(r.subordinates), std::end(r.subordinates), std::back_inserter(outlist), &export_selrestrs); | ||
| 145 | result["children"] = outlist; | ||
| 146 | 18 | ||
| 147 | return result; | 19 | generator::generator( |
| 148 | } | 20 | std::string verbNetPath, |
| 149 | } | 21 | std::string agidPath, |
| 150 | 22 | std::string wordNetPath, | |
| 151 | selrestr_t parse_selrestrs(xmlNodePtr top, std::string filename) | 23 | std::string cmudictPath, |
| 152 | { | 24 | std::string imageNetPath, |
| 153 | selrestr_t r; | 25 | std::string outputPath) : |
| 154 | xmlChar* key; | 26 | verbNetPath_(verbNetPath), |
| 155 | 27 | agidPath_(agidPath), | |
| 156 | if (!xmlStrcmp(top->name, (const xmlChar*) "SELRESTRS")) | 28 | wordNetPath_(wordNetPath), |
| 157 | { | 29 | cmudictPath_(cmudictPath), |
| 158 | if (xmlChildElementCount(top) == 0) | 30 | imageNetPath_(imageNetPath), |
| 31 | db_(outputPath) | ||
| 159 | { | 32 | { |
| 160 | r.type = selrestr_t::type_t::empty; | 33 | // Ensure VerbNet directory exists |
| 161 | } else if (xmlChildElementCount(top) == 1) | 34 | DIR* dir; |
| 162 | { | 35 | if ((dir = opendir(verbNetPath_.c_str())) == nullptr) |
| 163 | r = parse_selrestrs(xmlFirstElementChild(top), filename); | ||
| 164 | } else { | ||
| 165 | r.type = selrestr_t::type_t::andlogic; | ||
| 166 | |||
| 167 | if (xmlHasProp(top, (const xmlChar*) "logic")) | ||
| 168 | { | 36 | { |
| 169 | key = xmlGetProp(top, (const xmlChar*) "logic"); | 37 | throw std::invalid_argument("Invalid VerbNet data directory"); |
| 170 | if (!xmlStrcmp(key, (const xmlChar*) "or")) | ||
| 171 | { | ||
| 172 | r.type = selrestr_t::type_t::orlogic; | ||
| 173 | } | ||
| 174 | xmlFree(key); | ||
| 175 | } | 38 | } |
| 176 | 39 | ||
| 177 | for (xmlNodePtr selrestr = top->xmlChildrenNode; selrestr != nullptr; selrestr = selrestr->next) | 40 | closedir(dir); |
| 41 | |||
| 42 | // Ensure AGID infl.txt exists | ||
| 43 | if (!std::ifstream(agidPath_)) | ||
| 178 | { | 44 | { |
| 179 | if (!xmlStrcmp(selrestr->name, (const xmlChar*) "SELRESTRS") || !xmlStrcmp(selrestr->name, (const xmlChar*) "SELRESTR")) | 45 | throw std::invalid_argument("AGID infl.txt file not found"); |
| 180 | { | ||
| 181 | r.subordinates.push_back(parse_selrestrs(selrestr, filename)); | ||
| 182 | } | ||
| 183 | } | 46 | } |
| 184 | } | 47 | |
| 185 | } else if (!xmlStrcmp(top->name, (const xmlChar*) "SELRESTR")) | 48 | // Add directory separator to WordNet path |
| 186 | { | 49 | if ((wordNetPath_.back() != '/') && (wordNetPath_.back() != '\\')) |
| 187 | r.type = selrestr_t::type_t::singleton; | ||
| 188 | |||
| 189 | key = xmlGetProp(top, (xmlChar*) "Value"); | ||
| 190 | r.pos = (std::string((const char*)key) == "+"); | ||
| 191 | xmlFree(key); | ||
| 192 | |||
| 193 | key = xmlGetProp(top, (xmlChar*) "type"); | ||
| 194 | r.restriction = (const char*) key; | ||
| 195 | xmlFree(key); | ||
| 196 | } else { | ||
| 197 | // Invalid | ||
| 198 | std::cout << "Bad VerbNet file format: " << filename << std::endl; | ||
| 199 | print_usage(); | ||
| 200 | } | ||
| 201 | |||
| 202 | return r; | ||
| 203 | } | ||
| 204 | |||
| 205 | group_t& parse_group(xmlNodePtr top, std::string filename) | ||
| 206 | { | ||
| 207 | xmlChar* key = xmlGetProp(top, (xmlChar*) "ID"); | ||
| 208 | if (key == 0) | ||
| 209 | { | ||
| 210 | std::cout << "Bad VerbNet file format: " << filename << std::endl; | ||
| 211 | print_usage(); | ||
| 212 | } | ||
| 213 | std::string vnid = (const char*)key; | ||
| 214 | vnid = vnid.substr(vnid.find_first_of("-")+1); | ||
| 215 | xmlFree(key); | ||
| 216 | |||
| 217 | group_t g; | ||
| 218 | g.id = vnid; | ||
| 219 | |||
| 220 | for (xmlNodePtr node = top->xmlChildrenNode; node != nullptr; node = node->next) | ||
| 221 | { | ||
| 222 | if (!xmlStrcmp(node->name, (const xmlChar*) "SUBCLASSES")) | ||
| 223 | { | ||
| 224 | for (xmlNodePtr subclass = node->xmlChildrenNode; subclass != nullptr; subclass = subclass->next) | ||
| 225 | { | 50 | { |
| 226 | if (!xmlStrcmp(subclass->name, (const xmlChar*) "VNSUBCLASS")) | 51 | wordNetPath_ += '/'; |
| 227 | { | ||
| 228 | auto& sg = parse_group(subclass, filename); | ||
| 229 | sg.parent = vnid; | ||
| 230 | |||
| 231 | for (auto member : sg.members) | ||
| 232 | { | ||
| 233 | g.members.insert(member); | ||
| 234 | } | ||
| 235 | |||
| 236 | // The schema requires that subclasses appear after role definitions, so we can do this now | ||
| 237 | for (auto role : g.roles) | ||
| 238 | { | ||
| 239 | if (sg.roles.count(role.first) == 0) | ||
| 240 | { | ||
| 241 | sg.roles[role.first] = role.second; | ||
| 242 | } | ||
| 243 | } | ||
| 244 | } | ||
| 245 | } | 52 | } |
| 246 | } else if (!xmlStrcmp(node->name, (const xmlChar*) "MEMBERS")) | 53 | |
| 247 | { | 54 | // Ensure WordNet tables exist |
| 248 | for (xmlNodePtr member = node->xmlChildrenNode; member != nullptr; member = member->next) | 55 | for (std::string table : { |
| 56 | "s", "sk", "ant", "at", "cls", "hyp", "ins", "mm", "mp", "ms", "per", "sa", "sim", "syntax" | ||
| 57 | }) | ||
| 249 | { | 58 | { |
| 250 | if (!xmlStrcmp(member->name, (const xmlChar*) "MEMBER")) | 59 | if (!std::ifstream(wordNetPath_ + "wn_" + table + ".pl")) |
| 251 | { | 60 | { |
| 252 | key = xmlGetProp(member, (xmlChar*) "name"); | 61 | throw std::invalid_argument("WordNet " + table + " table not found"); |
| 253 | g.members.insert((const char*)key); | ||
| 254 | xmlFree(key); | ||
| 255 | } | 62 | } |
| 256 | } | 63 | } |
| 257 | } else if (!xmlStrcmp(node->name, (const xmlChar*) "THEMROLES")) | 64 | |
| 258 | { | 65 | // Ensure CMUDICT file exists |
| 259 | for (xmlNodePtr role = node->xmlChildrenNode; role != nullptr; role = role->next) | 66 | if (!std::ifstream(cmudictPath_)) |
| 260 | { | 67 | { |
| 261 | if (!xmlStrcmp(role->name, (const xmlChar*) "THEMROLE")) | 68 | throw std::invalid_argument("CMUDICT file not found"); |
| 262 | { | ||
| 263 | selrestr_t r; | ||
| 264 | r.type = selrestr_t::type_t::empty; | ||
| 265 | |||
| 266 | key = xmlGetProp(role, (const xmlChar*) "type"); | ||
| 267 | std::string type = (const char*)key; | ||
| 268 | xmlFree(key); | ||
| 269 | |||
| 270 | for (xmlNodePtr rolenode = role->xmlChildrenNode; rolenode != nullptr; rolenode = rolenode->next) | ||
| 271 | { | ||
| 272 | if (!xmlStrcmp(rolenode->name, (const xmlChar*) "SELRESTRS")) | ||
| 273 | { | ||
| 274 | r = parse_selrestrs(rolenode, filename); | ||
| 275 | } | ||
| 276 | } | ||
| 277 | |||
| 278 | g.roles[type] = r; | ||
| 279 | } | ||
| 280 | } | 69 | } |
| 281 | } else if (!xmlStrcmp(node->name, (const xmlChar*) "FRAMES")) | 70 | |
| 282 | { | 71 | // Ensure ImageNet urls.txt exists |
| 283 | for (xmlNodePtr frame = node->xmlChildrenNode; frame != nullptr; frame = frame->next) | 72 | if (!std::ifstream(imageNetPath_)) |
| 284 | { | 73 | { |
| 285 | if (!xmlStrcmp(frame->name, (const xmlChar*) "FRAME")) | 74 | throw std::invalid_argument("ImageNet urls.txt file not found"); |
| 286 | { | ||
| 287 | std::list<framepart_t> f; | ||
| 288 | |||
| 289 | for (xmlNodePtr framenode = frame->xmlChildrenNode; framenode != nullptr; framenode = framenode->next) | ||
| 290 | { | ||
| 291 | if (!xmlStrcmp(framenode->name, (const xmlChar*) "SYNTAX")) | ||
| 292 | { | ||
| 293 | for (xmlNodePtr syntaxnode = framenode->xmlChildrenNode; syntaxnode != nullptr; syntaxnode = syntaxnode->next) | ||
| 294 | { | ||
| 295 | framepart_t fp; | ||
| 296 | |||
| 297 | if (!xmlStrcmp(syntaxnode->name, (const xmlChar*) "NP")) | ||
| 298 | { | ||
| 299 | fp.type = framepart_t::type_t::np; | ||
| 300 | |||
| 301 | key = xmlGetProp(syntaxnode, (xmlChar*) "value"); | ||
| 302 | fp.role = (const char*)key; | ||
| 303 | xmlFree(key); | ||
| 304 | |||
| 305 | fp.selrestrs.type = selrestr_t::type_t::empty; | ||
| 306 | |||
| 307 | for (xmlNodePtr npnode = syntaxnode->xmlChildrenNode; npnode != nullptr; npnode = npnode->next) | ||
| 308 | { | ||
| 309 | if (!xmlStrcmp(npnode->name, (const xmlChar*) "SYNRESTRS")) | ||
| 310 | { | ||
| 311 | for (xmlNodePtr synrestr = npnode->xmlChildrenNode; synrestr != nullptr; synrestr = synrestr->next) | ||
| 312 | { | ||
| 313 | if (!xmlStrcmp(synrestr->name, (const xmlChar*) "SYNRESTR")) | ||
| 314 | { | ||
| 315 | key = xmlGetProp(synrestr, (xmlChar*) "type"); | ||
| 316 | fp.synrestrs.insert(std::string((const char*)key)); | ||
| 317 | xmlFree(key); | ||
| 318 | } | ||
| 319 | } | ||
| 320 | } | ||
| 321 | |||
| 322 | if (!xmlStrcmp(npnode->name, (const xmlChar*) "SELRESTRS")) | ||
| 323 | { | ||
| 324 | fp.selrestrs = parse_selrestrs(npnode, filename); | ||
| 325 | } | ||
| 326 | } | ||
| 327 | } else if (!xmlStrcmp(syntaxnode->name, (xmlChar*) "VERB")) | ||
| 328 | { | ||
| 329 | fp.type = framepart_t::type_t::v; | ||
| 330 | } else if (!xmlStrcmp(syntaxnode->name, (xmlChar*) "PREP")) | ||
| 331 | { | ||
| 332 | fp.type = framepart_t::type_t::pp; | ||
| 333 | |||
| 334 | if (xmlHasProp(syntaxnode, (xmlChar*) "value")) | ||
| 335 | { | ||
| 336 | key = xmlGetProp(syntaxnode, (xmlChar*) "value"); | ||
| 337 | std::string choices = (const char*)key; | ||
| 338 | xmlFree(key); | ||
| 339 | |||
| 340 | fp.choices = verbly::split<std::list<std::string>>(choices, " "); | ||
| 341 | } | ||
| 342 | |||
| 343 | for (xmlNodePtr npnode = syntaxnode->xmlChildrenNode; npnode != nullptr; npnode = npnode->next) | ||
| 344 | { | ||
| 345 | if (!xmlStrcmp(npnode->name, (const xmlChar*) "SELRESTRS")) | ||
| 346 | { | ||
| 347 | for (xmlNodePtr synrestr = npnode->xmlChildrenNode; synrestr != nullptr; synrestr = synrestr->next) | ||
| 348 | { | ||
| 349 | if (!xmlStrcmp(synrestr->name, (const xmlChar*) "SELRESTR")) | ||
| 350 | { | ||
| 351 | key = xmlGetProp(synrestr, (xmlChar*) "type"); | ||
| 352 | fp.preprestrs.insert(std::string((const char*)key)); | ||
| 353 | xmlFree(key); | ||
| 354 | } | ||
| 355 | } | ||
| 356 | } | ||
| 357 | } | ||
| 358 | } else if (!xmlStrcmp(syntaxnode->name, (xmlChar*) "ADJ")) | ||
| 359 | { | ||
| 360 | fp.type = framepart_t::type_t::adj; | ||
| 361 | } else if (!xmlStrcmp(syntaxnode->name, (xmlChar*) "ADV")) | ||
| 362 | { | ||
| 363 | fp.type = framepart_t::type_t::adv; | ||
| 364 | } else if (!xmlStrcmp(syntaxnode->name, (xmlChar*) "LEX")) | ||
| 365 | { | ||
| 366 | fp.type = framepart_t::type_t::lex; | ||
| 367 | |||
| 368 | key = xmlGetProp(syntaxnode, (xmlChar*) "value"); | ||
| 369 | fp.lexval = (const char*)key; | ||
| 370 | xmlFree(key); | ||
| 371 | } else { | ||
| 372 | continue; | ||
| 373 | } | ||
| 374 | |||
| 375 | f.push_back(fp); | ||
| 376 | } | ||
| 377 | |||
| 378 | g.frames.push_back(f); | ||
| 379 | } | ||
| 380 | } | ||
| 381 | } | ||
| 382 | } | 75 | } |
| 383 | } | 76 | } |
| 384 | } | ||
| 385 | |||
| 386 | groups[vnid] = g; | ||
| 387 | |||
| 388 | return groups[vnid]; | ||
| 389 | } | ||
| 390 | |||
| 391 | int main(int argc, char** argv) | ||
| 392 | { | ||
| 393 | if (argc != 7) | ||
| 394 | { | ||
| 395 | print_usage(); | ||
| 396 | } | ||
| 397 | |||
| 398 | // VerbNet data | ||
| 399 | std::cout << "Reading verb frames..." << std::endl; | ||
| 400 | |||
| 401 | DIR* dir; | ||
| 402 | if ((dir = opendir(argv[1])) == nullptr) | ||
| 403 | { | ||
| 404 | std::cout << "Invalid VerbNet data directory." << std::endl; | ||
| 405 | |||
| 406 | print_usage(); | ||
| 407 | } | ||
| 408 | |||
| 409 | struct dirent* ent; | ||
| 410 | while ((ent = readdir(dir)) != nullptr) | ||
| 411 | { | ||
| 412 | std::string filename(argv[1]); | ||
| 413 | if (filename.back() != '/') | ||
| 414 | { | ||
| 415 | filename += '/'; | ||
| 416 | } | ||
| 417 | 77 | ||
| 418 | filename += ent->d_name; | 78 | void generator::run() |
| 419 | //std::cout << ent->d_name << std::endl; | ||
| 420 | |||
| 421 | if (filename.rfind(".xml") != filename.size() - 4) | ||
| 422 | { | ||
| 423 | continue; | ||
| 424 | } | ||
| 425 | |||
| 426 | xmlDocPtr doc = xmlParseFile(filename.c_str()); | ||
| 427 | if (doc == nullptr) | ||
| 428 | { | ||
| 429 | std::cout << "Error opening " << filename << std::endl; | ||
| 430 | print_usage(); | ||
| 431 | } | ||
| 432 | |||
| 433 | xmlNodePtr top = xmlDocGetRootElement(doc); | ||
| 434 | if ((top == nullptr) || (xmlStrcmp(top->name, (xmlChar*) "VNCLASS"))) | ||
| 435 | { | ||
| 436 | std::cout << "Bad VerbNet file format: " << filename << std::endl; | ||
| 437 | print_usage(); | ||
| 438 | } | ||
| 439 | |||
| 440 | parse_group(top, filename); | ||
| 441 | } | ||
| 442 | |||
| 443 | closedir(dir); | ||
| 444 | |||
| 445 | // Get verbs from AGID | ||
| 446 | std::cout << "Reading inflections..." << std::endl; | ||
| 447 | |||
| 448 | std::ifstream agidfile(argv[2]); | ||
| 449 | if (!agidfile.is_open()) | ||
| 450 | { | ||
| 451 | std::cout << "Could not open AGID file: " << argv[2] << std::endl; | ||
| 452 | print_usage(); | ||
| 453 | } | ||
| 454 | |||
| 455 | for (;;) | ||
| 456 | { | ||
| 457 | std::string line; | ||
| 458 | if (!getline(agidfile, line)) | ||
| 459 | { | ||
| 460 | break; | ||
| 461 | } | ||
| 462 | |||
| 463 | if (line.back() == '\r') | ||
| 464 | { | 79 | { |
| 465 | line.pop_back(); | 80 | // Create notions, words, lemmas, and forms from WordNet synsets |
| 466 | } | 81 | readWordNetSynsets(); |
| 467 | 82 | ||
| 468 | int divider = line.find_first_of(" "); | 83 | // Reads adjective positioning WordNet data |
| 469 | std::string word = line.substr(0, divider); | 84 | readAdjectivePositioning(); |
| 470 | line = line.substr(divider+1); | 85 | |
| 471 | char type = line[0]; | 86 | // Counts the number of URLs ImageNet has per notion |
| 472 | 87 | readImageNetUrls(); | |
| 473 | if (line[1] == '?') | 88 | |
| 474 | { | 89 | // Creates a word by WordNet sense key lookup table |
| 475 | line.erase(0, 4); | 90 | readWordNetSenseKeys(); |
| 476 | } else { | 91 | |
| 477 | line.erase(0, 3); | 92 | // Creates groups and frames from VerbNet data |
| 478 | } | 93 | readVerbNet(); |
| 479 | 94 | ||
| 480 | std::vector<std::string> forms; | 95 | // Creates forms and inflections from AGID. To reduce the amount of forms |
| 481 | while (!line.empty()) | 96 | // created, we do this after most lemmas that need inflecting have been |
| 482 | { | 97 | // created through other means, and then only generate forms for |
| 483 | std::string inflection; | 98 | // inflections of already-existing lemmas. The exception to this regards |
| 484 | if ((divider = line.find(" | ")) != std::string::npos) | 99 | // verb lemmas. If a verb lemma in AGID either does not exist yet, or does |
| 485 | { | 100 | // exist but is not related to any words that are related to verb notions, |
| 486 | inflection = line.substr(0, divider); | 101 | // then a notion and a word is generated and the form generation proceeds |
| 487 | line = line.substr(divider + 3); | 102 | // as usual. |
| 488 | } else { | 103 | readAgidInflections(); |
| 489 | inflection = line; | 104 | |
| 490 | line = ""; | 105 | // Reads in prepositions and the is_a relationship |
| 491 | } | 106 | readPrepositions(); |
| 492 | 107 | ||
| 493 | if ((divider = inflection.find_first_of(",?")) != std::string::npos) | 108 | // Creates pronunciations from CMUDICT. To reduce the amount of |
| 494 | { | 109 | // pronunciations created, we do this after all forms have been created, |
| 495 | inflection = inflection.substr(0, divider); | 110 | // and then only generate pronunciations for already-exisiting forms. |
| 496 | } | 111 | readCmudictPronunciations(); |
| 497 | 112 | ||
| 498 | forms.push_back(inflection); | 113 | // Writes the database schema |
| 114 | writeSchema(); | ||
| 115 | |||
| 116 | // Dumps data to the database | ||
| 117 | dumpObjects(); | ||
| 118 | |||
| 119 | // Populates the antonymy relationship from WordNet | ||
| 120 | readWordNetAntonymy(); | ||
| 121 | |||
| 122 | // Populates the variation relationship from WordNet | ||
| 123 | readWordNetVariation(); | ||
| 124 | |||
| 125 | // Populates the usage, topicality, and regionality relationships from | ||
| 126 | // WordNet | ||
| 127 | readWordNetClasses(); | ||
| 128 | |||
| 129 | // Populates the causality relationship from WordNet | ||
| 130 | readWordNetCausality(); | ||
| 131 | |||
| 132 | // Populates the entailment relationship from WordNet | ||
| 133 | readWordNetEntailment(); | ||
| 134 | |||
| 135 | // Populates the hypernymy relationship from WordNet | ||
| 136 | readWordNetHypernymy(); | ||
| 137 | |||
| 138 | // Populates the instantiation relationship from WordNet | ||
| 139 | readWordNetInstantiation(); | ||
| 140 | |||
| 141 | // Populates the member meronymy relationship from WordNet | ||
| 142 | readWordNetMemberMeronymy(); | ||
| 143 | |||
| 144 | // Populates the part meronymy relationship from WordNet | ||
| 145 | readWordNetPartMeronymy(); | ||
| 146 | |||
| 147 | // Populates the substance meronymy relationship from WordNet | ||
| 148 | readWordNetSubstanceMeronymy(); | ||
| 149 | |||
| 150 | // Populates the pertainymy and mannernymy relationships from WordNet | ||
| 151 | readWordNetPertainymy(); | ||
| 152 | |||
| 153 | // Populates the specification relationship from WordNet | ||
| 154 | readWordNetSpecification(); | ||
| 155 | |||
| 156 | // Populates the adjective similarity relationship from WordNet | ||
| 157 | readWordNetSimilarity(); | ||
| 158 | |||
| 159 | |||
| 160 | |||
| 161 | |||
| 162 | |||
| 163 | |||
| 164 | |||
| 165 | |||
| 499 | } | 166 | } |
| 500 | 167 | ||
| 501 | switch (type) | 168 | void generator::readWordNetSynsets() |
| 502 | { | 169 | { |
| 503 | case 'V': | 170 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_s.pl")); |
| 171 | progress ppgs("Reading synsets from WordNet...", lines.size()); | ||
| 172 | |||
| 173 | for (std::string line : lines) | ||
| 504 | { | 174 | { |
| 505 | verb_t v; | 175 | ppgs.update(); |
| 506 | v.infinitive = word; | 176 | |
| 507 | if (forms.size() == 4) | 177 | std::regex relation("^s\\(([1234]\\d{8}),(\\d+),'(.+)',\\w,\\d+,(\\d+)\\)\\.$"); |
| 508 | { | 178 | std::smatch relation_data; |
| 509 | v.past_tense = forms[0]; | 179 | if (!std::regex_search(line, relation_data, relation)) |
| 510 | v.past_participle = forms[1]; | 180 | { |
| 511 | v.ing_form = forms[2]; | 181 | continue; |
| 512 | v.s_form = forms[3]; | ||
| 513 | } else if (forms.size() == 3) | ||
| 514 | { | ||
| 515 | v.past_tense = forms[0]; | ||
| 516 | v.past_participle = forms[0]; | ||
| 517 | v.ing_form = forms[1]; | ||
| 518 | v.s_form = forms[2]; | ||
| 519 | } else if (forms.size() == 8) | ||
| 520 | { | ||
| 521 | // As of AGID 2014.08.11, this is only "to be" | ||
| 522 | v.past_tense = forms[0]; | ||
| 523 | v.past_participle = forms[2]; | ||
| 524 | v.ing_form = forms[3]; | ||
| 525 | v.s_form = forms[4]; | ||
| 526 | } else { | ||
| 527 | // Words that don't fit the cases above as of AGID 2014.08.11: | ||
| 528 | // - may and shall do not conjugate the way we want them to | ||
| 529 | // - methinks only has a past tense and is an outlier | ||
| 530 | // - wit has five forms, and is archaic/obscure enough that we can ignore it for now | ||
| 531 | std::cout << "Ignoring verb \"" << word << "\" due to non-standard number of forms." << std::endl; | ||
| 532 | } | 182 | } |
| 533 | 183 | ||
| 534 | verbs[word] = v; | 184 | int synset_id = std::stoi(relation_data[1]); |
| 535 | 185 | int wnum = std::stoi(relation_data[2]); | |
| 536 | break; | 186 | std::string text = relation_data[3]; |
| 537 | } | 187 | int tag_count = std::stoi(relation_data[4]); |
| 538 | 188 | size_t word_it; | |
| 539 | case 'A': | 189 | while ((word_it = text.find("''")) != std::string::npos) |
| 540 | { | ||
| 541 | adjective_t adj; | ||
| 542 | adj.base = word; | ||
| 543 | if (forms.size() == 2) | ||
| 544 | { | 190 | { |
| 545 | adj.comparative = forms[0]; | 191 | text.erase(word_it, 1); |
| 546 | adj.superlative = forms[1]; | ||
| 547 | } else { | ||
| 548 | // As of AGID 2014.08.11, this is only "only", which has only the form "onliest" | ||
| 549 | std::cout << "Ignoring adjective/adverb \"" << word << "\" due to non-standard number of forms." << std::endl; | ||
| 550 | } | 192 | } |
| 551 | 193 | ||
| 552 | adjectives[word] = adj; | 194 | // The WordNet data does contain duplicates, so we need to check that we |
| 553 | 195 | // haven't already created this word. | |
| 554 | break; | 196 | std::pair<int, int> lookup(synset_id, wnum); |
| 555 | } | 197 | if (!wordByWnidAndWnum_.count(lookup)) |
| 556 | |||
| 557 | case 'N': | ||
| 558 | { | ||
| 559 | noun_t n; | ||
| 560 | n.singular = word; | ||
| 561 | if (forms.size() == 1) | ||
| 562 | { | 198 | { |
| 563 | n.plural = forms[0]; | 199 | notion& synset = lookupOrCreateNotion(synset_id); |
| 564 | } else { | 200 | lemma& lex = lookupOrCreateLemma(text); |
| 565 | // As of AGID 2014.08.11, this is non-existent. | 201 | word& entry = createWord(synset, lex, tag_count); |
| 566 | std::cout << "Ignoring noun \"" << word << "\" due to non-standard number of forms." << std::endl; | 202 | |
| 203 | wordByWnidAndWnum_[lookup] = &entry; | ||
| 567 | } | 204 | } |
| 568 | |||
| 569 | nouns[word] = n; | ||
| 570 | |||
| 571 | break; | ||
| 572 | } | 205 | } |
| 573 | } | 206 | } |
| 574 | } | ||
| 575 | |||
| 576 | // Pronounciations | ||
| 577 | std::cout << "Reading pronunciations..." << std::endl; | ||
| 578 | |||
| 579 | std::ifstream pronfile(argv[4]); | ||
| 580 | if (!pronfile.is_open()) | ||
| 581 | { | ||
| 582 | std::cout << "Could not open CMUDICT file: " << argv[4] << std::endl; | ||
| 583 | print_usage(); | ||
| 584 | } | ||
| 585 | |||
| 586 | for (;;) | ||
| 587 | { | ||
| 588 | std::string line; | ||
| 589 | if (!getline(pronfile, line)) | ||
| 590 | { | ||
| 591 | break; | ||
| 592 | } | ||
| 593 | |||
| 594 | if (line.back() == '\r') | ||
| 595 | { | ||
| 596 | line.pop_back(); | ||
| 597 | } | ||
| 598 | 207 | ||
| 599 | std::regex phoneme("([A-Z][^ \\(]*)(?:\\(\\d+\\))? ([A-Z 0-9]+)"); | 208 | void generator::readAdjectivePositioning() |
| 600 | std::smatch phoneme_data; | ||
| 601 | if (std::regex_search(line, phoneme_data, phoneme)) | ||
| 602 | { | 209 | { |
| 603 | std::string canonical(phoneme_data[1]); | 210 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_syntax.pl")); |
| 604 | std::transform(std::begin(canonical), std::end(canonical), std::begin(canonical), ::tolower); | 211 | progress ppgs("Reading adjective positionings from WordNet...", lines.size()); |
| 605 | |||
| 606 | std::string phonemes = phoneme_data[2]; | ||
| 607 | auto phoneme_set = verbly::split<std::list<std::string>>(phonemes, " "); | ||
| 608 | auto phemstrt = std::find_if(std::begin(phoneme_set), std::end(phoneme_set), [] (std::string phoneme) { | ||
| 609 | return phoneme.find("1") != std::string::npos; | ||
| 610 | }); | ||
| 611 | 212 | ||
| 612 | pronunciation_t p; | 213 | for (std::string line : lines) |
| 613 | p.phonemes = phonemes; | ||
| 614 | |||
| 615 | // Rhyme detection | ||
| 616 | if (phemstrt != std::end(phoneme_set)) | ||
| 617 | { | 214 | { |
| 618 | std::stringstream rhymer; | 215 | ppgs.update(); |
| 619 | for (auto it = phemstrt; it != std::end(phoneme_set); it++) | ||
| 620 | { | ||
| 621 | std::string naked; | ||
| 622 | std::remove_copy_if(std::begin(*it), std::end(*it), std::back_inserter(naked), [] (char ch) { | ||
| 623 | return isdigit(ch); | ||
| 624 | }); | ||
| 625 | |||
| 626 | if (it != phemstrt) | ||
| 627 | { | ||
| 628 | rhymer << " "; | ||
| 629 | } | ||
| 630 | |||
| 631 | rhymer << naked; | ||
| 632 | } | ||
| 633 | 216 | ||
| 634 | p.rhyme = rhymer.str(); | 217 | std::regex relation("^syntax\\((3\\d{8}),(\\d+),([ipa])p?\\)\\."); |
| 635 | 218 | std::smatch relation_data; | |
| 636 | if (phemstrt != std::begin(phoneme_set)) | 219 | if (!std::regex_search(line, relation_data, relation)) |
| 637 | { | 220 | { |
| 638 | phemstrt--; | 221 | continue; |
| 639 | p.prerhyme = *phemstrt; | ||
| 640 | } else { | ||
| 641 | p.prerhyme = ""; | ||
| 642 | } | 222 | } |
| 643 | } else { | ||
| 644 | p.prerhyme = ""; | ||
| 645 | p.rhyme = ""; | ||
| 646 | } | ||
| 647 | 223 | ||
| 648 | // Syllable/stress | 224 | int synset_id = stoi(relation_data[1]); |
| 649 | for (auto phm : phoneme_set) | 225 | int wnum = stoi(relation_data[2]); |
| 650 | { | 226 | std::string adjpos_str = relation_data[3]; |
| 651 | if (isdigit(phm.back())) | ||
| 652 | { | ||
| 653 | // It's a vowel! | ||
| 654 | p.syllables++; | ||
| 655 | 227 | ||
| 656 | if (phm.back() == '1') | 228 | std::pair<int, int> lookup(synset_id, wnum); |
| 229 | if (wordByWnidAndWnum_.count(lookup)) | ||
| 230 | { | ||
| 231 | word& adj = *wordByWnidAndWnum_.at(lookup); | ||
| 232 | |||
| 233 | if (adjpos_str == "p") | ||
| 234 | { | ||
| 235 | adj.setAdjectivePosition(positioning::predicate); | ||
| 236 | } else if (adjpos_str == "a") | ||
| 237 | { | ||
| 238 | adj.setAdjectivePosition(positioning::attributive); | ||
| 239 | } else if (adjpos_str == "i") | ||
| 657 | { | 240 | { |
| 658 | p.stress.push_back('1'); | 241 | adj.setAdjectivePosition(positioning::postnominal); |
| 659 | } else { | 242 | } else { |
| 660 | p.stress.push_back('0'); | 243 | // Can't happen because of how we specified the regex. |
| 244 | assert(false); | ||
| 661 | } | 245 | } |
| 662 | } | 246 | } |
| 663 | } | 247 | } |
| 664 | |||
| 665 | pronunciations[canonical].insert(p); | ||
| 666 | } | ||
| 667 | } | ||
| 668 | |||
| 669 | // Images | ||
| 670 | std::cout << "Reading images..." << std::endl; | ||
| 671 | |||
| 672 | std::ifstream imagefile(argv[5]); | ||
| 673 | if (!imagefile.is_open()) | ||
| 674 | { | ||
| 675 | std::cout << "Could not open ImageNet file: " << argv[5] << std::endl; | ||
| 676 | print_usage(); | ||
| 677 | } | ||
| 678 | |||
| 679 | for (;;) | ||
| 680 | { | ||
| 681 | std::string line; | ||
| 682 | if (!getline(imagefile, line)) | ||
| 683 | { | ||
| 684 | break; | ||
| 685 | } | ||
| 686 | |||
| 687 | if (line.back() == '\r') | ||
| 688 | { | ||
| 689 | line.pop_back(); | ||
| 690 | } | ||
| 691 | |||
| 692 | std::string wnid_s = line.substr(1, 8); | ||
| 693 | int wnid = stoi(wnid_s) + 100000000; | ||
| 694 | images[wnid]++; | ||
| 695 | } | ||
| 696 | |||
| 697 | imagefile.close(); | ||
| 698 | |||
| 699 | // Start writing output | ||
| 700 | std::cout << "Writing schema..." << std::endl; | ||
| 701 | |||
| 702 | sqlite3* ppdb; | ||
| 703 | if (sqlite3_open_v2(argv[6], &ppdb, SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE, NULL) != SQLITE_OK) | ||
| 704 | { | ||
| 705 | std::cout << "Error opening output datafile: " << sqlite3_errmsg(ppdb) << std::endl; | ||
| 706 | print_usage(); | ||
| 707 | } | ||
| 708 | |||
| 709 | std::ifstream schemafile("schema.sql"); | ||
| 710 | if (!schemafile.is_open()) | ||
| 711 | { | ||
| 712 | std::cout << "Could not find schema file" << std::endl; | ||
| 713 | print_usage(); | ||
| 714 | } | ||
| 715 | |||
| 716 | std::stringstream schemabuilder; | ||
| 717 | for (;;) | ||
| 718 | { | ||
| 719 | std::string line; | ||
| 720 | if (!getline(schemafile, line)) | ||
| 721 | { | ||
| 722 | break; | ||
| 723 | } | ||
| 724 | |||
| 725 | if (line.back() == '\r') | ||
| 726 | { | ||
| 727 | line.pop_back(); | ||
| 728 | } | ||
| 729 | |||
| 730 | schemabuilder << line << std::endl; | ||
| 731 | } | ||
| 732 | |||
| 733 | std::string schema = schemabuilder.str(); | ||
| 734 | while (!schema.empty()) | ||
| 735 | { | ||
| 736 | std::string query; | ||
| 737 | int divider = schema.find(";"); | ||
| 738 | if (divider != std::string::npos) | ||
| 739 | { | ||
| 740 | query = schema.substr(0, divider+1); | ||
| 741 | schema = schema.substr(divider+2); | ||
| 742 | } else { | ||
| 743 | break; | ||
| 744 | } | 248 | } |
| 745 | 249 | ||
| 746 | sqlite3_stmt* schmstmt; | 250 | void generator::readImageNetUrls() |
| 747 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &schmstmt, NULL) != SQLITE_OK) | ||
| 748 | { | 251 | { |
| 749 | db_error(ppdb, query); | 252 | // The ImageNet datafile is so large that it is unreasonable and |
| 750 | } | 253 | // unnecessary to read it into memory; instead, we will parse each line as |
| 751 | 254 | // we read it. This has the caveat that we cannot display a progress bar. | |
| 752 | if (sqlite3_step(schmstmt) != SQLITE_DONE) | 255 | std::cout << "Reading image counts from ImageNet..." << std::endl; |
| 753 | { | ||
| 754 | db_error(ppdb, query); | ||
| 755 | } | ||
| 756 | |||
| 757 | sqlite3_finalize(schmstmt); | ||
| 758 | } | ||
| 759 | |||
| 760 | std::cout << "Writing prepositions..." << std::endl; | ||
| 761 | std::ifstream prepfile("prepositions.txt"); | ||
| 762 | if (!prepfile.is_open()) | ||
| 763 | { | ||
| 764 | std::cout << "Could not find prepositions file" << std::endl; | ||
| 765 | print_usage(); | ||
| 766 | } | ||
| 767 | |||
| 768 | for (;;) | ||
| 769 | { | ||
| 770 | std::string line; | ||
| 771 | if (!getline(prepfile, line)) | ||
| 772 | { | ||
| 773 | break; | ||
| 774 | } | ||
| 775 | |||
| 776 | if (line.back() == '\r') | ||
| 777 | { | ||
| 778 | line.pop_back(); | ||
| 779 | } | ||
| 780 | |||
| 781 | std::regex relation("^([^:]+): (.+)"); | ||
| 782 | std::smatch relation_data; | ||
| 783 | std::regex_search(line, relation_data, relation); | ||
| 784 | std::string prep = relation_data[1]; | ||
| 785 | std::list<std::string> groups = verbly::split<std::list<std::string>>(relation_data[2], ", "); | ||
| 786 | |||
| 787 | std::string query("INSERT INTO prepositions (form) VALUES (?)"); | ||
| 788 | sqlite3_stmt* ppstmt; | ||
| 789 | |||
| 790 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
| 791 | { | ||
| 792 | db_error(ppdb, query); | ||
| 793 | } | ||
| 794 | |||
| 795 | sqlite3_bind_text(ppstmt, 1, prep.c_str(), prep.length(), SQLITE_TRANSIENT); | ||
| 796 | |||
| 797 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | ||
| 798 | { | ||
| 799 | db_error(ppdb, query); | ||
| 800 | } | ||
| 801 | |||
| 802 | sqlite3_finalize(ppstmt); | ||
| 803 | |||
| 804 | query = "SELECT last_insert_rowid()"; | ||
| 805 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
| 806 | { | ||
| 807 | db_error(ppdb, query); | ||
| 808 | } | ||
| 809 | |||
| 810 | if (sqlite3_step(ppstmt) != SQLITE_ROW) | ||
| 811 | { | ||
| 812 | db_error(ppdb, query); | ||
| 813 | } | ||
| 814 | |||
| 815 | int rowid = sqlite3_column_int(ppstmt, 0); | ||
| 816 | sqlite3_finalize(ppstmt); | ||
| 817 | |||
| 818 | for (auto group : groups) | ||
| 819 | { | ||
| 820 | query = "INSERT INTO preposition_groups (preposition_id, groupname) VALUES (?, ?)"; | ||
| 821 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
| 822 | { | ||
| 823 | db_error(ppdb, query); | ||
| 824 | } | ||
| 825 | 256 | ||
| 826 | sqlite3_bind_int(ppstmt, 1, rowid); | 257 | std::ifstream file(imageNetPath_); |
| 827 | sqlite3_bind_text(ppstmt, 2, group.c_str(), group.length(), SQLITE_TRANSIENT); | 258 | if (!file) |
| 828 | |||
| 829 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | ||
| 830 | { | 259 | { |
| 831 | db_error(ppdb, query); | 260 | throw std::invalid_argument("Could not find file " + imageNetPath_); |
| 832 | } | 261 | } |
| 833 | |||
| 834 | sqlite3_finalize(ppstmt); | ||
| 835 | } | ||
| 836 | } | ||
| 837 | |||
| 838 | 262 | ||
| 839 | { | 263 | std::string line; |
| 840 | progress ppgs("Writing verbs...", verbs.size()); | 264 | while (std::getline(file, line)) |
| 841 | for (auto& mapping : verbs) | ||
| 842 | { | ||
| 843 | sqlite3_stmt* ppstmt; | ||
| 844 | std::string query("INSERT INTO verbs (infinitive, past_tense, past_participle, ing_form, s_form) VALUES (?, ?, ?, ?, ?)"); | ||
| 845 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
| 846 | { | ||
| 847 | db_error(ppdb, query); | ||
| 848 | } | ||
| 849 | |||
| 850 | sqlite3_bind_text(ppstmt, 1, mapping.second.infinitive.c_str(), mapping.second.infinitive.length(), SQLITE_TRANSIENT); | ||
| 851 | sqlite3_bind_text(ppstmt, 2, mapping.second.past_tense.c_str(), mapping.second.past_tense.length(), SQLITE_TRANSIENT); | ||
| 852 | sqlite3_bind_text(ppstmt, 3, mapping.second.past_participle.c_str(), mapping.second.past_participle.length(), SQLITE_TRANSIENT); | ||
| 853 | sqlite3_bind_text(ppstmt, 4, mapping.second.ing_form.c_str(), mapping.second.ing_form.length(), SQLITE_TRANSIENT); | ||
| 854 | sqlite3_bind_text(ppstmt, 5, mapping.second.s_form.c_str(), mapping.second.s_form.length(), SQLITE_TRANSIENT); | ||
| 855 | |||
| 856 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | ||
| 857 | { | ||
| 858 | db_error(ppdb, query); | ||
| 859 | } | ||
| 860 | |||
| 861 | sqlite3_finalize(ppstmt); | ||
| 862 | |||
| 863 | std::string canonical(mapping.second.infinitive); | ||
| 864 | std::transform(std::begin(canonical), std::end(canonical), std::begin(canonical), ::tolower); | ||
| 865 | if (pronunciations.count(canonical) == 1) | ||
| 866 | { | 265 | { |
| 867 | query = "SELECT last_insert_rowid()"; | 266 | if (line.back() == '\r') |
| 868 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
| 869 | { | 267 | { |
| 870 | db_error(ppdb, query); | 268 | line.pop_back(); |
| 871 | } | 269 | } |
| 872 | 270 | ||
| 873 | if (sqlite3_step(ppstmt) != SQLITE_ROW) | 271 | std::string wnid_s = line.substr(1, 8); |
| 272 | int wnid = stoi(wnid_s) + 100000000; | ||
| 273 | if (notionByWnid_.count(wnid)) | ||
| 874 | { | 274 | { |
| 875 | db_error(ppdb, query); | 275 | // We know that this notion has a wnid and is a noun. |
| 876 | } | 276 | notionByWnid_.at(wnid)->incrementNumOfImages(); |
| 877 | |||
| 878 | int rowid = sqlite3_column_int(ppstmt, 0); | ||
| 879 | |||
| 880 | sqlite3_finalize(ppstmt); | ||
| 881 | |||
| 882 | mapping.second.id = rowid; | ||
| 883 | |||
| 884 | for (auto pronunciation : pronunciations[canonical]) | ||
| 885 | { | ||
| 886 | if (!pronunciation.rhyme.empty()) | ||
| 887 | { | ||
| 888 | query = "INSERT INTO verb_pronunciations (verb_id, pronunciation, syllables, stress, prerhyme, rhyme) VALUES (?, ?, ?, ?, ?, ?)"; | ||
| 889 | } else { | ||
| 890 | query = "INSERT INTO verb_pronunciations (verb_id, pronunciation, syllables, stress) VALUES (?, ?, ?, ?)"; | ||
| 891 | } | ||
| 892 | |||
| 893 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
| 894 | { | ||
| 895 | db_error(ppdb, query); | ||
| 896 | } | ||
| 897 | |||
| 898 | sqlite3_bind_int(ppstmt, 1, rowid); | ||
| 899 | sqlite3_bind_text(ppstmt, 2, pronunciation.phonemes.c_str(), pronunciation.phonemes.length(), SQLITE_TRANSIENT); | ||
| 900 | sqlite3_bind_int(ppstmt, 3, pronunciation.syllables); | ||
| 901 | sqlite3_bind_text(ppstmt, 4, pronunciation.stress.c_str(), pronunciation.stress.length(), SQLITE_TRANSIENT); | ||
| 902 | |||
| 903 | if (!pronunciation.rhyme.empty()) | ||
| 904 | { | ||
| 905 | sqlite3_bind_text(ppstmt, 5, pronunciation.prerhyme.c_str(), pronunciation.prerhyme.length(), SQLITE_TRANSIENT); | ||
| 906 | sqlite3_bind_text(ppstmt, 6, pronunciation.rhyme.c_str(), pronunciation.rhyme.length(), SQLITE_TRANSIENT); | ||
| 907 | } | ||
| 908 | |||
| 909 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | ||
| 910 | { | ||
| 911 | db_error(ppdb, query); | ||
| 912 | } | ||
| 913 | |||
| 914 | sqlite3_finalize(ppstmt); | ||
| 915 | } | 277 | } |
| 916 | } | 278 | } |
| 917 | |||
| 918 | ppgs.update(); | ||
| 919 | } | 279 | } |
| 920 | } | 280 | |
| 921 | 281 | void generator::readWordNetSenseKeys() | |
| 922 | { | ||
| 923 | progress ppgs("Writing verb frames...", groups.size()); | ||
| 924 | for (auto& mapping : groups) | ||
| 925 | { | 282 | { |
| 926 | std::list<json> roledatal; | 283 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_sk.pl")); |
| 927 | std::transform(std::begin(mapping.second.roles), std::end(mapping.second.roles), std::back_inserter(roledatal), [] (std::pair<std::string, selrestr_t> r) { | 284 | progress ppgs("Reading sense keys from WordNet...", lines.size()); |
| 928 | json role; | ||
| 929 | role["type"] = r.first; | ||
| 930 | role["selrestrs"] = export_selrestrs(r.second); | ||
| 931 | |||
| 932 | return role; | ||
| 933 | }); | ||
| 934 | |||
| 935 | json roledata(roledatal); | ||
| 936 | std::string rdm = roledata.dump(); | ||
| 937 | |||
| 938 | sqlite3_stmt* ppstmt; | ||
| 939 | std::string query("INSERT INTO groups (data) VALUES (?)"); | ||
| 940 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
| 941 | { | ||
| 942 | db_error(ppdb, query); | ||
| 943 | } | ||
| 944 | |||
| 945 | sqlite3_bind_blob(ppstmt, 1, rdm.c_str(), rdm.size(), SQLITE_TRANSIENT); | ||
| 946 | |||
| 947 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | ||
| 948 | { | ||
| 949 | db_error(ppdb, query); | ||
| 950 | } | ||
| 951 | 285 | ||
| 952 | sqlite3_finalize(ppstmt); | 286 | for (std::string line : lines) |
| 953 | |||
| 954 | query = "SELECT last_insert_rowid()"; | ||
| 955 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
| 956 | { | ||
| 957 | db_error(ppdb, query); | ||
| 958 | } | ||
| 959 | |||
| 960 | if (sqlite3_step(ppstmt) != SQLITE_ROW) | ||
| 961 | { | ||
| 962 | db_error(ppdb, query); | ||
| 963 | } | ||
| 964 | |||
| 965 | int gid = sqlite3_column_int(ppstmt, 0); | ||
| 966 | sqlite3_finalize(ppstmt); | ||
| 967 | |||
| 968 | for (auto frame : mapping.second.frames) | ||
| 969 | { | 287 | { |
| 970 | std::list<json> fdatap; | 288 | ppgs.update(); |
| 971 | std::transform(std::begin(frame), std::end(frame), std::back_inserter(fdatap), [] (framepart_t& fp) { | ||
| 972 | json part; | ||
| 973 | |||
| 974 | switch (fp.type) | ||
| 975 | { | ||
| 976 | case framepart_t::type_t::np: | ||
| 977 | { | ||
| 978 | part["type"] = "np"; | ||
| 979 | part["role"] = fp.role; | ||
| 980 | part["selrestrs"] = export_selrestrs(fp.selrestrs); | ||
| 981 | part["synrestrs"] = fp.synrestrs; | ||
| 982 | |||
| 983 | break; | ||
| 984 | } | ||
| 985 | |||
| 986 | case framepart_t::type_t::pp: | ||
| 987 | { | ||
| 988 | part["type"] = "pp"; | ||
| 989 | part["values"] = fp.choices; | ||
| 990 | part["preprestrs"] = fp.preprestrs; | ||
| 991 | |||
| 992 | break; | ||
| 993 | } | ||
| 994 | |||
| 995 | case framepart_t::type_t::v: | ||
| 996 | { | ||
| 997 | part["type"] = "v"; | ||
| 998 | |||
| 999 | break; | ||
| 1000 | } | ||
| 1001 | |||
| 1002 | case framepart_t::type_t::adj: | ||
| 1003 | { | ||
| 1004 | part["type"] = "adj"; | ||
| 1005 | |||
| 1006 | break; | ||
| 1007 | } | ||
| 1008 | |||
| 1009 | case framepart_t::type_t::adv: | ||
| 1010 | { | ||
| 1011 | part["type"] = "adv"; | ||
| 1012 | |||
| 1013 | break; | ||
| 1014 | } | ||
| 1015 | |||
| 1016 | case framepart_t::type_t::lex: | ||
| 1017 | { | ||
| 1018 | part["type"] = "lex"; | ||
| 1019 | part["value"] = fp.lexval; | ||
| 1020 | |||
| 1021 | break; | ||
| 1022 | } | ||
| 1023 | } | ||
| 1024 | |||
| 1025 | return part; | ||
| 1026 | }); | ||
| 1027 | |||
| 1028 | json fdata(fdatap); | ||
| 1029 | std::string marshall = fdata.dump(); | ||
| 1030 | |||
| 1031 | query = "INSERT INTO frames (group_id, data) VALUES (?, ?)"; | ||
| 1032 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
| 1033 | { | ||
| 1034 | db_error(ppdb, query); | ||
| 1035 | } | ||
| 1036 | |||
| 1037 | sqlite3_bind_int(ppstmt, 1, gid); | ||
| 1038 | sqlite3_bind_blob(ppstmt, 2, marshall.c_str(), marshall.length(), SQLITE_TRANSIENT); | ||
| 1039 | 289 | ||
| 1040 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | 290 | // We only actually need to lookup verbs by sense key so we'll just |
| 291 | // ignore everything that isn't a verb. | ||
| 292 | std::regex relation("^sk\\((2\\d{8}),(\\d+),'(.+)'\\)\\.$"); | ||
| 293 | std::smatch relation_data; | ||
| 294 | if (!std::regex_search(line, relation_data, relation)) | ||
| 1041 | { | 295 | { |
| 1042 | db_error(ppdb, query); | 296 | continue; |
| 1043 | } | 297 | } |
| 298 | |||
| 299 | int synset_id = stoi(relation_data[1]); | ||
| 300 | int wnum = stoi(relation_data[2]); | ||
| 301 | std::string sense_key = relation_data[3]; | ||
| 1044 | 302 | ||
| 1045 | sqlite3_finalize(ppstmt); | 303 | // We are treating this mapping as injective, which is not entirely |
| 1046 | } | 304 | // accurate. First, the WordNet table contains duplicate rows, so those |
| 1047 | 305 | // need to be ignored. More importantly, a small number of sense keys | |
| 1048 | for (auto member : mapping.second.members) | 306 | // (one for each letter of the Latin alphabet, plus 9 other words) each |
| 1049 | { | 307 | // map to two different words in the same synset which differ only by |
| 1050 | if (verbs.count(member) == 1) | 308 | // capitalization. Luckily, none of these exceptions are verbs, so we |
| 309 | // can pretend that the mapping is injective. | ||
| 310 | if (!wnSenseKeys_.count(sense_key)) | ||
| 1051 | { | 311 | { |
| 1052 | auto& v = verbs[member]; | 312 | std::pair<int, int> lookup(synset_id, wnum); |
| 1053 | 313 | if (wordByWnidAndWnum_.count(lookup)) | |
| 1054 | query = "INSERT INTO verb_groups (verb_id, group_id) VALUES (?, ?)"; | ||
| 1055 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
| 1056 | { | ||
| 1057 | db_error(ppdb, query); | ||
| 1058 | } | ||
| 1059 | |||
| 1060 | sqlite3_bind_int(ppstmt, 1, v.id); | ||
| 1061 | sqlite3_bind_int(ppstmt, 2, gid); | ||
| 1062 | |||
| 1063 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | ||
| 1064 | { | 314 | { |
| 1065 | db_error(ppdb, query); | 315 | wnSenseKeys_[sense_key] = wordByWnidAndWnum_.at(lookup); |
| 1066 | } | 316 | } |
| 1067 | |||
| 1068 | sqlite3_finalize(ppstmt); | ||
| 1069 | } | 317 | } |
| 1070 | } | 318 | } |
| 1071 | |||
| 1072 | ppgs.update(); | ||
| 1073 | } | 319 | } |
| 1074 | } | 320 | |
| 1075 | 321 | void generator::readVerbNet() | |
| 1076 | // Get nouns/adjectives/adverbs from WordNet | ||
| 1077 | // Useful relations: | ||
| 1078 | // - s: master list | ||
| 1079 | // - ant: antonymy (e.g. happy/sad, sad/happy, happiness/sadness) | ||
| 1080 | // - at: variation (e.g. a measurement can be standard or nonstandard) | ||
| 1081 | // - der: derivation (e.g. happy/happily, happily/happy) | ||
| 1082 | // - hyp: hypernymy/hyponymy (e.g. color/red, color/blue) | ||
| 1083 | // - ins: instantiation (do we need this? let's see) | ||
| 1084 | // - mm: member meronymy/holonymy (e.g. family/mother, family/child) | ||
| 1085 | // - mp: part meronymy/holonymy (e.g. wheel/spoke, wheel/tire) | ||
| 1086 | // - ms: substance meronymy/holonymy (e.g. tire/rubber, doorstop/rubber) | ||
| 1087 | // - per: pertainymy (e.g. something that is Alaskan pertains to Alaska) | ||
| 1088 | // mannernymy (e.g. something done quickly is done in a manner that is quick) | ||
| 1089 | // - sa: specification (e.g. inaccurate (general) can mean imprecise or incorrect (specific)) | ||
| 1090 | // - sim: synonymy (e.g. cheerful/happy, happy/cheerful) | ||
| 1091 | // - syntax: positioning flags for some adjectives | ||
| 1092 | std::string wnpref {argv[3]}; | ||
| 1093 | if (wnpref.back() != '/') | ||
| 1094 | { | ||
| 1095 | wnpref += '/'; | ||
| 1096 | } | ||
| 1097 | |||
| 1098 | // s table | ||
| 1099 | { | ||
| 1100 | std::ifstream wnsfile(wnpref + "wn_s.pl"); | ||
| 1101 | if (!wnsfile.is_open()) | ||
| 1102 | { | 322 | { |
| 1103 | std::cout << "Invalid WordNet data directory." << std::endl; | 323 | std::cout << "Reading frames from VerbNet..." << std::endl; |
| 1104 | print_usage(); | ||
| 1105 | } | ||
| 1106 | 324 | ||
| 1107 | std::list<std::string> lines; | 325 | DIR* dir; |
| 1108 | for (;;) | 326 | if ((dir = opendir(verbNetPath_.c_str())) == nullptr) |
| 1109 | { | ||
| 1110 | std::string line; | ||
| 1111 | if (!getline(wnsfile, line)) | ||
| 1112 | { | 327 | { |
| 1113 | break; | 328 | throw std::invalid_argument("Invalid VerbNet data directory"); |
| 1114 | } | 329 | } |
| 1115 | 330 | ||
| 1116 | if (line.back() == '\r') | 331 | struct dirent* ent; |
| 1117 | { | 332 | while ((ent = readdir(dir)) != nullptr) |
| 1118 | line.pop_back(); | ||
| 1119 | } | ||
| 1120 | |||
| 1121 | lines.push_back(line); | ||
| 1122 | } | ||
| 1123 | |||
| 1124 | progress ppgs("Writing nouns, adjectives, and adverbs...", lines.size()); | ||
| 1125 | for (auto line : lines) | ||
| 1126 | { | ||
| 1127 | ppgs.update(); | ||
| 1128 | |||
| 1129 | std::regex relation("^s\\(([134]\\d{8}),(\\d+),'(.+)',\\w,\\d+,\\d+\\)\\.$"); | ||
| 1130 | std::smatch relation_data; | ||
| 1131 | if (!std::regex_search(line, relation_data, relation)) | ||
| 1132 | { | 333 | { |
| 1133 | continue; | 334 | std::string filename(verbNetPath_); |
| 1134 | } | 335 | |
| 336 | if (filename.back() != '/') | ||
| 337 | { | ||
| 338 | filename += '/'; | ||
| 339 | } | ||
| 1135 | 340 | ||
| 1136 | int synset_id = stoi(relation_data[1]); | 341 | filename += ent->d_name; |
| 1137 | int wnum = stoi(relation_data[2]); | ||
| 1138 | std::string word = relation_data[3]; | ||
| 1139 | size_t word_it; | ||
| 1140 | while ((word_it = word.find("''")) != std::string::npos) | ||
| 1141 | { | ||
| 1142 | word.erase(word_it, 1); | ||
| 1143 | } | ||
| 1144 | 342 | ||
| 1145 | std::string query; | 343 | if (filename.rfind(".xml") != filename.size() - 4) |
| 1146 | switch (synset_id / 100000000) | ||
| 1147 | { | ||
| 1148 | case 1: // Noun | ||
| 1149 | { | 344 | { |
| 1150 | if (nouns.count(word) == 1) | 345 | continue; |
| 1151 | { | ||
| 1152 | query = "INSERT INTO nouns (singular, proper, complexity, images, wnid, plural) VALUES (?, ?, ?, ?, ?, ?)"; | ||
| 1153 | } else { | ||
| 1154 | query = "INSERT INTO nouns (singular, proper, complexity, images, wnid) VALUES (?, ?, ?, ?, ?)"; | ||
| 1155 | } | ||
| 1156 | |||
| 1157 | break; | ||
| 1158 | } | 346 | } |
| 1159 | 347 | ||
| 1160 | case 2: // Verb | 348 | xmlDocPtr doc = xmlParseFile(filename.c_str()); |
| 349 | if (doc == nullptr) | ||
| 1161 | { | 350 | { |
| 1162 | // Ignore | 351 | throw std::logic_error("Error opening " + filename); |
| 1163 | |||
| 1164 | break; | ||
| 1165 | } | 352 | } |
| 1166 | 353 | ||
| 1167 | case 3: // Adjective | 354 | xmlNodePtr top = xmlDocGetRootElement(doc); |
| 355 | if ((top == nullptr) || (xmlStrcmp(top->name, reinterpret_cast<const xmlChar*>("VNCLASS")))) | ||
| 1168 | { | 356 | { |
| 1169 | if (adjectives.count(word) == 1) | 357 | throw std::logic_error("Bad VerbNet file format: " + filename); |
| 1170 | { | ||
| 1171 | query = "INSERT INTO adjectives (base_form, complexity, comparative, superlative) VALUES (?, ?, ?, ?)"; | ||
| 1172 | } else { | ||
| 1173 | query = "INSERT INTO adjectives (base_form, complexity) VALUES (?, ?)"; | ||
| 1174 | } | ||
| 1175 | |||
| 1176 | break; | ||
| 1177 | } | 358 | } |
| 1178 | 359 | ||
| 1179 | case 4: // Adverb | 360 | try |
| 1180 | { | 361 | { |
| 1181 | if (adjectives.count(word) == 1) | 362 | createGroup(top); |
| 1182 | { | 363 | } catch (const std::exception& e) |
| 1183 | query = "INSERT INTO adverbs (base_form, complexity, comparative, superlative) VALUES (?, ?, ?, ?)"; | 364 | { |
| 1184 | } else { | 365 | std::throw_with_nested(std::logic_error("Error parsing VerbNet file: " + filename)); |
| 1185 | query = "INSERT INTO adverbs (base_form, complexity) VALUES (?, ?)"; | ||
| 1186 | } | ||
| 1187 | |||
| 1188 | break; | ||
| 1189 | } | 366 | } |
| 1190 | } | 367 | } |
| 368 | |||
| 369 | closedir(dir); | ||
| 370 | } | ||
| 1191 | 371 | ||
| 1192 | sqlite3_stmt* ppstmt; | 372 | void generator::readAgidInflections() |
| 1193 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | 373 | { |
| 374 | std::list<std::string> lines(readFile(agidPath_)); | ||
| 375 | progress ppgs("Reading inflections from AGID...", lines.size()); | ||
| 376 | |||
| 377 | for (std::string line : lines) | ||
| 1194 | { | 378 | { |
| 1195 | db_error(ppdb, query); | 379 | ppgs.update(); |
| 1196 | } | 380 | |
| 381 | int divider = line.find_first_of(" "); | ||
| 382 | std::string infinitive = line.substr(0, divider); | ||
| 383 | line = line.substr(divider+1); | ||
| 384 | char type = line[0]; | ||
| 1197 | 385 | ||
| 1198 | sqlite3_bind_text(ppstmt, 1, word.c_str(), word.length(), SQLITE_TRANSIENT); | 386 | if (line[1] == '?') |
| 1199 | switch (synset_id / 100000000) | ||
| 1200 | { | ||
| 1201 | case 1: // Noun | ||
| 1202 | { | 387 | { |
| 1203 | sqlite3_bind_int(ppstmt, 2, (std::any_of(std::begin(word), std::end(word), [] (char ch) { | 388 | line.erase(0, 4); |
| 1204 | return isupper(ch); | 389 | } else { |
| 1205 | }) ? 1 : 0)); | 390 | line.erase(0, 3); |
| 1206 | |||
| 1207 | sqlite3_bind_int(ppstmt, 3, verbly::split<std::list<std::string>>(word, " ").size()); | ||
| 1208 | sqlite3_bind_int(ppstmt, 4, images[synset_id]); | ||
| 1209 | sqlite3_bind_int(ppstmt, 5, synset_id); | ||
| 1210 | |||
| 1211 | if (nouns.count(word) == 1) | ||
| 1212 | { | ||
| 1213 | sqlite3_bind_text(ppstmt, 6, nouns[word].plural.c_str(), nouns[word].plural.length(), SQLITE_TRANSIENT); | ||
| 1214 | } | ||
| 1215 | |||
| 1216 | break; | ||
| 1217 | } | 391 | } |
| 1218 | 392 | ||
| 1219 | case 3: // Adjective | 393 | if (!lemmaByBaseForm_.count(infinitive) && (type != 'V')) |
| 1220 | case 4: // Adverb | ||
| 1221 | { | 394 | { |
| 1222 | sqlite3_bind_int(ppstmt, 2, verbly::split<std::list<std::string>>(word, " ").size()); | 395 | continue; |
| 1223 | 396 | } | |
| 1224 | if (adjectives.count(word) == 1) | 397 | |
| 398 | lemma& curLemma = lookupOrCreateLemma(infinitive); | ||
| 399 | |||
| 400 | auto forms = split<std::vector<std::string>>(line, " | "); | ||
| 401 | for (std::string& inflForm : forms) | ||
| 402 | { | ||
| 403 | int sympos = inflForm.find_first_of(",?"); | ||
| 404 | if (sympos != std::string::npos) | ||
| 1225 | { | 405 | { |
| 1226 | sqlite3_bind_text(ppstmt, 3, adjectives[word].comparative.c_str(), adjectives[word].comparative.length(), SQLITE_TRANSIENT); | 406 | inflForm = inflForm.substr(0, sympos); |
| 1227 | sqlite3_bind_text(ppstmt, 4, adjectives[word].superlative.c_str(), adjectives[word].superlative.length(), SQLITE_TRANSIENT); | ||
| 1228 | } | 407 | } |
| 1229 | |||
| 1230 | break; | ||
| 1231 | } | 408 | } |
| 1232 | } | ||
| 1233 | 409 | ||
| 1234 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | 410 | switch (type) |
| 1235 | { | ||
| 1236 | db_error(ppdb, query); | ||
| 1237 | } | ||
| 1238 | |||
| 1239 | sqlite3_finalize(ppstmt); | ||
| 1240 | |||
| 1241 | query = "SELECT last_insert_rowid()"; | ||
| 1242 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
| 1243 | { | ||
| 1244 | db_error(ppdb, query); | ||
| 1245 | } | ||
| 1246 | |||
| 1247 | if (sqlite3_step(ppstmt) != SQLITE_ROW) | ||
| 1248 | { | ||
| 1249 | db_error(ppdb, query); | ||
| 1250 | } | ||
| 1251 | |||
| 1252 | int rowid = sqlite3_column_int(ppstmt, 0); | ||
| 1253 | wn[synset_id][wnum] = rowid; | ||
| 1254 | |||
| 1255 | sqlite3_finalize(ppstmt); | ||
| 1256 | |||
| 1257 | std::string canonical(word); | ||
| 1258 | std::transform(std::begin(canonical), std::end(canonical), std::begin(canonical), ::tolower); | ||
| 1259 | if (pronunciations.count(canonical) == 1) | ||
| 1260 | { | ||
| 1261 | for (auto pronunciation : pronunciations[canonical]) | ||
| 1262 | { | 411 | { |
| 1263 | switch (synset_id / 100000000) | 412 | case 'V': |
| 1264 | { | 413 | { |
| 1265 | case 1: // Noun | 414 | if (forms.size() == 4) |
| 1266 | { | 415 | { |
| 1267 | if (!pronunciation.rhyme.empty()) | 416 | curLemma.addInflection(inflection::past_tense, lookupOrCreateForm(forms[0])); |
| 1268 | { | 417 | curLemma.addInflection(inflection::past_participle, lookupOrCreateForm(forms[1])); |
| 1269 | query = "INSERT INTO noun_pronunciations (noun_id, pronunciation, syllables, stress, prerhyme, rhyme) VALUES (?, ?, ?, ?, ?, ?)"; | 418 | curLemma.addInflection(inflection::ing_form, lookupOrCreateForm(forms[2])); |
| 1270 | } else { | 419 | curLemma.addInflection(inflection::s_form, lookupOrCreateForm(forms[3])); |
| 1271 | query = "INSERT INTO noun_pronunciations (noun_id, pronunciation, syllables, stress) VALUES (?, ?, ?, ?)"; | 420 | } else if (forms.size() == 3) |
| 1272 | } | ||
| 1273 | |||
| 1274 | break; | ||
| 1275 | } | ||
| 1276 | |||
| 1277 | case 3: // Adjective | ||
| 1278 | { | 421 | { |
| 1279 | if (!pronunciation.rhyme.empty()) | 422 | curLemma.addInflection(inflection::past_tense, lookupOrCreateForm(forms[0])); |
| 1280 | { | 423 | curLemma.addInflection(inflection::past_participle, lookupOrCreateForm(forms[0])); |
| 1281 | query = "INSERT INTO adjective_pronunciations (adjective_id, pronunciation, syllables, stress, prerhyme, rhyme) VALUES (?, ?, ?, ?, ?, ?)"; | 424 | curLemma.addInflection(inflection::ing_form, lookupOrCreateForm(forms[1])); |
| 1282 | } else { | 425 | curLemma.addInflection(inflection::s_form, lookupOrCreateForm(forms[2])); |
| 1283 | query = "INSERT INTO adjective_pronunciations (adjective_id, pronunciation, syllables, stress) VALUES (?, ?, ?, ?)"; | 426 | } else if (forms.size() == 8) |
| 1284 | } | 427 | { |
| 1285 | 428 | // As of AGID 2014.08.11, this is only "to be" | |
| 1286 | break; | 429 | curLemma.addInflection(inflection::past_tense, lookupOrCreateForm(forms[0])); |
| 430 | curLemma.addInflection(inflection::past_participle, lookupOrCreateForm(forms[2])); | ||
| 431 | curLemma.addInflection(inflection::ing_form, lookupOrCreateForm(forms[3])); | ||
| 432 | curLemma.addInflection(inflection::s_form, lookupOrCreateForm(forms[4])); | ||
| 433 | } else { | ||
| 434 | // Words that don't fit the cases above as of AGID 2014.08.11: | ||
| 435 | // - may and shall do not conjugate the way we want them to | ||
| 436 | // - methinks only has a past tense and is an outlier | ||
| 437 | // - wit has five forms, and is archaic/obscure enough that we can ignore it for now | ||
| 438 | std::cout << " Ignoring verb \"" << infinitive << "\" due to non-standard number of forms." << std::endl; | ||
| 1287 | } | 439 | } |
| 1288 | 440 | ||
| 1289 | case 4: // Adverb | 441 | // For verbs in particular, we sometimes create a notion and a word |
| 442 | // from inflection data. Specifically, if there are not yet any | ||
| 443 | // verbs existing that have the same infinitive form. "Yet" means | ||
| 444 | // that this verb appears in the AGID data but not in either WordNet | ||
| 445 | // or VerbNet. | ||
| 446 | if (!wordsByBaseForm_.count(infinitive) | ||
| 447 | || !std::any_of(std::begin(wordsByBaseForm_.at(infinitive)), std::end(wordsByBaseForm_.at(infinitive)), [] (word* w) { | ||
| 448 | return w->getNotion().getPartOfSpeech() == part_of_speech::verb; | ||
| 449 | })) | ||
| 1290 | { | 450 | { |
| 1291 | if (!pronunciation.rhyme.empty()) | 451 | notion& n = createNotion(part_of_speech::verb); |
| 1292 | { | 452 | createWord(n, curLemma); |
| 1293 | query = "INSERT INTO adverb_pronunciations (adverb_id, pronunciation, syllables, stress, prerhyme, rhyme) VALUES (?, ?, ?, ?, ?, ?)"; | ||
| 1294 | } else { | ||
| 1295 | query = "INSERT INTO adverb_pronunciations (adverb_id, pronunciation, syllables, stress) VALUES (?, ?, ?, ?)"; | ||
| 1296 | } | ||
| 1297 | |||
| 1298 | break; | ||
| 1299 | } | 453 | } |
| 1300 | } | ||
| 1301 | |||
| 1302 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
| 1303 | { | ||
| 1304 | db_error(ppdb, query); | ||
| 1305 | } | ||
| 1306 | |||
| 1307 | sqlite3_bind_int(ppstmt, 1, rowid); | ||
| 1308 | sqlite3_bind_text(ppstmt, 2, pronunciation.phonemes.c_str(), pronunciation.phonemes.length(), SQLITE_TRANSIENT); | ||
| 1309 | sqlite3_bind_int(ppstmt, 3, pronunciation.syllables); | ||
| 1310 | sqlite3_bind_text(ppstmt, 4, pronunciation.stress.c_str(), pronunciation.stress.length(), SQLITE_TRANSIENT); | ||
| 1311 | |||
| 1312 | if (!pronunciation.rhyme.empty()) | ||
| 1313 | { | ||
| 1314 | sqlite3_bind_text(ppstmt, 5, pronunciation.prerhyme.c_str(), pronunciation.prerhyme.length(), SQLITE_TRANSIENT); | ||
| 1315 | sqlite3_bind_text(ppstmt, 6, pronunciation.rhyme.c_str(), pronunciation.rhyme.length(), SQLITE_TRANSIENT); | ||
| 1316 | } | ||
| 1317 | 454 | ||
| 1318 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | 455 | break; |
| 1319 | { | ||
| 1320 | db_error(ppdb, query); | ||
| 1321 | } | 456 | } |
| 1322 | |||
| 1323 | sqlite3_finalize(ppstmt); | ||
| 1324 | } | ||
| 1325 | } | ||
| 1326 | } | ||
| 1327 | } | ||
| 1328 | |||
| 1329 | // While we're working on s | ||
| 1330 | { | ||
| 1331 | progress ppgs("Writing word synonyms...", wn.size()); | ||
| 1332 | for (auto sense : wn) | ||
| 1333 | { | ||
| 1334 | ppgs.update(); | ||
| 1335 | 457 | ||
| 1336 | for (auto word1 : sense.second) | 458 | case 'A': |
| 1337 | { | ||
| 1338 | for (auto word2 : sense.second) | ||
| 1339 | { | ||
| 1340 | if (word1 != word2) | ||
| 1341 | { | 459 | { |
| 1342 | std::string query; | 460 | if (forms.size() == 2) |
| 1343 | switch (sense.first / 100000000) | ||
| 1344 | { | 461 | { |
| 1345 | case 1: // Noun | 462 | curLemma.addInflection(inflection::comparative, lookupOrCreateForm(forms[0])); |
| 1346 | { | 463 | curLemma.addInflection(inflection::superlative, lookupOrCreateForm(forms[1])); |
| 1347 | query = "INSERT INTO noun_synonymy (noun_1_id, noun_2_id) VALUES (?, ?)"; | 464 | } else { |
| 1348 | 465 | // As of AGID 2014.08.11, this is only "only", which has only the form "onliest" | |
| 1349 | break; | 466 | std::cout << " Ignoring adjective/adverb \"" << infinitive << "\" due to non-standard number of forms." << std::endl; |
| 1350 | } | 467 | } |
| 1351 | |||
| 1352 | case 2: // Verb | ||
| 1353 | { | ||
| 1354 | // Ignore | ||
| 1355 | |||
| 1356 | break; | ||
| 1357 | } | ||
| 1358 | |||
| 1359 | case 3: // Adjective | ||
| 1360 | { | ||
| 1361 | query = "INSERT INTO adjective_synonymy (adjective_1_id, adjective_2_id) VALUES (?, ?)"; | ||
| 1362 | 468 | ||
| 1363 | break; | 469 | break; |
| 1364 | } | 470 | } |
| 1365 | 471 | ||
| 1366 | case 4: // Adverb | 472 | case 'N': |
| 1367 | { | 473 | { |
| 1368 | query = "INSERT INTO adverb_synonymy (adverb_1_id, adverb_2_id) VALUES (?, ?)"; | 474 | if (forms.size() == 1) |
| 1369 | |||
| 1370 | break; | ||
| 1371 | } | ||
| 1372 | } | ||
| 1373 | |||
| 1374 | sqlite3_stmt* ppstmt; | ||
| 1375 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
| 1376 | { | ||
| 1377 | db_error(ppdb, query); | ||
| 1378 | } | ||
| 1379 | |||
| 1380 | sqlite3_bind_int(ppstmt, 1, word1.second); | ||
| 1381 | sqlite3_bind_int(ppstmt, 2, word2.second); | ||
| 1382 | |||
| 1383 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | ||
| 1384 | { | 475 | { |
| 1385 | db_error(ppdb, query); | 476 | curLemma.addInflection(inflection::plural, lookupOrCreateForm(forms[0])); |
| 477 | } else { | ||
| 478 | // As of AGID 2014.08.11, this is non-existent. | ||
| 479 | std::cout << " Ignoring noun \"" << infinitive << "\" due to non-standard number of forms." << std::endl; | ||
| 1386 | } | 480 | } |
| 1387 | 481 | ||
| 1388 | sqlite3_finalize(ppstmt); | 482 | break; |
| 1389 | } | 483 | } |
| 1390 | } | 484 | } |
| 1391 | } | 485 | } |
| 1392 | } | 486 | } |
| 1393 | } | ||
| 1394 | |||
| 1395 | // ant table | ||
| 1396 | { | ||
| 1397 | std::ifstream wnantfile(wnpref + "wn_ant.pl"); | ||
| 1398 | if (!wnantfile.is_open()) | ||
| 1399 | { | ||
| 1400 | std::cout << "Invalid WordNet data directory." << std::endl; | ||
| 1401 | print_usage(); | ||
| 1402 | } | ||
| 1403 | |||
| 1404 | std::list<std::string> lines; | ||
| 1405 | for (;;) | ||
| 1406 | { | ||
| 1407 | std::string line; | ||
| 1408 | if (!getline(wnantfile, line)) | ||
| 1409 | { | ||
| 1410 | break; | ||
| 1411 | } | ||
| 1412 | 487 | ||
| 1413 | if (line.back() == '\r') | 488 | void generator::readPrepositions() |
| 1414 | { | ||
| 1415 | line.pop_back(); | ||
| 1416 | } | ||
| 1417 | |||
| 1418 | lines.push_back(line); | ||
| 1419 | } | ||
| 1420 | |||
| 1421 | progress ppgs("Writing antonyms...", lines.size()); | ||
| 1422 | for (auto line : lines) | ||
| 1423 | { | 489 | { |
| 1424 | ppgs.update(); | 490 | std::list<std::string> lines(readFile("prepositions.txt")); |
| 491 | progress ppgs("Reading prepositions...", lines.size()); | ||
| 1425 | 492 | ||
| 1426 | std::regex relation("^ant\\(([134]\\d{8}),(\\d+),([134]\\d{8}),(\\d+)\\)\\."); | 493 | for (std::string line : lines) |
| 1427 | std::smatch relation_data; | ||
| 1428 | if (!std::regex_search(line, relation_data, relation)) | ||
| 1429 | { | ||
| 1430 | continue; | ||
| 1431 | } | ||
| 1432 | |||
| 1433 | int synset_id_1 = stoi(relation_data[1]); | ||
| 1434 | int wnum_1 = stoi(relation_data[2]); | ||
| 1435 | int synset_id_2 = stoi(relation_data[3]); | ||
| 1436 | int wnum_2 = stoi(relation_data[4]); | ||
| 1437 | |||
| 1438 | std::string query; | ||
| 1439 | switch (synset_id_1 / 100000000) | ||
| 1440 | { | 494 | { |
| 1441 | case 1: // Noun | 495 | ppgs.update(); |
| 1442 | { | ||
| 1443 | query = "INSERT INTO noun_antonymy (noun_1_id, noun_2_id) VALUES (?, ?)"; | ||
| 1444 | 496 | ||
| 1445 | break; | 497 | std::regex relation("^([^:]+): (.+)"); |
| 1446 | } | 498 | std::smatch relation_data; |
| 1447 | 499 | std::regex_search(line, relation_data, relation); | |
| 1448 | case 2: // Verb | 500 | std::string prep = relation_data[1]; |
| 1449 | { | 501 | auto groups = split<std::list<std::string>>(relation_data[2], ", "); |
| 1450 | // Ignore | ||
| 1451 | 502 | ||
| 1452 | break; | 503 | notion& n = createNotion(part_of_speech::preposition); |
| 1453 | } | 504 | lemma& l = lookupOrCreateLemma(prep); |
| 1454 | 505 | word& w = createWord(n, l); | |
| 1455 | case 3: // Adjective | ||
| 1456 | { | ||
| 1457 | query = "INSERT INTO adjective_antonymy (adjective_1_id, adjective_2_id) VALUES (?, ?)"; | ||
| 1458 | 506 | ||
| 1459 | break; | 507 | n.setPrepositionGroups(groups); |
| 1460 | } | ||
| 1461 | |||
| 1462 | case 4: // Adverb | ||
| 1463 | { | ||
| 1464 | query = "INSERT INTO adverb_antonymy (adverb_1_id, adverb_2_id) VALUES (?, ?)"; | ||
| 1465 | |||
| 1466 | break; | ||
| 1467 | } | ||
| 1468 | } | ||
| 1469 | |||
| 1470 | sqlite3_stmt* ppstmt; | ||
| 1471 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
| 1472 | { | ||
| 1473 | db_error(ppdb, query); | ||
| 1474 | } | ||
| 1475 | |||
| 1476 | sqlite3_bind_int(ppstmt, 1, wn[synset_id_1][wnum_1]); | ||
| 1477 | sqlite3_bind_int(ppstmt, 2, wn[synset_id_2][wnum_2]); | ||
| 1478 | |||
| 1479 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | ||
| 1480 | { | ||
| 1481 | db_error(ppdb, query); | ||
| 1482 | } | ||
| 1483 | |||
| 1484 | sqlite3_finalize(ppstmt); | ||
| 1485 | } | ||
| 1486 | } | ||
| 1487 | |||
| 1488 | // at table | ||
| 1489 | { | ||
| 1490 | std::ifstream wnatfile(wnpref + "wn_at.pl"); | ||
| 1491 | if (!wnatfile.is_open()) | ||
| 1492 | { | ||
| 1493 | std::cout << "Invalid WordNet data directory." << std::endl; | ||
| 1494 | print_usage(); | ||
| 1495 | } | ||
| 1496 | |||
| 1497 | std::list<std::string> lines; | ||
| 1498 | for (;;) | ||
| 1499 | { | ||
| 1500 | std::string line; | ||
| 1501 | if (!getline(wnatfile, line)) | ||
| 1502 | { | ||
| 1503 | break; | ||
| 1504 | } | 508 | } |
| 1505 | |||
| 1506 | if (line.back() == '\r') | ||
| 1507 | { | ||
| 1508 | line.pop_back(); | ||
| 1509 | } | ||
| 1510 | |||
| 1511 | lines.push_back(line); | ||
| 1512 | } | 509 | } |
| 1513 | 510 | ||
| 1514 | progress ppgs("Writing variations...", lines.size()); | 511 | void generator::readCmudictPronunciations() |
| 1515 | for (auto line : lines) | ||
| 1516 | { | 512 | { |
| 1517 | ppgs.update(); | 513 | std::list<std::string> lines(readFile(cmudictPath_)); |
| 514 | progress ppgs("Reading pronunciations from CMUDICT...", lines.size()); | ||
| 1518 | 515 | ||
| 1519 | std::regex relation("^at\\((1\\d{8}),(3\\d{8})\\)\\."); | 516 | for (std::string line : lines) |
| 1520 | std::smatch relation_data; | ||
| 1521 | if (!std::regex_search(line, relation_data, relation)) | ||
| 1522 | { | 517 | { |
| 1523 | continue; | 518 | ppgs.update(); |
| 1524 | } | 519 | |
| 1525 | 520 | std::regex phoneme("([A-Z][^ \\(]*)(?:\\(\\d+\\))? ([A-Z 0-9]+)"); | |
| 1526 | int synset_id_1 = stoi(relation_data[1]); | 521 | std::smatch phoneme_data; |
| 1527 | int synset_id_2 = stoi(relation_data[2]); | 522 | if (std::regex_search(line, phoneme_data, phoneme)) |
| 1528 | std::string query("INSERT INTO variation (noun_id, adjective_id) VALUES (?, ?)"); | ||
| 1529 | |||
| 1530 | for (auto mapping1 : wn[synset_id_1]) | ||
| 1531 | { | ||
| 1532 | for (auto mapping2 : wn[synset_id_2]) | ||
| 1533 | { | 523 | { |
| 1534 | sqlite3_stmt* ppstmt; | 524 | std::string canonical(phoneme_data[1]); |
| 1535 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK) | 525 | std::transform(std::begin(canonical), std::end(canonical), std::begin(canonical), ::tolower); |
| 1536 | { | ||
| 1537 | db_error(ppdb, query); | ||
| 1538 | } | ||
| 1539 | |||
| 1540 | sqlite3_bind_int(ppstmt, 1, mapping1.second); | ||
| 1541 | sqlite3_bind_int(ppstmt, 2, mapping2.second); | ||
| 1542 | 526 | ||
| 1543 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | 527 | if (!formByText_.count(canonical)) |
| 1544 | { | 528 | { |
| 1545 | db_error(ppdb, query); | 529 | continue; |
| 1546 | } | 530 | } |
| 1547 | 531 | ||
| 1548 | sqlite3_finalize(ppstmt); | 532 | std::string phonemes = phoneme_data[2]; |
| 533 | pronunciations_.emplace_back(phonemes); | ||
| 534 | pronunciation& p = pronunciations_.back(); | ||
| 535 | formByText_.at(canonical)->addPronunciation(p); | ||
| 1549 | } | 536 | } |
| 1550 | } | 537 | } |
| 1551 | } | 538 | } |
| 1552 | } | ||
| 1553 | |||
| 1554 | // der table | ||
| 1555 | { | ||
| 1556 | std::ifstream wnderfile(wnpref + "wn_der.pl"); | ||
| 1557 | if (!wnderfile.is_open()) | ||
| 1558 | { | ||
| 1559 | std::cout << "Invalid WordNet data directory." << std::endl; | ||
| 1560 | print_usage(); | ||
| 1561 | } | ||
| 1562 | 539 | ||
| 1563 | std::list<std::string> lines; | 540 | void generator::writeSchema() |
| 1564 | for (;;) | ||
| 1565 | { | 541 | { |
| 1566 | std::string line; | 542 | std::ifstream file("schema.sql"); |
| 1567 | if (!getline(wnderfile, line)) | 543 | if (!file) |
| 1568 | { | 544 | { |
| 1569 | break; | 545 | throw std::invalid_argument("Could not find database schema"); |
| 1570 | } | 546 | } |
| 1571 | 547 | ||
| 1572 | if (line.back() == '\r') | 548 | std::ostringstream schemaBuilder; |
| 549 | std::string line; | ||
| 550 | while (std::getline(file, line)) | ||
| 1573 | { | 551 | { |
| 1574 | line.pop_back(); | 552 | if (line.back() == '\r') |
| 553 | { | ||
| 554 | line.pop_back(); | ||
| 555 | } | ||
| 556 | |||
| 557 | schemaBuilder << line; | ||
| 1575 | } | 558 | } |
| 1576 | 559 | ||
| 1577 | lines.push_back(line); | 560 | std::string schema = schemaBuilder.str(); |
| 561 | auto queries = split<std::list<std::string>>(schema, ";"); | ||
| 562 | progress ppgs("Writing database schema...", queries.size()); | ||
| 563 | for (std::string query : queries) | ||
| 564 | { | ||
| 565 | if (!queries.empty()) | ||
| 566 | { | ||
| 567 | db_.runQuery(query); | ||
| 568 | } | ||
| 569 | |||
| 570 | ppgs.update(); | ||
| 571 | } | ||
| 1578 | } | 572 | } |
| 1579 | 573 | ||
| 1580 | progress ppgs("Writing morphological derivation...", lines.size()); | 574 | void generator::dumpObjects() |
| 1581 | for (auto line : lines) | ||
| 1582 | { | 575 | { |
| 1583 | ppgs.update(); | ||
| 1584 | |||
| 1585 | std::regex relation("^der\\(([134]\\d{8}),(\\d+),([134]\\d{8}),(\\d+)\\)\\."); | ||
| 1586 | std::smatch relation_data; | ||
| 1587 | if (!std::regex_search(line, relation_data, relation)) | ||
| 1588 | { | 576 | { |
| 1589 | continue; | 577 | progress ppgs("Writing notions...", notions_.size()); |
| 578 | |||
| 579 | for (notion& n : notions_) | ||
| 580 | { | ||
| 581 | db_ << n; | ||
| 582 | |||
| 583 | ppgs.update(); | ||
| 584 | } | ||
| 1590 | } | 585 | } |
| 1591 | 586 | ||
| 1592 | int synset_id_1 = stoi(relation_data[1]); | ||
| 1593 | int wnum_1 = stoi(relation_data[2]); | ||
| 1594 | int synset_id_2 = stoi(relation_data[3]); | ||
| 1595 | int wnum_2 = stoi(relation_data[4]); | ||
| 1596 | std::string query; | ||
| 1597 | switch (synset_id_1 / 100000000) | ||
| 1598 | { | 587 | { |
| 1599 | case 1: // Noun | 588 | progress ppgs("Writing words...", words_.size()); |
| 589 | |||
| 590 | for (word& w : words_) | ||
| 1600 | { | 591 | { |
| 1601 | switch (synset_id_2 / 100000000) | 592 | db_ << w; |
| 1602 | { | ||
| 1603 | case 1: // Noun | ||
| 1604 | { | ||
| 1605 | query = "INSERT INTO noun_noun_derivation (noun_1_id, noun_2_id) VALUES (?, ?)"; | ||
| 1606 | break; | ||
| 1607 | } | ||
| 1608 | |||
| 1609 | case 3: // Adjective | ||
| 1610 | { | ||
| 1611 | query = "INSERT INTO noun_adjective_derivation (noun_id, adjective_id) VALUES (?, ?)"; | ||
| 1612 | break; | ||
| 1613 | } | ||
| 1614 | |||
| 1615 | case 4: // Adverb | ||
| 1616 | { | ||
| 1617 | query = "INSERT INTO noun_adverb_derivation (noun_id, adverb_id) VALUES (?, ?)"; | ||
| 1618 | break; | ||
| 1619 | } | ||
| 1620 | } | ||
| 1621 | 593 | ||
| 1622 | break; | 594 | ppgs.update(); |
| 1623 | } | 595 | } |
| 596 | } | ||
| 597 | |||
| 598 | { | ||
| 599 | progress ppgs("Writing lemmas...", lemmas_.size()); | ||
| 1624 | 600 | ||
| 1625 | case 3: // Adjective | 601 | for (lemma& l : lemmas_) |
| 1626 | { | 602 | { |
| 1627 | switch (synset_id_2 / 100000000) | 603 | db_ << l; |
| 1628 | { | ||
| 1629 | case 1: // Noun | ||
| 1630 | { | ||
| 1631 | query = "INSERT INTO noun_adjective_derivation (adjective_id, noun_id) VALUES (?, ?)"; | ||
| 1632 | break; | ||
| 1633 | } | ||
| 1634 | |||
| 1635 | case 3: // Adjective | ||
| 1636 | { | ||
| 1637 | query = "INSERT INTO adjective_adjective_derivation (adjective_id, adjective_id) VALUES (?, ?)"; | ||
| 1638 | break; | ||
| 1639 | } | ||
| 1640 | |||
| 1641 | case 4: // Adverb | ||
| 1642 | { | ||
| 1643 | query = "INSERT INTO adjective_adverb_derivation (adjective_id, adverb_id) VALUES (?, ?)"; | ||
| 1644 | break; | ||
| 1645 | } | ||
| 1646 | } | ||
| 1647 | 604 | ||
| 1648 | break; | 605 | ppgs.update(); |
| 1649 | } | 606 | } |
| 607 | } | ||
| 608 | |||
| 609 | { | ||
| 610 | progress ppgs("Writing forms...", forms_.size()); | ||
| 1650 | 611 | ||
| 1651 | case 4: // Adverb | 612 | for (form& f : forms_) |
| 1652 | { | 613 | { |
| 1653 | switch (synset_id_2 / 100000000) | 614 | db_ << f; |
| 1654 | { | ||
| 1655 | case 1: // Noun | ||
| 1656 | { | ||
| 1657 | query = "INSERT INTO noun_adverb_derivation (adverb_id, noun_id) VALUES (?, ?)"; | ||
| 1658 | break; | ||
| 1659 | } | ||
| 1660 | |||
| 1661 | case 3: // Adjective | ||
| 1662 | { | ||
| 1663 | query = "INSERT INTO adjective_adverb_derivation (adverb_id, adjective_id) VALUES (?, ?)"; | ||
| 1664 | break; | ||
| 1665 | } | ||
| 1666 | |||
| 1667 | case 4: // Adverb | ||
| 1668 | { | ||
| 1669 | query = "INSERT INTO adverb_adverb_derivation (adverb_1_id, adverb_2_id) VALUES (?, ?)"; | ||
| 1670 | break; | ||
| 1671 | } | ||
| 1672 | } | ||
| 1673 | 615 | ||
| 1674 | break; | 616 | ppgs.update(); |
| 1675 | } | 617 | } |
| 1676 | } | 618 | } |
| 1677 | 619 | ||
| 1678 | sqlite3_stmt* ppstmt; | ||
| 1679 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK) | ||
| 1680 | { | 620 | { |
| 1681 | db_error(ppdb, query); | 621 | progress ppgs("Writing pronunciations...", pronunciations_.size()); |
| 622 | |||
| 623 | for (pronunciation& p : pronunciations_) | ||
| 624 | { | ||
| 625 | db_ << p; | ||
| 626 | |||
| 627 | ppgs.update(); | ||
| 628 | } | ||
| 1682 | } | 629 | } |
| 1683 | 630 | ||
| 1684 | sqlite3_bind_int(ppstmt, 1, wn[synset_id_1][wnum_1]); | ||
| 1685 | sqlite3_bind_int(ppstmt, 2, wn[synset_id_2][wnum_2]); | ||
| 1686 | |||
| 1687 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | ||
| 1688 | { | 631 | { |
| 1689 | db_error(ppdb, query); | 632 | progress ppgs("Writing verb groups...", groups_.size()); |
| 633 | |||
| 634 | for (group& g : groups_) | ||
| 635 | { | ||
| 636 | db_ << g; | ||
| 637 | |||
| 638 | ppgs.update(); | ||
| 639 | } | ||
| 1690 | } | 640 | } |
| 1691 | 641 | ||
| 1692 | sqlite3_finalize(ppstmt); | ||
| 1693 | } | ||
| 1694 | } | ||
| 1695 | |||
| 1696 | // hyp table | ||
| 1697 | { | ||
| 1698 | std::ifstream wnhypfile(wnpref + "wn_hyp.pl"); | ||
| 1699 | if (!wnhypfile.is_open()) | ||
| 1700 | { | ||
| 1701 | std::cout << "Invalid WordNet data directory." << std::endl; | ||
| 1702 | print_usage(); | ||
| 1703 | } | ||
| 1704 | |||
| 1705 | std::list<std::string> lines; | ||
| 1706 | for (;;) | ||
| 1707 | { | ||
| 1708 | std::string line; | ||
| 1709 | if (!getline(wnhypfile, line)) | ||
| 1710 | { | ||
| 1711 | break; | ||
| 1712 | } | ||
| 1713 | |||
| 1714 | if (line.back() == '\r') | ||
| 1715 | { | 642 | { |
| 1716 | line.pop_back(); | 643 | progress ppgs("Writing verb frames...", frames_.size()); |
| 644 | |||
| 645 | for (frame& f : frames_) | ||
| 646 | { | ||
| 647 | db_ << f; | ||
| 648 | |||
| 649 | ppgs.update(); | ||
| 650 | } | ||
| 1717 | } | 651 | } |
| 1718 | |||
| 1719 | lines.push_back(line); | ||
| 1720 | } | 652 | } |
| 1721 | 653 | ||
| 1722 | progress ppgs("Writing hypernyms...", lines.size()); | 654 | void generator::readWordNetAntonymy() |
| 1723 | for (auto line : lines) | ||
| 1724 | { | 655 | { |
| 1725 | ppgs.update(); | 656 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_ant.pl")); |
| 1726 | 657 | progress ppgs("Writing antonyms...", lines.size()); | |
| 1727 | std::regex relation("^hyp\\((1\\d{8}),(1\\d{8})\\)\\."); | 658 | for (auto line : lines) |
| 1728 | std::smatch relation_data; | ||
| 1729 | if (!std::regex_search(line, relation_data, relation)) | ||
| 1730 | { | 659 | { |
| 1731 | continue; | 660 | ppgs.update(); |
| 1732 | } | ||
| 1733 | |||
| 1734 | int synset_id_1 = stoi(relation_data[1]); | ||
| 1735 | int synset_id_2 = stoi(relation_data[2]); | ||
| 1736 | std::string query("INSERT INTO hypernymy (hyponym_id, hypernym_id) VALUES (?, ?)"); | ||
| 1737 | 661 | ||
| 1738 | for (auto mapping1 : wn[synset_id_1]) | 662 | std::regex relation("^ant\\(([134]\\d{8}),(\\d+),([134]\\d{8}),(\\d+)\\)\\."); |
| 1739 | { | 663 | std::smatch relation_data; |
| 1740 | for (auto mapping2 : wn[synset_id_2]) | 664 | if (!std::regex_search(line, relation_data, relation)) |
| 1741 | { | 665 | { |
| 1742 | sqlite3_stmt* ppstmt; | 666 | continue; |
| 1743 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK) | 667 | } |
| 1744 | { | 668 | |
| 1745 | db_error(ppdb, query); | 669 | std::pair<int, int> lookup1(std::stoi(relation_data[1]), std::stoi(relation_data[2])); |
| 1746 | } | 670 | std::pair<int, int> lookup2(std::stoi(relation_data[3]), std::stoi(relation_data[4])); |
| 1747 | 671 | ||
| 1748 | sqlite3_bind_int(ppstmt, 1, mapping1.second); | 672 | if (wordByWnidAndWnum_.count(lookup1) && wordByWnidAndWnum_.count(lookup2)) |
| 1749 | sqlite3_bind_int(ppstmt, 2, mapping2.second); | 673 | { |
| 674 | word& word1 = *wordByWnidAndWnum_.at(lookup1); | ||
| 675 | word& word2 = *wordByWnidAndWnum_.at(lookup2); | ||
| 1750 | 676 | ||
| 1751 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | 677 | std::list<field> fields; |
| 1752 | { | 678 | fields.emplace_back("antonym_1_id", word1.getId()); |
| 1753 | db_error(ppdb, query); | 679 | fields.emplace_back("antonym_2_id", word2.getId()); |
| 1754 | } | ||
| 1755 | 680 | ||
| 1756 | sqlite3_finalize(ppstmt); | 681 | db_.insertIntoTable("antonymy", std::move(fields)); |
| 1757 | } | 682 | } |
| 1758 | } | 683 | } |
| 1759 | } | 684 | } |
| 1760 | } | ||
| 1761 | |||
| 1762 | // ins table | ||
| 1763 | { | ||
| 1764 | std::ifstream wninsfile(wnpref + "wn_ins.pl"); | ||
| 1765 | if (!wninsfile.is_open()) | ||
| 1766 | { | ||
| 1767 | std::cout << "Invalid WordNet data directory." << std::endl; | ||
| 1768 | print_usage(); | ||
| 1769 | } | ||
| 1770 | |||
| 1771 | std::list<std::string> lines; | ||
| 1772 | for (;;) | ||
| 1773 | { | ||
| 1774 | std::string line; | ||
| 1775 | if (!getline(wninsfile, line)) | ||
| 1776 | { | ||
| 1777 | break; | ||
| 1778 | } | ||
| 1779 | 685 | ||
| 1780 | if (line.back() == '\r') | 686 | void generator::readWordNetVariation() |
| 687 | { | ||
| 688 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_at.pl")); | ||
| 689 | progress ppgs("Writing variation...", lines.size()); | ||
| 690 | for (auto line : lines) | ||
| 1781 | { | 691 | { |
| 1782 | line.pop_back(); | 692 | ppgs.update(); |
| 1783 | } | ||
| 1784 | 693 | ||
| 1785 | lines.push_back(line); | 694 | std::regex relation("^at\\((1\\d{8}),(3\\d{8})\\)\\."); |
| 695 | std::smatch relation_data; | ||
| 696 | if (!std::regex_search(line, relation_data, relation)) | ||
| 697 | { | ||
| 698 | continue; | ||
| 699 | } | ||
| 700 | |||
| 701 | int lookup1 = std::stoi(relation_data[1]); | ||
| 702 | int lookup2 = std::stoi(relation_data[2]); | ||
| 703 | |||
| 704 | if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2)) | ||
| 705 | { | ||
| 706 | notion& notion1 = *notionByWnid_.at(lookup1); | ||
| 707 | notion& notion2 = *notionByWnid_.at(lookup2); | ||
| 708 | |||
| 709 | std::list<field> fields; | ||
| 710 | fields.emplace_back("noun_id", notion1.getId()); | ||
| 711 | fields.emplace_back("adjective_id", notion2.getId()); | ||
| 712 | |||
| 713 | db_.insertIntoTable("variation", std::move(fields)); | ||
| 714 | } | ||
| 715 | } | ||
| 1786 | } | 716 | } |
| 1787 | 717 | ||
| 1788 | progress ppgs("Writing instantiations...", lines.size()); | 718 | void generator::readWordNetClasses() |
| 1789 | for (auto line : lines) | ||
| 1790 | { | 719 | { |
| 1791 | ppgs.update(); | 720 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_cls.pl")); |
| 1792 | 721 | progress ppgs("Writing usage, topicality, and regionality...", lines.size()); | |
| 1793 | std::regex relation("^ins\\((1\\d{8}),(1\\d{8})\\)\\."); | 722 | for (auto line : lines) |
| 1794 | std::smatch relation_data; | ||
| 1795 | if (!std::regex_search(line, relation_data, relation)) | ||
| 1796 | { | 723 | { |
| 1797 | continue; | 724 | ppgs.update(); |
| 1798 | } | ||
| 1799 | |||
| 1800 | int synset_id_1 = stoi(relation_data[1]); | ||
| 1801 | int synset_id_2 = stoi(relation_data[2]); | ||
| 1802 | std::string query("INSERT INTO instantiation (instance_id, class_id) VALUES (?, ?)"); | ||
| 1803 | 725 | ||
| 1804 | for (auto mapping1 : wn[synset_id_1]) | 726 | std::regex relation("^cls\\(([134]\\d{8}),(\\d+),(1\\d{8}),(\\d+),([tur])\\)\\."); |
| 1805 | { | 727 | std::smatch relation_data; |
| 1806 | for (auto mapping2 : wn[synset_id_2]) | 728 | if (!std::regex_search(line, relation_data, relation)) |
| 729 | { | ||
| 730 | continue; | ||
| 731 | } | ||
| 732 | |||
| 733 | std::pair<int, int> lookup1(std::stoi(relation_data[1]), std::stoi(relation_data[2])); | ||
| 734 | std::pair<int, int> lookup2(std::stoi(relation_data[3]), std::stoi(relation_data[4])); | ||
| 735 | std::string class_type = relation_data[5]; | ||
| 736 | |||
| 737 | std::string table_name; | ||
| 738 | if (class_type == "t") | ||
| 739 | { | ||
| 740 | table_name += "topicality"; | ||
| 741 | } else if (class_type == "u") | ||
| 742 | { | ||
| 743 | table_name += "usage"; | ||
| 744 | } else if (class_type == "r") | ||
| 745 | { | ||
| 746 | table_name += "regionality"; | ||
| 747 | } | ||
| 748 | |||
| 749 | std::list<int> leftJoin; | ||
| 750 | std::list<int> rightJoin; | ||
| 751 | |||
| 752 | if ((lookup1.second == 0) && (wordsByWnid_.count(lookup1.first))) | ||
| 1807 | { | 753 | { |
| 1808 | sqlite3_stmt* ppstmt; | 754 | std::transform(std::begin(wordsByWnid_.at(lookup1.first)), std::end(wordsByWnid_.at(lookup1.first)), std::back_inserter(leftJoin), [] (word* w) { |
| 1809 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK) | 755 | return w->getId(); |
| 756 | }); | ||
| 757 | } else if (wordByWnidAndWnum_.count(lookup1)) { | ||
| 758 | leftJoin.push_back(wordByWnidAndWnum_.at(lookup1)->getId()); | ||
| 759 | } | ||
| 760 | |||
| 761 | if ((lookup2.second == 0) && (wordsByWnid_.count(lookup2.first))) | ||
| 762 | { | ||
| 763 | std::transform(std::begin(wordsByWnid_.at(lookup2.first)), std::end(wordsByWnid_.at(lookup2.first)), std::back_inserter(rightJoin), [] (word* w) { | ||
| 764 | return w->getId(); | ||
| 765 | }); | ||
| 766 | } else if (wordByWnidAndWnum_.count(lookup2)) { | ||
| 767 | rightJoin.push_back(wordByWnidAndWnum_.at(lookup2)->getId()); | ||
| 768 | } | ||
| 769 | |||
| 770 | for (int word1 : leftJoin) | ||
| 771 | { | ||
| 772 | for (int word2 : rightJoin) | ||
| 1810 | { | 773 | { |
| 1811 | db_error(ppdb, query); | 774 | std::list<field> fields; |
| 1812 | } | 775 | fields.emplace_back("term_id", word1); |
| 776 | fields.emplace_back("domain_id", word2); | ||
| 1813 | 777 | ||
| 1814 | sqlite3_bind_int(ppstmt, 1, mapping1.second); | 778 | db_.insertIntoTable(table_name, std::move(fields)); |
| 1815 | sqlite3_bind_int(ppstmt, 2, mapping2.second); | ||
| 1816 | |||
| 1817 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | ||
| 1818 | { | ||
| 1819 | db_error(ppdb, query); | ||
| 1820 | } | 779 | } |
| 1821 | |||
| 1822 | sqlite3_finalize(ppstmt); | ||
| 1823 | } | 780 | } |
| 1824 | } | 781 | } |
| 1825 | } | 782 | } |
| 1826 | } | ||
| 1827 | |||
| 1828 | // mm table | ||
| 1829 | { | ||
| 1830 | std::ifstream wnmmfile(wnpref + "wn_mm.pl"); | ||
| 1831 | if (!wnmmfile.is_open()) | ||
| 1832 | { | ||
| 1833 | std::cout << "Invalid WordNet data directory." << std::endl; | ||
| 1834 | print_usage(); | ||
| 1835 | } | ||
| 1836 | |||
| 1837 | std::list<std::string> lines; | ||
| 1838 | for (;;) | ||
| 1839 | { | ||
| 1840 | std::string line; | ||
| 1841 | if (!getline(wnmmfile, line)) | ||
| 1842 | { | ||
| 1843 | break; | ||
| 1844 | } | ||
| 1845 | 783 | ||
| 1846 | if (line.back() == '\r') | 784 | void generator::readWordNetCausality() |
| 785 | { | ||
| 786 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_cs.pl")); | ||
| 787 | progress ppgs("Writing causality...", lines.size()); | ||
| 788 | for (auto line : lines) | ||
| 1847 | { | 789 | { |
| 1848 | line.pop_back(); | 790 | ppgs.update(); |
| 1849 | } | ||
| 1850 | 791 | ||
| 1851 | lines.push_back(line); | 792 | std::regex relation("^cs\\((2\\d{8}),(2\\d{8})\\)\\."); |
| 793 | std::smatch relation_data; | ||
| 794 | if (!std::regex_search(line, relation_data, relation)) | ||
| 795 | { | ||
| 796 | continue; | ||
| 797 | } | ||
| 798 | |||
| 799 | int lookup1 = std::stoi(relation_data[1]); | ||
| 800 | int lookup2 = std::stoi(relation_data[2]); | ||
| 801 | |||
| 802 | if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2)) | ||
| 803 | { | ||
| 804 | notion& notion1 = *notionByWnid_.at(lookup1); | ||
| 805 | notion& notion2 = *notionByWnid_.at(lookup2); | ||
| 806 | |||
| 807 | std::list<field> fields; | ||
| 808 | fields.emplace_back("effect_id", notion1.getId()); | ||
| 809 | fields.emplace_back("cause_id", notion2.getId()); | ||
| 810 | |||
| 811 | db_.insertIntoTable("causality", std::move(fields)); | ||
| 812 | } | ||
| 813 | } | ||
| 1852 | } | 814 | } |
| 1853 | 815 | ||
| 1854 | progress ppgs("Writing member meronyms...", lines.size()); | 816 | void generator::readWordNetEntailment() |
| 1855 | for (auto line : lines) | ||
| 1856 | { | 817 | { |
| 1857 | ppgs.update(); | 818 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_ent.pl")); |
| 1858 | 819 | progress ppgs("Writing entailment...", lines.size()); | |
| 1859 | std::regex relation("^mm\\((1\\d{8}),(1\\d{8})\\)\\."); | 820 | for (auto line : lines) |
| 1860 | std::smatch relation_data; | ||
| 1861 | if (!std::regex_search(line, relation_data, relation)) | ||
| 1862 | { | 821 | { |
| 1863 | continue; | 822 | ppgs.update(); |
| 1864 | } | ||
| 1865 | 823 | ||
| 1866 | int synset_id_1 = stoi(relation_data[1]); | 824 | std::regex relation("^ent\\((2\\d{8}),(2\\d{8})\\)\\."); |
| 1867 | int synset_id_2 = stoi(relation_data[2]); | 825 | std::smatch relation_data; |
| 1868 | std::string query("INSERT INTO member_meronymy (holonym_id, meronym_id) VALUES (?, ?)"); | 826 | if (!std::regex_search(line, relation_data, relation)) |
| 1869 | |||
| 1870 | for (auto mapping1 : wn[synset_id_1]) | ||
| 1871 | { | ||
| 1872 | for (auto mapping2 : wn[synset_id_2]) | ||
| 1873 | { | 827 | { |
| 1874 | sqlite3_stmt* ppstmt; | 828 | continue; |
| 1875 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK) | 829 | } |
| 1876 | { | 830 | |
| 1877 | db_error(ppdb, query); | 831 | int lookup1 = std::stoi(relation_data[1]); |
| 1878 | } | 832 | int lookup2 = std::stoi(relation_data[2]); |
| 1879 | 833 | ||
| 1880 | sqlite3_bind_int(ppstmt, 1, mapping1.second); | 834 | if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2)) |
| 1881 | sqlite3_bind_int(ppstmt, 2, mapping2.second); | 835 | { |
| 836 | notion& notion1 = *notionByWnid_.at(lookup1); | ||
| 837 | notion& notion2 = *notionByWnid_.at(lookup2); | ||
| 1882 | 838 | ||
| 1883 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | 839 | std::list<field> fields; |
| 1884 | { | 840 | fields.emplace_back("given_id", notion1.getId()); |
| 1885 | db_error(ppdb, query); | 841 | fields.emplace_back("entailment_id", notion2.getId()); |
| 1886 | } | ||
| 1887 | 842 | ||
| 1888 | sqlite3_finalize(ppstmt); | 843 | db_.insertIntoTable("entailment", std::move(fields)); |
| 1889 | } | 844 | } |
| 1890 | } | 845 | } |
| 1891 | } | 846 | } |
| 1892 | } | 847 | |
| 1893 | 848 | void generator::readWordNetHypernymy() | |
| 1894 | // ms table | ||
| 1895 | { | ||
| 1896 | std::ifstream wnmsfile(wnpref + "wn_ms.pl"); | ||
| 1897 | if (!wnmsfile.is_open()) | ||
| 1898 | { | ||
| 1899 | std::cout << "Invalid WordNet data directory." << std::endl; | ||
| 1900 | print_usage(); | ||
| 1901 | } | ||
| 1902 | |||
| 1903 | std::list<std::string> lines; | ||
| 1904 | for (;;) | ||
| 1905 | { | 849 | { |
| 1906 | std::string line; | 850 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_hyp.pl")); |
| 1907 | if (!getline(wnmsfile, line)) | 851 | progress ppgs("Writing hypernymy...", lines.size()); |
| 852 | for (auto line : lines) | ||
| 1908 | { | 853 | { |
| 1909 | break; | 854 | ppgs.update(); |
| 855 | |||
| 856 | std::regex relation("^hyp\\(([12]\\d{8}),([12]\\d{8})\\)\\."); | ||
| 857 | std::smatch relation_data; | ||
| 858 | if (!std::regex_search(line, relation_data, relation)) | ||
| 859 | { | ||
| 860 | continue; | ||
| 861 | } | ||
| 862 | |||
| 863 | int lookup1 = std::stoi(relation_data[1]); | ||
| 864 | int lookup2 = std::stoi(relation_data[2]); | ||
| 865 | |||
| 866 | if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2)) | ||
| 867 | { | ||
| 868 | notion& notion1 = *notionByWnid_.at(lookup1); | ||
| 869 | notion& notion2 = *notionByWnid_.at(lookup2); | ||
| 870 | |||
| 871 | std::list<field> fields; | ||
| 872 | fields.emplace_back("hyponym_id", notion1.getId()); | ||
| 873 | fields.emplace_back("hypernym_id", notion2.getId()); | ||
| 874 | |||
| 875 | db_.insertIntoTable("hypernymy", std::move(fields)); | ||
| 876 | } | ||
| 1910 | } | 877 | } |
| 878 | } | ||
| 1911 | 879 | ||
| 1912 | if (line.back() == '\r') | 880 | void generator::readWordNetInstantiation() |
| 881 | { | ||
| 882 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_ins.pl")); | ||
| 883 | progress ppgs("Writing instantiation...", lines.size()); | ||
| 884 | for (auto line : lines) | ||
| 1913 | { | 885 | { |
| 1914 | line.pop_back(); | 886 | ppgs.update(); |
| 1915 | } | ||
| 1916 | 887 | ||
| 1917 | lines.push_back(line); | 888 | std::regex relation("^ins\\((1\\d{8}),(1\\d{8})\\)\\."); |
| 889 | std::smatch relation_data; | ||
| 890 | if (!std::regex_search(line, relation_data, relation)) | ||
| 891 | { | ||
| 892 | continue; | ||
| 893 | } | ||
| 894 | |||
| 895 | int lookup1 = std::stoi(relation_data[1]); | ||
| 896 | int lookup2 = std::stoi(relation_data[2]); | ||
| 897 | |||
| 898 | if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2)) | ||
| 899 | { | ||
| 900 | notion& notion1 = *notionByWnid_.at(lookup1); | ||
| 901 | notion& notion2 = *notionByWnid_.at(lookup2); | ||
| 902 | |||
| 903 | std::list<field> fields; | ||
| 904 | fields.emplace_back("instance_id", notion1.getId()); | ||
| 905 | fields.emplace_back("class_id", notion2.getId()); | ||
| 906 | |||
| 907 | db_.insertIntoTable("instantiation", std::move(fields)); | ||
| 908 | } | ||
| 909 | } | ||
| 1918 | } | 910 | } |
| 1919 | 911 | ||
| 1920 | progress ppgs("Writing substance meronyms...", lines.size()); | 912 | void generator::readWordNetMemberMeronymy() |
| 1921 | for (auto line : lines) | ||
| 1922 | { | 913 | { |
| 1923 | ppgs.update(); | 914 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_mm.pl")); |
| 1924 | 915 | progress ppgs("Writing member meronymy...", lines.size()); | |
| 1925 | std::regex relation("^ms\\((1\\d{8}),(1\\d{8})\\)\\."); | 916 | for (auto line : lines) |
| 1926 | std::smatch relation_data; | ||
| 1927 | if (!std::regex_search(line, relation_data, relation)) | ||
| 1928 | { | 917 | { |
| 1929 | continue; | 918 | ppgs.update(); |
| 1930 | } | ||
| 1931 | |||
| 1932 | int synset_id_1 = stoi(relation_data[1]); | ||
| 1933 | int synset_id_2 = stoi(relation_data[2]); | ||
| 1934 | std::string query("INSERT INTO substance_meronymy (holonym_id, meronym_id) VALUES (?, ?)"); | ||
| 1935 | 919 | ||
| 1936 | for (auto mapping1 : wn[synset_id_1]) | 920 | std::regex relation("^mm\\((1\\d{8}),(1\\d{8})\\)\\."); |
| 1937 | { | 921 | std::smatch relation_data; |
| 1938 | for (auto mapping2 : wn[synset_id_2]) | 922 | if (!std::regex_search(line, relation_data, relation)) |
| 1939 | { | 923 | { |
| 1940 | sqlite3_stmt* ppstmt; | 924 | continue; |
| 1941 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK) | 925 | } |
| 1942 | { | 926 | |
| 1943 | db_error(ppdb, query); | 927 | int lookup1 = std::stoi(relation_data[1]); |
| 1944 | } | 928 | int lookup2 = std::stoi(relation_data[2]); |
| 929 | |||
| 930 | if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2)) | ||
| 931 | { | ||
| 932 | notion& notion1 = *notionByWnid_.at(lookup1); | ||
| 933 | notion& notion2 = *notionByWnid_.at(lookup2); | ||
| 1945 | 934 | ||
| 1946 | sqlite3_bind_int(ppstmt, 1, mapping1.second); | 935 | std::list<field> fields; |
| 1947 | sqlite3_bind_int(ppstmt, 2, mapping2.second); | 936 | fields.emplace_back("holonym_id", notion1.getId()); |
| 937 | fields.emplace_back("meronym_id", notion2.getId()); | ||
| 1948 | 938 | ||
| 1949 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | 939 | db_.insertIntoTable("member_meronymy", std::move(fields)); |
| 1950 | { | ||
| 1951 | db_error(ppdb, query); | ||
| 1952 | } | ||
| 1953 | |||
| 1954 | sqlite3_finalize(ppstmt); | ||
| 1955 | } | 940 | } |
| 1956 | } | 941 | } |
| 1957 | } | 942 | } |
| 1958 | } | 943 | |
| 1959 | 944 | void generator::readWordNetPartMeronymy() | |
| 1960 | // mm table | ||
| 1961 | { | ||
| 1962 | std::ifstream wnmpfile(wnpref + "wn_mp.pl"); | ||
| 1963 | if (!wnmpfile.is_open()) | ||
| 1964 | { | ||
| 1965 | std::cout << "Invalid WordNet data directory." << std::endl; | ||
| 1966 | print_usage(); | ||
| 1967 | } | ||
| 1968 | |||
| 1969 | std::list<std::string> lines; | ||
| 1970 | for (;;) | ||
| 1971 | { | 945 | { |
| 1972 | std::string line; | 946 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_mp.pl")); |
| 1973 | if (!getline(wnmpfile, line)) | 947 | progress ppgs("Writing part meronymy...", lines.size()); |
| 948 | for (auto line : lines) | ||
| 1974 | { | 949 | { |
| 1975 | break; | 950 | ppgs.update(); |
| 951 | |||
| 952 | std::regex relation("^mp\\((1\\d{8}),(1\\d{8})\\)\\."); | ||
| 953 | std::smatch relation_data; | ||
| 954 | if (!std::regex_search(line, relation_data, relation)) | ||
| 955 | { | ||
| 956 | continue; | ||
| 957 | } | ||
| 958 | |||
| 959 | int lookup1 = std::stoi(relation_data[1]); | ||
| 960 | int lookup2 = std::stoi(relation_data[2]); | ||
| 961 | |||
| 962 | if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2)) | ||
| 963 | { | ||
| 964 | notion& notion1 = *notionByWnid_.at(lookup1); | ||
| 965 | notion& notion2 = *notionByWnid_.at(lookup2); | ||
| 966 | |||
| 967 | std::list<field> fields; | ||
| 968 | fields.emplace_back("holonym_id", notion1.getId()); | ||
| 969 | fields.emplace_back("meronym_id", notion2.getId()); | ||
| 970 | |||
| 971 | db_.insertIntoTable("part_meronymy", std::move(fields)); | ||
| 972 | } | ||
| 1976 | } | 973 | } |
| 974 | } | ||
| 1977 | 975 | ||
| 1978 | if (line.back() == '\r') | 976 | void generator::readWordNetSubstanceMeronymy() |
| 977 | { | ||
| 978 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_ms.pl")); | ||
| 979 | progress ppgs("Writing substance meronymy...", lines.size()); | ||
| 980 | for (auto line : lines) | ||
| 1979 | { | 981 | { |
| 1980 | line.pop_back(); | 982 | ppgs.update(); |
| 1981 | } | ||
| 1982 | 983 | ||
| 1983 | lines.push_back(line); | 984 | std::regex relation("^ms\\((1\\d{8}),(1\\d{8})\\)\\."); |
| 985 | std::smatch relation_data; | ||
| 986 | if (!std::regex_search(line, relation_data, relation)) | ||
| 987 | { | ||
| 988 | continue; | ||
| 989 | } | ||
| 990 | |||
| 991 | int lookup1 = std::stoi(relation_data[1]); | ||
| 992 | int lookup2 = std::stoi(relation_data[2]); | ||
| 993 | |||
| 994 | if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2)) | ||
| 995 | { | ||
| 996 | notion& notion1 = *notionByWnid_.at(lookup1); | ||
| 997 | notion& notion2 = *notionByWnid_.at(lookup2); | ||
| 998 | |||
| 999 | std::list<field> fields; | ||
| 1000 | fields.emplace_back("holonym_id", notion1.getId()); | ||
| 1001 | fields.emplace_back("meronym_id", notion2.getId()); | ||
| 1002 | |||
| 1003 | db_.insertIntoTable("substance_meronymy", std::move(fields)); | ||
| 1004 | } | ||
| 1005 | } | ||
| 1984 | } | 1006 | } |
| 1985 | 1007 | ||
| 1986 | progress ppgs("Writing part meronyms...", lines.size()); | 1008 | void generator::readWordNetPertainymy() |
| 1987 | for (auto line : lines) | ||
| 1988 | { | 1009 | { |
| 1989 | ppgs.update(); | 1010 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_per.pl")); |
| 1990 | 1011 | progress ppgs("Writing pertainymy and mannernymy...", lines.size()); | |
| 1991 | std::regex relation("^mp\\((1\\d{8}),(1\\d{8})\\)\\."); | 1012 | for (auto line : lines) |
| 1992 | std::smatch relation_data; | ||
| 1993 | if (!std::regex_search(line, relation_data, relation)) | ||
| 1994 | { | 1013 | { |
| 1995 | continue; | 1014 | ppgs.update(); |
| 1996 | } | ||
| 1997 | |||
| 1998 | int synset_id_1 = stoi(relation_data[1]); | ||
| 1999 | int synset_id_2 = stoi(relation_data[2]); | ||
| 2000 | std::string query("INSERT INTO part_meronymy (holonym_id, meronym_id) VALUES (?, ?)"); | ||
| 2001 | 1015 | ||
| 2002 | for (auto mapping1 : wn[synset_id_1]) | 1016 | std::regex relation("^per\\(([34]\\d{8}),(\\d+),([13]\\d{8}),(\\d+)\\)\\."); |
| 2003 | { | 1017 | std::smatch relation_data; |
| 2004 | for (auto mapping2 : wn[synset_id_2]) | 1018 | if (!std::regex_search(line, relation_data, relation)) |
| 2005 | { | 1019 | { |
| 2006 | sqlite3_stmt* ppstmt; | 1020 | continue; |
| 2007 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK) | 1021 | } |
| 2008 | { | 1022 | |
| 2009 | db_error(ppdb, query); | 1023 | std::pair<int, int> lookup1(std::stoi(relation_data[1]), std::stoi(relation_data[2])); |
| 2010 | } | 1024 | std::pair<int, int> lookup2(std::stoi(relation_data[3]), std::stoi(relation_data[4])); |
| 1025 | |||
| 1026 | if (wordByWnidAndWnum_.count(lookup1) && wordByWnidAndWnum_.count(lookup2)) | ||
| 1027 | { | ||
| 1028 | word& word1 = *wordByWnidAndWnum_.at(lookup1); | ||
| 1029 | word& word2 = *wordByWnidAndWnum_.at(lookup2); | ||
| 2011 | 1030 | ||
| 2012 | sqlite3_bind_int(ppstmt, 1, mapping1.second); | 1031 | if (word1.getNotion().getPartOfSpeech() == part_of_speech::adjective) |
| 2013 | sqlite3_bind_int(ppstmt, 2, mapping2.second); | 1032 | { |
| 1033 | std::list<field> fields; | ||
| 1034 | fields.emplace_back("pertainym_id", word1.getId()); | ||
| 1035 | fields.emplace_back("noun_id", word2.getId()); | ||
| 2014 | 1036 | ||
| 2015 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | 1037 | db_.insertIntoTable("pertainymy", std::move(fields)); |
| 1038 | } else if (word1.getNotion().getPartOfSpeech() == part_of_speech::adverb) | ||
| 2016 | { | 1039 | { |
| 2017 | db_error(ppdb, query); | 1040 | std::list<field> fields; |
| 2018 | } | 1041 | fields.emplace_back("mannernym_id", word1.getId()); |
| 1042 | fields.emplace_back("adjective_id", word2.getId()); | ||
| 2019 | 1043 | ||
| 2020 | sqlite3_finalize(ppstmt); | 1044 | db_.insertIntoTable("mannernymy", std::move(fields)); |
| 1045 | } | ||
| 2021 | } | 1046 | } |
| 2022 | } | 1047 | } |
| 2023 | } | 1048 | } |
| 2024 | } | ||
| 2025 | |||
| 2026 | // per table | ||
| 2027 | { | ||
| 2028 | std::ifstream wnperfile(wnpref + "wn_per.pl"); | ||
| 2029 | if (!wnperfile.is_open()) | ||
| 2030 | { | ||
| 2031 | std::cout << "Invalid WordNet data directory." << std::endl; | ||
| 2032 | print_usage(); | ||
| 2033 | } | ||
| 2034 | |||
| 2035 | std::list<std::string> lines; | ||
| 2036 | for (;;) | ||
| 2037 | { | ||
| 2038 | std::string line; | ||
| 2039 | if (!getline(wnperfile, line)) | ||
| 2040 | { | ||
| 2041 | break; | ||
| 2042 | } | ||
| 2043 | 1049 | ||
| 2044 | if (line.back() == '\r') | 1050 | void generator::readWordNetSpecification() |
| 1051 | { | ||
| 1052 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_sa.pl")); | ||
| 1053 | progress ppgs("Writing specifications...", lines.size()); | ||
| 1054 | for (auto line : lines) | ||
| 2045 | { | 1055 | { |
| 2046 | line.pop_back(); | 1056 | ppgs.update(); |
| 1057 | |||
| 1058 | std::regex relation("^sa\\((23\\d{8}),(\\d+),(23\\d{8}),(\\d+)\\)\\."); | ||
| 1059 | std::smatch relation_data; | ||
| 1060 | if (!std::regex_search(line, relation_data, relation)) | ||
| 1061 | { | ||
| 1062 | continue; | ||
| 1063 | } | ||
| 1064 | |||
| 1065 | std::pair<int, int> lookup1(std::stoi(relation_data[1]), std::stoi(relation_data[2])); | ||
| 1066 | std::pair<int, int> lookup2(std::stoi(relation_data[3]), std::stoi(relation_data[4])); | ||
| 1067 | |||
| 1068 | if (wordByWnidAndWnum_.count(lookup1) && wordByWnidAndWnum_.count(lookup2)) | ||
| 1069 | { | ||
| 1070 | word& word1 = *wordByWnidAndWnum_.at(lookup1); | ||
| 1071 | word& word2 = *wordByWnidAndWnum_.at(lookup2); | ||
| 1072 | |||
| 1073 | std::list<field> fields; | ||
| 1074 | fields.emplace_back("general_id", word1.getId()); | ||
| 1075 | fields.emplace_back("specific_id", word2.getId()); | ||
| 1076 | |||
| 1077 | db_.insertIntoTable("specification", std::move(fields)); | ||
| 1078 | } | ||
| 2047 | } | 1079 | } |
| 2048 | |||
| 2049 | lines.push_back(line); | ||
| 2050 | } | 1080 | } |
| 2051 | 1081 | ||
| 2052 | progress ppgs("Writing pertainyms and mannernyms...", lines.size()); | 1082 | void generator::readWordNetSimilarity() |
| 2053 | for (auto line : lines) | ||
| 2054 | { | 1083 | { |
| 2055 | ppgs.update(); | 1084 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_sim.pl")); |
| 2056 | 1085 | progress ppgs("Writing adjective similarity...", lines.size()); | |
| 2057 | std::regex relation("^per\\(([34]\\d{8}),(\\d+),([13]\\d{8}),(\\d+)\\)\\."); | 1086 | for (auto line : lines) |
| 2058 | std::smatch relation_data; | ||
| 2059 | if (!std::regex_search(line, relation_data, relation)) | ||
| 2060 | { | 1087 | { |
| 2061 | continue; | 1088 | ppgs.update(); |
| 2062 | } | ||
| 2063 | 1089 | ||
| 2064 | int synset_id_1 = stoi(relation_data[1]); | 1090 | std::regex relation("^sim\\((3\\d{8}),(3\\d{8})\\)\\."); |
| 2065 | int wnum_1 = stoi(relation_data[2]); | 1091 | std::smatch relation_data; |
| 2066 | int synset_id_2 = stoi(relation_data[3]); | 1092 | if (!std::regex_search(line, relation_data, relation)) |
| 2067 | int wnum_2 = stoi(relation_data[4]); | ||
| 2068 | std::string query; | ||
| 2069 | switch (synset_id_1 / 100000000) | ||
| 2070 | { | ||
| 2071 | case 3: // Adjective | ||
| 2072 | { | 1093 | { |
| 2073 | // This is a pertainym, the second word should be a noun | 1094 | continue; |
| 2074 | // Technically it can be an adjective but we're ignoring that | ||
| 2075 | if (synset_id_2 / 100000000 != 1) | ||
| 2076 | { | ||
| 2077 | continue; | ||
| 2078 | } | ||
| 2079 | |||
| 2080 | query = "INSERT INTO pertainymy (pertainym_id, noun_id) VALUES (?, ?)"; | ||
| 2081 | |||
| 2082 | break; | ||
| 2083 | } | 1095 | } |
| 1096 | |||
| 1097 | int lookup1 = std::stoi(relation_data[1]); | ||
| 1098 | int lookup2 = std::stoi(relation_data[2]); | ||
| 2084 | 1099 | ||
| 2085 | case 4: // Adverb | 1100 | if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2)) |
| 2086 | { | 1101 | { |
| 2087 | // This is a mannernym, the second word should be an adjective | 1102 | notion& notion1 = *notionByWnid_.at(lookup1); |
| 2088 | if (synset_id_2 / 100000000 != 3) | 1103 | notion& notion2 = *notionByWnid_.at(lookup2); |
| 2089 | { | ||
| 2090 | continue; | ||
| 2091 | } | ||
| 2092 | 1104 | ||
| 2093 | query = "INSERT INTO mannernymy (mannernym_id, adjective_id) VALUES (?, ?)"; | 1105 | std::list<field> fields; |
| 1106 | fields.emplace_back("adjective_1_id", notion1.getId()); | ||
| 1107 | fields.emplace_back("adjective_2_id", notion2.getId()); | ||
| 2094 | 1108 | ||
| 2095 | break; | 1109 | db_.insertIntoTable("similarity", std::move(fields)); |
| 2096 | } | 1110 | } |
| 2097 | } | 1111 | } |
| 2098 | 1112 | } | |
| 2099 | sqlite3_stmt* ppstmt; | ||
| 2100 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
| 2101 | { | ||
| 2102 | db_error(ppdb, query); | ||
| 2103 | } | ||
| 2104 | |||
| 2105 | sqlite3_bind_int(ppstmt, 1, wn[synset_id_1][wnum_1]); | ||
| 2106 | sqlite3_bind_int(ppstmt, 2, wn[synset_id_2][wnum_2]); | ||
| 2107 | 1113 | ||
| 2108 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | 1114 | std::list<std::string> generator::readFile(std::string path) |
| 1115 | { | ||
| 1116 | std::ifstream file(path); | ||
| 1117 | if (!file) | ||
| 2109 | { | 1118 | { |
| 2110 | db_error(ppdb, query); | 1119 | throw std::invalid_argument("Could not find file " + path); |
| 2111 | } | 1120 | } |
| 2112 | |||
| 2113 | sqlite3_finalize(ppstmt); | ||
| 2114 | } | ||
| 2115 | } | ||
| 2116 | 1121 | ||
| 2117 | // sa table | 1122 | std::list<std::string> lines; |
| 2118 | { | ||
| 2119 | std::ifstream wnsafile(wnpref + "wn_sa.pl"); | ||
| 2120 | if (!wnsafile.is_open()) | ||
| 2121 | { | ||
| 2122 | std::cout << "Invalid WordNet data directory." << std::endl; | ||
| 2123 | print_usage(); | ||
| 2124 | } | ||
| 2125 | |||
| 2126 | std::list<std::string> lines; | ||
| 2127 | for (;;) | ||
| 2128 | { | ||
| 2129 | std::string line; | 1123 | std::string line; |
| 2130 | if (!getline(wnsafile, line)) | 1124 | while (std::getline(file, line)) |
| 2131 | { | ||
| 2132 | break; | ||
| 2133 | } | ||
| 2134 | |||
| 2135 | if (line.back() == '\r') | ||
| 2136 | { | 1125 | { |
| 2137 | line.pop_back(); | 1126 | if (line.back() == '\r') |
| 1127 | { | ||
| 1128 | line.pop_back(); | ||
| 1129 | } | ||
| 1130 | |||
| 1131 | lines.push_back(line); | ||
| 2138 | } | 1132 | } |
| 2139 | 1133 | ||
| 2140 | lines.push_back(line); | 1134 | return lines; |
| 2141 | } | 1135 | } |
| 2142 | 1136 | ||
| 2143 | progress ppgs("Writing specifications...", lines.size()); | 1137 | part_of_speech generator::partOfSpeechByWnid(int wnid) |
| 2144 | for (auto line : lines) | ||
| 2145 | { | 1138 | { |
| 2146 | ppgs.update(); | 1139 | switch (wnid / 100000000) |
| 2147 | |||
| 2148 | std::regex relation("^per\\((3\\d{8}),(\\d+),(3\\d{8}),(\\d+)\\)\\."); | ||
| 2149 | std::smatch relation_data; | ||
| 2150 | if (!std::regex_search(line, relation_data, relation)) | ||
| 2151 | { | ||
| 2152 | continue; | ||
| 2153 | } | ||
| 2154 | |||
| 2155 | int synset_id_1 = stoi(relation_data[1]); | ||
| 2156 | int wnum_1 = stoi(relation_data[2]); | ||
| 2157 | int synset_id_2 = stoi(relation_data[3]); | ||
| 2158 | int wnum_2 = stoi(relation_data[4]); | ||
| 2159 | std::string query("INSERT INTO specification (general_id, specific_id) VALUES (?, ?)"); | ||
| 2160 | |||
| 2161 | sqlite3_stmt* ppstmt; | ||
| 2162 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
| 2163 | { | 1140 | { |
| 2164 | db_error(ppdb, query); | 1141 | case 1: return part_of_speech::noun; |
| 1142 | case 2: return part_of_speech::verb; | ||
| 1143 | case 3: return part_of_speech::adjective; | ||
| 1144 | case 4: return part_of_speech::adverb; | ||
| 1145 | default: throw std::domain_error("Invalid WordNet synset ID: " + std::to_string(wnid)); | ||
| 2165 | } | 1146 | } |
| 1147 | } | ||
| 2166 | 1148 | ||
| 2167 | sqlite3_bind_int(ppstmt, 1, wn[synset_id_1][wnum_1]); | 1149 | notion& generator::createNotion(part_of_speech partOfSpeech) |
| 2168 | sqlite3_bind_int(ppstmt, 2, wn[synset_id_2][wnum_2]); | 1150 | { |
| 1151 | notions_.emplace_back(partOfSpeech); | ||
| 1152 | |||
| 1153 | return notions_.back(); | ||
| 1154 | } | ||
| 2169 | 1155 | ||
| 2170 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | 1156 | notion& generator::lookupOrCreateNotion(int wnid) |
| 1157 | { | ||
| 1158 | if (!notionByWnid_.count(wnid)) | ||
| 2171 | { | 1159 | { |
| 2172 | db_error(ppdb, query); | 1160 | notions_.emplace_back(partOfSpeechByWnid(wnid), wnid); |
| 1161 | notionByWnid_[wnid] = ¬ions_.back(); | ||
| 2173 | } | 1162 | } |
| 2174 | 1163 | ||
| 2175 | sqlite3_finalize(ppstmt); | 1164 | return *notionByWnid_.at(wnid); |
| 2176 | } | ||
| 2177 | } | ||
| 2178 | |||
| 2179 | // sim table | ||
| 2180 | { | ||
| 2181 | std::ifstream wnsimfile(wnpref + "wn_sim.pl"); | ||
| 2182 | if (!wnsimfile.is_open()) | ||
| 2183 | { | ||
| 2184 | std::cout << "Invalid WordNet data directory." << std::endl; | ||
| 2185 | print_usage(); | ||
| 2186 | } | 1165 | } |
| 2187 | 1166 | ||
| 2188 | std::list<std::string> lines; | 1167 | lemma& generator::lookupOrCreateLemma(std::string base_form) |
| 2189 | for (;;) | ||
| 2190 | { | 1168 | { |
| 2191 | std::string line; | 1169 | if (!lemmaByBaseForm_.count(base_form)) |
| 2192 | if (!getline(wnsimfile, line)) | ||
| 2193 | { | 1170 | { |
| 2194 | break; | 1171 | lemmas_.emplace_back(lookupOrCreateForm(base_form)); |
| 1172 | lemmaByBaseForm_[base_form] = &lemmas_.back(); | ||
| 2195 | } | 1173 | } |
| 1174 | |||
| 1175 | return *lemmaByBaseForm_.at(base_form); | ||
| 1176 | } | ||
| 2196 | 1177 | ||
| 2197 | if (line.back() == '\r') | 1178 | form& generator::lookupOrCreateForm(std::string text) |
| 1179 | { | ||
| 1180 | if (!formByText_.count(text)) | ||
| 2198 | { | 1181 | { |
| 2199 | line.pop_back(); | 1182 | forms_.emplace_back(text); |
| 1183 | formByText_[text] = &forms_.back(); | ||
| 2200 | } | 1184 | } |
| 2201 | 1185 | ||
| 2202 | lines.push_back(line); | 1186 | return *formByText_[text]; |
| 2203 | } | 1187 | } |
| 2204 | 1188 | ||
| 2205 | progress ppgs("Writing sense synonyms...", lines.size()); | 1189 | template <typename... Args> word& generator::createWord(Args&&... args) |
| 2206 | for (auto line : lines) | ||
| 2207 | { | 1190 | { |
| 2208 | ppgs.update(); | 1191 | words_.emplace_back(std::forward<Args>(args)...); |
| 1192 | word& w = words_.back(); | ||
| 2209 | 1193 | ||
| 2210 | std::regex relation("^sim\\((3\\d{8}),(3\\d{8})\\)\\."); | 1194 | wordsByBaseForm_[w.getLemma().getBaseForm().getText()].insert(&w); |
| 2211 | std::smatch relation_data; | 1195 | |
| 2212 | if (!std::regex_search(line, relation_data, relation)) | 1196 | if (w.getNotion().hasWnid()) |
| 2213 | { | 1197 | { |
| 2214 | continue; | 1198 | wordsByWnid_[w.getNotion().getWnid()].insert(&w); |
| 2215 | } | 1199 | } |
| 2216 | 1200 | ||
| 2217 | int synset_id_1 = stoi(relation_data[1]); | 1201 | return w; |
| 2218 | int synset_id_2 = stoi(relation_data[2]); | 1202 | } |
| 2219 | std::string query("INSERT INTO adjective_synonymy (adjective_1_id, adjective_2_id) VALUES (?, ?)"); | 1203 | |
| 1204 | group& generator::createGroup(xmlNodePtr top) | ||
| 1205 | { | ||
| 1206 | groups_.emplace_back(); | ||
| 1207 | group& grp = groups_.back(); | ||
| 2220 | 1208 | ||
| 2221 | for (auto mapping1 : wn[synset_id_1]) | 1209 | xmlChar* key; |
| 1210 | |||
| 1211 | for (xmlNodePtr node = top->xmlChildrenNode; node != nullptr; node = node->next) | ||
| 2222 | { | 1212 | { |
| 2223 | for (auto mapping2 : wn[synset_id_2]) | 1213 | if (!xmlStrcmp(node->name, reinterpret_cast<const xmlChar*>("SUBCLASSES"))) |
| 2224 | { | 1214 | { |
| 2225 | sqlite3_stmt* ppstmt; | 1215 | for (xmlNodePtr subclass = node->xmlChildrenNode; subclass != nullptr; subclass = subclass->next) |
| 2226 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK) | ||
| 2227 | { | 1216 | { |
| 2228 | db_error(ppdb, query); | 1217 | if (!xmlStrcmp(subclass->name, reinterpret_cast<const xmlChar*>("VNSUBCLASS"))) |
| 1218 | { | ||
| 1219 | try | ||
| 1220 | { | ||
| 1221 | group& subgrp = createGroup(subclass); | ||
| 1222 | subgrp.setParent(grp); | ||
| 1223 | } catch (const std::exception& e) | ||
| 1224 | { | ||
| 1225 | key = xmlGetProp(subclass, reinterpret_cast<const xmlChar*>("ID")); | ||
| 1226 | |||
| 1227 | if (key == nullptr) | ||
| 1228 | { | ||
| 1229 | std::throw_with_nested(std::logic_error("Error parsing IDless subgroup")); | ||
| 1230 | } else { | ||
| 1231 | std::string subgroupId(reinterpret_cast<const char*>(key)); | ||
| 1232 | xmlFree(key); | ||
| 1233 | |||
| 1234 | std::throw_with_nested(std::logic_error("Error parsing subgroup " + subgroupId)); | ||
| 1235 | } | ||
| 1236 | } | ||
| 1237 | } | ||
| 2229 | } | 1238 | } |
| 2230 | 1239 | } else if (!xmlStrcmp(node->name, reinterpret_cast<const xmlChar*>("MEMBERS"))) | |
| 2231 | sqlite3_bind_int(ppstmt, 1, mapping1.second); | 1240 | { |
| 2232 | sqlite3_bind_int(ppstmt, 2, mapping2.second); | 1241 | for (xmlNodePtr member = node->xmlChildrenNode; member != nullptr; member = member->next) |
| 2233 | |||
| 2234 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | ||
| 2235 | { | 1242 | { |
| 2236 | db_error(ppdb, query); | 1243 | if (!xmlStrcmp(member->name, reinterpret_cast<const xmlChar*>("MEMBER"))) |
| 1244 | { | ||
| 1245 | key = xmlGetProp(member, reinterpret_cast<const xmlChar*>("wn")); | ||
| 1246 | std::string wnSenses(reinterpret_cast<const char*>(key)); | ||
| 1247 | xmlFree(key); | ||
| 1248 | |||
| 1249 | auto wnSenseKeys = split<std::list<std::string>>(wnSenses, " "); | ||
| 1250 | if (!wnSenseKeys.empty()) | ||
| 1251 | { | ||
| 1252 | std::list<std::string> tempKeys; | ||
| 1253 | |||
| 1254 | std::transform(std::begin(wnSenseKeys), std::end(wnSenseKeys), std::back_inserter(tempKeys), [] (std::string sense) { | ||
| 1255 | return sense + "::"; | ||
| 1256 | }); | ||
| 1257 | |||
| 1258 | std::list<std::string> filteredKeys; | ||
| 1259 | |||
| 1260 | std::remove_copy_if(std::begin(tempKeys), std::end(tempKeys), std::back_inserter(filteredKeys), [&] (std::string sense) { | ||
| 1261 | return !wnSenseKeys_.count(sense); | ||
| 1262 | }); | ||
| 1263 | |||
| 1264 | wnSenseKeys = std::move(filteredKeys); | ||
| 1265 | } | ||
| 1266 | |||
| 1267 | if (!wnSenseKeys.empty()) | ||
| 1268 | { | ||
| 1269 | for (std::string sense : wnSenseKeys) | ||
| 1270 | { | ||
| 1271 | word& wordSense = *wnSenseKeys_[sense]; | ||
| 1272 | wordSense.setVerbGroup(grp); | ||
| 1273 | } | ||
| 1274 | } else { | ||
| 1275 | key = xmlGetProp(member, reinterpret_cast<const xmlChar*>("name")); | ||
| 1276 | std::string memberName(reinterpret_cast<const char*>(key)); | ||
| 1277 | xmlFree(key); | ||
| 1278 | |||
| 1279 | notion& n = createNotion(part_of_speech::verb); | ||
| 1280 | lemma& l = lookupOrCreateLemma(memberName); | ||
| 1281 | word& w = createWord(n, l); | ||
| 1282 | |||
| 1283 | w.setVerbGroup(grp); | ||
| 1284 | } | ||
| 1285 | } | ||
| 2237 | } | 1286 | } |
| 2238 | 1287 | } else if (!xmlStrcmp(node->name, reinterpret_cast<const xmlChar*>("THEMROLES"))) | |
| 2239 | sqlite3_reset(ppstmt); | 1288 | { |
| 2240 | sqlite3_clear_bindings(ppstmt); | 1289 | for (xmlNodePtr roletopnode = node->xmlChildrenNode; roletopnode != nullptr; roletopnode = roletopnode->next) |
| 2241 | |||
| 2242 | sqlite3_bind_int(ppstmt, 1, mapping2.second); | ||
| 2243 | sqlite3_bind_int(ppstmt, 2, mapping1.second); | ||
| 2244 | |||
| 2245 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | ||
| 2246 | { | 1290 | { |
| 2247 | db_error(ppdb, query); | 1291 | if (!xmlStrcmp(roletopnode->name, reinterpret_cast<const xmlChar*>("THEMROLE"))) |
| 1292 | { | ||
| 1293 | role r; | ||
| 1294 | |||
| 1295 | key = xmlGetProp(roletopnode, reinterpret_cast<const xmlChar*>("type")); | ||
| 1296 | std::string roleName = reinterpret_cast<const char*>(key); | ||
| 1297 | xmlFree(key); | ||
| 1298 | |||
| 1299 | for (xmlNodePtr rolenode = roletopnode->xmlChildrenNode; rolenode != nullptr; rolenode = rolenode->next) | ||
| 1300 | { | ||
| 1301 | if (!xmlStrcmp(rolenode->name, reinterpret_cast<const xmlChar*>("SELRESTRS"))) | ||
| 1302 | { | ||
| 1303 | r.setSelrestrs(parseSelrestr(rolenode)); | ||
| 1304 | } | ||
| 1305 | } | ||
| 1306 | |||
| 1307 | grp.addRole(roleName, std::move(r)); | ||
| 1308 | } | ||
| 2248 | } | 1309 | } |
| 1310 | } else if (!xmlStrcmp(node->name, reinterpret_cast<const xmlChar*>("FRAMES"))) | ||
| 1311 | { | ||
| 1312 | for (xmlNodePtr frametopnode = node->xmlChildrenNode; frametopnode != nullptr; frametopnode = frametopnode->next) | ||
| 1313 | { | ||
| 1314 | if (!xmlStrcmp(frametopnode->name, reinterpret_cast<const xmlChar*>("FRAME"))) | ||
| 1315 | { | ||
| 1316 | frames_.emplace_back(); | ||
| 1317 | frame& fr = frames_.back(); | ||
| 2249 | 1318 | ||
| 2250 | sqlite3_finalize(ppstmt); | 1319 | for (xmlNodePtr framenode = frametopnode->xmlChildrenNode; framenode != nullptr; framenode = framenode->next) |
| 1320 | { | ||
| 1321 | if (!xmlStrcmp(framenode->name, reinterpret_cast<const xmlChar*>("SYNTAX"))) | ||
| 1322 | { | ||
| 1323 | for (xmlNodePtr syntaxnode = framenode->xmlChildrenNode; syntaxnode != nullptr; syntaxnode = syntaxnode->next) | ||
| 1324 | { | ||
| 1325 | if (!xmlStrcmp(syntaxnode->name, reinterpret_cast<const xmlChar*>("NP"))) | ||
| 1326 | { | ||
| 1327 | key = xmlGetProp(syntaxnode, reinterpret_cast<const xmlChar*>("value")); | ||
| 1328 | std::string partRole = reinterpret_cast<const char*>(key); | ||
| 1329 | xmlFree(key); | ||
| 1330 | |||
| 1331 | selrestr partSelrestrs; | ||
| 1332 | std::set<std::string> partSynrestrs; | ||
| 1333 | |||
| 1334 | for (xmlNodePtr npnode = syntaxnode->xmlChildrenNode; npnode != nullptr; npnode = npnode->next) | ||
| 1335 | { | ||
| 1336 | if (!xmlStrcmp(npnode->name, reinterpret_cast<const xmlChar*>("SYNRESTRS"))) | ||
| 1337 | { | ||
| 1338 | for (xmlNodePtr synrestr = npnode->xmlChildrenNode; synrestr != nullptr; synrestr = synrestr->next) | ||
| 1339 | { | ||
| 1340 | if (!xmlStrcmp(synrestr->name, reinterpret_cast<const xmlChar*>("SYNRESTR"))) | ||
| 1341 | { | ||
| 1342 | key = xmlGetProp(synrestr, reinterpret_cast<const xmlChar*>("type")); | ||
| 1343 | partSynrestrs.insert(reinterpret_cast<const char*>(key)); | ||
| 1344 | xmlFree(key); | ||
| 1345 | } | ||
| 1346 | } | ||
| 1347 | } | ||
| 1348 | |||
| 1349 | if (!xmlStrcmp(npnode->name, reinterpret_cast<const xmlChar*>("SELRESTRS"))) | ||
| 1350 | { | ||
| 1351 | partSelrestrs = parseSelrestr(npnode); | ||
| 1352 | } | ||
| 1353 | } | ||
| 1354 | |||
| 1355 | fr.push_back(part::createNounPhrase(std::move(partRole), std::move(partSelrestrs), std::move(partSynrestrs))); | ||
| 1356 | } else if (!xmlStrcmp(syntaxnode->name, reinterpret_cast<const xmlChar*>("VERB"))) | ||
| 1357 | { | ||
| 1358 | fr.push_back(part::createVerb()); | ||
| 1359 | } else if (!xmlStrcmp(syntaxnode->name, reinterpret_cast<const xmlChar*>("PREP"))) | ||
| 1360 | { | ||
| 1361 | std::set<std::string> partChoices; | ||
| 1362 | bool partLiteral; | ||
| 1363 | |||
| 1364 | if (xmlHasProp(syntaxnode, reinterpret_cast<const xmlChar*>("value"))) | ||
| 1365 | { | ||
| 1366 | partLiteral = true; | ||
| 1367 | |||
| 1368 | key = xmlGetProp(syntaxnode, reinterpret_cast<const xmlChar*>("value")); | ||
| 1369 | std::string choicesStr = reinterpret_cast<const char*>(key); | ||
| 1370 | xmlFree(key); | ||
| 1371 | |||
| 1372 | split(choicesStr, " ", std::inserter(partChoices, std::end(partChoices))); | ||
| 1373 | } else { | ||
| 1374 | partLiteral = false; | ||
| 1375 | |||
| 1376 | for (xmlNodePtr npnode = syntaxnode->xmlChildrenNode; npnode != nullptr; npnode = npnode->next) | ||
| 1377 | { | ||
| 1378 | if (!xmlStrcmp(npnode->name, reinterpret_cast<const xmlChar*>("SELRESTRS"))) | ||
| 1379 | { | ||
| 1380 | for (xmlNodePtr synrestr = npnode->xmlChildrenNode; synrestr != nullptr; synrestr = synrestr->next) | ||
| 1381 | { | ||
| 1382 | if (!xmlStrcmp(synrestr->name, reinterpret_cast<const xmlChar*>("SELRESTR"))) | ||
| 1383 | { | ||
| 1384 | key = xmlGetProp(synrestr, reinterpret_cast<const xmlChar*>("type")); | ||
| 1385 | partChoices.insert(reinterpret_cast<const char*>(key)); | ||
| 1386 | xmlFree(key); | ||
| 1387 | } | ||
| 1388 | } | ||
| 1389 | } | ||
| 1390 | } | ||
| 1391 | } | ||
| 1392 | |||
| 1393 | fr.push_back(part::createPreposition(std::move(partChoices), partLiteral)); | ||
| 1394 | } else if (!xmlStrcmp(syntaxnode->name, reinterpret_cast<const xmlChar*>("ADJ"))) | ||
| 1395 | { | ||
| 1396 | fr.push_back(part::createAdjective()); | ||
| 1397 | } else if (!xmlStrcmp(syntaxnode->name, reinterpret_cast<const xmlChar*>("ADV"))) | ||
| 1398 | { | ||
| 1399 | fr.push_back(part::createAdverb()); | ||
| 1400 | } else if (!xmlStrcmp(syntaxnode->name, reinterpret_cast<const xmlChar*>("LEX"))) | ||
| 1401 | { | ||
| 1402 | key = xmlGetProp(syntaxnode, reinterpret_cast<const xmlChar*>("value")); | ||
| 1403 | std::string literalValue = reinterpret_cast<const char*>(key); | ||
| 1404 | xmlFree(key); | ||
| 1405 | |||
| 1406 | fr.push_back(part::createLiteral(literalValue)); | ||
| 1407 | } else { | ||
| 1408 | continue; | ||
| 1409 | } | ||
| 1410 | } | ||
| 1411 | |||
| 1412 | grp.addFrame(fr); | ||
| 1413 | } | ||
| 1414 | } | ||
| 1415 | } | ||
| 1416 | } | ||
| 2251 | } | 1417 | } |
| 2252 | } | 1418 | } |
| 2253 | } | ||
| 2254 | } | ||
| 2255 | |||
| 2256 | // syntax table | ||
| 2257 | { | ||
| 2258 | std::ifstream wnsyntaxfile(wnpref + "wn_syntax.pl"); | ||
| 2259 | if (!wnsyntaxfile.is_open()) | ||
| 2260 | { | ||
| 2261 | std::cout << "Invalid WordNet data directory." << std::endl; | ||
| 2262 | print_usage(); | ||
| 2263 | } | ||
| 2264 | 1419 | ||
| 2265 | std::list<std::string> lines; | 1420 | return grp; |
| 2266 | for (;;) | ||
| 2267 | { | ||
| 2268 | std::string line; | ||
| 2269 | if (!getline(wnsyntaxfile, line)) | ||
| 2270 | { | ||
| 2271 | break; | ||
| 2272 | } | ||
| 2273 | |||
| 2274 | if (line.back() == '\r') | ||
| 2275 | { | ||
| 2276 | line.pop_back(); | ||
| 2277 | } | ||
| 2278 | |||
| 2279 | lines.push_back(line); | ||
| 2280 | } | 1421 | } |
| 2281 | 1422 | ||
| 2282 | progress ppgs("Writing adjective syntax markers...", lines.size()); | 1423 | selrestr generator::parseSelrestr(xmlNodePtr top) |
| 2283 | for (auto line : lines) | ||
| 2284 | { | 1424 | { |
| 2285 | ppgs.update(); | 1425 | xmlChar* key; |
| 2286 | 1426 | ||
| 2287 | std::regex relation("^syntax\\((3\\d{8}),(\\d+),([ipa])p?\\)\\."); | 1427 | if (!xmlStrcmp(top->name, reinterpret_cast<const xmlChar*>("SELRESTRS"))) |
| 2288 | std::smatch relation_data; | ||
| 2289 | if (!std::regex_search(line, relation_data, relation)) | ||
| 2290 | { | ||
| 2291 | continue; | ||
| 2292 | } | ||
| 2293 | |||
| 2294 | int synset_id = stoi(relation_data[1]); | ||
| 2295 | int wnum = stoi(relation_data[2]); | ||
| 2296 | std::string syn = relation_data[3]; | ||
| 2297 | std::string query("UPDATE adjectives SET position = ? WHERE adjective_id = ?"); | ||
| 2298 | |||
| 2299 | sqlite3_stmt* ppstmt; | ||
| 2300 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK) | ||
| 2301 | { | 1428 | { |
| 2302 | db_error(ppdb, query); | 1429 | if (xmlChildElementCount(top) == 0) |
| 2303 | } | 1430 | { |
| 2304 | 1431 | return {}; | |
| 2305 | sqlite3_bind_text(ppstmt, 1, syn.c_str(), 1, SQLITE_TRANSIENT); | 1432 | } else if (xmlChildElementCount(top) == 1) |
| 2306 | sqlite3_bind_int(ppstmt, 2, wn[synset_id][wnum]); | 1433 | { |
| 2307 | 1434 | return parseSelrestr(xmlFirstElementChild(top)); | |
| 2308 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | 1435 | } else { |
| 1436 | bool orlogic = false; | ||
| 1437 | if (xmlHasProp(top, reinterpret_cast<const xmlChar*>("logic"))) | ||
| 1438 | { | ||
| 1439 | key = xmlGetProp(top, reinterpret_cast<const xmlChar*>("logic")); | ||
| 1440 | if (!xmlStrcmp(key, reinterpret_cast<const xmlChar*>("or"))) | ||
| 1441 | { | ||
| 1442 | orlogic = true; | ||
| 1443 | } | ||
| 1444 | |||
| 1445 | xmlFree(key); | ||
| 1446 | } | ||
| 1447 | |||
| 1448 | std::list<selrestr> children; | ||
| 1449 | for (xmlNodePtr selrestr = top->xmlChildrenNode; selrestr != nullptr; selrestr = selrestr->next) | ||
| 1450 | { | ||
| 1451 | if (!xmlStrcmp(selrestr->name, reinterpret_cast<const xmlChar*>("SELRESTRS")) | ||
| 1452 | || !xmlStrcmp(selrestr->name, reinterpret_cast<const xmlChar*>("SELRESTR"))) | ||
| 1453 | { | ||
| 1454 | children.push_back(parseSelrestr(selrestr)); | ||
| 1455 | } | ||
| 1456 | } | ||
| 1457 | |||
| 1458 | return selrestr(children, orlogic); | ||
| 1459 | } | ||
| 1460 | } else if (!xmlStrcmp(top->name, reinterpret_cast<const xmlChar*>("SELRESTR"))) | ||
| 2309 | { | 1461 | { |
| 2310 | db_error(ppdb, query); | 1462 | key = xmlGetProp(top, reinterpret_cast<const xmlChar*>("Value")); |
| 1463 | bool selPos = (std::string(reinterpret_cast<const char*>(key)) == "+"); | ||
| 1464 | xmlFree(key); | ||
| 1465 | |||
| 1466 | key = xmlGetProp(top, reinterpret_cast<const xmlChar*>("type")); | ||
| 1467 | std::string selRestriction = reinterpret_cast<const char*>(key); | ||
| 1468 | xmlFree(key); | ||
| 1469 | |||
| 1470 | return selrestr(selRestriction, selPos); | ||
| 1471 | } else { | ||
| 1472 | throw std::logic_error("Badly formatted selrestr"); | ||
| 2311 | } | 1473 | } |
| 2312 | |||
| 2313 | sqlite3_finalize(ppstmt); | ||
| 2314 | } | 1474 | } |
| 2315 | } | 1475 | |
| 2316 | 1476 | }; | |
| 2317 | sqlite3_close_v2(ppdb); | 1477 | }; |
| 2318 | |||
| 2319 | std::cout << "Done." << std::endl; | ||
| 2320 | } | ||
| diff --git a/generator/generator.h b/generator/generator.h new file mode 100644 index 0000000..e2a7404 --- /dev/null +++ b/generator/generator.h | |||
| @@ -0,0 +1,151 @@ | |||
| 1 | #ifndef GENERATOR_H_5B61CBC5 | ||
| 2 | #define GENERATOR_H_5B61CBC5 | ||
| 3 | |||
| 4 | #include <string> | ||
| 5 | #include <map> | ||
| 6 | #include <list> | ||
| 7 | #include <set> | ||
| 8 | #include <libxml/parser.h> | ||
| 9 | #include "database.h" | ||
| 10 | #include "notion.h" | ||
| 11 | #include "word.h" | ||
| 12 | #include "lemma.h" | ||
| 13 | #include "form.h" | ||
| 14 | #include "pronunciation.h" | ||
| 15 | #include "group.h" | ||
| 16 | #include "frame.h" | ||
| 17 | |||
| 18 | namespace verbly { | ||
| 19 | namespace generator { | ||
| 20 | |||
| 21 | enum class part_of_speech; | ||
| 22 | class selrestr; | ||
| 23 | |||
| 24 | class generator { | ||
| 25 | public: | ||
| 26 | |||
| 27 | // Constructor | ||
| 28 | |||
| 29 | generator( | ||
| 30 | std::string verbNetPath, | ||
| 31 | std::string agidPath, | ||
| 32 | std::string wordNetPath, | ||
| 33 | std::string cmudictPath, | ||
| 34 | std::string imageNetPath, | ||
| 35 | std::string outputPath); | ||
| 36 | |||
| 37 | // Action | ||
| 38 | |||
| 39 | void run(); | ||
| 40 | |||
| 41 | private: | ||
| 42 | |||
| 43 | // Subroutines | ||
| 44 | |||
| 45 | void readWordNetSynsets(); | ||
| 46 | |||
| 47 | void readAdjectivePositioning(); | ||
| 48 | |||
| 49 | void readImageNetUrls(); | ||
| 50 | |||
| 51 | void readWordNetSenseKeys(); | ||
| 52 | |||
| 53 | void readVerbNet(); | ||
| 54 | |||
| 55 | void readAgidInflections(); | ||
| 56 | |||
| 57 | void readPrepositions(); | ||
| 58 | |||
| 59 | void readCmudictPronunciations(); | ||
| 60 | |||
| 61 | void writeSchema(); | ||
| 62 | |||
| 63 | void dumpObjects(); | ||
| 64 | |||
| 65 | void readWordNetAntonymy(); | ||
| 66 | |||
| 67 | void readWordNetVariation(); | ||
| 68 | |||
| 69 | void readWordNetClasses(); | ||
| 70 | |||
| 71 | void readWordNetCausality(); | ||
| 72 | |||
| 73 | void readWordNetEntailment(); | ||
| 74 | |||
| 75 | void readWordNetHypernymy(); | ||
| 76 | |||
| 77 | void readWordNetInstantiation(); | ||
| 78 | |||
| 79 | void readWordNetMemberMeronymy(); | ||
| 80 | |||
| 81 | void readWordNetPartMeronymy(); | ||
| 82 | |||
| 83 | void readWordNetSubstanceMeronymy(); | ||
| 84 | |||
| 85 | void readWordNetPertainymy(); | ||
| 86 | |||
| 87 | void readWordNetSpecification(); | ||
| 88 | |||
| 89 | void readWordNetSimilarity(); | ||
| 90 | |||
| 91 | // Helpers | ||
| 92 | |||
| 93 | std::list<std::string> readFile(std::string path); | ||
| 94 | |||
| 95 | inline part_of_speech partOfSpeechByWnid(int wnid); | ||
| 96 | |||
| 97 | notion& createNotion(part_of_speech partOfSpeech); | ||
| 98 | |||
| 99 | notion& lookupOrCreateNotion(int wnid); | ||
| 100 | |||
| 101 | lemma& lookupOrCreateLemma(std::string base_form); | ||
| 102 | |||
| 103 | form& lookupOrCreateForm(std::string text); | ||
| 104 | |||
| 105 | template <typename... Args> word& createWord(Args&&... args); | ||
| 106 | |||
| 107 | group& createGroup(xmlNodePtr top); | ||
| 108 | |||
| 109 | selrestr parseSelrestr(xmlNodePtr top); | ||
| 110 | |||
| 111 | // Input | ||
| 112 | |||
| 113 | std::string verbNetPath_; | ||
| 114 | std::string agidPath_; | ||
| 115 | std::string wordNetPath_; | ||
| 116 | std::string cmudictPath_; | ||
| 117 | std::string imageNetPath_; | ||
| 118 | |||
| 119 | // Output | ||
| 120 | |||
| 121 | database db_; | ||
| 122 | |||
| 123 | // Data | ||
| 124 | |||
| 125 | std::list<notion> notions_; | ||
| 126 | std::list<word> words_; | ||
| 127 | std::list<lemma> lemmas_; | ||
| 128 | std::list<form> forms_; | ||
| 129 | std::list<pronunciation> pronunciations_; | ||
| 130 | std::list<frame> frames_; | ||
| 131 | std::list<group> groups_; | ||
| 132 | |||
| 133 | // Indexes | ||
| 134 | |||
| 135 | std::map<int, notion*> notionByWnid_; | ||
| 136 | std::map<int, std::set<word*>> wordsByWnid_; | ||
| 137 | std::map<std::pair<int, int>, word*> wordByWnidAndWnum_; | ||
| 138 | std::map<std::string, std::set<word*>> wordsByBaseForm_; | ||
| 139 | std::map<std::string, lemma*> lemmaByBaseForm_; | ||
| 140 | std::map<std::string, form*> formByText_; | ||
| 141 | |||
| 142 | // Caches | ||
| 143 | |||
| 144 | std::map<std::string, word*> wnSenseKeys_; | ||
| 145 | |||
| 146 | }; | ||
| 147 | |||
| 148 | }; | ||
| 149 | }; | ||
| 150 | |||
| 151 | #endif /* end of include guard: GENERATOR_H_5B61CBC5 */ | ||
| diff --git a/generator/group.cpp b/generator/group.cpp new file mode 100644 index 0000000..7cbd4c8 --- /dev/null +++ b/generator/group.cpp | |||
| @@ -0,0 +1,119 @@ | |||
| 1 | #include "group.h" | ||
| 2 | #include <stdexcept> | ||
| 3 | #include <list> | ||
| 4 | #include <json.hpp> | ||
| 5 | #include "database.h" | ||
| 6 | #include "field.h" | ||
| 7 | #include "frame.h" | ||
| 8 | |||
| 9 | namespace verbly { | ||
| 10 | namespace generator { | ||
| 11 | |||
| 12 | int group::nextId_ = 0; | ||
| 13 | |||
| 14 | group::group() : id_(nextId_++) | ||
| 15 | { | ||
| 16 | } | ||
| 17 | |||
| 18 | void group::setParent(const group& parent) | ||
| 19 | { | ||
| 20 | // Adding a group to itself is nonsensical. | ||
| 21 | assert(&parent != this); | ||
| 22 | |||
| 23 | parent_ = &parent; | ||
| 24 | } | ||
| 25 | |||
| 26 | void group::addRole(std::string name, role r) | ||
| 27 | { | ||
| 28 | roleNames_.insert(name); | ||
| 29 | roles_[name] = std::move(r); | ||
| 30 | } | ||
| 31 | |||
| 32 | void group::addFrame(const frame& f) | ||
| 33 | { | ||
| 34 | frames_.insert(&f); | ||
| 35 | } | ||
| 36 | |||
| 37 | std::set<std::string> group::getRoles() const | ||
| 38 | { | ||
| 39 | std::set<std::string> fullRoles = roleNames_; | ||
| 40 | |||
| 41 | if (hasParent()) | ||
| 42 | { | ||
| 43 | for (std::string name : getParent().getRoles()) | ||
| 44 | { | ||
| 45 | fullRoles.insert(name); | ||
| 46 | } | ||
| 47 | } | ||
| 48 | |||
| 49 | return fullRoles; | ||
| 50 | } | ||
| 51 | |||
| 52 | const role& group::getRole(std::string name) const | ||
| 53 | { | ||
| 54 | if (roles_.count(name)) | ||
| 55 | { | ||
| 56 | return roles_.at(name); | ||
| 57 | } else if (hasParent()) | ||
| 58 | { | ||
| 59 | return getParent().getRole(name); | ||
| 60 | } else { | ||
| 61 | throw std::invalid_argument("Specified role not found in verb group"); | ||
| 62 | } | ||
| 63 | } | ||
| 64 | |||
| 65 | std::set<const frame*> group::getFrames() const | ||
| 66 | { | ||
| 67 | std::set<const frame*> fullFrames = frames_; | ||
| 68 | |||
| 69 | if (hasParent()) | ||
| 70 | { | ||
| 71 | for (const frame* f : getParent().getFrames()) | ||
| 72 | { | ||
| 73 | fullFrames.insert(f); | ||
| 74 | } | ||
| 75 | } | ||
| 76 | |||
| 77 | return fullFrames; | ||
| 78 | } | ||
| 79 | |||
| 80 | database& operator<<(database& db, const group& arg) | ||
| 81 | { | ||
| 82 | // Serialize the group first | ||
| 83 | { | ||
| 84 | std::list<field> fields; | ||
| 85 | fields.emplace_back("group_id", arg.getId()); | ||
| 86 | |||
| 87 | nlohmann::json jsonRoles; | ||
| 88 | for (std::string name : arg.getRoles()) | ||
| 89 | { | ||
| 90 | const role& r = arg.getRole(name); | ||
| 91 | |||
| 92 | nlohmann::json jsonRole; | ||
| 93 | jsonRole["type"] = name; | ||
| 94 | jsonRole["selrestrs"] = r.getSelrestrs().toJson(); | ||
| 95 | |||
| 96 | jsonRoles.emplace_back(std::move(jsonRole)); | ||
| 97 | } | ||
| 98 | |||
| 99 | fields.emplace_back("data", jsonRoles.dump()); | ||
| 100 | |||
| 101 | db.insertIntoTable("groups", std::move(fields)); | ||
| 102 | } | ||
| 103 | |||
| 104 | // Then, serialize the group/frame relationship | ||
| 105 | for (const frame* f : arg.getFrames()) | ||
| 106 | { | ||
| 107 | std::list<field> fields; | ||
| 108 | |||
| 109 | fields.emplace_back("group_id", arg.getId()); | ||
| 110 | fields.emplace_back("frame_id", f->getId()); | ||
| 111 | |||
| 112 | db.insertIntoTable("groups_frames", std::move(fields)); | ||
| 113 | } | ||
| 114 | |||
| 115 | return db; | ||
| 116 | } | ||
| 117 | |||
| 118 | }; | ||
| 119 | }; | ||
| diff --git a/generator/group.h b/generator/group.h new file mode 100644 index 0000000..efb8c5d --- /dev/null +++ b/generator/group.h | |||
| @@ -0,0 +1,80 @@ | |||
| 1 | #ifndef GROUP_H_EDAFB5DC | ||
| 2 | #define GROUP_H_EDAFB5DC | ||
| 3 | |||
| 4 | #include <map> | ||
| 5 | #include <set> | ||
| 6 | #include <string> | ||
| 7 | #include <cassert> | ||
| 8 | #include "role.h" | ||
| 9 | |||
| 10 | namespace verbly { | ||
| 11 | namespace generator { | ||
| 12 | |||
| 13 | class frame; | ||
| 14 | class database; | ||
| 15 | |||
| 16 | class group { | ||
| 17 | public: | ||
| 18 | |||
| 19 | // Constructor | ||
| 20 | |||
| 21 | group(); | ||
| 22 | |||
| 23 | // Mutators | ||
| 24 | |||
| 25 | void setParent(const group& parent); | ||
| 26 | |||
| 27 | void addRole(std::string name, role r); | ||
| 28 | |||
| 29 | void addFrame(const frame& f); | ||
| 30 | |||
| 31 | // Accessors | ||
| 32 | |||
| 33 | int getId() const | ||
| 34 | { | ||
| 35 | return id_; | ||
| 36 | } | ||
| 37 | |||
| 38 | bool hasParent() const | ||
| 39 | { | ||
| 40 | return (parent_ != nullptr); | ||
| 41 | } | ||
| 42 | |||
| 43 | const group& getParent() const | ||
| 44 | { | ||
| 45 | // Calling code should always call hasParent first | ||
| 46 | assert(parent_ != nullptr); | ||
| 47 | |||
| 48 | return *parent_; | ||
| 49 | } | ||
| 50 | |||
| 51 | std::set<std::string> getRoles() const; | ||
| 52 | |||
| 53 | const role& getRole(std::string name) const; | ||
| 54 | |||
| 55 | std::set<const frame*> getFrames() const; | ||
| 56 | |||
| 57 | private: | ||
| 58 | |||
| 59 | static int nextId_; | ||
| 60 | |||
| 61 | const int id_; | ||
| 62 | |||
| 63 | const group* parent_ = nullptr; | ||
| 64 | std::map<std::string, role> roles_; | ||
| 65 | std::set<const frame*> frames_; | ||
| 66 | |||
| 67 | // Caches | ||
| 68 | |||
| 69 | std::set<std::string> roleNames_; | ||
| 70 | |||
| 71 | }; | ||
| 72 | |||
| 73 | // Serializer | ||
| 74 | |||
| 75 | database& operator<<(database& db, const group& arg); | ||
| 76 | |||
| 77 | }; | ||
| 78 | }; | ||
| 79 | |||
| 80 | #endif /* end of include guard: GROUP_H_EDAFB5DC */ | ||
| diff --git a/generator/lemma.cpp b/generator/lemma.cpp new file mode 100644 index 0000000..e66b153 --- /dev/null +++ b/generator/lemma.cpp | |||
| @@ -0,0 +1,65 @@ | |||
| 1 | #include "lemma.h" | ||
| 2 | #include <list> | ||
| 3 | #include <cassert> | ||
| 4 | #include "field.h" | ||
| 5 | #include "database.h" | ||
| 6 | #include "form.h" | ||
| 7 | |||
| 8 | namespace verbly { | ||
| 9 | namespace generator { | ||
| 10 | |||
| 11 | int lemma::nextId_ = 0; | ||
| 12 | |||
| 13 | lemma::lemma(const form& baseForm) : | ||
| 14 | id_(nextId_++), | ||
| 15 | baseForm_(baseForm) | ||
| 16 | { | ||
| 17 | inflections_[inflection::base] = {&baseForm}; | ||
| 18 | } | ||
| 19 | |||
| 20 | void lemma::addInflection(inflection type, const form& f) | ||
| 21 | { | ||
| 22 | // There can only be one base form. | ||
| 23 | assert(type != inflection::base); | ||
| 24 | |||
| 25 | inflections_[type].insert(&f); | ||
| 26 | } | ||
| 27 | |||
| 28 | std::set<const form*> lemma::getInflections(inflection type) const | ||
| 29 | { | ||
| 30 | if (inflections_.count(type)) | ||
| 31 | { | ||
| 32 | return inflections_.at(type); | ||
| 33 | } else { | ||
| 34 | return {}; | ||
| 35 | } | ||
| 36 | } | ||
| 37 | |||
| 38 | database& operator<<(database& db, const lemma& arg) | ||
| 39 | { | ||
| 40 | for (inflection type : { | ||
| 41 | inflection::base, | ||
| 42 | inflection::plural, | ||
| 43 | inflection::comparative, | ||
| 44 | inflection::superlative, | ||
| 45 | inflection::past_tense, | ||
| 46 | inflection::past_participle, | ||
| 47 | inflection::ing_form, | ||
| 48 | inflection::s_form}) | ||
| 49 | { | ||
| 50 | for (const form* f : arg.getInflections(type)) | ||
| 51 | { | ||
| 52 | std::list<field> fields; | ||
| 53 | fields.emplace_back("lemma_id", arg.getId()); | ||
| 54 | fields.emplace_back("form_id", f->getId()); | ||
| 55 | fields.emplace_back("category", static_cast<int>(type)); | ||
| 56 | |||
| 57 | db.insertIntoTable("lemmas_forms", std::move(fields)); | ||
| 58 | } | ||
| 59 | } | ||
| 60 | |||
| 61 | return db; | ||
| 62 | } | ||
| 63 | |||
| 64 | }; | ||
| 65 | }; | ||
| diff --git a/generator/lemma.h b/generator/lemma.h new file mode 100644 index 0000000..6452e08 --- /dev/null +++ b/generator/lemma.h | |||
| @@ -0,0 +1,58 @@ | |||
| 1 | #ifndef LEMMA_H_D73105A7 | ||
| 2 | #define LEMMA_H_D73105A7 | ||
| 3 | |||
| 4 | #include <string> | ||
| 5 | #include <map> | ||
| 6 | #include <set> | ||
| 7 | #include "enums.h" | ||
| 8 | |||
| 9 | namespace verbly { | ||
| 10 | namespace generator { | ||
| 11 | |||
| 12 | class database; | ||
| 13 | class form; | ||
| 14 | |||
| 15 | class lemma { | ||
| 16 | public: | ||
| 17 | |||
| 18 | // Constructors | ||
| 19 | |||
| 20 | explicit lemma(const form& baseForm); | ||
| 21 | |||
| 22 | // Mutators | ||
| 23 | |||
| 24 | void addInflection(inflection type, const form& f); | ||
| 25 | |||
| 26 | // Accessors | ||
| 27 | |||
| 28 | int getId() const | ||
| 29 | { | ||
| 30 | return id_; | ||
| 31 | } | ||
| 32 | |||
| 33 | const form& getBaseForm() const | ||
| 34 | { | ||
| 35 | return baseForm_; | ||
| 36 | } | ||
| 37 | |||
| 38 | std::set<const form*> getInflections(inflection type) const; | ||
| 39 | |||
| 40 | private: | ||
| 41 | |||
| 42 | static int nextId_; | ||
| 43 | |||
| 44 | const int id_; | ||
| 45 | const form& baseForm_; | ||
| 46 | |||
| 47 | std::map<inflection, std::set<const form*>> inflections_; | ||
| 48 | |||
| 49 | }; | ||
| 50 | |||
| 51 | // Serializer | ||
| 52 | |||
| 53 | database& operator<<(database& db, const lemma& arg); | ||
| 54 | |||
| 55 | }; | ||
| 56 | }; | ||
| 57 | |||
| 58 | #endif /* end of include guard: LEMMA_H_D73105A7 */ | ||
| diff --git a/generator/main.cpp b/generator/main.cpp new file mode 100644 index 0000000..827c963 --- /dev/null +++ b/generator/main.cpp | |||
| @@ -0,0 +1,40 @@ | |||
| 1 | #include <iostream> | ||
| 2 | #include <exception> | ||
| 3 | #include "generator.h" | ||
| 4 | |||
// Prints the command-line synopsis and a one-line description of each
// positional argument to standard output.
void printUsage()
{
  static const char* const kUsageLines[] = {
    "usage: generator verbnet agid wordnet cmudict imagenet output",
    "verbnet :: path to a VerbNet data directory",
    "agid :: path to an AGID infl.txt file",
    "wordnet :: path to a WordNet prolog data directory",
    "cmudict :: path to a CMUDICT pronunciation file",
    "imagenet :: path to an ImageNet urls.txt file",
    "output :: datafile output path"
  };

  for (const char* line : kUsageLines)
  {
    std::cout << line << std::endl;
  }
}
| 15 | |||
| 16 | int main(int argc, char** argv) | ||
| 17 | { | ||
| 18 | if (argc == 7) | ||
| 19 | { | ||
| 20 | try | ||
| 21 | { | ||
| 22 | verbly::generator::generator app(argv[1], argv[2], argv[3], argv[4], argv[5], argv[6]); | ||
| 23 | |||
| 24 | try | ||
| 25 | { | ||
| 26 | app.run(); | ||
| 27 | } catch (const std::exception& e) | ||
| 28 | { | ||
| 29 | std::cout << e.what() << std::endl; | ||
| 30 | } | ||
| 31 | } catch (const std::exception& e) | ||
| 32 | { | ||
| 33 | std::cout << e.what() << std::endl; | ||
| 34 | printUsage(); | ||
| 35 | } | ||
| 36 | } else { | ||
| 37 | std::cout << "verbly datafile generator" << std::endl; | ||
| 38 | printUsage(); | ||
| 39 | } | ||
| 40 | } | ||
| diff --git a/generator/notion.cpp b/generator/notion.cpp new file mode 100644 index 0000000..290d982 --- /dev/null +++ b/generator/notion.cpp | |||
| @@ -0,0 +1,85 @@ | |||
| 1 | #include "notion.h" | ||
| 2 | #include <string> | ||
| 3 | #include <list> | ||
| 4 | #include "database.h" | ||
| 5 | #include "field.h" | ||
| 6 | |||
| 7 | namespace verbly { | ||
| 8 | namespace generator { | ||
| 9 | |||
| 10 | int notion::nextId_ = 0; | ||
| 11 | |||
| 12 | notion::notion( | ||
| 13 | part_of_speech partOfSpeech) : | ||
| 14 | id_(nextId_++), | ||
| 15 | partOfSpeech_(partOfSpeech) | ||
| 16 | { | ||
| 17 | } | ||
| 18 | |||
| 19 | notion::notion( | ||
| 20 | part_of_speech partOfSpeech, | ||
| 21 | int wnid) : | ||
| 22 | id_(nextId_++), | ||
| 23 | partOfSpeech_(partOfSpeech), | ||
| 24 | wnid_(wnid), | ||
| 25 | hasWnid_(true) | ||
| 26 | { | ||
| 27 | } | ||
| 28 | |||
| 29 | void notion::incrementNumOfImages() | ||
| 30 | { | ||
| 31 | // Calling code should always call hasWnid and check that the notion is a noun first. | ||
| 32 | assert(hasWnid_ && (partOfSpeech_ == part_of_speech::noun)); | ||
| 33 | |||
| 34 | numOfImages_++; | ||
| 35 | } | ||
| 36 | |||
| 37 | void notion::setPrepositionGroups(std::list<std::string> groups) | ||
| 38 | { | ||
| 39 | // Calling code should always check that the notion is a preposition first. | ||
| 40 | assert(partOfSpeech_ == part_of_speech::preposition); | ||
| 41 | |||
| 42 | prepositionGroups_ = groups; | ||
| 43 | } | ||
| 44 | |||
| 45 | database& operator<<(database& db, const notion& arg) | ||
| 46 | { | ||
| 47 | // First, serialize the notion | ||
| 48 | { | ||
| 49 | std::list<field> fields; | ||
| 50 | |||
| 51 | fields.emplace_back("notion_id", arg.getId()); | ||
| 52 | fields.emplace_back("part_of_speech", static_cast<int>(arg.getPartOfSpeech())); | ||
| 53 | |||
| 54 | if (arg.hasWnid()) | ||
| 55 | { | ||
| 56 | fields.emplace_back("wnid", arg.getWnid()); | ||
| 57 | |||
| 58 | if (arg.getPartOfSpeech() == part_of_speech::noun) | ||
| 59 | { | ||
| 60 | fields.emplace_back("images", arg.getNumOfImages()); | ||
| 61 | } | ||
| 62 | } | ||
| 63 | |||
| 64 | db.insertIntoTable("notions", std::move(fields)); | ||
| 65 | } | ||
| 66 | |||
| 67 | // Next, serialize the is_a relationship if this is a preposition | ||
| 68 | if (arg.getPartOfSpeech() == part_of_speech::preposition) | ||
| 69 | { | ||
| 70 | for (std::string group : arg.getPrepositionGroups()) | ||
| 71 | { | ||
| 72 | std::list<field> fields; | ||
| 73 | |||
| 74 | fields.emplace_back("notion_id", arg.getId()); | ||
| 75 | fields.emplace_back("groupname", group); | ||
| 76 | |||
| 77 | db.insertIntoTable("is_a", std::move(fields)); | ||
| 78 | } | ||
| 79 | } | ||
| 80 | |||
| 81 | return db; | ||
| 82 | } | ||
| 83 | |||
| 84 | }; | ||
| 85 | }; | ||
| diff --git a/generator/notion.h b/generator/notion.h new file mode 100644 index 0000000..76210de --- /dev/null +++ b/generator/notion.h | |||
| @@ -0,0 +1,91 @@ | |||
| 1 | #ifndef NOTION_H_221DE2BC | ||
| 2 | #define NOTION_H_221DE2BC | ||
| 3 | |||
| 4 | #include <cassert> | ||
| 5 | #include <list> | ||
| 6 | #include <string> | ||
| 7 | #include "enums.h" | ||
| 8 | |||
| 9 | namespace verbly { | ||
| 10 | namespace generator { | ||
| 11 | |||
| 12 | class database; | ||
| 13 | |||
| 14 | class notion { | ||
| 15 | public: | ||
| 16 | |||
| 17 | // Constructors | ||
| 18 | |||
| 19 | explicit notion(part_of_speech partOfSpeech); | ||
| 20 | |||
| 21 | notion(part_of_speech partOfSpeech, int wnid); | ||
| 22 | |||
| 23 | // Mutators | ||
| 24 | |||
| 25 | void incrementNumOfImages(); | ||
| 26 | |||
| 27 | void setPrepositionGroups(std::list<std::string> groups); | ||
| 28 | |||
| 29 | // Accessors | ||
| 30 | |||
| 31 | int getId() const | ||
| 32 | { | ||
| 33 | return id_; | ||
| 34 | } | ||
| 35 | |||
| 36 | part_of_speech getPartOfSpeech() const | ||
| 37 | { | ||
| 38 | return partOfSpeech_; | ||
| 39 | } | ||
| 40 | |||
| 41 | bool hasWnid() const | ||
| 42 | { | ||
| 43 | return hasWnid_; | ||
| 44 | } | ||
| 45 | |||
| 46 | int getWnid() const | ||
| 47 | { | ||
| 48 | // Calling code should always call hasWnid first. | ||
| 49 | assert(hasWnid_); | ||
| 50 | |||
| 51 | return wnid_; | ||
| 52 | } | ||
| 53 | |||
| 54 | int getNumOfImages() const | ||
| 55 | { | ||
| 56 | // Calling code should always call hasWnid and check that the notion is a noun first. | ||
| 57 | assert(hasWnid_ && (partOfSpeech_ == part_of_speech::noun)); | ||
| 58 | |||
| 59 | return numOfImages_; | ||
| 60 | } | ||
| 61 | |||
| 62 | std::list<std::string> getPrepositionGroups() const | ||
| 63 | { | ||
| 64 | // Calling code should always check that the notion is a preposition first. | ||
| 65 | assert(partOfSpeech_ == part_of_speech::preposition); | ||
| 66 | |||
| 67 | return prepositionGroups_; | ||
| 68 | } | ||
| 69 | |||
| 70 | private: | ||
| 71 | |||
| 72 | static int nextId_; | ||
| 73 | |||
| 74 | const int id_; | ||
| 75 | const part_of_speech partOfSpeech_; | ||
| 76 | const int wnid_ = 0; | ||
| 77 | const bool hasWnid_ = false; | ||
| 78 | |||
| 79 | int numOfImages_ = 0; | ||
| 80 | std::list<std::string> prepositionGroups_; | ||
| 81 | |||
| 82 | }; | ||
| 83 | |||
| 84 | // Serializer | ||
| 85 | |||
| 86 | database& operator<<(database& db, const notion& arg); | ||
| 87 | |||
| 88 | }; | ||
| 89 | }; | ||
| 90 | |||
| 91 | #endif /* end of include guard: NOTION_H_221DE2BC */ | ||
| diff --git a/generator/part.cpp b/generator/part.cpp new file mode 100644 index 0000000..dbd4e11 --- /dev/null +++ b/generator/part.cpp | |||
| @@ -0,0 +1,336 @@ | |||
| 1 | #include "part.h" | ||
| 2 | #include <stdexcept> | ||
| 3 | #include "selrestr.h" | ||
| 4 | |||
| 5 | namespace verbly { | ||
| 6 | namespace generator { | ||
| 7 | |||
| 8 | part part::createNounPhrase(std::string role, selrestr selrestrs, std::set<std::string> synrestrs) | ||
| 9 | { | ||
| 10 | part p(type::noun_phrase); | ||
| 11 | |||
| 12 | new(&p.noun_phrase_.role) std::string(std::move(role)); | ||
| 13 | new(&p.noun_phrase_.selrestrs) selrestr(std::move(selrestrs)); | ||
| 14 | new(&p.noun_phrase_.synrestrs) std::set<std::string>(std::move(synrestrs)); | ||
| 15 | |||
| 16 | return p; | ||
| 17 | } | ||
| 18 | |||
| 19 | part part::createVerb() | ||
| 20 | { | ||
| 21 | return part(type::verb); | ||
| 22 | } | ||
| 23 | |||
| 24 | part part::createPreposition(std::set<std::string> choices, bool literal) | ||
| 25 | { | ||
| 26 | part p(type::preposition); | ||
| 27 | |||
| 28 | new(&p.preposition_.choices) std::set<std::string>(std::move(choices)); | ||
| 29 | p.preposition_.literal = literal; | ||
| 30 | |||
| 31 | return p; | ||
| 32 | } | ||
| 33 | |||
| 34 | part part::createAdjective() | ||
| 35 | { | ||
| 36 | return part(type::adjective); | ||
| 37 | } | ||
| 38 | |||
| 39 | part part::createAdverb() | ||
| 40 | { | ||
| 41 | return part(type::adverb); | ||
| 42 | } | ||
| 43 | |||
| 44 | part part::createLiteral(std::string value) | ||
| 45 | { | ||
| 46 | part p(type::literal); | ||
| 47 | |||
| 48 | new(&p.literal_) std::string(std::move(value)); | ||
| 49 | |||
| 50 | return p; | ||
| 51 | } | ||
| 52 | |||
| 53 | part::part(const part& other) | ||
| 54 | { | ||
| 55 | type_ = other.type_; | ||
| 56 | |||
| 57 | switch (type_) | ||
| 58 | { | ||
| 59 | case type::noun_phrase: | ||
| 60 | { | ||
| 61 | new(&noun_phrase_.role) std::string(other.noun_phrase_.role); | ||
| 62 | new(&noun_phrase_.selrestrs) selrestr(other.noun_phrase_.selrestrs); | ||
| 63 | new(&noun_phrase_.synrestrs) std::set<std::string>(other.noun_phrase_.synrestrs); | ||
| 64 | |||
| 65 | break; | ||
| 66 | } | ||
| 67 | |||
| 68 | case type::preposition: | ||
| 69 | { | ||
| 70 | new(&preposition_.choices) std::set<std::string>(other.preposition_.choices); | ||
| 71 | preposition_.literal = other.preposition_.literal; | ||
| 72 | |||
| 73 | break; | ||
| 74 | } | ||
| 75 | |||
| 76 | case type::literal: | ||
| 77 | { | ||
| 78 | new(&literal_) std::string(other.literal_); | ||
| 79 | |||
| 80 | break; | ||
| 81 | } | ||
| 82 | |||
| 83 | case type::verb: | ||
| 84 | case type::adjective: | ||
| 85 | case type::adverb: | ||
| 86 | case type::invalid: | ||
| 87 | { | ||
| 88 | break; | ||
| 89 | } | ||
| 90 | } | ||
| 91 | } | ||
| 92 | |||
| 93 | part::part(part&& other) : part() | ||
| 94 | { | ||
| 95 | swap(*this, other); | ||
| 96 | } | ||
| 97 | |||
| 98 | part& part::operator=(part other) | ||
| 99 | { | ||
| 100 | swap(*this, other); | ||
| 101 | |||
| 102 | return *this; | ||
| 103 | } | ||
| 104 | |||
| 105 | void swap(part& first, part& second) | ||
| 106 | { | ||
| 107 | using type = part::type; | ||
| 108 | |||
| 109 | type tempType = first.type_; | ||
| 110 | std::string tempRole; | ||
| 111 | selrestr tempSelrestrs; | ||
| 112 | std::set<std::string> tempSynrestrs; | ||
| 113 | std::set<std::string> tempChoices; | ||
| 114 | bool tempPrepLiteral; | ||
| 115 | std::string tempLiteralValue; | ||
| 116 | |||
| 117 | switch (tempType) | ||
| 118 | { | ||
| 119 | case type::noun_phrase: | ||
| 120 | { | ||
| 121 | tempRole = std::move(first.noun_phrase_.role); | ||
| 122 | tempSelrestrs = std::move(first.noun_phrase_.selrestrs); | ||
| 123 | tempSynrestrs = std::move(first.noun_phrase_.synrestrs); | ||
| 124 | |||
| 125 | break; | ||
| 126 | } | ||
| 127 | |||
| 128 | case type::preposition: | ||
| 129 | { | ||
| 130 | tempChoices = std::move(first.preposition_.choices); | ||
| 131 | tempPrepLiteral = first.preposition_.literal; | ||
| 132 | |||
| 133 | break; | ||
| 134 | } | ||
| 135 | |||
| 136 | case type::literal: | ||
| 137 | { | ||
| 138 | tempLiteralValue = std::move(first.literal_); | ||
| 139 | |||
| 140 | break; | ||
| 141 | } | ||
| 142 | |||
| 143 | case type::verb: | ||
| 144 | case type::adjective: | ||
| 145 | case type::adverb: | ||
| 146 | case type::invalid: | ||
| 147 | { | ||
| 148 | break; | ||
| 149 | } | ||
| 150 | } | ||
| 151 | |||
| 152 | first.~part(); | ||
| 153 | |||
| 154 | first.type_ = second.type_; | ||
| 155 | |||
| 156 | switch (first.type_) | ||
| 157 | { | ||
| 158 | case type::noun_phrase: | ||
| 159 | { | ||
| 160 | new(&first.noun_phrase_.role) std::string(std::move(second.noun_phrase_.role)); | ||
| 161 | new(&first.noun_phrase_.selrestrs) selrestr(std::move(second.noun_phrase_.selrestrs)); | ||
| 162 | new(&first.noun_phrase_.synrestrs) std::set<std::string>(std::move(second.noun_phrase_.synrestrs)); | ||
| 163 | |||
| 164 | break; | ||
| 165 | } | ||
| 166 | |||
| 167 | case type::preposition: | ||
| 168 | { | ||
| 169 | new(&first.preposition_.choices) std::set<std::string>(std::move(second.preposition_.choices)); | ||
| 170 | first.preposition_.literal = second.preposition_.literal; | ||
| 171 | |||
| 172 | break; | ||
| 173 | } | ||
| 174 | |||
| 175 | case type::literal: | ||
| 176 | { | ||
| 177 | new(&first.literal_) std::string(std::move(second.literal_)); | ||
| 178 | |||
| 179 | break; | ||
| 180 | } | ||
| 181 | |||
| 182 | case type::verb: | ||
| 183 | case type::adjective: | ||
| 184 | case type::adverb: | ||
| 185 | case type::invalid: | ||
| 186 | { | ||
| 187 | break; | ||
| 188 | } | ||
| 189 | } | ||
| 190 | |||
| 191 | second.~part(); | ||
| 192 | |||
| 193 | second.type_ = tempType; | ||
| 194 | |||
| 195 | switch (second.type_) | ||
| 196 | { | ||
| 197 | case type::noun_phrase: | ||
| 198 | { | ||
| 199 | new(&second.noun_phrase_.role) std::string(std::move(tempRole)); | ||
| 200 | new(&second.noun_phrase_.selrestrs) selrestr(std::move(tempSelrestrs)); | ||
| 201 | new(&second.noun_phrase_.synrestrs) std::set<std::string>(std::move(tempSynrestrs)); | ||
| 202 | |||
| 203 | break; | ||
| 204 | } | ||
| 205 | |||
| 206 | case type::preposition: | ||
| 207 | { | ||
| 208 | new(&second.preposition_.choices) std::set<std::string>(std::move(tempChoices)); | ||
| 209 | second.preposition_.literal = tempPrepLiteral; | ||
| 210 | |||
| 211 | break; | ||
| 212 | } | ||
| 213 | |||
| 214 | case type::literal: | ||
| 215 | { | ||
| 216 | new(&second.literal_) std::string(std::move(tempLiteralValue)); | ||
| 217 | |||
| 218 | break; | ||
| 219 | } | ||
| 220 | |||
| 221 | case type::verb: | ||
| 222 | case type::adjective: | ||
| 223 | case type::adverb: | ||
| 224 | case type::invalid: | ||
| 225 | { | ||
| 226 | break; | ||
| 227 | } | ||
| 228 | } | ||
| 229 | } | ||
| 230 | |||
| 231 | part::~part() | ||
| 232 | { | ||
| 233 | switch (type_) | ||
| 234 | { | ||
| 235 | case type::noun_phrase: | ||
| 236 | { | ||
| 237 | using string_type = std::string; | ||
| 238 | using set_type = std::set<std::string>; | ||
| 239 | |||
| 240 | noun_phrase_.role.~string_type(); | ||
| 241 | noun_phrase_.selrestrs.~selrestr(); | ||
| 242 | noun_phrase_.synrestrs.~set_type(); | ||
| 243 | |||
| 244 | break; | ||
| 245 | } | ||
| 246 | |||
| 247 | case type::preposition: | ||
| 248 | { | ||
| 249 | using set_type = std::set<std::string>; | ||
| 250 | |||
| 251 | preposition_.choices.~set_type(); | ||
| 252 | |||
| 253 | break; | ||
| 254 | } | ||
| 255 | |||
| 256 | case type::literal: | ||
| 257 | { | ||
| 258 | using string_type = std::string; | ||
| 259 | |||
| 260 | literal_.~string_type(); | ||
| 261 | |||
| 262 | break; | ||
| 263 | } | ||
| 264 | |||
| 265 | case type::verb: | ||
| 266 | case type::adjective: | ||
| 267 | case type::adverb: | ||
| 268 | case type::invalid: | ||
| 269 | { | ||
| 270 | break; | ||
| 271 | } | ||
| 272 | } | ||
| 273 | } | ||
| 274 | |||
| 275 | std::string part::getNounRole() const | ||
| 276 | { | ||
| 277 | if (type_ == type::noun_phrase) | ||
| 278 | { | ||
| 279 | return noun_phrase_.role; | ||
| 280 | } else { | ||
| 281 | throw std::domain_error("part::getNounRole is only valid for noun phrase parts"); | ||
| 282 | } | ||
| 283 | } | ||
| 284 | |||
| 285 | selrestr part::getNounSelrestrs() const | ||
| 286 | { | ||
| 287 | if (type_ == type::noun_phrase) | ||
| 288 | { | ||
| 289 | return noun_phrase_.selrestrs; | ||
| 290 | } else { | ||
| 291 | throw std::domain_error("part::getNounSelrestrs is only valid for noun phrase parts"); | ||
| 292 | } | ||
| 293 | } | ||
| 294 | |||
| 295 | std::set<std::string> part::getNounSynrestrs() const | ||
| 296 | { | ||
| 297 | if (type_ == type::noun_phrase) | ||
| 298 | { | ||
| 299 | return noun_phrase_.synrestrs; | ||
| 300 | } else { | ||
| 301 | throw std::domain_error("part::getNounSynrestrs is only valid for noun phrase parts"); | ||
| 302 | } | ||
| 303 | } | ||
| 304 | |||
| 305 | std::set<std::string> part::getPrepositionChoices() const | ||
| 306 | { | ||
| 307 | if (type_ == type::preposition) | ||
| 308 | { | ||
| 309 | return preposition_.choices; | ||
| 310 | } else { | ||
| 311 | throw std::domain_error("part::getPrepositionChoices is only valid for preposition parts"); | ||
| 312 | } | ||
| 313 | } | ||
| 314 | |||
| 315 | bool part::isPrepositionLiteral() const | ||
| 316 | { | ||
| 317 | if (type_ == type::preposition) | ||
| 318 | { | ||
| 319 | return preposition_.literal; | ||
| 320 | } else { | ||
| 321 | throw std::domain_error("part::isPrepositionLiteral is only valid for preposition parts"); | ||
| 322 | } | ||
| 323 | } | ||
| 324 | |||
| 325 | std::string part::getLiteralValue() const | ||
| 326 | { | ||
| 327 | if (type_ == type::literal) | ||
| 328 | { | ||
| 329 | return literal_; | ||
| 330 | } else { | ||
| 331 | throw std::domain_error("part::getLiteralValue is only valid for literal parts"); | ||
| 332 | } | ||
| 333 | } | ||
| 334 | |||
| 335 | }; | ||
| 336 | }; | ||
| diff --git a/generator/part.h b/generator/part.h new file mode 100644 index 0000000..d044630 --- /dev/null +++ b/generator/part.h | |||
| @@ -0,0 +1,114 @@ | |||
| 1 | #ifndef PART_H_FB54F361 | ||
| 2 | #define PART_H_FB54F361 | ||
| 3 | |||
| 4 | #include <string> | ||
| 5 | #include <set> | ||
| 6 | #include "selrestr.h" | ||
| 7 | |||
| 8 | namespace verbly { | ||
| 9 | namespace generator { | ||
| 10 | |||
| 11 | class part { | ||
| 12 | public: | ||
| 13 | enum class type { | ||
| 14 | invalid = -1, | ||
| 15 | noun_phrase = 0, | ||
| 16 | verb = 1, | ||
| 17 | preposition = 2, | ||
| 18 | adjective = 3, | ||
| 19 | adverb = 4, | ||
| 20 | literal = 5 | ||
| 21 | }; | ||
| 22 | |||
| 23 | // Static factories | ||
| 24 | |||
| 25 | static part createNounPhrase(std::string role, selrestr selrestrs, std::set<std::string> synrestrs); | ||
| 26 | |||
| 27 | static part createVerb(); | ||
| 28 | |||
| 29 | static part createPreposition(std::set<std::string> choices, bool literal); | ||
| 30 | |||
| 31 | static part createAdjective(); | ||
| 32 | |||
| 33 | static part createAdverb(); | ||
| 34 | |||
| 35 | static part createLiteral(std::string value); | ||
| 36 | |||
| 37 | // Copy and move constructors | ||
| 38 | |||
| 39 | part(const part& other); | ||
| 40 | |||
| 41 | part(part&& other); | ||
| 42 | |||
| 43 | // Assignment | ||
| 44 | |||
| 45 | part& operator=(part other); | ||
| 46 | |||
| 47 | // Swap | ||
| 48 | |||
| 49 | friend void swap(part& first, part& second); | ||
| 50 | |||
| 51 | // Destructor | ||
| 52 | |||
| 53 | ~part(); | ||
| 54 | |||
| 55 | // General accessors | ||
| 56 | |||
| 57 | type getType() const | ||
| 58 | { | ||
| 59 | return type_; | ||
| 60 | } | ||
| 61 | |||
| 62 | // Noun phrase accessors | ||
| 63 | |||
| 64 | std::string getNounRole() const; | ||
| 65 | |||
| 66 | selrestr getNounSelrestrs() const; | ||
| 67 | |||
| 68 | std::set<std::string> getNounSynrestrs() const; | ||
| 69 | |||
| 70 | // Preposition accessors | ||
| 71 | |||
| 72 | std::set<std::string> getPrepositionChoices() const; | ||
| 73 | |||
| 74 | bool isPrepositionLiteral() const; | ||
| 75 | |||
| 76 | // Literal accessors | ||
| 77 | |||
| 78 | std::string getLiteralValue() const; | ||
| 79 | |||
| 80 | private: | ||
| 81 | |||
| 82 | // Private constructors | ||
| 83 | |||
| 84 | part() | ||
| 85 | { | ||
| 86 | } | ||
| 87 | |||
| 88 | part(type t) : type_(t) | ||
| 89 | { | ||
| 90 | } | ||
| 91 | |||
| 92 | // Data | ||
| 93 | |||
| 94 | union { | ||
| 95 | struct { | ||
| 96 | std::string role; | ||
| 97 | selrestr selrestrs; | ||
| 98 | std::set<std::string> synrestrs; | ||
| 99 | } noun_phrase_; | ||
| 100 | struct { | ||
| 101 | std::set<std::string> choices; | ||
| 102 | bool literal; | ||
| 103 | } preposition_; | ||
| 104 | std::string literal_; | ||
| 105 | }; | ||
| 106 | |||
| 107 | type type_ = type::invalid; | ||
| 108 | |||
| 109 | }; | ||
| 110 | |||
| 111 | }; | ||
| 112 | }; | ||
| 113 | |||
| 114 | #endif /* end of include guard: PART_H_FB54F361 */ | ||
| diff --git a/generator/progress.h b/generator/progress.h index 81f07a3..fcb680d 100644 --- a/generator/progress.h +++ b/generator/progress.h | |||
| @@ -3,48 +3,54 @@ | |||
| 3 | 3 | ||
| 4 | #include <string> | 4 | #include <string> |
| 5 | 5 | ||
namespace verbly {
  namespace generator {

    // Console progress indicator: prints a message followed by a
    // percentage that is rewritten in place as work advances.
    //
    // NOTE(review): std::cout is used here, but the only include visible
    // in this header is <string>; the including file presumably provides
    // <iostream> - confirm.
    class progress {
    private:
      std::string message;
      int total;
      int cur = 0;
      int lprint = 0;  // last percentage that was printed

    public:
      // Prints the message and the initial percentage.
      // total is the number of steps that corresponds to 100%.
      progress(std::string message, int total) : message(std::move(message)), total(total)
      {
        std::cout << this->message << " 0%" << std::flush;
      }

      // Sets the current step to val (capped at total) and reprints the
      // percentage if it changed.
      void update(int val)
      {
        cur = (val <= total) ? val : total;

        // With nothing to count there is no meaningful percentage, and
        // dividing by total would be a division by zero.
        if (total <= 0)
        {
          return;
        }

        // Widen before multiplying so large step counts cannot overflow.
        const int pp = static_cast<int>(static_cast<long long>(cur) * 100 / total);
        if (pp != lprint)
        {
          lprint = pp;

          // NOTE(review): the four backspaces assume the previously
          // printed percentage field is four characters wide - confirm
          // that the initial literal in the constructor matches.
          std::cout << "\b\b\b\b" << std::right;
          std::cout.width(3);
          std::cout << pp << "%" << std::flush;
        }
      }

      // Advances by one step.
      void update()
      {
        update(cur+1);
      }

      // Always finishes the line at 100%.
      ~progress()
      {
        std::cout << "\b\b\b\b100%" << std::endl;
      }
    };

  };
};
| 49 | 55 | ||
| 50 | #endif /* end of include guard: PROGRESS_H_A34EF856 */ | 56 | #endif /* end of include guard: PROGRESS_H_A34EF856 */ |
| diff --git a/generator/pronunciation.cpp b/generator/pronunciation.cpp new file mode 100644 index 0000000..eb07607 --- /dev/null +++ b/generator/pronunciation.cpp | |||
| @@ -0,0 +1,87 @@ | |||
| 1 | #include "pronunciation.h" | ||
| 2 | #include <list> | ||
| 3 | #include <algorithm> | ||
| 4 | #include <cctype> | ||
| 5 | #include <iterator> | ||
| 6 | #include "database.h" | ||
| 7 | #include "field.h" | ||
| 8 | #include "../lib/util.h" | ||
| 9 | |||
| 10 | namespace verbly { | ||
| 11 | namespace generator { | ||
| 12 | |||
| 13 | int pronunciation::nextId_ = 0; | ||
| 14 | |||
| 15 | pronunciation::pronunciation(std::string phonemes) : | ||
| 16 | id_(nextId_++), | ||
| 17 | phonemes_(phonemes) | ||
| 18 | { | ||
| 19 | auto phonemeList = split<std::list<std::string>>(phonemes, " "); | ||
| 20 | |||
| 21 | auto rhymeStart = std::find_if(std::begin(phonemeList), std::end(phonemeList), [] (std::string phoneme) { | ||
| 22 | return phoneme.find("1") != std::string::npos; | ||
| 23 | }); | ||
| 24 | |||
| 25 | // Rhyme detection | ||
| 26 | if (rhymeStart != std::end(phonemeList)) | ||
| 27 | { | ||
| 28 | std::list<std::string> rhymePhonemes; | ||
| 29 | |||
| 30 | std::transform(rhymeStart, std::end(phonemeList), std::back_inserter(rhymePhonemes), [] (std::string phoneme) { | ||
| 31 | std::string naked; | ||
| 32 | |||
| 33 | std::remove_copy_if(std::begin(phoneme), std::end(phoneme), std::back_inserter(naked), [] (char ch) { | ||
| 34 | return std::isdigit(ch); | ||
| 35 | }); | ||
| 36 | |||
| 37 | return naked; | ||
| 38 | }); | ||
| 39 | |||
| 40 | rhyme_ = implode(std::begin(rhymePhonemes), std::end(rhymePhonemes), " "); | ||
| 41 | |||
| 42 | if (rhymeStart != std::begin(phonemeList)) | ||
| 43 | { | ||
| 44 | prerhyme_ = *std::prev(rhymeStart); | ||
| 45 | } | ||
| 46 | } | ||
| 47 | |||
| 48 | // Syllable/stress | ||
| 49 | for (std::string phoneme : phonemeList) | ||
| 50 | { | ||
| 51 | if (std::isdigit(phoneme.back())) | ||
| 52 | { | ||
| 53 | // It's a vowel! | ||
| 54 | syllables_++; | ||
| 55 | |||
| 56 | if (phoneme.back() == '1') | ||
| 57 | { | ||
| 58 | stress_.push_back('1'); | ||
| 59 | } else { | ||
| 60 | stress_.push_back('0'); | ||
| 61 | } | ||
| 62 | } | ||
| 63 | } | ||
| 64 | } | ||
| 65 | |||
| 66 | database& operator<<(database& db, const pronunciation& arg) | ||
| 67 | { | ||
| 68 | std::list<field> fields; | ||
| 69 | |||
| 70 | fields.emplace_back("pronunciation_id", arg.getId()); | ||
| 71 | fields.emplace_back("phonemes", arg.getPhonemes()); | ||
| 72 | fields.emplace_back("syllables", arg.getSyllables()); | ||
| 73 | fields.emplace_back("stress", arg.getStress()); | ||
| 74 | |||
| 75 | if (arg.hasRhyme()) | ||
| 76 | { | ||
| 77 | fields.emplace_back("rhyme", arg.getRhymePhonemes()); | ||
| 78 | fields.emplace_back("prerhyme", arg.getPrerhyme()); | ||
| 79 | } | ||
| 80 | |||
| 81 | db.insertIntoTable("pronunciations", std::move(fields)); | ||
| 82 | |||
| 83 | return db; | ||
| 84 | } | ||
| 85 | |||
| 86 | }; | ||
| 87 | }; | ||
| diff --git a/generator/pronunciation.h b/generator/pronunciation.h new file mode 100644 index 0000000..81be6c4 --- /dev/null +++ b/generator/pronunciation.h | |||
| @@ -0,0 +1,82 @@ | |||
| 1 | #ifndef PRONUNCIATION_H_584A08DD | ||
| 2 | #define PRONUNCIATION_H_584A08DD | ||
| 3 | |||
| 4 | #include <string> | ||
| 5 | #include <cassert> | ||
| 6 | |||
| 7 | namespace verbly { | ||
| 8 | namespace generator { | ||
| 9 | |||
| 10 | class database; | ||
| 11 | |||
// One pronunciation of a form, together with the rhyme, syllable and stress
// information that the constructor derives from its phoneme string.
class pronunciation {
public:

  // Constructor (defined out of line).
  explicit pronunciation(std::string phonemes);

  // Identifier assigned to this pronunciation at construction.
  int getId() const { return id_; }

  // The phoneme string this pronunciation was built from.
  std::string getPhonemes() const { return phonemes_; }

  // True when a rhyme was computed for this pronunciation.
  bool hasRhyme() const { return !rhyme_.empty(); }

  // The rhyming phonemes. Calling code should always call hasRhyme first;
  // this is enforced with an assertion.
  std::string getRhymePhonemes() const
  {
    assert(!rhyme_.empty());

    return rhyme_;
  }

  // The phoneme recorded as preceding the rhyme. Calling code should always
  // call hasRhyme first; this is enforced with an assertion.
  std::string getPrerhyme() const
  {
    assert(!rhyme_.empty());

    return prerhyme_;
  }

  // Number of syllables counted in the phoneme string.
  int getSyllables() const { return syllables_; }

  // Stress pattern string built alongside the syllable count.
  std::string getStress() const { return stress_; }

private:

  // Source of identifiers shared by all pronunciations.
  static int nextId_;

  // Declaration order below is also the initialization order; keep it stable.
  const int id_;
  const std::string phonemes_;
  std::string rhyme_;
  std::string prerhyme_;
  int syllables_ = 0;
  std::string stress_;

};
| 74 | |||
| 75 | // Serializer | ||
| 76 | |||
| 77 | database& operator<<(database& db, const pronunciation& arg); | ||
| 78 | |||
| 79 | }; | ||
| 80 | }; | ||
| 81 | |||
| 82 | #endif /* end of include guard: PRONUNCIATION_H_584A08DD */ | ||
| diff --git a/generator/role.h b/generator/role.h new file mode 100644 index 0000000..5fa68b8 --- /dev/null +++ b/generator/role.h | |||
| @@ -0,0 +1,35 @@ | |||
| 1 | #ifndef ROLE_H_249F9A9C | ||
| 2 | #define ROLE_H_249F9A9C | ||
| 3 | |||
| 4 | #include "selrestr.h" | ||
| 5 | |||
| 6 | namespace verbly { | ||
| 7 | namespace generator { | ||
| 8 | |||
| 9 | class role { | ||
| 10 | public: | ||
| 11 | |||
| 12 | // Mutators | ||
| 13 | |||
| 14 | void setSelrestrs(selrestr selrestrs) | ||
| 15 | { | ||
| 16 | selrestrs_ = selrestrs; | ||
| 17 | } | ||
| 18 | |||
| 19 | // Accessors | ||
| 20 | |||
| 21 | const selrestr& getSelrestrs() const | ||
| 22 | { | ||
| 23 | return selrestrs_; | ||
| 24 | } | ||
| 25 | |||
| 26 | private: | ||
| 27 | |||
| 28 | selrestr selrestrs_; | ||
| 29 | |||
| 30 | }; | ||
| 31 | |||
| 32 | }; | ||
| 33 | }; | ||
| 34 | |||
| 35 | #endif /* end of include guard: ROLE_H_249F9A9C */ | ||
| diff --git a/generator/schema.sql b/generator/schema.sql index 410b536..c3e54d8 100644 --- a/generator/schema.sql +++ b/generator/schema.sql | |||
| @@ -1,286 +1,204 @@ | |||
| 1 | DROP TABLE IF EXISTS `verbs`; | 1 | CREATE TABLE `notions` ( |
| 2 | CREATE TABLE `verbs` ( | 2 | `notion_id` INTEGER PRIMARY KEY, |
| 3 | `verb_id` INTEGER PRIMARY KEY, | 3 | `part_of_speech` SMALLINT NOT NULL, |
| 4 | `infinitive` VARCHAR(32) NOT NULL, | 4 | `wnid` INTEGER, |
| 5 | `past_tense` VARCHAR(32) NOT NULL, | 5 | `images` INTEGER |
| 6 | `past_participle` VARCHAR(32) NOT NULL, | ||
| 7 | `ing_form` VARCHAR(32) NOT NULL, | ||
| 8 | `s_form` VARCHAR(32) NOT NULL | ||
| 9 | ); | 6 | ); |
| 10 | 7 | ||
| 11 | DROP TABLE IF EXISTS `groups`; | 8 | CREATE UNIQUE INDEX `notion_by_wnid` ON `notions`(`wnid`); |
| 12 | CREATE TABLE `groups` ( | ||
| 13 | `group_id` INTEGER PRIMARY KEY, | ||
| 14 | `data` BLOB NOT NULL | ||
| 15 | ); | ||
| 16 | |||
| 17 | DROP TABLE IF EXISTS `frames`; | ||
| 18 | CREATE TABLE `frames` ( | ||
| 19 | `frame_id` INTEGER PRIMARY KEY, | ||
| 20 | `group_id` INTEGER NOT NULL, | ||
| 21 | `data` BLOB NOT NULL, | ||
| 22 | FOREIGN KEY (`group_id`) REFERENCES `groups`(`group_id`) | ||
| 23 | ); | ||
| 24 | 9 | ||
| 25 | DROP TABLE IF EXISTS `verb_groups`; | ||
| 26 | CREATE TABLE `verb_groups` ( | ||
| 27 | `verb_id` INTEGER NOT NULL, | ||
| 28 | `group_id` INTEGER NOT NULL, | ||
| 29 | FOREIGN KEY (`verb_id`) REFERENCES `verbs`(`verb_id`), | ||
| 30 | FOREIGN KEY (`group_id`) REFERENCES `groups`(`group_id`) | ||
| 31 | ); | ||
| 32 | |||
| 33 | DROP TABLE IF EXISTS `adjectives`; | ||
| 34 | CREATE TABLE `adjectives` ( | ||
| 35 | `adjective_id` INTEGER PRIMARY KEY, | ||
| 36 | `base_form` VARCHAR(32) NOT NULL, | ||
| 37 | `comparative` VARCHAR(32), | ||
| 38 | `superlative` VARCHAR(32), | ||
| 39 | `position` CHAR(1), | ||
| 40 | `complexity` INTEGER NOT NULL | ||
| 41 | ); | ||
| 42 | |||
| 43 | DROP TABLE IF EXISTS `adverbs`; | ||
| 44 | CREATE TABLE `adverbs` ( | ||
| 45 | `adverb_id` INTEGER PRIMARY KEY, | ||
| 46 | `base_form` VARCHAR(32) NOT NULL, | ||
| 47 | `comparative` VARCHAR(32), | ||
| 48 | `superlative` VARCHAR(32), | ||
| 49 | `complexity` INTEGER NOT NULL | ||
| 50 | ); | ||
| 51 | |||
| 52 | DROP TABLE IF EXISTS `nouns`; | ||
| 53 | CREATE TABLE `nouns` ( | ||
| 54 | `noun_id` INTEGER PRIMARY KEY, | ||
| 55 | `singular` VARCHAR(32) NOT NULL, | ||
| 56 | `plural` VARCHAR(32), | ||
| 57 | `proper` INTEGER(1) NOT NULL, | ||
| 58 | `complexity` INTEGER NOT NULL, | ||
| 59 | `images` INTEGER NOT NULL, | ||
| 60 | `wnid` INTEGER NOT NULL | ||
| 61 | ); | ||
| 62 | |||
| 63 | DROP TABLE IF EXISTS `hypernymy`; | ||
| 64 | CREATE TABLE `hypernymy` ( | 10 | CREATE TABLE `hypernymy` ( |
| 65 | `hypernym_id` INTEGER NOT NULL, | 11 | `hypernym_id` INTEGER NOT NULL, |
| 66 | `hyponym_id` INTEGER NOT NULL, | 12 | `hyponym_id` INTEGER NOT NULL |
| 67 | FOREIGN KEY (`hypernym_id`) REFERENCES `nouns`(`noun_id`), | ||
| 68 | FOREIGN KEY (`hyponym_id`) REFERENCES `nouns`(`noun_id`) | ||
| 69 | ); | 13 | ); |
| 70 | 14 | ||
| 71 | DROP TABLE IF EXISTS `instantiation`; | 15 | CREATE INDEX `hyponym_of` ON `hypernymy`(`hypernym_id`); |
| 16 | CREATE INDEX `hypernym_of` ON `hypernymy`(`hyponym_id`); | ||
| 17 | |||
| 72 | CREATE TABLE `instantiation` ( | 18 | CREATE TABLE `instantiation` ( |
| 73 | `class_id` INTEGER NOT NULL, | 19 | `class_id` INTEGER NOT NULL, |
| 74 | `instance_id` INTEGER NOT NULL, | 20 | `instance_id` INTEGER NOT NULL |
| 75 | FOREIGN KEY (`class_id`) REFERENCES `nouns`(`noun_id`), | ||
| 76 | FOREIGN KEY (`instance_id`) REFERENCES `nouns`(`noun_id`) | ||
| 77 | ); | 21 | ); |
| 78 | 22 | ||
| 79 | DROP TABLE IF EXISTS `member_meronymy`; | 23 | CREATE INDEX `instance_of` ON `instantiation`(`class_id`); |
| 24 | CREATE INDEX `class_of` ON `instantiation`(`instance_id`); | ||
| 25 | |||
| 80 | CREATE TABLE `member_meronymy` ( | 26 | CREATE TABLE `member_meronymy` ( |
| 81 | `meronym_id` INTEGER NOT NULL, | 27 | `meronym_id` INTEGER NOT NULL, |
| 82 | `holonym_id` INTEGER NOT NULL, | 28 | `holonym_id` INTEGER NOT NULL |
| 83 | FOREIGN KEY (`meronym_id`) REFERENCES `nouns`(`noun_id`), | ||
| 84 | FOREIGN KEY (`holonym_id`) REFERENCES `nouns`(`noun_id`) | ||
| 85 | ); | 29 | ); |
| 86 | 30 | ||
| 87 | DROP TABLE IF EXISTS `part_meronymy`; | 31 | CREATE INDEX `member_holonym_of` ON `member_meronymy`(`meronym_id`); |
| 32 | CREATE INDEX `member_meronym_of` ON `member_meronymy`(`holonym_id`); | ||
| 33 | |||
| 88 | CREATE TABLE `part_meronymy` ( | 34 | CREATE TABLE `part_meronymy` ( |
| 89 | `meronym_id` INTEGER NOT NULL, | 35 | `meronym_id` INTEGER NOT NULL, |
| 90 | `holonym_id` INTEGER NOT NULL, | 36 | `holonym_id` INTEGER NOT NULL |
| 91 | FOREIGN KEY (`meronym_id`) REFERENCES `nouns`(`noun_id`), | ||
| 92 | FOREIGN KEY (`holonym_id`) REFERENCES `nouns`(`noun_id`) | ||
| 93 | ); | 37 | ); |
| 94 | 38 | ||
| 95 | DROP TABLE IF EXISTS `substance_meronymy`; | 39 | CREATE INDEX `part_holonym_of` ON `part_meronymy`(`meronym_id`); |
| 40 | CREATE INDEX `part_meronym_of` ON `part_meronymy`(`holonym_id`); | ||
| 41 | |||
| 96 | CREATE TABLE `substance_meronymy` ( | 42 | CREATE TABLE `substance_meronymy` ( |
| 97 | `meronym_id` INTEGER NOT NULL, | 43 | `meronym_id` INTEGER NOT NULL, |
| 98 | `holonym_id` INTEGER NOT NULL, | 44 | `holonym_id` INTEGER NOT NULL |
| 99 | FOREIGN KEY (`meronym_id`) REFERENCES `nouns`(`noun_id`), | ||
| 100 | FOREIGN KEY (`holonym_id`) REFERENCES `nouns`(`noun_id`) | ||
| 101 | ); | 45 | ); |
| 102 | 46 | ||
| 103 | DROP TABLE IF EXISTS `variation`; | 47 | CREATE INDEX `substance_holonym_of` ON `substance_meronymy`(`meronym_id`); |
| 48 | CREATE INDEX `substance_meronym_of` ON `substance_meronymy`(`holonym_id`); | ||
| 49 | |||
| 104 | CREATE TABLE `variation` ( | 50 | CREATE TABLE `variation` ( |
| 105 | `noun_id` INTEGER NOT NULL, | 51 | `noun_id` INTEGER NOT NULL, |
| 106 | `adjective_id` INTEGER NOT NULL, | 52 | `adjective_id` INTEGER NOT NULL |
| 107 | FOREIGN KEY (`noun_id`) REFERENCES `nouns`(`noun_id`), | ||
| 108 | FOREIGN KEY (`adjective_id`) REFERENCES `adjectives`(`adjective_id`) | ||
| 109 | ); | 53 | ); |
| 110 | 54 | ||
| 111 | DROP TABLE IF EXISTS `noun_antonymy`; | 55 | CREATE INDEX `variant_of` ON `variation`(`noun_id`); |
| 112 | CREATE TABLE `noun_antonymy` ( | 56 | CREATE INDEX `attribute_of` ON `variation`(`adjective_id`); |
| 113 | `noun_1_id` INTEGER NOT NULL, | ||
| 114 | `noun_2_id` INTEGER NOT NULL, | ||
| 115 | FOREIGN KEY (`noun_1_id`) REFERENCES `nouns`(`noun_id`), | ||
| 116 | FOREIGN KEY (`noun_2_id`) REFERENCES `nouns`(`noun_id`) | ||
| 117 | ); | ||
| 118 | 57 | ||
| 119 | DROP TABLE IF EXISTS `adjective_antonymy`; | 58 | CREATE TABLE `similarity` ( |
| 120 | CREATE TABLE `adjective_antonymy` ( | ||
| 121 | `adjective_1_id` INTEGER NOT NULL, | 59 | `adjective_1_id` INTEGER NOT NULL, |
| 122 | `adjective_2_id` INTEGER NOT NULL, | 60 | `adjective_2_id` INTEGER NOT NULL |
| 123 | FOREIGN KEY (`adjective_1_id`) REFERENCES `adjectives`(`adjective_id`), | 61 | ); |
| 124 | FOREIGN KEY (`adjective_2_id`) REFERENCES `adjectives`(`adjective_id`) | 62 | |
| 63 | CREATE INDEX `similar_to` ON `similarity`(`adjective_1_id`); | ||
| 64 | |||
| 65 | CREATE TABLE `is_a` ( | ||
| 66 | `notion_id` INTEGER NOT NULL, | ||
| 67 | `groupname` VARCHAR(32) NOT NULL | ||
| 125 | ); | 68 | ); |
| 126 | 69 | ||
| 127 | DROP TABLE IF EXISTS `adverb_antonymy`; | 70 | CREATE TABLE `entailment` ( |
| 128 | CREATE TABLE `adverb_antonymy` ( | 71 | `given_id` INTEGER NOT NULL, |
| 129 | `adverb_1_id` INTEGER NOT NULL, | 72 | `entailment_id` INTEGER NOT NULL |
| 130 | `adverb_2_id` INTEGER NOT NULL, | 73 | ); |
| 131 | FOREIGN KEY (`adverb_1_id`) REFERENCES `adverbs`(`adverb_id`), | 74 | |
| 132 | FOREIGN KEY (`adverb_2_id`) REFERENCES `adverbs`(`adverb_id`) | 75 | CREATE INDEX `entailment_of` ON `entailment`(`given_id`); |
| 76 | CREATE INDEX `entailed_by` ON `entailment`(`entailment_id`); | ||
| 77 | |||
| 78 | CREATE TABLE `causality` ( | ||
| 79 | `cause_id` INTEGER NOT NULL, | ||
| 80 | `effect_id` INTEGER NOT NULL | ||
| 81 | ); | ||
| 82 | |||
| 83 | CREATE INDEX `effect_of` ON `causality`(`cause_id`); | ||
| 84 | CREATE INDEX `cause_of` ON `causality`(`effect_id`); | ||
| 85 | |||
| 86 | CREATE TABLE `words` ( | ||
| 87 | `word_id` INTEGER PRIMARY KEY, | ||
| 88 | `notion_id` INTEGER NOT NULL, | ||
| 89 | `lemma_id` INTEGER NOT NULL, | ||
| 90 | `tag_count` INTEGER, | ||
| 91 | `position` SMALLINT, | ||
| 92 | `group_id` INTEGER | ||
| 93 | ); | ||
| 94 | |||
| 95 | CREATE INDEX `notion_words` ON `words`(`notion_id`); | ||
| 96 | CREATE INDEX `lemma_words` ON `words`(`lemma_id`); | ||
| 97 | CREATE INDEX `group_words` ON `words`(`group_id`); | ||
| 98 | |||
| 99 | CREATE TABLE `antonymy` ( | ||
| 100 | `antonym_1_id` INTEGER NOT NULL, | ||
| 101 | `antonym_2_id` INTEGER NOT NULL | ||
| 133 | ); | 102 | ); |
| 134 | 103 | ||
| 135 | DROP TABLE IF EXISTS `specification`; | 104 | CREATE INDEX `antonym_of` ON `antonymy`(`antonym_1_id`); |
| 105 | |||
| 136 | CREATE TABLE `specification` ( | 106 | CREATE TABLE `specification` ( |
| 137 | `general_id` INTEGER NOT NULL, | 107 | `general_id` INTEGER NOT NULL, |
| 138 | `specific_id` INTEGER NOT NULL, | 108 | `specific_id` INTEGER NOT NULL |
| 139 | FOREIGN KEY (`general_id`) REFERENCES `adjectives`(`adjective_id`), | ||
| 140 | FOREIGN KEY (`specific_id`) REFERENCES `adjectives`(`adjective_id`) | ||
| 141 | ); | 109 | ); |
| 142 | 110 | ||
| 143 | DROP TABLE IF EXISTS `pertainymy`; | 111 | CREATE INDEX `specification_of` ON `specification`(`general_id`); |
| 112 | CREATE INDEX `generalization_of` ON `specification`(`specific_id`); | ||
| 113 | |||
| 144 | CREATE TABLE `pertainymy` ( | 114 | CREATE TABLE `pertainymy` ( |
| 145 | `noun_id` INTEGER NOT NULL, | 115 | `noun_id` INTEGER NOT NULL, |
| 146 | `pertainym_id` INTEGER NOT NULL, | 116 | `pertainym_id` INTEGER NOT NULL |
| 147 | FOREIGN KEY (`noun_id`) REFERENCES `nouns`(`noun_id`), | ||
| 148 | FOREIGN KEY (`pertainym_id`) REFERENCES `adjectives`(`adjective_id`) | ||
| 149 | ); | 117 | ); |
| 150 | 118 | ||
| 151 | DROP TABLE IF EXISTS `mannernymy`; | 119 | CREATE INDEX `pertainym_of` ON `pertainymy`(`noun_id`); |
| 120 | CREATE INDEX `anti_pertainym_of` ON `pertainymy`(`pertainym_id`); | ||
| 121 | |||
| 152 | CREATE TABLE `mannernymy` ( | 122 | CREATE TABLE `mannernymy` ( |
| 153 | `adjective_id` INTEGER NOT NULL, | 123 | `adjective_id` INTEGER NOT NULL, |
| 154 | `mannernym_id` INTEGER NOT NULL, | 124 | `mannernym_id` INTEGER NOT NULL |
| 155 | FOREIGN KEY (`adjective_id`) REFERENCES `adjectives`(`adjective_id`), | ||
| 156 | FOREIGN KEY (`mannernym_id`) REFERENCES `adverbs`(`adverb_id`) | ||
| 157 | ); | 125 | ); |
| 158 | 126 | ||
| 159 | DROP TABLE IF EXISTS `noun_synonymy`; | 127 | CREATE INDEX `mannernym_of` ON `mannernymy`(`adjective_id`); |
| 160 | CREATE TABLE `noun_synonymy` ( | 128 | CREATE INDEX `anti_mannernym_of` ON `mannernymy`(`mannernym_id`); |
| 161 | `noun_1_id` INTEGER NOT NULL, | ||
| 162 | `noun_2_id` INTEGER NOT NULL, | ||
| 163 | FOREIGN KEY (`noun_1_id`) REFERENCES `nouns`(`nouns_id`), | ||
| 164 | FOREIGN KEY (`noun_2_id`) REFERENCES `nouns`(`nouns_id`) | ||
| 165 | ); | ||
| 166 | 129 | ||
| 167 | DROP TABLE IF EXISTS `adjective_synonymy`; | 130 | CREATE TABLE `usage` ( |
| 168 | CREATE TABLE `adjective_synonymy` ( | 131 | `domain_id` INTEGER NOT NULL, |
| 169 | `adjective_1_id` INTEGER NOT NULL, | 132 | `term_id` INTEGER NOT NULL |
| 170 | `adjective_2_id` INTEGER NOT NULL, | ||
| 171 | FOREIGN KEY (`adjective_1_id`) REFERENCES `adjectives`(`adjective_id`), | ||
| 172 | FOREIGN KEY (`adjective_2_id`) REFERENCES `adjectives`(`adjective_id`) | ||
| 173 | ); | 133 | ); |
| 174 | 134 | ||
| 175 | DROP TABLE IF EXISTS `adverb_synonymy`; | 135 | CREATE INDEX `usage_term_of` ON `usage`(`domain_id`); |
| 176 | CREATE TABLE `adverb_synonymy` ( | 136 | CREATE INDEX `usage_domain_of` ON `usage`(`term_id`); |
| 177 | `adverb_1_id` INTEGER NOT NULL, | ||
| 178 | `adverb_2_id` INTEGER NOT NULL, | ||
| 179 | FOREIGN KEY (`adverb_1_id`) REFERENCES `adverbs`(`adverb_id`), | ||
| 180 | FOREIGN KEY (`adverb_2_id`) REFERENCES `adverbs`(`adverb_id`) | ||
| 181 | ); | ||
| 182 | 137 | ||
| 183 | DROP TABLE IF EXISTS `noun_pronunciations`; | 138 | CREATE TABLE `topicality` ( |
| 184 | CREATE TABLE `noun_pronunciations` ( | 139 | `domain_id` INTEGER NOT NULL, |
| 185 | `noun_id` INTEGER NOT NULL, | 140 | `term_id` INTEGER NOT NULL |
| 186 | `pronunciation` VARCHAR(64) NOT NULL, | ||
| 187 | `prerhyme` VARCHAR(8), | ||
| 188 | `rhyme` VARCHAR(64), | ||
| 189 | `syllables` INT NOT NULL, | ||
| 190 | `stress` VARCHAR(64) NOT NULL, | ||
| 191 | FOREIGN KEY (`noun_id`) REFERENCES `nouns`(`noun_id`) | ||
| 192 | ); | 141 | ); |
| 193 | 142 | ||
| 194 | DROP TABLE IF EXISTS `verb_pronunciations`; | 143 | CREATE INDEX `topical_term_of` ON `topicality`(`domain_id`); |
| 195 | CREATE TABLE `verb_pronunciations` ( | 144 | CREATE INDEX `topical_domain_of` ON `topicality`(`term_id`); |
| 196 | `verb_id` INTEGER NOT NULL, | ||
| 197 | `pronunciation` VARCHAR(64) NOT NULL, | ||
| 198 | `prerhyme` VARCHAR(8), | ||
| 199 | `rhyme` VARCHAR(64), | ||
| 200 | `syllables` INT NOT NULL, | ||
| 201 | `stress` VARCHAR(64) NOT NULL, | ||
| 202 | FOREIGN KEY (`verb_id`) REFERENCES `verbs`(`verb_id`) | ||
| 203 | ); | ||
| 204 | 145 | ||
| 205 | DROP TABLE IF EXISTS `adjective_pronunciations`; | 146 | CREATE TABLE `regionality` ( |
| 206 | CREATE TABLE `adjective_pronunciations` ( | 147 | `domain_id` INTEGER NOT NULL, |
| 207 | `adjective_id` INTEGER NOT NULL, | 148 | `term_id` INTEGER NOT NULL |
| 208 | `pronunciation` VARCHAR(64) NOT NULL, | ||
| 209 | `prerhyme` VARCHAR(8), | ||
| 210 | `rhyme` VARCHAR(64), | ||
| 211 | `syllables` INT NOT NULL, | ||
| 212 | `stress` VARCHAR(64) NOT NULL, | ||
| 213 | FOREIGN KEY (`adjective_id`) REFERENCES `adjectives`(`adjective_id`) | ||
| 214 | ); | 149 | ); |
| 215 | 150 | ||
| 216 | DROP TABLE IF EXISTS `adverb_pronunciations`; | 151 | CREATE INDEX `regional_term_of` ON `regionality`(`domain_id`); |
| 217 | CREATE TABLE `adverb_pronunciations` ( | 152 | CREATE INDEX `regional_domain_of` ON `regionality`(`term_id`); |
| 218 | `adverb_id` INTEGER NOT NULL, | ||
| 219 | `pronunciation` VARCHAR(64) NOT NULL, | ||
| 220 | `prerhyme` VARCHAR(8), | ||
| 221 | `rhyme` VARCHAR(64), | ||
| 222 | `syllables` INT NOT NULL, | ||
| 223 | `stress` VARCHAR(64) NOT NULL, | ||
| 224 | FOREIGN KEY (`adverb_id`) REFERENCES `adverbs`(`adverb_id`) | ||
| 225 | ); | ||
| 226 | 153 | ||
| 227 | DROP TABLE IF EXISTS `noun_noun_derivation`; | 154 | CREATE TABLE `forms` ( |
| 228 | CREATE TABLE `noun_noun_derivation` ( | 155 | `form_id` INTEGER PRIMARY KEY, |
| 229 | `noun_1_id` INTEGER NOT NULL, | 156 | `form` VARCHAR(32) NOT NULL, |
| 230 | `noun_2_id` INTEGER NOT NULL, | 157 | `complexity` SMALLINT NOT NULL, |
| 231 | FOREIGN KEY (`noun_1_id`) REFERENCES `nouns`(`noun_id`), | 158 | `proper` SMALLINT NOT NULL |
| 232 | FOREIGN KEY (`noun_2_id`) REFERENCES `nouns`(`noun_id`) | ||
| 233 | ); | 159 | ); |
| 234 | 160 | ||
| 235 | DROP TABLE IF EXISTS `noun_adjective_derivation`; | 161 | CREATE UNIQUE INDEX `form_by_string` ON `forms`(`form`); |
| 236 | CREATE TABLE `noun_adjective_derivation` ( | ||
| 237 | `noun_id` INTEGER NOT NULL, | ||
| 238 | `adjective_id` INTEGER NOT NULL, | ||
| 239 | FOREIGN KEY (`noun_id`) REFERENCES `nouns`(`noun_id`), | ||
| 240 | FOREIGN KEY (`adjective_id`) REFERENCES `adjectives`(`adjective_id`) | ||
| 241 | ); | ||
| 242 | 162 | ||
| 243 | DROP TABLE IF EXISTS `noun_adverb_derivation`; | 163 | CREATE TABLE `lemmas_forms` ( |
| 244 | CREATE TABLE `noun_adverb_derivation` ( | 164 | `lemma_id` INTEGER NOT NULL, |
| 245 | `noun_id` INTEGER NOT NULL, | 165 | `form_id` INTEGER NOT NULL, |
| 246 | `adverb_id` INTEGER NOT NULL, | 166 | `category` SMALLINT NOT NULL |
| 247 | FOREIGN KEY (`noun_id`) REFERENCES `nouns`(`noun_id`), | ||
| 248 | FOREIGN KEY (`adverb_id`) REFERENCES `adverbs`(`adverb_id`) | ||
| 249 | ); | 167 | ); |
| 250 | 168 | ||
| 251 | DROP TABLE IF EXISTS `adjective_adjective_derivation`; | 169 | CREATE INDEX `form_of` ON `lemmas_forms`(`lemma_id`); |
| 252 | CREATE TABLE `adjective_adjective_derivation` ( | 170 | CREATE INDEX `lemma_of` ON `lemmas_forms`(`form_id`); |
| 253 | `adjective_1_id` INTEGER NOT NULL, | 171 | |
| 254 | `adjective_2_id` INTEGER NOT NULL, | 172 | CREATE TABLE `pronunciations` ( |
| 255 | FOREIGN KEY (`adjective_1_id`) REFERENCES `adjectives`(`adjective_id`), | 173 | `pronunciation_id` INTEGER PRIMARY KEY, |
| 256 | FOREIGN KEY (`adjective_2_id`) REFERENCES `adjectives`(`adjective_id`) | 174 | `phonemes` VARCHAR(64) NOT NULL, |
| 175 | `prerhyme` VARCHAR(8), | ||
| 176 | `rhyme` VARCHAR(64), | ||
| 177 | `syllables` INTEGER NOT NULL, | ||
| 178 | `stress` VARCHAR(64) NOT NULL | ||
| 257 | ); | 179 | ); |
| 258 | 180 | ||
| 259 | DROP TABLE IF EXISTS `adjective_adverb_derivation`; | 181 | CREATE TABLE `forms_pronunciations` ( |
| 260 | CREATE TABLE `adjective_adverb_derivation` ( | 182 | `form_id` INTEGER NOT NULL, |
| 261 | `adjective_id` INTEGER NOT NULL, | 183 | `pronunciation_id` INTEGER NOT NULL |
| 262 | `adverb_id` INTEGER NOT NULL, | ||
| 263 | FOREIGN KEY (`adjective_id`) REFERENCES `adjectives`(`adjective_id`), | ||
| 264 | FOREIGN KEY (`adverb_id`) REFERENCES `adverbs`(`adjective_id`) | ||
| 265 | ); | 184 | ); |
| 266 | 185 | ||
| 267 | DROP TABLE IF EXISTS `adverb_adverb_derivation`; | 186 | CREATE INDEX `pronunciation_of` ON `forms_pronunciations`(`form_id`); |
| 268 | CREATE TABLE `adverb_adverb_derivation` ( | 187 | CREATE INDEX `spelling_of` ON `forms_pronunciations`(`pronunciation_id`); |
| 269 | `adverb_1_id` INTEGER NOT NULL, | 188 | |
| 270 | `adverb_2_id` INTEGER NOT NULL, | 189 | CREATE TABLE `groups` ( |
| 271 | FOREIGN KEY (`adverb_1_id`) REFERENCES `adverbs`(`adverb_id`), | 190 | `group_id` INTEGER PRIMARY KEY, |
| 272 | FOREIGN KEY (`adverb_2_id`) REFERENCES `adverbs`(`adverb_id`) | 191 | `data` BLOB NOT NULL |
| 273 | ); | 192 | ); |
| 274 | 193 | ||
| 275 | DROP TABLE IF EXISTS `prepositions`; | 194 | CREATE TABLE `frames` ( |
| 276 | CREATE TABLE `prepositions` ( | 195 | `frame_id` INTEGER PRIMARY KEY, |
| 277 | `preposition_id` INTEGER PRIMARY KEY, | 196 | `data` BLOB NOT NULL |
| 278 | `form` VARCHAR(32) NOT NULL | ||
| 279 | ); | 197 | ); |
| 280 | 198 | ||
| 281 | DROP TABLE IF EXISTS `preposition_groups`; | 199 | CREATE TABLE `groups_frames` ( |
| 282 | CREATE TABLE `preposition_groups` ( | 200 | `group_id` INTEGER NOT NULL, |
| 283 | `preposition_id` INTEGER NOT NULL, | 201 | `frame_id` INTEGER NOT NULL |
| 284 | `groupname` VARCHAR(32) NOT NULL, | ||
| 285 | FOREIGN KEY (`preposition_id`) REFERENCES `prepositions`(`preposition_id`) | ||
| 286 | ); | 202 | ); |
| 203 | |||
| 204 | CREATE INDEX `frames_in` ON `groups_frames`(`group_id`); | ||
| diff --git a/generator/selrestr.cpp b/generator/selrestr.cpp new file mode 100644 index 0000000..8bdd3f6 --- /dev/null +++ b/generator/selrestr.cpp | |||
| @@ -0,0 +1,288 @@ | |||
#include "selrestr.h"
#include <algorithm>
#include <iterator>
#include <new>
#include <stdexcept>
#include <utility>
| 2 | |||
| 3 | namespace verbly { | ||
| 4 | namespace generator { | ||
| 5 | |||
| 6 | selrestr::selrestr(const selrestr& other) | ||
| 7 | { | ||
| 8 | type_ = other.type_; | ||
| 9 | |||
| 10 | switch (type_) | ||
| 11 | { | ||
| 12 | case type::singleton: | ||
| 13 | { | ||
| 14 | singleton_.pos = other.singleton_.pos; | ||
| 15 | new(&singleton_.restriction) std::string(other.singleton_.restriction); | ||
| 16 | |||
| 17 | break; | ||
| 18 | } | ||
| 19 | |||
| 20 | case type::group: | ||
| 21 | { | ||
| 22 | new(&group_.children) std::list<selrestr>(other.group_.children); | ||
| 23 | group_.orlogic = other.group_.orlogic; | ||
| 24 | |||
| 25 | break; | ||
| 26 | } | ||
| 27 | |||
| 28 | case type::empty: | ||
| 29 | { | ||
| 30 | break; | ||
| 31 | } | ||
| 32 | } | ||
| 33 | } | ||
| 34 | |||
| 35 | selrestr::selrestr(selrestr&& other) : selrestr() | ||
| 36 | { | ||
| 37 | swap(*this, other); | ||
| 38 | } | ||
| 39 | |||
| 40 | selrestr& selrestr::operator=(selrestr other) | ||
| 41 | { | ||
| 42 | swap(*this, other); | ||
| 43 | |||
| 44 | return *this; | ||
| 45 | } | ||
| 46 | |||
| 47 | void swap(selrestr& first, selrestr& second) | ||
| 48 | { | ||
| 49 | using type = selrestr::type; | ||
| 50 | |||
| 51 | type tempType = first.type_; | ||
| 52 | int tempPos; | ||
| 53 | std::string tempRestriction; | ||
| 54 | std::list<selrestr> tempChildren; | ||
| 55 | bool tempOrlogic; | ||
| 56 | |||
| 57 | switch (tempType) | ||
| 58 | { | ||
| 59 | case type::singleton: | ||
| 60 | { | ||
| 61 | tempPos = first.singleton_.pos; | ||
| 62 | tempRestriction = std::move(first.singleton_.restriction); | ||
| 63 | |||
| 64 | break; | ||
| 65 | } | ||
| 66 | |||
| 67 | case type::group: | ||
| 68 | { | ||
| 69 | tempChildren = std::move(first.group_.children); | ||
| 70 | tempOrlogic = first.group_.orlogic; | ||
| 71 | |||
| 72 | break; | ||
| 73 | } | ||
| 74 | |||
| 75 | case type::empty: | ||
| 76 | { | ||
| 77 | break; | ||
| 78 | } | ||
| 79 | } | ||
| 80 | |||
| 81 | first.~selrestr(); | ||
| 82 | |||
| 83 | first.type_ = second.type_; | ||
| 84 | |||
| 85 | switch (first.type_) | ||
| 86 | { | ||
| 87 | case type::singleton: | ||
| 88 | { | ||
| 89 | first.singleton_.pos = second.singleton_.pos; | ||
| 90 | new(&first.singleton_.restriction) std::string(std::move(second.singleton_.restriction)); | ||
| 91 | |||
| 92 | break; | ||
| 93 | } | ||
| 94 | |||
| 95 | case type::group: | ||
| 96 | { | ||
| 97 | new(&first.group_.children) std::list<selrestr>(std::move(second.group_.children)); | ||
| 98 | first.group_.orlogic = second.group_.orlogic; | ||
| 99 | |||
| 100 | break; | ||
| 101 | } | ||
| 102 | |||
| 103 | case type::empty: | ||
| 104 | { | ||
| 105 | break; | ||
| 106 | } | ||
| 107 | } | ||
| 108 | |||
| 109 | second.~selrestr(); | ||
| 110 | |||
| 111 | second.type_ = tempType; | ||
| 112 | |||
| 113 | switch (second.type_) | ||
| 114 | { | ||
| 115 | case type::singleton: | ||
| 116 | { | ||
| 117 | second.singleton_.pos = tempPos; | ||
| 118 | new(&second.singleton_.restriction) std::string(std::move(tempRestriction)); | ||
| 119 | |||
| 120 | break; | ||
| 121 | } | ||
| 122 | |||
| 123 | case type::group: | ||
| 124 | { | ||
| 125 | new(&second.group_.children) std::list<selrestr>(std::move(tempChildren)); | ||
| 126 | second.group_.orlogic = tempOrlogic; | ||
| 127 | |||
| 128 | break; | ||
| 129 | } | ||
| 130 | |||
| 131 | case type::empty: | ||
| 132 | { | ||
| 133 | break; | ||
| 134 | } | ||
| 135 | } | ||
| 136 | } | ||
| 137 | |||
| 138 | selrestr::~selrestr() | ||
| 139 | { | ||
| 140 | switch (type_) | ||
| 141 | { | ||
| 142 | case type::singleton: | ||
| 143 | { | ||
| 144 | using string_type = std::string; | ||
| 145 | singleton_.restriction.~string_type(); | ||
| 146 | |||
| 147 | break; | ||
| 148 | } | ||
| 149 | |||
| 150 | case type::group: | ||
| 151 | { | ||
| 152 | using list_type = std::list<selrestr>; | ||
| 153 | group_.children.~list_type(); | ||
| 154 | |||
| 155 | break; | ||
| 156 | } | ||
| 157 | |||
| 158 | case type::empty: | ||
| 159 | { | ||
| 160 | break; | ||
| 161 | } | ||
| 162 | } | ||
| 163 | } | ||
| 164 | |||
| 165 | selrestr::selrestr() : type_(type::empty) | ||
| 166 | { | ||
| 167 | } | ||
| 168 | |||
| 169 | selrestr::selrestr( | ||
| 170 | std::string restriction, | ||
| 171 | bool pos) : | ||
| 172 | type_(type::singleton) | ||
| 173 | { | ||
| 174 | new(&singleton_.restriction) std::string(std::move(restriction)); | ||
| 175 | singleton_.pos = pos; | ||
| 176 | } | ||
| 177 | |||
| 178 | std::string selrestr::getRestriction() const | ||
| 179 | { | ||
| 180 | if (type_ == type::singleton) | ||
| 181 | { | ||
| 182 | return singleton_.restriction; | ||
| 183 | } else { | ||
| 184 | throw std::domain_error("Only singleton selrestrs have restrictions"); | ||
| 185 | } | ||
| 186 | } | ||
| 187 | |||
| 188 | bool selrestr::getPos() const | ||
| 189 | { | ||
| 190 | if (type_ == type::singleton) | ||
| 191 | { | ||
| 192 | return singleton_.pos; | ||
| 193 | } else { | ||
| 194 | throw std::domain_error("Only singleton selrestrs have positivity flags"); | ||
| 195 | } | ||
| 196 | } | ||
| 197 | |||
| 198 | selrestr::selrestr( | ||
| 199 | std::list<selrestr> children, | ||
| 200 | bool orlogic) : | ||
| 201 | type_(type::group) | ||
| 202 | { | ||
| 203 | new(&group_.children) std::list<selrestr>(std::move(children)); | ||
| 204 | group_.orlogic = orlogic; | ||
| 205 | } | ||
| 206 | |||
| 207 | std::list<selrestr> selrestr::getChildren() const | ||
| 208 | { | ||
| 209 | if (type_ == type::group) | ||
| 210 | { | ||
| 211 | return group_.children; | ||
| 212 | } else { | ||
| 213 | throw std::domain_error("Only group selrestrs have children"); | ||
| 214 | } | ||
| 215 | } | ||
| 216 | |||
| 217 | std::list<selrestr>::const_iterator selrestr::begin() const | ||
| 218 | { | ||
| 219 | if (type_ == type::group) | ||
| 220 | { | ||
| 221 | return std::begin(group_.children); | ||
| 222 | } else { | ||
| 223 | throw std::domain_error("Only group selrestrs have children"); | ||
| 224 | } | ||
| 225 | } | ||
| 226 | |||
| 227 | std::list<selrestr>::const_iterator selrestr::end() const | ||
| 228 | { | ||
| 229 | if (type_ == type::group) | ||
| 230 | { | ||
| 231 | return std::end(group_.children); | ||
| 232 | } else { | ||
| 233 | throw std::domain_error("Only group selrestrs have children"); | ||
| 234 | } | ||
| 235 | } | ||
| 236 | |||
| 237 | bool selrestr::getOrlogic() const | ||
| 238 | { | ||
| 239 | if (type_ == type::group) | ||
| 240 | { | ||
| 241 | return group_.orlogic; | ||
| 242 | } else { | ||
| 243 | throw std::domain_error("Only group selrestrs have logic"); | ||
| 244 | } | ||
| 245 | } | ||
| 246 | |||
| 247 | nlohmann::json selrestr::toJson() const | ||
| 248 | { | ||
| 249 | switch (type_) | ||
| 250 | { | ||
| 251 | case type::empty: | ||
| 252 | { | ||
| 253 | return {}; | ||
| 254 | } | ||
| 255 | |||
| 256 | case type::singleton: | ||
| 257 | { | ||
| 258 | return { | ||
| 259 | {"type", singleton_.restriction}, | ||
| 260 | {"pos", singleton_.pos} | ||
| 261 | }; | ||
| 262 | } | ||
| 263 | |||
| 264 | case type::group: | ||
| 265 | { | ||
| 266 | std::string logic; | ||
| 267 | if (group_.orlogic) | ||
| 268 | { | ||
| 269 | logic = "or"; | ||
| 270 | } else { | ||
| 271 | logic = "and"; | ||
| 272 | } | ||
| 273 | |||
| 274 | std::list<nlohmann::json> children; | ||
| 275 | std::transform(std::begin(group_.children), std::end(group_.children), std::back_inserter(children), [] (const selrestr& child) { | ||
| 276 | return child.toJson(); | ||
| 277 | }); | ||
| 278 | |||
| 279 | return { | ||
| 280 | {"logic", logic}, | ||
| 281 | {"children", children} | ||
| 282 | }; | ||
| 283 | } | ||
| 284 | } | ||
| 285 | } | ||
| 286 | |||
| 287 | }; | ||
| 288 | }; | ||
| diff --git a/generator/selrestr.h b/generator/selrestr.h new file mode 100644 index 0000000..5000970 --- /dev/null +++ b/generator/selrestr.h | |||
| @@ -0,0 +1,88 @@ | |||
| 1 | #ifndef SELRESTR_H_50652FB7 | ||
| 2 | #define SELRESTR_H_50652FB7 | ||
| 3 | |||
| 4 | #include <list> | ||
| 5 | #include <string> | ||
| 6 | #include <json.hpp> | ||
| 7 | |||
| 8 | namespace verbly { | ||
| 9 | namespace generator { | ||
| 10 | |||
| 11 | class selrestr { | ||
| 12 | public: | ||
| 13 | enum class type { | ||
| 14 | empty, | ||
| 15 | singleton, | ||
| 16 | group | ||
| 17 | }; | ||
| 18 | |||
| 19 | // Copy and move constructors | ||
| 20 | |||
| 21 | selrestr(const selrestr& other); | ||
| 22 | selrestr(selrestr&& other); | ||
| 23 | |||
| 24 | // Assignment | ||
| 25 | |||
| 26 | selrestr& operator=(selrestr other); | ||
| 27 | |||
| 28 | // Swap | ||
| 29 | |||
| 30 | friend void swap(selrestr& first, selrestr& second); | ||
| 31 | |||
| 32 | // Destructor | ||
| 33 | |||
| 34 | ~selrestr(); | ||
| 35 | |||
| 36 | // Generic accessors | ||
| 37 | |||
| 38 | type getType() const | ||
| 39 | { | ||
| 40 | return type_; | ||
| 41 | } | ||
| 42 | |||
| 43 | // Empty | ||
| 44 | |||
| 45 | selrestr(); | ||
| 46 | |||
| 47 | // Singleton | ||
| 48 | |||
| 49 | selrestr(std::string restriction, bool pos); | ||
| 50 | |||
| 51 | std::string getRestriction() const; | ||
| 52 | |||
| 53 | bool getPos() const; | ||
| 54 | |||
| 55 | // Group | ||
| 56 | |||
| 57 | selrestr(std::list<selrestr> children, bool orlogic); | ||
| 58 | |||
| 59 | std::list<selrestr> getChildren() const; | ||
| 60 | |||
| 61 | std::list<selrestr>::const_iterator begin() const; | ||
| 62 | |||
| 63 | std::list<selrestr>::const_iterator end() const; | ||
| 64 | |||
| 65 | bool getOrlogic() const; | ||
| 66 | |||
| 67 | // Helpers | ||
| 68 | |||
| 69 | nlohmann::json toJson() const; | ||
| 70 | |||
| 71 | private: | ||
| 72 | union { | ||
| 73 | struct { | ||
| 74 | bool pos; | ||
| 75 | std::string restriction; | ||
| 76 | } singleton_; | ||
| 77 | struct { | ||
| 78 | std::list<selrestr> children; | ||
| 79 | bool orlogic; | ||
| 80 | } group_; | ||
| 81 | }; | ||
| 82 | type type_; | ||
| 83 | }; | ||
| 84 | |||
| 85 | }; | ||
| 86 | }; | ||
| 87 | |||
| 88 | #endif /* end of include guard: SELRESTR_H_50652FB7 */ | ||
| diff --git a/generator/word.cpp b/generator/word.cpp new file mode 100644 index 0000000..8ba3ce2 --- /dev/null +++ b/generator/word.cpp | |||
| @@ -0,0 +1,77 @@ | |||
| 1 | #include "word.h" | ||
| 2 | #include <list> | ||
| 3 | #include <string> | ||
| 4 | #include "database.h" | ||
| 5 | #include "notion.h" | ||
| 6 | #include "lemma.h" | ||
| 7 | #include "field.h" | ||
| 8 | #include "group.h" | ||
| 9 | |||
| 10 | namespace verbly { | ||
| 11 | namespace generator { | ||
| 12 | |||
| 13 | int word::nextId_ = 0; | ||
| 14 | |||
| 15 | word::word( | ||
| 16 | notion& n, | ||
| 17 | lemma& l) : | ||
| 18 | id_(nextId_++), | ||
| 19 | notion_(n), | ||
| 20 | lemma_(l) | ||
| 21 | { | ||
| 22 | } | ||
| 23 | |||
| 24 | word::word( | ||
| 25 | notion& n, | ||
| 26 | lemma& l, | ||
| 27 | int tagCount) : | ||
| 28 | id_(nextId_++), | ||
| 29 | notion_(n), | ||
| 30 | lemma_(l), | ||
| 31 | tagCount_(tagCount), | ||
| 32 | hasTagCount_(true) | ||
| 33 | { | ||
| 34 | } | ||
| 35 | |||
| 36 | void word::setAdjectivePosition(positioning adjectivePosition) | ||
| 37 | { | ||
| 38 | adjectivePosition_ = adjectivePosition; | ||
| 39 | } | ||
| 40 | |||
| 41 | void word::setVerbGroup(const group& verbGroup) | ||
| 42 | { | ||
| 43 | verbGroup_ = &verbGroup; | ||
| 44 | } | ||
| 45 | |||
| 46 | database& operator<<(database& db, const word& arg) | ||
| 47 | { | ||
| 48 | std::list<field> fields; | ||
| 49 | |||
| 50 | fields.emplace_back("word_id", arg.getId()); | ||
| 51 | fields.emplace_back("notion_id", arg.getNotion().getId()); | ||
| 52 | fields.emplace_back("lemma_id", arg.getLemma().getId()); | ||
| 53 | |||
| 54 | if (arg.hasTagCount()) | ||
| 55 | { | ||
| 56 | fields.emplace_back("tag_count", arg.getTagCount()); | ||
| 57 | } | ||
| 58 | |||
| 59 | if ((arg.getNotion().getPartOfSpeech() == part_of_speech::adjective) | ||
| 60 | && (arg.getAdjectivePosition() != positioning::undefined)) | ||
| 61 | { | ||
| 62 | fields.emplace_back("position", static_cast<int>(arg.getAdjectivePosition())); | ||
| 63 | } | ||
| 64 | |||
| 65 | if ((arg.getNotion().getPartOfSpeech() == part_of_speech::verb) | ||
| 66 | && (arg.hasVerbGroup())) | ||
| 67 | { | ||
| 68 | fields.emplace_back("group_id", arg.getVerbGroup().getId()); | ||
| 69 | } | ||
| 70 | |||
| 71 | db.insertIntoTable("words", std::move(fields)); | ||
| 72 | |||
| 73 | return db; | ||
| 74 | } | ||
| 75 | |||
| 76 | }; | ||
| 77 | }; | ||
| diff --git a/generator/word.h b/generator/word.h new file mode 100644 index 0000000..bfed586 --- /dev/null +++ b/generator/word.h | |||
| @@ -0,0 +1,110 @@ | |||
| 1 | #ifndef WORD_H_91F99D46 | ||
| 2 | #define WORD_H_91F99D46 | ||
| 3 | |||
| 4 | #include <cassert> | ||
| 5 | #include "enums.h" | ||
| 6 | |||
| 7 | namespace verbly { | ||
| 8 | namespace generator { | ||
| 9 | |||
| 10 | class notion; | ||
| 11 | class lemma; | ||
| 12 | class database; | ||
| 13 | class group; | ||
| 14 | |||
| 15 | class word { | ||
| 16 | public: | ||
| 17 | |||
| 18 | // Constructors | ||
| 19 | |||
| 20 | word(notion& n, lemma& l); | ||
| 21 | |||
| 22 | word(notion& n, lemma& l, int tagCount); | ||
| 23 | |||
| 24 | // Mutators | ||
| 25 | |||
| 26 | void setAdjectivePosition(positioning adjectivePosition); | ||
| 27 | |||
| 28 | void setVerbGroup(const group& verbGroup); | ||
| 29 | |||
| 30 | // Accessors | ||
| 31 | |||
| 32 | int getId() const | ||
| 33 | { | ||
| 34 | return id_; | ||
| 35 | } | ||
| 36 | |||
| 37 | notion& getNotion() | ||
| 38 | { | ||
| 39 | return notion_; | ||
| 40 | } | ||
| 41 | |||
| 42 | const notion& getNotion() const | ||
| 43 | { | ||
| 44 | return notion_; | ||
| 45 | } | ||
| 46 | |||
| 47 | lemma& getLemma() | ||
| 48 | { | ||
| 49 | return lemma_; | ||
| 50 | } | ||
| 51 | |||
| 52 | const lemma& getLemma() const | ||
| 53 | { | ||
| 54 | return lemma_; | ||
| 55 | } | ||
| 56 | |||
| 57 | bool hasTagCount() const | ||
| 58 | { | ||
| 59 | return hasTagCount_; | ||
| 60 | } | ||
| 61 | |||
| 62 | int getTagCount() const | ||
| 63 | { | ||
| 64 | // Calling code should always call hasTagCount first. | ||
| 65 | assert(hasTagCount_); | ||
| 66 | |||
| 67 | return tagCount_; | ||
| 68 | } | ||
| 69 | |||
| 70 | positioning getAdjectivePosition() const | ||
| 71 | { | ||
| 72 | return adjectivePosition_; | ||
| 73 | } | ||
| 74 | |||
| 75 | bool hasVerbGroup() const | ||
| 76 | { | ||
| 77 | return (verbGroup_ != nullptr); | ||
| 78 | } | ||
| 79 | |||
| 80 | const group& getVerbGroup() const | ||
| 81 | { | ||
| 82 | // Calling code should always call hasVerbGroup first. | ||
| 83 | assert(verbGroup_ != nullptr); | ||
| 84 | |||
| 85 | return *verbGroup_; | ||
| 86 | } | ||
| 87 | |||
| 88 | private: | ||
| 89 | |||
| 90 | static int nextId_; | ||
| 91 | |||
| 92 | const int id_; | ||
| 93 | notion& notion_; | ||
| 94 | lemma& lemma_; | ||
| 95 | const int tagCount_ = 0; | ||
| 96 | const bool hasTagCount_ = false; | ||
| 97 | |||
| 98 | positioning adjectivePosition_ = positioning::undefined; | ||
| 99 | const group* verbGroup_ = nullptr; | ||
| 100 | |||
| 101 | }; | ||
| 102 | |||
| 103 | // Serializer | ||
| 104 | |||
| 105 | database& operator<<(database& db, const word& arg); | ||
| 106 | |||
| 107 | }; | ||
| 108 | }; | ||
| 109 | |||
| 110 | #endif /* end of include guard: WORD_H_91F99D46 */ | ||
| diff --git a/lib/adjective.cpp b/lib/adjective.cpp deleted file mode 100644 index ba8254a..0000000 --- a/lib/adjective.cpp +++ /dev/null | |||
| @@ -1,113 +0,0 @@ | |||
| 1 | #include "verbly.h" | ||
| 2 | |||
| 3 | namespace verbly { | ||
| 4 | |||
| 5 | adjective::adjective() | ||
| 6 | { | ||
| 7 | |||
| 8 | } | ||
| 9 | |||
| 10 | adjective::adjective(const data& _data, int _id) : word(_data, _id) | ||
| 11 | { | ||
| 12 | |||
| 13 | } | ||
| 14 | |||
| 15 | std::string adjective::base_form() const | ||
| 16 | { | ||
| 17 | assert(_valid == true); | ||
| 18 | |||
| 19 | return _base_form; | ||
| 20 | } | ||
| 21 | |||
| 22 | std::string adjective::comparative_form() const | ||
| 23 | { | ||
| 24 | assert(_valid == true); | ||
| 25 | |||
| 26 | return _comparative_form; | ||
| 27 | } | ||
| 28 | |||
| 29 | std::string adjective::superlative_form() const | ||
| 30 | { | ||
| 31 | assert(_valid == true); | ||
| 32 | |||
| 33 | return _superlative_form; | ||
| 34 | } | ||
| 35 | |||
| 36 | adjective::positioning adjective::position() const | ||
| 37 | { | ||
| 38 | assert(_valid == true); | ||
| 39 | |||
| 40 | return _position; | ||
| 41 | } | ||
| 42 | |||
| 43 | bool adjective::has_comparative_form() const | ||
| 44 | { | ||
| 45 | assert(_valid == true); | ||
| 46 | |||
| 47 | return !_comparative_form.empty(); | ||
| 48 | } | ||
| 49 | |||
| 50 | bool adjective::has_superlative_form() const | ||
| 51 | { | ||
| 52 | assert(_valid == true); | ||
| 53 | |||
| 54 | return !_superlative_form.empty(); | ||
| 55 | } | ||
| 56 | |||
| 57 | bool adjective::has_position() const | ||
| 58 | { | ||
| 59 | assert(_valid == true); | ||
| 60 | |||
| 61 | return _position != adjective::positioning::undefined; | ||
| 62 | } | ||
| 63 | |||
| 64 | adjective_query adjective::antonyms() const | ||
| 65 | { | ||
| 66 | assert(_valid == true); | ||
| 67 | |||
| 68 | return _data->adjectives().antonym_of(*this); | ||
| 69 | } | ||
| 70 | |||
| 71 | adjective_query adjective::synonyms() const | ||
| 72 | { | ||
| 73 | assert(_valid == true); | ||
| 74 | |||
| 75 | return _data->adjectives().synonym_of(*this); | ||
| 76 | } | ||
| 77 | |||
| 78 | adjective_query adjective::generalizations() const | ||
| 79 | { | ||
| 80 | assert(_valid == true); | ||
| 81 | |||
| 82 | return _data->adjectives().generalization_of(*this); | ||
| 83 | } | ||
| 84 | |||
| 85 | adjective_query adjective::specifications() const | ||
| 86 | { | ||
| 87 | assert(_valid == true); | ||
| 88 | |||
| 89 | return _data->adjectives().specification_of(*this); | ||
| 90 | } | ||
| 91 | |||
| 92 | noun_query adjective::anti_pertainyms() const | ||
| 93 | { | ||
| 94 | assert(_valid == true); | ||
| 95 | |||
| 96 | return _data->nouns().anti_pertainym_of(*this); | ||
| 97 | } | ||
| 98 | |||
| 99 | adverb_query adjective::mannernyms() const | ||
| 100 | { | ||
| 101 | assert(_valid == true); | ||
| 102 | |||
| 103 | return _data->adverbs().mannernym_of(*this); | ||
| 104 | } | ||
| 105 | |||
| 106 | noun_query adjective::attributes() const | ||
| 107 | { | ||
| 108 | assert(_valid == true); | ||
| 109 | |||
| 110 | return _data->nouns().attribute_of(*this); | ||
| 111 | } | ||
| 112 | |||
| 113 | }; | ||
| diff --git a/lib/adjective.h b/lib/adjective.h deleted file mode 100644 index a6eb293..0000000 --- a/lib/adjective.h +++ /dev/null | |||
| @@ -1,51 +0,0 @@ | |||
| 1 | #ifndef ADJECTIVE_H_87B3FB75 | ||
| 2 | #define ADJECTIVE_H_87B3FB75 | ||
| 3 | |||
| 4 | namespace verbly { | ||
| 5 | |||
| 6 | class adjective_query; | ||
| 7 | class adverb_query; | ||
| 8 | class noun_query; | ||
| 9 | |||
| 10 | class adjective : public word { | ||
| 11 | public: | ||
| 12 | enum class positioning { | ||
| 13 | undefined, | ||
| 14 | predicate, | ||
| 15 | attributive, | ||
| 16 | postnominal | ||
| 17 | }; | ||
| 18 | |||
| 19 | private: | ||
| 20 | std::string _base_form; | ||
| 21 | std::string _comparative_form; | ||
| 22 | std::string _superlative_form; | ||
| 23 | positioning _position = positioning::undefined; | ||
| 24 | |||
| 25 | friend class adjective_query; | ||
| 26 | |||
| 27 | public: | ||
| 28 | adjective(); | ||
| 29 | adjective(const data& _data, int _id); | ||
| 30 | |||
| 31 | std::string base_form() const; | ||
| 32 | std::string comparative_form() const; | ||
| 33 | std::string superlative_form() const; | ||
| 34 | positioning position() const; | ||
| 35 | |||
| 36 | bool has_comparative_form() const; | ||
| 37 | bool has_superlative_form() const; | ||
| 38 | bool has_position() const; | ||
| 39 | |||
| 40 | adjective_query antonyms() const; | ||
| 41 | adjective_query synonyms() const; | ||
| 42 | adjective_query generalizations() const; | ||
| 43 | adjective_query specifications() const; | ||
| 44 | noun_query anti_pertainyms() const; | ||
| 45 | adverb_query mannernyms() const; | ||
| 46 | noun_query attributes() const; | ||
| 47 | }; | ||
| 48 | |||
| 49 | }; | ||
| 50 | |||
| 51 | #endif /* end of include guard: ADJECTIVE_H_87B3FB75 */ | ||
| diff --git a/lib/adjective_query.cpp b/lib/adjective_query.cpp deleted file mode 100644 index 90ccef4..0000000 --- a/lib/adjective_query.cpp +++ /dev/null | |||
| @@ -1,1072 +0,0 @@ | |||
| 1 | #include "verbly.h" | ||
| 2 | |||
| 3 | namespace verbly { | ||
| 4 | |||
| 5 | adjective_query::adjective_query(const data& _data) : _data(_data) | ||
| 6 | { | ||
| 7 | |||
| 8 | } | ||
| 9 | |||
| 10 | adjective_query& adjective_query::limit(int _limit) | ||
| 11 | { | ||
| 12 | if ((_limit > 0) || (_limit == unlimited)) | ||
| 13 | { | ||
| 14 | this->_limit = _limit; | ||
| 15 | } | ||
| 16 | |||
| 17 | return *this; | ||
| 18 | } | ||
| 19 | |||
| 20 | adjective_query& adjective_query::random() | ||
| 21 | { | ||
| 22 | this->_random = true; | ||
| 23 | |||
| 24 | return *this; | ||
| 25 | } | ||
| 26 | |||
| 27 | adjective_query& adjective_query::except(const adjective& _word) | ||
| 28 | { | ||
| 29 | _except.push_back(_word); | ||
| 30 | |||
| 31 | return *this; | ||
| 32 | } | ||
| 33 | |||
| 34 | adjective_query& adjective_query::rhymes_with(const word& _word) | ||
| 35 | { | ||
| 36 | for (auto rhyme : _word.get_rhymes()) | ||
| 37 | { | ||
| 38 | _rhymes.push_back(rhyme); | ||
| 39 | } | ||
| 40 | |||
| 41 | if (dynamic_cast<const adjective*>(&_word) != nullptr) | ||
| 42 | { | ||
| 43 | _except.push_back(dynamic_cast<const adjective&>(_word)); | ||
| 44 | } | ||
| 45 | |||
| 46 | return *this; | ||
| 47 | } | ||
| 48 | |||
| 49 | adjective_query& adjective_query::rhymes_with(rhyme _r) | ||
| 50 | { | ||
| 51 | _rhymes.push_back(_r); | ||
| 52 | |||
| 53 | return *this; | ||
| 54 | } | ||
| 55 | |||
| 56 | adjective_query& adjective_query::has_pronunciation() | ||
| 57 | { | ||
| 58 | this->_has_prn = true; | ||
| 59 | |||
| 60 | return *this; | ||
| 61 | } | ||
| 62 | |||
| 63 | adjective_query& adjective_query::has_rhyming_noun() | ||
| 64 | { | ||
| 65 | _has_rhyming_noun = true; | ||
| 66 | |||
| 67 | return *this; | ||
| 68 | } | ||
| 69 | |||
| 70 | adjective_query& adjective_query::has_rhyming_adjective() | ||
| 71 | { | ||
| 72 | _has_rhyming_adjective = true; | ||
| 73 | |||
| 74 | return *this; | ||
| 75 | } | ||
| 76 | |||
| 77 | adjective_query& adjective_query::has_rhyming_adverb() | ||
| 78 | { | ||
| 79 | _has_rhyming_adverb = true; | ||
| 80 | |||
| 81 | return *this; | ||
| 82 | } | ||
| 83 | |||
| 84 | adjective_query& adjective_query::has_rhyming_verb() | ||
| 85 | { | ||
| 86 | _has_rhyming_verb = true; | ||
| 87 | |||
| 88 | return *this; | ||
| 89 | } | ||
| 90 | |||
| 91 | adjective_query& adjective_query::with_stress(filter<std::vector<bool>> _arg) | ||
| 92 | { | ||
| 93 | _stress = _arg; | ||
| 94 | |||
| 95 | return *this; | ||
| 96 | } | ||
| 97 | |||
| 98 | adjective_query& adjective_query::with_prefix(filter<std::string> _f) | ||
| 99 | { | ||
| 100 | _f.clean(); | ||
| 101 | _with_prefix = _f; | ||
| 102 | |||
| 103 | return *this; | ||
| 104 | } | ||
| 105 | |||
| 106 | adjective_query& adjective_query::with_suffix(filter<std::string> _f) | ||
| 107 | { | ||
| 108 | _f.clean(); | ||
| 109 | _with_suffix = _f; | ||
| 110 | |||
| 111 | return *this; | ||
| 112 | } | ||
| 113 | |||
| 114 | adjective_query& adjective_query::with_complexity(int _arg) | ||
| 115 | { | ||
| 116 | _with_complexity = _arg; | ||
| 117 | |||
| 118 | return *this; | ||
| 119 | } | ||
| 120 | |||
| 121 | adjective_query& adjective_query::requires_comparative_form() | ||
| 122 | { | ||
| 123 | _requires_comparative_form = true; | ||
| 124 | |||
| 125 | return *this; | ||
| 126 | } | ||
| 127 | |||
| 128 | adjective_query& adjective_query::requires_superlative_form() | ||
| 129 | { | ||
| 130 | _requires_superlative_form = true; | ||
| 131 | |||
| 132 | return *this; | ||
| 133 | } | ||
| 134 | |||
| 135 | adjective_query& adjective_query::position(adjective::positioning pos) | ||
| 136 | { | ||
| 137 | _position = pos; | ||
| 138 | |||
| 139 | return *this; | ||
| 140 | } | ||
| 141 | |||
| 142 | adjective_query& adjective_query::is_variant() | ||
| 143 | { | ||
| 144 | this->_is_variant = true; | ||
| 145 | |||
| 146 | return *this; | ||
| 147 | } | ||
| 148 | |||
| 149 | adjective_query& adjective_query::variant_of(filter<noun> _f) | ||
| 150 | { | ||
| 151 | _f.clean(); | ||
| 152 | _variant_of = _f; | ||
| 153 | |||
| 154 | return *this; | ||
| 155 | } | ||
| 156 | |||
| 157 | adjective_query& adjective_query::has_antonyms() | ||
| 158 | { | ||
| 159 | this->_is_antonymic = true; | ||
| 160 | |||
| 161 | return *this; | ||
| 162 | } | ||
| 163 | |||
| 164 | adjective_query& adjective_query::antonym_of(filter<adjective> _f) | ||
| 165 | { | ||
| 166 | _f.clean(); | ||
| 167 | _antonym_of = _f; | ||
| 168 | |||
| 169 | return *this; | ||
| 170 | } | ||
| 171 | |||
| 172 | adjective_query& adjective_query::has_synonyms() | ||
| 173 | { | ||
| 174 | this->_is_synonymic = true; | ||
| 175 | |||
| 176 | return *this; | ||
| 177 | } | ||
| 178 | |||
| 179 | adjective_query& adjective_query::synonym_of(filter<adjective> _f) | ||
| 180 | { | ||
| 181 | _f.clean(); | ||
| 182 | _synonym_of = _f; | ||
| 183 | |||
| 184 | return *this; | ||
| 185 | } | ||
| 186 | |||
| 187 | adjective_query& adjective_query::is_generalization() | ||
| 188 | { | ||
| 189 | this->_is_generalization = true; | ||
| 190 | |||
| 191 | return *this; | ||
| 192 | } | ||
| 193 | |||
| 194 | adjective_query& adjective_query::generalization_of(filter<adjective> _f) | ||
| 195 | { | ||
| 196 | _f.clean(); | ||
| 197 | _generalization_of = _f; | ||
| 198 | |||
| 199 | return *this; | ||
| 200 | } | ||
| 201 | |||
| 202 | adjective_query& adjective_query::is_specification() | ||
| 203 | { | ||
| 204 | this->_is_specification = true; | ||
| 205 | |||
| 206 | return *this; | ||
| 207 | } | ||
| 208 | |||
| 209 | adjective_query& adjective_query::specification_of(filter<adjective> _f) | ||
| 210 | { | ||
| 211 | _f.clean(); | ||
| 212 | _specification_of = _f; | ||
| 213 | |||
| 214 | return *this; | ||
| 215 | } | ||
| 216 | |||
| 217 | adjective_query& adjective_query::is_pertainymic() | ||
| 218 | { | ||
| 219 | this->_is_pertainymic = true; | ||
| 220 | |||
| 221 | return *this; | ||
| 222 | } | ||
| 223 | |||
| 224 | adjective_query& adjective_query::pertainym_of(filter<noun> _f) | ||
| 225 | { | ||
| 226 | _f.clean(); | ||
| 227 | _pertainym_of = _f; | ||
| 228 | |||
| 229 | return *this; | ||
| 230 | } | ||
| 231 | |||
| 232 | adjective_query& adjective_query::is_mannernymic() | ||
| 233 | { | ||
| 234 | this->_is_mannernymic = true; | ||
| 235 | |||
| 236 | return *this; | ||
| 237 | } | ||
| 238 | |||
| 239 | adjective_query& adjective_query::anti_mannernym_of(filter<adverb> _f) | ||
| 240 | { | ||
| 241 | _f.clean(); | ||
| 242 | _anti_mannernym_of = _f; | ||
| 243 | |||
| 244 | return *this; | ||
| 245 | } | ||
| 246 | /* | ||
| 247 | adjective_query& adjective_query::derived_from(const word& _w) | ||
| 248 | { | ||
| 249 | if (dynamic_cast<const adjective*>(&_w) != nullptr) | ||
| 250 | { | ||
| 251 | _derived_from_adjective.push_back(dynamic_cast<const adjective&>(_w)); | ||
| 252 | } else if (dynamic_cast<const adverb*>(&_w) != nullptr) | ||
| 253 | { | ||
| 254 | _derived_from_adverb.push_back(dynamic_cast<const adverb&>(_w)); | ||
| 255 | } else if (dynamic_cast<const noun*>(&_w) != nullptr) | ||
| 256 | { | ||
| 257 | _derived_from_noun.push_back(dynamic_cast<const noun&>(_w)); | ||
| 258 | } | ||
| 259 | |||
| 260 | return *this; | ||
| 261 | } | ||
| 262 | |||
| 263 | adjective_query& adjective_query::not_derived_from(const word& _w) | ||
| 264 | { | ||
| 265 | if (dynamic_cast<const adjective*>(&_w) != nullptr) | ||
| 266 | { | ||
| 267 | _not_derived_from_adjective.push_back(dynamic_cast<const adjective&>(_w)); | ||
| 268 | } else if (dynamic_cast<const adverb*>(&_w) != nullptr) | ||
| 269 | { | ||
| 270 | _not_derived_from_adverb.push_back(dynamic_cast<const adverb&>(_w)); | ||
| 271 | } else if (dynamic_cast<const noun*>(&_w) != nullptr) | ||
| 272 | { | ||
| 273 | _not_derived_from_noun.push_back(dynamic_cast<const noun&>(_w)); | ||
| 274 | } | ||
| 275 | |||
| 276 | return *this; | ||
| 277 | } | ||
| 278 | */ | ||
| 279 | std::list<adjective> adjective_query::run() const | ||
| 280 | { | ||
| 281 | std::stringstream construct; | ||
| 282 | construct << "SELECT adjective_id, base_form, comparative, superlative, position FROM adjectives"; | ||
| 283 | std::list<std::string> conditions; | ||
| 284 | std::list<binding> bindings; | ||
| 285 | |||
| 286 | if (_has_prn) | ||
| 287 | { | ||
| 288 | conditions.push_back("adjective_id IN (SELECT adjective_id FROM adjective_pronunciations)"); | ||
| 289 | } | ||
| 290 | |||
| 291 | if (!_rhymes.empty()) | ||
| 292 | { | ||
| 293 | std::list<std::string> clauses(_rhymes.size(), "(prerhyme != ? AND rhyme = ?)"); | ||
| 294 | std::string cond = "adjective_id IN (SELECT adjective_id FROM adjective_pronunciations WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
| 295 | conditions.push_back(cond); | ||
| 296 | |||
| 297 | for (auto rhy : _rhymes) | ||
| 298 | { | ||
| 299 | bindings.emplace_back(rhy.get_prerhyme()); | ||
| 300 | bindings.emplace_back(rhy.get_rhyme()); | ||
| 301 | } | ||
| 302 | } | ||
| 303 | |||
| 304 | if (_has_rhyming_noun) | ||
| 305 | { | ||
| 306 | conditions.push_back("adjective_id IN (SELECT a.adjective_id FROM adjectives AS a INNER JOIN adjective_pronunciations AS curp ON curp.adjective_id = a.adjective_id INNER JOIN noun_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)"); | ||
| 307 | } | ||
| 308 | |||
| 309 | if (_has_rhyming_adjective) | ||
| 310 | { | ||
| 311 | conditions.push_back("adjective_id IN (SELECT a.adjective_id FROM adjectives AS a INNER JOIN adjective_pronunciations AS curp ON curp.adjective_id = a.adjective_id INNER JOIN adjective_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme AND rhmp.adjective_id != curp.adjective_id)"); | ||
| 312 | } | ||
| 313 | |||
| 314 | if (_has_rhyming_adverb) | ||
| 315 | { | ||
| 316 | conditions.push_back("adjective_id IN (SELECT a.adjective_id FROM adjectives AS a INNER JOIN adjective_pronunciations AS curp ON curp.adjective_id = a.adjective_id INNER JOIN adverb_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)"); | ||
| 317 | } | ||
| 318 | |||
| 319 | if (_has_rhyming_verb) | ||
| 320 | { | ||
| 321 | conditions.push_back("adjective_id IN (SELECT a.adjective_id FROM adjectives AS a INNER JOIN adjective_pronunciations AS curp ON curp.adjective_id = a.adjective_id INNER JOIN verb_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)"); | ||
| 322 | } | ||
| 323 | |||
| 324 | for (auto except : _except) | ||
| 325 | { | ||
| 326 | conditions.push_back("adjective_id != ?"); | ||
| 327 | bindings.emplace_back(except._id); | ||
| 328 | } | ||
| 329 | |||
| 330 | if (_requires_comparative_form) | ||
| 331 | { | ||
| 332 | conditions.push_back("comparative IS NOT NULL"); | ||
| 333 | } | ||
| 334 | |||
| 335 | if (_requires_superlative_form) | ||
| 336 | { | ||
| 337 | conditions.push_back("superlative IS NOT NULL"); | ||
| 338 | } | ||
| 339 | |||
| 340 | switch (_position) | ||
| 341 | { | ||
| 342 | case adjective::positioning::predicate: conditions.push_back("position = 'p'"); break; | ||
| 343 | case adjective::positioning::attributive: conditions.push_back("position = 'a'"); break; | ||
| 344 | case adjective::positioning::postnominal: conditions.push_back("position = 'i'"); break; | ||
| 345 | case adjective::positioning::undefined: break; | ||
| 346 | } | ||
| 347 | |||
| 348 | if (!_stress.empty()) | ||
| 349 | { | ||
| 350 | std::stringstream cond; | ||
| 351 | if (_stress.get_notlogic()) | ||
| 352 | { | ||
| 353 | cond << "adjective_id NOT IN"; | ||
| 354 | } else { | ||
| 355 | cond << "adjective_id IN"; | ||
| 356 | } | ||
| 357 | |||
| 358 | cond << "(SELECT adjective_id FROM adjective_pronunciations WHERE "; | ||
| 359 | |||
| 360 | std::function<std::string (filter<std::vector<bool>>, bool)> recur = [&] (filter<std::vector<bool>> f, bool notlogic) -> std::string { | ||
| 361 | switch (f.get_type()) | ||
| 362 | { | ||
| 363 | case filter<std::vector<bool>>::type::singleton: | ||
| 364 | { | ||
| 365 | std::ostringstream _val; | ||
| 366 | for (auto syl : f.get_elem()) | ||
| 367 | { | ||
| 368 | if (syl) | ||
| 369 | { | ||
| 370 | _val << "1"; | ||
| 371 | } else { | ||
| 372 | _val << "0"; | ||
| 373 | } | ||
| 374 | } | ||
| 375 | |||
| 376 | bindings.emplace_back(_val.str()); | ||
| 377 | |||
| 378 | if (notlogic == f.get_notlogic()) | ||
| 379 | { | ||
| 380 | return "stress = ?"; | ||
| 381 | } else { | ||
| 382 | return "stress != ?"; | ||
| 383 | } | ||
| 384 | } | ||
| 385 | |||
| 386 | case filter<std::vector<bool>>::type::group: | ||
| 387 | { | ||
| 388 | bool truelogic = notlogic != f.get_notlogic(); | ||
| 389 | |||
| 390 | std::list<std::string> clauses; | ||
| 391 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<std::vector<bool>> f2) { | ||
| 392 | return recur(f2, truelogic); | ||
| 393 | }); | ||
| 394 | |||
| 395 | if (truelogic == f.get_orlogic()) | ||
| 396 | { | ||
| 397 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
| 398 | } else { | ||
| 399 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
| 400 | } | ||
| 401 | } | ||
| 402 | } | ||
| 403 | }; | ||
| 404 | |||
| 405 | cond << recur(_stress, _stress.get_notlogic()); | ||
| 406 | cond << ")"; | ||
| 407 | conditions.push_back(cond.str()); | ||
| 408 | } | ||
| 409 | |||
| 410 | if (!_with_prefix.empty()) | ||
| 411 | { | ||
| 412 | std::function<std::string (filter<std::string>, bool)> recur = [&] (filter<std::string> f, bool notlogic) -> std::string { | ||
| 413 | switch (f.get_type()) | ||
| 414 | { | ||
| 415 | case filter<std::string>::type::singleton: | ||
| 416 | { | ||
| 417 | bindings.emplace_back(f.get_elem() + "%"); | ||
| 418 | |||
| 419 | if (notlogic == f.get_notlogic()) | ||
| 420 | { | ||
| 421 | return "base_form LIKE ?"; | ||
| 422 | } else { | ||
| 423 | return "base_form NOT LIKE ?"; | ||
| 424 | } | ||
| 425 | } | ||
| 426 | |||
| 427 | case filter<std::string>::type::group: | ||
| 428 | { | ||
| 429 | bool truelogic = notlogic != f.get_notlogic(); | ||
| 430 | |||
| 431 | std::list<std::string> clauses; | ||
| 432 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<std::string> f2) { | ||
| 433 | return recur(f2, truelogic); | ||
| 434 | }); | ||
| 435 | |||
| 436 | if (truelogic == f.get_orlogic()) | ||
| 437 | { | ||
| 438 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
| 439 | } else { | ||
| 440 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
| 441 | } | ||
| 442 | } | ||
| 443 | } | ||
| 444 | }; | ||
| 445 | |||
| 446 | conditions.push_back(recur(_with_prefix, false)); | ||
| 447 | } | ||
| 448 | |||
| 449 | if (!_with_suffix.empty()) | ||
| 450 | { | ||
| 451 | std::function<std::string (filter<std::string>, bool)> recur = [&] (filter<std::string> f, bool notlogic) -> std::string { | ||
| 452 | switch (f.get_type()) | ||
| 453 | { | ||
| 454 | case filter<std::string>::type::singleton: | ||
| 455 | { | ||
| 456 | bindings.emplace_back("%" + f.get_elem()); | ||
| 457 | |||
| 458 | if (notlogic == f.get_notlogic()) | ||
| 459 | { | ||
| 460 | return "base_form LIKE ?"; | ||
| 461 | } else { | ||
| 462 | return "base_form NOT LIKE ?"; | ||
| 463 | } | ||
| 464 | } | ||
| 465 | |||
| 466 | case filter<std::string>::type::group: | ||
| 467 | { | ||
| 468 | bool truelogic = notlogic != f.get_notlogic(); | ||
| 469 | |||
| 470 | std::list<std::string> clauses; | ||
| 471 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<std::string> f2) { | ||
| 472 | return recur(f2, truelogic); | ||
| 473 | }); | ||
| 474 | |||
| 475 | if (truelogic == f.get_orlogic()) | ||
| 476 | { | ||
| 477 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
| 478 | } else { | ||
| 479 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
| 480 | } | ||
| 481 | } | ||
| 482 | } | ||
| 483 | }; | ||
| 484 | |||
| 485 | conditions.push_back(recur(_with_suffix, false)); | ||
| 486 | } | ||
| 487 | |||
| 488 | if (_with_complexity != unlimited) | ||
| 489 | { | ||
| 490 | conditions.push_back("complexity = ?"); | ||
| 491 | bindings.emplace_back(_with_complexity); | ||
| 492 | } | ||
| 493 | |||
| 494 | if (_is_variant) | ||
| 495 | { | ||
| 496 | conditions.push_back("adjective_id IN (SELECT adjective_id FROM variation)"); | ||
| 497 | } | ||
| 498 | |||
| 499 | if (!_variant_of.empty()) | ||
| 500 | { | ||
| 501 | std::stringstream cond; | ||
| 502 | if (_variant_of.get_notlogic()) | ||
| 503 | { | ||
| 504 | cond << "adjective_id NOT IN"; | ||
| 505 | } else { | ||
| 506 | cond << "adjective_id IN"; | ||
| 507 | } | ||
| 508 | |||
| 509 | cond << "(SELECT adjective_id FROM variation WHERE "; | ||
| 510 | |||
| 511 | std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string { | ||
| 512 | switch (f.get_type()) | ||
| 513 | { | ||
| 514 | case filter<noun>::type::singleton: | ||
| 515 | { | ||
| 516 | bindings.emplace_back(f.get_elem()._id); | ||
| 517 | |||
| 518 | if (notlogic == f.get_notlogic()) | ||
| 519 | { | ||
| 520 | return "noun_id = ?"; | ||
| 521 | } else { | ||
| 522 | return "noun_id != ?"; | ||
| 523 | } | ||
| 524 | } | ||
| 525 | |||
| 526 | case filter<noun>::type::group: | ||
| 527 | { | ||
| 528 | bool truelogic = notlogic != f.get_notlogic(); | ||
| 529 | |||
| 530 | std::list<std::string> clauses; | ||
| 531 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) { | ||
| 532 | return recur(f2, truelogic); | ||
| 533 | }); | ||
| 534 | |||
| 535 | if (truelogic == f.get_orlogic()) | ||
| 536 | { | ||
| 537 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
| 538 | } else { | ||
| 539 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
| 540 | } | ||
| 541 | } | ||
| 542 | } | ||
| 543 | }; | ||
| 544 | |||
| 545 | cond << recur(_variant_of, _variant_of.get_notlogic()); | ||
| 546 | cond << ")"; | ||
| 547 | conditions.push_back(cond.str()); | ||
| 548 | } | ||
| 549 | |||
| 550 | if (_is_antonymic) | ||
| 551 | { | ||
| 552 | conditions.push_back("adjective_id IN (SELECT adjective_2_id FROM adjective_antonymy)"); | ||
| 553 | } | ||
| 554 | |||
| 555 | if (!_antonym_of.empty()) | ||
| 556 | { | ||
| 557 | std::stringstream cond; | ||
| 558 | if (_antonym_of.get_notlogic()) | ||
| 559 | { | ||
| 560 | cond << "adjective_id NOT IN"; | ||
| 561 | } else { | ||
| 562 | cond << "adjective_id IN"; | ||
| 563 | } | ||
| 564 | |||
| 565 | cond << "(SELECT adjective_2_id FROM adjective_antonymy WHERE "; | ||
| 566 | |||
| 567 | std::function<std::string (filter<adjective>, bool)> recur = [&] (filter<adjective> f, bool notlogic) -> std::string { | ||
| 568 | switch (f.get_type()) | ||
| 569 | { | ||
| 570 | case filter<adjective>::type::singleton: | ||
| 571 | { | ||
| 572 | bindings.emplace_back(f.get_elem()._id); | ||
| 573 | |||
| 574 | if (notlogic == f.get_notlogic()) | ||
| 575 | { | ||
| 576 | return "adjective_1_id = ?"; | ||
| 577 | } else { | ||
| 578 | return "adjective_1_id != ?"; | ||
| 579 | } | ||
| 580 | } | ||
| 581 | |||
| 582 | case filter<adjective>::type::group: | ||
| 583 | { | ||
| 584 | bool truelogic = notlogic != f.get_notlogic(); | ||
| 585 | |||
| 586 | std::list<std::string> clauses; | ||
| 587 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<adjective> f2) { | ||
| 588 | return recur(f2, truelogic); | ||
| 589 | }); | ||
| 590 | |||
| 591 | if (truelogic == f.get_orlogic()) | ||
| 592 | { | ||
| 593 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
| 594 | } else { | ||
| 595 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
| 596 | } | ||
| 597 | } | ||
| 598 | } | ||
| 599 | }; | ||
| 600 | |||
| 601 | cond << recur(_antonym_of, _antonym_of.get_notlogic()); | ||
| 602 | cond << ")"; | ||
| 603 | conditions.push_back(cond.str()); | ||
| 604 | } | ||
| 605 | |||
| 606 | if (_is_synonymic) | ||
| 607 | { | ||
| 608 | conditions.push_back("adjective_id IN (SELECT adjective_2_id FROM adjective_synonymy)"); | ||
| 609 | } | ||
| 610 | |||
| 611 | if (!_synonym_of.empty()) | ||
| 612 | { | ||
| 613 | std::stringstream cond; | ||
| 614 | if (_synonym_of.get_notlogic()) | ||
| 615 | { | ||
| 616 | cond << "adjective_id NOT IN"; | ||
| 617 | } else { | ||
| 618 | cond << "adjective_id IN"; | ||
| 619 | } | ||
| 620 | |||
| 621 | cond << "(SELECT adjective_2_id FROM adjective_synonymy WHERE "; | ||
| 622 | |||
| 623 | std::function<std::string (filter<adjective>, bool)> recur = [&] (filter<adjective> f, bool notlogic) -> std::string { | ||
| 624 | switch (f.get_type()) | ||
| 625 | { | ||
| 626 | case filter<adjective>::type::singleton: | ||
| 627 | { | ||
| 628 | bindings.emplace_back(f.get_elem()._id); | ||
| 629 | |||
| 630 | if (notlogic == f.get_notlogic()) | ||
| 631 | { | ||
| 632 | return "adjective_1_id = ?"; | ||
| 633 | } else { | ||
| 634 | return "adjective_1_id != ?"; | ||
| 635 | } | ||
| 636 | } | ||
| 637 | |||
| 638 | case filter<adjective>::type::group: | ||
| 639 | { | ||
| 640 | bool truelogic = notlogic != f.get_notlogic(); | ||
| 641 | |||
| 642 | std::list<std::string> clauses; | ||
| 643 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<adjective> f2) { | ||
| 644 | return recur(f2, truelogic); | ||
| 645 | }); | ||
| 646 | |||
| 647 | if (truelogic == f.get_orlogic()) | ||
| 648 | { | ||
| 649 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
| 650 | } else { | ||
| 651 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
| 652 | } | ||
| 653 | } | ||
| 654 | } | ||
| 655 | }; | ||
| 656 | |||
| 657 | cond << recur(_synonym_of, _synonym_of.get_notlogic()); | ||
| 658 | cond << ")"; | ||
| 659 | conditions.push_back(cond.str()); | ||
| 660 | } | ||
| 661 | |||
| 662 | if (_is_generalization) | ||
| 663 | { | ||
| 664 | conditions.push_back("adjective_id IN (SELECT general_id FROM specification)"); | ||
| 665 | } | ||
| 666 | |||
| 667 | if (!_generalization_of.empty()) | ||
| 668 | { | ||
| 669 | std::stringstream cond; | ||
| 670 | if (_generalization_of.get_notlogic()) | ||
| 671 | { | ||
| 672 | cond << "adjective_id NOT IN"; | ||
| 673 | } else { | ||
| 674 | cond << "adjective_id IN"; | ||
| 675 | } | ||
| 676 | |||
| 677 | cond << "(SELECT general_id FROM specification WHERE "; | ||
| 678 | |||
| 679 | std::function<std::string (filter<adjective>, bool)> recur = [&] (filter<adjective> f, bool notlogic) -> std::string { | ||
| 680 | switch (f.get_type()) | ||
| 681 | { | ||
| 682 | case filter<adjective>::type::singleton: | ||
| 683 | { | ||
| 684 | bindings.emplace_back(f.get_elem()._id); | ||
| 685 | |||
| 686 | if (notlogic == f.get_notlogic()) | ||
| 687 | { | ||
| 688 | return "specific_id = ?"; | ||
| 689 | } else { | ||
| 690 | return "specific_id != ?"; | ||
| 691 | } | ||
| 692 | } | ||
| 693 | |||
| 694 | case filter<adjective>::type::group: | ||
| 695 | { | ||
| 696 | bool truelogic = notlogic != f.get_notlogic(); | ||
| 697 | |||
| 698 | std::list<std::string> clauses; | ||
| 699 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<adjective> f2) { | ||
| 700 | return recur(f2, truelogic); | ||
| 701 | }); | ||
| 702 | |||
| 703 | if (truelogic == f.get_orlogic()) | ||
| 704 | { | ||
| 705 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
| 706 | } else { | ||
| 707 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
| 708 | } | ||
| 709 | } | ||
| 710 | } | ||
| 711 | }; | ||
| 712 | |||
| 713 | cond << recur(_generalization_of, _generalization_of.get_notlogic()); | ||
| 714 | cond << ")"; | ||
| 715 | conditions.push_back(cond.str()); | ||
| 716 | } | ||
| 717 | |||
| 718 | if (_is_specification) | ||
| 719 | { | ||
| 720 | conditions.push_back("adjective_id IN (SELECT specific_id FROM specification)"); | ||
| 721 | } | ||
| 722 | |||
| 723 | if (!_specification_of.empty()) | ||
| 724 | { | ||
| 725 | std::stringstream cond; | ||
| 726 | if (_specification_of.get_notlogic()) | ||
| 727 | { | ||
| 728 | cond << "adjective_id NOT IN"; | ||
| 729 | } else { | ||
| 730 | cond << "adjective_id IN"; | ||
| 731 | } | ||
| 732 | |||
| 733 | cond << "(SELECT specific_id FROM specification WHERE "; | ||
| 734 | |||
| 735 | std::function<std::string (filter<adjective>, bool)> recur = [&] (filter<adjective> f, bool notlogic) -> std::string { | ||
| 736 | switch (f.get_type()) | ||
| 737 | { | ||
| 738 | case filter<adjective>::type::singleton: | ||
| 739 | { | ||
| 740 | bindings.emplace_back(f.get_elem()._id); | ||
| 741 | |||
| 742 | if (notlogic == f.get_notlogic()) | ||
| 743 | { | ||
| 744 | return "general_id = ?"; | ||
| 745 | } else { | ||
| 746 | return "general_id != ?"; | ||
| 747 | } | ||
| 748 | } | ||
| 749 | |||
| 750 | case filter<adjective>::type::group: | ||
| 751 | { | ||
| 752 | bool truelogic = notlogic != f.get_notlogic(); | ||
| 753 | |||
| 754 | std::list<std::string> clauses; | ||
| 755 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<adjective> f2) { | ||
| 756 | return recur(f2, truelogic); | ||
| 757 | }); | ||
| 758 | |||
| 759 | if (truelogic == f.get_orlogic()) | ||
| 760 | { | ||
| 761 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
| 762 | } else { | ||
| 763 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
| 764 | } | ||
| 765 | } | ||
| 766 | } | ||
| 767 | }; | ||
| 768 | |||
| 769 | cond << recur(_specification_of, _specification_of.get_notlogic()); | ||
| 770 | cond << ")"; | ||
| 771 | conditions.push_back(cond.str()); | ||
| 772 | } | ||
| 773 | |||
| 774 | if (_is_pertainymic) | ||
| 775 | { | ||
| 776 | conditions.push_back("adjective_id IN (SELECT pertainym_id FROM pertainymy)"); | ||
| 777 | } | ||
| 778 | |||
| 779 | if (!_pertainym_of.empty()) | ||
| 780 | { | ||
| 781 | std::stringstream cond; | ||
| 782 | if (_pertainym_of.get_notlogic()) | ||
| 783 | { | ||
| 784 | cond << "adjective_id NOT IN"; | ||
| 785 | } else { | ||
| 786 | cond << "adjective_id IN"; | ||
| 787 | } | ||
| 788 | |||
| 789 | cond << "(SELECT pertainym_id FROM pertainymy WHERE "; | ||
| 790 | |||
| 791 | std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string { | ||
| 792 | switch (f.get_type()) | ||
| 793 | { | ||
| 794 | case filter<noun>::type::singleton: | ||
| 795 | { | ||
| 796 | bindings.emplace_back(f.get_elem()._id); | ||
| 797 | |||
| 798 | if (notlogic == f.get_notlogic()) | ||
| 799 | { | ||
| 800 | return "noun_id = ?"; | ||
| 801 | } else { | ||
| 802 | return "noun_id != ?"; | ||
| 803 | } | ||
| 804 | } | ||
| 805 | |||
| 806 | case filter<noun>::type::group: | ||
| 807 | { | ||
| 808 | bool truelogic = notlogic != f.get_notlogic(); | ||
| 809 | |||
| 810 | std::list<std::string> clauses; | ||
| 811 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) { | ||
| 812 | return recur(f2, truelogic); | ||
| 813 | }); | ||
| 814 | |||
| 815 | if (truelogic == f.get_orlogic()) | ||
| 816 | { | ||
| 817 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
| 818 | } else { | ||
| 819 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
| 820 | } | ||
| 821 | } | ||
| 822 | } | ||
| 823 | }; | ||
| 824 | |||
| 825 | cond << recur(_pertainym_of, _pertainym_of.get_notlogic()); | ||
| 826 | cond << ")"; | ||
| 827 | conditions.push_back(cond.str()); | ||
| 828 | } | ||
| 829 | |||
| 830 | if (_is_mannernymic) | ||
| 831 | { | ||
| 832 | conditions.push_back("adjective_id IN (SELECT adjective_id FROM mannernymy)"); | ||
| 833 | } | ||
| 834 | |||
| 835 | if (!_anti_mannernym_of.empty()) | ||
| 836 | { | ||
| 837 | std::stringstream cond; | ||
| 838 | if (_anti_mannernym_of.get_notlogic()) | ||
| 839 | { | ||
| 840 | cond << "adjective_id NOT IN"; | ||
| 841 | } else { | ||
| 842 | cond << "adjective_id IN"; | ||
| 843 | } | ||
| 844 | |||
| 845 | cond << "(SELECT adjective_id FROM mannernymy WHERE "; | ||
| 846 | |||
| 847 | std::function<std::string (filter<adverb>, bool)> recur = [&] (filter<adverb> f, bool notlogic) -> std::string { | ||
| 848 | switch (f.get_type()) | ||
| 849 | { | ||
| 850 | case filter<adverb>::type::singleton: | ||
| 851 | { | ||
| 852 | bindings.emplace_back(f.get_elem()._id); | ||
| 853 | |||
| 854 | if (notlogic == f.get_notlogic()) | ||
| 855 | { | ||
| 856 | return "mannernym_id = ?"; | ||
| 857 | } else { | ||
| 858 | return "mannernym_id != ?"; | ||
| 859 | } | ||
| 860 | } | ||
| 861 | |||
| 862 | case filter<adverb>::type::group: | ||
| 863 | { | ||
| 864 | bool truelogic = notlogic != f.get_notlogic(); | ||
| 865 | |||
| 866 | std::list<std::string> clauses; | ||
| 867 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<adverb> f2) { | ||
| 868 | return recur(f2, truelogic); | ||
| 869 | }); | ||
| 870 | |||
| 871 | if (truelogic == f.get_orlogic()) | ||
| 872 | { | ||
| 873 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
| 874 | } else { | ||
| 875 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
| 876 | } | ||
| 877 | } | ||
| 878 | } | ||
| 879 | }; | ||
| 880 | |||
| 881 | cond << recur(_anti_mannernym_of, _anti_mannernym_of.get_notlogic()); | ||
| 882 | cond << ")"; | ||
| 883 | conditions.push_back(cond.str()); | ||
| 884 | } | ||
| 885 | /* | ||
| 886 | if (!_derived_from_adjective.empty()) | ||
| 887 | { | ||
| 888 | std::list<std::string> clauses(_derived_from_adjective.size(), "adjective_2_id = @DERADJ"); | ||
| 889 | std::string cond = "adjective_id IN (SELECT adjective_1_id FROM adjective_adjective_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
| 890 | conditions.push_back(cond); | ||
| 891 | } | ||
| 892 | |||
| 893 | if (!_not_derived_from_adjective.empty()) | ||
| 894 | { | ||
| 895 | std::list<std::string> clauses(_not_derived_from_adjective.size(), "adjective_2_id = @NDERADJ"); | ||
| 896 | std::string cond = "adjective_id NOT IN (SELECT adjective_1_id FROM adjective_adjective_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
| 897 | conditions.push_back(cond); | ||
| 898 | } | ||
| 899 | |||
| 900 | if (!_derived_from_adverb.empty()) | ||
| 901 | { | ||
| 902 | std::list<std::string> clauses(_derived_from_adverb.size(), "adverb_id = @DERADV"); | ||
| 903 | std::string cond = "adjective_id IN (SELECT adjective_id FROM adjective_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
| 904 | conditions.push_back(cond); | ||
| 905 | } | ||
| 906 | |||
| 907 | if (!_not_derived_from_adverb.empty()) | ||
| 908 | { | ||
| 909 | std::list<std::string> clauses(_not_derived_from_adverb.size(), "adverb_id = @NDERADV"); | ||
| 910 | std::string cond = "adjective_id NOT IN (SELECT adjective_id FROM adjective_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
| 911 | conditions.push_back(cond); | ||
| 912 | } | ||
| 913 | |||
| 914 | if (!_derived_from_noun.empty()) | ||
| 915 | { | ||
| 916 | std::list<std::string> clauses(_derived_from_noun.size(), "noun_id = @DERN"); | ||
| 917 | std::string cond = "adjective_id IN (SELECT adjective_id FROM noun_adjective_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
| 918 | conditions.push_back(cond); | ||
| 919 | } | ||
| 920 | |||
| 921 | if (!_not_derived_from_noun.empty()) | ||
| 922 | { | ||
| 923 | std::list<std::string> clauses(_not_derived_from_noun.size(), "noun_id = @NDERN"); | ||
| 924 | std::string cond = "adjective_id NOT IN (SELECT adjective_id FROM noun_adjective_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
| 925 | conditions.push_back(cond); | ||
| 926 | }*/ | ||
| 927 | |||
| 928 | if (!conditions.empty()) | ||
| 929 | { | ||
| 930 | construct << " WHERE "; | ||
| 931 | construct << verbly::implode(std::begin(conditions), std::end(conditions), " AND "); | ||
| 932 | } | ||
| 933 | |||
| 934 | if (_random) | ||
| 935 | { | ||
| 936 | construct << " ORDER BY RANDOM()"; | ||
| 937 | } | ||
| 938 | |||
| 939 | if (_limit != unlimited) | ||
| 940 | { | ||
| 941 | construct << " LIMIT " << _limit; | ||
| 942 | } | ||
| 943 | |||
| 944 | sqlite3_stmt* ppstmt; | ||
| 945 | std::string query = construct.str(); | ||
| 946 | if (sqlite3_prepare_v2(_data.ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
| 947 | { | ||
| 948 | throw std::runtime_error(sqlite3_errmsg(_data.ppdb)); | ||
| 949 | } | ||
| 950 | |||
| 951 | int i = 1; | ||
| 952 | for (auto& binding : bindings) | ||
| 953 | { | ||
| 954 | switch (binding.get_type()) | ||
| 955 | { | ||
| 956 | case binding::type::integer: | ||
| 957 | { | ||
| 958 | sqlite3_bind_int(ppstmt, i, binding.get_integer()); | ||
| 959 | |||
| 960 | break; | ||
| 961 | } | ||
| 962 | |||
| 963 | case binding::type::string: | ||
| 964 | { | ||
| 965 | sqlite3_bind_text(ppstmt, i, binding.get_string().c_str(), binding.get_string().length(), SQLITE_TRANSIENT); | ||
| 966 | |||
| 967 | break; | ||
| 968 | } | ||
| 969 | } | ||
| 970 | |||
| 971 | i++; | ||
| 972 | } | ||
| 973 | |||
| 974 | /* | ||
| 975 | for (auto adj : _derived_from_adjective) | ||
| 976 | { | ||
| 977 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@DERADJ"), adj._id); | ||
| 978 | } | ||
| 979 | |||
| 980 | for (auto adj : _not_derived_from_adjective) | ||
| 981 | { | ||
| 982 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@NDERADJ"), adj._id); | ||
| 983 | } | ||
| 984 | |||
| 985 | for (auto adv : _derived_from_adverb) | ||
| 986 | { | ||
| 987 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@DERADV"), adv._id); | ||
| 988 | } | ||
| 989 | |||
| 990 | for (auto adv : _not_derived_from_adverb) | ||
| 991 | { | ||
| 992 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@NDERADV"), adv._id); | ||
| 993 | } | ||
| 994 | |||
| 995 | for (auto n : _derived_from_noun) | ||
| 996 | { | ||
| 997 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@DERN"), n._id); | ||
| 998 | } | ||
| 999 | |||
| 1000 | for (auto n : _not_derived_from_noun) | ||
| 1001 | { | ||
| 1002 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@NDERN"), n._id); | ||
| 1003 | } | ||
| 1004 | */ | ||
| 1005 | std::list<adjective> output; | ||
| 1006 | while (sqlite3_step(ppstmt) == SQLITE_ROW) | ||
| 1007 | { | ||
| 1008 | adjective tnc {_data, sqlite3_column_int(ppstmt, 0)}; | ||
| 1009 | tnc._base_form = std::string(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 1))); | ||
| 1010 | |||
| 1011 | if (sqlite3_column_type(ppstmt, 2) != SQLITE_NULL) | ||
| 1012 | { | ||
| 1013 | tnc._comparative_form = std::string(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 2))); | ||
| 1014 | } | ||
| 1015 | |||
| 1016 | if (sqlite3_column_type(ppstmt, 3) != SQLITE_NULL) | ||
| 1017 | { | ||
| 1018 | tnc._superlative_form = std::string(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 3))); | ||
| 1019 | } | ||
| 1020 | |||
| 1021 | if (sqlite3_column_type(ppstmt, 4) != SQLITE_NULL) | ||
| 1022 | { | ||
| 1023 | std::string adjpos(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 4))); | ||
| 1024 | if (adjpos == "p") | ||
| 1025 | { | ||
| 1026 | tnc._position = adjective::positioning::predicate; | ||
| 1027 | } else if (adjpos == "a") | ||
| 1028 | { | ||
| 1029 | tnc._position = adjective::positioning::attributive; | ||
| 1030 | } else if (adjpos == "i") | ||
| 1031 | { | ||
| 1032 | tnc._position = adjective::positioning::postnominal; | ||
| 1033 | } | ||
| 1034 | } | ||
| 1035 | |||
| 1036 | output.push_back(tnc); | ||
| 1037 | } | ||
| 1038 | |||
| 1039 | sqlite3_finalize(ppstmt); | ||
| 1040 | |||
| 1041 | for (auto& adjective : output) | ||
| 1042 | { | ||
| 1043 | query = "SELECT pronunciation, prerhyme, rhyme FROM adjective_pronunciations WHERE adjective_id = ?"; | ||
| 1044 | if (sqlite3_prepare_v2(_data.ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
| 1045 | { | ||
| 1046 | throw std::runtime_error(sqlite3_errmsg(_data.ppdb)); | ||
| 1047 | } | ||
| 1048 | |||
| 1049 | sqlite3_bind_int(ppstmt, 1, adjective._id); | ||
| 1050 | |||
| 1051 | while (sqlite3_step(ppstmt) == SQLITE_ROW) | ||
| 1052 | { | ||
| 1053 | std::string pronunciation(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 0))); | ||
| 1054 | auto phonemes = verbly::split<std::list<std::string>>(pronunciation, " "); | ||
| 1055 | |||
| 1056 | adjective.pronunciations.push_back(phonemes); | ||
| 1057 | |||
| 1058 | if ((sqlite3_column_type(ppstmt, 1) != SQLITE_NULL) && (sqlite3_column_type(ppstmt, 2) != SQLITE_NULL)) | ||
| 1059 | { | ||
| 1060 | std::string prerhyme(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 1))); | ||
| 1061 | std::string rhyming(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 2))); | ||
| 1062 | adjective.rhymes.emplace_back(prerhyme, rhyming); | ||
| 1063 | } | ||
| 1064 | } | ||
| 1065 | |||
| 1066 | sqlite3_finalize(ppstmt); | ||
| 1067 | } | ||
| 1068 | |||
| 1069 | return output; | ||
| 1070 | } | ||
| 1071 | |||
| 1072 | }; | ||
| diff --git a/lib/adjective_query.h b/lib/adjective_query.h deleted file mode 100644 index e6a6609..0000000 --- a/lib/adjective_query.h +++ /dev/null | |||
| @@ -1,112 +0,0 @@ | |||
| 1 | #ifndef ADJECTIVE_QUERY_H_05E590FD | ||
| 2 | #define ADJECTIVE_QUERY_H_05E590FD | ||
| 3 | |||
| 4 | namespace verbly { | ||
| 5 | |||
| 6 | class adjective_query { | ||
| 7 | public: | ||
| 8 | adjective_query(const data& _data); | ||
| 9 | |||
| 10 | adjective_query& limit(int _limit); | ||
| 11 | adjective_query& random(); | ||
| 12 | adjective_query& except(const adjective& _word); | ||
| 13 | adjective_query& rhymes_with(const word& _word); | ||
| 14 | adjective_query& rhymes_with(rhyme _r); | ||
| 15 | adjective_query& has_pronunciation(); | ||
| 16 | adjective_query& has_rhyming_noun(); | ||
| 17 | adjective_query& has_rhyming_adjective(); | ||
| 18 | adjective_query& has_rhyming_adverb(); | ||
| 19 | adjective_query& has_rhyming_verb(); | ||
| 20 | adjective_query& with_stress(filter<std::vector<bool>> _arg); | ||
| 21 | |||
| 22 | adjective_query& requires_comparative_form(); | ||
| 23 | adjective_query& requires_superlative_form(); | ||
| 24 | adjective_query& position(adjective::positioning pos); | ||
| 25 | |||
| 26 | adjective_query& with_prefix(filter<std::string> _f); | ||
| 27 | adjective_query& with_suffix(filter<std::string> _f); | ||
| 28 | |||
| 29 | adjective_query& with_complexity(int _arg); | ||
| 30 | |||
| 31 | adjective_query& is_variant(); | ||
| 32 | adjective_query& variant_of(filter<noun> _f); | ||
| 33 | |||
| 34 | adjective_query& has_antonyms(); | ||
| 35 | adjective_query& antonym_of(filter<adjective> _f); | ||
| 36 | |||
| 37 | adjective_query& has_synonyms(); | ||
| 38 | adjective_query& synonym_of(filter<adjective> _f); | ||
| 39 | |||
| 40 | adjective_query& is_generalization(); | ||
| 41 | adjective_query& generalization_of(filter<adjective> _f); | ||
| 42 | |||
| 43 | adjective_query& is_specification(); | ||
| 44 | adjective_query& specification_of(filter<adjective> _f); | ||
| 45 | |||
| 46 | adjective_query& is_pertainymic(); | ||
| 47 | adjective_query& pertainym_of(filter<noun> _f); | ||
| 48 | |||
| 49 | adjective_query& is_mannernymic(); | ||
| 50 | adjective_query& anti_mannernym_of(filter<adverb> _f); | ||
| 51 | |||
| 52 | /* adjective_query& derived_from(const word& _w); | ||
| 53 | adjective_query& not_derived_from(const word& _w);*/ | ||
| 54 | |||
| 55 | std::list<adjective> run() const; | ||
| 56 | |||
| 57 | const static int unlimited = -1; | ||
| 58 | |||
| 59 | protected: | ||
| 60 | const data& _data; | ||
| 61 | int _limit = unlimited; | ||
| 62 | bool _random = false; | ||
| 63 | std::list<rhyme> _rhymes; | ||
| 64 | std::list<adjective> _except; | ||
| 65 | bool _has_prn = false; | ||
| 66 | bool _has_rhyming_noun = false; | ||
| 67 | bool _has_rhyming_adjective = false; | ||
| 68 | bool _has_rhyming_adverb = false; | ||
| 69 | bool _has_rhyming_verb = false; | ||
| 70 | filter<std::vector<bool>> _stress; | ||
| 71 | |||
| 72 | bool _requires_comparative_form = false; | ||
| 73 | bool _requires_superlative_form = false; | ||
| 74 | adjective::positioning _position = adjective::positioning::undefined; | ||
| 75 | |||
| 76 | filter<std::string> _with_prefix; | ||
| 77 | filter<std::string> _with_suffix; | ||
| 78 | |||
| 79 | int _with_complexity = unlimited; | ||
| 80 | |||
| 81 | bool _is_variant = false; | ||
| 82 | filter<noun> _variant_of; | ||
| 83 | |||
| 84 | bool _is_antonymic = false; | ||
| 85 | filter<adjective> _antonym_of; | ||
| 86 | |||
| 87 | bool _is_synonymic = false; | ||
| 88 | filter<adjective> _synonym_of; | ||
| 89 | |||
| 90 | bool _is_generalization = false; | ||
| 91 | filter<adjective> _generalization_of; | ||
| 92 | |||
| 93 | bool _is_specification = false; | ||
| 94 | filter<adjective> _specification_of; | ||
| 95 | |||
| 96 | bool _is_pertainymic = false; | ||
| 97 | filter<noun> _pertainym_of; | ||
| 98 | |||
| 99 | bool _is_mannernymic = false; | ||
| 100 | filter<adverb> _anti_mannernym_of; | ||
| 101 | |||
| 102 | /* std::list<adjective> _derived_from_adjective; | ||
| 103 | std::list<adjective> _not_derived_from_adjective; | ||
| 104 | std::list<adverb> _derived_from_adverb; | ||
| 105 | std::list<adverb> _not_derived_from_adverb; | ||
| 106 | std::list<noun> _derived_from_noun; | ||
| 107 | std::list<noun> _not_derived_from_noun;*/ | ||
| 108 | }; | ||
| 109 | |||
| 110 | }; | ||
| 111 | |||
| 112 | #endif /* end of include guard: ADJECTIVE_QUERY_H_05E590FD */ | ||
| diff --git a/lib/adverb.cpp b/lib/adverb.cpp deleted file mode 100644 index 442574e..0000000 --- a/lib/adverb.cpp +++ /dev/null | |||
| @@ -1,71 +0,0 @@ | |||
| 1 | #include "verbly.h" | ||
| 2 | |||
| 3 | namespace verbly { | ||
| 4 | |||
| 5 | adverb::adverb() | ||
| 6 | { | ||
| 7 | |||
| 8 | } | ||
| 9 | |||
| 10 | adverb::adverb(const data& _data, int _id) : word(_data, _id) | ||
| 11 | { | ||
| 12 | |||
| 13 | } | ||
| 14 | |||
| 15 | std::string adverb::base_form() const | ||
| 16 | { | ||
| 17 | assert(_valid == true); | ||
| 18 | |||
| 19 | return _base_form; | ||
| 20 | } | ||
| 21 | |||
| 22 | std::string adverb::comparative_form() const | ||
| 23 | { | ||
| 24 | assert(_valid == true); | ||
| 25 | |||
| 26 | return _comparative_form; | ||
| 27 | } | ||
| 28 | |||
| 29 | std::string adverb::superlative_form() const | ||
| 30 | { | ||
| 31 | assert(_valid == true); | ||
| 32 | |||
| 33 | return _superlative_form; | ||
| 34 | } | ||
| 35 | |||
| 36 | bool adverb::has_comparative_form() const | ||
| 37 | { | ||
| 38 | assert(_valid == true); | ||
| 39 | |||
| 40 | return !_comparative_form.empty(); | ||
| 41 | } | ||
| 42 | |||
| 43 | bool adverb::has_superlative_form() const | ||
| 44 | { | ||
| 45 | assert(_valid == true); | ||
| 46 | |||
| 47 | return !_superlative_form.empty(); | ||
| 48 | } | ||
| 49 | |||
| 50 | adverb_query adverb::antonyms() const | ||
| 51 | { | ||
| 52 | assert(_valid == true); | ||
| 53 | |||
| 54 | return _data->adverbs().antonym_of(*this); | ||
| 55 | } | ||
| 56 | |||
| 57 | adverb_query adverb::synonyms() const | ||
| 58 | { | ||
| 59 | assert(_valid == true); | ||
| 60 | |||
| 61 | return _data->adverbs().synonym_of(*this); | ||
| 62 | } | ||
| 63 | |||
| 64 | adjective_query adverb::anti_mannernyms() const | ||
| 65 | { | ||
| 66 | assert(_valid == true); | ||
| 67 | |||
| 68 | return _data->adjectives().anti_mannernym_of(*this); | ||
| 69 | } | ||
| 70 | |||
| 71 | }; | ||
| diff --git a/lib/adverb.h b/lib/adverb.h deleted file mode 100644 index 56d4e28..0000000 --- a/lib/adverb.h +++ /dev/null | |||
| @@ -1,35 +0,0 @@ | |||
| 1 | #ifndef ADVERB_H_86F8302F | ||
| 2 | #define ADVERB_H_86F8302F | ||
| 3 | |||
| 4 | namespace verbly { | ||
| 5 | |||
| 6 | class adverb : public word { | ||
| 7 | private: | ||
| 8 | std::string _base_form; | ||
| 9 | std::string _comparative_form; | ||
| 10 | std::string _superlative_form; | ||
| 11 | |||
| 12 | friend class adverb_query; | ||
| 13 | |||
| 14 | public: | ||
| 15 | adverb(); | ||
| 16 | adverb(const data& _data, int _id); | ||
| 17 | |||
| 18 | std::string base_form() const; | ||
| 19 | std::string comparative_form() const; | ||
| 20 | std::string superlative_form() const; | ||
| 21 | |||
| 22 | bool has_comparative_form() const; | ||
| 23 | bool has_superlative_form() const; | ||
| 24 | |||
| 25 | adverb_query antonyms() const; | ||
| 26 | adverb_query synonyms() const; | ||
| 27 | adjective_query anti_mannernyms() const; | ||
| 28 | |||
| 29 | adverb_query& derived_from(const word& _w); | ||
| 30 | adverb_query& not_derived_from(const word& _w); | ||
| 31 | }; | ||
| 32 | |||
| 33 | }; | ||
| 34 | |||
| 35 | #endif /* end of include guard: ADVERB_H_86F8302F */ | ||
| diff --git a/lib/adverb_query.cpp b/lib/adverb_query.cpp deleted file mode 100644 index 3e62bb7..0000000 --- a/lib/adverb_query.cpp +++ /dev/null | |||
| @@ -1,758 +0,0 @@ | |||
| 1 | #include "verbly.h" | ||
| 2 | |||
| 3 | namespace verbly { | ||
| 4 | |||
| 5 | adverb_query::adverb_query(const data& _data) : _data(_data) | ||
| 6 | { | ||
| 7 | |||
| 8 | } | ||
| 9 | |||
| 10 | adverb_query& adverb_query::limit(int _limit) | ||
| 11 | { | ||
| 12 | if ((_limit > 0) || (_limit == unlimited)) | ||
| 13 | { | ||
| 14 | this->_limit = _limit; | ||
| 15 | } | ||
| 16 | |||
| 17 | return *this; | ||
| 18 | } | ||
| 19 | |||
| 20 | adverb_query& adverb_query::random() | ||
| 21 | { | ||
| 22 | this->_random = true; | ||
| 23 | |||
| 24 | return *this; | ||
| 25 | } | ||
| 26 | |||
| 27 | adverb_query& adverb_query::except(const adverb& _word) | ||
| 28 | { | ||
| 29 | _except.push_back(_word); | ||
| 30 | |||
| 31 | return *this; | ||
| 32 | } | ||
| 33 | |||
| 34 | adverb_query& adverb_query::rhymes_with(const word& _word) | ||
| 35 | { | ||
| 36 | for (auto rhyme : _word.get_rhymes()) | ||
| 37 | { | ||
| 38 | _rhymes.push_back(rhyme); | ||
| 39 | } | ||
| 40 | |||
| 41 | if (dynamic_cast<const adverb*>(&_word) != nullptr) | ||
| 42 | { | ||
| 43 | _except.push_back(dynamic_cast<const adverb&>(_word)); | ||
| 44 | } | ||
| 45 | |||
| 46 | return *this; | ||
| 47 | } | ||
| 48 | |||
| 49 | adverb_query& adverb_query::rhymes_with(rhyme _r) | ||
| 50 | { | ||
| 51 | _rhymes.push_back(_r); | ||
| 52 | |||
| 53 | return *this; | ||
| 54 | } | ||
| 55 | |||
| 56 | adverb_query& adverb_query::has_pronunciation() | ||
| 57 | { | ||
| 58 | this->_has_prn = true; | ||
| 59 | |||
| 60 | return *this; | ||
| 61 | } | ||
| 62 | |||
| 63 | adverb_query& adverb_query::has_rhyming_noun() | ||
| 64 | { | ||
| 65 | _has_rhyming_noun = true; | ||
| 66 | |||
| 67 | return *this; | ||
| 68 | } | ||
| 69 | |||
| 70 | adverb_query& adverb_query::has_rhyming_adjective() | ||
| 71 | { | ||
| 72 | _has_rhyming_adjective = true; | ||
| 73 | |||
| 74 | return *this; | ||
| 75 | } | ||
| 76 | |||
| 77 | adverb_query& adverb_query::has_rhyming_adverb() | ||
| 78 | { | ||
| 79 | _has_rhyming_adverb = true; | ||
| 80 | |||
| 81 | return *this; | ||
| 82 | } | ||
| 83 | |||
| 84 | adverb_query& adverb_query::has_rhyming_verb() | ||
| 85 | { | ||
| 86 | _has_rhyming_verb = true; | ||
| 87 | |||
| 88 | return *this; | ||
| 89 | } | ||
| 90 | |||
| 91 | adverb_query& adverb_query::requires_comparative_form() | ||
| 92 | { | ||
| 93 | _requires_comparative_form = true; | ||
| 94 | |||
| 95 | return *this; | ||
| 96 | } | ||
| 97 | |||
| 98 | adverb_query& adverb_query::requires_superlative_form() | ||
| 99 | { | ||
| 100 | _requires_superlative_form = true; | ||
| 101 | |||
| 102 | return *this; | ||
| 103 | } | ||
| 104 | |||
| 105 | adverb_query& adverb_query::with_stress(filter<std::vector<bool>> _arg) | ||
| 106 | { | ||
| 107 | _stress = _arg; | ||
| 108 | |||
| 109 | return *this; | ||
| 110 | } | ||
| 111 | |||
| 112 | adverb_query& adverb_query::with_prefix(filter<std::string> _f) | ||
| 113 | { | ||
| 114 | _f.clean(); | ||
| 115 | _with_prefix = _f; | ||
| 116 | |||
| 117 | return *this; | ||
| 118 | } | ||
| 119 | |||
| 120 | adverb_query& adverb_query::with_suffix(filter<std::string> _f) | ||
| 121 | { | ||
| 122 | _f.clean(); | ||
| 123 | _with_suffix = _f; | ||
| 124 | |||
| 125 | return *this; | ||
| 126 | } | ||
| 127 | |||
| 128 | adverb_query& adverb_query::with_complexity(int _arg) | ||
| 129 | { | ||
| 130 | _with_complexity = _arg; | ||
| 131 | |||
| 132 | return *this; | ||
| 133 | } | ||
| 134 | |||
| 135 | adverb_query& adverb_query::has_antonyms() | ||
| 136 | { | ||
| 137 | _has_antonyms = true; | ||
| 138 | |||
| 139 | return *this; | ||
| 140 | } | ||
| 141 | |||
| 142 | adverb_query& adverb_query::antonym_of(filter<adverb> _f) | ||
| 143 | { | ||
| 144 | _f.clean(); | ||
| 145 | _antonym_of = _f; | ||
| 146 | |||
| 147 | return *this; | ||
| 148 | } | ||
| 149 | |||
| 150 | adverb_query& adverb_query::has_synonyms() | ||
| 151 | { | ||
| 152 | _has_synonyms = true; | ||
| 153 | |||
| 154 | return *this; | ||
| 155 | } | ||
| 156 | |||
| 157 | adverb_query& adverb_query::synonym_of(filter<adverb> _f) | ||
| 158 | { | ||
| 159 | _f.clean(); | ||
| 160 | _synonym_of = _f; | ||
| 161 | |||
| 162 | return *this; | ||
| 163 | } | ||
| 164 | |||
| 165 | adverb_query& adverb_query::is_mannernymic() | ||
| 166 | { | ||
| 167 | _is_mannernymic = true; | ||
| 168 | |||
| 169 | return *this; | ||
| 170 | } | ||
| 171 | |||
| 172 | adverb_query& adverb_query::mannernym_of(filter<adjective> _f) | ||
| 173 | { | ||
| 174 | _f.clean(); | ||
| 175 | _mannernym_of = _f; | ||
| 176 | |||
| 177 | return *this; | ||
| 178 | } | ||
| 179 | /* | ||
| 180 | adverb_query& adverb_query::derived_from(const word& _w) | ||
| 181 | { | ||
| 182 | if (dynamic_cast<const adjective*>(&_w) != nullptr) | ||
| 183 | { | ||
| 184 | _derived_from_adjective.push_back(dynamic_cast<const adjective&>(_w)); | ||
| 185 | } else if (dynamic_cast<const adverb*>(&_w) != nullptr) | ||
| 186 | { | ||
| 187 | _derived_from_adverb.push_back(dynamic_cast<const adverb&>(_w)); | ||
| 188 | } else if (dynamic_cast<const noun*>(&_w) != nullptr) | ||
| 189 | { | ||
| 190 | _derived_from_noun.push_back(dynamic_cast<const noun&>(_w)); | ||
| 191 | } | ||
| 192 | |||
| 193 | return *this; | ||
| 194 | } | ||
| 195 | |||
| 196 | adverb_query& adverb_query::not_derived_from(const word& _w) | ||
| 197 | { | ||
| 198 | if (dynamic_cast<const adjective*>(&_w) != nullptr) | ||
| 199 | { | ||
| 200 | _not_derived_from_adjective.push_back(dynamic_cast<const adjective&>(_w)); | ||
| 201 | } else if (dynamic_cast<const adverb*>(&_w) != nullptr) | ||
| 202 | { | ||
| 203 | _not_derived_from_adverb.push_back(dynamic_cast<const adverb&>(_w)); | ||
| 204 | } else if (dynamic_cast<const noun*>(&_w) != nullptr) | ||
| 205 | { | ||
| 206 | _not_derived_from_noun.push_back(dynamic_cast<const noun&>(_w)); | ||
| 207 | } | ||
| 208 | |||
| 209 | return *this; | ||
| 210 | } | ||
| 211 | */ | ||
| 212 | std::list<adverb> adverb_query::run() const | ||
| 213 | { | ||
| 214 | std::stringstream construct; | ||
| 215 | construct << "SELECT adverb_id, base_form, comparative, superlative FROM adverbs"; | ||
| 216 | std::list<std::string> conditions; | ||
| 217 | std::list<binding> bindings; | ||
| 218 | |||
| 219 | if (_has_prn) | ||
| 220 | { | ||
| 221 | conditions.push_back("adverb_id IN (SELECT adverb_id FROM adverb_pronunciations)"); | ||
| 222 | } | ||
| 223 | |||
| 224 | if (!_rhymes.empty()) | ||
| 225 | { | ||
| 226 | std::list<std::string> clauses(_rhymes.size(), "(prerhyme != ? AND rhyme = ?)"); | ||
| 227 | std::string cond = "adverb_id IN (SELECT adverb_id FROM adverb_pronunciations WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
| 228 | conditions.push_back(cond); | ||
| 229 | |||
| 230 | for (auto rhy : _rhymes) | ||
| 231 | { | ||
| 232 | bindings.emplace_back(rhy.get_prerhyme()); | ||
| 233 | bindings.emplace_back(rhy.get_rhyme()); | ||
| 234 | } | ||
| 235 | } | ||
| 236 | |||
| 237 | if (_has_rhyming_noun) | ||
| 238 | { | ||
| 239 | conditions.push_back("adverb_id IN (SELECT a.adverb_id FROM adverbs AS a INNER JOIN adverb_pronunciations AS curp ON curp.noun_id = a.adverb_id INNER JOIN noun_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)"); | ||
| 240 | } | ||
| 241 | |||
| 242 | if (_has_rhyming_adjective) | ||
| 243 | { | ||
| 244 | conditions.push_back("adverb_id IN (SELECT a.adverb_id FROM adverbs AS a INNER JOIN adverb_pronunciations AS curp ON curp.noun_id = a.adverb_id INNER JOIN adjective_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)"); | ||
| 245 | } | ||
| 246 | |||
| 247 | if (_has_rhyming_adverb) | ||
| 248 | { | ||
| 249 | conditions.push_back("adverb_id IN (SELECT a.adverb_id FROM adverbs AS a INNER JOIN adverb_pronunciations AS curp ON curp.noun_id = a.adverb_id INNER JOIN adverb_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme AND rhmp.adverb_id != curp.adverb_id)"); | ||
| 250 | } | ||
| 251 | |||
| 252 | if (_has_rhyming_verb) | ||
| 253 | { | ||
| 254 | conditions.push_back("adverb_id IN (SELECT a.adverb_id FROM adverbs AS a INNER JOIN adverb_pronunciations AS curp ON curp.noun_id = a.adverb_id INNER JOIN verb_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)"); | ||
| 255 | } | ||
| 256 | |||
| 257 | for (auto except : _except) | ||
| 258 | { | ||
| 259 | conditions.push_back("adverb_id != ?"); | ||
| 260 | bindings.emplace_back(except._id); | ||
| 261 | } | ||
| 262 | |||
| 263 | if (_requires_comparative_form) | ||
| 264 | { | ||
| 265 | conditions.push_back("comparative IS NOT NULL"); | ||
| 266 | } | ||
| 267 | |||
| 268 | if (_requires_superlative_form) | ||
| 269 | { | ||
| 270 | conditions.push_back("superlative IS NOT NULL"); | ||
| 271 | } | ||
| 272 | |||
| 273 | if (!_stress.empty()) | ||
| 274 | { | ||
| 275 | std::stringstream cond; | ||
| 276 | if (_stress.get_notlogic()) | ||
| 277 | { | ||
| 278 | cond << "adverb_id NOT IN"; | ||
| 279 | } else { | ||
| 280 | cond << "adverb_id IN"; | ||
| 281 | } | ||
| 282 | |||
| 283 | cond << "(SELECT adverb_id FROM adverb_pronunciations WHERE "; | ||
| 284 | |||
| 285 | std::function<std::string (filter<std::vector<bool>>, bool)> recur = [&] (filter<std::vector<bool>> f, bool notlogic) -> std::string { | ||
| 286 | switch (f.get_type()) | ||
| 287 | { | ||
| 288 | case filter<std::vector<bool>>::type::singleton: | ||
| 289 | { | ||
| 290 | std::ostringstream _val; | ||
| 291 | for (auto syl : f.get_elem()) | ||
| 292 | { | ||
| 293 | if (syl) | ||
| 294 | { | ||
| 295 | _val << "1"; | ||
| 296 | } else { | ||
| 297 | _val << "0"; | ||
| 298 | } | ||
| 299 | } | ||
| 300 | |||
| 301 | bindings.emplace_back(_val.str()); | ||
| 302 | |||
| 303 | if (notlogic == f.get_notlogic()) | ||
| 304 | { | ||
| 305 | return "stress = ?"; | ||
| 306 | } else { | ||
| 307 | return "stress != ?"; | ||
| 308 | } | ||
| 309 | } | ||
| 310 | |||
| 311 | case filter<std::vector<bool>>::type::group: | ||
| 312 | { | ||
| 313 | bool truelogic = notlogic != f.get_notlogic(); | ||
| 314 | |||
| 315 | std::list<std::string> clauses; | ||
| 316 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<std::vector<bool>> f2) { | ||
| 317 | return recur(f2, truelogic); | ||
| 318 | }); | ||
| 319 | |||
| 320 | if (truelogic == f.get_orlogic()) | ||
| 321 | { | ||
| 322 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
| 323 | } else { | ||
| 324 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
| 325 | } | ||
| 326 | } | ||
| 327 | } | ||
| 328 | }; | ||
| 329 | |||
| 330 | cond << recur(_stress, _stress.get_notlogic()); | ||
| 331 | cond << ")"; | ||
| 332 | conditions.push_back(cond.str()); | ||
| 333 | } | ||
| 334 | |||
| 335 | if (!_with_prefix.empty()) | ||
| 336 | { | ||
| 337 | std::function<std::string (filter<std::string>, bool)> recur = [&] (filter<std::string> f, bool notlogic) -> std::string { | ||
| 338 | switch (f.get_type()) | ||
| 339 | { | ||
| 340 | case filter<std::string>::type::singleton: | ||
| 341 | { | ||
| 342 | bindings.emplace_back(f.get_elem() + "%"); | ||
| 343 | |||
| 344 | if (notlogic == f.get_notlogic()) | ||
| 345 | { | ||
| 346 | return "base_form LIKE ?"; | ||
| 347 | } else { | ||
| 348 | return "base_form NOT LIKE ?"; | ||
| 349 | } | ||
| 350 | } | ||
| 351 | |||
| 352 | case filter<std::string>::type::group: | ||
| 353 | { | ||
| 354 | bool truelogic = notlogic != f.get_notlogic(); | ||
| 355 | |||
| 356 | std::list<std::string> clauses; | ||
| 357 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<std::string> f2) { | ||
| 358 | return recur(f2, truelogic); | ||
| 359 | }); | ||
| 360 | |||
| 361 | if (truelogic == f.get_orlogic()) | ||
| 362 | { | ||
| 363 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
| 364 | } else { | ||
| 365 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
| 366 | } | ||
| 367 | } | ||
| 368 | } | ||
| 369 | }; | ||
| 370 | |||
| 371 | conditions.push_back(recur(_with_prefix, false)); | ||
| 372 | } | ||
| 373 | |||
| 374 | if (!_with_suffix.empty()) | ||
| 375 | { | ||
| 376 | std::function<std::string (filter<std::string>, bool)> recur = [&] (filter<std::string> f, bool notlogic) -> std::string { | ||
| 377 | switch (f.get_type()) | ||
| 378 | { | ||
| 379 | case filter<std::string>::type::singleton: | ||
| 380 | { | ||
| 381 | bindings.emplace_back("%" + f.get_elem()); | ||
| 382 | |||
| 383 | if (notlogic == f.get_notlogic()) | ||
| 384 | { | ||
| 385 | return "base_form LIKE ?"; | ||
| 386 | } else { | ||
| 387 | return "base_form NOT LIKE ?"; | ||
| 388 | } | ||
| 389 | } | ||
| 390 | |||
| 391 | case filter<std::string>::type::group: | ||
| 392 | { | ||
| 393 | bool truelogic = notlogic != f.get_notlogic(); | ||
| 394 | |||
| 395 | std::list<std::string> clauses; | ||
| 396 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<std::string> f2) { | ||
| 397 | return recur(f2, truelogic); | ||
| 398 | }); | ||
| 399 | |||
| 400 | if (truelogic == f.get_orlogic()) | ||
| 401 | { | ||
| 402 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
| 403 | } else { | ||
| 404 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
| 405 | } | ||
| 406 | } | ||
| 407 | } | ||
| 408 | }; | ||
| 409 | |||
| 410 | conditions.push_back(recur(_with_suffix, false)); | ||
| 411 | } | ||
| 412 | |||
| 413 | if (_with_complexity != unlimited) | ||
| 414 | { | ||
| 415 | conditions.push_back("complexity = ?"); | ||
| 416 | bindings.emplace_back(_with_complexity); | ||
| 417 | } | ||
| 418 | |||
| 419 | if (_has_antonyms) | ||
| 420 | { | ||
| 421 | conditions.push_back("adverb_id IN (SELECT adverb_2_id FROM adverb_antonymy)"); | ||
| 422 | } | ||
| 423 | |||
| 424 | if (!_antonym_of.empty()) | ||
| 425 | { | ||
| 426 | std::stringstream cond; | ||
| 427 | if (_antonym_of.get_notlogic()) | ||
| 428 | { | ||
| 429 | cond << "adverb_id NOT IN"; | ||
| 430 | } else { | ||
| 431 | cond << "adverb_id IN"; | ||
| 432 | } | ||
| 433 | |||
| 434 | cond << "(SELECT adverb_2_id FROM adverb_antonymy WHERE "; | ||
| 435 | |||
| 436 | std::function<std::string (filter<adverb>, bool)> recur = [&] (filter<adverb> f, bool notlogic) -> std::string { | ||
| 437 | switch (f.get_type()) | ||
| 438 | { | ||
| 439 | case filter<adverb>::type::singleton: | ||
| 440 | { | ||
| 441 | bindings.emplace_back(f.get_elem()._id); | ||
| 442 | |||
| 443 | if (notlogic == f.get_notlogic()) | ||
| 444 | { | ||
| 445 | return "adverb_1_id = ?"; | ||
| 446 | } else { | ||
| 447 | return "adverb_1_id != ?"; | ||
| 448 | } | ||
| 449 | } | ||
| 450 | |||
| 451 | case filter<adverb>::type::group: | ||
| 452 | { | ||
| 453 | bool truelogic = notlogic != f.get_notlogic(); | ||
| 454 | |||
| 455 | std::list<std::string> clauses; | ||
| 456 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<adverb> f2) { | ||
| 457 | return recur(f2, truelogic); | ||
| 458 | }); | ||
| 459 | |||
| 460 | if (truelogic == f.get_orlogic()) | ||
| 461 | { | ||
| 462 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
| 463 | } else { | ||
| 464 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
| 465 | } | ||
| 466 | } | ||
| 467 | } | ||
| 468 | }; | ||
| 469 | |||
| 470 | cond << recur(_antonym_of, _antonym_of.get_notlogic()); | ||
| 471 | cond << ")"; | ||
| 472 | conditions.push_back(cond.str()); | ||
| 473 | } | ||
| 474 | |||
| 475 | if (_has_synonyms) | ||
| 476 | { | ||
| 477 | conditions.push_back("adverb_id IN (SELECT adverb_2_id FROM adverb_synonymy)"); | ||
| 478 | } | ||
| 479 | |||
| 480 | if (!_synonym_of.empty()) | ||
| 481 | { | ||
| 482 | std::stringstream cond; | ||
| 483 | if (_antonym_of.get_notlogic()) | ||
| 484 | { | ||
| 485 | cond << "adverb_id NOT IN"; | ||
| 486 | } else { | ||
| 487 | cond << "adverb_id IN"; | ||
| 488 | } | ||
| 489 | |||
| 490 | cond << "(SELECT adverb_2_id FROM adverb_synonymy WHERE "; | ||
| 491 | |||
| 492 | std::function<std::string (filter<adverb>, bool)> recur = [&] (filter<adverb> f, bool notlogic) -> std::string { | ||
| 493 | switch (f.get_type()) | ||
| 494 | { | ||
| 495 | case filter<adverb>::type::singleton: | ||
| 496 | { | ||
| 497 | bindings.emplace_back(f.get_elem()._id); | ||
| 498 | |||
| 499 | if (notlogic == f.get_notlogic()) | ||
| 500 | { | ||
| 501 | return "adverb_1_id = ?"; | ||
| 502 | } else { | ||
| 503 | return "adverb_1_id != ?"; | ||
| 504 | } | ||
| 505 | } | ||
| 506 | |||
| 507 | case filter<adverb>::type::group: | ||
| 508 | { | ||
| 509 | bool truelogic = notlogic != f.get_notlogic(); | ||
| 510 | |||
| 511 | std::list<std::string> clauses; | ||
| 512 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<adverb> f2) { | ||
| 513 | return recur(f2, truelogic); | ||
| 514 | }); | ||
| 515 | |||
| 516 | if (truelogic == f.get_orlogic()) | ||
| 517 | { | ||
| 518 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
| 519 | } else { | ||
| 520 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
| 521 | } | ||
| 522 | } | ||
| 523 | } | ||
| 524 | }; | ||
| 525 | |||
| 526 | cond << recur(_synonym_of, _synonym_of.get_notlogic()); | ||
| 527 | cond << ")"; | ||
| 528 | conditions.push_back(cond.str()); | ||
| 529 | } | ||
| 530 | |||
| 531 | if (_is_mannernymic) | ||
| 532 | { | ||
| 533 | conditions.push_back("adverb_id IN (SELECT mannernym_id FROM mannernymy)"); | ||
| 534 | } | ||
| 535 | |||
| 536 | if (!_mannernym_of.empty()) | ||
| 537 | { | ||
| 538 | std::stringstream cond; | ||
| 539 | if (_antonym_of.get_notlogic()) | ||
| 540 | { | ||
| 541 | cond << "adverb_id NOT IN"; | ||
| 542 | } else { | ||
| 543 | cond << "adverb_id IN"; | ||
| 544 | } | ||
| 545 | |||
| 546 | cond << "(SELECT mannernym_id FROM mannernymy WHERE "; | ||
| 547 | |||
| 548 | std::function<std::string (filter<adjective>, bool)> recur = [&] (filter<adjective> f, bool notlogic) -> std::string { | ||
| 549 | switch (f.get_type()) | ||
| 550 | { | ||
| 551 | case filter<adjective>::type::singleton: | ||
| 552 | { | ||
| 553 | bindings.emplace_back(f.get_elem()._id); | ||
| 554 | |||
| 555 | if (notlogic == f.get_notlogic()) | ||
| 556 | { | ||
| 557 | return "adjective_id = ?"; | ||
| 558 | } else { | ||
| 559 | return "adjective_id != ?"; | ||
| 560 | } | ||
| 561 | } | ||
| 562 | |||
| 563 | case filter<adjective>::type::group: | ||
| 564 | { | ||
| 565 | bool truelogic = notlogic != f.get_notlogic(); | ||
| 566 | |||
| 567 | std::list<std::string> clauses; | ||
| 568 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<adjective> f2) { | ||
| 569 | return recur(f2, truelogic); | ||
| 570 | }); | ||
| 571 | |||
| 572 | if (truelogic == f.get_orlogic()) | ||
| 573 | { | ||
| 574 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
| 575 | } else { | ||
| 576 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
| 577 | } | ||
| 578 | } | ||
| 579 | } | ||
| 580 | }; | ||
| 581 | |||
| 582 | cond << recur(_mannernym_of, _mannernym_of.get_notlogic()); | ||
| 583 | cond << ")"; | ||
| 584 | conditions.push_back(cond.str()); | ||
| 585 | } | ||
| 586 | |||
| 587 | /* if (!_derived_from_adjective.empty()) | ||
| 588 | { | ||
| 589 | std::list<std::string> clauses(_derived_from_adjective.size(), "adjective_id = @DERADJ"); | ||
| 590 | std::string cond = "adverb_id IN (SELECT adverb_id FROM adjective_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
| 591 | conditions.push_back(cond); | ||
| 592 | } | ||
| 593 | |||
| 594 | if (!_not_derived_from_adjective.empty()) | ||
| 595 | { | ||
| 596 | std::list<std::string> clauses(_not_derived_from_adjective.size(), "adjective_id = @NDERADJ"); | ||
| 597 | std::string cond = "adverb_id NOT IN (SELECT adverb_id FROM adjective_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
| 598 | conditions.push_back(cond); | ||
| 599 | } | ||
| 600 | |||
| 601 | if (!_derived_from_adverb.empty()) | ||
| 602 | { | ||
| 603 | std::list<std::string> clauses(_derived_from_adverb.size(), "adverb_2_id = @DERADV"); | ||
| 604 | std::string cond = "adverb_id IN (SELECT adverb_1_id FROM adverb_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
| 605 | conditions.push_back(cond); | ||
| 606 | } | ||
| 607 | |||
| 608 | if (!_not_derived_from_adverb.empty()) | ||
| 609 | { | ||
| 610 | std::list<std::string> clauses(_not_derived_from_adverb.size(), "adverb_2_id = @NDERADV"); | ||
| 611 | std::string cond = "adverb_id NOT IN (SELECT adverb_1_id FROM adverb_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
| 612 | conditions.push_back(cond); | ||
| 613 | } | ||
| 614 | |||
| 615 | if (!_derived_from_noun.empty()) | ||
| 616 | { | ||
| 617 | std::list<std::string> clauses(_derived_from_noun.size(), "noun_id = @DERN"); | ||
| 618 | std::string cond = "adverb_id IN (SELECT adverb_id FROM noun_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
| 619 | conditions.push_back(cond); | ||
| 620 | } | ||
| 621 | |||
| 622 | if (!_not_derived_from_noun.empty()) | ||
| 623 | { | ||
| 624 | std::list<std::string> clauses(_not_derived_from_noun.size(), "noun_id = @NDERN"); | ||
| 625 | std::string cond = "adverb_id NOT IN (SELECT adverb_id FROM noun_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
| 626 | conditions.push_back(cond); | ||
| 627 | }*/ | ||
| 628 | |||
| 629 | if (!conditions.empty()) | ||
| 630 | { | ||
| 631 | construct << " WHERE "; | ||
| 632 | construct << verbly::implode(std::begin(conditions), std::end(conditions), " AND "); | ||
| 633 | } | ||
| 634 | |||
| 635 | if (_random) | ||
| 636 | { | ||
| 637 | construct << " ORDER BY RANDOM()"; | ||
| 638 | } | ||
| 639 | |||
| 640 | if (_limit != unlimited) | ||
| 641 | { | ||
| 642 | construct << " LIMIT " << _limit; | ||
| 643 | } | ||
| 644 | |||
| 645 | sqlite3_stmt* ppstmt; | ||
| 646 | std::string query = construct.str(); | ||
| 647 | if (sqlite3_prepare_v2(_data.ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
| 648 | { | ||
| 649 | throw std::runtime_error(sqlite3_errmsg(_data.ppdb)); | ||
| 650 | } | ||
| 651 | |||
| 652 | int i = 1; | ||
| 653 | for (auto& binding : bindings) | ||
| 654 | { | ||
| 655 | switch (binding.get_type()) | ||
| 656 | { | ||
| 657 | case binding::type::integer: | ||
| 658 | { | ||
| 659 | sqlite3_bind_int(ppstmt, i, binding.get_integer()); | ||
| 660 | |||
| 661 | break; | ||
| 662 | } | ||
| 663 | |||
| 664 | case binding::type::string: | ||
| 665 | { | ||
| 666 | sqlite3_bind_text(ppstmt, i, binding.get_string().c_str(), binding.get_string().length(), SQLITE_TRANSIENT); | ||
| 667 | |||
| 668 | break; | ||
| 669 | } | ||
| 670 | } | ||
| 671 | |||
| 672 | i++; | ||
| 673 | } | ||
| 674 | |||
| 675 | /* | ||
| 676 | for (auto adj : _derived_from_adjective) | ||
| 677 | { | ||
| 678 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@DERADJ"), adj._id); | ||
| 679 | } | ||
| 680 | |||
| 681 | for (auto adj : _not_derived_from_adjective) | ||
| 682 | { | ||
| 683 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@NDERADJ"), adj._id); | ||
| 684 | } | ||
| 685 | |||
| 686 | for (auto adv : _derived_from_adverb) | ||
| 687 | { | ||
| 688 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@DERADV"), adv._id); | ||
| 689 | } | ||
| 690 | |||
| 691 | for (auto adv : _not_derived_from_adverb) | ||
| 692 | { | ||
| 693 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@NDERADV"), adv._id); | ||
| 694 | } | ||
| 695 | |||
| 696 | for (auto n : _derived_from_noun) | ||
| 697 | { | ||
| 698 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@DERN"), n._id); | ||
| 699 | } | ||
| 700 | |||
| 701 | for (auto n : _not_derived_from_noun) | ||
| 702 | { | ||
| 703 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@NDERN"), n._id); | ||
| 704 | }*/ | ||
| 705 | |||
| 706 | std::list<adverb> output; | ||
| 707 | while (sqlite3_step(ppstmt) == SQLITE_ROW) | ||
| 708 | { | ||
| 709 | adverb tnc {_data, sqlite3_column_int(ppstmt, 0)}; | ||
| 710 | tnc._base_form = std::string(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 1))); | ||
| 711 | |||
| 712 | if (sqlite3_column_type(ppstmt, 2) != SQLITE_NULL) | ||
| 713 | { | ||
| 714 | tnc._comparative_form = std::string(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 2))); | ||
| 715 | } | ||
| 716 | |||
| 717 | if (sqlite3_column_type(ppstmt, 3) != SQLITE_NULL) | ||
| 718 | { | ||
| 719 | tnc._superlative_form = std::string(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 3))); | ||
| 720 | } | ||
| 721 | |||
| 722 | output.push_back(tnc); | ||
| 723 | } | ||
| 724 | |||
| 725 | sqlite3_finalize(ppstmt); | ||
| 726 | |||
| 727 | for (auto& adverb : output) | ||
| 728 | { | ||
| 729 | query = "SELECT pronunciation, prerhyme, rhyme FROM adverb_pronunciations WHERE adverb_id = ?"; | ||
| 730 | if (sqlite3_prepare_v2(_data.ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
| 731 | { | ||
| 732 | throw std::runtime_error(sqlite3_errmsg(_data.ppdb)); | ||
| 733 | } | ||
| 734 | |||
| 735 | sqlite3_bind_int(ppstmt, 1, adverb._id); | ||
| 736 | |||
| 737 | while (sqlite3_step(ppstmt) == SQLITE_ROW) | ||
| 738 | { | ||
| 739 | std::string pronunciation(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 0))); | ||
| 740 | auto phonemes = verbly::split<std::list<std::string>>(pronunciation, " "); | ||
| 741 | |||
| 742 | adverb.pronunciations.push_back(phonemes); | ||
| 743 | |||
| 744 | if ((sqlite3_column_type(ppstmt, 1) != SQLITE_NULL) && (sqlite3_column_type(ppstmt, 2) != SQLITE_NULL)) | ||
| 745 | { | ||
| 746 | std::string prerhyme(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 1))); | ||
| 747 | std::string rhyming(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 2))); | ||
| 748 | adverb.rhymes.emplace_back(prerhyme, rhyming); | ||
| 749 | } | ||
| 750 | } | ||
| 751 | |||
| 752 | sqlite3_finalize(ppstmt); | ||
| 753 | } | ||
| 754 | |||
| 755 | return output; | ||
| 756 | } | ||
| 757 | |||
| 758 | }; | ||
| diff --git a/lib/adverb_query.h b/lib/adverb_query.h deleted file mode 100644 index 30e7400..0000000 --- a/lib/adverb_query.h +++ /dev/null | |||
| @@ -1,86 +0,0 @@ | |||
| 1 | #ifndef ADVERB_QUERY_H_CA13CCDD | ||
| 2 | #define ADVERB_QUERY_H_CA13CCDD | ||
| 3 | |||
| 4 | namespace verbly { | ||
| 5 | |||
| 6 | class adverb_query { | ||
| 7 | public: | ||
| 8 | adverb_query(const data& _data); | ||
| 9 | |||
| 10 | adverb_query& limit(int _limit); | ||
| 11 | adverb_query& random(); | ||
| 12 | adverb_query& except(const adverb& _word); | ||
| 13 | adverb_query& rhymes_with(const word& _word); | ||
| 14 | adverb_query& rhymes_with(rhyme _r); | ||
| 15 | adverb_query& has_pronunciation(); | ||
| 16 | adverb_query& has_rhyming_noun(); | ||
| 17 | adverb_query& has_rhyming_adjective(); | ||
| 18 | adverb_query& has_rhyming_adverb(); | ||
| 19 | adverb_query& has_rhyming_verb(); | ||
| 20 | adverb_query& with_stress(filter<std::vector<bool>> _arg); | ||
| 21 | |||
| 22 | adverb_query& requires_comparative_form(); | ||
| 23 | adverb_query& requires_superlative_form(); | ||
| 24 | |||
| 25 | adverb_query& with_prefix(filter<std::string> _f); | ||
| 26 | adverb_query& with_suffix(filter<std::string> _f); | ||
| 27 | |||
| 28 | adverb_query& with_complexity(int _arg); | ||
| 29 | |||
| 30 | adverb_query& has_antonyms(); | ||
| 31 | adverb_query& antonym_of(filter<adverb> _f); | ||
| 32 | |||
| 33 | adverb_query& has_synonyms(); | ||
| 34 | adverb_query& synonym_of(filter<adverb> _f); | ||
| 35 | |||
| 36 | adverb_query& is_mannernymic(); | ||
| 37 | adverb_query& mannernym_of(filter<adjective> _f); | ||
| 38 | |||
| 39 | /* adverb_query& derived_from(const word& _w); | ||
| 40 | adverb_query& not_derived_from(const word& _w);*/ | ||
| 41 | |||
| 42 | std::list<adverb> run() const; | ||
| 43 | |||
| 44 | const static int unlimited = -1; | ||
| 45 | |||
| 46 | private: | ||
| 47 | const data& _data; | ||
| 48 | int _limit = unlimited; | ||
| 49 | bool _random = false; | ||
| 50 | std::list<rhyme> _rhymes; | ||
| 51 | std::list<adverb> _except; | ||
| 52 | bool _has_prn = false; | ||
| 53 | bool _has_rhyming_noun = false; | ||
| 54 | bool _has_rhyming_adjective = false; | ||
| 55 | bool _has_rhyming_adverb = false; | ||
| 56 | bool _has_rhyming_verb = false; | ||
| 57 | filter<std::vector<bool>> _stress; | ||
| 58 | |||
| 59 | bool _requires_comparative_form = false; | ||
| 60 | bool _requires_superlative_form = false; | ||
| 61 | |||
| 62 | filter<std::string> _with_prefix; | ||
| 63 | filter<std::string> _with_suffix; | ||
| 64 | |||
| 65 | int _with_complexity = unlimited; | ||
| 66 | |||
| 67 | bool _has_antonyms = false; | ||
| 68 | filter<adverb> _antonym_of; | ||
| 69 | |||
| 70 | bool _has_synonyms = false; | ||
| 71 | filter<adverb> _synonym_of; | ||
| 72 | |||
| 73 | bool _is_mannernymic = false; | ||
| 74 | filter<adjective> _mannernym_of; | ||
| 75 | |||
| 76 | /* std::list<adjective> _derived_from_adjective; | ||
| 77 | std::list<adjective> _not_derived_from_adjective; | ||
| 78 | std::list<adverb> _derived_from_adverb; | ||
| 79 | std::list<adverb> _not_derived_from_adverb; | ||
| 80 | std::list<noun> _derived_from_noun; | ||
| 81 | std::list<noun> _not_derived_from_noun;*/ | ||
| 82 | }; | ||
| 83 | |||
| 84 | }; | ||
| 85 | |||
| 86 | #endif /* end of include guard: ADVERB_QUERY_H_CA13CCDD */ | ||
| diff --git a/lib/binding.cpp b/lib/binding.cpp new file mode 100644 index 0000000..349cd6f --- /dev/null +++ b/lib/binding.cpp | |||
| @@ -0,0 +1,180 @@ | |||
| 1 | #include "binding.h" | ||
| 2 | #include <stdexcept> | ||
| 3 | #include <utility> | ||
| 4 | |||
| 5 | namespace verbly { | ||
| 6 | |||
| 7 | binding::binding(const binding& other) | ||
| 8 | { | ||
| 9 | type_ = other.type_; | ||
| 10 | |||
| 11 | switch (type_) | ||
| 12 | { | ||
| 13 | case type::integer: | ||
| 14 | { | ||
| 15 | integer_ = other.integer_; | ||
| 16 | |||
| 17 | break; | ||
| 18 | } | ||
| 19 | |||
| 20 | case type::string: | ||
| 21 | { | ||
| 22 | new(&string_) std::string(other.string_); | ||
| 23 | |||
| 24 | break; | ||
| 25 | } | ||
| 26 | |||
| 27 | case type::invalid: | ||
| 28 | { | ||
| 29 | break; | ||
| 30 | } | ||
| 31 | } | ||
| 32 | } | ||
| 33 | |||
| 34 | binding::binding(binding&& other) : binding() | ||
| 35 | { | ||
| 36 | swap(*this, other); | ||
| 37 | } | ||
| 38 | |||
| 39 | binding& binding::operator=(binding other) | ||
| 40 | { | ||
| 41 | swap(*this, other); | ||
| 42 | |||
| 43 | return *this; | ||
| 44 | } | ||
| 45 | |||
| 46 | void swap(binding& first, binding& second) | ||
| 47 | { | ||
| 48 | using type = binding::type; | ||
| 49 | |||
| 50 | type tempType = first.type_; | ||
| 51 | int tempInteger; | ||
| 52 | std::string tempString; | ||
| 53 | |||
| 54 | switch (first.type_) | ||
| 55 | { | ||
| 56 | case type::integer: | ||
| 57 | { | ||
| 58 | tempInteger = first.integer_; | ||
| 59 | |||
| 60 | break; | ||
| 61 | } | ||
| 62 | |||
| 63 | case type::string: | ||
| 64 | { | ||
| 65 | tempString = std::move(tempString); | ||
| 66 | |||
| 67 | break; | ||
| 68 | } | ||
| 69 | |||
| 70 | case type::invalid: | ||
| 71 | { | ||
| 72 | break; | ||
| 73 | } | ||
| 74 | } | ||
| 75 | |||
| 76 | first.~binding(); | ||
| 77 | |||
| 78 | first.type_ = second.type_; | ||
| 79 | |||
| 80 | switch (second.type_) | ||
| 81 | { | ||
| 82 | case type::integer: | ||
| 83 | { | ||
| 84 | first.integer_ = second.integer_; | ||
| 85 | |||
| 86 | break; | ||
| 87 | } | ||
| 88 | |||
| 89 | case type::string: | ||
| 90 | { | ||
| 91 | new(&first.string_) std::string(std::move(second.string_)); | ||
| 92 | |||
| 93 | break; | ||
| 94 | } | ||
| 95 | |||
| 96 | case type::invalid: | ||
| 97 | { | ||
| 98 | break; | ||
| 99 | } | ||
| 100 | } | ||
| 101 | |||
| 102 | second.~binding(); | ||
| 103 | |||
| 104 | second.type_ = tempType; | ||
| 105 | |||
| 106 | switch (tempType) | ||
| 107 | { | ||
| 108 | case type::integer: | ||
| 109 | { | ||
| 110 | second.integer_ = tempInteger; | ||
| 111 | |||
| 112 | break; | ||
| 113 | } | ||
| 114 | |||
| 115 | case type::string: | ||
| 116 | { | ||
| 117 | new(&second.string_) std::string(std::move(tempString)); | ||
| 118 | |||
| 119 | break; | ||
| 120 | } | ||
| 121 | |||
| 122 | case type::invalid: | ||
| 123 | { | ||
| 124 | break; | ||
| 125 | } | ||
| 126 | } | ||
| 127 | } | ||
| 128 | |||
| 129 | binding::~binding() | ||
| 130 | { | ||
| 131 | switch (type_) | ||
| 132 | { | ||
| 133 | case type::string: | ||
| 134 | { | ||
| 135 | using string_type = std::string; | ||
| 136 | string_.~string_type(); | ||
| 137 | |||
| 138 | break; | ||
| 139 | } | ||
| 140 | |||
| 141 | case type::integer: | ||
| 142 | case type::invalid: | ||
| 143 | { | ||
| 144 | break; | ||
| 145 | } | ||
| 146 | } | ||
| 147 | } | ||
| 148 | |||
| 149 | binding::binding(int arg) : | ||
| 150 | type_(type::integer), | ||
| 151 | integer_(arg) | ||
| 152 | { | ||
| 153 | } | ||
| 154 | |||
| 155 | int binding::getInteger() const | ||
| 156 | { | ||
| 157 | if (type_ != type::integer) | ||
| 158 | { | ||
| 159 | throw std::domain_error("binding::getInteger called on non-integer binding"); | ||
| 160 | } | ||
| 161 | |||
| 162 | return integer_; | ||
| 163 | } | ||
| 164 | |||
| 165 | binding::binding(std::string arg) : type_(type::string) | ||
| 166 | { | ||
| 167 | new(&string_) std::string(arg); | ||
| 168 | } | ||
| 169 | |||
| 170 | std::string binding::getString() const | ||
| 171 | { | ||
| 172 | if (type_ != type::string) | ||
| 173 | { | ||
| 174 | throw std::domain_error("binding::getString called on non-string binding"); | ||
| 175 | } | ||
| 176 | |||
| 177 | return string_; | ||
| 178 | } | ||
| 179 | |||
| 180 | }; | ||
| diff --git a/lib/binding.h b/lib/binding.h new file mode 100644 index 0000000..7fbe20e --- /dev/null +++ b/lib/binding.h | |||
| @@ -0,0 +1,70 @@ | |||
| 1 | #ifndef BINDING_H_CAE0B18E | ||
| 2 | #define BINDING_H_CAE0B18E | ||
| 3 | |||
| 4 | #include <string> | ||
| 5 | |||
| 6 | namespace verbly { | ||
| 7 | |||
// A value that is either an integer, a string, or nothing at all.
//
// The payload is stored in an anonymous union and `type_` records which
// member (if any) is currently alive, so the special member functions are
// written by hand in binding.cpp.
class binding {
public:

  // Identifies the active alternative.
  enum class type { invalid, integer, string };

  // Creates an invalid (empty) binding. This must remain a user-provided
  // constructor: the union has a std::string member, so a defaulted default
  // constructor would be deleted.
  binding() {}

  // Copying and moving.
  binding(const binding& other);
  binding(binding&& other);

  // Assignment from either an lvalue or an rvalue (argument taken by value).
  binding& operator=(binding other);

  // Exchanges the contents of two bindings.
  friend void swap(binding& first, binding& second);

  ~binding();

  // Reports which alternative is held.
  type getType() const { return type_; }

  // Integer alternative. getInteger() throws std::domain_error when the
  // binding does not hold an integer.
  binding(int arg);
  int getInteger() const;

  // String alternative. getString() throws std::domain_error when the
  // binding does not hold a string.
  binding(std::string arg);
  std::string getString() const;

private:

  // Payload storage; only the member selected by type_ is alive.
  union {
    int integer_;
    std::string string_;
  };

  type type_ = type::invalid;
};
| 67 | |||
| 68 | }; | ||
| 69 | |||
| 70 | #endif /* end of include guard: BINDING_H_CAE0B18E */ | ||
| diff --git a/lib/data.cpp b/lib/data.cpp deleted file mode 100644 index db42487..0000000 --- a/lib/data.cpp +++ /dev/null | |||
| @@ -1,177 +0,0 @@ | |||
| 1 | #include "verbly.h" | ||
| 2 | |||
| 3 | namespace verbly { | ||
| 4 | |||
| 5 | data::data(std::string datafile) | ||
| 6 | { | ||
| 7 | if (sqlite3_open_v2(datafile.c_str(), &ppdb, SQLITE_OPEN_READONLY, NULL) != SQLITE_OK) | ||
| 8 | { | ||
| 9 | throw std::invalid_argument(sqlite3_errmsg(ppdb)); | ||
| 10 | } | ||
| 11 | } | ||
| 12 | |||
| 13 | data::data(data&& other) | ||
| 14 | { | ||
| 15 | ppdb = other.ppdb; | ||
| 16 | } | ||
| 17 | |||
| 18 | data& data::operator=(data&& other) | ||
| 19 | { | ||
| 20 | ppdb = other.ppdb; | ||
| 21 | |||
| 22 | return *this; | ||
| 23 | } | ||
| 24 | |||
| 25 | data::~data() | ||
| 26 | { | ||
| 27 | sqlite3_close_v2(ppdb); | ||
| 28 | } | ||
| 29 | |||
| 30 | verb_query data::verbs() const | ||
| 31 | { | ||
| 32 | return verb_query(*this); | ||
| 33 | } | ||
| 34 | |||
| 35 | adjective_query data::adjectives() const | ||
| 36 | { | ||
| 37 | return adjective_query(*this); | ||
| 38 | } | ||
| 39 | |||
| 40 | adverb_query data::adverbs() const | ||
| 41 | { | ||
| 42 | return adverb_query(*this); | ||
| 43 | } | ||
| 44 | |||
| 45 | noun_query data::nouns() const | ||
| 46 | { | ||
| 47 | return noun_query(*this); | ||
| 48 | } | ||
| 49 | |||
| 50 | frame_query data::frames() const | ||
| 51 | { | ||
| 52 | return frame_query(*this); | ||
| 53 | } | ||
| 54 | |||
| 55 | preposition_query data::prepositions() const | ||
| 56 | { | ||
| 57 | return preposition_query(*this); | ||
| 58 | } | ||
| 59 | |||
| 60 | binding::type binding::get_type() const | ||
| 61 | { | ||
| 62 | return _type; | ||
| 63 | } | ||
| 64 | |||
| 65 | binding::binding(const binding& other) | ||
| 66 | { | ||
| 67 | _type = other._type; | ||
| 68 | |||
| 69 | switch (_type) | ||
| 70 | { | ||
| 71 | case type::integer: | ||
| 72 | { | ||
| 73 | _integer = other._integer; | ||
| 74 | |||
| 75 | break; | ||
| 76 | } | ||
| 77 | |||
| 78 | case type::string: | ||
| 79 | { | ||
| 80 | new(&_string) std::string(other._string); | ||
| 81 | |||
| 82 | break; | ||
| 83 | } | ||
| 84 | } | ||
| 85 | } | ||
| 86 | |||
| 87 | binding::~binding() | ||
| 88 | { | ||
| 89 | switch (_type) | ||
| 90 | { | ||
| 91 | case type::string: | ||
| 92 | { | ||
| 93 | using string_type = std::string; | ||
| 94 | _string.~string_type(); | ||
| 95 | |||
| 96 | break; | ||
| 97 | } | ||
| 98 | } | ||
| 99 | } | ||
| 100 | |||
| 101 | binding& binding::operator=(const binding& other) | ||
| 102 | { | ||
| 103 | this->~binding(); | ||
| 104 | |||
| 105 | _type = other._type; | ||
| 106 | |||
| 107 | switch (_type) | ||
| 108 | { | ||
| 109 | case type::integer: | ||
| 110 | { | ||
| 111 | _integer = other._integer; | ||
| 112 | |||
| 113 | break; | ||
| 114 | } | ||
| 115 | |||
| 116 | case type::string: | ||
| 117 | { | ||
| 118 | new(&_string) std::string(other._string); | ||
| 119 | |||
| 120 | break; | ||
| 121 | } | ||
| 122 | } | ||
| 123 | |||
| 124 | return *this; | ||
| 125 | } | ||
| 126 | |||
| 127 | binding::binding(int _arg) | ||
| 128 | { | ||
| 129 | _type = type::integer; | ||
| 130 | _integer = _arg; | ||
| 131 | } | ||
| 132 | |||
| 133 | int binding::get_integer() const | ||
| 134 | { | ||
| 135 | assert(_type == type::integer); | ||
| 136 | |||
| 137 | return _integer; | ||
| 138 | } | ||
| 139 | |||
| 140 | void binding::set_integer(int _arg) | ||
| 141 | { | ||
| 142 | *this = binding(_arg); | ||
| 143 | } | ||
| 144 | |||
| 145 | binding& binding::operator=(int _arg) | ||
| 146 | { | ||
| 147 | *this = binding(_arg); | ||
| 148 | |||
| 149 | return *this; | ||
| 150 | } | ||
| 151 | |||
| 152 | binding::binding(std::string _arg) | ||
| 153 | { | ||
| 154 | _type = type::string; | ||
| 155 | new(&_string) std::string(_arg); | ||
| 156 | } | ||
| 157 | |||
| 158 | std::string binding::get_string() const | ||
| 159 | { | ||
| 160 | assert(_type == type::string); | ||
| 161 | |||
| 162 | return _string; | ||
| 163 | } | ||
| 164 | |||
| 165 | void binding::set_string(std::string _arg) | ||
| 166 | { | ||
| 167 | *this = binding(_arg); | ||
| 168 | } | ||
| 169 | |||
| 170 | binding& binding::operator=(std::string _arg) | ||
| 171 | { | ||
| 172 | *this = binding(_arg); | ||
| 173 | |||
| 174 | return *this; | ||
| 175 | } | ||
| 176 | |||
| 177 | }; | ||
| diff --git a/lib/data.h b/lib/data.h deleted file mode 100644 index b8b12b9..0000000 --- a/lib/data.h +++ /dev/null | |||
| @@ -1,380 +0,0 @@ | |||
| 1 | #ifndef DATA_H_C4AEC3DD | ||
| 2 | #define DATA_H_C4AEC3DD | ||
| 3 | |||
| 4 | #include <sqlite3.h> | ||
| 5 | |||
| 6 | namespace verbly { | ||
| 7 | |||
| 8 | class data; | ||
| 9 | class word; | ||
| 10 | class adjective; | ||
| 11 | class noun; | ||
| 12 | class verb; | ||
| 13 | class adverb; | ||
| 14 | class frame; | ||
| 15 | class adjective_query; | ||
| 16 | class adverb_query; | ||
| 17 | class noun_query; | ||
| 18 | class verb_query; | ||
| 19 | class frame_query; | ||
| 20 | class preposition_query; | ||
| 21 | |||
| 22 | class data { | ||
| 23 | private: | ||
| 24 | sqlite3* ppdb; | ||
| 25 | |||
| 26 | friend class adjective_query; | ||
| 27 | friend class noun_query; | ||
| 28 | friend class verb_query; | ||
| 29 | friend class adverb_query; | ||
| 30 | friend class frame_query; | ||
| 31 | friend class preposition_query; | ||
| 32 | |||
| 33 | public: | ||
| 34 | data(std::string datafile); | ||
| 35 | |||
| 36 | data(const data& other) = delete; | ||
| 37 | data& operator=(const data& other) = delete; | ||
| 38 | |||
| 39 | data(data&& other); | ||
| 40 | data& operator=(data&& other); | ||
| 41 | |||
| 42 | ~data(); | ||
| 43 | |||
| 44 | verb_query verbs() const; | ||
| 45 | adjective_query adjectives() const; | ||
| 46 | adverb_query adverbs() const; | ||
| 47 | noun_query nouns() const; | ||
| 48 | frame_query frames() const; | ||
| 49 | preposition_query prepositions() const; | ||
| 50 | |||
| 51 | }; | ||
| 52 | |||
// A boolean filter tree over values of type T.
//
// A filter is either a *singleton* wrapping one T, or a *group* holding a
// list of child filters combined with AND (orlogic == false) or OR
// (orlogic == true). Either kind may additionally be negated (notlogic).
// The payload lives in a union discriminated by _type, so construction and
// destruction of the active member are done by hand.
template <class T>
class filter {
  public:
    enum class type {
      singleton,
      group
    };

    typedef filter<T> value_type;

    // Returns which alternative is currently active.
    type get_type() const
    {
      return _type;
    }

    // Copy constructor: duplicates the tag, the negation flag and the active
    // payload.
    filter(const filter<T>& other) : _type(other._type), _notlogic(other._notlogic)
    {
      copy_payload(other);
    }

    // Copy assignment.
    //
    // `other` may be *this or a filter stored somewhere inside *this (for
    // example `f = *f.begin()`), so it is snapshotted before the current
    // payload is destroyed. Only the payload is destroyed, never the whole
    // object.
    filter<T>& operator=(const filter<T>& other)
    {
      if (this != &other)
      {
        filter<T> snapshot(other);

        destroy_payload();

        _type = snapshot._type;
        _notlogic = snapshot._notlogic;

        if (_type == type::group)
        {
          // Steal the snapshot's list instead of deep-copying a second time.
          new(&_group.elems) std::list<filter<T>>();
          _group.elems.swap(snapshot._group.elems);
          _group.orlogic = snapshot._group.orlogic;
        } else {
          new(&_singleton.elem) T(snapshot._singleton.elem);
        }
      }

      return *this;
    }

    ~filter()
    {
      destroy_payload();
    }

    // Whether the result of this filter is negated.
    bool get_notlogic() const
    {
      return _notlogic;
    }

    void set_notlogic(bool _nl)
    {
      _notlogic = _nl;
    }

    // Returns every singleton element in the tree, in depth-first order,
    // duplicates included.
    std::list<T> inorder_flatten() const
    {
      std::list<T> result;

      if (_type == type::singleton)
      {
        result.push_back(_singleton.elem);
      } else if (_type == type::group)
      {
        // Iterate by reference: copying each child would deep-copy its subtree.
        for (const auto& child : _group.elems)
        {
          std::list<T> nested = child.inorder_flatten();
          result.splice(result.end(), nested);
        }
      }

      return result;
    }

    // Returns the set of distinct singleton elements in the tree.
    std::set<T> uniq_flatten() const
    {
      std::set<T> result;

      if (_type == type::singleton)
      {
        result.insert(_singleton.elem);
      } else if (_type == type::group)
      {
        for (const auto& child : _group.elems)
        {
          const std::set<T> nested = child.uniq_flatten();
          result.insert(nested.begin(), nested.end());
        }
      }

      return result;
    }

    // Simplifies the tree in place: child groups are cleaned recursively,
    // empty child groups are removed, and any group left with exactly one
    // element is replaced by that element (its negation flag becomes the XOR
    // of the group's and the element's flags).
    void clean()
    {
      if (_type != type::group)
      {
        return;
      }

      for (auto it = _group.elems.begin(); it != _group.elems.end(); )
      {
        it->clean();

        if (it->_type == type::group)
        {
          if (it->_group.elems.empty())
          {
            it = _group.elems.erase(it);

            continue;
          }

          if (it->_group.elems.size() == 1)
          {
            it->collapse_single_child();
          }
        }

        ++it;
      }

      if (_group.elems.size() == 1)
      {
        collapse_single_child();
      }
    }

    // Singleton

    // Wraps a single element. Intentionally non-explicit so that a T can be
    // used wherever a filter<T> is expected.
    filter(T _elem, bool _notlogic = false) : _type(type::singleton)
    {
      new(&_singleton.elem) T(_elem);
      this->_notlogic = _notlogic;
    }

    // Turns this filter into a singleton holding _elem.
    //
    // Parentheses are required here: `filter<T>{_elem}` would select the
    // initializer_list constructor (T converts implicitly to filter<T>) and
    // produce a one-element *group* instead of a singleton.
    filter<T>& operator=(T _elem)
    {
      *this = filter<T>(_elem);

      return *this;
    }

    T get_elem() const
    {
      assert(_type == type::singleton);

      return _singleton.elem;
    }

    void set_elem(T _elem)
    {
      assert(_type == type::singleton);

      _singleton.elem = _elem;
    }

    // Group
    typedef typename std::list<filter<T>>::iterator iterator;

    // Creates an empty AND-group.
    filter() : _type(type::group)
    {
      new(&_group.elems) std::list<filter<T>>();
      _group.orlogic = false;
    }

    // Creates an AND-group holding the given child filters.
    filter(std::initializer_list<filter<T>> _init) : _type(type::group)
    {
      new(&_group.elems) std::list<filter<T>>(_init);
      _group.orlogic = false;
    }

    iterator begin()
    {
      assert(_type == type::group);

      return _group.elems.begin();
    }

    iterator end()
    {
      assert(_type == type::group);

      return _group.elems.end();
    }

    // Appends a child filter and returns *this so calls can be chained.
    filter<T>& operator<<(filter<T> _elem)
    {
      assert(_type == type::group);

      _group.elems.push_back(_elem);

      return *this;
    }

    void push_back(filter<T> _elem)
    {
      assert(_type == type::group);

      _group.elems.push_back(_elem);
    }

    // Whether the children are combined with OR (true) or AND (false).
    bool get_orlogic() const
    {
      assert(_type == type::group);

      return _group.orlogic;
    }

    void set_orlogic(bool _ol)
    {
      assert(_type == type::group);

      _group.orlogic = _ol;
    }

    // A group is empty when it has no children; a singleton is never empty.
    bool empty() const
    {
      if (_type == type::group)
      {
        return _group.elems.empty();
      } else {
        return false;
      }
    }

    // Number of direct children of a group.
    int size() const
    {
      assert(_type == type::group);

      return static_cast<int>(_group.elems.size());
    }

  private:
    // Payload types are declared outside the union: declaring types inside an
    // anonymous union is a compiler extension, not standard C++.
    struct singleton_data {
      T elem;
    };

    struct group_data {
      std::list<filter<T>> elems;
      bool orlogic;
    };

    // Constructs the payload selected by _type as a copy of other's payload.
    // Requires _type == other._type and no payload currently alive.
    void copy_payload(const filter<T>& other)
    {
      switch (_type)
      {
        case type::singleton:
        {
          new(&_singleton.elem) T(other._singleton.elem);

          break;
        }

        case type::group:
        {
          new(&_group.elems) std::list<filter<T>>(other._group.elems);
          _group.orlogic = other._group.orlogic;

          break;
        }
      }
    }

    // Destroys the payload selected by _type.
    void destroy_payload()
    {
      switch (_type)
      {
        case type::singleton:
        {
          _singleton.elem.~T();

          break;
        }

        case type::group:
        {
          using list_type = std::list<filter<T>>;
          _group.elems.~list_type();

          break;
        }
      }
    }

    // Replaces a one-element group with its only element, folding the two
    // negation flags together. Relies on operator= tolerating a source that
    // lives inside *this.
    void collapse_single_child()
    {
      const bool truelogic = (_notlogic != _group.elems.front()._notlogic);
      *this = _group.elems.front();
      _notlogic = truelogic;
    }

    type _type;
    bool _notlogic = false;
    union {
      singleton_data _singleton;
      group_data _group;
    };
};
| 345 | |||
// A tagged union holding either an int or a std::string. The active
// alternative is recorded in _type. The getters assert on the tag in their
// definitions.
class binding {
  public:
    enum class type {
      integer,
      string
    };

    // Tag of the alternative currently stored.
    type get_type() const;

    // Copying duplicates the active alternative; the destructor releases it.
    binding(const binding& other);
    ~binding();
    binding& operator=(const binding& other);

    // Integer alternative
    binding(int _arg);
    int get_integer() const;
    void set_integer(int _arg);
    binding& operator=(int _arg);

    // String alternative
    binding(std::string _arg);
    std::string get_string() const;
    void set_string(std::string _arg);
    binding& operator=(std::string _arg);

  private:
    // Storage shared by both alternatives; _string is constructed and
    // destroyed manually by the member functions.
    union {
      int _integer;
      std::string _string;
    };
    type _type;
};
| 377 | |||
| 378 | }; | ||
| 379 | |||
| 380 | #endif /* end of include guard: DATA_H_C4AEC3DD */ | ||
| diff --git a/lib/database.cpp b/lib/database.cpp new file mode 100644 index 0000000..351b93d --- /dev/null +++ b/lib/database.cpp | |||
| @@ -0,0 +1,79 @@ | |||
| 1 | #include "database.h" | ||
| 2 | #include <sqlite3.h> | ||
| 3 | #include <stdexcept> | ||
| 4 | #include "query.h" | ||
| 5 | |||
| 6 | namespace verbly { | ||
| 7 | |||
| 8 | database::database(std::string path) | ||
| 9 | { | ||
| 10 | if (sqlite3_open_v2(path.c_str(), &ppdb_, SQLITE_OPEN_READONLY, NULL) != SQLITE_OK) | ||
| 11 | { | ||
| 12 | // We still have to free the resources allocated. In the event that | ||
| 13 | // allocation failed, ppdb will be null and sqlite3_close_v2 will just | ||
| 14 | // ignore it. | ||
| 15 | std::string errmsg(sqlite3_errmsg(ppdb_)); | ||
| 16 | sqlite3_close_v2(ppdb_); | ||
| 17 | |||
| 18 | throw database_error("Could not open verbly datafile", errmsg); | ||
| 19 | } | ||
| 20 | } | ||
| 21 | |||
| 22 | database::database(database&& other) : database() | ||
| 23 | { | ||
| 24 | swap(*this, other); | ||
| 25 | } | ||
| 26 | |||
| 27 | database& database::operator=(database&& other) | ||
| 28 | { | ||
| 29 | swap(*this, other); | ||
| 30 | |||
| 31 | return *this; | ||
| 32 | } | ||
| 33 | |||
| 34 | void swap(database& first, database& second) | ||
| 35 | { | ||
| 36 | std::swap(first.ppdb_, second.ppdb_); | ||
| 37 | } | ||
| 38 | |||
| 39 | database::~database() | ||
| 40 | { | ||
| 41 | sqlite3_close_v2(ppdb_); | ||
| 42 | } | ||
| 43 | |||
| 44 | query<notion> database::notions(filter where, bool random, int limit) const | ||
| 45 | { | ||
| 46 | return query<notion>(*this, ppdb_, std::move(where), random, limit); | ||
| 47 | } | ||
| 48 | |||
| 49 | query<word> database::words(filter where, bool random, int limit) const | ||
| 50 | { | ||
| 51 | return query<word>(*this, ppdb_, std::move(where), random, limit); | ||
| 52 | } | ||
| 53 | |||
| 54 | query<group> database::groups(filter where, bool random, int limit) const | ||
| 55 | { | ||
| 56 | return query<group>(*this, ppdb_, std::move(where), random, limit); | ||
| 57 | } | ||
| 58 | |||
| 59 | query<frame> database::frames(filter where, bool random, int limit) const | ||
| 60 | { | ||
| 61 | return query<frame>(*this, ppdb_, std::move(where), random, limit); | ||
| 62 | } | ||
| 63 | |||
| 64 | query<lemma> database::lemmas(filter where, bool random, int limit) const | ||
| 65 | { | ||
| 66 | return query<lemma>(*this, ppdb_, std::move(where), random, limit); | ||
| 67 | } | ||
| 68 | |||
| 69 | query<form> database::forms(filter where, bool random, int limit) const | ||
| 70 | { | ||
| 71 | return query<form>(*this, ppdb_, std::move(where), random, limit); | ||
| 72 | } | ||
| 73 | |||
| 74 | query<pronunciation> database::pronunciations(filter where, bool random, int limit) const | ||
| 75 | { | ||
| 76 | return query<pronunciation>(*this, ppdb_, std::move(where), random, limit); | ||
| 77 | } | ||
| 78 | |||
| 79 | }; | ||
| diff --git a/lib/database.h b/lib/database.h new file mode 100644 index 0000000..d68c40b --- /dev/null +++ b/lib/database.h | |||
| @@ -0,0 +1,73 @@ | |||
| 1 | #ifndef DATABASE_H_0B0A47D2 | ||
| 2 | #define DATABASE_H_0B0A47D2 | ||
| 3 | |||
| 4 | #include <string> | ||
| 5 | #include <exception> | ||
| 6 | #include <list> | ||
| 7 | #include "notion.h" | ||
| 8 | #include "word.h" | ||
| 9 | #include "group.h" | ||
| 10 | #include "frame.h" | ||
| 11 | #include "lemma.h" | ||
| 12 | #include "form.h" | ||
| 13 | #include "pronunciation.h" | ||
| 14 | |||
| 15 | struct sqlite3; | ||
| 16 | |||
| 17 | namespace verbly { | ||
| 18 | |||
| 19 | template <typename Object> | ||
| 20 | class query; | ||
| 21 | |||
| 22 | class database { | ||
| 23 | public: | ||
| 24 | |||
| 25 | // Constructor | ||
| 26 | |||
| 27 | explicit database(std::string path); | ||
| 28 | |||
| 29 | // Disable copying | ||
| 30 | |||
| 31 | database(const database& other) = delete; | ||
| 32 | database& operator=(const database& other) = delete; | ||
| 33 | |||
| 34 | // Move constructor and move assignment | ||
| 35 | |||
| 36 | database(database&& other); | ||
| 37 | database& operator=(database&& other); | ||
| 38 | |||
| 39 | // Swap | ||
| 40 | |||
| 41 | friend void swap(database& first, database& second); | ||
| 42 | |||
| 43 | // Destructor | ||
| 44 | |||
| 45 | ~database(); | ||
| 46 | |||
| 47 | // Queries | ||
| 48 | |||
| 49 | query<notion> notions(filter where, bool random = true, int limit = 1) const; | ||
| 50 | |||
| 51 | query<word> words(filter where, bool random = true, int limit = 1) const; | ||
| 52 | |||
| 53 | query<group> groups(filter where, bool random = true, int limit = 1) const; | ||
| 54 | |||
| 55 | query<frame> frames(filter where, bool random = true, int limit = 1) const; | ||
| 56 | |||
| 57 | query<lemma> lemmas(filter where, bool random = true, int limit = 1) const; | ||
| 58 | |||
| 59 | query<form> forms(filter where, bool random = true, int limit = 1) const; | ||
| 60 | |||
| 61 | query<pronunciation> pronunciations(filter where, bool random = true, int limit = 1) const; | ||
| 62 | |||
| 63 | private: | ||
| 64 | |||
| 65 | database() = default; | ||
| 66 | |||
| 67 | sqlite3* ppdb_ = nullptr; | ||
| 68 | |||
| 69 | }; | ||
| 70 | |||
| 71 | }; | ||
| 72 | |||
| 73 | #endif /* end of include guard: DATABASE_H_0B0A47D2 */ | ||
| diff --git a/lib/enums.h b/lib/enums.h new file mode 100644 index 0000000..b37be7b --- /dev/null +++ b/lib/enums.h | |||
| @@ -0,0 +1,45 @@ | |||
| 1 | #ifndef ENUMS_H_260BA847 | ||
| 2 | #define ENUMS_H_260BA847 | ||
| 3 | |||
| 4 | namespace verbly { | ||
| 5 | |||
// Parts of speech. The numeric values are spelled out explicitly;
// NOTE(review): presumably they match the values stored in the datafile,
// so they should not be renumbered. Confirm against the generator.
enum class part_of_speech {
  noun        = 0,
  adjective   = 1,
  adverb      = 2,
  verb        = 3,
  preposition = 4
};
| 13 | |||
// Adjective positioning; -1 marks "undefined".
enum class positioning {
  undefined   = -1,
  predicate   = 0,
  attributive = 1,
  postnominal = 2
};
| 20 | |||
// Inflection categories of a word form, with explicit numeric values.
enum class inflection {
  base            = 0,
  plural          = 1,
  comparative     = 2,
  superlative     = 3,
  past_tense      = 4,
  past_participle = 5,
  ing_form        = 6,
  s_form          = 7
};
| 31 | |||
// Kinds of queryable object; -1 marks "undefined".
enum class object {
  undefined     = -1,
  notion        = 0,
  word          = 1,
  group         = 2,
  frame         = 3,
  lemma         = 4,
  form          = 5,
  pronunciation = 6
};
| 42 | |||
| 43 | }; | ||
| 44 | |||
| 45 | #endif /* end of include guard: ENUMS_H_260BA847 */ | ||
| diff --git a/lib/field.cpp b/lib/field.cpp new file mode 100644 index 0000000..d7adbb3 --- /dev/null +++ b/lib/field.cpp | |||
| @@ -0,0 +1,91 @@ | |||
| 1 | #include "field.h" | ||
| 2 | #include "filter.h" | ||
| 3 | |||
| 4 | namespace verbly { | ||
| 5 | |||
| 6 | filter field::operator==(int value) const | ||
| 7 | { | ||
| 8 | return filter(*this, filter::comparison::int_equals, value); | ||
| 9 | } | ||
| 10 | |||
| 11 | filter field::operator!=(int value) const | ||
| 12 | { | ||
| 13 | return filter(*this, filter::comparison::int_does_not_equal, value); | ||
| 14 | } | ||
| 15 | |||
| 16 | filter field::operator<(int value) const | ||
| 17 | { | ||
| 18 | return filter(*this, filter::comparison::int_is_less_than, value); | ||
| 19 | } | ||
| 20 | |||
| 21 | filter field::operator<=(int value) const | ||
| 22 | { | ||
| 23 | return filter(*this, filter::comparison::int_is_at_most, value); | ||
| 24 | } | ||
| 25 | |||
| 26 | filter field::operator>(int value) const | ||
| 27 | { | ||
| 28 | return filter(*this, filter::comparison::int_is_greater_than, value); | ||
| 29 | } | ||
| 30 | |||
| 31 | filter field::operator>=(int value) const | ||
| 32 | { | ||
| 33 | return filter(*this, filter::comparison::int_is_at_least, value); | ||
| 34 | } | ||
| 35 | |||
| 36 | filter field::operator==(part_of_speech value) const | ||
| 37 | { | ||
| 38 | return filter(*this, filter::comparison::int_equals, static_cast<int>(value)); | ||
| 39 | } | ||
| 40 | |||
| 41 | filter field::operator==(positioning value) const | ||
| 42 | { | ||
| 43 | return filter(*this, filter::comparison::int_equals, static_cast<int>(value)); | ||
| 44 | } | ||
| 45 | |||
| 46 | filter field::operator==(inflection value) const | ||
| 47 | { | ||
| 48 | return filter(*this, filter::comparison::int_equals, static_cast<int>(value)); | ||
| 49 | } | ||
| 50 | |||
| 51 | filter field::operator==(bool value) const | ||
| 52 | { | ||
| 53 | return filter(*this, filter::comparison::boolean_equals, value); | ||
| 54 | } | ||
| 55 | |||
| 56 | filter field::operator==(std::string value) const | ||
| 57 | { | ||
| 58 | return filter(*this, filter::comparison::string_equals, std::move(value)); | ||
| 59 | } | ||
| 60 | |||
| 61 | filter field::operator!=(std::string value) const | ||
| 62 | { | ||
| 63 | return filter(*this, filter::comparison::string_does_not_equal, std::move(value)); | ||
| 64 | } | ||
| 65 | |||
| 66 | filter field::operator%=(std::string value) const | ||
| 67 | { | ||
| 68 | return filter(*this, filter::comparison::string_is_like, std::move(value)); | ||
| 69 | } | ||
| 70 | |||
| 71 | field::operator filter() const | ||
| 72 | { | ||
| 73 | return filter(*this, filter::comparison::is_not_null); | ||
| 74 | } | ||
| 75 | |||
| 76 | filter field::operator!() const | ||
| 77 | { | ||
| 78 | return filter(*this, filter::comparison::is_null); | ||
| 79 | } | ||
| 80 | |||
| 81 | filter field::operator%=(filter joinCondition) const | ||
| 82 | { | ||
| 83 | if (type_ == type::hierarchal_join) | ||
| 84 | { | ||
| 85 | return filter(*this, filter::comparison::hierarchally_matches, std::move(joinCondition)); | ||
| 86 | } else { | ||
| 87 | return filter(*this, filter::comparison::matches, std::move(joinCondition)); | ||
| 88 | } | ||
| 89 | } | ||
| 90 | |||
| 91 | }; | ||
| diff --git a/lib/field.h b/lib/field.h new file mode 100644 index 0000000..30c62be --- /dev/null +++ b/lib/field.h | |||
| @@ -0,0 +1,306 @@ | |||
| 1 | #ifndef FIELD_H_43258321 | ||
| 2 | #define FIELD_H_43258321 | ||
| 3 | |||
#include "enums.h"
#include <cstring>
#include <stdexcept>
#include <string>
#include <tuple>
| 7 | |||
| 8 | namespace verbly { | ||
| 9 | |||
| 10 | class filter; | ||
| 11 | |||
| 12 | class field { | ||
| 13 | public: | ||
| 14 | enum class type { | ||
| 15 | undefined, | ||
| 16 | string, | ||
| 17 | integer, | ||
| 18 | boolean, | ||
| 19 | join, | ||
| 20 | join_through, | ||
| 21 | hierarchal_join | ||
| 22 | }; | ||
| 23 | |||
| 24 | // Default constructor | ||
| 25 | |||
| 26 | field() | ||
| 27 | { | ||
| 28 | } | ||
| 29 | |||
| 30 | // Static factories | ||
| 31 | |||
| 32 | static field stringField( | ||
| 33 | object obj, | ||
| 34 | const char* name, | ||
| 35 | bool nullable = false) | ||
| 36 | { | ||
| 37 | return field(obj, type::string, name, nullable); | ||
| 38 | } | ||
| 39 | |||
| 40 | static field stringField( | ||
| 41 | const char* table, | ||
| 42 | const char* name, | ||
| 43 | bool nullable = false) | ||
| 44 | { | ||
| 45 | return field(object::undefined, type::string, name, nullable, table); | ||
| 46 | } | ||
| 47 | |||
| 48 | static field integerField( | ||
| 49 | object obj, | ||
| 50 | const char* name, | ||
| 51 | bool nullable = false) | ||
| 52 | { | ||
| 53 | return field(obj, type::integer, name, nullable); | ||
| 54 | } | ||
| 55 | |||
| 56 | static field integerField( | ||
| 57 | const char* table, | ||
| 58 | const char* name, | ||
| 59 | bool nullable = false) | ||
| 60 | { | ||
| 61 | return field(object::undefined, type::integer, name, nullable, table); | ||
| 62 | } | ||
| 63 | |||
| 64 | static field booleanField( | ||
| 65 | object obj, | ||
| 66 | const char* name, | ||
| 67 | bool nullable = false) | ||
| 68 | { | ||
| 69 | return field(obj, type::boolean, name, nullable); | ||
| 70 | } | ||
| 71 | |||
| 72 | static field booleanField( | ||
| 73 | const char* table, | ||
| 74 | const char* name, | ||
| 75 | bool nullable = false) | ||
| 76 | { | ||
| 77 | return field(object::undefined, type::boolean, name, nullable, table); | ||
| 78 | } | ||
| 79 | |||
| 80 | static field joinField( | ||
| 81 | object obj, | ||
| 82 | const char* name, | ||
| 83 | object joinWith, | ||
| 84 | bool nullable = false) | ||
| 85 | { | ||
| 86 | return field(obj, type::join, name, nullable, 0, joinWith); | ||
| 87 | } | ||
| 88 | |||
| 89 | static field joinField( | ||
| 90 | object obj, | ||
| 91 | const char* name, | ||
| 92 | const char* table, | ||
| 93 | bool nullable = false) | ||
| 94 | { | ||
| 95 | return field(obj, type::join, name, nullable, table); | ||
| 96 | } | ||
| 97 | |||
| 98 | static field joinThrough( | ||
| 99 | object obj, | ||
| 100 | const char* name, | ||
| 101 | object joinWith, | ||
| 102 | const char* joinTable, | ||
| 103 | const char* foreignColumn) | ||
| 104 | { | ||
| 105 | return field(obj, type::join_through, name, true, joinTable, joinWith, foreignColumn, name, foreignColumn); | ||
| 106 | } | ||
| 107 | |||
| 108 | static field joinThrough( | ||
| 109 | object obj, | ||
| 110 | const char* name, | ||
| 111 | object joinWith, | ||
| 112 | const char* joinTable, | ||
| 113 | const char* foreignColumn, | ||
| 114 | const char* joinColumn, | ||
| 115 | const char* foreignJoinColumn) | ||
| 116 | { | ||
| 117 | return field(obj, type::join_through, name, true, joinTable, joinWith, foreignColumn, joinColumn, foreignJoinColumn); | ||
| 118 | } | ||
| 119 | |||
| 120 | static field selfJoin( | ||
| 121 | object obj, | ||
| 122 | const char* name, | ||
| 123 | const char* joinTable, | ||
| 124 | const char* joinColumn, | ||
| 125 | const char* foreignJoinColumn) | ||
| 126 | { | ||
| 127 | return field(obj, type::join_through, name, true, joinTable, obj, name, joinColumn, foreignJoinColumn); | ||
| 128 | } | ||
| 129 | |||
| 130 | static field hierarchalSelfJoin( | ||
| 131 | object obj, | ||
| 132 | const char* name, | ||
| 133 | const char* joinTable, | ||
| 134 | const char* joinColumn, | ||
| 135 | const char* foreignJoinColumn) | ||
| 136 | { | ||
| 137 | return field(obj, type::hierarchal_join, name, true, joinTable, obj, name, joinColumn, foreignJoinColumn); | ||
| 138 | } | ||
| 139 | |||
| 140 | // Accessors | ||
| 141 | |||
| 142 | object getObject() const | ||
| 143 | { | ||
| 144 | return object_; | ||
| 145 | } | ||
| 146 | |||
| 147 | type getType() const | ||
| 148 | { | ||
| 149 | return type_; | ||
| 150 | } | ||
| 151 | |||
| 152 | bool isJoin() const | ||
| 153 | { | ||
| 154 | return ((type_ == type::join) || (type_ == type::join_through) || (type_ == type::hierarchal_join)); | ||
| 155 | } | ||
| 156 | |||
| 157 | const char* getColumn() const | ||
| 158 | { | ||
| 159 | return column_; | ||
| 160 | } | ||
| 161 | |||
| 162 | bool isNullable() const | ||
| 163 | { | ||
| 164 | return nullable_; | ||
| 165 | } | ||
| 166 | |||
| 167 | bool hasTable() const | ||
| 168 | { | ||
| 169 | return (table_ != 0); | ||
| 170 | } | ||
| 171 | |||
| 172 | const char* getTable() const | ||
| 173 | { | ||
| 174 | return table_; | ||
| 175 | } | ||
| 176 | |||
| 177 | // Joins | ||
| 178 | |||
| 179 | object getJoinObject() const | ||
| 180 | { | ||
| 181 | // We ignore hierarchal joins because they are always self joins. | ||
| 182 | return ((type_ == type::join) || (type_ == type::join_through)) | ||
| 183 | ? joinObject_ | ||
| 184 | : throw std::domain_error("Non-join fields don't have join objects"); | ||
| 185 | } | ||
| 186 | |||
| 187 | // Many-to-many joins | ||
| 188 | |||
| 189 | const char* getForeignColumn() const | ||
| 190 | { | ||
| 191 | // We ignore hierarchal joins because they are always self joins. | ||
| 192 | return (type_ == type::join_through) | ||
| 193 | ? foreignColumn_ | ||
| 194 | : throw std::domain_error("Only many-to-many join fields have a foreign column"); | ||
| 195 | } | ||
| 196 | |||
| 197 | const char* getJoinColumn() const | ||
| 198 | { | ||
| 199 | return ((type_ == type::join_through) || (type_ == type::hierarchal_join)) | ||
| 200 | ? joinColumn_ | ||
| 201 | : throw std::domain_error("Only many-to-many join fields have a join column"); | ||
| 202 | } | ||
| 203 | |||
| 204 | const char* getForeignJoinColumn() const | ||
| 205 | { | ||
| 206 | return ((type_ == type::join_through) || (type_ == type::hierarchal_join)) | ||
| 207 | ? foreignJoinColumn_ | ||
| 208 | : throw std::domain_error("Only many-to-many join fields have a foreign join column"); | ||
| 209 | } | ||
| 210 | |||
| 211 | // Ordering | ||
| 212 | |||
| 213 | bool operator<(const field& other) const | ||
| 214 | { | ||
| 215 | // For the most part, (object, column) uniquely identifies fields. | ||
| 216 | // However, there do exist a number of relationships from an object to | ||
| 217 | // itself, such as notion hypernymy/hyponymy. Hypernymy and hyponymy have | ||
| 218 | // the same object (notion), the same column (notion_id), and the same | ||
| 219 | // table (hypernymy); however, they have different join columns. | ||
| 220 | return std::tie(object_, column_, table_, joinColumn_) < std::tie(other.object_, other.column_, other.table_, other.joinColumn_); | ||
| 221 | } | ||
| 222 | |||
| 223 | // Equality | ||
| 224 | |||
| 225 | bool operator==(const field& other) const | ||
| 226 | { | ||
| 227 | // For the most part, (object, column) uniquely identifies fields. | ||
| 228 | // However, there do exist a number of relationships from an object to | ||
| 229 | // itself, such as notion hypernymy/hyponymy. Hypernymy and hyponymy have | ||
| 230 | // the same object (notion), the same column (notion_id), and the same | ||
| 231 | // table (hypernymy); however, they have different join columns. | ||
| 232 | return std::tie(object_, column_, table_, joinColumn_) == std::tie(other.object_, other.column_, other.table_, other.joinColumn_); | ||
| 233 | } | ||
| 234 | |||
| 235 | // Filter construction | ||
| 236 | |||
| 237 | filter operator==(int value) const; // Integer equality | ||
| 238 | filter operator!=(int value) const; // Integer inequality | ||
| 239 | filter operator<(int value) const; // Integer is less than | ||
| 240 | filter operator<=(int value) const; // Integer is at most | ||
| 241 | filter operator>(int value) const; // Integer is greater than | ||
| 242 | filter operator>=(int value) const; // Integer is at least | ||
| 243 | |||
| 244 | filter operator==(part_of_speech value) const; // Part of speech equality | ||
| 245 | filter operator==(positioning value) const; // Adjective positioning equality | ||
| 246 | filter operator==(inflection value) const; // Inflection category equality | ||
| 247 | |||
| 248 | filter operator==(bool value) const; // Boolean equality | ||
| 249 | |||
| 250 | filter operator==(std::string value) const; // String equality | ||
| 251 | filter operator!=(std::string value) const; // String inequality | ||
| 252 | filter operator%=(std::string value) const; // String matching | ||
| 253 | |||
| 254 | operator filter() const; // Non-nullity | ||
| 255 | filter operator!() const; // Nullity | ||
| 256 | |||
| 257 | filter operator%=(filter joinCondition) const; // Join | ||
| 258 | |||
| 259 | private: | ||
| 260 | |||
| 261 | // Constructor | ||
| 262 | |||
| 263 | field( | ||
| 264 | object obj, | ||
| 265 | type datatype, | ||
| 266 | const char* column, | ||
| 267 | bool nullable = false, | ||
| 268 | const char* table = 0, | ||
| 269 | object joinObject = object::undefined, | ||
| 270 | const char* foreignColumn = 0, | ||
| 271 | const char* joinColumn = 0, | ||
| 272 | const char* foreignJoinColumn = 0) : | ||
| 273 | object_(obj), | ||
| 274 | type_(datatype), | ||
| 275 | column_(column), | ||
| 276 | nullable_(nullable), | ||
| 277 | table_(table), | ||
| 278 | joinObject_(joinObject), | ||
| 279 | foreignColumn_(foreignColumn), | ||
| 280 | joinColumn_(joinColumn), | ||
| 281 | foreignJoinColumn_(foreignJoinColumn) | ||
| 282 | { | ||
| 283 | } | ||
| 284 | |||
| 285 | // General | ||
| 286 | object object_ = object::undefined; | ||
| 287 | type type_ = type::undefined; | ||
| 288 | const char* column_ = 0; | ||
| 289 | const char* table_ = 0; | ||
| 290 | |||
| 291 | // Non-joins and belongs-to joins | ||
| 292 | bool nullable_ = false; | ||
| 293 | |||
| 294 | // Joins | ||
| 295 | object joinObject_ = object::undefined; | ||
| 296 | |||
| 297 | // Many-to-many joins | ||
| 298 | const char* foreignColumn_ = 0; | ||
| 299 | const char* joinColumn_ = 0; | ||
| 300 | const char* foreignJoinColumn_ = 0; | ||
| 301 | |||
| 302 | }; | ||
| 303 | |||
| 304 | }; | ||
| 305 | |||
| 306 | #endif /* end of include guard: FIELD_H_43258321 */ | ||
| diff --git a/lib/filter.cpp b/lib/filter.cpp new file mode 100644 index 0000000..959fa05 --- /dev/null +++ b/lib/filter.cpp | |||
| @@ -0,0 +1,1365 @@ | |||
| 1 | #include "filter.h" | ||
| 2 | #include <stdexcept> | ||
| 3 | #include <map> | ||
| 4 | #include "notion.h" | ||
| 5 | #include "word.h" | ||
| 6 | #include "group.h" | ||
| 7 | #include "frame.h" | ||
| 8 | #include "lemma.h" | ||
| 9 | #include "form.h" | ||
| 10 | #include "pronunciation.h" | ||
| 11 | |||
| 12 | namespace verbly { | ||
| 13 | |||
| 14 | filter::filter(const filter& other) | ||
| 15 | { | ||
| 16 | type_ = other.type_; | ||
| 17 | |||
| 18 | switch (type_) | ||
| 19 | { | ||
| 20 | case type::empty: | ||
| 21 | { | ||
| 22 | break; | ||
| 23 | } | ||
| 24 | |||
| 25 | case type::singleton: | ||
| 26 | { | ||
| 27 | new(&singleton_.filterField) field(other.singleton_.filterField); | ||
| 28 | singleton_.filterType = other.singleton_.filterType; | ||
| 29 | |||
| 30 | switch (singleton_.filterType) | ||
| 31 | { | ||
| 32 | case comparison::int_equals: | ||
| 33 | case comparison::int_does_not_equal: | ||
| 34 | case comparison::int_is_at_least: | ||
| 35 | case comparison::int_is_greater_than: | ||
| 36 | case comparison::int_is_at_most: | ||
| 37 | case comparison::int_is_less_than: | ||
| 38 | { | ||
| 39 | singleton_.intValue = other.singleton_.intValue; | ||
| 40 | |||
| 41 | break; | ||
| 42 | } | ||
| 43 | |||
| 44 | case comparison::boolean_equals: | ||
| 45 | { | ||
| 46 | singleton_.boolValue = other.singleton_.boolValue; | ||
| 47 | |||
| 48 | break; | ||
| 49 | } | ||
| 50 | |||
| 51 | case comparison::string_equals: | ||
| 52 | case comparison::string_does_not_equal: | ||
| 53 | case comparison::string_is_like: | ||
| 54 | case comparison::string_is_not_like: | ||
| 55 | { | ||
| 56 | new(&singleton_.stringValue) std::string(other.singleton_.stringValue); | ||
| 57 | |||
| 58 | break; | ||
| 59 | } | ||
| 60 | |||
| 61 | case comparison::is_null: | ||
| 62 | case comparison::is_not_null: | ||
| 63 | { | ||
| 64 | break; | ||
| 65 | } | ||
| 66 | |||
| 67 | case comparison::matches: | ||
| 68 | case comparison::does_not_match: | ||
| 69 | case comparison::hierarchally_matches: | ||
| 70 | case comparison::does_not_hierarchally_match: | ||
| 71 | { | ||
| 72 | new(&singleton_.join) std::unique_ptr<filter>(new filter(*other.singleton_.join)); | ||
| 73 | |||
| 74 | break; | ||
| 75 | } | ||
| 76 | } | ||
| 77 | |||
| 78 | break; | ||
| 79 | } | ||
| 80 | |||
| 81 | case type::group: | ||
| 82 | { | ||
| 83 | new(&group_.children) std::list<filter>(other.group_.children); | ||
| 84 | group_.orlogic = other.group_.orlogic; | ||
| 85 | |||
| 86 | break; | ||
| 87 | } | ||
| 88 | } | ||
| 89 | } | ||
| 90 | |||
| 91 | filter::filter(filter&& other) : filter() | ||
| 92 | { | ||
| 93 | swap(*this, other); | ||
| 94 | } | ||
| 95 | |||
| 96 | filter& filter::operator=(filter other) | ||
| 97 | { | ||
| 98 | swap(*this, other); | ||
| 99 | |||
| 100 | return *this; | ||
| 101 | } | ||
| 102 | |||
| 103 | void swap(filter& first, filter& second) | ||
| 104 | { | ||
| 105 | using type = filter::type; | ||
| 106 | using comparison = filter::comparison; | ||
| 107 | |||
| 108 | type tempType = first.type_; | ||
| 109 | field tempField; | ||
| 110 | comparison tempComparison; | ||
| 111 | std::unique_ptr<filter> tempJoin; | ||
| 112 | std::string tempStringValue; | ||
| 113 | int tempIntValue; | ||
| 114 | bool tempBoolValue; | ||
| 115 | std::list<filter> tempChildren; | ||
| 116 | bool tempOrlogic; | ||
| 117 | |||
| 118 | switch (tempType) | ||
| 119 | { | ||
| 120 | case type::empty: | ||
| 121 | { | ||
| 122 | break; | ||
| 123 | } | ||
| 124 | |||
| 125 | case type::singleton: | ||
| 126 | { | ||
| 127 | tempField = std::move(first.singleton_.filterField); | ||
| 128 | tempComparison = first.singleton_.filterType; | ||
| 129 | |||
| 130 | switch (tempComparison) | ||
| 131 | { | ||
| 132 | case comparison::int_equals: | ||
| 133 | case comparison::int_does_not_equal: | ||
| 134 | case comparison::int_is_at_least: | ||
| 135 | case comparison::int_is_greater_than: | ||
| 136 | case comparison::int_is_at_most: | ||
| 137 | case comparison::int_is_less_than: | ||
| 138 | { | ||
| 139 | tempIntValue = first.singleton_.intValue; | ||
| 140 | |||
| 141 | break; | ||
| 142 | } | ||
| 143 | |||
| 144 | case comparison::boolean_equals: | ||
| 145 | { | ||
| 146 | tempBoolValue = first.singleton_.boolValue; | ||
| 147 | |||
| 148 | break; | ||
| 149 | } | ||
| 150 | |||
| 151 | case comparison::string_equals: | ||
| 152 | case comparison::string_does_not_equal: | ||
| 153 | case comparison::string_is_like: | ||
| 154 | case comparison::string_is_not_like: | ||
| 155 | { | ||
| 156 | tempStringValue = std::move(first.singleton_.stringValue); | ||
| 157 | |||
| 158 | break; | ||
| 159 | } | ||
| 160 | |||
| 161 | case comparison::is_null: | ||
| 162 | case comparison::is_not_null: | ||
| 163 | { | ||
| 164 | break; | ||
| 165 | } | ||
| 166 | |||
| 167 | case comparison::matches: | ||
| 168 | case comparison::does_not_match: | ||
| 169 | case comparison::hierarchally_matches: | ||
| 170 | case comparison::does_not_hierarchally_match: | ||
| 171 | { | ||
| 172 | tempJoin = std::move(first.singleton_.join); | ||
| 173 | |||
| 174 | break; | ||
| 175 | } | ||
| 176 | } | ||
| 177 | |||
| 178 | break; | ||
| 179 | } | ||
| 180 | |||
| 181 | case type::group: | ||
| 182 | { | ||
| 183 | tempChildren = std::move(first.group_.children); | ||
| 184 | tempOrlogic = first.group_.orlogic; | ||
| 185 | |||
| 186 | break; | ||
| 187 | } | ||
| 188 | } | ||
| 189 | |||
| 190 | first.~filter(); | ||
| 191 | |||
| 192 | first.type_ = second.type_; | ||
| 193 | |||
| 194 | switch (first.type_) | ||
| 195 | { | ||
| 196 | case type::empty: | ||
| 197 | { | ||
| 198 | break; | ||
| 199 | } | ||
| 200 | |||
| 201 | case type::singleton: | ||
| 202 | { | ||
| 203 | new(&first.singleton_.filterField) field(std::move(second.singleton_.filterField)); | ||
| 204 | first.singleton_.filterType = second.singleton_.filterType; | ||
| 205 | |||
| 206 | switch (first.singleton_.filterType) | ||
| 207 | { | ||
| 208 | case comparison::int_equals: | ||
| 209 | case comparison::int_does_not_equal: | ||
| 210 | case comparison::int_is_at_least: | ||
| 211 | case comparison::int_is_greater_than: | ||
| 212 | case comparison::int_is_at_most: | ||
| 213 | case comparison::int_is_less_than: | ||
| 214 | { | ||
| 215 | first.singleton_.intValue = second.singleton_.intValue; | ||
| 216 | |||
| 217 | break; | ||
| 218 | } | ||
| 219 | |||
| 220 | case comparison::boolean_equals: | ||
| 221 | { | ||
| 222 | first.singleton_.boolValue = second.singleton_.boolValue; | ||
| 223 | |||
| 224 | break; | ||
| 225 | } | ||
| 226 | |||
| 227 | case comparison::string_equals: | ||
| 228 | case comparison::string_does_not_equal: | ||
| 229 | case comparison::string_is_like: | ||
| 230 | case comparison::string_is_not_like: | ||
| 231 | { | ||
| 232 | new(&first.singleton_.stringValue) std::string(std::move(second.singleton_.stringValue)); | ||
| 233 | |||
| 234 | break; | ||
| 235 | } | ||
| 236 | |||
| 237 | case comparison::is_null: | ||
| 238 | case comparison::is_not_null: | ||
| 239 | { | ||
| 240 | break; | ||
| 241 | } | ||
| 242 | |||
| 243 | case comparison::matches: | ||
| 244 | case comparison::does_not_match: | ||
| 245 | case comparison::hierarchally_matches: | ||
| 246 | case comparison::does_not_hierarchally_match: | ||
| 247 | { | ||
| 248 | new(&first.singleton_.join) std::unique_ptr<filter>(std::move(second.singleton_.join)); | ||
| 249 | |||
| 250 | break; | ||
| 251 | } | ||
| 252 | } | ||
| 253 | |||
| 254 | break; | ||
| 255 | } | ||
| 256 | |||
| 257 | case type::group: | ||
| 258 | { | ||
| 259 | new(&first.group_.children) std::list<filter>(std::move(second.group_.children)); | ||
| 260 | first.group_.orlogic = second.group_.orlogic; | ||
| 261 | |||
| 262 | break; | ||
| 263 | } | ||
| 264 | } | ||
| 265 | |||
| 266 | second.~filter(); | ||
| 267 | |||
| 268 | second.type_ = tempType; | ||
| 269 | |||
| 270 | switch (second.type_) | ||
| 271 | { | ||
| 272 | case type::empty: | ||
| 273 | { | ||
| 274 | break; | ||
| 275 | } | ||
| 276 | |||
| 277 | case type::singleton: | ||
| 278 | { | ||
| 279 | new(&second.singleton_.filterField) field(std::move(tempField)); | ||
| 280 | second.singleton_.filterType = tempComparison; | ||
| 281 | |||
| 282 | switch (second.singleton_.filterType) | ||
| 283 | { | ||
| 284 | case comparison::int_equals: | ||
| 285 | case comparison::int_does_not_equal: | ||
| 286 | case comparison::int_is_at_least: | ||
| 287 | case comparison::int_is_greater_than: | ||
| 288 | case comparison::int_is_at_most: | ||
| 289 | case comparison::int_is_less_than: | ||
| 290 | { | ||
| 291 | second.singleton_.intValue = tempIntValue; | ||
| 292 | |||
| 293 | break; | ||
| 294 | } | ||
| 295 | |||
| 296 | case comparison::boolean_equals: | ||
| 297 | { | ||
| 298 | second.singleton_.boolValue = tempBoolValue; | ||
| 299 | |||
| 300 | break; | ||
| 301 | } | ||
| 302 | |||
| 303 | case comparison::string_equals: | ||
| 304 | case comparison::string_does_not_equal: | ||
| 305 | case comparison::string_is_like: | ||
| 306 | case comparison::string_is_not_like: | ||
| 307 | { | ||
| 308 | new(&second.singleton_.stringValue) std::string(std::move(tempStringValue)); | ||
| 309 | |||
| 310 | break; | ||
| 311 | } | ||
| 312 | |||
| 313 | case comparison::is_null: | ||
| 314 | case comparison::is_not_null: | ||
| 315 | { | ||
| 316 | break; | ||
| 317 | } | ||
| 318 | |||
| 319 | case comparison::matches: | ||
| 320 | case comparison::does_not_match: | ||
| 321 | case comparison::hierarchally_matches: | ||
| 322 | case comparison::does_not_hierarchally_match: | ||
| 323 | { | ||
| 324 | new(&second.singleton_.join) std::unique_ptr<filter>(std::move(tempJoin)); | ||
| 325 | |||
| 326 | break; | ||
| 327 | } | ||
| 328 | } | ||
| 329 | |||
| 330 | break; | ||
| 331 | } | ||
| 332 | |||
| 333 | case type::group: | ||
| 334 | { | ||
| 335 | new(&second.group_.children) std::list<filter>(std::move(tempChildren)); | ||
| 336 | second.group_.orlogic = tempOrlogic; | ||
| 337 | |||
| 338 | break; | ||
| 339 | } | ||
| 340 | } | ||
| 341 | } | ||
| 342 | |||
| 343 | filter::~filter() | ||
| 344 | { | ||
| 345 | switch (type_) | ||
| 346 | { | ||
| 347 | case type::empty: | ||
| 348 | { | ||
| 349 | break; | ||
| 350 | } | ||
| 351 | |||
| 352 | case type::singleton: | ||
| 353 | { | ||
| 354 | singleton_.filterField.~field(); | ||
| 355 | |||
| 356 | switch (singleton_.filterType) | ||
| 357 | { | ||
| 358 | case comparison::int_equals: | ||
| 359 | case comparison::int_does_not_equal: | ||
| 360 | case comparison::int_is_at_least: | ||
| 361 | case comparison::int_is_greater_than: | ||
| 362 | case comparison::int_is_at_most: | ||
| 363 | case comparison::int_is_less_than: | ||
| 364 | case comparison::boolean_equals: | ||
| 365 | case comparison::is_null: | ||
| 366 | case comparison::is_not_null: | ||
| 367 | { | ||
| 368 | break; | ||
| 369 | } | ||
| 370 | |||
| 371 | case comparison::string_equals: | ||
| 372 | case comparison::string_does_not_equal: | ||
| 373 | case comparison::string_is_like: | ||
| 374 | case comparison::string_is_not_like: | ||
| 375 | { | ||
| 376 | using string_type = std::string; | ||
| 377 | |||
| 378 | singleton_.stringValue.~string_type(); | ||
| 379 | |||
| 380 | break; | ||
| 381 | } | ||
| 382 | |||
| 383 | case comparison::matches: | ||
| 384 | case comparison::does_not_match: | ||
| 385 | case comparison::hierarchally_matches: | ||
| 386 | case comparison::does_not_hierarchally_match: | ||
| 387 | { | ||
| 388 | using ptr_type = std::unique_ptr<filter>; | ||
| 389 | |||
| 390 | singleton_.join.~ptr_type(); | ||
| 391 | |||
| 392 | break; | ||
| 393 | } | ||
| 394 | } | ||
| 395 | |||
| 396 | break; | ||
| 397 | } | ||
| 398 | |||
| 399 | case type::group: | ||
| 400 | { | ||
| 401 | using list_type = std::list<filter>; | ||
| 402 | |||
| 403 | group_.children.~list_type(); | ||
| 404 | |||
| 405 | break; | ||
| 406 | } | ||
| 407 | } | ||
| 408 | } | ||
| 409 | |||
| 410 | filter::filter() | ||
| 411 | { | ||
| 412 | } | ||
| 413 | |||
| 414 | filter::filter( | ||
| 415 | field filterField, | ||
| 416 | comparison filterType, | ||
| 417 | int filterValue) : | ||
| 418 | type_(type::singleton) | ||
| 419 | { | ||
| 420 | if (filterField.getType() == field::type::integer) | ||
| 421 | { | ||
| 422 | switch (filterType) | ||
| 423 | { | ||
| 424 | case comparison::int_equals: | ||
| 425 | case comparison::int_does_not_equal: | ||
| 426 | case comparison::int_is_at_least: | ||
| 427 | case comparison::int_is_greater_than: | ||
| 428 | case comparison::int_is_at_most: | ||
| 429 | case comparison::int_is_less_than: | ||
| 430 | { | ||
| 431 | new(&singleton_.filterField) field(std::move(filterField)); | ||
| 432 | singleton_.filterType = filterType; | ||
| 433 | singleton_.intValue = filterValue; | ||
| 434 | |||
| 435 | break; | ||
| 436 | } | ||
| 437 | |||
| 438 | case comparison::boolean_equals: | ||
| 439 | case comparison::string_equals: | ||
| 440 | case comparison::string_does_not_equal: | ||
| 441 | case comparison::string_is_like: | ||
| 442 | case comparison::string_is_not_like: | ||
| 443 | case comparison::is_null: | ||
| 444 | case comparison::is_not_null: | ||
| 445 | case comparison::matches: | ||
| 446 | case comparison::does_not_match: | ||
| 447 | case comparison::hierarchally_matches: | ||
| 448 | case comparison::does_not_hierarchally_match: | ||
| 449 | { | ||
| 450 | throw std::invalid_argument("Invalid comparison for integer field"); | ||
| 451 | } | ||
| 452 | } | ||
| 453 | } else { | ||
| 454 | throw std::domain_error("Cannot match a non-integer field against an integer value"); | ||
| 455 | } | ||
| 456 | } | ||
| 457 | |||
| 458 | filter::filter( | ||
| 459 | field filterField, | ||
| 460 | comparison filterType, | ||
| 461 | std::string filterValue) : | ||
| 462 | type_(type::singleton) | ||
| 463 | { | ||
| 464 | if (filterField.getType() == field::type::string) | ||
| 465 | { | ||
| 466 | switch (filterType) | ||
| 467 | { | ||
| 468 | case comparison::string_equals: | ||
| 469 | case comparison::string_does_not_equal: | ||
| 470 | case comparison::string_is_like: | ||
| 471 | case comparison::string_is_not_like: | ||
| 472 | { | ||
| 473 | new(&singleton_.filterField) field(std::move(filterField)); | ||
| 474 | singleton_.filterType = filterType; | ||
| 475 | new(&singleton_.stringValue) std::string(std::move(filterValue)); | ||
| 476 | |||
| 477 | break; | ||
| 478 | } | ||
| 479 | |||
| 480 | case comparison::int_equals: | ||
| 481 | case comparison::int_does_not_equal: | ||
| 482 | case comparison::int_is_at_least: | ||
| 483 | case comparison::int_is_greater_than: | ||
| 484 | case comparison::int_is_at_most: | ||
| 485 | case comparison::int_is_less_than: | ||
| 486 | case comparison::boolean_equals: | ||
| 487 | case comparison::is_null: | ||
| 488 | case comparison::is_not_null: | ||
| 489 | case comparison::matches: | ||
| 490 | case comparison::does_not_match: | ||
| 491 | case comparison::hierarchally_matches: | ||
| 492 | case comparison::does_not_hierarchally_match: | ||
| 493 | { | ||
| 494 | throw std::invalid_argument("Invalid comparison for string field"); | ||
| 495 | } | ||
| 496 | } | ||
| 497 | } else { | ||
| 498 | throw std::domain_error("Cannot match a non-string field against an string value"); | ||
| 499 | } | ||
| 500 | } | ||
| 501 | |||
| 502 | filter::filter( | ||
| 503 | field filterField, | ||
| 504 | comparison filterType, | ||
| 505 | bool filterValue) : | ||
| 506 | type_(type::singleton) | ||
| 507 | { | ||
| 508 | if (filterField.getType() == field::type::boolean) | ||
| 509 | { | ||
| 510 | switch (filterType) | ||
| 511 | { | ||
| 512 | case comparison::boolean_equals: | ||
| 513 | { | ||
| 514 | new(&singleton_.filterField) field(std::move(filterField)); | ||
| 515 | singleton_.filterType = filterType; | ||
| 516 | singleton_.boolValue = filterValue; | ||
| 517 | |||
| 518 | break; | ||
| 519 | } | ||
| 520 | |||
| 521 | case comparison::string_equals: | ||
| 522 | case comparison::string_does_not_equal: | ||
| 523 | case comparison::string_is_like: | ||
| 524 | case comparison::string_is_not_like: | ||
| 525 | case comparison::int_equals: | ||
| 526 | case comparison::int_does_not_equal: | ||
| 527 | case comparison::int_is_at_least: | ||
| 528 | case comparison::int_is_greater_than: | ||
| 529 | case comparison::int_is_at_most: | ||
| 530 | case comparison::int_is_less_than: | ||
| 531 | case comparison::is_null: | ||
| 532 | case comparison::is_not_null: | ||
| 533 | case comparison::matches: | ||
| 534 | case comparison::does_not_match: | ||
| 535 | case comparison::hierarchally_matches: | ||
| 536 | case comparison::does_not_hierarchally_match: | ||
| 537 | { | ||
| 538 | throw std::invalid_argument("Invalid comparison for boolean field"); | ||
| 539 | } | ||
| 540 | } | ||
| 541 | } else { | ||
| 542 | throw std::domain_error("Cannot match a non-boolean field against a boolean value"); | ||
| 543 | } | ||
| 544 | } | ||
| 545 | |||
| 546 | filter::filter( | ||
| 547 | field filterField, | ||
| 548 | comparison filterType) : | ||
| 549 | type_(type::singleton) | ||
| 550 | { | ||
| 551 | if (filterField.isNullable()) | ||
| 552 | { | ||
| 553 | switch (filterType) | ||
| 554 | { | ||
| 555 | case comparison::is_null: | ||
| 556 | case comparison::is_not_null: | ||
| 557 | { | ||
| 558 | new(&singleton_.filterField) field(std::move(filterField)); | ||
| 559 | singleton_.filterType = filterType; | ||
| 560 | |||
| 561 | break; | ||
| 562 | } | ||
| 563 | |||
| 564 | case comparison::string_equals: | ||
| 565 | case comparison::string_does_not_equal: | ||
| 566 | case comparison::string_is_like: | ||
| 567 | case comparison::string_is_not_like: | ||
| 568 | case comparison::int_equals: | ||
| 569 | case comparison::int_does_not_equal: | ||
| 570 | case comparison::int_is_at_least: | ||
| 571 | case comparison::int_is_greater_than: | ||
| 572 | case comparison::int_is_at_most: | ||
| 573 | case comparison::int_is_less_than: | ||
| 574 | case comparison::boolean_equals: | ||
| 575 | case comparison::matches: | ||
| 576 | case comparison::does_not_match: | ||
| 577 | case comparison::hierarchally_matches: | ||
| 578 | case comparison::does_not_hierarchally_match: | ||
| 579 | { | ||
| 580 | throw std::invalid_argument("Incorrect constructor for given comparison"); | ||
| 581 | } | ||
| 582 | } | ||
| 583 | } else { | ||
| 584 | throw std::domain_error("Cannot check nullity/non-nullity of non-nullable field"); | ||
| 585 | } | ||
| 586 | } | ||
| 587 | |||
| 588 | filter::filter( | ||
| 589 | field joinOn, | ||
| 590 | comparison filterType, | ||
| 591 | filter joinCondition) : | ||
| 592 | type_(type::singleton) | ||
| 593 | { | ||
| 594 | switch (joinOn.getType()) | ||
| 595 | { | ||
| 596 | case field::type::join: | ||
| 597 | case field::type::join_through: | ||
| 598 | { | ||
| 599 | switch (filterType) | ||
| 600 | { | ||
| 601 | case comparison::matches: | ||
| 602 | case comparison::does_not_match: | ||
| 603 | { | ||
| 604 | new(&singleton_.filterField) field(std::move(joinOn)); | ||
| 605 | singleton_.filterType = filterType; | ||
| 606 | new(&singleton_.join) std::unique_ptr<filter>(new filter(joinCondition.normalize(singleton_.filterField.getJoinObject()))); | ||
| 607 | |||
| 608 | break; | ||
| 609 | } | ||
| 610 | |||
| 611 | case comparison::int_equals: | ||
| 612 | case comparison::int_does_not_equal: | ||
| 613 | case comparison::int_is_at_least: | ||
| 614 | case comparison::int_is_greater_than: | ||
| 615 | case comparison::int_is_at_most: | ||
| 616 | case comparison::int_is_less_than: | ||
| 617 | case comparison::boolean_equals: | ||
| 618 | case comparison::string_equals: | ||
| 619 | case comparison::string_does_not_equal: | ||
| 620 | case comparison::string_is_like: | ||
| 621 | case comparison::string_is_not_like: | ||
| 622 | case comparison::is_null: | ||
| 623 | case comparison::is_not_null: | ||
| 624 | case comparison::hierarchally_matches: | ||
| 625 | case comparison::does_not_hierarchally_match: | ||
| 626 | { | ||
| 627 | throw std::invalid_argument("Incorrect constructor for given comparison"); | ||
| 628 | } | ||
| 629 | } | ||
| 630 | |||
| 631 | break; | ||
| 632 | } | ||
| 633 | |||
| 634 | case field::type::hierarchal_join: | ||
| 635 | { | ||
| 636 | switch (filterType) | ||
| 637 | { | ||
| 638 | case comparison::hierarchally_matches: | ||
| 639 | case comparison::does_not_hierarchally_match: | ||
| 640 | { | ||
| 641 | new(&singleton_.filterField) field(std::move(joinOn)); | ||
| 642 | singleton_.filterType = filterType; | ||
| 643 | new(&singleton_.join) std::unique_ptr<filter>(new filter(joinCondition.normalize(singleton_.filterField.getObject()))); | ||
| 644 | |||
| 645 | break; | ||
| 646 | } | ||
| 647 | |||
| 648 | case comparison::int_equals: | ||
| 649 | case comparison::int_does_not_equal: | ||
| 650 | case comparison::int_is_at_least: | ||
| 651 | case comparison::int_is_greater_than: | ||
| 652 | case comparison::int_is_at_most: | ||
| 653 | case comparison::int_is_less_than: | ||
| 654 | case comparison::boolean_equals: | ||
| 655 | case comparison::string_equals: | ||
| 656 | case comparison::string_does_not_equal: | ||
| 657 | case comparison::string_is_like: | ||
| 658 | case comparison::string_is_not_like: | ||
| 659 | case comparison::is_null: | ||
| 660 | case comparison::is_not_null: | ||
| 661 | case comparison::matches: | ||
| 662 | case comparison::does_not_match: | ||
| 663 | { | ||
| 664 | throw std::invalid_argument("Incorrect constructor for given comparison"); | ||
| 665 | } | ||
| 666 | } | ||
| 667 | |||
| 668 | break; | ||
| 669 | } | ||
| 670 | |||
| 671 | case field::type::undefined: | ||
| 672 | case field::type::string: | ||
| 673 | case field::type::integer: | ||
| 674 | case field::type::boolean: | ||
| 675 | { | ||
| 676 | throw std::domain_error("Matching field must be a join field"); | ||
| 677 | } | ||
| 678 | } | ||
| 679 | } | ||
| 680 | |||
| 681 | field filter::getField() const | ||
| 682 | { | ||
| 683 | if (type_ == type::singleton) | ||
| 684 | { | ||
| 685 | return singleton_.filterField; | ||
| 686 | } else { | ||
| 687 | throw std::domain_error("This filter does not have a field"); | ||
| 688 | } | ||
| 689 | } | ||
| 690 | |||
| 691 | filter::comparison filter::getComparison() const | ||
| 692 | { | ||
| 693 | if (type_ == type::singleton) | ||
| 694 | { | ||
| 695 | return singleton_.filterType; | ||
| 696 | } else { | ||
| 697 | throw std::domain_error("This filter does not have a comparison"); | ||
| 698 | } | ||
| 699 | } | ||
| 700 | |||
| 701 | filter filter::getJoinCondition() const | ||
| 702 | { | ||
| 703 | if (type_ == type::singleton) | ||
| 704 | { | ||
| 705 | switch (singleton_.filterType) | ||
| 706 | { | ||
| 707 | case comparison::matches: | ||
| 708 | case comparison::does_not_match: | ||
| 709 | case comparison::hierarchally_matches: | ||
| 710 | case comparison::does_not_hierarchally_match: | ||
| 711 | { | ||
| 712 | return *singleton_.join; | ||
| 713 | } | ||
| 714 | |||
| 715 | case comparison::string_equals: | ||
| 716 | case comparison::string_does_not_equal: | ||
| 717 | case comparison::string_is_like: | ||
| 718 | case comparison::string_is_not_like: | ||
| 719 | case comparison::int_equals: | ||
| 720 | case comparison::int_does_not_equal: | ||
| 721 | case comparison::int_is_at_least: | ||
| 722 | case comparison::int_is_greater_than: | ||
| 723 | case comparison::int_is_at_most: | ||
| 724 | case comparison::int_is_less_than: | ||
| 725 | case comparison::boolean_equals: | ||
| 726 | case comparison::is_null: | ||
| 727 | case comparison::is_not_null: | ||
| 728 | { | ||
| 729 | throw std::domain_error("This filter does not have a join condition"); | ||
| 730 | } | ||
| 731 | } | ||
| 732 | } else { | ||
| 733 | throw std::domain_error("This filter does not have a join condition"); | ||
| 734 | } | ||
| 735 | } | ||
| 736 | |||
| 737 | std::string filter::getStringArgument() const | ||
| 738 | { | ||
| 739 | if (type_ == type::singleton) | ||
| 740 | { | ||
| 741 | switch (singleton_.filterType) | ||
| 742 | { | ||
| 743 | case comparison::string_equals: | ||
| 744 | case comparison::string_does_not_equal: | ||
| 745 | case comparison::string_is_like: | ||
| 746 | case comparison::string_is_not_like: | ||
| 747 | { | ||
| 748 | return singleton_.stringValue; | ||
| 749 | } | ||
| 750 | |||
| 751 | case comparison::int_equals: | ||
| 752 | case comparison::int_does_not_equal: | ||
| 753 | case comparison::int_is_at_least: | ||
| 754 | case comparison::int_is_greater_than: | ||
| 755 | case comparison::int_is_at_most: | ||
| 756 | case comparison::int_is_less_than: | ||
| 757 | case comparison::boolean_equals: | ||
| 758 | case comparison::is_null: | ||
| 759 | case comparison::is_not_null: | ||
| 760 | case comparison::matches: | ||
| 761 | case comparison::does_not_match: | ||
| 762 | case comparison::hierarchally_matches: | ||
| 763 | case comparison::does_not_hierarchally_match: | ||
| 764 | { | ||
| 765 | throw std::domain_error("This filter does not have a string argument"); | ||
| 766 | } | ||
| 767 | } | ||
| 768 | } else { | ||
| 769 | throw std::domain_error("This filter does not have a string argument"); | ||
| 770 | } | ||
| 771 | } | ||
| 772 | |||
| 773 | int filter::getIntegerArgument() const | ||
| 774 | { | ||
| 775 | if (type_ == type::singleton) | ||
| 776 | { | ||
| 777 | switch (singleton_.filterType) | ||
| 778 | { | ||
| 779 | case comparison::int_equals: | ||
| 780 | case comparison::int_does_not_equal: | ||
| 781 | case comparison::int_is_at_least: | ||
| 782 | case comparison::int_is_greater_than: | ||
| 783 | case comparison::int_is_at_most: | ||
| 784 | case comparison::int_is_less_than: | ||
| 785 | { | ||
| 786 | return singleton_.intValue; | ||
| 787 | } | ||
| 788 | |||
| 789 | case comparison::string_equals: | ||
| 790 | case comparison::string_does_not_equal: | ||
| 791 | case comparison::string_is_like: | ||
| 792 | case comparison::string_is_not_like: | ||
| 793 | case comparison::boolean_equals: | ||
| 794 | case comparison::is_null: | ||
| 795 | case comparison::is_not_null: | ||
| 796 | case comparison::matches: | ||
| 797 | case comparison::does_not_match: | ||
| 798 | case comparison::hierarchally_matches: | ||
| 799 | case comparison::does_not_hierarchally_match: | ||
| 800 | { | ||
| 801 | throw std::domain_error("This filter does not have an integer argument"); | ||
| 802 | } | ||
| 803 | } | ||
| 804 | } else { | ||
| 805 | throw std::domain_error("This filter does not have an integer argument"); | ||
| 806 | } | ||
| 807 | } | ||
| 808 | |||
| 809 | bool filter::getBooleanArgument() const | ||
| 810 | { | ||
| 811 | if ((type_ == type::singleton) && (singleton_.filterType == comparison::boolean_equals)) | ||
| 812 | { | ||
| 813 | return singleton_.boolValue; | ||
| 814 | } else { | ||
| 815 | throw std::domain_error("This filter does not have a boolean argument"); | ||
| 816 | } | ||
| 817 | } | ||
| 818 | |||
| 819 | filter::filter(bool orlogic) : type_(type::group) | ||
| 820 | { | ||
| 821 | new(&group_.children) std::list<filter>(); | ||
| 822 | group_.orlogic = orlogic; | ||
| 823 | } | ||
| 824 | |||
| 825 | bool filter::getOrlogic() const | ||
| 826 | { | ||
| 827 | if (type_ == type::group) | ||
| 828 | { | ||
| 829 | return group_.orlogic; | ||
| 830 | } else { | ||
| 831 | throw std::domain_error("This filter is not a group filter"); | ||
| 832 | } | ||
| 833 | } | ||
| 834 | |||
| 835 | filter filter::operator+(filter condition) const | ||
| 836 | { | ||
| 837 | filter result(*this); | ||
| 838 | result += std::move(condition); | ||
| 839 | |||
| 840 | return result; | ||
| 841 | } | ||
| 842 | |||
| 843 | filter& filter::operator+=(filter condition) | ||
| 844 | { | ||
| 845 | if (type_ == type::group) | ||
| 846 | { | ||
| 847 | group_.children.push_back(std::move(condition)); | ||
| 848 | |||
| 849 | return *this; | ||
| 850 | } else { | ||
| 851 | throw std::domain_error("Children can only be added to group filters"); | ||
| 852 | } | ||
| 853 | } | ||
| 854 | |||
| 855 | filter::const_iterator filter::begin() const | ||
| 856 | { | ||
| 857 | if (type_ == type::group) | ||
| 858 | { | ||
| 859 | return std::begin(group_.children); | ||
| 860 | } else { | ||
| 861 | throw std::domain_error("This filter has no children"); | ||
| 862 | } | ||
| 863 | } | ||
| 864 | |||
| 865 | filter::const_iterator filter::end() const | ||
| 866 | { | ||
| 867 | if (type_ == type::group) | ||
| 868 | { | ||
| 869 | return std::end(group_.children); | ||
| 870 | } else { | ||
| 871 | throw std::domain_error("This filter has no children"); | ||
| 872 | } | ||
| 873 | } | ||
| 874 | |||
| 875 | filter filter::operator!() const | ||
| 876 | { | ||
| 877 | switch (type_) | ||
| 878 | { | ||
| 879 | case type::empty: | ||
| 880 | { | ||
| 881 | return {}; | ||
| 882 | } | ||
| 883 | |||
| 884 | case type::singleton: | ||
| 885 | { | ||
| 886 | switch (singleton_.filterType) | ||
| 887 | { | ||
| 888 | case comparison::int_equals: | ||
| 889 | { | ||
| 890 | return filter(singleton_.filterField, comparison::int_does_not_equal, singleton_.intValue); | ||
| 891 | } | ||
| 892 | |||
| 893 | case comparison::int_does_not_equal: | ||
| 894 | { | ||
| 895 | return filter(singleton_.filterField, comparison::int_equals, singleton_.intValue); | ||
| 896 | } | ||
| 897 | |||
| 898 | case comparison::int_is_at_least: | ||
| 899 | { | ||
| 900 | return filter(singleton_.filterField, comparison::int_is_less_than, singleton_.intValue); | ||
| 901 | } | ||
| 902 | |||
| 903 | case comparison::int_is_greater_than: | ||
| 904 | { | ||
| 905 | return filter(singleton_.filterField, comparison::int_is_at_most, singleton_.intValue); | ||
| 906 | } | ||
| 907 | |||
| 908 | case comparison::int_is_at_most: | ||
| 909 | { | ||
| 910 | return filter(singleton_.filterField, comparison::int_is_greater_than, singleton_.intValue); | ||
| 911 | } | ||
| 912 | |||
| 913 | case comparison::int_is_less_than: | ||
| 914 | { | ||
| 915 | return filter(singleton_.filterField, comparison::int_is_at_least, singleton_.intValue); | ||
| 916 | } | ||
| 917 | |||
| 918 | case comparison::boolean_equals: | ||
| 919 | { | ||
| 920 | return filter(singleton_.filterField, comparison::boolean_equals, !singleton_.boolValue); | ||
| 921 | } | ||
| 922 | |||
| 923 | case comparison::string_equals: | ||
| 924 | { | ||
| 925 | return filter(singleton_.filterField, comparison::string_does_not_equal, singleton_.stringValue); | ||
| 926 | } | ||
| 927 | |||
| 928 | case comparison::string_does_not_equal: | ||
| 929 | { | ||
| 930 | return filter(singleton_.filterField, comparison::string_equals, singleton_.stringValue); | ||
| 931 | } | ||
| 932 | |||
| 933 | case comparison::string_is_like: | ||
| 934 | { | ||
| 935 | return filter(singleton_.filterField, comparison::string_is_not_like, singleton_.stringValue); | ||
| 936 | } | ||
| 937 | |||
| 938 | case comparison::string_is_not_like: | ||
| 939 | { | ||
| 940 | return filter(singleton_.filterField, comparison::string_is_like, singleton_.stringValue); | ||
| 941 | } | ||
| 942 | |||
| 943 | case comparison::is_null: | ||
| 944 | { | ||
| 945 | return filter(singleton_.filterField, comparison::is_not_null); | ||
| 946 | } | ||
| 947 | |||
| 948 | case comparison::is_not_null: | ||
| 949 | { | ||
| 950 | return filter(singleton_.filterField, comparison::is_null); | ||
| 951 | } | ||
| 952 | |||
| 953 | case comparison::matches: | ||
| 954 | { | ||
| 955 | return filter(singleton_.filterField, comparison::does_not_match, *singleton_.join); | ||
| 956 | } | ||
| 957 | |||
| 958 | case comparison::does_not_match: | ||
| 959 | { | ||
| 960 | return filter(singleton_.filterField, comparison::matches, *singleton_.join); | ||
| 961 | } | ||
| 962 | |||
| 963 | case comparison::hierarchally_matches: | ||
| 964 | { | ||
| 965 | return filter(singleton_.filterField, comparison::does_not_hierarchally_match, *singleton_.join); | ||
| 966 | } | ||
| 967 | |||
| 968 | case comparison::does_not_hierarchally_match: | ||
| 969 | { | ||
| 970 | return filter(singleton_.filterField, comparison::hierarchally_matches, *singleton_.join); | ||
| 971 | } | ||
| 972 | } | ||
| 973 | } | ||
| 974 | |||
| 975 | case type::group: | ||
| 976 | { | ||
| 977 | filter result(!group_.orlogic); | ||
| 978 | |||
| 979 | for (const filter& child : group_.children) | ||
| 980 | { | ||
| 981 | result += !child; | ||
| 982 | } | ||
| 983 | |||
| 984 | return result; | ||
| 985 | } | ||
| 986 | } | ||
| 987 | } | ||
| 988 | |||
| 989 | filter& filter::operator&=(filter condition) | ||
| 990 | { | ||
| 991 | return (*this = (*this && std::move(condition))); | ||
| 992 | } | ||
| 993 | |||
| 994 | filter& filter::operator|=(filter condition) | ||
| 995 | { | ||
| 996 | return (*this = (*this || std::move(condition))); | ||
| 997 | } | ||
| 998 | |||
| 999 | filter filter::operator&&(filter condition) const | ||
| 1000 | { | ||
| 1001 | switch (type_) | ||
| 1002 | { | ||
| 1003 | case type::empty: | ||
| 1004 | { | ||
| 1005 | return condition; | ||
| 1006 | } | ||
| 1007 | |||
| 1008 | case type::singleton: | ||
| 1009 | { | ||
| 1010 | filter result(false); | ||
| 1011 | result.group_.children.push_back(*this); | ||
| 1012 | result.group_.children.push_back(std::move(condition)); | ||
| 1013 | |||
| 1014 | return result; | ||
| 1015 | } | ||
| 1016 | |||
| 1017 | case type::group: | ||
| 1018 | { | ||
| 1019 | if (group_.orlogic) | ||
| 1020 | { | ||
| 1021 | filter result(false); | ||
| 1022 | result.group_.children.push_back(*this); | ||
| 1023 | result.group_.children.push_back(std::move(condition)); | ||
| 1024 | |||
| 1025 | return result; | ||
| 1026 | } else { | ||
| 1027 | filter result(*this); | ||
| 1028 | result.group_.children.push_back(std::move(condition)); | ||
| 1029 | |||
| 1030 | return result; | ||
| 1031 | } | ||
| 1032 | } | ||
| 1033 | } | ||
| 1034 | } | ||
| 1035 | |||
| 1036 | filter filter::operator||(filter condition) const | ||
| 1037 | { | ||
| 1038 | switch (type_) | ||
| 1039 | { | ||
| 1040 | case type::empty: | ||
| 1041 | { | ||
| 1042 | return condition; | ||
| 1043 | } | ||
| 1044 | |||
| 1045 | case type::singleton: | ||
| 1046 | { | ||
| 1047 | filter result(true); | ||
| 1048 | result.group_.children.push_back(*this); | ||
| 1049 | result.group_.children.push_back(std::move(condition)); | ||
| 1050 | |||
| 1051 | return result; | ||
| 1052 | } | ||
| 1053 | |||
| 1054 | case type::group: | ||
| 1055 | { | ||
| 1056 | if (!group_.orlogic) | ||
| 1057 | { | ||
| 1058 | filter result(true); | ||
| 1059 | result.group_.children.push_back(*this); | ||
| 1060 | result.group_.children.push_back(std::move(condition)); | ||
| 1061 | |||
| 1062 | return result; | ||
| 1063 | } else { | ||
| 1064 | filter result(*this); | ||
| 1065 | result.group_.children.push_back(std::move(condition)); | ||
| 1066 | |||
| 1067 | return result; | ||
| 1068 | } | ||
| 1069 | } | ||
| 1070 | } | ||
| 1071 | } | ||
| 1072 | |||
| 1073 | filter filter::normalize(object context) const | ||
| 1074 | { | ||
| 1075 | { | ||
| 1076 | switch (type_) | ||
| 1077 | { | ||
| 1078 | case type::empty: | ||
| 1079 | { | ||
| 1080 | return *this; | ||
| 1081 | } | ||
| 1082 | |||
| 1083 | case type::singleton: | ||
| 1084 | { | ||
| 1085 | // First, switch on the normalized context, and then switch on the | ||
| 1086 | // current context. We recursively recontextualize by using the | ||
| 1087 | // current filter as a subquery for a join such that the context of | ||
| 1088 | // the subquery is one step closer to the context of the current | ||
| 1089 | // filter, and then letting the filter constructor normalize the | ||
| 1090 | // subquery. | ||
| 1091 | switch (context) | ||
| 1092 | { | ||
| 1093 | case object::undefined: | ||
| 1094 | { | ||
| 1095 | // An undefined object indicates no participation in | ||
| 1096 | // recontexualization. | ||
| 1097 | return *this; | ||
| 1098 | } | ||
| 1099 | |||
| 1100 | case object::notion: | ||
| 1101 | { | ||
| 1102 | switch (singleton_.filterField.getObject()) | ||
| 1103 | { | ||
| 1104 | case object::undefined: | ||
| 1105 | case object::notion: | ||
| 1106 | { | ||
| 1107 | return *this; | ||
| 1108 | } | ||
| 1109 | |||
| 1110 | case object::word: | ||
| 1111 | case object::group: | ||
| 1112 | case object::frame: | ||
| 1113 | case object::lemma: | ||
| 1114 | case object::form: | ||
| 1115 | case object::pronunciation: | ||
| 1116 | { | ||
| 1117 | return (verbly::notion::word %= *this); | ||
| 1118 | } | ||
| 1119 | } | ||
| 1120 | } | ||
| 1121 | |||
| 1122 | case object::word: | ||
| 1123 | { | ||
| 1124 | switch (singleton_.filterField.getObject()) | ||
| 1125 | { | ||
| 1126 | case object::notion: | ||
| 1127 | { | ||
| 1128 | return (verbly::word::notion %= *this); | ||
| 1129 | } | ||
| 1130 | |||
| 1131 | case object::undefined: | ||
| 1132 | case object::word: | ||
| 1133 | { | ||
| 1134 | return *this; | ||
| 1135 | } | ||
| 1136 | |||
| 1137 | case object::group: | ||
| 1138 | case object::frame: | ||
| 1139 | { | ||
| 1140 | return (verbly::word::group %= *this); | ||
| 1141 | } | ||
| 1142 | |||
| 1143 | case object::lemma: | ||
| 1144 | case object::form: | ||
| 1145 | case object::pronunciation: | ||
| 1146 | { | ||
| 1147 | return (verbly::word::lemma %= *this); | ||
| 1148 | } | ||
| 1149 | } | ||
| 1150 | |||
| 1151 | case object::group: | ||
| 1152 | { | ||
| 1153 | switch (singleton_.filterField.getObject()) | ||
| 1154 | { | ||
| 1155 | case object::undefined: | ||
| 1156 | case object::group: | ||
| 1157 | { | ||
| 1158 | return *this; | ||
| 1159 | } | ||
| 1160 | |||
| 1161 | case object::notion: | ||
| 1162 | case object::word: | ||
| 1163 | case object::lemma: | ||
| 1164 | case object::form: | ||
| 1165 | case object::pronunciation: | ||
| 1166 | { | ||
| 1167 | return (verbly::group::word %= *this); | ||
| 1168 | } | ||
| 1169 | |||
| 1170 | case object::frame: | ||
| 1171 | { | ||
| 1172 | return (verbly::group::frame %= *this); | ||
| 1173 | } | ||
| 1174 | } | ||
| 1175 | } | ||
| 1176 | |||
| 1177 | case object::frame: | ||
| 1178 | { | ||
| 1179 | switch (singleton_.filterField.getObject()) | ||
| 1180 | { | ||
| 1181 | case object::undefined: | ||
| 1182 | case object::frame: | ||
| 1183 | { | ||
| 1184 | return *this; | ||
| 1185 | } | ||
| 1186 | |||
| 1187 | case object::notion: | ||
| 1188 | case object::word: | ||
| 1189 | case object::group: | ||
| 1190 | case object::lemma: | ||
| 1191 | case object::form: | ||
| 1192 | case object::pronunciation: | ||
| 1193 | { | ||
| 1194 | return (verbly::frame::group %= *this); | ||
| 1195 | } | ||
| 1196 | } | ||
| 1197 | } | ||
| 1198 | |||
| 1199 | case object::lemma: | ||
| 1200 | { | ||
| 1201 | switch (singleton_.filterField.getObject()) | ||
| 1202 | { | ||
| 1203 | case object::notion: | ||
| 1204 | case object::word: | ||
| 1205 | case object::group: | ||
| 1206 | case object::frame: | ||
| 1207 | { | ||
| 1208 | return verbly::lemma::word %= *this; | ||
| 1209 | } | ||
| 1210 | |||
| 1211 | case object::undefined: | ||
| 1212 | case object::lemma: | ||
| 1213 | { | ||
| 1214 | return *this; | ||
| 1215 | } | ||
| 1216 | |||
| 1217 | case object::form: | ||
| 1218 | case object::pronunciation: | ||
| 1219 | { | ||
| 1220 | return (verbly::lemma::form(inflection::base) %= *this); | ||
| 1221 | } | ||
| 1222 | } | ||
| 1223 | } | ||
| 1224 | |||
| 1225 | case object::form: | ||
| 1226 | { | ||
| 1227 | switch (singleton_.filterField.getObject()) | ||
| 1228 | { | ||
| 1229 | case object::notion: | ||
| 1230 | case object::word: | ||
| 1231 | case object::group: | ||
| 1232 | case object::frame: | ||
| 1233 | case object::lemma: | ||
| 1234 | { | ||
| 1235 | return verbly::form::lemma(inflection::base) %= *this; | ||
| 1236 | } | ||
| 1237 | |||
| 1238 | case object::undefined: | ||
| 1239 | case object::form: | ||
| 1240 | { | ||
| 1241 | return *this; | ||
| 1242 | } | ||
| 1243 | |||
| 1244 | case object::pronunciation: | ||
| 1245 | { | ||
| 1246 | return (verbly::form::pronunciation %= *this); | ||
| 1247 | } | ||
| 1248 | } | ||
| 1249 | } | ||
| 1250 | |||
| 1251 | case object::pronunciation: | ||
| 1252 | { | ||
| 1253 | switch (singleton_.filterField.getObject()) | ||
| 1254 | { | ||
| 1255 | case object::notion: | ||
| 1256 | case object::word: | ||
| 1257 | case object::group: | ||
| 1258 | case object::frame: | ||
| 1259 | case object::lemma: | ||
| 1260 | case object::form: | ||
| 1261 | { | ||
| 1262 | return verbly::pronunciation::form %= *this; | ||
| 1263 | } | ||
| 1264 | |||
| 1265 | case object::undefined: | ||
| 1266 | case object::pronunciation: | ||
| 1267 | { | ||
| 1268 | return *this; | ||
| 1269 | } | ||
| 1270 | } | ||
| 1271 | } | ||
| 1272 | } | ||
| 1273 | } | ||
| 1274 | } | ||
| 1275 | |||
| 1276 | case type::group: | ||
| 1277 | { | ||
| 1278 | filter result(group_.orlogic); | ||
| 1279 | std::map<field, filter> joins; | ||
| 1280 | |||
| 1281 | for (const filter& child : group_.children) | ||
| 1282 | { | ||
| 1283 | filter normalized = child.normalize(context); | ||
| 1284 | |||
| 1285 | // Notably, this does not attempt to merge hierarchal matches. | ||
| 1286 | switch (normalized.getType()) | ||
| 1287 | { | ||
| 1288 | case type::singleton: | ||
| 1289 | { | ||
| 1290 | switch (normalized.getComparison()) | ||
| 1291 | { | ||
| 1292 | case comparison::matches: | ||
| 1293 | { | ||
| 1294 | if (!joins.count(normalized.singleton_.filterField)) | ||
| 1295 | { | ||
| 1296 | joins[normalized.getField()] = filter(group_.orlogic); | ||
| 1297 | } | ||
| 1298 | |||
| 1299 | joins.at(normalized.getField()) += std::move(*normalized.singleton_.join); | ||
| 1300 | |||
| 1301 | break; | ||
| 1302 | } | ||
| 1303 | |||
| 1304 | case comparison::does_not_match: | ||
| 1305 | { | ||
| 1306 | if (!joins.count(normalized.singleton_.filterField)) | ||
| 1307 | { | ||
| 1308 | joins[normalized.getField()] = filter(group_.orlogic); | ||
| 1309 | } | ||
| 1310 | |||
| 1311 | joins.at(normalized.getField()) += !*normalized.singleton_.join; | ||
| 1312 | |||
| 1313 | break; | ||
| 1314 | } | ||
| 1315 | |||
| 1316 | case comparison::int_equals: | ||
| 1317 | case comparison::int_does_not_equal: | ||
| 1318 | case comparison::int_is_at_least: | ||
| 1319 | case comparison::int_is_greater_than: | ||
| 1320 | case comparison::int_is_at_most: | ||
| 1321 | case comparison::int_is_less_than: | ||
| 1322 | case comparison::boolean_equals: | ||
| 1323 | case comparison::string_equals: | ||
| 1324 | case comparison::string_does_not_equal: | ||
| 1325 | case comparison::string_is_like: | ||
| 1326 | case comparison::string_is_not_like: | ||
| 1327 | case comparison::is_null: | ||
| 1328 | case comparison::is_not_null: | ||
| 1329 | case comparison::hierarchally_matches: | ||
| 1330 | case comparison::does_not_hierarchally_match: | ||
| 1331 | { | ||
| 1332 | result += std::move(normalized); | ||
| 1333 | |||
| 1334 | break; | ||
| 1335 | } | ||
| 1336 | } | ||
| 1337 | |||
| 1338 | break; | ||
| 1339 | } | ||
| 1340 | |||
| 1341 | case type::group: | ||
| 1342 | case type::empty: | ||
| 1343 | { | ||
| 1344 | result += std::move(normalized); | ||
| 1345 | |||
| 1346 | break; | ||
| 1347 | } | ||
| 1348 | } | ||
| 1349 | } | ||
| 1350 | |||
| 1351 | for (auto& mapping : joins) | ||
| 1352 | { | ||
| 1353 | const field& joinOn = mapping.first; | ||
| 1354 | filter& joinCondition = mapping.second; | ||
| 1355 | |||
| 1356 | result += (joinOn %= joinCondition.normalize(joinOn.getJoinObject())); | ||
| 1357 | } | ||
| 1358 | |||
| 1359 | return result; | ||
| 1360 | } | ||
| 1361 | } | ||
| 1362 | } | ||
| 1363 | } | ||
| 1364 | |||
| 1365 | }; | ||
| diff --git a/lib/filter.h b/lib/filter.h new file mode 100644 index 0000000..d213d7a --- /dev/null +++ b/lib/filter.h | |||
| @@ -0,0 +1,143 @@ | |||
| 1 | #ifndef FILTER_H_932BA9C6 | ||
| 2 | #define FILTER_H_932BA9C6 | ||
| 3 | |||
| 4 | #include <list> | ||
| 5 | #include <string> | ||
| 6 | #include <memory> | ||
| 7 | #include "field.h" | ||
| 8 | #include "enums.h" | ||
| 9 | |||
| 10 | namespace verbly { | ||
| 11 | |||
// A query condition. A filter is a tagged union: type_ records whether it is
// empty, a single comparison on one field (singleton_), or a group of child
// filters combined with and/or logic (group_).
| 12 | class filter { | ||
| 13 | public: | ||
// Discriminator for the union at the bottom of the class.
| 14 | enum class type { | ||
| 15 | empty, | ||
| 16 | singleton, | ||
| 17 | group | ||
| 18 | }; | ||
| 19 | ||
// Kind of test a singleton filter performs. operator! maps each value to its
// opposite (e.g. int_is_at_least <-> int_is_less_than, matches <->
// does_not_match); boolean_equals is negated by flipping the stored value.
| 20 | enum class comparison { | ||
| 21 | int_equals, | ||
| 22 | int_does_not_equal, | ||
| 23 | int_is_at_least, | ||
| 24 | int_is_greater_than, | ||
| 25 | int_is_at_most, | ||
| 26 | int_is_less_than, | ||
| 27 | boolean_equals, | ||
| 28 | string_equals, | ||
| 29 | string_does_not_equal, | ||
| 30 | string_is_like, | ||
| 31 | string_is_not_like, | ||
| 32 | is_null, | ||
| 33 | is_not_null, | ||
| 34 | matches, | ||
| 35 | does_not_match, | ||
| 36 | hierarchally_matches, | ||
| 37 | does_not_hierarchally_match | ||
| 38 | }; | ||
| 39 | ||
| 40 | // Copy and move constructors | ||
| 41 | ||
// The union below holds members with non-trivial types, so the compiler
// cannot generate these special members; they are defined out of line.
| 42 | filter(const filter& other); | ||
| 43 | filter(filter&& other); | ||
| 44 | ||
| 45 | // Assignment | ||
| 46 | ||
// Takes its argument by value, so it serves both copy and move assignment.
| 47 | filter& operator=(filter other); | ||
| 48 | ||
| 49 | // Swap | ||
| 50 | ||
| 51 | friend void swap(filter& first, filter& second); | ||
| 52 | ||
| 53 | // Destructor | ||
| 54 | ||
| 55 | ~filter(); | ||
| 56 | ||
| 57 | // Accessors | ||
| 58 | ||
// Returns which of the three shapes (empty/singleton/group) this filter has.
| 59 | type getType() const | ||
| 60 | { | ||
| 61 | return type_; | ||
| 62 | } | ||
| 63 | ||
| 64 | // Empty | ||
| 65 | ||
| 66 | filter(); | ||
| 67 | ||
| 68 | // Singleton | ||
| 69 | ||
// One constructor per argument kind: integer, string, boolean, no argument
// (used with is_null/is_not_null), and a nested filter used as a join
// condition (used with the *matches comparisons).
| 70 | filter(field filterField, comparison filterType, int filterValue); | ||
| 71 | filter(field filterField, comparison filterType, std::string filterValue); | ||
| 72 | filter(field filterField, comparison filterType, bool filterValue); | ||
| 73 | filter(field filterField, comparison filterType); | ||
| 74 | filter(field joinOn, comparison filterType, filter joinCondition); | ||
| 75 | ||
// Singleton accessors. Their definitions are not shown here; presumably each
// is only meaningful when the filter is a singleton of the matching kind.
| 76 | field getField() const; | ||
| 77 | ||
| 78 | comparison getComparison() const; | ||
| 79 | ||
| 80 | filter getJoinCondition() const; | ||
| 81 | ||
| 82 | std::string getStringArgument() const; | ||
| 83 | ||
| 84 | int getIntegerArgument() const; | ||
| 85 | ||
| 86 | bool getBooleanArgument() const; | ||
| 87 | ||
| 88 | // Group | ||
| 89 | ||
// Creates a group with no children. orlogic == true means the children are
// combined with "or", false means "and" (see operator|| and operator&&).
| 90 | explicit filter(bool orlogic); | ||
| 91 | ||
| 92 | bool getOrlogic() const; | ||
| 93 | ||
// operator+= is what normalize() and operator! use to add a child to a group
// filter (definitions not shown here).
| 94 | filter operator+(filter condition) const; | ||
| 95 | ||
| 96 | filter& operator+=(filter condition); | ||
| 97 | ||
| 98 | using const_iterator = std::list<filter>::const_iterator; | ||
| 99 | ||
| 100 | const_iterator begin() const; | ||
| 101 | ||
| 102 | const_iterator end() const; | ||
| 103 | ||
| 104 | // Negation | ||
| 105 | ||
// Returns the logical negation: a singleton gets the opposite comparison; a
// group flips its and/or logic and negates every child.
| 106 | filter operator!() const; | ||
| 107 | ||
| 108 | // Groupifying | ||
| 109 | ||
// Combine two filters into an AND / OR group without modifying either one.
// If this filter is empty, the result is just `condition`.
| 110 | filter operator&&(filter condition) const; | ||
| 111 | filter operator||(filter condition) const; | ||
| 112 | ||
// In-place forms: assign the result of && / || back to this filter.
| 113 | filter& operator&=(filter condition); | ||
| 114 | filter& operator|=(filter condition); | ||
| 115 | ||
| 116 | // Utility | ||
| 117 | ||
// Rewrites the filter relative to the given object type, wrapping conditions
// on fields of other objects in joins and merging joins on the same field.
| 118 | filter normalize(object context) const; | ||
| 119 | ||
| 120 | private: | ||
// Storage. Exactly one of singleton_/group_ is in use, as selected by type_;
// neither is in use when type_ is empty.
| 121 | union { | ||
| 122 | struct { | ||
| 123 | field filterField; | ||
| 124 | comparison filterType; | ||
// Argument of the comparison: join for the *matches comparisons,
// stringValue for string_*, intValue for int_*, boolValue for
// boolean_equals; is_null/is_not_null carry no argument.
| 125 | union { | ||
| 126 | std::unique_ptr<filter> join; | ||
| 127 | std::string stringValue; | ||
| 128 | int intValue; | ||
| 129 | bool boolValue; | ||
| 130 | }; | ||
| 131 | } singleton_; | ||
| 132 | struct { | ||
| 133 | std::list<filter> children; | ||
| 134 | bool orlogic; | ||
| 135 | } group_; | ||
| 136 | }; | ||
| 137 | type type_ = type::empty; | ||
| 138 | ||
| 139 | }; | ||
| 140 | |||
| 141 | }; | ||
| 142 | |||
| 143 | #endif /* end of include guard: FILTER_H_932BA9C6 */ | ||
| diff --git a/lib/form.cpp b/lib/form.cpp new file mode 100644 index 0000000..8ba3bd7 --- /dev/null +++ b/lib/form.cpp | |||
| @@ -0,0 +1,53 @@ | |||
| 1 | #include "form.h" | ||
| 2 | #include <sqlite3.h> | ||
| 3 | #include "filter.h" | ||
| 4 | #include "pronunciation.h" | ||
| 5 | #include "database.h" | ||
| 6 | #include "query.h" | ||
| 7 | |||
| 8 | namespace verbly { | ||
| 9 | |||
// Object type tag for the form class.
| 10 | const object form::objectType = object::form; | ||
| 11 | ||
// Column names for a form row. The order matches the column indices 0-3 that
// the form(const database&, sqlite3_stmt*) constructor reads.
| 12 | const std::list<std::string> form::select = {"form_id", "form", "complexity", "proper"}; | ||
| 13 | ||
// Field descriptors for the individual columns of a form, usable in filters.
| 14 | const field form::id = field::integerField(object::form, "form_id"); | ||
| 15 | const field form::text = field::stringField(object::form, "form"); | ||
| 16 | const field form::complexity = field::integerField(object::form, "complexity"); | ||
| 17 | const field form::proper = field::booleanField(object::form, "proper"); | ||
| 18 | ||
// Join from forms to pronunciations; presumably resolved through the
// "forms_pronunciations" table named here -- confirm against field::joinThrough.
| 19 | const field form::pronunciation = field::joinThrough(object::form, "form_id", object::pronunciation, "forms_pronunciations", "pronunciation_id"); | ||
| 20 | ||
// Private building blocks for form::lemma(category): operator%= on
// form::inflection_field joins through lemmaJoin and additionally requires
// inflectionCategory to equal the requested category.
| 21 | const field form::lemmaJoin = field::joinField(object::form, "form_id", object::lemma); | ||
| 22 | const field form::inflectionCategory = field::integerField("lemmas_forms", "category"); | ||
| 23 | |||
| 24 | form::form(const database& db, sqlite3_stmt* row) : db_(&db), valid_(true) | ||
| 25 | { | ||
| 26 | id_ = sqlite3_column_int(row, 0); | ||
| 27 | text_ = std::string(reinterpret_cast<const char*>(sqlite3_column_text(row, 1))); | ||
| 28 | complexity_ = sqlite3_column_int(row, 2); | ||
| 29 | proper_ = (sqlite3_column_int(row, 3) == 1); | ||
| 30 | } | ||
| 31 | |||
| 32 | filter operator%=(form::inflection_field check, filter joinCondition) | ||
| 33 | { | ||
| 34 | return (form::lemmaJoin %= (joinCondition && (form::inflectionCategory == check.getCategory()))); | ||
| 35 | } | ||
| 36 | |||
| 37 | const std::vector<pronunciation>& form::getPronunciations() const | ||
| 38 | { | ||
| 39 | if (!valid_) | ||
| 40 | { | ||
| 41 | throw std::domain_error("Bad access to uninitialized form"); | ||
| 42 | } | ||
| 43 | |||
| 44 | if (!initializedPronunciations_) | ||
| 45 | { | ||
| 46 | pronunciations_ = db_->pronunciations(pronunciation::form %= *this, false, -1).all(); | ||
| 47 | initializedPronunciations_ = true; | ||
| 48 | } | ||
| 49 | |||
| 50 | return pronunciations_; | ||
| 51 | } | ||
| 52 | |||
| 53 | }; | ||
| diff --git a/lib/form.h b/lib/form.h new file mode 100644 index 0000000..c6a1353 --- /dev/null +++ b/lib/form.h | |||
| @@ -0,0 +1,149 @@ | |||
| 1 | #ifndef FORM_H_3A6C962C | ||
| 2 | #define FORM_H_3A6C962C | ||
| 3 | |||
| 4 | #include <list> | ||
| 5 | #include <vector> | ||
| 6 | #include <string> | ||
| 7 | #include <stdexcept> | ||
| 8 | #include "field.h" | ||
| 9 | #include "filter.h" | ||
| 10 | |||
| 11 | struct sqlite3_stmt; | ||
| 12 | |||
| 13 | namespace verbly { | ||
| 14 | |||
| 15 | class pronunciation; | ||
| 16 | class database; | ||
| 17 | |||
| 18 | class form { | ||
| 19 | public: | ||
| 20 | |||
| 21 | // Default constructor | ||
| 22 | |||
| 23 | form() = default; | ||
| 24 | |||
| 25 | // Construct from database | ||
| 26 | |||
| 27 | form(const database& db, sqlite3_stmt* row); | ||
| 28 | |||
| 29 | // Accessors | ||
| 30 | |||
| 31 | operator bool() const | ||
| 32 | { | ||
| 33 | return valid_; | ||
| 34 | } | ||
| 35 | |||
| 36 | int getId() const | ||
| 37 | { | ||
| 38 | if (!valid_) | ||
| 39 | { | ||
| 40 | throw std::domain_error("Bad access to uninitialized form"); | ||
| 41 | } | ||
| 42 | |||
| 43 | return id_; | ||
| 44 | } | ||
| 45 | |||
| 46 | std::string getText() const | ||
| 47 | { | ||
| 48 | if (!valid_) | ||
| 49 | { | ||
| 50 | throw std::domain_error("Bad access to uninitialized form"); | ||
| 51 | } | ||
| 52 | |||
| 53 | return text_; | ||
| 54 | } | ||
| 55 | |||
| 56 | int getComplexity() const | ||
| 57 | { | ||
| 58 | if (!valid_) | ||
| 59 | { | ||
| 60 | throw std::domain_error("Bad access to uninitialized form"); | ||
| 61 | } | ||
| 62 | |||
| 63 | return complexity_; | ||
| 64 | } | ||
| 65 | |||
| 66 | bool isProper() const | ||
| 67 | { | ||
| 68 | if (!valid_) | ||
| 69 | { | ||
| 70 | throw std::domain_error("Bad access to uninitialized form"); | ||
| 71 | } | ||
| 72 | |||
| 73 | return proper_; | ||
| 74 | } | ||
| 75 | |||
| 76 | const std::vector<pronunciation>& getPronunciations() const; | ||
| 77 | |||
| 78 | // Type info | ||
| 79 | |||
| 80 | static const object objectType; | ||
| 81 | |||
| 82 | static const std::list<std::string> select; | ||
| 83 | |||
| 84 | // Query fields | ||
| 85 | |||
| 86 | static const field id; | ||
| 87 | static const field text; | ||
| 88 | static const field complexity; | ||
| 89 | static const field proper; | ||
| 90 | |||
| 91 | operator filter() const | ||
| 92 | { | ||
| 93 | if (!valid_) | ||
| 94 | { | ||
| 95 | throw std::domain_error("Bad access to uninitialized form"); | ||
| 96 | } | ||
| 97 | |||
| 98 | return (id == id_); | ||
| 99 | } | ||
| 100 | |||
| 101 | // Relationships to other objects | ||
| 102 | |||
| 103 | static const field pronunciation; | ||
| 104 | |||
| 105 | class inflection_field { | ||
| 106 | public: | ||
| 107 | |||
| 108 | inflection_field(inflection category) : category_(category) | ||
| 109 | { | ||
| 110 | } | ||
| 111 | |||
| 112 | const inflection getCategory() const | ||
| 113 | { | ||
| 114 | return category_; | ||
| 115 | } | ||
| 116 | |||
| 117 | private: | ||
| 118 | |||
| 119 | const inflection category_; | ||
| 120 | }; | ||
| 121 | |||
| 122 | static const inflection_field lemma(inflection category) | ||
| 123 | { | ||
| 124 | return inflection_field(category); | ||
| 125 | } | ||
| 126 | |||
| 127 | friend filter operator%=(form::inflection_field check, filter joinCondition); | ||
| 128 | |||
| 129 | private: | ||
| 130 | bool valid_ = false; | ||
| 131 | |||
| 132 | int id_; | ||
| 133 | std::string text_; | ||
| 134 | int complexity_ ; | ||
| 135 | bool proper_; | ||
| 136 | |||
| 137 | const database* db_; | ||
| 138 | |||
| 139 | mutable bool initializedPronunciations_ = false; | ||
| 140 | mutable std::vector<class pronunciation> pronunciations_; | ||
| 141 | |||
| 142 | static const field lemmaJoin; | ||
| 143 | static const field inflectionCategory; | ||
| 144 | |||
| 145 | }; | ||
| 146 | |||
| 147 | }; | ||
| 148 | |||
| 149 | #endif /* end of include guard: FORM_H_3A6C962C */ | ||
| diff --git a/lib/frame.cpp b/lib/frame.cpp index ccec81b..bc3f842 100644 --- a/lib/frame.cpp +++ b/lib/frame.cpp | |||
| @@ -1,320 +1,21 @@ | |||
| 1 | #include "verbly.h" | 1 | #include "frame.h" |
| 2 | #include <sqlite3.h> | ||
| 2 | 3 | ||
| 3 | namespace verbly { | 4 | namespace verbly { |
| 4 | 5 | ||
| 5 | frame::selrestr::type frame::selrestr::get_type() const | 6 | const object frame::objectType = object::frame; |
| 6 | { | ||
| 7 | return _type; | ||
| 8 | } | ||
| 9 | |||
| 10 | frame::selrestr::selrestr(const selrestr& other) | ||
| 11 | { | ||
| 12 | _type = other._type; | ||
| 13 | |||
| 14 | switch (_type) | ||
| 15 | { | ||
| 16 | case frame::selrestr::type::singleton: | ||
| 17 | { | ||
| 18 | _singleton.pos = other._singleton.pos; | ||
| 19 | new(&_singleton.restriction) std::string(other._singleton.restriction); | ||
| 20 | |||
| 21 | break; | ||
| 22 | } | ||
| 23 | |||
| 24 | case frame::selrestr::type::group: | ||
| 25 | { | ||
| 26 | new(&_group.children) std::list<selrestr>(other._group.children); | ||
| 27 | _group.orlogic = other._group.orlogic; | ||
| 28 | |||
| 29 | break; | ||
| 30 | } | ||
| 31 | |||
| 32 | case frame::selrestr::type::empty: | ||
| 33 | { | ||
| 34 | // Nothing! | ||
| 35 | |||
| 36 | break; | ||
| 37 | } | ||
| 38 | } | ||
| 39 | } | ||
| 40 | |||
| 41 | frame::selrestr::~selrestr() | ||
| 42 | { | ||
| 43 | switch (_type) | ||
| 44 | { | ||
| 45 | case frame::selrestr::type::singleton: | ||
| 46 | { | ||
| 47 | using string_type = std::string; | ||
| 48 | _singleton.restriction.~string_type(); | ||
| 49 | |||
| 50 | break; | ||
| 51 | } | ||
| 52 | |||
| 53 | case frame::selrestr::type::group: | ||
| 54 | { | ||
| 55 | using list_type = std::list<selrestr>; | ||
| 56 | _group.children.~list_type(); | ||
| 57 | |||
| 58 | break; | ||
| 59 | } | ||
| 60 | |||
| 61 | case frame::selrestr::type::empty: | ||
| 62 | { | ||
| 63 | // Nothing! | ||
| 64 | |||
| 65 | break; | ||
| 66 | } | ||
| 67 | } | ||
| 68 | } | ||
| 69 | |||
| 70 | frame::selrestr& frame::selrestr::operator=(const selrestr& other) | ||
| 71 | { | ||
| 72 | this->~selrestr(); | ||
| 73 | |||
| 74 | _type = other._type; | ||
| 75 | |||
| 76 | switch (_type) | ||
| 77 | { | ||
| 78 | case frame::selrestr::type::singleton: | ||
| 79 | { | ||
| 80 | _singleton.pos = other._singleton.pos; | ||
| 81 | new(&_singleton.restriction) std::string(other._singleton.restriction); | ||
| 82 | |||
| 83 | break; | ||
| 84 | } | ||
| 85 | |||
| 86 | case frame::selrestr::type::group: | ||
| 87 | { | ||
| 88 | new(&_group.children) std::list<selrestr>(other._group.children); | ||
| 89 | _group.orlogic = other._group.orlogic; | ||
| 90 | |||
| 91 | break; | ||
| 92 | } | ||
| 93 | |||
| 94 | case frame::selrestr::type::empty: | ||
| 95 | { | ||
| 96 | // Nothing! | ||
| 97 | |||
| 98 | break; | ||
| 99 | } | ||
| 100 | } | ||
| 101 | |||
| 102 | return *this; | ||
| 103 | } | ||
| 104 | |||
| 105 | frame::selrestr::selrestr() : _type(frame::selrestr::type::empty) | ||
| 106 | { | ||
| 107 | |||
| 108 | } | ||
| 109 | |||
| 110 | frame::selrestr::selrestr(std::string restriction, bool pos) : _type(frame::selrestr::type::singleton) | ||
| 111 | { | ||
| 112 | new(&_singleton.restriction) std::string(restriction); | ||
| 113 | _singleton.pos = pos; | ||
| 114 | } | ||
| 115 | |||
| 116 | std::string frame::selrestr::get_restriction() const | ||
| 117 | { | ||
| 118 | assert(_type == frame::selrestr::type::singleton); | ||
| 119 | |||
| 120 | return _singleton.restriction; | ||
| 121 | } | ||
| 122 | |||
| 123 | bool frame::selrestr::get_pos() const | ||
| 124 | { | ||
| 125 | assert(_type == frame::selrestr::type::singleton); | ||
| 126 | |||
| 127 | return _singleton.pos; | ||
| 128 | } | ||
| 129 | |||
| 130 | frame::selrestr::selrestr(std::list<selrestr> children, bool orlogic) : _type(frame::selrestr::type::group) | ||
| 131 | { | ||
| 132 | new(&_group.children) std::list<selrestr>(children); | ||
| 133 | _group.orlogic = orlogic; | ||
| 134 | } | ||
| 135 | |||
| 136 | std::list<frame::selrestr> frame::selrestr::get_children() const | ||
| 137 | { | ||
| 138 | assert(_type == frame::selrestr::type::group); | ||
| 139 | |||
| 140 | return _group.children; | ||
| 141 | } | ||
| 142 | |||
| 143 | std::list<frame::selrestr>::const_iterator frame::selrestr::begin() const | ||
| 144 | { | ||
| 145 | assert(_type == frame::selrestr::type::group); | ||
| 146 | |||
| 147 | return _group.children.begin(); | ||
| 148 | } | ||
| 149 | |||
| 150 | std::list<frame::selrestr>::const_iterator frame::selrestr::end() const | ||
| 151 | { | ||
| 152 | assert(_type == frame::selrestr::type::group); | ||
| 153 | |||
| 154 | return _group.children.end(); | ||
| 155 | } | ||
| 156 | |||
| 157 | bool frame::selrestr::get_orlogic() const | ||
| 158 | { | ||
| 159 | assert(_type == frame::selrestr::type::group); | ||
| 160 | |||
| 161 | return _group.orlogic; | ||
| 162 | } | ||
| 163 | |||
| 164 | frame::part::type frame::part::get_type() const | ||
| 165 | { | ||
| 166 | return _type; | ||
| 167 | } | ||
| 168 | |||
| 169 | frame::part::part() | ||
| 170 | { | ||
| 171 | |||
| 172 | } | ||
| 173 | 7 | ||
| 174 | frame::part::part(const part& other) | 8 | const std::list<std::string> frame::select = {"frame_id", "data"}; |
| 175 | { | ||
| 176 | _type = other._type; | ||
| 177 | |||
| 178 | switch (_type) | ||
| 179 | { | ||
| 180 | case frame::part::type::noun_phrase: | ||
| 181 | { | ||
| 182 | new(&_noun_phrase.role) std::string(other._noun_phrase.role); | ||
| 183 | new(&_noun_phrase.selrestrs) selrestr(other._noun_phrase.selrestrs); | ||
| 184 | new(&_noun_phrase.synrestrs) std::set<std::string>(other._noun_phrase.synrestrs); | ||
| 185 | |||
| 186 | break; | ||
| 187 | } | ||
| 188 | |||
| 189 | case frame::part::type::literal_preposition: | ||
| 190 | { | ||
| 191 | new(&_literal_preposition.choices) std::vector<std::string>(other._literal_preposition.choices); | ||
| 192 | |||
| 193 | break; | ||
| 194 | } | ||
| 195 | |||
| 196 | case frame::part::type::selection_preposition: | ||
| 197 | { | ||
| 198 | new(&_selection_preposition.preprestrs) std::vector<std::string>(other._selection_preposition.preprestrs); | ||
| 199 | |||
| 200 | break; | ||
| 201 | } | ||
| 202 | |||
| 203 | case frame::part::type::literal: | ||
| 204 | { | ||
| 205 | new(&_literal.lexval) std::string(other._literal.lexval); | ||
| 206 | |||
| 207 | break; | ||
| 208 | } | ||
| 209 | |||
| 210 | default: | ||
| 211 | { | ||
| 212 | // Nothing! | ||
| 213 | |||
| 214 | break; | ||
| 215 | } | ||
| 216 | } | ||
| 217 | } | ||
| 218 | 9 | ||
| 219 | frame::part::~part() | 10 | const field frame::id = field::integerField(object::frame, "frame_id"); |
| 220 | { | ||
| 221 | switch (_type) | ||
| 222 | { | ||
| 223 | case frame::part::type::noun_phrase: | ||
| 224 | { | ||
| 225 | using string_type = std::string; | ||
| 226 | using set_type = std::set<std::string>; | ||
| 227 | |||
| 228 | _noun_phrase.role.~string_type(); | ||
| 229 | _noun_phrase.selrestrs.~selrestr(); | ||
| 230 | _noun_phrase.synrestrs.~set_type(); | ||
| 231 | |||
| 232 | break; | ||
| 233 | } | ||
| 234 | |||
| 235 | case frame::part::type::literal_preposition: | ||
| 236 | { | ||
| 237 | using vector_type = std::vector<std::string>; | ||
| 238 | _literal_preposition.choices.~vector_type(); | ||
| 239 | |||
| 240 | break; | ||
| 241 | } | ||
| 242 | |||
| 243 | case frame::part::type::selection_preposition: | ||
| 244 | { | ||
| 245 | using vector_type = std::vector<std::string>; | ||
| 246 | _selection_preposition.preprestrs.~vector_type(); | ||
| 247 | |||
| 248 | break; | ||
| 249 | } | ||
| 250 | |||
| 251 | case frame::part::type::literal: | ||
| 252 | { | ||
| 253 | using string_type = std::string; | ||
| 254 | _literal.lexval.~string_type(); | ||
| 255 | |||
| 256 | break; | ||
| 257 | } | ||
| 258 | |||
| 259 | default: | ||
| 260 | { | ||
| 261 | // Nothing! | ||
| 262 | |||
| 263 | break; | ||
| 264 | } | ||
| 265 | } | ||
| 266 | } | ||
| 267 | 11 | ||
| 268 | std::string frame::part::get_role() const | 12 | const field frame::group = field::joinThrough(object::frame, "frame_id", object::group, "groups_frames", "group_id"); |
| 269 | { | ||
| 270 | assert(_type == frame::part::type::noun_phrase); | ||
| 271 | |||
| 272 | return _noun_phrase.role; | ||
| 273 | } | ||
| 274 | 13 | ||
| 275 | frame::selrestr frame::part::get_selrestrs() const | 14 | frame::frame(const database& db, sqlite3_stmt* row) : db_(&db), valid_(true) |
| 276 | { | 15 | { |
| 277 | assert(_type == frame::part::type::noun_phrase); | 16 | id_ = sqlite3_column_int(row, 0); |
| 278 | 17 | ||
| 279 | return _noun_phrase.selrestrs; | 18 | // TODO: Initialize frame data from row. |
| 280 | } | ||
| 281 | |||
| 282 | std::set<std::string> frame::part::get_synrestrs() const | ||
| 283 | { | ||
| 284 | assert(_type == frame::part::type::noun_phrase); | ||
| 285 | |||
| 286 | return _noun_phrase.synrestrs; | ||
| 287 | } | ||
| 288 | |||
| 289 | std::vector<std::string> frame::part::get_choices() const | ||
| 290 | { | ||
| 291 | assert(_type == frame::part::type::literal_preposition); | ||
| 292 | |||
| 293 | return _literal_preposition.choices; | ||
| 294 | } | ||
| 295 | |||
| 296 | std::vector<std::string> frame::part::get_preprestrs() const | ||
| 297 | { | ||
| 298 | assert(_type == frame::part::type::selection_preposition); | ||
| 299 | |||
| 300 | return _selection_preposition.preprestrs; | ||
| 301 | } | ||
| 302 | |||
| 303 | std::string frame::part::get_literal() const | ||
| 304 | { | ||
| 305 | assert(_type == frame::part::type::literal); | ||
| 306 | |||
| 307 | return _literal.lexval; | ||
| 308 | } | ||
| 309 | |||
| 310 | std::vector<frame::part> frame::parts() const | ||
| 311 | { | ||
| 312 | return _parts; | ||
| 313 | } | ||
| 314 | |||
| 315 | std::map<std::string, frame::selrestr> frame::roles() const | ||
| 316 | { | ||
| 317 | return _roles; | ||
| 318 | } | 19 | } |
| 319 | 20 | ||
| 320 | }; | 21 | }; |
| diff --git a/lib/frame.h b/lib/frame.h index fa57e1b..68a4346 100644 --- a/lib/frame.h +++ b/lib/frame.h | |||
| @@ -1,118 +1,78 @@ | |||
| 1 | #ifndef FRAME_H_9A5D90FE | 1 | #ifndef FRAME_H_EA29065A |
| 2 | #define FRAME_H_9A5D90FE | 2 | #define FRAME_H_EA29065A |
| 3 | |||
| 4 | #include <stdexcept> | ||
| 5 | #include <list> | ||
| 6 | #include "field.h" | ||
| 7 | #include "filter.h" | ||
| 8 | |||
| 9 | struct sqlite3_stmt; | ||
| 3 | 10 | ||
| 4 | namespace verbly { | 11 | namespace verbly { |
| 5 | 12 | ||
| 6 | class frame_query; | 13 | class database; |
| 7 | 14 | ||
| 8 | class frame { | 15 | class frame { |
| 9 | public: | 16 | public: |
| 10 | class selrestr { | 17 | |
| 11 | public: | 18 | // Default constructor |
| 12 | enum class type { | 19 | |
| 13 | empty, | 20 | frame() = default; |
| 14 | singleton, | 21 | |
| 15 | group | 22 | // Construct from database |
| 16 | }; | 23 | |
| 17 | 24 | frame(const database& db, sqlite3_stmt* row); | |
| 18 | type get_type() const; | 25 | |
| 19 | selrestr(const selrestr& other); | 26 | // Accessors |
| 20 | ~selrestr(); | 27 | |
| 21 | selrestr& operator=(const selrestr& other); | 28 | operator bool() const |
| 22 | 29 | { | |
| 23 | // Empty | 30 | return valid_; |
| 24 | selrestr(); | 31 | } |
| 25 | 32 | ||
| 26 | // Singleton | 33 | int getId() const |
| 27 | selrestr(std::string restriction, bool pos); | 34 | { |
| 28 | std::string get_restriction() const; | 35 | if (!valid_) |
| 29 | bool get_pos() const; | 36 | { |
| 30 | 37 | throw std::domain_error("Bad access to uninitialized frame"); | |
| 31 | // Group | 38 | } |
| 32 | selrestr(std::list<selrestr> children, bool orlogic); | ||
| 33 | std::list<selrestr> get_children() const; | ||
| 34 | std::list<selrestr>::const_iterator begin() const; | ||
| 35 | std::list<selrestr>::const_iterator end() const; | ||
| 36 | bool get_orlogic() const; | ||
| 37 | |||
| 38 | private: | ||
| 39 | union { | ||
| 40 | struct { | ||
| 41 | bool pos; | ||
| 42 | std::string restriction; | ||
| 43 | } _singleton; | ||
| 44 | struct { | ||
| 45 | std::list<selrestr> children; | ||
| 46 | bool orlogic; | ||
| 47 | } _group; | ||
| 48 | }; | ||
| 49 | type _type; | ||
| 50 | }; | ||
| 51 | 39 | ||
| 52 | class part { | 40 | return id_; |
| 53 | public: | 41 | } |
| 54 | enum class type { | 42 | |
| 55 | noun_phrase, | 43 | // Type info |
| 56 | verb, | 44 | |
| 57 | literal_preposition, | 45 | static const object objectType; |
| 58 | selection_preposition, | 46 | |
| 59 | adjective, | 47 | static const std::list<std::string> select; |
| 60 | adverb, | 48 | |
| 61 | literal | 49 | // Query fields |
| 62 | }; | 50 | |
| 63 | 51 | static const field id; | |
| 64 | type get_type() const; | 52 | |
| 65 | part(const part& other); | 53 | operator filter() const |
| 66 | ~part(); | 54 | { |
| 67 | 55 | if (!valid_) | |
| 68 | // Noun phrase | 56 | { |
| 69 | std::string get_role() const; | 57 | throw std::domain_error("Bad access to uninitialized frame"); |
| 70 | selrestr get_selrestrs() const; | 58 | } |
| 71 | std::set<std::string> get_synrestrs() const; | ||
| 72 | |||
| 73 | // Literal preposition | ||
| 74 | std::vector<std::string> get_choices() const; | ||
| 75 | |||
| 76 | // Selection preposition | ||
| 77 | std::vector<std::string> get_preprestrs() const; | ||
| 78 | |||
| 79 | // Literal | ||
| 80 | std::string get_literal() const; | ||
| 81 | |||
| 82 | private: | ||
| 83 | friend class frame_query; | ||
| 84 | |||
| 85 | part(); | ||
| 86 | |||
| 87 | union { | ||
| 88 | struct { | ||
| 89 | std::string role; | ||
| 90 | selrestr selrestrs; | ||
| 91 | std::set<std::string> synrestrs; | ||
| 92 | } _noun_phrase; | ||
| 93 | struct { | ||
| 94 | std::vector<std::string> choices; | ||
| 95 | } _literal_preposition; | ||
| 96 | struct { | ||
| 97 | std::vector<std::string> preprestrs; | ||
| 98 | } _selection_preposition; | ||
| 99 | struct { | ||
| 100 | std::string lexval; | ||
| 101 | } _literal; | ||
| 102 | }; | ||
| 103 | type _type; | ||
| 104 | }; | ||
| 105 | 59 | ||
| 106 | std::vector<part> parts() const; | 60 | return (id == id_); |
| 107 | std::map<std::string, selrestr> roles() const; | 61 | } |
| 108 | 62 | ||
| 109 | private: | 63 | // Relationships to other objects |
| 110 | friend class frame_query; | 64 | |
| 111 | 65 | static const field group; | |
| 112 | std::vector<part> _parts; | 66 | |
| 113 | std::map<std::string, selrestr> _roles; | 67 | private: |
| 68 | bool valid_ = false; | ||
| 69 | |||
| 70 | int id_; | ||
| 71 | |||
| 72 | const database* db_; | ||
| 73 | |||
| 114 | }; | 74 | }; |
| 115 | 75 | ||
| 116 | }; | 76 | }; |
| 117 | 77 | ||
| 118 | #endif /* end of include guard: FRAME_H_9A5D90FE */ | 78 | #endif /* end of include guard: FRAME_H_EA29065A */ |
| diff --git a/lib/group.cpp b/lib/group.cpp new file mode 100644 index 0000000..8b6d985 --- /dev/null +++ b/lib/group.cpp | |||
| @@ -0,0 +1,43 @@ | |||
| 1 | #include "group.h" | ||
| 2 | #include <sqlite3.h> | ||
| 3 | #include "frame.h" | ||
| 4 | #include "database.h" | ||
| 5 | #include "query.h" | ||
| 6 | |||
| 7 | namespace verbly { | ||
| 8 | |||
| 9 | const object group::objectType = object::group; | ||
| 10 | |||
| 11 | const std::list<std::string> group::select = {"group_id", "data"}; | ||
| 12 | |||
| 13 | const field group::id = field::integerField(object::group, "group_id"); | ||
| 14 | |||
| 15 | const field group::frame = field::joinThrough(object::group, "group_id", object::frame, "groups_frames", "frame_id"); | ||
| 16 | const field group::word = field::joinField(object::group, "group_id", object::word); | ||
| 17 | |||
| 18 | group::group(const database& db, sqlite3_stmt* row) : db_(&db), valid_(true) | ||
| 19 | { | ||
| 20 | id_ = sqlite3_column_int(row, 0); | ||
| 21 | |||
| 22 | // TODO: Initialize role data from row. | ||
| 23 | } | ||
| 24 | |||
| 25 | const std::vector<frame>& group::getFrames() const | ||
| 26 | { | ||
| 27 | if (!valid_) | ||
| 28 | { | ||
| 29 | throw std::domain_error("Bad access to uninitialized group"); | ||
| 30 | } | ||
| 31 | |||
| 32 | if (!initializedFrames_) | ||
| 33 | { | ||
| 34 | frames_ = db_->frames(frame::group %= *this, false, -1).all(); | ||
| 35 | |||
| 36 | initializedFrames_ = true; | ||
| 37 | } | ||
| 38 | |||
| 39 | return frames_; | ||
| 40 | } | ||
| 41 | |||
| 42 | }; | ||
| 43 | |||
| diff --git a/lib/group.h b/lib/group.h new file mode 100644 index 0000000..dd53503 --- /dev/null +++ b/lib/group.h | |||
| @@ -0,0 +1,87 @@ | |||
| 1 | #ifndef GROUP_H_BD6933C0 | ||
| 2 | #define GROUP_H_BD6933C0 | ||
| 3 | |||
| 4 | #include <stdexcept> | ||
| 5 | #include <list> | ||
| 6 | #include <vector> | ||
| 7 | #include "field.h" | ||
| 8 | #include "filter.h" | ||
| 9 | |||
| 10 | struct sqlite3_stmt; | ||
| 11 | |||
| 12 | namespace verbly { | ||
| 13 | |||
| 14 | class database; | ||
| 15 | class frame; | ||
| 16 | |||
| 17 | class group { | ||
| 18 | public: | ||
| 19 | |||
| 20 | // Default constructor | ||
| 21 | |||
| 22 | group() = default; | ||
| 23 | |||
| 24 | // Construct from database | ||
| 25 | |||
| 26 | group(const database& db, sqlite3_stmt* row); | ||
| 27 | |||
| 28 | // Accessors | ||
| 29 | |||
| 30 | operator bool() const | ||
| 31 | { | ||
| 32 | return valid_; | ||
| 33 | } | ||
| 34 | |||
| 35 | int getId() const | ||
| 36 | { | ||
| 37 | if (!valid_) | ||
| 38 | { | ||
| 39 | throw std::domain_error("Bad access to uninitialized group"); | ||
| 40 | } | ||
| 41 | |||
| 42 | return id_; | ||
| 43 | } | ||
| 44 | |||
| 45 | const std::vector<frame>& getFrames() const; | ||
| 46 | |||
| 47 | // Type info | ||
| 48 | |||
| 49 | static const object objectType; | ||
| 50 | |||
| 51 | static const std::list<std::string> select; | ||
| 52 | |||
| 53 | // Query fields | ||
| 54 | |||
| 55 | static const field id; | ||
| 56 | |||
| 57 | operator filter() const | ||
| 58 | { | ||
| 59 | if (!valid_) | ||
| 60 | { | ||
| 61 | throw std::domain_error("Bad access to uninitialized group"); | ||
| 62 | } | ||
| 63 | |||
| 64 | return (id == id_); | ||
| 65 | } | ||
| 66 | |||
| 67 | // Relationships to other objects | ||
| 68 | |||
| 69 | static const field frame; | ||
| 70 | |||
| 71 | static const field word; | ||
| 72 | |||
| 73 | private: | ||
| 74 | bool valid_ = false; | ||
| 75 | |||
| 76 | int id_; | ||
| 77 | |||
| 78 | const database* db_; | ||
| 79 | |||
| 80 | mutable bool initializedFrames_ = false; | ||
| 81 | mutable std::vector<class frame> frames_; | ||
| 82 | |||
| 83 | }; | ||
| 84 | |||
| 85 | }; | ||
| 86 | |||
| 87 | #endif /* end of include guard: GROUP_H_BD6933C0 */ | ||
| diff --git a/lib/lemma.cpp b/lib/lemma.cpp new file mode 100644 index 0000000..f9e9fcc --- /dev/null +++ b/lib/lemma.cpp | |||
| @@ -0,0 +1,69 @@ | |||
| 1 | #include "lemma.h" | ||
| 2 | #include <sqlite3.h> | ||
| 3 | #include "database.h" | ||
| 4 | #include "query.h" | ||
| 5 | |||
| 6 | namespace verbly { | ||
| 7 | |||
| 8 | const object lemma::objectType = object::lemma; | ||
| 9 | |||
| 10 | const std::list<std::string> lemma::select = {"lemma_id"}; | ||
| 11 | |||
| 12 | const field lemma::id = field::integerField(object::lemma, "lemma_id"); | ||
| 13 | |||
| 14 | const field lemma::word = field::joinField(object::lemma, "lemma_id", object::word); | ||
| 15 | |||
| 16 | const field lemma::formJoin = field::joinField(object::lemma, "form_id", object::form); | ||
| 17 | const field lemma::inflectionCategory = field::integerField(object::lemma, "category"); | ||
| 18 | |||
| 19 | filter operator%=(lemma::inflection_field check, filter joinCondition) | ||
| 20 | { | ||
| 21 | return (lemma::formJoin %= joinCondition) && (lemma::inflectionCategory == check.getCategory()); | ||
| 22 | } | ||
| 23 | |||
| 24 | lemma::lemma(const database& db, sqlite3_stmt* row) : db_(&db), valid_(true) | ||
| 25 | { | ||
| 26 | id_ = sqlite3_column_int(row, 0); | ||
| 27 | } | ||
| 28 | |||
| 29 | const form& lemma::getBaseForm() const | ||
| 30 | { | ||
| 31 | if (!valid_) | ||
| 32 | { | ||
| 33 | throw std::domain_error("Bad access to uninitialized lemma"); | ||
| 34 | } | ||
| 35 | |||
| 36 | if (!forms_.count(inflection::base)) | ||
| 37 | { | ||
| 38 | initializeForm(inflection::base); | ||
| 39 | } | ||
| 40 | |||
| 41 | return forms_.at(inflection::base).front(); | ||
| 42 | } | ||
| 43 | |||
| 44 | bool lemma::hasInflection(inflection category) const | ||
| 45 | { | ||
| 46 | return !getInflections(category).empty(); | ||
| 47 | } | ||
| 48 | |||
| 49 | const std::vector<form>& lemma::getInflections(inflection category) const | ||
| 50 | { | ||
| 51 | if (!valid_) | ||
| 52 | { | ||
| 53 | throw std::domain_error("Bad access to uninitialized lemma"); | ||
| 54 | } | ||
| 55 | |||
| 56 | if (!forms_.count(category)) | ||
| 57 | { | ||
| 58 | initializeForm(category); | ||
| 59 | } | ||
| 60 | |||
| 61 | return forms_.at(category); | ||
| 62 | } | ||
| 63 | |||
| 64 | void lemma::initializeForm(inflection infl) const | ||
| 65 | { | ||
| 66 | forms_[infl] = db_->forms(form::lemma(infl) %= *this, false, -1).all(); | ||
| 67 | } | ||
| 68 | |||
| 69 | }; | ||
| diff --git a/lib/lemma.h b/lib/lemma.h new file mode 100644 index 0000000..9a07f16 --- /dev/null +++ b/lib/lemma.h | |||
| @@ -0,0 +1,120 @@ | |||
| 1 | #ifndef LEMMA_H_0A180D30 | ||
| 2 | #define LEMMA_H_0A180D30 | ||
| 3 | |||
| 4 | #include <stdexcept> | ||
| 5 | #include <vector> | ||
| 6 | #include <list> | ||
| 7 | #include <map> | ||
| 8 | #include "field.h" | ||
| 9 | #include "enums.h" | ||
| 10 | #include "filter.h" | ||
| 11 | |||
| 12 | struct sqlite3_stmt; | ||
| 13 | |||
| 14 | namespace verbly { | ||
| 15 | |||
| 16 | class form; | ||
| 17 | class database; | ||
| 18 | |||
| 19 | class lemma { | ||
| 20 | public: | ||
| 21 | |||
| 22 | // Default constructor | ||
| 23 | |||
| 24 | lemma() = default; | ||
| 25 | |||
| 26 | // Construct from database | ||
| 27 | |||
| 28 | lemma(const database& db, sqlite3_stmt* row); | ||
| 29 | |||
| 30 | // Accessors | ||
| 31 | |||
| 32 | operator bool() const | ||
| 33 | { | ||
| 34 | return valid_; | ||
| 35 | } | ||
| 36 | |||
| 37 | int getId() const | ||
| 38 | { | ||
| 39 | if (!valid_) | ||
| 40 | { | ||
| 41 | throw std::domain_error("Bad access to uninitialized lemma"); | ||
| 42 | } | ||
| 43 | |||
| 44 | return id_; | ||
| 45 | } | ||
| 46 | |||
| 47 | const form& getBaseForm() const; | ||
| 48 | |||
| 49 | bool hasInflection(inflection category) const; | ||
| 50 | |||
| 51 | const std::vector<form>& getInflections(inflection category) const; | ||
| 52 | |||
| 53 | // Type info | ||
| 54 | |||
| 55 | static const object objectType; | ||
| 56 | |||
| 57 | static const std::list<std::string> select; | ||
| 58 | |||
| 59 | // Query fields | ||
| 60 | |||
| 61 | static const field id; | ||
| 62 | |||
| 63 | operator filter() const | ||
| 64 | { | ||
| 65 | if (!valid_) | ||
| 66 | { | ||
| 67 | throw std::domain_error("Bad access to uninitialized lemma"); | ||
| 68 | } | ||
| 69 | |||
| 70 | return (id == id_); | ||
| 71 | } | ||
| 72 | |||
| 73 | // Relationships to other objects | ||
| 74 | |||
| 75 | static const field word; | ||
| 76 | |||
| 77 | class inflection_field { | ||
| 78 | public: | ||
| 79 | |||
| 80 | inflection_field(inflection category) : category_(category) | ||
| 81 | { | ||
| 82 | } | ||
| 83 | |||
| 84 | const inflection getCategory() const | ||
| 85 | { | ||
| 86 | return category_; | ||
| 87 | } | ||
| 88 | |||
| 89 | private: | ||
| 90 | |||
| 91 | const inflection category_; | ||
| 92 | }; | ||
| 93 | |||
| 94 | static const inflection_field form(inflection category) | ||
| 95 | { | ||
| 96 | return inflection_field(category); | ||
| 97 | } | ||
| 98 | |||
| 99 | friend filter operator%=(lemma::inflection_field check, filter joinCondition); | ||
| 100 | |||
| 101 | private: | ||
| 102 | |||
| 103 | void initializeForm(inflection category) const; | ||
| 104 | |||
| 105 | bool valid_ = false; | ||
| 106 | |||
| 107 | int id_; | ||
| 108 | |||
| 109 | mutable std::map<inflection, std::vector<class form>> forms_; | ||
| 110 | |||
| 111 | const database* db_; | ||
| 112 | |||
| 113 | static const field formJoin; | ||
| 114 | static const field inflectionCategory; | ||
| 115 | |||
| 116 | }; | ||
| 117 | |||
| 118 | }; | ||
| 119 | |||
| 120 | #endif /* end of include guard: LEMMA_H_0A180D30 */ | ||
| diff --git a/lib/notion.cpp b/lib/notion.cpp new file mode 100644 index 0000000..16794d3 --- /dev/null +++ b/lib/notion.cpp | |||
| @@ -0,0 +1,94 @@ | |||
| 1 | #include "notion.h" | ||
| 2 | #include <sqlite3.h> | ||
| 3 | #include <sstream> | ||
| 4 | |||
| 5 | namespace verbly { | ||
| 6 | |||
| 7 | const object notion::objectType = object::notion; | ||
| 8 | |||
| 9 | const std::list<std::string> notion::select = {"notion_id", "part_of_speech", "wnid", "images"}; | ||
| 10 | |||
| 11 | const field notion::id = field::integerField(object::notion, "notion_id"); | ||
| 12 | const field notion::partOfSpeech = field::integerField(object::notion, "part_of_speech"); | ||
| 13 | const field notion::wnid = field::integerField(object::notion, "wnid", true); | ||
| 14 | const field notion::numOfImages = field::integerField(object::notion, "images", true); | ||
| 15 | |||
| 16 | const field notion::word = field::joinField(object::notion, "word_id", object::word); | ||
| 17 | |||
| 18 | const field notion::hypernyms = field::selfJoin(object::notion, "notion_id", "hypernymy", "hyponym_id", "hypernym_id"); | ||
| 19 | const field notion::hyponyms = field::selfJoin(object::notion, "notion_id", "hypernymy", "hypernym_id", "hyponym_id"); | ||
| 20 | |||
| 21 | const field notion::fullHypernyms = field::hierarchalSelfJoin(object::notion, "notion_id", "hypernymy", "hyponym_id", "hypernym_id"); | ||
| 22 | const field notion::fullHyponyms = field::hierarchalSelfJoin(object::notion, "notion_id", "hypernymy", "hypernym_id", "hyponym_id"); | ||
| 23 | |||
| 24 | const field notion::instances = field::selfJoin(object::notion, "notion_id", "instantiation", "class_id", "instance_id"); | ||
| 25 | const field notion::classes = field::selfJoin(object::notion, "notion_id", "instantiation", "instance_id", "class_id"); | ||
| 26 | |||
| 27 | const field notion::memberMeronyms = field::selfJoin(object::notion, "notion_id", "member_meronymy", "holonym_id", "meronym_id"); | ||
| 28 | const field notion::memberHolonyms = field::selfJoin(object::notion, "notion_id", "member_meronymy", "meronym_id", "holonym_id"); | ||
| 29 | |||
| 30 | const field notion::fullMemberMeronyms = field::hierarchalSelfJoin(object::notion, "notion_id", "member_meronymy", "holonym_id", "meronym_id"); | ||
| 31 | const field notion::fullMemberHolonyms = field::hierarchalSelfJoin(object::notion, "notion_id", "member_meronymy", "meronym_id", "holonym_id"); | ||
| 32 | |||
| 33 | const field notion::partMeronyms = field::selfJoin(object::notion, "notion_id", "part_meronymy", "holonym_id", "meronym_id"); | ||
| 34 | const field notion::partHolonyms = field::selfJoin(object::notion, "notion_id", "part_meronymy", "meronym_id", "holonym_id"); | ||
| 35 | |||
| 36 | const field notion::fullPartMeronyms = field::hierarchalSelfJoin(object::notion, "notion_id", "part_meronymy", "holonym_id", "meronym_id"); | ||
| 37 | const field notion::fullPartHolonyms = field::hierarchalSelfJoin(object::notion, "notion_id", "part_meronymy", "meronym_id", "holonym_id"); | ||
| 38 | |||
| 39 | const field notion::substanceMeronyms = field::selfJoin(object::notion, "notion_id", "substance_meronymy", "holonym_id", "meronym_id"); | ||
| 40 | const field notion::substanceHolonyms = field::selfJoin(object::notion, "notion_id", "substance_meronymy", "meronym_id", "holonym_id"); | ||
| 41 | |||
| 42 | const field notion::fullSubstanceMeronyms = field::hierarchalSelfJoin(object::notion, "notion_id", "substance_meronymy", "holonym_id", "meronym_id"); | ||
| 43 | const field notion::fullSubstanceHolonyms = field::hierarchalSelfJoin(object::notion, "notion_id", "substance_meronymy", "meronym_id", "holonym_id"); | ||
| 44 | |||
| 45 | const field notion::variants = field::selfJoin(object::notion, "notion_id", "variation", "noun_id", "adjective_id"); | ||
| 46 | const field notion::attributes = field::selfJoin(object::notion, "notion_id", "variation", "adjective_id", "noun_id"); | ||
| 47 | |||
| 48 | const field notion::similarAdjectives = field::selfJoin(object::notion, "notion_id", "similarity", "adjective_2_id", "adjective_1_id"); | ||
| 49 | |||
| 50 | const field notion::entails = field::selfJoin(object::notion, "notion_id", "entailment", "given_id", "entailment_id"); | ||
| 51 | const field notion::entailedBy = field::selfJoin(object::notion, "notion_id", "entailment", "entailment_id", "given_id"); | ||
| 52 | |||
| 53 | const field notion::causes = field::selfJoin(object::notion, "notion_id", "causality", "effect_id", "cause_id"); | ||
| 54 | const field notion::effects = field::selfJoin(object::notion, "notion_id", "causality", "cause_id", "effect_id"); | ||
| 55 | |||
| 56 | const notion::preposition_group_field prepositionGroup = {}; | ||
| 57 | |||
| 58 | const field notion::preposition_group_field::isA = field::joinField(object::notion, "notion_id", "is_a"); | ||
| 59 | const field notion::preposition_group_field::groupNameField = field::stringField("is_a", "groupname"); | ||
| 60 | |||
| 61 | notion::notion(const database& db, sqlite3_stmt* row) : db_(&db), valid_(true) | ||
| 62 | { | ||
| 63 | id_ = sqlite3_column_int(row, 0); | ||
| 64 | partOfSpeech_ = static_cast<part_of_speech>(sqlite3_column_int(row, 1)); | ||
| 65 | |||
| 66 | if (sqlite3_column_type(row, 2) != SQLITE_NULL) | ||
| 67 | { | ||
| 68 | hasWnid_ = true; | ||
| 69 | wnid_ = sqlite3_column_int(row, 2); | ||
| 70 | } | ||
| 71 | |||
| 72 | if (sqlite3_column_type(row, 3) != SQLITE_NULL) | ||
| 73 | { | ||
| 74 | hasNumOfImages_ = true; | ||
| 75 | numOfImages_ = sqlite3_column_int(row, 3); | ||
| 76 | } | ||
| 77 | } | ||
| 78 | |||
| 79 | std::string notion::getImageNetUrl() const | ||
| 80 | { | ||
| 81 | std::stringstream url; | ||
| 82 | url << "http://www.image-net.org/api/text/imagenet.synset.geturls?wnid=n"; | ||
| 83 | url.width(8); | ||
| 84 | url.fill('0'); | ||
| 85 | url << (getWnid() % 100000000); | ||
| 86 | return url.str(); | ||
| 87 | } | ||
| 88 | |||
| 89 | filter notion::preposition_group_field::operator==(std::string groupName) const | ||
| 90 | { | ||
| 91 | return (isA %= (groupNameField == groupName)); | ||
| 92 | } | ||
| 93 | |||
| 94 | }; | ||
| diff --git a/lib/notion.h b/lib/notion.h new file mode 100644 index 0000000..a180d73 --- /dev/null +++ b/lib/notion.h | |||
| @@ -0,0 +1,200 @@ | |||
| 1 | #ifndef NOTION_H_FD1C7646 | ||
| 2 | #define NOTION_H_FD1C7646 | ||
| 3 | |||
| 4 | #include <stdexcept> | ||
| 5 | #include <string> | ||
| 6 | #include "field.h" | ||
| 7 | #include "filter.h" | ||
| 8 | |||
| 9 | struct sqlite3_stmt; | ||
| 10 | |||
| 11 | namespace verbly { | ||
| 12 | |||
| 13 | class database; | ||
| 14 | |||
| 15 | class notion { | ||
| 16 | public: | ||
| 17 | |||
| 18 | // Default constructor | ||
| 19 | |||
| 20 | notion() = default; | ||
| 21 | |||
| 22 | // Construct from database | ||
| 23 | |||
| 24 | notion(const database& db, sqlite3_stmt* row); | ||
| 25 | |||
| 26 | // Accessors | ||
| 27 | |||
| 28 | operator bool() const | ||
| 29 | { | ||
| 30 | return valid_; | ||
| 31 | } | ||
| 32 | |||
| 33 | int getId() const | ||
| 34 | { | ||
| 35 | if (!valid_) | ||
| 36 | { | ||
| 37 | throw std::domain_error("Bad access to uninitialized notion"); | ||
| 38 | } | ||
| 39 | |||
| 40 | return id_; | ||
| 41 | } | ||
| 42 | |||
| 43 | part_of_speech getPartOfSpeech() const | ||
| 44 | { | ||
| 45 | if (!valid_) | ||
| 46 | { | ||
| 47 | throw std::domain_error("Bad access to uninitialized notion"); | ||
| 48 | } | ||
| 49 | |||
| 50 | return partOfSpeech_; | ||
| 51 | } | ||
| 52 | |||
| 53 | bool hasWnid() const | ||
| 54 | { | ||
| 55 | if (!valid_) | ||
| 56 | { | ||
| 57 | throw std::domain_error("Bad access to uninitialized notion"); | ||
| 58 | } | ||
| 59 | |||
| 60 | return hasWnid_; | ||
| 61 | } | ||
| 62 | |||
| 63 | int getWnid() const | ||
| 64 | { | ||
| 65 | if (!valid_) | ||
| 66 | { | ||
| 67 | throw std::domain_error("Bad access to uninitialized notion"); | ||
| 68 | } | ||
| 69 | |||
| 70 | if (!hasWnid_) | ||
| 71 | { | ||
| 72 | throw std::domain_error("Notion has no wnid"); | ||
| 73 | } | ||
| 74 | |||
| 75 | return wnid_; | ||
| 76 | } | ||
| 77 | |||
| 78 | bool hasNumOfImages() const | ||
| 79 | { | ||
| 80 | if (!valid_) | ||
| 81 | { | ||
| 82 | throw std::domain_error("Bad access to uninitialized notion"); | ||
| 83 | } | ||
| 84 | |||
| 85 | return hasNumOfImages_; | ||
| 86 | } | ||
| 87 | |||
| 88 | int getNumOfImages() const | ||
| 89 | { | ||
| 90 | if (!valid_) | ||
| 91 | { | ||
| 92 | throw std::domain_error("Bad access to uninitialized notion"); | ||
| 93 | } | ||
| 94 | |||
| 95 | if (!hasNumOfImages_) | ||
| 96 | { | ||
| 97 | throw std::domain_error("Notion does not have a number of images"); | ||
| 98 | } | ||
| 99 | |||
| 100 | return numOfImages_; | ||
| 101 | } | ||
| 102 | |||
| 103 | // Convenience | ||
| 104 | |||
| 105 | std::string getImageNetUrl() const; | ||
| 106 | |||
| 107 | // Type info | ||
| 108 | |||
| 109 | static const object objectType; | ||
| 110 | |||
| 111 | static const std::list<std::string> select; | ||
| 112 | |||
| 113 | // Query fields | ||
| 114 | |||
| 115 | static const field id; | ||
| 116 | static const field partOfSpeech; | ||
| 117 | static const field wnid; | ||
| 118 | static const field numOfImages; | ||
| 119 | |||
| 120 | operator filter() const | ||
| 121 | { | ||
| 122 | return (id == id_); | ||
| 123 | } | ||
| 124 | |||
| 125 | // Relationships with other objects | ||
| 126 | |||
| 127 | static const field word; | ||
| 128 | |||
| 129 | // Relationships with self | ||
| 130 | |||
| 131 | static const field hypernyms; | ||
| 132 | static const field hyponyms; | ||
| 133 | |||
| 134 | static const field fullHypernyms; | ||
| 135 | static const field fullHyponyms; | ||
| 136 | |||
| 137 | static const field instances; | ||
| 138 | static const field classes; | ||
| 139 | |||
| 140 | static const field memberMeronyms; | ||
| 141 | static const field memberHolonyms; | ||
| 142 | |||
| 143 | static const field fullMemberMeronyms; | ||
| 144 | static const field fullMemberHolonyms; | ||
| 145 | |||
| 146 | static const field partMeronyms; | ||
| 147 | static const field partHolonyms; | ||
| 148 | |||
| 149 | static const field fullPartMeronyms; | ||
| 150 | static const field fullPartHolonyms; | ||
| 151 | |||
| 152 | static const field substanceMeronyms; | ||
| 153 | static const field substanceHolonyms; | ||
| 154 | |||
| 155 | static const field fullSubstanceMeronyms; | ||
| 156 | static const field fullSubstanceHolonyms; | ||
| 157 | |||
| 158 | static const field variants; | ||
| 159 | static const field attributes; | ||
| 160 | |||
| 161 | static const field similarAdjectives; | ||
| 162 | |||
| 163 | static const field entails; | ||
| 164 | static const field entailedBy; | ||
| 165 | |||
| 166 | static const field causes; | ||
| 167 | static const field effects; | ||
| 168 | |||
| 169 | // Preposition group relationship | ||
| 170 | |||
| 171 | class preposition_group_field { | ||
| 172 | public: | ||
| 173 | |||
| 174 | filter operator==(std::string groupName) const; | ||
| 175 | |||
| 176 | private: | ||
| 177 | |||
| 178 | static const field isA; | ||
| 179 | static const field groupNameField; | ||
| 180 | }; | ||
| 181 | |||
| 182 | static const preposition_group_field prepositionGroup; | ||
| 183 | |||
| 184 | private: | ||
| 185 | bool valid_ = false; | ||
| 186 | |||
| 187 | int id_; | ||
| 188 | part_of_speech partOfSpeech_; | ||
| 189 | bool hasWnid_ = false; | ||
| 190 | int wnid_; | ||
| 191 | bool hasNumOfImages_ = false; | ||
| 192 | int numOfImages_; | ||
| 193 | |||
| 194 | const database* db_; | ||
| 195 | |||
| 196 | }; | ||
| 197 | |||
| 198 | }; | ||
| 199 | |||
| 200 | #endif /* end of include guard: NOTION_H_FD1C7646 */ | ||
| diff --git a/lib/noun.cpp b/lib/noun.cpp deleted file mode 100644 index d8b34c9..0000000 --- a/lib/noun.cpp +++ /dev/null | |||
| @@ -1,221 +0,0 @@ | |||
| 1 | #include "verbly.h" | ||
| 2 | #include <set> | ||
| 3 | #include <iostream> | ||
| 4 | |||
| 5 | namespace verbly { | ||
| 6 | |||
| 7 | noun::noun() | ||
| 8 | { | ||
| 9 | |||
| 10 | } | ||
| 11 | |||
| 12 | noun::noun(const data& _data, int _id) : word(_data, _id) | ||
| 13 | { | ||
| 14 | |||
| 15 | } | ||
| 16 | |||
| 17 | std::string noun::base_form() const | ||
| 18 | { | ||
| 19 | assert(_valid == true); | ||
| 20 | |||
| 21 | return _singular; | ||
| 22 | } | ||
| 23 | |||
| 24 | std::string noun::singular_form() const | ||
| 25 | { | ||
| 26 | assert(_valid == true); | ||
| 27 | |||
| 28 | return _singular; | ||
| 29 | } | ||
| 30 | |||
| 31 | std::string noun::plural_form() const | ||
| 32 | { | ||
| 33 | assert(_valid == true); | ||
| 34 | |||
| 35 | return _plural; | ||
| 36 | } | ||
| 37 | |||
| 38 | int noun::wnid() const | ||
| 39 | { | ||
| 40 | assert(_valid == true); | ||
| 41 | |||
| 42 | return _wnid; | ||
| 43 | } | ||
| 44 | |||
| 45 | bool noun::has_plural_form() const | ||
| 46 | { | ||
| 47 | assert(_valid == true); | ||
| 48 | |||
| 49 | return !_plural.empty(); | ||
| 50 | } | ||
| 51 | |||
| 52 | noun_query noun::hypernyms() const | ||
| 53 | { | ||
| 54 | assert(_valid == true); | ||
| 55 | |||
| 56 | return _data->nouns().hypernym_of(*this); | ||
| 57 | } | ||
| 58 | |||
| 59 | noun_query noun::full_hypernyms() const | ||
| 60 | { | ||
| 61 | assert(_valid == true); | ||
| 62 | |||
| 63 | return _data->nouns().full_hypernym_of(*this); | ||
| 64 | } | ||
| 65 | |||
| 66 | noun_query noun::hyponyms() const | ||
| 67 | { | ||
| 68 | assert(_valid == true); | ||
| 69 | |||
| 70 | return _data->nouns().hyponym_of(*this); | ||
| 71 | } | ||
| 72 | |||
| 73 | noun_query noun::full_hyponyms() const | ||
| 74 | { | ||
| 75 | assert(_valid == true); | ||
| 76 | |||
| 77 | return _data->nouns().full_hyponym_of(*this); | ||
| 78 | } | ||
| 79 | |||
| 80 | noun_query noun::part_meronyms() const | ||
| 81 | { | ||
| 82 | assert(_valid == true); | ||
| 83 | |||
| 84 | return _data->nouns().part_meronym_of(*this); | ||
| 85 | } | ||
| 86 | |||
| 87 | noun_query noun::full_part_meronyms() const | ||
| 88 | { | ||
| 89 | assert(_valid == true); | ||
| 90 | |||
| 91 | return _data->nouns().full_part_meronym_of(*this); | ||
| 92 | } | ||
| 93 | |||
| 94 | noun_query noun::part_holonyms() const | ||
| 95 | { | ||
| 96 | assert(_valid == true); | ||
| 97 | |||
| 98 | return _data->nouns().part_holonym_of(*this); | ||
| 99 | } | ||
| 100 | |||
| 101 | noun_query noun::full_part_holonyms() const | ||
| 102 | { | ||
| 103 | assert(_valid == true); | ||
| 104 | |||
| 105 | return _data->nouns().full_part_holonym_of(*this); | ||
| 106 | } | ||
| 107 | |||
| 108 | noun_query noun::substance_meronyms() const | ||
| 109 | { | ||
| 110 | assert(_valid == true); | ||
| 111 | |||
| 112 | return _data->nouns().substance_meronym_of(*this); | ||
| 113 | } | ||
| 114 | |||
| 115 | noun_query noun::full_substance_meronyms() const | ||
| 116 | { | ||
| 117 | assert(_valid == true); | ||
| 118 | |||
| 119 | return _data->nouns().full_substance_meronym_of(*this); | ||
| 120 | } | ||
| 121 | |||
| 122 | noun_query noun::substance_holonyms() const | ||
| 123 | { | ||
| 124 | assert(_valid == true); | ||
| 125 | |||
| 126 | return _data->nouns().substance_holonym_of(*this); | ||
| 127 | } | ||
| 128 | |||
| 129 | noun_query noun::full_substance_holonyms() const | ||
| 130 | { | ||
| 131 | assert(_valid == true); | ||
| 132 | |||
| 133 | return _data->nouns().full_substance_holonym_of(*this); | ||
| 134 | } | ||
| 135 | |||
| 136 | noun_query noun::member_meronyms() const | ||
| 137 | { | ||
| 138 | assert(_valid == true); | ||
| 139 | |||
| 140 | return _data->nouns().member_meronym_of(*this); | ||
| 141 | } | ||
| 142 | |||
| 143 | noun_query noun::full_member_meronyms() const | ||
| 144 | { | ||
| 145 | assert(_valid == true); | ||
| 146 | |||
| 147 | return _data->nouns().full_member_meronym_of(*this); | ||
| 148 | } | ||
| 149 | |||
| 150 | noun_query noun::member_holonyms() const | ||
| 151 | { | ||
| 152 | assert(_valid == true); | ||
| 153 | |||
| 154 | return _data->nouns().member_holonym_of(*this); | ||
| 155 | } | ||
| 156 | |||
| 157 | noun_query noun::full_member_holonyms() const | ||
| 158 | { | ||
| 159 | assert(_valid == true); | ||
| 160 | |||
| 161 | return _data->nouns().full_member_holonym_of(*this); | ||
| 162 | } | ||
| 163 | |||
| 164 | noun_query noun::classes() const | ||
| 165 | { | ||
| 166 | assert(_valid == true); | ||
| 167 | |||
| 168 | return _data->nouns().class_of(*this); | ||
| 169 | } | ||
| 170 | |||
| 171 | noun_query noun::instances() const | ||
| 172 | { | ||
| 173 | assert(_valid == true); | ||
| 174 | |||
| 175 | return _data->nouns().instance_of(*this); | ||
| 176 | } | ||
| 177 | |||
| 178 | noun_query noun::synonyms() const | ||
| 179 | { | ||
| 180 | assert(_valid == true); | ||
| 181 | |||
| 182 | return _data->nouns().synonym_of(*this); | ||
| 183 | } | ||
| 184 | |||
| 185 | noun_query noun::antonyms() const | ||
| 186 | { | ||
| 187 | assert(_valid == true); | ||
| 188 | |||
| 189 | return _data->nouns().antonym_of(*this); | ||
| 190 | } | ||
| 191 | |||
| 192 | adjective_query noun::pertainyms() const | ||
| 193 | { | ||
| 194 | assert(_valid == true); | ||
| 195 | |||
| 196 | return _data->adjectives().pertainym_of(*this); | ||
| 197 | } | ||
| 198 | |||
| 199 | adjective_query noun::variations() const | ||
| 200 | { | ||
| 201 | assert(_valid == true); | ||
| 202 | |||
| 203 | return _data->adjectives().variant_of(*this); | ||
| 204 | } | ||
| 205 | |||
| 206 | std::string noun::imagenet_url() const | ||
| 207 | { | ||
| 208 | std::stringstream url; | ||
| 209 | url << "http://www.image-net.org/api/text/imagenet.synset.geturls?wnid=n"; | ||
| 210 | url.width(8); | ||
| 211 | url.fill('0'); | ||
| 212 | url << (_wnid % 100000000); | ||
| 213 | return url.str(); | ||
| 214 | } | ||
| 215 | |||
| 216 | bool noun::operator<(const noun& other) const | ||
| 217 | { | ||
| 218 | return _id < other._id; | ||
| 219 | } | ||
| 220 | |||
| 221 | }; | ||
| diff --git a/lib/noun.h b/lib/noun.h deleted file mode 100644 index bd71e57..0000000 --- a/lib/noun.h +++ /dev/null | |||
| @@ -1,55 +0,0 @@ | |||
| 1 | #ifndef NOUN_H_24A03C83 | ||
| 2 | #define NOUN_H_24A03C83 | ||
| 3 | |||
| 4 | namespace verbly { | ||
| 5 | |||
| 6 | class noun : public word { | ||
| 7 | private: | ||
| 8 | std::string _singular; | ||
| 9 | std::string _plural; | ||
| 10 | int _wnid; | ||
| 11 | |||
| 12 | friend class noun_query; | ||
| 13 | |||
| 14 | public: | ||
| 15 | noun(); | ||
| 16 | noun(const data& _data, int _id); | ||
| 17 | |||
| 18 | std::string base_form() const; | ||
| 19 | std::string singular_form() const; | ||
| 20 | std::string plural_form() const; | ||
| 21 | int wnid() const; | ||
| 22 | |||
| 23 | bool has_plural_form() const; | ||
| 24 | |||
| 25 | noun_query hypernyms() const; | ||
| 26 | noun_query full_hypernyms() const; | ||
| 27 | noun_query hyponyms() const; | ||
| 28 | noun_query full_hyponyms() const; | ||
| 29 | noun_query part_meronyms() const; | ||
| 30 | noun_query full_part_meronyms() const; | ||
| 31 | noun_query part_holonyms() const; | ||
| 32 | noun_query full_part_holonyms() const; | ||
| 33 | noun_query substance_meronyms() const; | ||
| 34 | noun_query full_substance_meronyms() const; | ||
| 35 | noun_query substance_holonyms() const; | ||
| 36 | noun_query full_substance_holonyms() const; | ||
| 37 | noun_query member_meronyms() const; | ||
| 38 | noun_query full_member_meronyms() const; | ||
| 39 | noun_query member_holonyms() const; | ||
| 40 | noun_query full_member_holonyms() const; | ||
| 41 | noun_query classes() const; | ||
| 42 | noun_query instances() const; | ||
| 43 | noun_query synonyms() const; | ||
| 44 | noun_query antonyms() const; | ||
| 45 | adjective_query pertainyms() const; | ||
| 46 | adjective_query variations() const; | ||
| 47 | |||
| 48 | std::string imagenet_url() const; | ||
| 49 | |||
| 50 | bool operator<(const noun& other) const; | ||
| 51 | }; | ||
| 52 | |||
| 53 | }; | ||
| 54 | |||
| 55 | #endif /* end of include guard: NOUN_H_24A03C83 */ | ||
| diff --git a/lib/noun_query.cpp b/lib/noun_query.cpp deleted file mode 100644 index 8648227..0000000 --- a/lib/noun_query.cpp +++ /dev/null | |||
| @@ -1,2013 +0,0 @@ | |||
| 1 | #include "verbly.h" | ||
| 2 | |||
| 3 | namespace verbly { | ||
| 4 | |||
| 5 | noun_query::noun_query(const data& _data) : _data(_data) | ||
| 6 | { | ||
| 7 | |||
| 8 | } | ||
| 9 | |||
| 10 | noun_query& noun_query::limit(int _limit) | ||
| 11 | { | ||
| 12 | if ((_limit > 0) || (_limit == unlimited)) | ||
| 13 | { | ||
| 14 | this->_limit = _limit; | ||
| 15 | } | ||
| 16 | |||
| 17 | return *this; | ||
| 18 | } | ||
| 19 | |||
| 20 | noun_query& noun_query::random() | ||
| 21 | { | ||
| 22 | this->_random = true; | ||
| 23 | |||
| 24 | return *this; | ||
| 25 | } | ||
| 26 | |||
| 27 | noun_query& noun_query::except(const noun& _word) | ||
| 28 | { | ||
| 29 | _except.push_back(_word); | ||
| 30 | |||
| 31 | return *this; | ||
| 32 | } | ||
| 33 | |||
| 34 | noun_query& noun_query::rhymes_with(const word& _word) | ||
| 35 | { | ||
| 36 | for (auto rhyme : _word.get_rhymes()) | ||
| 37 | { | ||
| 38 | _rhymes.push_back(rhyme); | ||
| 39 | } | ||
| 40 | |||
| 41 | if (dynamic_cast<const noun*>(&_word) != nullptr) | ||
| 42 | { | ||
| 43 | _except.push_back(dynamic_cast<const noun&>(_word)); | ||
| 44 | } | ||
| 45 | |||
| 46 | return *this; | ||
| 47 | } | ||
| 48 | |||
| 49 | noun_query& noun_query::rhymes_with(rhyme _r) | ||
| 50 | { | ||
| 51 | _rhymes.push_back(_r); | ||
| 52 | |||
| 53 | return *this; | ||
| 54 | } | ||
| 55 | |||
| 56 | noun_query& noun_query::has_pronunciation() | ||
| 57 | { | ||
| 58 | this->_has_prn = true; | ||
| 59 | |||
| 60 | return *this; | ||
| 61 | } | ||
| 62 | |||
| 63 | noun_query& noun_query::has_rhyming_noun() | ||
| 64 | { | ||
| 65 | _has_rhyming_noun = true; | ||
| 66 | |||
| 67 | return *this; | ||
| 68 | } | ||
| 69 | |||
| 70 | noun_query& noun_query::has_rhyming_adjective() | ||
| 71 | { | ||
| 72 | _has_rhyming_adjective = true; | ||
| 73 | |||
| 74 | return *this; | ||
| 75 | } | ||
| 76 | |||
| 77 | noun_query& noun_query::has_rhyming_adverb() | ||
| 78 | { | ||
| 79 | _has_rhyming_adverb = true; | ||
| 80 | |||
| 81 | return *this; | ||
| 82 | } | ||
| 83 | |||
| 84 | noun_query& noun_query::has_rhyming_verb() | ||
| 85 | { | ||
| 86 | _has_rhyming_verb = true; | ||
| 87 | |||
| 88 | return *this; | ||
| 89 | } | ||
| 90 | |||
| 91 | noun_query& noun_query::with_stress(filter<std::vector<bool>> _arg) | ||
| 92 | { | ||
| 93 | _stress = _arg; | ||
| 94 | |||
| 95 | return *this; | ||
| 96 | } | ||
| 97 | |||
| 98 | noun_query& noun_query::with_singular_form(std::string _arg) | ||
| 99 | { | ||
| 100 | _with_singular_form.push_back(_arg); | ||
| 101 | |||
| 102 | return *this; | ||
| 103 | } | ||
| 104 | |||
| 105 | noun_query& noun_query::with_prefix(filter<std::string> _f) | ||
| 106 | { | ||
| 107 | _f.clean(); | ||
| 108 | _with_prefix = _f; | ||
| 109 | |||
| 110 | return *this; | ||
| 111 | } | ||
| 112 | |||
| 113 | noun_query& noun_query::with_suffix(filter<std::string> _f) | ||
| 114 | { | ||
| 115 | _f.clean(); | ||
| 116 | _with_suffix = _f; | ||
| 117 | |||
| 118 | return *this; | ||
| 119 | } | ||
| 120 | |||
| 121 | noun_query& noun_query::requires_plural_form() | ||
| 122 | { | ||
| 123 | _requires_plural_form = true; | ||
| 124 | |||
| 125 | return *this; | ||
| 126 | } | ||
| 127 | |||
| 128 | noun_query& noun_query::with_complexity(int _arg) | ||
| 129 | { | ||
| 130 | _with_complexity = _arg; | ||
| 131 | |||
| 132 | return *this; | ||
| 133 | } | ||
| 134 | |||
| 135 | noun_query& noun_query::is_hypernym() | ||
| 136 | { | ||
| 137 | _is_hypernym = true; | ||
| 138 | |||
| 139 | return *this; | ||
| 140 | } | ||
| 141 | |||
| 142 | noun_query& noun_query::hypernym_of(filter<noun> _f) | ||
| 143 | { | ||
| 144 | _f.clean(); | ||
| 145 | _hypernym_of = _f; | ||
| 146 | |||
| 147 | return *this; | ||
| 148 | } | ||
| 149 | |||
| 150 | noun_query& noun_query::full_hypernym_of(filter<noun> _f) | ||
| 151 | { | ||
| 152 | _f.clean(); | ||
| 153 | _full_hypernym_of = _f; | ||
| 154 | |||
| 155 | return *this; | ||
| 156 | } | ||
| 157 | |||
| 158 | noun_query& noun_query::is_hyponym() | ||
| 159 | { | ||
| 160 | _is_hyponym = true; | ||
| 161 | |||
| 162 | return *this; | ||
| 163 | } | ||
| 164 | |||
| 165 | noun_query& noun_query::hyponym_of(filter<noun> _f) | ||
| 166 | { | ||
| 167 | _f.clean(); | ||
| 168 | _hyponym_of = _f; | ||
| 169 | |||
| 170 | return *this; | ||
| 171 | } | ||
| 172 | |||
| 173 | noun_query& noun_query::full_hyponym_of(filter<noun> _f) | ||
| 174 | { | ||
| 175 | _f.clean(); | ||
| 176 | _full_hyponym_of = _f; | ||
| 177 | |||
| 178 | return *this; | ||
| 179 | } | ||
| 180 | |||
| 181 | noun_query& noun_query::is_part_meronym() | ||
| 182 | { | ||
| 183 | _is_part_meronym = true; | ||
| 184 | |||
| 185 | return *this; | ||
| 186 | } | ||
| 187 | |||
| 188 | noun_query& noun_query::part_meronym_of(filter<noun> _f) | ||
| 189 | { | ||
| 190 | _f.clean(); | ||
| 191 | _part_meronym_of = _f; | ||
| 192 | |||
| 193 | return *this; | ||
| 194 | } | ||
| 195 | |||
| 196 | noun_query& noun_query::full_part_meronym_of(filter<noun> _f) | ||
| 197 | { | ||
| 198 | _f.clean(); | ||
| 199 | _full_part_meronym_of = _f; | ||
| 200 | |||
| 201 | return *this; | ||
| 202 | } | ||
| 203 | |||
| 204 | noun_query& noun_query::is_part_holonym() | ||
| 205 | { | ||
| 206 | _is_part_holonym = true; | ||
| 207 | |||
| 208 | return *this; | ||
| 209 | } | ||
| 210 | |||
| 211 | noun_query& noun_query::part_holonym_of(filter<noun> _f) | ||
| 212 | { | ||
| 213 | _f.clean(); | ||
| 214 | _part_holonym_of = _f; | ||
| 215 | |||
| 216 | return *this; | ||
| 217 | } | ||
| 218 | |||
| 219 | noun_query& noun_query::full_part_holonym_of(filter<noun> _f) | ||
| 220 | { | ||
| 221 | _f.clean(); | ||
| 222 | _full_part_holonym_of = _f; | ||
| 223 | |||
| 224 | return *this; | ||
| 225 | } | ||
| 226 | |||
| 227 | noun_query& noun_query::is_substance_meronym() | ||
| 228 | { | ||
| 229 | _is_substance_meronym = true; | ||
| 230 | |||
| 231 | return *this; | ||
| 232 | } | ||
| 233 | |||
| 234 | noun_query& noun_query::substance_meronym_of(filter<noun> _f) | ||
| 235 | { | ||
| 236 | _f.clean(); | ||
| 237 | _substance_meronym_of = _f; | ||
| 238 | |||
| 239 | return *this; | ||
| 240 | } | ||
| 241 | |||
| 242 | noun_query& noun_query::full_substance_meronym_of(filter<noun> _f) | ||
| 243 | { | ||
| 244 | _f.clean(); | ||
| 245 | _full_substance_meronym_of = _f; | ||
| 246 | |||
| 247 | return *this; | ||
| 248 | } | ||
| 249 | |||
| 250 | noun_query& noun_query::is_substance_holonym() | ||
| 251 | { | ||
| 252 | _is_substance_holonym = true; | ||
| 253 | |||
| 254 | return *this; | ||
| 255 | } | ||
| 256 | |||
| 257 | noun_query& noun_query::substance_holonym_of(filter<noun> _f) | ||
| 258 | { | ||
| 259 | _f.clean(); | ||
| 260 | _substance_holonym_of = _f; | ||
| 261 | |||
| 262 | return *this; | ||
| 263 | } | ||
| 264 | |||
| 265 | noun_query& noun_query::full_substance_holonym_of(filter<noun> _f) | ||
| 266 | { | ||
| 267 | _f.clean(); | ||
| 268 | _full_substance_holonym_of = _f; | ||
| 269 | |||
| 270 | return *this; | ||
| 271 | } | ||
| 272 | |||
| 273 | noun_query& noun_query::is_member_meronym() | ||
| 274 | { | ||
| 275 | _is_member_meronym = true; | ||
| 276 | |||
| 277 | return *this; | ||
| 278 | } | ||
| 279 | |||
| 280 | noun_query& noun_query::member_meronym_of(filter<noun> _f) | ||
| 281 | { | ||
| 282 | _f.clean(); | ||
| 283 | _member_meronym_of = _f; | ||
| 284 | |||
| 285 | return *this; | ||
| 286 | } | ||
| 287 | |||
| 288 | noun_query& noun_query::full_member_meronym_of(filter<noun> _f) | ||
| 289 | { | ||
| 290 | _f.clean(); | ||
| 291 | _full_member_meronym_of = _f; | ||
| 292 | |||
| 293 | return *this; | ||
| 294 | } | ||
| 295 | |||
| 296 | noun_query& noun_query::is_member_holonym() | ||
| 297 | { | ||
| 298 | _is_member_holonym = true; | ||
| 299 | |||
| 300 | return *this; | ||
| 301 | } | ||
| 302 | |||
| 303 | noun_query& noun_query::member_holonym_of(filter<noun> _f) | ||
| 304 | { | ||
| 305 | _f.clean(); | ||
| 306 | _member_holonym_of = _f; | ||
| 307 | |||
| 308 | return *this; | ||
| 309 | } | ||
| 310 | |||
| 311 | noun_query& noun_query::full_member_holonym_of(filter<noun> _f) | ||
| 312 | { | ||
| 313 | _f.clean(); | ||
| 314 | _full_member_holonym_of = _f; | ||
| 315 | |||
| 316 | return *this; | ||
| 317 | } | ||
| 318 | |||
| 319 | noun_query& noun_query::is_proper() | ||
| 320 | { | ||
| 321 | _is_proper = true; | ||
| 322 | |||
| 323 | return *this; | ||
| 324 | } | ||
| 325 | |||
| 326 | noun_query& noun_query::is_not_proper() | ||
| 327 | { | ||
| 328 | _is_not_proper = true; | ||
| 329 | |||
| 330 | return *this; | ||
| 331 | } | ||
| 332 | |||
| 333 | noun_query& noun_query::is_instance() | ||
| 334 | { | ||
| 335 | _is_instance = true; | ||
| 336 | |||
| 337 | return *this; | ||
| 338 | } | ||
| 339 | |||
| 340 | noun_query& noun_query::instance_of(filter<noun> _f) | ||
| 341 | { | ||
| 342 | _f.clean(); | ||
| 343 | _instance_of = _f; | ||
| 344 | |||
| 345 | return *this; | ||
| 346 | } | ||
| 347 | |||
| 348 | noun_query& noun_query::is_class() | ||
| 349 | { | ||
| 350 | _is_class = true; | ||
| 351 | |||
| 352 | return *this; | ||
| 353 | } | ||
| 354 | |||
| 355 | noun_query& noun_query::class_of(filter<noun> _f) | ||
| 356 | { | ||
| 357 | _f.clean(); | ||
| 358 | _class_of = _f; | ||
| 359 | |||
| 360 | return *this; | ||
| 361 | } | ||
| 362 | |||
| 363 | noun_query& noun_query::has_synonyms() | ||
| 364 | { | ||
| 365 | _has_synonyms = true; | ||
| 366 | |||
| 367 | return *this; | ||
| 368 | } | ||
| 369 | |||
| 370 | noun_query& noun_query::synonym_of(filter<noun> _f) | ||
| 371 | { | ||
| 372 | _f.clean(); | ||
| 373 | _synonym_of = _f; | ||
| 374 | |||
| 375 | return *this; | ||
| 376 | } | ||
| 377 | |||
| 378 | noun_query& noun_query::has_antonyms() | ||
| 379 | { | ||
| 380 | _has_antonyms = true; | ||
| 381 | |||
| 382 | return *this; | ||
| 383 | } | ||
| 384 | |||
| 385 | noun_query& noun_query::antonym_of(filter<noun> _f) | ||
| 386 | { | ||
| 387 | _f.clean(); | ||
| 388 | _antonym_of = _f; | ||
| 389 | |||
| 390 | return *this; | ||
| 391 | } | ||
| 392 | |||
| 393 | noun_query& noun_query::has_pertainym() | ||
| 394 | { | ||
| 395 | _has_pertainym = true; | ||
| 396 | |||
| 397 | return *this; | ||
| 398 | } | ||
| 399 | |||
| 400 | noun_query& noun_query::anti_pertainym_of(filter<adjective> _f) | ||
| 401 | { | ||
| 402 | _f.clean(); | ||
| 403 | _anti_pertainym_of = _f; | ||
| 404 | |||
| 405 | return *this; | ||
| 406 | } | ||
| 407 | |||
| 408 | noun_query& noun_query::is_attribute() | ||
| 409 | { | ||
| 410 | _is_attribute = true; | ||
| 411 | |||
| 412 | return *this; | ||
| 413 | } | ||
| 414 | |||
| 415 | noun_query& noun_query::attribute_of(filter<adjective> _f) | ||
| 416 | { | ||
| 417 | _f.clean(); | ||
| 418 | _attribute_of = _f; | ||
| 419 | |||
| 420 | return *this; | ||
| 421 | } | ||
| 422 | |||
| 423 | noun_query& noun_query::at_least_n_images(int _arg) | ||
| 424 | { | ||
| 425 | _at_least_n_images = _arg; | ||
| 426 | |||
| 427 | return *this; | ||
| 428 | } | ||
| 429 | |||
| 430 | noun_query& noun_query::with_wnid(int _arg) | ||
| 431 | { | ||
| 432 | _with_wnid.insert(_arg); | ||
| 433 | |||
| 434 | return *this; | ||
| 435 | } | ||
| 436 | |||
| 437 | /* | ||
| 438 | noun_query& noun_query::derived_from(const word& _w) | ||
| 439 | { | ||
| 440 | if (dynamic_cast<const adjective*>(&_w) != nullptr) | ||
| 441 | { | ||
| 442 | _derived_from_adjective.push_back(dynamic_cast<const adjective&>(_w)); | ||
| 443 | } else if (dynamic_cast<const adverb*>(&_w) != nullptr) | ||
| 444 | { | ||
| 445 | _derived_from_adverb.push_back(dynamic_cast<const adverb&>(_w)); | ||
| 446 | } else if (dynamic_cast<const noun*>(&_w) != nullptr) | ||
| 447 | { | ||
| 448 | _derived_from_noun.push_back(dynamic_cast<const noun&>(_w)); | ||
| 449 | } | ||
| 450 | |||
| 451 | return *this; | ||
| 452 | } | ||
| 453 | |||
| 454 | noun_query& noun_query::not_derived_from(const word& _w) | ||
| 455 | { | ||
| 456 | if (dynamic_cast<const adjective*>(&_w) != nullptr) | ||
| 457 | { | ||
| 458 | _not_derived_from_adjective.push_back(dynamic_cast<const adjective&>(_w)); | ||
| 459 | } else if (dynamic_cast<const adverb*>(&_w) != nullptr) | ||
| 460 | { | ||
| 461 | _not_derived_from_adverb.push_back(dynamic_cast<const adverb&>(_w)); | ||
| 462 | } else if (dynamic_cast<const noun*>(&_w) != nullptr) | ||
| 463 | { | ||
| 464 | _not_derived_from_noun.push_back(dynamic_cast<const noun&>(_w)); | ||
| 465 | } | ||
| 466 | |||
| 467 | return *this; | ||
| 468 | }*/ | ||
| 469 | |||
| 470 | std::list<noun> noun_query::run() const | ||
| 471 | { | ||
| 472 | std::stringstream construct; | ||
| 473 | |||
| 474 | if (!_full_hypernym_of.empty() || !_full_hyponym_of.empty() || !_full_part_meronym_of.empty() || !_full_part_holonym_of.empty() || !_full_substance_meronym_of.empty() || !_full_substance_holonym_of.empty() || !_full_member_meronym_of.empty() || !_full_member_holonym_of.empty()) | ||
| 475 | { | ||
| 476 | construct << "WITH RECURSIVE "; | ||
| 477 | |||
| 478 | std::list<std::string> ctes; | ||
| 479 | |||
| 480 | for (auto hyponym : _full_hypernym_of.uniq_flatten()) | ||
| 481 | { | ||
| 482 | ctes.push_back("hypernym_tree_" + std::to_string(hyponym._id) + " AS (SELECT hypernym_id FROM hypernymy WHERE hyponym_id = " + std::to_string(hyponym._id) + " UNION SELECT h.hypernym_id FROM hypernym_tree_" + std::to_string(hyponym._id) + " AS t INNER JOIN hypernymy AS h ON t.hypernym_id = h.hyponym_id)"); | ||
| 483 | } | ||
| 484 | |||
| 485 | for (auto hypernym : _full_hyponym_of.uniq_flatten()) | ||
| 486 | { | ||
| 487 | ctes.push_back("hyponym_tree_" + std::to_string(hypernym._id) + " AS (SELECT hyponym_id FROM hypernymy WHERE hypernym_id = " + std::to_string(hypernym._id) + " UNION SELECT h.hyponym_id FROM hyponym_tree_" + std::to_string(hypernym._id) + " AS t INNER JOIN hypernymy AS h ON t.hyponym_id = h.hypernym_id)"); | ||
| 488 | } | ||
| 489 | |||
| 490 | for (auto holonym : _full_part_meronym_of.uniq_flatten()) | ||
| 491 | { | ||
| 492 | ctes.push_back("part_meronym_tree_" + std::to_string(holonym._id) + " AS (SELECT meronym_id FROM part_meronymy WHERE holonym_id = " + std::to_string(holonym._id) + " UNION SELECT h.meronym_id FROM part_meronym_tree_" + std::to_string(holonym._id) + " AS t INNER JOIN part_meronymy AS h ON t.meronym_id = h.holonym_id)"); | ||
| 493 | } | ||
| 494 | |||
| 495 | for (auto meronym : _full_part_holonym_of.uniq_flatten()) | ||
| 496 | { | ||
| 497 | ctes.push_back("part_holonym_tree_" + std::to_string(meronym._id) + " AS (SELECT holonym_id FROM part_meronymy WHERE meronym_id = " + std::to_string(meronym._id) + " UNION SELECT h.holonym_id FROM part_holonym_tree_" + std::to_string(meronym._id) + " AS t INNER JOIN part_meronymy AS h ON t.holonym_id = h.meronym_id)"); | ||
| 498 | } | ||
| 499 | |||
| 500 | for (auto holonym : _full_substance_meronym_of.uniq_flatten()) | ||
| 501 | { | ||
| 502 | ctes.push_back("substance_meronym_tree_" + std::to_string(holonym._id) + " AS (SELECT meronym_id FROM substance_meronymy WHERE holonym_id = " + std::to_string(holonym._id) + " UNION SELECT h.meronym_id FROM substance_meronym_tree_" + std::to_string(holonym._id) + " AS t INNER JOIN substance_meronymy AS h ON t.meronym_id = h.holonym_id)"); | ||
| 503 | } | ||
| 504 | |||
| 505 | for (auto meronym : _full_substance_holonym_of.uniq_flatten()) | ||
| 506 | { | ||
| 507 | ctes.push_back("substance_holonym_tree_" + std::to_string(meronym._id) + " AS (SELECT holonym_id FROM substance_meronymy WHERE meronym_id = " + std::to_string(meronym._id) + " UNION SELECT h.holonym_id FROM substance_holonym_tree_" + std::to_string(meronym._id) + " AS t INNER JOIN substance_meronymy AS h ON t.holonym_id = h.meronym_id)"); | ||
| 508 | } | ||
| 509 | |||
| 510 | for (auto holonym : _full_member_meronym_of.uniq_flatten()) | ||
| 511 | { | ||
| 512 | ctes.push_back("member_meronym_tree_" + std::to_string(holonym._id) + " AS (SELECT meronym_id FROM member_meronymy WHERE holonym_id = " + std::to_string(holonym._id) + " UNION SELECT h.meronym_id FROM member_meronym_tree_" + std::to_string(holonym._id) + " AS t INNER JOIN member_meronymy AS h ON t.meronym_id = h.holonym_id)"); | ||
| 513 | } | ||
| 514 | |||
| 515 | for (auto meronym : _full_member_holonym_of.uniq_flatten()) | ||
| 516 | { | ||
| 517 | ctes.push_back("member_holonym_tree_" + std::to_string(meronym._id) + " AS (SELECT holonym_id FROM member_meronymy WHERE meronym_id = " + std::to_string(meronym._id) + " UNION SELECT h.holonym_id FROM member_holonym_tree_" + std::to_string(meronym._id) + " AS t INNER JOIN member_meronymy AS h ON t.holonym_id = h.meronym_id)"); | ||
| 518 | } | ||
| 519 | |||
| 520 | construct << verbly::implode(std::begin(ctes), std::end(ctes), ", "); | ||
| 521 | construct << " "; | ||
| 522 | } | ||
| 523 | |||
| 524 | construct << "SELECT noun_id, singular, plural, wnid FROM nouns"; | ||
| 525 | std::list<std::string> conditions; | ||
| 526 | std::list<binding> bindings; | ||
| 527 | |||
| 528 | if (_has_prn) | ||
| 529 | { | ||
| 530 | conditions.push_back("noun_id IN (SELECT noun_id FROM noun_pronunciations)"); | ||
| 531 | } | ||
| 532 | |||
| 533 | if (!_rhymes.empty()) | ||
| 534 | { | ||
| 535 | std::list<std::string> clauses(_rhymes.size(), "(prerhyme != ? AND rhyme = ?)"); | ||
| 536 | std::string cond = "noun_id IN (SELECT noun_id FROM noun_pronunciations WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
| 537 | conditions.push_back(cond); | ||
| 538 | |||
| 539 | for (auto rhy : _rhymes) | ||
| 540 | { | ||
| 541 | bindings.emplace_back(rhy.get_prerhyme()); | ||
| 542 | bindings.emplace_back(rhy.get_rhyme()); | ||
| 543 | } | ||
| 544 | } | ||
| 545 | |||
| 546 | if (_has_rhyming_noun) | ||
| 547 | { | ||
| 548 | conditions.push_back("noun_id IN (SELECT a.noun_id FROM nouns AS a INNER JOIN noun_pronunciations AS curp ON curp.noun_id = a.noun_id INNER JOIN noun_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme AND rhmp.noun_id != curp.noun_id)"); | ||
| 549 | } | ||
| 550 | |||
| 551 | if (_has_rhyming_adjective) | ||
| 552 | { | ||
| 553 | conditions.push_back("noun_id IN (SELECT a.noun_id FROM nouns AS a INNER JOIN noun_pronunciations AS curp ON curp.noun_id = a.noun_id INNER JOIN adjective_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)"); | ||
| 554 | } | ||
| 555 | |||
| 556 | if (_has_rhyming_adverb) | ||
| 557 | { | ||
| 558 | conditions.push_back("noun_id IN (SELECT a.noun_id FROM nouns AS a INNER JOIN noun_pronunciations AS curp ON curp.noun_id = a.noun_id INNER JOIN adverb_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)"); | ||
| 559 | } | ||
| 560 | |||
| 561 | if (_has_rhyming_verb) | ||
| 562 | { | ||
| 563 | conditions.push_back("noun_id IN (SELECT a.noun_id FROM nouns AS a INNER JOIN noun_pronunciations AS curp ON curp.noun_id = a.noun_id INNER JOIN verb_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)"); | ||
| 564 | } | ||
| 565 | |||
| 566 | if (!_stress.empty()) | ||
| 567 | { | ||
| 568 | std::stringstream cond; | ||
| 569 | if (_stress.get_notlogic()) | ||
| 570 | { | ||
| 571 | cond << "noun_id NOT IN"; | ||
| 572 | } else { | ||
| 573 | cond << "noun_id IN"; | ||
| 574 | } | ||
| 575 | |||
| 576 | cond << "(SELECT noun_id FROM noun_pronunciations WHERE "; | ||
| 577 | |||
| 578 | std::function<std::string (filter<std::vector<bool>>, bool)> recur = [&] (filter<std::vector<bool>> f, bool notlogic) -> std::string { | ||
| 579 | switch (f.get_type()) | ||
| 580 | { | ||
| 581 | case filter<std::vector<bool>>::type::singleton: | ||
| 582 | { | ||
| 583 | std::ostringstream _val; | ||
| 584 | for (auto syl : f.get_elem()) | ||
| 585 | { | ||
| 586 | if (syl) | ||
| 587 | { | ||
| 588 | _val << "1"; | ||
| 589 | } else { | ||
| 590 | _val << "0"; | ||
| 591 | } | ||
| 592 | } | ||
| 593 | |||
| 594 | bindings.emplace_back(_val.str()); | ||
| 595 | |||
| 596 | if (notlogic == f.get_notlogic()) | ||
| 597 | { | ||
| 598 | return "stress = ?"; | ||
| 599 | } else { | ||
| 600 | return "stress != ?"; | ||
| 601 | } | ||
| 602 | } | ||
| 603 | |||
| 604 | case filter<std::vector<bool>>::type::group: | ||
| 605 | { | ||
| 606 | bool truelogic = notlogic != f.get_notlogic(); | ||
| 607 | |||
| 608 | std::list<std::string> clauses; | ||
| 609 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<std::vector<bool>> f2) { | ||
| 610 | return recur(f2, truelogic); | ||
| 611 | }); | ||
| 612 | |||
| 613 | if (truelogic == f.get_orlogic()) | ||
| 614 | { | ||
| 615 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
| 616 | } else { | ||
| 617 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
| 618 | } | ||
| 619 | } | ||
| 620 | } | ||
| 621 | }; | ||
| 622 | |||
| 623 | cond << recur(_stress, _stress.get_notlogic()); | ||
| 624 | cond << ")"; | ||
| 625 | conditions.push_back(cond.str()); | ||
| 626 | } | ||
| 627 | |||
| 628 | for (auto except : _except) | ||
| 629 | { | ||
| 630 | conditions.push_back("noun_id != ?"); | ||
| 631 | bindings.emplace_back(except._id); | ||
| 632 | } | ||
| 633 | |||
| 634 | if (!_with_singular_form.empty()) | ||
| 635 | { | ||
| 636 | std::list<std::string> clauses(_with_singular_form.size(), "singular = ?"); | ||
| 637 | std::string cond = "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
| 638 | conditions.push_back(cond); | ||
| 639 | |||
| 640 | for (auto form : _with_singular_form) | ||
| 641 | { | ||
| 642 | bindings.emplace_back(form); | ||
| 643 | } | ||
| 644 | } | ||
| 645 | |||
| 646 | if (_requires_plural_form) | ||
| 647 | { | ||
| 648 | conditions.push_back("plural IS NOT NULL"); | ||
| 649 | } | ||
| 650 | |||
| 651 | if (!_with_prefix.empty()) | ||
| 652 | { | ||
| 653 | std::function<std::string (filter<std::string>, bool)> recur = [&] (filter<std::string> f, bool notlogic) -> std::string { | ||
| 654 | switch (f.get_type()) | ||
| 655 | { | ||
| 656 | case filter<std::string>::type::singleton: | ||
| 657 | { | ||
| 658 | bindings.emplace_back(f.get_elem() + "%"); | ||
| 659 | |||
| 660 | if (notlogic == f.get_notlogic()) | ||
| 661 | { | ||
| 662 | return "singular LIKE ?"; | ||
| 663 | } else { | ||
| 664 | return "singular NOT LIKE ?"; | ||
| 665 | } | ||
| 666 | } | ||
| 667 | |||
| 668 | case filter<std::string>::type::group: | ||
| 669 | { | ||
| 670 | bool truelogic = notlogic != f.get_notlogic(); | ||
| 671 | |||
| 672 | std::list<std::string> clauses; | ||
| 673 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<std::string> f2) { | ||
| 674 | return recur(f2, truelogic); | ||
| 675 | }); | ||
| 676 | |||
| 677 | if (truelogic == f.get_orlogic()) | ||
| 678 | { | ||
| 679 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
| 680 | } else { | ||
| 681 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
| 682 | } | ||
| 683 | } | ||
| 684 | } | ||
| 685 | }; | ||
| 686 | |||
| 687 | conditions.push_back(recur(_with_prefix, false)); | ||
| 688 | } | ||
| 689 | |||
| 690 | if (!_with_suffix.empty()) | ||
| 691 | { | ||
| 692 | std::function<std::string (filter<std::string>, bool)> recur = [&] (filter<std::string> f, bool notlogic) -> std::string { | ||
| 693 | switch (f.get_type()) | ||
| 694 | { | ||
| 695 | case filter<std::string>::type::singleton: | ||
| 696 | { | ||
| 697 | bindings.emplace_back("%" + f.get_elem()); | ||
| 698 | |||
| 699 | if (notlogic == f.get_notlogic()) | ||
| 700 | { | ||
| 701 | return "singular LIKE ?"; | ||
| 702 | } else { | ||
| 703 | return "singular NOT LIKE ?"; | ||
| 704 | } | ||
| 705 | } | ||
| 706 | |||
| 707 | case filter<std::string>::type::group: | ||
| 708 | { | ||
| 709 | bool truelogic = notlogic != f.get_notlogic(); | ||
| 710 | |||
| 711 | std::list<std::string> clauses; | ||
| 712 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<std::string> f2) { | ||
| 713 | return recur(f2, truelogic); | ||
| 714 | }); | ||
| 715 | |||
| 716 | if (truelogic == f.get_orlogic()) | ||
| 717 | { | ||
| 718 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
| 719 | } else { | ||
| 720 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
| 721 | } | ||
| 722 | } | ||
| 723 | } | ||
| 724 | }; | ||
| 725 | |||
| 726 | conditions.push_back(recur(_with_suffix, false)); | ||
| 727 | } | ||
| 728 | |||
| 729 | if (_with_complexity != unlimited) | ||
| 730 | { | ||
| 731 | conditions.push_back("complexity = ?"); | ||
| 732 | bindings.emplace_back(_with_complexity); | ||
| 733 | } | ||
| 734 | |||
| 735 | if (_is_hypernym) | ||
| 736 | { | ||
| 737 | conditions.push_back("noun_id IN (SELECT hypernym_id FROM hypernymy)"); | ||
| 738 | } | ||
| 739 | |||
| 740 | if (!_hypernym_of.empty()) | ||
| 741 | { | ||
| 742 | std::stringstream cond; | ||
| 743 | if (_hypernym_of.get_notlogic()) | ||
| 744 | { | ||
| 745 | cond << "noun_id NOT IN"; | ||
| 746 | } else { | ||
| 747 | cond << "noun_id IN"; | ||
| 748 | } | ||
| 749 | |||
| 750 | cond << "(SELECT hypernym_id FROM hypernymy WHERE "; | ||
| 751 | |||
| 752 | std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string { | ||
| 753 | switch (f.get_type()) | ||
| 754 | { | ||
| 755 | case filter<noun>::type::singleton: | ||
| 756 | { | ||
| 757 | bindings.emplace_back(f.get_elem()._id); | ||
| 758 | |||
| 759 | if (notlogic == f.get_notlogic()) | ||
| 760 | { | ||
| 761 | return "hyponym_id = ?"; | ||
| 762 | } else { | ||
| 763 | return "hyponym_id != ?"; | ||
| 764 | } | ||
| 765 | } | ||
| 766 | |||
| 767 | case filter<noun>::type::group: | ||
| 768 | { | ||
| 769 | bool truelogic = notlogic != f.get_notlogic(); | ||
| 770 | |||
| 771 | std::list<std::string> clauses; | ||
| 772 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) { | ||
| 773 | return recur(f2, truelogic); | ||
| 774 | }); | ||
| 775 | |||
| 776 | if (truelogic == f.get_orlogic()) | ||
| 777 | { | ||
| 778 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
| 779 | } else { | ||
| 780 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
| 781 | } | ||
| 782 | } | ||
| 783 | } | ||
| 784 | }; | ||
| 785 | |||
| 786 | cond << recur(_hypernym_of, _hypernym_of.get_notlogic()); | ||
| 787 | cond << ")"; | ||
| 788 | conditions.push_back(cond.str()); | ||
| 789 | } | ||
| 790 | |||
| 791 | if (!_full_hypernym_of.empty()) | ||
| 792 | { | ||
| 793 | std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string { | ||
| 794 | switch (f.get_type()) | ||
| 795 | { | ||
| 796 | case filter<noun>::type::singleton: | ||
| 797 | { | ||
| 798 | if (notlogic == f.get_notlogic()) | ||
| 799 | { | ||
| 800 | return "noun_id IN (SELECT hypernym_id FROM hypernym_tree_" + std::to_string(f.get_elem()._id) + ")"; | ||
| 801 | } else { | ||
| 802 | return "noun_id NOT IN (SELECT hypernym_id FROM hypernym_tree_" + std::to_string(f.get_elem()._id) + ")"; | ||
| 803 | } | ||
| 804 | } | ||
| 805 | |||
| 806 | case filter<noun>::type::group: | ||
| 807 | { | ||
| 808 | bool truelogic = notlogic != f.get_notlogic(); | ||
| 809 | |||
| 810 | std::list<std::string> clauses; | ||
| 811 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) { | ||
| 812 | return recur(f2, truelogic); | ||
| 813 | }); | ||
| 814 | |||
| 815 | if (truelogic == f.get_orlogic()) | ||
| 816 | { | ||
| 817 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
| 818 | } else { | ||
| 819 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
| 820 | } | ||
| 821 | } | ||
| 822 | } | ||
| 823 | }; | ||
| 824 | |||
| 825 | conditions.push_back(recur(_full_hypernym_of, false)); | ||
| 826 | } | ||
| 827 | |||
| 828 | if (!_full_hyponym_of.empty()) | ||
| 829 | { | ||
| 830 | std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string { | ||
| 831 | switch (f.get_type()) | ||
| 832 | { | ||
| 833 | case filter<noun>::type::singleton: | ||
| 834 | { | ||
| 835 | if (notlogic == f.get_notlogic()) | ||
| 836 | { | ||
| 837 | return "noun_id IN (SELECT hyponym_id FROM hyponym_tree_" + std::to_string(f.get_elem()._id) + ")"; | ||
| 838 | } else { | ||
| 839 | return "noun_id NOT IN (SELECT hyponym_id FROM hyponym_tree_" + std::to_string(f.get_elem()._id) + ")"; | ||
| 840 | } | ||
| 841 | } | ||
| 842 | |||
| 843 | case filter<noun>::type::group: | ||
| 844 | { | ||
| 845 | bool truelogic = notlogic != f.get_notlogic(); | ||
| 846 | |||
| 847 | std::list<std::string> clauses; | ||
| 848 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) { | ||
| 849 | return recur(f2, truelogic); | ||
| 850 | }); | ||
| 851 | |||
| 852 | if (truelogic == f.get_orlogic()) | ||
| 853 | { | ||
| 854 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
| 855 | } else { | ||
| 856 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
| 857 | } | ||
| 858 | } | ||
| 859 | } | ||
| 860 | }; | ||
| 861 | |||
| 862 | conditions.push_back(recur(_full_hyponym_of, false)); | ||
| 863 | } | ||
| 864 | |||
| 865 | if (_is_hyponym) | ||
| 866 | { | ||
| 867 | conditions.push_back("noun_id IN (SELECT hyponym_id FROM hypernymy)"); | ||
| 868 | } | ||
| 869 | |||
| 870 | if (!_hyponym_of.empty()) | ||
| 871 | { | ||
| 872 | std::stringstream cond; | ||
| 873 | if (_hyponym_of.get_notlogic()) | ||
| 874 | { | ||
| 875 | cond << "noun_id NOT IN"; | ||
| 876 | } else { | ||
| 877 | cond << "noun_id IN"; | ||
| 878 | } | ||
| 879 | |||
| 880 | cond << "(SELECT hyponym_id FROM hypernymy WHERE "; | ||
| 881 | |||
| 882 | std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string { | ||
| 883 | switch (f.get_type()) | ||
| 884 | { | ||
| 885 | case filter<noun>::type::singleton: | ||
| 886 | { | ||
| 887 | bindings.emplace_back(f.get_elem()._id); | ||
| 888 | |||
| 889 | if (notlogic == f.get_notlogic()) | ||
| 890 | { | ||
| 891 | return "hypernym_id = ?"; | ||
| 892 | } else { | ||
| 893 | return "hypernym_id != ?"; | ||
| 894 | } | ||
| 895 | } | ||
| 896 | |||
| 897 | case filter<noun>::type::group: | ||
| 898 | { | ||
| 899 | bool truelogic = notlogic != f.get_notlogic(); | ||
| 900 | |||
| 901 | std::list<std::string> clauses; | ||
| 902 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) { | ||
| 903 | return recur(f2, truelogic); | ||
| 904 | }); | ||
| 905 | |||
| 906 | if (truelogic == f.get_orlogic()) | ||
| 907 | { | ||
| 908 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
| 909 | } else { | ||
| 910 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
| 911 | } | ||
| 912 | } | ||
| 913 | } | ||
| 914 | }; | ||
| 915 | |||
| 916 | cond << recur(_hyponym_of, _hyponym_of.get_notlogic()); | ||
| 917 | cond << ")"; | ||
| 918 | conditions.push_back(cond.str()); | ||
| 919 | } | ||
| 920 | |||
| 921 | if (_is_part_meronym) | ||
| 922 | { | ||
| 923 | conditions.push_back("noun_id IN (SELECT meronym_id FROM part_meronymy)"); | ||
| 924 | } | ||
| 925 | |||
| 926 | if (!_part_meronym_of.empty()) | ||
| 927 | { | ||
| 928 | std::stringstream cond; | ||
| 929 | if (_part_meronym_of.get_notlogic()) | ||
| 930 | { | ||
| 931 | cond << "noun_id NOT IN"; | ||
| 932 | } else { | ||
| 933 | cond << "noun_id IN"; | ||
| 934 | } | ||
| 935 | |||
| 936 | cond << "(SELECT meronym_id FROM part_meronymy WHERE "; | ||
| 937 | |||
| 938 | std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string { | ||
| 939 | switch (f.get_type()) | ||
| 940 | { | ||
| 941 | case filter<noun>::type::singleton: | ||
| 942 | { | ||
| 943 | bindings.emplace_back(f.get_elem()._id); | ||
| 944 | |||
| 945 | if (notlogic == f.get_notlogic()) | ||
| 946 | { | ||
| 947 | return "holonym_id = ?"; | ||
| 948 | } else { | ||
| 949 | return "holonym_id != ?"; | ||
| 950 | } | ||
| 951 | } | ||
| 952 | |||
| 953 | case filter<noun>::type::group: | ||
| 954 | { | ||
| 955 | bool truelogic = notlogic != f.get_notlogic(); | ||
| 956 | |||
| 957 | std::list<std::string> clauses; | ||
| 958 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) { | ||
| 959 | return recur(f2, truelogic); | ||
| 960 | }); | ||
| 961 | |||
| 962 | if (truelogic == f.get_orlogic()) | ||
| 963 | { | ||
| 964 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
| 965 | } else { | ||
| 966 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
| 967 | } | ||
| 968 | } | ||
| 969 | } | ||
| 970 | }; | ||
| 971 | |||
| 972 | cond << recur(_part_meronym_of, _part_meronym_of.get_notlogic()); | ||
| 973 | cond << ")"; | ||
| 974 | conditions.push_back(cond.str()); | ||
| 975 | } | ||
| 976 | |||
| 977 | if (!_full_part_meronym_of.empty()) | ||
| 978 | { | ||
| 979 | std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string { | ||
| 980 | switch (f.get_type()) | ||
| 981 | { | ||
| 982 | case filter<noun>::type::singleton: | ||
| 983 | { | ||
| 984 | if (notlogic == f.get_notlogic()) | ||
| 985 | { | ||
| 986 | return "noun_id IN (SELECT meronym_id FROM part_meronym_tree_" + std::to_string(f.get_elem()._id) + ")"; | ||
| 987 | } else { | ||
| 988 | return "noun_id NOT IN (SELECT meronym_id FROM part_meronym_tree_" + std::to_string(f.get_elem()._id) + ")"; | ||
| 989 | } | ||
| 990 | } | ||
| 991 | |||
| 992 | case filter<noun>::type::group: | ||
| 993 | { | ||
| 994 | bool truelogic = notlogic != f.get_notlogic(); | ||
| 995 | |||
| 996 | std::list<std::string> clauses; | ||
| 997 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) { | ||
| 998 | return recur(f2, truelogic); | ||
| 999 | }); | ||
| 1000 | |||
| 1001 | if (truelogic == f.get_orlogic()) | ||
| 1002 | { | ||
| 1003 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
| 1004 | } else { | ||
| 1005 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
| 1006 | } | ||
| 1007 | } | ||
| 1008 | } | ||
| 1009 | }; | ||
| 1010 | |||
| 1011 | conditions.push_back(recur(_full_part_meronym_of, false)); | ||
| 1012 | } | ||
| 1013 | |||
| 1014 | if (_is_part_holonym) | ||
| 1015 | { | ||
| 1016 | conditions.push_back("noun_id IN (SELECT holonym_id FROM part_meronymy)"); | ||
| 1017 | } | ||
| 1018 | |||
| 1019 | if (!_part_holonym_of.empty()) | ||
| 1020 | { | ||
| 1021 | std::stringstream cond; | ||
| 1022 | if (_part_holonym_of.get_notlogic()) | ||
| 1023 | { | ||
| 1024 | cond << "noun_id NOT IN"; | ||
| 1025 | } else { | ||
| 1026 | cond << "noun_id IN"; | ||
| 1027 | } | ||
| 1028 | |||
| 1029 | cond << "(SELECT holonym_id FROM part_meronymy WHERE "; | ||
| 1030 | |||
| 1031 | std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string { | ||
| 1032 | switch (f.get_type()) | ||
| 1033 | { | ||
| 1034 | case filter<noun>::type::singleton: | ||
| 1035 | { | ||
| 1036 | bindings.emplace_back(f.get_elem()._id); | ||
| 1037 | |||
| 1038 | if (notlogic == f.get_notlogic()) | ||
| 1039 | { | ||
| 1040 | return "meronym_id = ?"; | ||
| 1041 | } else { | ||
| 1042 | return "meronym_id != ?"; | ||
| 1043 | } | ||
| 1044 | } | ||
| 1045 | |||
| 1046 | case filter<noun>::type::group: | ||
| 1047 | { | ||
| 1048 | bool truelogic = notlogic != f.get_notlogic(); | ||
| 1049 | |||
| 1050 | std::list<std::string> clauses; | ||
| 1051 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) { | ||
| 1052 | return recur(f2, truelogic); | ||
| 1053 | }); | ||
| 1054 | |||
| 1055 | if (truelogic == f.get_orlogic()) | ||
| 1056 | { | ||
| 1057 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
| 1058 | } else { | ||
| 1059 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
| 1060 | } | ||
| 1061 | } | ||
| 1062 | } | ||
| 1063 | }; | ||
| 1064 | |||
| 1065 | cond << recur(_part_holonym_of, _part_holonym_of.get_notlogic()); | ||
| 1066 | cond << ")"; | ||
| 1067 | conditions.push_back(cond.str()); | ||
| 1068 | } | ||
| 1069 | |||
| 1070 | if (!_full_part_holonym_of.empty()) | ||
| 1071 | { | ||
| 1072 | std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string { | ||
| 1073 | switch (f.get_type()) | ||
| 1074 | { | ||
| 1075 | case filter<noun>::type::singleton: | ||
| 1076 | { | ||
| 1077 | if (notlogic == f.get_notlogic()) | ||
| 1078 | { | ||
| 1079 | return "noun_id IN (SELECT holonym_id FROM part_holonym_tree_" + std::to_string(f.get_elem()._id) + ")"; | ||
| 1080 | } else { | ||
| 1081 | return "noun_id NOT IN (SELECT holonym_id FROM part_holonym_tree_" + std::to_string(f.get_elem()._id) + ")"; | ||
| 1082 | } | ||
| 1083 | } | ||
| 1084 | |||
| 1085 | case filter<noun>::type::group: | ||
| 1086 | { | ||
| 1087 | bool truelogic = notlogic != f.get_notlogic(); | ||
| 1088 | |||
| 1089 | std::list<std::string> clauses; | ||
| 1090 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) { | ||
| 1091 | return recur(f2, truelogic); | ||
| 1092 | }); | ||
| 1093 | |||
| 1094 | if (truelogic == f.get_orlogic()) | ||
| 1095 | { | ||
| 1096 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
| 1097 | } else { | ||
| 1098 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
| 1099 | } | ||
| 1100 | } | ||
| 1101 | } | ||
| 1102 | }; | ||
| 1103 | |||
| 1104 | conditions.push_back(recur(_full_part_holonym_of, false)); | ||
| 1105 | } | ||
| 1106 | |||
| 1107 | if (_is_substance_meronym) | ||
| 1108 | { | ||
| 1109 | conditions.push_back("noun_id IN (SELECT meronym_id FROM substance_meronymy)"); | ||
| 1110 | } | ||
| 1111 | |||
| 1112 | if (!_substance_meronym_of.empty()) | ||
| 1113 | { | ||
| 1114 | std::stringstream cond; | ||
| 1115 | if (_substance_meronym_of.get_notlogic()) | ||
| 1116 | { | ||
| 1117 | cond << "noun_id NOT IN"; | ||
| 1118 | } else { | ||
| 1119 | cond << "noun_id IN"; | ||
| 1120 | } | ||
| 1121 | |||
| 1122 | cond << "(SELECT meronym_id FROM substance_meronymy WHERE "; | ||
| 1123 | |||
| 1124 | std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string { | ||
| 1125 | switch (f.get_type()) | ||
| 1126 | { | ||
| 1127 | case filter<noun>::type::singleton: | ||
| 1128 | { | ||
| 1129 | bindings.emplace_back(f.get_elem()._id); | ||
| 1130 | |||
| 1131 | if (notlogic == f.get_notlogic()) | ||
| 1132 | { | ||
| 1133 | return "holonym_id = ?"; | ||
| 1134 | } else { | ||
| 1135 | return "holonym_id != ?"; | ||
| 1136 | } | ||
| 1137 | } | ||
| 1138 | |||
| 1139 | case filter<noun>::type::group: | ||
| 1140 | { | ||
| 1141 | bool truelogic = notlogic != f.get_notlogic(); | ||
| 1142 | |||
| 1143 | std::list<std::string> clauses; | ||
| 1144 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) { | ||
| 1145 | return recur(f2, truelogic); | ||
| 1146 | }); | ||
| 1147 | |||
| 1148 | if (truelogic == f.get_orlogic()) | ||
| 1149 | { | ||
| 1150 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
| 1151 | } else { | ||
| 1152 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
| 1153 | } | ||
| 1154 | } | ||
| 1155 | } | ||
| 1156 | }; | ||
| 1157 | |||
| 1158 | cond << recur(_substance_meronym_of, _substance_meronym_of.get_notlogic()); | ||
| 1159 | cond << ")"; | ||
| 1160 | conditions.push_back(cond.str()); | ||
| 1161 | } | ||
| 1162 | |||
| 1163 | if (!_full_substance_meronym_of.empty()) | ||
| 1164 | { | ||
| 1165 | std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string { | ||
| 1166 | switch (f.get_type()) | ||
| 1167 | { | ||
| 1168 | case filter<noun>::type::singleton: | ||
| 1169 | { | ||
| 1170 | if (notlogic == f.get_notlogic()) | ||
| 1171 | { | ||
| 1172 | return "noun_id IN (SELECT meronym_id FROM substance_meronym_tree_" + std::to_string(f.get_elem()._id) + ")"; | ||
| 1173 | } else { | ||
| 1174 | return "noun_id NOT IN (SELECT meronym_id FROM substance_meronym_tree_" + std::to_string(f.get_elem()._id) + ")"; | ||
| 1175 | } | ||
| 1176 | } | ||
| 1177 | |||
| 1178 | case filter<noun>::type::group: | ||
| 1179 | { | ||
| 1180 | bool truelogic = notlogic != f.get_notlogic(); | ||
| 1181 | |||
| 1182 | std::list<std::string> clauses; | ||
| 1183 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) { | ||
| 1184 | return recur(f2, truelogic); | ||
| 1185 | }); | ||
| 1186 | |||
| 1187 | if (truelogic == f.get_orlogic()) | ||
| 1188 | { | ||
| 1189 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
| 1190 | } else { | ||
| 1191 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
| 1192 | } | ||
| 1193 | } | ||
| 1194 | } | ||
| 1195 | }; | ||
| 1196 | |||
| 1197 | conditions.push_back(recur(_full_substance_meronym_of, false)); | ||
| 1198 | } | ||
| 1199 | |||
| 1200 | if (_is_substance_holonym) | ||
| 1201 | { | ||
| 1202 | conditions.push_back("noun_id IN (SELECT holonym_id FROM substance_meronymy)"); | ||
| 1203 | } | ||
| 1204 | |||
| 1205 | if (!_substance_holonym_of.empty()) | ||
| 1206 | { | ||
| 1207 | std::stringstream cond; | ||
| 1208 | if (_substance_holonym_of.get_notlogic()) | ||
| 1209 | { | ||
| 1210 | cond << "noun_id NOT IN"; | ||
| 1211 | } else { | ||
| 1212 | cond << "noun_id IN"; | ||
| 1213 | } | ||
| 1214 | |||
| 1215 | cond << "(SELECT holonym_id FROM substance_meronymy WHERE "; | ||
| 1216 | |||
| 1217 | std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string { | ||
| 1218 | switch (f.get_type()) | ||
| 1219 | { | ||
| 1220 | case filter<noun>::type::singleton: | ||
| 1221 | { | ||
| 1222 | bindings.emplace_back(f.get_elem()._id); | ||
| 1223 | |||
| 1224 | if (notlogic == f.get_notlogic()) | ||
| 1225 | { | ||
| 1226 | return "meronym_id = ?"; | ||
| 1227 | } else { | ||
| 1228 | return "meronym_id != ?"; | ||
| 1229 | } | ||
| 1230 | } | ||
| 1231 | |||
| 1232 | case filter<noun>::type::group: | ||
| 1233 | { | ||
| 1234 | bool truelogic = notlogic != f.get_notlogic(); | ||
| 1235 | |||
| 1236 | std::list<std::string> clauses; | ||
| 1237 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) { | ||
| 1238 | return recur(f2, truelogic); | ||
| 1239 | }); | ||
| 1240 | |||
| 1241 | if (truelogic == f.get_orlogic()) | ||
| 1242 | { | ||
| 1243 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
| 1244 | } else { | ||
| 1245 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
| 1246 | } | ||
| 1247 | } | ||
| 1248 | } | ||
| 1249 | }; | ||
| 1250 | |||
| 1251 | cond << recur(_substance_holonym_of, _substance_holonym_of.get_notlogic()); | ||
| 1252 | cond << ")"; | ||
| 1253 | conditions.push_back(cond.str()); | ||
| 1254 | } | ||
| 1255 | |||
| 1256 | if (!_full_substance_holonym_of.empty()) | ||
| 1257 | { | ||
| 1258 | std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string { | ||
| 1259 | switch (f.get_type()) | ||
| 1260 | { | ||
| 1261 | case filter<noun>::type::singleton: | ||
| 1262 | { | ||
| 1263 | if (notlogic == f.get_notlogic()) | ||
| 1264 | { | ||
| 1265 | return "noun_id IN (SELECT holonym_id FROM substance_holonym_tree_" + std::to_string(f.get_elem()._id) + ")"; | ||
| 1266 | } else { | ||
| 1267 | return "noun_id NOT IN (SELECT holonym_id FROM substance_holonym_tree_" + std::to_string(f.get_elem()._id) + ")"; | ||
| 1268 | } | ||
| 1269 | } | ||
| 1270 | |||
| 1271 | case filter<noun>::type::group: | ||
| 1272 | { | ||
| 1273 | bool truelogic = notlogic != f.get_notlogic(); | ||
| 1274 | |||
| 1275 | std::list<std::string> clauses; | ||
| 1276 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) { | ||
| 1277 | return recur(f2, truelogic); | ||
| 1278 | }); | ||
| 1279 | |||
| 1280 | if (truelogic == f.get_orlogic()) | ||
| 1281 | { | ||
| 1282 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
| 1283 | } else { | ||
| 1284 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
| 1285 | } | ||
| 1286 | } | ||
| 1287 | } | ||
| 1288 | }; | ||
| 1289 | |||
| 1290 | conditions.push_back(recur(_full_substance_holonym_of, false)); | ||
| 1291 | } | ||
| 1292 | |||
| 1293 | if (_is_member_meronym) | ||
| 1294 | { | ||
| 1295 | conditions.push_back("noun_id IN (SELECT meronym_id FROM member_meronymy)"); | ||
| 1296 | } | ||
| 1297 | |||
| 1298 | if (!_member_meronym_of.empty()) | ||
| 1299 | { | ||
| 1300 | std::stringstream cond; | ||
| 1301 | if (_member_meronym_of.get_notlogic()) | ||
| 1302 | { | ||
| 1303 | cond << "noun_id NOT IN"; | ||
| 1304 | } else { | ||
| 1305 | cond << "noun_id IN"; | ||
| 1306 | } | ||
| 1307 | |||
| 1308 | cond << "(SELECT meronym_id FROM member_meronymy WHERE "; | ||
| 1309 | |||
| 1310 | std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string { | ||
| 1311 | switch (f.get_type()) | ||
| 1312 | { | ||
| 1313 | case filter<noun>::type::singleton: | ||
| 1314 | { | ||
| 1315 | bindings.emplace_back(f.get_elem()._id); | ||
| 1316 | |||
| 1317 | if (notlogic == f.get_notlogic()) | ||
| 1318 | { | ||
| 1319 | return "holonym_id = ?"; | ||
| 1320 | } else { | ||
| 1321 | return "holonym_id != ?"; | ||
| 1322 | } | ||
| 1323 | } | ||
| 1324 | |||
| 1325 | case filter<noun>::type::group: | ||
| 1326 | { | ||
| 1327 | bool truelogic = notlogic != f.get_notlogic(); | ||
| 1328 | |||
| 1329 | std::list<std::string> clauses; | ||
| 1330 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) { | ||
| 1331 | return recur(f2, truelogic); | ||
| 1332 | }); | ||
| 1333 | |||
| 1334 | if (truelogic == f.get_orlogic()) | ||
| 1335 | { | ||
| 1336 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
| 1337 | } else { | ||
| 1338 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
| 1339 | } | ||
| 1340 | } | ||
| 1341 | } | ||
| 1342 | }; | ||
| 1343 | |||
| 1344 | cond << recur(_member_meronym_of, _member_meronym_of.get_notlogic()); | ||
| 1345 | cond << ")"; | ||
| 1346 | conditions.push_back(cond.str()); | ||
| 1347 | } | ||
| 1348 | |||
| 1349 | if (!_full_member_meronym_of.empty()) | ||
| 1350 | { | ||
| 1351 | std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string { | ||
| 1352 | switch (f.get_type()) | ||
| 1353 | { | ||
| 1354 | case filter<noun>::type::singleton: | ||
| 1355 | { | ||
| 1356 | if (notlogic == f.get_notlogic()) | ||
| 1357 | { | ||
| 1358 | return "noun_id IN (SELECT meronym_id FROM member_meronym_tree_" + std::to_string(f.get_elem()._id) + ")"; | ||
| 1359 | } else { | ||
| 1360 | return "noun_id NOT IN (SELECT meronym_id FROM member_meronym_tree_" + std::to_string(f.get_elem()._id) + ")"; | ||
| 1361 | } | ||
| 1362 | } | ||
| 1363 | |||
| 1364 | case filter<noun>::type::group: | ||
| 1365 | { | ||
| 1366 | bool truelogic = notlogic != f.get_notlogic(); | ||
| 1367 | |||
| 1368 | std::list<std::string> clauses; | ||
| 1369 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) { | ||
| 1370 | return recur(f2, truelogic); | ||
| 1371 | }); | ||
| 1372 | |||
| 1373 | if (truelogic == f.get_orlogic()) | ||
| 1374 | { | ||
| 1375 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
| 1376 | } else { | ||
| 1377 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
| 1378 | } | ||
| 1379 | } | ||
| 1380 | } | ||
| 1381 | }; | ||
| 1382 | |||
| 1383 | conditions.push_back(recur(_full_member_meronym_of, false)); | ||
| 1384 | } | ||
| 1385 | |||
| 1386 | if (_is_member_holonym) | ||
| 1387 | { | ||
| 1388 | conditions.push_back("noun_id IN (SELECT holonym_id FROM member_meronym)"); | ||
| 1389 | } | ||
| 1390 | |||
| 1391 | if (!_member_holonym_of.empty()) | ||
| 1392 | { | ||
| 1393 | std::stringstream cond; | ||
| 1394 | if (_member_holonym_of.get_notlogic()) | ||
| 1395 | { | ||
| 1396 | cond << "noun_id NOT IN"; | ||
| 1397 | } else { | ||
| 1398 | cond << "noun_id IN"; | ||
| 1399 | } | ||
| 1400 | |||
| 1401 | cond << "(SELECT holonym_id FROM member_meronymy WHERE "; | ||
| 1402 | |||
| 1403 | std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string { | ||
| 1404 | switch (f.get_type()) | ||
| 1405 | { | ||
| 1406 | case filter<noun>::type::singleton: | ||
| 1407 | { | ||
| 1408 | bindings.emplace_back(f.get_elem()._id); | ||
| 1409 | |||
| 1410 | if (notlogic == f.get_notlogic()) | ||
| 1411 | { | ||
| 1412 | return "meronym_id = ?"; | ||
| 1413 | } else { | ||
| 1414 | return "meronym_id != ?"; | ||
| 1415 | } | ||
| 1416 | } | ||
| 1417 | |||
| 1418 | case filter<noun>::type::group: | ||
| 1419 | { | ||
| 1420 | bool truelogic = notlogic != f.get_notlogic(); | ||
| 1421 | |||
| 1422 | std::list<std::string> clauses; | ||
| 1423 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) { | ||
| 1424 | return recur(f2, truelogic); | ||
| 1425 | }); | ||
| 1426 | |||
| 1427 | if (truelogic == f.get_orlogic()) | ||
| 1428 | { | ||
| 1429 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
| 1430 | } else { | ||
| 1431 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
| 1432 | } | ||
| 1433 | } | ||
| 1434 | } | ||
| 1435 | }; | ||
| 1436 | |||
| 1437 | cond << recur(_member_holonym_of, _member_holonym_of.get_notlogic()); | ||
| 1438 | cond << ")"; | ||
| 1439 | conditions.push_back(cond.str()); | ||
| 1440 | } | ||
| 1441 | |||
| 1442 | if (!_full_member_holonym_of.empty()) | ||
| 1443 | { | ||
| 1444 | std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string { | ||
| 1445 | switch (f.get_type()) | ||
| 1446 | { | ||
| 1447 | case filter<noun>::type::singleton: | ||
| 1448 | { | ||
| 1449 | if (notlogic == f.get_notlogic()) | ||
| 1450 | { | ||
| 1451 | return "noun_id IN (SELECT holonym_id FROM member_holonym_tree_" + std::to_string(f.get_elem()._id) + ")"; | ||
| 1452 | } else { | ||
| 1453 | return "noun_id NOT IN (SELECT holonym_id FROM member_holonym_tree_" + std::to_string(f.get_elem()._id) + ")"; | ||
| 1454 | } | ||
| 1455 | } | ||
| 1456 | |||
| 1457 | case filter<noun>::type::group: | ||
| 1458 | { | ||
| 1459 | bool truelogic = notlogic != f.get_notlogic(); | ||
| 1460 | |||
| 1461 | std::list<std::string> clauses; | ||
| 1462 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) { | ||
| 1463 | return recur(f2, truelogic); | ||
| 1464 | }); | ||
| 1465 | |||
| 1466 | if (truelogic == f.get_orlogic()) | ||
| 1467 | { | ||
| 1468 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
| 1469 | } else { | ||
| 1470 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
| 1471 | } | ||
| 1472 | } | ||
| 1473 | } | ||
| 1474 | }; | ||
| 1475 | |||
| 1476 | conditions.push_back(recur(_full_member_holonym_of, false)); | ||
| 1477 | } | ||
| 1478 | |||
| 1479 | if (_is_proper) | ||
| 1480 | { | ||
| 1481 | conditions.push_back("proper = 1"); | ||
| 1482 | } | ||
| 1483 | |||
| 1484 | if (_is_not_proper) | ||
| 1485 | { | ||
| 1486 | conditions.push_back("proper = 0"); | ||
| 1487 | } | ||
| 1488 | |||
| 1489 | if (_is_instance) | ||
| 1490 | { | ||
| 1491 | conditions.push_back("noun_id IN (SELECT instance_id FROM instantiation)"); | ||
| 1492 | } | ||
| 1493 | |||
| 1494 | if (!_instance_of.empty()) | ||
| 1495 | { | ||
| 1496 | std::stringstream cond; | ||
| 1497 | if (_instance_of.get_notlogic()) | ||
| 1498 | { | ||
| 1499 | cond << "noun_id NOT IN"; | ||
| 1500 | } else { | ||
| 1501 | cond << "noun_id IN"; | ||
| 1502 | } | ||
| 1503 | |||
| 1504 | cond << "(SELECT instance_id FROM instantiation WHERE "; | ||
| 1505 | |||
| 1506 | std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string { | ||
| 1507 | switch (f.get_type()) | ||
| 1508 | { | ||
| 1509 | case filter<noun>::type::singleton: | ||
| 1510 | { | ||
| 1511 | bindings.emplace_back(f.get_elem()._id); | ||
| 1512 | |||
| 1513 | if (notlogic == f.get_notlogic()) | ||
| 1514 | { | ||
| 1515 | return "class_id = ?"; | ||
| 1516 | } else { | ||
| 1517 | return "class_id != ?"; | ||
| 1518 | } | ||
| 1519 | } | ||
| 1520 | |||
| 1521 | case filter<noun>::type::group: | ||
| 1522 | { | ||
| 1523 | bool truelogic = notlogic != f.get_notlogic(); | ||
| 1524 | |||
| 1525 | std::list<std::string> clauses; | ||
| 1526 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) { | ||
| 1527 | return recur(f2, truelogic); | ||
| 1528 | }); | ||
| 1529 | |||
| 1530 | if (truelogic == f.get_orlogic()) | ||
| 1531 | { | ||
| 1532 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
| 1533 | } else { | ||
| 1534 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
| 1535 | } | ||
| 1536 | } | ||
| 1537 | } | ||
| 1538 | }; | ||
| 1539 | |||
| 1540 | cond << recur(_instance_of, _instance_of.get_notlogic()); | ||
| 1541 | cond << ")"; | ||
| 1542 | conditions.push_back(cond.str()); | ||
| 1543 | } | ||
| 1544 | |||
| 1545 | if (_is_class) | ||
| 1546 | { | ||
| 1547 | conditions.push_back("noun_id IN (SELECT class_id FROM instantiation)"); | ||
| 1548 | } | ||
| 1549 | |||
| 1550 | if (!_class_of.empty()) | ||
| 1551 | { | ||
| 1552 | std::stringstream cond; | ||
| 1553 | if (_class_of.get_notlogic()) | ||
| 1554 | { | ||
| 1555 | cond << "noun_id NOT IN"; | ||
| 1556 | } else { | ||
| 1557 | cond << "noun_id IN"; | ||
| 1558 | } | ||
| 1559 | |||
| 1560 | cond << "(SELECT class_id FROM instantiation WHERE "; | ||
| 1561 | |||
| 1562 | std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string { | ||
| 1563 | switch (f.get_type()) | ||
| 1564 | { | ||
| 1565 | case filter<noun>::type::singleton: | ||
| 1566 | { | ||
| 1567 | bindings.emplace_back(f.get_elem()._id); | ||
| 1568 | |||
| 1569 | if (notlogic == f.get_notlogic()) | ||
| 1570 | { | ||
| 1571 | return "instance_id = ?"; | ||
| 1572 | } else { | ||
| 1573 | return "instance_id != ?"; | ||
| 1574 | } | ||
| 1575 | } | ||
| 1576 | |||
| 1577 | case filter<noun>::type::group: | ||
| 1578 | { | ||
| 1579 | bool truelogic = notlogic != f.get_notlogic(); | ||
| 1580 | |||
| 1581 | std::list<std::string> clauses; | ||
| 1582 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) { | ||
| 1583 | return recur(f2, truelogic); | ||
| 1584 | }); | ||
| 1585 | |||
| 1586 | if (truelogic == f.get_orlogic()) | ||
| 1587 | { | ||
| 1588 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
| 1589 | } else { | ||
| 1590 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
| 1591 | } | ||
| 1592 | } | ||
| 1593 | } | ||
| 1594 | }; | ||
| 1595 | |||
| 1596 | cond << recur(_class_of, _class_of.get_notlogic()); | ||
| 1597 | cond << ")"; | ||
| 1598 | conditions.push_back(cond.str()); | ||
| 1599 | } | ||
| 1600 | |||
| 1601 | if (_has_synonyms) | ||
| 1602 | { | ||
| 1603 | conditions.push_back("noun_id IN (SELECT noun_2_id FROM noun_synonymy)"); | ||
| 1604 | } | ||
| 1605 | |||
| 1606 | if (!_synonym_of.empty()) | ||
| 1607 | { | ||
| 1608 | std::stringstream cond; | ||
| 1609 | if (_synonym_of.get_notlogic()) | ||
| 1610 | { | ||
| 1611 | cond << "noun_id NOT IN"; | ||
| 1612 | } else { | ||
| 1613 | cond << "noun_id IN"; | ||
| 1614 | } | ||
| 1615 | |||
| 1616 | cond << "(SELECT noun_2_id FROM noun_synonymy WHERE "; | ||
| 1617 | |||
| 1618 | std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string { | ||
| 1619 | switch (f.get_type()) | ||
| 1620 | { | ||
| 1621 | case filter<noun>::type::singleton: | ||
| 1622 | { | ||
| 1623 | bindings.emplace_back(f.get_elem()._id); | ||
| 1624 | |||
| 1625 | if (notlogic == f.get_notlogic()) | ||
| 1626 | { | ||
| 1627 | return "noun_1_id = ?"; | ||
| 1628 | } else { | ||
| 1629 | return "noun_1_id != ?"; | ||
| 1630 | } | ||
| 1631 | } | ||
| 1632 | |||
| 1633 | case filter<noun>::type::group: | ||
| 1634 | { | ||
| 1635 | bool truelogic = notlogic != f.get_notlogic(); | ||
| 1636 | |||
| 1637 | std::list<std::string> clauses; | ||
| 1638 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) { | ||
| 1639 | return recur(f2, truelogic); | ||
| 1640 | }); | ||
| 1641 | |||
| 1642 | if (truelogic == f.get_orlogic()) | ||
| 1643 | { | ||
| 1644 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
| 1645 | } else { | ||
| 1646 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
| 1647 | } | ||
| 1648 | } | ||
| 1649 | } | ||
| 1650 | }; | ||
| 1651 | |||
| 1652 | cond << recur(_synonym_of, _synonym_of.get_notlogic()); | ||
| 1653 | cond << ")"; | ||
| 1654 | conditions.push_back(cond.str()); | ||
| 1655 | } | ||
| 1656 | |||
| 1657 | if (_has_antonyms) | ||
| 1658 | { | ||
| 1659 | conditions.push_back("noun_id IN (SELECT noun_2_id FROM noun_antonymy)"); | ||
| 1660 | } | ||
| 1661 | |||
| 1662 | if (!_antonym_of.empty()) | ||
| 1663 | { | ||
| 1664 | std::stringstream cond; | ||
| 1665 | if (_antonym_of.get_notlogic()) | ||
| 1666 | { | ||
| 1667 | cond << "noun_id NOT IN"; | ||
| 1668 | } else { | ||
| 1669 | cond << "noun_id IN"; | ||
| 1670 | } | ||
| 1671 | |||
| 1672 | cond << "(SELECT noun_2_id FROM noun_antonymy WHERE "; | ||
| 1673 | |||
| 1674 | std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string { | ||
| 1675 | switch (f.get_type()) | ||
| 1676 | { | ||
| 1677 | case filter<noun>::type::singleton: | ||
| 1678 | { | ||
| 1679 | bindings.emplace_back(f.get_elem()._id); | ||
| 1680 | |||
| 1681 | if (notlogic == f.get_notlogic()) | ||
| 1682 | { | ||
| 1683 | return "noun_1_id = ?"; | ||
| 1684 | } else { | ||
| 1685 | return "noun_1_id != ?"; | ||
| 1686 | } | ||
| 1687 | } | ||
| 1688 | |||
| 1689 | case filter<noun>::type::group: | ||
| 1690 | { | ||
| 1691 | bool truelogic = notlogic != f.get_notlogic(); | ||
| 1692 | |||
| 1693 | std::list<std::string> clauses; | ||
| 1694 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) { | ||
| 1695 | return recur(f2, truelogic); | ||
| 1696 | }); | ||
| 1697 | |||
| 1698 | if (truelogic == f.get_orlogic()) | ||
| 1699 | { | ||
| 1700 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
| 1701 | } else { | ||
| 1702 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
| 1703 | } | ||
| 1704 | } | ||
| 1705 | } | ||
| 1706 | }; | ||
| 1707 | |||
| 1708 | cond << recur(_antonym_of, _antonym_of.get_notlogic()); | ||
| 1709 | cond << ")"; | ||
| 1710 | conditions.push_back(cond.str()); | ||
| 1711 | } | ||
| 1712 | |||
| 1713 | if (_has_pertainym) | ||
| 1714 | { | ||
| 1715 | conditions.push_back("noun_id IN (SELECT noun_id FROM pertainymy)"); | ||
| 1716 | } | ||
| 1717 | |||
| 1718 | if (!_anti_pertainym_of.empty()) | ||
| 1719 | { | ||
| 1720 | std::stringstream cond; | ||
| 1721 | if (_anti_pertainym_of.get_notlogic()) | ||
| 1722 | { | ||
| 1723 | cond << "noun_id NOT IN"; | ||
| 1724 | } else { | ||
| 1725 | cond << "noun_id IN"; | ||
| 1726 | } | ||
| 1727 | |||
| 1728 | cond << "(SELECT noun_id FROM pertainymy WHERE "; | ||
| 1729 | |||
| 1730 | std::function<std::string (filter<adjective>, bool)> recur = [&] (filter<adjective> f, bool notlogic) -> std::string { | ||
| 1731 | switch (f.get_type()) | ||
| 1732 | { | ||
| 1733 | case filter<adjective>::type::singleton: | ||
| 1734 | { | ||
| 1735 | bindings.emplace_back(f.get_elem()._id); | ||
| 1736 | |||
| 1737 | if (notlogic == f.get_notlogic()) | ||
| 1738 | { | ||
| 1739 | return "pertainym_id = ?"; | ||
| 1740 | } else { | ||
| 1741 | return "pertainym_id != ?"; | ||
| 1742 | } | ||
| 1743 | } | ||
| 1744 | |||
| 1745 | case filter<adjective>::type::group: | ||
| 1746 | { | ||
| 1747 | bool truelogic = notlogic != f.get_notlogic(); | ||
| 1748 | |||
| 1749 | std::list<std::string> clauses; | ||
| 1750 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<adjective> f2) { | ||
| 1751 | return recur(f2, truelogic); | ||
| 1752 | }); | ||
| 1753 | |||
| 1754 | if (truelogic == f.get_orlogic()) | ||
| 1755 | { | ||
| 1756 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
| 1757 | } else { | ||
| 1758 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
| 1759 | } | ||
| 1760 | } | ||
| 1761 | } | ||
| 1762 | }; | ||
| 1763 | |||
| 1764 | cond << recur(_anti_pertainym_of, _anti_pertainym_of.get_notlogic()); | ||
| 1765 | cond << ")"; | ||
| 1766 | conditions.push_back(cond.str()); | ||
| 1767 | } | ||
| 1768 | |||
| 1769 | if (_is_attribute) | ||
| 1770 | { | ||
| 1771 | conditions.push_back("noun_id IN (SELECT noun_id FROM variation)"); | ||
| 1772 | } | ||
| 1773 | |||
| 1774 | if (!_attribute_of.empty()) | ||
| 1775 | { | ||
| 1776 | std::stringstream cond; | ||
| 1777 | if (_attribute_of.get_notlogic()) | ||
| 1778 | { | ||
| 1779 | cond << "noun_id NOT IN"; | ||
| 1780 | } else { | ||
| 1781 | cond << "noun_id IN"; | ||
| 1782 | } | ||
| 1783 | |||
| 1784 | cond << "(SELECT noun_id FROM variation WHERE "; | ||
| 1785 | |||
| 1786 | std::function<std::string (filter<adjective>, bool)> recur = [&] (filter<adjective> f, bool notlogic) -> std::string { | ||
| 1787 | switch (f.get_type()) | ||
| 1788 | { | ||
| 1789 | case filter<adjective>::type::singleton: | ||
| 1790 | { | ||
| 1791 | bindings.emplace_back(f.get_elem()._id); | ||
| 1792 | |||
| 1793 | if (notlogic == f.get_notlogic()) | ||
| 1794 | { | ||
| 1795 | return "adjective_id = ?"; | ||
| 1796 | } else { | ||
| 1797 | return "adjective_id != ?"; | ||
| 1798 | } | ||
| 1799 | } | ||
| 1800 | |||
| 1801 | case filter<adjective>::type::group: | ||
| 1802 | { | ||
| 1803 | bool truelogic = notlogic != f.get_notlogic(); | ||
| 1804 | |||
| 1805 | std::list<std::string> clauses; | ||
| 1806 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<adjective> f2) { | ||
| 1807 | return recur(f2, truelogic); | ||
| 1808 | }); | ||
| 1809 | |||
| 1810 | if (truelogic == f.get_orlogic()) | ||
| 1811 | { | ||
| 1812 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
| 1813 | } else { | ||
| 1814 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
| 1815 | } | ||
| 1816 | } | ||
| 1817 | } | ||
| 1818 | }; | ||
| 1819 | |||
| 1820 | cond << recur(_attribute_of, _attribute_of.get_notlogic()); | ||
| 1821 | cond << ")"; | ||
| 1822 | conditions.push_back(cond.str()); | ||
| 1823 | } | ||
| 1824 | |||
| 1825 | if (_at_least_n_images != unlimited) | ||
| 1826 | { | ||
| 1827 | conditions.push_back("images >= ?"); | ||
| 1828 | bindings.emplace_back(_at_least_n_images); | ||
| 1829 | } | ||
| 1830 | |||
| 1831 | if (!_with_wnid.empty()) | ||
| 1832 | { | ||
| 1833 | std::vector<std::string> clauses(_with_wnid.size(), "wnid = ?"); | ||
| 1834 | std::string cond = verbly::implode(std::begin(clauses), std::end(clauses), " OR "); | ||
| 1835 | conditions.push_back("(" + cond + ")"); | ||
| 1836 | |||
| 1837 | for (auto wnid : _with_wnid) | ||
| 1838 | { | ||
| 1839 | bindings.emplace_back(wnid); | ||
| 1840 | } | ||
| 1841 | } | ||
| 1842 | |||
| 1843 | /* | ||
| 1844 | if (!_derived_from_adjective.empty()) | ||
| 1845 | { | ||
| 1846 | std::list<std::string> clauses(_derived_from_adjective.size(), "adjective_id = @DERADJ"); | ||
| 1847 | std::string cond = "noun_id IN (SELECT noun_id FROM noun_adjective_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
| 1848 | conditions.push_back(cond); | ||
| 1849 | } | ||
| 1850 | |||
| 1851 | if (!_not_derived_from_adjective.empty()) | ||
| 1852 | { | ||
| 1853 | std::list<std::string> clauses(_not_derived_from_adjective.size(), "adjective_id = @NDERADJ"); | ||
| 1854 | std::string cond = "noun_id NOT IN (SELECT noun_id FROM noun_adjective_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
| 1855 | conditions.push_back(cond); | ||
| 1856 | } | ||
| 1857 | |||
| 1858 | if (!_derived_from_adverb.empty()) | ||
| 1859 | { | ||
| 1860 | std::list<std::string> clauses(_derived_from_adverb.size(), "adverb_id = @DERADV"); | ||
| 1861 | std::string cond = "noun_id IN (SELECT noun_id FROM noun_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
| 1862 | conditions.push_back(cond); | ||
| 1863 | } | ||
| 1864 | |||
| 1865 | if (!_not_derived_from_adverb.empty()) | ||
| 1866 | { | ||
| 1867 | std::list<std::string> clauses(_not_derived_from_adverb.size(), "adverb_id = @NDERADV"); | ||
| 1868 | std::string cond = "noun_id NOT IN (SELECT noun_id FROM noun_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
| 1869 | conditions.push_back(cond); | ||
| 1870 | } | ||
| 1871 | |||
| 1872 | if (!_derived_from_noun.empty()) | ||
| 1873 | { | ||
| 1874 | std::list<std::string> clauses(_derived_from_noun.size(), "noun_2_id = @DERN"); | ||
| 1875 | std::string cond = "noun_id IN (SELECT noun_1_id FROM noun_noun_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
| 1876 | conditions.push_back(cond); | ||
| 1877 | } | ||
| 1878 | |||
| 1879 | if (!_not_derived_from_noun.empty()) | ||
| 1880 | { | ||
| 1881 | std::list<std::string> clauses(_not_derived_from_noun.size(), "noun_2_id = @NDERN"); | ||
| 1882 | std::string cond = "noun_id NOT IN (SELECT noun_1_id FROM noun_noun_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
| 1883 | conditions.push_back(cond); | ||
| 1884 | } | ||
| 1885 | */ | ||
| 1886 | if (!conditions.empty()) | ||
| 1887 | { | ||
| 1888 | construct << " WHERE "; | ||
| 1889 | construct << verbly::implode(std::begin(conditions), std::end(conditions), " AND "); | ||
| 1890 | } | ||
| 1891 | |||
| 1892 | if (_random) | ||
| 1893 | { | ||
| 1894 | construct << " ORDER BY RANDOM()"; | ||
| 1895 | } | ||
| 1896 | |||
| 1897 | if (_limit != unlimited) | ||
| 1898 | { | ||
| 1899 | construct << " LIMIT " << _limit; | ||
| 1900 | } | ||
| 1901 | |||
| 1902 | sqlite3_stmt* ppstmt; | ||
| 1903 | std::string query = construct.str(); | ||
| 1904 | if (sqlite3_prepare_v2(_data.ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
| 1905 | { | ||
| 1906 | throw std::runtime_error(sqlite3_errmsg(_data.ppdb)); | ||
| 1907 | } | ||
| 1908 | |||
| 1909 | int i = 1; | ||
| 1910 | for (auto& binding : bindings) | ||
| 1911 | { | ||
| 1912 | switch (binding.get_type()) | ||
| 1913 | { | ||
| 1914 | case binding::type::integer: | ||
| 1915 | { | ||
| 1916 | sqlite3_bind_int(ppstmt, i, binding.get_integer()); | ||
| 1917 | |||
| 1918 | break; | ||
| 1919 | } | ||
| 1920 | |||
| 1921 | case binding::type::string: | ||
| 1922 | { | ||
| 1923 | sqlite3_bind_text(ppstmt, i, binding.get_string().c_str(), binding.get_string().length(), SQLITE_TRANSIENT); | ||
| 1924 | |||
| 1925 | break; | ||
| 1926 | } | ||
| 1927 | } | ||
| 1928 | |||
| 1929 | i++; | ||
| 1930 | } | ||
| 1931 | |||
| 1932 | /* | ||
| 1933 | for (auto adj : _derived_from_adjective) | ||
| 1934 | { | ||
| 1935 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@DERADJ"), adj._id); | ||
| 1936 | } | ||
| 1937 | |||
| 1938 | for (auto adj : _not_derived_from_adjective) | ||
| 1939 | { | ||
| 1940 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@NDERADJ"), adj._id); | ||
| 1941 | } | ||
| 1942 | |||
| 1943 | for (auto adv : _derived_from_adverb) | ||
| 1944 | { | ||
| 1945 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@DERADV"), adv._id); | ||
| 1946 | } | ||
| 1947 | |||
| 1948 | for (auto adv : _not_derived_from_adverb) | ||
| 1949 | { | ||
| 1950 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@NDERADV"), adv._id); | ||
| 1951 | } | ||
| 1952 | |||
| 1953 | for (auto n : _derived_from_noun) | ||
| 1954 | { | ||
| 1955 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@DERN"), n._id); | ||
| 1956 | } | ||
| 1957 | |||
| 1958 | for (auto n : _not_derived_from_noun) | ||
| 1959 | { | ||
| 1960 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@NDERN"), n._id); | ||
| 1961 | } | ||
| 1962 | */ | ||
| 1963 | std::list<noun> output; | ||
| 1964 | while (sqlite3_step(ppstmt) == SQLITE_ROW) | ||
| 1965 | { | ||
| 1966 | noun tnc {_data, sqlite3_column_int(ppstmt, 0)}; | ||
| 1967 | tnc._singular = std::string(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 1))); | ||
| 1968 | |||
| 1969 | if (sqlite3_column_type(ppstmt, 2) != SQLITE_NULL) | ||
| 1970 | { | ||
| 1971 | tnc._plural = std::string(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 2))); | ||
| 1972 | } | ||
| 1973 | |||
| 1974 | tnc._wnid = sqlite3_column_int(ppstmt, 3); | ||
| 1975 | |||
| 1976 | output.push_back(tnc); | ||
| 1977 | } | ||
| 1978 | |||
| 1979 | sqlite3_finalize(ppstmt); | ||
| 1980 | |||
| 1981 | for (auto& noun : output) | ||
| 1982 | { | ||
| 1983 | query = "SELECT pronunciation, prerhyme, rhyme FROM noun_pronunciations WHERE noun_id = ?"; | ||
| 1984 | if (sqlite3_prepare_v2(_data.ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
| 1985 | { | ||
| 1986 | throw std::runtime_error(sqlite3_errmsg(_data.ppdb)); | ||
| 1987 | } | ||
| 1988 | |||
| 1989 | sqlite3_bind_int(ppstmt, 1, noun._id); | ||
| 1990 | |||
| 1991 | while (sqlite3_step(ppstmt) == SQLITE_ROW) | ||
| 1992 | { | ||
| 1993 | std::string pronunciation(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 0))); | ||
| 1994 | auto phonemes = verbly::split<std::list<std::string>>(pronunciation, " "); | ||
| 1995 | |||
| 1996 | noun.pronunciations.push_back(phonemes); | ||
| 1997 | |||
| 1998 | if ((sqlite3_column_type(ppstmt, 1) != SQLITE_NULL) && (sqlite3_column_type(ppstmt, 2) != SQLITE_NULL)) | ||
| 1999 | { | ||
| 2000 | std::string prerhyme(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 1))); | ||
| 2001 | std::string rhyming(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 2))); | ||
| 2002 | |||
| 2003 | noun.rhymes.emplace_back(prerhyme, rhyming); | ||
| 2004 | } | ||
| 2005 | } | ||
| 2006 | |||
| 2007 | sqlite3_finalize(ppstmt); | ||
| 2008 | } | ||
| 2009 | |||
| 2010 | return output; | ||
| 2011 | } | ||
| 2012 | |||
| 2013 | }; | ||
| diff --git a/lib/noun_query.h b/lib/noun_query.h deleted file mode 100644 index 74df260..0000000 --- a/lib/noun_query.h +++ /dev/null | |||
| @@ -1,180 +0,0 @@ | |||
| 1 | #ifndef NOUN_QUERY_H_5DE51DD7 | ||
| 2 | #define NOUN_QUERY_H_5DE51DD7 | ||
| 3 | |||
// noun_query: builder-style description of a noun lookup. Every criterion
// method returns a noun_query& (presumably *this so calls can be chained --
// the setter definitions are not part of this header; confirm in the .cpp).
// run() turns the accumulated criteria into SQL and returns the matches.
| 4 | namespace verbly { | ||
| 5 | |||
| 6 | class noun_query { | ||
| 7 | public: | ||
// Binds the query to the data source it will be executed against.
| 8 | noun_query(const data& _data); | ||
| 9 | |||
// Result shaping and pronunciation/rhyme related criteria.
| 10 | noun_query& limit(int _limit); | ||
| 11 | noun_query& random(); | ||
| 12 | noun_query& except(const noun& _word); | ||
| 13 | noun_query& rhymes_with(const word& _word); | ||
| 14 | noun_query& rhymes_with(rhyme _r); | ||
| 15 | noun_query& has_pronunciation(); | ||
| 16 | noun_query& has_rhyming_noun(); | ||
| 17 | noun_query& has_rhyming_adjective(); | ||
| 18 | noun_query& has_rhyming_adverb(); | ||
| 19 | noun_query& has_rhyming_verb(); | ||
| 20 | noun_query& with_stress(filter<std::vector<bool>> _arg); | ||
| 21 | |||
// Criteria on the written form of the noun.
| 22 | noun_query& with_singular_form(std::string _arg); | ||
| 23 | noun_query& with_prefix(filter<std::string> _f); | ||
| 24 | noun_query& with_suffix(filter<std::string> _f); | ||
| 25 | |||
| 26 | noun_query& requires_plural_form(); | ||
| 27 | |||
| 28 | noun_query& with_complexity(int _arg); | ||
| 29 | |||
// Relationship criteria. Each relation comes as a family: is_X() (flag),
// X_of(filter) and, for some relations, full_X_of(filter).
// NOTE(review): the "full_" variants presumably follow the relation
// transitively -- confirm against the definitions.
| 30 | noun_query& is_hypernym(); | ||
| 31 | noun_query& hypernym_of(filter<noun> _f); | ||
| 32 | noun_query& full_hypernym_of(filter<noun> _f); | ||
| 33 | |||
| 34 | noun_query& is_hyponym(); | ||
| 35 | noun_query& hyponym_of(filter<noun> _f); | ||
| 36 | noun_query& full_hyponym_of(filter<noun> _f); | ||
| 37 | |||
| 38 | noun_query& is_part_meronym(); | ||
| 39 | noun_query& part_meronym_of(filter<noun> _f); | ||
| 40 | noun_query& full_part_meronym_of(filter<noun> _f); | ||
| 41 | |||
| 42 | noun_query& is_part_holonym(); | ||
| 43 | noun_query& part_holonym_of(filter<noun> _f); | ||
| 44 | noun_query& full_part_holonym_of(filter<noun> _f); | ||
| 45 | |||
| 46 | noun_query& is_substance_meronym(); | ||
| 47 | noun_query& substance_meronym_of(filter<noun> _f); | ||
| 48 | noun_query& full_substance_meronym_of(filter<noun> _f); | ||
| 49 | |||
| 50 | noun_query& is_substance_holonym(); | ||
| 51 | noun_query& substance_holonym_of(filter<noun> _f); | ||
| 52 | noun_query& full_substance_holonym_of(filter<noun> _f); | ||
| 53 | |||
| 54 | noun_query& is_member_meronym(); | ||
| 55 | noun_query& member_meronym_of(filter<noun> _f); | ||
| 56 | noun_query& full_member_meronym_of(filter<noun> _f); | ||
| 57 | |||
| 58 | noun_query& is_member_holonym(); | ||
| 59 | noun_query& member_holonym_of(filter<noun> _f); | ||
| 60 | noun_query& full_member_holonym_of(filter<noun> _f); | ||
| 61 | |||
| 62 | noun_query& is_proper(); | ||
| 63 | noun_query& is_not_proper(); | ||
| 64 | |||
| 65 | noun_query& is_instance(); | ||
| 66 | noun_query& instance_of(filter<noun> _f); | ||
| 67 | |||
| 68 | noun_query& is_class(); | ||
| 69 | noun_query& class_of(filter<noun> _f); | ||
| 70 | |||
| 71 | noun_query& has_synonyms(); | ||
| 72 | noun_query& synonym_of(filter<noun> _f); | ||
| 73 | |||
| 74 | noun_query& has_antonyms(); | ||
| 75 | noun_query& antonym_of(filter<noun> _f); | ||
| 76 | |||
// These two families take adjective filters rather than noun filters.
| 77 | noun_query& has_pertainym(); | ||
| 78 | noun_query& anti_pertainym_of(filter<adjective> _f); | ||
| 79 | |||
| 80 | noun_query& is_attribute(); | ||
| 81 | noun_query& attribute_of(filter<adjective> _f); | ||
| 82 | |||
| 83 | noun_query& at_least_n_images(int _arg); | ||
| 84 | noun_query& with_wnid(int _arg); | ||
| 85 | |||
| 86 | /* noun_query& derived_from(const word& _w); | ||
| 87 | noun_query& not_derived_from(const word& _w);*/ | ||
| 88 | |||
// Executes the query. The implementation joins all generated conditions with
// AND, appends ORDER BY RANDOM() when random ordering was requested and a
// LIMIT clause when the limit is not `unlimited`, and throws
// std::runtime_error if a statement cannot be prepared.
| 89 | std::list<noun> run() const; | ||
| 90 | |||
// Sentinel meaning "not set"; shared by the limit, complexity and image-count
// fields below.
| 91 | const static int unlimited = -1; | ||
| 92 | |||
| 93 | private: | ||
// Stored by reference: the data object must outlive this query.
| 94 | const data& _data; | ||
| 95 | int _limit = unlimited; | ||
| 96 | bool _random = false; | ||
| 97 | std::list<rhyme> _rhymes; | ||
| 98 | std::list<noun> _except; | ||
| 99 | bool _has_prn = false; | ||
| 100 | bool _has_rhyming_noun = false; | ||
| 101 | bool _has_rhyming_adjective = false; | ||
| 102 | bool _has_rhyming_adverb = false; | ||
| 103 | bool _has_rhyming_verb = false; | ||
| 104 | filter<std::vector<bool>> _stress; | ||
| 105 | |||
| 106 | std::list<std::string> _with_singular_form; | ||
| 107 | filter<std::string> _with_prefix; | ||
| 108 | filter<std::string> _with_suffix; | ||
| 109 | |||
| 110 | int _with_complexity = unlimited; | ||
| 111 | |||
| 112 | bool _requires_plural_form = false; | ||
| 113 | |||
// One flag plus one or two filters per relation, mirroring the public
// criterion families above.
| 114 | bool _is_hypernym = false; | ||
| 115 | filter<noun> _hypernym_of; | ||
| 116 | filter<noun> _full_hypernym_of; | ||
| 117 | |||
| 118 | bool _is_hyponym = false; | ||
| 119 | filter<noun> _hyponym_of; | ||
| 120 | filter<noun> _full_hyponym_of; | ||
| 121 | |||
| 122 | bool _is_part_meronym = false; | ||
| 123 | filter<noun> _part_meronym_of; | ||
| 124 | filter<noun> _full_part_meronym_of; | ||
| 125 | |||
| 126 | bool _is_substance_meronym = false; | ||
| 127 | filter<noun> _substance_meronym_of; | ||
| 128 | filter<noun> _full_substance_meronym_of; | ||
| 129 | |||
| 130 | bool _is_member_meronym = false; | ||
| 131 | filter<noun> _member_meronym_of; | ||
| 132 | filter<noun> _full_member_meronym_of; | ||
| 133 | |||
| 134 | bool _is_part_holonym = false; | ||
| 135 | filter<noun> _part_holonym_of; | ||
| 136 | filter<noun> _full_part_holonym_of; | ||
| 137 | |||
| 138 | bool _is_substance_holonym = false; | ||
| 139 | filter<noun> _substance_holonym_of; | ||
| 140 | filter<noun> _full_substance_holonym_of; | ||
| 141 | |||
| 142 | bool _is_member_holonym = false; | ||
| 143 | filter<noun> _member_holonym_of; | ||
| 144 | filter<noun> _full_member_holonym_of; | ||
| 145 | |||
| 146 | bool _is_proper = false; | ||
| 147 | bool _is_not_proper = false; | ||
| 148 | |||
| 149 | bool _is_instance = false; | ||
| 150 | filter<noun> _instance_of; | ||
| 151 | |||
| 152 | bool _is_class = false; | ||
| 153 | filter<noun> _class_of; | ||
| 154 | |||
| 155 | bool _has_synonyms = false; | ||
| 156 | filter<noun> _synonym_of; | ||
| 157 | |||
// run() resolves the antonym criteria against the noun_antonymy table.
| 158 | bool _has_antonyms = false; | ||
| 159 | filter<noun> _antonym_of; | ||
| 160 | |||
// run() resolves the pertainym criteria against the pertainymy table.
| 161 | bool _has_pertainym = false; | ||
| 162 | filter<adjective> _anti_pertainym_of; | ||
| 163 | |||
// run() resolves the attribute criteria against the variation table.
| 164 | bool _is_attribute = false; | ||
| 165 | filter<adjective> _attribute_of; | ||
| 166 | |||
// run() emits "images >= ?" when _at_least_n_images is set, and one
// "wnid = ?" alternative (joined with OR) per entry of _with_wnid.
| 167 | int _at_least_n_images = unlimited; | ||
| 168 | std::set<int> _with_wnid; | ||
| 169 | |||
| 170 | /* std::list<adjective> _derived_from_adjective; | ||
| 171 | std::list<adjective> _not_derived_from_adjective; | ||
| 172 | std::list<adverb> _derived_from_adverb; | ||
| 173 | std::list<adverb> _not_derived_from_adverb; | ||
| 174 | std::list<noun> _derived_from_noun; | ||
| 175 | std::list<noun> _not_derived_from_noun;*/ | ||
| 176 | }; | ||
| 177 | |||
| 178 | }; | ||
| 179 | |||
| 180 | #endif /* end of include guard: NOUN_QUERY_H_5DE51DD7 */ | ||
| diff --git a/lib/preposition.cpp b/lib/preposition.cpp deleted file mode 100644 index cea9165..0000000 --- a/lib/preposition.cpp +++ /dev/null | |||
| @@ -1,107 +0,0 @@ | |||
| 1 | #include "verbly.h" | ||
| 2 | |||
| 3 | namespace verbly { | ||
| 4 | |||
| 5 | std::string preposition::get_form() const | ||
| 6 | { | ||
| 7 | return form; | ||
| 8 | } | ||
| 9 | |||
| 10 | preposition_query::preposition_query(const data& _data) : _data(_data) | ||
| 11 | { | ||
| 12 | |||
| 13 | } | ||
| 14 | |||
| 15 | preposition_query& preposition_query::limit(int _limit) | ||
| 16 | { | ||
| 17 | this->_limit = _limit; | ||
| 18 | |||
| 19 | return *this; | ||
| 20 | } | ||
| 21 | |||
| 22 | preposition_query& preposition_query::random() | ||
| 23 | { | ||
| 24 | _random = true; | ||
| 25 | |||
| 26 | return *this; | ||
| 27 | } | ||
| 28 | |||
| 29 | preposition_query& preposition_query::in_group(std::string _arg) | ||
| 30 | { | ||
| 31 | _in_group.push_back(_arg); | ||
| 32 | |||
| 33 | return *this; | ||
| 34 | } | ||
| 35 | |||
| 36 | std::list<preposition> preposition_query::run() const | ||
| 37 | { | ||
| 38 | std::stringstream construct; | ||
| 39 | construct << "SELECT form FROM prepositions"; | ||
| 40 | std::list<binding> bindings; | ||
| 41 | |||
| 42 | if (!_in_group.empty()) | ||
| 43 | { | ||
| 44 | std::list<std::string> clauses(_in_group.size(), "groupname = ?"); | ||
| 45 | construct << " WHERE preposition_id IN (SELECT preposition_id FROM preposition_groups WHERE "; | ||
| 46 | construct << verbly::implode(std::begin(clauses), std::end(clauses), " OR "); | ||
| 47 | construct << ")"; | ||
| 48 | |||
| 49 | for (auto g : _in_group) | ||
| 50 | { | ||
| 51 | bindings.emplace_back(g); | ||
| 52 | } | ||
| 53 | } | ||
| 54 | |||
| 55 | if (_random) | ||
| 56 | { | ||
| 57 | construct << " ORDER BY RANDOM()"; | ||
| 58 | } | ||
| 59 | |||
| 60 | if (_limit != unlimited) | ||
| 61 | { | ||
| 62 | construct << " LIMIT " << _limit; | ||
| 63 | } | ||
| 64 | |||
| 65 | sqlite3_stmt* ppstmt; | ||
| 66 | std::string query = construct.str(); | ||
| 67 | if (sqlite3_prepare_v2(_data.ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
| 68 | { | ||
| 69 | throw std::runtime_error(sqlite3_errmsg(_data.ppdb)); | ||
| 70 | } | ||
| 71 | |||
| 72 | int i = 1; | ||
| 73 | for (auto& binding : bindings) | ||
| 74 | { | ||
| 75 | switch (binding.get_type()) | ||
| 76 | { | ||
| 77 | case binding::type::integer: | ||
| 78 | { | ||
| 79 | sqlite3_bind_int(ppstmt, i, binding.get_integer()); | ||
| 80 | |||
| 81 | break; | ||
| 82 | } | ||
| 83 | |||
| 84 | case binding::type::string: | ||
| 85 | { | ||
| 86 | sqlite3_bind_text(ppstmt, i, binding.get_string().c_str(), binding.get_string().length(), SQLITE_TRANSIENT); | ||
| 87 | |||
| 88 | break; | ||
| 89 | } | ||
| 90 | } | ||
| 91 | |||
| 92 | i++; | ||
| 93 | } | ||
| 94 | |||
| 95 | std::list<preposition> output; | ||
| 96 | while (sqlite3_step(ppstmt) == SQLITE_ROW) | ||
| 97 | { | ||
| 98 | preposition pp; | ||
| 99 | pp.form = std::string(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 0))); | ||
| 100 | |||
| 101 | output.push_back(pp); | ||
| 102 | } | ||
| 103 | |||
| 104 | return output; | ||
| 105 | } | ||
| 106 | |||
| 107 | }; | ||
| diff --git a/lib/preposition.h b/lib/preposition.h deleted file mode 100644 index 89f24fa..0000000 --- a/lib/preposition.h +++ /dev/null | |||
| @@ -1,38 +0,0 @@ | |||
| 1 | #ifndef PREPOSITION_H_FF908021 | ||
| 2 | #define PREPOSITION_H_FF908021 | ||
| 3 | |||
// Declarations for prepositions and the builder used to look them up.
| 4 | namespace verbly { | ||
| 5 | |||
| 6 | class preposition_query; | ||
| 7 | |||
// A single preposition; instances are filled in by preposition_query, which
// is a friend so it can write the private `form` member.
| 8 | class preposition { | ||
| 9 | public: | ||
// Returns a copy of the preposition's textual form.
| 10 | std::string get_form() const; | ||
| 11 | |||
| 12 | private: | ||
| 13 | friend class preposition_query; | ||
| 14 | |||
| 15 | std::string form; | ||
| 16 | }; | ||
| 17 | |||
// Builder-style query over prepositions. limit(), random() and in_group()
// record a criterion and return *this so calls can be chained.
| 18 | class preposition_query { | ||
| 19 | public: | ||
// The data object is stored by reference and must outlive the query.
| 20 | preposition_query(const data& _data); | ||
| 21 | |||
| 22 | preposition_query& limit(int _limit); | ||
| 23 | preposition_query& random(); | ||
// May be called repeatedly; the group names are combined with OR in run().
| 24 | preposition_query& in_group(std::string _arg); | ||
| 25 | |||
// Executes the query; throws std::runtime_error if preparing the SQL fails.
| 26 | std::list<preposition> run() const; | ||
| 27 | |||
// Sentinel for _limit meaning "no LIMIT clause".
| 28 | const static int unlimited = -1; | ||
| 29 | private: | ||
| 30 | const data& _data; | ||
| 31 | int _limit = unlimited; | ||
| 32 | bool _random = false; | ||
| 33 | std::list<std::string> _in_group; | ||
| 34 | }; | ||
| 35 | |||
| 36 | }; | ||
| 37 | |||
| 38 | #endif /* end of include guard: PREPOSITION_H_FF908021 */ | ||
| diff --git a/lib/pronunciation.cpp b/lib/pronunciation.cpp new file mode 100644 index 0000000..f5b742f --- /dev/null +++ b/lib/pronunciation.cpp | |||
| @@ -0,0 +1,69 @@ | |||
| 1 | #include "pronunciation.h" | ||
| 2 | #include <sqlite3.h> | ||
| 3 | #include "form.h" | ||
| 4 | #include "lemma.h" | ||
| 5 | #include "word.h" | ||
| 6 | #include "util.h" | ||
| 7 | |||
| 8 | namespace verbly { | ||
| 9 | |||
| 10 | const object pronunciation::objectType = object::pronunciation; | ||
| 11 | |||
| 12 | const std::list<std::string> pronunciation::select = {"pronunciation_id", "phonemes", "syllables", "stress", "prerhyme", "rhyme"}; | ||
| 13 | |||
| 14 | const field pronunciation::id = field::integerField(object::pronunciation, "pronunciation_id"); | ||
| 15 | const field pronunciation::numOfSyllables = field::integerField(object::pronunciation, "syllables"); | ||
| 16 | const field pronunciation::stress = field::stringField(object::pronunciation, "stress"); | ||
| 17 | |||
| 18 | const field pronunciation::form = field::joinThrough(object::pronunciation, "pronunciation_id", object::form, "forms_pronunciations", "form_id"); | ||
| 19 | |||
| 20 | const field pronunciation::prerhyme = field::stringField(object::pronunciation, "prerhyme", true); | ||
| 21 | const field pronunciation::rhyme = field::stringField(object::pronunciation, "rhyme", true); | ||
| 22 | |||
| 23 | pronunciation::pronunciation(const database& db, sqlite3_stmt* row) : db_(&db), valid_(true) | ||
| 24 | { | ||
| 25 | id_ = sqlite3_column_int(row, 0); | ||
| 26 | |||
| 27 | std::string phonemesStr(reinterpret_cast<const char*>(sqlite3_column_text(row, 1))); | ||
| 28 | phonemes_ = split<std::vector<std::string>>(phonemesStr, " "); | ||
| 29 | |||
| 30 | syllables_ = sqlite3_column_int(row, 2); | ||
| 31 | stress_ = std::string(reinterpret_cast<const char*>(sqlite3_column_text(row, 3))); | ||
| 32 | |||
| 33 | if (sqlite3_column_type(row, 5) != SQLITE_NULL) | ||
| 34 | { | ||
| 35 | hasRhyme_ = true; | ||
| 36 | |||
| 37 | prerhyme_ = std::string(reinterpret_cast<const char*>(sqlite3_column_text(row, 4))); | ||
| 38 | rhyme_ = std::string(reinterpret_cast<const char*>(sqlite3_column_text(row, 5))); | ||
| 39 | } | ||
| 40 | } | ||
| 41 | |||
| 42 | filter pronunciation::rhymesWith(const pronunciation& arg) | ||
| 43 | { | ||
| 44 | return (prerhyme != arg.getPrerhyme()) && (rhyme == arg.getRhyme()); | ||
| 45 | } | ||
| 46 | |||
| 47 | /*filter pronunciation::rhymesWith(const class form& arg) | ||
| 48 | { | ||
| 49 | filter result; | ||
| 50 | |||
| 51 | for (const pronunciation& p : arg.getPronunciations()) | ||
| 52 | { | ||
| 53 | result |= rhymesWith(p); | ||
| 54 | } | ||
| 55 | |||
| 56 | return result; | ||
| 57 | } | ||
| 58 | |||
| 59 | filter pronunciation::rhymesWith(const lemma& arg) | ||
| 60 | { | ||
| 61 | return rhymesWith(arg.getBaseForm()); | ||
| 62 | } | ||
| 63 | |||
| 64 | filter pronunciation::rhymesWith(const word& arg) | ||
| 65 | { | ||
| 66 | return rhymesWith(arg.getLemma()); | ||
| 67 | }*/ | ||
| 68 | |||
| 69 | }; | ||
| diff --git a/lib/pronunciation.h b/lib/pronunciation.h new file mode 100644 index 0000000..c7a1d4d --- /dev/null +++ b/lib/pronunciation.h | |||
| @@ -0,0 +1,163 @@ | |||
| 1 | #ifndef PRONUNCIATION_H_C68F86B0 | ||
| 2 | #define PRONUNCIATION_H_C68F86B0 | ||
| 3 | |||
| 4 | #include <stdexcept> | ||
| 5 | #include <vector> | ||
| 6 | #include <string> | ||
| 7 | #include "field.h" | ||
| 8 | #include "filter.h" | ||
| 9 | |||
| 10 | struct sqlite3_stmt; | ||
| 11 | |||
| 12 | namespace verbly { | ||
| 13 | |||
| 14 | class form; | ||
| 15 | class lemma; | ||
| 16 | class word; | ||
| 17 | class database; | ||
| 18 | |||
| 19 | class pronunciation { | ||
| 20 | public: | ||
| 21 | |||
| 22 | // Default constructor | ||
| 23 | |||
| 24 | pronunciation() = default; | ||
| 25 | |||
| 26 | // Construct from database | ||
| 27 | |||
| 28 | pronunciation(const database& db, sqlite3_stmt* row); | ||
| 29 | |||
| 30 | // Accessors | ||
| 31 | |||
| 32 | operator bool() const | ||
| 33 | { | ||
| 34 | return valid_; | ||
| 35 | } | ||
| 36 | |||
| 37 | int getId() const | ||
| 38 | { | ||
| 39 | if (!valid_) | ||
| 40 | { | ||
| 41 | throw std::domain_error("Bad access to uninitialized pronunciation"); | ||
| 42 | } | ||
| 43 | |||
| 44 | return id_; | ||
| 45 | } | ||
| 46 | |||
| 47 | const std::vector<std::string>& getPhonemes() const | ||
| 48 | { | ||
| 49 | if (!valid_) | ||
| 50 | { | ||
| 51 | throw std::domain_error("Bad access to uninitialized pronunciation"); | ||
| 52 | } | ||
| 53 | |||
| 54 | return phonemes_; | ||
| 55 | } | ||
| 56 | |||
| 57 | int getSyllables() const | ||
| 58 | { | ||
| 59 | if (!valid_) | ||
| 60 | { | ||
| 61 | throw std::domain_error("Bad access to uninitialized pronunciation"); | ||
| 62 | } | ||
| 63 | |||
| 64 | return syllables_; | ||
| 65 | } | ||
| 66 | |||
| 67 | std::string getStress() const | ||
| 68 | { | ||
| 69 | if (!valid_) | ||
| 70 | { | ||
| 71 | throw std::domain_error("Bad access to uninitialized pronunciation"); | ||
| 72 | } | ||
| 73 | |||
| 74 | return stress_; | ||
| 75 | } | ||
| 76 | |||
| 77 | bool hasRhyme() const | ||
| 78 | { | ||
| 79 | if (!valid_) | ||
| 80 | { | ||
| 81 | throw std::domain_error("Bad access to uninitialized pronunciation"); | ||
| 82 | } | ||
| 83 | |||
| 84 | return hasRhyme_; | ||
| 85 | } | ||
| 86 | |||
| 87 | std::string getPrerhyme() const | ||
| 88 | { | ||
| 89 | if (!valid_) | ||
| 90 | { | ||
| 91 | throw std::domain_error("Bad access to uninitialized pronunciation"); | ||
| 92 | } | ||
| 93 | |||
| 94 | if (!hasRhyme_) | ||
| 95 | { | ||
| 96 | throw std::domain_error("This pronunciation has no rhyme"); | ||
| 97 | } | ||
| 98 | |||
| 99 | return prerhyme_; | ||
| 100 | } | ||
| 101 | |||
| 102 | std::string getRhyme() const | ||
| 103 | { | ||
| 104 | if (!valid_) | ||
| 105 | { | ||
| 106 | throw std::domain_error("Bad access to uninitialized pronunciation"); | ||
| 107 | } | ||
| 108 | |||
| 109 | if (!hasRhyme_) | ||
| 110 | { | ||
| 111 | throw std::domain_error("This pronunciation has no rhyme"); | ||
| 112 | } | ||
| 113 | |||
| 114 | return rhyme_; | ||
| 115 | } | ||
| 116 | |||
| 117 | // Type info | ||
| 118 | |||
| 119 | static const object objectType; | ||
| 120 | |||
| 121 | static const std::list<std::string> select; | ||
| 122 | |||
| 123 | // Query fields | ||
| 124 | |||
| 125 | static const field id; | ||
| 126 | static const field numOfSyllables; | ||
| 127 | static const field stress; | ||
| 128 | |||
| 129 | operator filter() const | ||
| 130 | { | ||
| 131 | return (id == id_); | ||
| 132 | } | ||
| 133 | |||
| 134 | static filter rhymesWith(const pronunciation& arg); | ||
| 135 | static filter rhymesWith(const class form& arg); | ||
| 136 | static filter rhymesWith(const lemma& arg); | ||
| 137 | static filter rhymesWith(const word& arg); | ||
| 138 | |||
| 139 | // Relationships to other objects | ||
| 140 | |||
| 141 | static const field form; | ||
| 142 | |||
| 143 | private: | ||
| 144 | bool valid_ = false; | ||
| 145 | |||
| 146 | int id_; | ||
| 147 | std::vector<std::string> phonemes_; | ||
| 148 | int syllables_; | ||
| 149 | std::string stress_; | ||
| 150 | bool hasRhyme_ = false; | ||
| 151 | std::string prerhyme_; | ||
| 152 | std::string rhyme_; | ||
| 153 | |||
| 154 | const database* db_; | ||
| 155 | |||
| 156 | static const field prerhyme; | ||
| 157 | static const field rhyme; | ||
| 158 | |||
| 159 | }; | ||
| 160 | |||
| 161 | }; | ||
| 162 | |||
| 163 | #endif /* end of include guard: PRONUNCIATION_H_C68F86B0 */ | ||
| diff --git a/lib/query.h b/lib/query.h new file mode 100644 index 0000000..e31be3d --- /dev/null +++ b/lib/query.h | |||
| @@ -0,0 +1,123 @@ | |||
| 1 | #ifndef QUERY_H_7CC5284C | ||
| 2 | #define QUERY_H_7CC5284C | ||
| 3 | |||
| 4 | #include <vector> | ||
| 5 | #include <stdexcept> | ||
| 6 | #include <string> | ||
| 7 | #include <list> | ||
| 8 | #include <sqlite3.h> | ||
| 9 | #include <iostream> | ||
| 10 | #include "statement.h" | ||
| 11 | #include "binding.h" | ||
| 12 | |||
| 13 | namespace verbly { | ||
| 14 | |||
/**
 * Error thrown when preparing a query or binding a value to it fails.
 * what() reads "<msg> (<sqlMsg>)".
 */
class database_error : public std::logic_error {
public:

  database_error(std::string msg, std::string sqlMsg)
    : std::logic_error(describe(msg, sqlMsg))
  {
  }

private:

  // Joins the summary and the SQLite detail into the final what() text.
  static std::string describe(const std::string& summary, const std::string& detail)
  {
    std::string text(summary);
    text += " (";
    text += detail;
    text += ")";

    return text;
  }
};
| 22 | |||
| 23 | template <typename Object> | ||
| 24 | class query { | ||
| 25 | public: | ||
| 26 | |||
| 27 | query(const database& db, sqlite3* ppdb, filter queryFilter, bool random, int limit) : db_(&db) | ||
| 28 | { | ||
| 29 | statement stmt(Object::objectType, std::move(queryFilter)); | ||
| 30 | |||
| 31 | std::string queryString = stmt.getQueryString(Object::select, random, limit); | ||
| 32 | std::list<binding> bindings = stmt.getBindings(); | ||
| 33 | |||
| 34 | std::cout << queryString << std::endl; | ||
| 35 | |||
| 36 | if (sqlite3_prepare_v2(ppdb, queryString.c_str(), queryString.length(), &ppstmt_, NULL) != SQLITE_OK) | ||
| 37 | { | ||
| 38 | std::string errorMsg = sqlite3_errmsg(ppdb); | ||
| 39 | sqlite3_finalize(ppstmt_); | ||
| 40 | |||
| 41 | throw database_error("Error preparing query", errorMsg); | ||
| 42 | } | ||
| 43 | |||
| 44 | int i = 1; | ||
| 45 | for (const binding& value : bindings) | ||
| 46 | { | ||
| 47 | switch (value.getType()) | ||
| 48 | { | ||
| 49 | case binding::type::integer: | ||
| 50 | { | ||
| 51 | if (sqlite3_bind_int(ppstmt_, i, value.getInteger()) != SQLITE_OK) | ||
| 52 | { | ||
| 53 | std::string errorMsg = sqlite3_errmsg(ppdb); | ||
| 54 | sqlite3_finalize(ppstmt_); | ||
| 55 | |||
| 56 | throw database_error("Error binding value to query", errorMsg); | ||
| 57 | } | ||
| 58 | |||
| 59 | break; | ||
| 60 | } | ||
| 61 | |||
| 62 | case binding::type::string: | ||
| 63 | { | ||
| 64 | if (sqlite3_bind_text(ppstmt_, i, value.getString().c_str(), value.getString().length(), SQLITE_TRANSIENT) != SQLITE_OK) | ||
| 65 | { | ||
| 66 | std::string errorMsg = sqlite3_errmsg(ppdb); | ||
| 67 | sqlite3_finalize(ppstmt_); | ||
| 68 | |||
| 69 | throw database_error("Error binding value to query", errorMsg); | ||
| 70 | } | ||
| 71 | |||
| 72 | break; | ||
| 73 | } | ||
| 74 | |||
| 75 | case binding::type::invalid: | ||
| 76 | { | ||
| 77 | throw std::logic_error("Cannot use invalid bindings"); | ||
| 78 | } | ||
| 79 | } | ||
| 80 | |||
| 81 | i++; | ||
| 82 | } | ||
| 83 | } | ||
| 84 | |||
| 85 | ~query() | ||
| 86 | { | ||
| 87 | sqlite3_finalize(ppstmt_); | ||
| 88 | } | ||
| 89 | |||
| 90 | std::vector<Object> all() const | ||
| 91 | { | ||
| 92 | std::vector<Object> result; | ||
| 93 | |||
| 94 | while (sqlite3_step(ppstmt_) == SQLITE_ROW) | ||
| 95 | { | ||
| 96 | result.emplace_back(*db_, ppstmt_); | ||
| 97 | } | ||
| 98 | |||
| 99 | sqlite3_reset(ppstmt_); | ||
| 100 | |||
| 101 | return result; | ||
| 102 | } | ||
| 103 | |||
| 104 | Object first() const | ||
| 105 | { | ||
| 106 | std::vector<Object> results = all(); | ||
| 107 | if (!results.empty()) | ||
| 108 | { | ||
| 109 | return results.front(); | ||
| 110 | } else { | ||
| 111 | throw std::logic_error("query returned empty dataset"); | ||
| 112 | } | ||
| 113 | } | ||
| 114 | |||
| 115 | private: | ||
| 116 | const database* db_; | ||
| 117 | sqlite3_stmt* ppstmt_; | ||
| 118 | |||
| 119 | }; | ||
| 120 | |||
| 121 | }; | ||
| 122 | |||
| 123 | #endif /* end of include guard: QUERY_H_7CC5284C */ | ||
| diff --git a/lib/statement.cpp b/lib/statement.cpp new file mode 100644 index 0000000..52fa00d --- /dev/null +++ b/lib/statement.cpp | |||
| @@ -0,0 +1,806 @@ | |||
| 1 | #include "statement.h" | ||
| 2 | #include <sstream> | ||
| 3 | #include <utility> | ||
| 4 | #include "filter.h" | ||
| 5 | #include "util.h" | ||
| 6 | #include "notion.h" | ||
| 7 | #include "word.h" | ||
| 8 | #include "group.h" | ||
| 9 | #include "frame.h" | ||
| 10 | #include "lemma.h" | ||
| 11 | #include "form.h" | ||
| 12 | #include "pronunciation.h" | ||
| 13 | |||
| 14 | namespace verbly { | ||
| 15 | |||
| 16 | statement::statement( | ||
| 17 | object context, | ||
| 18 | filter queryFilter) : | ||
| 19 | statement(getTableForContext(context), queryFilter.normalize(context)) | ||
| 20 | { | ||
| 21 | } | ||
| 22 | |||
| 23 | std::string statement::getQueryString(std::list<std::string> select, bool random, int limit) const | ||
| 24 | { | ||
| 25 | std::stringstream queryStream; | ||
| 26 | |||
| 27 | if (!withs_.empty()) | ||
| 28 | { | ||
| 29 | queryStream << "WITH RECURSIVE "; | ||
| 30 | |||
| 31 | std::list<std::string> ctes; | ||
| 32 | for (const with& cte : withs_) | ||
| 33 | { | ||
| 34 | std::stringstream cteStream; | ||
| 35 | cteStream << cte.getIdentifier(); | ||
| 36 | cteStream << " AS (SELECT "; | ||
| 37 | cteStream << cte.getTopTable(); | ||
| 38 | cteStream << ".* FROM "; | ||
| 39 | cteStream << cte.getTableForId(cte.getTopTable()); | ||
| 40 | cteStream << " AS "; | ||
| 41 | cteStream << cte.getTopTable(); | ||
| 42 | |||
| 43 | for (const join& j : cte.getJoins()) | ||
| 44 | { | ||
| 45 | cteStream << " "; | ||
| 46 | cteStream << j; | ||
| 47 | } | ||
| 48 | |||
| 49 | if (cte.getCondition().getType() != condition::type::empty) | ||
| 50 | { | ||
| 51 | cteStream << " WHERE "; | ||
| 52 | cteStream << cte.getCondition().toSql(); | ||
| 53 | } | ||
| 54 | |||
| 55 | cteStream << " UNION SELECT l.* FROM "; | ||
| 56 | cteStream << cte.getIdentifier(); | ||
| 57 | cteStream << " AS t INNER JOIN "; | ||
| 58 | cteStream << cte.getField().getTable(); | ||
| 59 | cteStream << " AS j ON t."; | ||
| 60 | cteStream << cte.getField().getColumn(); | ||
| 61 | cteStream << " = j."; | ||
| 62 | cteStream << cte.getField().getForeignJoinColumn(); | ||
| 63 | cteStream << " INNER JOIN "; | ||
| 64 | cteStream << cte.getTableForId(cte.getTopTable()); | ||
| 65 | cteStream << " AS l ON j."; | ||
| 66 | cteStream << cte.getField().getJoinColumn(); | ||
| 67 | cteStream << " = l."; | ||
| 68 | cteStream << cte.getField().getColumn(); | ||
| 69 | cteStream << ")"; | ||
| 70 | |||
| 71 | ctes.push_back(cteStream.str()); | ||
| 72 | } | ||
| 73 | |||
| 74 | queryStream << implode(std::begin(ctes), std::end(ctes), ", "); | ||
| 75 | queryStream << " "; | ||
| 76 | } | ||
| 77 | |||
| 78 | std::list<std::string> realSelect; | ||
| 79 | for (std::string& s : select) | ||
| 80 | { | ||
| 81 | realSelect.push_back(topTable_ + "." + s); | ||
| 82 | } | ||
| 83 | |||
| 84 | queryStream << "SELECT "; | ||
| 85 | queryStream << implode(std::begin(realSelect), std::end(realSelect), ", "); | ||
| 86 | queryStream << " FROM "; | ||
| 87 | queryStream << tables_.at(topTable_); | ||
| 88 | queryStream << " AS "; | ||
| 89 | queryStream << topTable_; | ||
| 90 | |||
| 91 | for (const join& j : joins_) | ||
| 92 | { | ||
| 93 | queryStream << " "; | ||
| 94 | queryStream << j; | ||
| 95 | } | ||
| 96 | |||
| 97 | if (topCondition_.getType() != condition::type::empty) | ||
| 98 | { | ||
| 99 | queryStream << " WHERE "; | ||
| 100 | queryStream << topCondition_.toSql(); | ||
| 101 | } | ||
| 102 | |||
| 103 | if (random) | ||
| 104 | { | ||
| 105 | queryStream << " ORDER BY RANDOM()"; | ||
| 106 | } | ||
| 107 | |||
| 108 | if (limit > 0) | ||
| 109 | { | ||
| 110 | queryStream << " LIMIT "; | ||
| 111 | queryStream << limit; | ||
| 112 | } | ||
| 113 | |||
| 114 | return queryStream.str(); | ||
| 115 | } | ||
| 116 | |||
| 117 | std::list<binding> statement::getBindings() const | ||
| 118 | { | ||
| 119 | std::list<binding> result; | ||
| 120 | |||
| 121 | for (const with& w : withs_) | ||
| 122 | { | ||
| 123 | for (binding value : w.getCondition().flattenBindings()) | ||
| 124 | { | ||
| 125 | result.push_back(std::move(value)); | ||
| 126 | } | ||
| 127 | } | ||
| 128 | |||
| 129 | for (binding value : topCondition_.flattenBindings()) | ||
| 130 | { | ||
| 131 | result.push_back(std::move(value)); | ||
| 132 | } | ||
| 133 | |||
| 134 | return result; | ||
| 135 | } | ||
| 136 | |||
| 137 | statement::statement( | ||
| 138 | std::string tableName, | ||
| 139 | filter clause, | ||
| 140 | int nextTableId, | ||
| 141 | int nextWithId) : | ||
| 142 | nextTableId_(nextTableId), | ||
| 143 | nextWithId_(nextWithId), | ||
| 144 | topTable_(instantiateTable(std::move(tableName))), | ||
| 145 | topCondition_(parseFilter(std::move(clause))) | ||
| 146 | { | ||
| 147 | } | ||
| 148 | |||
| 149 | statement::condition statement::parseFilter(filter clause) | ||
| 150 | { | ||
| 151 | switch (clause.getType()) | ||
| 152 | { | ||
| 153 | case filter::type::empty: | ||
| 154 | { | ||
| 155 | return {}; | ||
| 156 | } | ||
| 157 | |||
| 158 | case filter::type::singleton: | ||
| 159 | { | ||
| 160 | switch (clause.getField().getType()) | ||
| 161 | { | ||
| 162 | case field::type::undefined: | ||
| 163 | { | ||
| 164 | return {}; | ||
| 165 | } | ||
| 166 | |||
| 167 | case field::type::string: | ||
| 168 | case field::type::integer: | ||
| 169 | case field::type::boolean: | ||
| 170 | { | ||
| 171 | switch (clause.getComparison()) | ||
| 172 | { | ||
| 173 | case filter::comparison::is_null: | ||
| 174 | { | ||
| 175 | return condition(topTable_, clause.getField().getColumn(), true); | ||
| 176 | } | ||
| 177 | |||
| 178 | case filter::comparison::is_not_null: | ||
| 179 | { | ||
| 180 | return condition(topTable_, clause.getField().getColumn(), false); | ||
| 181 | } | ||
| 182 | |||
| 183 | case filter::comparison::int_equals: | ||
| 184 | { | ||
| 185 | return condition(topTable_, clause.getField().getColumn(), condition::comparison::equals, clause.getIntegerArgument()); | ||
| 186 | } | ||
| 187 | |||
| 188 | case filter::comparison::int_does_not_equal: | ||
| 189 | { | ||
| 190 | return condition(topTable_, clause.getField().getColumn(), condition::comparison::does_not_equal, clause.getIntegerArgument()); | ||
| 191 | } | ||
| 192 | |||
| 193 | case filter::comparison::int_is_at_least: | ||
| 194 | { | ||
| 195 | return condition(topTable_, clause.getField().getColumn(), condition::comparison::is_at_least, clause.getIntegerArgument()); | ||
| 196 | } | ||
| 197 | |||
| 198 | case filter::comparison::int_is_greater_than: | ||
| 199 | { | ||
| 200 | return condition(topTable_, clause.getField().getColumn(), condition::comparison::is_greater_than, clause.getIntegerArgument()); | ||
| 201 | } | ||
| 202 | |||
| 203 | case filter::comparison::int_is_at_most: | ||
| 204 | { | ||
| 205 | return condition(topTable_, clause.getField().getColumn(), condition::comparison::is_at_most, clause.getIntegerArgument()); | ||
| 206 | } | ||
| 207 | |||
| 208 | case filter::comparison::int_is_less_than: | ||
| 209 | { | ||
| 210 | return condition(topTable_, clause.getField().getColumn(), condition::comparison::is_less_than, clause.getIntegerArgument()); | ||
| 211 | } | ||
| 212 | |||
| 213 | case filter::comparison::boolean_equals: | ||
| 214 | { | ||
| 215 | return condition(topTable_, clause.getField().getColumn(), condition::comparison::equals, clause.getBooleanArgument() ? 1 : 0); | ||
| 216 | } | ||
| 217 | |||
| 218 | case filter::comparison::string_equals: | ||
| 219 | { | ||
| 220 | return condition(topTable_, clause.getField().getColumn(), condition::comparison::equals, clause.getStringArgument()); | ||
| 221 | } | ||
| 222 | |||
| 223 | case filter::comparison::string_does_not_equal: | ||
| 224 | { | ||
| 225 | return condition(topTable_, clause.getField().getColumn(), condition::comparison::does_not_equal, clause.getStringArgument()); | ||
| 226 | } | ||
| 227 | |||
| 228 | case filter::comparison::string_is_like: | ||
| 229 | { | ||
| 230 | return condition(topTable_, clause.getField().getColumn(), condition::comparison::is_like, clause.getStringArgument()); | ||
| 231 | } | ||
| 232 | |||
| 233 | case filter::comparison::string_is_not_like: | ||
| 234 | { | ||
| 235 | return condition(topTable_, clause.getField().getColumn(), condition::comparison::is_not_like, clause.getStringArgument()); | ||
| 236 | } | ||
| 237 | |||
| 238 | case filter::comparison::matches: | ||
| 239 | case filter::comparison::does_not_match: | ||
| 240 | case filter::comparison::hierarchally_matches: | ||
| 241 | case filter::comparison::does_not_hierarchally_match: | ||
| 242 | { | ||
| 243 | throw std::logic_error("Invalid comparison type for field"); | ||
| 244 | } | ||
| 245 | } | ||
| 246 | } | ||
| 247 | |||
| 248 | case field::type::join: | ||
| 249 | { | ||
| 250 | std::string joinTableName; | ||
| 251 | if (clause.getField().hasTable()) | ||
| 252 | { | ||
| 253 | joinTableName = clause.getField().getTable(); | ||
| 254 | } else { | ||
| 255 | joinTableName = getTableForContext(clause.getField().getJoinObject()); | ||
| 256 | } | ||
| 257 | |||
| 258 | statement joinStmt( | ||
| 259 | joinTableName, | ||
| 260 | clause.getJoinCondition().normalize(clause.getField().getJoinObject()), | ||
| 261 | nextTableId_, | ||
| 262 | nextWithId_); | ||
| 263 | |||
| 264 | std::string joinTable = joinStmt.topTable_; | ||
| 265 | condition curCond = integrate(std::move(joinStmt)); | ||
| 266 | |||
| 267 | bool outer = false; | ||
| 268 | if (clause.getComparison() == filter::comparison::does_not_match) | ||
| 269 | { | ||
| 270 | outer = true; | ||
| 271 | |||
| 272 | curCond &= condition(joinTable, clause.getField().getColumn(), true); | ||
| 273 | } | ||
| 274 | |||
| 275 | joins_.emplace_back(outer, joinTableName, topTable_, clause.getField().getColumn(), joinTable, clause.getField().getColumn()); | ||
| 276 | |||
| 277 | return curCond; | ||
| 278 | } | ||
| 279 | |||
| 280 | case field::type::join_through: | ||
| 281 | { | ||
| 282 | statement joinStmt( | ||
| 283 | getTableForContext(clause.getField().getJoinObject()), | ||
| 284 | clause.getJoinCondition().normalize(clause.getField().getJoinObject()), | ||
| 285 | nextTableId_, | ||
| 286 | nextWithId_); | ||
| 287 | |||
| 288 | std::string joinTable = joinStmt.topTable_; | ||
| 289 | std::string throughTable = instantiateTable(clause.getField().getTable()); | ||
| 290 | condition curCond = integrate(std::move(joinStmt)); | ||
| 291 | |||
| 292 | bool outer = false; | ||
| 293 | if (clause.getComparison() == filter::comparison::does_not_match) | ||
| 294 | { | ||
| 295 | outer = true; | ||
| 296 | |||
| 297 | curCond &= condition(throughTable, clause.getField().getJoinColumn(), true); | ||
| 298 | } | ||
| 299 | |||
| 300 | joins_.emplace_back(outer, clause.getField().getTable(), topTable_, clause.getField().getColumn(), throughTable, clause.getField().getJoinColumn()); | ||
| 301 | joins_.emplace_back(false, getTableForContext(clause.getField().getJoinObject()), throughTable, clause.getField().getForeignJoinColumn(), joinTable, clause.getField().getForeignColumn()); | ||
| 302 | |||
| 303 | return curCond; | ||
| 304 | } | ||
| 305 | |||
| 306 | case field::type::hierarchal_join: | ||
| 307 | { | ||
| 308 | std::string withName = std::string(clause.getField().getTable()) + "_tree_" + std::to_string(nextWithId_++); | ||
| 309 | std::string withInstName = instantiateTable(withName); | ||
| 310 | |||
| 311 | bool outer = false; | ||
| 312 | if (clause.getComparison() == filter::comparison::does_not_hierarchally_match) | ||
| 313 | { | ||
| 314 | outer = true; | ||
| 315 | } | ||
| 316 | |||
| 317 | joins_.emplace_back(outer, withName, topTable_, clause.getField().getColumn(), withInstName, clause.getField().getColumn()); | ||
| 318 | |||
| 319 | statement withStmt( | ||
| 320 | getTableForContext(clause.getField().getObject()), | ||
| 321 | clause.getJoinCondition().normalize(clause.getField().getObject()), | ||
| 322 | nextTableId_, | ||
| 323 | nextWithId_); | ||
| 324 | |||
| 325 | for (auto& w : withStmt.withs_) | ||
| 326 | { | ||
| 327 | withs_.push_back(std::move(w)); | ||
| 328 | } | ||
| 329 | |||
| 330 | nextTableId_ = withStmt.nextTableId_; | ||
| 331 | nextWithId_ = withStmt.nextWithId_; | ||
| 332 | |||
| 333 | withs_.emplace_back( | ||
| 334 | withName, | ||
| 335 | clause.getField(), | ||
| 336 | std::move(withStmt.tables_), | ||
| 337 | std::move(withStmt.topTable_), | ||
| 338 | std::move(withStmt.topCondition_), | ||
| 339 | std::move(withStmt.joins_)); | ||
| 340 | |||
| 341 | if (clause.getComparison() == filter::comparison::does_not_hierarchally_match) | ||
| 342 | { | ||
| 343 | return condition(withInstName, clause.getField().getColumn(), true); | ||
| 344 | } else { | ||
| 345 | return {}; | ||
| 346 | } | ||
| 347 | } | ||
| 348 | } | ||
| 349 | } | ||
| 350 | |||
| 351 | case filter::type::group: | ||
| 352 | { | ||
| 353 | condition grp(clause.getOrlogic()); | ||
| 354 | |||
| 355 | for (const filter& child : clause) | ||
| 356 | { | ||
| 357 | condition newChild = parseFilter(child); | ||
| 358 | if (newChild.getType() != condition::type::empty) | ||
| 359 | { | ||
| 360 | grp += std::move(newChild); | ||
| 361 | } | ||
| 362 | } | ||
| 363 | |||
| 364 | if (grp.getChildren().empty()) | ||
| 365 | { | ||
| 366 | grp = {}; | ||
| 367 | } | ||
| 368 | |||
| 369 | return grp; | ||
| 370 | } | ||
| 371 | } | ||
| 372 | } | ||
| 373 | |||
| 374 | std::string statement::instantiateTable(std::string name) | ||
| 375 | { | ||
| 376 | std::string identifier = name + "_" + std::to_string(nextTableId_++); | ||
| 377 | tables_[identifier] = name; | ||
| 378 | |||
| 379 | return identifier; | ||
| 380 | } | ||
| 381 | |||
| 382 | statement::condition statement::integrate(statement subStmt) | ||
| 383 | { | ||
| 384 | for (auto& mapping : subStmt.tables_) | ||
| 385 | { | ||
| 386 | tables_[mapping.first] = mapping.second; | ||
| 387 | } | ||
| 388 | |||
| 389 | for (auto& j : subStmt.joins_) | ||
| 390 | { | ||
| 391 | joins_.push_back(j); | ||
| 392 | } | ||
| 393 | |||
| 394 | for (auto& w : subStmt.withs_) | ||
| 395 | { | ||
| 396 | withs_.push_back(w); | ||
| 397 | } | ||
| 398 | |||
| 399 | nextTableId_ = subStmt.nextTableId_; | ||
| 400 | nextWithId_ = subStmt.nextWithId_; | ||
| 401 | |||
| 402 | return subStmt.topCondition_; | ||
| 403 | } | ||
| 404 | |||
| 405 | std::ostream& operator<<(std::ostream& oss, const statement::join& j) | ||
| 406 | { | ||
| 407 | if (j.isOuterJoin()) | ||
| 408 | { | ||
| 409 | oss << "LEFT"; | ||
| 410 | } else { | ||
| 411 | oss << "INNER"; | ||
| 412 | } | ||
| 413 | |||
| 414 | return oss | ||
| 415 | << " JOIN " | ||
| 416 | << j.getForeignTableName() | ||
| 417 | << " AS " | ||
| 418 | << j.getForeignTable() | ||
| 419 | << " ON " | ||
| 420 | << j.getForeignTable() | ||
| 421 | << "." | ||
| 422 | << j.getForeignColumn() | ||
| 423 | << " = " | ||
| 424 | << j.getJoinTable() | ||
| 425 | << "." | ||
| 426 | << j.getJoinColumn(); | ||
| 427 | } | ||
| 428 | |||
| 429 | statement::condition::condition(const condition& other) | ||
| 430 | { | ||
| 431 | type_ = other.type_; | ||
| 432 | |||
| 433 | switch (type_) | ||
| 434 | { | ||
| 435 | case type::empty: | ||
| 436 | { | ||
| 437 | break; | ||
| 438 | } | ||
| 439 | |||
| 440 | case type::singleton: | ||
| 441 | { | ||
| 442 | new(&singleton_.table_) std::string(other.singleton_.table_); | ||
| 443 | new(&singleton_.column_) std::string(other.singleton_.column_); | ||
| 444 | singleton_.comparison_ = other.singleton_.comparison_; | ||
| 445 | new(&singleton_.value_) binding(other.singleton_.value_); | ||
| 446 | |||
| 447 | break; | ||
| 448 | } | ||
| 449 | |||
| 450 | case type::group: | ||
| 451 | { | ||
| 452 | new(&group_.children_) std::list<condition>(other.group_.children_); | ||
| 453 | group_.orlogic_ = other.group_.orlogic_; | ||
| 454 | |||
| 455 | break; | ||
| 456 | } | ||
| 457 | } | ||
| 458 | } | ||
| 459 | |||
| 460 | statement::condition::condition(condition&& other) : condition() | ||
| 461 | { | ||
| 462 | swap(*this, other); | ||
| 463 | } | ||
| 464 | |||
| 465 | statement::condition& statement::condition::operator=(condition other) | ||
| 466 | { | ||
| 467 | swap(*this, other); | ||
| 468 | |||
| 469 | return *this; | ||
| 470 | } | ||
| 471 | |||
| 472 | void swap(statement::condition& first, statement::condition& second) | ||
| 473 | { | ||
| 474 | using type = statement::condition::type; | ||
| 475 | using condition = statement::condition; | ||
| 476 | |||
| 477 | type tempType = first.type_; | ||
| 478 | std::string tempTable; | ||
| 479 | std::string tempColumn; | ||
| 480 | condition::comparison tempComparison; | ||
| 481 | binding tempBinding; | ||
| 482 | std::list<condition> tempChildren; | ||
| 483 | bool tempOrlogic; | ||
| 484 | |||
| 485 | switch (tempType) | ||
| 486 | { | ||
| 487 | case type::empty: | ||
| 488 | { | ||
| 489 | break; | ||
| 490 | } | ||
| 491 | |||
| 492 | case type::singleton: | ||
| 493 | { | ||
| 494 | tempTable = std::move(first.singleton_.table_); | ||
| 495 | tempColumn = std::move(first.singleton_.column_); | ||
| 496 | tempComparison = first.singleton_.comparison_; | ||
| 497 | tempBinding = std::move(first.singleton_.value_); | ||
| 498 | |||
| 499 | break; | ||
| 500 | } | ||
| 501 | |||
| 502 | case type::group: | ||
| 503 | { | ||
| 504 | tempChildren = std::move(first.group_.children_); | ||
| 505 | tempOrlogic = first.group_.orlogic_; | ||
| 506 | |||
| 507 | break; | ||
| 508 | } | ||
| 509 | } | ||
| 510 | |||
| 511 | first.~condition(); | ||
| 512 | |||
| 513 | first.type_ = second.type_; | ||
| 514 | |||
| 515 | switch (first.type_) | ||
| 516 | { | ||
| 517 | case type::empty: | ||
| 518 | { | ||
| 519 | break; | ||
| 520 | } | ||
| 521 | |||
| 522 | case type::singleton: | ||
| 523 | { | ||
| 524 | new(&first.singleton_.table_) std::string(std::move(second.singleton_.table_)); | ||
| 525 | new(&first.singleton_.column_) std::string(std::move(second.singleton_.column_)); | ||
| 526 | first.singleton_.comparison_ = second.singleton_.comparison_; | ||
| 527 | new(&first.singleton_.value_) binding(std::move(second.singleton_.value_)); | ||
| 528 | |||
| 529 | break; | ||
| 530 | } | ||
| 531 | |||
| 532 | case type::group: | ||
| 533 | { | ||
| 534 | new(&first.group_.children_) std::list<condition>(std::move(second.group_.children_)); | ||
| 535 | first.group_.orlogic_ = second.group_.orlogic_; | ||
| 536 | |||
| 537 | break; | ||
| 538 | } | ||
| 539 | } | ||
| 540 | |||
| 541 | second.~condition(); | ||
| 542 | |||
| 543 | second.type_ = tempType; | ||
| 544 | |||
| 545 | switch (second.type_) | ||
| 546 | { | ||
| 547 | case type::empty: | ||
| 548 | { | ||
| 549 | break; | ||
| 550 | } | ||
| 551 | |||
| 552 | case type::singleton: | ||
| 553 | { | ||
| 554 | new(&second.singleton_.table_) std::string(std::move(tempTable)); | ||
| 555 | new(&second.singleton_.column_) std::string(std::move(tempColumn)); | ||
| 556 | second.singleton_.comparison_ = tempComparison; | ||
| 557 | new(&second.singleton_.value_) binding(std::move(tempBinding)); | ||
| 558 | |||
| 559 | break; | ||
| 560 | } | ||
| 561 | |||
| 562 | case type::group: | ||
| 563 | { | ||
| 564 | new(&second.group_.children_) std::list<condition>(std::move(tempChildren)); | ||
| 565 | second.group_.orlogic_ = tempOrlogic; | ||
| 566 | |||
| 567 | break; | ||
| 568 | } | ||
| 569 | } | ||
| 570 | } | ||
| 571 | |||
| 572 | statement::condition::~condition() | ||
| 573 | { | ||
| 574 | switch (type_) | ||
| 575 | { | ||
| 576 | case type::empty: | ||
| 577 | { | ||
| 578 | break; | ||
| 579 | } | ||
| 580 | |||
| 581 | case type::singleton: | ||
| 582 | { | ||
| 583 | using string_type = std::string; | ||
| 584 | |||
| 585 | singleton_.table_.~string_type(); | ||
| 586 | singleton_.column_.~string_type(); | ||
| 587 | singleton_.value_.~binding(); | ||
| 588 | |||
| 589 | break; | ||
| 590 | } | ||
| 591 | |||
| 592 | case type::group: | ||
| 593 | { | ||
| 594 | using list_type = std::list<condition>; | ||
| 595 | |||
| 596 | group_.children_.~list_type(); | ||
| 597 | |||
| 598 | break; | ||
| 599 | } | ||
| 600 | } | ||
| 601 | } | ||
| 602 | |||
| 603 | statement::condition::condition() : type_(type::empty) | ||
| 604 | { | ||
| 605 | } | ||
| 606 | |||
| 607 | statement::condition::condition( | ||
| 608 | std::string table, | ||
| 609 | std::string column, | ||
| 610 | bool isNull) : | ||
| 611 | type_(type::singleton) | ||
| 612 | { | ||
| 613 | new(&singleton_.table_) std::string(std::move(table)); | ||
| 614 | new(&singleton_.column_) std::string(std::move(column)); | ||
| 615 | |||
| 616 | if (isNull) | ||
| 617 | { | ||
| 618 | singleton_.comparison_ = comparison::is_null; | ||
| 619 | } else { | ||
| 620 | singleton_.comparison_ = comparison::is_not_null; | ||
| 621 | } | ||
| 622 | } | ||
| 623 | |||
| 624 | statement::condition::condition( | ||
| 625 | std::string table, | ||
| 626 | std::string column, | ||
| 627 | comparison comp, | ||
| 628 | binding value) : | ||
| 629 | type_(type::singleton) | ||
| 630 | { | ||
| 631 | new(&singleton_.table_) std::string(std::move(table)); | ||
| 632 | new(&singleton_.column_) std::string(std::move(column)); | ||
| 633 | singleton_.comparison_ = comp; | ||
| 634 | new(&singleton_.value_) binding(std::move(value)); | ||
| 635 | } | ||
| 636 | |||
| 637 | std::string statement::condition::toSql() const | ||
| 638 | { | ||
| 639 | switch (type_) | ||
| 640 | { | ||
| 641 | case type::empty: | ||
| 642 | { | ||
| 643 | return ""; | ||
| 644 | } | ||
| 645 | |||
| 646 | case type::singleton: | ||
| 647 | { | ||
| 648 | switch (singleton_.comparison_) | ||
| 649 | { | ||
| 650 | case comparison::equals: | ||
| 651 | { | ||
| 652 | return singleton_.table_ + "." + singleton_.column_ + " = ?"; | ||
| 653 | } | ||
| 654 | |||
| 655 | case comparison::does_not_equal: | ||
| 656 | { | ||
| 657 | return singleton_.table_ + "." + singleton_.column_ + " != ?"; | ||
| 658 | } | ||
| 659 | |||
| 660 | case comparison::is_greater_than: | ||
| 661 | { | ||
| 662 | return singleton_.table_ + "." + singleton_.column_ + " > ?"; | ||
| 663 | } | ||
| 664 | |||
| 665 | case comparison::is_at_most: | ||
| 666 | { | ||
| 667 | return singleton_.table_ + "." + singleton_.column_ + " <= ?"; | ||
| 668 | } | ||
| 669 | |||
| 670 | case comparison::is_less_than: | ||
| 671 | { | ||
| 672 | return singleton_.table_ + "." + singleton_.column_ + " < ?"; | ||
| 673 | } | ||
| 674 | |||
| 675 | case comparison::is_at_least: | ||
| 676 | { | ||
| 677 | return singleton_.table_ + "." + singleton_.column_ + " >= ?"; | ||
| 678 | } | ||
| 679 | |||
| 680 | case comparison::is_like: | ||
| 681 | { | ||
| 682 | return singleton_.table_ + "." + singleton_.column_ + " LIKE ?"; | ||
| 683 | } | ||
| 684 | |||
| 685 | case comparison::is_not_like: | ||
| 686 | { | ||
| 687 | return singleton_.table_ + "." + singleton_.column_ + " NOT LIKE ?"; | ||
| 688 | } | ||
| 689 | |||
| 690 | case comparison::is_not_null: | ||
| 691 | { | ||
| 692 | return singleton_.table_ + "." + singleton_.column_ + " IS NOT NULL"; | ||
| 693 | } | ||
| 694 | |||
| 695 | case comparison::is_null: | ||
| 696 | { | ||
| 697 | return singleton_.table_ + "." + singleton_.column_ + " IS NULL"; | ||
| 698 | } | ||
| 699 | } | ||
| 700 | } | ||
| 701 | |||
| 702 | case type::group: | ||
| 703 | { | ||
| 704 | std::list<std::string> clauses; | ||
| 705 | for (const condition& cond : group_.children_) | ||
| 706 | { | ||
| 707 | clauses.push_back(cond.toSql()); | ||
| 708 | } | ||
| 709 | |||
| 710 | return implode(std::begin(clauses), std::end(clauses), group_.orlogic_ ? " OR " : " AND "); | ||
| 711 | } | ||
| 712 | } | ||
| 713 | } | ||
| 714 | |||
| 715 | std::list<binding> statement::condition::flattenBindings() const | ||
| 716 | { | ||
| 717 | switch (type_) | ||
| 718 | { | ||
| 719 | case type::empty: | ||
| 720 | { | ||
| 721 | return {}; | ||
| 722 | } | ||
| 723 | |||
| 724 | case type::singleton: | ||
| 725 | { | ||
| 726 | return {singleton_.value_}; | ||
| 727 | } | ||
| 728 | |||
| 729 | case type::group: | ||
| 730 | { | ||
| 731 | std::list<binding> bindings; | ||
| 732 | for (const condition& cond : group_.children_) | ||
| 733 | { | ||
| 734 | for (binding value : cond.flattenBindings()) | ||
| 735 | { | ||
| 736 | bindings.push_back(std::move(value)); | ||
| 737 | } | ||
| 738 | } | ||
| 739 | |||
| 740 | return bindings; | ||
| 741 | } | ||
| 742 | } | ||
| 743 | } | ||
| 744 | |||
| 745 | statement::condition::condition(bool orlogic) : type_(type::group) | ||
| 746 | { | ||
| 747 | new(&group_.children_) std::list<condition>(); | ||
| 748 | group_.orlogic_ = orlogic; | ||
| 749 | } | ||
| 750 | |||
| 751 | statement::condition& statement::condition::operator+=(condition n) | ||
| 752 | { | ||
| 753 | if (type_ == type::group) | ||
| 754 | { | ||
| 755 | group_.children_.push_back(std::move(n)); | ||
| 756 | |||
| 757 | return *this; | ||
| 758 | } else { | ||
| 759 | throw std::domain_error("Cannot add condition to non-group condition"); | ||
| 760 | } | ||
| 761 | } | ||
| 762 | |||
| 763 | statement::condition& statement::condition::operator&=(condition n) | ||
| 764 | { | ||
| 765 | switch (type_) | ||
| 766 | { | ||
| 767 | case type::empty: | ||
| 768 | { | ||
| 769 | *this = std::move(n); | ||
| 770 | |||
| 771 | break; | ||
| 772 | } | ||
| 773 | |||
| 774 | case type::singleton: | ||
| 775 | { | ||
| 776 | condition grp(false); | ||
| 777 | grp += *this; | ||
| 778 | grp += std::move(n); | ||
| 779 | |||
| 780 | *this = grp; | ||
| 781 | |||
| 782 | break; | ||
| 783 | } | ||
| 784 | |||
| 785 | case type::group: | ||
| 786 | { | ||
| 787 | *this += std::move(n); | ||
| 788 | |||
| 789 | break; | ||
| 790 | } | ||
| 791 | } | ||
| 792 | |||
| 793 | return *this; | ||
| 794 | } | ||
| 795 | |||
| 796 | const std::list<statement::condition>& statement::condition::getChildren() const | ||
| 797 | { | ||
| 798 | if (type_ == type::group) | ||
| 799 | { | ||
| 800 | return group_.children_; | ||
| 801 | } else { | ||
| 802 | throw std::domain_error("Cannot get children of non-group condition"); | ||
| 803 | } | ||
| 804 | } | ||
| 805 | |||
| 806 | }; | ||
| diff --git a/lib/statement.h b/lib/statement.h new file mode 100644 index 0000000..a528d60 --- /dev/null +++ b/lib/statement.h | |||
| @@ -0,0 +1,272 @@ | |||
| 1 | #ifndef STATEMENT_H_29F51659 | ||
| 2 | #define STATEMENT_H_29F51659 | ||
| 3 | |||
| 4 | #include <string> | ||
| 5 | #include <list> | ||
| 6 | #include <map> | ||
| 7 | #include <set> | ||
| 8 | #include "binding.h" | ||
| 9 | #include "enums.h" | ||
| 10 | #include "field.h" | ||
| 11 | #include "filter.h" | ||
| 12 | |||
| 13 | namespace verbly { | ||
| 14 | |||
| 15 | class filter; | ||
| 16 | |||
| 17 | class statement { | ||
| 18 | public: | ||
| 19 | |||
| 20 | statement(object context, filter queryFilter); | ||
| 21 | |||
| 22 | std::string getQueryString(std::list<std::string> select, bool random, int limit) const; | ||
| 23 | |||
| 24 | std::list<binding> getBindings() const; | ||
| 25 | |||
| 26 | private: | ||
| 27 | |||
// One JOIN clause of a statement. It is rendered as
//   <LEFT|INNER> JOIN <foreignTableName> AS <foreignTable>
//     ON <foreignTable>.<foreignColumn> = <joinTable>.<joinColumn>
class join {
public:

  // outer            - true for a LEFT join, false for an INNER join
  // foreignTableName - real name of the table being joined in
  // joinTable        - alias of the table already in the query
  // joinColumn       - column of joinTable used in the ON clause
  // foreignTable     - alias given to the table being joined in
  // foreignColumn    - column of foreignTable used in the ON clause
  join(
    bool outer,
    std::string foreignTableName,
    std::string joinTable,
    std::string joinColumn,
    std::string foreignTable,
    std::string foreignColumn)
    : outer_(outer),
      foreignTableName_(std::move(foreignTableName)),
      joinTable_(std::move(joinTable)),
      joinColumn_(std::move(joinColumn)),
      foreignTable_(std::move(foreignTable)),
      foreignColumn_(std::move(foreignColumn))
  {
  }

  // Join kind

  bool isOuterJoin() const { return outer_; }

  // Table being joined in

  const std::string& getForeignTableName() const { return foreignTableName_; }

  // Side that is already part of the query

  const std::string& getJoinTable() const { return joinTable_; }

  const std::string& getJoinColumn() const { return joinColumn_; }

  // Alias and column of the table being joined in

  const std::string& getForeignTable() const { return foreignTable_; }

  const std::string& getForeignColumn() const { return foreignColumn_; }

private:

  bool outer_ = false;
  std::string foreignTableName_;
  std::string joinTable_;
  std::string joinColumn_;
  std::string foreignTable_;
  std::string foreignColumn_;

};
| 86 | |||
| 87 | friend std::ostream& operator<<(std::ostream& oss, const join& j); | ||
| 88 | |||
| 89 | class condition { | ||
| 90 | public: | ||
| 91 | enum class type { | ||
| 92 | empty, | ||
| 93 | singleton, | ||
| 94 | group | ||
| 95 | }; | ||
| 96 | |||
| 97 | enum class comparison { | ||
| 98 | equals, | ||
| 99 | does_not_equal, | ||
| 100 | is_greater_than, | ||
| 101 | is_at_most, | ||
| 102 | is_less_than, | ||
| 103 | is_at_least, | ||
| 104 | is_like, | ||
| 105 | is_not_like, | ||
| 106 | is_not_null, | ||
| 107 | is_null | ||
| 108 | }; | ||
| 109 | |||
| 110 | // Copy and move constructors | ||
| 111 | |||
| 112 | condition(const condition& other); | ||
| 113 | condition(condition&& other); | ||
| 114 | |||
| 115 | // Assignment | ||
| 116 | |||
| 117 | condition& operator=(condition other); | ||
| 118 | |||
| 119 | // Swap | ||
| 120 | |||
| 121 | friend void swap(condition& first, condition& second); | ||
| 122 | |||
| 123 | // Destructor | ||
| 124 | |||
| 125 | ~condition(); | ||
| 126 | |||
| 127 | // Accessors | ||
| 128 | |||
| 129 | type getType() const | ||
| 130 | { | ||
| 131 | return type_; | ||
| 132 | } | ||
| 133 | |||
| 134 | // Empty | ||
| 135 | |||
| 136 | condition(); | ||
| 137 | |||
| 138 | // Singleton | ||
| 139 | |||
| 140 | condition(std::string table, std::string column, bool isNull); | ||
| 141 | |||
| 142 | condition(std::string table, std::string column, comparison comp, binding value); | ||
| 143 | |||
| 144 | // Group | ||
| 145 | |||
| 146 | explicit condition(bool orlogic); | ||
| 147 | |||
| 148 | condition& operator+=(condition n); | ||
| 149 | |||
| 150 | condition& operator&=(condition n); | ||
| 151 | |||
| 152 | const std::list<condition>& getChildren() const; | ||
| 153 | |||
| 154 | // Utility | ||
| 155 | |||
| 156 | std::string toSql() const; | ||
| 157 | |||
| 158 | std::list<binding> flattenBindings() const; | ||
| 159 | |||
| 160 | private: | ||
| 161 | union { | ||
| 162 | struct { | ||
| 163 | std::string table_; | ||
| 164 | std::string column_; | ||
| 165 | comparison comparison_; | ||
| 166 | binding value_; | ||
| 167 | } singleton_; | ||
| 168 | struct { | ||
| 169 | std::list<condition> children_; | ||
| 170 | bool orlogic_; | ||
| 171 | } group_; | ||
| 172 | }; | ||
| 173 | type type_; | ||
| 174 | }; | ||
| 175 | |||
| 176 | friend void swap(condition& first, condition& second); | ||
| 177 | |||
| 178 | class with { | ||
| 179 | public: | ||
| 180 | |||
| 181 | with( | ||
| 182 | std::string identifier, | ||
| 183 | field f, | ||
| 184 | std::map<std::string, std::string> tables, | ||
| 185 | std::string topTable, | ||
| 186 | condition where, | ||
| 187 | std::list<join> joins) : | ||
| 188 | identifier_(std::move(identifier)), | ||
| 189 | field_(f), | ||
| 190 | tables_(std::move(tables)), | ||
| 191 | topTable_(std::move(topTable)), | ||
| 192 | topCondition_(std::move(where)), | ||
| 193 | joins_(std::move(joins)) | ||
| 194 | { | ||
| 195 | } | ||
| 196 | |||
| 197 | const std::string& getIdentifier() const | ||
| 198 | { | ||
| 199 | return identifier_; | ||
| 200 | } | ||
| 201 | |||
| 202 | field getField() const | ||
| 203 | { | ||
| 204 | return field_; | ||
| 205 | } | ||
| 206 | |||
| 207 | std::string getTableForId(std::string identifier) const | ||
| 208 | { | ||
| 209 | return tables_.at(identifier); | ||
| 210 | } | ||
| 211 | |||
| 212 | const std::string& getTopTable() const | ||
| 213 | { | ||
| 214 | return topTable_; | ||
| 215 | } | ||
| 216 | |||
| 217 | const condition& getCondition() const | ||
| 218 | { | ||
| 219 | return topCondition_; | ||
| 220 | } | ||
| 221 | |||
| 222 | const std::list<join>& getJoins() const | ||
| 223 | { | ||
| 224 | return joins_; | ||
| 225 | } | ||
| 226 | |||
| 227 | private: | ||
| 228 | std::string identifier_; | ||
| 229 | field field_; | ||
| 230 | std::map<std::string, std::string> tables_; | ||
| 231 | std::string topTable_; | ||
| 232 | condition topCondition_; | ||
| 233 | std::list<join> joins_; | ||
| 234 | |||
| 235 | }; | ||
| 236 | |||
| 237 | static constexpr const char* getTableForContext(object context) | ||
| 238 | { | ||
| 239 | return (context == object::notion) ? "notions" | ||
| 240 | : (context == object::word) ? "words" | ||
| 241 | : (context == object::group) ? "groups" | ||
| 242 | : (context == object::frame) ? "frames" | ||
| 243 | : (context == object::lemma) ? "lemmas_forms" | ||
| 244 | : (context == object::form) ? "forms" | ||
| 245 | : (context == object::pronunciation) ? "pronunciations" | ||
| 246 | : throw std::domain_error("Provided context has no associated table"); | ||
| 247 | } | ||
| 248 | |||
| 249 | static const std::list<field> getSelectForContext(object context); | ||
| 250 | |||
| 251 | statement(std::string tableName, filter clause, int nextTableId = 0, int nextWithId = 0); | ||
| 252 | |||
| 253 | condition parseFilter(filter queryFilter); | ||
| 254 | |||
| 255 | std::string instantiateTable(std::string name); | ||
| 256 | |||
| 257 | condition integrate(statement subStmt); | ||
| 258 | |||
| 259 | int nextTableId_; | ||
| 260 | int nextWithId_; | ||
| 261 | |||
| 262 | std::map<std::string, std::string> tables_; | ||
| 263 | std::string topTable_; | ||
| 264 | std::list<join> joins_; | ||
| 265 | std::list<with> withs_; | ||
| 266 | condition topCondition_; | ||
| 267 | |||
| 268 | }; | ||
| 269 | |||
| 270 | }; | ||
| 271 | |||
| 272 | #endif /* end of include guard: STATEMENT_H_29F51659 */ | ||
| diff --git a/lib/util.h b/lib/util.h index fb5fe67..b74b050 100644 --- a/lib/util.h +++ b/lib/util.h | |||
| @@ -1,6 +1,10 @@ | |||
| 1 | #ifndef UTIL_H_15DDCA2D | 1 | #ifndef UTIL_H_15DDCA2D |
| 2 | #define UTIL_H_15DDCA2D | 2 | #define UTIL_H_15DDCA2D |
| 3 | 3 | ||
| 4 | #include <string> | ||
| 5 | #include <sstream> | ||
| 6 | #include <iterator> | ||
| 7 | |||
| 4 | namespace verbly { | 8 | namespace verbly { |
| 5 | 9 | ||
| 6 | template <class InputIterator> | 10 | template <class InputIterator> |
| @@ -21,25 +25,33 @@ namespace verbly { | |||
| 21 | return result.str(); | 25 | return result.str(); |
| 22 | } | 26 | } |
| 23 | 27 | ||
| 24 | template <class Container> | 28 | template <class OutputIterator> |
| 25 | Container split(std::string input, std::string delimiter) | 29 | void split(std::string input, std::string delimiter, OutputIterator out) |
| 26 | { | 30 | { |
| 27 | Container result; | ||
| 28 | |||
| 29 | while (!input.empty()) | 31 | while (!input.empty()) |
| 30 | { | 32 | { |
| 31 | int divider = input.find(delimiter); | 33 | int divider = input.find(delimiter); |
| 32 | if (divider == std::string::npos) | 34 | if (divider == std::string::npos) |
| 33 | { | 35 | { |
| 34 | result.push_back(input); | 36 | *out = input; |
| 37 | out++; | ||
| 35 | 38 | ||
| 36 | input = ""; | 39 | input = ""; |
| 37 | } else { | 40 | } else { |
| 38 | result.push_back(input.substr(0, divider)); | 41 | *out = input.substr(0, divider); |
| 42 | out++; | ||
| 39 | 43 | ||
| 40 | input = input.substr(divider+delimiter.length()); | 44 | input = input.substr(divider+delimiter.length()); |
| 41 | } | 45 | } |
| 42 | } | 46 | } |
| 47 | } | ||
| 48 | |||
| 49 | template <class Container> | ||
| 50 | Container split(std::string input, std::string delimiter) | ||
| 51 | { | ||
| 52 | Container result; | ||
| 53 | |||
| 54 | split(input, delimiter, std::back_inserter(result)); | ||
| 43 | 55 | ||
| 44 | return result; | 56 | return result; |
| 45 | } | 57 | } |
| diff --git a/lib/verb.cpp b/lib/verb.cpp deleted file mode 100644 index 1f45d53..0000000 --- a/lib/verb.cpp +++ /dev/null | |||
| @@ -1,64 +0,0 @@ | |||
| 1 | #include "verbly.h" | ||
| 2 | |||
| 3 | namespace verbly { | ||
| 4 | |||
| 5 | verb::verb() | ||
| 6 | { | ||
| 7 | |||
| 8 | } | ||
| 9 | |||
| 10 | verb::verb(const data& _data, int _id) : word(_data, _id) | ||
| 11 | { | ||
| 12 | |||
| 13 | } | ||
| 14 | |||
| 15 | std::string verb::base_form() const | ||
| 16 | { | ||
| 17 | assert(_valid == true); | ||
| 18 | |||
| 19 | return _infinitive; | ||
| 20 | } | ||
| 21 | |||
| 22 | std::string verb::infinitive_form() const | ||
| 23 | { | ||
| 24 | assert(_valid == true); | ||
| 25 | |||
| 26 | return _infinitive; | ||
| 27 | } | ||
| 28 | |||
| 29 | std::string verb::past_tense_form() const | ||
| 30 | { | ||
| 31 | assert(_valid == true); | ||
| 32 | |||
| 33 | return _past_tense; | ||
| 34 | } | ||
| 35 | |||
| 36 | std::string verb::past_participle_form() const | ||
| 37 | { | ||
| 38 | assert(_valid == true); | ||
| 39 | |||
| 40 | return _past_participle; | ||
| 41 | } | ||
| 42 | |||
| 43 | std::string verb::ing_form() const | ||
| 44 | { | ||
| 45 | assert(_valid == true); | ||
| 46 | |||
| 47 | return _ing_form; | ||
| 48 | } | ||
| 49 | |||
| 50 | std::string verb::s_form() const | ||
| 51 | { | ||
| 52 | assert(_valid == true); | ||
| 53 | |||
| 54 | return _s_form; | ||
| 55 | } | ||
| 56 | |||
| 57 | frame_query verb::frames() const | ||
| 58 | { | ||
| 59 | assert(_valid == true); | ||
| 60 | |||
| 61 | return _data->frames().for_verb(*this); | ||
| 62 | } | ||
| 63 | |||
| 64 | }; | ||
| diff --git a/lib/verb.h b/lib/verb.h deleted file mode 100644 index 7a2486e..0000000 --- a/lib/verb.h +++ /dev/null | |||
| @@ -1,34 +0,0 @@ | |||
| 1 | #ifndef VERB_H_BCC929AD | ||
| 2 | #define VERB_H_BCC929AD | ||
| 3 | |||
| 4 | namespace verbly { | ||
| 5 | |||
| 6 | class frame_query; | ||
| 7 | |||
| 8 | class verb : public word { | ||
| 9 | private: | ||
| 10 | std::string _infinitive; | ||
| 11 | std::string _past_tense; | ||
| 12 | std::string _past_participle; | ||
| 13 | std::string _ing_form; | ||
| 14 | std::string _s_form; | ||
| 15 | |||
| 16 | friend class verb_query; | ||
| 17 | |||
| 18 | public: | ||
| 19 | verb(); | ||
| 20 | verb(const data& _data, int _id); | ||
| 21 | |||
| 22 | std::string base_form() const; | ||
| 23 | std::string infinitive_form() const; | ||
| 24 | std::string past_tense_form() const; | ||
| 25 | std::string past_participle_form() const; | ||
| 26 | std::string ing_form() const; | ||
| 27 | std::string s_form() const; | ||
| 28 | |||
| 29 | frame_query frames() const; | ||
| 30 | }; | ||
| 31 | |||
| 32 | }; | ||
| 33 | |||
| 34 | #endif /* end of include guard: VERB_H_BCC929AD */ | ||
| diff --git a/lib/verb_query.cpp b/lib/verb_query.cpp deleted file mode 100644 index 4e6c253..0000000 --- a/lib/verb_query.cpp +++ /dev/null | |||
| @@ -1,315 +0,0 @@ | |||
| 1 | #include "verbly.h" | ||
| 2 | |||
| 3 | namespace verbly { | ||
| 4 | |||
| 5 | verb_query::verb_query(const data& _data) : _data(_data) | ||
| 6 | { | ||
| 7 | |||
| 8 | } | ||
| 9 | |||
| 10 | verb_query& verb_query::limit(int _limit) | ||
| 11 | { | ||
| 12 | if ((_limit > 0) || (_limit == unlimited)) | ||
| 13 | { | ||
| 14 | this->_limit = _limit; | ||
| 15 | } | ||
| 16 | |||
| 17 | return *this; | ||
| 18 | } | ||
| 19 | |||
| 20 | verb_query& verb_query::random() | ||
| 21 | { | ||
| 22 | this->_random = true; | ||
| 23 | |||
| 24 | return *this; | ||
| 25 | } | ||
| 26 | |||
| 27 | verb_query& verb_query::except(const verb& _word) | ||
| 28 | { | ||
| 29 | _except.push_back(_word); | ||
| 30 | |||
| 31 | return *this; | ||
| 32 | } | ||
| 33 | |||
| 34 | verb_query& verb_query::rhymes_with(const word& _word) | ||
| 35 | { | ||
| 36 | for (auto rhyme : _word.get_rhymes()) | ||
| 37 | { | ||
| 38 | _rhymes.push_back(rhyme); | ||
| 39 | } | ||
| 40 | |||
| 41 | if (dynamic_cast<const verb*>(&_word) != nullptr) | ||
| 42 | { | ||
| 43 | _except.push_back(dynamic_cast<const verb&>(_word)); | ||
| 44 | } | ||
| 45 | |||
| 46 | return *this; | ||
| 47 | } | ||
| 48 | |||
| 49 | verb_query& verb_query::rhymes_with(rhyme _r) | ||
| 50 | { | ||
| 51 | _rhymes.push_back(_r); | ||
| 52 | |||
| 53 | return *this; | ||
| 54 | } | ||
| 55 | |||
| 56 | verb_query& verb_query::has_pronunciation() | ||
| 57 | { | ||
| 58 | this->_has_prn = true; | ||
| 59 | |||
| 60 | return *this; | ||
| 61 | } | ||
| 62 | |||
| 63 | verb_query& verb_query::has_rhyming_noun() | ||
| 64 | { | ||
| 65 | _has_rhyming_noun = true; | ||
| 66 | |||
| 67 | return *this; | ||
| 68 | } | ||
| 69 | |||
| 70 | verb_query& verb_query::has_rhyming_adjective() | ||
| 71 | { | ||
| 72 | _has_rhyming_adjective = true; | ||
| 73 | |||
| 74 | return *this; | ||
| 75 | } | ||
| 76 | |||
| 77 | verb_query& verb_query::has_rhyming_adverb() | ||
| 78 | { | ||
| 79 | _has_rhyming_adverb = true; | ||
| 80 | |||
| 81 | return *this; | ||
| 82 | } | ||
| 83 | |||
| 84 | verb_query& verb_query::has_rhyming_verb() | ||
| 85 | { | ||
| 86 | _has_rhyming_verb = true; | ||
| 87 | |||
| 88 | return *this; | ||
| 89 | } | ||
| 90 | |||
| 91 | verb_query& verb_query::with_stress(filter<std::vector<bool>> _arg) | ||
| 92 | { | ||
| 93 | _stress = _arg; | ||
| 94 | |||
| 95 | return *this; | ||
| 96 | } | ||
| 97 | |||
| 98 | verb_query& verb_query::has_frames() | ||
| 99 | { | ||
| 100 | this->_has_frames = true; | ||
| 101 | |||
| 102 | return *this; | ||
| 103 | } | ||
| 104 | |||
| 105 | std::list<verb> verb_query::run() const | ||
| 106 | { | ||
| 107 | std::stringstream construct; | ||
| 108 | construct << "SELECT verb_id, infinitive, past_tense, past_participle, ing_form, s_form FROM verbs"; | ||
| 109 | std::list<std::string> conditions; | ||
| 110 | std::list<binding> bindings; | ||
| 111 | |||
| 112 | if (_has_prn) | ||
| 113 | { | ||
| 114 | conditions.push_back("verb_id IN (SELECT verb_id FROM verb_pronunciations)"); | ||
| 115 | } | ||
| 116 | |||
| 117 | if (!_rhymes.empty()) | ||
| 118 | { | ||
| 119 | std::list<std::string> clauses(_rhymes.size(), "(prerhyme != ? AND rhyme = ?)"); | ||
| 120 | std::string cond = "verb_id IN (SELECT verb_id FROM verb_pronunciations WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
| 121 | conditions.push_back(cond); | ||
| 122 | |||
| 123 | for (auto rhy : _rhymes) | ||
| 124 | { | ||
| 125 | bindings.emplace_back(rhy.get_prerhyme()); | ||
| 126 | bindings.emplace_back(rhy.get_rhyme()); | ||
| 127 | } | ||
| 128 | } | ||
| 129 | |||
| 130 | if (_has_rhyming_noun) | ||
| 131 | { | ||
| 132 | conditions.push_back("verb_id IN (SELECT a.verb_id FROM verbs AS a INNER JOIN verb_pronunciations AS curp ON curp.noun_id = a.adverb_id INNER JOIN noun_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)"); | ||
| 133 | } | ||
| 134 | |||
| 135 | if (_has_rhyming_adjective) | ||
| 136 | { | ||
| 137 | conditions.push_back("verb_id IN (SELECT a.verb_id FROM verbs AS a INNER JOIN verb_pronunciations AS curp ON curp.noun_id = a.adverb_id INNER JOIN adjective_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)"); | ||
| 138 | } | ||
| 139 | |||
| 140 | if (_has_rhyming_adverb) | ||
| 141 | { | ||
| 142 | conditions.push_back("verb_id IN (SELECT a.verb_id FROM verbs AS a INNER JOIN verb_pronunciations AS curp ON curp.noun_id = a.adverb_id INNER JOIN adverb_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)"); | ||
| 143 | } | ||
| 144 | |||
| 145 | if (_has_rhyming_verb) | ||
| 146 | { | ||
| 147 | conditions.push_back("verb_id IN (SELECT a.verb_id FROM verbs AS a INNER JOIN verb_pronunciations AS curp ON curp.noun_id = a.adverb_id INNER JOIN verb_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme AND rhmp.verb_id != curp.verb_id)"); | ||
| 148 | } | ||
| 149 | |||
| 150 | if (!_stress.empty()) | ||
| 151 | { | ||
| 152 | std::stringstream cond; | ||
| 153 | if (_stress.get_notlogic()) | ||
| 154 | { | ||
| 155 | cond << "verb_id NOT IN"; | ||
| 156 | } else { | ||
| 157 | cond << "verb_id IN"; | ||
| 158 | } | ||
| 159 | |||
| 160 | cond << "(SELECT verb_id FROM verb_pronunciations WHERE "; | ||
| 161 | |||
| 162 | std::function<std::string (filter<std::vector<bool>>, bool)> recur = [&] (filter<std::vector<bool>> f, bool notlogic) -> std::string { | ||
| 163 | switch (f.get_type()) | ||
| 164 | { | ||
| 165 | case filter<std::vector<bool>>::type::singleton: | ||
| 166 | { | ||
| 167 | std::ostringstream _val; | ||
| 168 | for (auto syl : f.get_elem()) | ||
| 169 | { | ||
| 170 | if (syl) | ||
| 171 | { | ||
| 172 | _val << "1"; | ||
| 173 | } else { | ||
| 174 | _val << "0"; | ||
| 175 | } | ||
| 176 | } | ||
| 177 | |||
| 178 | bindings.emplace_back(_val.str()); | ||
| 179 | |||
| 180 | if (notlogic == f.get_notlogic()) | ||
| 181 | { | ||
| 182 | return "stress = ?"; | ||
| 183 | } else { | ||
| 184 | return "stress != ?"; | ||
| 185 | } | ||
| 186 | } | ||
| 187 | |||
| 188 | case filter<std::vector<bool>>::type::group: | ||
| 189 | { | ||
| 190 | bool truelogic = notlogic != f.get_notlogic(); | ||
| 191 | |||
| 192 | std::list<std::string> clauses; | ||
| 193 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<std::vector<bool>> f2) { | ||
| 194 | return recur(f2, truelogic); | ||
| 195 | }); | ||
| 196 | |||
| 197 | if (truelogic == f.get_orlogic()) | ||
| 198 | { | ||
| 199 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
| 200 | } else { | ||
| 201 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
| 202 | } | ||
| 203 | } | ||
| 204 | } | ||
| 205 | }; | ||
| 206 | |||
| 207 | cond << recur(_stress, _stress.get_notlogic()); | ||
| 208 | cond << ")"; | ||
| 209 | conditions.push_back(cond.str()); | ||
| 210 | } | ||
| 211 | |||
| 212 | for (auto except : _except) | ||
| 213 | { | ||
| 214 | conditions.push_back("verb_id != ?"); | ||
| 215 | bindings.emplace_back(except._id); | ||
| 216 | } | ||
| 217 | |||
| 218 | if (!_has_frames) | ||
| 219 | { | ||
| 220 | conditions.push_back("verb_id IN (SELECT verb_id FROM verb_groups)"); | ||
| 221 | } | ||
| 222 | |||
| 223 | if (!conditions.empty()) | ||
| 224 | { | ||
| 225 | construct << " WHERE "; | ||
| 226 | construct << verbly::implode(std::begin(conditions), std::end(conditions), " AND "); | ||
| 227 | } | ||
| 228 | |||
| 229 | if (_random) | ||
| 230 | { | ||
| 231 | construct << " ORDER BY RANDOM()"; | ||
| 232 | } | ||
| 233 | |||
| 234 | if (_limit != unlimited) | ||
| 235 | { | ||
| 236 | construct << " LIMIT " << _limit; | ||
| 237 | } | ||
| 238 | |||
| 239 | sqlite3_stmt* ppstmt; | ||
| 240 | std::string query = construct.str(); | ||
| 241 | if (sqlite3_prepare_v2(_data.ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
| 242 | { | ||
| 243 | throw std::runtime_error(sqlite3_errmsg(_data.ppdb)); | ||
| 244 | } | ||
| 245 | |||
| 246 | int i = 1; | ||
| 247 | for (auto& binding : bindings) | ||
| 248 | { | ||
| 249 | switch (binding.get_type()) | ||
| 250 | { | ||
| 251 | case binding::type::integer: | ||
| 252 | { | ||
| 253 | sqlite3_bind_int(ppstmt, i, binding.get_integer()); | ||
| 254 | |||
| 255 | break; | ||
| 256 | } | ||
| 257 | |||
| 258 | case binding::type::string: | ||
| 259 | { | ||
| 260 | sqlite3_bind_text(ppstmt, i, binding.get_string().c_str(), binding.get_string().length(), SQLITE_TRANSIENT); | ||
| 261 | |||
| 262 | break; | ||
| 263 | } | ||
| 264 | } | ||
| 265 | |||
| 266 | i++; | ||
| 267 | } | ||
| 268 | |||
| 269 | std::list<verb> output; | ||
| 270 | while (sqlite3_step(ppstmt) == SQLITE_ROW) | ||
| 271 | { | ||
| 272 | verb tnc {_data, sqlite3_column_int(ppstmt, 0)}; | ||
| 273 | tnc._infinitive = std::string(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 1))); | ||
| 274 | tnc._past_tense = std::string(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 2))); | ||
| 275 | tnc._past_participle = std::string(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 3))); | ||
| 276 | tnc._ing_form = std::string(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 4))); | ||
| 277 | tnc._s_form = std::string(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 5))); | ||
| 278 | |||
| 279 | output.push_back(tnc); | ||
| 280 | } | ||
| 281 | |||
| 282 | sqlite3_finalize(ppstmt); | ||
| 283 | |||
| 284 | for (auto& verb : output) | ||
| 285 | { | ||
| 286 | query = "SELECT pronunciation, prerhyme, rhyme FROM verb_pronunciations WHERE verb_id = ?"; | ||
| 287 | if (sqlite3_prepare_v2(_data.ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
| 288 | { | ||
| 289 | throw std::runtime_error(sqlite3_errmsg(_data.ppdb)); | ||
| 290 | } | ||
| 291 | |||
| 292 | sqlite3_bind_int(ppstmt, 1, verb._id); | ||
| 293 | |||
| 294 | while (sqlite3_step(ppstmt) == SQLITE_ROW) | ||
| 295 | { | ||
| 296 | std::string pronunciation(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 0))); | ||
| 297 | auto phonemes = verbly::split<std::list<std::string>>(pronunciation, " "); | ||
| 298 | |||
| 299 | verb.pronunciations.push_back(phonemes); | ||
| 300 | |||
| 301 | if ((sqlite3_column_type(ppstmt, 1) != SQLITE_NULL) && (sqlite3_column_type(ppstmt, 2) != SQLITE_NULL)) | ||
| 302 | { | ||
| 303 | std::string prerhyme(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 1))); | ||
| 304 | std::string rhyming(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 2))); | ||
| 305 | verb.rhymes.emplace_back(prerhyme, rhyming); | ||
| 306 | } | ||
| 307 | } | ||
| 308 | |||
| 309 | sqlite3_finalize(ppstmt); | ||
| 310 | } | ||
| 311 | |||
| 312 | return output; | ||
| 313 | } | ||
| 314 | |||
| 315 | }; | ||
| diff --git a/lib/verb_query.h b/lib/verb_query.h deleted file mode 100644 index 566ae37..0000000 --- a/lib/verb_query.h +++ /dev/null | |||
| @@ -1,45 +0,0 @@ | |||
| 1 | #ifndef VERB_QUERY_H_34E5A679 | ||
| 2 | #define VERB_QUERY_H_34E5A679 | ||
| 3 | |||
| 4 | namespace verbly { | ||
| 5 | |||
| 6 | class verb_query { | ||
| 7 | public: | ||
| 8 | verb_query(const data& _data); | ||
| 9 | |||
| 10 | verb_query& limit(int _limit); | ||
| 11 | verb_query& random(); | ||
| 12 | verb_query& except(const verb& _word); | ||
| 13 | verb_query& rhymes_with(const word& _word); | ||
| 14 | verb_query& rhymes_with(rhyme _r); | ||
| 15 | verb_query& has_pronunciation(); | ||
| 16 | verb_query& has_rhyming_noun(); | ||
| 17 | verb_query& has_rhyming_adjective(); | ||
| 18 | verb_query& has_rhyming_adverb(); | ||
| 19 | verb_query& has_rhyming_verb(); | ||
| 20 | verb_query& with_stress(filter<std::vector<bool>> _arg); | ||
| 21 | |||
| 22 | verb_query& has_frames(); | ||
| 23 | |||
| 24 | std::list<verb> run() const; | ||
| 25 | |||
| 26 | const static int unlimited = -1; | ||
| 27 | |||
| 28 | private: | ||
| 29 | const data& _data; | ||
| 30 | int _limit = unlimited; | ||
| 31 | bool _random = false; | ||
| 32 | std::list<rhyme> _rhymes; | ||
| 33 | std::list<verb> _except; | ||
| 34 | bool _has_prn = false; | ||
| 35 | bool _has_frames = false; | ||
| 36 | bool _has_rhyming_noun = false; | ||
| 37 | bool _has_rhyming_adjective = false; | ||
| 38 | bool _has_rhyming_adverb = false; | ||
| 39 | bool _has_rhyming_verb = false; | ||
| 40 | filter<std::vector<bool>> _stress; | ||
| 41 | }; | ||
| 42 | |||
| 43 | }; | ||
| 44 | |||
| 45 | #endif /* end of include guard: VERB_QUERY_H_34E5A679 */ | ||
| diff --git a/lib/verbly.h b/lib/verbly.h index cfaf5bc..6dfc01a 100644 --- a/lib/verbly.h +++ b/lib/verbly.h | |||
| @@ -1,35 +1,17 @@ | |||
| 1 | #ifndef VERBLY_H_5B39CE50 | 1 | #ifndef VERBLY_H_5B39CE50 |
| 2 | #define VERBLY_H_5B39CE50 | 2 | #define VERBLY_H_5B39CE50 |
| 3 | 3 | ||
| 4 | #include <string> | ||
| 5 | #include <list> | ||
| 6 | #include <sstream> | ||
| 7 | #include <algorithm> | ||
| 8 | #include <cassert> | ||
| 9 | #include <set> | ||
| 10 | #include <stdexcept> | ||
| 11 | #include <vector> | ||
| 12 | #include <map> | ||
| 13 | #include <iterator> | ||
| 14 | #include <sstream> | ||
| 15 | #include <functional> | ||
| 16 | #include <iostream> | ||
| 17 | #include <new> | ||
| 18 | |||
| 19 | #include "util.h" | 4 | #include "util.h" |
| 20 | #include "data.h" | 5 | #include "database.h" |
| 6 | #include "filter.h" | ||
| 7 | #include "field.h" | ||
| 8 | #include "query.h" | ||
| 9 | #include "notion.h" | ||
| 21 | #include "word.h" | 10 | #include "word.h" |
| 22 | #include "verb.h" | 11 | #include "group.h" |
| 23 | #include "adverb.h" | ||
| 24 | #include "adjective.h" | ||
| 25 | #include "noun.h" | ||
| 26 | #include "frame.h" | 12 | #include "frame.h" |
| 27 | #include "preposition.h" | 13 | #include "lemma.h" |
| 28 | #include "token.h" | 14 | #include "form.h" |
| 29 | #include "noun_query.h" | 15 | #include "pronunciation.h" |
| 30 | #include "adverb_query.h" | ||
| 31 | #include "adjective_query.h" | ||
| 32 | #include "verb_query.h" | ||
| 33 | #include "frame_query.h" | ||
| 34 | 16 | ||
| 35 | #endif /* end of include guard: VERBLY_H_5B39CE50 */ | 17 | #endif /* end of include guard: VERBLY_H_5B39CE50 */ |
| diff --git a/lib/word.cpp b/lib/word.cpp index 49e34a1..3edf2d2 100644 --- a/lib/word.cpp +++ b/lib/word.cpp | |||
| @@ -1,60 +1,112 @@ | |||
| 1 | #include "verbly.h" | 1 | #include "word.h" |
| 2 | #include <algorithm> | 2 | #include <sqlite3.h> |
| 3 | #include "form.h" | ||
| 4 | #include "util.h" | ||
| 5 | #include "database.h" | ||
| 6 | #include "query.h" | ||
| 3 | 7 | ||
| 4 | namespace verbly { | 8 | namespace verbly { |
| 5 | 9 | ||
| 6 | rhyme::rhyme(std::string prerhyme, std::string phonemes) : _prerhyme(prerhyme), _rhyme(phonemes) | 10 | const object word::objectType = object::word; |
| 7 | { | ||
| 8 | |||
| 9 | } | ||
| 10 | 11 | ||
| 11 | std::string rhyme::get_prerhyme() const | 12 | const std::list<std::string> word::select = {"word_id", "notion_id", "lemma_id", "tag_count", "position", "group_id"}; |
| 12 | { | ||
| 13 | return _prerhyme; | ||
| 14 | } | ||
| 15 | 13 | ||
| 16 | std::string rhyme::get_rhyme() const | 14 | const field word::id = field::integerField(object::word, "word_id"); |
| 17 | { | 15 | const field word::tagCount = field::integerField(object::word, "tag_count", true); |
| 18 | return _rhyme; | 16 | const field word::adjectivePosition = field::integerField(object::word, "position", true); |
| 19 | } | 17 | |
| 18 | const field word::notion = field::joinField(object::word, "notion_id", object::notion); | ||
| 19 | const field word::lemma = field::joinField(object::word, "lemma_id", object::lemma); | ||
| 20 | const field word::group = field::joinField(object::word, "group_id", object::group, true); | ||
| 21 | |||
| 22 | const field word::antonyms = field::selfJoin(object::word, "word_id", "antonymy", "antonym_2_id", "antonym_1_id"); | ||
| 23 | |||
| 24 | const field word::specifications = field::selfJoin(object::word, "word_id", "specification", "general_id", "specific_id"); | ||
| 25 | const field word::generalizations = field::selfJoin(object::word, "word_id", "specification", "specific_id", "general_id"); | ||
| 20 | 26 | ||
| 21 | bool rhyme::operator==(const rhyme& other) const | 27 | const field word::pertainyms = field::selfJoin(object::word, "word_id", "pertainymy", "noun_id", "pertainym_id"); |
| 28 | const field word::antiPertainyms = field::selfJoin(object::word, "word_id", "pertainymy", "pertainym_id", "noun_id"); | ||
| 29 | |||
| 30 | const field word::mannernyms = field::selfJoin(object::word, "word_id", "mannernymy", "adjective_id", "mannernym_id"); | ||
| 31 | const field word::antiMannernyms = field::selfJoin(object::word, "word_id", "mannernymy", "mannernym_id", "adjective_id"); | ||
| 32 | |||
| 33 | const field word::usageTerms = field::selfJoin(object::word, "word_id", "usage", "domain_id", "term_id"); | ||
| 34 | const field word::usageDomains = field::selfJoin(object::word, "word_id", "usage", "term_id", "domain_id"); | ||
| 35 | |||
| 36 | const field word::topicalTerms = field::selfJoin(object::word, "word_id", "topicality", "domain_id", "term_id"); | ||
| 37 | const field word::topicalDomains = field::selfJoin(object::word, "word_id", "topicality", "term_id", "domain_id"); | ||
| 38 | |||
| 39 | const field word::regionalTerms = field::selfJoin(object::word, "word_id", "regionality", "domain_id", "term_id"); | ||
| 40 | const field word::regionalDomains = field::selfJoin(object::word, "word_id", "regionality", "term_id", "domain_id"); | ||
| 41 | |||
| 42 | word::word(const database& db, sqlite3_stmt* row) : db_(&db), valid_(true) | ||
| 22 | { | 43 | { |
| 23 | return std::tie(_prerhyme, _rhyme) == std::tie(other._prerhyme, other._rhyme); | 44 | id_ = sqlite3_column_int(row, 0); |
| 45 | notionId_ = sqlite3_column_int(row, 1); | ||
| 46 | lemmaId_ = sqlite3_column_int(row, 2); | ||
| 47 | |||
| 48 | if (sqlite3_column_type(row, 3) != SQLITE_NULL) | ||
| 49 | { | ||
| 50 | hasTagCount_ = true; | ||
| 51 | tagCount_ = sqlite3_column_int(row, 3); | ||
| 52 | } | ||
| 53 | |||
| 54 | if (sqlite3_column_type(row, 4) != SQLITE_NULL) | ||
| 55 | { | ||
| 56 | adjectivePosition_ = static_cast<positioning>(sqlite3_column_int(row, 4)); | ||
| 57 | } | ||
| 58 | |||
| 59 | if (sqlite3_column_type(row, 5) != SQLITE_NULL) | ||
| 60 | { | ||
| 61 | hasGroup_ = true; | ||
| 62 | groupId_ = sqlite3_column_int(row, 5); | ||
| 63 | } | ||
| 24 | } | 64 | } |
| 25 | 65 | ||
| 26 | word::word() | 66 | const notion& word::getNotion() const |
| 27 | { | 67 | { |
| 68 | if (!valid_) | ||
| 69 | { | ||
| 70 | throw std::domain_error("Bad access to uninitialized word"); | ||
| 71 | } | ||
| 72 | |||
| 73 | if (!notion_) | ||
| 74 | { | ||
| 75 | notion_ = db_->notions(notion::id == notionId_).first(); | ||
| 76 | } | ||
| 28 | 77 | ||
| 78 | return notion_; | ||
| 29 | } | 79 | } |
| 30 | 80 | ||
| 31 | word::word(const data& _data, int _id) : _data(&_data), _id(_id), _valid(true) | 81 | const lemma& word::getLemma() const |
| 32 | { | 82 | { |
| 83 | if (!valid_) | ||
| 84 | { | ||
| 85 | throw std::domain_error("Bad access to uninitialized word"); | ||
| 86 | } | ||
| 33 | 87 | ||
| 88 | if (!lemma_) | ||
| 89 | { | ||
| 90 | lemma_ = db_->lemmas(lemma::id == lemmaId_).first(); | ||
| 91 | } | ||
| 92 | |||
| 93 | return lemma_; | ||
| 34 | } | 94 | } |
| 35 | 95 | ||
| 36 | std::list<rhyme> word::get_rhymes() const | 96 | std::string word::getBaseForm() const |
| 37 | { | 97 | { |
| 38 | assert(_valid == true); | 98 | return getLemma().getBaseForm().getText(); |
| 39 | |||
| 40 | return rhymes; | ||
| 41 | } | 99 | } |
| 42 | 100 | ||
| 43 | bool word::starts_with_vowel_sound() const | 101 | std::list<std::string> word::getInflections(inflection category) const |
| 44 | { | 102 | { |
| 45 | assert(_valid == true); | 103 | std::list<std::string> result; |
| 46 | 104 | for (const form& infl : getLemma().getInflections(category)) | |
| 47 | if (pronunciations.size() > 0) | ||
| 48 | { | 105 | { |
| 49 | return std::any_of(std::begin(pronunciations), std::end(pronunciations), [] (std::list<std::string> phonemes) { | 106 | result.push_back(infl.getText()); |
| 50 | return (phonemes.front().find_first_of("012") != std::string::npos); | ||
| 51 | }); | ||
| 52 | } else { | ||
| 53 | // If the word is not in CMUDICT, fall back to checking whether the first letter is a vowel | ||
| 54 | // Not perfect but will work in most cases | ||
| 55 | char ch = tolower(base_form().front()); | ||
| 56 | return (ch == 'a') || (ch == 'e') || (ch == 'i') || (ch == 'o') || (ch == 'u'); | ||
| 57 | } | 107 | } |
| 108 | |||
| 109 | return result; | ||
| 58 | } | 110 | } |
| 59 | 111 | ||
| 60 | }; | 112 | }; |
| diff --git a/lib/word.h b/lib/word.h index 08797a3..f71dad9 100644 --- a/lib/word.h +++ b/lib/word.h | |||
| @@ -1,48 +1,173 @@ | |||
| 1 | #ifndef WORD_H_8FC89498 | 1 | #ifndef WORD_H_DF91B1B4 |
| 2 | #define WORD_H_8FC89498 | 2 | #define WORD_H_DF91B1B4 |
| 3 | |||
| 4 | #include <stdexcept> | ||
| 5 | #include <map> | ||
| 6 | #include "field.h" | ||
| 7 | #include "filter.h" | ||
| 8 | #include "notion.h" | ||
| 9 | #include "lemma.h" | ||
| 10 | #include "group.h" | ||
| 11 | |||
| 12 | struct sqlite3_stmt; | ||
| 3 | 13 | ||
| 4 | namespace verbly { | 14 | namespace verbly { |
| 5 | 15 | ||
| 6 | class rhyme { | 16 | class database; |
| 7 | public: | 17 | |
| 8 | rhyme(std::string prerhyme, std::string phonemes); | 18 | class word { |
| 19 | public: | ||
| 20 | |||
| 21 | // Default constructor | ||
| 22 | |||
| 23 | word() = default; | ||
| 24 | |||
| 25 | // Construct from database | ||
| 26 | |||
| 27 | word(const database& db, sqlite3_stmt* row); | ||
| 28 | |||
| 29 | // Accessors | ||
| 30 | |||
| 31 | operator bool() const | ||
| 32 | { | ||
| 33 | return valid_; | ||
| 34 | } | ||
| 35 | |||
| 36 | int getId() const | ||
| 37 | { | ||
| 38 | if (!valid_) | ||
| 39 | { | ||
| 40 | throw std::domain_error("Bad access to uninitialized word"); | ||
| 41 | } | ||
| 9 | 42 | ||
| 10 | std::string get_prerhyme() const; | 43 | return id_; |
| 11 | std::string get_rhyme() const; | 44 | } |
| 45 | |||
| 46 | bool hasTagCount() const | ||
| 47 | { | ||
| 48 | if (!valid_) | ||
| 49 | { | ||
| 50 | throw std::domain_error("Bad access to uninitialized word"); | ||
| 51 | } | ||
| 12 | 52 | ||
| 13 | bool operator==(const rhyme& other) const; | 53 | return hasTagCount_; |
| 54 | } | ||
| 55 | |||
| 56 | int getTagCount() const | ||
| 57 | { | ||
| 58 | if (!valid_) | ||
| 59 | { | ||
| 60 | throw std::domain_error("Bad access to uninitialized word"); | ||
| 61 | } | ||
| 14 | 62 | ||
| 15 | private: | 63 | if (!hasTagCount_) |
| 16 | std::string _prerhyme; | 64 | { |
| 17 | std::string _rhyme; | 65 | throw std::domain_error("Word has no tag count"); |
| 18 | }; | 66 | } |
| 19 | |||
| 20 | class word { | ||
| 21 | protected: | ||
| 22 | const data* _data; | ||
| 23 | int _id; | ||
| 24 | bool _valid = false; | ||
| 25 | 67 | ||
| 26 | std::list<std::list<std::string>> pronunciations; | 68 | return tagCount_; |
| 27 | std::list<rhyme> rhymes; | 69 | } |
| 70 | |||
| 71 | bool hasAdjectivePositioning() const | ||
| 72 | { | ||
| 73 | if (!valid_) | ||
| 74 | { | ||
| 75 | throw std::domain_error("Bad access to uninitialized word"); | ||
| 76 | } | ||
| 28 | 77 | ||
| 29 | word(); | 78 | return (adjectivePosition_ != positioning::undefined); |
| 30 | word(const data& _data, int _id); | 79 | } |
| 80 | |||
| 81 | positioning getAdjectivePosition() const | ||
| 82 | { | ||
| 83 | if (!valid_) | ||
| 84 | { | ||
| 85 | throw std::domain_error("Bad access to uninitialized word"); | ||
| 86 | } | ||
| 31 | 87 | ||
| 32 | friend class adjective_query; | 88 | if (adjectivePosition_ == positioning::undefined) |
| 33 | friend class verb_query; | 89 | { |
| 34 | friend class noun_query; | 90 | throw std::domain_error("Word has no adjective position"); |
| 35 | friend class adverb_query; | 91 | } |
| 36 | friend class frame_query; | ||
| 37 | friend class preposition_query; | ||
| 38 | |||
| 39 | public: | ||
| 40 | virtual std::string base_form() const = 0; | ||
| 41 | 92 | ||
| 42 | std::list<rhyme> get_rhymes() const; | 93 | return adjectivePosition_; |
| 43 | bool starts_with_vowel_sound() const; | 94 | } |
| 95 | |||
| 96 | const notion& getNotion() const; | ||
| 97 | |||
| 98 | const lemma& getLemma() const; | ||
| 99 | |||
| 100 | // Convenience accessors | ||
| 101 | |||
| 102 | std::string getBaseForm() const; | ||
| 103 | |||
| 104 | std::list<std::string> getInflections(inflection infl) const; | ||
| 105 | |||
| 106 | // Type info | ||
| 107 | |||
| 108 | static const object objectType; | ||
| 109 | |||
| 110 | static const std::list<std::string> select; | ||
| 111 | |||
| 112 | // Query fields | ||
| 113 | |||
| 114 | static const field id; | ||
| 115 | static const field tagCount; | ||
| 116 | static const field adjectivePosition; | ||
| 117 | |||
| 118 | operator filter() const | ||
| 119 | { | ||
| 120 | return (id == id_); | ||
| 121 | } | ||
| 122 | |||
| 123 | // Relationships with other objects | ||
| 124 | |||
| 125 | static const field notion; | ||
| 126 | static const field lemma; | ||
| 127 | static const field group; | ||
| 128 | |||
| 129 | // Relationships with self | ||
| 130 | |||
| 131 | static const field antonyms; | ||
| 132 | |||
| 133 | static const field specifications; | ||
| 134 | static const field generalizations; | ||
| 135 | |||
| 136 | static const field pertainyms; | ||
| 137 | static const field antiPertainyms; | ||
| 138 | |||
| 139 | static const field mannernyms; | ||
| 140 | static const field antiMannernyms; | ||
| 141 | |||
| 142 | static const field usageTerms; | ||
| 143 | static const field usageDomains; | ||
| 144 | |||
| 145 | static const field topicalTerms; | ||
| 146 | static const field topicalDomains; | ||
| 147 | |||
| 148 | static const field regionalTerms; | ||
| 149 | static const field regionalDomains; | ||
| 150 | |||
| 151 | private: | ||
| 152 | bool valid_ = false; | ||
| 153 | |||
| 154 | int id_; | ||
| 155 | bool hasTagCount_ = false; | ||
| 156 | int tagCount_; | ||
| 157 | positioning adjectivePosition_ = positioning::undefined; | ||
| 158 | int notionId_; | ||
| 159 | int lemmaId_; | ||
| 160 | bool hasGroup_ = false; | ||
| 161 | int groupId_; | ||
| 162 | |||
| 163 | const database* db_; | ||
| 164 | |||
| 165 | mutable class notion notion_; | ||
| 166 | mutable class lemma lemma_; | ||
| 167 | mutable class group group_; | ||
| 168 | |||
| 44 | }; | 169 | }; |
| 45 | 170 | ||
| 46 | }; | 171 | }; |
| 47 | 172 | ||
| 48 | #endif /* end of include guard: WORD_H_8FC89498 */ | 173 | #endif /* end of include guard: WORD_H_DF91B1B4 */ |
