From e4fa0cb86d97c23c24cd7bdd62c23f03eed312da Mon Sep 17 00:00:00 2001 From: Kelly Rauchenberger Date: Sun, 5 Feb 2017 08:56:39 -0500 Subject: Flattened selrestrs Now, instead of logically being a tree of positive/negative restrictions that are ANDed/ORed together, selrestrs are a flat set of positive restrictions that are ORed together. They are stored as strings in a table called selrestrs, just like synrestrs, which makes them a lot more queryable now as well. This change required some changes to the VerbNet data, because we needed to consolidate any ANDed clauses into single selrestrs, as well as convert any negative selrestrs into positive ones. The changes made are detailed on the wiki. Preposition choices are now encoded as comma-separated lists instead of using JSON. This change, along with the selrestrs one, allows us to remove verbly's dependency on nlohmann::json. --- lib/database.cpp | 32 ++++++ lib/database.h | 2 + lib/part.cpp | 41 ++++---- lib/part.h | 22 +++- lib/selrestr.cpp | 309 ------------------------------------------------------- lib/selrestr.h | 90 ---------------- lib/verbly.h | 1 - 7 files changed, 71 insertions(+), 426 deletions(-) delete mode 100644 lib/selrestr.cpp delete mode 100644 lib/selrestr.h (limited to 'lib') diff --git a/lib/database.cpp b/lib/database.cpp index 563ec31..c7b37ec 100644 --- a/lib/database.cpp +++ b/lib/database.cpp @@ -76,6 +76,38 @@ namespace verbly { return query(*this, ppdb_, std::move(where), std::move(sortOrder), limit); } + std::set database::selrestrs(int partId) const + { + std::string queryString = "SELECT selrestr FROM selrestrs WHERE part_id = ?"; + + sqlite3_stmt* ppstmt; + if (sqlite3_prepare_v2(ppdb_, queryString.c_str(), queryString.length(), &ppstmt, NULL) != SQLITE_OK) + { + std::string errorMsg = sqlite3_errmsg(ppdb_); + sqlite3_finalize(ppstmt); + + throw database_error("Error preparing query", errorMsg); + } + + if (sqlite3_bind_int(ppstmt, 1, partId) != SQLITE_OK) + { + std::string 
errorMsg = sqlite3_errmsg(ppdb_); + sqlite3_finalize(ppstmt); + + throw database_error("Error binding value to query", errorMsg); + } + + std::set result; + while (sqlite3_step(ppstmt) == SQLITE_ROW) + { + result.insert(reinterpret_cast(sqlite3_column_blob(ppstmt, 0))); + } + + sqlite3_finalize(ppstmt); + + return result; + } + std::set database::synrestrs(int partId) const { std::string queryString = "SELECT synrestr FROM synrestrs WHERE part_id = ?"; diff --git a/lib/database.h b/lib/database.h index 0b10eba..5567061 100644 --- a/lib/database.h +++ b/lib/database.h @@ -62,6 +62,8 @@ namespace verbly { query pronunciations(filter where, order sortOrder = {}, int limit = 1) const; + std::set selrestrs(int partId) const; + std::set synrestrs(int partId) const; private: diff --git a/lib/part.cpp b/lib/part.cpp index cbd951b..341d4bb 100644 --- a/lib/part.cpp +++ b/lib/part.cpp @@ -1,14 +1,14 @@ #include "part.h" #include #include -#include "selrestr.h" #include "database.h" +#include "util.h" namespace verbly { const object part::objectType = object::part; - const std::list part::select = {"part_id", "frame_id", "part_index", "type", "role", "selrestrs", "prepositions", "preposition_literality", "literal_value"}; + const std::list part::select = {"part_id", "frame_id", "part_index", "type", "role", "prepositions", "preposition_literality", "literal_value"}; const field part::index = field::integerField(object::part, "part_index"); const field part::type = field::integerField(object::part, "type"); @@ -17,17 +17,21 @@ namespace verbly { const field part::frames = field::joinField(object::part, "frame_id", object::frame); + const field part::selrestr_field::selrestrJoin = field::joinField(object::part, "part_id", "selrestrs"); + const field part::selrestr_field::selrestrField = field::stringField("selrestrs", "selrestr"); + const field part::synrestr_field::synrestrJoin = field::joinField(object::part, "part_id", "synrestrs"); const field 
part::synrestr_field::synrestrField = field::stringField("synrestrs", "synrestr"); + const part::selrestr_field part::selrestrs = {}; const part::synrestr_field part::synrestrs = {}; - part part::createNounPhrase(std::string role, selrestr selrestrs, std::set synrestrs) + part part::createNounPhrase(std::string role, std::set selrestrs, std::set synrestrs) { part p(part_type::noun_phrase); new(&p.noun_phrase_.role) std::string(std::move(role)); - new(&p.noun_phrase_.selrestrs) selrestr(std::move(selrestrs)); + new(&p.noun_phrase_.selrestrs) std::set(std::move(selrestrs)); new(&p.noun_phrase_.synrestrs) std::set(std::move(synrestrs)); return p; @@ -78,7 +82,7 @@ namespace verbly { case part_type::noun_phrase: { new(&noun_phrase_.role) std::string(reinterpret_cast(sqlite3_column_blob(row, 4))); - new(&noun_phrase_.selrestrs) selrestr(nlohmann::json::parse(reinterpret_cast(sqlite3_column_blob(row, 5)))); + new(&noun_phrase_.selrestrs) std::set(db.selrestrs(id)); new(&noun_phrase_.synrestrs) std::set(db.synrestrs(id)); break; @@ -86,22 +90,17 @@ namespace verbly { case part_type::preposition: { - new(&preposition_.choices) std::vector(); - preposition_.literal = (sqlite3_column_int(row, 7) == 1); - - std::string choicesJsonStr(reinterpret_cast(sqlite3_column_blob(row, 6))); - nlohmann::json choicesJson = nlohmann::json::parse(std::move(choicesJsonStr)); - for (const nlohmann::json& choiceJson : choicesJson) - { - preposition_.choices.push_back(choiceJson.get()); - } + std::string serializedChoices(reinterpret_cast(sqlite3_column_blob(row, 5))); + new(&preposition_.choices) std::vector(split>(serializedChoices, ",")); + + preposition_.literal = (sqlite3_column_int(row, 6) == 1); break; } case part_type::literal: { - new(&literal_) std::string(reinterpret_cast(sqlite3_column_blob(row, 8))); + new(&literal_) std::string(reinterpret_cast(sqlite3_column_blob(row, 7))); break; } @@ -125,7 +124,7 @@ namespace verbly { case part_type::noun_phrase: { new(&noun_phrase_.role) 
std::string(other.noun_phrase_.role); - new(&noun_phrase_.selrestrs) selrestr(other.noun_phrase_.selrestrs); + new(&noun_phrase_.selrestrs) std::set(other.noun_phrase_.selrestrs); new(&noun_phrase_.synrestrs) std::set(other.noun_phrase_.synrestrs); break; @@ -174,7 +173,7 @@ namespace verbly { type tempType = first.type_; std::string tempRole; - selrestr tempSelrestrs; + std::set tempSelrestrs; std::set tempSynrestrs; std::vector tempChoices; bool tempPrepLiteral; @@ -224,7 +223,7 @@ namespace verbly { case type::noun_phrase: { new(&first.noun_phrase_.role) std::string(std::move(second.noun_phrase_.role)); - new(&first.noun_phrase_.selrestrs) selrestr(std::move(second.noun_phrase_.selrestrs)); + new(&first.noun_phrase_.selrestrs) std::set(std::move(second.noun_phrase_.selrestrs)); new(&first.noun_phrase_.synrestrs) std::set(std::move(second.noun_phrase_.synrestrs)); break; @@ -263,7 +262,7 @@ namespace verbly { case type::noun_phrase: { new(&second.noun_phrase_.role) std::string(std::move(tempRole)); - new(&second.noun_phrase_.selrestrs) selrestr(std::move(tempSelrestrs)); + new(&second.noun_phrase_.selrestrs) std::set(std::move(tempSelrestrs)); new(&second.noun_phrase_.synrestrs) std::set(std::move(tempSynrestrs)); break; @@ -304,7 +303,7 @@ namespace verbly { using set_type = std::set; noun_phrase_.role.~string_type(); - noun_phrase_.selrestrs.~selrestr(); + noun_phrase_.selrestrs.~set_type(); noun_phrase_.synrestrs.~set_type(); break; @@ -348,7 +347,7 @@ namespace verbly { } } - selrestr part::getNounSelrestrs() const + std::set part::getNounSelrestrs() const { if (type_ == part_type::noun_phrase) { diff --git a/lib/part.h b/lib/part.h index 7180f57..450db3d 100644 --- a/lib/part.h +++ b/lib/part.h @@ -5,7 +5,6 @@ #include #include #include -#include "selrestr.h" #include "field.h" #include "filter.h" #include "enums.h" @@ -21,7 +20,7 @@ namespace verbly { // Static factories - static part createNounPhrase(std::string role, selrestr selrestrs, std::set 
synrestrs); + static part createNounPhrase(std::string role, std::set selrestrs, std::set synrestrs); static part createVerb(); @@ -77,7 +76,7 @@ namespace verbly { std::string getNounRole() const; - selrestr getNounSelrestrs() const; + std::set getNounSelrestrs() const; std::set getNounSynrestrs() const; @@ -110,8 +109,21 @@ namespace verbly { static const field frames; - // Noun synrestr relationship + // Noun selrestr and synrestr relationships + class selrestr_field { + public: + + filter operator%=(std::string selrestr) const; + + private: + + static const field selrestrJoin; + static const field selrestrField; + }; + + static const selrestr_field selrestrs; + class synrestr_field { public: @@ -138,7 +150,7 @@ namespace verbly { union { struct { std::string role; - selrestr selrestrs; + std::set selrestrs; std::set synrestrs; } noun_phrase_; struct { diff --git a/lib/selrestr.cpp b/lib/selrestr.cpp deleted file mode 100644 index 8646871..0000000 --- a/lib/selrestr.cpp +++ /dev/null @@ -1,309 +0,0 @@ -#include "selrestr.h" - -namespace verbly { - - selrestr::selrestr(nlohmann::json data) - { - if (data.find("children") != data.end()) - { - type_ = type::group; - new(&group_.children) std::list(); - - for (const nlohmann::json& child : data["children"]) - { - group_.children.emplace_back(child); - } - - group_.orlogic = (data["logic"] == "or"); - } else if (data.find("type") != data.end()) - { - type_ = type::singleton; - singleton_.pos = data["pos"].get(); - new(&singleton_.restriction) std::string(data["type"].get()); - } else { - type_ = type::empty; - } - } - - selrestr::selrestr(const selrestr& other) - { - type_ = other.type_; - - switch (type_) - { - case type::singleton: - { - singleton_.pos = other.singleton_.pos; - new(&singleton_.restriction) std::string(other.singleton_.restriction); - - break; - } - - case type::group: - { - new(&group_.children) std::list(other.group_.children); - group_.orlogic = other.group_.orlogic; - - break; - } - - case 
type::empty: - { - break; - } - } - } - - selrestr::selrestr(selrestr&& other) : selrestr() - { - swap(*this, other); - } - - selrestr& selrestr::operator=(selrestr other) - { - swap(*this, other); - - return *this; - } - - void swap(selrestr& first, selrestr& second) - { - using type = selrestr::type; - - type tempType = first.type_; - int tempPos; - std::string tempRestriction; - std::list tempChildren; - bool tempOrlogic; - - switch (tempType) - { - case type::singleton: - { - tempPos = first.singleton_.pos; - tempRestriction = std::move(first.singleton_.restriction); - - break; - } - - case type::group: - { - tempChildren = std::move(first.group_.children); - tempOrlogic = first.group_.orlogic; - - break; - } - - case type::empty: - { - break; - } - } - - first.~selrestr(); - - first.type_ = second.type_; - - switch (first.type_) - { - case type::singleton: - { - first.singleton_.pos = second.singleton_.pos; - new(&first.singleton_.restriction) std::string(std::move(second.singleton_.restriction)); - - break; - } - - case type::group: - { - new(&first.group_.children) std::list(std::move(second.group_.children)); - first.group_.orlogic = second.group_.orlogic; - - break; - } - - case type::empty: - { - break; - } - } - - second.~selrestr(); - - second.type_ = tempType; - - switch (second.type_) - { - case type::singleton: - { - second.singleton_.pos = tempPos; - new(&second.singleton_.restriction) std::string(std::move(tempRestriction)); - - break; - } - - case type::group: - { - new(&second.group_.children) std::list(std::move(tempChildren)); - second.group_.orlogic = tempOrlogic; - - break; - } - - case type::empty: - { - break; - } - } - } - - selrestr::~selrestr() - { - switch (type_) - { - case type::singleton: - { - using string_type = std::string; - singleton_.restriction.~string_type(); - - break; - } - - case type::group: - { - using list_type = std::list; - group_.children.~list_type(); - - break; - } - - case type::empty: - { - break; - } - } - } - - 
selrestr::selrestr() : type_(type::empty) - { - } - - selrestr::selrestr( - std::string restriction, - bool pos) : - type_(type::singleton) - { - new(&singleton_.restriction) std::string(std::move(restriction)); - singleton_.pos = pos; - } - - std::string selrestr::getRestriction() const - { - if (type_ == type::singleton) - { - return singleton_.restriction; - } else { - throw std::domain_error("Only singleton selrestrs have restrictions"); - } - } - - bool selrestr::getPos() const - { - if (type_ == type::singleton) - { - return singleton_.pos; - } else { - throw std::domain_error("Only singleton selrestrs have positivity flags"); - } - } - - selrestr::selrestr( - std::list children, - bool orlogic) : - type_(type::group) - { - new(&group_.children) std::list(std::move(children)); - group_.orlogic = orlogic; - } - - std::list selrestr::getChildren() const - { - if (type_ == type::group) - { - return group_.children; - } else { - throw std::domain_error("Only group selrestrs have children"); - } - } - - std::list::const_iterator selrestr::begin() const - { - if (type_ == type::group) - { - return std::begin(group_.children); - } else { - throw std::domain_error("Only group selrestrs have children"); - } - } - - std::list::const_iterator selrestr::end() const - { - if (type_ == type::group) - { - return std::end(group_.children); - } else { - throw std::domain_error("Only group selrestrs have children"); - } - } - - bool selrestr::getOrlogic() const - { - if (type_ == type::group) - { - return group_.orlogic; - } else { - throw std::domain_error("Only group selrestrs have logic"); - } - } - - nlohmann::json selrestr::toJson() const - { - switch (type_) - { - case type::empty: - { - return {}; - } - - case type::singleton: - { - return { - {"type", singleton_.restriction}, - {"pos", singleton_.pos} - }; - } - - case type::group: - { - std::string logic; - if (group_.orlogic) - { - logic = "or"; - } else { - logic = "and"; - } - - std::list children; - 
std::transform(std::begin(group_.children), std::end(group_.children), std::back_inserter(children), [] (const selrestr& child) { - return child.toJson(); - }); - - return { - {"logic", logic}, - {"children", children} - }; - } - } - } - -}; diff --git a/lib/selrestr.h b/lib/selrestr.h deleted file mode 100644 index a7cde0a..0000000 --- a/lib/selrestr.h +++ /dev/null @@ -1,90 +0,0 @@ -#ifndef SELRESTR_H_50652FB7 -#define SELRESTR_H_50652FB7 - -#include -#include -#include "../vendor/json/json.hpp" - -namespace verbly { - - class selrestr { - public: - enum class type { - empty, - singleton, - group - }; - - // Construct from json - - explicit selrestr(nlohmann::json json); - - // Copy and move constructors - - selrestr(const selrestr& other); - selrestr(selrestr&& other); - - // Assignment - - selrestr& operator=(selrestr other); - - // Swap - - friend void swap(selrestr& first, selrestr& second); - - // Destructor - - ~selrestr(); - - // Generic accessors - - type getType() const - { - return type_; - } - - // Empty - - selrestr(); - - // Singleton - - selrestr(std::string restriction, bool pos); - - std::string getRestriction() const; - - bool getPos() const; - - // Group - - selrestr(std::list children, bool orlogic); - - std::list getChildren() const; - - std::list::const_iterator begin() const; - - std::list::const_iterator end() const; - - bool getOrlogic() const; - - // Helpers - - nlohmann::json toJson() const; - - private: - union { - struct { - bool pos; - std::string restriction; - } singleton_; - struct { - std::list children; - bool orlogic; - } group_; - }; - type type_; - }; - -}; - -#endif /* end of include guard: SELRESTR_H_50652FB7 */ diff --git a/lib/verbly.h b/lib/verbly.h index 112907b..0f48a8c 100644 --- a/lib/verbly.h +++ b/lib/verbly.h @@ -15,6 +15,5 @@ #include "form.h" #include "pronunciation.h" #include "token.h" -#include "selrestr.h" #endif /* end of include guard: VERBLY_H_5B39CE50 */ -- cgit 1.4.1