From 9bd863c9002b525b7827f9158d9136143393be5c Mon Sep 17 00:00:00 2001 From: Kelly Rauchenberger Date: Mon, 23 Jan 2017 11:49:51 -0500 Subject: Added verb frame parsing --- CMakeLists.txt | 2 +- lib/frame.cpp | 89 ++++++++++++-- lib/frame.h | 60 +++++---- lib/frame_query.cpp | 166 ------------------------- lib/frame_query.h | 21 ---- lib/group.cpp | 44 +++++-- lib/group.h | 60 ++++----- lib/part.cpp | 344 ++++++++++++++++++++++++++++++++++++++++++++++++++++ lib/part.h | 117 ++++++++++++++++++ 9 files changed, 643 insertions(+), 260 deletions(-) delete mode 100644 lib/frame_query.cpp delete mode 100644 lib/frame_query.h create mode 100644 lib/part.cpp create mode 100644 lib/part.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 61fcce2..8d42fdd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -7,7 +7,7 @@ pkg_check_modules(sqlite3 sqlite3>=3.8.3 REQUIRED) set(CMAKE_BUILD_TYPE Debug) include_directories(vendor/json) -add_library(verbly lib/filter.cpp lib/field.cpp lib/notion.cpp lib/word.cpp lib/group.cpp lib/frame.cpp lib/lemma.cpp lib/form.cpp lib/pronunciation.cpp lib/statement.cpp lib/binding.cpp lib/database.cpp) +add_library(verbly lib/filter.cpp lib/field.cpp lib/notion.cpp lib/word.cpp lib/group.cpp lib/frame.cpp lib/lemma.cpp lib/form.cpp lib/pronunciation.cpp lib/statement.cpp lib/binding.cpp lib/database.cpp lib/selrestr.cpp lib/part.cpp) set_property(TARGET verbly PROPERTY CXX_STANDARD 11) set_property(TARGET verbly PROPERTY CXX_STANDARD_REQUIRED ON) target_link_libraries(verbly ${sqlite3_LIBRARIES}) diff --git a/lib/frame.cpp b/lib/frame.cpp index bc3f842..3ce95ec 100644 --- a/lib/frame.cpp +++ b/lib/frame.cpp @@ -1,21 +1,96 @@ #include "frame.h" #include +#include namespace verbly { - + const object frame::objectType = object::frame; - + const std::list frame::select = {"frame_id", "data"}; - + const field frame::id = field::integerField(object::frame, "frame_id"); - + const field frame::group = field::joinThrough(object::frame, "frame_id", object::group, "groups_frames", "group_id"); - + frame::frame(const database& db, sqlite3_stmt* row) : db_(&db), valid_(true) { id_ = sqlite3_column_int(row, 0); - + // TODO: Initialize frame data from row. + std::string partsJsonStr(reinterpret_cast(sqlite3_column_blob(row, 1))); + nlohmann::json partsJson = nlohmann::json::parse(std::move(partsJsonStr)); + + for (const nlohmann::json& partJson : partsJson) + { + part::type partType = static_cast(partJson["type"].get()); + + switch (partType) + { + case part::type::noun_phrase: + { + std::set synrestrs; + for (const nlohmann::json& synrestrJson : partJson["synrestrs"]) + { + synrestrs.insert(synrestrJson.get()); + } + + parts_.push_back(part::createNounPhrase( + partJson["role"].get(), + selrestr(partJson["selrestrs"]), + std::move(synrestrs))); + + break; + } + + case part::type::preposition: + { + std::vector choices; + for (const nlohmann::json& choiceJson : partJson["choices"]) + { + choices.push_back(choiceJson.get()); + } + + parts_.push_back(part::createPreposition( + std::move(choices), + partJson["literal"].get())); + + break; + } + + case part::type::verb: + { + parts_.push_back(part::createVerb()); + + break; + } + + case part::type::adjective: + { + parts_.push_back(part::createAdjective()); + + break; + } + + case part::type::adverb: + { + parts_.push_back(part::createAdverb()); + + break; + } + + case part::type::literal: + { + parts_.push_back(part::createLiteral(partJson["value"].get())); + + break; + } + + case part::type::invalid: + { + throw std::domain_error("Invalid part data"); + } + } + } } - + }; diff --git a/lib/frame.h b/lib/frame.h index 68a4346..97473a0 100644 --- a/lib/frame.h +++ b/lib/frame.h @@ -5,74 +5,86 @@ #include #include "field.h" #include "filter.h" +#include "part.h" struct sqlite3_stmt; namespace verbly { - + class database; - + class frame { public: - + // Default constructor - + frame() = default; - + // Construct from database - + frame(const database& db, sqlite3_stmt* row); - + // Accessors - + operator bool() const { return valid_; } - + int getId() const { if (!valid_) { throw std::domain_error("Bad access to uninitialized frame"); } - + return id_; } - + + const std::vector& getParts() const + { + if (!valid_) + { + throw std::domain_error("Bad access to uninitialized frame"); + } + + return parts_; + } + // Type info - + static const object objectType; - + static const std::list select; - + // Query fields - + static const field id; - + operator filter() const { if (!valid_) { throw std::domain_error("Bad access to uninitialized frame"); } - + return (id == id_); } - + // Relationships to other objects - + static const field group; - + private: bool valid_ = false; - + int id_; - + std::vector parts_; + const database* db_; - + }; - + }; #endif /* end of include guard: FRAME_H_EA29065A */ diff --git a/lib/frame_query.cpp b/lib/frame_query.cpp deleted file mode 100644 index 11f0432..0000000 --- a/lib/frame_query.cpp +++ /dev/null @@ -1,166 +0,0 @@ -#include "verbly.h" -#include - -using json = nlohmann::json; - -namespace verbly { - - frame_query::frame_query(const data& _data) : _data(_data) - { - - } - - frame_query& frame_query::for_verb(const verb& _v) - { - _for_verb.push_back(_v); - - return *this; - } - - frame::selrestr parse_selrestr(const json data) - { - if (data.find("children") != data.end()) - { - std::list children; - std::transform(std::begin(data["children"]), std::end(data["children"]), std::back_inserter(children), &parse_selrestr); - - return frame::selrestr{children, data["logic"] == "or"}; - } else if (data.find("type") != data.end()) - { - return frame::selrestr{data["type"].get(), data["pos"].get()}; - } else { - return frame::selrestr{}; - } - } - - std::list frame_query::run() const - { - std::stringstream construct; - construct << "SELECT frames.data, groups.data FROM frames INNER JOIN groups ON frames.group_id = groups.group_id"; - std::list bindings; - - if (!_for_verb.empty()) - { - std::list clauses(_for_verb.size(), "verb_id = ?"); - construct << " WHERE frames.group_id IN (SELECT group_id FROM verb_groups WHERE "; - construct << verbly::implode(std::begin(clauses), std::end(clauses), " OR "); - construct << ")"; - - for (auto v : _for_verb) - { - bindings.emplace_back(v._id); - } - } - - sqlite3_stmt* ppstmt; - std::string query = construct.str(); - if (sqlite3_prepare_v2(_data.ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) - { - throw std::runtime_error(sqlite3_errmsg(_data.ppdb)); - } - - int i = 1; - for (auto& binding : bindings) - { - switch (binding.get_type()) - { - case binding::type::integer: - { - sqlite3_bind_int(ppstmt, i, binding.get_integer()); - - break; - } - - case binding::type::string: - { - sqlite3_bind_text(ppstmt, i, binding.get_string().c_str(), binding.get_string().length(), SQLITE_TRANSIENT); - - break; - } - } - - i++; - } - - std::list output; - while (sqlite3_step(ppstmt) == SQLITE_ROW) - { - frame f; - - std::string fdatat(reinterpret_cast(sqlite3_column_blob(ppstmt, 0))); - const json fdata = json::parse(fdatat); - for (const auto& part : fdata) - { - frame::part p; - - if (part["type"] == "np") - { - p._type = frame::part::type::noun_phrase; - new(&p._noun_phrase.role) std::string(part["role"].get()); - new(&p._noun_phrase.selrestrs) frame::selrestr(parse_selrestr(part["selrestrs"])); - new(&p._noun_phrase.synrestrs) std::set(); - for (auto synrestr : part["synrestrs"]) - { - p._noun_phrase.synrestrs.insert(synrestr.get()); - } - } else if (part["type"] == "pp") - { - if (!part["values"].empty()) - { - p._type = frame::part::type::literal_preposition; - new(&p._literal_preposition.choices) std::vector(); - for (auto choice : part["values"]) - { - p._literal_preposition.choices.push_back(choice.get()); - } - } else if (!part["preprestrs"].empty()) - { - p._type = frame::part::type::selection_preposition; - new(&p._selection_preposition.preprestrs) std::vector(); - for (auto preprestr : part["preprestrs"]) - { - p._selection_preposition.preprestrs.push_back(preprestr.get()); - } - } - } else if (part["type"] == "v") - { - p._type = frame::part::type::verb; - } else if (part["type"] == "adj") - { - p._type = frame::part::type::adjective; - } else if (part["type"] == "adv") - { - p._type = frame::part::type::adverb; - } else if (part["type"] == "lex") - { - p._type = frame::part::type::literal; - new(&p._literal.lexval) std::string(part["value"].get()); - } - - f._parts.push_back(p); - } - - std::string rdatat(reinterpret_cast(sqlite3_column_blob(ppstmt, 1))); - const json rdata = json::parse(rdatat); - for (const auto& role : rdata) - { - std::string rt = role["type"]; - frame::selrestr rs; - - if (role.find("selrestrs") != role.end()) - { - rs = parse_selrestr(role["selrestrs"]); - } - - f._roles[rt] = rs; - } - - output.push_back(f); - } - - sqlite3_finalize(ppstmt); - - return output; - } - -}; diff --git a/lib/frame_query.h b/lib/frame_query.h deleted file mode 100644 index dd11d16..0000000 --- a/lib/frame_query.h +++ /dev/null @@ -1,21 +0,0 @@ -#ifndef FRAME_QUERY_H_334B9D47 -#define FRAME_QUERY_H_334B9D47 - -namespace verbly { - - class frame_query { - public: - frame_query(const data& _data); - - frame_query& for_verb(const verb& _v); - - std::list run() const; - - private: - const data& _data; - std::list _for_verb; - }; - -}; - -#endif /* end of include guard: FRAME_QUERY_H_334B9D47 */ diff --git a/lib/group.cpp b/lib/group.cpp index 8b6d985..d5790e9 100644 --- a/lib/group.cpp +++ b/lib/group.cpp @@ -1,43 +1,61 @@ #include "group.h" #include -#include "frame.h" +#include #include "database.h" #include "query.h" namespace verbly { - + const object group::objectType = object::group; - + const std::list group::select = {"group_id", "data"}; - + const field group::id = field::integerField(object::group, "group_id"); - + const field group::frame = field::joinThrough(object::group, "group_id", object::frame, "groups_frames", "frame_id"); const field group::word = field::joinField(object::group, "group_id", object::word); - + group::group(const database& db, sqlite3_stmt* row) : db_(&db), valid_(true) { id_ = sqlite3_column_int(row, 0); - - // TODO: Initialize role data from row. + + std::string rolesJsonStr(reinterpret_cast(sqlite3_column_blob(row, 1))); + nlohmann::json rolesJson = nlohmann::json::parse(std::move(rolesJsonStr)); + for (const nlohmann::json& roleJson : rolesJson) + { + std::string roleName = roleJson["type"]; + selrestr roleSelrestr; + + if (roleJson.find("selrestrs") != roleJson.end()) + { + roleSelrestr = selrestr(roleJson["selrestrs"]); + } + + roles_[roleName] = role(roleName, std::move(roleSelrestr)); + } } - + const std::vector& group::getFrames() const { if (!valid_) { throw std::domain_error("Bad access to uninitialized group"); } - + if (!initializedFrames_) { frames_ = db_->frames(frame::group %= *this, false, -1).all(); - + initializedFrames_ = true; } - + return frames_; } - + + const role& group::getRole(std::string roleName) const + { + return roles_.at(roleName); + } + }; diff --git a/lib/group.h b/lib/group.h index dd53503..fe62d39 100644 --- a/lib/group.h +++ b/lib/group.h @@ -6,82 +6,86 @@ #include #include "field.h" #include "filter.h" +#include "frame.h" +#include "role.h" struct sqlite3_stmt; namespace verbly { - + class database; - class frame; - + class group { public: - + // Default constructor - + group() = default; - + // Construct from database - + group(const database& db, sqlite3_stmt* row); - + // Accessors - + operator bool() const { return valid_; } - + int getId() const { if (!valid_) { throw std::domain_error("Bad access to uninitialized group"); } - + return id_; } - + const std::vector& getFrames() const; - + + const role& getRole(std::string roleName) const; + // Type info - + static const object objectType; - + static const std::list select; - + // Query fields - + static const field id; - + operator filter() const { if (!valid_) { throw std::domain_error("Bad access to uninitialized group"); } - + return (id == id_); } - + // Relationships to other objects - + static const field frame; - + static const field word; - + private: bool valid_ = false; - + int id_; - + std::map roles_; + const database* db_; - + mutable bool initializedFrames_ = false; mutable std::vector frames_; - + }; - + }; #endif /* end of include guard: GROUP_H_BD6933C0 */ diff --git a/lib/part.cpp b/lib/part.cpp new file mode 100644 index 0000000..e66d151 --- /dev/null +++ b/lib/part.cpp @@ -0,0 +1,344 @@ +#include "part.h" +#include +#include "selrestr.h" + +namespace verbly { + + part part::createNounPhrase(std::string role, selrestr selrestrs, std::set synrestrs) + { + part p(type::noun_phrase); + + new(&p.noun_phrase_.role) std::string(std::move(role)); + new(&p.noun_phrase_.selrestrs) selrestr(std::move(selrestrs)); + new(&p.noun_phrase_.synrestrs) std::set(std::move(synrestrs)); + + return p; + } + + part part::createVerb() + { + return part(type::verb); + } + + part part::createPreposition(std::vector choices, bool literal) + { + part p(type::preposition); + + new(&p.preposition_.choices) std::vector(std::move(choices)); + p.preposition_.literal = literal; + + return p; + } + + part part::createAdjective() + { + return part(type::adjective); + } + + part part::createAdverb() + { + return part(type::adverb); + } + + part part::createLiteral(std::string value) + { + part p(type::literal); + + new(&p.literal_) std::string(std::move(value)); + + return p; + } + + part::part(const part& other) + { + type_ = other.type_; + + switch (type_) + { + case type::noun_phrase: + { + new(&noun_phrase_.role) std::string(other.noun_phrase_.role); + new(&noun_phrase_.selrestrs) selrestr(other.noun_phrase_.selrestrs); + new(&noun_phrase_.synrestrs) std::set(other.noun_phrase_.synrestrs); + + break; + } + + case type::preposition: + { + new(&preposition_.choices) std::vector(other.preposition_.choices); + preposition_.literal = other.preposition_.literal; + + break; + } + + case type::literal: + { + new(&literal_) std::string(other.literal_); + + break; + } + + case type::verb: + case type::adjective: + case type::adverb: + case type::invalid: + { + break; + } + } + } + + part::part(part&& other) : part() + { + swap(*this, other); + } + + part& part::operator=(part other) + { + swap(*this, other); + + return *this; + } + + void swap(part& first, part& second) + { + using type = part::type; + + type tempType = first.type_; + std::string tempRole; + selrestr tempSelrestrs; + std::set tempSynrestrs; + std::vector tempChoices; + bool tempPrepLiteral; + std::string tempLiteralValue; + + switch (tempType) + { + case type::noun_phrase: + { + tempRole = std::move(first.noun_phrase_.role); + tempSelrestrs = std::move(first.noun_phrase_.selrestrs); + tempSynrestrs = std::move(first.noun_phrase_.synrestrs); + + break; + } + + case type::preposition: + { + tempChoices = std::move(first.preposition_.choices); + tempPrepLiteral = first.preposition_.literal; + + break; + } + + case type::literal: + { + tempLiteralValue = std::move(first.literal_); + + break; + } + + case type::verb: + case type::adjective: + case type::adverb: + case type::invalid: + { + break; + } + } + + first.~part(); + + first.type_ = second.type_; + + switch (first.type_) + { + case type::noun_phrase: + { + new(&first.noun_phrase_.role) std::string(std::move(second.noun_phrase_.role)); + new(&first.noun_phrase_.selrestrs) selrestr(std::move(second.noun_phrase_.selrestrs)); + new(&first.noun_phrase_.synrestrs) std::set(std::move(second.noun_phrase_.synrestrs)); + + break; + } + + case type::preposition: + { + new(&first.preposition_.choices) std::vector(std::move(second.preposition_.choices)); + first.preposition_.literal = second.preposition_.literal; + + break; + } + + case type::literal: + { + new(&first.literal_) std::string(std::move(second.literal_)); + + break; + } + + case type::verb: + case type::adjective: + case type::adverb: + case type::invalid: + { + break; + } + } + + second.~part(); + + second.type_ = tempType; + + switch (second.type_) + { + case type::noun_phrase: + { + new(&second.noun_phrase_.role) std::string(std::move(tempRole)); + new(&second.noun_phrase_.selrestrs) selrestr(std::move(tempSelrestrs)); + new(&second.noun_phrase_.synrestrs) std::set(std::move(tempSynrestrs)); + + break; + } + + case type::preposition: + { + new(&second.preposition_.choices) std::vector(std::move(tempChoices)); + second.preposition_.literal = tempPrepLiteral; + + break; + } + + case type::literal: + { + new(&second.literal_) std::string(std::move(tempLiteralValue)); + + break; + } + + case type::verb: + case type::adjective: + case type::adverb: + case type::invalid: + { + break; + } + } + } + + part::~part() + { + switch (type_) + { + case type::noun_phrase: + { + using string_type = std::string; + using set_type = std::set; + + noun_phrase_.role.~string_type(); + noun_phrase_.selrestrs.~selrestr(); + noun_phrase_.synrestrs.~set_type(); + + break; + } + + case type::preposition: + { + using vector_type = std::vector; + + preposition_.choices.~vector_type(); + + break; + } + + case type::literal: + { + using string_type = std::string; + + literal_.~string_type(); + + break; + } + + case type::verb: + case type::adjective: + case type::adverb: + case type::invalid: + { + break; + } + } + } + + std::string part::getNounRole() const + { + if (type_ == type::noun_phrase) + { + return noun_phrase_.role; + } else { + throw std::domain_error("part::getNounRole is only valid for noun phrase parts"); + } + } + + selrestr part::getNounSelrestrs() const + { + if (type_ == type::noun_phrase) + { + return noun_phrase_.selrestrs; + } else { + throw std::domain_error("part::getNounSelrestrs is only valid for noun phrase parts"); + } + } + + std::set part::getNounSynrestrs() const + { + if (type_ == type::noun_phrase) + { + return noun_phrase_.synrestrs; + } else { + throw std::domain_error("part::getNounSynrestrs is only valid for noun phrase parts"); + } + } + + bool part::nounHasSynrestr(std::string synrestr) const + { + if (type_ != type::noun_phrase) + { + throw std::domain_error("part::nounHasSynrestr is only valid for noun phrase parts"); + } + + return (noun_phrase_.synrestrs.count(synrestr) == 1); + } + + std::vector part::getPrepositionChoices() const + { + if (type_ == type::preposition) + { + return preposition_.choices; + } else { + throw std::domain_error("part::getPrepositionChoices is only valid for preposition parts"); + } + } + + bool part::isPrepositionLiteral() const + { + if (type_ == type::preposition) + { + return preposition_.literal; + } else { + throw std::domain_error("part::isPrepositionLiteral is only valid for preposition parts"); + } + } + + std::string part::getLiteralValue() const + { + if (type_ == type::literal) + { + return literal_; + } else { + throw std::domain_error("part::getLiteralValue is only valid for literal parts"); + } + } + +}; diff --git a/lib/part.h b/lib/part.h new file mode 100644 index 0000000..3a15638 --- /dev/null +++ b/lib/part.h @@ -0,0 +1,117 @@ +#ifndef PART_H_C8F0661B +#define PART_H_C8F0661B + +#include +#include +#include +#include "selrestr.h" + +namespace verbly { + + class part { + public: + enum class type { + invalid = -1, + noun_phrase = 0, + verb = 1, + preposition = 2, + adjective = 3, + adverb = 4, + literal = 5 + }; + + // Static factories + + static part createNounPhrase(std::string role, selrestr selrestrs, std::set synrestrs); + + static part createVerb(); + + static part createPreposition(std::vector choices, bool literal); + + static part createAdjective(); + + static part createAdverb(); + + static part createLiteral(std::string value); + + // Default constructor + + part() + { + } + + // Copy and move constructors + + part(const part& other); + + part(part&& other); + + // Assignment + + part& operator=(part other); + + // Swap + + friend void swap(part& first, part& second); + + // Destructor + + ~part(); + + // General accessors + + type getType() const + { + return type_; + } + + // Noun phrase accessors + + std::string getNounRole() const; + + selrestr getNounSelrestrs() const; + + std::set getNounSynrestrs() const; + + bool nounHasSynrestr(std::string synrestr) const; + + // Preposition accessors + + std::vector getPrepositionChoices() const; + + bool isPrepositionLiteral() const; + + // Literal accessors + + std::string getLiteralValue() const; + + private: + + // Private constructors + + part(type t) : type_(t) + { + } + + // Data + + union { + struct { + std::string role; + selrestr selrestrs; + std::set synrestrs; + } noun_phrase_; + struct { + std::vector choices; + bool literal; + } preposition_; + std::string literal_; + }; + + type type_ = type::invalid; + + }; + +}; + +#endif /* end of include guard: PART_H_C8F0661B */ -- cgit 1.4.1