From a7645346293ed6a912c26d0c50b6f7943f1f3072 Mon Sep 17 00:00:00 2001 From: Kelly Rauchenberger Date: Sat, 28 Jan 2017 12:59:42 -0500 Subject: Restructured verb frame schema to be more queryable Groups are much less significant now, and they no longer have a database table, nor are they considered top-level objects anymore. Instead of groups containing their own role data, that data is folded into the frames so that it's easier to query; as a result, each group has its own copy of the frames that it contains. Additionally, parts are considered top-level objects now, and you can query for frames based on attributes of their indexed parts. Synrestrs are also contained in their own table now, so that parts can be filtered against their synrestrs; they are, however, not considered top-level objects. Created a new type of field, the "join where" or "condition join" field, which is a normal join field that has a built-in condition on a specified field. This is used to allow creating multiple distinct join fields from one object to another. This is required for the lemma::form and frame::part joins, because filters for forms of separate inflections should not be coalesced; similarly, filters on differently indexed frame parts should not be coalesced. Queries can now be ordered, ascending or descending, by a field, in addition to randomly as before. This is necessary for accessing the parts of a verb frame in the correct order, but may be useful to an end user as well. Fixed a bug in statement generation where condition groups were not being surrounded by parentheses, which made mixing OR groups and AND groups generate inaccurate statements. This has been fixed; additionally, parentheses are not placed around the top-level condition, and nested condition groups with the same logic type are coalesced, to make query strings as easy to read as possible. Also simplified the form::lemma field; it no longer conditions on the inflection of the form like the lemma::form field does. 
Also added a debug flag to statement::getQueryString that makes it return a query string with all of the bindings filled in, for debug use only. --- CMakeLists.txt | 2 +- generator/frame.cpp | 69 +++----------------- generator/frame.h | 20 +++--- generator/generator.cpp | 39 +++++------- generator/generator.h | 3 +- generator/group.cpp | 166 ++++++++++++++++++++++++++++-------------------- generator/group.h | 30 ++++----- generator/part.cpp | 48 ++++++++++++++ generator/part.h | 30 ++++++--- generator/role.h | 60 +++++++++++++++++ generator/schema.sql | 33 +++++++--- generator/word.h | 1 + lib/database.cpp | 60 +++++++++++++---- lib/database.h | 20 +++--- lib/enums.h | 14 +++- lib/field.cpp | 20 ++++++ lib/field.h | 78 +++++++++++++++++++---- lib/filter.cpp | 35 +++++----- lib/form.cpp | 16 +---- lib/form.h | 31 +-------- lib/frame.cpp | 95 ++++++--------------------- lib/frame.h | 21 +++++- lib/group.cpp | 61 ------------------ lib/group.h | 91 -------------------------- lib/lemma.cpp | 15 ++--- lib/lemma.h | 27 +------- lib/order.h | 69 ++++++++++++++++++++ lib/part.cpp | 128 +++++++++++++++++++++++++++++-------- lib/part.h | 64 +++++++++++++++---- lib/query.h | 11 +++- lib/role.h | 60 ----------------- lib/statement.cpp | 164 +++++++++++++++++++++++++++++++++++++++++------ lib/statement.h | 9 ++- lib/verbly.h | 5 +- lib/word.cpp | 36 +++++++++-- lib/word.h | 23 +++---- 36 files changed, 945 insertions(+), 709 deletions(-) create mode 100644 generator/role.h delete mode 100644 lib/group.cpp delete mode 100644 lib/group.h create mode 100644 lib/order.h delete mode 100644 lib/role.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 5c15e79..32c73c1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -7,7 +7,7 @@ pkg_check_modules(sqlite3 sqlite3>=3.8.3 REQUIRED) set(CMAKE_BUILD_TYPE Debug) include_directories(vendor/json) -add_library(verbly lib/filter.cpp lib/field.cpp lib/notion.cpp lib/word.cpp lib/group.cpp lib/frame.cpp lib/lemma.cpp lib/form.cpp 
lib/pronunciation.cpp lib/statement.cpp lib/binding.cpp lib/database.cpp lib/token.cpp lib/selrestr.cpp lib/part.cpp) +add_library(verbly lib/filter.cpp lib/field.cpp lib/notion.cpp lib/word.cpp lib/frame.cpp lib/part.cpp lib/lemma.cpp lib/form.cpp lib/pronunciation.cpp lib/statement.cpp lib/binding.cpp lib/database.cpp lib/token.cpp lib/selrestr.cpp lib/part.cpp) set_property(TARGET verbly PROPERTY CXX_STANDARD 11) set_property(TARGET verbly PROPERTY CXX_STANDARD_REQUIRED ON) target_link_libraries(verbly ${sqlite3_LIBRARIES}) diff --git a/generator/frame.cpp b/generator/frame.cpp index f75e3ba..4e4ac5f 100644 --- a/generator/frame.cpp +++ b/generator/frame.cpp @@ -11,72 +11,21 @@ namespace verbly { { } - void frame::push_back(part fp) - { - parts_.push_back(std::move(fp)); - } - - database& operator<<(database& db, const frame& arg) + frame frame::duplicate(const frame& other) { - std::list fields; - fields.emplace_back("frame_id", arg.getId()); + frame result; - nlohmann::json jsonParts; - for (const part& p : arg) + for (const part& p : other.parts_) { - nlohmann::json jsonPart; - jsonPart["type"] = static_cast(p.getType()); - - switch (p.getType()) - { - case part::type::noun_phrase: - { - jsonPart["role"] = p.getNounRole(); - jsonPart["selrestrs"] = p.getNounSelrestrs().toJson(); - jsonPart["synrestrs"] = p.getNounSynrestrs(); - - break; - } - - case part::type::preposition: - { - jsonPart["choices"] = p.getPrepositionChoices(); - jsonPart["literal"] = p.isPrepositionLiteral(); - - break; - } - - case part::type::literal: - { - jsonPart["value"] = p.getLiteralValue(); - - break; - } - - case part::type::verb: - case part::type::adjective: - case part::type::adverb: - { - break; - } - - case part::type::invalid: - { - // Invalid parts should not be serialized. 
- assert(false); - - break; - } - } - - jsonParts.push_back(std::move(jsonPart)); + result.push_back(part::duplicate(p)); } - fields.emplace_back("data", jsonParts.dump()); - - db.insertIntoTable("frames", std::move(fields)); + return result; + } - return db; + void frame::push_back(part fp) + { + parts_.push_back(std::move(fp)); } }; diff --git a/generator/frame.h b/generator/frame.h index 764564d..ba266f0 100644 --- a/generator/frame.h +++ b/generator/frame.h @@ -19,6 +19,10 @@ namespace verbly { // Constructor frame(); + + // Duplication + + static frame duplicate(const frame& other); // Mutators @@ -30,15 +34,15 @@ namespace verbly { { return id_; } - - const_iterator begin() const + + int getLength() const { - return std::begin(parts_); + return parts_.size(); } - - const_iterator end() const + + const part& operator[](int index) const { - return std::end(parts_); + return parts_.at(index); } private: @@ -47,12 +51,10 @@ namespace verbly { const int id_; - std::list parts_; + std::vector parts_; }; - database& operator<<(database& db, const frame& arg); - }; }; diff --git a/generator/generator.cpp b/generator/generator.cpp index 610a602..4cc9f64 100644 --- a/generator/generator.cpp +++ b/generator/generator.cpp @@ -8,7 +8,7 @@ #include "../lib/enums.h" #include "progress.h" #include "../lib/selrestr.h" -#include "../lib/role.h" +#include "role.h" #include "part.h" #include "field.h" #include "../lib/util.h" @@ -640,7 +640,7 @@ namespace verbly { } { - progress ppgs("Writing verb groups...", groups_.size()); + progress ppgs("Writing verb frames...", groups_.size()); for (group& g : groups_) { @@ -649,17 +649,6 @@ namespace verbly { ppgs.update(); } } - - { - progress ppgs("Writing verb frames...", frames_.size()); - - for (frame& f : frames_) - { - db_ << f; - - ppgs.update(); - } - } } void generator::readWordNetAntonymy() @@ -1212,9 +1201,15 @@ namespace verbly { return w; } - group& generator::createGroup(xmlNodePtr top) + void 
generator::createGroup(xmlNodePtr top, const group* parent) { - groups_.emplace_back(); + if (parent != nullptr) + { + groups_.emplace_back(*parent); + } else { + groups_.emplace_back(); + } + group& grp = groups_.back(); xmlChar* key; @@ -1229,8 +1224,11 @@ namespace verbly { { try { - group& subgrp = createGroup(subclass); - subgrp.setParent(grp); + // Parsing a subgroup starts by making a copy of everything in + // the parent. This is okay to do at this point because in the + // VerbNet data, subgroups are always defined after everything + // else. + createGroup(subclass, &grp); } catch (const std::exception& e) { key = xmlGetProp(subclass, reinterpret_cast("ID")); @@ -1323,8 +1321,7 @@ namespace verbly { { if (!xmlStrcmp(frametopnode->name, reinterpret_cast("FRAME"))) { - frames_.emplace_back(); - frame& fr = frames_.back(); + frame fr; for (xmlNodePtr framenode = frametopnode->xmlChildrenNode; framenode != nullptr; framenode = framenode->next) { @@ -1428,15 +1425,13 @@ namespace verbly { } } - grp.addFrame(fr); + grp.addFrame(std::move(fr)); } } } } } } - - return grp; } selrestr generator::parseSelrestr(xmlNodePtr top) diff --git a/generator/generator.h b/generator/generator.h index 8352693..bc9b3c7 100644 --- a/generator/generator.h +++ b/generator/generator.h @@ -105,7 +105,7 @@ namespace verbly { template word& createWord(Args&&... args); - group& createGroup(xmlNodePtr top); + void createGroup(xmlNodePtr top, const group* parent = nullptr); selrestr parseSelrestr(xmlNodePtr top); @@ -128,7 +128,6 @@ namespace verbly { std::list lemmas_; std::list
forms_; std::list pronunciations_; - std::list frames_; std::list groups_; // Indexes diff --git a/generator/group.cpp b/generator/group.cpp index cebe2b9..aa28d42 100644 --- a/generator/group.cpp +++ b/generator/group.cpp @@ -15,12 +15,15 @@ namespace verbly { { } - void group::setParent(const group& parent) + group::group(const group& parent) : + id_(nextId_++), + roles_(parent.roles_), + roleNames_(parent.roleNames_) { - // Adding a group to itself is nonsensical. - assert(&parent != this); - - parent_ = &parent; + for (const frame& f : parent.frames_) + { + frames_.push_back(frame::duplicate(f)); + } } void group::addRole(role r) @@ -30,87 +33,114 @@ namespace verbly { roleNames_.insert(std::move(name)); } - void group::addFrame(const frame& f) + void group::addFrame(frame f) { - frames_.insert(&f); + frames_.push_back(std::move(f)); } - std::set group::getRoles() const + bool group::hasRole(std::string name) const { - std::set fullRoles = roleNames_; - - if (hasParent()) - { - for (std::string name : getParent().getRoles()) - { - fullRoles.insert(name); - } - } - - return fullRoles; + // Rarely, a noun phrase part may use a role that is not defined in the + // group. See confess-37.10 "NP V NP ADJ". 
+ return (roles_.count(name) == 1); } const role& group::getRole(std::string name) const { - if (roles_.count(name)) - { - return roles_.at(name); - } else if (hasParent()) - { - return getParent().getRole(name); - } else { - throw std::invalid_argument("Specified role not found in verb group"); - } - } - - std::set group::getFrames() const - { - std::set fullFrames = frames_; - - if (hasParent()) - { - for (const frame* f : getParent().getFrames()) - { - fullFrames.insert(f); - } - } - - return fullFrames; + return roles_.at(name); } database& operator<<(database& db, const group& arg) { - // Serialize the group first + // Serialize each frame + for (const frame& f : arg.getFrames()) { - std::list fields; - fields.emplace_back("group_id", arg.getId()); - - nlohmann::json jsonRoles; - for (std::string name : arg.getRoles()) + // First, serialize the group/frame relationship { - const role& r = arg.getRole(name); + std::list fields; - nlohmann::json jsonRole; - jsonRole["type"] = name; - jsonRole["selrestrs"] = r.getSelrestrs().toJson(); + fields.emplace_back("frame_id", f.getId()); + fields.emplace_back("group_id", arg.getId()); + fields.emplace_back("length", f.getLength()); - jsonRoles.emplace_back(std::move(jsonRole)); + db.insertIntoTable("frames", std::move(fields)); } - fields.emplace_back("data", jsonRoles.dump()); - - db.insertIntoTable("groups", std::move(fields)); - } - - // Then, serialize the group/frame relationship - for (const frame* f : arg.getFrames()) - { - std::list fields; - - fields.emplace_back("group_id", arg.getId()); - fields.emplace_back("frame_id", f->getId()); - - db.insertIntoTable("groups_frames", std::move(fields)); + // Then, serialize the frame parts in the context of the group + for (int partIndex = 0; partIndex < f.getLength(); partIndex++) + { + const part& p = f[partIndex]; + + std::list fields; + fields.emplace_back("part_id", p.getId()); + fields.emplace_back("frame_id", f.getId()); + fields.emplace_back("part_index", 
partIndex); + fields.emplace_back("type", static_cast(p.getType())); + + switch (p.getType()) + { + case part::type::noun_phrase: + { + fields.emplace_back("role", p.getNounRole()); + + selrestr partSelrestr; + if (p.getNounSelrestrs().getType() != selrestr::type::empty) + { + partSelrestr = p.getNounSelrestrs(); + } else if (arg.hasRole(p.getNounRole())) + { + partSelrestr = arg.getRole(p.getNounRole()).getSelrestrs(); + } + + fields.emplace_back("selrestrs", partSelrestr.toJson().dump()); + + // Short interlude to serialize the synrestrs + for (const std::string& s : p.getNounSynrestrs()) + { + std::list synrestrFields; + + synrestrFields.emplace_back("part_id", p.getId()); + synrestrFields.emplace_back("synrestr", s); + + db.insertIntoTable("synrestrs", std::move(synrestrFields)); + } + + break; + } + + case part::type::preposition: + { + fields.emplace_back("prepositions", nlohmann::json(p.getPrepositionChoices()).dump()); + fields.emplace_back("preposition_literality", p.isPrepositionLiteral() ? 1 : 0); + + break; + } + + case part::type::literal: + { + fields.emplace_back("literal_value", p.getLiteralValue()); + + break; + } + + case part::type::verb: + case part::type::adjective: + case part::type::adverb: + { + break; + } + + case part::type::invalid: + { + // Invalid parts should not be serialized. 
+ assert(false); + + break; + } + } + + db.insertIntoTable("parts", std::move(fields)); + } } return db; diff --git a/generator/group.h b/generator/group.h index 83f40c2..5486fbe 100644 --- a/generator/group.h +++ b/generator/group.h @@ -5,7 +5,7 @@ #include #include #include -#include "../lib/role.h" +#include "role.h" namespace verbly { namespace generator { @@ -20,13 +20,13 @@ namespace verbly { group(); - // Mutators + explicit group(const group& parent); - void setParent(const group& parent); + // Mutators void addRole(role r); - void addFrame(const frame& f); + void addFrame(frame f); // Accessors @@ -35,24 +35,19 @@ namespace verbly { return id_; } - bool hasParent() const - { - return (parent_ != nullptr); - } - - const group& getParent() const + const std::set& getRoles() const { - // Calling code should always call hasParent first - assert(parent_ != nullptr); - - return *parent_; + return roleNames_; } - std::set getRoles() const; + bool hasRole(std::string name) const; const role& getRole(std::string name) const; - std::set getFrames() const; + const std::list& getFrames() const + { + return frames_; + } private: @@ -60,9 +55,8 @@ namespace verbly { const int id_; - const group* parent_ = nullptr; std::map roles_; - std::set frames_; + std::list frames_; // Caches diff --git a/generator/part.cpp b/generator/part.cpp index 8a75ed4..07618a8 100644 --- a/generator/part.cpp +++ b/generator/part.cpp @@ -4,6 +4,8 @@ namespace verbly { namespace generator { + int part::nextId_ = 0; + part part::createNounPhrase(std::string role, selrestr selrestrs, std::set synrestrs) { part p(type::noun_phrase); @@ -49,9 +51,52 @@ namespace verbly { return p; } + part part::duplicate(const part& other) + { + part result(other.type_); + + switch (result.type_) + { + case type::noun_phrase: + { + new(&result.noun_phrase_.role) std::string(other.noun_phrase_.role); + new(&result.noun_phrase_.selrestrs) selrestr(other.noun_phrase_.selrestrs); + new(&result.noun_phrase_.synrestrs) 
std::set(other.noun_phrase_.synrestrs); + + break; + } + + case type::preposition: + { + new(&result.preposition_.choices) std::set(other.preposition_.choices); + result.preposition_.literal = other.preposition_.literal; + + break; + } + + case type::literal: + { + new(&result.literal_) std::string(other.literal_); + + break; + } + + case type::verb: + case type::adjective: + case type::adverb: + case type::invalid: + { + break; + } + } + + return result; + } + part::part(const part& other) { type_ = other.type_; + id_ = other.id_; switch (type_) { @@ -106,6 +151,7 @@ namespace verbly { using type = part::type; type tempType = first.type_; + int tempId = first.id_; std::string tempRole; selrestr tempSelrestrs; std::set tempSynrestrs; @@ -151,6 +197,7 @@ namespace verbly { first.~part(); first.type_ = second.type_; + first.id_ = second.id_; switch (first.type_) { @@ -190,6 +237,7 @@ namespace verbly { second.~part(); second.type_ = tempType; + second.id_ = tempId; switch (second.type_) { diff --git a/generator/part.h b/generator/part.h index b010f62..39ba1e7 100644 --- a/generator/part.h +++ b/generator/part.h @@ -4,21 +4,16 @@ #include #include #include "../lib/selrestr.h" +#include "../lib/enums.h" namespace verbly { + namespace generator { class part { public: - enum class type { - invalid = -1, - noun_phrase = 0, - verb = 1, - preposition = 2, - adjective = 3, - adverb = 4, - literal = 5 - }; + + using type = part_type; // Static factories @@ -34,6 +29,10 @@ namespace verbly { static part createLiteral(std::string value); + // Duplication + + static part duplicate(const part& other); + // Copy and move constructors part(const part& other); @@ -54,6 +53,11 @@ namespace verbly { // General accessors + int getId() const + { + return id_; + } + type getType() const { return type_; @@ -79,13 +83,19 @@ namespace verbly { private: + static int nextId_; + + int id_; + // Private constructors part() { } - part(type t) : type_(t) + part(type t) : + id_(nextId_++), + 
type_(t) { } diff --git a/generator/role.h b/generator/role.h new file mode 100644 index 0000000..4884ef3 --- /dev/null +++ b/generator/role.h @@ -0,0 +1,60 @@ +#ifndef ROLE_H_249F9A9C +#define ROLE_H_249F9A9C + +#include +#include +#include "../lib/selrestr.h" + +namespace verbly { + + class role { + public: + + // Default constructor + + role() = default; + + // Constructor + + role( + std::string name, + selrestr selrestrs = {}) : + valid_(true), + name_(name), + selrestrs_(selrestrs) + { + } + + // Accessors + + const std::string& getName() const + { + if (!valid_) + { + throw std::domain_error("Bad access to invalid role"); + } + + return name_; + } + + const selrestr& getSelrestrs() const + { + if (!valid_) + { + throw std::domain_error("Bad access to invalid role"); + } + + return selrestrs_; + } + + private: + + bool valid_ = false; + std::string name_; + selrestr selrestrs_; + + }; + +}; + +#endif /* end of include guard: ROLE_H_249F9A9C */ diff --git a/generator/schema.sql b/generator/schema.sql index c3e54d8..33ebc28 100644 --- a/generator/schema.sql +++ b/generator/schema.sql @@ -186,19 +186,32 @@ CREATE TABLE `forms_pronunciations` ( CREATE INDEX `pronunciation_of` ON `forms_pronunciations`(`form_id`); CREATE INDEX `spelling_of` ON `forms_pronunciations`(`pronunciation_id`); -CREATE TABLE `groups` ( - `group_id` INTEGER PRIMARY KEY, - `data` BLOB NOT NULL +CREATE TABLE `frames` ( + `frame_id` INTEGER NOT NULL, + `group_id` INTEGER NOT NULL, + 'length' INTEGER NOT NULL ); -CREATE TABLE `frames` ( - `frame_id` INTEGER PRIMARY KEY, - `data` BLOB NOT NULL +CREATE INDEX `frames_in` ON `frames`(`group_id`); + +CREATE TABLE `parts` ( + `part_id` INTEGER PRIMARY KEY, + `frame_id` INTEGER NOT NULL, + `part_index` INTEGER NOT NULL, + `type` INTEGER NOT NULL, + `role` VARCHAR(16), + `selrestrs` BLOB, + `prepositions` BLOB, + `preposition_literality` SMALLINT, + `literal_value` VARCHAR(64) ); -CREATE TABLE `groups_frames` ( - `group_id` INTEGER NOT NULL, - 
`frame_id` INTEGER NOT NULL +CREATE INDEX `parts_of` ON `parts`(`frame_id`); +CREATE UNIQUE INDEX `part_by_frame_index` ON `parts`(`frame_id`, `part_index`); + +CREATE TABLE `synrestrs` ( + `part_id` INTEGER NOT NULL, + `synrestr` VARCHAR(32) NOT NULL ); -CREATE INDEX `frames_in` ON `groups_frames`(`group_id`); +CREATE INDEX `synrestrs_for` ON `synrestrs`(`part_id`); diff --git a/generator/word.h b/generator/word.h index a994ec3..c6d7b20 100644 --- a/generator/word.h +++ b/generator/word.h @@ -5,6 +5,7 @@ #include "../lib/enums.h" namespace verbly { + namespace generator { class notion; diff --git a/lib/database.cpp b/lib/database.cpp index fb00ef3..563ec31 100644 --- a/lib/database.cpp +++ b/lib/database.cpp @@ -41,39 +41,71 @@ namespace verbly { sqlite3_close_v2(ppdb_); } - query database::notions(filter where, bool random, int limit) const + query database::notions(filter where, order sortOrder, int limit) const { - return query(*this, ppdb_, std::move(where), random, limit); + return query(*this, ppdb_, std::move(where), std::move(sortOrder), limit); } - query database::words(filter where, bool random, int limit) const + query database::words(filter where, order sortOrder, int limit) const { - return query(*this, ppdb_, std::move(where), random, limit); + return query(*this, ppdb_, std::move(where), std::move(sortOrder), limit); } - query database::groups(filter where, bool random, int limit) const + query database::frames(filter where, order sortOrder, int limit) const { - return query(*this, ppdb_, std::move(where), random, limit); + return query(*this, ppdb_, std::move(where), std::move(sortOrder), limit); } - query database::frames(filter where, bool random, int limit) const + query database::parts(filter where, order sortOrder, int limit) const { - return query(*this, ppdb_, std::move(where), random, limit); + return query(*this, ppdb_, std::move(where), std::move(sortOrder), limit); } - query database::lemmas(filter where, bool random, int limit) const + 
query database::lemmas(filter where, order sortOrder, int limit) const { - return query(*this, ppdb_, std::move(where), random, limit); + return query(*this, ppdb_, std::move(where), std::move(sortOrder), limit); } - query database::forms(filter where, bool random, int limit) const + query database::forms(filter where, order sortOrder, int limit) const { - return query(*this, ppdb_, std::move(where), random, limit); + return query(*this, ppdb_, std::move(where), std::move(sortOrder), limit); } - query database::pronunciations(filter where, bool random, int limit) const + query database::pronunciations(filter where, order sortOrder, int limit) const { - return query(*this, ppdb_, std::move(where), random, limit); + return query(*this, ppdb_, std::move(where), std::move(sortOrder), limit); + } + + std::set database::synrestrs(int partId) const + { + std::string queryString = "SELECT synrestr FROM synrestrs WHERE part_id = ?"; + + sqlite3_stmt* ppstmt; + if (sqlite3_prepare_v2(ppdb_, queryString.c_str(), queryString.length(), &ppstmt, NULL) != SQLITE_OK) + { + std::string errorMsg = sqlite3_errmsg(ppdb_); + sqlite3_finalize(ppstmt); + + throw database_error("Error preparing query", errorMsg); + } + + if (sqlite3_bind_int(ppstmt, 1, partId) != SQLITE_OK) + { + std::string errorMsg = sqlite3_errmsg(ppdb_); + sqlite3_finalize(ppstmt); + + throw database_error("Error binding value to query", errorMsg); + } + + std::set result; + while (sqlite3_step(ppstmt) == SQLITE_ROW) + { + result.insert(reinterpret_cast(sqlite3_column_blob(ppstmt, 0))); + } + + sqlite3_finalize(ppstmt); + + return result; } }; diff --git a/lib/database.h b/lib/database.h index ef50739..0b10eba 100644 --- a/lib/database.h +++ b/lib/database.h @@ -4,13 +4,15 @@ #include #include #include +#include #include "notion.h" #include "word.h" -#include "group.h" #include "frame.h" +#include "part.h" #include "lemma.h" #include "form.h" #include "pronunciation.h" +#include "order.h" struct sqlite3; @@ -46,19 
+48,21 @@ namespace verbly { // Queries - query notions(filter where, bool random = true, int limit = 1) const; + query notions(filter where, order sortOrder = {}, int limit = 1) const; - query words(filter where, bool random = true, int limit = 1) const; + query words(filter where, order sortOrder = {}, int limit = 1) const; - query groups(filter where, bool random = true, int limit = 1) const; + query frames(filter where, order sortOrder = {}, int limit = 1) const; - query frames(filter where, bool random = true, int limit = 1) const; + query parts(filter where, order sortOrder = {}, int limit = 1) const; - query lemmas(filter where, bool random = true, int limit = 1) const; + query lemmas(filter where, order sortOrder = {}, int limit = 1) const; - query forms(filter where, bool random = true, int limit = 1) const; + query forms(filter where, order sortOrder = {}, int limit = 1) const; - query pronunciations(filter where, bool random = true, int limit = 1) const; + query pronunciations(filter where, order sortOrder = {}, int limit = 1) const; + + std::set synrestrs(int partId) const; private: diff --git a/lib/enums.h b/lib/enums.h index e634959..2646fa4 100644 --- a/lib/enums.h +++ b/lib/enums.h @@ -33,13 +33,23 @@ namespace verbly { undefined = -1, notion = 0, word = 1, - group = 2, - frame = 3, + frame = 2, + part = 3, lemma = 4, form = 5, pronunciation = 6 }; + enum class part_type { + invalid = -1, + noun_phrase = 0, + verb = 1, + preposition = 2, + adjective = 3, + adverb = 4, + literal = 5 + }; + }; #endif /* end of include guard: ENUMS_H_260BA847 */ diff --git a/lib/field.cpp b/lib/field.cpp index deecb06..5b51ef4 100644 --- a/lib/field.cpp +++ b/lib/field.cpp @@ -48,6 +48,11 @@ namespace verbly { return filter(*this, filter::comparison::int_equals, static_cast(value)); } + filter field::operator==(part_type value) const + { + return filter(*this, filter::comparison::int_equals, static_cast(value)); + } + filter field::operator==(bool value) const { return 
filter(*this, filter::comparison::boolean_equals, value); @@ -68,6 +73,21 @@ namespace verbly { return filter(*this, filter::comparison::string_is_like, std::move(value)); } + filter field::operator==(const char* value) const + { + return filter(*this, filter::comparison::string_equals, std::string(value)); + } + + filter field::operator!=(const char* value) const + { + return filter(*this, filter::comparison::string_does_not_equal, std::string(value)); + } + + filter field::operator%=(const char* value) const + { + return filter(*this, filter::comparison::string_is_like, std::string(value)); + } + field::operator filter() const { if (isJoin()) diff --git a/lib/field.h b/lib/field.h index f61e038..b4bf02d 100644 --- a/lib/field.h +++ b/lib/field.h @@ -17,6 +17,7 @@ namespace verbly { integer, boolean, join, + join_where, join_through, hierarchal_join }; @@ -95,6 +96,17 @@ namespace verbly { return field(obj, type::join, name, nullable, table); } + static field joinWhere( + object obj, + const char* name, + object joinWith, + const field& conditionField, + int conditionValue, + bool nullable = false) + { + return field(obj, type::join_where, name, nullable, 0, joinWith, 0, 0, 0, &conditionField, conditionValue); + } + static field joinThrough( object obj, const char* name, @@ -151,7 +163,10 @@ namespace verbly { bool isJoin() const { - return ((type_ == type::join) || (type_ == type::join_through) || (type_ == type::hierarchal_join)); + return ((type_ == type::join) + || (type_ == type::join_where) + || (type_ == type::join_through) + || (type_ == type::hierarchal_join)); } const char* getColumn() const @@ -180,7 +195,7 @@ namespace verbly { { return (type_ == type::hierarchal_join) ? object_ - : ((type_ == type::join) || (type_ == type::join_through)) + : ((type_ == type::join) || (type_ == type::join_where) || (type_ == type::join_through)) ? 
joinObject_ : throw std::domain_error("Non-join fields don't have join objects"); } @@ -209,6 +224,22 @@ namespace verbly { : throw std::domain_error("Only many-to-many join fields have a foreign join column"); } + // Condition joins + + const field& getConditionField() const + { + return (type_ == type::join_where) + ? *conditionField_ + : throw std::domain_error("Only condition join fields have a condition field"); + } + + int getConditionValue() const + { + return (type_ == type::join_where) + ? conditionValue_ + : throw std::domain_error("Only condition join fields have a condition value"); + } + // Ordering bool operator<(const field& other) const @@ -217,20 +248,30 @@ namespace verbly { // However, there do exist a number of relationships from an object to // itself, such as notion hypernymy/hyponymy. Hypernymy and hyponymy have // the same object (notion), the same column (notion_id), and the same - // table (hypernymy); however, they have different join columns. - return std::tie(object_, column_, table_, joinColumn_) < std::tie(other.object_, other.column_, other.table_, other.joinColumn_); + // table (hypernymy); however, they have different join columns. For + // condition joins, the condition field and condition value are also + // significant. + if (conditionField_) + { + return std::tie(object_, column_, table_, joinColumn_, *conditionField_, conditionValue_) + < std::tie(other.object_, other.column_, other.table_, other.joinColumn_, *other.conditionField_, other.conditionValue_); + } else { + return std::tie(object_, column_, table_, joinColumn_) < std::tie(other.object_, other.column_, other.table_, other.joinColumn_); + } } // Equality bool operator==(const field& other) const { - // For the most part, (object, column) uniquely identifies fields. - // However, there do exist a number of relationships from an object to - // itself, such as notion hypernymy/hyponymy. 
Hypernymy and hyponymy have - // the same object (notion), the same column (notion_id), and the same - // table (hypernymy); however, they have different join columns. - return std::tie(object_, column_, table_, joinColumn_) == std::tie(other.object_, other.column_, other.table_, other.joinColumn_); + // See operator<() for documentation. + if (conditionField_) + { + return std::tie(object_, column_, table_, joinColumn_, *conditionField_, conditionValue_) + == std::tie(other.object_, other.column_, other.table_, other.joinColumn_, *other.conditionField_, other.conditionValue_); + } else { + return std::tie(object_, column_, table_, joinColumn_) == std::tie(other.object_, other.column_, other.table_, other.joinColumn_); + } } // Filter construction @@ -245,6 +286,7 @@ namespace verbly { filter operator==(part_of_speech value) const; // Part of speech equality filter operator==(positioning value) const; // Adjective positioning equality filter operator==(inflection value) const; // Inflection category equality + filter operator==(part_type value) const; // Verb frame part type equality filter operator==(bool value) const; // Boolean equality @@ -252,6 +294,10 @@ namespace verbly { filter operator!=(std::string value) const; // String inequality filter operator%=(std::string value) const; // String matching + filter operator==(const char* value) const; // String equality + filter operator!=(const char* value) const; // String inequality + filter operator%=(const char* value) const; // String matching + operator filter() const; // Non-nullity filter operator!() const; // Nullity @@ -270,7 +316,9 @@ namespace verbly { object joinObject = object::undefined, const char* foreignColumn = 0, const char* joinColumn = 0, - const char* foreignJoinColumn = 0) : + const char* foreignJoinColumn = 0, + const field* conditionField = 0, + int conditionValue = 0) : object_(obj), type_(datatype), column_(column), @@ -279,7 +327,9 @@ namespace verbly { joinObject_(joinObject), 
foreignColumn_(foreignColumn), joinColumn_(joinColumn), - foreignJoinColumn_(foreignJoinColumn) + foreignJoinColumn_(foreignJoinColumn), + conditionField_(conditionField), + conditionValue_(conditionValue) { } @@ -300,6 +350,10 @@ namespace verbly { const char* joinColumn_ = 0; const char* foreignJoinColumn_ = 0; + // Condition joins + const field* conditionField_ = 0; + int conditionValue_ = 0; + }; }; diff --git a/lib/filter.cpp b/lib/filter.cpp index ceb9327..ab46df2 100644 --- a/lib/filter.cpp +++ b/lib/filter.cpp @@ -3,8 +3,8 @@ #include #include "notion.h" #include "word.h" -#include "group.h" #include "frame.h" +#include "part.h" #include "lemma.h" #include "form.h" #include "pronunciation.h" @@ -594,6 +594,7 @@ namespace verbly { switch (joinOn.getType()) { case field::type::join: + case field::type::join_where: case field::type::join_through: { switch (filterType) @@ -1108,8 +1109,8 @@ namespace verbly { } case object::word: - case object::group: case object::frame: + case object::part: case object::lemma: case object::form: case object::pronunciation: @@ -1134,10 +1135,10 @@ namespace verbly { return *this; } - case object::group: case object::frame: + case object::part: { - return (verbly::word::group %= *this); + return (verbly::word::frame %= *this); } case object::lemma: @@ -1148,12 +1149,12 @@ namespace verbly { } } - case object::group: + case object::frame: { switch (singleton_.filterField.getObject()) { case object::undefined: - case object::group: + case object::frame: { return *this; } @@ -1164,34 +1165,34 @@ namespace verbly { case object::form: case object::pronunciation: { - return (verbly::group::word %= *this); + return (verbly::frame::word %= *this); } - case object::frame: + case object::part: { - return (verbly::group::frame %= *this); + return (verbly::frame::part() %= *this); } } } - case object::frame: + case object::part: { switch (singleton_.filterField.getObject()) { case object::undefined: - case object::frame: + case 
object::part: { return *this; } case object::notion: case object::word: - case object::group: + case object::frame: case object::lemma: case object::form: case object::pronunciation: { - return (verbly::frame::group %= *this); + return (verbly::part::frame %= *this); } } } @@ -1202,8 +1203,8 @@ namespace verbly { { case object::notion: case object::word: - case object::group: case object::frame: + case object::part: { return verbly::lemma::word %= *this; } @@ -1228,11 +1229,11 @@ namespace verbly { { case object::notion: case object::word: - case object::group: case object::frame: + case object::part: case object::lemma: { - return verbly::form::lemma(inflection::base) %= *this; + return verbly::form::lemma %= *this; } case object::undefined: @@ -1254,8 +1255,8 @@ namespace verbly { { case object::notion: case object::word: - case object::group: case object::frame: + case object::part: case object::lemma: case object::form: { diff --git a/lib/form.cpp b/lib/form.cpp index 5d4c343..4811f14 100644 --- a/lib/form.cpp +++ b/lib/form.cpp @@ -16,11 +16,9 @@ namespace verbly { const field form::complexity = field::integerField(object::form, "complexity"); const field form::proper = field::booleanField(object::form, "proper"); + const field form::lemma = field::joinField(object::form, "form_id", object::lemma); const field form::pronunciation = field::joinThrough(object::form, "form_id", object::pronunciation, "forms_pronunciations", "pronunciation_id"); - const field form::lemmaJoin = field::joinField(object::form, "form_id", object::lemma); - const field form::inflectionCategory = field::integerField("lemmas_forms", "category"); - form::form(const database& db, sqlite3_stmt* row) : db_(&db), valid_(true) { id_ = sqlite3_column_int(row, 0); @@ -29,16 +27,6 @@ namespace verbly { proper_ = (sqlite3_column_int(row, 3) == 1); } - filter operator%=(form::inflection_field check, filter joinCondition) - { - return (form::lemmaJoin %= (joinCondition && (form::inflectionCategory 
== check.getCategory()))); - } - - form::inflection_field::operator filter() const - { - return (form::lemmaJoin %= (form::inflectionCategory == category_)); - } - const std::vector& form::getPronunciations() const { if (!valid_) @@ -48,7 +36,7 @@ namespace verbly { if (!initializedPronunciations_) { - pronunciations_ = db_->pronunciations(pronunciation::form %= *this, false, -1).all(); + pronunciations_ = db_->pronunciations(pronunciation::form %= *this, verbly::pronunciation::id, -1).all(); initializedPronunciations_ = true; } diff --git a/lib/form.h b/lib/form.h index aca5b2f..cf64117 100644 --- a/lib/form.h +++ b/lib/form.h @@ -104,33 +104,9 @@ namespace verbly { // Relationships to other objects - static const field pronunciation; - - class inflection_field { - public: - - inflection_field(inflection category) : category_(category) - { - } - - const inflection getCategory() const - { - return category_; - } + static const field lemma; - operator filter() const; - - private: - - const inflection category_; - }; - - static const inflection_field lemma(inflection category) - { - return inflection_field(category); - } - - friend filter operator%=(form::inflection_field check, filter joinCondition); + static const field pronunciation; private: bool valid_ = false; @@ -145,9 +121,6 @@ namespace verbly { mutable bool initializedPronunciations_ = false; mutable std::vector pronunciations_; - static const field lemmaJoin; - static const field inflectionCategory; - }; }; diff --git a/lib/frame.cpp b/lib/frame.cpp index 8cab56b..a73fbda 100644 --- a/lib/frame.cpp +++ b/lib/frame.cpp @@ -1,95 +1,36 @@ #include "frame.h" #include -#include +#include "database.h" +#include "query.h" namespace verbly { const object frame::objectType = object::frame; - const std::list frame::select = {"frame_id", "data"}; + const std::list frame::select = {"frame_id", "group_id", "length"}; const field frame::id = field::integerField(object::frame, "frame_id"); + const field frame::length = 
field::integerField(object::frame, "length"); - const field frame::group = field::joinThrough(object::frame, "frame_id", object::group, "groups_frames", "group_id"); + const field frame::word = field::joinField(object::frame, "group_id", object::word); - frame::frame(const database& db, sqlite3_stmt* row) : db_(&db), valid_(true) + field frame::part() { - id_ = sqlite3_column_int(row, 0); - - std::string partsJsonStr(reinterpret_cast(sqlite3_column_blob(row, 1))); - nlohmann::json partsJson = nlohmann::json::parse(std::move(partsJsonStr)); - - for (const nlohmann::json& partJson : partsJson) - { - part::type partType = static_cast(partJson["type"].get()); - - switch (partType) - { - case part::type::noun_phrase: - { - std::set synrestrs; - for (const nlohmann::json& synrestrJson : partJson["synrestrs"]) - { - synrestrs.insert(synrestrJson.get()); - } - - parts_.push_back(part::createNounPhrase( - partJson["role"].get(), - selrestr(partJson["selrestrs"]), - std::move(synrestrs))); - - break; - } - - case part::type::preposition: - { - std::vector choices; - for (const nlohmann::json& choiceJson : partJson["choices"]) - { - choices.push_back(choiceJson.get()); - } - - parts_.push_back(part::createPreposition( - std::move(choices), - partJson["literal"].get())); - - break; - } - - case part::type::verb: - { - parts_.push_back(part::createVerb()); - - break; - } - - case part::type::adjective: - { - parts_.push_back(part::createAdjective()); - - break; - } - - case part::type::adverb: - { - parts_.push_back(part::createAdverb()); - - break; - } + return field::joinField(object::frame, "frame_id", object::part); + } - case part::type::literal: - { - parts_.push_back(part::createLiteral(partJson["value"].get())); + field frame::part(int index) + { + return field::joinWhere(object::frame, "frame_id", object::part, part::index, index); + } - break; - } + frame::frame(const database& db, sqlite3_stmt* row) : db_(&db), valid_(true) + { + id_ = sqlite3_column_int(row, 0); + 
groupId_ = sqlite3_column_int(row, 1); + length_ = sqlite3_column_int(row, 2); - case part::type::invalid: - { - throw std::domain_error("Invalid part data"); - } - } - } + parts_ = db.parts(*this, verbly::part::index, -1).all(); } }; diff --git a/lib/frame.h b/lib/frame.h index 97473a0..36e179e 100644 --- a/lib/frame.h +++ b/lib/frame.h @@ -41,6 +41,16 @@ namespace verbly { return id_; } + int getLength() const + { + if (!valid_) + { + throw std::domain_error("Bad access to uninitialized frame"); + } + + return length_; + } + const std::vector& getParts() const { if (!valid_) @@ -61,6 +71,8 @@ namespace verbly { static const field id; + static const field length; + operator filter() const { if (!valid_) @@ -73,13 +85,18 @@ namespace verbly { // Relationships to other objects - static const field group; + static const field word; + + static field part(); + static field part(int index); private: bool valid_ = false; int id_; - std::vector parts_; + int groupId_; + int length_; + std::vector parts_; const database* db_; diff --git a/lib/group.cpp b/lib/group.cpp deleted file mode 100644 index d5790e9..0000000 --- a/lib/group.cpp +++ /dev/null @@ -1,61 +0,0 @@ -#include "group.h" -#include -#include -#include "database.h" -#include "query.h" - -namespace verbly { - - const object group::objectType = object::group; - - const std::list group::select = {"group_id", "data"}; - - const field group::id = field::integerField(object::group, "group_id"); - - const field group::frame = field::joinThrough(object::group, "group_id", object::frame, "groups_frames", "frame_id"); - const field group::word = field::joinField(object::group, "group_id", object::word); - - group::group(const database& db, sqlite3_stmt* row) : db_(&db), valid_(true) - { - id_ = sqlite3_column_int(row, 0); - - std::string rolesJsonStr(reinterpret_cast(sqlite3_column_blob(row, 1))); - nlohmann::json rolesJson = nlohmann::json::parse(std::move(rolesJsonStr)); - for (const nlohmann::json& roleJson : 
rolesJson) - { - std::string roleName = roleJson["type"]; - selrestr roleSelrestr; - - if (roleJson.find("selrestrs") != roleJson.end()) - { - roleSelrestr = selrestr(roleJson["selrestrs"]); - } - - roles_[roleName] = role(roleName, std::move(roleSelrestr)); - } - } - - const std::vector& group::getFrames() const - { - if (!valid_) - { - throw std::domain_error("Bad access to uninitialized group"); - } - - if (!initializedFrames_) - { - frames_ = db_->frames(frame::group %= *this, false, -1).all(); - - initializedFrames_ = true; - } - - return frames_; - } - - const role& group::getRole(std::string roleName) const - { - return roles_.at(roleName); - } - -}; - diff --git a/lib/group.h b/lib/group.h deleted file mode 100644 index fe62d39..0000000 --- a/lib/group.h +++ /dev/null @@ -1,91 +0,0 @@ -#ifndef GROUP_H_BD6933C0 -#define GROUP_H_BD6933C0 - -#include -#include -#include -#include "field.h" -#include "filter.h" -#include "frame.h" -#include "role.h" - -struct sqlite3_stmt; - -namespace verbly { - - class database; - - class group { - public: - - // Default constructor - - group() = default; - - // Construct from database - - group(const database& db, sqlite3_stmt* row); - - // Accessors - - operator bool() const - { - return valid_; - } - - int getId() const - { - if (!valid_) - { - throw std::domain_error("Bad access to uninitialized group"); - } - - return id_; - } - - const std::vector& getFrames() const; - - const role& getRole(std::string roleName) const; - - // Type info - - static const object objectType; - - static const std::list select; - - // Query fields - - static const field id; - - operator filter() const - { - if (!valid_) - { - throw std::domain_error("Bad access to uninitialized group"); - } - - return (id == id_); - } - - // Relationships to other objects - - static const field frame; - - static const field word; - - private: - bool valid_ = false; - - int id_; - std::map roles_; - - const database* db_; - - mutable bool initializedFrames_ = 
false; - mutable std::vector frames_; - - }; - -}; - -#endif /* end of include guard: GROUP_H_BD6933C0 */ diff --git a/lib/lemma.cpp b/lib/lemma.cpp index 1601460..0c6e99e 100644 --- a/lib/lemma.cpp +++ b/lib/lemma.cpp @@ -10,20 +10,13 @@ namespace verbly { const std::list lemma::select = {"lemma_id"}; const field lemma::id = field::integerField(object::lemma, "lemma_id"); - - const field lemma::word = field::joinField(object::lemma, "lemma_id", object::word); - - const field lemma::formJoin = field::joinField(object::lemma, "form_id", object::form); const field lemma::inflectionCategory = field::integerField(object::lemma, "category"); - filter operator%=(lemma::inflection_field check, filter joinCondition) - { - return (lemma::formJoin %= joinCondition) && (lemma::inflectionCategory == check.getCategory()); - } + const field lemma::word = field::joinField(object::lemma, "lemma_id", object::word); - lemma::inflection_field::operator filter() const + field lemma::form(inflection category) { - return (lemma::inflectionCategory == category_); + return field::joinWhere(object::lemma, "form_id", object::form, inflectionCategory, static_cast(category)); } lemma::lemma(const database& db, sqlite3_stmt* row) : db_(&db), valid_(true) @@ -68,7 +61,7 @@ namespace verbly { void lemma::initializeForm(inflection infl) const { - forms_[infl] = db_->forms(form::lemma(infl) %= *this, false, -1).all(); + forms_[infl] = db_->forms(form::lemma %= ((inflectionCategory == infl) && *this), verbly::form::id, -1).all(); } }; diff --git a/lib/lemma.h b/lib/lemma.h index 407fa3c..56cfc56 100644 --- a/lib/lemma.h +++ b/lib/lemma.h @@ -74,31 +74,7 @@ namespace verbly { static const field word; - class inflection_field { - public: - - inflection_field(inflection category) : category_(category) - { - } - - const inflection getCategory() const - { - return category_; - } - - operator filter() const; - - private: - - const inflection category_; - }; - - static const inflection_field 
form(inflection category) - { - return inflection_field(category); - } - - friend filter operator%=(lemma::inflection_field check, filter joinCondition); + static field form(inflection category); private: @@ -112,7 +88,6 @@ namespace verbly { const database* db_; - static const field formJoin; static const field inflectionCategory; }; diff --git a/lib/order.h b/lib/order.h new file mode 100644 index 0000000..d2f0f92 --- /dev/null +++ b/lib/order.h @@ -0,0 +1,69 @@ +#ifndef ORDER_H_0EC669D5 +#define ORDER_H_0EC669D5 + +#include +#include "field.h" + +namespace verbly { + + class order { + public: + enum class type { + random, + field + }; + + // Type + + type getType() const + { + return type_; + } + + // Random + + order() : type_(type::random) + { + } + + // Field + + order( + field arg, + bool asc = true) : + type_(type::field), + sortField_(std::move(arg)), + ascending_(asc) + { + } + + field getSortField() const + { + if (type_ != type::field) + { + throw std::domain_error("Invalid access to non-field order"); + } + + return sortField_; + } + + bool isAscending() const + { + if (type_ != type::field) + { + throw std::domain_error("Invalid access to non-field order"); + } + + return ascending_; + } + + private: + type type_; + field sortField_; + bool ascending_; + + }; + +}; + +#endif /* end of include guard: ORDER_H_0EC669D5 */ diff --git a/lib/part.cpp b/lib/part.cpp index e66d151..1fbb24d 100644 --- a/lib/part.cpp +++ b/lib/part.cpp @@ -1,12 +1,30 @@ #include "part.h" #include +#include #include "selrestr.h" +#include "database.h" namespace verbly { + const object part::objectType = object::part; + + const std::list part::select = {"part_id", "frame_id", "part_index", "type", "role", "selrestrs", "prepositions", "preposition_literality", "literal_value"}; + + const field part::index = field::integerField(object::part, "part_index"); + const field part::type = field::integerField(object::part, "type"); + + const field part::role = 
field::stringField(object::part, "role", true); + + const field part::frame = field::joinField(object::part, "frame_id", object::frame); + + const field part::synrestr_field::synrestrJoin = field::joinField(object::part, "part_id", "synrestrs"); + const field part::synrestr_field::synrestrField = field::stringField("synrestrs", "synrestr"); + + const part::synrestr_field part::synrestr = {}; + part part::createNounPhrase(std::string role, selrestr selrestrs, std::set synrestrs) { - part p(type::noun_phrase); + part p(part_type::noun_phrase); new(&p.noun_phrase_.role) std::string(std::move(role)); new(&p.noun_phrase_.selrestrs) selrestr(std::move(selrestrs)); @@ -17,12 +35,12 @@ namespace verbly { part part::createVerb() { - return part(type::verb); + return part(part_type::verb); } part part::createPreposition(std::vector choices, bool literal) { - part p(type::preposition); + part p(part_type::preposition); new(&p.preposition_.choices) std::vector(std::move(choices)); p.preposition_.literal = literal; @@ -32,30 +50,79 @@ namespace verbly { part part::createAdjective() { - return part(type::adjective); + return part(part_type::adjective); } part part::createAdverb() { - return part(type::adverb); + return part(part_type::adverb); } part part::createLiteral(std::string value) { - part p(type::literal); + part p(part_type::literal); new(&p.literal_) std::string(std::move(value)); return p; } + part::part(const database& db, sqlite3_stmt* row) + { + int id = sqlite3_column_int(row, 0); + + type_ = static_cast(sqlite3_column_int(row, 3)); + + switch (type_) + { + case part_type::noun_phrase: + { + new(&noun_phrase_.role) std::string(reinterpret_cast(sqlite3_column_blob(row, 4))); + new(&noun_phrase_.selrestrs) selrestr(nlohmann::json::parse(reinterpret_cast(sqlite3_column_blob(row, 5)))); + new(&noun_phrase_.synrestrs) std::set(db.synrestrs(id)); + + break; + } + + case part_type::preposition: + { + new(&preposition_.choices) std::vector(); + preposition_.literal = 
(sqlite3_column_int(row, 7) == 1); + + std::string choicesJsonStr(reinterpret_cast(sqlite3_column_blob(row, 6))); + nlohmann::json choicesJson = nlohmann::json::parse(std::move(choicesJsonStr)); + for (const nlohmann::json& choiceJson : choicesJson) + { + preposition_.choices.push_back(choiceJson.get()); + } + + break; + } + + case part_type::literal: + { + new(&literal_) std::string(reinterpret_cast(sqlite3_column_blob(row, 8))); + + break; + } + + case part_type::verb: + case part_type::adjective: + case part_type::adverb: + case part_type::invalid: + { + break; + } + } + } + part::part(const part& other) { type_ = other.type_; switch (type_) { - case type::noun_phrase: + case part_type::noun_phrase: { new(&noun_phrase_.role) std::string(other.noun_phrase_.role); new(&noun_phrase_.selrestrs) selrestr(other.noun_phrase_.selrestrs); @@ -64,7 +131,7 @@ namespace verbly { break; } - case type::preposition: + case part_type::preposition: { new(&preposition_.choices) std::vector(other.preposition_.choices); preposition_.literal = other.preposition_.literal; @@ -72,17 +139,17 @@ namespace verbly { break; } - case type::literal: + case part_type::literal: { new(&literal_) std::string(other.literal_); break; } - case type::verb: - case type::adjective: - case type::adverb: - case type::invalid: + case part_type::verb: + case part_type::adjective: + case part_type::adverb: + case part_type::invalid: { break; } @@ -103,7 +170,7 @@ namespace verbly { void swap(part& first, part& second) { - using type = part::type; + using type = part_type; type tempType = first.type_; std::string tempRole; @@ -231,7 +298,7 @@ namespace verbly { { switch (type_) { - case type::noun_phrase: + case part_type::noun_phrase: { using string_type = std::string; using set_type = std::set; @@ -243,7 +310,7 @@ namespace verbly { break; } - case type::preposition: + case part_type::preposition: { using vector_type = std::vector; @@ -252,7 +319,7 @@ namespace verbly { break; } - case type::literal: + 
case part_type::literal: { using string_type = std::string; @@ -261,10 +328,10 @@ namespace verbly { break; } - case type::verb: - case type::adjective: - case type::adverb: - case type::invalid: + case part_type::verb: + case part_type::adjective: + case part_type::adverb: + case part_type::invalid: { break; } @@ -273,7 +340,7 @@ namespace verbly { std::string part::getNounRole() const { - if (type_ == type::noun_phrase) + if (type_ == part_type::noun_phrase) { return noun_phrase_.role; } else { @@ -283,7 +350,7 @@ namespace verbly { selrestr part::getNounSelrestrs() const { - if (type_ == type::noun_phrase) + if (type_ == part_type::noun_phrase) { return noun_phrase_.selrestrs; } else { @@ -293,7 +360,7 @@ namespace verbly { std::set part::getNounSynrestrs() const { - if (type_ == type::noun_phrase) + if (type_ == part_type::noun_phrase) { return noun_phrase_.synrestrs; } else { @@ -303,7 +370,7 @@ namespace verbly { bool part::nounHasSynrestr(std::string synrestr) const { - if (type_ != type::noun_phrase) + if (type_ != part_type::noun_phrase) { throw std::domain_error("part::nounHasSynrestr is only valid for noun phrase parts"); } @@ -313,7 +380,7 @@ namespace verbly { std::vector part::getPrepositionChoices() const { - if (type_ == type::preposition) + if (type_ == part_type::preposition) { return preposition_.choices; } else { @@ -323,7 +390,7 @@ namespace verbly { bool part::isPrepositionLiteral() const { - if (type_ == type::preposition) + if (type_ == part_type::preposition) { return preposition_.literal; } else { @@ -333,7 +400,7 @@ namespace verbly { std::string part::getLiteralValue() const { - if (type_ == type::literal) + if (type_ == part_type::literal) { return literal_; } else { @@ -341,4 +408,9 @@ namespace verbly { } } + filter part::synrestr_field::operator%=(std::string synrestr) const + { + return (synrestrJoin %= (synrestrField == synrestr)); + } + }; diff --git a/lib/part.h b/lib/part.h index 3a15638..9a01312 100644 --- a/lib/part.h +++ 
b/lib/part.h @@ -4,21 +4,20 @@ #include #include #include +#include #include "selrestr.h" +#include "field.h" +#include "filter.h" +#include "enums.h" + +struct sqlite3_stmt; namespace verbly { + class database; + class part { public: - enum class type { - invalid = -1, - noun_phrase = 0, - verb = 1, - preposition = 2, - adjective = 3, - adverb = 4, - literal = 5 - }; // Static factories @@ -40,6 +39,10 @@ namespace verbly { { } + // Construct from database + + part(const database& db, sqlite3_stmt* row); + // Copy and move constructors part(const part& other); @@ -60,7 +63,12 @@ namespace verbly { // General accessors - type getType() const + operator bool() const + { + return (type_ != part_type::invalid); + } + + part_type getType() const { return type_; } @@ -85,11 +93,43 @@ namespace verbly { std::string getLiteralValue() const; + // Type info + + static const object objectType; + + static const std::list select; + + // Query fields + + static const field index; + static const field type; + + static const field role; + + // Relationships to other objects + + static const field frame; + + // Noun synrestr relationship + + class synrestr_field { + public: + + filter operator%=(std::string synrestr) const; + + private: + + static const field synrestrJoin; + static const field synrestrField; + }; + + static const synrestr_field synrestr; + private: // Private constructors - part(type t) : type_(t) + part(part_type t) : type_(t) { } @@ -108,7 +148,7 @@ namespace verbly { std::string literal_; }; - type type_ = type::invalid; + part_type type_ = part_type::invalid; }; diff --git a/lib/query.h b/lib/query.h index 214bf99..75651f6 100644 --- a/lib/query.h +++ b/lib/query.h @@ -9,6 +9,7 @@ #include #include "statement.h" #include "binding.h" +#include "order.h" namespace verbly { @@ -24,11 +25,17 @@ namespace verbly { class query { public: - query(const database& db, sqlite3* ppdb, filter queryFilter, bool random, int limit) : db_(&db) + query(const database& db, 
sqlite3* ppdb, filter queryFilter, order sortOrder, int limit) : db_(&db) { + if ((sortOrder.getType() == order::type::field) + && (sortOrder.getSortField().getObject() != Object::objectType)) + { + throw std::invalid_argument("Can only sort query by a field in the result table"); + } + statement stmt(Object::objectType, std::move(queryFilter)); - std::string queryString = stmt.getQueryString(Object::select, random, limit); + std::string queryString = stmt.getQueryString(Object::select, std::move(sortOrder), limit); std::list bindings = stmt.getBindings(); if (sqlite3_prepare_v2(ppdb, queryString.c_str(), queryString.length(), &ppstmt_, NULL) != SQLITE_OK) diff --git a/lib/role.h b/lib/role.h deleted file mode 100644 index 4884ef3..0000000 --- a/lib/role.h +++ /dev/null @@ -1,60 +0,0 @@ -#ifndef ROLE_H_249F9A9C -#define ROLE_H_249F9A9C - -#include -#include -#include "../lib/selrestr.h" - -namespace verbly { - - class role { - public: - - // Default constructor - - role() = default; - - // Constructor - - role( - std::string name, - selrestr selrestrs = {}) : - valid_(true), - name_(name), - selrestrs_(selrestrs) - { - } - - // Accessors - - const std::string& getName() const - { - if (!valid_) - { - throw std::domain_error("Bad access to invalid role"); - } - - return name_; - } - - const selrestr& getSelrestrs() const - { - if (!valid_) - { - throw std::domain_error("Bad access to invalid role"); - } - - return selrestrs_; - } - - private: - - bool valid_ = false; - std::string name_; - selrestr selrestrs_; - - }; - -}; - -#endif /* end of include guard: ROLE_H_249F9A9C */ diff --git a/lib/statement.cpp b/lib/statement.cpp index 846b9de..1512aa5 100644 --- a/lib/statement.cpp +++ b/lib/statement.cpp @@ -5,11 +5,12 @@ #include "util.h" #include "notion.h" #include "word.h" -#include "group.h" #include "frame.h" +#include "part.h" #include "lemma.h" #include "form.h" #include "pronunciation.h" +#include "order.h" namespace verbly { @@ -20,7 +21,7 @@ namespace 
verbly { { } - std::string statement::getQueryString(std::list select, bool random, int limit) const + std::string statement::getQueryString(std::list select, order sortOrder, int limit, bool debug) const { std::stringstream queryStream; @@ -49,7 +50,7 @@ namespace verbly { if (cte.getCondition().getType() != condition::type::empty) { cteStream << " WHERE "; - cteStream << cte.getCondition().toSql(); + cteStream << cte.getCondition().flatten().toSql(true, debug); } if (cte.isRecursive()) @@ -101,12 +102,28 @@ namespace verbly { if (topCondition_.getType() != condition::type::empty) { queryStream << " WHERE "; - queryStream << topCondition_.toSql(); + queryStream << topCondition_.flatten().toSql(true, debug); } - - if (random) + + queryStream << " ORDER BY "; + + switch (sortOrder.getType()) { - queryStream << " ORDER BY RANDOM()"; + case order::type::random: + { + queryStream << "RANDOM()"; + + break; + } + + case order::type::field: + { + queryStream << topTable_; + queryStream << "."; + queryStream << sortOrder.getSortField().getColumn(); + + break; + } } if (limit > 0) @@ -260,6 +277,7 @@ namespace verbly { } case field::type::join: + case field::type::join_where: { // First, figure out what table we need to join against. std::string joinTableName; @@ -269,13 +287,22 @@ namespace verbly { } else { joinTableName = getTableForContext(clause.getField().getJoinObject()); } + + filter joinCondition = clause.getJoinCondition(); + + // If this is a condition join, we need to add the field join + // condition to the clause. + if (clause.getField().getType() == field::type::join_where) + { + joinCondition &= (clause.getField().getConditionField() == clause.getField().getConditionValue()); + } // Recursively parse the subquery, and therefore obtain an // instantiated table to join against, as well as any joins or CTEs // that the subquery may require to function. 
statement joinStmt( joinTableName, - clause.getJoinCondition().normalize(clause.getField().getJoinObject()), + std::move(joinCondition).normalize(clause.getField().getJoinObject()), nextTableId_, nextWithId_); @@ -801,7 +828,7 @@ namespace verbly { new(&singleton_.value_) binding(std::move(value)); } - std::string statement::condition::toSql() const + std::string statement::condition::toSql(bool toplevel, bool debug) const { switch (type_) { @@ -816,42 +843,92 @@ namespace verbly { { case comparison::equals: { - return singleton_.table_ + "." + singleton_.column_ + " = ?"; + if (debug) + { + if (singleton_.value_.getType() == binding::type::string) + { + return singleton_.table_ + "." + singleton_.column_ + " = \"" + singleton_.value_.getString() + "\""; + } else { + return singleton_.table_ + "." + singleton_.column_ + " = " + std::to_string(singleton_.value_.getInteger()); + } + } else { + return singleton_.table_ + "." + singleton_.column_ + " = ?"; + } } case comparison::does_not_equal: { - return singleton_.table_ + "." + singleton_.column_ + " != ?"; + if (debug) + { + if (singleton_.value_.getType() == binding::type::string) + { + return singleton_.table_ + "." + singleton_.column_ + " != \"" + singleton_.value_.getString() + "\""; + } else { + return singleton_.table_ + "." + singleton_.column_ + " != " + std::to_string(singleton_.value_.getInteger()); + } + } else { + return singleton_.table_ + "." + singleton_.column_ + " != ?"; + } } case comparison::is_greater_than: { - return singleton_.table_ + "." + singleton_.column_ + " > ?"; + if (debug) + { + return singleton_.table_ + "." + singleton_.column_ + " > " + std::to_string(singleton_.value_.getInteger()); + } else { + return singleton_.table_ + "." + singleton_.column_ + " > ?"; + } } case comparison::is_at_most: { - return singleton_.table_ + "." + singleton_.column_ + " <= ?"; + if (debug) + { + return singleton_.table_ + "." 
+ singleton_.column_ + " <= " + std::to_string(singleton_.value_.getInteger()); + } else { + return singleton_.table_ + "." + singleton_.column_ + " <= ?"; + } } case comparison::is_less_than: { - return singleton_.table_ + "." + singleton_.column_ + " < ?"; + if (debug) + { + return singleton_.table_ + "." + singleton_.column_ + " < " + std::to_string(singleton_.value_.getInteger()); + } else { + return singleton_.table_ + "." + singleton_.column_ + " < ?"; + } } case comparison::is_at_least: { - return singleton_.table_ + "." + singleton_.column_ + " >= ?"; + if (debug) + { + return singleton_.table_ + "." + singleton_.column_ + " >= " + std::to_string(singleton_.value_.getInteger()); + } else { + return singleton_.table_ + "." + singleton_.column_ + " >= ?"; + } } case comparison::is_like: { - return singleton_.table_ + "." + singleton_.column_ + " LIKE ?"; + if (debug) + { + return singleton_.table_ + "." + singleton_.column_ + " LIKE \"" + singleton_.value_.getString() + "\""; + } else { + return singleton_.table_ + "." + singleton_.column_ + " LIKE ?"; + } } case comparison::is_not_like: { - return singleton_.table_ + "." + singleton_.column_ + " NOT LIKE ?"; + if (debug) + { + return singleton_.table_ + "." + singleton_.column_ + " NOT LIKE \"" + singleton_.value_.getString() + "\""; + } else { + return singleton_.table_ + "." + singleton_.column_ + " NOT LIKE ?"; + } } case comparison::is_not_null: @@ -871,10 +948,25 @@ namespace verbly { std::list clauses; for (const condition& cond : group_.children_) { - clauses.push_back(cond.toSql()); + clauses.push_back(cond.toSql(false, debug)); } - return implode(std::begin(clauses), std::end(clauses), group_.orlogic_ ? " OR " : " AND "); + if (clauses.empty()) + { + return ""; + } else if (clauses.size() == 1) + { + return clauses.front(); + } else { + std::string result = implode(std::begin(clauses), std::end(clauses), group_.orlogic_ ? 
" OR " : " AND "); + + if (toplevel) + { + return result; + } else { + return "(" + result + ")"; + } + } } } } @@ -988,5 +1080,39 @@ namespace verbly { throw std::domain_error("Cannot get children of non-group condition"); } } + + statement::condition statement::condition::flatten() const + { + switch (type_) + { + case type::empty: + case type::singleton: + { + return *this; + } + + case type::group: + { + condition result(group_.orlogic_); + + for (const condition& child : group_.children_) + { + condition newChild = child.flatten(); + + if ((newChild.type_ == type::group) && (newChild.group_.orlogic_ == group_.orlogic_)) + { + for (condition subChild : std::move(newChild.group_.children_)) + { + result += std::move(subChild); + } + } else { + result += std::move(newChild); + } + } + + return result; + } + } + } }; diff --git a/lib/statement.h b/lib/statement.h index aa56568..15c4ac3 100644 --- a/lib/statement.h +++ b/lib/statement.h @@ -13,13 +13,14 @@ namespace verbly { class filter; + class order; class statement { public: statement(object context, filter queryFilter); - std::string getQueryString(std::list select, bool random, int limit) const; + std::string getQueryString(std::list select, order sortOrder, int limit, bool debug = false) const; std::list getBindings() const; @@ -153,10 +154,12 @@ namespace verbly { // Utility - std::string toSql() const; + std::string toSql(bool toplevel, bool debug = false) const; std::list flattenBindings() const; + condition flatten() const; + private: union { struct { @@ -246,8 +249,8 @@ namespace verbly { { return (context == object::notion) ? "notions" : (context == object::word) ? "words" - : (context == object::group) ? "groups" : (context == object::frame) ? "frames" + : (context == object::part) ? "parts" : (context == object::lemma) ? "lemmas_forms" : (context == object::form) ? "forms" : (context == object::pronunciation) ? 
"pronunciations" diff --git a/lib/verbly.h b/lib/verbly.h index d8875b3..112907b 100644 --- a/lib/verbly.h +++ b/lib/verbly.h @@ -6,16 +6,15 @@ #include "filter.h" #include "field.h" #include "query.h" +#include "order.h" #include "notion.h" #include "word.h" -#include "group.h" #include "frame.h" +#include "part.h" #include "lemma.h" #include "form.h" #include "pronunciation.h" #include "token.h" #include "selrestr.h" -#include "part.h" -#include "role.h" #endif /* end of include guard: VERBLY_H_5B39CE50 */ diff --git a/lib/word.cpp b/lib/word.cpp index a928659..90eab1d 100644 --- a/lib/word.cpp +++ b/lib/word.cpp @@ -17,7 +17,7 @@ namespace verbly { const field word::notion = field::joinField(object::word, "notion_id", object::notion); const field word::lemma = field::joinField(object::word, "lemma_id", object::lemma); - const field word::group = field::joinField(object::word, "group_id", object::group, true); + const field word::frame = field::joinField(object::word, "group_id", object::frame, true); const field word::antonyms = field::selfJoin(object::word, "word_id", "antonymy", "antonym_2_id", "antonym_1_id"); @@ -93,7 +93,27 @@ namespace verbly { return lemma_; } - const group& word::getGroup() const + bool word::hasFrames() const + { + if (!valid_) + { + throw std::domain_error("Bad access to uninitialized word"); + } + + if (!hasGroup_) + { + return false; + } + + if (!initializedFrames_) + { + initializeFrames(); + } + + return !frames_.empty(); + } + + const std::vector& word::getFrames() const { if (!valid_) { @@ -105,12 +125,12 @@ namespace verbly { throw std::domain_error("Word does not have a group"); } - if (!group_) + if (!initializedFrames_) { - group_ = db_->groups(group::id == groupId_).first(); + initializeFrames(); } - return group_; + return frames_; } std::string word::getBaseForm() const @@ -129,4 +149,10 @@ namespace verbly { return result; } + void word::initializeFrames() const + { + initializedFrames_ = true; + frames_ = 
db_->frames(*this, {}, -1).all(); + } + }; diff --git a/lib/word.h b/lib/word.h index ddcabe4..8a333a4 100644 --- a/lib/word.h +++ b/lib/word.h @@ -7,7 +7,7 @@ #include "filter.h" #include "notion.h" #include "lemma.h" -#include "group.h" +#include "frame.h" struct sqlite3_stmt; @@ -97,17 +97,9 @@ namespace verbly { const lemma& getLemma() const; - bool hasGroup() const - { - if (!valid_) - { - throw std::domain_error("Bad access to uninitialized word"); - } + bool hasFrames() const; - return hasGroup_; - } - - const group& getGroup() const; + const std::vector& getFrames() const; // Convenience accessors @@ -136,7 +128,7 @@ namespace verbly { static const field notion; static const field lemma; - static const field group; + static const field frame; // Relationships with self @@ -161,6 +153,9 @@ namespace verbly { static const field regionalDomains; private: + + void initializeFrames() const; + bool valid_ = false; int id_; @@ -176,7 +171,9 @@ namespace verbly { mutable class notion notion_; mutable class lemma lemma_; - mutable class group group_; + + mutable bool initializedFrames_ = false; + mutable std::vector frames_; }; -- cgit 1.4.1