From eef5de613c75661e5d94baa086f6f2ddc26c7ed0 Mon Sep 17 00:00:00 2001 From: Kelly Rauchenberger Date: Thu, 24 Mar 2016 23:16:07 -0400 Subject: Added verb frames In addition: - Added prepositions. - Rewrote a lot of the query interface. It now, for a lot of relationships, supports nested AND, OR, and NOT logic. - Rewrote the token class. It is now a union-like class instead of being polymorphic, which means smart pointers are no longer necessary. - Querying with regards to word derivation has been temporarily removed. - Sentinel values are now supported for all word types. - The VerbNet data retrieved from http://verbs.colorado.edu/~mpalmer/projects/verbnet/downloads.html was found to not be perfectly satisfactory in some regards, especially regarding adjective phrases. A patch file is now included in the repository describing the changes made to the VerbNet v3.2 download for the canonical verbly datafile. --- lib/adjective_query.cpp | 819 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 819 insertions(+) create mode 100644 lib/adjective_query.cpp (limited to 'lib/adjective_query.cpp') diff --git a/lib/adjective_query.cpp b/lib/adjective_query.cpp new file mode 100644 index 0000000..ec100e3 --- /dev/null +++ b/lib/adjective_query.cpp @@ -0,0 +1,819 @@ +#include "verbly.h" + +namespace verbly { + + adjective_query::adjective_query(const data& _data) : _data(_data) + { + + } + + adjective_query& adjective_query::limit(int _limit) + { + if ((_limit > 0) || (_limit == unlimited)) + { + this->_limit = _limit; + } + + return *this; + } + + adjective_query& adjective_query::random() + { + this->_random = true; + + return *this; + } + + adjective_query& adjective_query::except(const adjective& _word) + { + _except.push_back(_word); + + return *this; + } + + adjective_query& adjective_query::rhymes_with(const word& _word) + { + for (auto rhyme : _word.rhyme_phonemes()) + { + _rhymes.push_back(rhyme); + } + + if (dynamic_cast(&_word) != nullptr) + { + _except.push_back(dynamic_cast(_word)); + } + + return *this; + } + + adjective_query& adjective_query::has_pronunciation() + { + this->_has_prn = true; + + return *this; + } + + adjective_query& adjective_query::is_variant() + { + this->_is_variant = true; + + return *this; + } + + adjective_query& adjective_query::variant_of(filter _f) + { + _f.clean(); + _variant_of = _f; + + return *this; + } + + adjective_query& adjective_query::has_antonyms() + { + this->_is_antonymic = true; + + return *this; + } + + adjective_query& adjective_query::antonym_of(filter _f) + { + _f.clean(); + _antonym_of = _f; + + return *this; + } + + adjective_query& adjective_query::has_synonyms() + { + this->_is_synonymic = true; + + return *this; + } + + adjective_query& adjective_query::synonym_of(filter _f) + { + _f.clean(); + _synonym_of = _f; + + return *this; + } + + adjective_query& adjective_query::is_generalization() + { + this->_is_generalization = true; + + return *this; + } + + adjective_query& adjective_query::generalization_of(filter _f) + { + _f.clean(); + _generalization_of = _f; + + return *this; + } + + adjective_query& adjective_query::is_specification() + { + this->_is_specification = true; + + return *this; + } + + adjective_query& adjective_query::specification_of(filter _f) + { + _f.clean(); + _specification_of = _f; + + return *this; + } + + adjective_query& adjective_query::is_pertainymic() + { + this->_is_pertainymic = true; + + return *this; + } + + adjective_query& adjective_query::pertainym_of(filter _f) + { + _f.clean(); + _pertainym_of = _f; + + return *this; + } + + adjective_query& adjective_query::is_mannernymic() + { + this->_is_mannernymic = true; + + return *this; + } + + adjective_query& adjective_query::anti_mannernym_of(filter _f) + { + _f.clean(); + _anti_mannernym_of = _f; + + return *this; + } + /* + adjective_query& adjective_query::derived_from(const word& _w) + { + if (dynamic_cast(&_w) != nullptr) + { + _derived_from_adjective.push_back(dynamic_cast(_w)); + } else if (dynamic_cast(&_w) != nullptr) + { + _derived_from_adverb.push_back(dynamic_cast(_w)); + } else if (dynamic_cast(&_w) != nullptr) + { + _derived_from_noun.push_back(dynamic_cast(_w)); + } + + return *this; + } + + adjective_query& adjective_query::not_derived_from(const word& _w) + { + if (dynamic_cast(&_w) != nullptr) + { + _not_derived_from_adjective.push_back(dynamic_cast(_w)); + } else if (dynamic_cast(&_w) != nullptr) + { + _not_derived_from_adverb.push_back(dynamic_cast(_w)); + } else if (dynamic_cast(&_w) != nullptr) + { + _not_derived_from_noun.push_back(dynamic_cast(_w)); + } + + return *this; + } + */ + std::list adjective_query::run() const + { + std::stringstream construct; + construct << "SELECT adjective_id, base_form, comparative, superlative, position FROM adjectives"; + std::list conditions; + + if (_has_prn) + { + conditions.push_back("adjective_id IN (SELECT adjective_id FROM adjective_pronunciations)"); + } + + if (!_rhymes.empty()) + { + std::list clauses(_rhymes.size(), "pronunciation LIKE @RHMPRN"); + std::string cond = "adjective_id IN (SELECT adjective_id FROM adjective_pronunciations WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; + conditions.push_back(cond); + } + + for (auto except : _except) + { + conditions.push_back("adjective_id != @EXCID"); + } + + if (_requires_comparative_form) + { + conditions.push_back("comparative IS NOT NULL"); + } + + if (_requires_superlative_form) + { + conditions.push_back("superlative IS NOT NULL"); + } + + switch (_position) + { + case adjective::positioning::predicate: conditions.push_back("position = 'p'"); break; + case adjective::positioning::attributive: conditions.push_back("position = 'a'"); break; + case adjective::positioning::postnominal: conditions.push_back("position = 'i'"); break; + case adjective::positioning::undefined: break; + } + + if (_is_variant) + { + conditions.push_back("adjective_id IN (SELECT adjective_id FROM variation)"); + } + + if (!_variant_of.empty()) + { + std::stringstream cond; + if (_variant_of.get_notlogic()) + { + cond << "adjective_id NOT IN"; + } else { + cond << "adjective_id IN"; + } + + cond << "(SELECT adjective_id FROM variation WHERE "; + + std::function, bool)> recur = [&] (filter f, bool notlogic) -> std::string { + switch (f.get_type()) + { + case filter::type::singleton: + { + if (notlogic == f.get_notlogic()) + { + return "noun_id = @ATTRID"; + } else { + return "noun_id != @ATTRID"; + } + } + + case filter::type::group: + { + bool truelogic = notlogic != f.get_notlogic(); + + std::list clauses; + std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter f2) { + return recur(f2, truelogic); + }); + + if (truelogic == f.get_orlogic()) + { + return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; + } else { + return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; + } + } + } + }; + + cond << recur(_variant_of, _variant_of.get_notlogic()); + cond << ")"; + conditions.push_back(cond.str()); + } + + if (_is_antonymic) + { + conditions.push_back("adjective_id IN (SELECT adjective_2_id FROM adjective_antonymy)"); + } + + if (!_antonym_of.empty()) + { + std::stringstream cond; + if (_antonym_of.get_notlogic()) + { + cond << "adjective_id NOT IN"; + } else { + cond << "adjective_id IN"; + } + + cond << "(SELECT adjective_2_id FROM adjective_antonymy WHERE "; + + std::function, bool)> recur = [&] (filter f, bool notlogic) -> std::string { + switch (f.get_type()) + { + case filter::type::singleton: + { + if (notlogic == f.get_notlogic()) + { + return "adjective_1_id = @ANTID"; + } else { + return "adjective_1_id != @ANTID"; + } + } + + case filter::type::group: + { + bool truelogic = notlogic != f.get_notlogic(); + + std::list clauses; + std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter f2) { + return recur(f2, truelogic); + }); + + if (truelogic == f.get_orlogic()) + { + return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; + } else { + return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; + } + } + } + }; + + cond << recur(_antonym_of, _antonym_of.get_notlogic()); + cond << ")"; + conditions.push_back(cond.str()); + } + + if (_is_synonymic) + { + conditions.push_back("adjective_id IN (SELECT adjective_2_id FROM adjective_synonymy)"); + } + + if (!_synonym_of.empty()) + { + std::stringstream cond; + if (_synonym_of.get_notlogic()) + { + cond << "adjective_id NOT IN"; + } else { + cond << "adjective_id IN"; + } + + cond << "(SELECT adjective_2_id FROM adjective_synonymy WHERE "; + + std::function, bool)> recur = [&] (filter f, bool notlogic) -> std::string { + switch (f.get_type()) + { + case filter::type::singleton: + { + if (notlogic == f.get_notlogic()) + { + return "adjective_1_id = @SYNID"; + } else { + return "adjective_1_id != @SYNID"; + } + } + + case filter::type::group: + { + bool truelogic = notlogic != f.get_notlogic(); + + std::list clauses; + std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter f2) { + return recur(f2, truelogic); + }); + + if (truelogic == f.get_orlogic()) + { + return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; + } else { + return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; + } + } + } + }; + + cond << recur(_synonym_of, _synonym_of.get_notlogic()); + cond << ")"; + conditions.push_back(cond.str()); + } + + if (_is_generalization) + { + conditions.push_back("adjective_id IN (SELECT general_id FROM specification)"); + } + + if (!_generalization_of.empty()) + { + std::stringstream cond; + if (_generalization_of.get_notlogic()) + { + cond << "adjective_id NOT IN"; + } else { + cond << "adjective_id IN"; + } + + cond << "(SELECT general_id FROM specification WHERE "; + + std::function, bool)> recur = [&] (filter f, bool notlogic) -> std::string { + switch (f.get_type()) + { + case filter::type::singleton: + { + if (notlogic == f.get_notlogic()) + { + return "specific_id = @SPECID"; + } else { + return "specific_id != @SPECID"; + } + } + + case filter::type::group: + { + bool truelogic = notlogic != f.get_notlogic(); + + std::list clauses; + std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter f2) { + return recur(f2, truelogic); + }); + + if (truelogic == f.get_orlogic()) + { + return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; + } else { + return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; + } + } + } + }; + + cond << recur(_generalization_of, _generalization_of.get_notlogic()); + cond << ")"; + conditions.push_back(cond.str()); + } + + if (_is_specification) + { + conditions.push_back("adjective_id IN (SELECT specific_id FROM specification)"); + } + + if (!_specification_of.empty()) + { + std::stringstream cond; + if (_specification_of.get_notlogic()) + { + cond << "adjective_id NOT IN"; + } else { + cond << "adjective_id IN"; + } + + cond << "(SELECT specific_id FROM specification WHERE "; + + std::function, bool)> recur = [&] (filter f, bool notlogic) -> std::string { + switch (f.get_type()) + { + case filter::type::singleton: + { + if (notlogic == f.get_notlogic()) + { + return "general_id = @GENID"; + } else { + return "general_id != @GENID"; + } + } + + case filter::type::group: + { + bool truelogic = notlogic != f.get_notlogic(); + + std::list clauses; + std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter f2) { + return recur(f2, truelogic); + }); + + if (truelogic == f.get_orlogic()) + { + return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; + } else { + return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; + } + } + } + }; + + cond << recur(_specification_of, _specification_of.get_notlogic()); + cond << ")"; + conditions.push_back(cond.str()); + } + + if (_is_pertainymic) + { + conditions.push_back("adjective_id IN (SELECT pertainym_id FROM pertainymy)"); + } + + if (!_pertainym_of.empty()) + { + std::stringstream cond; + if (_pertainym_of.get_notlogic()) + { + cond << "adjective_id NOT IN"; + } else { + cond << "adjective_id IN"; + } + + cond << "(SELECT pertainym_id FROM pertainymy WHERE "; + + std::function, bool)> recur = [&] (filter f, bool notlogic) -> std::string { + switch (f.get_type()) + { + case filter::type::singleton: + { + if (notlogic == f.get_notlogic()) + { + return "noun_id = @APERID"; + } else { + return "noun_id != @APERID"; + } + } + + case filter::type::group: + { + bool truelogic = notlogic != f.get_notlogic(); + + std::list clauses; + std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter f2) { + return recur(f2, truelogic); + }); + + if (truelogic == f.get_orlogic()) + { + return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; + } else { + return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; + } + } + } + }; + + cond << recur(_pertainym_of, _pertainym_of.get_notlogic()); + cond << ")"; + conditions.push_back(cond.str()); + } + + if (_is_mannernymic) + { + conditions.push_back("adjective_id IN (SELECT adjective_id FROM mannernymy)"); + } + + if (!_anti_mannernym_of.empty()) + { + std::stringstream cond; + if (_anti_mannernym_of.get_notlogic()) + { + cond << "adjective_id NOT IN"; + } else { + cond << "adjective_id IN"; + } + + cond << "(SELECT adjective_id FROM mannernymy WHERE "; + + std::function, bool)> recur = [&] (filter f, bool notlogic) -> std::string { + switch (f.get_type()) + { + case filter::type::singleton: + { + if (notlogic == f.get_notlogic()) + { + return "mannernym_id = @MANID"; + } else { + return "mannernym_id != @MANID"; + } + } + + case filter::type::group: + { + bool truelogic = notlogic != f.get_notlogic(); + + std::list clauses; + std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter f2) { + return recur(f2, truelogic); + }); + + if (truelogic == f.get_orlogic()) + { + return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; + } else { + return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; + } + } + } + }; + + cond << recur(_anti_mannernym_of, _anti_mannernym_of.get_notlogic()); + cond << ")"; + conditions.push_back(cond.str()); + } +/* + if (!_derived_from_adjective.empty()) + { + std::list clauses(_derived_from_adjective.size(), "adjective_2_id = @DERADJ"); + std::string cond = "adjective_id IN (SELECT adjective_1_id FROM adjective_adjective_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; + conditions.push_back(cond); + } + + if (!_not_derived_from_adjective.empty()) + { + std::list clauses(_not_derived_from_adjective.size(), "adjective_2_id = @NDERADJ"); + std::string cond = "adjective_id NOT IN (SELECT adjective_1_id FROM adjective_adjective_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; + conditions.push_back(cond); + } + + if (!_derived_from_adverb.empty()) + { + std::list clauses(_derived_from_adverb.size(), "adverb_id = @DERADV"); + std::string cond = "adjective_id IN (SELECT adjective_id FROM adjective_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; + conditions.push_back(cond); + } + + if (!_not_derived_from_adverb.empty()) + { + std::list clauses(_not_derived_from_adverb.size(), "adverb_id = @NDERADV"); + std::string cond = "adjective_id NOT IN (SELECT adjective_id FROM adjective_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; + conditions.push_back(cond); + } + + if (!_derived_from_noun.empty()) + { + std::list clauses(_derived_from_noun.size(), "noun_id = @DERN"); + std::string cond = "adjective_id IN (SELECT adjective_id FROM noun_adjective_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; + conditions.push_back(cond); + } + + if (!_not_derived_from_noun.empty()) + { + std::list clauses(_not_derived_from_noun.size(), "noun_id = @NDERN"); + std::string cond = "adjective_id NOT IN (SELECT adjective_id FROM noun_adjective_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; + conditions.push_back(cond); + }*/ + + if (!conditions.empty()) + { + construct << " WHERE "; + construct << verbly::implode(std::begin(conditions), std::end(conditions), " AND "); + } + + if (_random) + { + construct << " ORDER BY RANDOM()"; + } + + if (_limit != unlimited) + { + construct << " LIMIT " << _limit; + } + + sqlite3_stmt* ppstmt; + std::string query = construct.str(); + if (sqlite3_prepare_v2(_data.ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) + { + throw std::runtime_error(sqlite3_errmsg(_data.ppdb)); + } + + if (!_rhymes.empty()) + { + int i = 0; + for (auto rhyme : _rhymes) + { + std::string rhymer = "%" + rhyme; + sqlite3_bind_text(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@RHMPRN"), rhymer.c_str(), rhymer.length(), SQLITE_STATIC); + + i++; + } + } + + for (auto except : _except) + { + sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@EXCID"), except._id); + } + + for (auto attribute : _variant_of.inorder_flatten()) + { + sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@ATTRID"), attribute._id); + } + + for (auto antonym : _antonym_of.inorder_flatten()) + { + sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@ANTID"), antonym._id); + } + + for (auto synonym : _synonym_of.inorder_flatten()) + { + sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@SYNID"), synonym._id); + } + + for (auto specific : _generalization_of.inorder_flatten()) + { + sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@SPECID"), specific._id); + } + + for (auto general : _specification_of.inorder_flatten()) + { + sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@GENID"), general._id); + } + + for (auto n : _pertainym_of.inorder_flatten()) + { + sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@APERID"), n._id); + } + + for (auto mannernym : _anti_mannernym_of.inorder_flatten()) + { + sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@MANID"), mannernym._id); + } + /* + for (auto adj : _derived_from_adjective) + { + sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@DERADJ"), adj._id); + } + + for (auto adj : _not_derived_from_adjective) + { + sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@NDERADJ"), adj._id); + } + + for (auto adv : _derived_from_adverb) + { + sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@DERADV"), adv._id); + } + + for (auto adv : _not_derived_from_adverb) + { + sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@NDERADV"), adv._id); + } + + for (auto n : _derived_from_noun) + { + sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@DERN"), n._id); + } + + for (auto n : _not_derived_from_noun) + { + sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@NDERN"), n._id); + } +*/ + std::list output; + while (sqlite3_step(ppstmt) == SQLITE_ROW) + { + adjective tnc {_data, sqlite3_column_int(ppstmt, 0)}; + tnc._base_form = std::string(reinterpret_cast(sqlite3_column_text(ppstmt, 1))); + + if (sqlite3_column_type(ppstmt, 2) != SQLITE_NULL) + { + tnc._comparative_form = std::string(reinterpret_cast(sqlite3_column_text(ppstmt, 2))); + } + + if (sqlite3_column_type(ppstmt, 3) != SQLITE_NULL) + { + tnc._superlative_form = std::string(reinterpret_cast(sqlite3_column_text(ppstmt, 3))); + } + + if (sqlite3_column_type(ppstmt, 4) != SQLITE_NULL) + { + std::string adjpos(reinterpret_cast(sqlite3_column_text(ppstmt, 4))); + if (adjpos == "p") + { + tnc._position = adjective::positioning::predicate; + } else if (adjpos == "a") + { + tnc._position = adjective::positioning::attributive; + } else if (adjpos == "i") + { + tnc._position = adjective::positioning::postnominal; + } + } + + output.push_back(tnc); + } + + sqlite3_finalize(ppstmt); + + for (auto& adjective : output) + { + query = "SELECT pronunciation FROM adjective_pronunciations WHERE adjective_id = ?"; + if (sqlite3_prepare_v2(_data.ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) + { + throw std::runtime_error(sqlite3_errmsg(_data.ppdb)); + } + + sqlite3_bind_int(ppstmt, 1, adjective._id); + + while (sqlite3_step(ppstmt) == SQLITE_ROW) + { + std::string pronunciation(reinterpret_cast(sqlite3_column_text(ppstmt, 0))); + auto phonemes = verbly::split>(pronunciation, " "); + + adjective.pronunciations.push_back(phonemes); + } + + sqlite3_finalize(ppstmt); + } + + return output; + } + +}; -- cgit 1.4.1