From 6746da6edd7d9d50efe374eabbb79a3cac882d81 Mon Sep 17 00:00:00 2001 From: Kelly Rauchenberger Date: Mon, 16 Jan 2017 18:02:50 -0500 Subject: Started structural rewrite The new object structure was designed to build on the existing WordNet structure, while also adding in all of the data that we get from other sources. More information about this can be found on the project wiki. The generator has already been completely rewritten to generate a datafile that uses the new structure. In addition, a number of indexes are created, which does double the size of the datafile, but also allows for much faster lookups. Finally, the new generator is written modularly and is a lot more readable than the old one. The verbly interface to the new object structure has mostly been completed, but has not been tested fully. There is a completely new search API which utilizes a lot of operator overloading; documentation on how to use it should go up at some point. Token processing and verb frames are currently unimplemented. The source for these has been left in the repository for now. 
--- lib/adverb_query.cpp | 758 --------------------------------------------------- 1 file changed, 758 deletions(-) delete mode 100644 lib/adverb_query.cpp (limited to 'lib/adverb_query.cpp') diff --git a/lib/adverb_query.cpp b/lib/adverb_query.cpp deleted file mode 100644 index 3e62bb7..0000000 --- a/lib/adverb_query.cpp +++ /dev/null @@ -1,758 +0,0 @@ -#include "verbly.h" - -namespace verbly { - - adverb_query::adverb_query(const data& _data) : _data(_data) - { - - } - - adverb_query& adverb_query::limit(int _limit) - { - if ((_limit > 0) || (_limit == unlimited)) - { - this->_limit = _limit; - } - - return *this; - } - - adverb_query& adverb_query::random() - { - this->_random = true; - - return *this; - } - - adverb_query& adverb_query::except(const adverb& _word) - { - _except.push_back(_word); - - return *this; - } - - adverb_query& adverb_query::rhymes_with(const word& _word) - { - for (auto rhyme : _word.get_rhymes()) - { - _rhymes.push_back(rhyme); - } - - if (dynamic_cast(&_word) != nullptr) - { - _except.push_back(dynamic_cast(_word)); - } - - return *this; - } - - adverb_query& adverb_query::rhymes_with(rhyme _r) - { - _rhymes.push_back(_r); - - return *this; - } - - adverb_query& adverb_query::has_pronunciation() - { - this->_has_prn = true; - - return *this; - } - - adverb_query& adverb_query::has_rhyming_noun() - { - _has_rhyming_noun = true; - - return *this; - } - - adverb_query& adverb_query::has_rhyming_adjective() - { - _has_rhyming_adjective = true; - - return *this; - } - - adverb_query& adverb_query::has_rhyming_adverb() - { - _has_rhyming_adverb = true; - - return *this; - } - - adverb_query& adverb_query::has_rhyming_verb() - { - _has_rhyming_verb = true; - - return *this; - } - - adverb_query& adverb_query::requires_comparative_form() - { - _requires_comparative_form = true; - - return *this; - } - - adverb_query& adverb_query::requires_superlative_form() - { - _requires_superlative_form = true; - - return *this; - } - - adverb_query& 
adverb_query::with_stress(filter> _arg) - { - _stress = _arg; - - return *this; - } - - adverb_query& adverb_query::with_prefix(filter _f) - { - _f.clean(); - _with_prefix = _f; - - return *this; - } - - adverb_query& adverb_query::with_suffix(filter _f) - { - _f.clean(); - _with_suffix = _f; - - return *this; - } - - adverb_query& adverb_query::with_complexity(int _arg) - { - _with_complexity = _arg; - - return *this; - } - - adverb_query& adverb_query::has_antonyms() - { - _has_antonyms = true; - - return *this; - } - - adverb_query& adverb_query::antonym_of(filter _f) - { - _f.clean(); - _antonym_of = _f; - - return *this; - } - - adverb_query& adverb_query::has_synonyms() - { - _has_synonyms = true; - - return *this; - } - - adverb_query& adverb_query::synonym_of(filter _f) - { - _f.clean(); - _synonym_of = _f; - - return *this; - } - - adverb_query& adverb_query::is_mannernymic() - { - _is_mannernymic = true; - - return *this; - } - - adverb_query& adverb_query::mannernym_of(filter _f) - { - _f.clean(); - _mannernym_of = _f; - - return *this; - } - /* - adverb_query& adverb_query::derived_from(const word& _w) - { - if (dynamic_cast(&_w) != nullptr) - { - _derived_from_adjective.push_back(dynamic_cast(_w)); - } else if (dynamic_cast(&_w) != nullptr) - { - _derived_from_adverb.push_back(dynamic_cast(_w)); - } else if (dynamic_cast(&_w) != nullptr) - { - _derived_from_noun.push_back(dynamic_cast(_w)); - } - - return *this; - } - - adverb_query& adverb_query::not_derived_from(const word& _w) - { - if (dynamic_cast(&_w) != nullptr) - { - _not_derived_from_adjective.push_back(dynamic_cast(_w)); - } else if (dynamic_cast(&_w) != nullptr) - { - _not_derived_from_adverb.push_back(dynamic_cast(_w)); - } else if (dynamic_cast(&_w) != nullptr) - { - _not_derived_from_noun.push_back(dynamic_cast(_w)); - } - - return *this; - } - */ - std::list adverb_query::run() const - { - std::stringstream construct; - construct << "SELECT adverb_id, base_form, comparative, superlative 
FROM adverbs"; - std::list conditions; - std::list bindings; - - if (_has_prn) - { - conditions.push_back("adverb_id IN (SELECT adverb_id FROM adverb_pronunciations)"); - } - - if (!_rhymes.empty()) - { - std::list clauses(_rhymes.size(), "(prerhyme != ? AND rhyme = ?)"); - std::string cond = "adverb_id IN (SELECT adverb_id FROM adverb_pronunciations WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; - conditions.push_back(cond); - - for (auto rhy : _rhymes) - { - bindings.emplace_back(rhy.get_prerhyme()); - bindings.emplace_back(rhy.get_rhyme()); - } - } - - if (_has_rhyming_noun) - { - conditions.push_back("adverb_id IN (SELECT a.adverb_id FROM adverbs AS a INNER JOIN adverb_pronunciations AS curp ON curp.noun_id = a.adverb_id INNER JOIN noun_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)"); - } - - if (_has_rhyming_adjective) - { - conditions.push_back("adverb_id IN (SELECT a.adverb_id FROM adverbs AS a INNER JOIN adverb_pronunciations AS curp ON curp.noun_id = a.adverb_id INNER JOIN adjective_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)"); - } - - if (_has_rhyming_adverb) - { - conditions.push_back("adverb_id IN (SELECT a.adverb_id FROM adverbs AS a INNER JOIN adverb_pronunciations AS curp ON curp.noun_id = a.adverb_id INNER JOIN adverb_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme AND rhmp.adverb_id != curp.adverb_id)"); - } - - if (_has_rhyming_verb) - { - conditions.push_back("adverb_id IN (SELECT a.adverb_id FROM adverbs AS a INNER JOIN adverb_pronunciations AS curp ON curp.noun_id = a.adverb_id INNER JOIN verb_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)"); - } - - for (auto except : _except) - { - conditions.push_back("adverb_id != ?"); - bindings.emplace_back(except._id); - } - - if (_requires_comparative_form) - { - conditions.push_back("comparative IS NOT NULL"); - 
} - - if (_requires_superlative_form) - { - conditions.push_back("superlative IS NOT NULL"); - } - - if (!_stress.empty()) - { - std::stringstream cond; - if (_stress.get_notlogic()) - { - cond << "adverb_id NOT IN"; - } else { - cond << "adverb_id IN"; - } - - cond << "(SELECT adverb_id FROM adverb_pronunciations WHERE "; - - std::function>, bool)> recur = [&] (filter> f, bool notlogic) -> std::string { - switch (f.get_type()) - { - case filter>::type::singleton: - { - std::ostringstream _val; - for (auto syl : f.get_elem()) - { - if (syl) - { - _val << "1"; - } else { - _val << "0"; - } - } - - bindings.emplace_back(_val.str()); - - if (notlogic == f.get_notlogic()) - { - return "stress = ?"; - } else { - return "stress != ?"; - } - } - - case filter>::type::group: - { - bool truelogic = notlogic != f.get_notlogic(); - - std::list clauses; - std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter> f2) { - return recur(f2, truelogic); - }); - - if (truelogic == f.get_orlogic()) - { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; - } else { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; - } - } - } - }; - - cond << recur(_stress, _stress.get_notlogic()); - cond << ")"; - conditions.push_back(cond.str()); - } - - if (!_with_prefix.empty()) - { - std::function, bool)> recur = [&] (filter f, bool notlogic) -> std::string { - switch (f.get_type()) - { - case filter::type::singleton: - { - bindings.emplace_back(f.get_elem() + "%"); - - if (notlogic == f.get_notlogic()) - { - return "base_form LIKE ?"; - } else { - return "base_form NOT LIKE ?"; - } - } - - case filter::type::group: - { - bool truelogic = notlogic != f.get_notlogic(); - - std::list clauses; - std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter f2) { - return recur(f2, truelogic); - }); - - if (truelogic == f.get_orlogic()) - { - return "(" + 
verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; - } else { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; - } - } - } - }; - - conditions.push_back(recur(_with_prefix, false)); - } - - if (!_with_suffix.empty()) - { - std::function, bool)> recur = [&] (filter f, bool notlogic) -> std::string { - switch (f.get_type()) - { - case filter::type::singleton: - { - bindings.emplace_back("%" + f.get_elem()); - - if (notlogic == f.get_notlogic()) - { - return "base_form LIKE ?"; - } else { - return "base_form NOT LIKE ?"; - } - } - - case filter::type::group: - { - bool truelogic = notlogic != f.get_notlogic(); - - std::list clauses; - std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter f2) { - return recur(f2, truelogic); - }); - - if (truelogic == f.get_orlogic()) - { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; - } else { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; - } - } - } - }; - - conditions.push_back(recur(_with_suffix, false)); - } - - if (_with_complexity != unlimited) - { - conditions.push_back("complexity = ?"); - bindings.emplace_back(_with_complexity); - } - - if (_has_antonyms) - { - conditions.push_back("adverb_id IN (SELECT adverb_2_id FROM adverb_antonymy)"); - } - - if (!_antonym_of.empty()) - { - std::stringstream cond; - if (_antonym_of.get_notlogic()) - { - cond << "adverb_id NOT IN"; - } else { - cond << "adverb_id IN"; - } - - cond << "(SELECT adverb_2_id FROM adverb_antonymy WHERE "; - - std::function, bool)> recur = [&] (filter f, bool notlogic) -> std::string { - switch (f.get_type()) - { - case filter::type::singleton: - { - bindings.emplace_back(f.get_elem()._id); - - if (notlogic == f.get_notlogic()) - { - return "adverb_1_id = ?"; - } else { - return "adverb_1_id != ?"; - } - } - - case filter::type::group: - { - bool truelogic = notlogic != f.get_notlogic(); - - 
std::list clauses; - std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter f2) { - return recur(f2, truelogic); - }); - - if (truelogic == f.get_orlogic()) - { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; - } else { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; - } - } - } - }; - - cond << recur(_antonym_of, _antonym_of.get_notlogic()); - cond << ")"; - conditions.push_back(cond.str()); - } - - if (_has_synonyms) - { - conditions.push_back("adverb_id IN (SELECT adverb_2_id FROM adverb_synonymy)"); - } - - if (!_synonym_of.empty()) - { - std::stringstream cond; - if (_antonym_of.get_notlogic()) - { - cond << "adverb_id NOT IN"; - } else { - cond << "adverb_id IN"; - } - - cond << "(SELECT adverb_2_id FROM adverb_synonymy WHERE "; - - std::function, bool)> recur = [&] (filter f, bool notlogic) -> std::string { - switch (f.get_type()) - { - case filter::type::singleton: - { - bindings.emplace_back(f.get_elem()._id); - - if (notlogic == f.get_notlogic()) - { - return "adverb_1_id = ?"; - } else { - return "adverb_1_id != ?"; - } - } - - case filter::type::group: - { - bool truelogic = notlogic != f.get_notlogic(); - - std::list clauses; - std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter f2) { - return recur(f2, truelogic); - }); - - if (truelogic == f.get_orlogic()) - { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; - } else { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; - } - } - } - }; - - cond << recur(_synonym_of, _synonym_of.get_notlogic()); - cond << ")"; - conditions.push_back(cond.str()); - } - - if (_is_mannernymic) - { - conditions.push_back("adverb_id IN (SELECT mannernym_id FROM mannernymy)"); - } - - if (!_mannernym_of.empty()) - { - std::stringstream cond; - if (_antonym_of.get_notlogic()) - { - cond << "adverb_id NOT IN"; - } else 
{ - cond << "adverb_id IN"; - } - - cond << "(SELECT mannernym_id FROM mannernymy WHERE "; - - std::function, bool)> recur = [&] (filter f, bool notlogic) -> std::string { - switch (f.get_type()) - { - case filter::type::singleton: - { - bindings.emplace_back(f.get_elem()._id); - - if (notlogic == f.get_notlogic()) - { - return "adjective_id = ?"; - } else { - return "adjective_id != ?"; - } - } - - case filter::type::group: - { - bool truelogic = notlogic != f.get_notlogic(); - - std::list clauses; - std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter f2) { - return recur(f2, truelogic); - }); - - if (truelogic == f.get_orlogic()) - { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; - } else { - return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; - } - } - } - }; - - cond << recur(_mannernym_of, _mannernym_of.get_notlogic()); - cond << ")"; - conditions.push_back(cond.str()); - } - -/* if (!_derived_from_adjective.empty()) - { - std::list clauses(_derived_from_adjective.size(), "adjective_id = @DERADJ"); - std::string cond = "adverb_id IN (SELECT adverb_id FROM adjective_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; - conditions.push_back(cond); - } - - if (!_not_derived_from_adjective.empty()) - { - std::list clauses(_not_derived_from_adjective.size(), "adjective_id = @NDERADJ"); - std::string cond = "adverb_id NOT IN (SELECT adverb_id FROM adjective_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; - conditions.push_back(cond); - } - - if (!_derived_from_adverb.empty()) - { - std::list clauses(_derived_from_adverb.size(), "adverb_2_id = @DERADV"); - std::string cond = "adverb_id IN (SELECT adverb_1_id FROM adverb_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; - conditions.push_back(cond); - } - - if 
(!_not_derived_from_adverb.empty()) - { - std::list clauses(_not_derived_from_adverb.size(), "adverb_2_id = @NDERADV"); - std::string cond = "adverb_id NOT IN (SELECT adverb_1_id FROM adverb_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; - conditions.push_back(cond); - } - - if (!_derived_from_noun.empty()) - { - std::list clauses(_derived_from_noun.size(), "noun_id = @DERN"); - std::string cond = "adverb_id IN (SELECT adverb_id FROM noun_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; - conditions.push_back(cond); - } - - if (!_not_derived_from_noun.empty()) - { - std::list clauses(_not_derived_from_noun.size(), "noun_id = @NDERN"); - std::string cond = "adverb_id NOT IN (SELECT adverb_id FROM noun_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; - conditions.push_back(cond); - }*/ - - if (!conditions.empty()) - { - construct << " WHERE "; - construct << verbly::implode(std::begin(conditions), std::end(conditions), " AND "); - } - - if (_random) - { - construct << " ORDER BY RANDOM()"; - } - - if (_limit != unlimited) - { - construct << " LIMIT " << _limit; - } - - sqlite3_stmt* ppstmt; - std::string query = construct.str(); - if (sqlite3_prepare_v2(_data.ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) - { - throw std::runtime_error(sqlite3_errmsg(_data.ppdb)); - } - - int i = 1; - for (auto& binding : bindings) - { - switch (binding.get_type()) - { - case binding::type::integer: - { - sqlite3_bind_int(ppstmt, i, binding.get_integer()); - - break; - } - - case binding::type::string: - { - sqlite3_bind_text(ppstmt, i, binding.get_string().c_str(), binding.get_string().length(), SQLITE_TRANSIENT); - - break; - } - } - - i++; - } - - /* - for (auto adj : _derived_from_adjective) - { - sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@DERADJ"), adj._id); - } - - for (auto adj : 
_not_derived_from_adjective) - { - sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@NDERADJ"), adj._id); - } - - for (auto adv : _derived_from_adverb) - { - sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@DERADV"), adv._id); - } - - for (auto adv : _not_derived_from_adverb) - { - sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@NDERADV"), adv._id); - } - - for (auto n : _derived_from_noun) - { - sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@DERN"), n._id); - } - - for (auto n : _not_derived_from_noun) - { - sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@NDERN"), n._id); - }*/ - - std::list output; - while (sqlite3_step(ppstmt) == SQLITE_ROW) - { - adverb tnc {_data, sqlite3_column_int(ppstmt, 0)}; - tnc._base_form = std::string(reinterpret_cast(sqlite3_column_text(ppstmt, 1))); - - if (sqlite3_column_type(ppstmt, 2) != SQLITE_NULL) - { - tnc._comparative_form = std::string(reinterpret_cast(sqlite3_column_text(ppstmt, 2))); - } - - if (sqlite3_column_type(ppstmt, 3) != SQLITE_NULL) - { - tnc._superlative_form = std::string(reinterpret_cast(sqlite3_column_text(ppstmt, 3))); - } - - output.push_back(tnc); - } - - sqlite3_finalize(ppstmt); - - for (auto& adverb : output) - { - query = "SELECT pronunciation, prerhyme, rhyme FROM adverb_pronunciations WHERE adverb_id = ?"; - if (sqlite3_prepare_v2(_data.ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) - { - throw std::runtime_error(sqlite3_errmsg(_data.ppdb)); - } - - sqlite3_bind_int(ppstmt, 1, adverb._id); - - while (sqlite3_step(ppstmt) == SQLITE_ROW) - { - std::string pronunciation(reinterpret_cast(sqlite3_column_text(ppstmt, 0))); - auto phonemes = verbly::split>(pronunciation, " "); - - adverb.pronunciations.push_back(phonemes); - - if ((sqlite3_column_type(ppstmt, 1) != SQLITE_NULL) && (sqlite3_column_type(ppstmt, 2) != SQLITE_NULL)) - { - std::string prerhyme(reinterpret_cast(sqlite3_column_text(ppstmt, 1))); - 
std::string rhyming(reinterpret_cast(sqlite3_column_text(ppstmt, 2))); - adverb.rhymes.emplace_back(prerhyme, rhyming); - } - } - - sqlite3_finalize(ppstmt); - } - - return output; - } - -}; -- cgit 1.4.1