From 4c94e100e87a09284f0e0a5bc0df688672492a1e Mon Sep 17 00:00:00 2001 From: Kelly Rauchenberger Date: Sun, 27 Mar 2016 14:28:54 -0400 Subject: Added prefix/suffix search, and word complexity search for nouns, adjectives, and adverbs Word complexity refers to the number of words in a noun, adjective, or adverb. --- generator/generator.cpp | 22 +++++---- generator/schema.sql | 9 ++-- lib/adjective_query.cpp | 119 ++++++++++++++++++++++++++++++++++++++++++++++++ lib/adjective_query.h | 10 ++++ lib/adverb_query.cpp | 119 ++++++++++++++++++++++++++++++++++++++++++++++++ lib/adverb_query.h | 10 ++++ lib/noun.cpp | 42 +++++++++++++++++ lib/noun.h | 6 +++ lib/noun_query.cpp | 119 ++++++++++++++++++++++++++++++++++++++++++++++++ lib/noun_query.h | 8 ++++ 10 files changed, 452 insertions(+), 12 deletions(-) diff --git a/generator/generator.cpp b/generator/generator.cpp index aea750c..6fbbfb8 100644 --- a/generator/generator.cpp +++ b/generator/generator.cpp @@ -1027,9 +1027,9 @@ int main(int argc, char** argv) { if (nouns.count(word) == 1) { - query = "INSERT INTO nouns (singular, proper, plural) VALUES (?, ?, ?)"; + query = "INSERT INTO nouns (singular, proper, complexity, plural) VALUES (?, ?, ?, ?)"; } else { - query = "INSERT INTO nouns (singular, proper) VALUES (?, ?)"; + query = "INSERT INTO nouns (singular, proper, complexity) VALUES (?, ?, ?)"; } break; @@ -1046,9 +1046,9 @@ int main(int argc, char** argv) { if (adjectives.count(word) == 1) { - query = "INSERT INTO adjectives (base_form, comparative, superlative) VALUES (?, ?, ?)"; + query = "INSERT INTO adjectives (base_form, complexity, comparative, superlative) VALUES (?, ?, ?, ?)"; } else { - query = "INSERT INTO adjectives (base_form) VALUES (?)"; + query = "INSERT INTO adjectives (base_form, complexity) VALUES (?, ?)"; } break; @@ -1058,9 +1058,9 @@ int main(int argc, char** argv) { if (adjectives.count(word) == 1) { - query = "INSERT INTO adverbs (base_form, comparative, superlative) VALUES (?, ?, ?)"; + query = "INSERT INTO adverbs (base_form, complexity, comparative, superlative) VALUES (?, ?, ?, ?)"; } else { - query = "INSERT INTO adverbs (base_form) VALUES (?)"; + query = "INSERT INTO adverbs (base_form, complexity) VALUES (?, ?)"; } break; @@ -1082,9 +1082,11 @@ int main(int argc, char** argv) return isupper(ch); }) ? 1 : 0)); + sqlite3_bind_int(ppstmt, 3, verbly::split>(word, " ").size()); + if (nouns.count(word) == 1) { - sqlite3_bind_text(ppstmt, 3, nouns[word].plural.c_str(), nouns[word].plural.length(), SQLITE_STATIC); + sqlite3_bind_text(ppstmt, 4, nouns[word].plural.c_str(), nouns[word].plural.length(), SQLITE_STATIC); } break; @@ -1093,10 +1095,12 @@ int main(int argc, char** argv) case 3: // Adjective case 4: // Adverb { + sqlite3_bind_int(ppstmt, 2, verbly::split>(word, " ").size()); + if (adjectives.count(word) == 1) { - sqlite3_bind_text(ppstmt, 2, adjectives[word].comparative.c_str(), adjectives[word].comparative.length(), SQLITE_STATIC); - sqlite3_bind_text(ppstmt, 3, adjectives[word].superlative.c_str(), adjectives[word].superlative.length(), SQLITE_STATIC); + sqlite3_bind_text(ppstmt, 3, adjectives[word].comparative.c_str(), adjectives[word].comparative.length(), SQLITE_STATIC); + sqlite3_bind_text(ppstmt, 4, adjectives[word].superlative.c_str(), adjectives[word].superlative.length(), SQLITE_STATIC); } break; diff --git a/generator/schema.sql b/generator/schema.sql index 2295444..f2445f0 100644 --- a/generator/schema.sql +++ b/generator/schema.sql @@ -36,7 +36,8 @@ CREATE TABLE `adjectives` ( `base_form` VARCHAR(32) NOT NULL, `comparative` VARCHAR(32), `superlative` VARCHAR(32), - `position` CHAR(1) + `position` CHAR(1), + `complexity` INTEGER NOT NULL ); DROP TABLE IF EXISTS `adverbs`; @@ -44,7 +45,8 @@ CREATE TABLE `adverbs` ( `adverb_id` INTEGER PRIMARY KEY, `base_form` VARCHAR(32) NOT NULL, `comparative` VARCHAR(32), - `superlative` VARCHAR(32) + `superlative` VARCHAR(32), + `complexity` INTEGER NOT NULL ); DROP TABLE IF EXISTS `nouns`; @@ -52,7 +54,8 @@ CREATE TABLE `nouns` ( `noun_id` INTEGER PRIMARY KEY, `singular` VARCHAR(32) NOT NULL, `plural` VARCHAR(32), - `proper` INTEGER(1) NOT NULL + `proper` INTEGER(1) NOT NULL, + `complexity` INTEGER NOT NULL ); DROP TABLE IF EXISTS `hypernymy`; diff --git a/lib/adjective_query.cpp b/lib/adjective_query.cpp index ec100e3..283fdca 100644 --- a/lib/adjective_query.cpp +++ b/lib/adjective_query.cpp @@ -53,6 +53,29 @@ namespace verbly { return *this; } + adjective_query& adjective_query::with_prefix(filter _f) + { + _f.clean(); + _with_prefix = _f; + + return *this; + } + + adjective_query& adjective_query::with_suffix(filter _f) + { + _f.clean(); + _with_suffix = _f; + + return *this; + } + + adjective_query& adjective_query::with_complexity(int _arg) + { + _with_complexity = _arg; + + return *this; + } + adjective_query& adjective_query::is_variant() { this->_is_variant = true; @@ -231,6 +254,85 @@ namespace verbly { case adjective::positioning::undefined: break; } + if (!_with_prefix.empty()) + { + std::function, bool)> recur = [&] (filter f, bool notlogic) -> std::string { + switch (f.get_type()) + { + case filter::type::singleton: + { + if (notlogic == f.get_notlogic()) + { + return "base_form LIKE @PREFIX"; + } else { + return "base_form NOT LIKE @PREFIX"; + } + } + + case filter::type::group: + { + bool truelogic = notlogic != f.get_notlogic(); + + std::list clauses; + std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter f2) { + return recur(f2, truelogic); + }); + + if (truelogic == f.get_orlogic()) + { + return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; + } else { + return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; + } + } + } + }; + + conditions.push_back(recur(_with_prefix, false)); + } + + if (!_with_suffix.empty()) + { + std::function, bool)> recur = [&] (filter f, bool notlogic) -> std::string { + switch (f.get_type()) + { + case filter::type::singleton: + { + if (notlogic == f.get_notlogic()) + { + return "base_form LIKE @SUFFIX"; + } else { + return "base_form NOT LIKE @SUFFIX"; + } + } + + case filter::type::group: + { + bool truelogic = notlogic != f.get_notlogic(); + + std::list clauses; + std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter f2) { + return recur(f2, truelogic); + }); + + if (truelogic == f.get_orlogic()) + { + return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; + } else { + return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; + } + } + } + }; + + conditions.push_back(recur(_with_suffix, false)); + } + + if (_with_complexity != unlimited) + { + conditions.push_back("complexity = @COMPLEX"); + } + if (_is_variant) { conditions.push_back("adjective_id IN (SELECT adjective_id FROM variation)"); @@ -691,6 +793,23 @@ namespace verbly { sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@EXCID"), except._id); } + for (auto prefix : _with_prefix.inorder_flatten()) + { + std::string pfat = prefix + "%"; + sqlite3_bind_text(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@PREFIX"), pfat.c_str(), pfat.length(), SQLITE_STATIC); + } + + for (auto suffix : _with_suffix.inorder_flatten()) + { + std::string pfat = "%" + suffix; + sqlite3_bind_text(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@SUFFIX"), pfat.c_str(), pfat.length(), SQLITE_STATIC); + } + + if (_with_complexity != unlimited) + { + sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@COMPLEX"), _with_complexity); + } + for (auto attribute : _variant_of.inorder_flatten()) { sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@ATTRID"), attribute._id); diff --git a/lib/adjective_query.h b/lib/adjective_query.h index e7755cb..b2859dc 100644 --- a/lib/adjective_query.h +++ b/lib/adjective_query.h @@ -17,6 +17,11 @@ namespace verbly { adjective_query& requires_superlative_form(); adjective_query& position(adjective::positioning pos); + adjective_query& with_prefix(filter _f); + adjective_query& with_suffix(filter _f); + + adjective_query& with_complexity(int _arg); + adjective_query& is_variant(); adjective_query& variant_of(filter _f); @@ -57,6 +62,11 @@ namespace verbly { bool _requires_superlative_form = false; adjective::positioning _position = adjective::positioning::undefined; + filter _with_prefix; + filter _with_suffix; + + int _with_complexity = unlimited; + bool _is_variant = false; filter _variant_of; diff --git a/lib/adverb_query.cpp b/lib/adverb_query.cpp index 639f16f..c9d0d09 100644 --- a/lib/adverb_query.cpp +++ b/lib/adverb_query.cpp @@ -67,6 +67,29 @@ namespace verbly { return *this; } + adverb_query& adverb_query::with_prefix(filter _f) + { + _f.clean(); + _with_prefix = _f; + + return *this; + } + + adverb_query& adverb_query::with_suffix(filter _f) + { + _f.clean(); + _with_suffix = _f; + + return *this; + } + + adverb_query& adverb_query::with_complexity(int _arg) + { + _with_complexity = _arg; + + return *this; + } + adverb_query& adverb_query::has_antonyms() { _has_antonyms = true; @@ -177,6 +200,85 @@ namespace verbly { conditions.push_back("superlative IS NOT NULL"); } + if (!_with_prefix.empty()) + { + std::function, bool)> recur = [&] (filter f, bool notlogic) -> std::string { + switch (f.get_type()) + { + case filter::type::singleton: + { + if (notlogic == f.get_notlogic()) + { + return "base_form LIKE @PREFIX"; + } else { + return "base_form NOT LIKE @PREFIX"; + } + } + + case filter::type::group: + { + bool truelogic = notlogic != f.get_notlogic(); + + std::list clauses; + std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter f2) { + return recur(f2, truelogic); + }); + + if (truelogic == f.get_orlogic()) + { + return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; + } else { + return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; + } + } + } + }; + + conditions.push_back(recur(_with_prefix, false)); + } + + if (!_with_suffix.empty()) + { + std::function, bool)> recur = [&] (filter f, bool notlogic) -> std::string { + switch (f.get_type()) + { + case filter::type::singleton: + { + if (notlogic == f.get_notlogic()) + { + return "base_form LIKE @SUFFIX"; + } else { + return "base_form NOT LIKE @SUFFIX"; + } + } + + case filter::type::group: + { + bool truelogic = notlogic != f.get_notlogic(); + + std::list clauses; + std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter f2) { + return recur(f2, truelogic); + }); + + if (truelogic == f.get_orlogic()) + { + return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; + } else { + return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; + } + } + } + }; + + conditions.push_back(recur(_with_suffix, false)); + } + + if (_with_complexity != unlimited) + { + conditions.push_back("complexity = @COMPLEX"); + } + if (_has_antonyms) { conditions.push_back("adverb_id IN (SELECT adverb_2_id FROM adverb_antonymy)"); @@ -421,6 +523,23 @@ namespace verbly { sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@EXCID"), except._id); } + for (auto prefix : _with_prefix.inorder_flatten()) + { + std::string pfat = prefix + "%"; + sqlite3_bind_text(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@PREFIX"), pfat.c_str(), pfat.length(), SQLITE_STATIC); + } + + for (auto suffix : _with_suffix.inorder_flatten()) + { + std::string pfat = "%" + suffix; + sqlite3_bind_text(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@SUFFIX"), pfat.c_str(), pfat.length(), SQLITE_STATIC); + } + + if (_with_complexity != unlimited) + { + sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@COMPLEX"), _with_complexity); + } + for (auto antonym : _antonym_of.inorder_flatten()) { sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@ANTID"), antonym._id); diff --git a/lib/adverb_query.h b/lib/adverb_query.h index 20f9ce5..e9354bb 100644 --- a/lib/adverb_query.h +++ b/lib/adverb_query.h @@ -16,6 +16,11 @@ namespace verbly { adverb_query& requires_comparative_form(); adverb_query& requires_superlative_form(); + adverb_query& with_prefix(filter _f); + adverb_query& with_suffix(filter _f); + + adverb_query& with_complexity(int _arg); + adverb_query& has_antonyms(); adverb_query& antonym_of(filter _f); @@ -43,6 +48,11 @@ namespace verbly { bool _requires_comparative_form = false; bool _requires_superlative_form = false; + filter _with_prefix; + filter _with_suffix; + + int _with_complexity = unlimited; + bool _has_antonyms = false; filter _antonym_of; diff --git a/lib/noun.cpp b/lib/noun.cpp index f575117..71c9af0 100644 --- a/lib/noun.cpp +++ b/lib/noun.cpp @@ -77,6 +77,13 @@ namespace verbly { return _data->nouns().part_meronym_of(*this); } + noun_query noun::full_part_meronyms() const + { + assert(_valid == true); + + return _data->nouns().full_part_meronym_of(*this); + } + noun_query noun::part_holonyms() const { assert(_valid == true); @@ -84,6 +91,13 @@ namespace verbly { return _data->nouns().part_holonym_of(*this); } + noun_query noun::full_part_holonyms() const + { + assert(_valid == true); + + return _data->nouns().full_part_holonym_of(*this); + } + noun_query noun::substance_meronyms() const { assert(_valid == true); @@ -91,6 +105,13 @@ namespace verbly { return _data->nouns().substance_meronym_of(*this); } + noun_query noun::full_substance_meronyms() const + { + assert(_valid == true); + + return _data->nouns().full_substance_meronym_of(*this); + } + noun_query noun::substance_holonyms() const { assert(_valid == true); @@ -98,6 +119,13 @@ namespace verbly { return _data->nouns().substance_holonym_of(*this); } + noun_query noun::full_substance_holonyms() const + { + assert(_valid == true); + + return _data->nouns().full_substance_holonym_of(*this); + } + noun_query noun::member_meronyms() const { assert(_valid == true); @@ -105,6 +133,13 @@ namespace verbly { return _data->nouns().member_meronym_of(*this); } + noun_query noun::full_member_meronyms() const + { + assert(_valid == true); + + return _data->nouns().full_member_meronym_of(*this); + } + noun_query noun::member_holonyms() const { assert(_valid == true); @@ -112,6 +147,13 @@ namespace verbly { return _data->nouns().member_holonym_of(*this); } + noun_query noun::full_member_holonyms() const + { + assert(_valid == true); + + return _data->nouns().full_member_holonym_of(*this); + } + noun_query noun::classes() const { assert(_valid == true); diff --git a/lib/noun.h b/lib/noun.h index 77601d0..969d2c8 100644 --- a/lib/noun.h +++ b/lib/noun.h @@ -25,11 +25,17 @@ namespace verbly { noun_query hyponyms() const; noun_query full_hyponyms() const; noun_query part_meronyms() const; + noun_query full_part_meronyms() const; noun_query part_holonyms() const; + noun_query full_part_holonyms() const; noun_query substance_meronyms() const; + noun_query full_substance_meronyms() const; noun_query substance_holonyms() const; + noun_query full_substance_holonyms() const; noun_query member_meronyms() const; + noun_query full_member_meronyms() const; noun_query member_holonyms() const; + noun_query full_member_holonyms() const; noun_query classes() const; noun_query instances() const; noun_query synonyms() const; diff --git a/lib/noun_query.cpp b/lib/noun_query.cpp index 2c3f57c..83bb47d 100644 --- a/lib/noun_query.cpp +++ b/lib/noun_query.cpp @@ -60,6 +60,29 @@ namespace verbly { return *this; } + noun_query& noun_query::with_prefix(filter _f) + { + _f.clean(); + _with_prefix = _f; + + return *this; + } + + noun_query& noun_query::with_suffix(filter _f) + { + _f.clean(); + _with_suffix = _f; + + return *this; + } + + noun_query& noun_query::with_complexity(int _arg) + { + _with_complexity = _arg; + + return *this; + } + noun_query& noun_query::is_hypernym() { _is_hypernym = true; @@ -461,6 +484,85 @@ namespace verbly { conditions.push_back(cond); } + if (!_with_prefix.empty()) + { + std::function, bool)> recur = [&] (filter f, bool notlogic) -> std::string { + switch (f.get_type()) + { + case filter::type::singleton: + { + if (notlogic == f.get_notlogic()) + { + return "singular LIKE @PREFIX"; + } else { + return "singular NOT LIKE @PREFIX"; + } + } + + case filter::type::group: + { + bool truelogic = notlogic != f.get_notlogic(); + + std::list clauses; + std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter f2) { + return recur(f2, truelogic); + }); + + if (truelogic == f.get_orlogic()) + { + return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; + } else { + return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; + } + } + } + }; + + conditions.push_back(recur(_with_prefix, false)); + } + + if (!_with_suffix.empty()) + { + std::function, bool)> recur = [&] (filter f, bool notlogic) -> std::string { + switch (f.get_type()) + { + case filter::type::singleton: + { + if (notlogic == f.get_notlogic()) + { + return "singular LIKE @SUFFIX"; + } else { + return "singular NOT LIKE @SUFFIX"; + } + } + + case filter::type::group: + { + bool truelogic = notlogic != f.get_notlogic(); + + std::list clauses; + std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter f2) { + return recur(f2, truelogic); + }); + + if (truelogic == f.get_orlogic()) + { + return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; + } else { + return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; + } + } + } + }; + + conditions.push_back(recur(_with_suffix, false)); + } + + if (_with_complexity != unlimited) + { + conditions.push_back("complexity = @COMPLEX"); + } + if (_is_hypernym) { conditions.push_back("noun_id IN (SELECT hypernym_id FROM hypernymy)"); @@ -1610,6 +1712,23 @@ namespace verbly { sqlite3_bind_text(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@SFORM"), sform.c_str(), sform.size(), SQLITE_STATIC); } + for (auto prefix : _with_prefix.inorder_flatten()) + { + std::string pfat = prefix + "%"; + sqlite3_bind_text(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@PREFIX"), pfat.c_str(), pfat.length(), SQLITE_STATIC); + } + + for (auto suffix : _with_suffix.inorder_flatten()) + { + std::string pfat = "%" + suffix; + sqlite3_bind_text(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@SUFFIX"), pfat.c_str(), pfat.length(), SQLITE_STATIC); + } + + if (_with_complexity != unlimited) + { + sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@COMPLEX"), _with_complexity); + } + for (auto hyponym : _hypernym_of.inorder_flatten()) { sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@HYPO"), hyponym._id); diff --git a/lib/noun_query.h b/lib/noun_query.h index e95e0c0..5b73f8d 100644 --- a/lib/noun_query.h +++ b/lib/noun_query.h @@ -14,6 +14,10 @@ namespace verbly { noun_query& has_pronunciation(); noun_query& with_singular_form(std::string _arg); + noun_query& with_prefix(filter _f); + noun_query& with_suffix(filter _f); + + noun_query& with_complexity(int _arg); noun_query& is_hypernym(); noun_query& hypernym_of(filter _f); @@ -84,6 +88,10 @@ namespace verbly { bool _has_prn = false; std::list _with_singular_form; + filter _with_prefix; + filter _with_suffix; + + int _with_complexity = unlimited; bool _is_hypernym = false; filter _hypernym_of; -- cgit 1.4.1