From 040ee58fecdc9c478004bc2e554e1ae126ec4602 Mon Sep 17 00:00:00 2001 From: Kelly Rauchenberger Date: Fri, 15 Apr 2016 17:24:44 -0400 Subject: Added support for ImageNet and fixed bug with query interface Datafile change: nouns now know how many images are associated with them on ImageNet, and also have their WordNet synset ID saved so that you can query for images of that noun via the ImageNet API. So far, verbly only exposes the ImageNet API URL, and doesn't actually interact with it itself. This may be changed in the future. The query interface had a huge issue in which multiple instances of the same condition would overwrite each other. This has been fixed. --- generator/generator.cpp | 57 +++++++--- generator/schema.sql | 4 +- lib/adjective_query.cpp | 147 +++++++++++--------------- lib/adverb_query.cpp | 103 +++++++++--------- lib/data.cpp | 117 +++++++++++++++++++++ lib/data.h | 32 ++++++ lib/frame_query.cpp | 30 +++++- lib/noun.cpp | 17 +++ lib/noun.h | 4 + lib/noun_query.cpp | 274 ++++++++++++++++++++++++------------------------ lib/noun_query.h | 6 ++ lib/preposition.cpp | 30 +++++- lib/verb_query.cpp | 39 ++++--- 13 files changed, 551 insertions(+), 309 deletions(-) diff --git a/generator/generator.cpp b/generator/generator.cpp index 6fbbfb8..e67bda7 100644 --- a/generator/generator.cpp +++ b/generator/generator.cpp @@ -81,6 +81,7 @@ std::map verbs; std::map adjectives; std::map nouns; std::map> wn; +std::map images; std::map> pronunciations; void print_usage() @@ -89,10 +90,10 @@ void print_usage() std::cout << "-------------------------" << std::endl; std::cout << "Requires exactly six arguments." << std::endl; std::cout << "1. The path to a VerbNet data directory." << std::endl; - std::cout << "2. The path to a SemLink vnpbMappings file." << std::endl; - std::cout << "3. The path to an AGID infl.txt file." << std::endl; - std::cout << "4. The path to a WordNet prolog data directory." << std::endl; - std::cout << "5. The path to a CMUDICT pronunciation file." << std::endl; + std::cout << "2. The path to an AGID infl.txt file." << std::endl; + std::cout << "3. The path to a WordNet prolog data directory." << std::endl; + std::cout << "4. The path to a CMUDICT pronunciation file." << std::endl; + std::cout << "5. The path to an ImageNet urls.txt file." << std::endl; std::cout << "6. Datafile output path." << std::endl; exit(1); @@ -431,10 +432,10 @@ int main(int argc, char** argv) // Get verbs from AGID std::cout << "Reading inflections..." << std::endl; - std::ifstream agidfile(argv[3]); + std::ifstream agidfile(argv[2]); if (!agidfile.is_open()) { - std::cout << "Could not open AGID file: " << argv[3] << std::endl; + std::cout << "Could not open AGID file: " << argv[2] << std::endl; print_usage(); } @@ -562,10 +563,10 @@ int main(int argc, char** argv) // Pronounciations std::cout << "Reading pronunciations..." << std::endl; - std::ifstream pronfile(argv[5]); + std::ifstream pronfile(argv[4]); if (!pronfile.is_open()) { - std::cout << "Could not open CMUDICT file: " << argv[5] << std::endl; + std::cout << "Could not open CMUDICT file: " << argv[4] << std::endl; print_usage(); } @@ -593,6 +594,36 @@ int main(int argc, char** argv) } } + // Images + std::cout << "Reading images..." << std::endl; + + std::ifstream imagefile(argv[5]); + if (!imagefile.is_open()) + { + std::cout << "Could not open ImageNet file: " << argv[5] << std::endl; + print_usage(); + } + + for (;;) + { + std::string line; + if (!getline(imagefile, line)) + { + break; + } + + if (line.back() == '\r') + { + line.pop_back(); + } + + std::string wnid_s = line.substr(1, 8); + int wnid = stoi(wnid_s) + 100000000; + images[wnid]++; + } + + imagefile.close(); + // Start writing output std::cout << "Writing schema..." << std::endl; @@ -972,7 +1003,7 @@ int main(int argc, char** argv) // - sa: specification (e.g. inaccurate (general) can mean imprecise or incorrect (specific)) // - sim: synonymy (e.g. cheerful/happy, happy/cheerful) // - syntax: positioning flags for some adjectives - std::string wnpref {argv[4]}; + std::string wnpref {argv[3]}; if (wnpref.back() != '/') { wnpref += '/'; @@ -1027,9 +1058,9 @@ int main(int argc, char** argv) { if (nouns.count(word) == 1) { - query = "INSERT INTO nouns (singular, proper, complexity, plural) VALUES (?, ?, ?, ?)"; + query = "INSERT INTO nouns (singular, proper, complexity, images, wnid, plural) VALUES (?, ?, ?, ?, ?, ?)"; } else { - query = "INSERT INTO nouns (singular, proper, complexity) VALUES (?, ?, ?)"; + query = "INSERT INTO nouns (singular, proper, complexity, images, wnid) VALUES (?, ?, ?, ?, ?)"; } break; @@ -1083,10 +1114,12 @@ int main(int argc, char** argv) }) ? 1 : 0)); sqlite3_bind_int(ppstmt, 3, verbly::split>(word, " ").size()); + sqlite3_bind_int(ppstmt, 4, images[synset_id]); + sqlite3_bind_int(ppstmt, 5, synset_id); if (nouns.count(word) == 1) { - sqlite3_bind_text(ppstmt, 4, nouns[word].plural.c_str(), nouns[word].plural.length(), SQLITE_STATIC); + sqlite3_bind_text(ppstmt, 6, nouns[word].plural.c_str(), nouns[word].plural.length(), SQLITE_STATIC); } break; diff --git a/generator/schema.sql b/generator/schema.sql index f2445f0..9a39944 100644 --- a/generator/schema.sql +++ b/generator/schema.sql @@ -55,7 +55,9 @@ CREATE TABLE `nouns` ( `singular` VARCHAR(32) NOT NULL, `plural` VARCHAR(32), `proper` INTEGER(1) NOT NULL, - `complexity` INTEGER NOT NULL + `complexity` INTEGER NOT NULL, + `images` INTEGER NOT NULL, + `wnid` INTEGER NOT NULL ); DROP TABLE IF EXISTS `hypernymy`; diff --git a/lib/adjective_query.cpp b/lib/adjective_query.cpp index 283fdca..a7f915c 100644 --- a/lib/adjective_query.cpp +++ b/lib/adjective_query.cpp @@ -218,6 +218,7 @@ namespace verbly { std::stringstream construct; construct << "SELECT adjective_id, base_form, comparative, superlative, position FROM adjectives"; std::list conditions; + std::list bindings; if (_has_prn) { @@ -226,14 +227,20 @@ namespace verbly { if (!_rhymes.empty()) { - std::list clauses(_rhymes.size(), "pronunciation LIKE @RHMPRN"); + std::list clauses(_rhymes.size(), "pronunciation LIKE ?"); std::string cond = "adjective_id IN (SELECT adjective_id FROM adjective_pronunciations WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; conditions.push_back(cond); + + for (auto rhyme : _rhymes) + { + bindings.emplace_back("%" + rhyme); + } } for (auto except : _except) { - conditions.push_back("adjective_id != @EXCID"); + conditions.push_back("adjective_id != ?"); + bindings.emplace_back(except._id); } if (_requires_comparative_form) @@ -261,11 +268,13 @@ namespace verbly { { case filter::type::singleton: { + bindings.emplace_back(f.get_elem() + "%"); + if (notlogic == f.get_notlogic()) { - return "base_form LIKE @PREFIX"; + return "base_form LIKE ?"; } else { - return "base_form NOT LIKE @PREFIX"; + return "base_form NOT LIKE ?"; } } @@ -298,11 +307,13 @@ namespace verbly { { case filter::type::singleton: { + bindings.emplace_back("%" + f.get_elem()); + if (notlogic == f.get_notlogic()) { - return "base_form LIKE @SUFFIX"; + return "base_form LIKE ?"; } else { - return "base_form NOT LIKE @SUFFIX"; + return "base_form NOT LIKE ?"; } } @@ -330,7 +341,8 @@ namespace verbly { if (_with_complexity != unlimited) { - conditions.push_back("complexity = @COMPLEX"); + conditions.push_back("complexity = ?"); + bindings.emplace_back(_with_complexity); } if (_is_variant) @@ -355,11 +367,13 @@ namespace verbly { { case filter::type::singleton: { + bindings.emplace_back(f.get_elem()._id); + if (notlogic == f.get_notlogic()) { - return "noun_id = @ATTRID"; + return "noun_id = ?"; } else { - return "noun_id != @ATTRID"; + return "noun_id != ?"; } } @@ -409,11 +423,13 @@ namespace verbly { { case filter::type::singleton: { + bindings.emplace_back(f.get_elem()._id); + if (notlogic == f.get_notlogic()) { - return "adjective_1_id = @ANTID"; + return "adjective_1_id = ?"; } else { - return "adjective_1_id != @ANTID"; + return "adjective_1_id != ?"; } } @@ -463,11 +479,13 @@ namespace verbly { { case filter::type::singleton: { + bindings.emplace_back(f.get_elem()._id); + if (notlogic == f.get_notlogic()) { - return "adjective_1_id = @SYNID"; + return "adjective_1_id = ?"; } else { - return "adjective_1_id != @SYNID"; + return "adjective_1_id != ?"; } } @@ -517,11 +535,13 @@ namespace verbly { { case filter::type::singleton: { + bindings.emplace_back(f.get_elem()._id); + if (notlogic == f.get_notlogic()) { - return "specific_id = @SPECID"; + return "specific_id = ?"; } else { - return "specific_id != @SPECID"; + return "specific_id != ?"; } } @@ -571,11 +591,13 @@ namespace verbly { { case filter::type::singleton: { + bindings.emplace_back(f.get_elem()._id); + if (notlogic == f.get_notlogic()) { - return "general_id = @GENID"; + return "general_id = ?"; } else { - return "general_id != @GENID"; + return "general_id != ?"; } } @@ -625,11 +647,13 @@ namespace verbly { { case filter::type::singleton: { + bindings.emplace_back(f.get_elem()._id); + if (notlogic == f.get_notlogic()) { - return "noun_id = @APERID"; + return "noun_id = ?"; } else { - return "noun_id != @APERID"; + return "noun_id != ?"; } } @@ -679,11 +703,13 @@ namespace verbly { { case filter::type::singleton: { + bindings.emplace_back(f.get_elem()._id); + if (notlogic == f.get_notlogic()) { - return "mannernym_id = @MANID"; + return "mannernym_id = ?"; } else { - return "mannernym_id != @MANID"; + return "mannernym_id != ?"; } } @@ -776,74 +802,29 @@ namespace verbly { throw std::runtime_error(sqlite3_errmsg(_data.ppdb)); } - if (!_rhymes.empty()) + int i = 1; + for (auto& binding : bindings) { - int i = 0; - for (auto rhyme : _rhymes) + switch (binding.get_type()) { - std::string rhymer = "%" + rhyme; - sqlite3_bind_text(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@RHMPRN"), rhymer.c_str(), rhymer.length(), SQLITE_STATIC); + case binding::type::integer: + { + sqlite3_bind_int(ppstmt, i, binding.get_integer()); + + break; + } - i++; + case binding::type::string: + { + sqlite3_bind_text(ppstmt, i, binding.get_string().c_str(), binding.get_string().length(), SQLITE_STATIC); + + break; + } } + + i++; } - for (auto except : _except) - { - sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@EXCID"), except._id); - } - - for (auto prefix : _with_prefix.inorder_flatten()) - { - std::string pfat = prefix + "%"; - sqlite3_bind_text(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@PREFIX"), pfat.c_str(), pfat.length(), SQLITE_STATIC); - } - - for (auto suffix : _with_suffix.inorder_flatten()) - { - std::string pfat = "%" + suffix; - sqlite3_bind_text(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@SUFFIX"), pfat.c_str(), pfat.length(), SQLITE_STATIC); - } - - if (_with_complexity != unlimited) - { - sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@COMPLEX"), _with_complexity); - } - - for (auto attribute : _variant_of.inorder_flatten()) - { - sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@ATTRID"), attribute._id); - } - - for (auto antonym : _antonym_of.inorder_flatten()) - { - sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@ANTID"), antonym._id); - } - - for (auto synonym : _synonym_of.inorder_flatten()) - { - sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@SYNID"), synonym._id); - } - - for (auto specific : _generalization_of.inorder_flatten()) - { - sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@SPECID"), specific._id); - } - - for (auto general : _specification_of.inorder_flatten()) - { - sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@GENID"), general._id); - } - - for (auto n : _pertainym_of.inorder_flatten()) - { - sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@APERID"), n._id); - } - - for (auto mannernym : _anti_mannernym_of.inorder_flatten()) - { - sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@MANID"), mannernym._id); - } /* for (auto adj : _derived_from_adjective) { diff --git a/lib/adverb_query.cpp b/lib/adverb_query.cpp index c9d0d09..30ba92b 100644 --- a/lib/adverb_query.cpp +++ b/lib/adverb_query.cpp @@ -172,6 +172,7 @@ namespace verbly { std::stringstream construct; construct << "SELECT adverb_id, base_form, comparative, superlative FROM adverbs"; std::list conditions; + std::list bindings; if (_has_prn) { @@ -180,14 +181,20 @@ namespace verbly { if (!_rhymes.empty()) { - std::list clauses(_rhymes.size(), "pronunciation LIKE @RHMPRN"); + std::list clauses(_rhymes.size(), "pronunciation LIKE ?"); std::string cond = "adverb_id IN (SELECT adverb_id FROM adverb_pronunciations WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; conditions.push_back(cond); + + for (auto rhyme : _rhymes) + { + bindings.emplace_back("%" + rhyme); + } } for (auto except : _except) { - conditions.push_back("adverb_id != @EXCID"); + conditions.push_back("adverb_id != ?"); + bindings.emplace_back(except._id); } if (_requires_comparative_form) @@ -207,11 +214,13 @@ namespace verbly { { case filter::type::singleton: { + bindings.emplace_back(f.get_elem() + "%"); + if (notlogic == f.get_notlogic()) { - return "base_form LIKE @PREFIX"; + return "base_form LIKE ?"; } else { - return "base_form NOT LIKE @PREFIX"; + return "base_form NOT LIKE ?"; } } @@ -244,11 +253,13 @@ namespace verbly { { case filter::type::singleton: { + bindings.emplace_back("%" + f.get_elem()); + if (notlogic == f.get_notlogic()) { - return "base_form LIKE @SUFFIX"; + return "base_form LIKE ?"; } else { - return "base_form NOT LIKE @SUFFIX"; + return "base_form NOT LIKE ?"; } } @@ -276,7 +287,8 @@ namespace verbly { if (_with_complexity != unlimited) { - conditions.push_back("complexity = @COMPLEX"); + conditions.push_back("complexity = ?"); + bindings.emplace_back(_with_complexity); } if (_has_antonyms) @@ -301,11 +313,13 @@ namespace verbly { { case filter::type::singleton: { + bindings.emplace_back(f.get_elem()._id); + if (notlogic == f.get_notlogic()) { - return "adverb_1_id = @ANTID"; + return "adverb_1_id = ?"; } else { - return "adverb_1_id != @ANTID"; + return "adverb_1_id != ?"; } } @@ -355,11 +369,13 @@ namespace verbly { { case filter::type::singleton: { + bindings.emplace_back(f.get_elem()._id); + if (notlogic == f.get_notlogic()) { - return "adverb_1_id = @SYNID"; + return "adverb_1_id = ?"; } else { - return "adverb_1_id != @SYNID"; + return "adverb_1_id != ?"; } } @@ -409,11 +425,13 @@ namespace verbly { { case filter::type::singleton: { + bindings.emplace_back(f.get_elem()._id); + if (notlogic == f.get_notlogic()) { - return "adjective_id = @AMANID"; + return "adjective_id = ?"; } else { - return "adjective_id != @AMANID"; + return "adjective_id != ?"; } } @@ -506,54 +524,29 @@ namespace verbly { throw std::runtime_error(sqlite3_errmsg(_data.ppdb)); } - if (!_rhymes.empty()) + int i = 1; + for (auto& binding : bindings) { - int i = 0; - for (auto rhyme : _rhymes) + switch (binding.get_type()) { - std::string rhymer = "%" + rhyme; - sqlite3_bind_text(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@RHMPRN"), rhymer.c_str(), rhymer.length(), SQLITE_STATIC); + case binding::type::integer: + { + sqlite3_bind_int(ppstmt, i, binding.get_integer()); + + break; + } - i++; + case binding::type::string: + { + sqlite3_bind_text(ppstmt, i, binding.get_string().c_str(), binding.get_string().length(), SQLITE_STATIC); + + break; + } } + + i++; } - for (auto except : _except) - { - sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@EXCID"), except._id); - } - - for (auto prefix : _with_prefix.inorder_flatten()) - { - std::string pfat = prefix + "%"; - sqlite3_bind_text(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@PREFIX"), pfat.c_str(), pfat.length(), SQLITE_STATIC); - } - - for (auto suffix : _with_suffix.inorder_flatten()) - { - std::string pfat = "%" + suffix; - sqlite3_bind_text(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@SUFFIX"), pfat.c_str(), pfat.length(), SQLITE_STATIC); - } - - if (_with_complexity != unlimited) - { - sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@COMPLEX"), _with_complexity); - } - - for (auto antonym : _antonym_of.inorder_flatten()) - { - sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@ANTID"), antonym._id); - } - - for (auto synonym : _synonym_of.inorder_flatten()) - { - sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@SYNID"), synonym._id); - } - - for (auto adj : _mannernym_of.inorder_flatten()) - { - sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@AMANID"), adj._id); - } /* for (auto adj : _derived_from_adjective) { diff --git a/lib/data.cpp b/lib/data.cpp index 5a9397b..c14956f 100644 --- a/lib/data.cpp +++ b/lib/data.cpp @@ -57,4 +57,121 @@ namespace verbly { return preposition_query(*this); } + binding::type binding::get_type() const + { + return _type; + } + + binding::binding(const binding& other) + { + _type = other._type; + + switch (_type) + { + case type::integer: + { + _integer = other._integer; + + break; + } + + case type::string: + { + new(&_string) std::string(other._string); + + break; + } + } + } + + binding::~binding() + { + switch (_type) + { + case type::string: + { + using string_type = std::string; + _string.~string_type(); + + break; + } + } + } + + binding& binding::operator=(const binding& other) + { + this->~binding(); + + _type = other._type; + + switch (_type) + { + case type::integer: + { + _integer = other._integer; + + break; + } + + case type::string: + { + new(&_string) std::string(other._string); + + break; + } + } + + return *this; + } + + binding::binding(int _arg) + { + _type = type::integer; + _integer = _arg; + } + + int binding::get_integer() const + { + assert(_type == type::integer); + + return _integer; + } + + void binding::set_integer(int _arg) + { + *this = binding(_arg); + } + + binding& binding::operator=(int _arg) + { + *this = binding(_arg); + + return *this; + } + + binding::binding(std::string _arg) + { + _type = type::string; + new(&_string) std::string(_arg); + } + + std::string binding::get_string() const + { + assert(_type == type::string); + + return _string; + } + + void binding::set_string(std::string _arg) + { + *this = binding(_arg); + } + + binding& binding::operator=(std::string _arg) + { + *this = binding(_arg); + + return *this; + } + }; diff --git a/lib/data.h b/lib/data.h index 0d599c4..b8b12b9 100644 --- a/lib/data.h +++ b/lib/data.h @@ -343,6 +343,38 @@ namespace verbly { }; }; + class binding { + public: + enum class type { + integer, + string + }; + + type get_type() const; + binding(const binding& other); + ~binding(); + binding& operator=(const binding& other); + + // Integer + binding(int _arg); + int get_integer() const; + void set_integer(int _arg); + binding& operator=(int _arg); + + // String + binding(std::string _arg); + std::string get_string() const; + void set_string(std::string _arg); + binding& operator=(std::string _arg); + + private: + union { + int _integer; + std::string _string; + }; + type _type; + }; + }; #endif /* end of include guard: DATA_H_C4AEC3DD */ diff --git a/lib/frame_query.cpp b/lib/frame_query.cpp index 6583da4..3c4a3e8 100644 --- a/lib/frame_query.cpp +++ b/lib/frame_query.cpp @@ -37,13 +37,19 @@ namespace verbly { { std::stringstream construct; construct << "SELECT frames.data, groups.data FROM frames INNER JOIN groups ON frames.group_id = groups.group_id"; + std::list bindings; if (!_for_verb.empty()) { - std::list clauses(_for_verb.size(), "verb_id = @VERID"); + std::list clauses(_for_verb.size(), "verb_id = ?"); construct << " WHERE frames.group_id IN (SELECT group_id FROM verb_groups WHERE "; construct << verbly::implode(std::begin(clauses), std::end(clauses), " OR "); construct << ")"; + + for (auto v : _for_verb) + { + bindings.emplace_back(v._id); + } } sqlite3_stmt* ppstmt; @@ -53,9 +59,27 @@ namespace verbly { throw std::runtime_error(sqlite3_errmsg(_data.ppdb)); } - for (auto verb : _for_verb) + int i = 1; + for (auto& binding : bindings) { - sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@VERID"), verb._id); + switch (binding.get_type()) + { + case binding::type::integer: + { + sqlite3_bind_int(ppstmt, i, binding.get_integer()); + + break; + } + + case binding::type::string: + { + sqlite3_bind_text(ppstmt, i, binding.get_string().c_str(), binding.get_string().length(), SQLITE_STATIC); + + break; + } + } + + i++; } std::list output; diff --git a/lib/noun.cpp b/lib/noun.cpp index 71c9af0..d8b34c9 100644 --- a/lib/noun.cpp +++ b/lib/noun.cpp @@ -34,6 +34,13 @@ namespace verbly { return _plural; } + + int noun::wnid() const + { + assert(_valid == true); + + return _wnid; + } bool noun::has_plural_form() const { @@ -196,6 +203,16 @@ namespace verbly { return _data->adjectives().variant_of(*this); } + std::string noun::imagenet_url() const + { + std::stringstream url; + url << "http://www.image-net.org/api/text/imagenet.synset.geturls?wnid=n"; + url.width(8); + url.fill('0'); + url << (_wnid % 100000000); + return url.str(); + } + bool noun::operator<(const noun& other) const { return _id < other._id; diff --git a/lib/noun.h b/lib/noun.h index 969d2c8..bd71e57 100644 --- a/lib/noun.h +++ b/lib/noun.h @@ -7,6 +7,7 @@ namespace verbly { private: std::string _singular; std::string _plural; + int _wnid; friend class noun_query; @@ -17,6 +18,7 @@ namespace verbly { std::string base_form() const; std::string singular_form() const; std::string plural_form() const; + int wnid() const; bool has_plural_form() const; @@ -43,6 +45,8 @@ namespace verbly { adjective_query pertainyms() const; adjective_query variations() const; + std::string imagenet_url() const; + bool operator<(const noun& other) const; }; diff --git a/lib/noun_query.cpp b/lib/noun_query.cpp index 83bb47d..19a1297 100644 --- a/lib/noun_query.cpp +++ b/lib/noun_query.cpp @@ -370,6 +370,21 @@ namespace verbly { return *this; } + + noun_query& noun_query::at_least_n_images(int _arg) + { + _at_least_n_images = _arg; + + return *this; + } + + noun_query& noun_query::with_wnid(int _arg) + { + _with_wnid.insert(_arg); + + return *this; + } + /* noun_query& noun_query::derived_from(const word& _w) { @@ -457,8 +472,9 @@ namespace verbly { construct << " "; } - construct << "SELECT noun_id, singular, plural FROM nouns"; + construct << "SELECT noun_id, singular, plural, wnid FROM nouns"; std::list conditions; + std::list bindings; if (_has_prn) { @@ -467,21 +483,32 @@ namespace verbly { if (!_rhymes.empty()) { - std::list clauses(_rhymes.size(), "pronunciation LIKE @RHMPRN"); + std::list clauses(_rhymes.size(), "pronunciation LIKE ?"); std::string cond = "noun_id IN (SELECT noun_id FROM noun_pronunciations WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; conditions.push_back(cond); + + for (auto rhyme : _rhymes) + { + bindings.emplace_back("%" + rhyme); + } } for (auto except : _except) { - conditions.push_back("noun_id != @EXCID"); + conditions.push_back("noun_id != ?"); + bindings.emplace_back(except._id); } if (!_with_singular_form.empty()) { - std::list clauses(_with_singular_form.size(), "singular = @SFORM"); + std::list clauses(_with_singular_form.size(), "singular = ?"); std::string cond = "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; conditions.push_back(cond); + + for (auto form : _with_singular_form) + { + bindings.emplace_back(form); + } } if (!_with_prefix.empty()) @@ -491,11 +518,13 @@ namespace verbly { { case filter::type::singleton: { + bindings.emplace_back(f.get_elem() + "%"); + if (notlogic == f.get_notlogic()) { - return "singular LIKE @PREFIX"; + return "singular LIKE ?"; } else { - return "singular NOT LIKE @PREFIX"; + return "singular NOT LIKE ?"; } } @@ -528,11 +557,13 @@ namespace verbly { { case filter::type::singleton: { + bindings.emplace_back("%" + f.get_elem()); + if (notlogic == f.get_notlogic()) { - return "singular LIKE @SUFFIX"; + return "singular LIKE ?"; } else { - return "singular NOT LIKE @SUFFIX"; + return "singular NOT LIKE ?"; } } @@ -560,7 +591,8 @@ namespace verbly { if (_with_complexity != unlimited) { - conditions.push_back("complexity = @COMPLEX"); + conditions.push_back("complexity = ?"); + bindings.emplace_back(_with_complexity); } if (_is_hypernym) @@ -585,11 +617,13 @@ namespace verbly { { case filter::type::singleton: { + bindings.emplace_back(f.get_elem()._id); + if (notlogic == f.get_notlogic()) { - return "hyponym_id = @HYPO"; + return "hyponym_id = ?"; } else { - return "hyponym_id != @HYPO"; + return "hyponym_id != ?"; } } @@ -713,11 +747,13 @@ namespace verbly { { case filter::type::singleton: { + bindings.emplace_back(f.get_elem()._id); + if (notlogic == f.get_notlogic()) { - return "hypernym_id = @HYPER"; + return "hypernym_id = ?"; } else { - return "hypernym_id != @HYPER"; + return "hypernym_id != ?"; } } @@ -767,11 +803,13 @@ namespace verbly { { case filter::type::singleton: { + bindings.emplace_back(f.get_elem()._id); + if (notlogic == f.get_notlogic()) { - return "holonym_id = @PHOLO"; + return "holonym_id = ?"; } else { - return "holonym_id != @PHOLO"; + return "holonym_id != ?"; } } @@ -858,11 +896,13 @@ namespace verbly { { case filter::type::singleton: { + bindings.emplace_back(f.get_elem()._id); + if (notlogic == f.get_notlogic()) { - return "meronym_id = @PMERO"; + return "meronym_id = ?"; } else { - return "meronym_id != @PMERO"; + return "meronym_id != ?"; } } @@ -949,11 +989,13 @@ namespace verbly { { case filter::type::singleton: { + bindings.emplace_back(f.get_elem()._id); + if (notlogic == f.get_notlogic()) { - return "holonym_id = @SHOLO"; + return "holonym_id = ?"; } else { - return "holonym_id != @SHOLO"; + return "holonym_id != ?"; } } @@ -1040,11 +1082,13 @@ namespace verbly { { case filter::type::singleton: { + bindings.emplace_back(f.get_elem()._id); + if (notlogic == f.get_notlogic()) { - return "meronym_id = @SMERO"; + return "meronym_id = ?"; } else { - return "meronym_id != @SMERO"; + return "meronym_id != ?"; } } @@ -1131,11 +1175,13 @@ namespace verbly { { case filter::type::singleton: { + bindings.emplace_back(f.get_elem()._id); + if (notlogic == f.get_notlogic()) { - return "holonym_id = @MHOLO"; + return "holonym_id = ?"; } else { - return "holonym_id != @MHOLO"; + return "holonym_id != ?"; } } @@ -1222,11 +1268,13 @@ namespace verbly { { case filter::type::singleton: { + bindings.emplace_back(f.get_elem()._id); + if (notlogic == f.get_notlogic()) { - return "meronym_id = @MMERO"; + return "meronym_id = ?"; } else { - return "meronym_id != @MMERO"; + return "meronym_id != ?"; } } @@ -1323,11 +1371,13 @@ namespace verbly { { case filter::type::singleton: { + bindings.emplace_back(f.get_elem()._id); + if (notlogic == f.get_notlogic()) { - return "class_id = @CLSID"; + return "class_id = ?"; } else { - return "class_id != @CLSID"; + return "class_id != ?"; } } @@ -1377,11 +1427,13 @@ namespace verbly { { case filter::type::singleton: { + bindings.emplace_back(f.get_elem()._id); + if (notlogic == f.get_notlogic()) { - return "instance_id = @INSID"; + return "instance_id = ?"; } else { - return "instance_id != @INSID"; + return "instance_id != ?"; } } @@ -1431,11 +1483,13 @@ namespace verbly { { case filter::type::singleton: { + bindings.emplace_back(f.get_elem()._id); + if (notlogic == f.get_notlogic()) { - return "noun_1_id = @SYNID"; + return "noun_1_id = ?"; } else { - return "noun_1_id != @SYNID"; + return "noun_1_id != ?"; } } @@ -1485,11 +1539,13 @@ namespace verbly { { case filter::type::singleton: { + bindings.emplace_back(f.get_elem()._id); + if (notlogic == f.get_notlogic()) { - return "noun_1_id = @ANTID"; + return "noun_1_id = ?"; } else { - return "noun_1_id != @ANTID"; + return "noun_1_id != ?"; } } @@ -1539,11 +1595,13 @@ namespace verbly { { case filter::type::singleton: { + bindings.emplace_back(f.get_elem()._id); + if (notlogic == f.get_notlogic()) { - return "pertainym_id = @PERID"; + return "pertainym_id = ?"; } else { - return "pertainym_id != @PERID"; + return "pertainym_id != ?"; } } @@ -1593,11 +1651,13 @@ namespace verbly { { case filter::type::singleton: { + bindings.emplace_back(f.get_elem()._id); + if (notlogic == f.get_notlogic()) { - return "adjective_id = @VALID"; + return "adjective_id = ?"; } else { - return "adjective_id != @VALID"; + return "adjective_id != ?"; } } @@ -1624,6 +1684,25 @@ namespace verbly { cond << ")"; conditions.push_back(cond.str()); } + + if (_at_least_n_images != unlimited) + { + conditions.push_back("images >= ?"); + bindings.emplace_back(_at_least_n_images); + } + + if (!_with_wnid.empty()) + { + std::vector clauses(_with_wnid.size(), "wnid = ?"); + std::string cond = verbly::implode(std::begin(clauses), std::end(clauses), " OR "); + conditions.push_back("(" + cond + ")"); + + for (auto wnid : _with_wnid) + { + bindings.emplace_back(wnid); + } + } + /* if (!_derived_from_adjective.empty()) { @@ -1690,114 +1769,29 @@ namespace verbly { throw std::runtime_error(sqlite3_errmsg(_data.ppdb)); } - if (!_rhymes.empty()) + int i = 1; + for (auto& binding : bindings) { - int i = 0; - for (auto rhyme : _rhymes) + switch (binding.get_type()) { - std::string rhymer = "%" + rhyme; - sqlite3_bind_text(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@RHMPRN"), rhymer.c_str(), rhymer.length(), SQLITE_STATIC); + case binding::type::integer: + { + sqlite3_bind_int(ppstmt, i, binding.get_integer()); + + break; + } - i++; + case binding::type::string: + { + sqlite3_bind_text(ppstmt, i, binding.get_string().c_str(), binding.get_string().length(), SQLITE_STATIC); + + break; + } } + + i++; } - for (auto except : _except) - { - sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@EXCID"), except._id); - } - - for (auto sform : _with_singular_form) - { - sqlite3_bind_text(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@SFORM"), sform.c_str(), sform.size(), SQLITE_STATIC); - } - - for (auto prefix : _with_prefix.inorder_flatten()) - { - std::string pfat = prefix + "%"; - sqlite3_bind_text(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@PREFIX"), pfat.c_str(), pfat.length(), SQLITE_STATIC); - } - - for (auto suffix : _with_suffix.inorder_flatten()) - { - std::string pfat = "%" + suffix; - sqlite3_bind_text(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@SUFFIX"), pfat.c_str(), pfat.length(), SQLITE_STATIC); - } - - if (_with_complexity != unlimited) - { - sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@COMPLEX"), _with_complexity); - } - - for (auto hyponym : _hypernym_of.inorder_flatten()) - { - sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@HYPO"), hyponym._id); - } - - for (auto hypernym : _hyponym_of.inorder_flatten()) - { - sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@HYPER"), hypernym._id); - } - - for (auto holonym : _part_meronym_of.inorder_flatten()) - { - sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@PHOLO"), holonym._id); - } - - for (auto meronym : _part_holonym_of.inorder_flatten()) - { - sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@PMERO"), meronym._id); - } - - for (auto holonym : _substance_meronym_of.inorder_flatten()) - { - sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@SHOLO"), holonym._id); - } - - for (auto meronym : _substance_holonym_of.inorder_flatten()) - { - sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@SMERO"), meronym._id); - } - - for (auto holonym : _member_meronym_of.inorder_flatten()) - { - sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@MHOLO"), holonym._id); - } - - for (auto meronym : _member_holonym_of.inorder_flatten()) - { - sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@MMERO"), meronym._id); - } - - for (auto cls : _instance_of.inorder_flatten()) - { - sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@CLSID"), cls._id); - } - - for (auto inst : _class_of.inorder_flatten()) - { - sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@INSID"), inst._id); - } - - for (auto synonym : _synonym_of.inorder_flatten()) - { - sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@SYNID"), synonym._id); - } - - for (auto antonym : _antonym_of.inorder_flatten()) - { - sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@ANTID"), antonym._id); - } - - for (auto pertainym : _anti_pertainym_of.inorder_flatten()) - { - sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@PERID"), pertainym._id); - } - - for (auto value : _attribute_of.inorder_flatten()) - { - sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@VALID"), value._id); - } /* for (auto adj : _derived_from_adjective) { @@ -1839,6 +1833,8 @@ namespace verbly { { tnc._plural = std::string(reinterpret_cast(sqlite3_column_text(ppstmt, 2))); } + + tnc._wnid = sqlite3_column_int(ppstmt, 3); output.push_back(tnc); } diff --git a/lib/noun_query.h b/lib/noun_query.h index 5b73f8d..8768f5d 100644 --- a/lib/noun_query.h +++ b/lib/noun_query.h @@ -72,6 +72,9 @@ namespace verbly { noun_query& is_attribute(); noun_query& attribute_of(filter _f); + noun_query& at_least_n_images(int _arg); + noun_query& with_wnid(int _arg); + /* noun_query& derived_from(const word& _w); noun_query& not_derived_from(const word& _w);*/ @@ -146,6 +149,9 @@ namespace verbly { bool _is_attribute = false; filter _attribute_of; + int _at_least_n_images = unlimited; + std::set _with_wnid; + /* std::list _derived_from_adjective; std::list _not_derived_from_adjective; std::list _derived_from_adverb; diff --git a/lib/preposition.cpp b/lib/preposition.cpp index c619bbf..8df13aa 100644 --- a/lib/preposition.cpp +++ b/lib/preposition.cpp @@ -37,13 +37,19 @@ namespace verbly { { std::stringstream construct; construct << "SELECT form FROM prepositions"; + std::list bindings; if (!_in_group.empty()) { - std::list clauses(_in_group.size(), "groupname = @GNAME"); + std::list clauses(_in_group.size(), "groupname = ?"); construct << " WHERE preposition_id IN (SELECT preposition_id FROM preposition_groups WHERE "; construct << verbly::implode(std::begin(clauses), std::end(clauses), " OR "); construct << ")"; + + for (auto g : _in_group) + { + bindings.emplace_back(g); + } } if (_random) @@ -63,9 +69,27 @@ namespace verbly { throw std::runtime_error(sqlite3_errmsg(_data.ppdb)); } - for (auto& group : _in_group) + int i = 1; + for (auto& binding : bindings) { - sqlite3_bind_text(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@GNAME"), group.c_str(), group.length(), SQLITE_STATIC); + switch (binding.get_type()) + { + case binding::type::integer: + { + sqlite3_bind_int(ppstmt, i, binding.get_integer()); + + break; + } + + case binding::type::string: + { + sqlite3_bind_text(ppstmt, i, binding.get_string().c_str(), binding.get_string().length(), SQLITE_STATIC); + + break; + } + } + + i++; } std::list output; diff --git a/lib/verb_query.cpp b/lib/verb_query.cpp index 173a04e..929ecc7 100644 --- a/lib/verb_query.cpp +++ b/lib/verb_query.cpp @@ -65,6 +65,7 @@ namespace verbly { std::stringstream construct; construct << "SELECT verb_id, infinitive, past_tense, past_participle, ing_form, s_form FROM verbs"; std::list conditions; + std::list bindings; if (_has_prn) { @@ -73,14 +74,20 @@ namespace verbly { if (!_rhymes.empty()) { - std::list clauses(_rhymes.size(), "pronunciation LIKE @RHMPRN"); + std::list clauses(_rhymes.size(), "pronunciation LIKE ?"); std::string cond = "verb_id IN (SELECT verb_id FROM verb_pronunciations WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; conditions.push_back(cond); + + for (auto rhyme : _rhymes) + { + bindings.emplace_back("%" + rhyme); + } } for (auto except : _except) { - conditions.push_back("verb_id != @EXCID"); + conditions.push_back("verb_id != ?"); + bindings.emplace_back(except._id); } if (!_has_frames) @@ -111,21 +118,27 @@ namespace verbly { throw std::runtime_error(sqlite3_errmsg(_data.ppdb)); } - if (!_rhymes.empty()) + int i = 1; + for (auto& binding : bindings) { - int i = 0; - for (auto rhyme : _rhymes) + switch (binding.get_type()) { - std::string rhymer = "%" + rhyme; - sqlite3_bind_text(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@RHMPRN"), rhymer.c_str(), rhymer.length(), SQLITE_STATIC); + case binding::type::integer: + { + sqlite3_bind_int(ppstmt, i, binding.get_integer()); + + break; + } - i++; + case binding::type::string: + { + sqlite3_bind_text(ppstmt, i, binding.get_string().c_str(), binding.get_string().length(), SQLITE_STATIC); + + break; + } } - } - - for (auto except : _except) - { - sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@EXCID"), except._id); + + i++; } std::list output; -- cgit 1.4.1