From 040ee58fecdc9c478004bc2e554e1ae126ec4602 Mon Sep 17 00:00:00 2001 From: Kelly Rauchenberger Date: Fri, 15 Apr 2016 17:24:44 -0400 Subject: Added support for ImageNet and fixed bug with query interface Datafile change: nouns now know how many images are associated with them on ImageNet, and also have their WordNet synset ID saved so that you can query for images of that noun via the ImageNet API. So far, verbly only exposes the ImageNet API URL, and doesn't actually interact with it itself. This may be changed in the future. The query interface had a huge issue in which multiple instances of the same condition would overwrite each other. This has been fixed. --- generator/generator.cpp | 57 ++++++++++++++++++++++++++++++++++++++----------- generator/schema.sql | 4 +++- 2 files changed, 48 insertions(+), 13 deletions(-) (limited to 'generator') diff --git a/generator/generator.cpp b/generator/generator.cpp index 6fbbfb8..e67bda7 100644 --- a/generator/generator.cpp +++ b/generator/generator.cpp @@ -81,6 +81,7 @@ std::map verbs; std::map adjectives; std::map nouns; std::map> wn; +std::map images; std::map> pronunciations; void print_usage() @@ -89,10 +90,10 @@ void print_usage() std::cout << "-------------------------" << std::endl; std::cout << "Requires exactly six arguments." << std::endl; std::cout << "1. The path to a VerbNet data directory." << std::endl; - std::cout << "2. The path to a SemLink vnpbMappings file." << std::endl; - std::cout << "3. The path to an AGID infl.txt file." << std::endl; - std::cout << "4. The path to a WordNet prolog data directory." << std::endl; - std::cout << "5. The path to a CMUDICT pronunciation file." << std::endl; + std::cout << "2. The path to an AGID infl.txt file." << std::endl; + std::cout << "3. The path to a WordNet prolog data directory." << std::endl; + std::cout << "4. The path to a CMUDICT pronunciation file." << std::endl; + std::cout << "5. The path to an ImageNet urls.txt file." << std::endl; std::cout << "6. 
Datafile output path." << std::endl; exit(1); @@ -431,10 +432,10 @@ int main(int argc, char** argv) // Get verbs from AGID std::cout << "Reading inflections..." << std::endl; - std::ifstream agidfile(argv[3]); + std::ifstream agidfile(argv[2]); if (!agidfile.is_open()) { - std::cout << "Could not open AGID file: " << argv[3] << std::endl; + std::cout << "Could not open AGID file: " << argv[2] << std::endl; print_usage(); } @@ -562,10 +563,10 @@ int main(int argc, char** argv) // Pronounciations std::cout << "Reading pronunciations..." << std::endl; - std::ifstream pronfile(argv[5]); + std::ifstream pronfile(argv[4]); if (!pronfile.is_open()) { - std::cout << "Could not open CMUDICT file: " << argv[5] << std::endl; + std::cout << "Could not open CMUDICT file: " << argv[4] << std::endl; print_usage(); } @@ -593,6 +594,36 @@ int main(int argc, char** argv) } } + // Images + std::cout << "Reading images..." << std::endl; + + std::ifstream imagefile(argv[5]); + if (!imagefile.is_open()) + { + std::cout << "Could not open ImageNet file: " << argv[5] << std::endl; + print_usage(); + } + + for (;;) + { + std::string line; + if (!getline(imagefile, line)) + { + break; + } + + if (line.back() == '\r') + { + line.pop_back(); + } + + std::string wnid_s = line.substr(1, 8); + int wnid = stoi(wnid_s) + 100000000; + images[wnid]++; + } + + imagefile.close(); + // Start writing output std::cout << "Writing schema..." << std::endl; @@ -972,7 +1003,7 @@ int main(int argc, char** argv) // - sa: specification (e.g. inaccurate (general) can mean imprecise or incorrect (specific)) // - sim: synonymy (e.g. 
cheerful/happy, happy/cheerful) // - syntax: positioning flags for some adjectives - std::string wnpref {argv[4]}; + std::string wnpref {argv[3]}; if (wnpref.back() != '/') { wnpref += '/'; @@ -1027,9 +1058,9 @@ int main(int argc, char** argv) { if (nouns.count(word) == 1) { - query = "INSERT INTO nouns (singular, proper, complexity, plural) VALUES (?, ?, ?, ?)"; + query = "INSERT INTO nouns (singular, proper, complexity, images, wnid, plural) VALUES (?, ?, ?, ?, ?, ?)"; } else { - query = "INSERT INTO nouns (singular, proper, complexity) VALUES (?, ?, ?)"; + query = "INSERT INTO nouns (singular, proper, complexity, images, wnid) VALUES (?, ?, ?, ?, ?)"; } break; @@ -1083,10 +1114,12 @@ int main(int argc, char** argv) }) ? 1 : 0)); sqlite3_bind_int(ppstmt, 3, verbly::split>(word, " ").size()); + sqlite3_bind_int(ppstmt, 4, images[synset_id]); + sqlite3_bind_int(ppstmt, 5, synset_id); if (nouns.count(word) == 1) { - sqlite3_bind_text(ppstmt, 4, nouns[word].plural.c_str(), nouns[word].plural.length(), SQLITE_STATIC); + sqlite3_bind_text(ppstmt, 6, nouns[word].plural.c_str(), nouns[word].plural.length(), SQLITE_STATIC); } break; diff --git a/generator/schema.sql b/generator/schema.sql index f2445f0..9a39944 100644 --- a/generator/schema.sql +++ b/generator/schema.sql @@ -55,7 +55,9 @@ CREATE TABLE `nouns` ( `singular` VARCHAR(32) NOT NULL, `plural` VARCHAR(32), `proper` INTEGER(1) NOT NULL, - `complexity` INTEGER NOT NULL + `complexity` INTEGER NOT NULL, + `images` INTEGER NOT NULL, + `wnid` INTEGER NOT NULL ); DROP TABLE IF EXISTS `hypernymy`; -- cgit 1.4.1 From 04338f2b040fee5142904c062e0e38c836601034 Mon Sep 17 00:00:00 2001 From: Kelly Rauchenberger Date: Sun, 17 Apr 2016 13:44:37 -0400 Subject: Fixed perfect rhyming Rhyme detection now ensures that any rhymes it finds are perfect rhymes and not identical rhymes. Rhyme detection is also now a lot faster because additional information is stored in the datafile. 
Also fixed a bug in the query interface (and the generator) that could cause incorrect queries to be executed. --- generator/generator.cpp | 128 +++++++++++++++++++++++++++++++++++++++--------- generator/schema.sql | 8 +++ lib/adjective_query.cpp | 68 ++++++++++++++++++++++--- lib/adjective_query.h | 10 +++- lib/adverb_query.cpp | 68 ++++++++++++++++++++++--- lib/adverb_query.h | 10 +++- lib/data.cpp | 2 +- lib/frame_query.cpp | 2 +- lib/noun_query.cpp | 73 ++++++++++++++++++++++++--- lib/noun_query.h | 10 +++- lib/preposition.cpp | 2 +- lib/verb_query.cpp | 68 ++++++++++++++++++++++--- lib/verb_query.h | 10 +++- lib/word.cpp | 41 +++++++++------- lib/word.h | 17 ++++++- 15 files changed, 442 insertions(+), 75 deletions(-) (limited to 'generator') diff --git a/generator/generator.cpp b/generator/generator.cpp index e67bda7..e2ebfa1 100644 --- a/generator/generator.cpp +++ b/generator/generator.cpp @@ -76,13 +76,24 @@ struct group_t { std::list> frames; }; +struct pronunciation_t { + std::string phonemes; + std::string prerhyme; + std::string rhyme; + + bool operator<(const pronunciation_t& other) const + { + return phonemes < other.phonemes; + } +}; + std::map groups; std::map verbs; std::map adjectives; std::map nouns; std::map> wn; std::map images; -std::map> pronunciations; +std::map> pronunciations; void print_usage() { @@ -590,7 +601,47 @@ int main(int argc, char** argv) std::string canonical(phoneme_data[1]); std::transform(std::begin(canonical), std::end(canonical), std::begin(canonical), ::tolower); - pronunciations[canonical].insert(phoneme_data[2]); + std::string phonemes = phoneme_data[2]; + auto phoneme_set = verbly::split>(phonemes, " "); + auto phemstrt = std::find_if(std::begin(phoneme_set), std::end(phoneme_set), [] (std::string phoneme) { + return phoneme.find("1") != std::string::npos; + }); + + pronunciation_t p; + p.phonemes = phonemes; + if (phemstrt != std::end(phoneme_set)) + { + std::stringstream rhymer; + for (auto it = phemstrt; it != 
std::end(phoneme_set); it++) + { + std::string naked; + std::remove_copy_if(std::begin(*it), std::end(*it), std::back_inserter(naked), [] (char ch) { + return isdigit(ch); + }); + + if (it != phemstrt) + { + rhymer << " "; + } + + rhymer << naked; + } + + p.rhyme = rhymer.str(); + + if (phemstrt != std::begin(phoneme_set)) + { + phemstrt--; + p.prerhyme = *phemstrt; + } else { + p.prerhyme = ""; + } + } else { + p.prerhyme = ""; + p.rhyme = ""; + } + + pronunciations[canonical].insert(p); } } @@ -720,7 +771,7 @@ int main(int argc, char** argv) db_error(ppdb, query); } - sqlite3_bind_text(ppstmt, 1, prep.c_str(), prep.length(), SQLITE_STATIC); + sqlite3_bind_text(ppstmt, 1, prep.c_str(), prep.length(), SQLITE_TRANSIENT); if (sqlite3_step(ppstmt) != SQLITE_DONE) { @@ -752,7 +803,7 @@ int main(int argc, char** argv) } sqlite3_bind_int(ppstmt, 1, rowid); - sqlite3_bind_text(ppstmt, 2, group.c_str(), group.length(), SQLITE_STATIC); + sqlite3_bind_text(ppstmt, 2, group.c_str(), group.length(), SQLITE_TRANSIENT); if (sqlite3_step(ppstmt) != SQLITE_DONE) { @@ -775,11 +826,11 @@ int main(int argc, char** argv) db_error(ppdb, query); } - sqlite3_bind_text(ppstmt, 1, mapping.second.infinitive.c_str(), mapping.second.infinitive.length(), SQLITE_STATIC); - sqlite3_bind_text(ppstmt, 2, mapping.second.past_tense.c_str(), mapping.second.past_tense.length(), SQLITE_STATIC); - sqlite3_bind_text(ppstmt, 3, mapping.second.past_participle.c_str(), mapping.second.past_participle.length(), SQLITE_STATIC); - sqlite3_bind_text(ppstmt, 4, mapping.second.ing_form.c_str(), mapping.second.ing_form.length(), SQLITE_STATIC); - sqlite3_bind_text(ppstmt, 5, mapping.second.s_form.c_str(), mapping.second.s_form.length(), SQLITE_STATIC); + sqlite3_bind_text(ppstmt, 1, mapping.second.infinitive.c_str(), mapping.second.infinitive.length(), SQLITE_TRANSIENT); + sqlite3_bind_text(ppstmt, 2, mapping.second.past_tense.c_str(), mapping.second.past_tense.length(), SQLITE_TRANSIENT); + 
sqlite3_bind_text(ppstmt, 3, mapping.second.past_participle.c_str(), mapping.second.past_participle.length(), SQLITE_TRANSIENT); + sqlite3_bind_text(ppstmt, 4, mapping.second.ing_form.c_str(), mapping.second.ing_form.length(), SQLITE_TRANSIENT); + sqlite3_bind_text(ppstmt, 5, mapping.second.s_form.c_str(), mapping.second.s_form.length(), SQLITE_TRANSIENT); if (sqlite3_step(ppstmt) != SQLITE_DONE) { @@ -811,14 +862,26 @@ int main(int argc, char** argv) for (auto pronunciation : pronunciations[canonical]) { - query = "INSERT INTO verb_pronunciations (verb_id, pronunciation) VALUES (?, ?)"; + if (!pronunciation.rhyme.empty()) + { + query = "INSERT INTO verb_pronunciations (verb_id, pronunciation, prerhyme, rhyme) VALUES (?, ?, ?, ?)"; + } else { + query = "INSERT INTO verb_pronunciations (verb_id, pronunciation) VALUES (?, ?)"; + } + if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) { db_error(ppdb, query); } sqlite3_bind_int(ppstmt, 1, rowid); - sqlite3_bind_text(ppstmt, 2, pronunciation.c_str(), pronunciation.length(), SQLITE_STATIC); + sqlite3_bind_text(ppstmt, 2, pronunciation.phonemes.c_str(), pronunciation.phonemes.length(), SQLITE_TRANSIENT); + + if (!pronunciation.rhyme.empty()) + { + sqlite3_bind_text(ppstmt, 3, pronunciation.prerhyme.c_str(), pronunciation.prerhyme.length(), SQLITE_TRANSIENT); + sqlite3_bind_text(ppstmt, 4, pronunciation.rhyme.c_str(), pronunciation.rhyme.length(), SQLITE_TRANSIENT); + } if (sqlite3_step(ppstmt) != SQLITE_DONE) { @@ -856,7 +919,7 @@ int main(int argc, char** argv) db_error(ppdb, query); } - sqlite3_bind_blob(ppstmt, 1, rdm.c_str(), rdm.size(), SQLITE_STATIC); + sqlite3_bind_blob(ppstmt, 1, rdm.c_str(), rdm.size(), SQLITE_TRANSIENT); if (sqlite3_step(ppstmt) != SQLITE_DONE) { @@ -949,7 +1012,7 @@ int main(int argc, char** argv) } sqlite3_bind_int(ppstmt, 1, gid); - sqlite3_bind_blob(ppstmt, 2, marshall.c_str(), marshall.length(), SQLITE_STATIC); + sqlite3_bind_blob(ppstmt, 2, 
marshall.c_str(), marshall.length(), SQLITE_TRANSIENT); if (sqlite3_step(ppstmt) != SQLITE_DONE) { @@ -1104,7 +1167,7 @@ int main(int argc, char** argv) db_error(ppdb, query); } - sqlite3_bind_text(ppstmt, 1, word.c_str(), word.length(), SQLITE_STATIC); + sqlite3_bind_text(ppstmt, 1, word.c_str(), word.length(), SQLITE_TRANSIENT); switch (synset_id / 100000000) { case 1: // Noun @@ -1119,7 +1182,7 @@ int main(int argc, char** argv) if (nouns.count(word) == 1) { - sqlite3_bind_text(ppstmt, 6, nouns[word].plural.c_str(), nouns[word].plural.length(), SQLITE_STATIC); + sqlite3_bind_text(ppstmt, 6, nouns[word].plural.c_str(), nouns[word].plural.length(), SQLITE_TRANSIENT); } break; @@ -1132,8 +1195,8 @@ int main(int argc, char** argv) if (adjectives.count(word) == 1) { - sqlite3_bind_text(ppstmt, 3, adjectives[word].comparative.c_str(), adjectives[word].comparative.length(), SQLITE_STATIC); - sqlite3_bind_text(ppstmt, 4, adjectives[word].superlative.c_str(), adjectives[word].superlative.length(), SQLITE_STATIC); + sqlite3_bind_text(ppstmt, 3, adjectives[word].comparative.c_str(), adjectives[word].comparative.length(), SQLITE_TRANSIENT); + sqlite3_bind_text(ppstmt, 4, adjectives[word].superlative.c_str(), adjectives[word].superlative.length(), SQLITE_TRANSIENT); } break; @@ -1173,21 +1236,36 @@ int main(int argc, char** argv) { case 1: // Noun { - query = "INSERT INTO noun_pronunciations (noun_id, pronunciation) VALUES (?, ?)"; + if (!pronunciation.rhyme.empty()) + { + query = "INSERT INTO noun_pronunciations (noun_id, pronunciation, prerhyme, rhyme) VALUES (?, ?, ?, ?)"; + } else { + query = "INSERT INTO noun_pronunciations (noun_id, pronunciation) VALUES (?, ?)"; + } break; } case 3: // Adjective { - query = "INSERT INTO adjective_pronunciations (adjective_id, pronunciation) VALUES (?, ?)"; + if (!pronunciation.rhyme.empty()) + { + query = "INSERT INTO adjective_pronunciations (adjective_id, pronunciation, prerhyme, rhyme) VALUES (?, ?, ?, ?)"; + } else { + query = 
"INSERT INTO adjective_pronunciations (adjective_id, pronunciation) VALUES (?, ?)"; + } break; } case 4: // Adverb { - query = "INSERT INTO adverb_pronunciations (adverb_id, pronunciation) VALUES (?, ?)"; + if (!pronunciation.rhyme.empty()) + { + query = "INSERT INTO adverb_pronunciations (adverb_id, pronunciation, prerhyme, rhyme) VALUES (?, ?, ?, ?)"; + } else { + query = "INSERT INTO adverb_pronunciations (adverb_id, pronunciation) VALUES (?, ?)"; + } break; } @@ -1199,7 +1277,13 @@ int main(int argc, char** argv) } sqlite3_bind_int(ppstmt, 1, rowid); - sqlite3_bind_text(ppstmt, 2, pronunciation.c_str(), pronunciation.length(), SQLITE_STATIC); + sqlite3_bind_text(ppstmt, 2, pronunciation.phonemes.c_str(), pronunciation.phonemes.length(), SQLITE_TRANSIENT); + + if (!pronunciation.rhyme.empty()) + { + sqlite3_bind_text(ppstmt, 3, pronunciation.prerhyme.c_str(), pronunciation.prerhyme.length(), SQLITE_TRANSIENT); + sqlite3_bind_text(ppstmt, 4, pronunciation.rhyme.c_str(), pronunciation.rhyme.length(), SQLITE_TRANSIENT); + } if (sqlite3_step(ppstmt) != SQLITE_DONE) { @@ -2188,7 +2272,7 @@ int main(int argc, char** argv) db_error(ppdb, query); } - sqlite3_bind_text(ppstmt, 1, syn.c_str(), 1, SQLITE_STATIC); + sqlite3_bind_text(ppstmt, 1, syn.c_str(), 1, SQLITE_TRANSIENT); sqlite3_bind_int(ppstmt, 2, wn[synset_id][wnum]); if (sqlite3_step(ppstmt) != SQLITE_DONE) diff --git a/generator/schema.sql b/generator/schema.sql index 9a39944..1836c62 100644 --- a/generator/schema.sql +++ b/generator/schema.sql @@ -184,6 +184,8 @@ DROP TABLE IF EXISTS `noun_pronunciations`; CREATE TABLE `noun_pronunciations` ( `noun_id` INTEGER NOT NULL, `pronunciation` VARCHAR(64) NOT NULL, + `prerhyme` VARCHAR(8), + `rhyme` VARCHAR(64), FOREIGN KEY (`noun_id`) REFERENCES `nouns`(`noun_id`) ); @@ -191,6 +193,8 @@ DROP TABLE IF EXISTS `verb_pronunciations`; CREATE TABLE `verb_pronunciations` ( `verb_id` INTEGER NOT NULL, `pronunciation` VARCHAR(64) NOT NULL, + `prerhyme` VARCHAR(8), + `rhyme` 
VARCHAR(64), FOREIGN KEY (`verb_id`) REFERENCES `verbs`(`verb_id`) ); @@ -198,6 +202,8 @@ DROP TABLE IF EXISTS `adjective_pronunciations`; CREATE TABLE `adjective_pronunciations` ( `adjective_id` INTEGER NOT NULL, `pronunciation` VARCHAR(64) NOT NULL, + `prerhyme` VARCHAR(8), + `rhyme` VARCHAR(64), FOREIGN KEY (`adjective_id`) REFERENCES `adjectives`(`adjective_id`) ); @@ -205,6 +211,8 @@ DROP TABLE IF EXISTS `adverb_pronunciations`; CREATE TABLE `adverb_pronunciations` ( `adverb_id` INTEGER NOT NULL, `pronunciation` VARCHAR(64) NOT NULL, + `prerhyme` VARCHAR(8), + `rhyme` VARCHAR(64), FOREIGN KEY (`adverb_id`) REFERENCES `adverbs`(`adverb_id`) ); diff --git a/lib/adjective_query.cpp b/lib/adjective_query.cpp index a7f915c..2bea68f 100644 --- a/lib/adjective_query.cpp +++ b/lib/adjective_query.cpp @@ -33,7 +33,7 @@ namespace verbly { adjective_query& adjective_query::rhymes_with(const word& _word) { - for (auto rhyme : _word.rhyme_phonemes()) + for (auto rhyme : _word.get_rhymes()) { _rhymes.push_back(rhyme); } @@ -53,6 +53,34 @@ namespace verbly { return *this; } + adjective_query& adjective_query::has_rhyming_noun() + { + _has_rhyming_noun = true; + + return *this; + } + + adjective_query& adjective_query::has_rhyming_adjective() + { + _has_rhyming_adjective = true; + + return *this; + } + + adjective_query& adjective_query::has_rhyming_adverb() + { + _has_rhyming_adverb = true; + + return *this; + } + + adjective_query& adjective_query::has_rhyming_verb() + { + _has_rhyming_verb = true; + + return *this; + } + adjective_query& adjective_query::with_prefix(filter _f) { _f.clean(); @@ -227,16 +255,37 @@ namespace verbly { if (!_rhymes.empty()) { - std::list clauses(_rhymes.size(), "pronunciation LIKE ?"); + std::list clauses(_rhymes.size(), "(prerhyme != ? 
AND rhyme = ?)"); std::string cond = "adjective_id IN (SELECT adjective_id FROM adjective_pronunciations WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; conditions.push_back(cond); - for (auto rhyme : _rhymes) + for (auto rhy : _rhymes) { - bindings.emplace_back("%" + rhyme); + bindings.emplace_back(rhy.get_prerhyme()); + bindings.emplace_back(rhy.get_rhyme()); } } + if (_has_rhyming_noun) + { + conditions.push_back("adjective_id IN (SELECT a.adjective_id FROM adjectives AS a INNER JOIN adjective_pronunciations AS curp ON curp.adjective_id = a.adjective_id INNER JOIN noun_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)"); + } + + if (_has_rhyming_adjective) + { + conditions.push_back("adjective_id IN (SELECT a.adjective_id FROM adjectives AS a INNER JOIN adjective_pronunciations AS curp ON curp.adjective_id = a.adjective_id INNER JOIN adjective_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme AND rhmp.adjective_id != curp.adjective_id)"); + } + + if (_has_rhyming_adverb) + { + conditions.push_back("adjective_id IN (SELECT a.adjective_id FROM adjectives AS a INNER JOIN adjective_pronunciations AS curp ON curp.adjective_id = a.adjective_id INNER JOIN adverb_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)"); + } + + if (_has_rhyming_verb) + { + conditions.push_back("adjective_id IN (SELECT a.adjective_id FROM adjectives AS a INNER JOIN adjective_pronunciations AS curp ON curp.adjective_id = a.adjective_id INNER JOIN verb_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)"); + } + for (auto except : _except) { conditions.push_back("adjective_id != ?"); @@ -816,7 +865,7 @@ namespace verbly { case binding::type::string: { - sqlite3_bind_text(ppstmt, i, binding.get_string().c_str(), binding.get_string().length(), SQLITE_STATIC); + sqlite3_bind_text(ppstmt, i, binding.get_string().c_str(), 
binding.get_string().length(), SQLITE_TRANSIENT); break; } @@ -894,7 +943,7 @@ namespace verbly { for (auto& adjective : output) { - query = "SELECT pronunciation FROM adjective_pronunciations WHERE adjective_id = ?"; + query = "SELECT pronunciation, prerhyme, rhyme FROM adjective_pronunciations WHERE adjective_id = ?"; if (sqlite3_prepare_v2(_data.ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) { throw std::runtime_error(sqlite3_errmsg(_data.ppdb)); @@ -908,6 +957,13 @@ namespace verbly { auto phonemes = verbly::split>(pronunciation, " "); adjective.pronunciations.push_back(phonemes); + + if ((sqlite3_column_type(ppstmt, 1) != SQLITE_NULL) && (sqlite3_column_type(ppstmt, 2) != SQLITE_NULL)) + { + std::string prerhyme(reinterpret_cast(sqlite3_column_text(ppstmt, 1))); + std::string rhyming(reinterpret_cast(sqlite3_column_text(ppstmt, 2))); + adjective.rhymes.emplace_back(prerhyme, rhyming); + } } sqlite3_finalize(ppstmt); diff --git a/lib/adjective_query.h b/lib/adjective_query.h index b2859dc..030a494 100644 --- a/lib/adjective_query.h +++ b/lib/adjective_query.h @@ -12,6 +12,10 @@ namespace verbly { adjective_query& except(const adjective& _word); adjective_query& rhymes_with(const word& _word); adjective_query& has_pronunciation(); + adjective_query& has_rhyming_noun(); + adjective_query& has_rhyming_adjective(); + adjective_query& has_rhyming_adverb(); + adjective_query& has_rhyming_verb(); adjective_query& requires_comparative_form(); adjective_query& requires_superlative_form(); @@ -54,9 +58,13 @@ namespace verbly { const data& _data; int _limit = unlimited; bool _random = false; - std::list _rhymes; + std::list _rhymes; std::list _except; bool _has_prn = false; + bool _has_rhyming_noun = false; + bool _has_rhyming_adjective = false; + bool _has_rhyming_adverb = false; + bool _has_rhyming_verb = false; bool _requires_comparative_form = false; bool _requires_superlative_form = false; diff --git a/lib/adverb_query.cpp b/lib/adverb_query.cpp 
index 30ba92b..797e6a6 100644 --- a/lib/adverb_query.cpp +++ b/lib/adverb_query.cpp @@ -33,7 +33,7 @@ namespace verbly { adverb_query& adverb_query::rhymes_with(const word& _word) { - for (auto rhyme : _word.rhyme_phonemes()) + for (auto rhyme : _word.get_rhymes()) { _rhymes.push_back(rhyme); } @@ -53,6 +53,34 @@ namespace verbly { return *this; } + adverb_query& adverb_query::has_rhyming_noun() + { + _has_rhyming_noun = true; + + return *this; + } + + adverb_query& adverb_query::has_rhyming_adjective() + { + _has_rhyming_adjective = true; + + return *this; + } + + adverb_query& adverb_query::has_rhyming_adverb() + { + _has_rhyming_adverb = true; + + return *this; + } + + adverb_query& adverb_query::has_rhyming_verb() + { + _has_rhyming_verb = true; + + return *this; + } + adverb_query& adverb_query::requires_comparative_form() { _requires_comparative_form = true; @@ -181,16 +209,37 @@ namespace verbly { if (!_rhymes.empty()) { - std::list clauses(_rhymes.size(), "pronunciation LIKE ?"); + std::list clauses(_rhymes.size(), "(prerhyme != ? 
AND rhyme = ?)"); std::string cond = "adverb_id IN (SELECT adverb_id FROM adverb_pronunciations WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; conditions.push_back(cond); - for (auto rhyme : _rhymes) + for (auto rhy : _rhymes) { - bindings.emplace_back("%" + rhyme); + bindings.emplace_back(rhy.get_prerhyme()); + bindings.emplace_back(rhy.get_rhyme()); } } + if (_has_rhyming_noun) + { + conditions.push_back("adverb_id IN (SELECT a.adverb_id FROM adverbs AS a INNER JOIN adverb_pronunciations AS curp ON curp.adverb_id = a.adverb_id INNER JOIN noun_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)"); + } + + if (_has_rhyming_adjective) + { + conditions.push_back("adverb_id IN (SELECT a.adverb_id FROM adverbs AS a INNER JOIN adverb_pronunciations AS curp ON curp.adverb_id = a.adverb_id INNER JOIN adjective_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)"); + } + + if (_has_rhyming_adverb) + { + conditions.push_back("adverb_id IN (SELECT a.adverb_id FROM adverbs AS a INNER JOIN adverb_pronunciations AS curp ON curp.adverb_id = a.adverb_id INNER JOIN adverb_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme AND rhmp.adverb_id != curp.adverb_id)"); + } + + if (_has_rhyming_verb) + { + conditions.push_back("adverb_id IN (SELECT a.adverb_id FROM adverbs AS a INNER JOIN adverb_pronunciations AS curp ON curp.adverb_id = a.adverb_id INNER JOIN verb_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)"); + } + for (auto except : _except) { conditions.push_back("adverb_id != ?"); @@ -538,7 +587,7 @@ namespace verbly { case binding::type::string: { - sqlite3_bind_text(ppstmt, i, binding.get_string().c_str(), binding.get_string().length(), SQLITE_STATIC); + sqlite3_bind_text(ppstmt, i, binding.get_string().c_str(), binding.get_string().length(), SQLITE_TRANSIENT); break; } @@ -601,7 +650,7 @@ namespace verbly { for 
(auto& adverb : output) { - query = "SELECT pronunciation FROM adverb_pronunciations WHERE adverb_id = ?"; + query = "SELECT pronunciation, prerhyme, rhyme FROM adverb_pronunciations WHERE adverb_id = ?"; if (sqlite3_prepare_v2(_data.ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) { throw std::runtime_error(sqlite3_errmsg(_data.ppdb)); @@ -615,6 +664,13 @@ namespace verbly { auto phonemes = verbly::split>(pronunciation, " "); adverb.pronunciations.push_back(phonemes); + + if ((sqlite3_column_type(ppstmt, 1) != SQLITE_NULL) && (sqlite3_column_type(ppstmt, 2) != SQLITE_NULL)) + { + std::string prerhyme(reinterpret_cast(sqlite3_column_text(ppstmt, 1))); + std::string rhyming(reinterpret_cast(sqlite3_column_text(ppstmt, 2))); + adverb.rhymes.emplace_back(prerhyme, rhyming); + } } sqlite3_finalize(ppstmt); diff --git a/lib/adverb_query.h b/lib/adverb_query.h index e9354bb..403a616 100644 --- a/lib/adverb_query.h +++ b/lib/adverb_query.h @@ -12,6 +12,10 @@ namespace verbly { adverb_query& except(const adverb& _word); adverb_query& rhymes_with(const word& _word); adverb_query& has_pronunciation(); + adverb_query& has_rhyming_noun(); + adverb_query& has_rhyming_adjective(); + adverb_query& has_rhyming_adverb(); + adverb_query& has_rhyming_verb(); adverb_query& requires_comparative_form(); adverb_query& requires_superlative_form(); @@ -41,9 +45,13 @@ namespace verbly { const data& _data; int _limit = unlimited; bool _random = false; - std::list _rhymes; + std::list _rhymes; std::list _except; bool _has_prn = false; + bool _has_rhyming_noun = false; + bool _has_rhyming_adjective = false; + bool _has_rhyming_adverb = false; + bool _has_rhyming_verb = false; bool _requires_comparative_form = false; bool _requires_superlative_form = false; diff --git a/lib/data.cpp b/lib/data.cpp index c14956f..db42487 100644 --- a/lib/data.cpp +++ b/lib/data.cpp @@ -1,7 +1,7 @@ #include "verbly.h" namespace verbly { - + data::data(std::string datafile) { if 
(sqlite3_open_v2(datafile.c_str(), &ppdb, SQLITE_OPEN_READONLY, NULL) != SQLITE_OK) diff --git a/lib/frame_query.cpp b/lib/frame_query.cpp index 3c4a3e8..11f0432 100644 --- a/lib/frame_query.cpp +++ b/lib/frame_query.cpp @@ -73,7 +73,7 @@ namespace verbly { case binding::type::string: { - sqlite3_bind_text(ppstmt, i, binding.get_string().c_str(), binding.get_string().length(), SQLITE_STATIC); + sqlite3_bind_text(ppstmt, i, binding.get_string().c_str(), binding.get_string().length(), SQLITE_TRANSIENT); break; } diff --git a/lib/noun_query.cpp b/lib/noun_query.cpp index 19a1297..b4336b6 100644 --- a/lib/noun_query.cpp +++ b/lib/noun_query.cpp @@ -33,7 +33,7 @@ namespace verbly { noun_query& noun_query::rhymes_with(const word& _word) { - for (auto rhyme : _word.rhyme_phonemes()) + for (auto rhyme : _word.get_rhymes()) { _rhymes.push_back(rhyme); } @@ -53,6 +53,34 @@ namespace verbly { return *this; } + noun_query& noun_query::has_rhyming_noun() + { + _has_rhyming_noun = true; + + return *this; + } + + noun_query& noun_query::has_rhyming_adjective() + { + _has_rhyming_adjective = true; + + return *this; + } + + noun_query& noun_query::has_rhyming_adverb() + { + _has_rhyming_adverb = true; + + return *this; + } + + noun_query& noun_query::has_rhyming_verb() + { + _has_rhyming_verb = true; + + return *this; + } + noun_query& noun_query::with_singular_form(std::string _arg) { _with_singular_form.push_back(_arg); @@ -483,16 +511,37 @@ namespace verbly { if (!_rhymes.empty()) { - std::list clauses(_rhymes.size(), "pronunciation LIKE ?"); + std::list clauses(_rhymes.size(), "(prerhyme != ? 
AND rhyme = ?)"); std::string cond = "noun_id IN (SELECT noun_id FROM noun_pronunciations WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; conditions.push_back(cond); - for (auto rhyme : _rhymes) + for (auto rhy : _rhymes) { - bindings.emplace_back("%" + rhyme); + bindings.emplace_back(rhy.get_prerhyme()); + bindings.emplace_back(rhy.get_rhyme()); } } + if (_has_rhyming_noun) + { + conditions.push_back("noun_id IN (SELECT a.noun_id FROM nouns AS a INNER JOIN noun_pronunciations AS curp ON curp.noun_id = a.noun_id INNER JOIN noun_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme AND rhmp.noun_id != curp.noun_id)"); + } + + if (_has_rhyming_adjective) + { + conditions.push_back("noun_id IN (SELECT a.noun_id FROM nouns AS a INNER JOIN noun_pronunciations AS curp ON curp.noun_id = a.noun_id INNER JOIN adjective_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)"); + } + + if (_has_rhyming_adverb) + { + conditions.push_back("noun_id IN (SELECT a.noun_id FROM nouns AS a INNER JOIN noun_pronunciations AS curp ON curp.noun_id = a.noun_id INNER JOIN adverb_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)"); + } + + if (_has_rhyming_verb) + { + conditions.push_back("noun_id IN (SELECT a.noun_id FROM nouns AS a INNER JOIN noun_pronunciations AS curp ON curp.noun_id = a.noun_id INNER JOIN verb_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)"); + } + for (auto except : _except) { conditions.push_back("noun_id != ?"); @@ -1768,7 +1817,7 @@ namespace verbly { { throw std::runtime_error(sqlite3_errmsg(_data.ppdb)); } - + int i = 1; for (auto& binding : bindings) { @@ -1783,7 +1832,7 @@ namespace verbly { case binding::type::string: { - sqlite3_bind_text(ppstmt, i, binding.get_string().c_str(), binding.get_string().length(), SQLITE_STATIC); + sqlite3_bind_text(ppstmt, i, binding.get_string().c_str(), 
binding.get_string().length(), SQLITE_TRANSIENT); break; } @@ -1791,7 +1840,7 @@ namespace verbly { i++; } - + /* for (auto adj : _derived_from_adjective) { @@ -1843,7 +1892,7 @@ namespace verbly { for (auto& noun : output) { - query = "SELECT pronunciation FROM noun_pronunciations WHERE noun_id = ?"; + query = "SELECT pronunciation, prerhyme, rhyme FROM noun_pronunciations WHERE noun_id = ?"; if (sqlite3_prepare_v2(_data.ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) { throw std::runtime_error(sqlite3_errmsg(_data.ppdb)); @@ -1857,6 +1906,14 @@ namespace verbly { auto phonemes = verbly::split>(pronunciation, " "); noun.pronunciations.push_back(phonemes); + + if ((sqlite3_column_type(ppstmt, 1) != SQLITE_NULL) && (sqlite3_column_type(ppstmt, 2) != SQLITE_NULL)) + { + std::string prerhyme(reinterpret_cast(sqlite3_column_text(ppstmt, 1))); + std::string rhyming(reinterpret_cast(sqlite3_column_text(ppstmt, 2))); + + noun.rhymes.emplace_back(prerhyme, rhyming); + } } sqlite3_finalize(ppstmt); diff --git a/lib/noun_query.h b/lib/noun_query.h index 8768f5d..6b5733f 100644 --- a/lib/noun_query.h +++ b/lib/noun_query.h @@ -12,6 +12,10 @@ namespace verbly { noun_query& except(const noun& _word); noun_query& rhymes_with(const word& _word); noun_query& has_pronunciation(); + noun_query& has_rhyming_noun(); + noun_query& has_rhyming_adjective(); + noun_query& has_rhyming_adverb(); + noun_query& has_rhyming_verb(); noun_query& with_singular_form(std::string _arg); noun_query& with_prefix(filter _f); @@ -86,9 +90,13 @@ namespace verbly { const data& _data; int _limit = unlimited; bool _random = false; - std::list _rhymes; + std::list _rhymes; std::list _except; bool _has_prn = false; + bool _has_rhyming_noun = false; + bool _has_rhyming_adjective = false; + bool _has_rhyming_adverb = false; + bool _has_rhyming_verb = false; std::list _with_singular_form; filter _with_prefix; diff --git a/lib/preposition.cpp b/lib/preposition.cpp index 8df13aa..cea9165 100644 
--- a/lib/preposition.cpp +++ b/lib/preposition.cpp @@ -83,7 +83,7 @@ namespace verbly { case binding::type::string: { - sqlite3_bind_text(ppstmt, i, binding.get_string().c_str(), binding.get_string().length(), SQLITE_STATIC); + sqlite3_bind_text(ppstmt, i, binding.get_string().c_str(), binding.get_string().length(), SQLITE_TRANSIENT); break; } diff --git a/lib/verb_query.cpp b/lib/verb_query.cpp index 929ecc7..654bc33 100644 --- a/lib/verb_query.cpp +++ b/lib/verb_query.cpp @@ -33,7 +33,7 @@ namespace verbly { verb_query& verb_query::rhymes_with(const word& _word) { - for (auto rhyme : _word.rhyme_phonemes()) + for (auto rhyme : _word.get_rhymes()) { _rhymes.push_back(rhyme); } @@ -53,6 +53,34 @@ namespace verbly { return *this; } + verb_query& verb_query::has_rhyming_noun() + { + _has_rhyming_noun = true; + + return *this; + } + + verb_query& verb_query::has_rhyming_adjective() + { + _has_rhyming_adjective = true; + + return *this; + } + + verb_query& verb_query::has_rhyming_adverb() + { + _has_rhyming_adverb = true; + + return *this; + } + + verb_query& verb_query::has_rhyming_verb() + { + _has_rhyming_verb = true; + + return *this; + } + verb_query& verb_query::has_frames() { this->_has_frames = true; @@ -74,16 +102,37 @@ namespace verbly { if (!_rhymes.empty()) { - std::list clauses(_rhymes.size(), "pronunciation LIKE ?"); + std::list clauses(_rhymes.size(), "(prerhyme != ? 
AND rhyme = ?)"); std::string cond = "verb_id IN (SELECT verb_id FROM verb_pronunciations WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; conditions.push_back(cond); - for (auto rhyme : _rhymes) + for (auto rhy : _rhymes) { - bindings.emplace_back("%" + rhyme); + bindings.emplace_back(rhy.get_prerhyme()); + bindings.emplace_back(rhy.get_rhyme()); } } + if (_has_rhyming_noun) + { + conditions.push_back("verb_id IN (SELECT a.verb_id FROM verbs AS a INNER JOIN verb_pronunciations AS curp ON curp.noun_id = a.adverb_id INNER JOIN noun_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)"); + } + + if (_has_rhyming_adjective) + { + conditions.push_back("verb_id IN (SELECT a.verb_id FROM verbs AS a INNER JOIN verb_pronunciations AS curp ON curp.noun_id = a.adverb_id INNER JOIN adjective_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)"); + } + + if (_has_rhyming_adverb) + { + conditions.push_back("verb_id IN (SELECT a.verb_id FROM verbs AS a INNER JOIN verb_pronunciations AS curp ON curp.noun_id = a.adverb_id INNER JOIN adverb_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)"); + } + + if (_has_rhyming_verb) + { + conditions.push_back("verb_id IN (SELECT a.verb_id FROM verbs AS a INNER JOIN verb_pronunciations AS curp ON curp.noun_id = a.adverb_id INNER JOIN verb_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme AND rhmp.verb_id != curp.verb_id)"); + } + for (auto except : _except) { conditions.push_back("verb_id != ?"); @@ -132,7 +181,7 @@ namespace verbly { case binding::type::string: { - sqlite3_bind_text(ppstmt, i, binding.get_string().c_str(), binding.get_string().length(), SQLITE_STATIC); + sqlite3_bind_text(ppstmt, i, binding.get_string().c_str(), binding.get_string().length(), SQLITE_TRANSIENT); break; } @@ -158,7 +207,7 @@ namespace verbly { for (auto& verb : output) { - query = "SELECT 
pronunciation FROM verb_pronunciations WHERE verb_id = ?"; + query = "SELECT pronunciation, prerhyme, rhyme FROM verb_pronunciations WHERE verb_id = ?"; if (sqlite3_prepare_v2(_data.ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) { throw std::runtime_error(sqlite3_errmsg(_data.ppdb)); @@ -172,6 +221,13 @@ namespace verbly { auto phonemes = verbly::split>(pronunciation, " "); verb.pronunciations.push_back(phonemes); + + if ((sqlite3_column_type(ppstmt, 1) != SQLITE_NULL) && (sqlite3_column_type(ppstmt, 2) != SQLITE_NULL)) + { + std::string prerhyme(reinterpret_cast(sqlite3_column_text(ppstmt, 1))); + std::string rhyming(reinterpret_cast(sqlite3_column_text(ppstmt, 2))); + verb.rhymes.emplace_back(prerhyme, rhyming); + } } sqlite3_finalize(ppstmt); diff --git a/lib/verb_query.h b/lib/verb_query.h index 24f5732..a07dc18 100644 --- a/lib/verb_query.h +++ b/lib/verb_query.h @@ -12,6 +12,10 @@ namespace verbly { verb_query& except(const verb& _word); verb_query& rhymes_with(const word& _word); verb_query& has_pronunciation(); + verb_query& has_rhyming_noun(); + verb_query& has_rhyming_adjective(); + verb_query& has_rhyming_adverb(); + verb_query& has_rhyming_verb(); verb_query& has_frames(); @@ -23,10 +27,14 @@ namespace verbly { const data& _data; int _limit = unlimited; bool _random = false; - std::list _rhymes; + std::list _rhymes; std::list _except; bool _has_prn = false; bool _has_frames = false; + bool _has_rhyming_noun = false; + bool _has_rhyming_adjective = false; + bool _has_rhyming_adverb = false; + bool _has_rhyming_verb = false; }; }; diff --git a/lib/word.cpp b/lib/word.cpp index 13c611f..49e34a1 100644 --- a/lib/word.cpp +++ b/lib/word.cpp @@ -3,6 +3,26 @@ namespace verbly { + rhyme::rhyme(std::string prerhyme, std::string phonemes) : _prerhyme(prerhyme), _rhyme(phonemes) + { + + } + + std::string rhyme::get_prerhyme() const + { + return _prerhyme; + } + + std::string rhyme::get_rhyme() const + { + return _rhyme; + } + + bool 
rhyme::operator==(const rhyme& other) const + { + return std::tie(_prerhyme, _rhyme) == std::tie(other._prerhyme, other._rhyme); + } + word::word() { @@ -13,28 +33,11 @@ namespace verbly { } - std::list word::rhyme_phonemes() const + std::list word::get_rhymes() const { assert(_valid == true); - std::list result; - - for (auto pronunciation : pronunciations) - { - auto phemstrt = std::find_if(std::begin(pronunciation), std::end(pronunciation), [] (std::string phoneme) { - return phoneme.find("1") != std::string::npos; - }); - - std::stringstream rhymer; - for (auto it = phemstrt; it != std::end(pronunciation); it++) - { - rhymer << " " << *it; - } - - result.push_back(rhymer.str()); - } - - return result; + return rhymes; } bool word::starts_with_vowel_sound() const diff --git a/lib/word.h b/lib/word.h index dc6fac8..08797a3 100644 --- a/lib/word.h +++ b/lib/word.h @@ -3,6 +3,20 @@ namespace verbly { + class rhyme { + public: + rhyme(std::string prerhyme, std::string phonemes); + + std::string get_prerhyme() const; + std::string get_rhyme() const; + + bool operator==(const rhyme& other) const; + + private: + std::string _prerhyme; + std::string _rhyme; + }; + class word { protected: const data* _data; @@ -10,6 +24,7 @@ namespace verbly { bool _valid = false; std::list> pronunciations; + std::list rhymes; word(); word(const data& _data, int _id); @@ -24,7 +39,7 @@ namespace verbly { public: virtual std::string base_form() const = 0; - std::list rhyme_phonemes() const; + std::list get_rhymes() const; bool starts_with_vowel_sound() const; }; -- cgit 1.4.1 From 965a3206df834f846f2c560438c80a707dcee4cb Mon Sep 17 00:00:00 2001 From: Kelly Rauchenberger Date: Mon, 18 Apr 2016 15:09:20 -0400 Subject: Fixed problem with words containing certain characters The generator previously had a problem wherein it would ignore WordNet lemmas containing certain non-alpha characters (hyphens, slashes, numbers, apostrophes). 
In addition to these words not being included in the generated datafile, it had the side effect of causing relationships involving the ignored words (e.g. hypernymy, synonymy, etc.) to instead be related to the word with id 0, which did not exist. This rarely caused a failure with direct queries, but it caused hierarchical queries (most notably full hyponymy, which is where the error was noticed) to potentially permit far more lemmas than they should have because a very large number of words could be transitively reached through the sentinel word id 0. The generator has been fixed to not ignore the words containing special characters, which removed the word id 0 from most relationships and therefore fixed hierarchical queries. The only remaining word id 0s are as a synonym of "free-flying" (synset 301380571) and as an anti-mannernym of "aerially" (synset 400202718). This is because the WordNet data is malformed in the definitions of two words: "aerial" (synset 301380267) and "marine" (synset 301380721). The generator ignored those two lines, causing the described error, although the latter word being ignored did not cause any other errors. The bug was discovered when the Twitter bot difference (https://github.com/hatkirby/difference) generated a tweet (https://twitter.com/differencebot/status/722084219925700613) as a result of returning the noun "tearaway" in a full hyponym query of "artifact". 
--- generator/generator.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'generator') diff --git a/generator/generator.cpp b/generator/generator.cpp index e2ebfa1..3201154 100644 --- a/generator/generator.cpp +++ b/generator/generator.cpp @@ -1103,7 +1103,7 @@ int main(int argc, char** argv) { ppgs.update(); - std::regex relation("^s\\(([134]\\d{8}),(\\d+),'([\\w ]+)',"); + std::regex relation("^s\\(([134]\\d{8}),(\\d+),'(.+)',\\w,\\d+,\\d+\\)\\.$"); std::smatch relation_data; if (!std::regex_search(line, relation_data, relation)) { @@ -1113,6 +1113,11 @@ int main(int argc, char** argv) int synset_id = stoi(relation_data[1]); int wnum = stoi(relation_data[2]); std::string word = relation_data[3]; + size_t word_it; + while ((word_it = word.find("''")) != std::string::npos) + { + word.erase(word_it, 1); + } std::string query; switch (synset_id / 100000000) -- cgit 1.4.1