From 04338f2b040fee5142904c062e0e38c836601034 Mon Sep 17 00:00:00 2001 From: Kelly Rauchenberger Date: Sun, 17 Apr 2016 13:44:37 -0400 Subject: Fixed perfect rhyming Rhyme detection now ensures that any rhymes it finds are perfect rhymes and not identical rhymes. Rhyme detection is also now a lot faster because additional information is stored in the datafile. Also fixed a bug in the query interface (and the generator) that could cause incorrect queries to be executed. --- generator/generator.cpp | 128 +++++++++++++++++++++++++++++++++++++++--------- generator/schema.sql | 8 +++ lib/adjective_query.cpp | 68 ++++++++++++++++++++++--- lib/adjective_query.h | 10 +++- lib/adverb_query.cpp | 68 ++++++++++++++++++++++--- lib/adverb_query.h | 10 +++- lib/data.cpp | 2 +- lib/frame_query.cpp | 2 +- lib/noun_query.cpp | 73 ++++++++++++++++++++++++--- lib/noun_query.h | 10 +++- lib/preposition.cpp | 2 +- lib/verb_query.cpp | 68 ++++++++++++++++++++++--- lib/verb_query.h | 10 +++- lib/word.cpp | 41 +++++++++------- lib/word.h | 17 ++++++- 15 files changed, 442 insertions(+), 75 deletions(-) diff --git a/generator/generator.cpp b/generator/generator.cpp index e67bda7..e2ebfa1 100644 --- a/generator/generator.cpp +++ b/generator/generator.cpp @@ -76,13 +76,24 @@ struct group_t { std::list> frames; }; +struct pronunciation_t { + std::string phonemes; + std::string prerhyme; + std::string rhyme; + + bool operator<(const pronunciation_t& other) const + { + return phonemes < other.phonemes; + } +}; + std::map groups; std::map verbs; std::map adjectives; std::map nouns; std::map> wn; std::map images; -std::map> pronunciations; +std::map> pronunciations; void print_usage() { @@ -590,7 +601,47 @@ int main(int argc, char** argv) std::string canonical(phoneme_data[1]); std::transform(std::begin(canonical), std::end(canonical), std::begin(canonical), ::tolower); - pronunciations[canonical].insert(phoneme_data[2]); + std::string phonemes = phoneme_data[2]; + auto phoneme_set = 
verbly::split>(phonemes, " "); + auto phemstrt = std::find_if(std::begin(phoneme_set), std::end(phoneme_set), [] (std::string phoneme) { + return phoneme.find("1") != std::string::npos; + }); + + pronunciation_t p; + p.phonemes = phonemes; + if (phemstrt != std::end(phoneme_set)) + { + std::stringstream rhymer; + for (auto it = phemstrt; it != std::end(phoneme_set); it++) + { + std::string naked; + std::remove_copy_if(std::begin(*it), std::end(*it), std::back_inserter(naked), [] (char ch) { + return isdigit(ch); + }); + + if (it != phemstrt) + { + rhymer << " "; + } + + rhymer << naked; + } + + p.rhyme = rhymer.str(); + + if (phemstrt != std::begin(phoneme_set)) + { + phemstrt--; + p.prerhyme = *phemstrt; + } else { + p.prerhyme = ""; + } + } else { + p.prerhyme = ""; + p.rhyme = ""; + } + + pronunciations[canonical].insert(p); } } @@ -720,7 +771,7 @@ int main(int argc, char** argv) db_error(ppdb, query); } - sqlite3_bind_text(ppstmt, 1, prep.c_str(), prep.length(), SQLITE_STATIC); + sqlite3_bind_text(ppstmt, 1, prep.c_str(), prep.length(), SQLITE_TRANSIENT); if (sqlite3_step(ppstmt) != SQLITE_DONE) { @@ -752,7 +803,7 @@ int main(int argc, char** argv) } sqlite3_bind_int(ppstmt, 1, rowid); - sqlite3_bind_text(ppstmt, 2, group.c_str(), group.length(), SQLITE_STATIC); + sqlite3_bind_text(ppstmt, 2, group.c_str(), group.length(), SQLITE_TRANSIENT); if (sqlite3_step(ppstmt) != SQLITE_DONE) { @@ -775,11 +826,11 @@ int main(int argc, char** argv) db_error(ppdb, query); } - sqlite3_bind_text(ppstmt, 1, mapping.second.infinitive.c_str(), mapping.second.infinitive.length(), SQLITE_STATIC); - sqlite3_bind_text(ppstmt, 2, mapping.second.past_tense.c_str(), mapping.second.past_tense.length(), SQLITE_STATIC); - sqlite3_bind_text(ppstmt, 3, mapping.second.past_participle.c_str(), mapping.second.past_participle.length(), SQLITE_STATIC); - sqlite3_bind_text(ppstmt, 4, mapping.second.ing_form.c_str(), mapping.second.ing_form.length(), SQLITE_STATIC); - sqlite3_bind_text(ppstmt, 5, 
mapping.second.s_form.c_str(), mapping.second.s_form.length(), SQLITE_STATIC); + sqlite3_bind_text(ppstmt, 1, mapping.second.infinitive.c_str(), mapping.second.infinitive.length(), SQLITE_TRANSIENT); + sqlite3_bind_text(ppstmt, 2, mapping.second.past_tense.c_str(), mapping.second.past_tense.length(), SQLITE_TRANSIENT); + sqlite3_bind_text(ppstmt, 3, mapping.second.past_participle.c_str(), mapping.second.past_participle.length(), SQLITE_TRANSIENT); + sqlite3_bind_text(ppstmt, 4, mapping.second.ing_form.c_str(), mapping.second.ing_form.length(), SQLITE_TRANSIENT); + sqlite3_bind_text(ppstmt, 5, mapping.second.s_form.c_str(), mapping.second.s_form.length(), SQLITE_TRANSIENT); if (sqlite3_step(ppstmt) != SQLITE_DONE) { @@ -811,14 +862,26 @@ int main(int argc, char** argv) for (auto pronunciation : pronunciations[canonical]) { - query = "INSERT INTO verb_pronunciations (verb_id, pronunciation) VALUES (?, ?)"; + if (!pronunciation.rhyme.empty()) + { + query = "INSERT INTO verb_pronunciations (verb_id, pronunciation, prerhyme, rhyme) VALUES (?, ?, ?, ?)"; + } else { + query = "INSERT INTO verb_pronunciations (verb_id, pronunciation) VALUES (?, ?)"; + } + if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) { db_error(ppdb, query); } sqlite3_bind_int(ppstmt, 1, rowid); - sqlite3_bind_text(ppstmt, 2, pronunciation.c_str(), pronunciation.length(), SQLITE_STATIC); + sqlite3_bind_text(ppstmt, 2, pronunciation.phonemes.c_str(), pronunciation.phonemes.length(), SQLITE_TRANSIENT); + + if (!pronunciation.rhyme.empty()) + { + sqlite3_bind_text(ppstmt, 3, pronunciation.prerhyme.c_str(), pronunciation.prerhyme.length(), SQLITE_TRANSIENT); + sqlite3_bind_text(ppstmt, 4, pronunciation.rhyme.c_str(), pronunciation.rhyme.length(), SQLITE_TRANSIENT); + } if (sqlite3_step(ppstmt) != SQLITE_DONE) { @@ -856,7 +919,7 @@ int main(int argc, char** argv) db_error(ppdb, query); } - sqlite3_bind_blob(ppstmt, 1, rdm.c_str(), rdm.size(), SQLITE_STATIC); + 
sqlite3_bind_blob(ppstmt, 1, rdm.c_str(), rdm.size(), SQLITE_TRANSIENT); if (sqlite3_step(ppstmt) != SQLITE_DONE) { @@ -949,7 +1012,7 @@ int main(int argc, char** argv) } sqlite3_bind_int(ppstmt, 1, gid); - sqlite3_bind_blob(ppstmt, 2, marshall.c_str(), marshall.length(), SQLITE_STATIC); + sqlite3_bind_blob(ppstmt, 2, marshall.c_str(), marshall.length(), SQLITE_TRANSIENT); if (sqlite3_step(ppstmt) != SQLITE_DONE) { @@ -1104,7 +1167,7 @@ int main(int argc, char** argv) db_error(ppdb, query); } - sqlite3_bind_text(ppstmt, 1, word.c_str(), word.length(), SQLITE_STATIC); + sqlite3_bind_text(ppstmt, 1, word.c_str(), word.length(), SQLITE_TRANSIENT); switch (synset_id / 100000000) { case 1: // Noun @@ -1119,7 +1182,7 @@ int main(int argc, char** argv) if (nouns.count(word) == 1) { - sqlite3_bind_text(ppstmt, 6, nouns[word].plural.c_str(), nouns[word].plural.length(), SQLITE_STATIC); + sqlite3_bind_text(ppstmt, 6, nouns[word].plural.c_str(), nouns[word].plural.length(), SQLITE_TRANSIENT); } break; @@ -1132,8 +1195,8 @@ int main(int argc, char** argv) if (adjectives.count(word) == 1) { - sqlite3_bind_text(ppstmt, 3, adjectives[word].comparative.c_str(), adjectives[word].comparative.length(), SQLITE_STATIC); - sqlite3_bind_text(ppstmt, 4, adjectives[word].superlative.c_str(), adjectives[word].superlative.length(), SQLITE_STATIC); + sqlite3_bind_text(ppstmt, 3, adjectives[word].comparative.c_str(), adjectives[word].comparative.length(), SQLITE_TRANSIENT); + sqlite3_bind_text(ppstmt, 4, adjectives[word].superlative.c_str(), adjectives[word].superlative.length(), SQLITE_TRANSIENT); } break; @@ -1173,21 +1236,36 @@ int main(int argc, char** argv) { case 1: // Noun { - query = "INSERT INTO noun_pronunciations (noun_id, pronunciation) VALUES (?, ?)"; + if (!pronunciation.rhyme.empty()) + { + query = "INSERT INTO noun_pronunciations (noun_id, pronunciation, prerhyme, rhyme) VALUES (?, ?, ?, ?)"; + } else { + query = "INSERT INTO noun_pronunciations (noun_id, pronunciation) VALUES 
(?, ?)"; + } break; } case 3: // Adjective { - query = "INSERT INTO adjective_pronunciations (adjective_id, pronunciation) VALUES (?, ?)"; + if (!pronunciation.rhyme.empty()) + { + query = "INSERT INTO adjective_pronunciations (adjective_id, pronunciation, prerhyme, rhyme) VALUES (?, ?, ?, ?)"; + } else { + query = "INSERT INTO adjective_pronunciations (adjective_id, pronunciation) VALUES (?, ?)"; + } break; } case 4: // Adverb { - query = "INSERT INTO adverb_pronunciations (adverb_id, pronunciation) VALUES (?, ?)"; + if (!pronunciation.rhyme.empty()) + { + query = "INSERT INTO adverb_pronunciations (adverb_id, pronunciation, prerhyme, rhyme) VALUES (?, ?, ?, ?)"; + } else { + query = "INSERT INTO adverb_pronunciations (adverb_id, pronunciation) VALUES (?, ?)"; + } break; } @@ -1199,7 +1277,13 @@ int main(int argc, char** argv) } sqlite3_bind_int(ppstmt, 1, rowid); - sqlite3_bind_text(ppstmt, 2, pronunciation.c_str(), pronunciation.length(), SQLITE_STATIC); + sqlite3_bind_text(ppstmt, 2, pronunciation.phonemes.c_str(), pronunciation.phonemes.length(), SQLITE_TRANSIENT); + + if (!pronunciation.rhyme.empty()) + { + sqlite3_bind_text(ppstmt, 3, pronunciation.prerhyme.c_str(), pronunciation.prerhyme.length(), SQLITE_TRANSIENT); + sqlite3_bind_text(ppstmt, 4, pronunciation.rhyme.c_str(), pronunciation.rhyme.length(), SQLITE_TRANSIENT); + } if (sqlite3_step(ppstmt) != SQLITE_DONE) { @@ -2188,7 +2272,7 @@ int main(int argc, char** argv) db_error(ppdb, query); } - sqlite3_bind_text(ppstmt, 1, syn.c_str(), 1, SQLITE_STATIC); + sqlite3_bind_text(ppstmt, 1, syn.c_str(), 1, SQLITE_TRANSIENT); sqlite3_bind_int(ppstmt, 2, wn[synset_id][wnum]); if (sqlite3_step(ppstmt) != SQLITE_DONE) diff --git a/generator/schema.sql b/generator/schema.sql index 9a39944..1836c62 100644 --- a/generator/schema.sql +++ b/generator/schema.sql @@ -184,6 +184,8 @@ DROP TABLE IF EXISTS `noun_pronunciations`; CREATE TABLE `noun_pronunciations` ( `noun_id` INTEGER NOT NULL, `pronunciation` VARCHAR(64) 
NOT NULL, + `prerhyme` VARCHAR(8), + `rhyme` VARCHAR(64), FOREIGN KEY (`noun_id`) REFERENCES `nouns`(`noun_id`) ); @@ -191,6 +193,8 @@ DROP TABLE IF EXISTS `verb_pronunciations`; CREATE TABLE `verb_pronunciations` ( `verb_id` INTEGER NOT NULL, `pronunciation` VARCHAR(64) NOT NULL, + `prerhyme` VARCHAR(8), + `rhyme` VARCHAR(64), FOREIGN KEY (`verb_id`) REFERENCES `verbs`(`verb_id`) ); @@ -198,6 +202,8 @@ DROP TABLE IF EXISTS `adjective_pronunciations`; CREATE TABLE `adjective_pronunciations` ( `adjective_id` INTEGER NOT NULL, `pronunciation` VARCHAR(64) NOT NULL, + `prerhyme` VARCHAR(8), + `rhyme` VARCHAR(64), FOREIGN KEY (`adjective_id`) REFERENCES `adjectives`(`adjective_id`) ); @@ -205,6 +211,8 @@ DROP TABLE IF EXISTS `adverb_pronunciations`; CREATE TABLE `adverb_pronunciations` ( `adverb_id` INTEGER NOT NULL, `pronunciation` VARCHAR(64) NOT NULL, + `prerhyme` VARCHAR(8), + `rhyme` VARCHAR(64), FOREIGN KEY (`adverb_id`) REFERENCES `adverbs`(`adverb_id`) ); diff --git a/lib/adjective_query.cpp b/lib/adjective_query.cpp index a7f915c..2bea68f 100644 --- a/lib/adjective_query.cpp +++ b/lib/adjective_query.cpp @@ -33,7 +33,7 @@ namespace verbly { adjective_query& adjective_query::rhymes_with(const word& _word) { - for (auto rhyme : _word.rhyme_phonemes()) + for (auto rhyme : _word.get_rhymes()) { _rhymes.push_back(rhyme); } @@ -53,6 +53,34 @@ namespace verbly { return *this; } + adjective_query& adjective_query::has_rhyming_noun() + { + _has_rhyming_noun = true; + + return *this; + } + + adjective_query& adjective_query::has_rhyming_adjective() + { + _has_rhyming_adjective = true; + + return *this; + } + + adjective_query& adjective_query::has_rhyming_adverb() + { + _has_rhyming_adverb = true; + + return *this; + } + + adjective_query& adjective_query::has_rhyming_verb() + { + _has_rhyming_verb = true; + + return *this; + } + adjective_query& adjective_query::with_prefix(filter _f) { _f.clean(); @@ -227,16 +255,37 @@ namespace verbly { if (!_rhymes.empty()) { - 
std::list clauses(_rhymes.size(), "pronunciation LIKE ?"); + std::list clauses(_rhymes.size(), "(prerhyme != ? AND rhyme = ?)"); std::string cond = "adjective_id IN (SELECT adjective_id FROM adjective_pronunciations WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; conditions.push_back(cond); - for (auto rhyme : _rhymes) + for (auto rhy : _rhymes) { - bindings.emplace_back("%" + rhyme); + bindings.emplace_back(rhy.get_prerhyme()); + bindings.emplace_back(rhy.get_rhyme()); } } + if (_has_rhyming_noun) + { + conditions.push_back("adjective_id IN (SELECT a.adjective_id FROM adjectives AS a INNER JOIN adjective_pronunciations AS curp ON curp.adjective_id = a.adjective_id INNER JOIN noun_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)"); + } + + if (_has_rhyming_adjective) + { + conditions.push_back("adjective_id IN (SELECT a.adjective_id FROM adjectives AS a INNER JOIN adjective_pronunciations AS curp ON curp.adjective_id = a.adjective_id INNER JOIN adjective_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme AND rhmp.adjective_id != curp.adjective_id)"); + } + + if (_has_rhyming_adverb) + { + conditions.push_back("adjective_id IN (SELECT a.adjective_id FROM adjectives AS a INNER JOIN adjective_pronunciations AS curp ON curp.adjective_id = a.adjective_id INNER JOIN adverb_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)"); + } + + if (_has_rhyming_verb) + { + conditions.push_back("adjective_id IN (SELECT a.adjective_id FROM adjectives AS a INNER JOIN adjective_pronunciations AS curp ON curp.adjective_id = a.adjective_id INNER JOIN verb_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)"); + } + for (auto except : _except) { conditions.push_back("adjective_id != ?"); @@ -816,7 +865,7 @@ namespace verbly { case binding::type::string: { - sqlite3_bind_text(ppstmt, i, binding.get_string().c_str(), 
binding.get_string().length(), SQLITE_STATIC); + sqlite3_bind_text(ppstmt, i, binding.get_string().c_str(), binding.get_string().length(), SQLITE_TRANSIENT); break; } @@ -894,7 +943,7 @@ namespace verbly { for (auto& adjective : output) { - query = "SELECT pronunciation FROM adjective_pronunciations WHERE adjective_id = ?"; + query = "SELECT pronunciation, prerhyme, rhyme FROM adjective_pronunciations WHERE adjective_id = ?"; if (sqlite3_prepare_v2(_data.ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) { throw std::runtime_error(sqlite3_errmsg(_data.ppdb)); @@ -908,6 +957,13 @@ namespace verbly { auto phonemes = verbly::split>(pronunciation, " "); adjective.pronunciations.push_back(phonemes); + + if ((sqlite3_column_type(ppstmt, 1) != SQLITE_NULL) && (sqlite3_column_type(ppstmt, 2) != SQLITE_NULL)) + { + std::string prerhyme(reinterpret_cast(sqlite3_column_text(ppstmt, 1))); + std::string rhyming(reinterpret_cast(sqlite3_column_text(ppstmt, 2))); + adjective.rhymes.emplace_back(prerhyme, rhyming); + } } sqlite3_finalize(ppstmt); diff --git a/lib/adjective_query.h b/lib/adjective_query.h index b2859dc..030a494 100644 --- a/lib/adjective_query.h +++ b/lib/adjective_query.h @@ -12,6 +12,10 @@ namespace verbly { adjective_query& except(const adjective& _word); adjective_query& rhymes_with(const word& _word); adjective_query& has_pronunciation(); + adjective_query& has_rhyming_noun(); + adjective_query& has_rhyming_adjective(); + adjective_query& has_rhyming_adverb(); + adjective_query& has_rhyming_verb(); adjective_query& requires_comparative_form(); adjective_query& requires_superlative_form(); @@ -54,9 +58,13 @@ namespace verbly { const data& _data; int _limit = unlimited; bool _random = false; - std::list _rhymes; + std::list _rhymes; std::list _except; bool _has_prn = false; + bool _has_rhyming_noun = false; + bool _has_rhyming_adjective = false; + bool _has_rhyming_adverb = false; + bool _has_rhyming_verb = false; bool _requires_comparative_form = 
false; bool _requires_superlative_form = false; diff --git a/lib/adverb_query.cpp b/lib/adverb_query.cpp index 30ba92b..797e6a6 100644 --- a/lib/adverb_query.cpp +++ b/lib/adverb_query.cpp @@ -33,7 +33,7 @@ namespace verbly { adverb_query& adverb_query::rhymes_with(const word& _word) { - for (auto rhyme : _word.rhyme_phonemes()) + for (auto rhyme : _word.get_rhymes()) { _rhymes.push_back(rhyme); } @@ -53,6 +53,34 @@ namespace verbly { return *this; } + adverb_query& adverb_query::has_rhyming_noun() + { + _has_rhyming_noun = true; + + return *this; + } + + adverb_query& adverb_query::has_rhyming_adjective() + { + _has_rhyming_adjective = true; + + return *this; + } + + adverb_query& adverb_query::has_rhyming_adverb() + { + _has_rhyming_adverb = true; + + return *this; + } + + adverb_query& adverb_query::has_rhyming_verb() + { + _has_rhyming_verb = true; + + return *this; + } + adverb_query& adverb_query::requires_comparative_form() { _requires_comparative_form = true; @@ -181,16 +209,37 @@ namespace verbly { if (!_rhymes.empty()) { - std::list clauses(_rhymes.size(), "pronunciation LIKE ?"); + std::list clauses(_rhymes.size(), "(prerhyme != ? 
AND rhyme = ?)"); std::string cond = "adverb_id IN (SELECT adverb_id FROM adverb_pronunciations WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; conditions.push_back(cond); - for (auto rhyme : _rhymes) + for (auto rhy : _rhymes) { - bindings.emplace_back("%" + rhyme); + bindings.emplace_back(rhy.get_prerhyme()); + bindings.emplace_back(rhy.get_rhyme()); } } + if (_has_rhyming_noun) + { /* curp is joined on adverb_id, the only id column adverb_pronunciations has */ + conditions.push_back("adverb_id IN (SELECT a.adverb_id FROM adverbs AS a INNER JOIN adverb_pronunciations AS curp ON curp.adverb_id = a.adverb_id INNER JOIN noun_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)"); + } + + if (_has_rhyming_adjective) + { + conditions.push_back("adverb_id IN (SELECT a.adverb_id FROM adverbs AS a INNER JOIN adverb_pronunciations AS curp ON curp.adverb_id = a.adverb_id INNER JOIN adjective_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)"); + } + + if (_has_rhyming_adverb) + { + conditions.push_back("adverb_id IN (SELECT a.adverb_id FROM adverbs AS a INNER JOIN adverb_pronunciations AS curp ON curp.adverb_id = a.adverb_id INNER JOIN adverb_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme AND rhmp.adverb_id != curp.adverb_id)"); + } + + if (_has_rhyming_verb) + { + conditions.push_back("adverb_id IN (SELECT a.adverb_id FROM adverbs AS a INNER JOIN adverb_pronunciations AS curp ON curp.adverb_id = a.adverb_id INNER JOIN verb_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)"); + } + for (auto except : _except) { conditions.push_back("adverb_id != ?"); @@ -538,7 +587,7 @@ namespace verbly { case binding::type::string: { - sqlite3_bind_text(ppstmt, i, binding.get_string().c_str(), binding.get_string().length(), SQLITE_STATIC); + sqlite3_bind_text(ppstmt, i, binding.get_string().c_str(), binding.get_string().length(), SQLITE_TRANSIENT); break; } @@ -601,7 +650,7 @@ namespace verbly { for 
(auto& adverb : output) { - query = "SELECT pronunciation FROM adverb_pronunciations WHERE adverb_id = ?"; + query = "SELECT pronunciation, prerhyme, rhyme FROM adverb_pronunciations WHERE adverb_id = ?"; if (sqlite3_prepare_v2(_data.ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) { throw std::runtime_error(sqlite3_errmsg(_data.ppdb)); @@ -615,6 +664,13 @@ namespace verbly { auto phonemes = verbly::split>(pronunciation, " "); adverb.pronunciations.push_back(phonemes); + + if ((sqlite3_column_type(ppstmt, 1) != SQLITE_NULL) && (sqlite3_column_type(ppstmt, 2) != SQLITE_NULL)) + { + std::string prerhyme(reinterpret_cast(sqlite3_column_text(ppstmt, 1))); + std::string rhyming(reinterpret_cast(sqlite3_column_text(ppstmt, 2))); + adverb.rhymes.emplace_back(prerhyme, rhyming); + } } sqlite3_finalize(ppstmt); diff --git a/lib/adverb_query.h b/lib/adverb_query.h index e9354bb..403a616 100644 --- a/lib/adverb_query.h +++ b/lib/adverb_query.h @@ -12,6 +12,10 @@ namespace verbly { adverb_query& except(const adverb& _word); adverb_query& rhymes_with(const word& _word); adverb_query& has_pronunciation(); + adverb_query& has_rhyming_noun(); + adverb_query& has_rhyming_adjective(); + adverb_query& has_rhyming_adverb(); + adverb_query& has_rhyming_verb(); adverb_query& requires_comparative_form(); adverb_query& requires_superlative_form(); @@ -41,9 +45,13 @@ namespace verbly { const data& _data; int _limit = unlimited; bool _random = false; - std::list _rhymes; + std::list _rhymes; std::list _except; bool _has_prn = false; + bool _has_rhyming_noun = false; + bool _has_rhyming_adjective = false; + bool _has_rhyming_adverb = false; + bool _has_rhyming_verb = false; bool _requires_comparative_form = false; bool _requires_superlative_form = false; diff --git a/lib/data.cpp b/lib/data.cpp index c14956f..db42487 100644 --- a/lib/data.cpp +++ b/lib/data.cpp @@ -1,7 +1,7 @@ #include "verbly.h" namespace verbly { - + data::data(std::string datafile) { if 
(sqlite3_open_v2(datafile.c_str(), &ppdb, SQLITE_OPEN_READONLY, NULL) != SQLITE_OK) diff --git a/lib/frame_query.cpp b/lib/frame_query.cpp index 3c4a3e8..11f0432 100644 --- a/lib/frame_query.cpp +++ b/lib/frame_query.cpp @@ -73,7 +73,7 @@ namespace verbly { case binding::type::string: { - sqlite3_bind_text(ppstmt, i, binding.get_string().c_str(), binding.get_string().length(), SQLITE_STATIC); + sqlite3_bind_text(ppstmt, i, binding.get_string().c_str(), binding.get_string().length(), SQLITE_TRANSIENT); break; } diff --git a/lib/noun_query.cpp b/lib/noun_query.cpp index 19a1297..b4336b6 100644 --- a/lib/noun_query.cpp +++ b/lib/noun_query.cpp @@ -33,7 +33,7 @@ namespace verbly { noun_query& noun_query::rhymes_with(const word& _word) { - for (auto rhyme : _word.rhyme_phonemes()) + for (auto rhyme : _word.get_rhymes()) { _rhymes.push_back(rhyme); } @@ -53,6 +53,34 @@ namespace verbly { return *this; } + noun_query& noun_query::has_rhyming_noun() + { + _has_rhyming_noun = true; + + return *this; + } + + noun_query& noun_query::has_rhyming_adjective() + { + _has_rhyming_adjective = true; + + return *this; + } + + noun_query& noun_query::has_rhyming_adverb() + { + _has_rhyming_adverb = true; + + return *this; + } + + noun_query& noun_query::has_rhyming_verb() + { + _has_rhyming_verb = true; + + return *this; + } + noun_query& noun_query::with_singular_form(std::string _arg) { _with_singular_form.push_back(_arg); @@ -483,16 +511,37 @@ namespace verbly { if (!_rhymes.empty()) { - std::list clauses(_rhymes.size(), "pronunciation LIKE ?"); + std::list clauses(_rhymes.size(), "(prerhyme != ? 
AND rhyme = ?)"); std::string cond = "noun_id IN (SELECT noun_id FROM noun_pronunciations WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; conditions.push_back(cond); - for (auto rhyme : _rhymes) + for (auto rhy : _rhymes) { - bindings.emplace_back("%" + rhyme); + bindings.emplace_back(rhy.get_prerhyme()); + bindings.emplace_back(rhy.get_rhyme()); } } + if (_has_rhyming_noun) + { + conditions.push_back("noun_id IN (SELECT a.noun_id FROM nouns AS a INNER JOIN noun_pronunciations AS curp ON curp.noun_id = a.noun_id INNER JOIN noun_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme AND rhmp.noun_id != curp.noun_id)"); + } + + if (_has_rhyming_adjective) + { + conditions.push_back("noun_id IN (SELECT a.noun_id FROM nouns AS a INNER JOIN noun_pronunciations AS curp ON curp.noun_id = a.noun_id INNER JOIN adjective_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)"); + } + + if (_has_rhyming_adverb) + { + conditions.push_back("noun_id IN (SELECT a.noun_id FROM nouns AS a INNER JOIN noun_pronunciations AS curp ON curp.noun_id = a.noun_id INNER JOIN adverb_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)"); + } + + if (_has_rhyming_verb) + { + conditions.push_back("noun_id IN (SELECT a.noun_id FROM nouns AS a INNER JOIN noun_pronunciations AS curp ON curp.noun_id = a.noun_id INNER JOIN verb_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)"); + } + for (auto except : _except) { conditions.push_back("noun_id != ?"); @@ -1768,7 +1817,7 @@ namespace verbly { { throw std::runtime_error(sqlite3_errmsg(_data.ppdb)); } - + int i = 1; for (auto& binding : bindings) { @@ -1783,7 +1832,7 @@ namespace verbly { case binding::type::string: { - sqlite3_bind_text(ppstmt, i, binding.get_string().c_str(), binding.get_string().length(), SQLITE_STATIC); + sqlite3_bind_text(ppstmt, i, binding.get_string().c_str(), 
binding.get_string().length(), SQLITE_TRANSIENT); break; } @@ -1791,7 +1840,7 @@ namespace verbly { i++; } - + /* for (auto adj : _derived_from_adjective) { @@ -1843,7 +1892,7 @@ namespace verbly { for (auto& noun : output) { - query = "SELECT pronunciation FROM noun_pronunciations WHERE noun_id = ?"; + query = "SELECT pronunciation, prerhyme, rhyme FROM noun_pronunciations WHERE noun_id = ?"; if (sqlite3_prepare_v2(_data.ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) { throw std::runtime_error(sqlite3_errmsg(_data.ppdb)); @@ -1857,6 +1906,14 @@ namespace verbly { auto phonemes = verbly::split>(pronunciation, " "); noun.pronunciations.push_back(phonemes); + + if ((sqlite3_column_type(ppstmt, 1) != SQLITE_NULL) && (sqlite3_column_type(ppstmt, 2) != SQLITE_NULL)) + { + std::string prerhyme(reinterpret_cast(sqlite3_column_text(ppstmt, 1))); + std::string rhyming(reinterpret_cast(sqlite3_column_text(ppstmt, 2))); + + noun.rhymes.emplace_back(prerhyme, rhyming); + } } sqlite3_finalize(ppstmt); diff --git a/lib/noun_query.h b/lib/noun_query.h index 8768f5d..6b5733f 100644 --- a/lib/noun_query.h +++ b/lib/noun_query.h @@ -12,6 +12,10 @@ namespace verbly { noun_query& except(const noun& _word); noun_query& rhymes_with(const word& _word); noun_query& has_pronunciation(); + noun_query& has_rhyming_noun(); + noun_query& has_rhyming_adjective(); + noun_query& has_rhyming_adverb(); + noun_query& has_rhyming_verb(); noun_query& with_singular_form(std::string _arg); noun_query& with_prefix(filter _f); @@ -86,9 +90,13 @@ namespace verbly { const data& _data; int _limit = unlimited; bool _random = false; - std::list _rhymes; + std::list _rhymes; std::list _except; bool _has_prn = false; + bool _has_rhyming_noun = false; + bool _has_rhyming_adjective = false; + bool _has_rhyming_adverb = false; + bool _has_rhyming_verb = false; std::list _with_singular_form; filter _with_prefix; diff --git a/lib/preposition.cpp b/lib/preposition.cpp index 8df13aa..cea9165 100644 
--- a/lib/preposition.cpp +++ b/lib/preposition.cpp @@ -83,7 +83,7 @@ namespace verbly { case binding::type::string: { - sqlite3_bind_text(ppstmt, i, binding.get_string().c_str(), binding.get_string().length(), SQLITE_STATIC); + sqlite3_bind_text(ppstmt, i, binding.get_string().c_str(), binding.get_string().length(), SQLITE_TRANSIENT); break; } diff --git a/lib/verb_query.cpp b/lib/verb_query.cpp index 929ecc7..654bc33 100644 --- a/lib/verb_query.cpp +++ b/lib/verb_query.cpp @@ -33,7 +33,7 @@ namespace verbly { verb_query& verb_query::rhymes_with(const word& _word) { - for (auto rhyme : _word.rhyme_phonemes()) + for (auto rhyme : _word.get_rhymes()) { _rhymes.push_back(rhyme); } @@ -53,6 +53,34 @@ namespace verbly { return *this; } + verb_query& verb_query::has_rhyming_noun() + { + _has_rhyming_noun = true; + + return *this; + } + + verb_query& verb_query::has_rhyming_adjective() + { + _has_rhyming_adjective = true; + + return *this; + } + + verb_query& verb_query::has_rhyming_adverb() + { + _has_rhyming_adverb = true; + + return *this; + } + + verb_query& verb_query::has_rhyming_verb() + { + _has_rhyming_verb = true; + + return *this; + } + verb_query& verb_query::has_frames() { this->_has_frames = true; @@ -74,16 +102,37 @@ namespace verbly { if (!_rhymes.empty()) { - std::list clauses(_rhymes.size(), "pronunciation LIKE ?"); + std::list clauses(_rhymes.size(), "(prerhyme != ? 
AND rhyme = ?)"); std::string cond = "verb_id IN (SELECT verb_id FROM verb_pronunciations WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; conditions.push_back(cond); - for (auto rhyme : _rhymes) + for (auto rhy : _rhymes) { - bindings.emplace_back("%" + rhyme); + bindings.emplace_back(rhy.get_prerhyme()); + bindings.emplace_back(rhy.get_rhyme()); } } + if (_has_rhyming_noun) + { /* curp is joined on verb_id, the key verb_pronunciations shares with verbs */ + conditions.push_back("verb_id IN (SELECT a.verb_id FROM verbs AS a INNER JOIN verb_pronunciations AS curp ON curp.verb_id = a.verb_id INNER JOIN noun_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)"); + } + + if (_has_rhyming_adjective) + { + conditions.push_back("verb_id IN (SELECT a.verb_id FROM verbs AS a INNER JOIN verb_pronunciations AS curp ON curp.verb_id = a.verb_id INNER JOIN adjective_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)"); + } + + if (_has_rhyming_adverb) + { + conditions.push_back("verb_id IN (SELECT a.verb_id FROM verbs AS a INNER JOIN verb_pronunciations AS curp ON curp.verb_id = a.verb_id INNER JOIN adverb_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)"); + } + + if (_has_rhyming_verb) + { + conditions.push_back("verb_id IN (SELECT a.verb_id FROM verbs AS a INNER JOIN verb_pronunciations AS curp ON curp.verb_id = a.verb_id INNER JOIN verb_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme AND rhmp.verb_id != curp.verb_id)"); + } + for (auto except : _except) { conditions.push_back("verb_id != ?"); @@ -132,7 +181,7 @@ namespace verbly { case binding::type::string: { - sqlite3_bind_text(ppstmt, i, binding.get_string().c_str(), binding.get_string().length(), SQLITE_STATIC); + sqlite3_bind_text(ppstmt, i, binding.get_string().c_str(), binding.get_string().length(), SQLITE_TRANSIENT); break; } @@ -158,7 +207,7 @@ namespace verbly { for (auto& verb : output) { - query = "SELECT 
pronunciation FROM verb_pronunciations WHERE verb_id = ?"; + query = "SELECT pronunciation, prerhyme, rhyme FROM verb_pronunciations WHERE verb_id = ?"; if (sqlite3_prepare_v2(_data.ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) { throw std::runtime_error(sqlite3_errmsg(_data.ppdb)); @@ -172,6 +221,13 @@ namespace verbly { auto phonemes = verbly::split>(pronunciation, " "); verb.pronunciations.push_back(phonemes); + + if ((sqlite3_column_type(ppstmt, 1) != SQLITE_NULL) && (sqlite3_column_type(ppstmt, 2) != SQLITE_NULL)) + { + std::string prerhyme(reinterpret_cast(sqlite3_column_text(ppstmt, 1))); + std::string rhyming(reinterpret_cast(sqlite3_column_text(ppstmt, 2))); + verb.rhymes.emplace_back(prerhyme, rhyming); + } } sqlite3_finalize(ppstmt); diff --git a/lib/verb_query.h b/lib/verb_query.h index 24f5732..a07dc18 100644 --- a/lib/verb_query.h +++ b/lib/verb_query.h @@ -12,6 +12,10 @@ namespace verbly { verb_query& except(const verb& _word); verb_query& rhymes_with(const word& _word); verb_query& has_pronunciation(); + verb_query& has_rhyming_noun(); + verb_query& has_rhyming_adjective(); + verb_query& has_rhyming_adverb(); + verb_query& has_rhyming_verb(); verb_query& has_frames(); @@ -23,10 +27,14 @@ namespace verbly { const data& _data; int _limit = unlimited; bool _random = false; - std::list _rhymes; + std::list _rhymes; std::list _except; bool _has_prn = false; bool _has_frames = false; + bool _has_rhyming_noun = false; + bool _has_rhyming_adjective = false; + bool _has_rhyming_adverb = false; + bool _has_rhyming_verb = false; }; }; diff --git a/lib/word.cpp b/lib/word.cpp index 13c611f..49e34a1 100644 --- a/lib/word.cpp +++ b/lib/word.cpp @@ -3,6 +3,26 @@ namespace verbly { + rhyme::rhyme(std::string prerhyme, std::string phonemes) : _prerhyme(prerhyme), _rhyme(phonemes) + { + + } + + std::string rhyme::get_prerhyme() const + { + return _prerhyme; + } + + std::string rhyme::get_rhyme() const + { + return _rhyme; + } + + bool 
rhyme::operator==(const rhyme& other) const + { + return std::tie(_prerhyme, _rhyme) == std::tie(other._prerhyme, other._rhyme); + } + word::word() { @@ -13,28 +33,11 @@ namespace verbly { } - std::list word::rhyme_phonemes() const + std::list word::get_rhymes() const { assert(_valid == true); - std::list result; - - for (auto pronunciation : pronunciations) - { - auto phemstrt = std::find_if(std::begin(pronunciation), std::end(pronunciation), [] (std::string phoneme) { - return phoneme.find("1") != std::string::npos; - }); - - std::stringstream rhymer; - for (auto it = phemstrt; it != std::end(pronunciation); it++) - { - rhymer << " " << *it; - } - - result.push_back(rhymer.str()); - } - - return result; + return rhymes; } bool word::starts_with_vowel_sound() const diff --git a/lib/word.h b/lib/word.h index dc6fac8..08797a3 100644 --- a/lib/word.h +++ b/lib/word.h @@ -3,6 +3,20 @@ namespace verbly { + class rhyme { + public: + rhyme(std::string prerhyme, std::string phonemes); + + std::string get_prerhyme() const; + std::string get_rhyme() const; + + bool operator==(const rhyme& other) const; + + private: + std::string _prerhyme; + std::string _rhyme; + }; + class word { protected: const data* _data; @@ -10,6 +24,7 @@ namespace verbly { bool _valid = false; std::list> pronunciations; + std::list rhymes; word(); word(const data& _data, int _id); @@ -24,7 +39,7 @@ namespace verbly { public: virtual std::string base_form() const = 0; - std::list rhyme_phonemes() const; + std::list get_rhymes() const; bool starts_with_vowel_sound() const; }; -- cgit 1.4.1