From 6c2aca03c89b37e136ab4c7ea58b485dadc85bcd Mon Sep 17 00:00:00 2001 From: Kelly Rauchenberger Date: Mon, 30 May 2016 11:31:20 -0400 Subject: Added pronunciation syllable count and stress structure Also updated CMakeLists.txt such that including projects don't have to include sqlite3. --- CMakeLists.txt | 1 + generator/generator.cpp | 49 ++++++++++++++++++++++++++--------- generator/schema.sql | 8 ++++++ lib/adjective_query.cpp | 69 +++++++++++++++++++++++++++++++++++++++++++++++++ lib/adjective_query.h | 2 ++ lib/adverb_query.cpp | 69 +++++++++++++++++++++++++++++++++++++++++++++++++ lib/adverb_query.h | 2 ++ lib/noun_query.cpp | 69 +++++++++++++++++++++++++++++++++++++++++++++++++ lib/noun_query.h | 2 ++ lib/verb_query.cpp | 69 +++++++++++++++++++++++++++++++++++++++++++++++++ lib/verb_query.h | 2 ++ 11 files changed, 330 insertions(+), 12 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8af6cff..03ce05b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -8,3 +8,4 @@ include_directories(vendor/json/src) add_library(verbly lib/data.cpp lib/adjective.cpp lib/noun.cpp lib/verb.cpp lib/adverb.cpp lib/token.cpp lib/word.cpp lib/frame.cpp lib/preposition.cpp lib/adjective_query.cpp lib/adverb_query.cpp lib/noun_query.cpp lib/verb_query.cpp lib/frame_query.cpp) set_property(TARGET verbly PROPERTY CXX_STANDARD 11) set_property(TARGET verbly PROPERTY CXX_STANDARD_REQUIRED ON) +target_link_libraries(verbly ${sqlite3_LIBRARIES}) diff --git a/generator/generator.cpp b/generator/generator.cpp index 3201154..6a16467 100644 --- a/generator/generator.cpp +++ b/generator/generator.cpp @@ -80,6 +80,8 @@ struct pronunciation_t { std::string phonemes; std::string prerhyme; std::string rhyme; + int syllables = 0; + std::string stress; bool operator<(const pronunciation_t& other) const { @@ -609,6 +611,8 @@ int main(int argc, char** argv) pronunciation_t p; p.phonemes = phonemes; + + // Rhyme detection if (phemstrt != std::end(phoneme_set)) { std::stringstream rhymer; @@ -641,6 +645,23 @@ int main(int argc, char** argv) p.rhyme = ""; } + // Syllable/stress + for (auto phm : phoneme_set) + { + if (isdigit(phm.back())) + { + // It's a vowel! + p.syllables++; + + if (phm.back() == '1') + { + p.stress.push_back('1'); + } else { + p.stress.push_back('0'); + } + } + } + pronunciations[canonical].insert(p); } } @@ -864,9 +885,9 @@ int main(int argc, char** argv) { if (!pronunciation.rhyme.empty()) { - query = "INSERT INTO verb_pronunciations (verb_id, pronunciation, prerhyme, rhyme) VALUES (?, ?, ?, ?)"; + query = "INSERT INTO verb_pronunciations (verb_id, pronunciation, syllables, stress, prerhyme, rhyme) VALUES (?, ?, ?, ?, ?, ?)"; } else { - query = "INSERT INTO verb_pronunciations (verb_id, pronunciation) VALUES (?, ?)"; + query = "INSERT INTO verb_pronunciations (verb_id, pronunciation, syllables, stress) VALUES (?, ?, ?, ?)"; } if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) @@ -876,11 +897,13 @@ int main(int argc, char** argv) sqlite3_bind_int(ppstmt, 1, rowid); sqlite3_bind_text(ppstmt, 2, pronunciation.phonemes.c_str(), pronunciation.phonemes.length(), SQLITE_TRANSIENT); + sqlite3_bind_int(ppstmt, 3, pronunciation.syllables); + sqlite3_bind_text(ppstmt, 4, pronunciation.stress.c_str(), pronunciation.stress.length(), SQLITE_TRANSIENT); if (!pronunciation.rhyme.empty()) { - sqlite3_bind_text(ppstmt, 3, pronunciation.prerhyme.c_str(), pronunciation.prerhyme.length(), SQLITE_TRANSIENT); - sqlite3_bind_text(ppstmt, 4, pronunciation.rhyme.c_str(), pronunciation.rhyme.length(), SQLITE_TRANSIENT); + sqlite3_bind_text(ppstmt, 5, pronunciation.prerhyme.c_str(), pronunciation.prerhyme.length(), SQLITE_TRANSIENT); + sqlite3_bind_text(ppstmt, 6, pronunciation.rhyme.c_str(), pronunciation.rhyme.length(), SQLITE_TRANSIENT); } if (sqlite3_step(ppstmt) != SQLITE_DONE) @@ -1243,9 +1266,9 @@ int main(int argc, char** argv) { if (!pronunciation.rhyme.empty()) { - query = "INSERT INTO noun_pronunciations (noun_id, pronunciation, prerhyme, rhyme) VALUES (?, ?, ?, ?)"; + query = "INSERT INTO noun_pronunciations (noun_id, pronunciation, syllables, stress, prerhyme, rhyme) VALUES (?, ?, ?, ?, ?, ?)"; } else { - query = "INSERT INTO noun_pronunciations (noun_id, pronunciation) VALUES (?, ?)"; + query = "INSERT INTO noun_pronunciations (noun_id, pronunciation, syllables, stress) VALUES (?, ?, ?, ?)"; } break; @@ -1255,9 +1278,9 @@ int main(int argc, char** argv) { if (!pronunciation.rhyme.empty()) { - query = "INSERT INTO adjective_pronunciations (adjective_id, pronunciation, prerhyme, rhyme) VALUES (?, ?, ?, ?)"; + query = "INSERT INTO adjective_pronunciations (adjective_id, pronunciation, syllables, stress, prerhyme, rhyme) VALUES (?, ?, ?, ?, ?, ?)"; } else { - query = "INSERT INTO adjective_pronunciations (adjective_id, pronunciation) VALUES (?, ?)"; + query = "INSERT INTO adjective_pronunciations (adjective_id, pronunciation, syllables, stress) VALUES (?, ?, ?, ?)"; } break; @@ -1267,9 +1290,9 @@ int main(int argc, char** argv) { if (!pronunciation.rhyme.empty()) { - query = "INSERT INTO adverb_pronunciations (adverb_id, pronunciation, prerhyme, rhyme) VALUES (?, ?, ?, ?)"; + query = "INSERT INTO adverb_pronunciations (adverb_id, pronunciation, syllables, stress, prerhyme, rhyme) VALUES (?, ?, ?, ?, ?, ?)"; } else { - query = "INSERT INTO adverb_pronunciations (adverb_id, pronunciation) VALUES (?, ?)"; + query = "INSERT INTO adverb_pronunciations (adverb_id, pronunciation, syllables, stress) VALUES (?, ?, ?, ?)"; } break; @@ -1283,11 +1306,13 @@ int main(int argc, char** argv) sqlite3_bind_int(ppstmt, 1, rowid); sqlite3_bind_text(ppstmt, 2, pronunciation.phonemes.c_str(), pronunciation.phonemes.length(), SQLITE_TRANSIENT); + sqlite3_bind_int(ppstmt, 3, pronunciation.syllables); + sqlite3_bind_text(ppstmt, 4, pronunciation.stress.c_str(), pronunciation.stress.length(), SQLITE_TRANSIENT); if (!pronunciation.rhyme.empty()) { - sqlite3_bind_text(ppstmt, 3, pronunciation.prerhyme.c_str(), pronunciation.prerhyme.length(), SQLITE_TRANSIENT); - sqlite3_bind_text(ppstmt, 4, pronunciation.rhyme.c_str(), pronunciation.rhyme.length(), SQLITE_TRANSIENT); + sqlite3_bind_text(ppstmt, 5, pronunciation.prerhyme.c_str(), pronunciation.prerhyme.length(), SQLITE_TRANSIENT); + sqlite3_bind_text(ppstmt, 6, pronunciation.rhyme.c_str(), pronunciation.rhyme.length(), SQLITE_TRANSIENT); } if (sqlite3_step(ppstmt) != SQLITE_DONE) diff --git a/generator/schema.sql b/generator/schema.sql index 1836c62..410b536 100644 --- a/generator/schema.sql +++ b/generator/schema.sql @@ -186,6 +186,8 @@ CREATE TABLE `noun_pronunciations` ( `pronunciation` VARCHAR(64) NOT NULL, `prerhyme` VARCHAR(8), `rhyme` VARCHAR(64), + `syllables` INT NOT NULL, + `stress` VARCHAR(64) NOT NULL, FOREIGN KEY (`noun_id`) REFERENCES `nouns`(`noun_id`) ); @@ -195,6 +197,8 @@ CREATE TABLE `verb_pronunciations` ( `pronunciation` VARCHAR(64) NOT NULL, `prerhyme` VARCHAR(8), `rhyme` VARCHAR(64), + `syllables` INT NOT NULL, + `stress` VARCHAR(64) NOT NULL, FOREIGN KEY (`verb_id`) REFERENCES `verbs`(`verb_id`) ); @@ -204,6 +208,8 @@ CREATE TABLE `adjective_pronunciations` ( `pronunciation` VARCHAR(64) NOT NULL, `prerhyme` VARCHAR(8), `rhyme` VARCHAR(64), + `syllables` INT NOT NULL, + `stress` VARCHAR(64) NOT NULL, FOREIGN KEY (`adjective_id`) REFERENCES `adjectives`(`adjective_id`) ); @@ -213,6 +219,8 @@ CREATE TABLE `adverb_pronunciations` ( `pronunciation` VARCHAR(64) NOT NULL, `prerhyme` VARCHAR(8), `rhyme` VARCHAR(64), + `syllables` INT NOT NULL, + `stress` VARCHAR(64) NOT NULL, FOREIGN KEY (`adverb_id`) REFERENCES `adverbs`(`adverb_id`) ); diff --git a/lib/adjective_query.cpp b/lib/adjective_query.cpp index 5f1cbe7..90ccef4 100644 --- a/lib/adjective_query.cpp +++ b/lib/adjective_query.cpp @@ -88,6 +88,13 @@ namespace verbly { return *this; } + adjective_query& adjective_query::with_stress(filter> _arg) + { + _stress = _arg; + + return *this; + } + adjective_query& adjective_query::with_prefix(filter _f) { _f.clean(); @@ -338,6 +345,68 @@ namespace verbly { case adjective::positioning::undefined: break; } + if (!_stress.empty()) + { + std::stringstream cond; + if (_stress.get_notlogic()) + { + cond << "adjective_id NOT IN"; + } else { + cond << "adjective_id IN"; + } + + cond << "(SELECT adjective_id FROM adjective_pronunciations WHERE "; + + std::function>, bool)> recur = [&] (filter> f, bool notlogic) -> std::string { + switch (f.get_type()) + { + case filter>::type::singleton: + { + std::ostringstream _val; + for (auto syl : f.get_elem()) + { + if (syl) + { + _val << "1"; + } else { + _val << "0"; + } + } + + bindings.emplace_back(_val.str()); + + if (notlogic == f.get_notlogic()) + { + return "stress = ?"; + } else { + return "stress != ?"; + } + } + + case filter>::type::group: + { + bool truelogic = notlogic != f.get_notlogic(); + + std::list clauses; + std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter> f2) { + return recur(f2, truelogic); + }); + + if (truelogic == f.get_orlogic()) + { + return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; + } else { + return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; + } + } + } + }; + + cond << recur(_stress, _stress.get_notlogic()); + cond << ")"; + conditions.push_back(cond.str()); + } + if (!_with_prefix.empty()) { std::function, bool)> recur = [&] (filter f, bool notlogic) -> std::string { diff --git a/lib/adjective_query.h b/lib/adjective_query.h index 945e6bd..e6a6609 100644 --- a/lib/adjective_query.h +++ b/lib/adjective_query.h @@ -17,6 +17,7 @@ namespace verbly { adjective_query& has_rhyming_adjective(); adjective_query& has_rhyming_adverb(); adjective_query& has_rhyming_verb(); + adjective_query& with_stress(filter> _arg); adjective_query& requires_comparative_form(); adjective_query& requires_superlative_form(); @@ -66,6 +67,7 @@ namespace verbly { bool _has_rhyming_adjective = false; bool _has_rhyming_adverb = false; bool _has_rhyming_verb = false; + filter> _stress; bool _requires_comparative_form = false; bool _requires_superlative_form = false; diff --git a/lib/adverb_query.cpp b/lib/adverb_query.cpp index 1c22712..3e62bb7 100644 --- a/lib/adverb_query.cpp +++ b/lib/adverb_query.cpp @@ -102,6 +102,13 @@ namespace verbly { return *this; } + adverb_query& adverb_query::with_stress(filter> _arg) + { + _stress = _arg; + + return *this; + } + adverb_query& adverb_query::with_prefix(filter _f) { _f.clean(); @@ -263,6 +270,68 @@ namespace verbly { conditions.push_back("superlative IS NOT NULL"); } + if (!_stress.empty()) + { + std::stringstream cond; + if (_stress.get_notlogic()) + { + cond << "adverb_id NOT IN"; + } else { + cond << "adverb_id IN"; + } + + cond << "(SELECT adverb_id FROM adverb_pronunciations WHERE "; + + std::function>, bool)> recur = [&] (filter> f, bool notlogic) -> std::string { + switch (f.get_type()) + { + case filter>::type::singleton: + { + std::ostringstream _val; + for (auto syl : f.get_elem()) + { + if (syl) + { + _val << "1"; + } else { + _val << "0"; + } + } + + bindings.emplace_back(_val.str()); + + if (notlogic == f.get_notlogic()) + { + return "stress = ?"; + } else { + return "stress != ?"; + } + } + + case filter>::type::group: + { + bool truelogic = notlogic != f.get_notlogic(); + + std::list clauses; + std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter> f2) { + return recur(f2, truelogic); + }); + + if (truelogic == f.get_orlogic()) + { + return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; + } else { + return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; + } + } + } + }; + + cond << recur(_stress, _stress.get_notlogic()); + cond << ")"; + conditions.push_back(cond.str()); + } + if (!_with_prefix.empty()) { std::function, bool)> recur = [&] (filter f, bool notlogic) -> std::string { diff --git a/lib/adverb_query.h b/lib/adverb_query.h index cf6c046..30e7400 100644 --- a/lib/adverb_query.h +++ b/lib/adverb_query.h @@ -17,6 +17,7 @@ namespace verbly { adverb_query& has_rhyming_adjective(); adverb_query& has_rhyming_adverb(); adverb_query& has_rhyming_verb(); + adverb_query& with_stress(filter> _arg); adverb_query& requires_comparative_form(); adverb_query& requires_superlative_form(); @@ -53,6 +54,7 @@ namespace verbly { bool _has_rhyming_adjective = false; bool _has_rhyming_adverb = false; bool _has_rhyming_verb = false; + filter> _stress; bool _requires_comparative_form = false; bool _requires_superlative_form = false; diff --git a/lib/noun_query.cpp b/lib/noun_query.cpp index f4c832b..8648227 100644 --- a/lib/noun_query.cpp +++ b/lib/noun_query.cpp @@ -88,6 +88,13 @@ namespace verbly { return *this; } + noun_query& noun_query::with_stress(filter> _arg) + { + _stress = _arg; + + return *this; + } + noun_query& noun_query::with_singular_form(std::string _arg) { _with_singular_form.push_back(_arg); @@ -555,6 +562,68 @@ namespace verbly { { conditions.push_back("noun_id IN (SELECT a.noun_id FROM nouns AS a INNER JOIN noun_pronunciations AS curp ON curp.noun_id = a.noun_id INNER JOIN verb_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)"); } + + if (!_stress.empty()) + { + std::stringstream cond; + if (_stress.get_notlogic()) + { + cond << "noun_id NOT IN"; + } else { + cond << "noun_id IN"; + } + + cond << "(SELECT noun_id FROM noun_pronunciations WHERE "; + + std::function>, bool)> recur = [&] (filter> f, bool notlogic) -> std::string { + switch (f.get_type()) + { + case filter>::type::singleton: + { + std::ostringstream _val; + for (auto syl : f.get_elem()) + { + if (syl) + { + _val << "1"; + } else { + _val << "0"; + } + } + + bindings.emplace_back(_val.str()); + + if (notlogic == f.get_notlogic()) + { + return "stress = ?"; + } else { + return "stress != ?"; + } + } + + case filter>::type::group: + { + bool truelogic = notlogic != f.get_notlogic(); + + std::list clauses; + std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter> f2) { + return recur(f2, truelogic); + }); + + if (truelogic == f.get_orlogic()) + { + return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; + } else { + return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; + } + } + } + }; + + cond << recur(_stress, _stress.get_notlogic()); + cond << ")"; + conditions.push_back(cond.str()); + } for (auto except : _except) { diff --git a/lib/noun_query.h b/lib/noun_query.h index eb4a57c..74df260 100644 --- a/lib/noun_query.h +++ b/lib/noun_query.h @@ -17,6 +17,7 @@ namespace verbly { noun_query& has_rhyming_adjective(); noun_query& has_rhyming_adverb(); noun_query& has_rhyming_verb(); + noun_query& with_stress(filter> _arg); noun_query& with_singular_form(std::string _arg); noun_query& with_prefix(filter _f); @@ -100,6 +101,7 @@ namespace verbly { bool _has_rhyming_adjective = false; bool _has_rhyming_adverb = false; bool _has_rhyming_verb = false; + filter> _stress; std::list _with_singular_form; filter _with_prefix; diff --git a/lib/verb_query.cpp b/lib/verb_query.cpp index d871f83..4e6c253 100644 --- a/lib/verb_query.cpp +++ b/lib/verb_query.cpp @@ -88,6 +88,13 @@ namespace verbly { return *this; } + verb_query& verb_query::with_stress(filter> _arg) + { + _stress = _arg; + + return *this; + } + verb_query& verb_query::has_frames() { this->_has_frames = true; @@ -140,6 +147,68 @@ namespace verbly { conditions.push_back("verb_id IN (SELECT a.verb_id FROM verbs AS a INNER JOIN verb_pronunciations AS curp ON curp.noun_id = a.adverb_id INNER JOIN verb_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme AND rhmp.verb_id != curp.verb_id)"); } + if (!_stress.empty()) + { + std::stringstream cond; + if (_stress.get_notlogic()) + { + cond << "verb_id NOT IN"; + } else { + cond << "verb_id IN"; + } + + cond << "(SELECT verb_id FROM verb_pronunciations WHERE "; + + std::function>, bool)> recur = [&] (filter> f, bool notlogic) -> std::string { + switch (f.get_type()) + { + case filter>::type::singleton: + { + std::ostringstream _val; + for (auto syl : f.get_elem()) + { + if (syl) + { + _val << "1"; + } else { + _val << "0"; + } + } + + bindings.emplace_back(_val.str()); + + if (notlogic == f.get_notlogic()) + { + return "stress = ?"; + } else { + return "stress != ?"; + } + } + + case filter>::type::group: + { + bool truelogic = notlogic != f.get_notlogic(); + + std::list clauses; + std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter> f2) { + return recur(f2, truelogic); + }); + + if (truelogic == f.get_orlogic()) + { + return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; + } else { + return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; + } + } + } + }; + + cond << recur(_stress, _stress.get_notlogic()); + cond << ")"; + conditions.push_back(cond.str()); + } + for (auto except : _except) { conditions.push_back("verb_id != ?"); diff --git a/lib/verb_query.h b/lib/verb_query.h index 0ee5666..566ae37 100644 --- a/lib/verb_query.h +++ b/lib/verb_query.h @@ -17,6 +17,7 @@ namespace verbly { verb_query& has_rhyming_adjective(); verb_query& has_rhyming_adverb(); verb_query& has_rhyming_verb(); + verb_query& with_stress(filter> _arg); verb_query& has_frames(); @@ -36,6 +37,7 @@ namespace verbly { bool _has_rhyming_adjective = false; bool _has_rhyming_adverb = false; bool _has_rhyming_verb = false; + filter> _stress; }; }; -- cgit 1.4.1