diff options
| author | Kelly Rauchenberger <fefferburbia@gmail.com> | 2016-04-17 13:44:37 -0400 |
|---|---|---|
| committer | Kelly Rauchenberger <fefferburbia@gmail.com> | 2016-04-17 13:44:37 -0400 |
| commit | 04338f2b040fee5142904c062e0e38c836601034 (patch) | |
| tree | a3ca42f738839ae4f6c83d599277c33203beb733 /generator | |
| parent | 040ee58fecdc9c478004bc2e554e1ae126ec4602 (diff) | |
| download | verbly-04338f2b040fee5142904c062e0e38c836601034.tar.gz verbly-04338f2b040fee5142904c062e0e38c836601034.tar.bz2 verbly-04338f2b040fee5142904c062e0e38c836601034.zip | |
Fixed perfect rhyming
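Rhyme detection now ensures that any rhymes it finds are perfect rhymes and not identical rhymes. Rhyme detection is also now a lot faster because additional information (a `prerhyme` and a `rhyme` column for each stored pronunciation) is kept in the datafile. Also fixed a bug in the query interface (and the generator) that could cause incorrect queries to be executed; in the generator, text and blob parameters are now bound with SQLITE_TRANSIENT instead of SQLITE_STATIC.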
Diffstat (limited to 'generator')
| -rw-r--r-- | generator/generator.cpp | 128 | ||||
| -rw-r--r-- | generator/schema.sql | 8 |
2 files changed, 114 insertions, 22 deletions
| diff --git a/generator/generator.cpp b/generator/generator.cpp index e67bda7..e2ebfa1 100644 --- a/generator/generator.cpp +++ b/generator/generator.cpp | |||
| @@ -76,13 +76,24 @@ struct group_t { | |||
| 76 | std::list<std::list<framepart_t>> frames; | 76 | std::list<std::list<framepart_t>> frames; |
| 77 | }; | 77 | }; |
| 78 | 78 | ||
| 79 | struct pronunciation_t { | ||
| 80 | std::string phonemes; | ||
| 81 | std::string prerhyme; | ||
| 82 | std::string rhyme; | ||
| 83 | |||
| 84 | bool operator<(const pronunciation_t& other) const | ||
| 85 | { | ||
| 86 | return phonemes < other.phonemes; | ||
| 87 | } | ||
| 88 | }; | ||
| 89 | |||
| 79 | std::map<std::string, group_t> groups; | 90 | std::map<std::string, group_t> groups; |
| 80 | std::map<std::string, verb_t> verbs; | 91 | std::map<std::string, verb_t> verbs; |
| 81 | std::map<std::string, adjective_t> adjectives; | 92 | std::map<std::string, adjective_t> adjectives; |
| 82 | std::map<std::string, noun_t> nouns; | 93 | std::map<std::string, noun_t> nouns; |
| 83 | std::map<int, std::map<int, int>> wn; | 94 | std::map<int, std::map<int, int>> wn; |
| 84 | std::map<int, int> images; | 95 | std::map<int, int> images; |
| 85 | std::map<std::string, std::set<std::string>> pronunciations; | 96 | std::map<std::string, std::set<pronunciation_t>> pronunciations; |
| 86 | 97 | ||
| 87 | void print_usage() | 98 | void print_usage() |
| 88 | { | 99 | { |
| @@ -590,7 +601,47 @@ int main(int argc, char** argv) | |||
| 590 | std::string canonical(phoneme_data[1]); | 601 | std::string canonical(phoneme_data[1]); |
| 591 | std::transform(std::begin(canonical), std::end(canonical), std::begin(canonical), ::tolower); | 602 | std::transform(std::begin(canonical), std::end(canonical), std::begin(canonical), ::tolower); |
| 592 | 603 | ||
| 593 | pronunciations[canonical].insert(phoneme_data[2]); | 604 | std::string phonemes = phoneme_data[2]; |
| 605 | auto phoneme_set = verbly::split<std::list<std::string>>(phonemes, " "); | ||
| 606 | auto phemstrt = std::find_if(std::begin(phoneme_set), std::end(phoneme_set), [] (std::string phoneme) { | ||
| 607 | return phoneme.find("1") != std::string::npos; | ||
| 608 | }); | ||
| 609 | |||
| 610 | pronunciation_t p; | ||
| 611 | p.phonemes = phonemes; | ||
| 612 | if (phemstrt != std::end(phoneme_set)) | ||
| 613 | { | ||
| 614 | std::stringstream rhymer; | ||
| 615 | for (auto it = phemstrt; it != std::end(phoneme_set); it++) | ||
| 616 | { | ||
| 617 | std::string naked; | ||
| 618 | std::remove_copy_if(std::begin(*it), std::end(*it), std::back_inserter(naked), [] (char ch) { | ||
| 619 | return isdigit(ch); | ||
| 620 | }); | ||
| 621 | |||
| 622 | if (it != phemstrt) | ||
| 623 | { | ||
| 624 | rhymer << " "; | ||
| 625 | } | ||
| 626 | |||
| 627 | rhymer << naked; | ||
| 628 | } | ||
| 629 | |||
| 630 | p.rhyme = rhymer.str(); | ||
| 631 | |||
| 632 | if (phemstrt != std::begin(phoneme_set)) | ||
| 633 | { | ||
| 634 | phemstrt--; | ||
| 635 | p.prerhyme = *phemstrt; | ||
| 636 | } else { | ||
| 637 | p.prerhyme = ""; | ||
| 638 | } | ||
| 639 | } else { | ||
| 640 | p.prerhyme = ""; | ||
| 641 | p.rhyme = ""; | ||
| 642 | } | ||
| 643 | |||
| 644 | pronunciations[canonical].insert(p); | ||
| 594 | } | 645 | } |
| 595 | } | 646 | } |
| 596 | 647 | ||
| @@ -720,7 +771,7 @@ int main(int argc, char** argv) | |||
| 720 | db_error(ppdb, query); | 771 | db_error(ppdb, query); |
| 721 | } | 772 | } |
| 722 | 773 | ||
| 723 | sqlite3_bind_text(ppstmt, 1, prep.c_str(), prep.length(), SQLITE_STATIC); | 774 | sqlite3_bind_text(ppstmt, 1, prep.c_str(), prep.length(), SQLITE_TRANSIENT); |
| 724 | 775 | ||
| 725 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | 776 | if (sqlite3_step(ppstmt) != SQLITE_DONE) |
| 726 | { | 777 | { |
| @@ -752,7 +803,7 @@ int main(int argc, char** argv) | |||
| 752 | } | 803 | } |
| 753 | 804 | ||
| 754 | sqlite3_bind_int(ppstmt, 1, rowid); | 805 | sqlite3_bind_int(ppstmt, 1, rowid); |
| 755 | sqlite3_bind_text(ppstmt, 2, group.c_str(), group.length(), SQLITE_STATIC); | 806 | sqlite3_bind_text(ppstmt, 2, group.c_str(), group.length(), SQLITE_TRANSIENT); |
| 756 | 807 | ||
| 757 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | 808 | if (sqlite3_step(ppstmt) != SQLITE_DONE) |
| 758 | { | 809 | { |
| @@ -775,11 +826,11 @@ int main(int argc, char** argv) | |||
| 775 | db_error(ppdb, query); | 826 | db_error(ppdb, query); |
| 776 | } | 827 | } |
| 777 | 828 | ||
| 778 | sqlite3_bind_text(ppstmt, 1, mapping.second.infinitive.c_str(), mapping.second.infinitive.length(), SQLITE_STATIC); | 829 | sqlite3_bind_text(ppstmt, 1, mapping.second.infinitive.c_str(), mapping.second.infinitive.length(), SQLITE_TRANSIENT); |
| 779 | sqlite3_bind_text(ppstmt, 2, mapping.second.past_tense.c_str(), mapping.second.past_tense.length(), SQLITE_STATIC); | 830 | sqlite3_bind_text(ppstmt, 2, mapping.second.past_tense.c_str(), mapping.second.past_tense.length(), SQLITE_TRANSIENT); |
| 780 | sqlite3_bind_text(ppstmt, 3, mapping.second.past_participle.c_str(), mapping.second.past_participle.length(), SQLITE_STATIC); | 831 | sqlite3_bind_text(ppstmt, 3, mapping.second.past_participle.c_str(), mapping.second.past_participle.length(), SQLITE_TRANSIENT); |
| 781 | sqlite3_bind_text(ppstmt, 4, mapping.second.ing_form.c_str(), mapping.second.ing_form.length(), SQLITE_STATIC); | 832 | sqlite3_bind_text(ppstmt, 4, mapping.second.ing_form.c_str(), mapping.second.ing_form.length(), SQLITE_TRANSIENT); |
| 782 | sqlite3_bind_text(ppstmt, 5, mapping.second.s_form.c_str(), mapping.second.s_form.length(), SQLITE_STATIC); | 833 | sqlite3_bind_text(ppstmt, 5, mapping.second.s_form.c_str(), mapping.second.s_form.length(), SQLITE_TRANSIENT); |
| 783 | 834 | ||
| 784 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | 835 | if (sqlite3_step(ppstmt) != SQLITE_DONE) |
| 785 | { | 836 | { |
| @@ -811,14 +862,26 @@ int main(int argc, char** argv) | |||
| 811 | 862 | ||
| 812 | for (auto pronunciation : pronunciations[canonical]) | 863 | for (auto pronunciation : pronunciations[canonical]) |
| 813 | { | 864 | { |
| 814 | query = "INSERT INTO verb_pronunciations (verb_id, pronunciation) VALUES (?, ?)"; | 865 | if (!pronunciation.rhyme.empty()) |
| 866 | { | ||
| 867 | query = "INSERT INTO verb_pronunciations (verb_id, pronunciation, prerhyme, rhyme) VALUES (?, ?, ?, ?)"; | ||
| 868 | } else { | ||
| 869 | query = "INSERT INTO verb_pronunciations (verb_id, pronunciation) VALUES (?, ?)"; | ||
| 870 | } | ||
| 871 | |||
| 815 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | 872 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) |
| 816 | { | 873 | { |
| 817 | db_error(ppdb, query); | 874 | db_error(ppdb, query); |
| 818 | } | 875 | } |
| 819 | 876 | ||
| 820 | sqlite3_bind_int(ppstmt, 1, rowid); | 877 | sqlite3_bind_int(ppstmt, 1, rowid); |
| 821 | sqlite3_bind_text(ppstmt, 2, pronunciation.c_str(), pronunciation.length(), SQLITE_STATIC); | 878 | sqlite3_bind_text(ppstmt, 2, pronunciation.phonemes.c_str(), pronunciation.phonemes.length(), SQLITE_TRANSIENT); |
| 879 | |||
| 880 | if (!pronunciation.rhyme.empty()) | ||
| 881 | { | ||
| 882 | sqlite3_bind_text(ppstmt, 3, pronunciation.prerhyme.c_str(), pronunciation.prerhyme.length(), SQLITE_TRANSIENT); | ||
| 883 | sqlite3_bind_text(ppstmt, 4, pronunciation.rhyme.c_str(), pronunciation.rhyme.length(), SQLITE_TRANSIENT); | ||
| 884 | } | ||
| 822 | 885 | ||
| 823 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | 886 | if (sqlite3_step(ppstmt) != SQLITE_DONE) |
| 824 | { | 887 | { |
| @@ -856,7 +919,7 @@ int main(int argc, char** argv) | |||
| 856 | db_error(ppdb, query); | 919 | db_error(ppdb, query); |
| 857 | } | 920 | } |
| 858 | 921 | ||
| 859 | sqlite3_bind_blob(ppstmt, 1, rdm.c_str(), rdm.size(), SQLITE_STATIC); | 922 | sqlite3_bind_blob(ppstmt, 1, rdm.c_str(), rdm.size(), SQLITE_TRANSIENT); |
| 860 | 923 | ||
| 861 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | 924 | if (sqlite3_step(ppstmt) != SQLITE_DONE) |
| 862 | { | 925 | { |
| @@ -949,7 +1012,7 @@ int main(int argc, char** argv) | |||
| 949 | } | 1012 | } |
| 950 | 1013 | ||
| 951 | sqlite3_bind_int(ppstmt, 1, gid); | 1014 | sqlite3_bind_int(ppstmt, 1, gid); |
| 952 | sqlite3_bind_blob(ppstmt, 2, marshall.c_str(), marshall.length(), SQLITE_STATIC); | 1015 | sqlite3_bind_blob(ppstmt, 2, marshall.c_str(), marshall.length(), SQLITE_TRANSIENT); |
| 953 | 1016 | ||
| 954 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | 1017 | if (sqlite3_step(ppstmt) != SQLITE_DONE) |
| 955 | { | 1018 | { |
| @@ -1104,7 +1167,7 @@ int main(int argc, char** argv) | |||
| 1104 | db_error(ppdb, query); | 1167 | db_error(ppdb, query); |
| 1105 | } | 1168 | } |
| 1106 | 1169 | ||
| 1107 | sqlite3_bind_text(ppstmt, 1, word.c_str(), word.length(), SQLITE_STATIC); | 1170 | sqlite3_bind_text(ppstmt, 1, word.c_str(), word.length(), SQLITE_TRANSIENT); |
| 1108 | switch (synset_id / 100000000) | 1171 | switch (synset_id / 100000000) |
| 1109 | { | 1172 | { |
| 1110 | case 1: // Noun | 1173 | case 1: // Noun |
| @@ -1119,7 +1182,7 @@ int main(int argc, char** argv) | |||
| 1119 | 1182 | ||
| 1120 | if (nouns.count(word) == 1) | 1183 | if (nouns.count(word) == 1) |
| 1121 | { | 1184 | { |
| 1122 | sqlite3_bind_text(ppstmt, 6, nouns[word].plural.c_str(), nouns[word].plural.length(), SQLITE_STATIC); | 1185 | sqlite3_bind_text(ppstmt, 6, nouns[word].plural.c_str(), nouns[word].plural.length(), SQLITE_TRANSIENT); |
| 1123 | } | 1186 | } |
| 1124 | 1187 | ||
| 1125 | break; | 1188 | break; |
| @@ -1132,8 +1195,8 @@ int main(int argc, char** argv) | |||
| 1132 | 1195 | ||
| 1133 | if (adjectives.count(word) == 1) | 1196 | if (adjectives.count(word) == 1) |
| 1134 | { | 1197 | { |
| 1135 | sqlite3_bind_text(ppstmt, 3, adjectives[word].comparative.c_str(), adjectives[word].comparative.length(), SQLITE_STATIC); | 1198 | sqlite3_bind_text(ppstmt, 3, adjectives[word].comparative.c_str(), adjectives[word].comparative.length(), SQLITE_TRANSIENT); |
| 1136 | sqlite3_bind_text(ppstmt, 4, adjectives[word].superlative.c_str(), adjectives[word].superlative.length(), SQLITE_STATIC); | 1199 | sqlite3_bind_text(ppstmt, 4, adjectives[word].superlative.c_str(), adjectives[word].superlative.length(), SQLITE_TRANSIENT); |
| 1137 | } | 1200 | } |
| 1138 | 1201 | ||
| 1139 | break; | 1202 | break; |
| @@ -1173,21 +1236,36 @@ int main(int argc, char** argv) | |||
| 1173 | { | 1236 | { |
| 1174 | case 1: // Noun | 1237 | case 1: // Noun |
| 1175 | { | 1238 | { |
| 1176 | query = "INSERT INTO noun_pronunciations (noun_id, pronunciation) VALUES (?, ?)"; | 1239 | if (!pronunciation.rhyme.empty()) |
| 1240 | { | ||
| 1241 | query = "INSERT INTO noun_pronunciations (noun_id, pronunciation, prerhyme, rhyme) VALUES (?, ?, ?, ?)"; | ||
| 1242 | } else { | ||
| 1243 | query = "INSERT INTO noun_pronunciations (noun_id, pronunciation) VALUES (?, ?)"; | ||
| 1244 | } | ||
| 1177 | 1245 | ||
| 1178 | break; | 1246 | break; |
| 1179 | } | 1247 | } |
| 1180 | 1248 | ||
| 1181 | case 3: // Adjective | 1249 | case 3: // Adjective |
| 1182 | { | 1250 | { |
| 1183 | query = "INSERT INTO adjective_pronunciations (adjective_id, pronunciation) VALUES (?, ?)"; | 1251 | if (!pronunciation.rhyme.empty()) |
| 1252 | { | ||
| 1253 | query = "INSERT INTO adjective_pronunciations (adjective_id, pronunciation, prerhyme, rhyme) VALUES (?, ?, ?, ?)"; | ||
| 1254 | } else { | ||
| 1255 | query = "INSERT INTO adjective_pronunciations (adjective_id, pronunciation) VALUES (?, ?)"; | ||
| 1256 | } | ||
| 1184 | 1257 | ||
| 1185 | break; | 1258 | break; |
| 1186 | } | 1259 | } |
| 1187 | 1260 | ||
| 1188 | case 4: // Adverb | 1261 | case 4: // Adverb |
| 1189 | { | 1262 | { |
| 1190 | query = "INSERT INTO adverb_pronunciations (adverb_id, pronunciation) VALUES (?, ?)"; | 1263 | if (!pronunciation.rhyme.empty()) |
| 1264 | { | ||
| 1265 | query = "INSERT INTO adverb_pronunciations (adverb_id, pronunciation, prerhyme, rhyme) VALUES (?, ?, ?, ?)"; | ||
| 1266 | } else { | ||
| 1267 | query = "INSERT INTO adverb_pronunciations (adverb_id, pronunciation) VALUES (?, ?)"; | ||
| 1268 | } | ||
| 1191 | 1269 | ||
| 1192 | break; | 1270 | break; |
| 1193 | } | 1271 | } |
| @@ -1199,7 +1277,13 @@ int main(int argc, char** argv) | |||
| 1199 | } | 1277 | } |
| 1200 | 1278 | ||
| 1201 | sqlite3_bind_int(ppstmt, 1, rowid); | 1279 | sqlite3_bind_int(ppstmt, 1, rowid); |
| 1202 | sqlite3_bind_text(ppstmt, 2, pronunciation.c_str(), pronunciation.length(), SQLITE_STATIC); | 1280 | sqlite3_bind_text(ppstmt, 2, pronunciation.phonemes.c_str(), pronunciation.phonemes.length(), SQLITE_TRANSIENT); |
| 1281 | |||
| 1282 | if (!pronunciation.rhyme.empty()) | ||
| 1283 | { | ||
| 1284 | sqlite3_bind_text(ppstmt, 3, pronunciation.prerhyme.c_str(), pronunciation.prerhyme.length(), SQLITE_TRANSIENT); | ||
| 1285 | sqlite3_bind_text(ppstmt, 4, pronunciation.rhyme.c_str(), pronunciation.rhyme.length(), SQLITE_TRANSIENT); | ||
| 1286 | } | ||
| 1203 | 1287 | ||
| 1204 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | 1288 | if (sqlite3_step(ppstmt) != SQLITE_DONE) |
| 1205 | { | 1289 | { |
| @@ -2188,7 +2272,7 @@ int main(int argc, char** argv) | |||
| 2188 | db_error(ppdb, query); | 2272 | db_error(ppdb, query); |
| 2189 | } | 2273 | } |
| 2190 | 2274 | ||
| 2191 | sqlite3_bind_text(ppstmt, 1, syn.c_str(), 1, SQLITE_STATIC); | 2275 | sqlite3_bind_text(ppstmt, 1, syn.c_str(), 1, SQLITE_TRANSIENT); |
| 2192 | sqlite3_bind_int(ppstmt, 2, wn[synset_id][wnum]); | 2276 | sqlite3_bind_int(ppstmt, 2, wn[synset_id][wnum]); |
| 2193 | 2277 | ||
| 2194 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | 2278 | if (sqlite3_step(ppstmt) != SQLITE_DONE) |
| diff --git a/generator/schema.sql b/generator/schema.sql index 9a39944..1836c62 100644 --- a/generator/schema.sql +++ b/generator/schema.sql | |||
| @@ -184,6 +184,8 @@ DROP TABLE IF EXISTS `noun_pronunciations`; | |||
| 184 | CREATE TABLE `noun_pronunciations` ( | 184 | CREATE TABLE `noun_pronunciations` ( |
| 185 | `noun_id` INTEGER NOT NULL, | 185 | `noun_id` INTEGER NOT NULL, |
| 186 | `pronunciation` VARCHAR(64) NOT NULL, | 186 | `pronunciation` VARCHAR(64) NOT NULL, |
| 187 | `prerhyme` VARCHAR(8), | ||
| 188 | `rhyme` VARCHAR(64), | ||
| 187 | FOREIGN KEY (`noun_id`) REFERENCES `nouns`(`noun_id`) | 189 | FOREIGN KEY (`noun_id`) REFERENCES `nouns`(`noun_id`) |
| 188 | ); | 190 | ); |
| 189 | 191 | ||
| @@ -191,6 +193,8 @@ DROP TABLE IF EXISTS `verb_pronunciations`; | |||
| 191 | CREATE TABLE `verb_pronunciations` ( | 193 | CREATE TABLE `verb_pronunciations` ( |
| 192 | `verb_id` INTEGER NOT NULL, | 194 | `verb_id` INTEGER NOT NULL, |
| 193 | `pronunciation` VARCHAR(64) NOT NULL, | 195 | `pronunciation` VARCHAR(64) NOT NULL, |
| 196 | `prerhyme` VARCHAR(8), | ||
| 197 | `rhyme` VARCHAR(64), | ||
| 194 | FOREIGN KEY (`verb_id`) REFERENCES `verbs`(`verb_id`) | 198 | FOREIGN KEY (`verb_id`) REFERENCES `verbs`(`verb_id`) |
| 195 | ); | 199 | ); |
| 196 | 200 | ||
| @@ -198,6 +202,8 @@ DROP TABLE IF EXISTS `adjective_pronunciations`; | |||
| 198 | CREATE TABLE `adjective_pronunciations` ( | 202 | CREATE TABLE `adjective_pronunciations` ( |
| 199 | `adjective_id` INTEGER NOT NULL, | 203 | `adjective_id` INTEGER NOT NULL, |
| 200 | `pronunciation` VARCHAR(64) NOT NULL, | 204 | `pronunciation` VARCHAR(64) NOT NULL, |
| 205 | `prerhyme` VARCHAR(8), | ||
| 206 | `rhyme` VARCHAR(64), | ||
| 201 | FOREIGN KEY (`adjective_id`) REFERENCES `adjectives`(`adjective_id`) | 207 | FOREIGN KEY (`adjective_id`) REFERENCES `adjectives`(`adjective_id`) |
| 202 | ); | 208 | ); |
| 203 | 209 | ||
| @@ -205,6 +211,8 @@ DROP TABLE IF EXISTS `adverb_pronunciations`; | |||
| 205 | CREATE TABLE `adverb_pronunciations` ( | 211 | CREATE TABLE `adverb_pronunciations` ( |
| 206 | `adverb_id` INTEGER NOT NULL, | 212 | `adverb_id` INTEGER NOT NULL, |
| 207 | `pronunciation` VARCHAR(64) NOT NULL, | 213 | `pronunciation` VARCHAR(64) NOT NULL, |
| 214 | `prerhyme` VARCHAR(8), | ||
| 215 | `rhyme` VARCHAR(64), | ||
| 208 | FOREIGN KEY (`adverb_id`) REFERENCES `adverbs`(`adverb_id`) | 216 | FOREIGN KEY (`adverb_id`) REFERENCES `adverbs`(`adverb_id`) |
| 209 | ); | 217 | ); |
| 210 | 218 | ||
