diff options
author | Kelly Rauchenberger <fefferburbia@gmail.com> | 2016-04-17 13:44:37 -0400 |
---|---|---|
committer | Kelly Rauchenberger <fefferburbia@gmail.com> | 2016-04-17 13:44:37 -0400 |
commit | 04338f2b040fee5142904c062e0e38c836601034 (patch) | |
tree | a3ca42f738839ae4f6c83d599277c33203beb733 /generator/generator.cpp | |
parent | 040ee58fecdc9c478004bc2e554e1ae126ec4602 (diff) | |
download | verbly-04338f2b040fee5142904c062e0e38c836601034.tar.gz verbly-04338f2b040fee5142904c062e0e38c836601034.tar.bz2 verbly-04338f2b040fee5142904c062e0e38c836601034.zip |
Fixed perfect rhyming
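Rhyme detection now ensures that any rhymes it finds are perfect rhymes and not identical rhymes. Rhyme detection is also now a lot faster because additional information is stored in the data file. Also fixed a bug in the query interface (and the generator) that could cause incorrect queries to be executed. In the generator, the fix binds text and blob parameters with SQLITE_TRANSIENT instead of SQLITE_STATIC, so that SQLite makes its own copy of each bound value.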
Diffstat (limited to 'generator/generator.cpp')
-rw-r--r-- | generator/generator.cpp | 128 |
1 file changed, 106 insertions, 22 deletions
diff --git a/generator/generator.cpp b/generator/generator.cpp index e67bda7..e2ebfa1 100644 --- a/generator/generator.cpp +++ b/generator/generator.cpp | |||
@@ -76,13 +76,24 @@ struct group_t { | |||
76 | std::list<std::list<framepart_t>> frames; | 76 | std::list<std::list<framepart_t>> frames; |
77 | }; | 77 | }; |
78 | 78 | ||
79 | struct pronunciation_t { | ||
80 | std::string phonemes; | ||
81 | std::string prerhyme; | ||
82 | std::string rhyme; | ||
83 | |||
84 | bool operator<(const pronunciation_t& other) const | ||
85 | { | ||
86 | return phonemes < other.phonemes; | ||
87 | } | ||
88 | }; | ||
89 | |||
79 | std::map<std::string, group_t> groups; | 90 | std::map<std::string, group_t> groups; |
80 | std::map<std::string, verb_t> verbs; | 91 | std::map<std::string, verb_t> verbs; |
81 | std::map<std::string, adjective_t> adjectives; | 92 | std::map<std::string, adjective_t> adjectives; |
82 | std::map<std::string, noun_t> nouns; | 93 | std::map<std::string, noun_t> nouns; |
83 | std::map<int, std::map<int, int>> wn; | 94 | std::map<int, std::map<int, int>> wn; |
84 | std::map<int, int> images; | 95 | std::map<int, int> images; |
85 | std::map<std::string, std::set<std::string>> pronunciations; | 96 | std::map<std::string, std::set<pronunciation_t>> pronunciations; |
86 | 97 | ||
87 | void print_usage() | 98 | void print_usage() |
88 | { | 99 | { |
@@ -590,7 +601,47 @@ int main(int argc, char** argv) | |||
590 | std::string canonical(phoneme_data[1]); | 601 | std::string canonical(phoneme_data[1]); |
591 | std::transform(std::begin(canonical), std::end(canonical), std::begin(canonical), ::tolower); | 602 | std::transform(std::begin(canonical), std::end(canonical), std::begin(canonical), ::tolower); |
592 | 603 | ||
593 | pronunciations[canonical].insert(phoneme_data[2]); | 604 | std::string phonemes = phoneme_data[2]; |
605 | auto phoneme_set = verbly::split<std::list<std::string>>(phonemes, " "); | ||
606 | auto phemstrt = std::find_if(std::begin(phoneme_set), std::end(phoneme_set), [] (std::string phoneme) { | ||
607 | return phoneme.find("1") != std::string::npos; | ||
608 | }); | ||
609 | |||
610 | pronunciation_t p; | ||
611 | p.phonemes = phonemes; | ||
612 | if (phemstrt != std::end(phoneme_set)) | ||
613 | { | ||
614 | std::stringstream rhymer; | ||
615 | for (auto it = phemstrt; it != std::end(phoneme_set); it++) | ||
616 | { | ||
617 | std::string naked; | ||
618 | std::remove_copy_if(std::begin(*it), std::end(*it), std::back_inserter(naked), [] (char ch) { | ||
619 | return isdigit(ch); | ||
620 | }); | ||
621 | |||
622 | if (it != phemstrt) | ||
623 | { | ||
624 | rhymer << " "; | ||
625 | } | ||
626 | |||
627 | rhymer << naked; | ||
628 | } | ||
629 | |||
630 | p.rhyme = rhymer.str(); | ||
631 | |||
632 | if (phemstrt != std::begin(phoneme_set)) | ||
633 | { | ||
634 | phemstrt--; | ||
635 | p.prerhyme = *phemstrt; | ||
636 | } else { | ||
637 | p.prerhyme = ""; | ||
638 | } | ||
639 | } else { | ||
640 | p.prerhyme = ""; | ||
641 | p.rhyme = ""; | ||
642 | } | ||
643 | |||
644 | pronunciations[canonical].insert(p); | ||
594 | } | 645 | } |
595 | } | 646 | } |
596 | 647 | ||
@@ -720,7 +771,7 @@ int main(int argc, char** argv) | |||
720 | db_error(ppdb, query); | 771 | db_error(ppdb, query); |
721 | } | 772 | } |
722 | 773 | ||
723 | sqlite3_bind_text(ppstmt, 1, prep.c_str(), prep.length(), SQLITE_STATIC); | 774 | sqlite3_bind_text(ppstmt, 1, prep.c_str(), prep.length(), SQLITE_TRANSIENT); |
724 | 775 | ||
725 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | 776 | if (sqlite3_step(ppstmt) != SQLITE_DONE) |
726 | { | 777 | { |
@@ -752,7 +803,7 @@ int main(int argc, char** argv) | |||
752 | } | 803 | } |
753 | 804 | ||
754 | sqlite3_bind_int(ppstmt, 1, rowid); | 805 | sqlite3_bind_int(ppstmt, 1, rowid); |
755 | sqlite3_bind_text(ppstmt, 2, group.c_str(), group.length(), SQLITE_STATIC); | 806 | sqlite3_bind_text(ppstmt, 2, group.c_str(), group.length(), SQLITE_TRANSIENT); |
756 | 807 | ||
757 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | 808 | if (sqlite3_step(ppstmt) != SQLITE_DONE) |
758 | { | 809 | { |
@@ -775,11 +826,11 @@ int main(int argc, char** argv) | |||
775 | db_error(ppdb, query); | 826 | db_error(ppdb, query); |
776 | } | 827 | } |
777 | 828 | ||
778 | sqlite3_bind_text(ppstmt, 1, mapping.second.infinitive.c_str(), mapping.second.infinitive.length(), SQLITE_STATIC); | 829 | sqlite3_bind_text(ppstmt, 1, mapping.second.infinitive.c_str(), mapping.second.infinitive.length(), SQLITE_TRANSIENT); |
779 | sqlite3_bind_text(ppstmt, 2, mapping.second.past_tense.c_str(), mapping.second.past_tense.length(), SQLITE_STATIC); | 830 | sqlite3_bind_text(ppstmt, 2, mapping.second.past_tense.c_str(), mapping.second.past_tense.length(), SQLITE_TRANSIENT); |
780 | sqlite3_bind_text(ppstmt, 3, mapping.second.past_participle.c_str(), mapping.second.past_participle.length(), SQLITE_STATIC); | 831 | sqlite3_bind_text(ppstmt, 3, mapping.second.past_participle.c_str(), mapping.second.past_participle.length(), SQLITE_TRANSIENT); |
781 | sqlite3_bind_text(ppstmt, 4, mapping.second.ing_form.c_str(), mapping.second.ing_form.length(), SQLITE_STATIC); | 832 | sqlite3_bind_text(ppstmt, 4, mapping.second.ing_form.c_str(), mapping.second.ing_form.length(), SQLITE_TRANSIENT); |
782 | sqlite3_bind_text(ppstmt, 5, mapping.second.s_form.c_str(), mapping.second.s_form.length(), SQLITE_STATIC); | 833 | sqlite3_bind_text(ppstmt, 5, mapping.second.s_form.c_str(), mapping.second.s_form.length(), SQLITE_TRANSIENT); |
783 | 834 | ||
784 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | 835 | if (sqlite3_step(ppstmt) != SQLITE_DONE) |
785 | { | 836 | { |
@@ -811,14 +862,26 @@ int main(int argc, char** argv) | |||
811 | 862 | ||
812 | for (auto pronunciation : pronunciations[canonical]) | 863 | for (auto pronunciation : pronunciations[canonical]) |
813 | { | 864 | { |
814 | query = "INSERT INTO verb_pronunciations (verb_id, pronunciation) VALUES (?, ?)"; | 865 | if (!pronunciation.rhyme.empty()) |
866 | { | ||
867 | query = "INSERT INTO verb_pronunciations (verb_id, pronunciation, prerhyme, rhyme) VALUES (?, ?, ?, ?)"; | ||
868 | } else { | ||
869 | query = "INSERT INTO verb_pronunciations (verb_id, pronunciation) VALUES (?, ?)"; | ||
870 | } | ||
871 | |||
815 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | 872 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) |
816 | { | 873 | { |
817 | db_error(ppdb, query); | 874 | db_error(ppdb, query); |
818 | } | 875 | } |
819 | 876 | ||
820 | sqlite3_bind_int(ppstmt, 1, rowid); | 877 | sqlite3_bind_int(ppstmt, 1, rowid); |
821 | sqlite3_bind_text(ppstmt, 2, pronunciation.c_str(), pronunciation.length(), SQLITE_STATIC); | 878 | sqlite3_bind_text(ppstmt, 2, pronunciation.phonemes.c_str(), pronunciation.phonemes.length(), SQLITE_TRANSIENT); |
879 | |||
880 | if (!pronunciation.rhyme.empty()) | ||
881 | { | ||
882 | sqlite3_bind_text(ppstmt, 3, pronunciation.prerhyme.c_str(), pronunciation.prerhyme.length(), SQLITE_TRANSIENT); | ||
883 | sqlite3_bind_text(ppstmt, 4, pronunciation.rhyme.c_str(), pronunciation.rhyme.length(), SQLITE_TRANSIENT); | ||
884 | } | ||
822 | 885 | ||
823 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | 886 | if (sqlite3_step(ppstmt) != SQLITE_DONE) |
824 | { | 887 | { |
@@ -856,7 +919,7 @@ int main(int argc, char** argv) | |||
856 | db_error(ppdb, query); | 919 | db_error(ppdb, query); |
857 | } | 920 | } |
858 | 921 | ||
859 | sqlite3_bind_blob(ppstmt, 1, rdm.c_str(), rdm.size(), SQLITE_STATIC); | 922 | sqlite3_bind_blob(ppstmt, 1, rdm.c_str(), rdm.size(), SQLITE_TRANSIENT); |
860 | 923 | ||
861 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | 924 | if (sqlite3_step(ppstmt) != SQLITE_DONE) |
862 | { | 925 | { |
@@ -949,7 +1012,7 @@ int main(int argc, char** argv) | |||
949 | } | 1012 | } |
950 | 1013 | ||
951 | sqlite3_bind_int(ppstmt, 1, gid); | 1014 | sqlite3_bind_int(ppstmt, 1, gid); |
952 | sqlite3_bind_blob(ppstmt, 2, marshall.c_str(), marshall.length(), SQLITE_STATIC); | 1015 | sqlite3_bind_blob(ppstmt, 2, marshall.c_str(), marshall.length(), SQLITE_TRANSIENT); |
953 | 1016 | ||
954 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | 1017 | if (sqlite3_step(ppstmt) != SQLITE_DONE) |
955 | { | 1018 | { |
@@ -1104,7 +1167,7 @@ int main(int argc, char** argv) | |||
1104 | db_error(ppdb, query); | 1167 | db_error(ppdb, query); |
1105 | } | 1168 | } |
1106 | 1169 | ||
1107 | sqlite3_bind_text(ppstmt, 1, word.c_str(), word.length(), SQLITE_STATIC); | 1170 | sqlite3_bind_text(ppstmt, 1, word.c_str(), word.length(), SQLITE_TRANSIENT); |
1108 | switch (synset_id / 100000000) | 1171 | switch (synset_id / 100000000) |
1109 | { | 1172 | { |
1110 | case 1: // Noun | 1173 | case 1: // Noun |
@@ -1119,7 +1182,7 @@ int main(int argc, char** argv) | |||
1119 | 1182 | ||
1120 | if (nouns.count(word) == 1) | 1183 | if (nouns.count(word) == 1) |
1121 | { | 1184 | { |
1122 | sqlite3_bind_text(ppstmt, 6, nouns[word].plural.c_str(), nouns[word].plural.length(), SQLITE_STATIC); | 1185 | sqlite3_bind_text(ppstmt, 6, nouns[word].plural.c_str(), nouns[word].plural.length(), SQLITE_TRANSIENT); |
1123 | } | 1186 | } |
1124 | 1187 | ||
1125 | break; | 1188 | break; |
@@ -1132,8 +1195,8 @@ int main(int argc, char** argv) | |||
1132 | 1195 | ||
1133 | if (adjectives.count(word) == 1) | 1196 | if (adjectives.count(word) == 1) |
1134 | { | 1197 | { |
1135 | sqlite3_bind_text(ppstmt, 3, adjectives[word].comparative.c_str(), adjectives[word].comparative.length(), SQLITE_STATIC); | 1198 | sqlite3_bind_text(ppstmt, 3, adjectives[word].comparative.c_str(), adjectives[word].comparative.length(), SQLITE_TRANSIENT); |
1136 | sqlite3_bind_text(ppstmt, 4, adjectives[word].superlative.c_str(), adjectives[word].superlative.length(), SQLITE_STATIC); | 1199 | sqlite3_bind_text(ppstmt, 4, adjectives[word].superlative.c_str(), adjectives[word].superlative.length(), SQLITE_TRANSIENT); |
1137 | } | 1200 | } |
1138 | 1201 | ||
1139 | break; | 1202 | break; |
@@ -1173,21 +1236,36 @@ int main(int argc, char** argv) | |||
1173 | { | 1236 | { |
1174 | case 1: // Noun | 1237 | case 1: // Noun |
1175 | { | 1238 | { |
1176 | query = "INSERT INTO noun_pronunciations (noun_id, pronunciation) VALUES (?, ?)"; | 1239 | if (!pronunciation.rhyme.empty()) |
1240 | { | ||
1241 | query = "INSERT INTO noun_pronunciations (noun_id, pronunciation, prerhyme, rhyme) VALUES (?, ?, ?, ?)"; | ||
1242 | } else { | ||
1243 | query = "INSERT INTO noun_pronunciations (noun_id, pronunciation) VALUES (?, ?)"; | ||
1244 | } | ||
1177 | 1245 | ||
1178 | break; | 1246 | break; |
1179 | } | 1247 | } |
1180 | 1248 | ||
1181 | case 3: // Adjective | 1249 | case 3: // Adjective |
1182 | { | 1250 | { |
1183 | query = "INSERT INTO adjective_pronunciations (adjective_id, pronunciation) VALUES (?, ?)"; | 1251 | if (!pronunciation.rhyme.empty()) |
1252 | { | ||
1253 | query = "INSERT INTO adjective_pronunciations (adjective_id, pronunciation, prerhyme, rhyme) VALUES (?, ?, ?, ?)"; | ||
1254 | } else { | ||
1255 | query = "INSERT INTO adjective_pronunciations (adjective_id, pronunciation) VALUES (?, ?)"; | ||
1256 | } | ||
1184 | 1257 | ||
1185 | break; | 1258 | break; |
1186 | } | 1259 | } |
1187 | 1260 | ||
1188 | case 4: // Adverb | 1261 | case 4: // Adverb |
1189 | { | 1262 | { |
1190 | query = "INSERT INTO adverb_pronunciations (adverb_id, pronunciation) VALUES (?, ?)"; | 1263 | if (!pronunciation.rhyme.empty()) |
1264 | { | ||
1265 | query = "INSERT INTO adverb_pronunciations (adverb_id, pronunciation, prerhyme, rhyme) VALUES (?, ?, ?, ?)"; | ||
1266 | } else { | ||
1267 | query = "INSERT INTO adverb_pronunciations (adverb_id, pronunciation) VALUES (?, ?)"; | ||
1268 | } | ||
1191 | 1269 | ||
1192 | break; | 1270 | break; |
1193 | } | 1271 | } |
@@ -1199,7 +1277,13 @@ int main(int argc, char** argv) | |||
1199 | } | 1277 | } |
1200 | 1278 | ||
1201 | sqlite3_bind_int(ppstmt, 1, rowid); | 1279 | sqlite3_bind_int(ppstmt, 1, rowid); |
1202 | sqlite3_bind_text(ppstmt, 2, pronunciation.c_str(), pronunciation.length(), SQLITE_STATIC); | 1280 | sqlite3_bind_text(ppstmt, 2, pronunciation.phonemes.c_str(), pronunciation.phonemes.length(), SQLITE_TRANSIENT); |
1281 | |||
1282 | if (!pronunciation.rhyme.empty()) | ||
1283 | { | ||
1284 | sqlite3_bind_text(ppstmt, 3, pronunciation.prerhyme.c_str(), pronunciation.prerhyme.length(), SQLITE_TRANSIENT); | ||
1285 | sqlite3_bind_text(ppstmt, 4, pronunciation.rhyme.c_str(), pronunciation.rhyme.length(), SQLITE_TRANSIENT); | ||
1286 | } | ||
1203 | 1287 | ||
1204 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | 1288 | if (sqlite3_step(ppstmt) != SQLITE_DONE) |
1205 | { | 1289 | { |
@@ -2188,7 +2272,7 @@ int main(int argc, char** argv) | |||
2188 | db_error(ppdb, query); | 2272 | db_error(ppdb, query); |
2189 | } | 2273 | } |
2190 | 2274 | ||
2191 | sqlite3_bind_text(ppstmt, 1, syn.c_str(), 1, SQLITE_STATIC); | 2275 | sqlite3_bind_text(ppstmt, 1, syn.c_str(), 1, SQLITE_TRANSIENT); |
2192 | sqlite3_bind_int(ppstmt, 2, wn[synset_id][wnum]); | 2276 | sqlite3_bind_int(ppstmt, 2, wn[synset_id][wnum]); |
2193 | 2277 | ||
2194 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | 2278 | if (sqlite3_step(ppstmt) != SQLITE_DONE) |