summary refs log tree commit diff stats
path: root/generator/generator.cpp
diff options
context:
space:
mode:
author Kelly Rauchenberger <fefferburbia@gmail.com> 2016-04-17 13:44:37 -0400
committer Kelly Rauchenberger <fefferburbia@gmail.com> 2016-04-17 13:44:37 -0400
commit 04338f2b040fee5142904c062e0e38c836601034 (patch)
tree a3ca42f738839ae4f6c83d599277c33203beb733 /generator/generator.cpp
parent 040ee58fecdc9c478004bc2e554e1ae126ec4602 (diff)
download verbly-04338f2b040fee5142904c062e0e38c836601034.tar.gz
verbly-04338f2b040fee5142904c062e0e38c836601034.tar.bz2
verbly-04338f2b040fee5142904c062e0e38c836601034.zip
Fixed perfect rhyming
Rhyme detection now ensures that any rhymes it finds are perfect rhymes and not identical rhymes. Rhyme detection is also now a lot faster because additional information is stored in the datafile.

Also fixed a bug in the query interface (and the generator) that could cause incorrect queries to be executed.
Diffstat (limited to 'generator/generator.cpp')
-rw-r--r-- generator/generator.cpp 128
1 file changed, 106 insertions, 22 deletions
diff --git a/generator/generator.cpp b/generator/generator.cpp
index e67bda7..e2ebfa1 100644
--- a/generator/generator.cpp
+++ b/generator/generator.cpp
@@ -76,13 +76,24 @@ struct group_t {
76 std::list<std::list<framepart_t>> frames; 76 std::list<std::list<framepart_t>> frames;
77}; 77};
78 78
79struct pronunciation_t {
80 std::string phonemes;
81 std::string prerhyme;
82 std::string rhyme;
83
84 bool operator<(const pronunciation_t& other) const
85 {
86 return phonemes < other.phonemes;
87 }
88};
89
79std::map<std::string, group_t> groups; 90std::map<std::string, group_t> groups;
80std::map<std::string, verb_t> verbs; 91std::map<std::string, verb_t> verbs;
81std::map<std::string, adjective_t> adjectives; 92std::map<std::string, adjective_t> adjectives;
82std::map<std::string, noun_t> nouns; 93std::map<std::string, noun_t> nouns;
83std::map<int, std::map<int, int>> wn; 94std::map<int, std::map<int, int>> wn;
84std::map<int, int> images; 95std::map<int, int> images;
85std::map<std::string, std::set<std::string>> pronunciations; 96std::map<std::string, std::set<pronunciation_t>> pronunciations;
86 97
87void print_usage() 98void print_usage()
88{ 99{
@@ -590,7 +601,47 @@ int main(int argc, char** argv)
590 std::string canonical(phoneme_data[1]); 601 std::string canonical(phoneme_data[1]);
591 std::transform(std::begin(canonical), std::end(canonical), std::begin(canonical), ::tolower); 602 std::transform(std::begin(canonical), std::end(canonical), std::begin(canonical), ::tolower);
592 603
593 pronunciations[canonical].insert(phoneme_data[2]); 604 std::string phonemes = phoneme_data[2];
605 auto phoneme_set = verbly::split<std::list<std::string>>(phonemes, " ");
606 auto phemstrt = std::find_if(std::begin(phoneme_set), std::end(phoneme_set), [] (std::string phoneme) {
607 return phoneme.find("1") != std::string::npos;
608 });
609
610 pronunciation_t p;
611 p.phonemes = phonemes;
612 if (phemstrt != std::end(phoneme_set))
613 {
614 std::stringstream rhymer;
615 for (auto it = phemstrt; it != std::end(phoneme_set); it++)
616 {
617 std::string naked;
618 std::remove_copy_if(std::begin(*it), std::end(*it), std::back_inserter(naked), [] (char ch) {
619 return isdigit(ch);
620 });
621
622 if (it != phemstrt)
623 {
624 rhymer << " ";
625 }
626
627 rhymer << naked;
628 }
629
630 p.rhyme = rhymer.str();
631
632 if (phemstrt != std::begin(phoneme_set))
633 {
634 phemstrt--;
635 p.prerhyme = *phemstrt;
636 } else {
637 p.prerhyme = "";
638 }
639 } else {
640 p.prerhyme = "";
641 p.rhyme = "";
642 }
643
644 pronunciations[canonical].insert(p);
594 } 645 }
595 } 646 }
596 647
@@ -720,7 +771,7 @@ int main(int argc, char** argv)
720 db_error(ppdb, query); 771 db_error(ppdb, query);
721 } 772 }
722 773
723 sqlite3_bind_text(ppstmt, 1, prep.c_str(), prep.length(), SQLITE_STATIC); 774 sqlite3_bind_text(ppstmt, 1, prep.c_str(), prep.length(), SQLITE_TRANSIENT);
724 775
725 if (sqlite3_step(ppstmt) != SQLITE_DONE) 776 if (sqlite3_step(ppstmt) != SQLITE_DONE)
726 { 777 {
@@ -752,7 +803,7 @@ int main(int argc, char** argv)
752 } 803 }
753 804
754 sqlite3_bind_int(ppstmt, 1, rowid); 805 sqlite3_bind_int(ppstmt, 1, rowid);
755 sqlite3_bind_text(ppstmt, 2, group.c_str(), group.length(), SQLITE_STATIC); 806 sqlite3_bind_text(ppstmt, 2, group.c_str(), group.length(), SQLITE_TRANSIENT);
756 807
757 if (sqlite3_step(ppstmt) != SQLITE_DONE) 808 if (sqlite3_step(ppstmt) != SQLITE_DONE)
758 { 809 {
@@ -775,11 +826,11 @@ int main(int argc, char** argv)
775 db_error(ppdb, query); 826 db_error(ppdb, query);
776 } 827 }
777 828
778 sqlite3_bind_text(ppstmt, 1, mapping.second.infinitive.c_str(), mapping.second.infinitive.length(), SQLITE_STATIC); 829 sqlite3_bind_text(ppstmt, 1, mapping.second.infinitive.c_str(), mapping.second.infinitive.length(), SQLITE_TRANSIENT);
779 sqlite3_bind_text(ppstmt, 2, mapping.second.past_tense.c_str(), mapping.second.past_tense.length(), SQLITE_STATIC); 830 sqlite3_bind_text(ppstmt, 2, mapping.second.past_tense.c_str(), mapping.second.past_tense.length(), SQLITE_TRANSIENT);
780 sqlite3_bind_text(ppstmt, 3, mapping.second.past_participle.c_str(), mapping.second.past_participle.length(), SQLITE_STATIC); 831 sqlite3_bind_text(ppstmt, 3, mapping.second.past_participle.c_str(), mapping.second.past_participle.length(), SQLITE_TRANSIENT);
781 sqlite3_bind_text(ppstmt, 4, mapping.second.ing_form.c_str(), mapping.second.ing_form.length(), SQLITE_STATIC); 832 sqlite3_bind_text(ppstmt, 4, mapping.second.ing_form.c_str(), mapping.second.ing_form.length(), SQLITE_TRANSIENT);
782 sqlite3_bind_text(ppstmt, 5, mapping.second.s_form.c_str(), mapping.second.s_form.length(), SQLITE_STATIC); 833 sqlite3_bind_text(ppstmt, 5, mapping.second.s_form.c_str(), mapping.second.s_form.length(), SQLITE_TRANSIENT);
783 834
784 if (sqlite3_step(ppstmt) != SQLITE_DONE) 835 if (sqlite3_step(ppstmt) != SQLITE_DONE)
785 { 836 {
@@ -811,14 +862,26 @@ int main(int argc, char** argv)
811 862
812 for (auto pronunciation : pronunciations[canonical]) 863 for (auto pronunciation : pronunciations[canonical])
813 { 864 {
814 query = "INSERT INTO verb_pronunciations (verb_id, pronunciation) VALUES (?, ?)"; 865 if (!pronunciation.rhyme.empty())
866 {
867 query = "INSERT INTO verb_pronunciations (verb_id, pronunciation, prerhyme, rhyme) VALUES (?, ?, ?, ?)";
868 } else {
869 query = "INSERT INTO verb_pronunciations (verb_id, pronunciation) VALUES (?, ?)";
870 }
871
815 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) 872 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
816 { 873 {
817 db_error(ppdb, query); 874 db_error(ppdb, query);
818 } 875 }
819 876
820 sqlite3_bind_int(ppstmt, 1, rowid); 877 sqlite3_bind_int(ppstmt, 1, rowid);
821 sqlite3_bind_text(ppstmt, 2, pronunciation.c_str(), pronunciation.length(), SQLITE_STATIC); 878 sqlite3_bind_text(ppstmt, 2, pronunciation.phonemes.c_str(), pronunciation.phonemes.length(), SQLITE_TRANSIENT);
879
880 if (!pronunciation.rhyme.empty())
881 {
882 sqlite3_bind_text(ppstmt, 3, pronunciation.prerhyme.c_str(), pronunciation.prerhyme.length(), SQLITE_TRANSIENT);
883 sqlite3_bind_text(ppstmt, 4, pronunciation.rhyme.c_str(), pronunciation.rhyme.length(), SQLITE_TRANSIENT);
884 }
822 885
823 if (sqlite3_step(ppstmt) != SQLITE_DONE) 886 if (sqlite3_step(ppstmt) != SQLITE_DONE)
824 { 887 {
@@ -856,7 +919,7 @@ int main(int argc, char** argv)
856 db_error(ppdb, query); 919 db_error(ppdb, query);
857 } 920 }
858 921
859 sqlite3_bind_blob(ppstmt, 1, rdm.c_str(), rdm.size(), SQLITE_STATIC); 922 sqlite3_bind_blob(ppstmt, 1, rdm.c_str(), rdm.size(), SQLITE_TRANSIENT);
860 923
861 if (sqlite3_step(ppstmt) != SQLITE_DONE) 924 if (sqlite3_step(ppstmt) != SQLITE_DONE)
862 { 925 {
@@ -949,7 +1012,7 @@ int main(int argc, char** argv)
949 } 1012 }
950 1013
951 sqlite3_bind_int(ppstmt, 1, gid); 1014 sqlite3_bind_int(ppstmt, 1, gid);
952 sqlite3_bind_blob(ppstmt, 2, marshall.c_str(), marshall.length(), SQLITE_STATIC); 1015 sqlite3_bind_blob(ppstmt, 2, marshall.c_str(), marshall.length(), SQLITE_TRANSIENT);
953 1016
954 if (sqlite3_step(ppstmt) != SQLITE_DONE) 1017 if (sqlite3_step(ppstmt) != SQLITE_DONE)
955 { 1018 {
@@ -1104,7 +1167,7 @@ int main(int argc, char** argv)
1104 db_error(ppdb, query); 1167 db_error(ppdb, query);
1105 } 1168 }
1106 1169
1107 sqlite3_bind_text(ppstmt, 1, word.c_str(), word.length(), SQLITE_STATIC); 1170 sqlite3_bind_text(ppstmt, 1, word.c_str(), word.length(), SQLITE_TRANSIENT);
1108 switch (synset_id / 100000000) 1171 switch (synset_id / 100000000)
1109 { 1172 {
1110 case 1: // Noun 1173 case 1: // Noun
@@ -1119,7 +1182,7 @@ int main(int argc, char** argv)
1119 1182
1120 if (nouns.count(word) == 1) 1183 if (nouns.count(word) == 1)
1121 { 1184 {
1122 sqlite3_bind_text(ppstmt, 6, nouns[word].plural.c_str(), nouns[word].plural.length(), SQLITE_STATIC); 1185 sqlite3_bind_text(ppstmt, 6, nouns[word].plural.c_str(), nouns[word].plural.length(), SQLITE_TRANSIENT);
1123 } 1186 }
1124 1187
1125 break; 1188 break;
@@ -1132,8 +1195,8 @@ int main(int argc, char** argv)
1132 1195
1133 if (adjectives.count(word) == 1) 1196 if (adjectives.count(word) == 1)
1134 { 1197 {
1135 sqlite3_bind_text(ppstmt, 3, adjectives[word].comparative.c_str(), adjectives[word].comparative.length(), SQLITE_STATIC); 1198 sqlite3_bind_text(ppstmt, 3, adjectives[word].comparative.c_str(), adjectives[word].comparative.length(), SQLITE_TRANSIENT);
1136 sqlite3_bind_text(ppstmt, 4, adjectives[word].superlative.c_str(), adjectives[word].superlative.length(), SQLITE_STATIC); 1199 sqlite3_bind_text(ppstmt, 4, adjectives[word].superlative.c_str(), adjectives[word].superlative.length(), SQLITE_TRANSIENT);
1137 } 1200 }
1138 1201
1139 break; 1202 break;
@@ -1173,21 +1236,36 @@ int main(int argc, char** argv)
1173 { 1236 {
1174 case 1: // Noun 1237 case 1: // Noun
1175 { 1238 {
1176 query = "INSERT INTO noun_pronunciations (noun_id, pronunciation) VALUES (?, ?)"; 1239 if (!pronunciation.rhyme.empty())
1240 {
1241 query = "INSERT INTO noun_pronunciations (noun_id, pronunciation, prerhyme, rhyme) VALUES (?, ?, ?, ?)";
1242 } else {
1243 query = "INSERT INTO noun_pronunciations (noun_id, pronunciation) VALUES (?, ?)";
1244 }
1177 1245
1178 break; 1246 break;
1179 } 1247 }
1180 1248
1181 case 3: // Adjective 1249 case 3: // Adjective
1182 { 1250 {
1183 query = "INSERT INTO adjective_pronunciations (adjective_id, pronunciation) VALUES (?, ?)"; 1251 if (!pronunciation.rhyme.empty())
1252 {
1253 query = "INSERT INTO adjective_pronunciations (adjective_id, pronunciation, prerhyme, rhyme) VALUES (?, ?, ?, ?)";
1254 } else {
1255 query = "INSERT INTO adjective_pronunciations (adjective_id, pronunciation) VALUES (?, ?)";
1256 }
1184 1257
1185 break; 1258 break;
1186 } 1259 }
1187 1260
1188 case 4: // Adverb 1261 case 4: // Adverb
1189 { 1262 {
1190 query = "INSERT INTO adverb_pronunciations (adverb_id, pronunciation) VALUES (?, ?)"; 1263 if (!pronunciation.rhyme.empty())
1264 {
1265 query = "INSERT INTO adverb_pronunciations (adverb_id, pronunciation, prerhyme, rhyme) VALUES (?, ?, ?, ?)";
1266 } else {
1267 query = "INSERT INTO adverb_pronunciations (adverb_id, pronunciation) VALUES (?, ?)";
1268 }
1191 1269
1192 break; 1270 break;
1193 } 1271 }
@@ -1199,7 +1277,13 @@ int main(int argc, char** argv)
1199 } 1277 }
1200 1278
1201 sqlite3_bind_int(ppstmt, 1, rowid); 1279 sqlite3_bind_int(ppstmt, 1, rowid);
1202 sqlite3_bind_text(ppstmt, 2, pronunciation.c_str(), pronunciation.length(), SQLITE_STATIC); 1280 sqlite3_bind_text(ppstmt, 2, pronunciation.phonemes.c_str(), pronunciation.phonemes.length(), SQLITE_TRANSIENT);
1281
1282 if (!pronunciation.rhyme.empty())
1283 {
1284 sqlite3_bind_text(ppstmt, 3, pronunciation.prerhyme.c_str(), pronunciation.prerhyme.length(), SQLITE_TRANSIENT);
1285 sqlite3_bind_text(ppstmt, 4, pronunciation.rhyme.c_str(), pronunciation.rhyme.length(), SQLITE_TRANSIENT);
1286 }
1203 1287
1204 if (sqlite3_step(ppstmt) != SQLITE_DONE) 1288 if (sqlite3_step(ppstmt) != SQLITE_DONE)
1205 { 1289 {
@@ -2188,7 +2272,7 @@ int main(int argc, char** argv)
2188 db_error(ppdb, query); 2272 db_error(ppdb, query);
2189 } 2273 }
2190 2274
2191 sqlite3_bind_text(ppstmt, 1, syn.c_str(), 1, SQLITE_STATIC); 2275 sqlite3_bind_text(ppstmt, 1, syn.c_str(), 1, SQLITE_TRANSIENT);
2192 sqlite3_bind_int(ppstmt, 2, wn[synset_id][wnum]); 2276 sqlite3_bind_int(ppstmt, 2, wn[synset_id][wnum]);
2193 2277
2194 if (sqlite3_step(ppstmt) != SQLITE_DONE) 2278 if (sqlite3_step(ppstmt) != SQLITE_DONE)