diff options
author | Kelly Rauchenberger <fefferburbia@gmail.com> | 2016-05-02 22:45:11 -0400 |
---|---|---|
committer | Kelly Rauchenberger <fefferburbia@gmail.com> | 2016-05-02 22:45:11 -0400 |
commit | bd398509575af0362e53ce6b805eaa210406b9fd (patch) | |
tree | e699624ca6934f591241d5dfa95a91b839ce6bc7 /generator | |
parent | 499dbc410174602a5985bd75b600fbec5cd086f1 (diff) | |
parent | 965a3206df834f846f2c560438c80a707dcee4cb (diff) | |
download | verbly-bd398509575af0362e53ce6b805eaa210406b9fd.tar.gz verbly-bd398509575af0362e53ce6b805eaa210406b9fd.tar.bz2 verbly-bd398509575af0362e53ce6b805eaa210406b9fd.zip |
Merge branch 'master' of https://github.com/hatkirby/verbly
Diffstat (limited to 'generator')
-rw-r--r-- | generator/generator.cpp | 190 | ||||
-rw-r--r-- | generator/schema.sql | 12 |
2 files changed, 167 insertions, 35 deletions
diff --git a/generator/generator.cpp b/generator/generator.cpp index 6fbbfb8..3201154 100644 --- a/generator/generator.cpp +++ b/generator/generator.cpp | |||
@@ -76,12 +76,24 @@ struct group_t { | |||
76 | std::list<std::list<framepart_t>> frames; | 76 | std::list<std::list<framepart_t>> frames; |
77 | }; | 77 | }; |
78 | 78 | ||
79 | struct pronunciation_t { | ||
80 | std::string phonemes; | ||
81 | std::string prerhyme; | ||
82 | std::string rhyme; | ||
83 | |||
84 | bool operator<(const pronunciation_t& other) const | ||
85 | { | ||
86 | return phonemes < other.phonemes; | ||
87 | } | ||
88 | }; | ||
89 | |||
79 | std::map<std::string, group_t> groups; | 90 | std::map<std::string, group_t> groups; |
80 | std::map<std::string, verb_t> verbs; | 91 | std::map<std::string, verb_t> verbs; |
81 | std::map<std::string, adjective_t> adjectives; | 92 | std::map<std::string, adjective_t> adjectives; |
82 | std::map<std::string, noun_t> nouns; | 93 | std::map<std::string, noun_t> nouns; |
83 | std::map<int, std::map<int, int>> wn; | 94 | std::map<int, std::map<int, int>> wn; |
84 | std::map<std::string, std::set<std::string>> pronunciations; | 95 | std::map<int, int> images; |
96 | std::map<std::string, std::set<pronunciation_t>> pronunciations; | ||
85 | 97 | ||
86 | void print_usage() | 98 | void print_usage() |
87 | { | 99 | { |
@@ -89,10 +101,10 @@ void print_usage() | |||
89 | std::cout << "-------------------------" << std::endl; | 101 | std::cout << "-------------------------" << std::endl; |
90 | std::cout << "Requires exactly six arguments." << std::endl; | 102 | std::cout << "Requires exactly six arguments." << std::endl; |
91 | std::cout << "1. The path to a VerbNet data directory." << std::endl; | 103 | std::cout << "1. The path to a VerbNet data directory." << std::endl; |
92 | std::cout << "2. The path to a SemLink vnpbMappings file." << std::endl; | 104 | std::cout << "2. The path to an AGID infl.txt file." << std::endl; |
93 | std::cout << "3. The path to an AGID infl.txt file." << std::endl; | 105 | std::cout << "3. The path to a WordNet prolog data directory." << std::endl; |
94 | std::cout << "4. The path to a WordNet prolog data directory." << std::endl; | 106 | std::cout << "4. The path to a CMUDICT pronunciation file." << std::endl; |
95 | std::cout << "5. The path to a CMUDICT pronunciation file." << std::endl; | 107 | std::cout << "5. The path to an ImageNet urls.txt file." << std::endl; |
96 | std::cout << "6. Datafile output path." << std::endl; | 108 | std::cout << "6. Datafile output path." << std::endl; |
97 | 109 | ||
98 | exit(1); | 110 | exit(1); |
@@ -431,10 +443,10 @@ int main(int argc, char** argv) | |||
431 | // Get verbs from AGID | 443 | // Get verbs from AGID |
432 | std::cout << "Reading inflections..." << std::endl; | 444 | std::cout << "Reading inflections..." << std::endl; |
433 | 445 | ||
434 | std::ifstream agidfile(argv[3]); | 446 | std::ifstream agidfile(argv[2]); |
435 | if (!agidfile.is_open()) | 447 | if (!agidfile.is_open()) |
436 | { | 448 | { |
437 | std::cout << "Could not open AGID file: " << argv[3] << std::endl; | 449 | std::cout << "Could not open AGID file: " << argv[2] << std::endl; |
438 | print_usage(); | 450 | print_usage(); |
439 | } | 451 | } |
440 | 452 | ||
@@ -562,10 +574,10 @@ int main(int argc, char** argv) | |||
562 | // Pronounciations | 574 | // Pronounciations |
563 | std::cout << "Reading pronunciations..." << std::endl; | 575 | std::cout << "Reading pronunciations..." << std::endl; |
564 | 576 | ||
565 | std::ifstream pronfile(argv[5]); | 577 | std::ifstream pronfile(argv[4]); |
566 | if (!pronfile.is_open()) | 578 | if (!pronfile.is_open()) |
567 | { | 579 | { |
568 | std::cout << "Could not open CMUDICT file: " << argv[5] << std::endl; | 580 | std::cout << "Could not open CMUDICT file: " << argv[4] << std::endl; |
569 | print_usage(); | 581 | print_usage(); |
570 | } | 582 | } |
571 | 583 | ||
@@ -589,10 +601,80 @@ int main(int argc, char** argv) | |||
589 | std::string canonical(phoneme_data[1]); | 601 | std::string canonical(phoneme_data[1]); |
590 | std::transform(std::begin(canonical), std::end(canonical), std::begin(canonical), ::tolower); | 602 | std::transform(std::begin(canonical), std::end(canonical), std::begin(canonical), ::tolower); |
591 | 603 | ||
592 | pronunciations[canonical].insert(phoneme_data[2]); | 604 | std::string phonemes = phoneme_data[2]; |
605 | auto phoneme_set = verbly::split<std::list<std::string>>(phonemes, " "); | ||
606 | auto phemstrt = std::find_if(std::begin(phoneme_set), std::end(phoneme_set), [] (std::string phoneme) { | ||
607 | return phoneme.find("1") != std::string::npos; | ||
608 | }); | ||
609 | |||
610 | pronunciation_t p; | ||
611 | p.phonemes = phonemes; | ||
612 | if (phemstrt != std::end(phoneme_set)) | ||
613 | { | ||
614 | std::stringstream rhymer; | ||
615 | for (auto it = phemstrt; it != std::end(phoneme_set); it++) | ||
616 | { | ||
617 | std::string naked; | ||
618 | std::remove_copy_if(std::begin(*it), std::end(*it), std::back_inserter(naked), [] (char ch) { | ||
619 | return isdigit(ch); | ||
620 | }); | ||
621 | |||
622 | if (it != phemstrt) | ||
623 | { | ||
624 | rhymer << " "; | ||
625 | } | ||
626 | |||
627 | rhymer << naked; | ||
628 | } | ||
629 | |||
630 | p.rhyme = rhymer.str(); | ||
631 | |||
632 | if (phemstrt != std::begin(phoneme_set)) | ||
633 | { | ||
634 | phemstrt--; | ||
635 | p.prerhyme = *phemstrt; | ||
636 | } else { | ||
637 | p.prerhyme = ""; | ||
638 | } | ||
639 | } else { | ||
640 | p.prerhyme = ""; | ||
641 | p.rhyme = ""; | ||
642 | } | ||
643 | |||
644 | pronunciations[canonical].insert(p); | ||
645 | } | ||
646 | } | ||
647 | |||
648 | // Images | ||
649 | std::cout << "Reading images..." << std::endl; | ||
650 | |||
651 | std::ifstream imagefile(argv[5]); | ||
652 | if (!imagefile.is_open()) | ||
653 | { | ||
654 | std::cout << "Could not open ImageNet file: " << argv[5] << std::endl; | ||
655 | print_usage(); | ||
656 | } | ||
657 | |||
658 | for (;;) | ||
659 | { | ||
660 | std::string line; | ||
661 | if (!getline(imagefile, line)) | ||
662 | { | ||
663 | break; | ||
593 | } | 664 | } |
665 | |||
666 | if (line.back() == '\r') | ||
667 | { | ||
668 | line.pop_back(); | ||
669 | } | ||
670 | |||
671 | std::string wnid_s = line.substr(1, 8); | ||
672 | int wnid = stoi(wnid_s) + 100000000; | ||
673 | images[wnid]++; | ||
594 | } | 674 | } |
595 | 675 | ||
676 | imagefile.close(); | ||
677 | |||
596 | // Start writing output | 678 | // Start writing output |
597 | std::cout << "Writing schema..." << std::endl; | 679 | std::cout << "Writing schema..." << std::endl; |
598 | 680 | ||
@@ -689,7 +771,7 @@ int main(int argc, char** argv) | |||
689 | db_error(ppdb, query); | 771 | db_error(ppdb, query); |
690 | } | 772 | } |
691 | 773 | ||
692 | sqlite3_bind_text(ppstmt, 1, prep.c_str(), prep.length(), SQLITE_STATIC); | 774 | sqlite3_bind_text(ppstmt, 1, prep.c_str(), prep.length(), SQLITE_TRANSIENT); |
693 | 775 | ||
694 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | 776 | if (sqlite3_step(ppstmt) != SQLITE_DONE) |
695 | { | 777 | { |
@@ -721,7 +803,7 @@ int main(int argc, char** argv) | |||
721 | } | 803 | } |
722 | 804 | ||
723 | sqlite3_bind_int(ppstmt, 1, rowid); | 805 | sqlite3_bind_int(ppstmt, 1, rowid); |
724 | sqlite3_bind_text(ppstmt, 2, group.c_str(), group.length(), SQLITE_STATIC); | 806 | sqlite3_bind_text(ppstmt, 2, group.c_str(), group.length(), SQLITE_TRANSIENT); |
725 | 807 | ||
726 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | 808 | if (sqlite3_step(ppstmt) != SQLITE_DONE) |
727 | { | 809 | { |
@@ -744,11 +826,11 @@ int main(int argc, char** argv) | |||
744 | db_error(ppdb, query); | 826 | db_error(ppdb, query); |
745 | } | 827 | } |
746 | 828 | ||
747 | sqlite3_bind_text(ppstmt, 1, mapping.second.infinitive.c_str(), mapping.second.infinitive.length(), SQLITE_STATIC); | 829 | sqlite3_bind_text(ppstmt, 1, mapping.second.infinitive.c_str(), mapping.second.infinitive.length(), SQLITE_TRANSIENT); |
748 | sqlite3_bind_text(ppstmt, 2, mapping.second.past_tense.c_str(), mapping.second.past_tense.length(), SQLITE_STATIC); | 830 | sqlite3_bind_text(ppstmt, 2, mapping.second.past_tense.c_str(), mapping.second.past_tense.length(), SQLITE_TRANSIENT); |
749 | sqlite3_bind_text(ppstmt, 3, mapping.second.past_participle.c_str(), mapping.second.past_participle.length(), SQLITE_STATIC); | 831 | sqlite3_bind_text(ppstmt, 3, mapping.second.past_participle.c_str(), mapping.second.past_participle.length(), SQLITE_TRANSIENT); |
750 | sqlite3_bind_text(ppstmt, 4, mapping.second.ing_form.c_str(), mapping.second.ing_form.length(), SQLITE_STATIC); | 832 | sqlite3_bind_text(ppstmt, 4, mapping.second.ing_form.c_str(), mapping.second.ing_form.length(), SQLITE_TRANSIENT); |
751 | sqlite3_bind_text(ppstmt, 5, mapping.second.s_form.c_str(), mapping.second.s_form.length(), SQLITE_STATIC); | 833 | sqlite3_bind_text(ppstmt, 5, mapping.second.s_form.c_str(), mapping.second.s_form.length(), SQLITE_TRANSIENT); |
752 | 834 | ||
753 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | 835 | if (sqlite3_step(ppstmt) != SQLITE_DONE) |
754 | { | 836 | { |
@@ -780,14 +862,26 @@ int main(int argc, char** argv) | |||
780 | 862 | ||
781 | for (auto pronunciation : pronunciations[canonical]) | 863 | for (auto pronunciation : pronunciations[canonical]) |
782 | { | 864 | { |
783 | query = "INSERT INTO verb_pronunciations (verb_id, pronunciation) VALUES (?, ?)"; | 865 | if (!pronunciation.rhyme.empty()) |
866 | { | ||
867 | query = "INSERT INTO verb_pronunciations (verb_id, pronunciation, prerhyme, rhyme) VALUES (?, ?, ?, ?)"; | ||
868 | } else { | ||
869 | query = "INSERT INTO verb_pronunciations (verb_id, pronunciation) VALUES (?, ?)"; | ||
870 | } | ||
871 | |||
784 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | 872 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) |
785 | { | 873 | { |
786 | db_error(ppdb, query); | 874 | db_error(ppdb, query); |
787 | } | 875 | } |
788 | 876 | ||
789 | sqlite3_bind_int(ppstmt, 1, rowid); | 877 | sqlite3_bind_int(ppstmt, 1, rowid); |
790 | sqlite3_bind_text(ppstmt, 2, pronunciation.c_str(), pronunciation.length(), SQLITE_STATIC); | 878 | sqlite3_bind_text(ppstmt, 2, pronunciation.phonemes.c_str(), pronunciation.phonemes.length(), SQLITE_TRANSIENT); |
879 | |||
880 | if (!pronunciation.rhyme.empty()) | ||
881 | { | ||
882 | sqlite3_bind_text(ppstmt, 3, pronunciation.prerhyme.c_str(), pronunciation.prerhyme.length(), SQLITE_TRANSIENT); | ||
883 | sqlite3_bind_text(ppstmt, 4, pronunciation.rhyme.c_str(), pronunciation.rhyme.length(), SQLITE_TRANSIENT); | ||
884 | } | ||
791 | 885 | ||
792 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | 886 | if (sqlite3_step(ppstmt) != SQLITE_DONE) |
793 | { | 887 | { |
@@ -825,7 +919,7 @@ int main(int argc, char** argv) | |||
825 | db_error(ppdb, query); | 919 | db_error(ppdb, query); |
826 | } | 920 | } |
827 | 921 | ||
828 | sqlite3_bind_blob(ppstmt, 1, rdm.c_str(), rdm.size(), SQLITE_STATIC); | 922 | sqlite3_bind_blob(ppstmt, 1, rdm.c_str(), rdm.size(), SQLITE_TRANSIENT); |
829 | 923 | ||
830 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | 924 | if (sqlite3_step(ppstmt) != SQLITE_DONE) |
831 | { | 925 | { |
@@ -918,7 +1012,7 @@ int main(int argc, char** argv) | |||
918 | } | 1012 | } |
919 | 1013 | ||
920 | sqlite3_bind_int(ppstmt, 1, gid); | 1014 | sqlite3_bind_int(ppstmt, 1, gid); |
921 | sqlite3_bind_blob(ppstmt, 2, marshall.c_str(), marshall.length(), SQLITE_STATIC); | 1015 | sqlite3_bind_blob(ppstmt, 2, marshall.c_str(), marshall.length(), SQLITE_TRANSIENT); |
922 | 1016 | ||
923 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | 1017 | if (sqlite3_step(ppstmt) != SQLITE_DONE) |
924 | { | 1018 | { |
@@ -972,7 +1066,7 @@ int main(int argc, char** argv) | |||
972 | // - sa: specification (e.g. inaccurate (general) can mean imprecise or incorrect (specific)) | 1066 | // - sa: specification (e.g. inaccurate (general) can mean imprecise or incorrect (specific)) |
973 | // - sim: synonymy (e.g. cheerful/happy, happy/cheerful) | 1067 | // - sim: synonymy (e.g. cheerful/happy, happy/cheerful) |
974 | // - syntax: positioning flags for some adjectives | 1068 | // - syntax: positioning flags for some adjectives |
975 | std::string wnpref {argv[4]}; | 1069 | std::string wnpref {argv[3]}; |
976 | if (wnpref.back() != '/') | 1070 | if (wnpref.back() != '/') |
977 | { | 1071 | { |
978 | wnpref += '/'; | 1072 | wnpref += '/'; |
@@ -1009,7 +1103,7 @@ int main(int argc, char** argv) | |||
1009 | { | 1103 | { |
1010 | ppgs.update(); | 1104 | ppgs.update(); |
1011 | 1105 | ||
1012 | std::regex relation("^s\\(([134]\\d{8}),(\\d+),'([\\w ]+)',"); | 1106 | std::regex relation("^s\\(([134]\\d{8}),(\\d+),'(.+)',\\w,\\d+,\\d+\\)\\.$"); |
1013 | std::smatch relation_data; | 1107 | std::smatch relation_data; |
1014 | if (!std::regex_search(line, relation_data, relation)) | 1108 | if (!std::regex_search(line, relation_data, relation)) |
1015 | { | 1109 | { |
@@ -1019,6 +1113,11 @@ int main(int argc, char** argv) | |||
1019 | int synset_id = stoi(relation_data[1]); | 1113 | int synset_id = stoi(relation_data[1]); |
1020 | int wnum = stoi(relation_data[2]); | 1114 | int wnum = stoi(relation_data[2]); |
1021 | std::string word = relation_data[3]; | 1115 | std::string word = relation_data[3]; |
1116 | size_t word_it; | ||
1117 | while ((word_it = word.find("''")) != std::string::npos) | ||
1118 | { | ||
1119 | word.erase(word_it, 1); | ||
1120 | } | ||
1022 | 1121 | ||
1023 | std::string query; | 1122 | std::string query; |
1024 | switch (synset_id / 100000000) | 1123 | switch (synset_id / 100000000) |
@@ -1027,9 +1126,9 @@ int main(int argc, char** argv) | |||
1027 | { | 1126 | { |
1028 | if (nouns.count(word) == 1) | 1127 | if (nouns.count(word) == 1) |
1029 | { | 1128 | { |
1030 | query = "INSERT INTO nouns (singular, proper, complexity, plural) VALUES (?, ?, ?, ?)"; | 1129 | query = "INSERT INTO nouns (singular, proper, complexity, images, wnid, plural) VALUES (?, ?, ?, ?, ?, ?)"; |
1031 | } else { | 1130 | } else { |
1032 | query = "INSERT INTO nouns (singular, proper, complexity) VALUES (?, ?, ?)"; | 1131 | query = "INSERT INTO nouns (singular, proper, complexity, images, wnid) VALUES (?, ?, ?, ?, ?)"; |
1033 | } | 1132 | } |
1034 | 1133 | ||
1035 | break; | 1134 | break; |
@@ -1073,7 +1172,7 @@ int main(int argc, char** argv) | |||
1073 | db_error(ppdb, query); | 1172 | db_error(ppdb, query); |
1074 | } | 1173 | } |
1075 | 1174 | ||
1076 | sqlite3_bind_text(ppstmt, 1, word.c_str(), word.length(), SQLITE_STATIC); | 1175 | sqlite3_bind_text(ppstmt, 1, word.c_str(), word.length(), SQLITE_TRANSIENT); |
1077 | switch (synset_id / 100000000) | 1176 | switch (synset_id / 100000000) |
1078 | { | 1177 | { |
1079 | case 1: // Noun | 1178 | case 1: // Noun |
@@ -1083,10 +1182,12 @@ int main(int argc, char** argv) | |||
1083 | }) ? 1 : 0)); | 1182 | }) ? 1 : 0)); |
1084 | 1183 | ||
1085 | sqlite3_bind_int(ppstmt, 3, verbly::split<std::list<std::string>>(word, " ").size()); | 1184 | sqlite3_bind_int(ppstmt, 3, verbly::split<std::list<std::string>>(word, " ").size()); |
1185 | sqlite3_bind_int(ppstmt, 4, images[synset_id]); | ||
1186 | sqlite3_bind_int(ppstmt, 5, synset_id); | ||
1086 | 1187 | ||
1087 | if (nouns.count(word) == 1) | 1188 | if (nouns.count(word) == 1) |
1088 | { | 1189 | { |
1089 | sqlite3_bind_text(ppstmt, 4, nouns[word].plural.c_str(), nouns[word].plural.length(), SQLITE_STATIC); | 1190 | sqlite3_bind_text(ppstmt, 6, nouns[word].plural.c_str(), nouns[word].plural.length(), SQLITE_TRANSIENT); |
1090 | } | 1191 | } |
1091 | 1192 | ||
1092 | break; | 1193 | break; |
@@ -1099,8 +1200,8 @@ int main(int argc, char** argv) | |||
1099 | 1200 | ||
1100 | if (adjectives.count(word) == 1) | 1201 | if (adjectives.count(word) == 1) |
1101 | { | 1202 | { |
1102 | sqlite3_bind_text(ppstmt, 3, adjectives[word].comparative.c_str(), adjectives[word].comparative.length(), SQLITE_STATIC); | 1203 | sqlite3_bind_text(ppstmt, 3, adjectives[word].comparative.c_str(), adjectives[word].comparative.length(), SQLITE_TRANSIENT); |
1103 | sqlite3_bind_text(ppstmt, 4, adjectives[word].superlative.c_str(), adjectives[word].superlative.length(), SQLITE_STATIC); | 1204 | sqlite3_bind_text(ppstmt, 4, adjectives[word].superlative.c_str(), adjectives[word].superlative.length(), SQLITE_TRANSIENT); |
1104 | } | 1205 | } |
1105 | 1206 | ||
1106 | break; | 1207 | break; |
@@ -1140,21 +1241,36 @@ int main(int argc, char** argv) | |||
1140 | { | 1241 | { |
1141 | case 1: // Noun | 1242 | case 1: // Noun |
1142 | { | 1243 | { |
1143 | query = "INSERT INTO noun_pronunciations (noun_id, pronunciation) VALUES (?, ?)"; | 1244 | if (!pronunciation.rhyme.empty()) |
1245 | { | ||
1246 | query = "INSERT INTO noun_pronunciations (noun_id, pronunciation, prerhyme, rhyme) VALUES (?, ?, ?, ?)"; | ||
1247 | } else { | ||
1248 | query = "INSERT INTO noun_pronunciations (noun_id, pronunciation) VALUES (?, ?)"; | ||
1249 | } | ||
1144 | 1250 | ||
1145 | break; | 1251 | break; |
1146 | } | 1252 | } |
1147 | 1253 | ||
1148 | case 3: // Adjective | 1254 | case 3: // Adjective |
1149 | { | 1255 | { |
1150 | query = "INSERT INTO adjective_pronunciations (adjective_id, pronunciation) VALUES (?, ?)"; | 1256 | if (!pronunciation.rhyme.empty()) |
1257 | { | ||
1258 | query = "INSERT INTO adjective_pronunciations (adjective_id, pronunciation, prerhyme, rhyme) VALUES (?, ?, ?, ?)"; | ||
1259 | } else { | ||
1260 | query = "INSERT INTO adjective_pronunciations (adjective_id, pronunciation) VALUES (?, ?)"; | ||
1261 | } | ||
1151 | 1262 | ||
1152 | break; | 1263 | break; |
1153 | } | 1264 | } |
1154 | 1265 | ||
1155 | case 4: // Adverb | 1266 | case 4: // Adverb |
1156 | { | 1267 | { |
1157 | query = "INSERT INTO adverb_pronunciations (adverb_id, pronunciation) VALUES (?, ?)"; | 1268 | if (!pronunciation.rhyme.empty()) |
1269 | { | ||
1270 | query = "INSERT INTO adverb_pronunciations (adverb_id, pronunciation, prerhyme, rhyme) VALUES (?, ?, ?, ?)"; | ||
1271 | } else { | ||
1272 | query = "INSERT INTO adverb_pronunciations (adverb_id, pronunciation) VALUES (?, ?)"; | ||
1273 | } | ||
1158 | 1274 | ||
1159 | break; | 1275 | break; |
1160 | } | 1276 | } |
@@ -1166,7 +1282,13 @@ int main(int argc, char** argv) | |||
1166 | } | 1282 | } |
1167 | 1283 | ||
1168 | sqlite3_bind_int(ppstmt, 1, rowid); | 1284 | sqlite3_bind_int(ppstmt, 1, rowid); |
1169 | sqlite3_bind_text(ppstmt, 2, pronunciation.c_str(), pronunciation.length(), SQLITE_STATIC); | 1285 | sqlite3_bind_text(ppstmt, 2, pronunciation.phonemes.c_str(), pronunciation.phonemes.length(), SQLITE_TRANSIENT); |
1286 | |||
1287 | if (!pronunciation.rhyme.empty()) | ||
1288 | { | ||
1289 | sqlite3_bind_text(ppstmt, 3, pronunciation.prerhyme.c_str(), pronunciation.prerhyme.length(), SQLITE_TRANSIENT); | ||
1290 | sqlite3_bind_text(ppstmt, 4, pronunciation.rhyme.c_str(), pronunciation.rhyme.length(), SQLITE_TRANSIENT); | ||
1291 | } | ||
1170 | 1292 | ||
1171 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | 1293 | if (sqlite3_step(ppstmt) != SQLITE_DONE) |
1172 | { | 1294 | { |
@@ -2155,7 +2277,7 @@ int main(int argc, char** argv) | |||
2155 | db_error(ppdb, query); | 2277 | db_error(ppdb, query); |
2156 | } | 2278 | } |
2157 | 2279 | ||
2158 | sqlite3_bind_text(ppstmt, 1, syn.c_str(), 1, SQLITE_STATIC); | 2280 | sqlite3_bind_text(ppstmt, 1, syn.c_str(), 1, SQLITE_TRANSIENT); |
2159 | sqlite3_bind_int(ppstmt, 2, wn[synset_id][wnum]); | 2281 | sqlite3_bind_int(ppstmt, 2, wn[synset_id][wnum]); |
2160 | 2282 | ||
2161 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | 2283 | if (sqlite3_step(ppstmt) != SQLITE_DONE) |
diff --git a/generator/schema.sql b/generator/schema.sql index f2445f0..1836c62 100644 --- a/generator/schema.sql +++ b/generator/schema.sql | |||
@@ -55,7 +55,9 @@ CREATE TABLE `nouns` ( | |||
55 | `singular` VARCHAR(32) NOT NULL, | 55 | `singular` VARCHAR(32) NOT NULL, |
56 | `plural` VARCHAR(32), | 56 | `plural` VARCHAR(32), |
57 | `proper` INTEGER(1) NOT NULL, | 57 | `proper` INTEGER(1) NOT NULL, |
58 | `complexity` INTEGER NOT NULL | 58 | `complexity` INTEGER NOT NULL, |
59 | `images` INTEGER NOT NULL, | ||
60 | `wnid` INTEGER NOT NULL | ||
59 | ); | 61 | ); |
60 | 62 | ||
61 | DROP TABLE IF EXISTS `hypernymy`; | 63 | DROP TABLE IF EXISTS `hypernymy`; |
@@ -182,6 +184,8 @@ DROP TABLE IF EXISTS `noun_pronunciations`; | |||
182 | CREATE TABLE `noun_pronunciations` ( | 184 | CREATE TABLE `noun_pronunciations` ( |
183 | `noun_id` INTEGER NOT NULL, | 185 | `noun_id` INTEGER NOT NULL, |
184 | `pronunciation` VARCHAR(64) NOT NULL, | 186 | `pronunciation` VARCHAR(64) NOT NULL, |
187 | `prerhyme` VARCHAR(8), | ||
188 | `rhyme` VARCHAR(64), | ||
185 | FOREIGN KEY (`noun_id`) REFERENCES `nouns`(`noun_id`) | 189 | FOREIGN KEY (`noun_id`) REFERENCES `nouns`(`noun_id`) |
186 | ); | 190 | ); |
187 | 191 | ||
@@ -189,6 +193,8 @@ DROP TABLE IF EXISTS `verb_pronunciations`; | |||
189 | CREATE TABLE `verb_pronunciations` ( | 193 | CREATE TABLE `verb_pronunciations` ( |
190 | `verb_id` INTEGER NOT NULL, | 194 | `verb_id` INTEGER NOT NULL, |
191 | `pronunciation` VARCHAR(64) NOT NULL, | 195 | `pronunciation` VARCHAR(64) NOT NULL, |
196 | `prerhyme` VARCHAR(8), | ||
197 | `rhyme` VARCHAR(64), | ||
192 | FOREIGN KEY (`verb_id`) REFERENCES `verbs`(`verb_id`) | 198 | FOREIGN KEY (`verb_id`) REFERENCES `verbs`(`verb_id`) |
193 | ); | 199 | ); |
194 | 200 | ||
@@ -196,6 +202,8 @@ DROP TABLE IF EXISTS `adjective_pronunciations`; | |||
196 | CREATE TABLE `adjective_pronunciations` ( | 202 | CREATE TABLE `adjective_pronunciations` ( |
197 | `adjective_id` INTEGER NOT NULL, | 203 | `adjective_id` INTEGER NOT NULL, |
198 | `pronunciation` VARCHAR(64) NOT NULL, | 204 | `pronunciation` VARCHAR(64) NOT NULL, |
205 | `prerhyme` VARCHAR(8), | ||
206 | `rhyme` VARCHAR(64), | ||
199 | FOREIGN KEY (`adjective_id`) REFERENCES `adjectives`(`adjective_id`) | 207 | FOREIGN KEY (`adjective_id`) REFERENCES `adjectives`(`adjective_id`) |
200 | ); | 208 | ); |
201 | 209 | ||
@@ -203,6 +211,8 @@ DROP TABLE IF EXISTS `adverb_pronunciations`; | |||
203 | CREATE TABLE `adverb_pronunciations` ( | 211 | CREATE TABLE `adverb_pronunciations` ( |
204 | `adverb_id` INTEGER NOT NULL, | 212 | `adverb_id` INTEGER NOT NULL, |
205 | `pronunciation` VARCHAR(64) NOT NULL, | 213 | `pronunciation` VARCHAR(64) NOT NULL, |
214 | `prerhyme` VARCHAR(8), | ||
215 | `rhyme` VARCHAR(64), | ||
206 | FOREIGN KEY (`adverb_id`) REFERENCES `adverbs`(`adverb_id`) | 216 | FOREIGN KEY (`adverb_id`) REFERENCES `adverbs`(`adverb_id`) |
207 | ); | 217 | ); |
208 | 218 | ||