diff options
| author | Kelly Rauchenberger <fefferburbia@gmail.com> | 2016-05-02 22:45:11 -0400 |
|---|---|---|
| committer | Kelly Rauchenberger <fefferburbia@gmail.com> | 2016-05-02 22:45:11 -0400 |
| commit | bd398509575af0362e53ce6b805eaa210406b9fd (patch) | |
| tree | e699624ca6934f591241d5dfa95a91b839ce6bc7 /generator | |
| parent | 499dbc410174602a5985bd75b600fbec5cd086f1 (diff) | |
| parent | 965a3206df834f846f2c560438c80a707dcee4cb (diff) | |
| download | verbly-bd398509575af0362e53ce6b805eaa210406b9fd.tar.gz verbly-bd398509575af0362e53ce6b805eaa210406b9fd.tar.bz2 verbly-bd398509575af0362e53ce6b805eaa210406b9fd.zip | |
Merge branch 'master' of https://github.com/hatkirby/verbly
Diffstat (limited to 'generator')
| -rw-r--r-- | generator/generator.cpp | 190 | ||||
| -rw-r--r-- | generator/schema.sql | 12 |
2 files changed, 167 insertions, 35 deletions
| diff --git a/generator/generator.cpp b/generator/generator.cpp index 6fbbfb8..3201154 100644 --- a/generator/generator.cpp +++ b/generator/generator.cpp | |||
| @@ -76,12 +76,24 @@ struct group_t { | |||
| 76 | std::list<std::list<framepart_t>> frames; | 76 | std::list<std::list<framepart_t>> frames; |
| 77 | }; | 77 | }; |
| 78 | 78 | ||
| 79 | struct pronunciation_t { | ||
| 80 | std::string phonemes; | ||
| 81 | std::string prerhyme; | ||
| 82 | std::string rhyme; | ||
| 83 | |||
| 84 | bool operator<(const pronunciation_t& other) const | ||
| 85 | { | ||
| 86 | return phonemes < other.phonemes; | ||
| 87 | } | ||
| 88 | }; | ||
| 89 | |||
| 79 | std::map<std::string, group_t> groups; | 90 | std::map<std::string, group_t> groups; |
| 80 | std::map<std::string, verb_t> verbs; | 91 | std::map<std::string, verb_t> verbs; |
| 81 | std::map<std::string, adjective_t> adjectives; | 92 | std::map<std::string, adjective_t> adjectives; |
| 82 | std::map<std::string, noun_t> nouns; | 93 | std::map<std::string, noun_t> nouns; |
| 83 | std::map<int, std::map<int, int>> wn; | 94 | std::map<int, std::map<int, int>> wn; |
| 84 | std::map<std::string, std::set<std::string>> pronunciations; | 95 | std::map<int, int> images; |
| 96 | std::map<std::string, std::set<pronunciation_t>> pronunciations; | ||
| 85 | 97 | ||
| 86 | void print_usage() | 98 | void print_usage() |
| 87 | { | 99 | { |
| @@ -89,10 +101,10 @@ void print_usage() | |||
| 89 | std::cout << "-------------------------" << std::endl; | 101 | std::cout << "-------------------------" << std::endl; |
| 90 | std::cout << "Requires exactly six arguments." << std::endl; | 102 | std::cout << "Requires exactly six arguments." << std::endl; |
| 91 | std::cout << "1. The path to a VerbNet data directory." << std::endl; | 103 | std::cout << "1. The path to a VerbNet data directory." << std::endl; |
| 92 | std::cout << "2. The path to a SemLink vnpbMappings file." << std::endl; | 104 | std::cout << "2. The path to an AGID infl.txt file." << std::endl; |
| 93 | std::cout << "3. The path to an AGID infl.txt file." << std::endl; | 105 | std::cout << "3. The path to a WordNet prolog data directory." << std::endl; |
| 94 | std::cout << "4. The path to a WordNet prolog data directory." << std::endl; | 106 | std::cout << "4. The path to a CMUDICT pronunciation file." << std::endl; |
| 95 | std::cout << "5. The path to a CMUDICT pronunciation file." << std::endl; | 107 | std::cout << "5. The path to an ImageNet urls.txt file." << std::endl; |
| 96 | std::cout << "6. Datafile output path." << std::endl; | 108 | std::cout << "6. Datafile output path." << std::endl; |
| 97 | 109 | ||
| 98 | exit(1); | 110 | exit(1); |
| @@ -431,10 +443,10 @@ int main(int argc, char** argv) | |||
| 431 | // Get verbs from AGID | 443 | // Get verbs from AGID |
| 432 | std::cout << "Reading inflections..." << std::endl; | 444 | std::cout << "Reading inflections..." << std::endl; |
| 433 | 445 | ||
| 434 | std::ifstream agidfile(argv[3]); | 446 | std::ifstream agidfile(argv[2]); |
| 435 | if (!agidfile.is_open()) | 447 | if (!agidfile.is_open()) |
| 436 | { | 448 | { |
| 437 | std::cout << "Could not open AGID file: " << argv[3] << std::endl; | 449 | std::cout << "Could not open AGID file: " << argv[2] << std::endl; |
| 438 | print_usage(); | 450 | print_usage(); |
| 439 | } | 451 | } |
| 440 | 452 | ||
| @@ -562,10 +574,10 @@ int main(int argc, char** argv) | |||
| 562 | // Pronounciations | 574 | // Pronounciations |
| 563 | std::cout << "Reading pronunciations..." << std::endl; | 575 | std::cout << "Reading pronunciations..." << std::endl; |
| 564 | 576 | ||
| 565 | std::ifstream pronfile(argv[5]); | 577 | std::ifstream pronfile(argv[4]); |
| 566 | if (!pronfile.is_open()) | 578 | if (!pronfile.is_open()) |
| 567 | { | 579 | { |
| 568 | std::cout << "Could not open CMUDICT file: " << argv[5] << std::endl; | 580 | std::cout << "Could not open CMUDICT file: " << argv[4] << std::endl; |
| 569 | print_usage(); | 581 | print_usage(); |
| 570 | } | 582 | } |
| 571 | 583 | ||
| @@ -589,10 +601,80 @@ int main(int argc, char** argv) | |||
| 589 | std::string canonical(phoneme_data[1]); | 601 | std::string canonical(phoneme_data[1]); |
| 590 | std::transform(std::begin(canonical), std::end(canonical), std::begin(canonical), ::tolower); | 602 | std::transform(std::begin(canonical), std::end(canonical), std::begin(canonical), ::tolower); |
| 591 | 603 | ||
| 592 | pronunciations[canonical].insert(phoneme_data[2]); | 604 | std::string phonemes = phoneme_data[2]; |
| 605 | auto phoneme_set = verbly::split<std::list<std::string>>(phonemes, " "); | ||
| 606 | auto phemstrt = std::find_if(std::begin(phoneme_set), std::end(phoneme_set), [] (std::string phoneme) { | ||
| 607 | return phoneme.find("1") != std::string::npos; | ||
| 608 | }); | ||
| 609 | |||
| 610 | pronunciation_t p; | ||
| 611 | p.phonemes = phonemes; | ||
| 612 | if (phemstrt != std::end(phoneme_set)) | ||
| 613 | { | ||
| 614 | std::stringstream rhymer; | ||
| 615 | for (auto it = phemstrt; it != std::end(phoneme_set); it++) | ||
| 616 | { | ||
| 617 | std::string naked; | ||
| 618 | std::remove_copy_if(std::begin(*it), std::end(*it), std::back_inserter(naked), [] (char ch) { | ||
| 619 | return isdigit(ch); | ||
| 620 | }); | ||
| 621 | |||
| 622 | if (it != phemstrt) | ||
| 623 | { | ||
| 624 | rhymer << " "; | ||
| 625 | } | ||
| 626 | |||
| 627 | rhymer << naked; | ||
| 628 | } | ||
| 629 | |||
| 630 | p.rhyme = rhymer.str(); | ||
| 631 | |||
| 632 | if (phemstrt != std::begin(phoneme_set)) | ||
| 633 | { | ||
| 634 | phemstrt--; | ||
| 635 | p.prerhyme = *phemstrt; | ||
| 636 | } else { | ||
| 637 | p.prerhyme = ""; | ||
| 638 | } | ||
| 639 | } else { | ||
| 640 | p.prerhyme = ""; | ||
| 641 | p.rhyme = ""; | ||
| 642 | } | ||
| 643 | |||
| 644 | pronunciations[canonical].insert(p); | ||
| 645 | } | ||
| 646 | } | ||
| 647 | |||
| 648 | // Images | ||
| 649 | std::cout << "Reading images..." << std::endl; | ||
| 650 | |||
| 651 | std::ifstream imagefile(argv[5]); | ||
| 652 | if (!imagefile.is_open()) | ||
| 653 | { | ||
| 654 | std::cout << "Could not open ImageNet file: " << argv[5] << std::endl; | ||
| 655 | print_usage(); | ||
| 656 | } | ||
| 657 | |||
| 658 | for (;;) | ||
| 659 | { | ||
| 660 | std::string line; | ||
| 661 | if (!getline(imagefile, line)) | ||
| 662 | { | ||
| 663 | break; | ||
| 593 | } | 664 | } |
| 665 | |||
| 666 | if (line.back() == '\r') | ||
| 667 | { | ||
| 668 | line.pop_back(); | ||
| 669 | } | ||
| 670 | |||
| 671 | std::string wnid_s = line.substr(1, 8); | ||
| 672 | int wnid = stoi(wnid_s) + 100000000; | ||
| 673 | images[wnid]++; | ||
| 594 | } | 674 | } |
| 595 | 675 | ||
| 676 | imagefile.close(); | ||
| 677 | |||
| 596 | // Start writing output | 678 | // Start writing output |
| 597 | std::cout << "Writing schema..." << std::endl; | 679 | std::cout << "Writing schema..." << std::endl; |
| 598 | 680 | ||
| @@ -689,7 +771,7 @@ int main(int argc, char** argv) | |||
| 689 | db_error(ppdb, query); | 771 | db_error(ppdb, query); |
| 690 | } | 772 | } |
| 691 | 773 | ||
| 692 | sqlite3_bind_text(ppstmt, 1, prep.c_str(), prep.length(), SQLITE_STATIC); | 774 | sqlite3_bind_text(ppstmt, 1, prep.c_str(), prep.length(), SQLITE_TRANSIENT); |
| 693 | 775 | ||
| 694 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | 776 | if (sqlite3_step(ppstmt) != SQLITE_DONE) |
| 695 | { | 777 | { |
| @@ -721,7 +803,7 @@ int main(int argc, char** argv) | |||
| 721 | } | 803 | } |
| 722 | 804 | ||
| 723 | sqlite3_bind_int(ppstmt, 1, rowid); | 805 | sqlite3_bind_int(ppstmt, 1, rowid); |
| 724 | sqlite3_bind_text(ppstmt, 2, group.c_str(), group.length(), SQLITE_STATIC); | 806 | sqlite3_bind_text(ppstmt, 2, group.c_str(), group.length(), SQLITE_TRANSIENT); |
| 725 | 807 | ||
| 726 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | 808 | if (sqlite3_step(ppstmt) != SQLITE_DONE) |
| 727 | { | 809 | { |
| @@ -744,11 +826,11 @@ int main(int argc, char** argv) | |||
| 744 | db_error(ppdb, query); | 826 | db_error(ppdb, query); |
| 745 | } | 827 | } |
| 746 | 828 | ||
| 747 | sqlite3_bind_text(ppstmt, 1, mapping.second.infinitive.c_str(), mapping.second.infinitive.length(), SQLITE_STATIC); | 829 | sqlite3_bind_text(ppstmt, 1, mapping.second.infinitive.c_str(), mapping.second.infinitive.length(), SQLITE_TRANSIENT); |
| 748 | sqlite3_bind_text(ppstmt, 2, mapping.second.past_tense.c_str(), mapping.second.past_tense.length(), SQLITE_STATIC); | 830 | sqlite3_bind_text(ppstmt, 2, mapping.second.past_tense.c_str(), mapping.second.past_tense.length(), SQLITE_TRANSIENT); |
| 749 | sqlite3_bind_text(ppstmt, 3, mapping.second.past_participle.c_str(), mapping.second.past_participle.length(), SQLITE_STATIC); | 831 | sqlite3_bind_text(ppstmt, 3, mapping.second.past_participle.c_str(), mapping.second.past_participle.length(), SQLITE_TRANSIENT); |
| 750 | sqlite3_bind_text(ppstmt, 4, mapping.second.ing_form.c_str(), mapping.second.ing_form.length(), SQLITE_STATIC); | 832 | sqlite3_bind_text(ppstmt, 4, mapping.second.ing_form.c_str(), mapping.second.ing_form.length(), SQLITE_TRANSIENT); |
| 751 | sqlite3_bind_text(ppstmt, 5, mapping.second.s_form.c_str(), mapping.second.s_form.length(), SQLITE_STATIC); | 833 | sqlite3_bind_text(ppstmt, 5, mapping.second.s_form.c_str(), mapping.second.s_form.length(), SQLITE_TRANSIENT); |
| 752 | 834 | ||
| 753 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | 835 | if (sqlite3_step(ppstmt) != SQLITE_DONE) |
| 754 | { | 836 | { |
| @@ -780,14 +862,26 @@ int main(int argc, char** argv) | |||
| 780 | 862 | ||
| 781 | for (auto pronunciation : pronunciations[canonical]) | 863 | for (auto pronunciation : pronunciations[canonical]) |
| 782 | { | 864 | { |
| 783 | query = "INSERT INTO verb_pronunciations (verb_id, pronunciation) VALUES (?, ?)"; | 865 | if (!pronunciation.rhyme.empty()) |
| 866 | { | ||
| 867 | query = "INSERT INTO verb_pronunciations (verb_id, pronunciation, prerhyme, rhyme) VALUES (?, ?, ?, ?)"; | ||
| 868 | } else { | ||
| 869 | query = "INSERT INTO verb_pronunciations (verb_id, pronunciation) VALUES (?, ?)"; | ||
| 870 | } | ||
| 871 | |||
| 784 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | 872 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) |
| 785 | { | 873 | { |
| 786 | db_error(ppdb, query); | 874 | db_error(ppdb, query); |
| 787 | } | 875 | } |
| 788 | 876 | ||
| 789 | sqlite3_bind_int(ppstmt, 1, rowid); | 877 | sqlite3_bind_int(ppstmt, 1, rowid); |
| 790 | sqlite3_bind_text(ppstmt, 2, pronunciation.c_str(), pronunciation.length(), SQLITE_STATIC); | 878 | sqlite3_bind_text(ppstmt, 2, pronunciation.phonemes.c_str(), pronunciation.phonemes.length(), SQLITE_TRANSIENT); |
| 879 | |||
| 880 | if (!pronunciation.rhyme.empty()) | ||
| 881 | { | ||
| 882 | sqlite3_bind_text(ppstmt, 3, pronunciation.prerhyme.c_str(), pronunciation.prerhyme.length(), SQLITE_TRANSIENT); | ||
| 883 | sqlite3_bind_text(ppstmt, 4, pronunciation.rhyme.c_str(), pronunciation.rhyme.length(), SQLITE_TRANSIENT); | ||
| 884 | } | ||
| 791 | 885 | ||
| 792 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | 886 | if (sqlite3_step(ppstmt) != SQLITE_DONE) |
| 793 | { | 887 | { |
| @@ -825,7 +919,7 @@ int main(int argc, char** argv) | |||
| 825 | db_error(ppdb, query); | 919 | db_error(ppdb, query); |
| 826 | } | 920 | } |
| 827 | 921 | ||
| 828 | sqlite3_bind_blob(ppstmt, 1, rdm.c_str(), rdm.size(), SQLITE_STATIC); | 922 | sqlite3_bind_blob(ppstmt, 1, rdm.c_str(), rdm.size(), SQLITE_TRANSIENT); |
| 829 | 923 | ||
| 830 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | 924 | if (sqlite3_step(ppstmt) != SQLITE_DONE) |
| 831 | { | 925 | { |
| @@ -918,7 +1012,7 @@ int main(int argc, char** argv) | |||
| 918 | } | 1012 | } |
| 919 | 1013 | ||
| 920 | sqlite3_bind_int(ppstmt, 1, gid); | 1014 | sqlite3_bind_int(ppstmt, 1, gid); |
| 921 | sqlite3_bind_blob(ppstmt, 2, marshall.c_str(), marshall.length(), SQLITE_STATIC); | 1015 | sqlite3_bind_blob(ppstmt, 2, marshall.c_str(), marshall.length(), SQLITE_TRANSIENT); |
| 922 | 1016 | ||
| 923 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | 1017 | if (sqlite3_step(ppstmt) != SQLITE_DONE) |
| 924 | { | 1018 | { |
| @@ -972,7 +1066,7 @@ int main(int argc, char** argv) | |||
| 972 | // - sa: specification (e.g. inaccurate (general) can mean imprecise or incorrect (specific)) | 1066 | // - sa: specification (e.g. inaccurate (general) can mean imprecise or incorrect (specific)) |
| 973 | // - sim: synonymy (e.g. cheerful/happy, happy/cheerful) | 1067 | // - sim: synonymy (e.g. cheerful/happy, happy/cheerful) |
| 974 | // - syntax: positioning flags for some adjectives | 1068 | // - syntax: positioning flags for some adjectives |
| 975 | std::string wnpref {argv[4]}; | 1069 | std::string wnpref {argv[3]}; |
| 976 | if (wnpref.back() != '/') | 1070 | if (wnpref.back() != '/') |
| 977 | { | 1071 | { |
| 978 | wnpref += '/'; | 1072 | wnpref += '/'; |
| @@ -1009,7 +1103,7 @@ int main(int argc, char** argv) | |||
| 1009 | { | 1103 | { |
| 1010 | ppgs.update(); | 1104 | ppgs.update(); |
| 1011 | 1105 | ||
| 1012 | std::regex relation("^s\\(([134]\\d{8}),(\\d+),'([\\w ]+)',"); | 1106 | std::regex relation("^s\\(([134]\\d{8}),(\\d+),'(.+)',\\w,\\d+,\\d+\\)\\.$"); |
| 1013 | std::smatch relation_data; | 1107 | std::smatch relation_data; |
| 1014 | if (!std::regex_search(line, relation_data, relation)) | 1108 | if (!std::regex_search(line, relation_data, relation)) |
| 1015 | { | 1109 | { |
| @@ -1019,6 +1113,11 @@ int main(int argc, char** argv) | |||
| 1019 | int synset_id = stoi(relation_data[1]); | 1113 | int synset_id = stoi(relation_data[1]); |
| 1020 | int wnum = stoi(relation_data[2]); | 1114 | int wnum = stoi(relation_data[2]); |
| 1021 | std::string word = relation_data[3]; | 1115 | std::string word = relation_data[3]; |
| 1116 | size_t word_it; | ||
| 1117 | while ((word_it = word.find("''")) != std::string::npos) | ||
| 1118 | { | ||
| 1119 | word.erase(word_it, 1); | ||
| 1120 | } | ||
| 1022 | 1121 | ||
| 1023 | std::string query; | 1122 | std::string query; |
| 1024 | switch (synset_id / 100000000) | 1123 | switch (synset_id / 100000000) |
| @@ -1027,9 +1126,9 @@ int main(int argc, char** argv) | |||
| 1027 | { | 1126 | { |
| 1028 | if (nouns.count(word) == 1) | 1127 | if (nouns.count(word) == 1) |
| 1029 | { | 1128 | { |
| 1030 | query = "INSERT INTO nouns (singular, proper, complexity, plural) VALUES (?, ?, ?, ?)"; | 1129 | query = "INSERT INTO nouns (singular, proper, complexity, images, wnid, plural) VALUES (?, ?, ?, ?, ?, ?)"; |
| 1031 | } else { | 1130 | } else { |
| 1032 | query = "INSERT INTO nouns (singular, proper, complexity) VALUES (?, ?, ?)"; | 1131 | query = "INSERT INTO nouns (singular, proper, complexity, images, wnid) VALUES (?, ?, ?, ?, ?)"; |
| 1033 | } | 1132 | } |
| 1034 | 1133 | ||
| 1035 | break; | 1134 | break; |
| @@ -1073,7 +1172,7 @@ int main(int argc, char** argv) | |||
| 1073 | db_error(ppdb, query); | 1172 | db_error(ppdb, query); |
| 1074 | } | 1173 | } |
| 1075 | 1174 | ||
| 1076 | sqlite3_bind_text(ppstmt, 1, word.c_str(), word.length(), SQLITE_STATIC); | 1175 | sqlite3_bind_text(ppstmt, 1, word.c_str(), word.length(), SQLITE_TRANSIENT); |
| 1077 | switch (synset_id / 100000000) | 1176 | switch (synset_id / 100000000) |
| 1078 | { | 1177 | { |
| 1079 | case 1: // Noun | 1178 | case 1: // Noun |
| @@ -1083,10 +1182,12 @@ int main(int argc, char** argv) | |||
| 1083 | }) ? 1 : 0)); | 1182 | }) ? 1 : 0)); |
| 1084 | 1183 | ||
| 1085 | sqlite3_bind_int(ppstmt, 3, verbly::split<std::list<std::string>>(word, " ").size()); | 1184 | sqlite3_bind_int(ppstmt, 3, verbly::split<std::list<std::string>>(word, " ").size()); |
| 1185 | sqlite3_bind_int(ppstmt, 4, images[synset_id]); | ||
| 1186 | sqlite3_bind_int(ppstmt, 5, synset_id); | ||
| 1086 | 1187 | ||
| 1087 | if (nouns.count(word) == 1) | 1188 | if (nouns.count(word) == 1) |
| 1088 | { | 1189 | { |
| 1089 | sqlite3_bind_text(ppstmt, 4, nouns[word].plural.c_str(), nouns[word].plural.length(), SQLITE_STATIC); | 1190 | sqlite3_bind_text(ppstmt, 6, nouns[word].plural.c_str(), nouns[word].plural.length(), SQLITE_TRANSIENT); |
| 1090 | } | 1191 | } |
| 1091 | 1192 | ||
| 1092 | break; | 1193 | break; |
| @@ -1099,8 +1200,8 @@ int main(int argc, char** argv) | |||
| 1099 | 1200 | ||
| 1100 | if (adjectives.count(word) == 1) | 1201 | if (adjectives.count(word) == 1) |
| 1101 | { | 1202 | { |
| 1102 | sqlite3_bind_text(ppstmt, 3, adjectives[word].comparative.c_str(), adjectives[word].comparative.length(), SQLITE_STATIC); | 1203 | sqlite3_bind_text(ppstmt, 3, adjectives[word].comparative.c_str(), adjectives[word].comparative.length(), SQLITE_TRANSIENT); |
| 1103 | sqlite3_bind_text(ppstmt, 4, adjectives[word].superlative.c_str(), adjectives[word].superlative.length(), SQLITE_STATIC); | 1204 | sqlite3_bind_text(ppstmt, 4, adjectives[word].superlative.c_str(), adjectives[word].superlative.length(), SQLITE_TRANSIENT); |
| 1104 | } | 1205 | } |
| 1105 | 1206 | ||
| 1106 | break; | 1207 | break; |
| @@ -1140,21 +1241,36 @@ int main(int argc, char** argv) | |||
| 1140 | { | 1241 | { |
| 1141 | case 1: // Noun | 1242 | case 1: // Noun |
| 1142 | { | 1243 | { |
| 1143 | query = "INSERT INTO noun_pronunciations (noun_id, pronunciation) VALUES (?, ?)"; | 1244 | if (!pronunciation.rhyme.empty()) |
| 1245 | { | ||
| 1246 | query = "INSERT INTO noun_pronunciations (noun_id, pronunciation, prerhyme, rhyme) VALUES (?, ?, ?, ?)"; | ||
| 1247 | } else { | ||
| 1248 | query = "INSERT INTO noun_pronunciations (noun_id, pronunciation) VALUES (?, ?)"; | ||
| 1249 | } | ||
| 1144 | 1250 | ||
| 1145 | break; | 1251 | break; |
| 1146 | } | 1252 | } |
| 1147 | 1253 | ||
| 1148 | case 3: // Adjective | 1254 | case 3: // Adjective |
| 1149 | { | 1255 | { |
| 1150 | query = "INSERT INTO adjective_pronunciations (adjective_id, pronunciation) VALUES (?, ?)"; | 1256 | if (!pronunciation.rhyme.empty()) |
| 1257 | { | ||
| 1258 | query = "INSERT INTO adjective_pronunciations (adjective_id, pronunciation, prerhyme, rhyme) VALUES (?, ?, ?, ?)"; | ||
| 1259 | } else { | ||
| 1260 | query = "INSERT INTO adjective_pronunciations (adjective_id, pronunciation) VALUES (?, ?)"; | ||
| 1261 | } | ||
| 1151 | 1262 | ||
| 1152 | break; | 1263 | break; |
| 1153 | } | 1264 | } |
| 1154 | 1265 | ||
| 1155 | case 4: // Adverb | 1266 | case 4: // Adverb |
| 1156 | { | 1267 | { |
| 1157 | query = "INSERT INTO adverb_pronunciations (adverb_id, pronunciation) VALUES (?, ?)"; | 1268 | if (!pronunciation.rhyme.empty()) |
| 1269 | { | ||
| 1270 | query = "INSERT INTO adverb_pronunciations (adverb_id, pronunciation, prerhyme, rhyme) VALUES (?, ?, ?, ?)"; | ||
| 1271 | } else { | ||
| 1272 | query = "INSERT INTO adverb_pronunciations (adverb_id, pronunciation) VALUES (?, ?)"; | ||
| 1273 | } | ||
| 1158 | 1274 | ||
| 1159 | break; | 1275 | break; |
| 1160 | } | 1276 | } |
| @@ -1166,7 +1282,13 @@ int main(int argc, char** argv) | |||
| 1166 | } | 1282 | } |
| 1167 | 1283 | ||
| 1168 | sqlite3_bind_int(ppstmt, 1, rowid); | 1284 | sqlite3_bind_int(ppstmt, 1, rowid); |
| 1169 | sqlite3_bind_text(ppstmt, 2, pronunciation.c_str(), pronunciation.length(), SQLITE_STATIC); | 1285 | sqlite3_bind_text(ppstmt, 2, pronunciation.phonemes.c_str(), pronunciation.phonemes.length(), SQLITE_TRANSIENT); |
| 1286 | |||
| 1287 | if (!pronunciation.rhyme.empty()) | ||
| 1288 | { | ||
| 1289 | sqlite3_bind_text(ppstmt, 3, pronunciation.prerhyme.c_str(), pronunciation.prerhyme.length(), SQLITE_TRANSIENT); | ||
| 1290 | sqlite3_bind_text(ppstmt, 4, pronunciation.rhyme.c_str(), pronunciation.rhyme.length(), SQLITE_TRANSIENT); | ||
| 1291 | } | ||
| 1170 | 1292 | ||
| 1171 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | 1293 | if (sqlite3_step(ppstmt) != SQLITE_DONE) |
| 1172 | { | 1294 | { |
| @@ -2155,7 +2277,7 @@ int main(int argc, char** argv) | |||
| 2155 | db_error(ppdb, query); | 2277 | db_error(ppdb, query); |
| 2156 | } | 2278 | } |
| 2157 | 2279 | ||
| 2158 | sqlite3_bind_text(ppstmt, 1, syn.c_str(), 1, SQLITE_STATIC); | 2280 | sqlite3_bind_text(ppstmt, 1, syn.c_str(), 1, SQLITE_TRANSIENT); |
| 2159 | sqlite3_bind_int(ppstmt, 2, wn[synset_id][wnum]); | 2281 | sqlite3_bind_int(ppstmt, 2, wn[synset_id][wnum]); |
| 2160 | 2282 | ||
| 2161 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | 2283 | if (sqlite3_step(ppstmt) != SQLITE_DONE) |
| diff --git a/generator/schema.sql b/generator/schema.sql index f2445f0..1836c62 100644 --- a/generator/schema.sql +++ b/generator/schema.sql | |||
| @@ -55,7 +55,9 @@ CREATE TABLE `nouns` ( | |||
| 55 | `singular` VARCHAR(32) NOT NULL, | 55 | `singular` VARCHAR(32) NOT NULL, |
| 56 | `plural` VARCHAR(32), | 56 | `plural` VARCHAR(32), |
| 57 | `proper` INTEGER(1) NOT NULL, | 57 | `proper` INTEGER(1) NOT NULL, |
| 58 | `complexity` INTEGER NOT NULL | 58 | `complexity` INTEGER NOT NULL, |
| 59 | `images` INTEGER NOT NULL, | ||
| 60 | `wnid` INTEGER NOT NULL | ||
| 59 | ); | 61 | ); |
| 60 | 62 | ||
| 61 | DROP TABLE IF EXISTS `hypernymy`; | 63 | DROP TABLE IF EXISTS `hypernymy`; |
| @@ -182,6 +184,8 @@ DROP TABLE IF EXISTS `noun_pronunciations`; | |||
| 182 | CREATE TABLE `noun_pronunciations` ( | 184 | CREATE TABLE `noun_pronunciations` ( |
| 183 | `noun_id` INTEGER NOT NULL, | 185 | `noun_id` INTEGER NOT NULL, |
| 184 | `pronunciation` VARCHAR(64) NOT NULL, | 186 | `pronunciation` VARCHAR(64) NOT NULL, |
| 187 | `prerhyme` VARCHAR(8), | ||
| 188 | `rhyme` VARCHAR(64), | ||
| 185 | FOREIGN KEY (`noun_id`) REFERENCES `nouns`(`noun_id`) | 189 | FOREIGN KEY (`noun_id`) REFERENCES `nouns`(`noun_id`) |
| 186 | ); | 190 | ); |
| 187 | 191 | ||
| @@ -189,6 +193,8 @@ DROP TABLE IF EXISTS `verb_pronunciations`; | |||
| 189 | CREATE TABLE `verb_pronunciations` ( | 193 | CREATE TABLE `verb_pronunciations` ( |
| 190 | `verb_id` INTEGER NOT NULL, | 194 | `verb_id` INTEGER NOT NULL, |
| 191 | `pronunciation` VARCHAR(64) NOT NULL, | 195 | `pronunciation` VARCHAR(64) NOT NULL, |
| 196 | `prerhyme` VARCHAR(8), | ||
| 197 | `rhyme` VARCHAR(64), | ||
| 192 | FOREIGN KEY (`verb_id`) REFERENCES `verbs`(`verb_id`) | 198 | FOREIGN KEY (`verb_id`) REFERENCES `verbs`(`verb_id`) |
| 193 | ); | 199 | ); |
| 194 | 200 | ||
| @@ -196,6 +202,8 @@ DROP TABLE IF EXISTS `adjective_pronunciations`; | |||
| 196 | CREATE TABLE `adjective_pronunciations` ( | 202 | CREATE TABLE `adjective_pronunciations` ( |
| 197 | `adjective_id` INTEGER NOT NULL, | 203 | `adjective_id` INTEGER NOT NULL, |
| 198 | `pronunciation` VARCHAR(64) NOT NULL, | 204 | `pronunciation` VARCHAR(64) NOT NULL, |
| 205 | `prerhyme` VARCHAR(8), | ||
| 206 | `rhyme` VARCHAR(64), | ||
| 199 | FOREIGN KEY (`adjective_id`) REFERENCES `adjectives`(`adjective_id`) | 207 | FOREIGN KEY (`adjective_id`) REFERENCES `adjectives`(`adjective_id`) |
| 200 | ); | 208 | ); |
| 201 | 209 | ||
| @@ -203,6 +211,8 @@ DROP TABLE IF EXISTS `adverb_pronunciations`; | |||
| 203 | CREATE TABLE `adverb_pronunciations` ( | 211 | CREATE TABLE `adverb_pronunciations` ( |
| 204 | `adverb_id` INTEGER NOT NULL, | 212 | `adverb_id` INTEGER NOT NULL, |
| 205 | `pronunciation` VARCHAR(64) NOT NULL, | 213 | `pronunciation` VARCHAR(64) NOT NULL, |
| 214 | `prerhyme` VARCHAR(8), | ||
| 215 | `rhyme` VARCHAR(64), | ||
| 206 | FOREIGN KEY (`adverb_id`) REFERENCES `adverbs`(`adverb_id`) | 216 | FOREIGN KEY (`adverb_id`) REFERENCES `adverbs`(`adverb_id`) |
| 207 | ); | 217 | ); |
| 208 | 218 | ||
