diff options
Diffstat (limited to 'generator')
| -rw-r--r-- | generator/CMakeLists.txt | 2 | ||||
| -rw-r--r-- | generator/form.cpp | 9 | ||||
| -rw-r--r-- | generator/form.h | 19 | ||||
| -rw-r--r-- | generator/generator.cpp | 270 | ||||
| -rw-r--r-- | generator/generator.h | 11 | ||||
| -rw-r--r-- | generator/pronunciation.cpp | 7 | ||||
| -rw-r--r-- | generator/pronunciation.h | 19 | ||||
| -rw-r--r-- | generator/schema.sql | 24 |
8 files changed, 347 insertions, 14 deletions
| diff --git a/generator/CMakeLists.txt b/generator/CMakeLists.txt index 8c070d2..5d2f977 100644 --- a/generator/CMakeLists.txt +++ b/generator/CMakeLists.txt | |||
| @@ -11,6 +11,6 @@ include_directories( | |||
| 11 | ../vendor/hkutil) | 11 | ../vendor/hkutil) |
| 12 | 12 | ||
| 13 | add_executable(generator notion.cpp word.cpp lemma.cpp form.cpp pronunciation.cpp group.cpp frame.cpp part.cpp generator.cpp main.cpp) | 13 | add_executable(generator notion.cpp word.cpp lemma.cpp form.cpp pronunciation.cpp group.cpp frame.cpp part.cpp generator.cpp main.cpp) |
| 14 | set_property(TARGET generator PROPERTY CXX_STANDARD 11) | 14 | set_property(TARGET generator PROPERTY CXX_STANDARD 17) |
| 15 | set_property(TARGET generator PROPERTY CXX_STANDARD_REQUIRED ON) | 15 | set_property(TARGET generator PROPERTY CXX_STANDARD_REQUIRED ON) |
| 16 | target_link_libraries(generator ${sqlite3_LIBRARIES} ${LIBXML2_LIBRARIES}) | 16 | target_link_libraries(generator ${sqlite3_LIBRARIES} ${LIBXML2_LIBRARIES}) |
| diff --git a/generator/form.cpp b/generator/form.cpp index c66820c..a88363b 100644 --- a/generator/form.cpp +++ b/generator/form.cpp | |||
| @@ -9,12 +9,13 @@ namespace verbly { | |||
| 9 | 9 | ||
| 10 | int form::nextId_ = 0; | 10 | int form::nextId_ = 0; |
| 11 | 11 | ||
| 12 | form::form(std::string text) : | 12 | form::form(std::string text, int anagram_set_id) : |
| 13 | id_(nextId_++), | 13 | id_(nextId_++), |
| 14 | text_(text), | 14 | text_(text), |
| 15 | complexity_(std::count(std::begin(text), std::end(text), ' ') + 1), | 15 | complexity_(std::count(std::begin(text), std::end(text), ' ') + 1), |
| 16 | proper_(std::any_of(std::begin(text), std::end(text), ::isupper)), | 16 | proper_(std::any_of(std::begin(text), std::end(text), ::isupper)), |
| 17 | length_(text.length()) | 17 | length_(text.length()), |
| 18 | anagram_set_id_(anagram_set_id) | ||
| 18 | { | 19 | { |
| 19 | } | 20 | } |
| 20 | 21 | ||
| @@ -34,7 +35,9 @@ namespace verbly { | |||
| 34 | { "form", arg.getText() }, | 35 | { "form", arg.getText() }, |
| 35 | { "complexity", arg.getComplexity() }, | 36 | { "complexity", arg.getComplexity() }, |
| 36 | { "proper", arg.isProper() }, | 37 | { "proper", arg.isProper() }, |
| 37 | { "length", arg.getLength() } | 38 | { "length", arg.getLength() }, |
| 39 | { "anagram_set_id", arg.getAnagramSetId() }, | ||
| 40 | { "reverse_form_id", arg.getReverseId() } | ||
| 38 | }); | 41 | }); |
| 39 | } | 42 | } |
| 40 | 43 | ||
| diff --git a/generator/form.h b/generator/form.h index f3dd779..c83bbdc 100644 --- a/generator/form.h +++ b/generator/form.h | |||
| @@ -15,7 +15,7 @@ namespace verbly { | |||
| 15 | 15 | ||
| 16 | // Constructor | 16 | // Constructor |
| 17 | 17 | ||
| 18 | explicit form(std::string text); | 18 | form(std::string text, int anagram_set_id); |
| 19 | 19 | ||
| 20 | // Mutators | 20 | // Mutators |
| 21 | 21 | ||
| @@ -48,6 +48,21 @@ namespace verbly { | |||
| 48 | return length_; | 48 | return length_; |
| 49 | } | 49 | } |
| 50 | 50 | ||
| 51 | int getAnagramSetId() const | ||
| 52 | { | ||
| 53 | return anagram_set_id_; | ||
| 54 | } | ||
| 55 | |||
| 56 | void setReverseId(int id) | ||
| 57 | { | ||
| 58 | reverse_id_ = id; | ||
| 59 | } | ||
| 60 | |||
| 61 | int getReverseId() const | ||
| 62 | { | ||
| 63 | return reverse_id_; | ||
| 64 | } | ||
| 65 | |||
| 51 | std::set<const pronunciation*> getPronunciations() const | 66 | std::set<const pronunciation*> getPronunciations() const |
| 52 | { | 67 | { |
| 53 | return pronunciations_; | 68 | return pronunciations_; |
| @@ -62,6 +77,8 @@ namespace verbly { | |||
| 62 | const int complexity_; | 77 | const int complexity_; |
| 63 | const bool proper_; | 78 | const bool proper_; |
| 64 | const int length_; | 79 | const int length_; |
| 80 | const int anagram_set_id_; | ||
| 81 | int reverse_id_ = -1; | ||
| 65 | 82 | ||
| 66 | std::set<const pronunciation*> pronunciations_; | 83 | std::set<const pronunciation*> pronunciations_; |
| 67 | 84 | ||
| diff --git a/generator/generator.cpp b/generator/generator.cpp index ad665a2..fdea10f 100644 --- a/generator/generator.cpp +++ b/generator/generator.cpp | |||
| @@ -6,6 +6,9 @@ | |||
| 6 | #include <fstream> | 6 | #include <fstream> |
| 7 | #include <hkutil/string.h> | 7 | #include <hkutil/string.h> |
| 8 | #include <hkutil/progress.h> | 8 | #include <hkutil/progress.h> |
| 9 | #include <array> | ||
| 10 | #include <mutex> | ||
| 11 | #include <thread> | ||
| 9 | #include "role.h" | 12 | #include "role.h" |
| 10 | #include "part.h" | 13 | #include "part.h" |
| 11 | #include "../lib/enums.h" | 14 | #include "../lib/enums.h" |
| @@ -83,7 +86,7 @@ namespace verbly { | |||
| 83 | readAdjectivePositioning(); | 86 | readAdjectivePositioning(); |
| 84 | 87 | ||
| 85 | // Counts the number of URLs ImageNet has per notion | 88 | // Counts the number of URLs ImageNet has per notion |
| 86 | readImageNetUrls(); | 89 | //readImageNetUrls(); |
| 87 | 90 | ||
| 88 | // Creates a word by WordNet sense key lookup table | 91 | // Creates a word by WordNet sense key lookup table |
| 89 | readWordNetSenseKeys(); | 92 | readWordNetSenseKeys(); |
| @@ -115,9 +118,17 @@ namespace verbly { | |||
| 115 | // Writes the database version | 118 | // Writes the database version |
| 116 | writeVersion(); | 119 | writeVersion(); |
| 117 | 120 | ||
| 121 | // Calculates and writes form merography | ||
| 122 | writeMerography(); | ||
| 123 | |||
| 124 | // Calculates and writes pronunciation merophony | ||
| 125 | writeMerophony(); | ||
| 126 | |||
| 118 | // Dumps data to the database | 127 | // Dumps data to the database |
| 119 | dumpObjects(); | 128 | dumpObjects(); |
| 120 | 129 | ||
| 130 | |||
| 131 | |||
| 121 | // Populates the antonymy relationship from WordNet | 132 | // Populates the antonymy relationship from WordNet |
| 122 | readWordNetAntonymy(); | 133 | readWordNetAntonymy(); |
| 123 | 134 | ||
| @@ -577,9 +588,29 @@ namespace verbly { | |||
| 577 | pronunciation& p = *pronunciationByPhonemes_[phonemes]; | 588 | pronunciation& p = *pronunciationByPhonemes_[phonemes]; |
| 578 | formByText_.at(canonical)->addPronunciation(p); | 589 | formByText_.at(canonical)->addPronunciation(p); |
| 579 | } else { | 590 | } else { |
| 580 | pronunciations_.emplace_back(phonemes); | 591 | std::string stressless; |
| 592 | for (int i=0; i<phonemes.size(); i++) { | ||
| 593 | if (!std::isdigit(phonemes[i])) { | ||
| 594 | stressless.push_back(phonemes[i]); | ||
| 595 | } | ||
| 596 | } | ||
| 597 | auto stresslessList = hatkirby::split<std::vector<std::string>>(stressless, " "); | ||
| 598 | std::string stresslessPhonemes = hatkirby::implode(stresslessList.begin(), stresslessList.end(), " "); | ||
| 599 | std::sort(stresslessList.begin(), stresslessList.end()); | ||
| 600 | std::string sortedPhonemes = hatkirby::implode(stresslessList.begin(), stresslessList.end(), " "); | ||
| 601 | |||
| 602 | int anaphoneSetId; | ||
| 603 | if (anaphoneSets_.count(sortedPhonemes)) { | ||
| 604 | anaphoneSetId = anaphoneSets_[sortedPhonemes]; | ||
| 605 | } else { | ||
| 606 | anaphoneSetId = anaphoneSets_.size(); | ||
| 607 | anaphoneSets_[sortedPhonemes] = anaphoneSetId; | ||
| 608 | } | ||
| 609 | |||
| 610 | pronunciations_.emplace_back(phonemes, anaphoneSetId); | ||
| 581 | pronunciation& p = pronunciations_.back(); | 611 | pronunciation& p = pronunciations_.back(); |
| 582 | pronunciationByPhonemes_[phonemes] = &p; | 612 | pronunciationByPhonemes_[phonemes] = &p; |
| 613 | pronunciationByBlankPhonemes_[stresslessPhonemes] = &p; | ||
| 583 | formByText_.at(canonical)->addPronunciation(p); | 614 | formByText_.at(canonical)->addPronunciation(p); |
| 584 | } | 615 | } |
| 585 | } | 616 | } |
| @@ -671,6 +702,12 @@ namespace verbly { | |||
| 671 | 702 | ||
| 672 | for (form& f : forms_) | 703 | for (form& f : forms_) |
| 673 | { | 704 | { |
| 705 | std::string reverseText = f.getText(); | ||
| 706 | std::reverse(reverseText.begin(), reverseText.end()); | ||
| 707 | if (formByText_.count(reverseText)) { | ||
| 708 | f.setReverseId(formByText_[reverseText]->getId()); | ||
| 709 | } | ||
| 710 | |||
| 674 | db_ << f; | 711 | db_ << f; |
| 675 | 712 | ||
| 676 | ppgs.update(); | 713 | ppgs.update(); |
| @@ -682,6 +719,19 @@ namespace verbly { | |||
| 682 | 719 | ||
| 683 | for (pronunciation& p : pronunciations_) | 720 | for (pronunciation& p : pronunciations_) |
| 684 | { | 721 | { |
| 722 | std::string stressless; | ||
| 723 | for (int i=0; i<p.getPhonemes().size(); i++) { | ||
| 724 | if (!std::isdigit(p.getPhonemes()[i])) { | ||
| 725 | stressless.push_back(p.getPhonemes()[i]); | ||
| 726 | } | ||
| 727 | } | ||
| 728 | auto stresslessList = hatkirby::split<std::vector<std::string>>(stressless, " "); | ||
| 729 | std::reverse(stresslessList.begin(), stresslessList.end()); | ||
| 730 | std::string reversedPhonemes = hatkirby::implode(stresslessList.begin(), stresslessList.end(), " "); | ||
| 731 | if (pronunciationByBlankPhonemes_.count(reversedPhonemes)) { | ||
| 732 | p.setReverseId(pronunciationByBlankPhonemes_[reversedPhonemes]->getId()); | ||
| 733 | } | ||
| 734 | |||
| 685 | db_ << p; | 735 | db_ << p; |
| 686 | 736 | ||
| 687 | ppgs.update(); | 737 | ppgs.update(); |
| @@ -698,6 +748,208 @@ namespace verbly { | |||
| 698 | ppgs.update(); | 748 | ppgs.update(); |
| 699 | } | 749 | } |
| 700 | } | 750 | } |
| 751 | |||
| 752 | /*{ | ||
| 753 | hatkirby::progress ppgs("Writing merography...", formByText_.size()); | ||
| 754 | |||
| 755 | for (const auto& [merotext, meroform] : formByText_) | ||
| 756 | { | ||
| 757 | for (const auto& [holotext, holoform] : formByText_) | ||
| 758 | { | ||
| 759 | if (isMero(merotext, holotext)) | ||
| 760 | { | ||
| 761 | db_.insertIntoTable( | ||
| 762 | "merography", | ||
| 763 | { | ||
| 764 | { "merograph_id", meroform->getId() }, | ||
| 765 | { "holograph_id", holoform->getId() } | ||
| 766 | }); | ||
| 767 | } | ||
| 768 | } | ||
| 769 | |||
| 770 | ppgs.update(); | ||
| 771 | } | ||
| 772 | } | ||
| 773 | |||
| 774 | { | ||
| 775 | hatkirby::progress ppgs("Writing merophony...", pronunciationByBlankPhonemes_.size()); | ||
| 776 | |||
| 777 | for (const auto& [merotext, merop] : pronunciationByBlankPhonemes_) | ||
| 778 | { | ||
| 779 | auto merophonemes = hatkirby::split<std::list<std::string>>(merotext, " "); | ||
| 780 | |||
| 781 | for (const auto& [holotext, holop] : pronunciationByBlankPhonemes_) | ||
| 782 | { | ||
| 783 | auto holophonemes = hatkirby::split<std::list<std::string>>(holotext, " "); | ||
| 784 | |||
| 785 | if (isMero(merophonemes, holophonemes)) | ||
| 786 | { | ||
| 787 | db_.insertIntoTable( | ||
| 788 | "merophony", | ||
| 789 | { | ||
| 790 | { "merophone_id", merop->getId() }, | ||
| 791 | { "holophone_id", holop->getId() } | ||
| 792 | }); | ||
| 793 | } | ||
| 794 | } | ||
| 795 | |||
| 796 | ppgs.update(); | ||
| 797 | } | ||
| 798 | }*/ | ||
| 799 | } | ||
| 800 | |||
| 801 | void generator::writeMerography() | ||
| 802 | { | ||
| 803 | hatkirby::progress ppgs("Writing merography...", formByText_.size()); | ||
| 804 | for (const auto& [text, form] : formByText_) | ||
| 805 | { | ||
| 806 | ppgs.update(); | ||
| 807 | |||
| 808 | std::unordered_set<std::string> visited; | ||
| 809 | for (int i=0; i<text.size(); i++) | ||
| 810 | { | ||
| 811 | for (int l=3; l<text.size()-i; l++) | ||
| 812 | { | ||
| 813 | if (i==0 && l == text.size()) | ||
| 814 | { | ||
| 815 | continue; | ||
| 816 | } | ||
| 817 | |||
| 818 | std::string substr = text.substr(i, l); | ||
| 819 | if (formByText_.count(substr) && !visited.count(substr)) | ||
| 820 | { | ||
| 821 | visited.insert(substr); | ||
| 822 | db_.insertIntoTable( | ||
| 823 | "merography", | ||
| 824 | { | ||
| 825 | { "merograph_id", formByText_[substr]->getId() }, | ||
| 826 | { "holograph_id", form->getId() } | ||
| 827 | }); | ||
| 828 | } | ||
| 829 | } | ||
| 830 | } | ||
| 831 | |||
| 832 | |||
| 833 | /* | ||
| 834 | std::string front = text; | ||
| 835 | while (front.size() > 2) | ||
| 836 | { | ||
| 837 | front.erase(0, 1); | ||
| 838 | |||
| 839 | if (formByText_.count(front)) | ||
| 840 | { | ||
| 841 | visited.insert(front); | ||
| 842 | db_.insertIntoTable( | ||
| 843 | "merography", | ||
| 844 | { | ||
| 845 | { "merograph_id", formByText_[front]->getId() }, | ||
| 846 | { "holograph_id", form->getId() } | ||
| 847 | }); | ||
| 848 | } | ||
| 849 | } | ||
| 850 | |||
| 851 | if (text.size() > 2) | ||
| 852 | { | ||
| 853 | std::string back = text; | ||
| 854 | |||
| 855 | while (back.size() > 2) | ||
| 856 | { | ||
| 857 | back.pop_back(); | ||
| 858 | |||
| 859 | if (formByText_.count(back) && !visited.count(back)) | ||
| 860 | { | ||
| 861 | db_.insertIntoTable( | ||
| 862 | "merography", | ||
| 863 | { | ||
| 864 | { "merograph_id", formByText_[back]->getId() }, | ||
| 865 | { "holograph_id", form->getId() } | ||
| 866 | }); | ||
| 867 | } | ||
| 868 | } | ||
| 869 | }*/ | ||
| 870 | } | ||
| 871 | } | ||
| 872 | |||
| 873 | void generator::writeMerophony() | ||
| 874 | { | ||
| 875 | std::map<std::list<std::string>, pronunciation*> tokenized; | ||
| 876 | for (const auto& [phonemes, pronunciation] : pronunciationByBlankPhonemes_) | ||
| 877 | { | ||
| 878 | tokenized[hatkirby::split<std::list<std::string>>(phonemes, " ")] = pronunciation; | ||
| 879 | } | ||
| 880 | |||
| 881 | hatkirby::progress ppgs("Writing merophony...", tokenized.size()); | ||
| 882 | for (const auto& [phonemes, pronunciation] : tokenized) | ||
| 883 | { | ||
| 884 | ppgs.update(); | ||
| 885 | |||
| 886 | std::set<std::list<std::string>> visited; | ||
| 887 | for (int i=0; i<phonemes.size(); i++) | ||
| 888 | { | ||
| 889 | for (int l=2; l<phonemes.size()-i; l++) | ||
| 890 | { | ||
| 891 | if (i==0 && l == phonemes.size()) | ||
| 892 | { | ||
| 893 | continue; | ||
| 894 | } | ||
| 895 | |||
| 896 | std::list<std::string> sublist; | ||
| 897 | for (auto j=std::next(phonemes.begin(),i); j!=std::next(phonemes.begin(),i+l); j++) | ||
| 898 | { | ||
| 899 | sublist.push_back(*j); | ||
| 900 | } | ||
| 901 | |||
| 902 | if (tokenized.count(sublist) && !visited.count(sublist)) | ||
| 903 | { | ||
| 904 | visited.insert(sublist); | ||
| 905 | db_.insertIntoTable( | ||
| 906 | "merophony", | ||
| 907 | { | ||
| 908 | { "merophone_id", tokenized[sublist]->getId() }, | ||
| 909 | { "holophone_id", pronunciation->getId() } | ||
| 910 | }); | ||
| 911 | } | ||
| 912 | } | ||
| 913 | } | ||
| 914 | /*std::list<std::string> front = phonemes; | ||
| 915 | while (front.size() > 1) | ||
| 916 | { | ||
| 917 | front.pop_front(); | ||
| 918 | |||
| 919 | if (tokenized.count(front)) | ||
| 920 | { | ||
| 921 | visited.insert(front); | ||
| 922 | db_.insertIntoTable( | ||
| 923 | "merophony", | ||
| 924 | { | ||
| 925 | { "merophone_id", tokenized[front]->getId() }, | ||
| 926 | { "holophone_id", pronunciation->getId() } | ||
| 927 | }); | ||
| 928 | break; | ||
| 929 | } | ||
| 930 | } | ||
| 931 | |||
| 932 | if (phonemes.size() > 1) | ||
| 933 | { | ||
| 934 | std::list<std::string> back = phonemes; | ||
| 935 | |||
| 936 | while (back.size() > 1) | ||
| 937 | { | ||
| 938 | back.pop_back(); | ||
| 939 | |||
| 940 | if (tokenized.count(back) && !visited.count(back)) | ||
| 941 | { | ||
| 942 | db_.insertIntoTable( | ||
| 943 | "merophony", | ||
| 944 | { | ||
| 945 | { "merophone_id", tokenized[back]->getId() }, | ||
| 946 | { "holophone_id", pronunciation->getId() } | ||
| 947 | }); | ||
| 948 | break; | ||
| 949 | } | ||
| 950 | } | ||
| 951 | }*/ | ||
| 952 | } | ||
| 701 | } | 953 | } |
| 702 | 954 | ||
| 703 | void generator::readWordNetAntonymy() | 955 | void generator::readWordNetAntonymy() |
| @@ -1316,7 +1568,19 @@ namespace verbly { | |||
| 1316 | { | 1568 | { |
| 1317 | if (!formByText_.count(text)) | 1569 | if (!formByText_.count(text)) |
| 1318 | { | 1570 | { |
| 1319 | forms_.emplace_back(text); | 1571 | std::string sortedText = text; |
| 1572 | std::sort(sortedText.begin(), sortedText.end()); | ||
| 1573 | |||
| 1574 | int anagramSetId; | ||
| 1575 | if (anagramSets_.count(sortedText)) | ||
| 1576 | { | ||
| 1577 | anagramSetId = anagramSets_[sortedText]; | ||
| 1578 | } else { | ||
| 1579 | anagramSetId = anagramSets_.size(); | ||
| 1580 | anagramSets_[sortedText] = anagramSetId; | ||
| 1581 | } | ||
| 1582 | |||
| 1583 | forms_.emplace_back(text, anagramSetId); | ||
| 1320 | formByText_[text] = &forms_.back(); | 1584 | formByText_[text] = &forms_.back(); |
| 1321 | } | 1585 | } |
| 1322 | 1586 | ||
| diff --git a/generator/generator.h b/generator/generator.h index 2cd2ba9..70f0657 100644 --- a/generator/generator.h +++ b/generator/generator.h | |||
| @@ -3,6 +3,8 @@ | |||
| 3 | 3 | ||
| 4 | #include <string> | 4 | #include <string> |
| 5 | #include <map> | 5 | #include <map> |
| 6 | #include <unordered_map> | ||
| 7 | #include <unordered_set> | ||
| 6 | #include <list> | 8 | #include <list> |
| 7 | #include <set> | 9 | #include <set> |
| 8 | #include <libxml/parser.h> | 10 | #include <libxml/parser.h> |
| @@ -64,6 +66,10 @@ namespace verbly { | |||
| 64 | 66 | ||
| 65 | void dumpObjects(); | 67 | void dumpObjects(); |
| 66 | 68 | ||
| 69 | void writeMerography(); | ||
| 70 | |||
| 71 | void writeMerophony(); | ||
| 72 | |||
| 67 | void readWordNetAntonymy(); | 73 | void readWordNetAntonymy(); |
| 68 | 74 | ||
| 69 | void readWordNetVariation(); | 75 | void readWordNetVariation(); |
| @@ -138,8 +144,11 @@ namespace verbly { | |||
| 138 | std::map<std::pair<int, int>, word*> wordByWnidAndWnum_; | 144 | std::map<std::pair<int, int>, word*> wordByWnidAndWnum_; |
| 139 | std::map<std::string, std::set<word*>> wordsByBaseForm_; | 145 | std::map<std::string, std::set<word*>> wordsByBaseForm_; |
| 140 | std::map<std::string, lemma*> lemmaByBaseForm_; | 146 | std::map<std::string, lemma*> lemmaByBaseForm_; |
| 141 | std::map<std::string, form*> formByText_; | 147 | std::unordered_map<std::string, form*> formByText_; |
| 142 | std::map<std::string, pronunciation*> pronunciationByPhonemes_; | 148 | std::map<std::string, pronunciation*> pronunciationByPhonemes_; |
| 149 | std::unordered_map<std::string, pronunciation*> pronunciationByBlankPhonemes_; | ||
| 150 | std::map<std::string, int> anagramSets_; | ||
| 151 | std::map<std::string, int> anaphoneSets_; | ||
| 143 | 152 | ||
| 144 | // Caches | 153 | // Caches |
| 145 | 154 | ||
| diff --git a/generator/pronunciation.cpp b/generator/pronunciation.cpp index 3075d42..5c4d8e2 100644 --- a/generator/pronunciation.cpp +++ b/generator/pronunciation.cpp | |||
| @@ -10,9 +10,10 @@ namespace verbly { | |||
| 10 | 10 | ||
| 11 | int pronunciation::nextId_ = 0; | 11 | int pronunciation::nextId_ = 0; |
| 12 | 12 | ||
| 13 | pronunciation::pronunciation(std::string phonemes) : | 13 | pronunciation::pronunciation(std::string phonemes, int anaphone_set_id) : |
| 14 | id_(nextId_++), | 14 | id_(nextId_++), |
| 15 | phonemes_(phonemes) | 15 | phonemes_(phonemes), |
| 16 | anaphone_set_id_(anaphone_set_id) | ||
| 16 | { | 17 | { |
| 17 | auto phonemeList = | 18 | auto phonemeList = |
| 18 | hatkirby::split<std::list<std::string>>(phonemes, " "); | 19 | hatkirby::split<std::list<std::string>>(phonemes, " "); |
| @@ -88,6 +89,8 @@ namespace verbly { | |||
| 88 | fields.emplace_back("phonemes", arg.getPhonemes()); | 89 | fields.emplace_back("phonemes", arg.getPhonemes()); |
| 89 | fields.emplace_back("syllables", arg.getSyllables()); | 90 | fields.emplace_back("syllables", arg.getSyllables()); |
| 90 | fields.emplace_back("stress", arg.getStress()); | 91 | fields.emplace_back("stress", arg.getStress()); |
| 92 | fields.emplace_back("anaphone_set_id", arg.getAnaphoneSetId()); | ||
| 93 | fields.emplace_back("reverse_pronunciation_id", arg.getReverseId()); | ||
| 91 | 94 | ||
| 92 | if (arg.hasRhyme()) | 95 | if (arg.hasRhyme()) |
| 93 | { | 96 | { |
| diff --git a/generator/pronunciation.h b/generator/pronunciation.h index 3190e6d..e6dc4b4 100644 --- a/generator/pronunciation.h +++ b/generator/pronunciation.h | |||
| @@ -13,7 +13,7 @@ namespace verbly { | |||
| 13 | 13 | ||
| 14 | // Constructor | 14 | // Constructor |
| 15 | 15 | ||
| 16 | explicit pronunciation(std::string phonemes); | 16 | pronunciation(std::string phonemes, int anaphone_set_id); |
| 17 | 17 | ||
| 18 | // Accessors | 18 | // Accessors |
| 19 | 19 | ||
| @@ -62,6 +62,21 @@ namespace verbly { | |||
| 62 | return stress_; | 62 | return stress_; |
| 63 | } | 63 | } |
| 64 | 64 | ||
| 65 | int getAnaphoneSetId() const | ||
| 66 | { | ||
| 67 | return anaphone_set_id_; | ||
| 68 | } | ||
| 69 | |||
| 70 | void setReverseId(int id) | ||
| 71 | { | ||
| 72 | reverse_id_ = id; | ||
| 73 | } | ||
| 74 | |||
| 75 | int getReverseId() const | ||
| 76 | { | ||
| 77 | return reverse_id_; | ||
| 78 | } | ||
| 79 | |||
| 65 | private: | 80 | private: |
| 66 | 81 | ||
| 67 | static int nextId_; | 82 | static int nextId_; |
| @@ -72,6 +87,8 @@ namespace verbly { | |||
| 72 | std::string prerhyme_; | 87 | std::string prerhyme_; |
| 73 | int syllables_ = 0; | 88 | int syllables_ = 0; |
| 74 | std::string stress_; | 89 | std::string stress_; |
| 90 | int anaphone_set_id_; | ||
| 91 | int reverse_id_ = -1; | ||
| 75 | 92 | ||
| 76 | }; | 93 | }; |
| 77 | 94 | ||
| diff --git a/generator/schema.sql b/generator/schema.sql index 6a7d223..8c910f4 100644 --- a/generator/schema.sql +++ b/generator/schema.sql | |||
| @@ -160,7 +160,9 @@ CREATE TABLE `forms` ( | |||
| 160 | `form` VARCHAR(32) NOT NULL, | 160 | `form` VARCHAR(32) NOT NULL, |
| 161 | `complexity` SMALLINT NOT NULL, | 161 | `complexity` SMALLINT NOT NULL, |
| 162 | `proper` SMALLINT NOT NULL, | 162 | `proper` SMALLINT NOT NULL, |
| 163 | `length` SMALLINT NOT NULL | 163 | `length` SMALLINT NOT NULL, |
| 164 | `anagram_set_id` INTEGER NOT NULL, | ||
| 165 | `reverse_form_id` INTEGER NOT NULL | ||
| 164 | ); | 166 | ); |
| 165 | 167 | ||
| 166 | CREATE UNIQUE INDEX `form_by_string` ON `forms`(`form`); | 168 | CREATE UNIQUE INDEX `form_by_string` ON `forms`(`form`); |
| @@ -174,13 +176,23 @@ CREATE TABLE `lemmas_forms` ( | |||
| 174 | 176 | ||
| 175 | CREATE INDEX `forms_lemmas` ON `lemmas_forms`(`form_id`,`category`,`lemma_id`); | 177 | CREATE INDEX `forms_lemmas` ON `lemmas_forms`(`form_id`,`category`,`lemma_id`); |
| 176 | 178 | ||
| 179 | CREATE TABLE `merography` ( | ||
| 180 | `merograph_id` INTEGER NOT NULL, | ||
| 181 | `holograph_id` INTEGER NOT NULL, | ||
| 182 | PRIMARY KEY(`merograph_id`,`holograph_id`) | ||
| 183 | ) WITHOUT ROWID; | ||
| 184 | |||
| 185 | CREATE INDEX `reverse_merography` ON `merography`(`holograph_id`,`merograph_id`); | ||
| 186 | |||
| 177 | CREATE TABLE `pronunciations` ( | 187 | CREATE TABLE `pronunciations` ( |
| 178 | `pronunciation_id` INTEGER PRIMARY KEY, | 188 | `pronunciation_id` INTEGER PRIMARY KEY, |
| 179 | `phonemes` VARCHAR(64) NOT NULL, | 189 | `phonemes` VARCHAR(64) NOT NULL, |
| 180 | `prerhyme` VARCHAR(8), | 190 | `prerhyme` VARCHAR(8), |
| 181 | `rhyme` VARCHAR(64), | 191 | `rhyme` VARCHAR(64), |
| 182 | `syllables` INTEGER NOT NULL, | 192 | `syllables` INTEGER NOT NULL, |
| 183 | `stress` VARCHAR(64) NOT NULL | 193 | `stress` VARCHAR(64) NOT NULL, |
| 194 | `anaphone_set_id` INTEGER NOT NULL, | ||
| 195 | `reverse_pronunciation_id` INTEGER NOT NULL | ||
| 184 | ); | 196 | ); |
| 185 | 197 | ||
| 186 | CREATE INDEX `rhymes_with` ON `pronunciations`(`rhyme`,`prerhyme`); | 198 | CREATE INDEX `rhymes_with` ON `pronunciations`(`rhyme`,`prerhyme`); |
| @@ -193,6 +205,14 @@ CREATE TABLE `forms_pronunciations` ( | |||
| 193 | 205 | ||
| 194 | CREATE INDEX `pronunciations_forms` ON `forms_pronunciations`(`pronunciation_id`,`form_id`); | 206 | CREATE INDEX `pronunciations_forms` ON `forms_pronunciations`(`pronunciation_id`,`form_id`); |
| 195 | 207 | ||
| 208 | CREATE TABLE `merophony` ( | ||
| 209 | `merophone_id` INTEGER NOT NULL, | ||
| 210 | `holophone_id` INTEGER NOT NULL, | ||
| 211 | PRIMARY KEY(`merophone_id`,`holophone_id`) | ||
| 212 | ) WITHOUT ROWID; | ||
| 213 | |||
| 214 | CREATE INDEX `reverse_merophony` ON `merophony`(`holophone_id`,`merophone_id`); | ||
| 215 | |||
| 196 | CREATE TABLE `frames` ( | 216 | CREATE TABLE `frames` ( |
| 197 | `frame_id` INTEGER NOT NULL, | 217 | `frame_id` INTEGER NOT NULL, |
| 198 | `group_id` INTEGER NOT NULL, | 218 | `group_id` INTEGER NOT NULL, |
