diff options
Diffstat (limited to 'generator')
-rw-r--r-- | generator/CMakeLists.txt | 2 | ||||
-rw-r--r-- | generator/form.cpp | 9 | ||||
-rw-r--r-- | generator/form.h | 19 | ||||
-rw-r--r-- | generator/generator.cpp | 270 | ||||
-rw-r--r-- | generator/generator.h | 11 | ||||
-rw-r--r-- | generator/pronunciation.cpp | 7 | ||||
-rw-r--r-- | generator/pronunciation.h | 19 | ||||
-rw-r--r-- | generator/schema.sql | 24 |
8 files changed, 347 insertions, 14 deletions
diff --git a/generator/CMakeLists.txt b/generator/CMakeLists.txt index 8c070d2..5d2f977 100644 --- a/generator/CMakeLists.txt +++ b/generator/CMakeLists.txt | |||
@@ -11,6 +11,6 @@ include_directories( | |||
11 | ../vendor/hkutil) | 11 | ../vendor/hkutil) |
12 | 12 | ||
13 | add_executable(generator notion.cpp word.cpp lemma.cpp form.cpp pronunciation.cpp group.cpp frame.cpp part.cpp generator.cpp main.cpp) | 13 | add_executable(generator notion.cpp word.cpp lemma.cpp form.cpp pronunciation.cpp group.cpp frame.cpp part.cpp generator.cpp main.cpp) |
14 | set_property(TARGET generator PROPERTY CXX_STANDARD 11) | 14 | set_property(TARGET generator PROPERTY CXX_STANDARD 17) |
15 | set_property(TARGET generator PROPERTY CXX_STANDARD_REQUIRED ON) | 15 | set_property(TARGET generator PROPERTY CXX_STANDARD_REQUIRED ON) |
16 | target_link_libraries(generator ${sqlite3_LIBRARIES} ${LIBXML2_LIBRARIES}) | 16 | target_link_libraries(generator ${sqlite3_LIBRARIES} ${LIBXML2_LIBRARIES}) |
diff --git a/generator/form.cpp b/generator/form.cpp index c66820c..a88363b 100644 --- a/generator/form.cpp +++ b/generator/form.cpp | |||
@@ -9,12 +9,13 @@ namespace verbly { | |||
9 | 9 | ||
10 | int form::nextId_ = 0; | 10 | int form::nextId_ = 0; |
11 | 11 | ||
12 | form::form(std::string text) : | 12 | form::form(std::string text, int anagram_set_id) : |
13 | id_(nextId_++), | 13 | id_(nextId_++), |
14 | text_(text), | 14 | text_(text), |
15 | complexity_(std::count(std::begin(text), std::end(text), ' ') + 1), | 15 | complexity_(std::count(std::begin(text), std::end(text), ' ') + 1), |
16 | proper_(std::any_of(std::begin(text), std::end(text), ::isupper)), | 16 | proper_(std::any_of(std::begin(text), std::end(text), ::isupper)), |
17 | length_(text.length()) | 17 | length_(text.length()), |
18 | anagram_set_id_(anagram_set_id) | ||
18 | { | 19 | { |
19 | } | 20 | } |
20 | 21 | ||
@@ -34,7 +35,9 @@ namespace verbly { | |||
34 | { "form", arg.getText() }, | 35 | { "form", arg.getText() }, |
35 | { "complexity", arg.getComplexity() }, | 36 | { "complexity", arg.getComplexity() }, |
36 | { "proper", arg.isProper() }, | 37 | { "proper", arg.isProper() }, |
37 | { "length", arg.getLength() } | 38 | { "length", arg.getLength() }, |
39 | { "anagram_set_id", arg.getAnagramSetId() }, | ||
40 | { "reverse_form_id", arg.getReverseId() } | ||
38 | }); | 41 | }); |
39 | } | 42 | } |
40 | 43 | ||
diff --git a/generator/form.h b/generator/form.h index f3dd779..c83bbdc 100644 --- a/generator/form.h +++ b/generator/form.h | |||
@@ -15,7 +15,7 @@ namespace verbly { | |||
15 | 15 | ||
16 | // Constructor | 16 | // Constructor |
17 | 17 | ||
18 | explicit form(std::string text); | 18 | form(std::string text, int anagram_set_id); |
19 | 19 | ||
20 | // Mutators | 20 | // Mutators |
21 | 21 | ||
@@ -48,6 +48,21 @@ namespace verbly { | |||
48 | return length_; | 48 | return length_; |
49 | } | 49 | } |
50 | 50 | ||
51 | int getAnagramSetId() const | ||
52 | { | ||
53 | return anagram_set_id_; | ||
54 | } | ||
55 | |||
56 | void setReverseId(int id) | ||
57 | { | ||
58 | reverse_id_ = id; | ||
59 | } | ||
60 | |||
61 | int getReverseId() const | ||
62 | { | ||
63 | return reverse_id_; | ||
64 | } | ||
65 | |||
51 | std::set<const pronunciation*> getPronunciations() const | 66 | std::set<const pronunciation*> getPronunciations() const |
52 | { | 67 | { |
53 | return pronunciations_; | 68 | return pronunciations_; |
@@ -62,6 +77,8 @@ namespace verbly { | |||
62 | const int complexity_; | 77 | const int complexity_; |
63 | const bool proper_; | 78 | const bool proper_; |
64 | const int length_; | 79 | const int length_; |
80 | const int anagram_set_id_; | ||
81 | int reverse_id_ = -1; | ||
65 | 82 | ||
66 | std::set<const pronunciation*> pronunciations_; | 83 | std::set<const pronunciation*> pronunciations_; |
67 | 84 | ||
diff --git a/generator/generator.cpp b/generator/generator.cpp index ad665a2..fdea10f 100644 --- a/generator/generator.cpp +++ b/generator/generator.cpp | |||
@@ -6,6 +6,9 @@ | |||
6 | #include <fstream> | 6 | #include <fstream> |
7 | #include <hkutil/string.h> | 7 | #include <hkutil/string.h> |
8 | #include <hkutil/progress.h> | 8 | #include <hkutil/progress.h> |
9 | #include <array> | ||
10 | #include <mutex> | ||
11 | #include <thread> | ||
9 | #include "role.h" | 12 | #include "role.h" |
10 | #include "part.h" | 13 | #include "part.h" |
11 | #include "../lib/enums.h" | 14 | #include "../lib/enums.h" |
@@ -83,7 +86,7 @@ namespace verbly { | |||
83 | readAdjectivePositioning(); | 86 | readAdjectivePositioning(); |
84 | 87 | ||
85 | // Counts the number of URLs ImageNet has per notion | 88 | // Counts the number of URLs ImageNet has per notion |
86 | readImageNetUrls(); | 89 | //readImageNetUrls(); |
87 | 90 | ||
88 | // Creates a word by WordNet sense key lookup table | 91 | // Creates a word by WordNet sense key lookup table |
89 | readWordNetSenseKeys(); | 92 | readWordNetSenseKeys(); |
@@ -115,9 +118,17 @@ namespace verbly { | |||
115 | // Writes the database version | 118 | // Writes the database version |
116 | writeVersion(); | 119 | writeVersion(); |
117 | 120 | ||
121 | // Calculates and writes form merography | ||
122 | writeMerography(); | ||
123 | |||
124 | // Calculates and writes pronunciation merophony | ||
125 | writeMerophony(); | ||
126 | |||
118 | // Dumps data to the database | 127 | // Dumps data to the database |
119 | dumpObjects(); | 128 | dumpObjects(); |
120 | 129 | ||
130 | |||
131 | |||
121 | // Populates the antonymy relationship from WordNet | 132 | // Populates the antonymy relationship from WordNet |
122 | readWordNetAntonymy(); | 133 | readWordNetAntonymy(); |
123 | 134 | ||
@@ -577,9 +588,29 @@ namespace verbly { | |||
577 | pronunciation& p = *pronunciationByPhonemes_[phonemes]; | 588 | pronunciation& p = *pronunciationByPhonemes_[phonemes]; |
578 | formByText_.at(canonical)->addPronunciation(p); | 589 | formByText_.at(canonical)->addPronunciation(p); |
579 | } else { | 590 | } else { |
580 | pronunciations_.emplace_back(phonemes); | 591 | std::string stressless; |
592 | for (int i=0; i<phonemes.size(); i++) { | ||
593 | if (!std::isdigit(phonemes[i])) { | ||
594 | stressless.push_back(phonemes[i]); | ||
595 | } | ||
596 | } | ||
597 | auto stresslessList = hatkirby::split<std::vector<std::string>>(stressless, " "); | ||
598 | std::string stresslessPhonemes = hatkirby::implode(stresslessList.begin(), stresslessList.end(), " "); | ||
599 | std::sort(stresslessList.begin(), stresslessList.end()); | ||
600 | std::string sortedPhonemes = hatkirby::implode(stresslessList.begin(), stresslessList.end(), " "); | ||
601 | |||
602 | int anaphoneSetId; | ||
603 | if (anaphoneSets_.count(sortedPhonemes)) { | ||
604 | anaphoneSetId = anaphoneSets_[sortedPhonemes]; | ||
605 | } else { | ||
606 | anaphoneSetId = anaphoneSets_.size(); | ||
607 | anaphoneSets_[sortedPhonemes] = anaphoneSetId; | ||
608 | } | ||
609 | |||
610 | pronunciations_.emplace_back(phonemes, anaphoneSetId); | ||
581 | pronunciation& p = pronunciations_.back(); | 611 | pronunciation& p = pronunciations_.back(); |
582 | pronunciationByPhonemes_[phonemes] = &p; | 612 | pronunciationByPhonemes_[phonemes] = &p; |
613 | pronunciationByBlankPhonemes_[stresslessPhonemes] = &p; | ||
583 | formByText_.at(canonical)->addPronunciation(p); | 614 | formByText_.at(canonical)->addPronunciation(p); |
584 | } | 615 | } |
585 | } | 616 | } |
@@ -671,6 +702,12 @@ namespace verbly { | |||
671 | 702 | ||
672 | for (form& f : forms_) | 703 | for (form& f : forms_) |
673 | { | 704 | { |
705 | std::string reverseText = f.getText(); | ||
706 | std::reverse(reverseText.begin(), reverseText.end()); | ||
707 | if (formByText_.count(reverseText)) { | ||
708 | f.setReverseId(formByText_[reverseText]->getId()); | ||
709 | } | ||
710 | |||
674 | db_ << f; | 711 | db_ << f; |
675 | 712 | ||
676 | ppgs.update(); | 713 | ppgs.update(); |
@@ -682,6 +719,19 @@ namespace verbly { | |||
682 | 719 | ||
683 | for (pronunciation& p : pronunciations_) | 720 | for (pronunciation& p : pronunciations_) |
684 | { | 721 | { |
722 | std::string stressless; | ||
723 | for (int i=0; i<p.getPhonemes().size(); i++) { | ||
724 | if (!std::isdigit(p.getPhonemes()[i])) { | ||
725 | stressless.push_back(p.getPhonemes()[i]); | ||
726 | } | ||
727 | } | ||
728 | auto stresslessList = hatkirby::split<std::vector<std::string>>(stressless, " "); | ||
729 | std::reverse(stresslessList.begin(), stresslessList.end()); | ||
730 | std::string reversedPhonemes = hatkirby::implode(stresslessList.begin(), stresslessList.end(), " "); | ||
731 | if (pronunciationByBlankPhonemes_.count(reversedPhonemes)) { | ||
732 | p.setReverseId(pronunciationByBlankPhonemes_[reversedPhonemes]->getId()); | ||
733 | } | ||
734 | |||
685 | db_ << p; | 735 | db_ << p; |
686 | 736 | ||
687 | ppgs.update(); | 737 | ppgs.update(); |
@@ -698,6 +748,208 @@ namespace verbly { | |||
698 | ppgs.update(); | 748 | ppgs.update(); |
699 | } | 749 | } |
700 | } | 750 | } |
751 | |||
752 | /*{ | ||
753 | hatkirby::progress ppgs("Writing merography...", formByText_.size()); | ||
754 | |||
755 | for (const auto& [merotext, meroform] : formByText_) | ||
756 | { | ||
757 | for (const auto& [holotext, holoform] : formByText_) | ||
758 | { | ||
759 | if (isMero(merotext, holotext)) | ||
760 | { | ||
761 | db_.insertIntoTable( | ||
762 | "merography", | ||
763 | { | ||
764 | { "merograph_id", meroform->getId() }, | ||
765 | { "holograph_id", holoform->getId() } | ||
766 | }); | ||
767 | } | ||
768 | } | ||
769 | |||
770 | ppgs.update(); | ||
771 | } | ||
772 | } | ||
773 | |||
774 | { | ||
775 | hatkirby::progress ppgs("Writing merophony...", pronunciationByBlankPhonemes_.size()); | ||
776 | |||
777 | for (const auto& [merotext, merop] : pronunciationByBlankPhonemes_) | ||
778 | { | ||
779 | auto merophonemes = hatkirby::split<std::list<std::string>>(merotext, " "); | ||
780 | |||
781 | for (const auto& [holotext, holop] : pronunciationByBlankPhonemes_) | ||
782 | { | ||
783 | auto holophonemes = hatkirby::split<std::list<std::string>>(holotext, " "); | ||
784 | |||
785 | if (isMero(merophonemes, holophonemes)) | ||
786 | { | ||
787 | db_.insertIntoTable( | ||
788 | "merophony", | ||
789 | { | ||
790 | { "merophone_id", merop->getId() }, | ||
791 | { "holophone_id", holop->getId() } | ||
792 | }); | ||
793 | } | ||
794 | } | ||
795 | |||
796 | ppgs.update(); | ||
797 | } | ||
798 | }*/ | ||
799 | } | ||
800 | |||
801 | void generator::writeMerography() | ||
802 | { | ||
803 | hatkirby::progress ppgs("Writing merography...", formByText_.size()); | ||
804 | for (const auto& [text, form] : formByText_) | ||
805 | { | ||
806 | ppgs.update(); | ||
807 | |||
808 | std::unordered_set<std::string> visited; | ||
809 | for (int i=0; i<text.size(); i++) | ||
810 | { | ||
811 | for (int l=3; l<text.size()-i; l++) | ||
812 | { | ||
813 | if (i==0 && l == text.size()) | ||
814 | { | ||
815 | continue; | ||
816 | } | ||
817 | |||
818 | std::string substr = text.substr(i, l); | ||
819 | if (formByText_.count(substr) && !visited.count(substr)) | ||
820 | { | ||
821 | visited.insert(substr); | ||
822 | db_.insertIntoTable( | ||
823 | "merography", | ||
824 | { | ||
825 | { "merograph_id", formByText_[substr]->getId() }, | ||
826 | { "holograph_id", form->getId() } | ||
827 | }); | ||
828 | } | ||
829 | } | ||
830 | } | ||
831 | |||
832 | |||
833 | /* | ||
834 | std::string front = text; | ||
835 | while (front.size() > 2) | ||
836 | { | ||
837 | front.erase(0, 1); | ||
838 | |||
839 | if (formByText_.count(front)) | ||
840 | { | ||
841 | visited.insert(front); | ||
842 | db_.insertIntoTable( | ||
843 | "merography", | ||
844 | { | ||
845 | { "merograph_id", formByText_[front]->getId() }, | ||
846 | { "holograph_id", form->getId() } | ||
847 | }); | ||
848 | } | ||
849 | } | ||
850 | |||
851 | if (text.size() > 2) | ||
852 | { | ||
853 | std::string back = text; | ||
854 | |||
855 | while (back.size() > 2) | ||
856 | { | ||
857 | back.pop_back(); | ||
858 | |||
859 | if (formByText_.count(back) && !visited.count(back)) | ||
860 | { | ||
861 | db_.insertIntoTable( | ||
862 | "merography", | ||
863 | { | ||
864 | { "merograph_id", formByText_[back]->getId() }, | ||
865 | { "holograph_id", form->getId() } | ||
866 | }); | ||
867 | } | ||
868 | } | ||
869 | }*/ | ||
870 | } | ||
871 | } | ||
872 | |||
873 | void generator::writeMerophony() | ||
874 | { | ||
875 | std::map<std::list<std::string>, pronunciation*> tokenized; | ||
876 | for (const auto& [phonemes, pronunciation] : pronunciationByBlankPhonemes_) | ||
877 | { | ||
878 | tokenized[hatkirby::split<std::list<std::string>>(phonemes, " ")] = pronunciation; | ||
879 | } | ||
880 | |||
881 | hatkirby::progress ppgs("Writing merophony...", tokenized.size()); | ||
882 | for (const auto& [phonemes, pronunciation] : tokenized) | ||
883 | { | ||
884 | ppgs.update(); | ||
885 | |||
886 | std::set<std::list<std::string>> visited; | ||
887 | for (int i=0; i<phonemes.size(); i++) | ||
888 | { | ||
889 | for (int l=2; l<phonemes.size()-i; l++) | ||
890 | { | ||
891 | if (i==0 && l == phonemes.size()) | ||
892 | { | ||
893 | continue; | ||
894 | } | ||
895 | |||
896 | std::list<std::string> sublist; | ||
897 | for (auto j=std::next(phonemes.begin(),i); j!=std::next(phonemes.begin(),i+l); j++) | ||
898 | { | ||
899 | sublist.push_back(*j); | ||
900 | } | ||
901 | |||
902 | if (tokenized.count(sublist) && !visited.count(sublist)) | ||
903 | { | ||
904 | visited.insert(sublist); | ||
905 | db_.insertIntoTable( | ||
906 | "merophony", | ||
907 | { | ||
908 | { "merophone_id", tokenized[sublist]->getId() }, | ||
909 | { "holophone_id", pronunciation->getId() } | ||
910 | }); | ||
911 | } | ||
912 | } | ||
913 | } | ||
914 | /*std::list<std::string> front = phonemes; | ||
915 | while (front.size() > 1) | ||
916 | { | ||
917 | front.pop_front(); | ||
918 | |||
919 | if (tokenized.count(front)) | ||
920 | { | ||
921 | visited.insert(front); | ||
922 | db_.insertIntoTable( | ||
923 | "merophony", | ||
924 | { | ||
925 | { "merophone_id", tokenized[front]->getId() }, | ||
926 | { "holophone_id", pronunciation->getId() } | ||
927 | }); | ||
928 | break; | ||
929 | } | ||
930 | } | ||
931 | |||
932 | if (phonemes.size() > 1) | ||
933 | { | ||
934 | std::list<std::string> back = phonemes; | ||
935 | |||
936 | while (back.size() > 1) | ||
937 | { | ||
938 | back.pop_back(); | ||
939 | |||
940 | if (tokenized.count(back) && !visited.count(back)) | ||
941 | { | ||
942 | db_.insertIntoTable( | ||
943 | "merophony", | ||
944 | { | ||
945 | { "merophone_id", tokenized[back]->getId() }, | ||
946 | { "holophone_id", pronunciation->getId() } | ||
947 | }); | ||
948 | break; | ||
949 | } | ||
950 | } | ||
951 | }*/ | ||
952 | } | ||
701 | } | 953 | } |
702 | 954 | ||
703 | void generator::readWordNetAntonymy() | 955 | void generator::readWordNetAntonymy() |
@@ -1316,7 +1568,19 @@ namespace verbly { | |||
1316 | { | 1568 | { |
1317 | if (!formByText_.count(text)) | 1569 | if (!formByText_.count(text)) |
1318 | { | 1570 | { |
1319 | forms_.emplace_back(text); | 1571 | std::string sortedText = text; |
1572 | std::sort(sortedText.begin(), sortedText.end()); | ||
1573 | |||
1574 | int anagramSetId; | ||
1575 | if (anagramSets_.count(sortedText)) | ||
1576 | { | ||
1577 | anagramSetId = anagramSets_[sortedText]; | ||
1578 | } else { | ||
1579 | anagramSetId = anagramSets_.size(); | ||
1580 | anagramSets_[sortedText] = anagramSetId; | ||
1581 | } | ||
1582 | |||
1583 | forms_.emplace_back(text, anagramSetId); | ||
1320 | formByText_[text] = &forms_.back(); | 1584 | formByText_[text] = &forms_.back(); |
1321 | } | 1585 | } |
1322 | 1586 | ||
diff --git a/generator/generator.h b/generator/generator.h index 2cd2ba9..70f0657 100644 --- a/generator/generator.h +++ b/generator/generator.h | |||
@@ -3,6 +3,8 @@ | |||
3 | 3 | ||
4 | #include <string> | 4 | #include <string> |
5 | #include <map> | 5 | #include <map> |
6 | #include <unordered_map> | ||
7 | #include <unordered_set> | ||
6 | #include <list> | 8 | #include <list> |
7 | #include <set> | 9 | #include <set> |
8 | #include <libxml/parser.h> | 10 | #include <libxml/parser.h> |
@@ -64,6 +66,10 @@ namespace verbly { | |||
64 | 66 | ||
65 | void dumpObjects(); | 67 | void dumpObjects(); |
66 | 68 | ||
69 | void writeMerography(); | ||
70 | |||
71 | void writeMerophony(); | ||
72 | |||
67 | void readWordNetAntonymy(); | 73 | void readWordNetAntonymy(); |
68 | 74 | ||
69 | void readWordNetVariation(); | 75 | void readWordNetVariation(); |
@@ -138,8 +144,11 @@ namespace verbly { | |||
138 | std::map<std::pair<int, int>, word*> wordByWnidAndWnum_; | 144 | std::map<std::pair<int, int>, word*> wordByWnidAndWnum_; |
139 | std::map<std::string, std::set<word*>> wordsByBaseForm_; | 145 | std::map<std::string, std::set<word*>> wordsByBaseForm_; |
140 | std::map<std::string, lemma*> lemmaByBaseForm_; | 146 | std::map<std::string, lemma*> lemmaByBaseForm_; |
141 | std::map<std::string, form*> formByText_; | 147 | std::unordered_map<std::string, form*> formByText_; |
142 | std::map<std::string, pronunciation*> pronunciationByPhonemes_; | 148 | std::map<std::string, pronunciation*> pronunciationByPhonemes_; |
149 | std::unordered_map<std::string, pronunciation*> pronunciationByBlankPhonemes_; | ||
150 | std::map<std::string, int> anagramSets_; | ||
151 | std::map<std::string, int> anaphoneSets_; | ||
143 | 152 | ||
144 | // Caches | 153 | // Caches |
145 | 154 | ||
diff --git a/generator/pronunciation.cpp b/generator/pronunciation.cpp index 3075d42..5c4d8e2 100644 --- a/generator/pronunciation.cpp +++ b/generator/pronunciation.cpp | |||
@@ -10,9 +10,10 @@ namespace verbly { | |||
10 | 10 | ||
11 | int pronunciation::nextId_ = 0; | 11 | int pronunciation::nextId_ = 0; |
12 | 12 | ||
13 | pronunciation::pronunciation(std::string phonemes) : | 13 | pronunciation::pronunciation(std::string phonemes, int anaphone_set_id) : |
14 | id_(nextId_++), | 14 | id_(nextId_++), |
15 | phonemes_(phonemes) | 15 | phonemes_(phonemes), |
16 | anaphone_set_id_(anaphone_set_id) | ||
16 | { | 17 | { |
17 | auto phonemeList = | 18 | auto phonemeList = |
18 | hatkirby::split<std::list<std::string>>(phonemes, " "); | 19 | hatkirby::split<std::list<std::string>>(phonemes, " "); |
@@ -88,6 +89,8 @@ namespace verbly { | |||
88 | fields.emplace_back("phonemes", arg.getPhonemes()); | 89 | fields.emplace_back("phonemes", arg.getPhonemes()); |
89 | fields.emplace_back("syllables", arg.getSyllables()); | 90 | fields.emplace_back("syllables", arg.getSyllables()); |
90 | fields.emplace_back("stress", arg.getStress()); | 91 | fields.emplace_back("stress", arg.getStress()); |
92 | fields.emplace_back("anaphone_set_id", arg.getAnaphoneSetId()); | ||
93 | fields.emplace_back("reverse_pronunciation_id", arg.getReverseId()); | ||
91 | 94 | ||
92 | if (arg.hasRhyme()) | 95 | if (arg.hasRhyme()) |
93 | { | 96 | { |
diff --git a/generator/pronunciation.h b/generator/pronunciation.h index 3190e6d..e6dc4b4 100644 --- a/generator/pronunciation.h +++ b/generator/pronunciation.h | |||
@@ -13,7 +13,7 @@ namespace verbly { | |||
13 | 13 | ||
14 | // Constructor | 14 | // Constructor |
15 | 15 | ||
16 | explicit pronunciation(std::string phonemes); | 16 | pronunciation(std::string phonemes, int anaphone_set_id); |
17 | 17 | ||
18 | // Accessors | 18 | // Accessors |
19 | 19 | ||
@@ -62,6 +62,21 @@ namespace verbly { | |||
62 | return stress_; | 62 | return stress_; |
63 | } | 63 | } |
64 | 64 | ||
65 | int getAnaphoneSetId() const | ||
66 | { | ||
67 | return anaphone_set_id_; | ||
68 | } | ||
69 | |||
70 | void setReverseId(int id) | ||
71 | { | ||
72 | reverse_id_ = id; | ||
73 | } | ||
74 | |||
75 | int getReverseId() const | ||
76 | { | ||
77 | return reverse_id_; | ||
78 | } | ||
79 | |||
65 | private: | 80 | private: |
66 | 81 | ||
67 | static int nextId_; | 82 | static int nextId_; |
@@ -72,6 +87,8 @@ namespace verbly { | |||
72 | std::string prerhyme_; | 87 | std::string prerhyme_; |
73 | int syllables_ = 0; | 88 | int syllables_ = 0; |
74 | std::string stress_; | 89 | std::string stress_; |
90 | int anaphone_set_id_; | ||
91 | int reverse_id_ = -1; | ||
75 | 92 | ||
76 | }; | 93 | }; |
77 | 94 | ||
diff --git a/generator/schema.sql b/generator/schema.sql index 6a7d223..8c910f4 100644 --- a/generator/schema.sql +++ b/generator/schema.sql | |||
@@ -160,7 +160,9 @@ CREATE TABLE `forms` ( | |||
160 | `form` VARCHAR(32) NOT NULL, | 160 | `form` VARCHAR(32) NOT NULL, |
161 | `complexity` SMALLINT NOT NULL, | 161 | `complexity` SMALLINT NOT NULL, |
162 | `proper` SMALLINT NOT NULL, | 162 | `proper` SMALLINT NOT NULL, |
163 | `length` SMALLINT NOT NULL | 163 | `length` SMALLINT NOT NULL, |
164 | `anagram_set_id` INTEGER NOT NULL, | ||
165 | `reverse_form_id` INTEGER NOT NULL | ||
164 | ); | 166 | ); |
165 | 167 | ||
166 | CREATE UNIQUE INDEX `form_by_string` ON `forms`(`form`); | 168 | CREATE UNIQUE INDEX `form_by_string` ON `forms`(`form`); |
@@ -174,13 +176,23 @@ CREATE TABLE `lemmas_forms` ( | |||
174 | 176 | ||
175 | CREATE INDEX `forms_lemmas` ON `lemmas_forms`(`form_id`,`category`,`lemma_id`); | 177 | CREATE INDEX `forms_lemmas` ON `lemmas_forms`(`form_id`,`category`,`lemma_id`); |
176 | 178 | ||
179 | CREATE TABLE `merography` ( | ||
180 | `merograph_id` INTEGER NOT NULL, | ||
181 | `holograph_id` INTEGER NOT NULL, | ||
182 | PRIMARY KEY(`merograph_id`,`holograph_id`) | ||
183 | ) WITHOUT ROWID; | ||
184 | |||
185 | CREATE INDEX `reverse_merography` ON `merography`(`holograph_id`,`merograph_id`); | ||
186 | |||
177 | CREATE TABLE `pronunciations` ( | 187 | CREATE TABLE `pronunciations` ( |
178 | `pronunciation_id` INTEGER PRIMARY KEY, | 188 | `pronunciation_id` INTEGER PRIMARY KEY, |
179 | `phonemes` VARCHAR(64) NOT NULL, | 189 | `phonemes` VARCHAR(64) NOT NULL, |
180 | `prerhyme` VARCHAR(8), | 190 | `prerhyme` VARCHAR(8), |
181 | `rhyme` VARCHAR(64), | 191 | `rhyme` VARCHAR(64), |
182 | `syllables` INTEGER NOT NULL, | 192 | `syllables` INTEGER NOT NULL, |
183 | `stress` VARCHAR(64) NOT NULL | 193 | `stress` VARCHAR(64) NOT NULL, |
194 | `anaphone_set_id` INTEGER NOT NULL, | ||
195 | `reverse_pronunciation_id` INTEGER NOT NULL | ||
184 | ); | 196 | ); |
185 | 197 | ||
186 | CREATE INDEX `rhymes_with` ON `pronunciations`(`rhyme`,`prerhyme`); | 198 | CREATE INDEX `rhymes_with` ON `pronunciations`(`rhyme`,`prerhyme`); |
@@ -193,6 +205,14 @@ CREATE TABLE `forms_pronunciations` ( | |||
193 | 205 | ||
194 | CREATE INDEX `pronunciations_forms` ON `forms_pronunciations`(`pronunciation_id`,`form_id`); | 206 | CREATE INDEX `pronunciations_forms` ON `forms_pronunciations`(`pronunciation_id`,`form_id`); |
195 | 207 | ||
208 | CREATE TABLE `merophony` ( | ||
209 | `merophone_id` INTEGER NOT NULL, | ||
210 | `holophone_id` INTEGER NOT NULL, | ||
211 | PRIMARY KEY(`merophone_id`,`holophone_id`) | ||
212 | ) WITHOUT ROWID; | ||
213 | |||
214 | CREATE INDEX `reverse_merophony` ON `merophony`(`holophone_id`,`merophone_id`); | ||
215 | |||
196 | CREATE TABLE `frames` ( | 216 | CREATE TABLE `frames` ( |
197 | `frame_id` INTEGER NOT NULL, | 217 | `frame_id` INTEGER NOT NULL, |
198 | `group_id` INTEGER NOT NULL, | 218 | `group_id` INTEGER NOT NULL, |