diff options
| -rw-r--r-- | generator/generator.cpp | 21 | ||||
| -rw-r--r-- | generator/generator.h | 2 | ||||
| -rw-r--r-- | generator/schema.sql | 147 | ||||
| -rw-r--r-- | lib/version.h | 2 |
4 files changed, 92 insertions, 80 deletions
| diff --git a/generator/generator.cpp b/generator/generator.cpp index e52aa90..0d073be 100644 --- a/generator/generator.cpp +++ b/generator/generator.cpp | |||
| @@ -696,7 +696,8 @@ namespace verbly { | |||
| 696 | 696 | ||
| 697 | void generator::readWordNetAntonymy() | 697 | void generator::readWordNetAntonymy() |
| 698 | { | 698 | { |
| 699 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_ant.pl")); | 699 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_ant.pl", true)); |
| 700 | |||
| 700 | hatkirby::progress ppgs("Writing antonyms...", lines.size()); | 701 | hatkirby::progress ppgs("Writing antonyms...", lines.size()); |
| 701 | for (auto line : lines) | 702 | for (auto line : lines) |
| 702 | { | 703 | { |
| @@ -770,7 +771,7 @@ namespace verbly { | |||
| 770 | 771 | ||
| 771 | void generator::readWordNetClasses() | 772 | void generator::readWordNetClasses() |
| 772 | { | 773 | { |
| 773 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_cls.pl")); | 774 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_cls.pl", true)); |
| 774 | 775 | ||
| 775 | hatkirby::progress ppgs( | 776 | hatkirby::progress ppgs( |
| 776 | "Writing usage, topicality, and regionality...", | 777 | "Writing usage, topicality, and regionality...", |
| @@ -1092,7 +1093,7 @@ namespace verbly { | |||
| 1092 | 1093 | ||
| 1093 | void generator::readWordNetPertainymy() | 1094 | void generator::readWordNetPertainymy() |
| 1094 | { | 1095 | { |
| 1095 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_per.pl")); | 1096 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_per.pl", true)); |
| 1096 | 1097 | ||
| 1097 | hatkirby::progress ppgs( | 1098 | hatkirby::progress ppgs( |
| 1098 | "Writing pertainymy and mannernymy...", | 1099 | "Writing pertainymy and mannernymy...", |
| @@ -1228,7 +1229,7 @@ namespace verbly { | |||
| 1228 | db_.execute("ANALYZE"); | 1229 | db_.execute("ANALYZE"); |
| 1229 | } | 1230 | } |
| 1230 | 1231 | ||
| 1231 | std::list<std::string> generator::readFile(std::string path) | 1232 | std::list<std::string> generator::readFile(std::string path, bool uniq) |
| 1232 | { | 1233 | { |
| 1233 | std::ifstream file(path); | 1234 | std::ifstream file(path); |
| 1234 | if (!file) | 1235 | if (!file) |
| @@ -1248,6 +1249,18 @@ namespace verbly { | |||
| 1248 | lines.push_back(line); | 1249 | lines.push_back(line); |
| 1249 | } | 1250 | } |
| 1250 | 1251 | ||
| 1252 | if (uniq) | ||
| 1253 | { | ||
| 1254 | std::vector<std::string> uniq(std::begin(lines), std::end(lines)); | ||
| 1255 | lines.clear(); | ||
| 1256 | |||
| 1257 | std::sort(std::begin(uniq), std::end(uniq)); | ||
| 1258 | std::unique_copy( | ||
| 1259 | std::begin(uniq), | ||
| 1260 | std::end(uniq), | ||
| 1261 | std::back_inserter(lines)); | ||
| 1262 | } | ||
| 1263 | |||
| 1251 | return lines; | 1264 | return lines; |
| 1252 | } | 1265 | } |
| 1253 | 1266 | ||
| diff --git a/generator/generator.h b/generator/generator.h index cd99f88..1547b7c 100644 --- a/generator/generator.h +++ b/generator/generator.h | |||
| @@ -94,7 +94,7 @@ namespace verbly { | |||
| 94 | 94 | ||
| 95 | // Helpers | 95 | // Helpers |
| 96 | 96 | ||
| 97 | std::list<std::string> readFile(std::string path); | 97 | std::list<std::string> readFile(std::string path, bool uniq = false); |
| 98 | 98 | ||
| 99 | inline part_of_speech partOfSpeechByWnid(int wnid); | 99 | inline part_of_speech partOfSpeechByWnid(int wnid); |
| 100 | 100 | ||
| diff --git a/generator/schema.sql b/generator/schema.sql index d97c06e..6a7d223 100644 --- a/generator/schema.sql +++ b/generator/schema.sql | |||
| @@ -14,79 +14,79 @@ CREATE UNIQUE INDEX `notion_by_wnid` ON `notions`(`wnid`); | |||
| 14 | 14 | ||
| 15 | CREATE TABLE `hypernymy` ( | 15 | CREATE TABLE `hypernymy` ( |
| 16 | `hypernym_id` INTEGER NOT NULL, | 16 | `hypernym_id` INTEGER NOT NULL, |
| 17 | `hyponym_id` INTEGER NOT NULL | 17 | `hyponym_id` INTEGER NOT NULL, |
| 18 | ); | 18 | PRIMARY KEY (`hypernym_id`,`hyponym_id`) |
| 19 | ) WITHOUT ROWID; | ||
| 19 | 20 | ||
| 20 | CREATE INDEX `hyponym_of` ON `hypernymy`(`hypernym_id`,`hyponym_id`); | 21 | CREATE INDEX `reverse_hypernymy` ON `hypernymy`(`hyponym_id`,`hypernym_id`); |
| 21 | CREATE INDEX `hypernym_of` ON `hypernymy`(`hyponym_id`,`hypernym_id`); | ||
| 22 | 22 | ||
| 23 | CREATE TABLE `instantiation` ( | 23 | CREATE TABLE `instantiation` ( |
| 24 | `class_id` INTEGER NOT NULL, | 24 | `class_id` INTEGER NOT NULL, |
| 25 | `instance_id` INTEGER NOT NULL | 25 | `instance_id` INTEGER NOT NULL, |
| 26 | ); | 26 | PRIMARY KEY (`class_id`,`instance_id`) |
| 27 | ) WITHOUT ROWID; | ||
| 27 | 28 | ||
| 28 | CREATE INDEX `instance_of` ON `instantiation`(`class_id`,`instance_id`); | 29 | CREATE INDEX `reverse_instantiation` ON `instantiation`(`instance_id`,`class_id`); |
| 29 | CREATE INDEX `class_of` ON `instantiation`(`instance_id`,`class_id`); | ||
| 30 | 30 | ||
| 31 | CREATE TABLE `member_meronymy` ( | 31 | CREATE TABLE `member_meronymy` ( |
| 32 | `meronym_id` INTEGER NOT NULL, | 32 | `meronym_id` INTEGER NOT NULL, |
| 33 | `holonym_id` INTEGER NOT NULL | 33 | `holonym_id` INTEGER NOT NULL, |
| 34 | ); | 34 | PRIMARY KEY (`meronym_id`,`holonym_id`) |
| 35 | ) WITHOUT ROWID; | ||
| 35 | 36 | ||
| 36 | CREATE INDEX `member_holonym_of` ON `member_meronymy`(`meronym_id`,`holonym_id`); | 37 | CREATE INDEX `reverse_member_meronymy` ON `member_meronymy`(`holonym_id`,`meronym_id`); |
| 37 | CREATE INDEX `member_meronym_of` ON `member_meronymy`(`holonym_id`,`meronym_id`); | ||
| 38 | 38 | ||
| 39 | CREATE TABLE `part_meronymy` ( | 39 | CREATE TABLE `part_meronymy` ( |
| 40 | `meronym_id` INTEGER NOT NULL, | 40 | `meronym_id` INTEGER NOT NULL, |
| 41 | `holonym_id` INTEGER NOT NULL | 41 | `holonym_id` INTEGER NOT NULL, |
| 42 | ); | 42 | PRIMARY KEY (`meronym_id`,`holonym_id`) |
| 43 | ) WITHOUT ROWID; | ||
| 43 | 44 | ||
| 44 | CREATE INDEX `part_holonym_of` ON `part_meronymy`(`meronym_id`,`holonym_id`); | 45 | CREATE INDEX `reverse_part_meronymy` ON `part_meronymy`(`holonym_id`,`meronym_id`); |
| 45 | CREATE INDEX `part_meronym_of` ON `part_meronymy`(`holonym_id`,`meronym_id`); | ||
| 46 | 46 | ||
| 47 | CREATE TABLE `substance_meronymy` ( | 47 | CREATE TABLE `substance_meronymy` ( |
| 48 | `meronym_id` INTEGER NOT NULL, | 48 | `meronym_id` INTEGER NOT NULL, |
| 49 | `holonym_id` INTEGER NOT NULL | 49 | `holonym_id` INTEGER NOT NULL, |
| 50 | ); | 50 | PRIMARY KEY (`meronym_id`,`holonym_id`) |
| 51 | ) WITHOUT ROWID; | ||
| 51 | 52 | ||
| 52 | CREATE INDEX `substance_holonym_of` ON `substance_meronymy`(`meronym_id`,`holonym_id`); | 53 | CREATE INDEX `reverse_substance_meronymy` ON `substance_meronymy`(`holonym_id`,`meronym_id`); |
| 53 | CREATE INDEX `substance_meronym_of` ON `substance_meronymy`(`holonym_id`,`meronym_id`); | ||
| 54 | 54 | ||
| 55 | CREATE TABLE `variation` ( | 55 | CREATE TABLE `variation` ( |
| 56 | `noun_id` INTEGER NOT NULL, | 56 | `noun_id` INTEGER NOT NULL, |
| 57 | `adjective_id` INTEGER NOT NULL | 57 | `adjective_id` INTEGER NOT NULL, |
| 58 | ); | 58 | PRIMARY KEY (`noun_id`,`adjective_id`) |
| 59 | ) WITHOUT ROWID; | ||
| 59 | 60 | ||
| 60 | CREATE INDEX `variant_of` ON `variation`(`noun_id`,`adjective_id`); | 61 | CREATE INDEX `reverse_variation` ON `variation`(`adjective_id`,`noun_id`); |
| 61 | CREATE INDEX `attribute_of` ON `variation`(`adjective_id`,`noun_id`); | ||
| 62 | 62 | ||
| 63 | CREATE TABLE `similarity` ( | 63 | CREATE TABLE `similarity` ( |
| 64 | `adjective_1_id` INTEGER NOT NULL, | 64 | `adjective_1_id` INTEGER NOT NULL, |
| 65 | `adjective_2_id` INTEGER NOT NULL | 65 | `adjective_2_id` INTEGER NOT NULL, |
| 66 | ); | 66 | PRIMARY KEY (`adjective_1_id`,`adjective_2_id`) |
| 67 | 67 | ) WITHOUT ROWID; | |
| 68 | CREATE INDEX `similar_to` ON `similarity`(`adjective_1_id`,`adjective_2_id`); | ||
| 69 | 68 | ||
| 70 | CREATE TABLE `is_a` ( | 69 | CREATE TABLE `is_a` ( |
| 71 | `notion_id` INTEGER NOT NULL, | 70 | `notion_id` INTEGER NOT NULL, |
| 72 | `groupname` VARCHAR(32) NOT NULL | 71 | `groupname` VARCHAR(32) NOT NULL, |
| 73 | ); | 72 | PRIMARY KEY (`notion_id`,`groupname`) |
| 73 | ) WITHOUT ROWID; | ||
| 74 | 74 | ||
| 75 | CREATE TABLE `entailment` ( | 75 | CREATE TABLE `entailment` ( |
| 76 | `given_id` INTEGER NOT NULL, | 76 | `given_id` INTEGER NOT NULL, |
| 77 | `entailment_id` INTEGER NOT NULL | 77 | `entailment_id` INTEGER NOT NULL, |
| 78 | ); | 78 | PRIMARY KEY (`given_id`,`entailment_id`) |
| 79 | ) WITHOUT ROWID; | ||
| 79 | 80 | ||
| 80 | CREATE INDEX `entailment_of` ON `entailment`(`given_id`,`entailment_id`); | 81 | CREATE INDEX `reverse_entailment` ON `entailment`(`entailment_id`,`given_id`); |
| 81 | CREATE INDEX `entailed_by` ON `entailment`(`entailment_id`,`given_id`); | ||
| 82 | 82 | ||
| 83 | CREATE TABLE `causality` ( | 83 | CREATE TABLE `causality` ( |
| 84 | `cause_id` INTEGER NOT NULL, | 84 | `cause_id` INTEGER NOT NULL, |
| 85 | `effect_id` INTEGER NOT NULL | 85 | `effect_id` INTEGER NOT NULL, |
| 86 | ); | 86 | PRIMARY KEY (`cause_id`,`effect_id`) |
| 87 | ) WITHOUT ROWID; | ||
| 87 | 88 | ||
| 88 | CREATE INDEX `effect_of` ON `causality`(`cause_id`,`effect_id`); | 89 | CREATE INDEX `reverse_causality` ON `causality`(`effect_id`,`cause_id`); |
| 89 | CREATE INDEX `cause_of` ON `causality`(`effect_id`,`cause_id`); | ||
| 90 | 90 | ||
| 91 | CREATE TABLE `words` ( | 91 | CREATE TABLE `words` ( |
| 92 | `word_id` INTEGER PRIMARY KEY, | 92 | `word_id` INTEGER PRIMARY KEY, |
| @@ -103,58 +103,57 @@ CREATE INDEX `group_words` ON `words`(`group_id`); | |||
| 103 | 103 | ||
| 104 | CREATE TABLE `antonymy` ( | 104 | CREATE TABLE `antonymy` ( |
| 105 | `antonym_1_id` INTEGER NOT NULL, | 105 | `antonym_1_id` INTEGER NOT NULL, |
| 106 | `antonym_2_id` INTEGER NOT NULL | 106 | `antonym_2_id` INTEGER NOT NULL, |
| 107 | ); | 107 | PRIMARY KEY (`antonym_1_id`,`antonym_2_id`) |
| 108 | 108 | ) WITHOUT ROWID; | |
| 109 | CREATE INDEX `antonym_of` ON `antonymy`(`antonym_1_id`,`antonym_2_id`); | ||
| 110 | 109 | ||
| 111 | CREATE TABLE `specification` ( | 110 | CREATE TABLE `specification` ( |
| 112 | `general_id` INTEGER NOT NULL, | 111 | `general_id` INTEGER NOT NULL, |
| 113 | `specific_id` INTEGER NOT NULL | 112 | `specific_id` INTEGER NOT NULL, |
| 114 | ); | 113 | PRIMARY KEY (`general_id`,`specific_id`) |
| 114 | ) WITHOUT ROWID; | ||
| 115 | 115 | ||
| 116 | CREATE INDEX `specification_of` ON `specification`(`general_id`,`specific_id`); | 116 | CREATE INDEX `reverse_specification` ON `specification`(`specific_id`,`general_id`); |
| 117 | CREATE INDEX `generalization_of` ON `specification`(`specific_id`,`general_id`); | ||
| 118 | 117 | ||
| 119 | CREATE TABLE `pertainymy` ( | 118 | CREATE TABLE `pertainymy` ( |
| 120 | `noun_id` INTEGER NOT NULL, | 119 | `noun_id` INTEGER NOT NULL, |
| 121 | `pertainym_id` INTEGER NOT NULL | 120 | `pertainym_id` INTEGER NOT NULL, |
| 122 | ); | 121 | PRIMARY KEY (`noun_id`,`pertainym_id`) |
| 122 | ) WITHOUT ROWID; | ||
| 123 | 123 | ||
| 124 | CREATE INDEX `pertainym_of` ON `pertainymy`(`noun_id`,`pertainym_id`); | 124 | CREATE INDEX `reverse_pertainymy` ON `pertainymy`(`pertainym_id`,`noun_id`); |
| 125 | CREATE INDEX `anti_pertainym_of` ON `pertainymy`(`pertainym_id`,`noun_id`); | ||
| 126 | 125 | ||
| 127 | CREATE TABLE `mannernymy` ( | 126 | CREATE TABLE `mannernymy` ( |
| 128 | `adjective_id` INTEGER NOT NULL, | 127 | `adjective_id` INTEGER NOT NULL, |
| 129 | `mannernym_id` INTEGER NOT NULL | 128 | `mannernym_id` INTEGER NOT NULL, |
| 130 | ); | 129 | PRIMARY KEY (`adjective_id`,`mannernym_id`) |
| 130 | ) WITHOUT ROWID; | ||
| 131 | 131 | ||
| 132 | CREATE INDEX `mannernym_of` ON `mannernymy`(`adjective_id`,`mannernym_id`); | 132 | CREATE INDEX `reverse_mannernymy` ON `mannernymy`(`mannernym_id`,`adjective_id`); |
| 133 | CREATE INDEX `anti_mannernym_of` ON `mannernymy`(`mannernym_id`,`adjective_id`); | ||
| 134 | 133 | ||
| 135 | CREATE TABLE `usage` ( | 134 | CREATE TABLE `usage` ( |
| 136 | `domain_id` INTEGER NOT NULL, | 135 | `domain_id` INTEGER NOT NULL, |
| 137 | `term_id` INTEGER NOT NULL | 136 | `term_id` INTEGER NOT NULL, |
| 138 | ); | 137 | PRIMARY KEY (`domain_id`,`term_id`) |
| 138 | ) WITHOUT ROWID; | ||
| 139 | 139 | ||
| 140 | CREATE INDEX `usage_term_of` ON `usage`(`domain_id`,`term_id`); | 140 | CREATE INDEX `reverse_usage` ON `usage`(`term_id`,`domain_id`); |
| 141 | CREATE INDEX `usage_domain_of` ON `usage`(`term_id`,`domain_id`); | ||
| 142 | 141 | ||
| 143 | CREATE TABLE `topicality` ( | 142 | CREATE TABLE `topicality` ( |
| 144 | `domain_id` INTEGER NOT NULL, | 143 | `domain_id` INTEGER NOT NULL, |
| 145 | `term_id` INTEGER NOT NULL | 144 | `term_id` INTEGER NOT NULL, |
| 146 | ); | 145 | PRIMARY KEY (`domain_id`,`term_id`) |
| 146 | ) WITHOUT ROWID; | ||
| 147 | 147 | ||
| 148 | CREATE INDEX `topical_term_of` ON `topicality`(`domain_id`,`term_id`); | 148 | CREATE INDEX `reverse_topicality` ON `topicality`(`term_id`,`domain_id`); |
| 149 | CREATE INDEX `topical_domain_of` ON `topicality`(`term_id`,`domain_id`); | ||
| 150 | 149 | ||
| 151 | CREATE TABLE `regionality` ( | 150 | CREATE TABLE `regionality` ( |
| 152 | `domain_id` INTEGER NOT NULL, | 151 | `domain_id` INTEGER NOT NULL, |
| 153 | `term_id` INTEGER NOT NULL | 152 | `term_id` INTEGER NOT NULL, |
| 154 | ); | 153 | PRIMARY KEY (`domain_id`,`term_id`) |
| 154 | ) WITHOUT ROWID; | ||
| 155 | 155 | ||
| 156 | CREATE INDEX `regional_term_of` ON `regionality`(`domain_id`,`term_id`); | 156 | CREATE INDEX `reverse_regionality` ON `regionality`(`term_id`,`domain_id`); |
| 157 | CREATE INDEX `regional_domain_of` ON `regionality`(`term_id`,`domain_id`); | ||
| 158 | 157 | ||
| 159 | CREATE TABLE `forms` ( | 158 | CREATE TABLE `forms` ( |
| 160 | `form_id` INTEGER PRIMARY KEY, | 159 | `form_id` INTEGER PRIMARY KEY, |
| @@ -169,11 +168,11 @@ CREATE UNIQUE INDEX `form_by_string` ON `forms`(`form`); | |||
| 169 | CREATE TABLE `lemmas_forms` ( | 168 | CREATE TABLE `lemmas_forms` ( |
| 170 | `lemma_id` INTEGER NOT NULL, | 169 | `lemma_id` INTEGER NOT NULL, |
| 171 | `form_id` INTEGER NOT NULL, | 170 | `form_id` INTEGER NOT NULL, |
| 172 | `category` SMALLINT NOT NULL | 171 | `category` SMALLINT NOT NULL, |
| 173 | ); | 172 | PRIMARY KEY (`lemma_id`,`category`,`form_id`) |
| 173 | ) WITHOUT ROWID; | ||
| 174 | 174 | ||
| 175 | CREATE INDEX `form_of` ON `lemmas_forms`(`lemma_id`,`category`,`form_id`); | 175 | CREATE INDEX `forms_lemmas` ON `lemmas_forms`(`form_id`,`category`,`lemma_id`); |
| 176 | CREATE INDEX `lemma_of` ON `lemmas_forms`(`form_id`,`category`,`lemma_id`); | ||
| 177 | 176 | ||
| 178 | CREATE TABLE `pronunciations` ( | 177 | CREATE TABLE `pronunciations` ( |
| 179 | `pronunciation_id` INTEGER PRIMARY KEY, | 178 | `pronunciation_id` INTEGER PRIMARY KEY, |
| @@ -188,11 +187,11 @@ CREATE INDEX `rhymes_with` ON `pronunciations`(`rhyme`,`prerhyme`); | |||
| 188 | 187 | ||
| 189 | CREATE TABLE `forms_pronunciations` ( | 188 | CREATE TABLE `forms_pronunciations` ( |
| 190 | `form_id` INTEGER NOT NULL, | 189 | `form_id` INTEGER NOT NULL, |
| 191 | `pronunciation_id` INTEGER NOT NULL | 190 | `pronunciation_id` INTEGER NOT NULL, |
| 192 | ); | 191 | PRIMARY KEY (`form_id`,`pronunciation_id`) |
| 192 | ) WITHOUT ROWID; | ||
| 193 | 193 | ||
| 194 | CREATE INDEX `pronunciation_of` ON `forms_pronunciations`(`form_id`,`pronunciation_id`); | 194 | CREATE INDEX `pronunciations_forms` ON `forms_pronunciations`(`pronunciation_id`,`form_id`); |
| 195 | CREATE INDEX `spelling_of` ON `forms_pronunciations`(`pronunciation_id`,`form_id`); | ||
| 196 | 195 | ||
| 197 | CREATE TABLE `frames` ( | 196 | CREATE TABLE `frames` ( |
| 198 | `frame_id` INTEGER NOT NULL, | 197 | `frame_id` INTEGER NOT NULL, |
| diff --git a/lib/version.h b/lib/version.h index 41ab79e..0404f5f 100644 --- a/lib/version.h +++ b/lib/version.h | |||
| @@ -4,7 +4,7 @@ | |||
| 4 | namespace verbly { | 4 | namespace verbly { |
| 5 | 5 | ||
| 6 | const int DATABASE_MAJOR_VERSION = 1; | 6 | const int DATABASE_MAJOR_VERSION = 1; |
| 7 | const int DATABASE_MINOR_VERSION = 0; | 7 | const int DATABASE_MINOR_VERSION = 1; |
| 8 | 8 | ||
| 9 | }; | 9 | }; |
| 10 | 10 | ||
