diff options
| author | Kelly Rauchenberger <fefferburbia@gmail.com> | 2018-09-26 21:40:44 -0400 |
|---|---|---|
| committer | Kelly Rauchenberger <fefferburbia@gmail.com> | 2018-09-26 21:40:44 -0400 |
| commit | 3a8bfa95a5df04d97f05545d5bb8df5f3c3f96a3 (patch) | |
| tree | e4478f57fe5e3faa57274d3b79c636ea657ab3a0 /generator/generator.cpp | |
| parent | a9188cbc6b3b9d26e675213e3834afdbd06296f6 (diff) | |
| download | verbly-3a8bfa95a5df04d97f05545d5bb8df5f3c3f96a3.tar.gz verbly-3a8bfa95a5df04d97f05545d5bb8df5f3c3f96a3.tar.bz2 verbly-3a8bfa95a5df04d97f05545d5bb8df5f3c3f96a3.zip | |
Removed unnecessary ROWIDs from database schema
The generator now also sorts and de-duplicates the lines of the WordNet files for antonymy, classification, and pertainymy/mannernymy. Those files contain duplicate rows, and the join tables, which no longer have ROWIDs, now enforce a uniqueness constraint. This constitutes a minor database update: the new database is compatible with d1.0, but is ~12MB smaller. refs #6
Diffstat (limited to 'generator/generator.cpp')
| -rw-r--r-- | generator/generator.cpp | 21 |
1 files changed, 17 insertions, 4 deletions
| diff --git a/generator/generator.cpp b/generator/generator.cpp index e52aa90..0d073be 100644 --- a/generator/generator.cpp +++ b/generator/generator.cpp | |||
| @@ -696,7 +696,8 @@ namespace verbly { | |||
| 696 | 696 | ||
| 697 | void generator::readWordNetAntonymy() | 697 | void generator::readWordNetAntonymy() |
| 698 | { | 698 | { |
| 699 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_ant.pl")); | 699 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_ant.pl", true)); |
| 700 | |||
| 700 | hatkirby::progress ppgs("Writing antonyms...", lines.size()); | 701 | hatkirby::progress ppgs("Writing antonyms...", lines.size()); |
| 701 | for (auto line : lines) | 702 | for (auto line : lines) |
| 702 | { | 703 | { |
| @@ -770,7 +771,7 @@ namespace verbly { | |||
| 770 | 771 | ||
| 771 | void generator::readWordNetClasses() | 772 | void generator::readWordNetClasses() |
| 772 | { | 773 | { |
| 773 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_cls.pl")); | 774 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_cls.pl", true)); |
| 774 | 775 | ||
| 775 | hatkirby::progress ppgs( | 776 | hatkirby::progress ppgs( |
| 776 | "Writing usage, topicality, and regionality...", | 777 | "Writing usage, topicality, and regionality...", |
| @@ -1092,7 +1093,7 @@ namespace verbly { | |||
| 1092 | 1093 | ||
| 1093 | void generator::readWordNetPertainymy() | 1094 | void generator::readWordNetPertainymy() |
| 1094 | { | 1095 | { |
| 1095 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_per.pl")); | 1096 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_per.pl", true)); |
| 1096 | 1097 | ||
| 1097 | hatkirby::progress ppgs( | 1098 | hatkirby::progress ppgs( |
| 1098 | "Writing pertainymy and mannernymy...", | 1099 | "Writing pertainymy and mannernymy...", |
| @@ -1228,7 +1229,7 @@ namespace verbly { | |||
| 1228 | db_.execute("ANALYZE"); | 1229 | db_.execute("ANALYZE"); |
| 1229 | } | 1230 | } |
| 1230 | 1231 | ||
| 1231 | std::list<std::string> generator::readFile(std::string path) | 1232 | std::list<std::string> generator::readFile(std::string path, bool uniq) |
| 1232 | { | 1233 | { |
| 1233 | std::ifstream file(path); | 1234 | std::ifstream file(path); |
| 1234 | if (!file) | 1235 | if (!file) |
| @@ -1248,6 +1249,18 @@ namespace verbly { | |||
| 1248 | lines.push_back(line); | 1249 | lines.push_back(line); |
| 1249 | } | 1250 | } |
| 1250 | 1251 | ||
| 1252 | if (uniq) | ||
| 1253 | { | ||
| 1254 | std::vector<std::string> uniq(std::begin(lines), std::end(lines)); | ||
| 1255 | lines.clear(); | ||
| 1256 | |||
| 1257 | std::sort(std::begin(uniq), std::end(uniq)); | ||
| 1258 | std::unique_copy( | ||
| 1259 | std::begin(uniq), | ||
| 1260 | std::end(uniq), | ||
| 1261 | std::back_inserter(lines)); | ||
| 1262 | } | ||
| 1263 | |||
| 1251 | return lines; | 1264 | return lines; |
| 1252 | } | 1265 | } |
| 1253 | 1266 | ||
