From 3a8bfa95a5df04d97f05545d5bb8df5f3c3f96a3 Mon Sep 17 00:00:00 2001 From: Kelly Rauchenberger Date: Wed, 26 Sep 2018 21:40:44 -0400 Subject: Removed unnecessary ROWIDs from database schema The generator also now sorts and uniq's the WordNet files for antonymy, classification, and pertainymy/mannernymy, because those files contained duplicate rows, and the join tables without ROWIDs now enforce a uniqueness constraint. This constitutes a minor database update -- the new database is compatible with d1.0, but is ~12MB smaller. refs #6 --- generator/generator.cpp | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) (limited to 'generator/generator.cpp') diff --git a/generator/generator.cpp b/generator/generator.cpp index e52aa90..0d073be 100644 --- a/generator/generator.cpp +++ b/generator/generator.cpp @@ -696,7 +696,8 @@ namespace verbly { void generator::readWordNetAntonymy() { - std::list<std::string> lines(readFile(wordNetPath_ + "wn_ant.pl")); + std::list<std::string> lines(readFile(wordNetPath_ + "wn_ant.pl", true)); + hatkirby::progress ppgs("Writing antonyms...", lines.size()); for (auto line : lines) { @@ -770,7 +771,7 @@ namespace verbly { void generator::readWordNetClasses() { - std::list<std::string> lines(readFile(wordNetPath_ + "wn_cls.pl")); + std::list<std::string> lines(readFile(wordNetPath_ + "wn_cls.pl", true)); hatkirby::progress ppgs( "Writing usage, topicality, and regionality...", @@ -1092,7 +1093,7 @@ namespace verbly { void generator::readWordNetPertainymy() { - std::list<std::string> lines(readFile(wordNetPath_ + "wn_per.pl")); + std::list<std::string> lines(readFile(wordNetPath_ + "wn_per.pl", true)); hatkirby::progress ppgs( "Writing pertainymy and mannernymy...", @@ -1228,7 +1229,7 @@ namespace verbly { db_.execute("ANALYZE"); } - std::list<std::string> generator::readFile(std::string path) + std::list<std::string> generator::readFile(std::string path, bool uniq) { std::ifstream file(path); if (!file) @@ -1248,6 +1249,18 @@ namespace verbly { lines.push_back(line); } + if (uniq) + { + std::vector<std::string>
uniq(std::begin(lines), std::end(lines)); + lines.clear(); + + std::sort(std::begin(uniq), std::end(uniq)); + std::unique_copy( + std::begin(uniq), + std::end(uniq), + std::back_inserter(lines)); + } + return lines; } -- cgit 1.4.1