diff options
author | Star Rauchenberger <fefferburbia@gmail.com> | 2022-12-09 16:53:33 -0500 |
---|---|---|
committer | Star Rauchenberger <fefferburbia@gmail.com> | 2022-12-09 16:53:33 -0500 |
commit | 06e4672540094a851542b47abaf022f934b63b09 (patch) | |
tree | 5333fb4446fce0043600f7d06427dd243a15c543 | |
parent | f2731325f551c4cfea861e2e31d214936b9bd619 (diff) | |
download | verbly-06e4672540094a851542b47abaf022f934b63b09.tar.gz verbly-06e4672540094a851542b47abaf022f934b63b09.tar.bz2 verbly-06e4672540094a851542b47abaf022f934b63b09.zip |
Generator now splits ImageNet list into per-notion files
-rw-r--r-- | generator/generator.cpp | 23 | ||||
-rw-r--r-- | generator/generator.h | 5 | ||||
-rw-r--r-- | generator/main.cpp | 7 |
3 files changed, 23 insertions, 12 deletions
diff --git a/generator/generator.cpp b/generator/generator.cpp index fdea10f..969b629 100644 --- a/generator/generator.cpp +++ b/generator/generator.cpp | |||
@@ -23,13 +23,15 @@ namespace verbly { | |||
23 | std::string wordNetPath, | 23 | std::string wordNetPath, |
24 | std::string cmudictPath, | 24 | std::string cmudictPath, |
25 | std::string imageNetPath, | 25 | std::string imageNetPath, |
26 | std::string outputPath) : | 26 | std::string outputPath, |
27 | std::string imageNetOutput) : | ||
27 | verbNetPath_(verbNetPath), | 28 | verbNetPath_(verbNetPath), |
28 | agidPath_(agidPath), | 29 | agidPath_(agidPath), |
29 | wordNetPath_(wordNetPath), | 30 | wordNetPath_(wordNetPath), |
30 | cmudictPath_(cmudictPath), | 31 | cmudictPath_(cmudictPath), |
31 | imageNetPath_(imageNetPath), | 32 | imageNetPath_(imageNetPath), |
32 | db_(outputPath, hatkirby::dbmode::create) | 33 | db_(outputPath, hatkirby::dbmode::create), |
34 | imageNetOutput_(imageNetOutput) | ||
33 | { | 35 | { |
34 | // Ensure VerbNet directory exists | 36 | // Ensure VerbNet directory exists |
35 | DIR* dir; | 37 | DIR* dir; |
@@ -75,6 +77,9 @@ namespace verbly { | |||
75 | { | 77 | { |
76 | throw std::invalid_argument("ImageNet urls.txt file not found"); | 78 | throw std::invalid_argument("ImageNet urls.txt file not found"); |
77 | } | 79 | } |
80 | |||
81 | // Create the ImageNet output directory | ||
82 | std::filesystem::create_directory(imageNetOutput_); | ||
78 | } | 83 | } |
79 | 84 | ||
80 | void generator::run() | 85 | void generator::run() |
@@ -86,7 +91,7 @@ namespace verbly { | |||
86 | readAdjectivePositioning(); | 91 | readAdjectivePositioning(); |
87 | 92 | ||
88 | // Counts the number of URLs ImageNet has per notion | 93 | // Counts the number of URLs ImageNet has per notion |
89 | //readImageNetUrls(); | 94 | readImageNetUrls(); |
90 | 95 | ||
91 | // Creates a word by WordNet sense key lookup table | 96 | // Creates a word by WordNet sense key lookup table |
92 | readWordNetSenseKeys(); | 97 | readWordNetSenseKeys(); |
@@ -118,17 +123,15 @@ namespace verbly { | |||
118 | // Writes the database version | 123 | // Writes the database version |
119 | writeVersion(); | 124 | writeVersion(); |
120 | 125 | ||
126 | // Dumps data to the database | ||
127 | dumpObjects(); | ||
128 | |||
121 | // Calculates and writes form merography | 129 | // Calculates and writes form merography |
122 | writeMerography(); | 130 | writeMerography(); |
123 | 131 | ||
124 | // Calculates and writes pronunciation merophony | 132 | // Calculates and writes pronunciation merophony |
125 | writeMerophony(); | 133 | writeMerophony(); |
126 | 134 | ||
127 | // Dumps data to the database | ||
128 | dumpObjects(); | ||
129 | |||
130 | |||
131 | |||
132 | // Populates the antonymy relationship from WordNet | 135 | // Populates the antonymy relationship from WordNet |
133 | readWordNetAntonymy(); | 136 | readWordNetAntonymy(); |
134 | 137 | ||
@@ -286,6 +289,10 @@ namespace verbly { | |||
286 | { | 289 | { |
287 | // We know that this notion has a wnid and is a noun. | 290 | // We know that this notion has a wnid and is a noun. |
288 | notionByWnid_.at(wnid)->incrementNumOfImages(); | 291 | notionByWnid_.at(wnid)->incrementNumOfImages(); |
292 | |||
293 | std::filesystem::path imagefile = imageNetOutput_ / std::to_string(notionByWnid_[wnid]->getId()); | ||
294 | std::ofstream imageoutput(imagefile, std::ios::app); | ||
295 | imageoutput << line.substr(line.find("\t") + 1) << std::endl; | ||
289 | } | 296 | } |
290 | } | 297 | } |
291 | } | 298 | } |
diff --git a/generator/generator.h b/generator/generator.h index 70f0657..3d51c35 100644 --- a/generator/generator.h +++ b/generator/generator.h | |||
@@ -9,6 +9,7 @@ | |||
9 | #include <set> | 9 | #include <set> |
10 | #include <libxml/parser.h> | 10 | #include <libxml/parser.h> |
11 | #include <hkutil/database.h> | 11 | #include <hkutil/database.h> |
12 | #include <filesystem> | ||
12 | #include "notion.h" | 13 | #include "notion.h" |
13 | #include "word.h" | 14 | #include "word.h" |
14 | #include "lemma.h" | 15 | #include "lemma.h" |
@@ -34,7 +35,8 @@ namespace verbly { | |||
34 | std::string wordNetPath, | 35 | std::string wordNetPath, |
35 | std::string cmudictPath, | 36 | std::string cmudictPath, |
36 | std::string imageNetPath, | 37 | std::string imageNetPath, |
37 | std::string outputPath); | 38 | std::string outputPath, |
39 | std::string imageNetOutput); | ||
38 | 40 | ||
39 | // Action | 41 | // Action |
40 | 42 | ||
@@ -127,6 +129,7 @@ namespace verbly { | |||
127 | // Output | 129 | // Output |
128 | 130 | ||
129 | hatkirby::database db_; | 131 | hatkirby::database db_; |
132 | std::filesystem::path imageNetOutput_; | ||
130 | 133 | ||
131 | // Data | 134 | // Data |
132 | 135 | ||
diff --git a/generator/main.cpp b/generator/main.cpp index 1b07706..7db7203 100644 --- a/generator/main.cpp +++ b/generator/main.cpp | |||
@@ -4,22 +4,23 @@ | |||
4 | 4 | ||
5 | void printUsage() | 5 | void printUsage() |
6 | { | 6 | { |
7 | std::cout << "usage: generator verbnet agid wordnet cmudict imagenet output" << std::endl; | 7 | std::cout << "usage: generator verbnet agid wordnet cmudict imagenet output ino" << std::endl; |
8 | std::cout << "verbnet :: path to a VerbNet data directory" << std::endl; | 8 | std::cout << "verbnet :: path to a VerbNet data directory" << std::endl; |
9 | std::cout << "agid :: path to an AGID infl.txt file" << std::endl; | 9 | std::cout << "agid :: path to an AGID infl.txt file" << std::endl; |
10 | std::cout << "wordnet :: path to a WordNet prolog data directory" << std::endl; | 10 | std::cout << "wordnet :: path to a WordNet prolog data directory" << std::endl; |
11 | std::cout << "cmudict :: path to a CMUDICT pronunciation file" << std::endl; | 11 | std::cout << "cmudict :: path to a CMUDICT pronunciation file" << std::endl; |
12 | std::cout << "imagenet :: path to an ImageNet urls.txt file" << std::endl; | 12 | std::cout << "imagenet :: path to an ImageNet urls.txt file" << std::endl; |
13 | std::cout << "output :: datafile output path" << std::endl; | 13 | std::cout << "output :: datafile output path" << std::endl; |
14 | std::cout << "ino :: imagenet directory output path" << std::endl; | ||
14 | } | 15 | } |
15 | 16 | ||
16 | int main(int argc, char** argv) | 17 | int main(int argc, char** argv) |
17 | { | 18 | { |
18 | if (argc == 7) | 19 | if (argc == 8) |
19 | { | 20 | { |
20 | try | 21 | try |
21 | { | 22 | { |
22 | verbly::generator::generator app(argv[1], argv[2], argv[3], argv[4], argv[5], argv[6]); | 23 | verbly::generator::generator app(argv[1], argv[2], argv[3], argv[4], argv[5], argv[6], argv[7]); |
23 | 24 | ||
24 | try | 25 | try |
25 | { | 26 | { |