diff options
| -rw-r--r-- | generator/generator.cpp | 23 | ||||
| -rw-r--r-- | generator/generator.h | 5 | ||||
| -rw-r--r-- | generator/main.cpp | 7 |
3 files changed, 23 insertions, 12 deletions
| diff --git a/generator/generator.cpp b/generator/generator.cpp index fdea10f..969b629 100644 --- a/generator/generator.cpp +++ b/generator/generator.cpp | |||
| @@ -23,13 +23,15 @@ namespace verbly { | |||
| 23 | std::string wordNetPath, | 23 | std::string wordNetPath, |
| 24 | std::string cmudictPath, | 24 | std::string cmudictPath, |
| 25 | std::string imageNetPath, | 25 | std::string imageNetPath, |
| 26 | std::string outputPath) : | 26 | std::string outputPath, |
| 27 | std::string imageNetOutput) : | ||
| 27 | verbNetPath_(verbNetPath), | 28 | verbNetPath_(verbNetPath), |
| 28 | agidPath_(agidPath), | 29 | agidPath_(agidPath), |
| 29 | wordNetPath_(wordNetPath), | 30 | wordNetPath_(wordNetPath), |
| 30 | cmudictPath_(cmudictPath), | 31 | cmudictPath_(cmudictPath), |
| 31 | imageNetPath_(imageNetPath), | 32 | imageNetPath_(imageNetPath), |
| 32 | db_(outputPath, hatkirby::dbmode::create) | 33 | db_(outputPath, hatkirby::dbmode::create), |
| 34 | imageNetOutput_(imageNetOutput) | ||
| 33 | { | 35 | { |
| 34 | // Ensure VerbNet directory exists | 36 | // Ensure VerbNet directory exists |
| 35 | DIR* dir; | 37 | DIR* dir; |
| @@ -75,6 +77,9 @@ namespace verbly { | |||
| 75 | { | 77 | { |
| 76 | throw std::invalid_argument("ImageNet urls.txt file not found"); | 78 | throw std::invalid_argument("ImageNet urls.txt file not found"); |
| 77 | } | 79 | } |
| 80 | |||
| 81 | // Create the ImageNet output directory | ||
| 82 | std::filesystem::create_directory(imageNetOutput_); | ||
| 78 | } | 83 | } |
| 79 | 84 | ||
| 80 | void generator::run() | 85 | void generator::run() |
| @@ -86,7 +91,7 @@ namespace verbly { | |||
| 86 | readAdjectivePositioning(); | 91 | readAdjectivePositioning(); |
| 87 | 92 | ||
| 88 | // Counts the number of URLs ImageNet has per notion | 93 | // Counts the number of URLs ImageNet has per notion |
| 89 | //readImageNetUrls(); | 94 | readImageNetUrls(); |
| 90 | 95 | ||
| 91 | // Creates a word by WordNet sense key lookup table | 96 | // Creates a word by WordNet sense key lookup table |
| 92 | readWordNetSenseKeys(); | 97 | readWordNetSenseKeys(); |
| @@ -118,17 +123,15 @@ namespace verbly { | |||
| 118 | // Writes the database version | 123 | // Writes the database version |
| 119 | writeVersion(); | 124 | writeVersion(); |
| 120 | 125 | ||
| 126 | // Dumps data to the database | ||
| 127 | dumpObjects(); | ||
| 128 | |||
| 121 | // Calculates and writes form merography | 129 | // Calculates and writes form merography |
| 122 | writeMerography(); | 130 | writeMerography(); |
| 123 | 131 | ||
| 124 | // Calculates and writes pronunciation merophony | 132 | // Calculates and writes pronunciation merophony |
| 125 | writeMerophony(); | 133 | writeMerophony(); |
| 126 | 134 | ||
| 127 | // Dumps data to the database | ||
| 128 | dumpObjects(); | ||
| 129 | |||
| 130 | |||
| 131 | |||
| 132 | // Populates the antonymy relationship from WordNet | 135 | // Populates the antonymy relationship from WordNet |
| 133 | readWordNetAntonymy(); | 136 | readWordNetAntonymy(); |
| 134 | 137 | ||
| @@ -286,6 +289,10 @@ namespace verbly { | |||
| 286 | { | 289 | { |
| 287 | // We know that this notion has a wnid and is a noun. | 290 | // We know that this notion has a wnid and is a noun. |
| 288 | notionByWnid_.at(wnid)->incrementNumOfImages(); | 291 | notionByWnid_.at(wnid)->incrementNumOfImages(); |
| 292 | |||
| 293 | std::filesystem::path imagefile = imageNetOutput_ / std::to_string(notionByWnid_[wnid]->getId()); | ||
| 294 | std::ofstream imageoutput(imagefile, std::ios::app); | ||
| 295 | imageoutput << line.substr(line.find("\t") + 1) << std::endl; | ||
| 289 | } | 296 | } |
| 290 | } | 297 | } |
| 291 | } | 298 | } |
| diff --git a/generator/generator.h b/generator/generator.h index 70f0657..3d51c35 100644 --- a/generator/generator.h +++ b/generator/generator.h | |||
| @@ -9,6 +9,7 @@ | |||
| 9 | #include <set> | 9 | #include <set> |
| 10 | #include <libxml/parser.h> | 10 | #include <libxml/parser.h> |
| 11 | #include <hkutil/database.h> | 11 | #include <hkutil/database.h> |
| 12 | #include <filesystem> | ||
| 12 | #include "notion.h" | 13 | #include "notion.h" |
| 13 | #include "word.h" | 14 | #include "word.h" |
| 14 | #include "lemma.h" | 15 | #include "lemma.h" |
| @@ -34,7 +35,8 @@ namespace verbly { | |||
| 34 | std::string wordNetPath, | 35 | std::string wordNetPath, |
| 35 | std::string cmudictPath, | 36 | std::string cmudictPath, |
| 36 | std::string imageNetPath, | 37 | std::string imageNetPath, |
| 37 | std::string outputPath); | 38 | std::string outputPath, |
| 39 | std::string imageNetOutput); | ||
| 38 | 40 | ||
| 39 | // Action | 41 | // Action |
| 40 | 42 | ||
| @@ -127,6 +129,7 @@ namespace verbly { | |||
| 127 | // Output | 129 | // Output |
| 128 | 130 | ||
| 129 | hatkirby::database db_; | 131 | hatkirby::database db_; |
| 132 | std::filesystem::path imageNetOutput_; | ||
| 130 | 133 | ||
| 131 | // Data | 134 | // Data |
| 132 | 135 | ||
| diff --git a/generator/main.cpp b/generator/main.cpp index 1b07706..7db7203 100644 --- a/generator/main.cpp +++ b/generator/main.cpp | |||
| @@ -4,22 +4,23 @@ | |||
| 4 | 4 | ||
| 5 | void printUsage() | 5 | void printUsage() |
| 6 | { | 6 | { |
| 7 | std::cout << "usage: generator verbnet agid wordnet cmudict imagenet output" << std::endl; | 7 | std::cout << "usage: generator verbnet agid wordnet cmudict imagenet output ino" << std::endl; |
| 8 | std::cout << "verbnet :: path to a VerbNet data directory" << std::endl; | 8 | std::cout << "verbnet :: path to a VerbNet data directory" << std::endl; |
| 9 | std::cout << "agid :: path to an AGID infl.txt file" << std::endl; | 9 | std::cout << "agid :: path to an AGID infl.txt file" << std::endl; |
| 10 | std::cout << "wordnet :: path to a WordNet prolog data directory" << std::endl; | 10 | std::cout << "wordnet :: path to a WordNet prolog data directory" << std::endl; |
| 11 | std::cout << "cmudict :: path to a CMUDICT pronunciation file" << std::endl; | 11 | std::cout << "cmudict :: path to a CMUDICT pronunciation file" << std::endl; |
| 12 | std::cout << "imagenet :: path to an ImageNet urls.txt file" << std::endl; | 12 | std::cout << "imagenet :: path to an ImageNet urls.txt file" << std::endl; |
| 13 | std::cout << "output :: datafile output path" << std::endl; | 13 | std::cout << "output :: datafile output path" << std::endl; |
| 14 | std::cout << "ino :: imagenet directory output path" << std::endl; | ||
| 14 | } | 15 | } |
| 15 | 16 | ||
| 16 | int main(int argc, char** argv) | 17 | int main(int argc, char** argv) |
| 17 | { | 18 | { |
| 18 | if (argc == 7) | 19 | if (argc == 8) |
| 19 | { | 20 | { |
| 20 | try | 21 | try |
| 21 | { | 22 | { |
| 22 | verbly::generator::generator app(argv[1], argv[2], argv[3], argv[4], argv[5], argv[6]); | 23 | verbly::generator::generator app(argv[1], argv[2], argv[3], argv[4], argv[5], argv[6], argv[7]); |
| 23 | 24 | ||
| 24 | try | 25 | try |
| 25 | { | 26 | { |
