From 06e4672540094a851542b47abaf022f934b63b09 Mon Sep 17 00:00:00 2001 From: Star Rauchenberger Date: Fri, 9 Dec 2022 16:53:33 -0500 Subject: Generator now splits ImageNet list into per-notion files --- generator/generator.cpp | 23 +++++++++++++++-------- generator/generator.h | 5 ++++- generator/main.cpp | 7 ++++--- 3 files changed, 23 insertions(+), 12 deletions(-) diff --git a/generator/generator.cpp b/generator/generator.cpp index fdea10f..969b629 100644 --- a/generator/generator.cpp +++ b/generator/generator.cpp @@ -23,13 +23,15 @@ namespace verbly { std::string wordNetPath, std::string cmudictPath, std::string imageNetPath, - std::string outputPath) : + std::string outputPath, + std::string imageNetOutput) : verbNetPath_(verbNetPath), agidPath_(agidPath), wordNetPath_(wordNetPath), cmudictPath_(cmudictPath), imageNetPath_(imageNetPath), - db_(outputPath, hatkirby::dbmode::create) + db_(outputPath, hatkirby::dbmode::create), + imageNetOutput_(imageNetOutput) { // Ensure VerbNet directory exists DIR* dir; @@ -75,6 +77,9 @@ namespace verbly { { throw std::invalid_argument("ImageNet urls.txt file not found"); } + + // Create the ImageNet output directory + std::filesystem::create_directory(imageNetOutput_); } void generator::run() @@ -86,7 +91,7 @@ namespace verbly { readAdjectivePositioning(); // Counts the number of URLs ImageNet has per notion - //readImageNetUrls(); + readImageNetUrls(); // Creates a word by WordNet sense key lookup table readWordNetSenseKeys(); @@ -118,17 +123,15 @@ namespace verbly { // Writes the database version writeVersion(); + // Dumps data to the database + dumpObjects(); + // Calculates and writes form merography writeMerography(); // Calculates and writes pronunciation merophony writeMerophony(); - // Dumps data to the database - dumpObjects(); - - - // Populates the antonymy relationship from WordNet readWordNetAntonymy(); @@ -286,6 +289,10 @@ namespace verbly { { // We know that this notion has a wnid and is a noun. notionByWnid_.at(wnid)->incrementNumOfImages(); + + std::filesystem::path imagefile = imageNetOutput_ / std::to_string(notionByWnid_[wnid]->getId()); + std::ofstream imageoutput(imagefile, std::ios::app); + imageoutput << line.substr(line.find("\t") + 1) << std::endl; } } } diff --git a/generator/generator.h b/generator/generator.h index 70f0657..3d51c35 100644 --- a/generator/generator.h +++ b/generator/generator.h @@ -9,6 +9,7 @@ #include #include #include +#include #include "notion.h" #include "word.h" #include "lemma.h" @@ -34,7 +35,8 @@ namespace verbly { std::string wordNetPath, std::string cmudictPath, std::string imageNetPath, - std::string outputPath); + std::string outputPath, + std::string imageNetOutput); // Action @@ -127,6 +129,7 @@ namespace verbly { // Output hatkirby::database db_; + std::filesystem::path imageNetOutput_; // Data diff --git a/generator/main.cpp b/generator/main.cpp index 1b07706..7db7203 100644 --- a/generator/main.cpp +++ b/generator/main.cpp @@ -4,22 +4,23 @@ void printUsage() { - std::cout << "usage: generator verbnet agid wordnet cmudict imagenet output" << std::endl; + std::cout << "usage: generator verbnet agid wordnet cmudict imagenet output ino" << std::endl; std::cout << "verbnet :: path to a VerbNet data directory" << std::endl; std::cout << "agid :: path to an AGID infl.txt file" << std::endl; std::cout << "wordnet :: path to a WordNet prolog data directory" << std::endl; std::cout << "cmudict :: path to a CMUDICT pronunciation file" << std::endl; std::cout << "imagenet :: path to an ImageNet urls.txt file" << std::endl; std::cout << "output :: datafile output path" << std::endl; + std::cout << "ino :: imagenet directory output path" << std::endl; } int main(int argc, char** argv) { - if (argc == 7) + if (argc == 8) { try { - verbly::generator::generator app(argv[1], argv[2], argv[3], argv[4], argv[5], argv[6]); + verbly::generator::generator app(argv[1], argv[2], argv[3], argv[4], argv[5], argv[6], argv[7]); try { -- cgit 1.4.1