summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Star Rauchenberger <fefferburbia@gmail.com> 2022-12-09 16:53:33 -0500
committer Star Rauchenberger <fefferburbia@gmail.com> 2022-12-09 16:53:33 -0500
commit 06e4672540094a851542b47abaf022f934b63b09 (patch)
tree 5333fb4446fce0043600f7d06427dd243a15c543
parent f2731325f551c4cfea861e2e31d214936b9bd619 (diff)
download verbly-06e4672540094a851542b47abaf022f934b63b09.tar.gz
verbly-06e4672540094a851542b47abaf022f934b63b09.tar.bz2
verbly-06e4672540094a851542b47abaf022f934b63b09.zip
Generator now splits ImageNet list into per-notion files
-rw-r--r-- generator/generator.cpp 23
-rw-r--r-- generator/generator.h 5
-rw-r--r-- generator/main.cpp 7
3 files changed, 23 insertions, 12 deletions
diff --git a/generator/generator.cpp b/generator/generator.cpp index fdea10f..969b629 100644 --- a/generator/generator.cpp +++ b/generator/generator.cpp
@@ -23,13 +23,15 @@ namespace verbly {
23 std::string wordNetPath, 23 std::string wordNetPath,
24 std::string cmudictPath, 24 std::string cmudictPath,
25 std::string imageNetPath, 25 std::string imageNetPath,
26 std::string outputPath) : 26 std::string outputPath,
27 std::string imageNetOutput) :
27 verbNetPath_(verbNetPath), 28 verbNetPath_(verbNetPath),
28 agidPath_(agidPath), 29 agidPath_(agidPath),
29 wordNetPath_(wordNetPath), 30 wordNetPath_(wordNetPath),
30 cmudictPath_(cmudictPath), 31 cmudictPath_(cmudictPath),
31 imageNetPath_(imageNetPath), 32 imageNetPath_(imageNetPath),
32 db_(outputPath, hatkirby::dbmode::create) 33 db_(outputPath, hatkirby::dbmode::create),
34 imageNetOutput_(imageNetOutput)
33 { 35 {
34 // Ensure VerbNet directory exists 36 // Ensure VerbNet directory exists
35 DIR* dir; 37 DIR* dir;
@@ -75,6 +77,9 @@ namespace verbly {
75 { 77 {
76 throw std::invalid_argument("ImageNet urls.txt file not found"); 78 throw std::invalid_argument("ImageNet urls.txt file not found");
77 } 79 }
80
81 // Create the ImageNet output directory
82 std::filesystem::create_directory(imageNetOutput_);
78 } 83 }
79 84
80 void generator::run() 85 void generator::run()
@@ -86,7 +91,7 @@ namespace verbly {
86 readAdjectivePositioning(); 91 readAdjectivePositioning();
87 92
88 // Counts the number of URLs ImageNet has per notion 93 // Counts the number of URLs ImageNet has per notion
89 //readImageNetUrls(); 94 readImageNetUrls();
90 95
91 // Creates a word by WordNet sense key lookup table 96 // Creates a word by WordNet sense key lookup table
92 readWordNetSenseKeys(); 97 readWordNetSenseKeys();
@@ -118,17 +123,15 @@ namespace verbly {
118 // Writes the database version 123 // Writes the database version
119 writeVersion(); 124 writeVersion();
120 125
126 // Dumps data to the database
127 dumpObjects();
128
121 // Calculates and writes form merography 129 // Calculates and writes form merography
122 writeMerography(); 130 writeMerography();
123 131
124 // Calculates and writes pronunciation merophony 132 // Calculates and writes pronunciation merophony
125 writeMerophony(); 133 writeMerophony();
126 134
127 // Dumps data to the database
128 dumpObjects();
129
130
131
132 // Populates the antonymy relationship from WordNet 135 // Populates the antonymy relationship from WordNet
133 readWordNetAntonymy(); 136 readWordNetAntonymy();
134 137
@@ -286,6 +289,10 @@ namespace verbly {
286 { 289 {
287 // We know that this notion has a wnid and is a noun. 290 // We know that this notion has a wnid and is a noun.
288 notionByWnid_.at(wnid)->incrementNumOfImages(); 291 notionByWnid_.at(wnid)->incrementNumOfImages();
292
293 std::filesystem::path imagefile = imageNetOutput_ / std::to_string(notionByWnid_[wnid]->getId());
294 std::ofstream imageoutput(imagefile, std::ios::app);
295 imageoutput << line.substr(line.find("\t") + 1) << std::endl;
289 } 296 }
290 } 297 }
291 } 298 }
diff --git a/generator/generator.h b/generator/generator.h index 70f0657..3d51c35 100644 --- a/generator/generator.h +++ b/generator/generator.h
@@ -9,6 +9,7 @@
9#include <set> 9#include <set>
10#include <libxml/parser.h> 10#include <libxml/parser.h>
11#include <hkutil/database.h> 11#include <hkutil/database.h>
12#include <filesystem>
12#include "notion.h" 13#include "notion.h"
13#include "word.h" 14#include "word.h"
14#include "lemma.h" 15#include "lemma.h"
@@ -34,7 +35,8 @@ namespace verbly {
34 std::string wordNetPath, 35 std::string wordNetPath,
35 std::string cmudictPath, 36 std::string cmudictPath,
36 std::string imageNetPath, 37 std::string imageNetPath,
37 std::string outputPath); 38 std::string outputPath,
39 std::string imageNetOutput);
38 40
39 // Action 41 // Action
40 42
@@ -127,6 +129,7 @@ namespace verbly {
127 // Output 129 // Output
128 130
129 hatkirby::database db_; 131 hatkirby::database db_;
132 std::filesystem::path imageNetOutput_;
130 133
131 // Data 134 // Data
132 135
diff --git a/generator/main.cpp b/generator/main.cpp index 1b07706..7db7203 100644 --- a/generator/main.cpp +++ b/generator/main.cpp
@@ -4,22 +4,23 @@
4 4
5void printUsage() 5void printUsage()
6{ 6{
7 std::cout << "usage: generator verbnet agid wordnet cmudict imagenet output" << std::endl; 7 std::cout << "usage: generator verbnet agid wordnet cmudict imagenet output ino" << std::endl;
8 std::cout << "verbnet :: path to a VerbNet data directory" << std::endl; 8 std::cout << "verbnet :: path to a VerbNet data directory" << std::endl;
9 std::cout << "agid :: path to an AGID infl.txt file" << std::endl; 9 std::cout << "agid :: path to an AGID infl.txt file" << std::endl;
10 std::cout << "wordnet :: path to a WordNet prolog data directory" << std::endl; 10 std::cout << "wordnet :: path to a WordNet prolog data directory" << std::endl;
11 std::cout << "cmudict :: path to a CMUDICT pronunciation file" << std::endl; 11 std::cout << "cmudict :: path to a CMUDICT pronunciation file" << std::endl;
12 std::cout << "imagenet :: path to an ImageNet urls.txt file" << std::endl; 12 std::cout << "imagenet :: path to an ImageNet urls.txt file" << std::endl;
13 std::cout << "output :: datafile output path" << std::endl; 13 std::cout << "output :: datafile output path" << std::endl;
14 std::cout << "ino :: imagenet directory output path" << std::endl;
14} 15}
15 16
16int main(int argc, char** argv) 17int main(int argc, char** argv)
17{ 18{
18 if (argc == 7) 19 if (argc == 8)
19 { 20 {
20 try 21 try
21 { 22 {
22 verbly::generator::generator app(argv[1], argv[2], argv[3], argv[4], argv[5], argv[6]); 23 verbly::generator::generator app(argv[1], argv[2], argv[3], argv[4], argv[5], argv[6], argv[7]);
23 24
24 try 25 try
25 { 26 {