summary refs log tree commit diff stats
path: root/generator/generator.cpp
diff options
context:
space:
mode:
author: Kelly Rauchenberger <fefferburbia@gmail.com> 2017-01-22 18:43:25 -0500
committer: Kelly Rauchenberger <fefferburbia@gmail.com> 2017-01-22 18:43:25 -0500
commit: 2c5ed155e8951f7f28b82ed7570295d5629f4770 (patch)
tree: 889b807b6e76ec745f19488be7700cfc574aa9fa /generator/generator.cpp
parent: 8837d3ceb8358476f20d71948a1fc9bc91480d57 (diff)
download: verbly-2c5ed155e8951f7f28b82ed7570295d5629f4770.tar.gz
verbly-2c5ed155e8951f7f28b82ed7570295d5629f4770.tar.bz2
verbly-2c5ed155e8951f7f28b82ed7570295d5629f4770.zip
Fixed generator ignoring multiple inflection variants
Previously, the generator would recognize at most one form per
inflection per lemma; now, the generator adds all variants in AGID to
the database.
Diffstat (limited to 'generator/generator.cpp')
-rw-r--r-- generator/generator.cpp 525
1 files changed, 268 insertions, 257 deletions
diff --git a/generator/generator.cpp b/generator/generator.cpp
index 3dd2ce7..610a602 100644
--- a/generator/generator.cpp
+++ b/generator/generator.cpp
@@ -15,7 +15,7 @@
15 15
16namespace verbly { 16namespace verbly {
17 namespace generator { 17 namespace generator {
18 18
19 generator::generator( 19 generator::generator(
20 std::string verbNetPath, 20 std::string verbNetPath,
21 std::string agidPath, 21 std::string agidPath,
@@ -36,21 +36,21 @@ namespace verbly {
36 { 36 {
37 throw std::invalid_argument("Invalid VerbNet data directory"); 37 throw std::invalid_argument("Invalid VerbNet data directory");
38 } 38 }
39 39
40 closedir(dir); 40 closedir(dir);
41 41
42 // Ensure AGID infl.txt exists 42 // Ensure AGID infl.txt exists
43 if (!std::ifstream(agidPath_)) 43 if (!std::ifstream(agidPath_))
44 { 44 {
45 throw std::invalid_argument("AGID infl.txt file not found"); 45 throw std::invalid_argument("AGID infl.txt file not found");
46 } 46 }
47 47
48 // Add directory separator to WordNet path 48 // Add directory separator to WordNet path
49 if ((wordNetPath_.back() != '/') && (wordNetPath_.back() != '\\')) 49 if ((wordNetPath_.back() != '/') && (wordNetPath_.back() != '\\'))
50 { 50 {
51 wordNetPath_ += '/'; 51 wordNetPath_ += '/';
52 } 52 }
53 53
54 // Ensure WordNet tables exist 54 // Ensure WordNet tables exist
55 for (std::string table : { 55 for (std::string table : {
56 "s", "sk", "ant", "at", "cls", "hyp", "ins", "mm", "mp", "ms", "per", "sa", "sim", "syntax" 56 "s", "sk", "ant", "at", "cls", "hyp", "ins", "mm", "mp", "ms", "per", "sa", "sim", "syntax"
@@ -61,37 +61,37 @@ namespace verbly {
61 throw std::invalid_argument("WordNet " + table + " table not found"); 61 throw std::invalid_argument("WordNet " + table + " table not found");
62 } 62 }
63 } 63 }
64 64
65 // Ensure CMUDICT file exists 65 // Ensure CMUDICT file exists
66 if (!std::ifstream(cmudictPath_)) 66 if (!std::ifstream(cmudictPath_))
67 { 67 {
68 throw std::invalid_argument("CMUDICT file not found"); 68 throw std::invalid_argument("CMUDICT file not found");
69 } 69 }
70 70
71 // Ensure ImageNet urls.txt exists 71 // Ensure ImageNet urls.txt exists
72 if (!std::ifstream(imageNetPath_)) 72 if (!std::ifstream(imageNetPath_))
73 { 73 {
74 throw std::invalid_argument("ImageNet urls.txt file not found"); 74 throw std::invalid_argument("ImageNet urls.txt file not found");
75 } 75 }
76 } 76 }
77 77
78 void generator::run() 78 void generator::run()
79 { 79 {
80 // Create notions, words, lemmas, and forms from WordNet synsets 80 // Create notions, words, lemmas, and forms from WordNet synsets
81 readWordNetSynsets(); 81 readWordNetSynsets();
82 82
83 // Reads adjective positioning WordNet data 83 // Reads adjective positioning WordNet data
84 readAdjectivePositioning(); 84 readAdjectivePositioning();
85 85
86 // Counts the number of URLs ImageNet has per notion 86 // Counts the number of URLs ImageNet has per notion
87 readImageNetUrls(); 87 readImageNetUrls();
88 88
89 // Creates a word by WordNet sense key lookup table 89 // Creates a word by WordNet sense key lookup table
90 readWordNetSenseKeys(); 90 readWordNetSenseKeys();
91 91
92 // Creates groups and frames from VerbNet data 92 // Creates groups and frames from VerbNet data
93 readVerbNet(); 93 readVerbNet();
94 94
95 // Creates forms and inflections from AGID. To reduce the amount of forms 95 // Creates forms and inflections from AGID. To reduce the amount of forms
96 // created, we do this after most lemmas that need inflecting have been 96 // created, we do this after most lemmas that need inflecting have been
97 // created through other means, and then only generate forms for 97 // created through other means, and then only generate forms for
@@ -101,86 +101,78 @@ namespace verbly {
101 // then a notion and a word is generated and the form generation proceeds 101 // then a notion and a word is generated and the form generation proceeds
102 // as usual. 102 // as usual.
103 readAgidInflections(); 103 readAgidInflections();
104 104
105 // Reads in prepositions and the is_a relationship 105 // Reads in prepositions and the is_a relationship
106 readPrepositions(); 106 readPrepositions();
107 107
108 // Creates pronunciations from CMUDICT. To reduce the amount of 108 // Creates pronunciations from CMUDICT. To reduce the amount of
109 // pronunciations created, we do this after all forms have been created, 109 // pronunciations created, we do this after all forms have been created,
110 // and then only generate pronunciations for already-exisiting forms. 110 // and then only generate pronunciations for already-exisiting forms.
111 readCmudictPronunciations(); 111 readCmudictPronunciations();
112 112
113 // Writes the database schema 113 // Writes the database schema
114 writeSchema(); 114 writeSchema();
115 115
116 // Dumps data to the database 116 // Dumps data to the database
117 dumpObjects(); 117 dumpObjects();
118 118
119 // Populates the antonymy relationship from WordNet 119 // Populates the antonymy relationship from WordNet
120 readWordNetAntonymy(); 120 readWordNetAntonymy();
121 121
122 // Populates the variation relationship from WordNet 122 // Populates the variation relationship from WordNet
123 readWordNetVariation(); 123 readWordNetVariation();
124 124
125 // Populates the usage, topicality, and regionality relationships from 125 // Populates the usage, topicality, and regionality relationships from
126 // WordNet 126 // WordNet
127 readWordNetClasses(); 127 readWordNetClasses();
128 128
129 // Populates the causality relationship from WordNet 129 // Populates the causality relationship from WordNet
130 readWordNetCausality(); 130 readWordNetCausality();
131 131
132 // Populates the entailment relationship from WordNet 132 // Populates the entailment relationship from WordNet
133 readWordNetEntailment(); 133 readWordNetEntailment();
134 134
135 // Populates the hypernymy relationship from WordNet 135 // Populates the hypernymy relationship from WordNet
136 readWordNetHypernymy(); 136 readWordNetHypernymy();
137 137
138 // Populates the instantiation relationship from WordNet 138 // Populates the instantiation relationship from WordNet
139 readWordNetInstantiation(); 139 readWordNetInstantiation();
140 140
141 // Populates the member meronymy relationship from WordNet 141 // Populates the member meronymy relationship from WordNet
142 readWordNetMemberMeronymy(); 142 readWordNetMemberMeronymy();
143 143
144 // Populates the part meronymy relationship from WordNet 144 // Populates the part meronymy relationship from WordNet
145 readWordNetPartMeronymy(); 145 readWordNetPartMeronymy();
146 146
147 // Populates the substance meronymy relationship from WordNet 147 // Populates the substance meronymy relationship from WordNet
148 readWordNetSubstanceMeronymy(); 148 readWordNetSubstanceMeronymy();
149 149
150 // Populates the pertainymy and mannernymy relationships from WordNet 150 // Populates the pertainymy and mannernymy relationships from WordNet
151 readWordNetPertainymy(); 151 readWordNetPertainymy();
152 152
153 // Populates the specification relationship from WordNet 153 // Populates the specification relationship from WordNet
154 readWordNetSpecification(); 154 readWordNetSpecification();
155 155
156 // Populates the adjective similarity relationship from WordNet 156 // Populates the adjective similarity relationship from WordNet
157 readWordNetSimilarity(); 157 readWordNetSimilarity();
158
159
160
161
162
163
164
165
166 } 158 }
167 159
168 void generator::readWordNetSynsets() 160 void generator::readWordNetSynsets()
169 { 161 {
170 std::list<std::string> lines(readFile(wordNetPath_ + "wn_s.pl")); 162 std::list<std::string> lines(readFile(wordNetPath_ + "wn_s.pl"));
171 progress ppgs("Reading synsets from WordNet...", lines.size()); 163 progress ppgs("Reading synsets from WordNet...", lines.size());
172 164
173 for (std::string line : lines) 165 for (std::string line : lines)
174 { 166 {
175 ppgs.update(); 167 ppgs.update();
176 168
177 std::regex relation("^s\\(([1234]\\d{8}),(\\d+),'(.+)',\\w,\\d+,(\\d+)\\)\\.$"); 169 std::regex relation("^s\\(([1234]\\d{8}),(\\d+),'(.+)',\\w,\\d+,(\\d+)\\)\\.$");
178 std::smatch relation_data; 170 std::smatch relation_data;
179 if (!std::regex_search(line, relation_data, relation)) 171 if (!std::regex_search(line, relation_data, relation))
180 { 172 {
181 continue; 173 continue;
182 } 174 }
183 175
184 int synset_id = std::stoi(relation_data[1]); 176 int synset_id = std::stoi(relation_data[1]);
185 int wnum = std::stoi(relation_data[2]); 177 int wnum = std::stoi(relation_data[2]);
186 std::string text = relation_data[3]; 178 std::string text = relation_data[3];
@@ -190,7 +182,7 @@ namespace verbly {
190 { 182 {
191 text.erase(word_it, 1); 183 text.erase(word_it, 1);
192 } 184 }
193 185
194 // The WordNet data does contain duplicates, so we need to check that we 186 // The WordNet data does contain duplicates, so we need to check that we
195 // haven't already created this word. 187 // haven't already created this word.
196 std::pair<int, int> lookup(synset_id, wnum); 188 std::pair<int, int> lookup(synset_id, wnum);
@@ -204,32 +196,32 @@ namespace verbly {
204 } 196 }
205 } 197 }
206 } 198 }
207 199
208 void generator::readAdjectivePositioning() 200 void generator::readAdjectivePositioning()
209 { 201 {
210 std::list<std::string> lines(readFile(wordNetPath_ + "wn_syntax.pl")); 202 std::list<std::string> lines(readFile(wordNetPath_ + "wn_syntax.pl"));
211 progress ppgs("Reading adjective positionings from WordNet...", lines.size()); 203 progress ppgs("Reading adjective positionings from WordNet...", lines.size());
212 204
213 for (std::string line : lines) 205 for (std::string line : lines)
214 { 206 {
215 ppgs.update(); 207 ppgs.update();
216 208
217 std::regex relation("^syntax\\((3\\d{8}),(\\d+),([ipa])p?\\)\\."); 209 std::regex relation("^syntax\\((3\\d{8}),(\\d+),([ipa])p?\\)\\.");
218 std::smatch relation_data; 210 std::smatch relation_data;
219 if (!std::regex_search(line, relation_data, relation)) 211 if (!std::regex_search(line, relation_data, relation))
220 { 212 {
221 continue; 213 continue;
222 } 214 }
223 215
224 int synset_id = stoi(relation_data[1]); 216 int synset_id = stoi(relation_data[1]);
225 int wnum = stoi(relation_data[2]); 217 int wnum = stoi(relation_data[2]);
226 std::string adjpos_str = relation_data[3]; 218 std::string adjpos_str = relation_data[3];
227 219
228 std::pair<int, int> lookup(synset_id, wnum); 220 std::pair<int, int> lookup(synset_id, wnum);
229 if (wordByWnidAndWnum_.count(lookup)) 221 if (wordByWnidAndWnum_.count(lookup))
230 { 222 {
231 word& adj = *wordByWnidAndWnum_.at(lookup); 223 word& adj = *wordByWnidAndWnum_.at(lookup);
232 224
233 if (adjpos_str == "p") 225 if (adjpos_str == "p")
234 { 226 {
235 adj.setAdjectivePosition(positioning::predicate); 227 adj.setAdjectivePosition(positioning::predicate);
@@ -246,20 +238,20 @@ namespace verbly {
246 } 238 }
247 } 239 }
248 } 240 }
249 241
250 void generator::readImageNetUrls() 242 void generator::readImageNetUrls()
251 { 243 {
252 // The ImageNet datafile is so large that it is unreasonable and 244 // The ImageNet datafile is so large that it is unreasonable and
253 // unnecessary to read it into memory; instead, we will parse each line as 245 // unnecessary to read it into memory; instead, we will parse each line as
254 // we read it. This has the caveat that we cannot display a progress bar. 246 // we read it. This has the caveat that we cannot display a progress bar.
255 std::cout << "Reading image counts from ImageNet..." << std::endl; 247 std::cout << "Reading image counts from ImageNet..." << std::endl;
256 248
257 std::ifstream file(imageNetPath_); 249 std::ifstream file(imageNetPath_);
258 if (!file) 250 if (!file)
259 { 251 {
260 throw std::invalid_argument("Could not find file " + imageNetPath_); 252 throw std::invalid_argument("Could not find file " + imageNetPath_);
261 } 253 }
262 254
263 std::string line; 255 std::string line;
264 while (std::getline(file, line)) 256 while (std::getline(file, line))
265 { 257 {
@@ -267,7 +259,7 @@ namespace verbly {
267 { 259 {
268 line.pop_back(); 260 line.pop_back();
269 } 261 }
270 262
271 std::string wnid_s = line.substr(1, 8); 263 std::string wnid_s = line.substr(1, 8);
272 int wnid = stoi(wnid_s) + 100000000; 264 int wnid = stoi(wnid_s) + 100000000;
273 if (notionByWnid_.count(wnid)) 265 if (notionByWnid_.count(wnid))
@@ -277,16 +269,16 @@ namespace verbly {
277 } 269 }
278 } 270 }
279 } 271 }
280 272
281 void generator::readWordNetSenseKeys() 273 void generator::readWordNetSenseKeys()
282 { 274 {
283 std::list<std::string> lines(readFile(wordNetPath_ + "wn_sk.pl")); 275 std::list<std::string> lines(readFile(wordNetPath_ + "wn_sk.pl"));
284 progress ppgs("Reading sense keys from WordNet...", lines.size()); 276 progress ppgs("Reading sense keys from WordNet...", lines.size());
285 277
286 for (std::string line : lines) 278 for (std::string line : lines)
287 { 279 {
288 ppgs.update(); 280 ppgs.update();
289 281
290 // We only actually need to lookup verbs by sense key so we'll just 282 // We only actually need to lookup verbs by sense key so we'll just
291 // ignore everything that isn't a verb. 283 // ignore everything that isn't a verb.
292 std::regex relation("^sk\\((2\\d{8}),(\\d+),'(.+)'\\)\\.$"); 284 std::regex relation("^sk\\((2\\d{8}),(\\d+),'(.+)'\\)\\.$");
@@ -295,11 +287,11 @@ namespace verbly {
295 { 287 {
296 continue; 288 continue;
297 } 289 }
298 290
299 int synset_id = stoi(relation_data[1]); 291 int synset_id = stoi(relation_data[1]);
300 int wnum = stoi(relation_data[2]); 292 int wnum = stoi(relation_data[2]);
301 std::string sense_key = relation_data[3]; 293 std::string sense_key = relation_data[3];
302 294
303 // We are treating this mapping as injective, which is not entirely 295 // We are treating this mapping as injective, which is not entirely
304 // accurate. First, the WordNet table contains duplicate rows, so those 296 // accurate. First, the WordNet table contains duplicate rows, so those
305 // need to be ignored. More importantly, a small number of sense keys 297 // need to be ignored. More importantly, a small number of sense keys
@@ -317,17 +309,17 @@ namespace verbly {
317 } 309 }
318 } 310 }
319 } 311 }
320 312
321 void generator::readVerbNet() 313 void generator::readVerbNet()
322 { 314 {
323 std::cout << "Reading frames from VerbNet..." << std::endl; 315 std::cout << "Reading frames from VerbNet..." << std::endl;
324 316
325 DIR* dir; 317 DIR* dir;
326 if ((dir = opendir(verbNetPath_.c_str())) == nullptr) 318 if ((dir = opendir(verbNetPath_.c_str())) == nullptr)
327 { 319 {
328 throw std::invalid_argument("Invalid VerbNet data directory"); 320 throw std::invalid_argument("Invalid VerbNet data directory");
329 } 321 }
330 322
331 struct dirent* ent; 323 struct dirent* ent;
332 while ((ent = readdir(dir)) != nullptr) 324 while ((ent = readdir(dir)) != nullptr)
333 { 325 {
@@ -337,20 +329,20 @@ namespace verbly {
337 { 329 {
338 filename += '/'; 330 filename += '/';
339 } 331 }
340 332
341 filename += ent->d_name; 333 filename += ent->d_name;
342 334
343 if (filename.rfind(".xml") != filename.size() - 4) 335 if (filename.rfind(".xml") != filename.size() - 4)
344 { 336 {
345 continue; 337 continue;
346 } 338 }
347 339
348 xmlDocPtr doc = xmlParseFile(filename.c_str()); 340 xmlDocPtr doc = xmlParseFile(filename.c_str());
349 if (doc == nullptr) 341 if (doc == nullptr)
350 { 342 {
351 throw std::logic_error("Error opening " + filename); 343 throw std::logic_error("Error opening " + filename);
352 } 344 }
353 345
354 xmlNodePtr top = xmlDocGetRootElement(doc); 346 xmlNodePtr top = xmlDocGetRootElement(doc);
355 if ((top == nullptr) || (xmlStrcmp(top->name, reinterpret_cast<const xmlChar*>("VNCLASS")))) 347 if ((top == nullptr) || (xmlStrcmp(top->name, reinterpret_cast<const xmlChar*>("VNCLASS"))))
356 { 348 {
@@ -365,71 +357,81 @@ namespace verbly {
365 std::throw_with_nested(std::logic_error("Error parsing VerbNet file: " + filename)); 357 std::throw_with_nested(std::logic_error("Error parsing VerbNet file: " + filename));
366 } 358 }
367 } 359 }
368 360
369 closedir(dir); 361 closedir(dir);
370 } 362 }
371 363
372 void generator::readAgidInflections() 364 void generator::readAgidInflections()
373 { 365 {
374 std::list<std::string> lines(readFile(agidPath_)); 366 std::list<std::string> lines(readFile(agidPath_));
375 progress ppgs("Reading inflections from AGID...", lines.size()); 367 progress ppgs("Reading inflections from AGID...", lines.size());
376 368
377 for (std::string line : lines) 369 for (std::string line : lines)
378 { 370 {
379 ppgs.update(); 371 ppgs.update();
380 372
381 int divider = line.find_first_of(" "); 373 int divider = line.find_first_of(" ");
382 std::string infinitive = line.substr(0, divider); 374 std::string infinitive = line.substr(0, divider);
383 line = line.substr(divider+1); 375 line = line.substr(divider+1);
384 char type = line[0]; 376 char type = line[0];
385 377
386 if (line[1] == '?') 378 if (line[1] == '?')
387 { 379 {
388 line.erase(0, 4); 380 line.erase(0, 4);
389 } else { 381 } else {
390 line.erase(0, 3); 382 line.erase(0, 3);
391 } 383 }
392 384
393 if (!lemmaByBaseForm_.count(infinitive) && (type != 'V')) 385 if (!lemmaByBaseForm_.count(infinitive) && (type != 'V'))
394 { 386 {
395 continue; 387 continue;
396 } 388 }
397 389
398 lemma& curLemma = lookupOrCreateLemma(infinitive); 390 lemma& curLemma = lookupOrCreateLemma(infinitive);
399 391
400 auto forms = split<std::vector<std::string>>(line, " | "); 392 std::vector<std::list<std::string>> agidForms;
401 for (std::string& inflForm : forms) 393 for (std::string inflForms : split<std::list<std::string>>(line, " | "))
402 { 394 {
403 int sympos = inflForm.find_first_of(",?"); 395 std::list<std::string> forms;
404 if (sympos != std::string::npos) 396
397 for (std::string inflForm : split<std::list<std::string>>(std::move(inflForms), ", "))
405 { 398 {
406 inflForm = inflForm.substr(0, sympos); 399 int sympos = inflForm.find_first_of("~<!? ");
400 if (sympos != std::string::npos)
401 {
402 inflForm = inflForm.substr(0, sympos);
403 }
404
405 forms.push_back(std::move(inflForm));
407 } 406 }
407
408 agidForms.push_back(std::move(forms));
408 } 409 }
409 410
411 std::map<inflection, std::list<std::string>> mappedForms;
410 switch (type) 412 switch (type)
411 { 413 {
412 case 'V': 414 case 'V':
413 { 415 {
414 if (forms.size() == 4) 416 if (agidForms.size() == 4)
415 { 417 {
416 curLemma.addInflection(inflection::past_tense, lookupOrCreateForm(forms[0])); 418 mappedForms[inflection::past_tense] = agidForms[0];
417 curLemma.addInflection(inflection::past_participle, lookupOrCreateForm(forms[1])); 419 mappedForms[inflection::past_participle] = agidForms[1];
418 curLemma.addInflection(inflection::ing_form, lookupOrCreateForm(forms[2])); 420 mappedForms[inflection::ing_form] = agidForms[2];
419 curLemma.addInflection(inflection::s_form, lookupOrCreateForm(forms[3])); 421 mappedForms[inflection::s_form] = agidForms[3];
420 } else if (forms.size() == 3) 422 } else if (agidForms.size() == 3)
421 { 423 {
422 curLemma.addInflection(inflection::past_tense, lookupOrCreateForm(forms[0])); 424 mappedForms[inflection::past_tense] = agidForms[0];
423 curLemma.addInflection(inflection::past_participle, lookupOrCreateForm(forms[0])); 425 mappedForms[inflection::past_participle] = agidForms[0];
424 curLemma.addInflection(inflection::ing_form, lookupOrCreateForm(forms[1])); 426 mappedForms[inflection::ing_form] = agidForms[1];
425 curLemma.addInflection(inflection::s_form, lookupOrCreateForm(forms[2])); 427 mappedForms[inflection::s_form] = agidForms[2];
426 } else if (forms.size() == 8) 428 } else if (agidForms.size() == 8)
427 { 429 {
428 // As of AGID 2014.08.11, this is only "to be" 430 // As of AGID 2014.08.11, this is only "to be"
429 curLemma.addInflection(inflection::past_tense, lookupOrCreateForm(forms[0])); 431 mappedForms[inflection::past_tense] = agidForms[0];
430 curLemma.addInflection(inflection::past_participle, lookupOrCreateForm(forms[2])); 432 mappedForms[inflection::past_participle] = agidForms[2];
431 curLemma.addInflection(inflection::ing_form, lookupOrCreateForm(forms[3])); 433 mappedForms[inflection::ing_form] = agidForms[3];
432 curLemma.addInflection(inflection::s_form, lookupOrCreateForm(forms[4])); 434 mappedForms[inflection::s_form] = agidForms[4];
433 } else { 435 } else {
434 // Words that don't fit the cases above as of AGID 2014.08.11: 436 // Words that don't fit the cases above as of AGID 2014.08.11:
435 // - may and shall do not conjugate the way we want them to 437 // - may and shall do not conjugate the way we want them to
@@ -437,7 +439,7 @@ namespace verbly {
437 // - wit has five forms, and is archaic/obscure enough that we can ignore it for now 439 // - wit has five forms, and is archaic/obscure enough that we can ignore it for now
438 std::cout << " Ignoring verb \"" << infinitive << "\" due to non-standard number of forms." << std::endl; 440 std::cout << " Ignoring verb \"" << infinitive << "\" due to non-standard number of forms." << std::endl;
439 } 441 }
440 442
441 // For verbs in particular, we sometimes create a notion and a word 443 // For verbs in particular, we sometimes create a notion and a word
442 // from inflection data. Specifically, if there are not yet any 444 // from inflection data. Specifically, if there are not yet any
443 // verbs existing that have the same infinitive form. "Yet" means 445 // verbs existing that have the same infinitive form. "Yet" means
@@ -451,84 +453,93 @@ namespace verbly {
451 notion& n = createNotion(part_of_speech::verb); 453 notion& n = createNotion(part_of_speech::verb);
452 createWord(n, curLemma); 454 createWord(n, curLemma);
453 } 455 }
454 456
455 break; 457 break;
456 } 458 }
457 459
458 case 'A': 460 case 'A':
459 { 461 {
460 if (forms.size() == 2) 462 if (agidForms.size() == 2)
461 { 463 {
462 curLemma.addInflection(inflection::comparative, lookupOrCreateForm(forms[0])); 464 mappedForms[inflection::comparative] = agidForms[0];
463 curLemma.addInflection(inflection::superlative, lookupOrCreateForm(forms[1])); 465 mappedForms[inflection::superlative] = agidForms[1];
464 } else { 466 } else {
465 // As of AGID 2014.08.11, this is only "only", which has only the form "onliest" 467 // As of AGID 2014.08.11, this is only "only", which has only the form "onliest"
466 std::cout << " Ignoring adjective/adverb \"" << infinitive << "\" due to non-standard number of forms." << std::endl; 468 std::cout << " Ignoring adjective/adverb \"" << infinitive << "\" due to non-standard number of forms." << std::endl;
467 } 469 }
468 470
469 break; 471 break;
470 } 472 }
471 473
472 case 'N': 474 case 'N':
473 { 475 {
474 if (forms.size() == 1) 476 if (agidForms.size() == 1)
475 { 477 {
476 curLemma.addInflection(inflection::plural, lookupOrCreateForm(forms[0])); 478 mappedForms[inflection::plural] = agidForms[0];
477 } else { 479 } else {
478 // As of AGID 2014.08.11, this is non-existent. 480 // As of AGID 2014.08.11, this is non-existent.
479 std::cout << " Ignoring noun \"" << infinitive << "\" due to non-standard number of forms." << std::endl; 481 std::cout << " Ignoring noun \"" << infinitive << "\" due to non-standard number of forms." << std::endl;
480 } 482 }
481 483
482 break; 484 break;
483 } 485 }
484 } 486 }
487
488 // Compile the forms we have mapped.
489 for (auto mapping : std::move(mappedForms))
490 {
491 for (std::string infl : std::move(mapping.second))
492 {
493 curLemma.addInflection(mapping.first, lookupOrCreateForm(std::move(infl)));
494 }
495 }
485 } 496 }
486 } 497 }
487 498
488 void generator::readPrepositions() 499 void generator::readPrepositions()
489 { 500 {
490 std::list<std::string> lines(readFile("prepositions.txt")); 501 std::list<std::string> lines(readFile("prepositions.txt"));
491 progress ppgs("Reading prepositions...", lines.size()); 502 progress ppgs("Reading prepositions...", lines.size());
492 503
493 for (std::string line : lines) 504 for (std::string line : lines)
494 { 505 {
495 ppgs.update(); 506 ppgs.update();
496 507
497 std::regex relation("^([^:]+): (.+)"); 508 std::regex relation("^([^:]+): (.+)");
498 std::smatch relation_data; 509 std::smatch relation_data;
499 std::regex_search(line, relation_data, relation); 510 std::regex_search(line, relation_data, relation);
500 std::string prep = relation_data[1]; 511 std::string prep = relation_data[1];
501 auto groups = split<std::list<std::string>>(relation_data[2], ", "); 512 auto groups = split<std::list<std::string>>(relation_data[2], ", ");
502 513
503 notion& n = createNotion(part_of_speech::preposition); 514 notion& n = createNotion(part_of_speech::preposition);
504 lemma& l = lookupOrCreateLemma(prep); 515 lemma& l = lookupOrCreateLemma(prep);
505 word& w = createWord(n, l); 516 word& w = createWord(n, l);
506 517
507 n.setPrepositionGroups(groups); 518 n.setPrepositionGroups(groups);
508 } 519 }
509 } 520 }
510 521
511 void generator::readCmudictPronunciations() 522 void generator::readCmudictPronunciations()
512 { 523 {
513 std::list<std::string> lines(readFile(cmudictPath_)); 524 std::list<std::string> lines(readFile(cmudictPath_));
514 progress ppgs("Reading pronunciations from CMUDICT...", lines.size()); 525 progress ppgs("Reading pronunciations from CMUDICT...", lines.size());
515 526
516 for (std::string line : lines) 527 for (std::string line : lines)
517 { 528 {
518 ppgs.update(); 529 ppgs.update();
519 530
520 std::regex phoneme("([A-Z][^ \\(]*)(?:\\(\\d+\\))? ([A-Z 0-9]+)"); 531 std::regex phoneme("([A-Z][^ \\(]*)(?:\\(\\d+\\))? ([A-Z 0-9]+)");
521 std::smatch phoneme_data; 532 std::smatch phoneme_data;
522 if (std::regex_search(line, phoneme_data, phoneme)) 533 if (std::regex_search(line, phoneme_data, phoneme))
523 { 534 {
524 std::string canonical(phoneme_data[1]); 535 std::string canonical(phoneme_data[1]);
525 std::transform(std::begin(canonical), std::end(canonical), std::begin(canonical), ::tolower); 536 std::transform(std::begin(canonical), std::end(canonical), std::begin(canonical), ::tolower);
526 537
527 if (!formByText_.count(canonical)) 538 if (!formByText_.count(canonical))
528 { 539 {
529 continue; 540 continue;
530 } 541 }
531 542
532 std::string phonemes = phoneme_data[2]; 543 std::string phonemes = phoneme_data[2];
533 pronunciations_.emplace_back(phonemes); 544 pronunciations_.emplace_back(phonemes);
534 pronunciation& p = pronunciations_.back(); 545 pronunciation& p = pronunciations_.back();
@@ -536,7 +547,7 @@ namespace verbly {
536 } 547 }
537 } 548 }
538 } 549 }
539 550
540 void generator::writeSchema() 551 void generator::writeSchema()
541 { 552 {
542 std::ifstream file("schema.sql"); 553 std::ifstream file("schema.sql");
@@ -544,7 +555,7 @@ namespace verbly {
544 { 555 {
545 throw std::invalid_argument("Could not find database schema"); 556 throw std::invalid_argument("Could not find database schema");
546 } 557 }
547 558
548 std::ostringstream schemaBuilder; 559 std::ostringstream schemaBuilder;
549 std::string line; 560 std::string line;
550 while (std::getline(file, line)) 561 while (std::getline(file, line))
@@ -553,10 +564,10 @@ namespace verbly {
553 { 564 {
554 line.pop_back(); 565 line.pop_back();
555 } 566 }
556 567
557 schemaBuilder << line; 568 schemaBuilder << line;
558 } 569 }
559 570
560 std::string schema = schemaBuilder.str(); 571 std::string schema = schemaBuilder.str();
561 auto queries = split<std::list<std::string>>(schema, ";"); 572 auto queries = split<std::list<std::string>>(schema, ";");
562 progress ppgs("Writing database schema...", queries.size()); 573 progress ppgs("Writing database schema...", queries.size());
@@ -566,91 +577,91 @@ namespace verbly {
566 { 577 {
567 db_.runQuery(query); 578 db_.runQuery(query);
568 } 579 }
569 580
570 ppgs.update(); 581 ppgs.update();
571 } 582 }
572 } 583 }
573 584
574 void generator::dumpObjects() 585 void generator::dumpObjects()
575 { 586 {
576 { 587 {
577 progress ppgs("Writing notions...", notions_.size()); 588 progress ppgs("Writing notions...", notions_.size());
578 589
579 for (notion& n : notions_) 590 for (notion& n : notions_)
580 { 591 {
581 db_ << n; 592 db_ << n;
582 593
583 ppgs.update(); 594 ppgs.update();
584 } 595 }
585 } 596 }
586 597
587 { 598 {
588 progress ppgs("Writing words...", words_.size()); 599 progress ppgs("Writing words...", words_.size());
589 600
590 for (word& w : words_) 601 for (word& w : words_)
591 { 602 {
592 db_ << w; 603 db_ << w;
593 604
594 ppgs.update(); 605 ppgs.update();
595 } 606 }
596 } 607 }
597 608
598 { 609 {
599 progress ppgs("Writing lemmas...", lemmas_.size()); 610 progress ppgs("Writing lemmas...", lemmas_.size());
600 611
601 for (lemma& l : lemmas_) 612 for (lemma& l : lemmas_)
602 { 613 {
603 db_ << l; 614 db_ << l;
604 615
605 ppgs.update(); 616 ppgs.update();
606 } 617 }
607 } 618 }
608 619
609 { 620 {
610 progress ppgs("Writing forms...", forms_.size()); 621 progress ppgs("Writing forms...", forms_.size());
611 622
612 for (form& f : forms_) 623 for (form& f : forms_)
613 { 624 {
614 db_ << f; 625 db_ << f;
615 626
616 ppgs.update(); 627 ppgs.update();
617 } 628 }
618 } 629 }
619 630
620 { 631 {
621 progress ppgs("Writing pronunciations...", pronunciations_.size()); 632 progress ppgs("Writing pronunciations...", pronunciations_.size());
622 633
623 for (pronunciation& p : pronunciations_) 634 for (pronunciation& p : pronunciations_)
624 { 635 {
625 db_ << p; 636 db_ << p;
626 637
627 ppgs.update(); 638 ppgs.update();
628 } 639 }
629 } 640 }
630 641
631 { 642 {
632 progress ppgs("Writing verb groups...", groups_.size()); 643 progress ppgs("Writing verb groups...", groups_.size());
633 644
634 for (group& g : groups_) 645 for (group& g : groups_)
635 { 646 {
636 db_ << g; 647 db_ << g;
637 648
638 ppgs.update(); 649 ppgs.update();
639 } 650 }
640 } 651 }
641 652
642 { 653 {
643 progress ppgs("Writing verb frames...", frames_.size()); 654 progress ppgs("Writing verb frames...", frames_.size());
644 655
645 for (frame& f : frames_) 656 for (frame& f : frames_)
646 { 657 {
647 db_ << f; 658 db_ << f;
648 659
649 ppgs.update(); 660 ppgs.update();
650 } 661 }
651 } 662 }
652 } 663 }
653 664
654 void generator::readWordNetAntonymy() 665 void generator::readWordNetAntonymy()
655 { 666 {
656 std::list<std::string> lines(readFile(wordNetPath_ + "wn_ant.pl")); 667 std::list<std::string> lines(readFile(wordNetPath_ + "wn_ant.pl"));
@@ -658,7 +669,7 @@ namespace verbly {
658 for (auto line : lines) 669 for (auto line : lines)
659 { 670 {
660 ppgs.update(); 671 ppgs.update();
661 672
662 std::regex relation("^ant\\(([134]\\d{8}),(\\d+),([134]\\d{8}),(\\d+)\\)\\."); 673 std::regex relation("^ant\\(([134]\\d{8}),(\\d+),([134]\\d{8}),(\\d+)\\)\\.");
663 std::smatch relation_data; 674 std::smatch relation_data;
664 if (!std::regex_search(line, relation_data, relation)) 675 if (!std::regex_search(line, relation_data, relation))
@@ -668,21 +679,21 @@ namespace verbly {
668 679
669 std::pair<int, int> lookup1(std::stoi(relation_data[1]), std::stoi(relation_data[2])); 680 std::pair<int, int> lookup1(std::stoi(relation_data[1]), std::stoi(relation_data[2]));
670 std::pair<int, int> lookup2(std::stoi(relation_data[3]), std::stoi(relation_data[4])); 681 std::pair<int, int> lookup2(std::stoi(relation_data[3]), std::stoi(relation_data[4]));
671 682
672 if (wordByWnidAndWnum_.count(lookup1) && wordByWnidAndWnum_.count(lookup2)) 683 if (wordByWnidAndWnum_.count(lookup1) && wordByWnidAndWnum_.count(lookup2))
673 { 684 {
674 word& word1 = *wordByWnidAndWnum_.at(lookup1); 685 word& word1 = *wordByWnidAndWnum_.at(lookup1);
675 word& word2 = *wordByWnidAndWnum_.at(lookup2); 686 word& word2 = *wordByWnidAndWnum_.at(lookup2);
676 687
677 std::list<field> fields; 688 std::list<field> fields;
678 fields.emplace_back("antonym_1_id", word1.getId()); 689 fields.emplace_back("antonym_1_id", word1.getId());
679 fields.emplace_back("antonym_2_id", word2.getId()); 690 fields.emplace_back("antonym_2_id", word2.getId());
680 691
681 db_.insertIntoTable("antonymy", std::move(fields)); 692 db_.insertIntoTable("antonymy", std::move(fields));
682 } 693 }
683 } 694 }
684 } 695 }
685 696
686 void generator::readWordNetVariation() 697 void generator::readWordNetVariation()
687 { 698 {
688 std::list<std::string> lines(readFile(wordNetPath_ + "wn_at.pl")); 699 std::list<std::string> lines(readFile(wordNetPath_ + "wn_at.pl"));
@@ -690,7 +701,7 @@ namespace verbly {
690 for (auto line : lines) 701 for (auto line : lines)
691 { 702 {
692 ppgs.update(); 703 ppgs.update();
693 704
694 std::regex relation("^at\\((1\\d{8}),(3\\d{8})\\)\\."); 705 std::regex relation("^at\\((1\\d{8}),(3\\d{8})\\)\\.");
695 std::smatch relation_data; 706 std::smatch relation_data;
696 if (!std::regex_search(line, relation_data, relation)) 707 if (!std::regex_search(line, relation_data, relation))
@@ -700,21 +711,21 @@ namespace verbly {
700 711
701 int lookup1 = std::stoi(relation_data[1]); 712 int lookup1 = std::stoi(relation_data[1]);
702 int lookup2 = std::stoi(relation_data[2]); 713 int lookup2 = std::stoi(relation_data[2]);
703 714
704 if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2)) 715 if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2))
705 { 716 {
706 notion& notion1 = *notionByWnid_.at(lookup1); 717 notion& notion1 = *notionByWnid_.at(lookup1);
707 notion& notion2 = *notionByWnid_.at(lookup2); 718 notion& notion2 = *notionByWnid_.at(lookup2);
708 719
709 std::list<field> fields; 720 std::list<field> fields;
710 fields.emplace_back("noun_id", notion1.getId()); 721 fields.emplace_back("noun_id", notion1.getId());
711 fields.emplace_back("adjective_id", notion2.getId()); 722 fields.emplace_back("adjective_id", notion2.getId());
712 723
713 db_.insertIntoTable("variation", std::move(fields)); 724 db_.insertIntoTable("variation", std::move(fields));
714 } 725 }
715 } 726 }
716 } 727 }
717 728
718 void generator::readWordNetClasses() 729 void generator::readWordNetClasses()
719 { 730 {
720 std::list<std::string> lines(readFile(wordNetPath_ + "wn_cls.pl")); 731 std::list<std::string> lines(readFile(wordNetPath_ + "wn_cls.pl"));
@@ -722,7 +733,7 @@ namespace verbly {
722 for (auto line : lines) 733 for (auto line : lines)
723 { 734 {
724 ppgs.update(); 735 ppgs.update();
725 736
726 std::regex relation("^cls\\(([134]\\d{8}),(\\d+),(1\\d{8}),(\\d+),([tur])\\)\\."); 737 std::regex relation("^cls\\(([134]\\d{8}),(\\d+),(1\\d{8}),(\\d+),([tur])\\)\\.");
727 std::smatch relation_data; 738 std::smatch relation_data;
728 if (!std::regex_search(line, relation_data, relation)) 739 if (!std::regex_search(line, relation_data, relation))
@@ -733,7 +744,7 @@ namespace verbly {
733 std::pair<int, int> lookup1(std::stoi(relation_data[1]), std::stoi(relation_data[2])); 744 std::pair<int, int> lookup1(std::stoi(relation_data[1]), std::stoi(relation_data[2]));
734 std::pair<int, int> lookup2(std::stoi(relation_data[3]), std::stoi(relation_data[4])); 745 std::pair<int, int> lookup2(std::stoi(relation_data[3]), std::stoi(relation_data[4]));
735 std::string class_type = relation_data[5]; 746 std::string class_type = relation_data[5];
736 747
737 std::string table_name; 748 std::string table_name;
738 if (class_type == "t") 749 if (class_type == "t")
739 { 750 {
@@ -745,10 +756,10 @@ namespace verbly {
745 { 756 {
746 table_name += "regionality"; 757 table_name += "regionality";
747 } 758 }
748 759
749 std::list<int> leftJoin; 760 std::list<int> leftJoin;
750 std::list<int> rightJoin; 761 std::list<int> rightJoin;
751 762
752 if ((lookup1.second == 0) && (wordsByWnid_.count(lookup1.first))) 763 if ((lookup1.second == 0) && (wordsByWnid_.count(lookup1.first)))
753 { 764 {
754 std::transform(std::begin(wordsByWnid_.at(lookup1.first)), std::end(wordsByWnid_.at(lookup1.first)), std::back_inserter(leftJoin), [] (word* w) { 765 std::transform(std::begin(wordsByWnid_.at(lookup1.first)), std::end(wordsByWnid_.at(lookup1.first)), std::back_inserter(leftJoin), [] (word* w) {
@@ -757,7 +768,7 @@ namespace verbly {
757 } else if (wordByWnidAndWnum_.count(lookup1)) { 768 } else if (wordByWnidAndWnum_.count(lookup1)) {
758 leftJoin.push_back(wordByWnidAndWnum_.at(lookup1)->getId()); 769 leftJoin.push_back(wordByWnidAndWnum_.at(lookup1)->getId());
759 } 770 }
760 771
761 if ((lookup2.second == 0) && (wordsByWnid_.count(lookup2.first))) 772 if ((lookup2.second == 0) && (wordsByWnid_.count(lookup2.first)))
762 { 773 {
763 std::transform(std::begin(wordsByWnid_.at(lookup2.first)), std::end(wordsByWnid_.at(lookup2.first)), std::back_inserter(rightJoin), [] (word* w) { 774 std::transform(std::begin(wordsByWnid_.at(lookup2.first)), std::end(wordsByWnid_.at(lookup2.first)), std::back_inserter(rightJoin), [] (word* w) {
@@ -766,7 +777,7 @@ namespace verbly {
766 } else if (wordByWnidAndWnum_.count(lookup2)) { 777 } else if (wordByWnidAndWnum_.count(lookup2)) {
767 rightJoin.push_back(wordByWnidAndWnum_.at(lookup2)->getId()); 778 rightJoin.push_back(wordByWnidAndWnum_.at(lookup2)->getId());
768 } 779 }
769 780
770 for (int word1 : leftJoin) 781 for (int word1 : leftJoin)
771 { 782 {
772 for (int word2 : rightJoin) 783 for (int word2 : rightJoin)
@@ -774,13 +785,13 @@ namespace verbly {
774 std::list<field> fields; 785 std::list<field> fields;
775 fields.emplace_back("term_id", word1); 786 fields.emplace_back("term_id", word1);
776 fields.emplace_back("domain_id", word2); 787 fields.emplace_back("domain_id", word2);
777 788
778 db_.insertIntoTable(table_name, std::move(fields)); 789 db_.insertIntoTable(table_name, std::move(fields));
779 } 790 }
780 } 791 }
781 } 792 }
782 } 793 }
783 794
784 void generator::readWordNetCausality() 795 void generator::readWordNetCausality()
785 { 796 {
786 std::list<std::string> lines(readFile(wordNetPath_ + "wn_cs.pl")); 797 std::list<std::string> lines(readFile(wordNetPath_ + "wn_cs.pl"));
@@ -788,7 +799,7 @@ namespace verbly {
788 for (auto line : lines) 799 for (auto line : lines)
789 { 800 {
790 ppgs.update(); 801 ppgs.update();
791 802
792 std::regex relation("^cs\\((2\\d{8}),(2\\d{8})\\)\\."); 803 std::regex relation("^cs\\((2\\d{8}),(2\\d{8})\\)\\.");
793 std::smatch relation_data; 804 std::smatch relation_data;
794 if (!std::regex_search(line, relation_data, relation)) 805 if (!std::regex_search(line, relation_data, relation))
@@ -798,21 +809,21 @@ namespace verbly {
798 809
799 int lookup1 = std::stoi(relation_data[1]); 810 int lookup1 = std::stoi(relation_data[1]);
800 int lookup2 = std::stoi(relation_data[2]); 811 int lookup2 = std::stoi(relation_data[2]);
801 812
802 if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2)) 813 if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2))
803 { 814 {
804 notion& notion1 = *notionByWnid_.at(lookup1); 815 notion& notion1 = *notionByWnid_.at(lookup1);
805 notion& notion2 = *notionByWnid_.at(lookup2); 816 notion& notion2 = *notionByWnid_.at(lookup2);
806 817
807 std::list<field> fields; 818 std::list<field> fields;
808 fields.emplace_back("effect_id", notion1.getId()); 819 fields.emplace_back("effect_id", notion1.getId());
809 fields.emplace_back("cause_id", notion2.getId()); 820 fields.emplace_back("cause_id", notion2.getId());
810 821
811 db_.insertIntoTable("causality", std::move(fields)); 822 db_.insertIntoTable("causality", std::move(fields));
812 } 823 }
813 } 824 }
814 } 825 }
815 826
816 void generator::readWordNetEntailment() 827 void generator::readWordNetEntailment()
817 { 828 {
818 std::list<std::string> lines(readFile(wordNetPath_ + "wn_ent.pl")); 829 std::list<std::string> lines(readFile(wordNetPath_ + "wn_ent.pl"));
@@ -820,7 +831,7 @@ namespace verbly {
820 for (auto line : lines) 831 for (auto line : lines)
821 { 832 {
822 ppgs.update(); 833 ppgs.update();
823 834
824 std::regex relation("^ent\\((2\\d{8}),(2\\d{8})\\)\\."); 835 std::regex relation("^ent\\((2\\d{8}),(2\\d{8})\\)\\.");
825 std::smatch relation_data; 836 std::smatch relation_data;
826 if (!std::regex_search(line, relation_data, relation)) 837 if (!std::regex_search(line, relation_data, relation))
@@ -830,21 +841,21 @@ namespace verbly {
830 841
831 int lookup1 = std::stoi(relation_data[1]); 842 int lookup1 = std::stoi(relation_data[1]);
832 int lookup2 = std::stoi(relation_data[2]); 843 int lookup2 = std::stoi(relation_data[2]);
833 844
834 if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2)) 845 if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2))
835 { 846 {
836 notion& notion1 = *notionByWnid_.at(lookup1); 847 notion& notion1 = *notionByWnid_.at(lookup1);
837 notion& notion2 = *notionByWnid_.at(lookup2); 848 notion& notion2 = *notionByWnid_.at(lookup2);
838 849
839 std::list<field> fields; 850 std::list<field> fields;
840 fields.emplace_back("given_id", notion1.getId()); 851 fields.emplace_back("given_id", notion1.getId());
841 fields.emplace_back("entailment_id", notion2.getId()); 852 fields.emplace_back("entailment_id", notion2.getId());
842 853
843 db_.insertIntoTable("entailment", std::move(fields)); 854 db_.insertIntoTable("entailment", std::move(fields));
844 } 855 }
845 } 856 }
846 } 857 }
847 858
848 void generator::readWordNetHypernymy() 859 void generator::readWordNetHypernymy()
849 { 860 {
850 std::list<std::string> lines(readFile(wordNetPath_ + "wn_hyp.pl")); 861 std::list<std::string> lines(readFile(wordNetPath_ + "wn_hyp.pl"));
@@ -852,7 +863,7 @@ namespace verbly {
852 for (auto line : lines) 863 for (auto line : lines)
853 { 864 {
854 ppgs.update(); 865 ppgs.update();
855 866
856 std::regex relation("^hyp\\(([12]\\d{8}),([12]\\d{8})\\)\\."); 867 std::regex relation("^hyp\\(([12]\\d{8}),([12]\\d{8})\\)\\.");
857 std::smatch relation_data; 868 std::smatch relation_data;
858 if (!std::regex_search(line, relation_data, relation)) 869 if (!std::regex_search(line, relation_data, relation))
@@ -862,21 +873,21 @@ namespace verbly {
862 873
863 int lookup1 = std::stoi(relation_data[1]); 874 int lookup1 = std::stoi(relation_data[1]);
864 int lookup2 = std::stoi(relation_data[2]); 875 int lookup2 = std::stoi(relation_data[2]);
865 876
866 if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2)) 877 if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2))
867 { 878 {
868 notion& notion1 = *notionByWnid_.at(lookup1); 879 notion& notion1 = *notionByWnid_.at(lookup1);
869 notion& notion2 = *notionByWnid_.at(lookup2); 880 notion& notion2 = *notionByWnid_.at(lookup2);
870 881
871 std::list<field> fields; 882 std::list<field> fields;
872 fields.emplace_back("hyponym_id", notion1.getId()); 883 fields.emplace_back("hyponym_id", notion1.getId());
873 fields.emplace_back("hypernym_id", notion2.getId()); 884 fields.emplace_back("hypernym_id", notion2.getId());
874 885
875 db_.insertIntoTable("hypernymy", std::move(fields)); 886 db_.insertIntoTable("hypernymy", std::move(fields));
876 } 887 }
877 } 888 }
878 } 889 }
879 890
880 void generator::readWordNetInstantiation() 891 void generator::readWordNetInstantiation()
881 { 892 {
882 std::list<std::string> lines(readFile(wordNetPath_ + "wn_ins.pl")); 893 std::list<std::string> lines(readFile(wordNetPath_ + "wn_ins.pl"));
@@ -884,7 +895,7 @@ namespace verbly {
884 for (auto line : lines) 895 for (auto line : lines)
885 { 896 {
886 ppgs.update(); 897 ppgs.update();
887 898
888 std::regex relation("^ins\\((1\\d{8}),(1\\d{8})\\)\\."); 899 std::regex relation("^ins\\((1\\d{8}),(1\\d{8})\\)\\.");
889 std::smatch relation_data; 900 std::smatch relation_data;
890 if (!std::regex_search(line, relation_data, relation)) 901 if (!std::regex_search(line, relation_data, relation))
@@ -894,21 +905,21 @@ namespace verbly {
894 905
895 int lookup1 = std::stoi(relation_data[1]); 906 int lookup1 = std::stoi(relation_data[1]);
896 int lookup2 = std::stoi(relation_data[2]); 907 int lookup2 = std::stoi(relation_data[2]);
897 908
898 if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2)) 909 if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2))
899 { 910 {
900 notion& notion1 = *notionByWnid_.at(lookup1); 911 notion& notion1 = *notionByWnid_.at(lookup1);
901 notion& notion2 = *notionByWnid_.at(lookup2); 912 notion& notion2 = *notionByWnid_.at(lookup2);
902 913
903 std::list<field> fields; 914 std::list<field> fields;
904 fields.emplace_back("instance_id", notion1.getId()); 915 fields.emplace_back("instance_id", notion1.getId());
905 fields.emplace_back("class_id", notion2.getId()); 916 fields.emplace_back("class_id", notion2.getId());
906 917
907 db_.insertIntoTable("instantiation", std::move(fields)); 918 db_.insertIntoTable("instantiation", std::move(fields));
908 } 919 }
909 } 920 }
910 } 921 }
911 922
912 void generator::readWordNetMemberMeronymy() 923 void generator::readWordNetMemberMeronymy()
913 { 924 {
914 std::list<std::string> lines(readFile(wordNetPath_ + "wn_mm.pl")); 925 std::list<std::string> lines(readFile(wordNetPath_ + "wn_mm.pl"));
@@ -916,7 +927,7 @@ namespace verbly {
916 for (auto line : lines) 927 for (auto line : lines)
917 { 928 {
918 ppgs.update(); 929 ppgs.update();
919 930
920 std::regex relation("^mm\\((1\\d{8}),(1\\d{8})\\)\\."); 931 std::regex relation("^mm\\((1\\d{8}),(1\\d{8})\\)\\.");
921 std::smatch relation_data; 932 std::smatch relation_data;
922 if (!std::regex_search(line, relation_data, relation)) 933 if (!std::regex_search(line, relation_data, relation))
@@ -926,21 +937,21 @@ namespace verbly {
926 937
927 int lookup1 = std::stoi(relation_data[1]); 938 int lookup1 = std::stoi(relation_data[1]);
928 int lookup2 = std::stoi(relation_data[2]); 939 int lookup2 = std::stoi(relation_data[2]);
929 940
930 if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2)) 941 if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2))
931 { 942 {
932 notion& notion1 = *notionByWnid_.at(lookup1); 943 notion& notion1 = *notionByWnid_.at(lookup1);
933 notion& notion2 = *notionByWnid_.at(lookup2); 944 notion& notion2 = *notionByWnid_.at(lookup2);
934 945
935 std::list<field> fields; 946 std::list<field> fields;
936 fields.emplace_back("holonym_id", notion1.getId()); 947 fields.emplace_back("holonym_id", notion1.getId());
937 fields.emplace_back("meronym_id", notion2.getId()); 948 fields.emplace_back("meronym_id", notion2.getId());
938 949
939 db_.insertIntoTable("member_meronymy", std::move(fields)); 950 db_.insertIntoTable("member_meronymy", std::move(fields));
940 } 951 }
941 } 952 }
942 } 953 }
943 954
944 void generator::readWordNetPartMeronymy() 955 void generator::readWordNetPartMeronymy()
945 { 956 {
946 std::list<std::string> lines(readFile(wordNetPath_ + "wn_mp.pl")); 957 std::list<std::string> lines(readFile(wordNetPath_ + "wn_mp.pl"));
@@ -948,7 +959,7 @@ namespace verbly {
948 for (auto line : lines) 959 for (auto line : lines)
949 { 960 {
950 ppgs.update(); 961 ppgs.update();
951 962
952 std::regex relation("^mp\\((1\\d{8}),(1\\d{8})\\)\\."); 963 std::regex relation("^mp\\((1\\d{8}),(1\\d{8})\\)\\.");
953 std::smatch relation_data; 964 std::smatch relation_data;
954 if (!std::regex_search(line, relation_data, relation)) 965 if (!std::regex_search(line, relation_data, relation))
@@ -958,21 +969,21 @@ namespace verbly {
958 969
959 int lookup1 = std::stoi(relation_data[1]); 970 int lookup1 = std::stoi(relation_data[1]);
960 int lookup2 = std::stoi(relation_data[2]); 971 int lookup2 = std::stoi(relation_data[2]);
961 972
962 if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2)) 973 if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2))
963 { 974 {
964 notion& notion1 = *notionByWnid_.at(lookup1); 975 notion& notion1 = *notionByWnid_.at(lookup1);
965 notion& notion2 = *notionByWnid_.at(lookup2); 976 notion& notion2 = *notionByWnid_.at(lookup2);
966 977
967 std::list<field> fields; 978 std::list<field> fields;
968 fields.emplace_back("holonym_id", notion1.getId()); 979 fields.emplace_back("holonym_id", notion1.getId());
969 fields.emplace_back("meronym_id", notion2.getId()); 980 fields.emplace_back("meronym_id", notion2.getId());
970 981
971 db_.insertIntoTable("part_meronymy", std::move(fields)); 982 db_.insertIntoTable("part_meronymy", std::move(fields));
972 } 983 }
973 } 984 }
974 } 985 }
975 986
976 void generator::readWordNetSubstanceMeronymy() 987 void generator::readWordNetSubstanceMeronymy()
977 { 988 {
978 std::list<std::string> lines(readFile(wordNetPath_ + "wn_ms.pl")); 989 std::list<std::string> lines(readFile(wordNetPath_ + "wn_ms.pl"));
@@ -980,7 +991,7 @@ namespace verbly {
980 for (auto line : lines) 991 for (auto line : lines)
981 { 992 {
982 ppgs.update(); 993 ppgs.update();
983 994
984 std::regex relation("^ms\\((1\\d{8}),(1\\d{8})\\)\\."); 995 std::regex relation("^ms\\((1\\d{8}),(1\\d{8})\\)\\.");
985 std::smatch relation_data; 996 std::smatch relation_data;
986 if (!std::regex_search(line, relation_data, relation)) 997 if (!std::regex_search(line, relation_data, relation))
@@ -990,21 +1001,21 @@ namespace verbly {
990 1001
991 int lookup1 = std::stoi(relation_data[1]); 1002 int lookup1 = std::stoi(relation_data[1]);
992 int lookup2 = std::stoi(relation_data[2]); 1003 int lookup2 = std::stoi(relation_data[2]);
993 1004
994 if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2)) 1005 if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2))
995 { 1006 {
996 notion& notion1 = *notionByWnid_.at(lookup1); 1007 notion& notion1 = *notionByWnid_.at(lookup1);
997 notion& notion2 = *notionByWnid_.at(lookup2); 1008 notion& notion2 = *notionByWnid_.at(lookup2);
998 1009
999 std::list<field> fields; 1010 std::list<field> fields;
1000 fields.emplace_back("holonym_id", notion1.getId()); 1011 fields.emplace_back("holonym_id", notion1.getId());
1001 fields.emplace_back("meronym_id", notion2.getId()); 1012 fields.emplace_back("meronym_id", notion2.getId());
1002 1013
1003 db_.insertIntoTable("substance_meronymy", std::move(fields)); 1014 db_.insertIntoTable("substance_meronymy", std::move(fields));
1004 } 1015 }
1005 } 1016 }
1006 } 1017 }
1007 1018
1008 void generator::readWordNetPertainymy() 1019 void generator::readWordNetPertainymy()
1009 { 1020 {
1010 std::list<std::string> lines(readFile(wordNetPath_ + "wn_per.pl")); 1021 std::list<std::string> lines(readFile(wordNetPath_ + "wn_per.pl"));
@@ -1012,7 +1023,7 @@ namespace verbly {
1012 for (auto line : lines) 1023 for (auto line : lines)
1013 { 1024 {
1014 ppgs.update(); 1025 ppgs.update();
1015 1026
1016 std::regex relation("^per\\(([34]\\d{8}),(\\d+),([13]\\d{8}),(\\d+)\\)\\."); 1027 std::regex relation("^per\\(([34]\\d{8}),(\\d+),([13]\\d{8}),(\\d+)\\)\\.");
1017 std::smatch relation_data; 1028 std::smatch relation_data;
1018 if (!std::regex_search(line, relation_data, relation)) 1029 if (!std::regex_search(line, relation_data, relation))
@@ -1022,31 +1033,31 @@ namespace verbly {
1022 1033
1023 std::pair<int, int> lookup1(std::stoi(relation_data[1]), std::stoi(relation_data[2])); 1034 std::pair<int, int> lookup1(std::stoi(relation_data[1]), std::stoi(relation_data[2]));
1024 std::pair<int, int> lookup2(std::stoi(relation_data[3]), std::stoi(relation_data[4])); 1035 std::pair<int, int> lookup2(std::stoi(relation_data[3]), std::stoi(relation_data[4]));
1025 1036
1026 if (wordByWnidAndWnum_.count(lookup1) && wordByWnidAndWnum_.count(lookup2)) 1037 if (wordByWnidAndWnum_.count(lookup1) && wordByWnidAndWnum_.count(lookup2))
1027 { 1038 {
1028 word& word1 = *wordByWnidAndWnum_.at(lookup1); 1039 word& word1 = *wordByWnidAndWnum_.at(lookup1);
1029 word& word2 = *wordByWnidAndWnum_.at(lookup2); 1040 word& word2 = *wordByWnidAndWnum_.at(lookup2);
1030 1041
1031 if (word1.getNotion().getPartOfSpeech() == part_of_speech::adjective) 1042 if (word1.getNotion().getPartOfSpeech() == part_of_speech::adjective)
1032 { 1043 {
1033 std::list<field> fields; 1044 std::list<field> fields;
1034 fields.emplace_back("pertainym_id", word1.getId()); 1045 fields.emplace_back("pertainym_id", word1.getId());
1035 fields.emplace_back("noun_id", word2.getId()); 1046 fields.emplace_back("noun_id", word2.getId());
1036 1047
1037 db_.insertIntoTable("pertainymy", std::move(fields)); 1048 db_.insertIntoTable("pertainymy", std::move(fields));
1038 } else if (word1.getNotion().getPartOfSpeech() == part_of_speech::adverb) 1049 } else if (word1.getNotion().getPartOfSpeech() == part_of_speech::adverb)
1039 { 1050 {
1040 std::list<field> fields; 1051 std::list<field> fields;
1041 fields.emplace_back("mannernym_id", word1.getId()); 1052 fields.emplace_back("mannernym_id", word1.getId());
1042 fields.emplace_back("adjective_id", word2.getId()); 1053 fields.emplace_back("adjective_id", word2.getId());
1043 1054
1044 db_.insertIntoTable("mannernymy", std::move(fields)); 1055 db_.insertIntoTable("mannernymy", std::move(fields));
1045 } 1056 }
1046 } 1057 }
1047 } 1058 }
1048 } 1059 }
1049 1060
1050 void generator::readWordNetSpecification() 1061 void generator::readWordNetSpecification()
1051 { 1062 {
1052 std::list<std::string> lines(readFile(wordNetPath_ + "wn_sa.pl")); 1063 std::list<std::string> lines(readFile(wordNetPath_ + "wn_sa.pl"));
@@ -1064,21 +1075,21 @@ namespace verbly {
1064 1075
1065 std::pair<int, int> lookup1(std::stoi(relation_data[1]), std::stoi(relation_data[2])); 1076 std::pair<int, int> lookup1(std::stoi(relation_data[1]), std::stoi(relation_data[2]));
1066 std::pair<int, int> lookup2(std::stoi(relation_data[3]), std::stoi(relation_data[4])); 1077 std::pair<int, int> lookup2(std::stoi(relation_data[3]), std::stoi(relation_data[4]));
1067 1078
1068 if (wordByWnidAndWnum_.count(lookup1) && wordByWnidAndWnum_.count(lookup2)) 1079 if (wordByWnidAndWnum_.count(lookup1) && wordByWnidAndWnum_.count(lookup2))
1069 { 1080 {
1070 word& word1 = *wordByWnidAndWnum_.at(lookup1); 1081 word& word1 = *wordByWnidAndWnum_.at(lookup1);
1071 word& word2 = *wordByWnidAndWnum_.at(lookup2); 1082 word& word2 = *wordByWnidAndWnum_.at(lookup2);
1072 1083
1073 std::list<field> fields; 1084 std::list<field> fields;
1074 fields.emplace_back("general_id", word1.getId()); 1085 fields.emplace_back("general_id", word1.getId());
1075 fields.emplace_back("specific_id", word2.getId()); 1086 fields.emplace_back("specific_id", word2.getId());
1076 1087
1077 db_.insertIntoTable("specification", std::move(fields)); 1088 db_.insertIntoTable("specification", std::move(fields));
1078 } 1089 }
1079 } 1090 }
1080 } 1091 }
1081 1092
1082 void generator::readWordNetSimilarity() 1093 void generator::readWordNetSimilarity()
1083 { 1094 {
1084 std::list<std::string> lines(readFile(wordNetPath_ + "wn_sim.pl")); 1095 std::list<std::string> lines(readFile(wordNetPath_ + "wn_sim.pl"));
@@ -1086,7 +1097,7 @@ namespace verbly {
1086 for (auto line : lines) 1097 for (auto line : lines)
1087 { 1098 {
1088 ppgs.update(); 1099 ppgs.update();
1089 1100
1090 std::regex relation("^sim\\((3\\d{8}),(3\\d{8})\\)\\."); 1101 std::regex relation("^sim\\((3\\d{8}),(3\\d{8})\\)\\.");
1091 std::smatch relation_data; 1102 std::smatch relation_data;
1092 if (!std::regex_search(line, relation_data, relation)) 1103 if (!std::regex_search(line, relation_data, relation))
@@ -1096,21 +1107,21 @@ namespace verbly {
1096 1107
1097 int lookup1 = std::stoi(relation_data[1]); 1108 int lookup1 = std::stoi(relation_data[1]);
1098 int lookup2 = std::stoi(relation_data[2]); 1109 int lookup2 = std::stoi(relation_data[2]);
1099 1110
1100 if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2)) 1111 if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2))
1101 { 1112 {
1102 notion& notion1 = *notionByWnid_.at(lookup1); 1113 notion& notion1 = *notionByWnid_.at(lookup1);
1103 notion& notion2 = *notionByWnid_.at(lookup2); 1114 notion& notion2 = *notionByWnid_.at(lookup2);
1104 1115
1105 std::list<field> fields; 1116 std::list<field> fields;
1106 fields.emplace_back("adjective_1_id", notion1.getId()); 1117 fields.emplace_back("adjective_1_id", notion1.getId());
1107 fields.emplace_back("adjective_2_id", notion2.getId()); 1118 fields.emplace_back("adjective_2_id", notion2.getId());
1108 1119
1109 db_.insertIntoTable("similarity", std::move(fields)); 1120 db_.insertIntoTable("similarity", std::move(fields));
1110 } 1121 }
1111 } 1122 }
1112 } 1123 }
1113 1124
1114 std::list<std::string> generator::readFile(std::string path) 1125 std::list<std::string> generator::readFile(std::string path)
1115 { 1126 {
1116 std::ifstream file(path); 1127 std::ifstream file(path);
@@ -1118,7 +1129,7 @@ namespace verbly {
1118 { 1129 {
1119 throw std::invalid_argument("Could not find file " + path); 1130 throw std::invalid_argument("Could not find file " + path);
1120 } 1131 }
1121 1132
1122 std::list<std::string> lines; 1133 std::list<std::string> lines;
1123 std::string line; 1134 std::string line;
1124 while (std::getline(file, line)) 1135 while (std::getline(file, line))
@@ -1127,13 +1138,13 @@ namespace verbly {
1127 { 1138 {
1128 line.pop_back(); 1139 line.pop_back();
1129 } 1140 }
1130 1141
1131 lines.push_back(line); 1142 lines.push_back(line);
1132 } 1143 }
1133 1144
1134 return lines; 1145 return lines;
1135 } 1146 }
1136 1147
1137 part_of_speech generator::partOfSpeechByWnid(int wnid) 1148 part_of_speech generator::partOfSpeechByWnid(int wnid)
1138 { 1149 {
1139 switch (wnid / 100000000) 1150 switch (wnid / 100000000)
@@ -1145,14 +1156,14 @@ namespace verbly {
1145 default: throw std::domain_error("Invalid WordNet synset ID: " + std::to_string(wnid)); 1156 default: throw std::domain_error("Invalid WordNet synset ID: " + std::to_string(wnid));
1146 } 1157 }
1147 } 1158 }
1148 1159
1149 notion& generator::createNotion(part_of_speech partOfSpeech) 1160 notion& generator::createNotion(part_of_speech partOfSpeech)
1150 { 1161 {
1151 notions_.emplace_back(partOfSpeech); 1162 notions_.emplace_back(partOfSpeech);
1152 1163
1153 return notions_.back(); 1164 return notions_.back();
1154 } 1165 }
1155 1166
1156 notion& generator::lookupOrCreateNotion(int wnid) 1167 notion& generator::lookupOrCreateNotion(int wnid)
1157 { 1168 {
1158 if (!notionByWnid_.count(wnid)) 1169 if (!notionByWnid_.count(wnid))
@@ -1160,10 +1171,10 @@ namespace verbly {
1160 notions_.emplace_back(partOfSpeechByWnid(wnid), wnid); 1171 notions_.emplace_back(partOfSpeechByWnid(wnid), wnid);
1161 notionByWnid_[wnid] = &notions_.back(); 1172 notionByWnid_[wnid] = &notions_.back();
1162 } 1173 }
1163 1174
1164 return *notionByWnid_.at(wnid); 1175 return *notionByWnid_.at(wnid);
1165 } 1176 }
1166 1177
1167 lemma& generator::lookupOrCreateLemma(std::string base_form) 1178 lemma& generator::lookupOrCreateLemma(std::string base_form)
1168 { 1179 {
1169 if (!lemmaByBaseForm_.count(base_form)) 1180 if (!lemmaByBaseForm_.count(base_form))
@@ -1171,10 +1182,10 @@ namespace verbly {
1171 lemmas_.emplace_back(lookupOrCreateForm(base_form)); 1182 lemmas_.emplace_back(lookupOrCreateForm(base_form));
1172 lemmaByBaseForm_[base_form] = &lemmas_.back(); 1183 lemmaByBaseForm_[base_form] = &lemmas_.back();
1173 } 1184 }
1174 1185
1175 return *lemmaByBaseForm_.at(base_form); 1186 return *lemmaByBaseForm_.at(base_form);
1176 } 1187 }
1177 1188
1178 form& generator::lookupOrCreateForm(std::string text) 1189 form& generator::lookupOrCreateForm(std::string text)
1179 { 1190 {
1180 if (!formByText_.count(text)) 1191 if (!formByText_.count(text))
@@ -1182,32 +1193,32 @@ namespace verbly {
1182 forms_.emplace_back(text); 1193 forms_.emplace_back(text);
1183 formByText_[text] = &forms_.back(); 1194 formByText_[text] = &forms_.back();
1184 } 1195 }
1185 1196
1186 return *formByText_[text]; 1197 return *formByText_[text];
1187 } 1198 }
1188 1199
1189 template <typename... Args> word& generator::createWord(Args&&... args) 1200 template <typename... Args> word& generator::createWord(Args&&... args)
1190 { 1201 {
1191 words_.emplace_back(std::forward<Args>(args)...); 1202 words_.emplace_back(std::forward<Args>(args)...);
1192 word& w = words_.back(); 1203 word& w = words_.back();
1193 1204
1194 wordsByBaseForm_[w.getLemma().getBaseForm().getText()].insert(&w); 1205 wordsByBaseForm_[w.getLemma().getBaseForm().getText()].insert(&w);
1195 1206
1196 if (w.getNotion().hasWnid()) 1207 if (w.getNotion().hasWnid())
1197 { 1208 {
1198 wordsByWnid_[w.getNotion().getWnid()].insert(&w); 1209 wordsByWnid_[w.getNotion().getWnid()].insert(&w);
1199 } 1210 }
1200 1211
1201 return w; 1212 return w;
1202 } 1213 }
1203 1214
1204 group& generator::createGroup(xmlNodePtr top) 1215 group& generator::createGroup(xmlNodePtr top)
1205 { 1216 {
1206 groups_.emplace_back(); 1217 groups_.emplace_back();
1207 group& grp = groups_.back(); 1218 group& grp = groups_.back();
1208 1219
1209 xmlChar* key; 1220 xmlChar* key;
1210 1221
1211 for (xmlNodePtr node = top->xmlChildrenNode; node != nullptr; node = node->next) 1222 for (xmlNodePtr node = top->xmlChildrenNode; node != nullptr; node = node->next)
1212 { 1223 {
1213 if (!xmlStrcmp(node->name, reinterpret_cast<const xmlChar*>("SUBCLASSES"))) 1224 if (!xmlStrcmp(node->name, reinterpret_cast<const xmlChar*>("SUBCLASSES")))
@@ -1223,14 +1234,14 @@ namespace verbly {
1223 } catch (const std::exception& e) 1234 } catch (const std::exception& e)
1224 { 1235 {
1225 key = xmlGetProp(subclass, reinterpret_cast<const xmlChar*>("ID")); 1236 key = xmlGetProp(subclass, reinterpret_cast<const xmlChar*>("ID"));
1226 1237
1227 if (key == nullptr) 1238 if (key == nullptr)
1228 { 1239 {
1229 std::throw_with_nested(std::logic_error("Error parsing IDless subgroup")); 1240 std::throw_with_nested(std::logic_error("Error parsing IDless subgroup"));
1230 } else { 1241 } else {
1231 std::string subgroupId(reinterpret_cast<const char*>(key)); 1242 std::string subgroupId(reinterpret_cast<const char*>(key));
1232 xmlFree(key); 1243 xmlFree(key);
1233 1244
1234 std::throw_with_nested(std::logic_error("Error parsing subgroup " + subgroupId)); 1245 std::throw_with_nested(std::logic_error("Error parsing subgroup " + subgroupId));
1235 } 1246 }
1236 } 1247 }
@@ -1245,25 +1256,25 @@ namespace verbly {
1245 key = xmlGetProp(member, reinterpret_cast<const xmlChar*>("wn")); 1256 key = xmlGetProp(member, reinterpret_cast<const xmlChar*>("wn"));
1246 std::string wnSenses(reinterpret_cast<const char*>(key)); 1257 std::string wnSenses(reinterpret_cast<const char*>(key));
1247 xmlFree(key); 1258 xmlFree(key);
1248 1259
1249 auto wnSenseKeys = split<std::list<std::string>>(wnSenses, " "); 1260 auto wnSenseKeys = split<std::list<std::string>>(wnSenses, " ");
1250 if (!wnSenseKeys.empty()) 1261 if (!wnSenseKeys.empty())
1251 { 1262 {
1252 std::list<std::string> tempKeys; 1263 std::list<std::string> tempKeys;
1253 1264
1254 std::transform(std::begin(wnSenseKeys), std::end(wnSenseKeys), std::back_inserter(tempKeys), [] (std::string sense) { 1265 std::transform(std::begin(wnSenseKeys), std::end(wnSenseKeys), std::back_inserter(tempKeys), [] (std::string sense) {
1255 return sense + "::"; 1266 return sense + "::";
1256 }); 1267 });
1257 1268
1258 std::list<std::string> filteredKeys; 1269 std::list<std::string> filteredKeys;
1259 1270
1260 std::remove_copy_if(std::begin(tempKeys), std::end(tempKeys), std::back_inserter(filteredKeys), [&] (std::string sense) { 1271 std::remove_copy_if(std::begin(tempKeys), std::end(tempKeys), std::back_inserter(filteredKeys), [&] (std::string sense) {
1261 return !wnSenseKeys_.count(sense); 1272 return !wnSenseKeys_.count(sense);
1262 }); 1273 });
1263 1274
1264 wnSenseKeys = std::move(filteredKeys); 1275 wnSenseKeys = std::move(filteredKeys);
1265 } 1276 }
1266 1277
1267 if (!wnSenseKeys.empty()) 1278 if (!wnSenseKeys.empty())
1268 { 1279 {
1269 for (std::string sense : wnSenseKeys) 1280 for (std::string sense : wnSenseKeys)
@@ -1275,11 +1286,11 @@ namespace verbly {
1275 key = xmlGetProp(member, reinterpret_cast<const xmlChar*>("name")); 1286 key = xmlGetProp(member, reinterpret_cast<const xmlChar*>("name"));
1276 std::string memberName(reinterpret_cast<const char*>(key)); 1287 std::string memberName(reinterpret_cast<const char*>(key));
1277 xmlFree(key); 1288 xmlFree(key);
1278 1289
1279 notion& n = createNotion(part_of_speech::verb); 1290 notion& n = createNotion(part_of_speech::verb);
1280 lemma& l = lookupOrCreateLemma(memberName); 1291 lemma& l = lookupOrCreateLemma(memberName);
1281 word& w = createWord(n, l); 1292 word& w = createWord(n, l);
1282 1293
1283 w.setVerbGroup(grp); 1294 w.setVerbGroup(grp);
1284 } 1295 }
1285 } 1296 }
@@ -1293,7 +1304,7 @@ namespace verbly {
1293 key = xmlGetProp(roletopnode, reinterpret_cast<const xmlChar*>("type")); 1304 key = xmlGetProp(roletopnode, reinterpret_cast<const xmlChar*>("type"));
1294 std::string roleName = reinterpret_cast<const char*>(key); 1305 std::string roleName = reinterpret_cast<const char*>(key);
1295 xmlFree(key); 1306 xmlFree(key);
1296 1307
1297 selrestr roleSelrestrs; 1308 selrestr roleSelrestrs;
1298 for (xmlNodePtr rolenode = roletopnode->xmlChildrenNode; rolenode != nullptr; rolenode = rolenode->next) 1309 for (xmlNodePtr rolenode = roletopnode->xmlChildrenNode; rolenode != nullptr; rolenode = rolenode->next)
1299 { 1310 {
@@ -1314,19 +1325,19 @@ namespace verbly {
1314 { 1325 {
1315 frames_.emplace_back(); 1326 frames_.emplace_back();
1316 frame& fr = frames_.back(); 1327 frame& fr = frames_.back();
1317 1328
1318 for (xmlNodePtr framenode = frametopnode->xmlChildrenNode; framenode != nullptr; framenode = framenode->next) 1329 for (xmlNodePtr framenode = frametopnode->xmlChildrenNode; framenode != nullptr; framenode = framenode->next)
1319 { 1330 {
1320 if (!xmlStrcmp(framenode->name, reinterpret_cast<const xmlChar*>("SYNTAX"))) 1331 if (!xmlStrcmp(framenode->name, reinterpret_cast<const xmlChar*>("SYNTAX")))
1321 { 1332 {
1322 for (xmlNodePtr syntaxnode = framenode->xmlChildrenNode; syntaxnode != nullptr; syntaxnode = syntaxnode->next) 1333 for (xmlNodePtr syntaxnode = framenode->xmlChildrenNode; syntaxnode != nullptr; syntaxnode = syntaxnode->next)
1323 { 1334 {
1324 if (!xmlStrcmp(syntaxnode->name, reinterpret_cast<const xmlChar*>("NP"))) 1335 if (!xmlStrcmp(syntaxnode->name, reinterpret_cast<const xmlChar*>("NP")))
1325 { 1336 {
1326 key = xmlGetProp(syntaxnode, reinterpret_cast<const xmlChar*>("value")); 1337 key = xmlGetProp(syntaxnode, reinterpret_cast<const xmlChar*>("value"));
1327 std::string partRole = reinterpret_cast<const char*>(key); 1338 std::string partRole = reinterpret_cast<const char*>(key);
1328 xmlFree(key); 1339 xmlFree(key);
1329 1340
1330 selrestr partSelrestrs; 1341 selrestr partSelrestrs;
1331 std::set<std::string> partSynrestrs; 1342 std::set<std::string> partSynrestrs;
1332 1343
@@ -1344,13 +1355,13 @@ namespace verbly {
1344 } 1355 }
1345 } 1356 }
1346 } 1357 }
1347 1358
1348 if (!xmlStrcmp(npnode->name, reinterpret_cast<const xmlChar*>("SELRESTRS"))) 1359 if (!xmlStrcmp(npnode->name, reinterpret_cast<const xmlChar*>("SELRESTRS")))
1349 { 1360 {
1350 partSelrestrs = parseSelrestr(npnode); 1361 partSelrestrs = parseSelrestr(npnode);
1351 } 1362 }
1352 } 1363 }
1353 1364
1354 fr.push_back(part::createNounPhrase(std::move(partRole), std::move(partSelrestrs), std::move(partSynrestrs))); 1365 fr.push_back(part::createNounPhrase(std::move(partRole), std::move(partSelrestrs), std::move(partSynrestrs)));
1355 } else if (!xmlStrcmp(syntaxnode->name, reinterpret_cast<const xmlChar*>("VERB"))) 1366 } else if (!xmlStrcmp(syntaxnode->name, reinterpret_cast<const xmlChar*>("VERB")))
1356 { 1367 {
@@ -1359,11 +1370,11 @@ namespace verbly {
1359 { 1370 {
1360 std::set<std::string> partChoices; 1371 std::set<std::string> partChoices;
1361 bool partLiteral; 1372 bool partLiteral;
1362 1373
1363 if (xmlHasProp(syntaxnode, reinterpret_cast<const xmlChar*>("value"))) 1374 if (xmlHasProp(syntaxnode, reinterpret_cast<const xmlChar*>("value")))
1364 { 1375 {
1365 partLiteral = true; 1376 partLiteral = true;
1366 1377
1367 key = xmlGetProp(syntaxnode, reinterpret_cast<const xmlChar*>("value")); 1378 key = xmlGetProp(syntaxnode, reinterpret_cast<const xmlChar*>("value"));
1368 std::string choicesStr = reinterpret_cast<const char*>(key); 1379 std::string choicesStr = reinterpret_cast<const char*>(key);
1369 xmlFree(key); 1380 xmlFree(key);
@@ -1380,7 +1391,7 @@ namespace verbly {
1380 } 1391 }
1381 } else { 1392 } else {
1382 partLiteral = false; 1393 partLiteral = false;
1383 1394
1384 for (xmlNodePtr npnode = syntaxnode->xmlChildrenNode; npnode != nullptr; npnode = npnode->next) 1395 for (xmlNodePtr npnode = syntaxnode->xmlChildrenNode; npnode != nullptr; npnode = npnode->next)
1385 { 1396 {
1386 if (!xmlStrcmp(npnode->name, reinterpret_cast<const xmlChar*>("SELRESTRS"))) 1397 if (!xmlStrcmp(npnode->name, reinterpret_cast<const xmlChar*>("SELRESTRS")))
@@ -1397,7 +1408,7 @@ namespace verbly {
1397 } 1408 }
1398 } 1409 }
1399 } 1410 }
1400 1411
1401 fr.push_back(part::createPreposition(std::move(partChoices), partLiteral)); 1412 fr.push_back(part::createPreposition(std::move(partChoices), partLiteral));
1402 } else if (!xmlStrcmp(syntaxnode->name, reinterpret_cast<const xmlChar*>("ADJ"))) 1413 } else if (!xmlStrcmp(syntaxnode->name, reinterpret_cast<const xmlChar*>("ADJ")))
1403 { 1414 {
@@ -1410,7 +1421,7 @@ namespace verbly {
1410 key = xmlGetProp(syntaxnode, reinterpret_cast<const xmlChar*>("value")); 1421 key = xmlGetProp(syntaxnode, reinterpret_cast<const xmlChar*>("value"));
1411 std::string literalValue = reinterpret_cast<const char*>(key); 1422 std::string literalValue = reinterpret_cast<const char*>(key);
1412 xmlFree(key); 1423 xmlFree(key);
1413 1424
1414 fr.push_back(part::createLiteral(literalValue)); 1425 fr.push_back(part::createLiteral(literalValue));
1415 } else { 1426 } else {
1416 continue; 1427 continue;
@@ -1427,11 +1438,11 @@ namespace verbly {
1427 1438
1428 return grp; 1439 return grp;
1429 } 1440 }
1430 1441
1431 selrestr generator::parseSelrestr(xmlNodePtr top) 1442 selrestr generator::parseSelrestr(xmlNodePtr top)
1432 { 1443 {
1433 xmlChar* key; 1444 xmlChar* key;
1434 1445
1435 if (!xmlStrcmp(top->name, reinterpret_cast<const xmlChar*>("SELRESTRS"))) 1446 if (!xmlStrcmp(top->name, reinterpret_cast<const xmlChar*>("SELRESTRS")))
1436 { 1447 {
1437 if (xmlChildElementCount(top) == 0) 1448 if (xmlChildElementCount(top) == 0)
@@ -1449,10 +1460,10 @@ namespace verbly {
1449 { 1460 {
1450 orlogic = true; 1461 orlogic = true;
1451 } 1462 }
1452 1463
1453 xmlFree(key); 1464 xmlFree(key);
1454 } 1465 }
1455 1466
1456 std::list<selrestr> children; 1467 std::list<selrestr> children;
1457 for (xmlNodePtr selrestr = top->xmlChildrenNode; selrestr != nullptr; selrestr = selrestr->next) 1468 for (xmlNodePtr selrestr = top->xmlChildrenNode; selrestr != nullptr; selrestr = selrestr->next)
1458 { 1469 {
@@ -1462,7 +1473,7 @@ namespace verbly {
1462 children.push_back(parseSelrestr(selrestr)); 1473 children.push_back(parseSelrestr(selrestr));
1463 } 1474 }
1464 } 1475 }
1465 1476
1466 return selrestr(children, orlogic); 1477 return selrestr(children, orlogic);
1467 } 1478 }
1468 } else if (!xmlStrcmp(top->name, reinterpret_cast<const xmlChar*>("SELRESTR"))) 1479 } else if (!xmlStrcmp(top->name, reinterpret_cast<const xmlChar*>("SELRESTR")))
@@ -1474,12 +1485,12 @@ namespace verbly {
1474 key = xmlGetProp(top, reinterpret_cast<const xmlChar*>("type")); 1485 key = xmlGetProp(top, reinterpret_cast<const xmlChar*>("type"));
1475 std::string selRestriction = reinterpret_cast<const char*>(key); 1486 std::string selRestriction = reinterpret_cast<const char*>(key);
1476 xmlFree(key); 1487 xmlFree(key);
1477 1488
1478 return selrestr(selRestriction, selPos); 1489 return selrestr(selRestriction, selPos);
1479 } else { 1490 } else {
1480 throw std::logic_error("Badly formatted selrestr"); 1491 throw std::logic_error("Badly formatted selrestr");
1481 } 1492 }
1482 } 1493 }
1483 1494
1484 }; 1495 };
1485}; 1496};