| author | Kelly Rauchenberger <fefferburbia@gmail.com> | 2017-01-22 18:43:25 -0500 |
|---|---|---|
| committer | Kelly Rauchenberger <fefferburbia@gmail.com> | 2017-01-22 18:43:25 -0500 |
| commit | 2c5ed155e8951f7f28b82ed7570295d5629f4770 (patch) | |
| tree | 889b807b6e76ec745f19488be7700cfc574aa9fa /generator/generator.cpp | |
| parent | 8837d3ceb8358476f20d71948a1fc9bc91480d57 (diff) | |
Fixed generator ignoring multiple inflection variants
Previously, the generator recognized at most one form per inflection per lemma; now it adds every variant listed in AGID to the database.
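For context, each AGID `infl.txt` entry separates its inflection slots with `" | "` and may list several comma-separated variants within a slot, optionally tagged with annotation characters such as `?`. The sketch below is a minimal, standalone illustration of the parsing strategy this commit adopts; the "dive" entry is illustrative and the generator's actual lemma/form machinery is not used. Each slot is split on `", "` and every cleaned variant is kept, whereas the old code truncated a slot at the first comma and so kept only one form.

```cpp
// Standalone sketch of variant-preserving AGID parsing (simplified: only the
// four-slot verb case from the diff is handled, and plain strings stand in
// for the generator's lemma/form objects).
#include <iostream>
#include <map>
#include <string>
#include <vector>

// Split a string on a multi-character delimiter.
std::vector<std::string> split(const std::string& input, const std::string& delim)
{
  std::vector<std::string> result;
  size_t start = 0;
  size_t end;
  while ((end = input.find(delim, start)) != std::string::npos)
  {
    result.push_back(input.substr(start, end - start));
    start = end + delim.length();
  }
  result.push_back(input.substr(start));
  return result;
}

int main()
{
  // Illustrative AGID-style entry: "dove" and "dived" are both past-tense variants.
  std::string line = "dive V: dove, dived | dived | diving | dives";

  size_t divider = line.find(' ');
  std::string infinitive = line.substr(0, divider);
  line = line.substr(divider + 1);
  line.erase(0, line[1] == '?' ? 4 : 3); // drop "V: " or "V?: ", as in the generator

  // One list of variants per inflection slot, in AGID's four-slot verb order.
  const char* slotNames[] = {"past_tense", "past_participle", "ing_form", "s_form"};
  std::map<std::string, std::vector<std::string>> mappedForms;

  std::vector<std::string> slots = split(line, " | ");
  for (size_t i = 0; i < slots.size() && i < 4; i++)
  {
    for (std::string variant : split(slots[i], ", "))
    {
      // Strip AGID annotation characters (the diff strips at the first of "~<!? ").
      size_t sympos = variant.find_first_of("~<!? ");
      if (sympos != std::string::npos)
      {
        variant = variant.substr(0, sympos);
      }

      // Previously only the text before the first comma survived; now every
      // variant is recorded.
      mappedForms[slotNames[i]].push_back(variant);
    }
  }

  for (const auto& mapping : mappedForms)
  {
    for (const std::string& form : mapping.second)
    {
      std::cout << infinitive << " [" << mapping.first << "] -> " << form << std::endl;
    }
  }
}
```

With the old truncate-at-comma behaviour, only "dove" would have been recorded as the past tense of "dive"; the variant-preserving split also records "dived", which is the change the diff makes inside `readAgidInflections()` below.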
Diffstat (limited to 'generator/generator.cpp')
-rw-r--r-- | generator/generator.cpp | 525 |
1 file changed, 268 insertions, 257 deletions
diff --git a/generator/generator.cpp b/generator/generator.cpp
index 3dd2ce7..610a602 100644
--- a/generator/generator.cpp
+++ b/generator/generator.cpp
@@ -15,7 +15,7 @@ | |||
15 | 15 | ||
16 | namespace verbly { | 16 | namespace verbly { |
17 | namespace generator { | 17 | namespace generator { |
18 | 18 | ||
19 | generator::generator( | 19 | generator::generator( |
20 | std::string verbNetPath, | 20 | std::string verbNetPath, |
21 | std::string agidPath, | 21 | std::string agidPath, |
@@ -36,21 +36,21 @@ namespace verbly { | |||
36 | { | 36 | { |
37 | throw std::invalid_argument("Invalid VerbNet data directory"); | 37 | throw std::invalid_argument("Invalid VerbNet data directory"); |
38 | } | 38 | } |
39 | 39 | ||
40 | closedir(dir); | 40 | closedir(dir); |
41 | 41 | ||
42 | // Ensure AGID infl.txt exists | 42 | // Ensure AGID infl.txt exists |
43 | if (!std::ifstream(agidPath_)) | 43 | if (!std::ifstream(agidPath_)) |
44 | { | 44 | { |
45 | throw std::invalid_argument("AGID infl.txt file not found"); | 45 | throw std::invalid_argument("AGID infl.txt file not found"); |
46 | } | 46 | } |
47 | 47 | ||
48 | // Add directory separator to WordNet path | 48 | // Add directory separator to WordNet path |
49 | if ((wordNetPath_.back() != '/') && (wordNetPath_.back() != '\\')) | 49 | if ((wordNetPath_.back() != '/') && (wordNetPath_.back() != '\\')) |
50 | { | 50 | { |
51 | wordNetPath_ += '/'; | 51 | wordNetPath_ += '/'; |
52 | } | 52 | } |
53 | 53 | ||
54 | // Ensure WordNet tables exist | 54 | // Ensure WordNet tables exist |
55 | for (std::string table : { | 55 | for (std::string table : { |
56 | "s", "sk", "ant", "at", "cls", "hyp", "ins", "mm", "mp", "ms", "per", "sa", "sim", "syntax" | 56 | "s", "sk", "ant", "at", "cls", "hyp", "ins", "mm", "mp", "ms", "per", "sa", "sim", "syntax" |
@@ -61,37 +61,37 @@ namespace verbly { | |||
61 | throw std::invalid_argument("WordNet " + table + " table not found"); | 61 | throw std::invalid_argument("WordNet " + table + " table not found"); |
62 | } | 62 | } |
63 | } | 63 | } |
64 | 64 | ||
65 | // Ensure CMUDICT file exists | 65 | // Ensure CMUDICT file exists |
66 | if (!std::ifstream(cmudictPath_)) | 66 | if (!std::ifstream(cmudictPath_)) |
67 | { | 67 | { |
68 | throw std::invalid_argument("CMUDICT file not found"); | 68 | throw std::invalid_argument("CMUDICT file not found"); |
69 | } | 69 | } |
70 | 70 | ||
71 | // Ensure ImageNet urls.txt exists | 71 | // Ensure ImageNet urls.txt exists |
72 | if (!std::ifstream(imageNetPath_)) | 72 | if (!std::ifstream(imageNetPath_)) |
73 | { | 73 | { |
74 | throw std::invalid_argument("ImageNet urls.txt file not found"); | 74 | throw std::invalid_argument("ImageNet urls.txt file not found"); |
75 | } | 75 | } |
76 | } | 76 | } |
77 | 77 | ||
78 | void generator::run() | 78 | void generator::run() |
79 | { | 79 | { |
80 | // Create notions, words, lemmas, and forms from WordNet synsets | 80 | // Create notions, words, lemmas, and forms from WordNet synsets |
81 | readWordNetSynsets(); | 81 | readWordNetSynsets(); |
82 | 82 | ||
83 | // Reads adjective positioning WordNet data | 83 | // Reads adjective positioning WordNet data |
84 | readAdjectivePositioning(); | 84 | readAdjectivePositioning(); |
85 | 85 | ||
86 | // Counts the number of URLs ImageNet has per notion | 86 | // Counts the number of URLs ImageNet has per notion |
87 | readImageNetUrls(); | 87 | readImageNetUrls(); |
88 | 88 | ||
89 | // Creates a word by WordNet sense key lookup table | 89 | // Creates a word by WordNet sense key lookup table |
90 | readWordNetSenseKeys(); | 90 | readWordNetSenseKeys(); |
91 | 91 | ||
92 | // Creates groups and frames from VerbNet data | 92 | // Creates groups and frames from VerbNet data |
93 | readVerbNet(); | 93 | readVerbNet(); |
94 | 94 | ||
95 | // Creates forms and inflections from AGID. To reduce the amount of forms | 95 | // Creates forms and inflections from AGID. To reduce the amount of forms |
96 | // created, we do this after most lemmas that need inflecting have been | 96 | // created, we do this after most lemmas that need inflecting have been |
97 | // created through other means, and then only generate forms for | 97 | // created through other means, and then only generate forms for |
@@ -101,86 +101,78 @@ namespace verbly { | |||
101 | // then a notion and a word is generated and the form generation proceeds | 101 | // then a notion and a word is generated and the form generation proceeds |
102 | // as usual. | 102 | // as usual. |
103 | readAgidInflections(); | 103 | readAgidInflections(); |
104 | 104 | ||
105 | // Reads in prepositions and the is_a relationship | 105 | // Reads in prepositions and the is_a relationship |
106 | readPrepositions(); | 106 | readPrepositions(); |
107 | 107 | ||
108 | // Creates pronunciations from CMUDICT. To reduce the amount of | 108 | // Creates pronunciations from CMUDICT. To reduce the amount of |
109 | // pronunciations created, we do this after all forms have been created, | 109 | // pronunciations created, we do this after all forms have been created, |
110 | // and then only generate pronunciations for already-exisiting forms. | 110 | // and then only generate pronunciations for already-exisiting forms. |
111 | readCmudictPronunciations(); | 111 | readCmudictPronunciations(); |
112 | 112 | ||
113 | // Writes the database schema | 113 | // Writes the database schema |
114 | writeSchema(); | 114 | writeSchema(); |
115 | 115 | ||
116 | // Dumps data to the database | 116 | // Dumps data to the database |
117 | dumpObjects(); | 117 | dumpObjects(); |
118 | 118 | ||
119 | // Populates the antonymy relationship from WordNet | 119 | // Populates the antonymy relationship from WordNet |
120 | readWordNetAntonymy(); | 120 | readWordNetAntonymy(); |
121 | 121 | ||
122 | // Populates the variation relationship from WordNet | 122 | // Populates the variation relationship from WordNet |
123 | readWordNetVariation(); | 123 | readWordNetVariation(); |
124 | 124 | ||
125 | // Populates the usage, topicality, and regionality relationships from | 125 | // Populates the usage, topicality, and regionality relationships from |
126 | // WordNet | 126 | // WordNet |
127 | readWordNetClasses(); | 127 | readWordNetClasses(); |
128 | 128 | ||
129 | // Populates the causality relationship from WordNet | 129 | // Populates the causality relationship from WordNet |
130 | readWordNetCausality(); | 130 | readWordNetCausality(); |
131 | 131 | ||
132 | // Populates the entailment relationship from WordNet | 132 | // Populates the entailment relationship from WordNet |
133 | readWordNetEntailment(); | 133 | readWordNetEntailment(); |
134 | 134 | ||
135 | // Populates the hypernymy relationship from WordNet | 135 | // Populates the hypernymy relationship from WordNet |
136 | readWordNetHypernymy(); | 136 | readWordNetHypernymy(); |
137 | 137 | ||
138 | // Populates the instantiation relationship from WordNet | 138 | // Populates the instantiation relationship from WordNet |
139 | readWordNetInstantiation(); | 139 | readWordNetInstantiation(); |
140 | 140 | ||
141 | // Populates the member meronymy relationship from WordNet | 141 | // Populates the member meronymy relationship from WordNet |
142 | readWordNetMemberMeronymy(); | 142 | readWordNetMemberMeronymy(); |
143 | 143 | ||
144 | // Populates the part meronymy relationship from WordNet | 144 | // Populates the part meronymy relationship from WordNet |
145 | readWordNetPartMeronymy(); | 145 | readWordNetPartMeronymy(); |
146 | 146 | ||
147 | // Populates the substance meronymy relationship from WordNet | 147 | // Populates the substance meronymy relationship from WordNet |
148 | readWordNetSubstanceMeronymy(); | 148 | readWordNetSubstanceMeronymy(); |
149 | 149 | ||
150 | // Populates the pertainymy and mannernymy relationships from WordNet | 150 | // Populates the pertainymy and mannernymy relationships from WordNet |
151 | readWordNetPertainymy(); | 151 | readWordNetPertainymy(); |
152 | 152 | ||
153 | // Populates the specification relationship from WordNet | 153 | // Populates the specification relationship from WordNet |
154 | readWordNetSpecification(); | 154 | readWordNetSpecification(); |
155 | 155 | ||
156 | // Populates the adjective similarity relationship from WordNet | 156 | // Populates the adjective similarity relationship from WordNet |
157 | readWordNetSimilarity(); | 157 | readWordNetSimilarity(); |
158 | |||
159 | |||
160 | |||
161 | |||
162 | |||
163 | |||
164 | |||
165 | |||
166 | } | 158 | } |
167 | 159 | ||
168 | void generator::readWordNetSynsets() | 160 | void generator::readWordNetSynsets() |
169 | { | 161 | { |
170 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_s.pl")); | 162 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_s.pl")); |
171 | progress ppgs("Reading synsets from WordNet...", lines.size()); | 163 | progress ppgs("Reading synsets from WordNet...", lines.size()); |
172 | 164 | ||
173 | for (std::string line : lines) | 165 | for (std::string line : lines) |
174 | { | 166 | { |
175 | ppgs.update(); | 167 | ppgs.update(); |
176 | 168 | ||
177 | std::regex relation("^s\\(([1234]\\d{8}),(\\d+),'(.+)',\\w,\\d+,(\\d+)\\)\\.$"); | 169 | std::regex relation("^s\\(([1234]\\d{8}),(\\d+),'(.+)',\\w,\\d+,(\\d+)\\)\\.$"); |
178 | std::smatch relation_data; | 170 | std::smatch relation_data; |
179 | if (!std::regex_search(line, relation_data, relation)) | 171 | if (!std::regex_search(line, relation_data, relation)) |
180 | { | 172 | { |
181 | continue; | 173 | continue; |
182 | } | 174 | } |
183 | 175 | ||
184 | int synset_id = std::stoi(relation_data[1]); | 176 | int synset_id = std::stoi(relation_data[1]); |
185 | int wnum = std::stoi(relation_data[2]); | 177 | int wnum = std::stoi(relation_data[2]); |
186 | std::string text = relation_data[3]; | 178 | std::string text = relation_data[3]; |
@@ -190,7 +182,7 @@ namespace verbly { | |||
190 | { | 182 | { |
191 | text.erase(word_it, 1); | 183 | text.erase(word_it, 1); |
192 | } | 184 | } |
193 | 185 | ||
194 | // The WordNet data does contain duplicates, so we need to check that we | 186 | // The WordNet data does contain duplicates, so we need to check that we |
195 | // haven't already created this word. | 187 | // haven't already created this word. |
196 | std::pair<int, int> lookup(synset_id, wnum); | 188 | std::pair<int, int> lookup(synset_id, wnum); |
@@ -204,32 +196,32 @@ namespace verbly { | |||
204 | } | 196 | } |
205 | } | 197 | } |
206 | } | 198 | } |
207 | 199 | ||
208 | void generator::readAdjectivePositioning() | 200 | void generator::readAdjectivePositioning() |
209 | { | 201 | { |
210 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_syntax.pl")); | 202 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_syntax.pl")); |
211 | progress ppgs("Reading adjective positionings from WordNet...", lines.size()); | 203 | progress ppgs("Reading adjective positionings from WordNet...", lines.size()); |
212 | 204 | ||
213 | for (std::string line : lines) | 205 | for (std::string line : lines) |
214 | { | 206 | { |
215 | ppgs.update(); | 207 | ppgs.update(); |
216 | 208 | ||
217 | std::regex relation("^syntax\\((3\\d{8}),(\\d+),([ipa])p?\\)\\."); | 209 | std::regex relation("^syntax\\((3\\d{8}),(\\d+),([ipa])p?\\)\\."); |
218 | std::smatch relation_data; | 210 | std::smatch relation_data; |
219 | if (!std::regex_search(line, relation_data, relation)) | 211 | if (!std::regex_search(line, relation_data, relation)) |
220 | { | 212 | { |
221 | continue; | 213 | continue; |
222 | } | 214 | } |
223 | 215 | ||
224 | int synset_id = stoi(relation_data[1]); | 216 | int synset_id = stoi(relation_data[1]); |
225 | int wnum = stoi(relation_data[2]); | 217 | int wnum = stoi(relation_data[2]); |
226 | std::string adjpos_str = relation_data[3]; | 218 | std::string adjpos_str = relation_data[3]; |
227 | 219 | ||
228 | std::pair<int, int> lookup(synset_id, wnum); | 220 | std::pair<int, int> lookup(synset_id, wnum); |
229 | if (wordByWnidAndWnum_.count(lookup)) | 221 | if (wordByWnidAndWnum_.count(lookup)) |
230 | { | 222 | { |
231 | word& adj = *wordByWnidAndWnum_.at(lookup); | 223 | word& adj = *wordByWnidAndWnum_.at(lookup); |
232 | 224 | ||
233 | if (adjpos_str == "p") | 225 | if (adjpos_str == "p") |
234 | { | 226 | { |
235 | adj.setAdjectivePosition(positioning::predicate); | 227 | adj.setAdjectivePosition(positioning::predicate); |
@@ -246,20 +238,20 @@ namespace verbly { | |||
246 | } | 238 | } |
247 | } | 239 | } |
248 | } | 240 | } |
249 | 241 | ||
250 | void generator::readImageNetUrls() | 242 | void generator::readImageNetUrls() |
251 | { | 243 | { |
252 | // The ImageNet datafile is so large that it is unreasonable and | 244 | // The ImageNet datafile is so large that it is unreasonable and |
253 | // unnecessary to read it into memory; instead, we will parse each line as | 245 | // unnecessary to read it into memory; instead, we will parse each line as |
254 | // we read it. This has the caveat that we cannot display a progress bar. | 246 | // we read it. This has the caveat that we cannot display a progress bar. |
255 | std::cout << "Reading image counts from ImageNet..." << std::endl; | 247 | std::cout << "Reading image counts from ImageNet..." << std::endl; |
256 | 248 | ||
257 | std::ifstream file(imageNetPath_); | 249 | std::ifstream file(imageNetPath_); |
258 | if (!file) | 250 | if (!file) |
259 | { | 251 | { |
260 | throw std::invalid_argument("Could not find file " + imageNetPath_); | 252 | throw std::invalid_argument("Could not find file " + imageNetPath_); |
261 | } | 253 | } |
262 | 254 | ||
263 | std::string line; | 255 | std::string line; |
264 | while (std::getline(file, line)) | 256 | while (std::getline(file, line)) |
265 | { | 257 | { |
@@ -267,7 +259,7 @@ namespace verbly { | |||
267 | { | 259 | { |
268 | line.pop_back(); | 260 | line.pop_back(); |
269 | } | 261 | } |
270 | 262 | ||
271 | std::string wnid_s = line.substr(1, 8); | 263 | std::string wnid_s = line.substr(1, 8); |
272 | int wnid = stoi(wnid_s) + 100000000; | 264 | int wnid = stoi(wnid_s) + 100000000; |
273 | if (notionByWnid_.count(wnid)) | 265 | if (notionByWnid_.count(wnid)) |
@@ -277,16 +269,16 @@ namespace verbly { | |||
277 | } | 269 | } |
278 | } | 270 | } |
279 | } | 271 | } |
280 | 272 | ||
281 | void generator::readWordNetSenseKeys() | 273 | void generator::readWordNetSenseKeys() |
282 | { | 274 | { |
283 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_sk.pl")); | 275 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_sk.pl")); |
284 | progress ppgs("Reading sense keys from WordNet...", lines.size()); | 276 | progress ppgs("Reading sense keys from WordNet...", lines.size()); |
285 | 277 | ||
286 | for (std::string line : lines) | 278 | for (std::string line : lines) |
287 | { | 279 | { |
288 | ppgs.update(); | 280 | ppgs.update(); |
289 | 281 | ||
290 | // We only actually need to lookup verbs by sense key so we'll just | 282 | // We only actually need to lookup verbs by sense key so we'll just |
291 | // ignore everything that isn't a verb. | 283 | // ignore everything that isn't a verb. |
292 | std::regex relation("^sk\\((2\\d{8}),(\\d+),'(.+)'\\)\\.$"); | 284 | std::regex relation("^sk\\((2\\d{8}),(\\d+),'(.+)'\\)\\.$"); |
@@ -295,11 +287,11 @@ namespace verbly { | |||
295 | { | 287 | { |
296 | continue; | 288 | continue; |
297 | } | 289 | } |
298 | 290 | ||
299 | int synset_id = stoi(relation_data[1]); | 291 | int synset_id = stoi(relation_data[1]); |
300 | int wnum = stoi(relation_data[2]); | 292 | int wnum = stoi(relation_data[2]); |
301 | std::string sense_key = relation_data[3]; | 293 | std::string sense_key = relation_data[3]; |
302 | 294 | ||
303 | // We are treating this mapping as injective, which is not entirely | 295 | // We are treating this mapping as injective, which is not entirely |
304 | // accurate. First, the WordNet table contains duplicate rows, so those | 296 | // accurate. First, the WordNet table contains duplicate rows, so those |
305 | // need to be ignored. More importantly, a small number of sense keys | 297 | // need to be ignored. More importantly, a small number of sense keys |
@@ -317,17 +309,17 @@ namespace verbly { | |||
317 | } | 309 | } |
318 | } | 310 | } |
319 | } | 311 | } |
320 | 312 | ||
321 | void generator::readVerbNet() | 313 | void generator::readVerbNet() |
322 | { | 314 | { |
323 | std::cout << "Reading frames from VerbNet..." << std::endl; | 315 | std::cout << "Reading frames from VerbNet..." << std::endl; |
324 | 316 | ||
325 | DIR* dir; | 317 | DIR* dir; |
326 | if ((dir = opendir(verbNetPath_.c_str())) == nullptr) | 318 | if ((dir = opendir(verbNetPath_.c_str())) == nullptr) |
327 | { | 319 | { |
328 | throw std::invalid_argument("Invalid VerbNet data directory"); | 320 | throw std::invalid_argument("Invalid VerbNet data directory"); |
329 | } | 321 | } |
330 | 322 | ||
331 | struct dirent* ent; | 323 | struct dirent* ent; |
332 | while ((ent = readdir(dir)) != nullptr) | 324 | while ((ent = readdir(dir)) != nullptr) |
333 | { | 325 | { |
@@ -337,20 +329,20 @@ namespace verbly { | |||
337 | { | 329 | { |
338 | filename += '/'; | 330 | filename += '/'; |
339 | } | 331 | } |
340 | 332 | ||
341 | filename += ent->d_name; | 333 | filename += ent->d_name; |
342 | 334 | ||
343 | if (filename.rfind(".xml") != filename.size() - 4) | 335 | if (filename.rfind(".xml") != filename.size() - 4) |
344 | { | 336 | { |
345 | continue; | 337 | continue; |
346 | } | 338 | } |
347 | 339 | ||
348 | xmlDocPtr doc = xmlParseFile(filename.c_str()); | 340 | xmlDocPtr doc = xmlParseFile(filename.c_str()); |
349 | if (doc == nullptr) | 341 | if (doc == nullptr) |
350 | { | 342 | { |
351 | throw std::logic_error("Error opening " + filename); | 343 | throw std::logic_error("Error opening " + filename); |
352 | } | 344 | } |
353 | 345 | ||
354 | xmlNodePtr top = xmlDocGetRootElement(doc); | 346 | xmlNodePtr top = xmlDocGetRootElement(doc); |
355 | if ((top == nullptr) || (xmlStrcmp(top->name, reinterpret_cast<const xmlChar*>("VNCLASS")))) | 347 | if ((top == nullptr) || (xmlStrcmp(top->name, reinterpret_cast<const xmlChar*>("VNCLASS")))) |
356 | { | 348 | { |
@@ -365,71 +357,81 @@ namespace verbly { | |||
365 | std::throw_with_nested(std::logic_error("Error parsing VerbNet file: " + filename)); | 357 | std::throw_with_nested(std::logic_error("Error parsing VerbNet file: " + filename)); |
366 | } | 358 | } |
367 | } | 359 | } |
368 | 360 | ||
369 | closedir(dir); | 361 | closedir(dir); |
370 | } | 362 | } |
371 | 363 | ||
372 | void generator::readAgidInflections() | 364 | void generator::readAgidInflections() |
373 | { | 365 | { |
374 | std::list<std::string> lines(readFile(agidPath_)); | 366 | std::list<std::string> lines(readFile(agidPath_)); |
375 | progress ppgs("Reading inflections from AGID...", lines.size()); | 367 | progress ppgs("Reading inflections from AGID...", lines.size()); |
376 | 368 | ||
377 | for (std::string line : lines) | 369 | for (std::string line : lines) |
378 | { | 370 | { |
379 | ppgs.update(); | 371 | ppgs.update(); |
380 | 372 | ||
381 | int divider = line.find_first_of(" "); | 373 | int divider = line.find_first_of(" "); |
382 | std::string infinitive = line.substr(0, divider); | 374 | std::string infinitive = line.substr(0, divider); |
383 | line = line.substr(divider+1); | 375 | line = line.substr(divider+1); |
384 | char type = line[0]; | 376 | char type = line[0]; |
385 | 377 | ||
386 | if (line[1] == '?') | 378 | if (line[1] == '?') |
387 | { | 379 | { |
388 | line.erase(0, 4); | 380 | line.erase(0, 4); |
389 | } else { | 381 | } else { |
390 | line.erase(0, 3); | 382 | line.erase(0, 3); |
391 | } | 383 | } |
392 | 384 | ||
393 | if (!lemmaByBaseForm_.count(infinitive) && (type != 'V')) | 385 | if (!lemmaByBaseForm_.count(infinitive) && (type != 'V')) |
394 | { | 386 | { |
395 | continue; | 387 | continue; |
396 | } | 388 | } |
397 | 389 | ||
398 | lemma& curLemma = lookupOrCreateLemma(infinitive); | 390 | lemma& curLemma = lookupOrCreateLemma(infinitive); |
399 | 391 | ||
400 | auto forms = split<std::vector<std::string>>(line, " | "); | 392 | std::vector<std::list<std::string>> agidForms; |
401 | for (std::string& inflForm : forms) | 393 | for (std::string inflForms : split<std::list<std::string>>(line, " | ")) |
402 | { | 394 | { |
403 | int sympos = inflForm.find_first_of(",?"); | 395 | std::list<std::string> forms; |
404 | if (sympos != std::string::npos) | 396 | |
397 | for (std::string inflForm : split<std::list<std::string>>(std::move(inflForms), ", ")) | ||
405 | { | 398 | { |
406 | inflForm = inflForm.substr(0, sympos); | 399 | int sympos = inflForm.find_first_of("~<!? "); |
400 | if (sympos != std::string::npos) | ||
401 | { | ||
402 | inflForm = inflForm.substr(0, sympos); | ||
403 | } | ||
404 | |||
405 | forms.push_back(std::move(inflForm)); | ||
407 | } | 406 | } |
407 | |||
408 | agidForms.push_back(std::move(forms)); | ||
408 | } | 409 | } |
409 | 410 | ||
411 | std::map<inflection, std::list<std::string>> mappedForms; | ||
410 | switch (type) | 412 | switch (type) |
411 | { | 413 | { |
412 | case 'V': | 414 | case 'V': |
413 | { | 415 | { |
414 | if (forms.size() == 4) | 416 | if (agidForms.size() == 4) |
415 | { | 417 | { |
416 | curLemma.addInflection(inflection::past_tense, lookupOrCreateForm(forms[0])); | 418 | mappedForms[inflection::past_tense] = agidForms[0]; |
417 | curLemma.addInflection(inflection::past_participle, lookupOrCreateForm(forms[1])); | 419 | mappedForms[inflection::past_participle] = agidForms[1]; |
418 | curLemma.addInflection(inflection::ing_form, lookupOrCreateForm(forms[2])); | 420 | mappedForms[inflection::ing_form] = agidForms[2]; |
419 | curLemma.addInflection(inflection::s_form, lookupOrCreateForm(forms[3])); | 421 | mappedForms[inflection::s_form] = agidForms[3]; |
420 | } else if (forms.size() == 3) | 422 | } else if (agidForms.size() == 3) |
421 | { | 423 | { |
422 | curLemma.addInflection(inflection::past_tense, lookupOrCreateForm(forms[0])); | 424 | mappedForms[inflection::past_tense] = agidForms[0]; |
423 | curLemma.addInflection(inflection::past_participle, lookupOrCreateForm(forms[0])); | 425 | mappedForms[inflection::past_participle] = agidForms[0]; |
424 | curLemma.addInflection(inflection::ing_form, lookupOrCreateForm(forms[1])); | 426 | mappedForms[inflection::ing_form] = agidForms[1]; |
425 | curLemma.addInflection(inflection::s_form, lookupOrCreateForm(forms[2])); | 427 | mappedForms[inflection::s_form] = agidForms[2]; |
426 | } else if (forms.size() == 8) | 428 | } else if (agidForms.size() == 8) |
427 | { | 429 | { |
428 | // As of AGID 2014.08.11, this is only "to be" | 430 | // As of AGID 2014.08.11, this is only "to be" |
429 | curLemma.addInflection(inflection::past_tense, lookupOrCreateForm(forms[0])); | 431 | mappedForms[inflection::past_tense] = agidForms[0]; |
430 | curLemma.addInflection(inflection::past_participle, lookupOrCreateForm(forms[2])); | 432 | mappedForms[inflection::past_participle] = agidForms[2]; |
431 | curLemma.addInflection(inflection::ing_form, lookupOrCreateForm(forms[3])); | 433 | mappedForms[inflection::ing_form] = agidForms[3]; |
432 | curLemma.addInflection(inflection::s_form, lookupOrCreateForm(forms[4])); | 434 | mappedForms[inflection::s_form] = agidForms[4]; |
433 | } else { | 435 | } else { |
434 | // Words that don't fit the cases above as of AGID 2014.08.11: | 436 | // Words that don't fit the cases above as of AGID 2014.08.11: |
435 | // - may and shall do not conjugate the way we want them to | 437 | // - may and shall do not conjugate the way we want them to |
@@ -437,7 +439,7 @@ namespace verbly { | |||
437 | // - wit has five forms, and is archaic/obscure enough that we can ignore it for now | 439 | // - wit has five forms, and is archaic/obscure enough that we can ignore it for now |
438 | std::cout << " Ignoring verb \"" << infinitive << "\" due to non-standard number of forms." << std::endl; | 440 | std::cout << " Ignoring verb \"" << infinitive << "\" due to non-standard number of forms." << std::endl; |
439 | } | 441 | } |
440 | 442 | ||
441 | // For verbs in particular, we sometimes create a notion and a word | 443 | // For verbs in particular, we sometimes create a notion and a word |
442 | // from inflection data. Specifically, if there are not yet any | 444 | // from inflection data. Specifically, if there are not yet any |
443 | // verbs existing that have the same infinitive form. "Yet" means | 445 | // verbs existing that have the same infinitive form. "Yet" means |
@@ -451,84 +453,93 @@ namespace verbly { | |||
451 | notion& n = createNotion(part_of_speech::verb); | 453 | notion& n = createNotion(part_of_speech::verb); |
452 | createWord(n, curLemma); | 454 | createWord(n, curLemma); |
453 | } | 455 | } |
454 | 456 | ||
455 | break; | 457 | break; |
456 | } | 458 | } |
457 | 459 | ||
458 | case 'A': | 460 | case 'A': |
459 | { | 461 | { |
460 | if (forms.size() == 2) | 462 | if (agidForms.size() == 2) |
461 | { | 463 | { |
462 | curLemma.addInflection(inflection::comparative, lookupOrCreateForm(forms[0])); | 464 | mappedForms[inflection::comparative] = agidForms[0]; |
463 | curLemma.addInflection(inflection::superlative, lookupOrCreateForm(forms[1])); | 465 | mappedForms[inflection::superlative] = agidForms[1]; |
464 | } else { | 466 | } else { |
465 | // As of AGID 2014.08.11, this is only "only", which has only the form "onliest" | 467 | // As of AGID 2014.08.11, this is only "only", which has only the form "onliest" |
466 | std::cout << " Ignoring adjective/adverb \"" << infinitive << "\" due to non-standard number of forms." << std::endl; | 468 | std::cout << " Ignoring adjective/adverb \"" << infinitive << "\" due to non-standard number of forms." << std::endl; |
467 | } | 469 | } |
468 | 470 | ||
469 | break; | 471 | break; |
470 | } | 472 | } |
471 | 473 | ||
472 | case 'N': | 474 | case 'N': |
473 | { | 475 | { |
474 | if (forms.size() == 1) | 476 | if (agidForms.size() == 1) |
475 | { | 477 | { |
476 | curLemma.addInflection(inflection::plural, lookupOrCreateForm(forms[0])); | 478 | mappedForms[inflection::plural] = agidForms[0]; |
477 | } else { | 479 | } else { |
478 | // As of AGID 2014.08.11, this is non-existent. | 480 | // As of AGID 2014.08.11, this is non-existent. |
479 | std::cout << " Ignoring noun \"" << infinitive << "\" due to non-standard number of forms." << std::endl; | 481 | std::cout << " Ignoring noun \"" << infinitive << "\" due to non-standard number of forms." << std::endl; |
480 | } | 482 | } |
481 | 483 | ||
482 | break; | 484 | break; |
483 | } | 485 | } |
484 | } | 486 | } |
487 | |||
488 | // Compile the forms we have mapped. | ||
489 | for (auto mapping : std::move(mappedForms)) | ||
490 | { | ||
491 | for (std::string infl : std::move(mapping.second)) | ||
492 | { | ||
493 | curLemma.addInflection(mapping.first, lookupOrCreateForm(std::move(infl))); | ||
494 | } | ||
495 | } | ||
485 | } | 496 | } |
486 | } | 497 | } |
487 | 498 | ||
488 | void generator::readPrepositions() | 499 | void generator::readPrepositions() |
489 | { | 500 | { |
490 | std::list<std::string> lines(readFile("prepositions.txt")); | 501 | std::list<std::string> lines(readFile("prepositions.txt")); |
491 | progress ppgs("Reading prepositions...", lines.size()); | 502 | progress ppgs("Reading prepositions...", lines.size()); |
492 | 503 | ||
493 | for (std::string line : lines) | 504 | for (std::string line : lines) |
494 | { | 505 | { |
495 | ppgs.update(); | 506 | ppgs.update(); |
496 | 507 | ||
497 | std::regex relation("^([^:]+): (.+)"); | 508 | std::regex relation("^([^:]+): (.+)"); |
498 | std::smatch relation_data; | 509 | std::smatch relation_data; |
499 | std::regex_search(line, relation_data, relation); | 510 | std::regex_search(line, relation_data, relation); |
500 | std::string prep = relation_data[1]; | 511 | std::string prep = relation_data[1]; |
501 | auto groups = split<std::list<std::string>>(relation_data[2], ", "); | 512 | auto groups = split<std::list<std::string>>(relation_data[2], ", "); |
502 | 513 | ||
503 | notion& n = createNotion(part_of_speech::preposition); | 514 | notion& n = createNotion(part_of_speech::preposition); |
504 | lemma& l = lookupOrCreateLemma(prep); | 515 | lemma& l = lookupOrCreateLemma(prep); |
505 | word& w = createWord(n, l); | 516 | word& w = createWord(n, l); |
506 | 517 | ||
507 | n.setPrepositionGroups(groups); | 518 | n.setPrepositionGroups(groups); |
508 | } | 519 | } |
509 | } | 520 | } |
510 | 521 | ||
511 | void generator::readCmudictPronunciations() | 522 | void generator::readCmudictPronunciations() |
512 | { | 523 | { |
513 | std::list<std::string> lines(readFile(cmudictPath_)); | 524 | std::list<std::string> lines(readFile(cmudictPath_)); |
514 | progress ppgs("Reading pronunciations from CMUDICT...", lines.size()); | 525 | progress ppgs("Reading pronunciations from CMUDICT...", lines.size()); |
515 | 526 | ||
516 | for (std::string line : lines) | 527 | for (std::string line : lines) |
517 | { | 528 | { |
518 | ppgs.update(); | 529 | ppgs.update(); |
519 | 530 | ||
520 | std::regex phoneme("([A-Z][^ \\(]*)(?:\\(\\d+\\))? ([A-Z 0-9]+)"); | 531 | std::regex phoneme("([A-Z][^ \\(]*)(?:\\(\\d+\\))? ([A-Z 0-9]+)"); |
521 | std::smatch phoneme_data; | 532 | std::smatch phoneme_data; |
522 | if (std::regex_search(line, phoneme_data, phoneme)) | 533 | if (std::regex_search(line, phoneme_data, phoneme)) |
523 | { | 534 | { |
524 | std::string canonical(phoneme_data[1]); | 535 | std::string canonical(phoneme_data[1]); |
525 | std::transform(std::begin(canonical), std::end(canonical), std::begin(canonical), ::tolower); | 536 | std::transform(std::begin(canonical), std::end(canonical), std::begin(canonical), ::tolower); |
526 | 537 | ||
527 | if (!formByText_.count(canonical)) | 538 | if (!formByText_.count(canonical)) |
528 | { | 539 | { |
529 | continue; | 540 | continue; |
530 | } | 541 | } |
531 | 542 | ||
532 | std::string phonemes = phoneme_data[2]; | 543 | std::string phonemes = phoneme_data[2]; |
533 | pronunciations_.emplace_back(phonemes); | 544 | pronunciations_.emplace_back(phonemes); |
534 | pronunciation& p = pronunciations_.back(); | 545 | pronunciation& p = pronunciations_.back(); |
@@ -536,7 +547,7 @@ namespace verbly { | |||
536 | } | 547 | } |
537 | } | 548 | } |
538 | } | 549 | } |
539 | 550 | ||
540 | void generator::writeSchema() | 551 | void generator::writeSchema() |
541 | { | 552 | { |
542 | std::ifstream file("schema.sql"); | 553 | std::ifstream file("schema.sql"); |
@@ -544,7 +555,7 @@ namespace verbly { | |||
544 | { | 555 | { |
545 | throw std::invalid_argument("Could not find database schema"); | 556 | throw std::invalid_argument("Could not find database schema"); |
546 | } | 557 | } |
547 | 558 | ||
548 | std::ostringstream schemaBuilder; | 559 | std::ostringstream schemaBuilder; |
549 | std::string line; | 560 | std::string line; |
550 | while (std::getline(file, line)) | 561 | while (std::getline(file, line)) |
@@ -553,10 +564,10 @@ namespace verbly { | |||
553 | { | 564 | { |
554 | line.pop_back(); | 565 | line.pop_back(); |
555 | } | 566 | } |
556 | 567 | ||
557 | schemaBuilder << line; | 568 | schemaBuilder << line; |
558 | } | 569 | } |
559 | 570 | ||
560 | std::string schema = schemaBuilder.str(); | 571 | std::string schema = schemaBuilder.str(); |
561 | auto queries = split<std::list<std::string>>(schema, ";"); | 572 | auto queries = split<std::list<std::string>>(schema, ";"); |
562 | progress ppgs("Writing database schema...", queries.size()); | 573 | progress ppgs("Writing database schema...", queries.size()); |
@@ -566,91 +577,91 @@ namespace verbly { | |||
566 | { | 577 | { |
567 | db_.runQuery(query); | 578 | db_.runQuery(query); |
568 | } | 579 | } |
569 | 580 | ||
570 | ppgs.update(); | 581 | ppgs.update(); |
571 | } | 582 | } |
572 | } | 583 | } |
573 | 584 | ||
574 | void generator::dumpObjects() | 585 | void generator::dumpObjects() |
575 | { | 586 | { |
576 | { | 587 | { |
577 | progress ppgs("Writing notions...", notions_.size()); | 588 | progress ppgs("Writing notions...", notions_.size()); |
578 | 589 | ||
579 | for (notion& n : notions_) | 590 | for (notion& n : notions_) |
580 | { | 591 | { |
581 | db_ << n; | 592 | db_ << n; |
582 | 593 | ||
583 | ppgs.update(); | 594 | ppgs.update(); |
584 | } | 595 | } |
585 | } | 596 | } |
586 | 597 | ||
587 | { | 598 | { |
588 | progress ppgs("Writing words...", words_.size()); | 599 | progress ppgs("Writing words...", words_.size()); |
589 | 600 | ||
590 | for (word& w : words_) | 601 | for (word& w : words_) |
591 | { | 602 | { |
592 | db_ << w; | 603 | db_ << w; |
593 | 604 | ||
594 | ppgs.update(); | 605 | ppgs.update(); |
595 | } | 606 | } |
596 | } | 607 | } |
597 | 608 | ||
598 | { | 609 | { |
599 | progress ppgs("Writing lemmas...", lemmas_.size()); | 610 | progress ppgs("Writing lemmas...", lemmas_.size()); |
600 | 611 | ||
601 | for (lemma& l : lemmas_) | 612 | for (lemma& l : lemmas_) |
602 | { | 613 | { |
603 | db_ << l; | 614 | db_ << l; |
604 | 615 | ||
605 | ppgs.update(); | 616 | ppgs.update(); |
606 | } | 617 | } |
607 | } | 618 | } |
608 | 619 | ||
609 | { | 620 | { |
610 | progress ppgs("Writing forms...", forms_.size()); | 621 | progress ppgs("Writing forms...", forms_.size()); |
611 | 622 | ||
612 | for (form& f : forms_) | 623 | for (form& f : forms_) |
613 | { | 624 | { |
614 | db_ << f; | 625 | db_ << f; |
615 | 626 | ||
616 | ppgs.update(); | 627 | ppgs.update(); |
617 | } | 628 | } |
618 | } | 629 | } |
619 | 630 | ||
620 | { | 631 | { |
621 | progress ppgs("Writing pronunciations...", pronunciations_.size()); | 632 | progress ppgs("Writing pronunciations...", pronunciations_.size()); |
622 | 633 | ||
623 | for (pronunciation& p : pronunciations_) | 634 | for (pronunciation& p : pronunciations_) |
624 | { | 635 | { |
625 | db_ << p; | 636 | db_ << p; |
626 | 637 | ||
627 | ppgs.update(); | 638 | ppgs.update(); |
628 | } | 639 | } |
629 | } | 640 | } |
630 | 641 | ||
631 | { | 642 | { |
632 | progress ppgs("Writing verb groups...", groups_.size()); | 643 | progress ppgs("Writing verb groups...", groups_.size()); |
633 | 644 | ||
634 | for (group& g : groups_) | 645 | for (group& g : groups_) |
635 | { | 646 | { |
636 | db_ << g; | 647 | db_ << g; |
637 | 648 | ||
638 | ppgs.update(); | 649 | ppgs.update(); |
639 | } | 650 | } |
640 | } | 651 | } |
641 | 652 | ||
642 | { | 653 | { |
643 | progress ppgs("Writing verb frames...", frames_.size()); | 654 | progress ppgs("Writing verb frames...", frames_.size()); |
644 | 655 | ||
645 | for (frame& f : frames_) | 656 | for (frame& f : frames_) |
646 | { | 657 | { |
647 | db_ << f; | 658 | db_ << f; |
648 | 659 | ||
649 | ppgs.update(); | 660 | ppgs.update(); |
650 | } | 661 | } |
651 | } | 662 | } |
652 | } | 663 | } |
653 | 664 | ||
654 | void generator::readWordNetAntonymy() | 665 | void generator::readWordNetAntonymy() |
655 | { | 666 | { |
656 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_ant.pl")); | 667 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_ant.pl")); |
@@ -658,7 +669,7 @@ namespace verbly { | |||
658 | for (auto line : lines) | 669 | for (auto line : lines) |
659 | { | 670 | { |
660 | ppgs.update(); | 671 | ppgs.update(); |
661 | 672 | ||
662 | std::regex relation("^ant\\(([134]\\d{8}),(\\d+),([134]\\d{8}),(\\d+)\\)\\."); | 673 | std::regex relation("^ant\\(([134]\\d{8}),(\\d+),([134]\\d{8}),(\\d+)\\)\\."); |
663 | std::smatch relation_data; | 674 | std::smatch relation_data; |
664 | if (!std::regex_search(line, relation_data, relation)) | 675 | if (!std::regex_search(line, relation_data, relation)) |
@@ -668,21 +679,21 @@ namespace verbly { | |||
668 | 679 | ||
669 | std::pair<int, int> lookup1(std::stoi(relation_data[1]), std::stoi(relation_data[2])); | 680 | std::pair<int, int> lookup1(std::stoi(relation_data[1]), std::stoi(relation_data[2])); |
670 | std::pair<int, int> lookup2(std::stoi(relation_data[3]), std::stoi(relation_data[4])); | 681 | std::pair<int, int> lookup2(std::stoi(relation_data[3]), std::stoi(relation_data[4])); |
671 | 682 | ||
672 | if (wordByWnidAndWnum_.count(lookup1) && wordByWnidAndWnum_.count(lookup2)) | 683 | if (wordByWnidAndWnum_.count(lookup1) && wordByWnidAndWnum_.count(lookup2)) |
673 | { | 684 | { |
674 | word& word1 = *wordByWnidAndWnum_.at(lookup1); | 685 | word& word1 = *wordByWnidAndWnum_.at(lookup1); |
675 | word& word2 = *wordByWnidAndWnum_.at(lookup2); | 686 | word& word2 = *wordByWnidAndWnum_.at(lookup2); |
676 | 687 | ||
677 | std::list<field> fields; | 688 | std::list<field> fields; |
678 | fields.emplace_back("antonym_1_id", word1.getId()); | 689 | fields.emplace_back("antonym_1_id", word1.getId()); |
679 | fields.emplace_back("antonym_2_id", word2.getId()); | 690 | fields.emplace_back("antonym_2_id", word2.getId()); |
680 | 691 | ||
681 | db_.insertIntoTable("antonymy", std::move(fields)); | 692 | db_.insertIntoTable("antonymy", std::move(fields)); |
682 | } | 693 | } |
683 | } | 694 | } |
684 | } | 695 | } |
685 | 696 | ||
686 | void generator::readWordNetVariation() | 697 | void generator::readWordNetVariation() |
687 | { | 698 | { |
688 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_at.pl")); | 699 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_at.pl")); |
@@ -690,7 +701,7 @@ namespace verbly { | |||
690 | for (auto line : lines) | 701 | for (auto line : lines) |
691 | { | 702 | { |
692 | ppgs.update(); | 703 | ppgs.update(); |
693 | 704 | ||
694 | std::regex relation("^at\\((1\\d{8}),(3\\d{8})\\)\\."); | 705 | std::regex relation("^at\\((1\\d{8}),(3\\d{8})\\)\\."); |
695 | std::smatch relation_data; | 706 | std::smatch relation_data; |
696 | if (!std::regex_search(line, relation_data, relation)) | 707 | if (!std::regex_search(line, relation_data, relation)) |
@@ -700,21 +711,21 @@ namespace verbly { | |||
700 | 711 | ||
701 | int lookup1 = std::stoi(relation_data[1]); | 712 | int lookup1 = std::stoi(relation_data[1]); |
702 | int lookup2 = std::stoi(relation_data[2]); | 713 | int lookup2 = std::stoi(relation_data[2]); |
703 | 714 | ||
704 | if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2)) | 715 | if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2)) |
705 | { | 716 | { |
706 | notion& notion1 = *notionByWnid_.at(lookup1); | 717 | notion& notion1 = *notionByWnid_.at(lookup1); |
707 | notion& notion2 = *notionByWnid_.at(lookup2); | 718 | notion& notion2 = *notionByWnid_.at(lookup2); |
708 | 719 | ||
709 | std::list<field> fields; | 720 | std::list<field> fields; |
710 | fields.emplace_back("noun_id", notion1.getId()); | 721 | fields.emplace_back("noun_id", notion1.getId()); |
711 | fields.emplace_back("adjective_id", notion2.getId()); | 722 | fields.emplace_back("adjective_id", notion2.getId()); |
712 | 723 | ||
713 | db_.insertIntoTable("variation", std::move(fields)); | 724 | db_.insertIntoTable("variation", std::move(fields)); |
714 | } | 725 | } |
715 | } | 726 | } |
716 | } | 727 | } |
717 | 728 | ||
718 | void generator::readWordNetClasses() | 729 | void generator::readWordNetClasses() |
719 | { | 730 | { |
720 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_cls.pl")); | 731 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_cls.pl")); |
@@ -722,7 +733,7 @@ namespace verbly { | |||
722 | for (auto line : lines) | 733 | for (auto line : lines) |
723 | { | 734 | { |
724 | ppgs.update(); | 735 | ppgs.update(); |
725 | 736 | ||
726 | std::regex relation("^cls\\(([134]\\d{8}),(\\d+),(1\\d{8}),(\\d+),([tur])\\)\\."); | 737 | std::regex relation("^cls\\(([134]\\d{8}),(\\d+),(1\\d{8}),(\\d+),([tur])\\)\\."); |
727 | std::smatch relation_data; | 738 | std::smatch relation_data; |
728 | if (!std::regex_search(line, relation_data, relation)) | 739 | if (!std::regex_search(line, relation_data, relation)) |
@@ -733,7 +744,7 @@ namespace verbly { | |||
733 | std::pair<int, int> lookup1(std::stoi(relation_data[1]), std::stoi(relation_data[2])); | 744 | std::pair<int, int> lookup1(std::stoi(relation_data[1]), std::stoi(relation_data[2])); |
734 | std::pair<int, int> lookup2(std::stoi(relation_data[3]), std::stoi(relation_data[4])); | 745 | std::pair<int, int> lookup2(std::stoi(relation_data[3]), std::stoi(relation_data[4])); |
735 | std::string class_type = relation_data[5]; | 746 | std::string class_type = relation_data[5]; |
736 | 747 | ||
737 | std::string table_name; | 748 | std::string table_name; |
738 | if (class_type == "t") | 749 | if (class_type == "t") |
739 | { | 750 | { |
@@ -745,10 +756,10 @@ namespace verbly { | |||
745 | { | 756 | { |
746 | table_name += "regionality"; | 757 | table_name += "regionality"; |
747 | } | 758 | } |
748 | 759 | ||
749 | std::list<int> leftJoin; | 760 | std::list<int> leftJoin; |
750 | std::list<int> rightJoin; | 761 | std::list<int> rightJoin; |
751 | 762 | ||
752 | if ((lookup1.second == 0) && (wordsByWnid_.count(lookup1.first))) | 763 | if ((lookup1.second == 0) && (wordsByWnid_.count(lookup1.first))) |
753 | { | 764 | { |
754 | std::transform(std::begin(wordsByWnid_.at(lookup1.first)), std::end(wordsByWnid_.at(lookup1.first)), std::back_inserter(leftJoin), [] (word* w) { | 765 | std::transform(std::begin(wordsByWnid_.at(lookup1.first)), std::end(wordsByWnid_.at(lookup1.first)), std::back_inserter(leftJoin), [] (word* w) { |
@@ -757,7 +768,7 @@ namespace verbly { | |||
757 | } else if (wordByWnidAndWnum_.count(lookup1)) { | 768 | } else if (wordByWnidAndWnum_.count(lookup1)) { |
758 | leftJoin.push_back(wordByWnidAndWnum_.at(lookup1)->getId()); | 769 | leftJoin.push_back(wordByWnidAndWnum_.at(lookup1)->getId()); |
759 | } | 770 | } |
760 | 771 | ||
761 | if ((lookup2.second == 0) && (wordsByWnid_.count(lookup2.first))) | 772 | if ((lookup2.second == 0) && (wordsByWnid_.count(lookup2.first))) |
762 | { | 773 | { |
763 | std::transform(std::begin(wordsByWnid_.at(lookup2.first)), std::end(wordsByWnid_.at(lookup2.first)), std::back_inserter(rightJoin), [] (word* w) { | 774 | std::transform(std::begin(wordsByWnid_.at(lookup2.first)), std::end(wordsByWnid_.at(lookup2.first)), std::back_inserter(rightJoin), [] (word* w) { |
@@ -766,7 +777,7 @@ namespace verbly { | |||
766 | } else if (wordByWnidAndWnum_.count(lookup2)) { | 777 | } else if (wordByWnidAndWnum_.count(lookup2)) { |
767 | rightJoin.push_back(wordByWnidAndWnum_.at(lookup2)->getId()); | 778 | rightJoin.push_back(wordByWnidAndWnum_.at(lookup2)->getId()); |
768 | } | 779 | } |
769 | 780 | ||
770 | for (int word1 : leftJoin) | 781 | for (int word1 : leftJoin) |
771 | { | 782 | { |
772 | for (int word2 : rightJoin) | 783 | for (int word2 : rightJoin) |
@@ -774,13 +785,13 @@ namespace verbly { | |||
774 | std::list<field> fields; | 785 | std::list<field> fields; |
775 | fields.emplace_back("term_id", word1); | 786 | fields.emplace_back("term_id", word1); |
776 | fields.emplace_back("domain_id", word2); | 787 | fields.emplace_back("domain_id", word2); |
777 | 788 | ||
778 | db_.insertIntoTable(table_name, std::move(fields)); | 789 | db_.insertIntoTable(table_name, std::move(fields)); |
779 | } | 790 | } |
780 | } | 791 | } |
781 | } | 792 | } |
782 | } | 793 | } |
783 | 794 | ||
784 | void generator::readWordNetCausality() | 795 | void generator::readWordNetCausality() |
785 | { | 796 | { |
786 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_cs.pl")); | 797 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_cs.pl")); |
@@ -788,7 +799,7 @@ namespace verbly { | |||
788 | for (auto line : lines) | 799 | for (auto line : lines) |
789 | { | 800 | { |
790 | ppgs.update(); | 801 | ppgs.update(); |
791 | 802 | ||
792 | std::regex relation("^cs\\((2\\d{8}),(2\\d{8})\\)\\."); | 803 | std::regex relation("^cs\\((2\\d{8}),(2\\d{8})\\)\\."); |
793 | std::smatch relation_data; | 804 | std::smatch relation_data; |
794 | if (!std::regex_search(line, relation_data, relation)) | 805 | if (!std::regex_search(line, relation_data, relation)) |
@@ -798,21 +809,21 @@ namespace verbly { | |||
798 | 809 | ||
799 | int lookup1 = std::stoi(relation_data[1]); | 810 | int lookup1 = std::stoi(relation_data[1]); |
800 | int lookup2 = std::stoi(relation_data[2]); | 811 | int lookup2 = std::stoi(relation_data[2]); |
801 | 812 | ||
802 | if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2)) | 813 | if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2)) |
803 | { | 814 | { |
804 | notion& notion1 = *notionByWnid_.at(lookup1); | 815 | notion& notion1 = *notionByWnid_.at(lookup1); |
805 | notion& notion2 = *notionByWnid_.at(lookup2); | 816 | notion& notion2 = *notionByWnid_.at(lookup2); |
806 | 817 | ||
807 | std::list<field> fields; | 818 | std::list<field> fields; |
808 | fields.emplace_back("effect_id", notion1.getId()); | 819 | fields.emplace_back("effect_id", notion1.getId()); |
809 | fields.emplace_back("cause_id", notion2.getId()); | 820 | fields.emplace_back("cause_id", notion2.getId()); |
810 | 821 | ||
811 | db_.insertIntoTable("causality", std::move(fields)); | 822 | db_.insertIntoTable("causality", std::move(fields)); |
812 | } | 823 | } |
813 | } | 824 | } |
814 | } | 825 | } |
815 | 826 | ||
816 | void generator::readWordNetEntailment() | 827 | void generator::readWordNetEntailment() |
817 | { | 828 | { |
818 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_ent.pl")); | 829 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_ent.pl")); |
@@ -820,7 +831,7 @@ namespace verbly { | |||
820 | for (auto line : lines) | 831 | for (auto line : lines) |
821 | { | 832 | { |
822 | ppgs.update(); | 833 | ppgs.update(); |
823 | 834 | ||
824 | std::regex relation("^ent\\((2\\d{8}),(2\\d{8})\\)\\."); | 835 | std::regex relation("^ent\\((2\\d{8}),(2\\d{8})\\)\\."); |
825 | std::smatch relation_data; | 836 | std::smatch relation_data; |
826 | if (!std::regex_search(line, relation_data, relation)) | 837 | if (!std::regex_search(line, relation_data, relation)) |
@@ -830,21 +841,21 @@ namespace verbly { | |||
830 | 841 | ||
831 | int lookup1 = std::stoi(relation_data[1]); | 842 | int lookup1 = std::stoi(relation_data[1]); |
832 | int lookup2 = std::stoi(relation_data[2]); | 843 | int lookup2 = std::stoi(relation_data[2]); |
833 | 844 | ||
834 | if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2)) | 845 | if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2)) |
835 | { | 846 | { |
836 | notion& notion1 = *notionByWnid_.at(lookup1); | 847 | notion& notion1 = *notionByWnid_.at(lookup1); |
837 | notion& notion2 = *notionByWnid_.at(lookup2); | 848 | notion& notion2 = *notionByWnid_.at(lookup2); |
838 | 849 | ||
839 | std::list<field> fields; | 850 | std::list<field> fields; |
840 | fields.emplace_back("given_id", notion1.getId()); | 851 | fields.emplace_back("given_id", notion1.getId()); |
841 | fields.emplace_back("entailment_id", notion2.getId()); | 852 | fields.emplace_back("entailment_id", notion2.getId()); |
842 | 853 | ||
843 | db_.insertIntoTable("entailment", std::move(fields)); | 854 | db_.insertIntoTable("entailment", std::move(fields)); |
844 | } | 855 | } |
845 | } | 856 | } |
846 | } | 857 | } |
847 | 858 | ||
848 | void generator::readWordNetHypernymy() | 859 | void generator::readWordNetHypernymy() |
849 | { | 860 | { |
850 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_hyp.pl")); | 861 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_hyp.pl")); |
@@ -852,7 +863,7 @@ namespace verbly { | |||
852 | for (auto line : lines) | 863 | for (auto line : lines) |
853 | { | 864 | { |
854 | ppgs.update(); | 865 | ppgs.update(); |
855 | 866 | ||
856 | std::regex relation("^hyp\\(([12]\\d{8}),([12]\\d{8})\\)\\."); | 867 | std::regex relation("^hyp\\(([12]\\d{8}),([12]\\d{8})\\)\\."); |
857 | std::smatch relation_data; | 868 | std::smatch relation_data; |
858 | if (!std::regex_search(line, relation_data, relation)) | 869 | if (!std::regex_search(line, relation_data, relation)) |
@@ -862,21 +873,21 @@ namespace verbly { | |||
862 | 873 | ||
863 | int lookup1 = std::stoi(relation_data[1]); | 874 | int lookup1 = std::stoi(relation_data[1]); |
864 | int lookup2 = std::stoi(relation_data[2]); | 875 | int lookup2 = std::stoi(relation_data[2]); |
865 | 876 | ||
866 | if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2)) | 877 | if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2)) |
867 | { | 878 | { |
868 | notion& notion1 = *notionByWnid_.at(lookup1); | 879 | notion& notion1 = *notionByWnid_.at(lookup1); |
869 | notion& notion2 = *notionByWnid_.at(lookup2); | 880 | notion& notion2 = *notionByWnid_.at(lookup2); |
870 | 881 | ||
871 | std::list<field> fields; | 882 | std::list<field> fields; |
872 | fields.emplace_back("hyponym_id", notion1.getId()); | 883 | fields.emplace_back("hyponym_id", notion1.getId()); |
873 | fields.emplace_back("hypernym_id", notion2.getId()); | 884 | fields.emplace_back("hypernym_id", notion2.getId()); |
874 | 885 | ||
875 | db_.insertIntoTable("hypernymy", std::move(fields)); | 886 | db_.insertIntoTable("hypernymy", std::move(fields)); |
876 | } | 887 | } |
877 | } | 888 | } |
878 | } | 889 | } |
879 | 890 | ||
880 | void generator::readWordNetInstantiation() | 891 | void generator::readWordNetInstantiation() |
881 | { | 892 | { |
882 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_ins.pl")); | 893 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_ins.pl")); |
@@ -884,7 +895,7 @@ namespace verbly { | |||
884 | for (auto line : lines) | 895 | for (auto line : lines) |
885 | { | 896 | { |
886 | ppgs.update(); | 897 | ppgs.update(); |
887 | 898 | ||
888 | std::regex relation("^ins\\((1\\d{8}),(1\\d{8})\\)\\."); | 899 | std::regex relation("^ins\\((1\\d{8}),(1\\d{8})\\)\\."); |
889 | std::smatch relation_data; | 900 | std::smatch relation_data; |
890 | if (!std::regex_search(line, relation_data, relation)) | 901 | if (!std::regex_search(line, relation_data, relation)) |
@@ -894,21 +905,21 @@ namespace verbly { | |||
894 | 905 | ||
895 | int lookup1 = std::stoi(relation_data[1]); | 906 | int lookup1 = std::stoi(relation_data[1]); |
896 | int lookup2 = std::stoi(relation_data[2]); | 907 | int lookup2 = std::stoi(relation_data[2]); |
897 | 908 | ||
898 | if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2)) | 909 | if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2)) |
899 | { | 910 | { |
900 | notion& notion1 = *notionByWnid_.at(lookup1); | 911 | notion& notion1 = *notionByWnid_.at(lookup1); |
901 | notion& notion2 = *notionByWnid_.at(lookup2); | 912 | notion& notion2 = *notionByWnid_.at(lookup2); |
902 | 913 | ||
903 | std::list<field> fields; | 914 | std::list<field> fields; |
904 | fields.emplace_back("instance_id", notion1.getId()); | 915 | fields.emplace_back("instance_id", notion1.getId()); |
905 | fields.emplace_back("class_id", notion2.getId()); | 916 | fields.emplace_back("class_id", notion2.getId()); |
906 | 917 | ||
907 | db_.insertIntoTable("instantiation", std::move(fields)); | 918 | db_.insertIntoTable("instantiation", std::move(fields)); |
908 | } | 919 | } |
909 | } | 920 | } |
910 | } | 921 | } |
911 | 922 | ||
912 | void generator::readWordNetMemberMeronymy() | 923 | void generator::readWordNetMemberMeronymy() |
913 | { | 924 | { |
914 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_mm.pl")); | 925 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_mm.pl")); |
@@ -916,7 +927,7 @@ namespace verbly { | |||
916 | for (auto line : lines) | 927 | for (auto line : lines) |
917 | { | 928 | { |
918 | ppgs.update(); | 929 | ppgs.update(); |
919 | 930 | ||
920 | std::regex relation("^mm\\((1\\d{8}),(1\\d{8})\\)\\."); | 931 | std::regex relation("^mm\\((1\\d{8}),(1\\d{8})\\)\\."); |
921 | std::smatch relation_data; | 932 | std::smatch relation_data; |
922 | if (!std::regex_search(line, relation_data, relation)) | 933 | if (!std::regex_search(line, relation_data, relation)) |
@@ -926,21 +937,21 @@ namespace verbly { | |||
926 | 937 | ||
927 | int lookup1 = std::stoi(relation_data[1]); | 938 | int lookup1 = std::stoi(relation_data[1]); |
928 | int lookup2 = std::stoi(relation_data[2]); | 939 | int lookup2 = std::stoi(relation_data[2]); |
929 | 940 | ||
930 | if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2)) | 941 | if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2)) |
931 | { | 942 | { |
932 | notion& notion1 = *notionByWnid_.at(lookup1); | 943 | notion& notion1 = *notionByWnid_.at(lookup1); |
933 | notion& notion2 = *notionByWnid_.at(lookup2); | 944 | notion& notion2 = *notionByWnid_.at(lookup2); |
934 | 945 | ||
935 | std::list<field> fields; | 946 | std::list<field> fields; |
936 | fields.emplace_back("holonym_id", notion1.getId()); | 947 | fields.emplace_back("holonym_id", notion1.getId()); |
937 | fields.emplace_back("meronym_id", notion2.getId()); | 948 | fields.emplace_back("meronym_id", notion2.getId()); |
938 | 949 | ||
939 | db_.insertIntoTable("member_meronymy", std::move(fields)); | 950 | db_.insertIntoTable("member_meronymy", std::move(fields)); |
940 | } | 951 | } |
941 | } | 952 | } |
942 | } | 953 | } |
943 | 954 | ||
944 | void generator::readWordNetPartMeronymy() | 955 | void generator::readWordNetPartMeronymy() |
945 | { | 956 | { |
946 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_mp.pl")); | 957 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_mp.pl")); |
@@ -948,7 +959,7 @@ namespace verbly { | |||
948 | for (auto line : lines) | 959 | for (auto line : lines) |
949 | { | 960 | { |
950 | ppgs.update(); | 961 | ppgs.update(); |
951 | 962 | ||
952 | std::regex relation("^mp\\((1\\d{8}),(1\\d{8})\\)\\."); | 963 | std::regex relation("^mp\\((1\\d{8}),(1\\d{8})\\)\\."); |
953 | std::smatch relation_data; | 964 | std::smatch relation_data; |
954 | if (!std::regex_search(line, relation_data, relation)) | 965 | if (!std::regex_search(line, relation_data, relation)) |
@@ -958,21 +969,21 @@ namespace verbly { | |||
958 | 969 | ||
959 | int lookup1 = std::stoi(relation_data[1]); | 970 | int lookup1 = std::stoi(relation_data[1]); |
960 | int lookup2 = std::stoi(relation_data[2]); | 971 | int lookup2 = std::stoi(relation_data[2]); |
961 | 972 | ||
962 | if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2)) | 973 | if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2)) |
963 | { | 974 | { |
964 | notion& notion1 = *notionByWnid_.at(lookup1); | 975 | notion& notion1 = *notionByWnid_.at(lookup1); |
965 | notion& notion2 = *notionByWnid_.at(lookup2); | 976 | notion& notion2 = *notionByWnid_.at(lookup2); |
966 | 977 | ||
967 | std::list<field> fields; | 978 | std::list<field> fields; |
968 | fields.emplace_back("holonym_id", notion1.getId()); | 979 | fields.emplace_back("holonym_id", notion1.getId()); |
969 | fields.emplace_back("meronym_id", notion2.getId()); | 980 | fields.emplace_back("meronym_id", notion2.getId()); |
970 | 981 | ||
971 | db_.insertIntoTable("part_meronymy", std::move(fields)); | 982 | db_.insertIntoTable("part_meronymy", std::move(fields)); |
972 | } | 983 | } |
973 | } | 984 | } |
974 | } | 985 | } |
975 | 986 | ||
976 | void generator::readWordNetSubstanceMeronymy() | 987 | void generator::readWordNetSubstanceMeronymy() |
977 | { | 988 | { |
978 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_ms.pl")); | 989 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_ms.pl")); |
@@ -980,7 +991,7 @@ namespace verbly { | |||
980 | for (auto line : lines) | 991 | for (auto line : lines) |
981 | { | 992 | { |
982 | ppgs.update(); | 993 | ppgs.update(); |
983 | 994 | ||
984 | std::regex relation("^ms\\((1\\d{8}),(1\\d{8})\\)\\."); | 995 | std::regex relation("^ms\\((1\\d{8}),(1\\d{8})\\)\\."); |
985 | std::smatch relation_data; | 996 | std::smatch relation_data; |
986 | if (!std::regex_search(line, relation_data, relation)) | 997 | if (!std::regex_search(line, relation_data, relation)) |
@@ -990,21 +1001,21 @@ namespace verbly { | |||
990 | 1001 | ||
991 | int lookup1 = std::stoi(relation_data[1]); | 1002 | int lookup1 = std::stoi(relation_data[1]); |
992 | int lookup2 = std::stoi(relation_data[2]); | 1003 | int lookup2 = std::stoi(relation_data[2]); |
993 | 1004 | ||
994 | if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2)) | 1005 | if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2)) |
995 | { | 1006 | { |
996 | notion& notion1 = *notionByWnid_.at(lookup1); | 1007 | notion& notion1 = *notionByWnid_.at(lookup1); |
997 | notion& notion2 = *notionByWnid_.at(lookup2); | 1008 | notion& notion2 = *notionByWnid_.at(lookup2); |
998 | 1009 | ||
999 | std::list<field> fields; | 1010 | std::list<field> fields; |
1000 | fields.emplace_back("holonym_id", notion1.getId()); | 1011 | fields.emplace_back("holonym_id", notion1.getId()); |
1001 | fields.emplace_back("meronym_id", notion2.getId()); | 1012 | fields.emplace_back("meronym_id", notion2.getId()); |
1002 | 1013 | ||
1003 | db_.insertIntoTable("substance_meronymy", std::move(fields)); | 1014 | db_.insertIntoTable("substance_meronymy", std::move(fields)); |
1004 | } | 1015 | } |
1005 | } | 1016 | } |
1006 | } | 1017 | } |
1007 | 1018 | ||
1008 | void generator::readWordNetPertainymy() | 1019 | void generator::readWordNetPertainymy() |
1009 | { | 1020 | { |
1010 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_per.pl")); | 1021 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_per.pl")); |
@@ -1012,7 +1023,7 @@ namespace verbly { | |||
1012 | for (auto line : lines) | 1023 | for (auto line : lines) |
1013 | { | 1024 | { |
1014 | ppgs.update(); | 1025 | ppgs.update(); |
1015 | 1026 | ||
1016 | std::regex relation("^per\\(([34]\\d{8}),(\\d+),([13]\\d{8}),(\\d+)\\)\\."); | 1027 | std::regex relation("^per\\(([34]\\d{8}),(\\d+),([13]\\d{8}),(\\d+)\\)\\."); |
1017 | std::smatch relation_data; | 1028 | std::smatch relation_data; |
1018 | if (!std::regex_search(line, relation_data, relation)) | 1029 | if (!std::regex_search(line, relation_data, relation)) |
@@ -1022,31 +1033,31 @@ namespace verbly { | |||
1022 | 1033 | ||
1023 | std::pair<int, int> lookup1(std::stoi(relation_data[1]), std::stoi(relation_data[2])); | 1034 | std::pair<int, int> lookup1(std::stoi(relation_data[1]), std::stoi(relation_data[2])); |
1024 | std::pair<int, int> lookup2(std::stoi(relation_data[3]), std::stoi(relation_data[4])); | 1035 | std::pair<int, int> lookup2(std::stoi(relation_data[3]), std::stoi(relation_data[4])); |
1025 | 1036 | ||
1026 | if (wordByWnidAndWnum_.count(lookup1) && wordByWnidAndWnum_.count(lookup2)) | 1037 | if (wordByWnidAndWnum_.count(lookup1) && wordByWnidAndWnum_.count(lookup2)) |
1027 | { | 1038 | { |
1028 | word& word1 = *wordByWnidAndWnum_.at(lookup1); | 1039 | word& word1 = *wordByWnidAndWnum_.at(lookup1); |
1029 | word& word2 = *wordByWnidAndWnum_.at(lookup2); | 1040 | word& word2 = *wordByWnidAndWnum_.at(lookup2); |
1030 | 1041 | ||
1031 | if (word1.getNotion().getPartOfSpeech() == part_of_speech::adjective) | 1042 | if (word1.getNotion().getPartOfSpeech() == part_of_speech::adjective) |
1032 | { | 1043 | { |
1033 | std::list<field> fields; | 1044 | std::list<field> fields; |
1034 | fields.emplace_back("pertainym_id", word1.getId()); | 1045 | fields.emplace_back("pertainym_id", word1.getId()); |
1035 | fields.emplace_back("noun_id", word2.getId()); | 1046 | fields.emplace_back("noun_id", word2.getId()); |
1036 | 1047 | ||
1037 | db_.insertIntoTable("pertainymy", std::move(fields)); | 1048 | db_.insertIntoTable("pertainymy", std::move(fields)); |
1038 | } else if (word1.getNotion().getPartOfSpeech() == part_of_speech::adverb) | 1049 | } else if (word1.getNotion().getPartOfSpeech() == part_of_speech::adverb) |
1039 | { | 1050 | { |
1040 | std::list<field> fields; | 1051 | std::list<field> fields; |
1041 | fields.emplace_back("mannernym_id", word1.getId()); | 1052 | fields.emplace_back("mannernym_id", word1.getId()); |
1042 | fields.emplace_back("adjective_id", word2.getId()); | 1053 | fields.emplace_back("adjective_id", word2.getId()); |
1043 | 1054 | ||
1044 | db_.insertIntoTable("mannernymy", std::move(fields)); | 1055 | db_.insertIntoTable("mannernymy", std::move(fields)); |
1045 | } | 1056 | } |
1046 | } | 1057 | } |
1047 | } | 1058 | } |
1048 | } | 1059 | } |
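The wn_per.pl facts parsed above carry four fields: an adjective or adverb synset (3/4 prefix) plus a word number, and the noun or adjective synset (1/3 prefix) it derives from, also with a word number. The generator files the resulting pair under pertainymy when the first word is an adjective and under mannernymy when it is an adverb. A standalone sketch with a hypothetical entry, showing how the (synset ID, word number) pairs that key wordByWnidAndWnum_ are built:

#include <iostream>
#include <regex>
#include <string>
#include <utility>

int main()
{
  // Hypothetical wn_per.pl entry; synset IDs and word numbers are placeholders.
  std::string line = "per(302341234,2,102345678,1).";

  std::regex relation("^per\\(([34]\\d{8}),(\\d+),([13]\\d{8}),(\\d+)\\)\\.");
  std::smatch relation_data;

  if (std::regex_search(line, relation_data, relation))
  {
    // These (synset ID, word number) pairs are the kind of keys used to look
    // up entries in wordByWnidAndWnum_.
    std::pair<int, int> lookup1(std::stoi(relation_data[1]), std::stoi(relation_data[2]));
    std::pair<int, int> lookup2(std::stoi(relation_data[3]), std::stoi(relation_data[4]));

    std::cout << lookup1.first << "/" << lookup1.second << " pertains to "
              << lookup2.first << "/" << lookup2.second << std::endl;
  }
}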
1049 | 1060 | ||
1050 | void generator::readWordNetSpecification() | 1061 | void generator::readWordNetSpecification() |
1051 | { | 1062 | { |
1052 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_sa.pl")); | 1063 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_sa.pl")); |
@@ -1064,21 +1075,21 @@ namespace verbly { | |||
1064 | 1075 | ||
1065 | std::pair<int, int> lookup1(std::stoi(relation_data[1]), std::stoi(relation_data[2])); | 1076 | std::pair<int, int> lookup1(std::stoi(relation_data[1]), std::stoi(relation_data[2])); |
1066 | std::pair<int, int> lookup2(std::stoi(relation_data[3]), std::stoi(relation_data[4])); | 1077 | std::pair<int, int> lookup2(std::stoi(relation_data[3]), std::stoi(relation_data[4])); |
1067 | 1078 | ||
1068 | if (wordByWnidAndWnum_.count(lookup1) && wordByWnidAndWnum_.count(lookup2)) | 1079 | if (wordByWnidAndWnum_.count(lookup1) && wordByWnidAndWnum_.count(lookup2)) |
1069 | { | 1080 | { |
1070 | word& word1 = *wordByWnidAndWnum_.at(lookup1); | 1081 | word& word1 = *wordByWnidAndWnum_.at(lookup1); |
1071 | word& word2 = *wordByWnidAndWnum_.at(lookup2); | 1082 | word& word2 = *wordByWnidAndWnum_.at(lookup2); |
1072 | 1083 | ||
1073 | std::list<field> fields; | 1084 | std::list<field> fields; |
1074 | fields.emplace_back("general_id", word1.getId()); | 1085 | fields.emplace_back("general_id", word1.getId()); |
1075 | fields.emplace_back("specific_id", word2.getId()); | 1086 | fields.emplace_back("specific_id", word2.getId()); |
1076 | 1087 | ||
1077 | db_.insertIntoTable("specification", std::move(fields)); | 1088 | db_.insertIntoTable("specification", std::move(fields)); |
1078 | } | 1089 | } |
1079 | } | 1090 | } |
1080 | } | 1091 | } |
1081 | 1092 | ||
1082 | void generator::readWordNetSimilarity() | 1093 | void generator::readWordNetSimilarity() |
1083 | { | 1094 | { |
1084 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_sim.pl")); | 1095 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_sim.pl")); |
@@ -1086,7 +1097,7 @@ namespace verbly { | |||
1086 | for (auto line : lines) | 1097 | for (auto line : lines) |
1087 | { | 1098 | { |
1088 | ppgs.update(); | 1099 | ppgs.update(); |
1089 | 1100 | ||
1090 | std::regex relation("^sim\\((3\\d{8}),(3\\d{8})\\)\\."); | 1101 | std::regex relation("^sim\\((3\\d{8}),(3\\d{8})\\)\\."); |
1091 | std::smatch relation_data; | 1102 | std::smatch relation_data; |
1092 | if (!std::regex_search(line, relation_data, relation)) | 1103 | if (!std::regex_search(line, relation_data, relation)) |
@@ -1096,21 +1107,21 @@ namespace verbly { | |||
1096 | 1107 | ||
1097 | int lookup1 = std::stoi(relation_data[1]); | 1108 | int lookup1 = std::stoi(relation_data[1]); |
1098 | int lookup2 = std::stoi(relation_data[2]); | 1109 | int lookup2 = std::stoi(relation_data[2]); |
1099 | 1110 | ||
1100 | if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2)) | 1111 | if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2)) |
1101 | { | 1112 | { |
1102 | notion& notion1 = *notionByWnid_.at(lookup1); | 1113 | notion& notion1 = *notionByWnid_.at(lookup1); |
1103 | notion& notion2 = *notionByWnid_.at(lookup2); | 1114 | notion& notion2 = *notionByWnid_.at(lookup2); |
1104 | 1115 | ||
1105 | std::list<field> fields; | 1116 | std::list<field> fields; |
1106 | fields.emplace_back("adjective_1_id", notion1.getId()); | 1117 | fields.emplace_back("adjective_1_id", notion1.getId()); |
1107 | fields.emplace_back("adjective_2_id", notion2.getId()); | 1118 | fields.emplace_back("adjective_2_id", notion2.getId()); |
1108 | 1119 | ||
1109 | db_.insertIntoTable("similarity", std::move(fields)); | 1120 | db_.insertIntoTable("similarity", std::move(fields)); |
1110 | } | 1121 | } |
1111 | } | 1122 | } |
1112 | } | 1123 | } |
1113 | 1124 | ||
1114 | std::list<std::string> generator::readFile(std::string path) | 1125 | std::list<std::string> generator::readFile(std::string path) |
1115 | { | 1126 | { |
1116 | std::ifstream file(path); | 1127 | std::ifstream file(path); |
@@ -1118,7 +1129,7 @@ namespace verbly { | |||
1118 | { | 1129 | { |
1119 | throw std::invalid_argument("Could not find file " + path); | 1130 | throw std::invalid_argument("Could not find file " + path); |
1120 | } | 1131 | } |
1121 | 1132 | ||
1122 | std::list<std::string> lines; | 1133 | std::list<std::string> lines; |
1123 | std::string line; | 1134 | std::string line; |
1124 | while (std::getline(file, line)) | 1135 | while (std::getline(file, line)) |
@@ -1127,13 +1138,13 @@ namespace verbly { | |||
1127 | { | 1138 | { |
1128 | line.pop_back(); | 1139 | line.pop_back(); |
1129 | } | 1140 | } |
1130 | 1141 | ||
1131 | lines.push_back(line); | 1142 | lines.push_back(line); |
1132 | } | 1143 | } |
1133 | 1144 | ||
1134 | return lines; | 1145 | return lines; |
1135 | } | 1146 | } |
1136 | 1147 | ||
1137 | part_of_speech generator::partOfSpeechByWnid(int wnid) | 1148 | part_of_speech generator::partOfSpeechByWnid(int wnid) |
1138 | { | 1149 | { |
1139 | switch (wnid / 100000000) | 1150 | switch (wnid / 100000000) |
@@ -1145,14 +1156,14 @@ namespace verbly { | |||
1145 | default: throw std::domain_error("Invalid WordNet synset ID: " + std::to_string(wnid)); | 1156 | default: throw std::domain_error("Invalid WordNet synset ID: " + std::to_string(wnid)); |
1146 | } | 1157 | } |
1147 | } | 1158 | } |
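Since WordNet Prolog synset IDs are nine digits, dividing by 100000000 isolates the leading digit, which encodes the part of speech (1 noun, 2 verb, 3 adjective, 4 adverb in the standard numbering, consistent with the ID prefixes the regexes above accept); anything else falls through to the domain_error. A standalone sketch of the same dispatch, using a stand-in enum rather than verbly's part_of_speech:

#include <iostream>
#include <stdexcept>
#include <string>

// Stand-in for verbly::part_of_speech; the mapping assumes the standard
// WordNet Prolog numbering (1 noun, 2 verb, 3 adjective, 4 adverb).
enum class pos { noun, verb, adjective, adverb };

pos posByWnid(int wnid)
{
  switch (wnid / 100000000)  // leading digit of a nine-digit synset ID
  {
    case 1: return pos::noun;
    case 2: return pos::verb;
    case 3: return pos::adjective;
    case 4: return pos::adverb;
    default: throw std::domain_error("Invalid WordNet synset ID: " + std::to_string(wnid));
  }
}

int main()
{
  // 302345678 is a hypothetical adjective synset ID (leading digit 3).
  std::cout << (posByWnid(302345678) == pos::adjective) << std::endl;  // prints 1
}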
1148 | 1159 | ||
1149 | notion& generator::createNotion(part_of_speech partOfSpeech) | 1160 | notion& generator::createNotion(part_of_speech partOfSpeech) |
1150 | { | 1161 | { |
1151 | notions_.emplace_back(partOfSpeech); | 1162 | notions_.emplace_back(partOfSpeech); |
1152 | 1163 | ||
1153 | return notions_.back(); | 1164 | return notions_.back(); |
1154 | } | 1165 | } |
1155 | 1166 | ||
1156 | notion& generator::lookupOrCreateNotion(int wnid) | 1167 | notion& generator::lookupOrCreateNotion(int wnid) |
1157 | { | 1168 | { |
1158 | if (!notionByWnid_.count(wnid)) | 1169 | if (!notionByWnid_.count(wnid)) |
@@ -1160,10 +1171,10 @@ namespace verbly { | |||
1160 | notions_.emplace_back(partOfSpeechByWnid(wnid), wnid); | 1171 | notions_.emplace_back(partOfSpeechByWnid(wnid), wnid); |
1161 | notionByWnid_[wnid] = ¬ions_.back(); | 1172 | notionByWnid_[wnid] = ¬ions_.back(); |
1162 | } | 1173 | } |
1163 | 1174 | ||
1164 | return *notionByWnid_.at(wnid); | 1175 | return *notionByWnid_.at(wnid); |
1165 | } | 1176 | } |
1166 | 1177 | ||
1167 | lemma& generator::lookupOrCreateLemma(std::string base_form) | 1178 | lemma& generator::lookupOrCreateLemma(std::string base_form) |
1168 | { | 1179 | { |
1169 | if (!lemmaByBaseForm_.count(base_form)) | 1180 | if (!lemmaByBaseForm_.count(base_form)) |
@@ -1171,10 +1182,10 @@ namespace verbly { | |||
1171 | lemmas_.emplace_back(lookupOrCreateForm(base_form)); | 1182 | lemmas_.emplace_back(lookupOrCreateForm(base_form)); |
1172 | lemmaByBaseForm_[base_form] = &lemmas_.back(); | 1183 | lemmaByBaseForm_[base_form] = &lemmas_.back(); |
1173 | } | 1184 | } |
1174 | 1185 | ||
1175 | return *lemmaByBaseForm_.at(base_form); | 1186 | return *lemmaByBaseForm_.at(base_form); |
1176 | } | 1187 | } |
1177 | 1188 | ||
1178 | form& generator::lookupOrCreateForm(std::string text) | 1189 | form& generator::lookupOrCreateForm(std::string text) |
1179 | { | 1190 | { |
1180 | if (!formByText_.count(text)) | 1191 | if (!formByText_.count(text)) |
@@ -1182,32 +1193,32 @@ namespace verbly { | |||
1182 | forms_.emplace_back(text); | 1193 | forms_.emplace_back(text); |
1183 | formByText_[text] = &forms_.back(); | 1194 | formByText_[text] = &forms_.back(); |
1184 | } | 1195 | } |
1185 | 1196 | ||
1186 | return *formByText_[text]; | 1197 | return *formByText_[text]; |
1187 | } | 1198 | } |
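lookupOrCreateNotion, lookupOrCreateLemma, and lookupOrCreateForm share one memoisation pattern: objects are owned by a generator-held container, and a map from the natural key to a raw pointer deduplicates later lookups. For those stored pointers to stay valid across further emplace_back calls, the backing containers need stable element addresses; the sketch below assumes std::list, which has that property (the actual member declarations live in the header, not in this diff), and uses a hypothetical "record" type standing in for notion, lemma, and form.

#include <iostream>
#include <list>
#include <map>
#include <string>
#include <utility>

// Hypothetical, simplified version of the lookup-or-create pattern.
struct record
{
  explicit record(std::string key) : key_(std::move(key)) {}
  std::string key_;
};

class registry
{
public:
  record& lookupOrCreate(const std::string& key)
  {
    if (!byKey_.count(key))
    {
      records_.emplace_back(key);      // std::list never invalidates addresses
      byKey_[key] = &records_.back();  // of existing elements
    }

    return *byKey_.at(key);
  }

private:
  std::list<record> records_;
  std::map<std::string, record*> byKey_;
};

int main()
{
  registry reg;
  record& a = reg.lookupOrCreate("run");
  record& b = reg.lookupOrCreate("run");
  std::cout << (&a == &b) << std::endl;  // prints 1: the same object both times
}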
1188 | 1199 | ||
1189 | template <typename... Args> word& generator::createWord(Args&&... args) | 1200 | template <typename... Args> word& generator::createWord(Args&&... args) |
1190 | { | 1201 | { |
1191 | words_.emplace_back(std::forward<Args>(args)...); | 1202 | words_.emplace_back(std::forward<Args>(args)...); |
1192 | word& w = words_.back(); | 1203 | word& w = words_.back(); |
1193 | 1204 | ||
1194 | wordsByBaseForm_[w.getLemma().getBaseForm().getText()].insert(&w); | 1205 | wordsByBaseForm_[w.getLemma().getBaseForm().getText()].insert(&w); |
1195 | 1206 | ||
1196 | if (w.getNotion().hasWnid()) | 1207 | if (w.getNotion().hasWnid()) |
1197 | { | 1208 | { |
1198 | wordsByWnid_[w.getNotion().getWnid()].insert(&w); | 1209 | wordsByWnid_[w.getNotion().getWnid()].insert(&w); |
1199 | } | 1210 | } |
1200 | 1211 | ||
1201 | return w; | 1212 | return w; |
1202 | } | 1213 | } |
1203 | 1214 | ||
1204 | group& generator::createGroup(xmlNodePtr top) | 1215 | group& generator::createGroup(xmlNodePtr top) |
1205 | { | 1216 | { |
1206 | groups_.emplace_back(); | 1217 | groups_.emplace_back(); |
1207 | group& grp = groups_.back(); | 1218 | group& grp = groups_.back(); |
1208 | 1219 | ||
1209 | xmlChar* key; | 1220 | xmlChar* key; |
1210 | 1221 | ||
1211 | for (xmlNodePtr node = top->xmlChildrenNode; node != nullptr; node = node->next) | 1222 | for (xmlNodePtr node = top->xmlChildrenNode; node != nullptr; node = node->next) |
1212 | { | 1223 | { |
1213 | if (!xmlStrcmp(node->name, reinterpret_cast<const xmlChar*>("SUBCLASSES"))) | 1224 | if (!xmlStrcmp(node->name, reinterpret_cast<const xmlChar*>("SUBCLASSES"))) |
@@ -1223,14 +1234,14 @@ namespace verbly { | |||
1223 | } catch (const std::exception& e) | 1234 | } catch (const std::exception& e) |
1224 | { | 1235 | { |
1225 | key = xmlGetProp(subclass, reinterpret_cast<const xmlChar*>("ID")); | 1236 | key = xmlGetProp(subclass, reinterpret_cast<const xmlChar*>("ID")); |
1226 | 1237 | ||
1227 | if (key == nullptr) | 1238 | if (key == nullptr) |
1228 | { | 1239 | { |
1229 | std::throw_with_nested(std::logic_error("Error parsing IDless subgroup")); | 1240 | std::throw_with_nested(std::logic_error("Error parsing IDless subgroup")); |
1230 | } else { | 1241 | } else { |
1231 | std::string subgroupId(reinterpret_cast<const char*>(key)); | 1242 | std::string subgroupId(reinterpret_cast<const char*>(key)); |
1232 | xmlFree(key); | 1243 | xmlFree(key); |
1233 | 1244 | ||
1234 | std::throw_with_nested(std::logic_error("Error parsing subgroup " + subgroupId)); | 1245 | std::throw_with_nested(std::logic_error("Error parsing subgroup " + subgroupId)); |
1235 | } | 1246 | } |
1236 | } | 1247 | } |
@@ -1245,25 +1256,25 @@ namespace verbly { | |||
1245 | key = xmlGetProp(member, reinterpret_cast<const xmlChar*>("wn")); | 1256 | key = xmlGetProp(member, reinterpret_cast<const xmlChar*>("wn")); |
1246 | std::string wnSenses(reinterpret_cast<const char*>(key)); | 1257 | std::string wnSenses(reinterpret_cast<const char*>(key)); |
1247 | xmlFree(key); | 1258 | xmlFree(key); |
1248 | 1259 | ||
1249 | auto wnSenseKeys = split<std::list<std::string>>(wnSenses, " "); | 1260 | auto wnSenseKeys = split<std::list<std::string>>(wnSenses, " "); |
1250 | if (!wnSenseKeys.empty()) | 1261 | if (!wnSenseKeys.empty()) |
1251 | { | 1262 | { |
1252 | std::list<std::string> tempKeys; | 1263 | std::list<std::string> tempKeys; |
1253 | 1264 | ||
1254 | std::transform(std::begin(wnSenseKeys), std::end(wnSenseKeys), std::back_inserter(tempKeys), [] (std::string sense) { | 1265 | std::transform(std::begin(wnSenseKeys), std::end(wnSenseKeys), std::back_inserter(tempKeys), [] (std::string sense) { |
1255 | return sense + "::"; | 1266 | return sense + "::"; |
1256 | }); | 1267 | }); |
1257 | 1268 | ||
1258 | std::list<std::string> filteredKeys; | 1269 | std::list<std::string> filteredKeys; |
1259 | 1270 | ||
1260 | std::remove_copy_if(std::begin(tempKeys), std::end(tempKeys), std::back_inserter(filteredKeys), [&] (std::string sense) { | 1271 | std::remove_copy_if(std::begin(tempKeys), std::end(tempKeys), std::back_inserter(filteredKeys), [&] (std::string sense) { |
1261 | return !wnSenseKeys_.count(sense); | 1272 | return !wnSenseKeys_.count(sense); |
1262 | }); | 1273 | }); |
1263 | 1274 | ||
1264 | wnSenseKeys = std::move(filteredKeys); | 1275 | wnSenseKeys = std::move(filteredKeys); |
1265 | } | 1276 | } |
1266 | 1277 | ||
1267 | if (!wnSenseKeys.empty()) | 1278 | if (!wnSenseKeys.empty()) |
1268 | { | 1279 | { |
1269 | for (std::string sense : wnSenseKeys) | 1280 | for (std::string sense : wnSenseKeys) |
@@ -1275,11 +1286,11 @@ namespace verbly { | |||
1275 | key = xmlGetProp(member, reinterpret_cast<const xmlChar*>("name")); | 1286 | key = xmlGetProp(member, reinterpret_cast<const xmlChar*>("name")); |
1276 | std::string memberName(reinterpret_cast<const char*>(key)); | 1287 | std::string memberName(reinterpret_cast<const char*>(key)); |
1277 | xmlFree(key); | 1288 | xmlFree(key); |
1278 | 1289 | ||
1279 | notion& n = createNotion(part_of_speech::verb); | 1290 | notion& n = createNotion(part_of_speech::verb); |
1280 | lemma& l = lookupOrCreateLemma(memberName); | 1291 | lemma& l = lookupOrCreateLemma(memberName); |
1281 | word& w = createWord(n, l); | 1292 | word& w = createWord(n, l); |
1282 | 1293 | ||
1283 | w.setVerbGroup(grp); | 1294 | w.setVerbGroup(grp); |
1284 | } | 1295 | } |
1285 | } | 1296 | } |
@@ -1293,7 +1304,7 @@ namespace verbly { | |||
1293 | key = xmlGetProp(roletopnode, reinterpret_cast<const xmlChar*>("type")); | 1304 | key = xmlGetProp(roletopnode, reinterpret_cast<const xmlChar*>("type")); |
1294 | std::string roleName = reinterpret_cast<const char*>(key); | 1305 | std::string roleName = reinterpret_cast<const char*>(key); |
1295 | xmlFree(key); | 1306 | xmlFree(key); |
1296 | 1307 | ||
1297 | selrestr roleSelrestrs; | 1308 | selrestr roleSelrestrs; |
1298 | for (xmlNodePtr rolenode = roletopnode->xmlChildrenNode; rolenode != nullptr; rolenode = rolenode->next) | 1309 | for (xmlNodePtr rolenode = roletopnode->xmlChildrenNode; rolenode != nullptr; rolenode = rolenode->next) |
1299 | { | 1310 | { |
@@ -1314,19 +1325,19 @@ namespace verbly { | |||
1314 | { | 1325 | { |
1315 | frames_.emplace_back(); | 1326 | frames_.emplace_back(); |
1316 | frame& fr = frames_.back(); | 1327 | frame& fr = frames_.back(); |
1317 | 1328 | ||
1318 | for (xmlNodePtr framenode = frametopnode->xmlChildrenNode; framenode != nullptr; framenode = framenode->next) | 1329 | for (xmlNodePtr framenode = frametopnode->xmlChildrenNode; framenode != nullptr; framenode = framenode->next) |
1319 | { | 1330 | { |
1320 | if (!xmlStrcmp(framenode->name, reinterpret_cast<const xmlChar*>("SYNTAX"))) | 1331 | if (!xmlStrcmp(framenode->name, reinterpret_cast<const xmlChar*>("SYNTAX"))) |
1321 | { | 1332 | { |
1322 | for (xmlNodePtr syntaxnode = framenode->xmlChildrenNode; syntaxnode != nullptr; syntaxnode = syntaxnode->next) | 1333 | for (xmlNodePtr syntaxnode = framenode->xmlChildrenNode; syntaxnode != nullptr; syntaxnode = syntaxnode->next) |
1323 | { | 1334 | { |
1324 | if (!xmlStrcmp(syntaxnode->name, reinterpret_cast<const xmlChar*>("NP"))) | 1335 | if (!xmlStrcmp(syntaxnode->name, reinterpret_cast<const xmlChar*>("NP"))) |
1325 | { | 1336 | { |
1326 | key = xmlGetProp(syntaxnode, reinterpret_cast<const xmlChar*>("value")); | 1337 | key = xmlGetProp(syntaxnode, reinterpret_cast<const xmlChar*>("value")); |
1327 | std::string partRole = reinterpret_cast<const char*>(key); | 1338 | std::string partRole = reinterpret_cast<const char*>(key); |
1328 | xmlFree(key); | 1339 | xmlFree(key); |
1329 | 1340 | ||
1330 | selrestr partSelrestrs; | 1341 | selrestr partSelrestrs; |
1331 | std::set<std::string> partSynrestrs; | 1342 | std::set<std::string> partSynrestrs; |
1332 | 1343 | ||
@@ -1344,13 +1355,13 @@ namespace verbly { | |||
1344 | } | 1355 | } |
1345 | } | 1356 | } |
1346 | } | 1357 | } |
1347 | 1358 | ||
1348 | if (!xmlStrcmp(npnode->name, reinterpret_cast<const xmlChar*>("SELRESTRS"))) | 1359 | if (!xmlStrcmp(npnode->name, reinterpret_cast<const xmlChar*>("SELRESTRS"))) |
1349 | { | 1360 | { |
1350 | partSelrestrs = parseSelrestr(npnode); | 1361 | partSelrestrs = parseSelrestr(npnode); |
1351 | } | 1362 | } |
1352 | } | 1363 | } |
1353 | 1364 | ||
1354 | fr.push_back(part::createNounPhrase(std::move(partRole), std::move(partSelrestrs), std::move(partSynrestrs))); | 1365 | fr.push_back(part::createNounPhrase(std::move(partRole), std::move(partSelrestrs), std::move(partSynrestrs))); |
1355 | } else if (!xmlStrcmp(syntaxnode->name, reinterpret_cast<const xmlChar*>("VERB"))) | 1366 | } else if (!xmlStrcmp(syntaxnode->name, reinterpret_cast<const xmlChar*>("VERB"))) |
1356 | { | 1367 | { |
@@ -1359,11 +1370,11 @@ namespace verbly { | |||
1359 | { | 1370 | { |
1360 | std::set<std::string> partChoices; | 1371 | std::set<std::string> partChoices; |
1361 | bool partLiteral; | 1372 | bool partLiteral; |
1362 | 1373 | ||
1363 | if (xmlHasProp(syntaxnode, reinterpret_cast<const xmlChar*>("value"))) | 1374 | if (xmlHasProp(syntaxnode, reinterpret_cast<const xmlChar*>("value"))) |
1364 | { | 1375 | { |
1365 | partLiteral = true; | 1376 | partLiteral = true; |
1366 | 1377 | ||
1367 | key = xmlGetProp(syntaxnode, reinterpret_cast<const xmlChar*>("value")); | 1378 | key = xmlGetProp(syntaxnode, reinterpret_cast<const xmlChar*>("value")); |
1368 | std::string choicesStr = reinterpret_cast<const char*>(key); | 1379 | std::string choicesStr = reinterpret_cast<const char*>(key); |
1369 | xmlFree(key); | 1380 | xmlFree(key); |
@@ -1380,7 +1391,7 @@ namespace verbly { | |||
1380 | } | 1391 | } |
1381 | } else { | 1392 | } else { |
1382 | partLiteral = false; | 1393 | partLiteral = false; |
1383 | 1394 | ||
1384 | for (xmlNodePtr npnode = syntaxnode->xmlChildrenNode; npnode != nullptr; npnode = npnode->next) | 1395 | for (xmlNodePtr npnode = syntaxnode->xmlChildrenNode; npnode != nullptr; npnode = npnode->next) |
1385 | { | 1396 | { |
1386 | if (!xmlStrcmp(npnode->name, reinterpret_cast<const xmlChar*>("SELRESTRS"))) | 1397 | if (!xmlStrcmp(npnode->name, reinterpret_cast<const xmlChar*>("SELRESTRS"))) |
@@ -1397,7 +1408,7 @@ namespace verbly { | |||
1397 | } | 1408 | } |
1398 | } | 1409 | } |
1399 | } | 1410 | } |
1400 | 1411 | ||
1401 | fr.push_back(part::createPreposition(std::move(partChoices), partLiteral)); | 1412 | fr.push_back(part::createPreposition(std::move(partChoices), partLiteral)); |
1402 | } else if (!xmlStrcmp(syntaxnode->name, reinterpret_cast<const xmlChar*>("ADJ"))) | 1413 | } else if (!xmlStrcmp(syntaxnode->name, reinterpret_cast<const xmlChar*>("ADJ"))) |
1403 | { | 1414 | { |
@@ -1410,7 +1421,7 @@ namespace verbly { | |||
1410 | key = xmlGetProp(syntaxnode, reinterpret_cast<const xmlChar*>("value")); | 1421 | key = xmlGetProp(syntaxnode, reinterpret_cast<const xmlChar*>("value")); |
1411 | std::string literalValue = reinterpret_cast<const char*>(key); | 1422 | std::string literalValue = reinterpret_cast<const char*>(key); |
1412 | xmlFree(key); | 1423 | xmlFree(key); |
1413 | 1424 | ||
1414 | fr.push_back(part::createLiteral(literalValue)); | 1425 | fr.push_back(part::createLiteral(literalValue)); |
1415 | } else { | 1426 | } else { |
1416 | continue; | 1427 | continue; |
@@ -1427,11 +1438,11 @@ namespace verbly { | |||
1427 | 1438 | ||
1428 | return grp; | 1439 | return grp; |
1429 | } | 1440 | } |
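createGroup walks one VerbNet class node: SUBCLASSES entries recurse into the same function, each member contributes a verb word (with its wn sense keys filtered against wnSenseKeys_), thematic roles carry a type plus selectional restrictions, and each frame's SYNTAX children (NP, VERB, ADJ and the related branches) become frame parts. The fragment below is a hypothetical, heavily trimmed VerbNet-style class; element names beyond those checked in the code above (VNCLASS, MEMBERS, THEMROLES, FRAMES) follow the published VerbNet schema, and all attribute values are purely illustrative. The SELRESTRS block under the role is the kind of node handled by parseSelrestr, defined next.

#include <cstring>
#include <iostream>
#include <libxml/parser.h>
#include <libxml/tree.h>

int main()
{
  // Hypothetical VerbNet-style fragment exercising the node and attribute
  // names the walker inspects: a member with "name" and "wn" attributes, a
  // role with a "type" attribute and SELRESTRS, and a SYNTAX sequence.
  const char* sample = R"(
    <VNCLASS ID="hit-18.1">
      <MEMBERS>
        <MEMBER name="hit" wn="hit%2:35:00"/>
      </MEMBERS>
      <THEMROLES>
        <THEMROLE type="Agent">
          <SELRESTRS>
            <SELRESTR Value="+" type="animate"/>
          </SELRESTRS>
        </THEMROLE>
      </THEMROLES>
      <FRAMES>
        <FRAME>
          <SYNTAX>
            <NP value="Agent"><SYNRESTRS/></NP>
            <VERB/>
            <NP value="Patient"><SYNRESTRS/></NP>
          </SYNTAX>
        </FRAME>
      </FRAMES>
    </VNCLASS>)";

  xmlDocPtr doc = xmlReadMemory(sample, static_cast<int>(std::strlen(sample)),
                                "sample.xml", nullptr, 0);
  if (doc != nullptr)
  {
    std::cout << "root element: " << xmlDocGetRootElement(doc)->name << std::endl;
    xmlFreeDoc(doc);
  }

  xmlCleanupParser();
}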
1430 | 1441 | ||
1431 | selrestr generator::parseSelrestr(xmlNodePtr top) | 1442 | selrestr generator::parseSelrestr(xmlNodePtr top) |
1432 | { | 1443 | { |
1433 | xmlChar* key; | 1444 | xmlChar* key; |
1434 | 1445 | ||
1435 | if (!xmlStrcmp(top->name, reinterpret_cast<const xmlChar*>("SELRESTRS"))) | 1446 | if (!xmlStrcmp(top->name, reinterpret_cast<const xmlChar*>("SELRESTRS"))) |
1436 | { | 1447 | { |
1437 | if (xmlChildElementCount(top) == 0) | 1448 | if (xmlChildElementCount(top) == 0) |
@@ -1449,10 +1460,10 @@ namespace verbly { | |||
1449 | { | 1460 | { |
1450 | orlogic = true; | 1461 | orlogic = true; |
1451 | } | 1462 | } |
1452 | 1463 | ||
1453 | xmlFree(key); | 1464 | xmlFree(key); |
1454 | } | 1465 | } |
1455 | 1466 | ||
1456 | std::list<selrestr> children; | 1467 | std::list<selrestr> children; |
1457 | for (xmlNodePtr selrestr = top->xmlChildrenNode; selrestr != nullptr; selrestr = selrestr->next) | 1468 | for (xmlNodePtr selrestr = top->xmlChildrenNode; selrestr != nullptr; selrestr = selrestr->next) |
1458 | { | 1469 | { |
@@ -1462,7 +1473,7 @@ namespace verbly { | |||
1462 | children.push_back(parseSelrestr(selrestr)); | 1473 | children.push_back(parseSelrestr(selrestr)); |
1463 | } | 1474 | } |
1464 | } | 1475 | } |
1465 | 1476 | ||
1466 | return selrestr(children, orlogic); | 1477 | return selrestr(children, orlogic); |
1467 | } | 1478 | } |
1468 | } else if (!xmlStrcmp(top->name, reinterpret_cast<const xmlChar*>("SELRESTR"))) | 1479 | } else if (!xmlStrcmp(top->name, reinterpret_cast<const xmlChar*>("SELRESTR"))) |
@@ -1474,12 +1485,12 @@ namespace verbly { | |||
1474 | key = xmlGetProp(top, reinterpret_cast<const xmlChar*>("type")); | 1485 | key = xmlGetProp(top, reinterpret_cast<const xmlChar*>("type")); |
1475 | std::string selRestriction = reinterpret_cast<const char*>(key); | 1486 | std::string selRestriction = reinterpret_cast<const char*>(key); |
1476 | xmlFree(key); | 1487 | xmlFree(key); |
1477 | 1488 | ||
1478 | return selrestr(selRestriction, selPos); | 1489 | return selrestr(selRestriction, selPos); |
1479 | } else { | 1490 | } else { |
1480 | throw std::logic_error("Badly formatted selrestr"); | 1491 | throw std::logic_error("Badly formatted selrestr"); |
1481 | } | 1492 | } |
1482 | } | 1493 | } |
1483 | 1494 | ||
1484 | }; | 1495 | }; |
1485 | }; | 1496 | }; |