diff options
author | Kelly Rauchenberger <fefferburbia@gmail.com> | 2016-03-24 23:16:07 -0400 |
---|---|---|
committer | Kelly Rauchenberger <fefferburbia@gmail.com> | 2016-03-24 23:16:07 -0400 |
commit | eef5de613c75661e5d94baa086f6f2ddc26c7ed0 (patch) | |
tree | 180230f6a245c5bca94d894273f5d2b93ded3f04 /generator/generator.cpp | |
parent | d5ee4e39e5b5b3b8daa85cd972802195ad35e965 (diff) | |
download | verbly-eef5de613c75661e5d94baa086f6f2ddc26c7ed0.tar.gz verbly-eef5de613c75661e5d94baa086f6f2ddc26c7ed0.tar.bz2 verbly-eef5de613c75661e5d94baa086f6f2ddc26c7ed0.zip |
Added verb frames
In addition:
- Added prepositions.
- Rewrote much of the query interface. For many relationships, it now supports nested AND, OR, and NOT logic.
- Rewrote the token class. It is now a union-like class instead of a polymorphic one, which means smart pointers are no longer necessary.
- Querying with regard to word derivation has been temporarily removed.
- Sentinel values are now supported for all word types.
- The VerbNet data retrieved from http://verbs.colorado.edu/~mpalmer/projects/verbnet/downloads.html was found to be not entirely satisfactory in some respects, especially regarding adjective phrases. A patch file describing the changes made to the VerbNet v3.2 download for the canonical verbly datafile is now included in the repository.
Diffstat (limited to 'generator/generator.cpp')
-rw-r--r-- | generator/generator.cpp | 545 |
1 files changed, 523 insertions, 22 deletions
diff --git a/generator/generator.cpp b/generator/generator.cpp index 7ec94df..aea750c 100644 --- a/generator/generator.cpp +++ b/generator/generator.cpp | |||
@@ -11,36 +11,75 @@ | |||
11 | #include <regex> | 11 | #include <regex> |
12 | #include <list> | 12 | #include <list> |
13 | #include <algorithm> | 13 | #include <algorithm> |
14 | #include <json.hpp> | ||
14 | #include "progress.h" | 15 | #include "progress.h" |
16 | #include "../lib/util.h" | ||
15 | 17 | ||
16 | struct verb { | 18 | using json = nlohmann::json; |
19 | |||
20 | struct verb_t { | ||
17 | std::string infinitive; | 21 | std::string infinitive; |
18 | std::string past_tense; | 22 | std::string past_tense; |
19 | std::string past_participle; | 23 | std::string past_participle; |
20 | std::string ing_form; | 24 | std::string ing_form; |
21 | std::string s_form; | 25 | std::string s_form; |
26 | int id; | ||
22 | }; | 27 | }; |
23 | 28 | ||
24 | struct adjective { | 29 | struct adjective_t { |
25 | std::string base; | 30 | std::string base; |
26 | std::string comparative; | 31 | std::string comparative; |
27 | std::string superlative; | 32 | std::string superlative; |
28 | }; | 33 | }; |
29 | 34 | ||
30 | struct noun { | 35 | struct noun_t { |
31 | std::string singular; | 36 | std::string singular; |
32 | std::string plural; | 37 | std::string plural; |
33 | }; | 38 | }; |
34 | 39 | ||
35 | struct group { | 40 | struct selrestr_t { |
41 | enum class type_t { | ||
42 | singleton, | ||
43 | andlogic, | ||
44 | orlogic, | ||
45 | empty | ||
46 | }; | ||
47 | type_t type; | ||
48 | std::string restriction; | ||
49 | bool pos; | ||
50 | std::list<selrestr_t> subordinates; | ||
51 | }; | ||
52 | |||
53 | struct framepart_t { | ||
54 | enum class type_t { | ||
55 | np, | ||
56 | v, | ||
57 | pp, | ||
58 | adj, | ||
59 | adv, | ||
60 | lex | ||
61 | }; | ||
62 | type_t type; | ||
63 | std::string role; | ||
64 | selrestr_t selrestrs; | ||
65 | std::set<std::string> preprestrs; | ||
66 | std::set<std::string> synrestrs; | ||
67 | std::list<std::string> choices; | ||
68 | std::string lexval; | ||
69 | }; | ||
70 | |||
71 | struct group_t { | ||
36 | std::string id; | 72 | std::string id; |
73 | std::string parent; | ||
37 | std::set<std::string> members; | 74 | std::set<std::string> members; |
75 | std::map<std::string, selrestr_t> roles; | ||
76 | std::list<std::list<framepart_t>> frames; | ||
38 | }; | 77 | }; |
39 | 78 | ||
40 | std::map<std::string, group> groups; | 79 | std::map<std::string, group_t> groups; |
41 | std::map<std::string, verb> verbs; | 80 | std::map<std::string, verb_t> verbs; |
42 | std::map<std::string, adjective> adjectives; | 81 | std::map<std::string, adjective_t> adjectives; |
43 | std::map<std::string, noun> nouns; | 82 | std::map<std::string, noun_t> nouns; |
44 | std::map<int, std::map<int, int>> wn; | 83 | std::map<int, std::map<int, int>> wn; |
45 | std::map<std::string, std::set<std::string>> pronunciations; | 84 | std::map<std::string, std::set<std::string>> pronunciations; |
46 | 85 | ||
@@ -59,15 +98,97 @@ void print_usage() | |||
59 | exit(1); | 98 | exit(1); |
60 | } | 99 | } |
61 | 100 | ||
62 | void db_error(sqlite3* ppdb, std::string) | 101 | void db_error(sqlite3* ppdb, std::string query) |
63 | { | 102 | { |
64 | std::cout << "Error writing to output database: " << sqlite3_errmsg(ppdb) << std::endl; | 103 | std::cout << "Error writing to output database: " << sqlite3_errmsg(ppdb) << std::endl; |
104 | std::cout << query << std::endl; | ||
65 | sqlite3_close_v2(ppdb); | 105 | sqlite3_close_v2(ppdb); |
66 | print_usage(); | 106 | print_usage(); |
67 | } | 107 | } |
68 | 108 | ||
69 | /* | 109 | json export_selrestrs(selrestr_t r) |
70 | void parse_group(xmlNodePtr top, std::string filename) | 110 | { |
111 | if (r.type == selrestr_t::type_t::empty) | ||
112 | { | ||
113 | return {}; | ||
114 | } else if (r.type == selrestr_t::type_t::singleton) | ||
115 | { | ||
116 | json result; | ||
117 | result["type"] = r.restriction; | ||
118 | result["pos"] = r.pos; | ||
119 | return result; | ||
120 | } else { | ||
121 | json result; | ||
122 | if (r.type == selrestr_t::type_t::andlogic) | ||
123 | { | ||
124 | result["logic"] = "and"; | ||
125 | } else { | ||
126 | result["logic"] = "or"; | ||
127 | } | ||
128 | |||
129 | std::list<json> outlist; | ||
130 | std::transform(std::begin(r.subordinates), std::end(r.subordinates), std::back_inserter(outlist), &export_selrestrs); | ||
131 | result["children"] = outlist; | ||
132 | |||
133 | return result; | ||
134 | } | ||
135 | } | ||
136 | |||
137 | selrestr_t parse_selrestrs(xmlNodePtr top, std::string filename) | ||
138 | { | ||
139 | selrestr_t r; | ||
140 | xmlChar* key; | ||
141 | |||
142 | if (!xmlStrcmp(top->name, (const xmlChar*) "SELRESTRS")) | ||
143 | { | ||
144 | if (xmlChildElementCount(top) == 0) | ||
145 | { | ||
146 | r.type = selrestr_t::type_t::empty; | ||
147 | } else if (xmlChildElementCount(top) == 1) | ||
148 | { | ||
149 | r = parse_selrestrs(xmlFirstElementChild(top), filename); | ||
150 | } else { | ||
151 | r.type = selrestr_t::type_t::andlogic; | ||
152 | |||
153 | if (xmlHasProp(top, (const xmlChar*) "logic")) | ||
154 | { | ||
155 | key = xmlGetProp(top, (const xmlChar*) "logic"); | ||
156 | if (!xmlStrcmp(key, (const xmlChar*) "or")) | ||
157 | { | ||
158 | r.type = selrestr_t::type_t::orlogic; | ||
159 | } | ||
160 | xmlFree(key); | ||
161 | } | ||
162 | |||
163 | for (xmlNodePtr selrestr = top->xmlChildrenNode; selrestr != nullptr; selrestr = selrestr->next) | ||
164 | { | ||
165 | if (!xmlStrcmp(selrestr->name, (const xmlChar*) "SELRESTRS") || !xmlStrcmp(selrestr->name, (const xmlChar*) "SELRESTR")) | ||
166 | { | ||
167 | r.subordinates.push_back(parse_selrestrs(selrestr, filename)); | ||
168 | } | ||
169 | } | ||
170 | } | ||
171 | } else if (!xmlStrcmp(top->name, (const xmlChar*) "SELRESTR")) | ||
172 | { | ||
173 | r.type = selrestr_t::type_t::singleton; | ||
174 | |||
175 | key = xmlGetProp(top, (xmlChar*) "Value"); | ||
176 | r.pos = (std::string((const char*)key) == "+"); | ||
177 | xmlFree(key); | ||
178 | |||
179 | key = xmlGetProp(top, (xmlChar*) "type"); | ||
180 | r.restriction = (const char*) key; | ||
181 | xmlFree(key); | ||
182 | } else { | ||
183 | // Invalid | ||
184 | std::cout << "Bad VerbNet file format: " << filename << std::endl; | ||
185 | print_usage(); | ||
186 | } | ||
187 | |||
188 | return r; | ||
189 | } | ||
190 | |||
191 | group_t& parse_group(xmlNodePtr top, std::string filename) | ||
71 | { | 192 | { |
72 | xmlChar* key = xmlGetProp(top, (xmlChar*) "ID"); | 193 | xmlChar* key = xmlGetProp(top, (xmlChar*) "ID"); |
73 | if (key == 0) | 194 | if (key == 0) |
@@ -75,41 +196,183 @@ void parse_group(xmlNodePtr top, std::string filename) | |||
75 | std::cout << "Bad VerbNet file format: " << filename << std::endl; | 196 | std::cout << "Bad VerbNet file format: " << filename << std::endl; |
76 | print_usage(); | 197 | print_usage(); |
77 | } | 198 | } |
78 | std::string vnid = key; | 199 | std::string vnid = (const char*)key; |
79 | vnid = vnid.substr(vnid.find_first_of("-")+1); | 200 | vnid = vnid.substr(vnid.find_first_of("-")+1); |
80 | xmlFree(key); | 201 | xmlFree(key); |
81 | 202 | ||
82 | group g; | 203 | group_t g; |
83 | g.id = vnid; | 204 | g.id = vnid; |
84 | 205 | ||
85 | for (xmlNodePtr node = top->xmlChildrenNode; node != nullptr; node = node->next) | 206 | for (xmlNodePtr node = top->xmlChildrenNode; node != nullptr; node = node->next) |
86 | { | 207 | { |
87 | if (!xmlStrcmp(node->name, (const xmlChar*) "MEMBERS")) | 208 | if (!xmlStrcmp(node->name, (const xmlChar*) "SUBCLASSES")) |
209 | { | ||
210 | for (xmlNodePtr subclass = node->xmlChildrenNode; subclass != nullptr; subclass = subclass->next) | ||
211 | { | ||
212 | if (!xmlStrcmp(subclass->name, (const xmlChar*) "VNSUBCLASS")) | ||
213 | { | ||
214 | auto& sg = parse_group(subclass, filename); | ||
215 | sg.parent = vnid; | ||
216 | |||
217 | for (auto member : sg.members) | ||
218 | { | ||
219 | g.members.insert(member); | ||
220 | } | ||
221 | |||
222 | // The schema requires that subclasses appear after role definitions, so we can do this now | ||
223 | for (auto role : g.roles) | ||
224 | { | ||
225 | if (sg.roles.count(role.first) == 0) | ||
226 | { | ||
227 | sg.roles[role.first] = role.second; | ||
228 | } | ||
229 | } | ||
230 | } | ||
231 | } | ||
232 | } else if (!xmlStrcmp(node->name, (const xmlChar*) "MEMBERS")) | ||
88 | { | 233 | { |
89 | for (xmlNodePtr member = node->xmlChildrenNode; member != nullptr; member = member->next) | 234 | for (xmlNodePtr member = node->xmlChildrenNode; member != nullptr; member = member->next) |
90 | { | 235 | { |
91 | if (!xmlStrcmp(member->name, (const xmlChar*) "MEMBER")) | 236 | if (!xmlStrcmp(member->name, (const xmlChar*) "MEMBER")) |
92 | { | 237 | { |
93 | key = xmlGetProp(member, (xmlChar*) "name"); | 238 | key = xmlGetProp(member, (xmlChar*) "name"); |
94 | g.members.insert(key); | 239 | g.members.insert((const char*)key); |
95 | xmlFree(key); | 240 | xmlFree(key); |
96 | } | 241 | } |
97 | } | 242 | } |
243 | } else if (!xmlStrcmp(node->name, (const xmlChar*) "THEMROLES")) | ||
244 | { | ||
245 | for (xmlNodePtr role = node->xmlChildrenNode; role != nullptr; role = role->next) | ||
246 | { | ||
247 | if (!xmlStrcmp(role->name, (const xmlChar*) "THEMROLE")) | ||
248 | { | ||
249 | selrestr_t r; | ||
250 | r.type = selrestr_t::type_t::empty; | ||
251 | |||
252 | key = xmlGetProp(role, (const xmlChar*) "type"); | ||
253 | std::string type = (const char*)key; | ||
254 | xmlFree(key); | ||
255 | |||
256 | for (xmlNodePtr rolenode = role->xmlChildrenNode; rolenode != nullptr; rolenode = rolenode->next) | ||
257 | { | ||
258 | if (!xmlStrcmp(rolenode->name, (const xmlChar*) "SELRESTRS")) | ||
259 | { | ||
260 | r = parse_selrestrs(rolenode, filename); | ||
261 | } | ||
262 | } | ||
263 | |||
264 | g.roles[type] = r; | ||
265 | } | ||
266 | } | ||
98 | } else if (!xmlStrcmp(node->name, (const xmlChar*) "FRAMES")) | 267 | } else if (!xmlStrcmp(node->name, (const xmlChar*) "FRAMES")) |
99 | { | 268 | { |
100 | for (xmlNodePtr frame = node->xmlChildrenNode; frame != nullptr; frame = frame->next) | 269 | for (xmlNodePtr frame = node->xmlChildrenNode; frame != nullptr; frame = frame->next) |
101 | { | 270 | { |
102 | if (!xmlStrcmp(frame->name, (const xmlChar*) "FRAME")) | 271 | if (!xmlStrcmp(frame->name, (const xmlChar*) "FRAME")) |
103 | { | 272 | { |
273 | std::list<framepart_t> f; | ||
274 | |||
104 | for (xmlNodePtr framenode = frame->xmlChildrenNode; framenode != nullptr; framenode = framenode->next) | 275 | for (xmlNodePtr framenode = frame->xmlChildrenNode; framenode != nullptr; framenode = framenode->next) |
105 | { | 276 | { |
106 | 277 | if (!xmlStrcmp(framenode->name, (const xmlChar*) "SYNTAX")) | |
278 | { | ||
279 | for (xmlNodePtr syntaxnode = framenode->xmlChildrenNode; syntaxnode != nullptr; syntaxnode = syntaxnode->next) | ||
280 | { | ||
281 | framepart_t fp; | ||
282 | |||
283 | if (!xmlStrcmp(syntaxnode->name, (const xmlChar*) "NP")) | ||
284 | { | ||
285 | fp.type = framepart_t::type_t::np; | ||
286 | |||
287 | key = xmlGetProp(syntaxnode, (xmlChar*) "value"); | ||
288 | fp.role = (const char*)key; | ||
289 | xmlFree(key); | ||
290 | |||
291 | fp.selrestrs.type = selrestr_t::type_t::empty; | ||
292 | |||
293 | for (xmlNodePtr npnode = syntaxnode->xmlChildrenNode; npnode != nullptr; npnode = npnode->next) | ||
294 | { | ||
295 | if (!xmlStrcmp(npnode->name, (const xmlChar*) "SYNRESTRS")) | ||
296 | { | ||
297 | for (xmlNodePtr synrestr = npnode->xmlChildrenNode; synrestr != nullptr; synrestr = synrestr->next) | ||
298 | { | ||
299 | if (!xmlStrcmp(synrestr->name, (const xmlChar*) "SYNRESTR")) | ||
300 | { | ||
301 | key = xmlGetProp(synrestr, (xmlChar*) "type"); | ||
302 | fp.synrestrs.insert(std::string((const char*)key)); | ||
303 | xmlFree(key); | ||
304 | } | ||
305 | } | ||
306 | } | ||
307 | |||
308 | if (!xmlStrcmp(npnode->name, (const xmlChar*) "SELRESTRS")) | ||
309 | { | ||
310 | fp.selrestrs = parse_selrestrs(npnode, filename); | ||
311 | } | ||
312 | } | ||
313 | } else if (!xmlStrcmp(syntaxnode->name, (xmlChar*) "VERB")) | ||
314 | { | ||
315 | fp.type = framepart_t::type_t::v; | ||
316 | } else if (!xmlStrcmp(syntaxnode->name, (xmlChar*) "PREP")) | ||
317 | { | ||
318 | fp.type = framepart_t::type_t::pp; | ||
319 | |||
320 | if (xmlHasProp(syntaxnode, (xmlChar*) "value")) | ||
321 | { | ||
322 | key = xmlGetProp(syntaxnode, (xmlChar*) "value"); | ||
323 | std::string choices = (const char*)key; | ||
324 | xmlFree(key); | ||
325 | |||
326 | fp.choices = verbly::split<std::list<std::string>>(choices, " "); | ||
327 | } | ||
328 | |||
329 | for (xmlNodePtr npnode = syntaxnode->xmlChildrenNode; npnode != nullptr; npnode = npnode->next) | ||
330 | { | ||
331 | if (!xmlStrcmp(npnode->name, (const xmlChar*) "SELRESTRS")) | ||
332 | { | ||
333 | for (xmlNodePtr synrestr = npnode->xmlChildrenNode; synrestr != nullptr; synrestr = synrestr->next) | ||
334 | { | ||
335 | if (!xmlStrcmp(synrestr->name, (const xmlChar*) "SELRESTR")) | ||
336 | { | ||
337 | key = xmlGetProp(synrestr, (xmlChar*) "type"); | ||
338 | fp.preprestrs.insert(std::string((const char*)key)); | ||
339 | xmlFree(key); | ||
340 | } | ||
341 | } | ||
342 | } | ||
343 | } | ||
344 | } else if (!xmlStrcmp(syntaxnode->name, (xmlChar*) "ADJ")) | ||
345 | { | ||
346 | fp.type = framepart_t::type_t::adj; | ||
347 | } else if (!xmlStrcmp(syntaxnode->name, (xmlChar*) "ADV")) | ||
348 | { | ||
349 | fp.type = framepart_t::type_t::adv; | ||
350 | } else if (!xmlStrcmp(syntaxnode->name, (xmlChar*) "LEX")) | ||
351 | { | ||
352 | fp.type = framepart_t::type_t::lex; | ||
353 | |||
354 | key = xmlGetProp(syntaxnode, (xmlChar*) "value"); | ||
355 | fp.lexval = (const char*)key; | ||
356 | xmlFree(key); | ||
357 | } else { | ||
358 | continue; | ||
359 | } | ||
360 | |||
361 | f.push_back(fp); | ||
362 | } | ||
363 | |||
364 | g.frames.push_back(f); | ||
365 | } | ||
107 | } | 366 | } |
108 | } | 367 | } |
109 | } | 368 | } |
110 | } | 369 | } |
111 | } | 370 | } |
112 | }*/ | 371 | |
372 | groups[vnid] = g; | ||
373 | |||
374 | return groups[vnid]; | ||
375 | } | ||
113 | 376 | ||
114 | int main(int argc, char** argv) | 377 | int main(int argc, char** argv) |
115 | { | 378 | { |
@@ -118,7 +381,10 @@ int main(int argc, char** argv) | |||
118 | print_usage(); | 381 | print_usage(); |
119 | } | 382 | } |
120 | 383 | ||
121 | /*DIR* dir; | 384 | // VerbNet data |
385 | std::cout << "Reading verb frames..." << std::endl; | ||
386 | |||
387 | DIR* dir; | ||
122 | if ((dir = opendir(argv[1])) == nullptr) | 388 | if ((dir = opendir(argv[1])) == nullptr) |
123 | { | 389 | { |
124 | std::cout << "Invalid VerbNet data directory." << std::endl; | 390 | std::cout << "Invalid VerbNet data directory." << std::endl; |
@@ -160,7 +426,7 @@ int main(int argc, char** argv) | |||
160 | parse_group(top, filename); | 426 | parse_group(top, filename); |
161 | } | 427 | } |
162 | 428 | ||
163 | closedir(dir);*/ | 429 | closedir(dir); |
164 | 430 | ||
165 | // Get verbs from AGID | 431 | // Get verbs from AGID |
166 | std::cout << "Reading inflections..." << std::endl; | 432 | std::cout << "Reading inflections..." << std::endl; |
@@ -222,7 +488,7 @@ int main(int argc, char** argv) | |||
222 | { | 488 | { |
223 | case 'V': | 489 | case 'V': |
224 | { | 490 | { |
225 | verb v; | 491 | verb_t v; |
226 | v.infinitive = word; | 492 | v.infinitive = word; |
227 | if (forms.size() == 4) | 493 | if (forms.size() == 4) |
228 | { | 494 | { |
@@ -258,7 +524,7 @@ int main(int argc, char** argv) | |||
258 | 524 | ||
259 | case 'A': | 525 | case 'A': |
260 | { | 526 | { |
261 | adjective adj; | 527 | adjective_t adj; |
262 | adj.base = word; | 528 | adj.base = word; |
263 | if (forms.size() == 2) | 529 | if (forms.size() == 2) |
264 | { | 530 | { |
@@ -276,7 +542,7 @@ int main(int argc, char** argv) | |||
276 | 542 | ||
277 | case 'N': | 543 | case 'N': |
278 | { | 544 | { |
279 | noun n; | 545 | noun_t n; |
280 | n.singular = word; | 546 | n.singular = word; |
281 | if (forms.size() == 1) | 547 | if (forms.size() == 1) |
282 | { | 548 | { |
@@ -388,6 +654,85 @@ int main(int argc, char** argv) | |||
388 | sqlite3_finalize(schmstmt); | 654 | sqlite3_finalize(schmstmt); |
389 | } | 655 | } |
390 | 656 | ||
657 | std::cout << "Writing prepositions..." << std::endl; | ||
658 | std::ifstream prepfile("prepositions.txt"); | ||
659 | if (!prepfile.is_open()) | ||
660 | { | ||
661 | std::cout << "Could not find prepositions file" << std::endl; | ||
662 | print_usage(); | ||
663 | } | ||
664 | |||
665 | for (;;) | ||
666 | { | ||
667 | std::string line; | ||
668 | if (!getline(prepfile, line)) | ||
669 | { | ||
670 | break; | ||
671 | } | ||
672 | |||
673 | if (line.back() == '\r') | ||
674 | { | ||
675 | line.pop_back(); | ||
676 | } | ||
677 | |||
678 | std::regex relation("^([^:]+): (.+)"); | ||
679 | std::smatch relation_data; | ||
680 | std::regex_search(line, relation_data, relation); | ||
681 | std::string prep = relation_data[1]; | ||
682 | std::list<std::string> groups = verbly::split<std::list<std::string>>(relation_data[2], ", "); | ||
683 | |||
684 | std::string query("INSERT INTO prepositions (form) VALUES (?)"); | ||
685 | sqlite3_stmt* ppstmt; | ||
686 | |||
687 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
688 | { | ||
689 | db_error(ppdb, query); | ||
690 | } | ||
691 | |||
692 | sqlite3_bind_text(ppstmt, 1, prep.c_str(), prep.length(), SQLITE_STATIC); | ||
693 | |||
694 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | ||
695 | { | ||
696 | db_error(ppdb, query); | ||
697 | } | ||
698 | |||
699 | sqlite3_finalize(ppstmt); | ||
700 | |||
701 | query = "SELECT last_insert_rowid()"; | ||
702 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
703 | { | ||
704 | db_error(ppdb, query); | ||
705 | } | ||
706 | |||
707 | if (sqlite3_step(ppstmt) != SQLITE_ROW) | ||
708 | { | ||
709 | db_error(ppdb, query); | ||
710 | } | ||
711 | |||
712 | int rowid = sqlite3_column_int(ppstmt, 0); | ||
713 | sqlite3_finalize(ppstmt); | ||
714 | |||
715 | for (auto group : groups) | ||
716 | { | ||
717 | query = "INSERT INTO preposition_groups (preposition_id, groupname) VALUES (?, ?)"; | ||
718 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
719 | { | ||
720 | db_error(ppdb, query); | ||
721 | } | ||
722 | |||
723 | sqlite3_bind_int(ppstmt, 1, rowid); | ||
724 | sqlite3_bind_text(ppstmt, 2, group.c_str(), group.length(), SQLITE_STATIC); | ||
725 | |||
726 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | ||
727 | { | ||
728 | db_error(ppdb, query); | ||
729 | } | ||
730 | |||
731 | sqlite3_finalize(ppstmt); | ||
732 | } | ||
733 | } | ||
734 | |||
735 | |||
391 | { | 736 | { |
392 | progress ppgs("Writing verbs...", verbs.size()); | 737 | progress ppgs("Writing verbs...", verbs.size()); |
393 | for (auto& mapping : verbs) | 738 | for (auto& mapping : verbs) |
@@ -431,6 +776,8 @@ int main(int argc, char** argv) | |||
431 | 776 | ||
432 | sqlite3_finalize(ppstmt); | 777 | sqlite3_finalize(ppstmt); |
433 | 778 | ||
779 | mapping.second.id = rowid; | ||
780 | |||
434 | for (auto pronunciation : pronunciations[canonical]) | 781 | for (auto pronunciation : pronunciations[canonical]) |
435 | { | 782 | { |
436 | query = "INSERT INTO verb_pronunciations (verb_id, pronunciation) VALUES (?, ?)"; | 783 | query = "INSERT INTO verb_pronunciations (verb_id, pronunciation) VALUES (?, ?)"; |
@@ -455,6 +802,160 @@ int main(int argc, char** argv) | |||
455 | } | 802 | } |
456 | } | 803 | } |
457 | 804 | ||
805 | { | ||
806 | progress ppgs("Writing verb frames...", groups.size()); | ||
807 | for (auto& mapping : groups) | ||
808 | { | ||
809 | std::list<json> roledatal; | ||
810 | std::transform(std::begin(mapping.second.roles), std::end(mapping.second.roles), std::back_inserter(roledatal), [] (std::pair<std::string, selrestr_t> r) { | ||
811 | json role; | ||
812 | role["type"] = r.first; | ||
813 | role["selrestrs"] = export_selrestrs(r.second); | ||
814 | |||
815 | return role; | ||
816 | }); | ||
817 | |||
818 | json roledata(roledatal); | ||
819 | std::string rdm = roledata.dump(); | ||
820 | |||
821 | sqlite3_stmt* ppstmt; | ||
822 | std::string query("INSERT INTO groups (data) VALUES (?)"); | ||
823 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
824 | { | ||
825 | db_error(ppdb, query); | ||
826 | } | ||
827 | |||
828 | sqlite3_bind_blob(ppstmt, 1, rdm.c_str(), rdm.size(), SQLITE_STATIC); | ||
829 | |||
830 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | ||
831 | { | ||
832 | db_error(ppdb, query); | ||
833 | } | ||
834 | |||
835 | sqlite3_finalize(ppstmt); | ||
836 | |||
837 | query = "SELECT last_insert_rowid()"; | ||
838 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
839 | { | ||
840 | db_error(ppdb, query); | ||
841 | } | ||
842 | |||
843 | if (sqlite3_step(ppstmt) != SQLITE_ROW) | ||
844 | { | ||
845 | db_error(ppdb, query); | ||
846 | } | ||
847 | |||
848 | int gid = sqlite3_column_int(ppstmt, 0); | ||
849 | sqlite3_finalize(ppstmt); | ||
850 | |||
851 | for (auto frame : mapping.second.frames) | ||
852 | { | ||
853 | std::list<json> fdatap; | ||
854 | std::transform(std::begin(frame), std::end(frame), std::back_inserter(fdatap), [] (framepart_t& fp) { | ||
855 | json part; | ||
856 | |||
857 | switch (fp.type) | ||
858 | { | ||
859 | case framepart_t::type_t::np: | ||
860 | { | ||
861 | part["type"] = "np"; | ||
862 | part["role"] = fp.role; | ||
863 | part["selrestrs"] = export_selrestrs(fp.selrestrs); | ||
864 | part["synrestrs"] = fp.synrestrs; | ||
865 | |||
866 | break; | ||
867 | } | ||
868 | |||
869 | case framepart_t::type_t::pp: | ||
870 | { | ||
871 | part["type"] = "pp"; | ||
872 | part["values"] = fp.choices; | ||
873 | part["preprestrs"] = fp.preprestrs; | ||
874 | |||
875 | break; | ||
876 | } | ||
877 | |||
878 | case framepart_t::type_t::v: | ||
879 | { | ||
880 | part["type"] = "v"; | ||
881 | |||
882 | break; | ||
883 | } | ||
884 | |||
885 | case framepart_t::type_t::adj: | ||
886 | { | ||
887 | part["type"] = "adj"; | ||
888 | |||
889 | break; | ||
890 | } | ||
891 | |||
892 | case framepart_t::type_t::adv: | ||
893 | { | ||
894 | part["type"] = "adv"; | ||
895 | |||
896 | break; | ||
897 | } | ||
898 | |||
899 | case framepart_t::type_t::lex: | ||
900 | { | ||
901 | part["type"] = "lex"; | ||
902 | part["value"] = fp.lexval; | ||
903 | |||
904 | break; | ||
905 | } | ||
906 | } | ||
907 | |||
908 | return part; | ||
909 | }); | ||
910 | |||
911 | json fdata(fdatap); | ||
912 | std::string marshall = fdata.dump(); | ||
913 | |||
914 | query = "INSERT INTO frames (group_id, data) VALUES (?, ?)"; | ||
915 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
916 | { | ||
917 | db_error(ppdb, query); | ||
918 | } | ||
919 | |||
920 | sqlite3_bind_int(ppstmt, 1, gid); | ||
921 | sqlite3_bind_blob(ppstmt, 2, marshall.c_str(), marshall.length(), SQLITE_STATIC); | ||
922 | |||
923 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | ||
924 | { | ||
925 | db_error(ppdb, query); | ||
926 | } | ||
927 | |||
928 | sqlite3_finalize(ppstmt); | ||
929 | } | ||
930 | |||
931 | for (auto member : mapping.second.members) | ||
932 | { | ||
933 | if (verbs.count(member) == 1) | ||
934 | { | ||
935 | auto& v = verbs[member]; | ||
936 | |||
937 | query = "INSERT INTO verb_groups (verb_id, group_id) VALUES (?, ?)"; | ||
938 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
939 | { | ||
940 | db_error(ppdb, query); | ||
941 | } | ||
942 | |||
943 | sqlite3_bind_int(ppstmt, 1, v.id); | ||
944 | sqlite3_bind_int(ppstmt, 2, gid); | ||
945 | |||
946 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | ||
947 | { | ||
948 | db_error(ppdb, query); | ||
949 | } | ||
950 | |||
951 | sqlite3_finalize(ppstmt); | ||
952 | } | ||
953 | } | ||
954 | |||
955 | ppgs.update(); | ||
956 | } | ||
957 | } | ||
958 | |||
458 | // Get nouns/adjectives/adverbs from WordNet | 959 | // Get nouns/adjectives/adverbs from WordNet |
459 | // Useful relations: | 960 | // Useful relations: |
460 | // - s: master list | 961 | // - s: master list |