summary refs log tree commit diff stats
path: root/generator
diff options
context:
space:
mode:
Diffstat (limited to 'generator')
-rw-r--r--generator/CMakeLists.txt2
-rw-r--r--generator/generator.cpp545
-rw-r--r--generator/prepositions.txt49
-rw-r--r--generator/schema.sql16
-rw-r--r--generator/vn.diff482
5 files changed, 1069 insertions, 25 deletions
diff --git a/generator/CMakeLists.txt b/generator/CMakeLists.txt index bbc3c4f..552526d 100644 --- a/generator/CMakeLists.txt +++ b/generator/CMakeLists.txt
@@ -5,7 +5,7 @@ find_package(PkgConfig)
5pkg_check_modules(sqlite3 sqlite3 REQUIRED) 5pkg_check_modules(sqlite3 sqlite3 REQUIRED)
6find_package(libxml2 REQUIRED) 6find_package(libxml2 REQUIRED)
7 7
8include_directories(${sqlite3_INCLUDE_DIR} ${LIBXML2_INCLUDE_DIR}) 8include_directories(${sqlite3_INCLUDE_DIR} ${LIBXML2_INCLUDE_DIR} ../vendor/json/src)
9add_executable(generator generator.cpp) 9add_executable(generator generator.cpp)
10set_property(TARGET generator PROPERTY CXX_STANDARD 11) 10set_property(TARGET generator PROPERTY CXX_STANDARD 11)
11set_property(TARGET generator PROPERTY CXX_STANDARD_REQUIRED ON) 11set_property(TARGET generator PROPERTY CXX_STANDARD_REQUIRED ON)
diff --git a/generator/generator.cpp b/generator/generator.cpp index 7ec94df..aea750c 100644 --- a/generator/generator.cpp +++ b/generator/generator.cpp
@@ -11,36 +11,75 @@
11#include <regex> 11#include <regex>
12#include <list> 12#include <list>
13#include <algorithm> 13#include <algorithm>
14#include <json.hpp>
14#include "progress.h" 15#include "progress.h"
16#include "../lib/util.h"
15 17
16struct verb { 18using json = nlohmann::json;
19
20struct verb_t {
17 std::string infinitive; 21 std::string infinitive;
18 std::string past_tense; 22 std::string past_tense;
19 std::string past_participle; 23 std::string past_participle;
20 std::string ing_form; 24 std::string ing_form;
21 std::string s_form; 25 std::string s_form;
26 int id;
22}; 27};
23 28
24struct adjective { 29struct adjective_t {
25 std::string base; 30 std::string base;
26 std::string comparative; 31 std::string comparative;
27 std::string superlative; 32 std::string superlative;
28}; 33};
29 34
30struct noun { 35struct noun_t {
31 std::string singular; 36 std::string singular;
32 std::string plural; 37 std::string plural;
33}; 38};
34 39
35struct group { 40struct selrestr_t {
41 enum class type_t {
42 singleton,
43 andlogic,
44 orlogic,
45 empty
46 };
47 type_t type;
48 std::string restriction;
49 bool pos;
50 std::list<selrestr_t> subordinates;
51};
52
53struct framepart_t {
54 enum class type_t {
55 np,
56 v,
57 pp,
58 adj,
59 adv,
60 lex
61 };
62 type_t type;
63 std::string role;
64 selrestr_t selrestrs;
65 std::set<std::string> preprestrs;
66 std::set<std::string> synrestrs;
67 std::list<std::string> choices;
68 std::string lexval;
69};
70
71struct group_t {
36 std::string id; 72 std::string id;
73 std::string parent;
37 std::set<std::string> members; 74 std::set<std::string> members;
75 std::map<std::string, selrestr_t> roles;
76 std::list<std::list<framepart_t>> frames;
38}; 77};
39 78
40std::map<std::string, group> groups; 79std::map<std::string, group_t> groups;
41std::map<std::string, verb> verbs; 80std::map<std::string, verb_t> verbs;
42std::map<std::string, adjective> adjectives; 81std::map<std::string, adjective_t> adjectives;
43std::map<std::string, noun> nouns; 82std::map<std::string, noun_t> nouns;
44std::map<int, std::map<int, int>> wn; 83std::map<int, std::map<int, int>> wn;
45std::map<std::string, std::set<std::string>> pronunciations; 84std::map<std::string, std::set<std::string>> pronunciations;
46 85
@@ -59,15 +98,97 @@ void print_usage()
59 exit(1); 98 exit(1);
60} 99}
61 100
62void db_error(sqlite3* ppdb, std::string) 101void db_error(sqlite3* ppdb, std::string query)
63{ 102{
64 std::cout << "Error writing to output database: " << sqlite3_errmsg(ppdb) << std::endl; 103 std::cout << "Error writing to output database: " << sqlite3_errmsg(ppdb) << std::endl;
104 std::cout << query << std::endl;
65 sqlite3_close_v2(ppdb); 105 sqlite3_close_v2(ppdb);
66 print_usage(); 106 print_usage();
67} 107}
68 108
69/* 109json export_selrestrs(selrestr_t r)
70void parse_group(xmlNodePtr top, std::string filename) 110{
111 if (r.type == selrestr_t::type_t::empty)
112 {
113 return {};
114 } else if (r.type == selrestr_t::type_t::singleton)
115 {
116 json result;
117 result["type"] = r.restriction;
118 result["pos"] = r.pos;
119 return result;
120 } else {
121 json result;
122 if (r.type == selrestr_t::type_t::andlogic)
123 {
124 result["logic"] = "and";
125 } else {
126 result["logic"] = "or";
127 }
128
129 std::list<json> outlist;
130 std::transform(std::begin(r.subordinates), std::end(r.subordinates), std::back_inserter(outlist), &export_selrestrs);
131 result["children"] = outlist;
132
133 return result;
134 }
135}
136
137selrestr_t parse_selrestrs(xmlNodePtr top, std::string filename)
138{
139 selrestr_t r;
140 xmlChar* key;
141
142 if (!xmlStrcmp(top->name, (const xmlChar*) "SELRESTRS"))
143 {
144 if (xmlChildElementCount(top) == 0)
145 {
146 r.type = selrestr_t::type_t::empty;
147 } else if (xmlChildElementCount(top) == 1)
148 {
149 r = parse_selrestrs(xmlFirstElementChild(top), filename);
150 } else {
151 r.type = selrestr_t::type_t::andlogic;
152
153 if (xmlHasProp(top, (const xmlChar*) "logic"))
154 {
155 key = xmlGetProp(top, (const xmlChar*) "logic");
156 if (!xmlStrcmp(key, (const xmlChar*) "or"))
157 {
158 r.type = selrestr_t::type_t::orlogic;
159 }
160 xmlFree(key);
161 }
162
163 for (xmlNodePtr selrestr = top->xmlChildrenNode; selrestr != nullptr; selrestr = selrestr->next)
164 {
165 if (!xmlStrcmp(selrestr->name, (const xmlChar*) "SELRESTRS") || !xmlStrcmp(selrestr->name, (const xmlChar*) "SELRESTR"))
166 {
167 r.subordinates.push_back(parse_selrestrs(selrestr, filename));
168 }
169 }
170 }
171 } else if (!xmlStrcmp(top->name, (const xmlChar*) "SELRESTR"))
172 {
173 r.type = selrestr_t::type_t::singleton;
174
175 key = xmlGetProp(top, (xmlChar*) "Value");
176 r.pos = (std::string((const char*)key) == "+");
177 xmlFree(key);
178
179 key = xmlGetProp(top, (xmlChar*) "type");
180 r.restriction = (const char*) key;
181 xmlFree(key);
182 } else {
183 // Invalid
184 std::cout << "Bad VerbNet file format: " << filename << std::endl;
185 print_usage();
186 }
187
188 return r;
189}
190
191group_t& parse_group(xmlNodePtr top, std::string filename)
71{ 192{
72 xmlChar* key = xmlGetProp(top, (xmlChar*) "ID"); 193 xmlChar* key = xmlGetProp(top, (xmlChar*) "ID");
73 if (key == 0) 194 if (key == 0)
@@ -75,41 +196,183 @@ void parse_group(xmlNodePtr top, std::string filename)
75 std::cout << "Bad VerbNet file format: " << filename << std::endl; 196 std::cout << "Bad VerbNet file format: " << filename << std::endl;
76 print_usage(); 197 print_usage();
77 } 198 }
78 std::string vnid = key; 199 std::string vnid = (const char*)key;
79 vnid = vnid.substr(vnid.find_first_of("-")+1); 200 vnid = vnid.substr(vnid.find_first_of("-")+1);
80 xmlFree(key); 201 xmlFree(key);
81 202
82 group g; 203 group_t g;
83 g.id = vnid; 204 g.id = vnid;
84 205
85 for (xmlNodePtr node = top->xmlChildrenNode; node != nullptr; node = node->next) 206 for (xmlNodePtr node = top->xmlChildrenNode; node != nullptr; node = node->next)
86 { 207 {
87 if (!xmlStrcmp(node->name, (const xmlChar*) "MEMBERS")) 208 if (!xmlStrcmp(node->name, (const xmlChar*) "SUBCLASSES"))
209 {
210 for (xmlNodePtr subclass = node->xmlChildrenNode; subclass != nullptr; subclass = subclass->next)
211 {
212 if (!xmlStrcmp(subclass->name, (const xmlChar*) "VNSUBCLASS"))
213 {
214 auto& sg = parse_group(subclass, filename);
215 sg.parent = vnid;
216
217 for (auto member : sg.members)
218 {
219 g.members.insert(member);
220 }
221
222 // The schema requires that subclasses appear after role definitions, so we can do this now
223 for (auto role : g.roles)
224 {
225 if (sg.roles.count(role.first) == 0)
226 {
227 sg.roles[role.first] = role.second;
228 }
229 }
230 }
231 }
232 } else if (!xmlStrcmp(node->name, (const xmlChar*) "MEMBERS"))
88 { 233 {
89 for (xmlNodePtr member = node->xmlChildrenNode; member != nullptr; member = member->next) 234 for (xmlNodePtr member = node->xmlChildrenNode; member != nullptr; member = member->next)
90 { 235 {
91 if (!xmlStrcmp(member->name, (const xmlChar*) "MEMBER")) 236 if (!xmlStrcmp(member->name, (const xmlChar*) "MEMBER"))
92 { 237 {
93 key = xmlGetProp(member, (xmlChar*) "name"); 238 key = xmlGetProp(member, (xmlChar*) "name");
94 g.members.insert(key); 239 g.members.insert((const char*)key);
95 xmlFree(key); 240 xmlFree(key);
96 } 241 }
97 } 242 }
243 } else if (!xmlStrcmp(node->name, (const xmlChar*) "THEMROLES"))
244 {
245 for (xmlNodePtr role = node->xmlChildrenNode; role != nullptr; role = role->next)
246 {
247 if (!xmlStrcmp(role->name, (const xmlChar*) "THEMROLE"))
248 {
249 selrestr_t r;
250 r.type = selrestr_t::type_t::empty;
251
252 key = xmlGetProp(role, (const xmlChar*) "type");
253 std::string type = (const char*)key;
254 xmlFree(key);
255
256 for (xmlNodePtr rolenode = role->xmlChildrenNode; rolenode != nullptr; rolenode = rolenode->next)
257 {
258 if (!xmlStrcmp(rolenode->name, (const xmlChar*) "SELRESTRS"))
259 {
260 r = parse_selrestrs(rolenode, filename);
261 }
262 }
263
264 g.roles[type] = r;
265 }
266 }
98 } else if (!xmlStrcmp(node->name, (const xmlChar*) "FRAMES")) 267 } else if (!xmlStrcmp(node->name, (const xmlChar*) "FRAMES"))
99 { 268 {
100 for (xmlNodePtr frame = node->xmlChildrenNode; frame != nullptr; frame = frame->next) 269 for (xmlNodePtr frame = node->xmlChildrenNode; frame != nullptr; frame = frame->next)
101 { 270 {
102 if (!xmlStrcmp(frame->name, (const xmlChar*) "FRAME")) 271 if (!xmlStrcmp(frame->name, (const xmlChar*) "FRAME"))
103 { 272 {
273 std::list<framepart_t> f;
274
104 for (xmlNodePtr framenode = frame->xmlChildrenNode; framenode != nullptr; framenode = framenode->next) 275 for (xmlNodePtr framenode = frame->xmlChildrenNode; framenode != nullptr; framenode = framenode->next)
105 { 276 {
106 277 if (!xmlStrcmp(framenode->name, (const xmlChar*) "SYNTAX"))
278 {
279 for (xmlNodePtr syntaxnode = framenode->xmlChildrenNode; syntaxnode != nullptr; syntaxnode = syntaxnode->next)
280 {
281 framepart_t fp;
282
283 if (!xmlStrcmp(syntaxnode->name, (const xmlChar*) "NP"))
284 {
285 fp.type = framepart_t::type_t::np;
286
287 key = xmlGetProp(syntaxnode, (xmlChar*) "value");
288 fp.role = (const char*)key;
289 xmlFree(key);
290
291 fp.selrestrs.type = selrestr_t::type_t::empty;
292
293 for (xmlNodePtr npnode = syntaxnode->xmlChildrenNode; npnode != nullptr; npnode = npnode->next)
294 {
295 if (!xmlStrcmp(npnode->name, (const xmlChar*) "SYNRESTRS"))
296 {
297 for (xmlNodePtr synrestr = npnode->xmlChildrenNode; synrestr != nullptr; synrestr = synrestr->next)
298 {
299 if (!xmlStrcmp(synrestr->name, (const xmlChar*) "SYNRESTR"))
300 {
301 key = xmlGetProp(synrestr, (xmlChar*) "type");
302 fp.synrestrs.insert(std::string((const char*)key));
303 xmlFree(key);
304 }
305 }
306 }
307
308 if (!xmlStrcmp(npnode->name, (const xmlChar*) "SELRESTRS"))
309 {
310 fp.selrestrs = parse_selrestrs(npnode, filename);
311 }
312 }
313 } else if (!xmlStrcmp(syntaxnode->name, (xmlChar*) "VERB"))
314 {
315 fp.type = framepart_t::type_t::v;
316 } else if (!xmlStrcmp(syntaxnode->name, (xmlChar*) "PREP"))
317 {
318 fp.type = framepart_t::type_t::pp;
319
320 if (xmlHasProp(syntaxnode, (xmlChar*) "value"))
321 {
322 key = xmlGetProp(syntaxnode, (xmlChar*) "value");
323 std::string choices = (const char*)key;
324 xmlFree(key);
325
326 fp.choices = verbly::split<std::list<std::string>>(choices, " ");
327 }
328
329 for (xmlNodePtr npnode = syntaxnode->xmlChildrenNode; npnode != nullptr; npnode = npnode->next)
330 {
331 if (!xmlStrcmp(npnode->name, (const xmlChar*) "SELRESTRS"))
332 {
333 for (xmlNodePtr synrestr = npnode->xmlChildrenNode; synrestr != nullptr; synrestr = synrestr->next)
334 {
335 if (!xmlStrcmp(synrestr->name, (const xmlChar*) "SELRESTR"))
336 {
337 key = xmlGetProp(synrestr, (xmlChar*) "type");
338 fp.preprestrs.insert(std::string((const char*)key));
339 xmlFree(key);
340 }
341 }
342 }
343 }
344 } else if (!xmlStrcmp(syntaxnode->name, (xmlChar*) "ADJ"))
345 {
346 fp.type = framepart_t::type_t::adj;
347 } else if (!xmlStrcmp(syntaxnode->name, (xmlChar*) "ADV"))
348 {
349 fp.type = framepart_t::type_t::adv;
350 } else if (!xmlStrcmp(syntaxnode->name, (xmlChar*) "LEX"))
351 {
352 fp.type = framepart_t::type_t::lex;
353
354 key = xmlGetProp(syntaxnode, (xmlChar*) "value");
355 fp.lexval = (const char*)key;
356 xmlFree(key);
357 } else {
358 continue;
359 }
360
361 f.push_back(fp);
362 }
363
364 g.frames.push_back(f);
365 }
107 } 366 }
108 } 367 }
109 } 368 }
110 } 369 }
111 } 370 }
112}*/ 371
372 groups[vnid] = g;
373
374 return groups[vnid];
375}
113 376
114int main(int argc, char** argv) 377int main(int argc, char** argv)
115{ 378{
@@ -118,7 +381,10 @@ int main(int argc, char** argv)
118 print_usage(); 381 print_usage();
119 } 382 }
120 383
121 /*DIR* dir; 384 // VerbNet data
385 std::cout << "Reading verb frames..." << std::endl;
386
387 DIR* dir;
122 if ((dir = opendir(argv[1])) == nullptr) 388 if ((dir = opendir(argv[1])) == nullptr)
123 { 389 {
124 std::cout << "Invalid VerbNet data directory." << std::endl; 390 std::cout << "Invalid VerbNet data directory." << std::endl;
@@ -160,7 +426,7 @@ int main(int argc, char** argv)
160 parse_group(top, filename); 426 parse_group(top, filename);
161 } 427 }
162 428
163 closedir(dir);*/ 429 closedir(dir);
164 430
165 // Get verbs from AGID 431 // Get verbs from AGID
166 std::cout << "Reading inflections..." << std::endl; 432 std::cout << "Reading inflections..." << std::endl;
@@ -222,7 +488,7 @@ int main(int argc, char** argv)
222 { 488 {
223 case 'V': 489 case 'V':
224 { 490 {
225 verb v; 491 verb_t v;
226 v.infinitive = word; 492 v.infinitive = word;
227 if (forms.size() == 4) 493 if (forms.size() == 4)
228 { 494 {
@@ -258,7 +524,7 @@ int main(int argc, char** argv)
258 524
259 case 'A': 525 case 'A':
260 { 526 {
261 adjective adj; 527 adjective_t adj;
262 adj.base = word; 528 adj.base = word;
263 if (forms.size() == 2) 529 if (forms.size() == 2)
264 { 530 {
@@ -276,7 +542,7 @@ int main(int argc, char** argv)
276 542
277 case 'N': 543 case 'N':
278 { 544 {
279 noun n; 545 noun_t n;
280 n.singular = word; 546 n.singular = word;
281 if (forms.size() == 1) 547 if (forms.size() == 1)
282 { 548 {
@@ -388,6 +654,85 @@ int main(int argc, char** argv)
388 sqlite3_finalize(schmstmt); 654 sqlite3_finalize(schmstmt);
389 } 655 }
390 656
657 std::cout << "Writing prepositions..." << std::endl;
658 std::ifstream prepfile("prepositions.txt");
659 if (!prepfile.is_open())
660 {
661 std::cout << "Could not find prepositions file" << std::endl;
662 print_usage();
663 }
664
665 for (;;)
666 {
667 std::string line;
668 if (!getline(prepfile, line))
669 {
670 break;
671 }
672
673 if (line.back() == '\r')
674 {
675 line.pop_back();
676 }
677
678 std::regex relation("^([^:]+): (.+)");
679 std::smatch relation_data;
680 std::regex_search(line, relation_data, relation);
681 std::string prep = relation_data[1];
682 std::list<std::string> groups = verbly::split<std::list<std::string>>(relation_data[2], ", ");
683
684 std::string query("INSERT INTO prepositions (form) VALUES (?)");
685 sqlite3_stmt* ppstmt;
686
687 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
688 {
689 db_error(ppdb, query);
690 }
691
692 sqlite3_bind_text(ppstmt, 1, prep.c_str(), prep.length(), SQLITE_STATIC);
693
694 if (sqlite3_step(ppstmt) != SQLITE_DONE)
695 {
696 db_error(ppdb, query);
697 }
698
699 sqlite3_finalize(ppstmt);
700
701 query = "SELECT last_insert_rowid()";
702 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
703 {
704 db_error(ppdb, query);
705 }
706
707 if (sqlite3_step(ppstmt) != SQLITE_ROW)
708 {
709 db_error(ppdb, query);
710 }
711
712 int rowid = sqlite3_column_int(ppstmt, 0);
713 sqlite3_finalize(ppstmt);
714
715 for (auto group : groups)
716 {
717 query = "INSERT INTO preposition_groups (preposition_id, groupname) VALUES (?, ?)";
718 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
719 {
720 db_error(ppdb, query);
721 }
722
723 sqlite3_bind_int(ppstmt, 1, rowid);
724 sqlite3_bind_text(ppstmt, 2, group.c_str(), group.length(), SQLITE_STATIC);
725
726 if (sqlite3_step(ppstmt) != SQLITE_DONE)
727 {
728 db_error(ppdb, query);
729 }
730
731 sqlite3_finalize(ppstmt);
732 }
733 }
734
735
391 { 736 {
392 progress ppgs("Writing verbs...", verbs.size()); 737 progress ppgs("Writing verbs...", verbs.size());
393 for (auto& mapping : verbs) 738 for (auto& mapping : verbs)
@@ -431,6 +776,8 @@ int main(int argc, char** argv)
431 776
432 sqlite3_finalize(ppstmt); 777 sqlite3_finalize(ppstmt);
433 778
779 mapping.second.id = rowid;
780
434 for (auto pronunciation : pronunciations[canonical]) 781 for (auto pronunciation : pronunciations[canonical])
435 { 782 {
436 query = "INSERT INTO verb_pronunciations (verb_id, pronunciation) VALUES (?, ?)"; 783 query = "INSERT INTO verb_pronunciations (verb_id, pronunciation) VALUES (?, ?)";
@@ -455,6 +802,160 @@ int main(int argc, char** argv)
455 } 802 }
456 } 803 }
457 804
805 {
806 progress ppgs("Writing verb frames...", groups.size());
807 for (auto& mapping : groups)
808 {
809 std::list<json> roledatal;
810 std::transform(std::begin(mapping.second.roles), std::end(mapping.second.roles), std::back_inserter(roledatal), [] (std::pair<std::string, selrestr_t> r) {
811 json role;
812 role["type"] = r.first;
813 role["selrestrs"] = export_selrestrs(r.second);
814
815 return role;
816 });
817
818 json roledata(roledatal);
819 std::string rdm = roledata.dump();
820
821 sqlite3_stmt* ppstmt;
822 std::string query("INSERT INTO groups (data) VALUES (?)");
823 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
824 {
825 db_error(ppdb, query);
826 }
827
828 sqlite3_bind_blob(ppstmt, 1, rdm.c_str(), rdm.size(), SQLITE_STATIC);
829
830 if (sqlite3_step(ppstmt) != SQLITE_DONE)
831 {
832 db_error(ppdb, query);
833 }
834
835 sqlite3_finalize(ppstmt);
836
837 query = "SELECT last_insert_rowid()";
838 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
839 {
840 db_error(ppdb, query);
841 }
842
843 if (sqlite3_step(ppstmt) != SQLITE_ROW)
844 {
845 db_error(ppdb, query);
846 }
847
848 int gid = sqlite3_column_int(ppstmt, 0);
849 sqlite3_finalize(ppstmt);
850
851 for (auto frame : mapping.second.frames)
852 {
853 std::list<json> fdatap;
854 std::transform(std::begin(frame), std::end(frame), std::back_inserter(fdatap), [] (framepart_t& fp) {
855 json part;
856
857 switch (fp.type)
858 {
859 case framepart_t::type_t::np:
860 {
861 part["type"] = "np";
862 part["role"] = fp.role;
863 part["selrestrs"] = export_selrestrs(fp.selrestrs);
864 part["synrestrs"] = fp.synrestrs;
865
866 break;
867 }
868
869 case framepart_t::type_t::pp:
870 {
871 part["type"] = "pp";
872 part["values"] = fp.choices;
873 part["preprestrs"] = fp.preprestrs;
874
875 break;
876 }
877
878 case framepart_t::type_t::v:
879 {
880 part["type"] = "v";
881
882 break;
883 }
884
885 case framepart_t::type_t::adj:
886 {
887 part["type"] = "adj";
888
889 break;
890 }
891
892 case framepart_t::type_t::adv:
893 {
894 part["type"] = "adv";
895
896 break;
897 }
898
899 case framepart_t::type_t::lex:
900 {
901 part["type"] = "lex";
902 part["value"] = fp.lexval;
903
904 break;
905 }
906 }
907
908 return part;
909 });
910
911 json fdata(fdatap);
912 std::string marshall = fdata.dump();
913
914 query = "INSERT INTO frames (group_id, data) VALUES (?, ?)";
915 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
916 {
917 db_error(ppdb, query);
918 }
919
920 sqlite3_bind_int(ppstmt, 1, gid);
921 sqlite3_bind_blob(ppstmt, 2, marshall.c_str(), marshall.length(), SQLITE_STATIC);
922
923 if (sqlite3_step(ppstmt) != SQLITE_DONE)
924 {
925 db_error(ppdb, query);
926 }
927
928 sqlite3_finalize(ppstmt);
929 }
930
931 for (auto member : mapping.second.members)
932 {
933 if (verbs.count(member) == 1)
934 {
935 auto& v = verbs[member];
936
937 query = "INSERT INTO verb_groups (verb_id, group_id) VALUES (?, ?)";
938 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
939 {
940 db_error(ppdb, query);
941 }
942
943 sqlite3_bind_int(ppstmt, 1, v.id);
944 sqlite3_bind_int(ppstmt, 2, gid);
945
946 if (sqlite3_step(ppstmt) != SQLITE_DONE)
947 {
948 db_error(ppdb, query);
949 }
950
951 sqlite3_finalize(ppstmt);
952 }
953 }
954
955 ppgs.update();
956 }
957 }
958
458 // Get nouns/adjectives/adverbs from WordNet 959 // Get nouns/adjectives/adverbs from WordNet
459 // Useful relations: 960 // Useful relations:
460 // - s: master list 961 // - s: master list
diff --git a/generator/prepositions.txt b/generator/prepositions.txt new file mode 100644 index 0000000..283e5c4 --- /dev/null +++ b/generator/prepositions.txt
@@ -0,0 +1,49 @@
1from: src, path, spatial, loc
2out: src, path, spatial
3out of: src, path, spatial, loc
4off: src, path, spatial, loc
5off of: src, path, spatial
6into: dest_conf, dest, path, spatial
7onto: dest_conf, dest, path, spatial
8for: dest_dir, dest, path, spatial
9at: dest_dir, dest, path, spatial, loc
10to: dest_dir, dest, path, spatial
11towards: dest_dir, dest, path, spatial
12along: dir, path, spatial, loc
13across: dir, path, spatial
14around: dir, path, spatial, loc
15down: dir, path, spatial
16over: dir, path, spatial, loc
17past: dir, path, spatial
18round: dir, path, spatial, loc
19through: dir, path, spatial
20towards: dir, path, spatial
21up: dir, path, spatial
22about: loc, spatial
23above: loc, spatial
24against: loc, spatial
25alongside: loc, spatial
26amid: loc, spatial
27among: loc, spatial
28amongst: loc, spatial
29astride: loc, spatial
30athwart: loc, spatial
31before: loc, spatial
32behind: loc, spatial
33beside: loc, spatial
34between: loc, spatial
35beyond: loc, spatial
36by: loc, spatial
37in: loc, spatial
38in front of: loc, spatial
39inside: loc, spatial
40near: loc, spatial
41next to: loc, spatial
42on: loc, spatial
43opposite: loc, spatial
44outside: loc, spatial
45throughout: loc, spatial
46under: loc, spatial
47underneath: loc, spatial
48upon: loc, spatial
49within: loc, spatial \ No newline at end of file
diff --git a/generator/schema.sql b/generator/schema.sql index 8e1e822..2295444 100644 --- a/generator/schema.sql +++ b/generator/schema.sql
@@ -11,8 +11,7 @@ CREATE TABLE `verbs` (
11DROP TABLE IF EXISTS `groups`; 11DROP TABLE IF EXISTS `groups`;
12CREATE TABLE `groups` ( 12CREATE TABLE `groups` (
13 `group_id` INTEGER PRIMARY KEY, 13 `group_id` INTEGER PRIMARY KEY,
14 `parent_id` INTEGER, 14 `data` BLOB NOT NULL
15 FOREIGN KEY (`parent_id`) REFERENCES `groups`(`group_id`)
16); 15);
17 16
18DROP TABLE IF EXISTS `frames`; 17DROP TABLE IF EXISTS `frames`;
@@ -251,3 +250,16 @@ CREATE TABLE `adverb_adverb_derivation` (
251 FOREIGN KEY (`adverb_1_id`) REFERENCES `adverbs`(`adverb_id`), 250 FOREIGN KEY (`adverb_1_id`) REFERENCES `adverbs`(`adverb_id`),
252 FOREIGN KEY (`adverb_2_id`) REFERENCES `adverbs`(`adverb_id`) 251 FOREIGN KEY (`adverb_2_id`) REFERENCES `adverbs`(`adverb_id`)
253); 252);
253
254DROP TABLE IF EXISTS `prepositions`;
255CREATE TABLE `prepositions` (
256 `preposition_id` INTEGER PRIMARY KEY,
257 `form` VARCHAR(32) NOT NULL
258);
259
260DROP TABLE IF EXISTS `preposition_groups`;
261CREATE TABLE `preposition_groups` (
262 `preposition_id` INTEGER NOT NULL,
263 `groupname` VARCHAR(32) NOT NULL,
264 FOREIGN KEY (`preposition_id`) REFERENCES `prepositions`(`preposition_id`)
265);
diff --git a/generator/vn.diff b/generator/vn.diff new file mode 100644 index 0000000..f636d28 --- /dev/null +++ b/generator/vn.diff
@@ -0,0 +1,482 @@
1diff /Users/hatkirby/Downloads/new_vn 2/admit-65.xml datadir/vn/admit-65.xml
2104c104
3< <SELRESTRS logic="or">
4---
5> <SELRESTRS>
6diff /Users/hatkirby/Downloads/new_vn 2/amuse-31.1.xml datadir/vn/amuse-31.1.xml
7270a271,273
8> <THEMROLE type="Cause">
9> <SELRESTRS/>
10> </THEMROLE>
11368c371,373
12< <LEX value="'s"/>
13---
14> <NP value="Cause">
15> <SYNRESTRS/>
16> </NP>
17404c409,411
18< <SYNRESTRS/>
19---
20> <SYNRESTRS>
21> <SYNRESTR Value="+" type="adjp"/>
22> </SYNRESTRS>
23diff /Users/hatkirby/Downloads/new_vn 2/animal_sounds-38.xml datadir/vn/animal_sounds-38.xml
24186a187,191
25> <PREP>
26> <SELRESTRS>
27> <SELRESTR Value="+" type="loc" />
28> </SELRESTRS>
29> </PREP>
30diff /Users/hatkirby/Downloads/new_vn 2/assessment-34.1.xml datadir/vn/assessment-34.1.xml
31103d102
32< <LEX value="'s"/>
33diff /Users/hatkirby/Downloads/new_vn 2/battle-36.4.xml datadir/vn/battle-36.4.xml
3496c96
35< <SYNRESTR Value="+" type="what_extract"/>
36---
37> <SYNRESTR Value="+" type="wh_comp"/>
38diff /Users/hatkirby/Downloads/new_vn 2/become-109.1.xml datadir/vn/become-109.1.xml
3934c34,36
40< <SYNRESTRS/>
41---
42> <SYNRESTRS>
43> <SYNRESTR Value="+" type="adjp"/>
44> </SYNRESTRS>
45diff /Users/hatkirby/Downloads/new_vn 2/beg-58.2.xml datadir/vn/beg-58.2.xml
4641c41
47< <SYNRESTR Value="-" type="for_comp"/>
48---
49> <SYNRESTR Value="+" type="for_comp"/>
50diff /Users/hatkirby/Downloads/new_vn 2/bend-45.2.xml datadir/vn/bend-45.2.xml
5147c47,49
52< <SELRESTRS/>
53---
54> <SELRESTRS>
55> <SELRESTR Value="+" type="state"/>
56> </SELRESTRS>
57230c232,234
58< <SYNRESTRS/>
59---
60> <SYNRESTRS>
61> <SYNRESTR Value="+" type="adjp"/>
62> </SYNRESTRS>
63280,282c284
64< <SELRESTRS>
65< <SELRESTR Value="+" type="state"/>
66< </SELRESTRS>
67---
68> <SELRESTRS/>
69332,334c334
70< <SELRESTRS>
71< <SELRESTR Value="+" type="state"/>
72< </SELRESTRS>
73---
74> <SELRESTRS/>
75diff /Users/hatkirby/Downloads/new_vn 2/break-45.1.xml datadir/vn/break-45.1.xml
76255c255,257
77< <SYNRESTRS/>
78---
79> <SYNRESTRS>
80> <SYNRESTR Value="+" type="adjp"/>
81> </SYNRESTRS>
82307c309,311
83< <SYNRESTRS/>
84---
85> <SYNRESTRS>
86> <SYNRESTR Value="+" type="adjp"/>
87> </SYNRESTRS>
88diff /Users/hatkirby/Downloads/new_vn 2/characterize-29.2.xml datadir/vn/characterize-29.2.xml
89107c107
90< <LEX value="as"/>
91---
92> <LEX value="to be"/>
93109,111c109,111
94< <SYNRESTRS>
95< <SYNRESTR Value="+" type="small_clause"/>
96< </SYNRESTRS>
97---
98> <SYNRESTRS>
99> <SYNRESTR Value="+" type="adjp"/>
100> </SYNRESTRS>
101386a387,391
102> <NP value="Attribute">
103> <SYNRESTRS>
104> <SYNRESTR Value="+" type="adjp"/>
105> </SYNRESTRS>
106> </NP>
107diff /Users/hatkirby/Downloads/new_vn 2/coloring-24.xml datadir/vn/coloring-24.xml
10889c89,91
109< <SYNRESTRS/>
110---
111> <SYNRESTRS>
112> <SYNRESTR Value="+" type="adjp"/>
113> </SYNRESTRS>
114diff /Users/hatkirby/Downloads/new_vn 2/confess-37.10.xml datadir/vn/confess-37.10.xml
115110a111,115
116> <NP value="Attribute">
117> <SYNRESTRS>
118> <SYNRESTR Value="+" type="adjp"/>
119> </SYNRESTRS>
120> </NP>
121diff /Users/hatkirby/Downloads/new_vn 2/consider-29.9.xml datadir/vn/consider-29.9.xml
122191,193c191,193
123< <SYNRESTRS>
124< <SYNRESTR Value="-" type="sentential"/>
125< </SYNRESTRS>
126---
127> <SYNRESTRS>
128> <SYNRESTR Value="+" type="adjp"/>
129> </SYNRESTRS>
130334,336c334,336
131< <SYNRESTRS>
132< <SYNRESTR Value="-" type="sentential"/>
133< </SYNRESTRS>
134---
135> <SYNRESTRS>
136> <SYNRESTR Value="+" type="adjp"/>
137> </SYNRESTRS>
138468,470c468,470
139< <SYNRESTRS>
140< <SYNRESTR Value="-" type="sentential"/>
141< </SYNRESTRS>
142---
143> <SYNRESTRS>
144> <SYNRESTR Value="+" type="adjp"/>
145> </SYNRESTRS>
146554,556c554,556
147< <SYNRESTRS>
148< <SYNRESTR Value="-" type="sentential"/>
149< </SYNRESTRS>
150---
151> <SYNRESTRS>
152> <SYNRESTR Value="+" type="adjp"/>
153> </SYNRESTRS>
154diff /Users/hatkirby/Downloads/new_vn 2/cut-21.1.xml datadir/vn/cut-21.1.xml
155316c316,318
156< <SYNRESTRS/>
157---
158> <SYNRESTRS>
159> <SYNRESTR Value="+" type="adjp"/>
160> </SYNRESTRS>
161368c370,372
162< <SYNRESTRS/>
163---
164> <SYNRESTRS>
165> <SYNRESTR Value="+" type="adjp"/>
166> </SYNRESTRS>
167560c564,566
168< <SYNRESTRS/>
169---
170> <SYNRESTRS>
171> <SYNRESTR Value="+" type="adjp"/>
172> </SYNRESTRS>
173diff /Users/hatkirby/Downloads/new_vn 2/declare-29.4.xml datadir/vn/declare-29.4.xml
17433,35c33,35
175< <SYNRESTRS>
176< <SYNRESTR Value="-" type="sentential"/>
177< </SYNRESTRS>
178---
179> <SYNRESTRS>
180> <SYNRESTR Value="+" type="adjp"/>
181> </SYNRESTRS>
182122,124c122,124
183< <SYNRESTRS>
184< <SYNRESTR Value="-" type="sentential"/>
185< </SYNRESTRS>
186---
187> <SYNRESTRS>
188> <SYNRESTR Value="+" type="adjp"/>
189> </SYNRESTRS>
190244,246c244,246
191< <SYNRESTRS>
192< <SYNRESTR Value="-" type="sentential"/>
193< </SYNRESTRS>
194---
195> <SYNRESTRS>
196> <SYNRESTR Value="+" type="adjp"/>
197> </SYNRESTRS>
198diff /Users/hatkirby/Downloads/new_vn 2/estimate-34.2.xml datadir/vn/estimate-34.2.xml
199123a124
200> <LEX value="to be"/>
201125,127c126,128
202< <SYNRESTRS>
203< <SYNRESTR Value="+" type="to_be"/>
204< </SYNRESTRS>
205---
206> <SYNRESTRS>
207> <SYNRESTR Value="+" type="adjp"/>
208> </SYNRESTRS>
209diff /Users/hatkirby/Downloads/new_vn 2/get-13.5.1.xml datadir/vn/get-13.5.1.xml
21055,56c55
211< <SELRESTR Value="-" type="location"/>
212< <SELRESTR Value="-" type="region"/>
213---
214> <SELRESTR Value="+" type="currency"/>
215diff /Users/hatkirby/Downloads/new_vn 2/hit-18.1.xml datadir/vn/hit-18.1.xml
216234c234,236
217< <SYNRESTRS/>
218---
219> <SYNRESTRS>
220> <SYNRESTR Value="+" type="adjp"/>
221> </SYNRESTRS>
222294c296,298
223< <SYNRESTRS/>
224---
225> <SYNRESTRS>
226> <SYNRESTR Value="+" type="adjp"/>
227> </SYNRESTRS>
228619c623,625
229< <SYNRESTRS/>
230---
231> <SYNRESTRS>
232> <SYNRESTR Value="+" type="adjp"/>
233> </SYNRESTRS>
234diff /Users/hatkirby/Downloads/new_vn 2/instr_communication-37.4.xml datadir/vn/instr_communication-37.4.xml
235195c195,197
236< <SYNRESTRS/>
237---
238> <SYNRESTRS>
239> <SYNRESTR Value="+" type="quotation"/>
240> </SYNRESTRS>
241233c235,237
242< <SYNRESTRS/>
243---
244> <SYNRESTRS>
245> <SYNRESTR Value="+" type="quotation"/>
246> </SYNRESTRS>
247diff /Users/hatkirby/Downloads/new_vn 2/judgment-33.xml datadir/vn/judgment-33.xml
248187a188,190
249> <NP value="Attribute">
250> <SYNRESTRS/>
251> </NP>
252243a247
253> <LEX value="to be"/>
254245,247c249
255< <SYNRESTRS>
256< <SYNRESTR Value="+" type="small_clause"/>
257< </SYNRESTRS>
258---
259> <SYNRESTRS/>
260diff /Users/hatkirby/Downloads/new_vn 2/manner_speaking-37.3.xml datadir/vn/manner_speaking-37.3.xml
261264c264,266
262< <SYNRESTRS/>
263---
264> <SYNRESTRS>
265> <SYNRESTR Value="+" type="quotation"/>
266> </SYNRESTRS>
267603c605,607
268< <SYNRESTRS/>
269---
270> <SYNRESTRS>
271> <SYNRESTR Value="+" type="quotation"/>
272> </SYNRESTRS>
273diff /Users/hatkirby/Downloads/new_vn 2/other_cos-45.4.xml datadir/vn/other_cos-45.4.xml
274534c534,536
275< <SYNRESTRS/>
276---
277> <SYNRESTRS>
278> <SYNRESTR Value="+" type="adjp"/>
279> </SYNRESTRS>
280diff /Users/hatkirby/Downloads/new_vn 2/pocket-9.10.xml datadir/vn/pocket-9.10.xml
281256c256,258
282< <SYNRESTRS/>
283---
284> <SYNRESTRS>
285> <SYNRESTR Value="+" type="adv-loc"/>
286> </SYNRESTRS>
287diff /Users/hatkirby/Downloads/new_vn 2/poison-42.2.xml datadir/vn/poison-42.2.xml
28893c93,95
289< <SYNRESTRS/>
290---
291> <SYNRESTRS>
292> <SYNRESTR Value="+" type="adjp"/>
293> </SYNRESTRS>
294diff /Users/hatkirby/Downloads/new_vn 2/pour-9.5.xml datadir/vn/pour-9.5.xml
29559,61c59,62
296< <SELRESTRS>
297< <SELRESTR Value="+" type="path"/>
298< <SELRESTR Value="-" type="dest_dir"/>
299---
300> <SELRESTRS logic="or">
301> <SELRESTR Value="+" type="dir"/>
302> <SELRESTR Value="+" type="src"/>
303> <SELRESTR Value="+" type="dest_conf"/>
304157,160c158,162
305< <SELRESTRS>
306< <SELRESTR Value="+" type="path"/>
307< <SELRESTR Value="-" type="dest_dir"/>
308< </SELRESTRS>
309---
310> <SELRESTRS logic="or">
311> <SELRESTR Value="+" type="dir"/>
312> <SELRESTR Value="+" type="src"/>
313> <SELRESTR Value="+" type="dest_conf"/>
314> </SELRESTRS>
315diff /Users/hatkirby/Downloads/new_vn 2/push-12.xml datadir/vn/push-12.xml
31690c90,92
317< <SYNRESTRS/>
318---
319> <SYNRESTRS>
320> <SYNRESTR Value="+" type="adjp"/>
321> </SYNRESTRS>
322diff /Users/hatkirby/Downloads/new_vn 2/roll-51.3.1.xml datadir/vn/roll-51.3.1.xml
323190c190,192
324< <SYNRESTRS/>
325---
326> <SYNRESTRS>
327> <SYNRESTR Value="+" type="adjp"/>
328> </SYNRESTRS>
329256c258,260
330< <SYNRESTRS/>
331---
332> <SYNRESTRS>
333> <SYNRESTR Value="+" type="adjp"/>
334> </SYNRESTRS>
335diff /Users/hatkirby/Downloads/new_vn 2/see-30.1.xml datadir/vn/see-30.1.xml
33616a17,19
337> <THEMROLE type="Patient">
338> <SELRESTRS/>
339> </THEMROLE>
34093a97,102
341> <PREP value="in">
342> <SELRESTRS/>
343> </PREP>
344> <NP value="Patient">
345> <SYNRESTRS/>
346> </NP>
347231a241,243
348> <NP value="Patient">
349> <SYNRESTRS/>
350> </NP>
351diff /Users/hatkirby/Downloads/new_vn 2/seem-109.xml datadir/vn/seem-109.xml
35230,32c30,32
353< <SYNRESTRS>
354< <SYNRESTR Value="-" type="sentential"/>
355< </SYNRESTRS>
356---
357> <SYNRESTRS>
358> <SYNRESTR Value="+" type="adjp"/>
359> </SYNRESTRS>
360diff /Users/hatkirby/Downloads/new_vn 2/slide-11.2.xml datadir/vn/slide-11.2.xml
36169,72c69,73
362< <SELRESTRS>
363< <SELRESTR Value="+" type="path"/>
364< <SELRESTR Value="-" type="dest_dir"/>
365< </SELRESTRS>
366---
367> <SELRESTRS logic="or">
368> <SELRESTR Value="+" type="dir"/>
369> <SELRESTR Value="+" type="src"/>
370> <SELRESTR Value="+" type="dest_conf"/>
371> </SELRESTRS>
372218,221c219,223
373< <SELRESTRS>
374< <SELRESTR Value="+" type="path"/>
375< <SELRESTR Value="-" type="dest_dir"/>
376< </SELRESTRS>
377---
378> <SELRESTRS logic="or">
379> <SELRESTR Value="+" type="dir"/>
380> <SELRESTR Value="+" type="src"/>
381> <SELRESTR Value="+" type="dest_conf"/>
382> </SELRESTRS>
383diff /Users/hatkirby/Downloads/new_vn 2/spank-18.3.xml datadir/vn/spank-18.3.xml
38469a70,72
385> <THEMROLE type="Recipient">
386> <SELRESTRS/>
387> </THEMROLE>
388201c204,206
389< <SYNRESTRS/>
390---
391> <SYNRESTRS>
392> <SYNRESTR Value="+" type="adjp"/>
393> </SYNRESTRS>
394529,532c534,541
395< <SYNRESTRS>
396< <SYNRESTR Value="+" type="genitive"/>
397< <SYNRESTR Value="+" type="body_part"/>
398< </SYNRESTRS>
399---
400> <SYNRESTRS>
401> <SYNRESTR Value="+" type="genitive"/>
402> </SYNRESTRS>
403> </NP>
404> <NP value="Recipient">
405> <SYNRESTRS>
406> <SYNRESTR Value="+" type="body_part"/>
407> </SYNRESTRS>
408diff /Users/hatkirby/Downloads/new_vn 2/swat-18.2.xml datadir/vn/swat-18.2.xml
409264c264,266
410< <SYNRESTRS/>
411---
412> <SYNRESTRS>
413> <SYNRESTR Value="+" type="adjp"/>
414> </SYNRESTRS>
415324c326,328
416< <SYNRESTRS/>
417---
418> <SYNRESTRS>
419> <SYNRESTR Value="+" type="adjp"/>
420> </SYNRESTRS>
421diff /Users/hatkirby/Downloads/new_vn 2/tape-22.4.xml datadir/vn/tape-22.4.xml
422364c364,366
423< <SYNRESTRS/>
424---
425> <SYNRESTRS>
426> <SYNRESTR Value="+" type="adjp"/>
427> </SYNRESTRS>
428diff /Users/hatkirby/Downloads/new_vn 2/vehicle-51.4.1.xml datadir/vn/vehicle-51.4.1.xml
429227c227,229
430< <SYNRESTRS/>
431---
432> <SYNRESTRS>
433> <SYNRESTR Value="+" type="adjp"/>
434> </SYNRESTRS>
435diff /Users/hatkirby/Downloads/new_vn 2/waltz-51.5.xml datadir/vn/waltz-51.5.xml
436181c181,183
437< <SYNRESTRS/>
438---
439> <SYNRESTRS>
440> <SYNRESTR Value="+" type="adjp"/>
441> </SYNRESTRS>
442diff /Users/hatkirby/Downloads/new_vn 2/want-32.1.xml datadir/vn/want-32.1.xml
443142a143
444> <ADV/>
445194a196
446> <ADJ/>
447305a308
448> <LEX value="to be"/>
449307,309c310,312
450< <SYNRESTRS>
451< <SYNRESTR Value="+" type="to_be"/>
452< </SYNRESTRS>
453---
454> <SYNRESTRS>
455> <SYNRESTR Value="+" type="adjp"/>
456> </SYNRESTRS>
457diff /Users/hatkirby/Downloads/new_vn 2/wipe_instr-10.4.2.xml datadir/vn/wipe_instr-10.4.2.xml
458178c178,180
459< <SYNRESTRS/>
460---
461> <SYNRESTRS>
462> <SYNRESTR Value="+" type="adjp"/>
463> </SYNRESTRS>
464diff /Users/hatkirby/Downloads/new_vn 2/wipe_manner-10.4.1.xml datadir/vn/wipe_manner-10.4.1.xml
465198c198,199
466< <SELRESTR Value="-" type="region"/>
467---
468> <SELRESTR Value="+" type="location"/>
469> <SELRESTR Value="-" type="region"/>
470diff /Users/hatkirby/Downloads/new_vn 2/wish-62.xml datadir/vn/wish-62.xml
47191a92
472> <LEX value="to be"/>
47393,95c94,96
474< <SYNRESTRS>
475< <SYNRESTR Value="+" type="to_be"/>
476< </SYNRESTRS>
477---
478> <SYNRESTRS>
479> <SYNRESTR Value="+" type="adjp"/>
480> </SYNRESTRS>
481122a124
482> <ADJ/>