#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "progress.h" #include "../lib/util.h" using json = nlohmann::json; struct verb_t { std::string infinitive; std::string past_tense; std::string past_participle; std::string ing_form; std::string s_form; int id; }; struct adjective_t { std::string base; std::string comparative; std::string superlative; }; struct noun_t { std::string singular; std::string plural; }; struct selrestr_t { enum class type_t { singleton, andlogic, orlogic, empty }; type_t type; std::string restriction; bool pos; std::list subordinates; }; struct framepart_t { enum class type_t { np, v, pp, adj, adv, lex }; type_t type; std::string role; selrestr_t selrestrs; std::set preprestrs; std::set synrestrs; std::list choices; std::string lexval; }; struct group_t { std::string id; std::string parent; std::set members; std::map roles; std::list> frames; }; std::map groups; std::map verbs; std::map adjectives; std::map nouns; std::map> wn; std::map images; std::map> pronunciations; void print_usage() { std::cout << "Verbly Datafile Generator" << std::endl; std::cout << "-------------------------" << std::endl; std::cout << "Requires exactly six arguments." << std::endl; std::cout << "1. The path to a VerbNet data directory." << std::endl; std::cout << "2. The path to an AGID infl.txt file." << std::endl; std::cout << "3. The path to a WordNet prolog data directory." << std::endl; std::cout << "4. The path to a CMUDICT pronunciation file." << std::endl; std::cout << "5. The path to an ImageNet urls.txt file." << std::endl; std::cout << "6. Datafile output path." << std::endl; exit(1); } void db_error(sqlite3* ppdb, std::string query) { std::cout << "Error writing to output database: " << sqlite3_errmsg(ppdb) << std::endl; std::cout << query << std::endl; sqlite3_close_v2(ppdb); print_usage(); } json export_selrestrs(selrestr_t r) { if (r.type == selrestr_t::type_t::empty) { return {}; } else if (r.type == selrestr_t::type_t::singleton) { json result; result["type"] = r.restriction; result["pos"] = r.pos; return result; } else { json result; if (r.type == selrestr_t::type_t::andlogic) { result["logic"] = "and"; } else { result["logic"] = "or"; } std::list outlist; std::transform(std::begin(r.subordinates), std::end(r.subordinates), std::back_inserter(outlist), &export_selrestrs); result["children"] = outlist; return result; } } selrestr_t parse_selrestrs(xmlNodePtr top, std::string filename) { selrestr_t r; xmlChar* key; if (!xmlStrcmp(top->name, (const xmlChar*) "SELRESTRS")) { if (xmlChildElementCount(top) == 0) { r.type = selrestr_t::type_t::empty; } else if (xmlChildElementCount(top) == 1) { r = parse_selrestrs(xmlFirstElementChild(top), filename); } else { r.type = selrestr_t::type_t::andlogic; if (xmlHasProp(top, (const xmlChar*) "logic")) { key = xmlGetProp(top, (const xmlChar*) "logic"); if (!xmlStrcmp(key, (const xmlChar*) "or")) { r.type = selrestr_t::type_t::orlogic; } xmlFree(key); } for (xmlNodePtr selrestr = top->xmlChildrenNode; selrestr != nullptr; selrestr = selrestr->next) { if (!xmlStrcmp(selrestr->name, (const xmlChar*) "SELRESTRS") || !xmlStrcmp(selrestr->name, (const xmlChar*) "SELRESTR")) { r.subordinates.push_back(parse_selrestrs(selrestr, filename)); } } } } else if (!xmlStrcmp(top->name, (const xmlChar*) "SELRESTR")) { r.type = selrestr_t::type_t::singleton; key = xmlGetProp(top, (xmlChar*) "Value"); r.pos = (std::string((const char*)key) == "+"); xmlFree(key); key = xmlGetProp(top, (xmlChar*) "type"); r.restriction = (const char*) key; xmlFree(key); } else { // Invalid std::cout << "Bad VerbNet file format: " << filename << std::endl; print_usage(); } return r; } group_t& parse_group(xmlNodePtr top, std::string filename) { xmlChar* key = xmlGetProp(top, (xmlChar*) "ID"); if (key == 0) { std::cout << "Bad VerbNet file format: " << filename << std::endl; print_usage(); } std::string vnid = (const char*)key; vnid = vnid.substr(vnid.find_first_of("-")+1); xmlFree(key); group_t g; g.id = vnid; for (xmlNodePtr node = top->xmlChildrenNode; node != nullptr; node = node->next) { if (!xmlStrcmp(node->name, (const xmlChar*) "SUBCLASSES")) { for (xmlNodePtr subclass = node->xmlChildrenNode; subclass != nullptr; subclass = subclass->next) { if (!xmlStrcmp(subclass->name, (const xmlChar*) "VNSUBCLASS")) { auto& sg = parse_group(subclass, filename); sg.parent = vnid; for (auto member : sg.members) { g.members.insert(member); } // The schema requires that subclasses appear after role definitions, so we can do this now for (auto role : g.roles) { if (sg.roles.count(role.first) == 0) { sg.roles[role.first] = role.second; } } } } } else if (!xmlStrcmp(node->name, (const xmlChar*) "MEMBERS")) { for (xmlNodePtr member = node->xmlChildrenNode; member != nullptr; member = member->next) { if (!xmlStrcmp(member->name, (const xmlChar*) "MEMBER")) { key = xmlGetProp(member, (xmlChar*) "name"); g.members.insert((const char*)key); xmlFree(key); } } } else if (!xmlStrcmp(node->name, (const xmlChar*) "THEMROLES")) { for (xmlNodePtr role = node->xmlChildrenNode; role != nullptr; role = role->next) { if (!xmlStrcmp(role->name, (const xmlChar*) "THEMROLE")) { selrestr_t r; r.type = selrestr_t::type_t::empty; key = xmlGetProp(role, (const xmlChar*) "type"); std::string type = (const char*)key; xmlFree(key); for (xmlNodePtr rolenode = role->xmlChildrenNode; rolenode != nullptr; rolenode = rolenode->next) { if (!xmlStrcmp(rolenode->name, (const xmlChar*) "SELRESTRS")) { r = parse_selrestrs(rolenode, filename); } } g.roles[type] = r; } } } else if (!xmlStrcmp(node->name, (const xmlChar*) "FRAMES")) { for (xmlNodePtr frame = node->xmlChildrenNode; frame != nullptr; frame = frame->next) { if (!xmlStrcmp(frame->name, (const xmlChar*) "FRAME")) { std::list f; for (xmlNodePtr framenode = frame->xmlChildrenNode; framenode != nullptr; framenode = framenode->next) { if (!xmlStrcmp(framenode->name, (const xmlChar*) "SYNTAX")) { for (xmlNodePtr syntaxnode = framenode->xmlChildrenNode; syntaxnode != nullptr; syntaxnode = syntaxnode->next) { framepart_t fp; if (!xmlStrcmp(syntaxnode->name, (const xmlChar*) "NP")) { fp.type = framepart_t::type_t::np; key = xmlGetProp(syntaxnode, (xmlChar*) "value"); fp.role = (const char*)key; xmlFree(key); fp.selrestrs.type = selrestr_t::type_t::empty; for (xmlNodePtr npnode = syntaxnode->xmlChildrenNode; npnode != nullptr; npnode = npnode->next) { if (!xmlStrcmp(npnode->name, (const xmlChar*) "SYNRESTRS")) { for (xmlNodePtr synrestr = npnode->xmlChildrenNode; synrestr != nullptr; synrestr = synrestr->next) { if (!xmlStrcmp(synrestr->name, (const xmlChar*) "SYNRESTR")) { key = xmlGetProp(synrestr, (xmlChar*) "type"); fp.synrestrs.insert(std::string((const char*)key)); xmlFree(key); } } } if (!xmlStrcmp(npnode->name, (const xmlChar*) "SELRESTRS")) { fp.selrestrs = parse_selrestrs(npnode, filename); } } } else if (!xmlStrcmp(syntaxnode->name, (xmlChar*) "VERB")) { fp.type = framepart_t::type_t::v; } else if (!xmlStrcmp(syntaxnode->name, (xmlChar*) "PREP")) { fp.type = framepart_t::type_t::pp; if (xmlHasProp(syntaxnode, (xmlChar*) "value")) { key = xmlGetProp(syntaxnode, (xmlChar*) "value"); std::string choices = (const char*)key; xmlFree(key); fp.choices = verbly::split>(choices, " "); } for (xmlNodePtr npnode = syntaxnode->xmlChildrenNode; npnode != nullptr; npnode = npnode->next) { if (!xmlStrcmp(npnode->name, (const xmlChar*) "SELRESTRS")) { for (xmlNodePtr synrestr = npnode->xmlChildrenNode; synrestr != nullptr; synrestr = synrestr->next) { if (!xmlStrcmp(synrestr->name, (const xmlChar*) "SELRESTR")) { key = xmlGetProp(synrestr, (xmlChar*) "type"); fp.preprestrs.insert(std::string((const char*)key)); xmlFree(key); } } } } } else if (!xmlStrcmp(syntaxnode->name, (xmlChar*) "ADJ")) { fp.type = framepart_t::type_t::adj; } else if (!xmlStrcmp(syntaxnode->name, (xmlChar*) "ADV")) { fp.type = framepart_t::type_t::adv; } else if (!xmlStrcmp(syntaxnode->name, (xmlChar*) "LEX")) { fp.type = framepart_t::type_t::lex; key = xmlGetProp(syntaxnode, (xmlChar*) "value"); fp.lexval = (const char*)key; xmlFree(key); } else { continue; } f.push_back(fp); } g.frames.push_back(f); } } } } } } groups[vnid] = g; return groups[vnid]; } int main(int argc, char** argv) { if (argc != 7) { print_usage(); } // VerbNet data std::cout << "Reading verb frames..." << std::endl; DIR* dir; if ((dir = opendir(argv[1])) == nullptr) { std::cout << "Invalid VerbNet data directory." << std::endl; print_usage(); } struct dirent* ent; while ((ent = readdir(dir)) != nullptr) { std::string filename(argv[1]); if (filename.back() != '/') { filename += '/'; } filename += ent->d_name; //std::cout << ent->d_name << std::endl; if (filename.rfind(".xml") != filename.size() - 4) { continue; } xmlDocPtr doc = xmlParseFile(filename.c_str()); if (doc == nullptr) { std::cout << "Error opening " << filename << std::endl; print_usage(); } xmlNodePtr top = xmlDocGetRootElement(doc); if ((top == nullptr) || (xmlStrcmp(top->name, (xmlChar*) "VNCLASS"))) { std::cout << "Bad VerbNet file format: " << filename << std::endl; print_usage(); } parse_group(top, filename); } closedir(dir); // Get verbs from AGID std::cout << "Reading inflections..." << std::endl; std::ifstream agidfile(argv[2]); if (!agidfile.is_open()) { std::cout << "Could not open AGID file: " << argv[2] << std::endl; print_usage(); } for (;;) { std::string line; if (!getline(agidfile, line)) { break; } if (line.back() == '\r') { line.pop_back(); } int divider = line.find_first_of(" "); std::string word = line.substr(0, divider); line = line.substr(divider+1); char type = line[0]; if (line[1] == '?') { line.erase(0, 4); } else { line.erase(0, 3); } std::vector forms; while (!line.empty()) { std::string inflection; if ((divider = line.find(" | ")) != std::string::npos) { inflection = line.substr(0, divider); line = line.substr(divider + 3); } else { inflection = line; line = ""; } if ((divider = inflection.find_first_of(",?")) != std::string::npos) { inflection = inflection.substr(0, divider); } forms.push_back(inflection); } switch (type) { case 'V': { verb_t v; v.infinitive = word; if (forms.size() == 4) { v.past_tense = forms[0]; v.past_participle = forms[1]; v.ing_form = forms[2]; v.s_form = forms[3]; } else if (forms.size() == 3) { v.past_tense = forms[0]; v.past_participle = forms[0]; v.ing_form = forms[1]; v.s_form = forms[2]; } else if (forms.size() == 8) { // As of AGID 2014.08.11, this is only "to be" v.past_tense = forms[0]; v.past_participle = forms[2]; v.ing_form = forms[3]; v.s_form = forms[4]; } else { // Words that don't fit the cases above as of AGID 2014.08.11: // - may and shall do not conjugate the way we want them to // - methinks only has a past tense and is an outlier // - wit has five forms, and is archaic/obscure enough that we can ignore it for now std::cout << "Ignoring verb \"" << word << "\" due to non-standard number of forms." << std::endl; } verbs[word] = v; break; } case 'A': { adjective_t adj; adj.base = word; if (forms.size() == 2) { adj.comparative = forms[0]; adj.superlative = forms[1]; } else { // As of AGID 2014.08.11, this is only "only", which has only the form "onliest" std::cout << "Ignoring adjective/adverb \"" << word << "\" due to non-standard number of forms." << std::endl; } adjectives[word] = adj; break; } case 'N': { noun_t n; n.singular = word; if (forms.size() == 1) { n.plural = forms[0]; } else { // As of AGID 2014.08.11, this is non-existent. std::cout << "Ignoring noun \"" << word << "\" due to non-standard number of forms." << std::endl; } nouns[word] = n; break; } } } // Pronounciations std::cout << "Reading pronunciations..." << std::endl; std::ifstream pronfile(argv[4]); if (!pronfile.is_open()) { std::cout << "Could not open CMUDICT file: " << argv[4] << std::endl; print_usage(); } for (;;) { std::string line; if (!getline(pronfile, line)) { break; } if (line.back() == '\r') { line.pop_back(); } std::regex phoneme("([A-Z][^ \\(]*)(?:\\(\\d+\\))? ([A-Z 0-9]+)"); std::smatch phoneme_data; if (std::regex_search(line, phoneme_data, phoneme)) { std::string canonical(phoneme_data[1]); std::transform(std::begin(canonical), std::end(canonical), std::begin(canonical), ::tolower); pronunciations[canonical].insert(phoneme_data[2]); } } // Images std::cout << "Reading images..." << std::endl; std::ifstream imagefile(argv[5]); if (!imagefile.is_open()) { std::cout << "Could not open ImageNet file: " << argv[5] << std::endl; print_usage(); } for (;;) { std::string line; if (!getline(imagefile, line)) { break; } if (line.back() == '\r') { line.pop_back(); } std::string wnid_s = line.substr(1, 8); int wnid = stoi(wnid_s) + 100000000; images[wnid]++; } imagefile.close(); // Start writing output std::cout << "Writing schema..." << std::endl; sqlite3* ppdb; if (sqlite3_open_v2(argv[6], &ppdb, SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE, NULL) != SQLITE_OK) { std::cout << "Error opening output datafile: " << sqlite3_errmsg(ppdb) << std::endl; print_usage(); } std::ifstream schemafile("schema.sql"); if (!schemafile.is_open()) { std::cout << "Could not find schema file" << std::endl; print_usage(); } std::stringstream schemabuilder; for (;;) { std::string line; if (!getline(schemafile, line)) { break; } if (line.back() == '\r') { line.pop_back(); } schemabuilder << line << std::endl; } std::string schema = schemabuilder.str(); while (!schema.empty()) { std::string query; int divider = schema.find(";"); if (divider != std::string::npos) { query = schema.substr(0, divider+1); schema = schema.substr(divider+2); } else { break; } sqlite3_stmt* schmstmt; if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &schmstmt, NULL) != SQLITE_OK) { db_error(ppdb, query); } if (sqlite3_step(schmstmt) != SQLITE_DONE) { db_error(ppdb, query); } sqlite3_finalize(schmstmt); } std::cout << "Writing prepositions..." << std::endl; std::ifstream prepfile("prepositions.txt"); if (!prepfile.is_open()) { std::cout << "Could not find prepositions file" << std::endl; print_usage(); } for (;;) { std::string line; if (!getline(prepfile, line)) { break; } if (line.back() == '\r') { line.pop_back(); } std::regex relation("^([^:]+): (.+)"); std::smatch relation_data; std::regex_search(line, relation_data, relation); std::string prep = relation_data[1]; std::list groups = verbly::split>(relation_data[2], ", "); std::string query("INSERT INTO prepositions (form) VALUES (?)"); sqlite3_stmt* ppstmt; if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) { db_error(ppdb, query); } sqlite3_bind_text(ppstmt, 1, prep.c_str(), prep.length(), SQLITE_STATIC); if (sqlite3_step(ppstmt) != SQLITE_DONE) { db_error(ppdb, query); } sqlite3_finalize(ppstmt); query = "SELECT last_insert_rowid()"; if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) { db_error(ppdb, query); } if (sqlite3_step(ppstmt) != SQLITE_ROW) { db_error(ppdb, query); } int rowid = sqlite3_column_int(ppstmt, 0); sqlite3_finalize(ppstmt); for (auto group : groups) { query = "INSERT INTO preposition_groups (preposition_id, groupname) VALUES (?, ?)"; if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) { db_error(ppdb, query); } sqlite3_bind_int(ppstmt, 1, rowid); sqlite3_bind_text(ppstmt, 2, group.c_str(), group.length(), SQLITE_STATIC); if (sqlite3_step(ppstmt) != SQLITE_DONE) { db_error(ppdb, query); } sqlite3_finalize(ppstmt); } } { progress ppgs("Writing verbs...", verbs.size()); for (auto& mapping : verbs) { sqlite3_stmt* ppstmt; std::string query("INSERT INTO verbs (infinitive, past_tense, past_participle, ing_form, s_form) VALUES (?, ?, ?, ?, ?)"); if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) { db_error(ppdb, query); } sqlite3_bind_text(ppstmt, 1, mapping.second.infinitive.c_str(), mapping.second.infinitive.length(), SQLITE_STATIC); sqlite3_bind_text(ppstmt, 2, mapping.second.past_tense.c_str(), mapping.second.past_tense.length(), SQLITE_STATIC); sqlite3_bind_text(ppstmt, 3, mapping.second.past_participle.c_str(), mapping.second.past_participle.length(), SQLITE_STATIC); sqlite3_bind_text(ppstmt, 4, mapping.second.ing_form.c_str(), mapping.second.ing_form.length(), SQLITE_STATIC); sqlite3_bind_text(ppstmt, 5, mapping.second.s_form.c_str(), mapping.second.s_form.length(), SQLITE_STATIC); if (sqlite3_step(ppstmt) != SQLITE_DONE) { db_error(ppdb, query); } sqlite3_finalize(ppstmt); std::string canonical(mapping.second.infinitive); std::transform(std::begin(canonical), std::end(canonical), std::begin(canonical), ::tolower); if (pronunciations.count(canonical) == 1) { query = "SELECT last_insert_rowid()"; if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) { db_error(ppdb, query); } if (sqlite3_step(ppstmt) != SQLITE_ROW) { db_error(ppdb, query); } int rowid = sqlite3_column_int(ppstmt, 0); sqlite3_finalize(ppstmt); mapping.second.id = rowid; for (auto pronunciation : pronunciations[canonical]) { query = "INSERT INTO verb_pronunciations (verb_id, pronunciation) VALUES (?, ?)"; if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) { db_error(ppdb, query); } sqlite3_bind_int(ppstmt, 1, rowid); sqlite3_bind_text(ppstmt, 2, pronunciation.c_str(), pronunciation.length(), SQLITE_STATIC); if (sqlite3_step(ppstmt) != SQLITE_DONE) { db_error(ppdb, query); } sqlite3_finalize(ppstmt); } } ppgs.update(); } } { progress ppgs("Writing verb frames...", groups.size()); for (auto& mapping : groups) { std::list roledatal; std::transform(std::begin(mapping.second.roles), std::end(mapping.second.roles), std::back_inserter(roledatal), [] (std::pair r) { json role; role["type"] = r.first; role["selrestrs"] = export_selrestrs(r.second); return role; }); json roledata(roledatal); std::string rdm = roledata.dump(); sqlite3_stmt* ppstmt; std::string query("INSERT INTO groups (data) VALUES (?)"); if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) { db_error(ppdb, query); } sqlite3_bind_blob(ppstmt, 1, rdm.c_str(), rdm.size(), SQLITE_STATIC); if (sqlite3_step(ppstmt) != SQLITE_DONE) { db_error(ppdb, query); } sqlite3_finalize(ppstmt); query = "SELECT last_insert_rowid()"; if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) { db_error(ppdb, query); } if (sqlite3_step(ppstmt) != SQLITE_ROW) { db_error(ppdb, query); } int gid = sqlite3_column_int(ppstmt, 0); sqlite3_finalize(ppstmt); for (auto frame : mapping.second.frames) { std::list fdatap; std::transform(std::begin(frame), std::end(frame), std::back_inserter(fdatap), [] (framepart_t& fp) { json part; switch (fp.type) { case framepart_t::type_t::np: { part["type"] = "np"; part["role"] = fp.role; part["selrestrs"] = export_selrestrs(fp.selrestrs); part["synrestrs"] = fp.synrestrs; break; } case framepart_t::type_t::pp: { part["type"] = "pp"; part["values"] = fp.choices; part["preprestrs"] = fp.preprestrs; break; } case framepart_t::type_t::v: { part["type"] = "v"; break; } case framepart_t::type_t::adj: { part["type"] = "adj"; break; } case framepart_t::type_t::adv: { part["type"] = "adv"; break; } case framepart_t::type_t::lex: { part["type"] = "lex"; part["value"] = fp.lexval; break; } } return part; }); json fdata(fdatap); std::string marshall = fdata.dump(); query = "INSERT INTO frames (group_id, data) VALUES (?, ?)"; if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) { db_error(ppdb, query); } sqlite3_bind_int(ppstmt, 1, gid); sqlite3_bind_blob(ppstmt, 2, marshall.c_str(), marshall.length(), SQLITE_STATIC); if (sqlite3_step(ppstmt) != SQLITE_DONE) { db_error(ppdb, query); } sqlite3_finalize(ppstmt); } for (auto member : mapping.second.members) { if (verbs.count(member) == 1) { auto& v = verbs[member]; query = "INSERT INTO verb_groups (verb_id, group_id) VALUES (?, ?)"; if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) { db_error(ppdb, query); } sqlite3_bind_int(ppstmt, 1, v.id); sqlite3_bind_int(ppstmt, 2, gid); if (sqlite3_step(ppstmt) != SQLITE_DONE) { db_error(ppdb, query); } sqlite3_finalize(ppstmt); } } ppgs.update(); } } // Get nouns/adjectives/adverbs from WordNet // Useful relations: // - s: master list // - ant: antonymy (e.g. happy/sad, sad/happy, happiness/sadness) // - at: variation (e.g. a measurement can be standard or nonstandard) // - der: derivation (e.g. happy/happily, happily/happy) // - hyp: hypernymy/hyponymy (e.g. color/red, color/blue) // - ins: instantiation (do we need this? let's see) // - mm: member meronymy/holonymy (e.g. family/mother, family/child) // - mp: part meronymy/holonymy (e.g. wheel/spoke, wheel/tire) // - ms: substance meronymy/holonymy (e.g. tire/rubber, doorstop/rubber) // - per: pertainymy (e.g. something that is Alaskan pertains to Alaska) // mannernymy (e.g. something done quickly is done in a manner that is quick) // - sa: specification (e.g. inaccurate (general) can mean imprecise or incorrect (specific)) // - sim: synonymy (e.g. cheerful/happy, happy/cheerful) // - syntax: positioning flags for some adjectives std::string wnpref {argv[3]}; if (wnpref.back() != '/') { wnpref += '/'; } // s table { std::ifstream wnsfile(wnpref + "wn_s.pl"); if (!wnsfile.is_open()) { std::cout << "Invalid WordNet data directory." << std::endl; print_usage(); } std::list lines; for (;;) { std::string line; if (!getline(wnsfile, line)) { break; } if (line.back() == '\r') { line.pop_back(); } lines.push_back(line); } progress ppgs("Writing nouns, adjectives, and adverbs...", lines.size()); for (auto line : lines) { ppgs.update(); std::regex relation("^s\\(([134]\\d{8}),(\\d+),'([\\w ]+)',"); std::smatch relation_data; if (!std::regex_search(line, relation_data, relation)) { continue; } int synset_id = stoi(relation_data[1]); int wnum = stoi(relation_data[2]); std::string word = relation_data[3]; std::string query; switch (synset_id / 100000000) { case 1: // Noun { if (nouns.count(word) == 1) { query = "INSERT INTO nouns (singular, proper, complexity, images, wnid, plural) VALUES (?, ?, ?, ?, ?, ?)"; } else { query = "INSERT INTO nouns (singular, proper, complexity, images, wnid) VALUES (?, ?, ?, ?, ?)"; } break; } case 2: // Verb { // Ignore break; } case 3: // Adjective { if (adjectives.count(word) == 1) { query = "INSERT INTO adjectives (base_form, complexity, comparative, superlative) VALUES (?, ?, ?, ?)"; } else { query = "INSERT INTO adjectives (base_form, complexity) VALUES (?, ?)"; } break; } case 4: // Adverb { if (adjectives.count(word) == 1) { query = "INSERT INTO adverbs (base_form, complexity, comparative, superlative) VALUES (?, ?, ?, ?)"; } else { query = "INSERT INTO adverbs (base_form, complexity) VALUES (?, ?)"; } break; } } sqlite3_stmt* ppstmt; if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) { db_error(ppdb, query); } sqlite3_bind_text(ppstmt, 1, word.c_str(), word.length(), SQLITE_STATIC); switch (synset_id / 100000000) { case 1: // Noun { sqlite3_bind_int(ppstmt, 2, (std::any_of(std::begin(word), std::end(word), [] (char ch) { return isupper(ch); }) ? 1 : 0)); sqlite3_bind_int(ppstmt, 3, verbly::split>(word, " ").size()); sqlite3_bind_int(ppstmt, 4, images[synset_id]); sqlite3_bind_int(ppstmt, 5, synset_id); if (nouns.count(word) == 1) { sqlite3_bind_text(ppstmt, 6, nouns[word].plural.c_str(), nouns[word].plural.length(), SQLITE_STATIC); } break; } case 3: // Adjective case 4: // Adverb { sqlite3_bind_int(ppstmt, 2, verbly::split>(word, " ").size()); if (adjectives.count(word) == 1) { sqlite3_bind_text(ppstmt, 3, adjectives[word].comparative.c_str(), adjectives[word].comparative.length(), SQLITE_STATIC); sqlite3_bind_text(ppstmt, 4, adjectives[word].superlative.c_str(), adjectives[word].superlative.length(), SQLITE_STATIC); } break; } } if (sqlite3_step(ppstmt) != SQLITE_DONE) { db_error(ppdb, query); } sqlite3_finalize(ppstmt); query = "SELECT last_insert_rowid()"; if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) { db_error(ppdb, query); } if (sqlite3_step(ppstmt) != SQLITE_ROW) { db_error(ppdb, query); } int rowid = sqlite3_column_int(ppstmt, 0); wn[synset_id][wnum] = rowid; sqlite3_finalize(ppstmt); std::string canonical(word); std::transform(std::begin(canonical), std::end(canonical), std::begin(canonical), ::tolower); if (pronunciations.count(canonical) == 1) { for (auto pronunciation : pronunciations[canonical]) { switch (synset_id / 100000000) { case 1: // Noun { query = "INSERT INTO noun_pronunciations (noun_id, pronunciation) VALUES (?, ?)"; break; } case 3: // Adjective { query = "INSERT INTO adjective_pronunciations (adjective_id, pronunciation) VALUES (?, ?)"; break; } case 4: // Adverb { query = "INSERT INTO adverb_pronunciations (adverb_id, pronunciation) VALUES (?, ?)"; break; } } if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) { db_error(ppdb, query); } sqlite3_bind_int(ppstmt, 1, rowid); sqlite3_bind_text(ppstmt, 2, pronunciation.c_str(), pronunciation.length(), SQLITE_STATIC); if (sqlite3_step(ppstmt) != SQLITE_DONE) { db_error(ppdb, query); } sqlite3_finalize(ppstmt); } } } } // While we're working on s { progress ppgs("Writing word synonyms...", wn.size()); for (auto sense : wn) { ppgs.update(); for (auto word1 : sense.second) { for (auto word2 : sense.second) { if (word1 != word2) { std::string query; switch (sense.first / 100000000) { case 1: // Noun { query = "INSERT INTO noun_synonymy (noun_1_id, noun_2_id) VALUES (?, ?)"; break; } case 2: // Verb { // Ignore break; } case 3: // Adjective { query = "INSERT INTO adjective_synonymy (adjective_1_id, adjective_2_id) VALUES (?, ?)"; break; } case 4: // Adverb { query = "INSERT INTO adverb_synonymy (adverb_1_id, adverb_2_id) VALUES (?, ?)"; break; } } sqlite3_stmt* ppstmt; if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) { db_error(ppdb, query); } sqlite3_bind_int(ppstmt, 1, word1.second); sqlite3_bind_int(ppstmt, 2, word2.second); if (sqlite3_step(ppstmt) != SQLITE_DONE) { db_error(ppdb, query); } sqlite3_finalize(ppstmt); } } } } } // ant table { std::ifstream wnantfile(wnpref + "wn_ant.pl"); if (!wnantfile.is_open()) { std::cout << "Invalid WordNet data directory." << std::endl; print_usage(); } std::list lines; for (;;) { std::string line; if (!getline(wnantfile, line)) { break; } if (line.back() == '\r') { line.pop_back(); } lines.push_back(line); } progress ppgs("Writing antonyms...", lines.size()); for (auto line : lines) { ppgs.update(); std::regex relation("^ant\\(([134]\\d{8}),(\\d+),([134]\\d{8}),(\\d+)\\)\\."); std::smatch relation_data; if (!std::regex_search(line, relation_data, relation)) { continue; } int synset_id_1 = stoi(relation_data[1]); int wnum_1 = stoi(relation_data[2]); int synset_id_2 = stoi(relation_data[3]); int wnum_2 = stoi(relation_data[4]); std::string query; switch (synset_id_1 / 100000000) { case 1: // Noun { query = "INSERT INTO noun_antonymy (noun_1_id, noun_2_id) VALUES (?, ?)"; break; } case 2: // Verb { // Ignore break; } case 3: // Adjective { query = "INSERT INTO adjective_antonymy (adjective_1_id, adjective_2_id) VALUES (?, ?)"; break; } case 4: // Adverb { query = "INSERT INTO adverb_antonymy (adverb_1_id, adverb_2_id) VALUES (?, ?)"; break; } } sqlite3_stmt* ppstmt; if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) { db_error(ppdb, query); } sqlite3_bind_int(ppstmt, 1, wn[synset_id_1][wnum_1]); sqlite3_bind_int(ppstmt, 2, wn[synset_id_2][wnum_2]); if (sqlite3_step(ppstmt) != SQLITE_DONE) { db_error(ppdb, query); } sqlite3_finalize(ppstmt); } } // at table { std::ifstream wnatfile(wnpref + "wn_at.pl"); if (!wnatfile.is_open()) { std::cout << "Invalid WordNet data directory." << std::endl; print_usage(); } std::list lines; for (;;) { std::string line; if (!getline(wnatfile, line)) { break; } if (line.back() == '\r') { line.pop_back(); } lines.push_back(line); } progress ppgs("Writing variations...", lines.size()); for (auto line : lines) { ppgs.update(); std::regex relation("^at\\((1\\d{8}),(3\\d{8})\\)\\."); std::smatch relation_data; if (!std::regex_search(line, relation_data, relation)) { continue; } int synset_id_1 = stoi(relation_data[1]); int synset_id_2 = stoi(relation_data[2]); std::string query("INSERT INTO variation (noun_id, adjective_id) VALUES (?, ?)"); for (auto mapping1 : wn[synset_id_1]) { for (auto mapping2 : wn[synset_id_2]) { sqlite3_stmt* ppstmt; if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK) { db_error(ppdb, query); } sqlite3_bind_int(ppstmt, 1, mapping1.second); sqlite3_bind_int(ppstmt, 2, mapping2.second); if (sqlite3_step(ppstmt) != SQLITE_DONE) { db_error(ppdb, query); } sqlite3_finalize(ppstmt); } } } } // der table { std::ifstream wnderfile(wnpref + "wn_der.pl"); if (!wnderfile.is_open()) { std::cout << "Invalid WordNet data directory." << std::endl; print_usage(); } std::list lines; for (;;) { std::string line; if (!getline(wnderfile, line)) { break; } if (line.back() == '\r') { line.pop_back(); } lines.push_back(line); } progress ppgs("Writing morphological derivation...", lines.size()); for (auto line : lines) { ppgs.update(); std::regex relation("^der\\(([134]\\d{8}),(\\d+),([134]\\d{8}),(\\d+)\\)\\."); std::smatch relation_data; if (!std::regex_search(line, relation_data, relation)) { continue; } int synset_id_1 = stoi(relation_data[1]); int wnum_1 = stoi(relation_data[2]); int synset_id_2 = stoi(relation_data[3]); int wnum_2 = stoi(relation_data[4]); std::string query; switch (synset_id_1 / 100000000) { case 1: // Noun { switch (synset_id_2 / 100000000) { case 1: // Noun { query = "INSERT INTO noun_noun_derivation (noun_1_id, noun_2_id) VALUES (?, ?)"; break; } case 3: // Adjective { query = "INSERT INTO noun_adjective_derivation (noun_id, adjective_id) VALUES (?, ?)"; break; } case 4: // Adverb { query = "INSERT INTO noun_adverb_derivation (noun_id, adverb_id) VALUES (?, ?)"; break; } } break; } case 3: // Adjective { switch (synset_id_2 / 100000000) { case 1: // Noun { query = "INSERT INTO noun_adjective_derivation (adjective_id, noun_id) VALUES (?, ?)"; break; } case 3: // Adjective { query = "INSERT INTO adjective_adjective_derivation (adjective_id, adjective_id) VALUES (?, ?)"; break; } case 4: // Adverb { query = "INSERT INTO adjective_adverb_derivation (adjective_id, adverb_id) VALUES (?, ?)"; break; } } break; } case 4: // Adverb { switch (synset_id_2 / 100000000) { case 1: // Noun { query = "INSERT INTO noun_adverb_derivation (adverb_id, noun_id) VALUES (?, ?)"; break; } case 3: // Adjective { query = "INSERT INTO adjective_adverb_derivation (adverb_id, adjective_id) VALUES (?, ?)"; break; } case 4: // Adverb { query = "INSERT INTO adverb_adverb_derivation (adverb_1_id, adverb_2_id) VALUES (?, ?)"; break; } } break; } } sqlite3_stmt* ppstmt; if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK) { db_error(ppdb, query); } sqlite3_bind_int(ppstmt, 1, wn[synset_id_1][wnum_1]); sqlite3_bind_int(ppstmt, 2, wn[synset_id_2][wnum_2]); if (sqlite3_step(ppstmt) != SQLITE_DONE) { db_error(ppdb, query); } sqlite3_finalize(ppstmt); } } // hyp table { std::ifstream wnhypfile(wnpref + "wn_hyp.pl"); if (!wnhypfile.is_open()) { std::cout << "Invalid WordNet data directory." << std::endl; print_usage(); } std::list lines; for (;;) { std::string line; if (!getline(wnhypfile, line)) { break; } if (line.back() == '\r') { line.pop_back(); } lines.push_back(line); } progress ppgs("Writing hypernyms...", lines.size()); for (auto line : lines) { ppgs.update(); std::regex relation("^hyp\\((1\\d{8}),(1\\d{8})\\)\\."); std::smatch relation_data; if (!std::regex_search(line, relation_data, relation)) { continue; } int synset_id_1 = stoi(relation_data[1]); int synset_id_2 = stoi(relation_data[2]); std::string query("INSERT INTO hypernymy (hyponym_id, hypernym_id) VALUES (?, ?)"); for (auto mapping1 : wn[synset_id_1]) { for (auto mapping2 : wn[synset_id_2]) { sqlite3_stmt* ppstmt; if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK) { db_error(ppdb, query); } sqlite3_bind_int(ppstmt, 1, mapping1.second); sqlite3_bind_int(ppstmt, 2, mapping2.second); if (sqlite3_step(ppstmt) != SQLITE_DONE) { db_error(ppdb, query); } sqlite3_finalize(ppstmt); } } } } // ins table { std::ifstream wninsfile(wnpref + "wn_ins.pl"); if (!wninsfile.is_open()) { std::cout << "Invalid WordNet data directory." << std::endl; print_usage(); } std::list lines; for (;;) { std::string line; if (!getline(wninsfile, line)) { break; } if (line.back() == '\r') { line.pop_back(); } lines.push_back(line); } progress ppgs("Writing instantiations...", lines.size()); for (auto line : lines) { ppgs.update(); std::regex relation("^ins\\((1\\d{8}),(1\\d{8})\\)\\."); std::smatch relation_data; if (!std::regex_search(line, relation_data, relation)) { continue; } int synset_id_1 = stoi(relation_data[1]); int synset_id_2 = stoi(relation_data[2]); std::string query("INSERT INTO instantiation (instance_id, class_id) VALUES (?, ?)"); for (auto mapping1 : wn[synset_id_1]) { for (auto mapping2 : wn[synset_id_2]) { sqlite3_stmt* ppstmt; if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK) { db_error(ppdb, query); } sqlite3_bind_int(ppstmt, 1, mapping1.second); sqlite3_bind_int(ppstmt, 2, mapping2.second); if (sqlite3_step(ppstmt) != SQLITE_DONE) { db_error(ppdb, query); } sqlite3_finalize(ppstmt); } } } } // mm table { std::ifstream wnmmfile(wnpref + "wn_mm.pl"); if (!wnmmfile.is_open()) { std::cout << "Invalid WordNet data directory." << std::endl; print_usage(); } std::list lines; for (;;) { std::string line; if (!getline(wnmmfile, line)) { break; } if (line.back() == '\r') { line.pop_back(); } lines.push_back(line); } progress ppgs("Writing member meronyms...", lines.size()); for (auto line : lines) { ppgs.update(); std::regex relation("^mm\\((1\\d{8}),(1\\d{8})\\)\\."); std::smatch relation_data; if (!std::regex_search(line, relation_data, relation)) { continue; } int synset_id_1 = stoi(relation_data[1]); int synset_id_2 = stoi(relation_data[2]); std::string query("INSERT INTO member_meronymy (holonym_id, meronym_id) VALUES (?, ?)"); for (auto mapping1 : wn[synset_id_1]) { for (auto mapping2 : wn[synset_id_2]) { sqlite3_stmt* ppstmt; if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK) { db_error(ppdb, query); } sqlite3_bind_int(ppstmt, 1, mapping1.second); sqlite3_bind_int(ppstmt, 2, mapping2.second); if (sqlite3_step(ppstmt) != SQLITE_DONE) { db_error(ppdb, query); } sqlite3_finalize(ppstmt); } } } } // ms table { std::ifstream wnmsfile(wnpref + "wn_ms.pl"); if (!wnmsfile.is_open()) { std::cout << "Invalid WordNet data directory." << std::endl; print_usage(); } std::list lines; for (;;) { std::string line; if (!getline(wnmsfile, line)) { break; } if (line.back() == '\r') { line.pop_back(); } lines.push_back(line); } progress ppgs("Writing substance meronyms...", lines.size()); for (auto line : lines) { ppgs.update(); std::regex relation("^ms\\((1\\d{8}),(1\\d{8})\\)\\."); std::smatch relation_data; if (!std::regex_search(line, relation_data, relation)) { continue; } int synset_id_1 = stoi(relation_data[1]); int synset_id_2 = stoi(relation_data[2]); std::string query("INSERT INTO substance_meronymy (holonym_id, meronym_id) VALUES (?, ?)"); for (auto mapping1 : wn[synset_id_1]) { for (auto mapping2 : wn[synset_id_2]) { sqlite3_stmt* ppstmt; if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK) { db_error(ppdb, query); } sqlite3_bind_int(ppstmt, 1, mapping1.second); sqlite3_bind_int(ppstmt, 2, mapping2.second); if (sqlite3_step(ppstmt) != SQLITE_DONE) { db_error(ppdb, query); } sqlite3_finalize(ppstmt); } } } } // mm table { std::ifstream wnmpfile(wnpref + "wn_mp.pl"); if (!wnmpfile.is_open()) { std::cout << "Invalid WordNet data directory." << std::endl; print_usage(); } std::list lines; for (;;) { std::string line; if (!getline(wnmpfile, line)) { break; } if (line.back() == '\r') { line.pop_back(); } lines.push_back(line); } progress ppgs("Writing part meronyms...", lines.size()); for (auto line : lines) { ppgs.update(); std::regex relation("^mp\\((1\\d{8}),(1\\d{8})\\)\\."); std::smatch relation_data; if (!std::regex_search(line, relation_data, relation)) { continue; } int synset_id_1 = stoi(relation_data[1]); int synset_id_2 = stoi(relation_data[2]); std::string query("INSERT INTO part_meronymy (holonym_id, meronym_id) VALUES (?, ?)"); for (auto mapping1 : wn[synset_id_1]) { for (auto mapping2 : wn[synset_id_2]) { sqlite3_stmt* ppstmt; if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK) { db_error(ppdb, query); } sqlite3_bind_int(ppstmt, 1, mapping1.second); sqlite3_bind_int(ppstmt, 2, mapping2.second); if (sqlite3_step(ppstmt) != SQLITE_DONE) { db_error(ppdb, query); } sqlite3_finalize(ppstmt); } } } } // per table { std::ifstream wnperfile(wnpref + "wn_per.pl"); if (!wnperfile.is_open()) { std::cout << "Invalid WordNet data directory." << std::endl; print_usage(); } std::list lines; for (;;) { std::string line; if (!getline(wnperfile, line)) { break; } if (line.back() == '\r') { line.pop_back(); } lines.push_back(line); } progress ppgs("Writing pertainyms and mannernyms...", lines.size()); for (auto line : lines) { ppgs.update(); std::regex relation("^per\\(([34]\\d{8}),(\\d+),([13]\\d{8}),(\\d+)\\)\\."); std::smatch relation_data; if (!std::regex_search(line, relation_data, relation)) { continue; } int synset_id_1 = stoi(relation_data[1]); int wnum_1 = stoi(relation_data[2]); int synset_id_2 = stoi(relation_data[3]); int wnum_2 = stoi(relation_data[4]); std::string query; switch (synset_id_1 / 100000000) { case 3: // Adjective { // This is a pertainym, the second word should be a noun // Technically it can be an adjective but we're ignoring that if (synset_id_2 / 100000000 != 1) { continue; } query = "INSERT INTO pertainymy (pertainym_id, noun_id) VALUES (?, ?)"; break; } case 4: // Adverb { // This is a mannernym, the second word should be an adjective if (synset_id_2 / 100000000 != 3) { continue; } query = "INSERT INTO mannernymy (mannernym_id, adjective_id) VALUES (?, ?)"; break; } } sqlite3_stmt* ppstmt; if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) { db_error(ppdb, query); } sqlite3_bind_int(ppstmt, 1, wn[synset_id_1][wnum_1]); sqlite3_bind_int(ppstmt, 2, wn[synset_id_2][wnum_2]); if (sqlite3_step(ppstmt) != SQLITE_DONE) { db_error(ppdb, query); } sqlite3_finalize(ppstmt); } } // sa table { std::ifstream wnsafile(wnpref + "wn_sa.pl"); if (!wnsafile.is_open()) { std::cout << "Invalid WordNet data directory." << std::endl; print_usage(); } std::list lines; for (;;) { std::string line; if (!getline(wnsafile, line)) { break; } if (line.back() == '\r') { line.pop_back(); } lines.push_back(line); } progress ppgs("Writing specifications...", lines.size()); for (auto line : lines) { ppgs.update(); std::regex relation("^per\\((3\\d{8}),(\\d+),(3\\d{8}),(\\d+)\\)\\."); std::smatch relation_data; if (!std::regex_search(line, relation_data, relation)) { continue; } int synset_id_1 = stoi(relation_data[1]); int wnum_1 = stoi(relation_data[2]); int synset_id_2 = stoi(relation_data[3]); int wnum_2 = stoi(relation_data[4]); std::string query("INSERT INTO specification (general_id, specific_id) VALUES (?, ?)"); sqlite3_stmt* ppstmt; if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) { db_error(ppdb, query); } sqlite3_bind_int(ppstmt, 1, wn[synset_id_1][wnum_1]); sqlite3_bind_int(ppstmt, 2, wn[synset_id_2][wnum_2]); if (sqlite3_step(ppstmt) != SQLITE_DONE) { db_error(ppdb, query); } sqlite3_finalize(ppstmt); } } // sim table { std::ifstream wnsimfile(wnpref + "wn_sim.pl"); if (!wnsimfile.is_open()) { std::cout << "Invalid WordNet data directory." << std::endl; print_usage(); } std::list lines; for (;;) { std::string line; if (!getline(wnsimfile, line)) { break; } if (line.back() == '\r') { line.pop_back(); } lines.push_back(line); } progress ppgs("Writing sense synonyms...", lines.size()); for (auto line : lines) { ppgs.update(); std::regex relation("^sim\\((3\\d{8}),(3\\d{8})\\)\\."); std::smatch relation_data; if (!std::regex_search(line, relation_data, relation)) { continue; } int synset_id_1 = stoi(relation_data[1]); int synset_id_2 = stoi(relation_data[2]); std::string query("INSERT INTO adjective_synonymy (adjective_1_id, adjective_2_id) VALUES (?, ?)"); for (auto mapping1 : wn[synset_id_1]) { for (auto mapping2 : wn[synset_id_2]) { sqlite3_stmt* ppstmt; if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK) { db_error(ppdb, query); } sqlite3_bind_int(ppstmt, 1, mapping1.second); sqlite3_bind_int(ppstmt, 2, mapping2.second); if (sqlite3_step(ppstmt) != SQLITE_DONE) { db_error(ppdb, query); } sqlite3_reset(ppstmt); sqlite3_clear_bindings(ppstmt); sqlite3_bind_int(ppstmt, 1, mapping2.second); sqlite3_bind_int(ppstmt, 2, mapping1.second); if (sqlite3_step(ppstmt) != SQLITE_DONE) { db_error(ppdb, query); } sqlite3_finalize(ppstmt); } } } } // syntax table { std::ifstream wnsyntaxfile(wnpref + "wn_syntax.pl"); if (!wnsyntaxfile.is_open()) { std::cout << "Invalid WordNet data directory." << std::endl; print_usage(); } std::list lines; for (;;) { std::string line; if (!getline(wnsyntaxfile, line)) { break; } if (line.back() == '\r') { line.pop_back(); } lines.push_back(line); } progress ppgs("Writing adjective syntax markers...", lines.size()); for (auto line : lines) { ppgs.update(); std::regex relation("^syntax\\((3\\d{8}),(\\d+),([ipa])p?\\)\\."); std::smatch relation_data; if (!std::regex_search(line, relation_data, relation)) { continue; } int synset_id = stoi(relation_data[1]); int wnum = stoi(relation_data[2]); std::string syn = relation_data[3]; std::string query("UPDATE adjectives SET position = ? WHERE adjective_id = ?"); sqlite3_stmt* ppstmt; if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK) { db_error(ppdb, query); } sqlite3_bind_text(ppstmt, 1, syn.c_str(), 1, SQLITE_STATIC); sqlite3_bind_int(ppstmt, 2, wn[synset_id][wnum]); if (sqlite3_step(ppstmt) != SQLITE_DONE) { db_error(ppdb, query); } sqlite3_finalize(ppstmt); } } sqlite3_close_v2(ppdb); std::cout << "Done." << std::endl; }