Started implementing verbly data generator

Currently, the generator:
- Uses AGID to create entries for verb words and their inflections
- Uses WordNet to create entries for adjective, adverb, and noun senses
author: Kelly Rauchenberger <fefferburbia@gmail.com> 2016-03-10 21:34:55 -0500
committer: Kelly Rauchenberger <fefferburbia@gmail.com> 2016-03-10 21:34:55 -0500
commit: e1be2716746e75cf6ed37e86461a7f580a964564 (patch)
tree: 38a69a8cbd690f27f1ee8c2ce43eeb0333753d52 /generator.cpp
parent: 41decb9a671e4d0fbbe12533372435ec6ede2246 (diff)
download: furries-e1be2716746e75cf6ed37e86461a7f580a964564.tar.gz
furries-e1be2716746e75cf6ed37e86461a7f580a964564.tar.bz2
furries-e1be2716746e75cf6ed37e86461a7f580a964564.zip
1 files changed, 451 insertions, 0 deletions
diff --git a/generator.cpp b/generator.cpp
new file mode 100644
index 0000000..c389963
--- /dev/null
+++ b/generator.cpp

@@ -0,0 +1,451 @@
+#include <libxml/parser.h>
+#include <iostream>
+#include <dirent.h>
+#include <set>
+#include <map>
+#include <string>
+#include <vector>
+#include <fstream>
+#include <sqlite3.h>
+#include <sstream>
+#include <regex>
+struct verb {  // One verb and its inflected forms; main() fills one of these per AGID "V" line.
+  std::string infinitive;  // the headword of the AGID line; also the key used in the global `verbs` map
+  std::string past_tense;
+  std::string past_participle;  // copied from past_tense when AGID lists only three forms
+  std::string ing_form;
+  std::string s_form;
+};
+struct group {  // Holder for one VerbNet class; only used by the commented-out parse_group() below.
+  std::string id;  // parse_group() would store the class "ID" attribute text after its first '-'
+  std::set<std::string> members;  // parse_group() would insert each MEMBER element's "name" attribute
+};
+std::map<std::string, group> groups;  // declared but not populated by any code in this file
+std::map<std::string, verb> verbs;  // infinitive -> verb record, filled from the AGID file in main()
+std::map<int, std::map<int, int>> wn;  // WordNet synset id -> word number -> row id of the inserted word
+// Prints the command-line help text to stdout and terminates the process
+// with exit status 1. This function never returns; main() calls it on every
+// fatal error as well as on a wrong argument count.
+void print_usage()
+{
+  std::cout << "Verbly Datafile Generator" << std::endl;
+  std::cout << "-------------------------" << std::endl;
+  // main() rejects anything but argc == 6, i.e. five arguments after the
+  // program name, matching the five entries listed below.
+  std::cout << "Requires exactly five arguments." << std::endl;
+  std::cout << "1. The path to a VerbNet data directory." << std::endl;
+  std::cout << "2. The path to a SemLink vnpbMappings file." << std::endl;
+  std::cout << "3. The path to an AGID infl.txt file." << std::endl;
+  std::cout << "4. The path to a WordNet prolog data directory." << std::endl;
+  std::cout << "5. Datafile output path." << std::endl;
+
+  exit(1);
+}
+/*
+void parse_group(xmlNodePtr top, std::string filename)
+{
+  xmlChar* key = xmlGetProp(top, (xmlChar*) "ID");
+  if (key == 0)
+  {
+    std::cout << "Bad VerbNet file format: " << filename << std::endl;
+    print_usage();
+  }
+  std::string vnid = key;
+  vnid = vnid.substr(vnid.find_first_of("-")+1);
+  xmlFree(key);
+  
+  group g;
+  g.id = vnid;
+  
+  for (xmlNodePtr node = top->xmlChildrenNode; node != nullptr; node = node->next)
+  {
+    if (!xmlStrcmp(node->name, (const xmlChar*) "MEMBERS"))
+    {
+      for (xmlNodePtr member = node->xmlChildrenNode; member != nullptr; member = member->next)
+      {
+        if (!xmlStrcmp(member->name, (const xmlChar*) "MEMBER"))
+        {
+          key = xmlGetProp(member, (xmlChar*) "name");
+          g.members.insert(key);
+          xmlFree(key);
+        }
+      }
+    } else if (!xmlStrcmp(node->name, (const xmlChar*) "FRAMES"))
+    {
+      for (xmlNodePtr frame = node->xmlChildrenNode; frame != nullptr; frame = frame->next)
+      {
+        if (!xmlStrcmp(frame->name, (const xmlChar*) "FRAME"))
+        {
+          for (xmlNodePtr framenode = frame->xmlChildrenNode; framenode != nullptr; framenode = framenode->next)
+          {
+            
+          }
+        }
+      }
+    }
+  }
+}*/
+// Entry point. Expects five arguments (see print_usage()). Only argv[3]
+// (AGID infl.txt), argv[4] (WordNet prolog directory) and argv[5] (output
+// SQLite file) are read at the moment: the VerbNet pass that would use
+// argv[1] is commented out below and argv[2] is not used at all.
+//
+// Steps:
+//   1. Parse verb inflections from the AGID file into the global `verbs`.
+//   2. Open/create the output database and run each statement of
+//      ./schema.sql (looked up relative to the working directory).
+//   3. Insert one row per parsed verb.
+//   4. Insert every noun/adjective/adverb entry matched in WordNet's
+//      wn_s.pl and remember its row id in the global `wn`.
+//
+// Every failure prints a message and terminates through print_usage().
+int main(int argc, char** argv)
+{
+  if (argc != 6)
+  {
+    print_usage();
+  }
+
+  // VerbNet pass, disabled for now (parse_group() is commented out as well).
+  /*DIR* dir;
+  if ((dir = opendir(argv[1])) == nullptr)
+  {
+    std::cout << "Invalid VerbNet data directory." << std::endl;
+    
+    print_usage();
+  }
+  
+  struct dirent* ent;
+  while ((ent = readdir(dir)) != nullptr)
+  {
+    std::string filename(argv[1]);
+    if (filename.back() != '/')
+    {
+      filename += '/';
+    }
+    
+    filename += ent->d_name;
+    //std::cout << ent->d_name << std::endl;
+    
+    if (filename.rfind(".xml") != filename.size() - 4)
+    {
+      continue;
+    }
+    
+    xmlDocPtr doc = xmlParseFile(filename.c_str());
+    if (doc == nullptr)
+    {
+      std::cout << "Error opening " << filename << std::endl;
+      print_usage();
+    }
+    
+    xmlNodePtr top = xmlDocGetRootElement(doc);
+    if ((top == nullptr) || (xmlStrcmp(top->name, (xmlChar*) "VNCLASS")))
+    {
+      std::cout << "Bad VerbNet file format: " << filename << std::endl;
+      print_usage();
+    }
+    
+    parse_group(top, filename);
+  }
+  
+  closedir(dir);*/
+
+  // Get verbs from AGID
+  std::cout << "Reading verb inflection..." << std::endl;
+
+  std::ifstream agidfile(argv[3]);
+  if (!agidfile.is_open())
+  {
+    std::cout << "Could not open AGID file: " << argv[3] << std::endl;
+    print_usage();
+  }
+
+  for (std::string line; std::getline(agidfile, line); )
+  {
+    // Tolerate CRLF input. The emptiness check is required: calling back()
+    // on an empty string is undefined behaviour.
+    if (!line.empty() && line.back() == '\r')
+    {
+      line.pop_back();
+    }
+
+    // The headword is everything up to the first space; a line without a
+    // space carries no inflection data and is skipped.
+    const std::string::size_type space = line.find(' ');
+    if (space == std::string::npos)
+    {
+      continue;
+    }
+
+    const std::string word = line.substr(0, space);
+    line.erase(0, space + 1);
+
+    // Only verb entries are of interest here.
+    if (line.empty() || line[0] != 'V')
+    {
+      continue;
+    }
+
+    // Drop the tag in front of the forms: "V: " (3 chars) or "V?: " (4 chars).
+    line.erase(0, (line.size() > 1 && line[1] == '?') ? 4 : 3);
+
+    // Split the remainder on " | " into one entry per inflection slot.
+    std::vector<std::string> forms;
+    while (!line.empty())
+    {
+      std::string inflection;
+      const std::string::size_type bar = line.find(" | ");
+      if (bar != std::string::npos)
+      {
+        inflection = line.substr(0, bar);
+        line.erase(0, bar + 3);
+      } else {
+        inflection = line;
+        line.clear();
+      }
+
+      // Keep only the text before the first ',' or '?' of the slot
+      // (presumably AGID's separators for alternate forms / uncertainty
+      // markers -- confirm against the AGID README).
+      const std::string::size_type cut = inflection.find_first_of(",?");
+      if (cut != std::string::npos)
+      {
+        inflection.erase(cut);
+      }
+
+      forms.push_back(inflection);
+    }
+
+    verb v;
+    v.infinitive = word;
+    if (forms.size() == 4)
+    {
+      v.past_tense = forms[0];
+      v.past_participle = forms[1];
+      v.ing_form = forms[2];
+      v.s_form = forms[3];
+    } else if (forms.size() == 3)
+    {
+      // Past tense and past participle are the same word.
+      v.past_tense = forms[0];
+      v.past_participle = forms[0];
+      v.ing_form = forms[1];
+      v.s_form = forms[2];
+    } else if (forms.size() == 8)
+    {
+      // As of AGID 2014.08.11, this is only "to be"
+      v.past_tense = forms[0];
+      v.past_participle = forms[2];
+      v.ing_form = forms[3];
+      v.s_form = forms[4];
+    } else {
+      // Words that don't fit the cases above as of AGID 2014.08.11:
+      // - may and shall do not conjugate the way we want them to
+      // - methinks only has a past tense and is an outlier
+      // - wit has five forms, and is archaic/obscure enough that we can ignore it for now
+      std::cout << "Ignoring verb \"" << word << "\" due to non-standard number of forms." << std::endl;
+
+      // Actually ignore it: previously the verb was still stored (and later
+      // written to the database) with every inflected form left empty.
+      continue;
+    }
+
+    verbs[word] = v;
+  }
+
+  // Start writing output
+  std::cout << "Writing output..." << std::endl;
+
+  sqlite3* ppdb = nullptr;
+  if (sqlite3_open_v2(argv[5], &ppdb, SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE, NULL) != SQLITE_OK)
+  {
+    std::cout << "Error opening output datafile: " << sqlite3_errmsg(ppdb) << std::endl;
+    // SQLite normally hands back a handle even when opening fails, and that
+    // handle still has to be released (closing a null handle is a no-op).
+    sqlite3_close_v2(ppdb);
+    print_usage();
+  }
+
+  // Shared fatal-error path for the database calls below: report SQLite's
+  // message, release the statement (finalizing a null statement is a no-op)
+  // and the connection, then exit through print_usage().
+  auto db_fail = [&ppdb](sqlite3_stmt* stmt)
+  {
+    std::cout << "Error writing to output database: " << sqlite3_errmsg(ppdb) << std::endl;
+    sqlite3_finalize(stmt);
+    sqlite3_close_v2(ppdb);
+    print_usage();
+  };
+
+  std::ifstream schemafile("schema.sql");
+  if (!schemafile.is_open())
+  {
+    std::cout << "Could not find schema file" << std::endl;
+    sqlite3_close_v2(ppdb);
+    print_usage();
+  }
+
+  // Read the whole schema, normalising line endings to '\n'.
+  std::stringstream schemabuilder;
+  for (std::string line; std::getline(schemafile, line); )
+  {
+    if (!line.empty() && line.back() == '\r')
+    {
+      line.pop_back();
+    }
+
+    schemabuilder << line << '\n';
+  }
+
+  // Execute the schema one ';'-terminated statement at a time. Text after
+  // the last ';' is not executed.
+  // NOTE(review): this naive split breaks on a ';' inside a string literal
+  // or trigger body -- fine as long as schema.sql contains neither.
+  std::string schema = schemabuilder.str();
+  for (;;)
+  {
+    const std::string::size_type semi = schema.find(';');
+    if (semi == std::string::npos)
+    {
+      break;
+    }
+
+    const std::string query = schema.substr(0, semi + 1);
+    // Consume exactly the statement. Whatever follows the ';' (normally a
+    // newline) stays as harmless leading whitespace of the next statement,
+    // instead of blindly dropping one character as before.
+    schema.erase(0, semi + 1);
+
+    sqlite3_stmt* schmstmt = nullptr;
+    if (sqlite3_prepare_v2(ppdb, query.c_str(), static_cast<int>(query.length()), &schmstmt, NULL) != SQLITE_OK)
+    {
+      db_fail(schmstmt);
+    }
+
+    if (sqlite3_step(schmstmt) != SQLITE_DONE)
+    {
+      db_fail(schmstmt);
+    }
+
+    sqlite3_finalize(schmstmt);
+  }
+
+  // NOTE(review): each INSERT below runs as its own implicit transaction;
+  // wrapping the bulk inserts in BEGIN/COMMIT would likely be much faster.
+  // Left as is so that rows written before a failure are kept, as before.
+  std::cout << "Writing verbs..." << std::endl;
+  const std::string verb_query("INSERT INTO verbs (infinitive, past_tense, past_participle, ing_form, s_form) VALUES (?, ?, ?, ?, ?)");
+  for (const auto& mapping : verbs)
+  {
+    const verb& v = mapping.second;
+
+    sqlite3_stmt* ppstmt = nullptr;
+    if (sqlite3_prepare_v2(ppdb, verb_query.c_str(), static_cast<int>(verb_query.length()), &ppstmt, NULL) != SQLITE_OK)
+    {
+      db_fail(ppstmt);
+    }
+
+    // SQLITE_STATIC is safe here: the bound strings live in `verbs`, which
+    // is not modified until after the statement has been finalized.
+    sqlite3_bind_text(ppstmt, 1, v.infinitive.c_str(), static_cast<int>(v.infinitive.length()), SQLITE_STATIC);
+    sqlite3_bind_text(ppstmt, 2, v.past_tense.c_str(), static_cast<int>(v.past_tense.length()), SQLITE_STATIC);
+    sqlite3_bind_text(ppstmt, 3, v.past_participle.c_str(), static_cast<int>(v.past_participle.length()), SQLITE_STATIC);
+    sqlite3_bind_text(ppstmt, 4, v.ing_form.c_str(), static_cast<int>(v.ing_form.length()), SQLITE_STATIC);
+    sqlite3_bind_text(ppstmt, 5, v.s_form.c_str(), static_cast<int>(v.s_form.length()), SQLITE_STATIC);
+
+    if (sqlite3_step(ppstmt) != SQLITE_DONE)
+    {
+      db_fail(ppstmt);
+    }
+
+    sqlite3_finalize(ppstmt);
+  }
+
+  // Get nouns/adjectives/adverbs from WordNet
+  // Useful relations:
+  // - s: master list
+  // - ant: antonymy (e.g. happy/sad, sad/happy, happiness/sadness)
+  // - at: variation (e.g. a measurement can be standard or nonstandard)
+  // - hyp: hypernymy/hyponymy (e.g. color/red, color/blue)
+  // - ins: instantiation (do we need this? let's see)
+  // - mm: member meronymy/holonymy (e.g. family/mother, family/child)
+  // - mp: part meronymy/holonymy (e.g. wheel/spoke, wheel/tire)
+  // - ms: substance meronymy/holonymy (e.g. tire/rubber, doorstop/rubber)
+  // - per: pertainymy (e.g. something that is Alaskan pertains to Alaska)
+  //        mannernymy (e.g. something done quickly is done in a manner that is quick)
+  // - sa: specification (e.g. inaccurate (general) can mean imprecise or incorrect (specific))
+  // - sim: synonymy (e.g. cheerful/happy, happy/cheerful)
+  // - syntax: positioning flags for some adjectives
+  std::string wnpref {argv[4]};
+  if (!wnpref.empty() && wnpref.back() != '/')
+  {
+    wnpref += '/';
+  }
+
+  std::cout << "Reading words from WordNet..." << std::endl;
+  std::ifstream wnsfile(wnpref + "wn_s.pl");
+  if (!wnsfile.is_open())
+  {
+    std::cout << "Invalid WordNet data directory." << std::endl;
+    sqlite3_close_v2(ppdb);
+    print_usage();
+  }
+
+  // Matches  s(<9-digit synset id starting with 1, 3 or 4>,<word number>,'<word>',
+  // Built once, outside the loop: constructing a std::regex is expensive.
+  // NOTE(review): the word pattern only admits [A-Za-z0-9_ ], so entries
+  // containing hyphens, apostrophes, etc. are skipped -- confirm intended.
+  const std::regex relation("^s\\(([134]\\d{8}),(\\d+),'([\\w ]+)',");
+
+  for (std::string line; std::getline(wnsfile, line); )
+  {
+    if (!line.empty() && line.back() == '\r')
+    {
+      line.pop_back();
+    }
+
+    std::smatch relation_data;
+    if (!std::regex_search(line, relation_data, relation))
+    {
+      continue;
+    }
+
+    const int synset_id = std::stoi(relation_data[1].str());
+    const int wnum = std::stoi(relation_data[2].str());
+    const std::string word = relation_data[3].str();
+
+    // The leading digit of the synset id selects the target table.
+    std::string query;
+    switch (synset_id / 100000000)
+    {
+      case 1: // Noun
+      {
+        query = "INSERT INTO nouns (form) VALUES (?)";
+
+        break;
+      }
+
+      case 3: // Adjective
+      {
+        query = "INSERT INTO adjectives (form) VALUES (?)";
+
+        break;
+      }
+
+      case 4: // Adverb
+      {
+        query = "INSERT INTO adverbs (form) VALUES (?)";
+
+        break;
+      }
+
+      default: // 2 = verb, or anything else: not taken from WordNet
+      {
+        // The regex above never lets these through; skip the line rather
+        // than trying to prepare an empty statement.
+        continue;
+      }
+    }
+
+    sqlite3_stmt* ppstmt = nullptr;
+    if (sqlite3_prepare_v2(ppdb, query.c_str(), static_cast<int>(query.length()), &ppstmt, NULL) != SQLITE_OK)
+    {
+      db_fail(ppstmt);
+    }
+
+    // `word` outlives the statement, so SQLITE_STATIC is safe.
+    sqlite3_bind_text(ppstmt, 1, word.c_str(), static_cast<int>(word.length()), SQLITE_STATIC);
+
+    if (sqlite3_step(ppstmt) != SQLITE_DONE)
+    {
+      db_fail(ppstmt);
+    }
+
+    sqlite3_finalize(ppstmt);
+
+    // Row id of the INSERT just executed on this connection; same value
+    // the former "SELECT last_insert_rowid()" round trip returned.
+    wn[synset_id][wnum] = static_cast<int>(sqlite3_last_insert_rowid(ppdb));
+  }
+
+  sqlite3_close_v2(ppdb);
+
+  std::cout << "Done." << std::endl;
+
+  return 0;
+}
+\ No newline at end of file
author	Kelly Rauchenberger <fefferburbia@gmail.com>	2016-03-10 21:34:55 -0500
committer	Kelly Rauchenberger <fefferburbia@gmail.com>	2016-03-10 21:34:55 -0500
commit	e1be2716746e75cf6ed37e86461a7f580a964564 (patch)
tree	38a69a8cbd690f27f1ee8c2ce43eeb0333753d52 /generator.cpp
parent	41decb9a671e4d0fbbe12533372435ec6ede2246 (diff)
download	furries-e1be2716746e75cf6ed37e86461a7f580a964564.tar.gz furries-e1be2716746e75cf6ed37e86461a7f580a964564.tar.bz2 furries-e1be2716746e75cf6ed37e86461a7f580a964564.zip

diff --git a/generator.cpp b/generator.cpp new file mode 100644 index 0000000..c389963 --- /dev/null +++ b/generator.cpp
@@ -0,0 +1,451 @@
	1	#include <libxml/parser.h>
	2	#include <iostream>
	3	#include <dirent.h>
	4	#include <set>
	5	#include <map>
	6	#include <string>
	7	#include <vector>
	8	#include <fstream>
	9	#include <sqlite3.h>
	10	#include <sstream>
	11	#include <regex>
	12
	13	struct verb {
	14	std::string infinitive;
	15	std::string past_tense;
	16	std::string past_participle;
	17	std::string ing_form;
	18	std::string s_form;
	19	};
	20
	21	struct group {
	22	std::string id;
	23	std::set<std::string> members;
	24	};
	25
	26	std::map<std::string, group> groups;
	27	std::map<std::string, verb> verbs;
	28	std::map<int, std::map<int, int>> wn;
	29
	30	void print_usage()
	31	{
	32	std::cout << "Verbly Datafile Generator" << std::endl;
	33	std::cout << "-------------------------" << std::endl;
	34	std::cout << "Requires exactly four arguments." << std::endl;
	35	std::cout << "1. The path to a VerbNet data directory." << std::endl;
	36	std::cout << "2. The path to a SemLink vnpbMappings file." << std::endl;
	37	std::cout << "3. The path to an AGID infl.txt file." << std::endl;
	38	std::cout << "4. The path to a WordNet prolog data directory." << std::endl;
	39	std::cout << "5. Datafile output path." << std::endl;
	40
	41	exit(1);
	42	}
	43	/*
	44	void parse_group(xmlNodePtr top, std::string filename)
	45	{
	46	xmlChar* key = xmlGetProp(top, (xmlChar*) "ID");
	47	if (key == 0)
	48	{
	49	std::cout << "Bad VerbNet file format: " << filename << std::endl;
	50	print_usage();
	51	}
	52	std::string vnid = key;
	53	vnid = vnid.substr(vnid.find_first_of("-")+1);
	54	xmlFree(key);
	55
	56	group g;
	57	g.id = vnid;
	58
	59	for (xmlNodePtr node = top->xmlChildrenNode; node != nullptr; node = node->next)
	60	{
	61	if (!xmlStrcmp(node->name, (const xmlChar*) "MEMBERS"))
	62	{
	63	for (xmlNodePtr member = node->xmlChildrenNode; member != nullptr; member = member->next)
	64	{
	65	if (!xmlStrcmp(member->name, (const xmlChar*) "MEMBER"))
	66	{
	67	key = xmlGetProp(member, (xmlChar*) "name");
	68	g.members.insert(key);
	69	xmlFree(key);
	70	}
	71	}
	72	} else if (!xmlStrcmp(node->name, (const xmlChar*) "FRAMES"))
	73	{
	74	for (xmlNodePtr frame = node->xmlChildrenNode; frame != nullptr; frame = frame->next)
	75	{
	76	if (!xmlStrcmp(frame->name, (const xmlChar*) "FRAME"))
	77	{
	78	for (xmlNodePtr framenode = frame->xmlChildrenNode; framenode != nullptr; framenode = framenode->next)
	79	{
	80
	81	}
	82	}
	83	}
	84	}
	85	}
	86	}*/
	87
	88	int main(int argc, char** argv)
	89	{
	90	if (argc != 6)
	91	{
	92	print_usage();
	93	}
	94
	95	/*DIR* dir;
	96	if ((dir = opendir(argv[1])) == nullptr)
	97	{
	98	std::cout << "Invalid VerbNet data directory." << std::endl;
	99
	100	print_usage();
	101	}
	102
	103	struct dirent* ent;
	104	while ((ent = readdir(dir)) != nullptr)
	105	{
	106	std::string filename(argv[1]);
	107	if (filename.back() != '/')
	108	{
	109	filename += '/';
	110	}
	111
	112	filename += ent->d_name;
	113	//std::cout << ent->d_name << std::endl;
	114
	115	if (filename.rfind(".xml") != filename.size() - 4)
	116	{
	117	continue;
	118	}
	119
	120	xmlDocPtr doc = xmlParseFile(filename.c_str());
	121	if (doc == nullptr)
	122	{
	123	std::cout << "Error opening " << filename << std::endl;
	124	print_usage();
	125	}
	126
	127	xmlNodePtr top = xmlDocGetRootElement(doc);
	128	if ((top == nullptr) || (xmlStrcmp(top->name, (xmlChar*) "VNCLASS")))
	129	{
	130	std::cout << "Bad VerbNet file format: " << filename << std::endl;
	131	print_usage();
	132	}
	133
	134	parse_group(top, filename);
	135	}
	136
	137	closedir(dir);*/
	138
	139	// Get verbs from AGID
	140	std::cout << "Reading verb inflection..." << std::endl;
	141
	142	std::ifstream agidfile(argv[3]);
	143	if (!agidfile.is_open())
	144	{
	145	std::cout << "Could not open AGID file: " << argv[3] << std::endl;
	146	print_usage();
	147	}
	148
	149	for (;;)
	150	{
	151	std::string line;
	152	if (!getline(agidfile, line))
	153	{
	154	break;
	155	}
	156
	157	if (line.back() == '\r')
	158	{
	159	line.pop_back();
	160	}
	161
	162	int divider = line.find_first_of(" ");
	163	std::string word = line.substr(0, divider);
	164	line = line.substr(divider+1);
	165
	166	if (line[0] != 'V')
	167	{
	168	continue;
	169	}
	170
	171	if (line[1] == '?')
	172	{
	173	line.erase(0, 4);
	174	} else {
	175	line.erase(0, 3);
	176	}
	177
	178	std::vector<std::string> forms;
	179	while (!line.empty())
	180	{
	181	std::string inflection;
	182	if ((divider = line.find(" | ")) != std::string::npos)
	183	{
	184	inflection = line.substr(0, divider);
	185	line = line.substr(divider + 3);
	186	} else {
	187	inflection = line;
	188	line = "";
	189	}
	190
	191	if ((divider = inflection.find_first_of(",?")) != std::string::npos)
	192	{
	193	inflection = inflection.substr(0, divider);
	194	}
	195
	196	forms.push_back(inflection);
	197	}
	198
	199	verb v;
	200	v.infinitive = word;
	201	if (forms.size() == 4)
	202	{
	203	v.past_tense = forms[0];
	204	v.past_participle = forms[1];
	205	v.ing_form = forms[2];
	206	v.s_form = forms[3];
	207	} else if (forms.size() == 3)
	208	{
	209	v.past_tense = forms[0];
	210	v.past_participle = forms[0];
	211	v.ing_form = forms[1];
	212	v.s_form = forms[2];
	213	} else if (forms.size() == 8)
	214	{
	215	// As of AGID 2014.08.11, this is only "to be"
	216	v.past_tense = forms[0];
	217	v.past_participle = forms[2];
	218	v.ing_form = forms[3];
	219	v.s_form = forms[4];
	220	} else {
	221	// Words that don't fit the cases above as of AGID 2014.08.11:
	222	// - may and shall do not conjugate the way we want them to
	223	// - methinks only has a past tense and is an outlier
	224	// - wit has five forms, and is archaic/obscure enough that we can ignore it for now
	225	std::cout << "Ignoring verb \"" << word << "\" due to non-standard number of forms." << std::endl;
	226	}
	227
	228	verbs[word] = v;
	229	}
	230
	231	// Start writing output
	232	std::cout << "Writing output..." << std::endl;
	233
	234	sqlite3* ppdb;
	235	if (sqlite3_open_v2(argv[5], &ppdb, SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE, NULL) != SQLITE_OK)
	236	{
	237	std::cout << "Error opening output datafile: " << sqlite3_errmsg(ppdb) << std::endl;
	238	print_usage();
	239	}
	240
	241	std::ifstream schemafile("schema.sql");
	242	if (!schemafile.is_open())
	243	{
	244	std::cout << "Could not find schema file" << std::endl;
	245	print_usage();
	246	}
	247
	248	std::stringstream schemabuilder;
	249	for (;;)
	250	{
	251	std::string line;
	252	if (!getline(schemafile, line))
	253	{
	254	break;
	255	}
	256
	257	if (line.back() == '\r')
	258	{
	259	line.pop_back();
	260	}
	261
	262	schemabuilder << line << std::endl;
	263	}
	264
	265	std::string schema = schemabuilder.str();
	266	while (!schema.empty())
	267	{
	268	std::string query;
	269	int divider = schema.find(";");
	270	if (divider != std::string::npos)
	271	{
	272	query = schema.substr(0, divider+1);
	273	schema = schema.substr(divider+2);
	274	} else {
	275	break;
	276	}
	277
	278	sqlite3_stmt* schmstmt;
	279	if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &schmstmt, NULL) != SQLITE_OK)
	280	{
	281	std::cout << "Error writing to output database: " << sqlite3_errmsg(ppdb) << std::endl;
	282	sqlite3_close_v2(ppdb);
	283	print_usage();
	284	}
	285
	286	if (sqlite3_step(schmstmt) != SQLITE_DONE)
	287	{
	288	std::cout << "Error writing to output database: " << sqlite3_errmsg(ppdb) << std::endl;
	289	sqlite3_close_v2(ppdb);
	290	print_usage();
	291	}
	292
	293	sqlite3_finalize(schmstmt);
	294	}
	295
	296	std::cout << "Writing verbs..." << std::endl;
	297	for (auto& mapping : verbs)
	298	{
	299	sqlite3_stmt* ppstmt;
	300	std::string query("INSERT INTO verbs (infinitive, past_tense, past_participle, ing_form, s_form) VALUES (?, ?, ?, ?, ?)");
	301	if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
	302	{
	303	std::cout << "Error writing to output database: " << sqlite3_errmsg(ppdb) << std::endl;
	304	sqlite3_close_v2(ppdb);
	305	print_usage();
	306	}
	307
	308	sqlite3_bind_text(ppstmt, 1, mapping.second.infinitive.c_str(), mapping.second.infinitive.length(), SQLITE_STATIC);
	309	sqlite3_bind_text(ppstmt, 2, mapping.second.past_tense.c_str(), mapping.second.past_tense.length(), SQLITE_STATIC);
	310	sqlite3_bind_text(ppstmt, 3, mapping.second.past_participle.c_str(), mapping.second.past_participle.length(), SQLITE_STATIC);
	311	sqlite3_bind_text(ppstmt, 4, mapping.second.ing_form.c_str(), mapping.second.ing_form.length(), SQLITE_STATIC);
	312	sqlite3_bind_text(ppstmt, 5, mapping.second.s_form.c_str(), mapping.second.s_form.length(), SQLITE_STATIC);
	313
	314	if (sqlite3_step(ppstmt) != SQLITE_DONE)
	315	{
	316	std::cout << "Error writing to output database: " << sqlite3_errmsg(ppdb) << std::endl;
	317	sqlite3_close_v2(ppdb);
	318	print_usage();
	319	}
	320
	321	sqlite3_finalize(ppstmt);
	322	}
	323
	324	// Get nouns/adjectives/adverbs from WordNet
	325	// Useful relations:
	326	// - s: master list
	327	// - ant: antonymy (e.g. happy/sad, sad/happy, happiness/sadness)
	328	// - at: variation (e.g. a measurement can be standard or nonstandard)
	329	// - hyp: hypernymy/hyponymy (e.g. color/red, color/blue)
	330	// - ins: instantiation (do we need this? let's see)
	331	// - mm: member meronymy/holonymy (e.g. family/mother, family/child)
	332	// - mp: part meronymy/holonymy (e.g. wheel/spoke, wheel/tire)
	333	// - ms: substance meronymy/holonymy (e.g. tire/rubber, doorstop/rubber)
	334	// - per: pertainymy (e.g. something that is Alaskan pertains to Alaska)
	335	// mannernymy (e.g. something done quickly is done in a manner that is quick)
	336	// - sa: specification (e.g. inaccurate (general) can mean imprecise or incorrect (specific))
	337	// - sim: synonymy (e.g. cheerful/happy, happy/cheerful)
	338	// - syntax: positioning flags for some adjectives
	339	std::string wnpref {argv[4]};
	340	if (wnpref.back() != '/')
	341	{
	342	wnpref += '/';
	343	}
	344
	345	std::cout << "Reading words from WordNet..." << std::endl;
	346	std::ifstream wnsfile(wnpref + "wn_s.pl");
	347	if (!wnsfile.is_open())
	348	{
	349	std::cout << "Invalid WordNet data directory." << std::endl;
	350	print_usage();
	351	}
	352
	353	for (;;)
	354	{
	355	std::string line;
	356	if (!getline(wnsfile, line))
	357	{
	358	break;
	359	}
	360
	361	if (line.back() == '\r')
	362	{
	363	line.pop_back();
	364	}
	365
	366	std::regex relation("^s\\(([134]\\d{8}),(\\d+),'([\\w ]+)',");
	367	std::smatch relation_data;
	368	if (!std::regex_search(line, relation_data, relation))
	369	{
	370	continue;
	371	}
	372
	373	int synset_id = stoi(relation_data[1]);
	374	int wnum = stoi(relation_data[2]);
	375	std::string word = relation_data[3];
	376
	377	std::string query;
	378	switch (synset_id / 100000000)
	379	{
	380	case 1: // Noun
	381	{
	382	query = "INSERT INTO nouns (form) VALUES (?)";
	383
	384	break;
	385	}
	386
	387	case 2: // Verb
	388	{
	389	// Ignore
	390
	391	break;
	392	}
	393
	394	case 3: // Adjective
	395	{
	396	query = "INSERT INTO adjectives (form) VALUES (?)";
	397
	398	break;
	399	}
	400
	401	case 4: // Adverb
	402	{
	403	query = "INSERT INTO adverbs (form) VALUES (?)";
	404
	405	break;
	406	}
	407	}
	408
	409	sqlite3_stmt* ppstmt;
	410	if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
	411	{
	412	std::cout << "Error writing to output database: " << sqlite3_errmsg(ppdb) << std::endl;
	413	sqlite3_close_v2(ppdb);
	414	print_usage();
	415	}
	416
	417	sqlite3_bind_text(ppstmt, 1, word.c_str(), word.length(), SQLITE_STATIC);
	418
	419	if (sqlite3_step(ppstmt) != SQLITE_DONE)
	420	{
	421	std::cout << "Error writing to output database: " << sqlite3_errmsg(ppdb) << std::endl;
	422	sqlite3_close_v2(ppdb);
	423	print_usage();
	424	}
	425
	426	sqlite3_finalize(ppstmt);
	427
	428	query = "SELECT last_insert_rowid()";
	429	if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
	430	{
	431	std::cout << "Error writing to output database: " << sqlite3_errmsg(ppdb) << std::endl;
	432	sqlite3_close_v2(ppdb);
	433	print_usage();
	434	}
	435
	436	if (sqlite3_step(ppstmt) != SQLITE_ROW)
	437	{
	438	std::cout << "Error writing to output database: " << sqlite3_errmsg(ppdb) << std::endl;
	439	sqlite3_close_v2(ppdb);
	440	print_usage();
	441	}
	442
	443	wn[synset_id][wnum] = sqlite3_column_int(ppstmt, 0);
	444
	445	sqlite3_finalize(ppstmt);
	446	}
	447
	448	sqlite3_close_v2(ppdb);
	449
	450	std::cout << "Done." << std::endl;
	451	} \ No newline at end of file