1 files changed, 1151 insertions, 1994 deletions
diff --git a/generator/generator.cpp b/generator/generator.cpp
index 6a16467..d88cb31 100644
--- a/generator/generator.cpp
+++ b/generator/generator.cpp

@@ -1,2320 +1,1477 @@
-#include <libxml/parser.h>
+#include "generator.h"
+#include <cassert>
+#include <stdexcept>
 #include <iostream>
+#include <regex>
 #include <dirent.h>
-#include <set>
-#include <map>
-#include <string>
-#include <vector>
 #include <fstream>
-#include <sqlite3.h>
+#include "enums.h"
-#include <sstream>
-#include <regex>
-#include <list>
-#include <algorithm>
-#include <json.hpp>
 #include "progress.h"
+#include "selrestr.h"
+#include "role.h"
+#include "part.h"
+#include "field.h"
 #include "../lib/util.h"
-using json = nlohmann::json;
+namespace verbly {
+  namespace generator {
-struct verb_t {
-  std::string infinitive;
-  std::string past_tense;
-  std::string past_participle;
-  std::string ing_form;
-  std::string s_form;
-  int id;
-};
-struct adjective_t {
-  std::string base;
-  std::string comparative;
-  std::string superlative;
-};
-struct noun_t {
-  std::string singular;
-  std::string plural;
-};
-struct selrestr_t {
-  enum class type_t {
-    singleton,
-    andlogic,
-    orlogic,
-    empty
-  };
-  type_t type;
-  std::string restriction;
-  bool pos;
-  std::list<selrestr_t> subordinates;
-};
-struct framepart_t {
-  enum class type_t {
-    np,
-    v,
-    pp,
-    adj,
-    adv,
-    lex
-  };
-  type_t type;
-  std::string role;
-  selrestr_t selrestrs;
-  std::set<std::string> preprestrs;
-  std::set<std::string> synrestrs;
-  std::list<std::string> choices;
-  std::string lexval;
-};
-struct group_t {
-  std::string id;
-  std::string parent;
-  std::set<std::string> members;
-  std::map<std::string, selrestr_t> roles;
-  std::list<std::list<framepart_t>> frames;
-};
-struct pronunciation_t {
-  std::string phonemes;
-  std::string prerhyme;
-  std::string rhyme;
-  int syllables = 0;
-  std::string stress;
-  
-  bool operator<(const pronunciation_t& other) const
-  {
-    return phonemes < other.phonemes;
-  }
-};
-std::map<std::string, group_t> groups;
-std::map<std::string, verb_t> verbs;
-std::map<std::string, adjective_t> adjectives;
-std::map<std::string, noun_t> nouns;
-std::map<int, std::map<int, int>> wn;
-std::map<int, int> images;
-std::map<std::string, std::set<pronunciation_t>> pronunciations;
-void print_usage()
-{
-  std::cout << "Verbly Datafile Generator" << std::endl;
-  std::cout << "-------------------------" << std::endl;
-  std::cout << "Requires exactly six arguments." << std::endl;
-  std::cout << "1. The path to a VerbNet data directory." << std::endl;
-  std::cout << "2. The path to an AGID infl.txt file." << std::endl;
-  std::cout << "3. The path to a WordNet prolog data directory." << std::endl;
-  std::cout << "4. The path to a CMUDICT pronunciation file." << std::endl;
-  std::cout << "5. The path to an ImageNet urls.txt file." << std::endl;
-  std::cout << "6. Datafile output path." << std::endl;
-  
-  exit(1);
-}
-void db_error(sqlite3* ppdb, std::string query)
-{
-  std::cout << "Error writing to output database: " << sqlite3_errmsg(ppdb) << std::endl;
-  std::cout << query << std::endl;
-  sqlite3_close_v2(ppdb);
-  print_usage();
-}
-json export_selrestrs(selrestr_t r)
-{
-  if (r.type == selrestr_t::type_t::empty)
-  {
-    return {};
-  } else if (r.type == selrestr_t::type_t::singleton)
-  {
-    json result;
-    result["type"] = r.restriction;
-    result["pos"] = r.pos;
-    return result;
-  } else {
-    json result;
-    if (r.type == selrestr_t::type_t::andlogic)
-    {
-      result["logic"] = "and";
-    } else {
-      result["logic"] = "or";
-    }
-    
-    std::list<json> outlist;
-    std::transform(std::begin(r.subordinates), std::end(r.subordinates), std::back_inserter(outlist), &export_selrestrs);
-    result["children"] = outlist;
    
-    return result;
+    generator::generator(
-  }
+      std::string verbNetPath,
-}
+      std::string agidPath,
+      std::string wordNetPath,
-selrestr_t parse_selrestrs(xmlNodePtr top, std::string filename)
+      std::string cmudictPath,
-{
+      std::string imageNetPath,
-  selrestr_t r;
+      std::string outputPath) :
-  xmlChar* key;
+        verbNetPath_(verbNetPath),
-  
+        agidPath_(agidPath),
-  if (!xmlStrcmp(top->name, (const xmlChar*) "SELRESTRS"))
+        wordNetPath_(wordNetPath),
-  {
+        cmudictPath_(cmudictPath),
-    if (xmlChildElementCount(top) == 0)
+        imageNetPath_(imageNetPath),
+        db_(outputPath)
    {
-      r.type = selrestr_t::type_t::empty;
+      // Ensure VerbNet directory exists
-    } else if (xmlChildElementCount(top) == 1)
+      DIR* dir;
-    {
+      if ((dir = opendir(verbNetPath_.c_str())) == nullptr)
-      r = parse_selrestrs(xmlFirstElementChild(top), filename);
-    } else {
-      r.type = selrestr_t::type_t::andlogic;
-      
-      if (xmlHasProp(top, (const xmlChar*) "logic"))
      {
-        key = xmlGetProp(top, (const xmlChar*) "logic");
+        throw std::invalid_argument("Invalid VerbNet data directory");
-        if (!xmlStrcmp(key, (const xmlChar*) "or"))
-        {
-          r.type = selrestr_t::type_t::orlogic;
-        }
-        xmlFree(key);
      }
-  
+      
-      for (xmlNodePtr selrestr = top->xmlChildrenNode; selrestr != nullptr; selrestr = selrestr->next)
+      closedir(dir);
+      
+      // Ensure AGID infl.txt exists
+      if (!std::ifstream(agidPath_))
      {
-        if (!xmlStrcmp(selrestr->name, (const xmlChar*) "SELRESTRS") || !xmlStrcmp(selrestr->name, (const xmlChar*) "SELRESTR"))
+        throw std::invalid_argument("AGID infl.txt file not found");
-        {
-          r.subordinates.push_back(parse_selrestrs(selrestr, filename));
-        }
      }
-    }
+      
-  } else if (!xmlStrcmp(top->name, (const xmlChar*) "SELRESTR"))
+      // Add directory separator to WordNet path
-  {
+      if (!wordNetPath_.empty() && (wordNetPath_.back() != '/') && (wordNetPath_.back() != '\\'))
-    r.type = selrestr_t::type_t::singleton;
-    
-    key = xmlGetProp(top, (xmlChar*) "Value");
-    r.pos = (std::string((const char*)key) == "+");
-    xmlFree(key);
-    key = xmlGetProp(top, (xmlChar*) "type");
-    r.restriction = (const char*) key;
-    xmlFree(key);
-  } else {
-    // Invalid
-    std::cout << "Bad VerbNet file format: " << filename << std::endl;
-    print_usage();
-  }
-  
-  return r;
-}
-group_t& parse_group(xmlNodePtr top, std::string filename)
-{
-  xmlChar* key = xmlGetProp(top, (xmlChar*) "ID");
-  if (key == 0)
-  {
-    std::cout << "Bad VerbNet file format: " << filename << std::endl;
-    print_usage();
-  }
-  std::string vnid = (const char*)key;
-  vnid = vnid.substr(vnid.find_first_of("-")+1);
-  xmlFree(key);
-  
-  group_t g;
-  g.id = vnid;
-  
-  for (xmlNodePtr node = top->xmlChildrenNode; node != nullptr; node = node->next)
-  {
-    if (!xmlStrcmp(node->name, (const xmlChar*) "SUBCLASSES"))
-    {
-      for (xmlNodePtr subclass = node->xmlChildrenNode; subclass != nullptr; subclass = subclass->next)
      {
-        if (!xmlStrcmp(subclass->name, (const xmlChar*) "VNSUBCLASS"))
+        wordNetPath_ += '/';
-        {
-          auto& sg = parse_group(subclass, filename);
-          sg.parent = vnid;
-          
-          for (auto member : sg.members)
-          {
-            g.members.insert(member);
-          }
-          
-          // The schema requires that subclasses appear after role definitions, so we can do this now
-          for (auto role : g.roles)
-          {
-            if (sg.roles.count(role.first) == 0)
-            {
-              sg.roles[role.first] = role.second;
-            }
-          }
-        }
      }
-    } else if (!xmlStrcmp(node->name, (const xmlChar*) "MEMBERS"))
+      
-    {
+      // Ensure WordNet tables exist
-      for (xmlNodePtr member = node->xmlChildrenNode; member != nullptr; member = member->next)
+      for (std::string table : {
+        "s", "sk", "ant", "at", "cls", "hyp", "ins", "mm", "mp", "ms", "per", "sa", "sim", "syntax"
+      })
      {
-        if (!xmlStrcmp(member->name, (const xmlChar*) "MEMBER"))
+        if (!std::ifstream(wordNetPath_ + "wn_" + table + ".pl"))
        {
-          key = xmlGetProp(member, (xmlChar*) "name");
+          throw std::invalid_argument("WordNet " + table + " table not found");
-          g.members.insert((const char*)key);
-          xmlFree(key);
        }
      }
-    } else if (!xmlStrcmp(node->name, (const xmlChar*) "THEMROLES"))
+      
-    {
+      // Ensure CMUDICT file exists
-      for (xmlNodePtr role = node->xmlChildrenNode; role != nullptr; role = role->next)
+      if (!std::ifstream(cmudictPath_))
      {
-        if (!xmlStrcmp(role->name, (const xmlChar*) "THEMROLE"))
+        throw std::invalid_argument("CMUDICT file not found");
-        {
-          selrestr_t r;
-          r.type = selrestr_t::type_t::empty;
-          
-          key = xmlGetProp(role, (const xmlChar*) "type");
-          std::string type = (const char*)key;
-          xmlFree(key);
-          
-          for (xmlNodePtr rolenode = role->xmlChildrenNode; rolenode != nullptr; rolenode = rolenode->next)
-          {
-            if (!xmlStrcmp(rolenode->name, (const xmlChar*) "SELRESTRS"))
-            {
-              r = parse_selrestrs(rolenode, filename);
-            }
-          }
-          
-          g.roles[type] = r;
-        }
      }
-    } else if (!xmlStrcmp(node->name, (const xmlChar*) "FRAMES"))
+      
-    {
+      // Ensure ImageNet urls.txt exists
-      for (xmlNodePtr frame = node->xmlChildrenNode; frame != nullptr; frame = frame->next)
+      if (!std::ifstream(imageNetPath_))
      {
-        if (!xmlStrcmp(frame->name, (const xmlChar*) "FRAME"))
+        throw std::invalid_argument("ImageNet urls.txt file not found");
-        {
-          std::list<framepart_t> f;
-          
-          for (xmlNodePtr framenode = frame->xmlChildrenNode; framenode != nullptr; framenode = framenode->next)
-          {
-            if (!xmlStrcmp(framenode->name, (const xmlChar*) "SYNTAX"))
-            {
-              for (xmlNodePtr syntaxnode = framenode->xmlChildrenNode; syntaxnode != nullptr; syntaxnode = syntaxnode->next)
-              {
-                framepart_t fp;
-                
-                if (!xmlStrcmp(syntaxnode->name, (const xmlChar*) "NP"))
-                {
-                  fp.type = framepart_t::type_t::np;
-                  
-                  key = xmlGetProp(syntaxnode, (xmlChar*) "value");
-                  fp.role = (const char*)key;
-                  xmlFree(key);
-                  
-                  fp.selrestrs.type = selrestr_t::type_t::empty;
-                  
-                  for (xmlNodePtr npnode = syntaxnode->xmlChildrenNode; npnode != nullptr; npnode = npnode->next)
-                  {
-                    if (!xmlStrcmp(npnode->name, (const xmlChar*) "SYNRESTRS"))
-                    {
-                      for (xmlNodePtr synrestr = npnode->xmlChildrenNode; synrestr != nullptr; synrestr = synrestr->next)
-                      {
-                        if (!xmlStrcmp(synrestr->name, (const xmlChar*) "SYNRESTR"))
-                        {
-                          key = xmlGetProp(synrestr, (xmlChar*) "type");
-                          fp.synrestrs.insert(std::string((const char*)key));
-                          xmlFree(key);
-                        }
-                      }
-                    }
-                  
-                    if (!xmlStrcmp(npnode->name, (const xmlChar*) "SELRESTRS"))
-                    {
-                      fp.selrestrs = parse_selrestrs(npnode, filename);
-                    }
-                  }
-                } else if (!xmlStrcmp(syntaxnode->name, (xmlChar*) "VERB"))
-                {
-                  fp.type = framepart_t::type_t::v;
-                } else if (!xmlStrcmp(syntaxnode->name, (xmlChar*) "PREP"))
-                {
-                  fp.type = framepart_t::type_t::pp;
-                  
-                  if (xmlHasProp(syntaxnode, (xmlChar*) "value"))
-                  {
-                    key = xmlGetProp(syntaxnode, (xmlChar*) "value");
-                    std::string choices = (const char*)key;
-                    xmlFree(key);
-                  
-                    fp.choices = verbly::split<std::list<std::string>>(choices, " ");
-                  }
-                  
-                  for (xmlNodePtr npnode = syntaxnode->xmlChildrenNode; npnode != nullptr; npnode = npnode->next)
-                  {
-                    if (!xmlStrcmp(npnode->name, (const xmlChar*) "SELRESTRS"))
-                    {
-                      for (xmlNodePtr synrestr = npnode->xmlChildrenNode; synrestr != nullptr; synrestr = synrestr->next)
-                      {
-                        if (!xmlStrcmp(synrestr->name, (const xmlChar*) "SELRESTR"))
-                        {
-                          key = xmlGetProp(synrestr, (xmlChar*) "type");
-                          fp.preprestrs.insert(std::string((const char*)key));
-                          xmlFree(key);
-                        }
-                      }
-                    }
-                  }
-                } else if (!xmlStrcmp(syntaxnode->name, (xmlChar*) "ADJ"))
-                {
-                  fp.type = framepart_t::type_t::adj;
-                } else if (!xmlStrcmp(syntaxnode->name, (xmlChar*) "ADV"))
-                {
-                  fp.type = framepart_t::type_t::adv;
-                } else if (!xmlStrcmp(syntaxnode->name, (xmlChar*) "LEX"))
-                {
-                  fp.type = framepart_t::type_t::lex;
-                  
-                  key = xmlGetProp(syntaxnode, (xmlChar*) "value");
-                  fp.lexval = (const char*)key;
-                  xmlFree(key);
-                } else {
-                  continue;
-                }
-                
-                f.push_back(fp);
-              }
-              
-              g.frames.push_back(f);
-            }
-          }
-        }
      }
    }
-  }
-  
-  groups[vnid] = g;
-  
-  return groups[vnid];
-}
-int main(int argc, char** argv)
-{
-  if (argc != 7)
-  {
-    print_usage();
-  }
-  
-  // VerbNet data
-  std::cout << "Reading verb frames..." << std::endl;
-  
-  DIR* dir;
-  if ((dir = opendir(argv[1])) == nullptr)
-  {
-    std::cout << "Invalid VerbNet data directory." << std::endl;
-    
-    print_usage();
-  }
-  
-  struct dirent* ent;
-  while ((ent = readdir(dir)) != nullptr)
-  {
-    std::string filename(argv[1]);
-    if (filename.back() != '/')
-    {
-      filename += '/';
-    }
    
-    filename += ent->d_name;
+    void generator::run()
-    //std::cout << ent->d_name << std::endl;
-    
-    if (filename.rfind(".xml") != filename.size() - 4)
-    {
-      continue;
-    }
-    
-    xmlDocPtr doc = xmlParseFile(filename.c_str());
-    if (doc == nullptr)
-    {
-      std::cout << "Error opening " << filename << std::endl;
-      print_usage();
-    }
-    
-    xmlNodePtr top = xmlDocGetRootElement(doc);
-    if ((top == nullptr) || (xmlStrcmp(top->name, (xmlChar*) "VNCLASS")))
-    {
-      std::cout << "Bad VerbNet file format: " << filename << std::endl;
-      print_usage();
-    }
-    
-    parse_group(top, filename);
-  }
-  
-  closedir(dir);
-  
-  // Get verbs from AGID
-  std::cout << "Reading inflections..." << std::endl;
-  
-  std::ifstream agidfile(argv[2]);
-  if (!agidfile.is_open())
-  {
-    std::cout << "Could not open AGID file: " << argv[2] << std::endl;
-    print_usage();
-  }
-  
-  for (;;)
-  {
-    std::string line;
-    if (!getline(agidfile, line))
-    {
-      break;
-    }
-    
-    if (line.back() == '\r')
    {
-      line.pop_back();
+      // Create notions, words, lemmas, and forms from WordNet synsets
-    }
+      readWordNetSynsets();
-    
+      
-    int divider = line.find_first_of(" ");
+      // Reads adjective positioning WordNet data
-    std::string word = line.substr(0, divider);
+      readAdjectivePositioning();
-    line = line.substr(divider+1);
+      
-    char type = line[0];
+      // Counts the number of URLs ImageNet has per notion 
-    
+      readImageNetUrls();
-    if (line[1] == '?')
+      
-    {
+      // Creates a word by WordNet sense key lookup table
-      line.erase(0, 4);
+      readWordNetSenseKeys();
-    } else {
+      
-      line.erase(0, 3);
+      // Creates groups and frames from VerbNet data
-    }
+      readVerbNet();
+      
-    std::vector<std::string> forms;
+      // Creates forms and inflections from AGID. To reduce the amount of forms
-    while (!line.empty())
+      // created, we do this after most lemmas that need inflecting have been
-    {
+      // created through other means, and then only generate forms for
-      std::string inflection;
+      // inflections of already-existing lemmas. The exception to this regards
-      if ((divider = line.find(" | ")) != std::string::npos)
+      // verb lemmas. If a verb lemma in AGID either does not exist yet, or does
-      {
+      // exist but is not related to any words that are related to verb notions,
-        inflection = line.substr(0, divider);
+      // then a notion and a word is generated and the form generation proceeds
-        line = line.substr(divider + 3);
+      // as usual.
-      } else {
+      readAgidInflections();
-        inflection = line;
+      
-        line = "";
+      // Reads in prepositions and the is_a relationship
-      }
+      readPrepositions();
-  
+      
-      if ((divider = inflection.find_first_of(",?")) != std::string::npos)
+      // Creates pronunciations from CMUDICT. To reduce the amount of
-      {
+      // pronunciations created, we do this after all forms have been created,
-        inflection = inflection.substr(0, divider);
+      // and then only generate pronunciations for already-existing forms.
-      }
+      readCmudictPronunciations();
-  
+      
-      forms.push_back(inflection);
+      // Writes the database schema
+      writeSchema();
+      
+      // Dumps data to the database
+      dumpObjects();
+      
+      // Populates the antonymy relationship from WordNet
+      readWordNetAntonymy();
+      
+      // Populates the variation relationship from WordNet
+      readWordNetVariation();
+      
+      // Populates the usage, topicality, and regionality relationships from
+      // WordNet
+      readWordNetClasses();
+      
+      // Populates the causality relationship from WordNet
+      readWordNetCausality();
+      
+      // Populates the entailment relationship from WordNet
+      readWordNetEntailment();
+      
+      // Populates the hypernymy relationship from WordNet
+      readWordNetHypernymy();
+      
+      // Populates the instantiation relationship from WordNet
+      readWordNetInstantiation();
+      
+      // Populates the member meronymy relationship from WordNet
+      readWordNetMemberMeronymy();
+      
+      // Populates the part meronymy relationship from WordNet
+      readWordNetPartMeronymy();
+      
+      // Populates the substance meronymy relationship from WordNet
+      readWordNetSubstanceMeronymy();
+      
+      // Populates the pertainymy and mannernymy relationships from WordNet
+      readWordNetPertainymy();
+      
+      // Populates the specification relationship from WordNet
+      readWordNetSpecification();
+      
+      // Populates the adjective similarity relationship from WordNet
+      readWordNetSimilarity();
+      
+      
+      
+      
+      
+      
+      
+      
    }
    
-    switch (type)
+    void generator::readWordNetSynsets()
    {
-      case 'V':
+      std::list<std::string> lines(readFile(wordNetPath_ + "wn_s.pl"));
+      progress ppgs("Reading synsets from WordNet...", lines.size());
+      
+      for (std::string line : lines)
      {
-        verb_t v;
+        ppgs.update();
-        v.infinitive = word;
+        
-        if (forms.size() == 4)
+        static const std::regex relation("^s\\(([1234]\\d{8}),(\\d+),'(.+)',\\w,\\d+,(\\d+)\\)\\.$");
-        {
+        std::smatch relation_data;
-          v.past_tense = forms[0];
+        if (!std::regex_search(line, relation_data, relation))
-          v.past_participle = forms[1];
+        {
-          v.ing_form = forms[2];
+          continue;
-          v.s_form = forms[3];
-        } else if (forms.size() == 3)
-        {
-          v.past_tense = forms[0];
-          v.past_participle = forms[0];
-          v.ing_form = forms[1];
-          v.s_form = forms[2];
-        } else if (forms.size() == 8)
-        {
-          // As of AGID 2014.08.11, this is only "to be"
-          v.past_tense = forms[0];
-          v.past_participle = forms[2];
-          v.ing_form = forms[3];
-          v.s_form = forms[4];
-        } else {
-          // Words that don't fit the cases above as of AGID 2014.08.11:
-          // - may and shall do not conjugate the way we want them to
-          // - methinks only has a past tense and is an outlier
-          // - wit has five forms, and is archaic/obscure enough that we can ignore it for now
-          std::cout << "Ignoring verb \"" << word << "\" due to non-standard number of forms." << std::endl;
        }
    
-        verbs[word] = v;
+        int synset_id = std::stoi(relation_data[1]);
-        
+        int wnum = std::stoi(relation_data[2]);
-        break;
+        std::string text = relation_data[3];
-      }
+        int tag_count = std::stoi(relation_data[4]);
-      
+        size_t word_it;
-      case 'A':
+        while ((word_it = text.find("''")) != std::string::npos)
-      {
-        adjective_t adj;
-        adj.base = word;
-        if (forms.size() == 2)
        {
-          adj.comparative = forms[0];
+          text.erase(word_it, 1);
-          adj.superlative = forms[1];
-        } else {
-          // As of AGID 2014.08.11, this is only "only", which has only the form "onliest"
-          std::cout << "Ignoring adjective/adverb \"" << word << "\" due to non-standard number of forms." << std::endl;
        }
        
-        adjectives[word] = adj;
+        // The WordNet data does contain duplicates, so we need to check that we
-        
+        // haven't already created this word.
-        break;
+        std::pair<int, int> lookup(synset_id, wnum);
-      }
+        if (!wordByWnidAndWnum_.count(lookup))
-      
-      case 'N':
-      {
-        noun_t n;
-        n.singular = word;
-        if (forms.size() == 1)
        {
-          n.plural = forms[0];
+          notion& synset = lookupOrCreateNotion(synset_id);
-        } else {
+          lemma& lex = lookupOrCreateLemma(text);
-          // As of AGID 2014.08.11, this is non-existent.
+          word& entry = createWord(synset, lex, tag_count);
-          std::cout << "Ignoring noun \"" << word << "\" due to non-standard number of forms." << std::endl;
+          wordByWnidAndWnum_[lookup] = &entry;
        }
-        
-        nouns[word] = n;
-        
-        break;
      }
    }
-  }
-  
-  // Pronounciations
-  std::cout << "Reading pronunciations..." << std::endl;
-  
-  std::ifstream pronfile(argv[4]);
-  if (!pronfile.is_open())
-  {
-    std::cout << "Could not open CMUDICT file: " << argv[4] << std::endl;
-    print_usage();
-  }
-  
-  for (;;)
-  {
-    std::string line;
-    if (!getline(pronfile, line))
-    {
-      break;
-    }
-    
-    if (line.back() == '\r')
-    {
-      line.pop_back();
-    }
    
-    std::regex phoneme("([A-Z][^ \\(]*)(?:\\(\\d+\\))?  ([A-Z 0-9]+)");
+    void generator::readAdjectivePositioning()
-    std::smatch phoneme_data;
-    if (std::regex_search(line, phoneme_data, phoneme))
    {
-      std::string canonical(phoneme_data[1]);
+      std::list<std::string> lines(readFile(wordNetPath_ + "wn_syntax.pl"));
-      std::transform(std::begin(canonical), std::end(canonical), std::begin(canonical), ::tolower);
+      progress ppgs("Reading adjective positionings from WordNet...", lines.size());
-      
-      std::string phonemes = phoneme_data[2];
-      auto phoneme_set = verbly::split<std::list<std::string>>(phonemes, " ");
-      auto phemstrt = std::find_if(std::begin(phoneme_set), std::end(phoneme_set), [] (std::string phoneme) {
-        return phoneme.find("1") != std::string::npos;
-      });
      
-      pronunciation_t p;
+      for (std::string line : lines)
-      p.phonemes = phonemes;
-      
-      // Rhyme detection
-      if (phemstrt != std::end(phoneme_set))
      {
-        std::stringstream rhymer;
+        ppgs.update();
-        for (auto it = phemstrt; it != std::end(phoneme_set); it++)
-        {
-          std::string naked;
-          std::remove_copy_if(std::begin(*it), std::end(*it), std::back_inserter(naked), [] (char ch) {
-            return isdigit(ch);
-          });
-          
-          if (it != phemstrt)
-          {
-            rhymer << " ";
-          }
-          
-          rhymer << naked;
-        }
        
-        p.rhyme = rhymer.str();
+        static const std::regex relation("^syntax\\((3\\d{8}),(\\d+),([ipa])p?\\)\\.");
-        
+        std::smatch relation_data;
-        if (phemstrt != std::begin(phoneme_set))
+        if (!std::regex_search(line, relation_data, relation))
        {
-          phemstrt--;
+          continue;
-          p.prerhyme = *phemstrt;
-        } else {
-          p.prerhyme = "";
        }
-      } else {
-        p.prerhyme = "";
-        p.rhyme = "";
-      }
      
-      // Syllable/stress
+        int synset_id = std::stoi(relation_data[1]);
-      for (auto phm : phoneme_set)
+        int wnum = std::stoi(relation_data[2]);
-      {
+        std::string adjpos_str = relation_data[3];
-        if (isdigit(phm.back()))
-        {
-          // It's a vowel!
-          p.syllables++;
        
-          if (phm.back() == '1')
+        std::pair<int, int> lookup(synset_id, wnum);
+        if (wordByWnidAndWnum_.count(lookup))
+        {
+          word& adj = *wordByWnidAndWnum_.at(lookup);
+          
+          if (adjpos_str == "p")
+          {
+            adj.setAdjectivePosition(positioning::predicate);
+          } else if (adjpos_str == "a")
+          {
+            adj.setAdjectivePosition(positioning::attributive);
+          } else if (adjpos_str == "i")
          {
-            p.stress.push_back('1');
+            adj.setAdjectivePosition(positioning::postnominal);
          } else {
-            p.stress.push_back('0');
+            // Can't happen because of how we specified the regex.
+            assert(false);
          }
        }
      }
-      
-      pronunciations[canonical].insert(p);
-    }
-  }
-  
-  // Images
-  std::cout << "Reading images..." << std::endl;
-  
-  std::ifstream imagefile(argv[5]);
-  if (!imagefile.is_open())
-  {
-    std::cout << "Could not open ImageNet file: " << argv[5] << std::endl;
-    print_usage();
-  }
-  
-  for (;;)
-  {
-    std::string line;
-    if (!getline(imagefile, line))
-    {
-      break;
-    }
-    
-    if (line.back() == '\r')
-    {
-      line.pop_back();
-    }
-    
-    std::string wnid_s = line.substr(1, 8);
-    int wnid = stoi(wnid_s) + 100000000;
-    images[wnid]++;
-  }
-  
-  imagefile.close();
-  
-  // Start writing output
-  std::cout << "Writing schema..." << std::endl;
-  
-  sqlite3* ppdb;
-  if (sqlite3_open_v2(argv[6], &ppdb, SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE, NULL) != SQLITE_OK)
-  {
-    std::cout << "Error opening output datafile: " << sqlite3_errmsg(ppdb) << std::endl;
-    print_usage();
-  }
-  
-  std::ifstream schemafile("schema.sql");
-  if (!schemafile.is_open())
-  {
-    std::cout << "Could not find schema file" << std::endl;
-    print_usage();
-  }
-  
-  std::stringstream schemabuilder;
-  for (;;)
-  {
-    std::string line;
-    if (!getline(schemafile, line))
-    {
-      break;
-    }
-    
-    if (line.back() == '\r')
-    {
-      line.pop_back();
-    }
-    
-    schemabuilder << line << std::endl;
-  }
-  
-  std::string schema = schemabuilder.str();
-  while (!schema.empty())
-  {
-    std::string query;
-    int divider = schema.find(";");
-    if (divider != std::string::npos)
-    {
-      query = schema.substr(0, divider+1);
-      schema = schema.substr(divider+2);
-    } else {
-      break;
    }
    
-    sqlite3_stmt* schmstmt;
+    void generator::readImageNetUrls()
-    if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &schmstmt, NULL) != SQLITE_OK)
    {
-      db_error(ppdb, query);
+      // The ImageNet datafile is so large that it is unreasonable and
-    }
+      // unnecessary to read it into memory; instead, we will parse each line as
-  
+      // we read it. This has the caveat that we cannot display a progress bar.
-    if (sqlite3_step(schmstmt) != SQLITE_DONE)
+      std::cout << "Reading image counts from ImageNet..." << std::endl;
-    {
-      db_error(ppdb, query);
-    }
-  
-    sqlite3_finalize(schmstmt);
-  }
-  
-  std::cout << "Writing prepositions..." << std::endl;
-  std::ifstream prepfile("prepositions.txt");
-  if (!prepfile.is_open())
-  {
-    std::cout << "Could not find prepositions file" << std::endl;
-    print_usage();
-  }
-  
-  for (;;)
-  {
-    std::string line;
-    if (!getline(prepfile, line))
-    {
-      break;
-    }
-    
-    if (line.back() == '\r')
-    {
-      line.pop_back();
-    }
-    
-    std::regex relation("^([^:]+): (.+)");
-    std::smatch relation_data;
-    std::regex_search(line, relation_data, relation);
-    std::string prep = relation_data[1];
-    std::list<std::string> groups = verbly::split<std::list<std::string>>(relation_data[2], ", ");
-    
-    std::string query("INSERT INTO prepositions (form) VALUES (?)");
-    sqlite3_stmt* ppstmt;
-    
-    if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
-    {
-      db_error(ppdb, query);
-    }
-    
-    sqlite3_bind_text(ppstmt, 1, prep.c_str(), prep.length(), SQLITE_TRANSIENT);
-    
-    if (sqlite3_step(ppstmt) != SQLITE_DONE)
-    {
-      db_error(ppdb, query);
-    }
-    
-    sqlite3_finalize(ppstmt);
-    
-    query = "SELECT last_insert_rowid()";
-    if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
-    {
-      db_error(ppdb, query);
-    }
-    
-    if (sqlite3_step(ppstmt) != SQLITE_ROW)
-    {
-      db_error(ppdb, query);
-    }
-    
-    int rowid = sqlite3_column_int(ppstmt, 0);
-    sqlite3_finalize(ppstmt);
-    
-    for (auto group : groups)
-    {
-      query = "INSERT INTO preposition_groups (preposition_id, groupname) VALUES (?, ?)";
-      if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
-      {
-        db_error(ppdb, query);
-      }
      
-      sqlite3_bind_int(ppstmt, 1, rowid);
+      std::ifstream file(imageNetPath_);
-      sqlite3_bind_text(ppstmt, 2, group.c_str(), group.length(), SQLITE_TRANSIENT);
+      if (!file)
-      
-      if (sqlite3_step(ppstmt) != SQLITE_DONE)
      {
-        db_error(ppdb, query);
+        throw std::invalid_argument("Could not find file " + imageNetPath_);
      }
-      
-      sqlite3_finalize(ppstmt);
-    }
-  }
-  
  
-  {
+      std::string line;
-    progress ppgs("Writing verbs...", verbs.size());
+      while (std::getline(file, line))
-    for (auto& mapping : verbs)
-    {
-      sqlite3_stmt* ppstmt;
-      std::string query("INSERT INTO verbs (infinitive, past_tense, past_participle, ing_form, s_form) VALUES (?, ?, ?, ?, ?)");
-      if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
-      {
-        db_error(ppdb, query);
-      }
-    
-      sqlite3_bind_text(ppstmt, 1, mapping.second.infinitive.c_str(), mapping.second.infinitive.length(), SQLITE_TRANSIENT);
-      sqlite3_bind_text(ppstmt, 2, mapping.second.past_tense.c_str(), mapping.second.past_tense.length(), SQLITE_TRANSIENT);
-      sqlite3_bind_text(ppstmt, 3, mapping.second.past_participle.c_str(), mapping.second.past_participle.length(), SQLITE_TRANSIENT);
-      sqlite3_bind_text(ppstmt, 4, mapping.second.ing_form.c_str(), mapping.second.ing_form.length(), SQLITE_TRANSIENT);
-      sqlite3_bind_text(ppstmt, 5, mapping.second.s_form.c_str(), mapping.second.s_form.length(), SQLITE_TRANSIENT);
-    
-      if (sqlite3_step(ppstmt) != SQLITE_DONE)
-      {
-        db_error(ppdb, query);
-      }
-    
-      sqlite3_finalize(ppstmt);
-      
-      std::string canonical(mapping.second.infinitive);
-      std::transform(std::begin(canonical), std::end(canonical), std::begin(canonical), ::tolower);
-      if (pronunciations.count(canonical) == 1)
      {
-        query = "SELECT last_insert_rowid()";
+        if (line.back() == '\r')
-        if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
        {
-          db_error(ppdb, query);
+          line.pop_back();
        }
-    
+      
-        if (sqlite3_step(ppstmt) != SQLITE_ROW)
+        std::string wnid_s = line.substr(1, 8);
+        int wnid = stoi(wnid_s) + 100000000;
+        if (notionByWnid_.count(wnid))
        {
-          db_error(ppdb, query);
+          // We know that this notion has a wnid and is a noun.
-        }
+          notionByWnid_.at(wnid)->incrementNumOfImages();
-    
-        int rowid = sqlite3_column_int(ppstmt, 0);
-    
-        sqlite3_finalize(ppstmt);
-        
-        mapping.second.id = rowid;
-        
-        for (auto pronunciation : pronunciations[canonical])
-        {
-          if (!pronunciation.rhyme.empty())
-          {
-            query = "INSERT INTO verb_pronunciations (verb_id, pronunciation, syllables, stress, prerhyme, rhyme) VALUES (?, ?, ?, ?, ?, ?)";
-          } else {
-            query = "INSERT INTO verb_pronunciations (verb_id, pronunciation, syllables, stress) VALUES (?, ?, ?, ?)";
-          }
-          
-          if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
-          {
-            db_error(ppdb, query);
-          }
-          
-          sqlite3_bind_int(ppstmt, 1, rowid);
-          sqlite3_bind_text(ppstmt, 2, pronunciation.phonemes.c_str(), pronunciation.phonemes.length(), SQLITE_TRANSIENT);
-          sqlite3_bind_int(ppstmt, 3, pronunciation.syllables);
-          sqlite3_bind_text(ppstmt, 4, pronunciation.stress.c_str(), pronunciation.stress.length(), SQLITE_TRANSIENT);
-          
-          if (!pronunciation.rhyme.empty())
-          {
-            sqlite3_bind_text(ppstmt, 5, pronunciation.prerhyme.c_str(), pronunciation.prerhyme.length(), SQLITE_TRANSIENT);
-            sqlite3_bind_text(ppstmt, 6, pronunciation.rhyme.c_str(), pronunciation.rhyme.length(), SQLITE_TRANSIENT);
-          }
-          
-          if (sqlite3_step(ppstmt) != SQLITE_DONE)
-          {
-            db_error(ppdb, query);
-          }
-          
-          sqlite3_finalize(ppstmt);
        }
      }
-      
-      ppgs.update();
    }
-  }
+    
-  
+    void generator::readWordNetSenseKeys()
-  {
-    progress ppgs("Writing verb frames...", groups.size());
-    for (auto& mapping : groups)
    {
-      std::list<json> roledatal;
+      std::list<std::string> lines(readFile(wordNetPath_ + "wn_sk.pl"));
-      std::transform(std::begin(mapping.second.roles), std::end(mapping.second.roles), std::back_inserter(roledatal), [] (std::pair<std::string, selrestr_t> r) {
+      progress ppgs("Reading sense keys from WordNet...", lines.size());
-        json role;
-        role["type"] = r.first;
-        role["selrestrs"] = export_selrestrs(r.second);
-        
-        return role;
-      });
-      
-      json roledata(roledatal);
-      std::string rdm = roledata.dump();
-      
-      sqlite3_stmt* ppstmt;
-      std::string query("INSERT INTO groups (data) VALUES (?)");
-      if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
-      {
-        db_error(ppdb, query);
-      }
-      
-      sqlite3_bind_blob(ppstmt, 1, rdm.c_str(), rdm.size(), SQLITE_TRANSIENT);
-      
-      if (sqlite3_step(ppstmt) != SQLITE_DONE)
-      {
-        db_error(ppdb, query);
-      }
      
-      sqlite3_finalize(ppstmt);
+      for (std::string line : lines)
-      
-      query = "SELECT last_insert_rowid()";
-      if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
-      {
-        db_error(ppdb, query);
-      }
-      
-      if (sqlite3_step(ppstmt) != SQLITE_ROW)
-      {
-        db_error(ppdb, query);
-      }
-      
-      int gid = sqlite3_column_int(ppstmt, 0);
-      sqlite3_finalize(ppstmt);
-      
-      for (auto frame : mapping.second.frames)
      {
-        std::list<json> fdatap;
+        ppgs.update();
-        std::transform(std::begin(frame), std::end(frame), std::back_inserter(fdatap), [] (framepart_t& fp) {
-          json part;
-          
-          switch (fp.type)
-          {
-            case framepart_t::type_t::np:
-            {
-              part["type"] = "np";
-              part["role"] = fp.role;
-              part["selrestrs"] = export_selrestrs(fp.selrestrs);
-              part["synrestrs"] = fp.synrestrs;
-              
-              break;
-            }
-            
-            case framepart_t::type_t::pp:
-            {
-              part["type"] = "pp";
-              part["values"] = fp.choices;
-              part["preprestrs"] = fp.preprestrs;
-              
-              break;
-            }
-            
-            case framepart_t::type_t::v:
-            {
-              part["type"] = "v";
-              
-              break;
-            }
-            
-            case framepart_t::type_t::adj:
-            {
-              part["type"] = "adj";
-              
-              break;
-            }
-            
-            case framepart_t::type_t::adv:
-            {
-              part["type"] = "adv";
-              
-              break;
-            }
-            
-            case framepart_t::type_t::lex:
-            {
-              part["type"] = "lex";
-              part["value"] = fp.lexval;
-              
-              break;
-            }
-          }
-          
-          return part;
-        });
-        
-        json fdata(fdatap);
-        std::string marshall = fdata.dump();
-        
-        query = "INSERT INTO frames (group_id, data) VALUES (?, ?)";
-        if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
-        {
-          db_error(ppdb, query);
-        }
-        
-        sqlite3_bind_int(ppstmt, 1, gid);
-        sqlite3_bind_blob(ppstmt, 2, marshall.c_str(), marshall.length(), SQLITE_TRANSIENT);
        
-        if (sqlite3_step(ppstmt) != SQLITE_DONE)
+        // We only actually need to look up verbs by sense key, so every line
+        // that is not a verb entry (synset id starting with 2) is skipped.
+        // The pattern never changes, so it is compiled once instead of once
+        // per input line.
+        static const std::regex relation("^sk\\((2\\d{8}),(\\d+),'(.+)'\\)\\.$");
+        std::smatch relation_data;
+        if (!std::regex_search(line, relation_data, relation))
        {
-          db_error(ppdb, query);
+          continue;
        }
+    
+        int synset_id = stoi(relation_data[1]);
+        int wnum = stoi(relation_data[2]);
+        std::string sense_key = relation_data[3];
        
-        sqlite3_finalize(ppstmt);
+        // We are treating this mapping as injective, which is not entirely
-      }
+        // accurate. First, the WordNet table contains duplicate rows, so those
-      
+        // need to be ignored. More importantly, a small number of sense keys
-      for (auto member : mapping.second.members)
+        // (one for each letter of the Latin alphabet, plus 9 other words) each
-      {
+        // map to two different words in the same synset which differ only by
-        if (verbs.count(member) == 1)
+        // capitalization. Luckily, none of these exceptions are verbs, so we
+        // can pretend that the mapping is injective.
+        if (!wnSenseKeys_.count(sense_key))
        {
-          auto& v = verbs[member];
+          std::pair<int, int> lookup(synset_id, wnum);
-          
+          if (wordByWnidAndWnum_.count(lookup))
-          query = "INSERT INTO verb_groups (verb_id, group_id) VALUES (?, ?)";
-          if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
-          {
-            db_error(ppdb, query);
-          }
-          
-          sqlite3_bind_int(ppstmt, 1, v.id);
-          sqlite3_bind_int(ppstmt, 2, gid);
-          
-          if (sqlite3_step(ppstmt) != SQLITE_DONE)
          {
-            db_error(ppdb, query);
+            wnSenseKeys_[sense_key] = wordByWnidAndWnum_.at(lookup);
          }
-          
-          sqlite3_finalize(ppstmt);
        }
      }
-      
-      ppgs.update();
    }
-  }
+    
-  
+    void generator::readVerbNet()
-  // Get nouns/adjectives/adverbs from WordNet
-  // Useful relations:
-  // - s: master list
-  // - ant: antonymy (e.g. happy/sad, sad/happy, happiness/sadness)
-  // - at: variation (e.g. a measurement can be standard or nonstandard)
-  // - der: derivation (e.g. happy/happily, happily/happy)
-  // - hyp: hypernymy/hyponymy (e.g. color/red, color/blue)
-  // - ins: instantiation (do we need this? let's see)
-  // - mm: member meronymy/holonymy (e.g. family/mother, family/child)
-  // - mp: part meronymy/holonymy (e.g. wheel/spoke, wheel/tire)
-  // - ms: substance meronymy/holonymy (e.g. tire/rubber, doorstop/rubber)
-  // - per: pertainymy (e.g. something that is Alaskan pertains to Alaska)
-  //        mannernymy (e.g. something done quickly is done in a manner that is quick)
-  // - sa: specification (e.g. inaccurate (general) can mean imprecise or incorrect (specific))
-  // - sim: synonymy (e.g. cheerful/happy, happy/cheerful)
-  // - syntax: positioning flags for some adjectives
-  std::string wnpref {argv[3]};
-  if (wnpref.back() != '/')
-  {
-    wnpref += '/';
-  }
-  
-  // s table
-  {
-    std::ifstream wnsfile(wnpref + "wn_s.pl");
-    if (!wnsfile.is_open())
    {
-      std::cout << "Invalid WordNet data directory." << std::endl;
+      std::cout << "Reading frames from VerbNet..." << std::endl;
-      print_usage();
-    }
  
-    std::list<std::string> lines;
+      DIR* dir;
-    for (;;)
+      if ((dir = opendir(verbNetPath_.c_str())) == nullptr)
-    {
-      std::string line;
-      if (!getline(wnsfile, line))
      {
-        break;
+        throw std::invalid_argument("Invalid VerbNet data directory");
      }
-    
+  
-      if (line.back() == '\r')
+      struct dirent* ent;
-      {
+      while ((ent = readdir(dir)) != nullptr)
-        line.pop_back();
-      }
-      
-      lines.push_back(line);
-    }
-    
-    progress ppgs("Writing nouns, adjectives, and adverbs...", lines.size());
-    for (auto line : lines)
-    {
-      ppgs.update();
-      
-      std::regex relation("^s\\(([134]\\d{8}),(\\d+),'(.+)',\\w,\\d+,\\d+\\)\\.$");
-      std::smatch relation_data;
-      if (!std::regex_search(line, relation_data, relation))
      {
-        continue;
+        std::string filename(verbNetPath_);
-      }
+        if (filename.back() != '/')
+        {
+          filename += '/';
+        }
    
-      int synset_id = stoi(relation_data[1]);
+        filename += ent->d_name;
-      int wnum = stoi(relation_data[2]);
-      std::string word = relation_data[3];
-      size_t word_it;
-      while ((word_it = word.find("''")) != std::string::npos)
-      {
-        word.erase(word_it, 1);
-      }
    
-      std::string query;
+        // Only process entries whose path ends in ".xml". The explicit length
+        // check matters: a plain rfind() == size() - 4 comparison wraps around
+        // for paths shorter than four characters (e.g. "./.") and would let
+        // them through to the XML parser.
+        if ((filename.size() < 4) || (filename.compare(filename.size() - 4, 4, ".xml") != 0))
-      switch (synset_id / 100000000)
-      {
-        case 1: // Noun
        {
-          if (nouns.count(word) == 1)
+          continue;
-          {
-            query = "INSERT INTO nouns (singular, proper, complexity, images, wnid, plural) VALUES (?, ?, ?, ?, ?, ?)";
-          } else {
-            query = "INSERT INTO nouns (singular, proper, complexity, images, wnid) VALUES (?, ?, ?, ?, ?)";
-          }
-        
-          break;
        }
-      
+    
-        case 2: // Verb
+        xmlDocPtr doc = xmlParseFile(filename.c_str());
+        if (doc == nullptr)
        {
-          // Ignore
+          throw std::logic_error("Error opening " + filename);
-        
-          break;
        }
-      
+    
-        case 3: // Adjective
+        xmlNodePtr top = xmlDocGetRootElement(doc);
+        if ((top == nullptr) || (xmlStrcmp(top->name, reinterpret_cast<const xmlChar*>("VNCLASS"))))
        {
-          if (adjectives.count(word) == 1)
+          throw std::logic_error("Bad VerbNet file format: " + filename);
-          {
-            query = "INSERT INTO adjectives (base_form, complexity, comparative, superlative) VALUES (?, ?, ?, ?)";
-          } else {
-            query = "INSERT INTO adjectives (base_form, complexity) VALUES (?, ?)";
-          }
-        
-          break;
        }
-      
-        case 4: // Adverb
+        try
        {
-          if (adjectives.count(word) == 1)
+          createGroup(top);
-          {
+        } catch (const std::exception& e)
-            query = "INSERT INTO adverbs (base_form, complexity, comparative, superlative) VALUES (?, ?, ?, ?)";
+        {
-          } else {
+          std::throw_with_nested(std::logic_error("Error parsing VerbNet file: " + filename));
-            query = "INSERT INTO adverbs (base_form, complexity) VALUES (?, ?)";
-          }
-        
-          break;
        }
+    
+        // The parsed tree is not used again in this loop iteration, so
+        // release it here instead of leaking one document per file.
+        // NOTE(review): assumes createGroup() keeps no pointers into the
+        // tree - confirm. The throwing paths above still skip this cleanup.
+        xmlFreeDoc(doc);
      }
+  
+      closedir(dir);
+    }
    
-      sqlite3_stmt* ppstmt;
+    void generator::readAgidInflections()
-      if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
+    {
+      std::list<std::string> lines(readFile(agidPath_));
+      progress ppgs("Reading inflections from AGID...", lines.size());
+      
+      for (std::string line : lines)
      {
-        db_error(ppdb, query);
+        ppgs.update();
-      }
+        
+        // NOTE(review): the parsing below treats a line as
+        // "<word> <type>[?]: <forms>" - confirm against the AGID format.
+        // Lines without a space, or too short to hold the two characters
+        // inspected after it, are skipped instead of being indexed out of
+        // range.
+        const std::string::size_type divider = line.find(' ');
+        if ((divider == std::string::npos) || (line.size() < divider + 3))
+        {
+          continue;
+        }
+        
+        std::string infinitive = line.substr(0, divider);
+        line = line.substr(divider+1);
+        char type = line[0];
    
-      sqlite3_bind_text(ppstmt, 1, word.c_str(), word.length(), SQLITE_TRANSIENT);
+        if (line[1] == '?')
-      switch (synset_id / 100000000)
-      {
-        case 1: // Noun
        {
-          sqlite3_bind_int(ppstmt, 2, (std::any_of(std::begin(word), std::end(word), [] (char ch) {
+          line.erase(0, 4);
-            return isupper(ch);
+        } else {
-          }) ? 1 : 0));
+          line.erase(0, 3);
-          
-          sqlite3_bind_int(ppstmt, 3, verbly::split<std::list<std::string>>(word, " ").size());
-          sqlite3_bind_int(ppstmt, 4, images[synset_id]);
-          sqlite3_bind_int(ppstmt, 5, synset_id);
-          
-          if (nouns.count(word) == 1)
-          {
-            sqlite3_bind_text(ppstmt, 6, nouns[word].plural.c_str(), nouns[word].plural.length(), SQLITE_TRANSIENT);
-          }
-          
-          break;
        }
        
-        case 3: // Adjective
+        if (!lemmaByBaseForm_.count(infinitive) && (type != 'V'))
-        case 4: // Adverb
        {
-          sqlite3_bind_int(ppstmt, 2, verbly::split<std::list<std::string>>(word, " ").size());
+          continue;
-          
+        }
-          if (adjectives.count(word) == 1)
+        
+        lemma& curLemma = lookupOrCreateLemma(infinitive);
+        auto forms = split<std::vector<std::string>>(line, " | ");
+        for (std::string& inflForm : forms)
+        {
+          // find_first_of() returns an unsigned size_type; keep it unsigned so
+          // the comparison with npos does not depend on an int round-trip.
+          const std::string::size_type sympos = inflForm.find_first_of(",?");
+          if (sympos != std::string::npos)
          {
-            sqlite3_bind_text(ppstmt, 3, adjectives[word].comparative.c_str(), adjectives[word].comparative.length(), SQLITE_TRANSIENT);
+            inflForm = inflForm.substr(0, sympos);
-            sqlite3_bind_text(ppstmt, 4, adjectives[word].superlative.c_str(), adjectives[word].superlative.length(), SQLITE_TRANSIENT);
          }
-          
-          break;
        }
-      }
    
-      if (sqlite3_step(ppstmt) != SQLITE_DONE)
+        switch (type)
-      {
-        db_error(ppdb, query);
-      }
-    
-      sqlite3_finalize(ppstmt);
-    
-      query = "SELECT last_insert_rowid()";
-      if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
-      {
-        db_error(ppdb, query);
-      }
-    
-      if (sqlite3_step(ppstmt) != SQLITE_ROW)
-      {
-        db_error(ppdb, query);
-      }
-    
-      int rowid = sqlite3_column_int(ppstmt, 0);
-      wn[synset_id][wnum] = rowid;
-    
-      sqlite3_finalize(ppstmt);
-      
-      std::string canonical(word);
-      std::transform(std::begin(canonical), std::end(canonical), std::begin(canonical), ::tolower);
-      if (pronunciations.count(canonical) == 1)
-      {
-        for (auto pronunciation : pronunciations[canonical])
        {
-          switch (synset_id / 100000000)
+          case 'V':
          {
-            case 1: // Noun
+            if (forms.size() == 4)
            {
-              if (!pronunciation.rhyme.empty())
+              curLemma.addInflection(inflection::past_tense, lookupOrCreateForm(forms[0]));
-              {
+              curLemma.addInflection(inflection::past_participle, lookupOrCreateForm(forms[1]));
-                query = "INSERT INTO noun_pronunciations (noun_id, pronunciation, syllables, stress, prerhyme, rhyme) VALUES (?, ?, ?, ?, ?, ?)";
+              curLemma.addInflection(inflection::ing_form, lookupOrCreateForm(forms[2]));
-              } else {
+              curLemma.addInflection(inflection::s_form, lookupOrCreateForm(forms[3]));
-                query = "INSERT INTO noun_pronunciations (noun_id, pronunciation, syllables, stress) VALUES (?, ?, ?, ?)";
+            } else if (forms.size() == 3)
-              }
-              
-              break;
-            }
-            
-            case 3: // Adjective
            {
-              if (!pronunciation.rhyme.empty())
+              curLemma.addInflection(inflection::past_tense, lookupOrCreateForm(forms[0]));
-              {
+              curLemma.addInflection(inflection::past_participle, lookupOrCreateForm(forms[0]));
-                query = "INSERT INTO adjective_pronunciations (adjective_id, pronunciation, syllables, stress, prerhyme, rhyme) VALUES (?, ?, ?, ?, ?, ?)";
+              curLemma.addInflection(inflection::ing_form, lookupOrCreateForm(forms[1]));
-              } else {
+              curLemma.addInflection(inflection::s_form, lookupOrCreateForm(forms[2]));
-                query = "INSERT INTO adjective_pronunciations (adjective_id, pronunciation, syllables, stress) VALUES (?, ?, ?, ?)";
+            } else if (forms.size() == 8)
-              }
+            {
-              
+              // As of AGID 2014.08.11, this is only "to be"
-              break;
+              curLemma.addInflection(inflection::past_tense, lookupOrCreateForm(forms[0]));
+              curLemma.addInflection(inflection::past_participle, lookupOrCreateForm(forms[2]));
+              curLemma.addInflection(inflection::ing_form, lookupOrCreateForm(forms[3]));
+              curLemma.addInflection(inflection::s_form, lookupOrCreateForm(forms[4]));
+            } else {
+              // Words that don't fit the cases above as of AGID 2014.08.11:
+              // - may and shall do not conjugate the way we want them to
+              // - methinks only has a past tense and is an outlier
+              // - wit has five forms, and is archaic/obscure enough that we can ignore it for now
+              std::cout << " Ignoring verb \"" << infinitive << "\" due to non-standard number of forms." << std::endl;
            }
-            
+    
-            case 4: // Adverb
+            // For verbs in particular, we sometimes create a notion and a word
+            // from inflection data. Specifically, if there are not yet any
+            // verbs existing that have the same infinitive form. "Yet" means
+            // that this verb appears in the AGID data but not in either WordNet
+            // or VerbNet.
+            if (!wordsByBaseForm_.count(infinitive)
+              || !std::any_of(std::begin(wordsByBaseForm_.at(infinitive)), std::end(wordsByBaseForm_.at(infinitive)), [] (word* w) {
+                return w->getNotion().getPartOfSpeech() == part_of_speech::verb;
+              }))
            {
-              if (!pronunciation.rhyme.empty())
+              notion& n = createNotion(part_of_speech::verb);
-              {
+              createWord(n, curLemma);
-                query = "INSERT INTO adverb_pronunciations (adverb_id, pronunciation, syllables, stress, prerhyme, rhyme) VALUES (?, ?, ?, ?, ?, ?)";
-              } else {
-                query = "INSERT INTO adverb_pronunciations (adverb_id, pronunciation, syllables, stress) VALUES (?, ?, ?, ?)";
-              }
-              
-              break;
            }
-          }
-          if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
-          {
-            db_error(ppdb, query);
-          }
-        
-          sqlite3_bind_int(ppstmt, 1, rowid);
-          sqlite3_bind_text(ppstmt, 2, pronunciation.phonemes.c_str(), pronunciation.phonemes.length(), SQLITE_TRANSIENT);
-          sqlite3_bind_int(ppstmt, 3, pronunciation.syllables);
-          sqlite3_bind_text(ppstmt, 4, pronunciation.stress.c_str(), pronunciation.stress.length(), SQLITE_TRANSIENT);
-          
-          if (!pronunciation.rhyme.empty())
-          {
-            sqlite3_bind_text(ppstmt, 5, pronunciation.prerhyme.c_str(), pronunciation.prerhyme.length(), SQLITE_TRANSIENT);
-            sqlite3_bind_text(ppstmt, 6, pronunciation.rhyme.c_str(), pronunciation.rhyme.length(), SQLITE_TRANSIENT);
-          }
        
-          if (sqlite3_step(ppstmt) != SQLITE_DONE)
+            break;
-          {
-            db_error(ppdb, query);
          }
-        
-          sqlite3_finalize(ppstmt);
-        }
-      }
-    }
-  }
-  
-  // While we're working on s
-  {
-    progress ppgs("Writing word synonyms...", wn.size());
-    for (auto sense : wn)
-    {
-      ppgs.update();
      
-      for (auto word1 : sense.second)
+          case 'A':
-      {
-        for (auto word2 : sense.second)
-        {
-          if (word1 != word2)
          {
-            std::string query;
+            if (forms.size() == 2)
-            switch (sense.first / 100000000)
            {
-              case 1: // Noun
+              curLemma.addInflection(inflection::comparative, lookupOrCreateForm(forms[0]));
-              {
+              curLemma.addInflection(inflection::superlative, lookupOrCreateForm(forms[1]));
-                query = "INSERT INTO noun_synonymy (noun_1_id, noun_2_id) VALUES (?, ?)";
+            } else {
-        
+              // As of AGID 2014.08.11, this is only "only", which has only the form "onliest"
-                break;
+              std::cout << " Ignoring adjective/adverb \"" << infinitive << "\" due to non-standard number of forms." << std::endl;
-              }
+            }
-      
-              case 2: // Verb
-              {
-                // Ignore
-        
-                break;
-              }
-      
-              case 3: // Adjective
-              {
-                query = "INSERT INTO adjective_synonymy (adjective_1_id, adjective_2_id) VALUES (?, ?)";
        
-                break;
+            break;
-              }
+          }
      
-              case 4: // Adverb
+          case 'N':
-              {
+          {
-                query = "INSERT INTO adverb_synonymy (adverb_1_id, adverb_2_id) VALUES (?, ?)";
+            if (forms.size() == 1)
-        
-                break;
-              }
-            }
-            
-            sqlite3_stmt* ppstmt;
-            if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
-            {
-              db_error(ppdb, query);
-            }
-    
-            sqlite3_bind_int(ppstmt, 1, word1.second);
-            sqlite3_bind_int(ppstmt, 2, word2.second);
-    
-            if (sqlite3_step(ppstmt) != SQLITE_DONE)
            {
-              db_error(ppdb, query);
+              curLemma.addInflection(inflection::plural, lookupOrCreateForm(forms[0]));
+            } else {
+              // As of AGID 2014.08.11, this is non-existent.
+              std::cout << " Ignoring noun \"" << infinitive << "\" due to non-standard number of forms." << std::endl;
            }
-    
+        
-            sqlite3_finalize(ppstmt);
+            break;
          }
        }
      }
    }
-  }
-  
-  // ant table
-  {
-    std::ifstream wnantfile(wnpref + "wn_ant.pl");
-    if (!wnantfile.is_open())
-    {
-      std::cout << "Invalid WordNet data directory." << std::endl;
-      print_usage();
-    }
-    std::list<std::string> lines;
-    for (;;)
-    {
-      std::string line;
-      if (!getline(wnantfile, line))
-      {
-        break;
-      }
    
-      if (line.back() == '\r')
+    void generator::readPrepositions()
-      {
-        line.pop_back();
-      }
-      
-      lines.push_back(line);
-    }
-    
-    progress ppgs("Writing antonyms...", lines.size());
-    for (auto line : lines)
    {
-      ppgs.update();
+      std::list<std::string> lines(readFile("prepositions.txt"));
+      progress ppgs("Reading prepositions...", lines.size());
      
-      std::regex relation("^ant\\(([134]\\d{8}),(\\d+),([134]\\d{8}),(\\d+)\\)\\.");
+      for (std::string line : lines)
-      std::smatch relation_data;
-      if (!std::regex_search(line, relation_data, relation))
-      {
-        continue;
-      }
-    
-      int synset_id_1 = stoi(relation_data[1]);
-      int wnum_1 = stoi(relation_data[2]);
-      int synset_id_2 = stoi(relation_data[3]);
-      int wnum_2 = stoi(relation_data[4]);
-    
-      std::string query;
-      switch (synset_id_1 / 100000000)
      {
-        case 1: // Noun
+        ppgs.update();
-        {
-          query = "INSERT INTO noun_antonymy (noun_1_id, noun_2_id) VALUES (?, ?)";
        
-          break;
+        std::regex relation("^([^:]+): (.+)");
-        }
+        std::smatch relation_data;
-      
+        if (!std::regex_search(line, relation_data, relation))
+        {
+          // A line that is not of the form "<preposition>: <groups>" carries
+          // no data; skip it rather than registering a preposition with an
+          // empty base form.
+          continue;
+        }
-        case 2: // Verb
+        std::string prep = relation_data[1];
-        {
+        auto groups = split<std::list<std::string>>(relation_data[2], ", ");
-          // Ignore
        
-          break;
+        notion& n = createNotion(part_of_speech::preposition);
-        }
+        lemma& l = lookupOrCreateLemma(prep);
-      
+        word& w = createWord(n, l);
-        case 3: // Adjective
-        {
-          query = "INSERT INTO adjective_antonymy (adjective_1_id, adjective_2_id) VALUES (?, ?)";
        
-          break;
+        n.setPrepositionGroups(groups);
-        }
-      
-        case 4: // Adverb
-        {
-          query = "INSERT INTO adverb_antonymy (adverb_1_id, adverb_2_id) VALUES (?, ?)";
-        
-          break;
-        }
-      }
-    
-      sqlite3_stmt* ppstmt;
-      if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
-      {
-        db_error(ppdb, query);
-      }
-    
-      sqlite3_bind_int(ppstmt, 1, wn[synset_id_1][wnum_1]);
-      sqlite3_bind_int(ppstmt, 2, wn[synset_id_2][wnum_2]);
-    
-      if (sqlite3_step(ppstmt) != SQLITE_DONE)
-      {
-        db_error(ppdb, query);
-      }
-    
-      sqlite3_finalize(ppstmt);
-    }
-  }
-  
-  // at table
-  {
-    std::ifstream wnatfile(wnpref + "wn_at.pl");
-    if (!wnatfile.is_open())
-    {
-      std::cout << "Invalid WordNet data directory." << std::endl;
-      print_usage();
-    }
-    std::list<std::string> lines;
-    for (;;)
-    {
-      std::string line;
-      if (!getline(wnatfile, line))
-      {
-        break;
      }
-    
-      if (line.back() == '\r')
-      {
-        line.pop_back();
-      }
-      
-      lines.push_back(line);
    }
    
-    progress ppgs("Writing variations...", lines.size());
+    void generator::readCmudictPronunciations()
-    for (auto line : lines)
    {
-      ppgs.update();
+      std::list<std::string> lines(readFile(cmudictPath_));
+      progress ppgs("Reading pronunciations from CMUDICT...", lines.size());
      
-      std::regex relation("^at\\((1\\d{8}),(3\\d{8})\\)\\.");
+      for (std::string line : lines)
-      std::smatch relation_data;
-      if (!std::regex_search(line, relation_data, relation))
      {
-        continue;
+        ppgs.update();
-      }
+        
-      
+        // The pattern is loop-invariant, so compile it once rather than for
+        // every line of the dictionary.
+        static const std::regex phoneme("([A-Z][^ \\(]*)(?:\\(\\d+\\))?  ([A-Z 0-9]+)");
-      int synset_id_1 = stoi(relation_data[1]);
+        std::smatch phoneme_data;
-      int synset_id_2 = stoi(relation_data[2]);
+        if (std::regex_search(line, phoneme_data, phoneme))
-      std::string query("INSERT INTO variation (noun_id, adjective_id) VALUES (?, ?)");
-      
-      for (auto mapping1 : wn[synset_id_1])
-      {
-        for (auto mapping2 : wn[synset_id_2])
        {
-          sqlite3_stmt* ppstmt;
+          std::string canonical(phoneme_data[1]);
-          if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK)
+          std::transform(std::begin(canonical), std::end(canonical), std::begin(canonical), ::tolower);
-          {
-            db_error(ppdb, query);
-          }
-          
-          sqlite3_bind_int(ppstmt, 1, mapping1.second);
-          sqlite3_bind_int(ppstmt, 2, mapping2.second);
          
-          if (sqlite3_step(ppstmt) != SQLITE_DONE)
+          if (!formByText_.count(canonical))
          {
-            db_error(ppdb, query);
+            continue;
          }
          
-          sqlite3_finalize(ppstmt);
+          std::string phonemes = phoneme_data[2];
+          pronunciations_.emplace_back(phonemes);
+          pronunciation& p = pronunciations_.back();
+          formByText_.at(canonical)->addPronunciation(p);
        }
      }
    }
-  }
-  
-  // der table
-  {
-    std::ifstream wnderfile(wnpref + "wn_der.pl");
-    if (!wnderfile.is_open())
-    {
-      std::cout << "Invalid WordNet data directory." << std::endl;
-      print_usage();
-    }
    
-    std::list<std::string> lines;
+    // Reads "schema.sql" (relative to the working directory), joins its
+    // lines, splits the result on ';' and passes every non-empty piece to
+    // db_.runQuery(). Throws std::invalid_argument if the file cannot be
+    // opened.
+    // NOTE(review): lines are joined without a separator, so tokens split
+    // across an unindented line break would fuse - confirm schema.sql is
+    // written with that in mind.
+    void generator::writeSchema()
-    for (;;)
    {
-      std::string line;
+      std::ifstream file("schema.sql");
-      if (!getline(wnderfile, line))
+      if (!file)
      {
-        break;
+        throw std::invalid_argument("Could not find database schema");
      }
-    
+  
-      if (line.back() == '\r')
+      std::ostringstream schemaBuilder;
+      std::string line;
+      while (std::getline(file, line))
      {
-        line.pop_back();
+        // Strip the CR of a CRLF line ending. Blank lines can occur, and
+        // back() on an empty string is undefined, so test for emptiness
+        // first.
+        if (!line.empty() && (line.back() == '\r'))
+        {
+          line.pop_back();
+        }
+      
+        schemaBuilder << line;
      }
      
-      lines.push_back(line);
+      std::string schema = schemaBuilder.str();
+      auto queries = split<std::list<std::string>>(schema, ";");
+      progress ppgs("Writing database schema...", queries.size());
+      for (std::string query : queries)
+      {
+        // Guard the individual statement, not the list as a whole: the list
+        // is never empty inside its own loop.
+        if (!query.empty())
+        {
+          db_.runQuery(query);
+        }
+        
+        ppgs.update();
+      }
    }
    
-    progress ppgs("Writing morphological derivation...", lines.size());
+    void generator::dumpObjects()
-    for (auto line : lines)
    {
-      ppgs.update();
-      
-      std::regex relation("^der\\(([134]\\d{8}),(\\d+),([134]\\d{8}),(\\d+)\\)\\.");
-      std::smatch relation_data;
-      if (!std::regex_search(line, relation_data, relation))
      {
-        continue;
+        progress ppgs("Writing notions...", notions_.size());
+        
+        for (notion& n : notions_)
+        {
+          db_ << n;
+          
+          ppgs.update();
+        }
      }
      
-      int synset_id_1 = stoi(relation_data[1]);
-      int wnum_1 = stoi(relation_data[2]);
-      int synset_id_2 = stoi(relation_data[3]);
-      int wnum_2 = stoi(relation_data[4]);
-      std::string query;
-      switch (synset_id_1 / 100000000)
      {
-        case 1: // Noun
+        progress ppgs("Writing words...", words_.size());
+        
+        for (word& w : words_)
        {
-          switch (synset_id_2 / 100000000)
+          db_ << w;
-          {
-            case 1: // Noun
-            {
-              query = "INSERT INTO noun_noun_derivation (noun_1_id, noun_2_id) VALUES (?, ?)";
-              break;
-            }
-            
-            case 3: // Adjective
-            {
-              query = "INSERT INTO noun_adjective_derivation (noun_id, adjective_id) VALUES (?, ?)";
-              break;
-            }
-            
-            case 4: // Adverb
-            {
-              query = "INSERT INTO noun_adverb_derivation (noun_id, adverb_id) VALUES (?, ?)";
-              break;
-            }
-          }
          
-          break;
+          ppgs.update();
        }
+      }
+      
+      {
+        progress ppgs("Writing lemmas...", lemmas_.size());
        
-        case 3: // Adjective
+        for (lemma& l : lemmas_)
        {
-          switch (synset_id_2 / 100000000)
+          db_ << l;
-          {
-            case 1: // Noun
-            {
-              query = "INSERT INTO noun_adjective_derivation (adjective_id, noun_id) VALUES (?, ?)";
-              break;
-            }
-            
-            case 3: // Adjective
-            {
-              query = "INSERT INTO adjective_adjective_derivation (adjective_id, adjective_id) VALUES (?, ?)";
-              break;
-            }
-            
-            case 4: // Adverb
-            {
-              query = "INSERT INTO adjective_adverb_derivation (adjective_id, adverb_id) VALUES (?, ?)";
-              break;
-            }
-          }
          
-          break;
+          ppgs.update();
        }
+      }
+      
+      {
+        progress ppgs("Writing forms...", forms_.size());
        
-        case 4: // Adverb
+        for (form& f : forms_)
        {
-          switch (synset_id_2 / 100000000)
+          db_ << f;
-          {
-            case 1: // Noun
-            {
-              query = "INSERT INTO noun_adverb_derivation (adverb_id, noun_id) VALUES (?, ?)";
-              break;
-            }
-            
-            case 3: // Adjective
-            {
-              query = "INSERT INTO adjective_adverb_derivation (adverb_id, adjective_id) VALUES (?, ?)";
-              break;
-            }
-            
-            case 4: // Adverb
-            {
-              query = "INSERT INTO adverb_adverb_derivation (adverb_1_id, adverb_2_id) VALUES (?, ?)";
-              break;
-            }
-          }
          
-          break;
+          ppgs.update();
        }
      }
      
-      sqlite3_stmt* ppstmt;
-      if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK)
      {
-        db_error(ppdb, query);
+        progress ppgs("Writing pronunciations...", pronunciations_.size());
+        
+        for (pronunciation& p : pronunciations_)
+        {
+          db_ << p;
+          
+          ppgs.update();
+        }
      }
      
-      sqlite3_bind_int(ppstmt, 1, wn[synset_id_1][wnum_1]);
-      sqlite3_bind_int(ppstmt, 2, wn[synset_id_2][wnum_2]);
-      
-      if (sqlite3_step(ppstmt) != SQLITE_DONE)
      {
-        db_error(ppdb, query);
+        progress ppgs("Writing verb groups...", groups_.size());
+        
+        for (group& g : groups_)
+        {
+          db_ << g;
+          
+          ppgs.update();
+        }
      }
      
-      sqlite3_finalize(ppstmt);
-    }
-  }
-  
-  // hyp table
-  {
-    std::ifstream wnhypfile(wnpref + "wn_hyp.pl");
-    if (!wnhypfile.is_open())
-    {
-      std::cout << "Invalid WordNet data directory." << std::endl;
-      print_usage();
-    }
-    std::list<std::string> lines;
-    for (;;)
-    {
-      std::string line;
-      if (!getline(wnhypfile, line))
-      {
-        break;
-      }
-    
-      if (line.back() == '\r')
      {
-        line.pop_back();
+        progress ppgs("Writing verb frames...", frames_.size());
+        
+        for (frame& f : frames_)
+        {
+          db_ << f;
+          
+          ppgs.update();
+        }
      }
-      
-      lines.push_back(line);
    }
    
-    progress ppgs("Writing hypernyms...", lines.size());
+    void generator::readWordNetAntonymy()
-    for (auto line : lines)
    {
-      ppgs.update();
+      std::list<std::string> lines(readFile(wordNetPath_ + "wn_ant.pl"));
-      
+      progress ppgs("Writing antonyms...", lines.size());
-      std::regex relation("^hyp\\((1\\d{8}),(1\\d{8})\\)\\.");
+      for (auto line : lines)
-      std::smatch relation_data;
-      if (!std::regex_search(line, relation_data, relation))
      {
-        continue;
+        ppgs.update();
-      }
-      
-      int synset_id_1 = stoi(relation_data[1]);
-      int synset_id_2 = stoi(relation_data[2]);
-      std::string query("INSERT INTO hypernymy (hyponym_id, hypernym_id) VALUES (?, ?)");
      
-      for (auto mapping1 : wn[synset_id_1])
+        std::regex relation("^ant\\(([134]\\d{8}),(\\d+),([134]\\d{8}),(\\d+)\\)\\.");
-      {
+        std::smatch relation_data;
-        for (auto mapping2 : wn[synset_id_2])
+        if (!std::regex_search(line, relation_data, relation))
        {
-          sqlite3_stmt* ppstmt;
+          continue;
-          if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK)
+        }
-          {
-            db_error(ppdb, query);
+        std::pair<int, int> lookup1(std::stoi(relation_data[1]), std::stoi(relation_data[2]));
-          }
+        std::pair<int, int> lookup2(std::stoi(relation_data[3]), std::stoi(relation_data[4]));
-          
+        
-          sqlite3_bind_int(ppstmt, 1, mapping1.second);
+        if (wordByWnidAndWnum_.count(lookup1) && wordByWnidAndWnum_.count(lookup2))
-          sqlite3_bind_int(ppstmt, 2, mapping2.second);
+        {
+          word& word1 = *wordByWnidAndWnum_.at(lookup1);
+          word& word2 = *wordByWnidAndWnum_.at(lookup2);
          
-          if (sqlite3_step(ppstmt) != SQLITE_DONE)
+          std::list<field> fields;
-          {
+          fields.emplace_back("antonym_1_id", word1.getId());
-            db_error(ppdb, query);
+          fields.emplace_back("antonym_2_id", word2.getId());
-          }
          
-          sqlite3_finalize(ppstmt);
+          db_.insertIntoTable("antonymy", std::move(fields));
        }
      }
    }
-  }
-  
-  // ins table
-  {
-    std::ifstream wninsfile(wnpref + "wn_ins.pl");
-    if (!wninsfile.is_open())
-    {
-      std::cout << "Invalid WordNet data directory." << std::endl;
-      print_usage();
-    }
-    std::list<std::string> lines;
-    for (;;)
-    {
-      std::string line;
-      if (!getline(wninsfile, line))
-      {
-        break;
-      }
    
-      if (line.back() == '\r')
+    void generator::readWordNetVariation()
+    {
+      std::list<std::string> lines(readFile(wordNetPath_ + "wn_at.pl"));
+      progress ppgs("Writing variation...", lines.size());
+      for (auto line : lines)
      {
-        line.pop_back();
+        ppgs.update();
-      }
      
-      lines.push_back(line);
+        std::regex relation("^at\\((1\\d{8}),(3\\d{8})\\)\\.");
+        std::smatch relation_data;
+        if (!std::regex_search(line, relation_data, relation))
+        {
+          continue;
+        }
+        int lookup1 = std::stoi(relation_data[1]);
+        int lookup2 = std::stoi(relation_data[2]);
+        
+        if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2))
+        {
+          notion& notion1 = *notionByWnid_.at(lookup1);
+          notion& notion2 = *notionByWnid_.at(lookup2);
+          
+          std::list<field> fields;
+          fields.emplace_back("noun_id", notion1.getId());
+          fields.emplace_back("adjective_id", notion2.getId());
+          
+          db_.insertIntoTable("variation", std::move(fields));
+        }
+      }
    }
    
-    progress ppgs("Writing instantiations...", lines.size());
+    void generator::readWordNetClasses()
-    for (auto line : lines)
    {
-      ppgs.update();
+      std::list<std::string> lines(readFile(wordNetPath_ + "wn_cls.pl"));
-      
+      progress ppgs("Writing usage, topicality, and regionality...", lines.size());
-      std::regex relation("^ins\\((1\\d{8}),(1\\d{8})\\)\\.");
+      for (auto line : lines)
-      std::smatch relation_data;
-      if (!std::regex_search(line, relation_data, relation))
      {
-        continue;
+        ppgs.update();
-      }
-      
-      int synset_id_1 = stoi(relation_data[1]);
-      int synset_id_2 = stoi(relation_data[2]);
-      std::string query("INSERT INTO instantiation (instance_id, class_id) VALUES (?, ?)");
      
-      for (auto mapping1 : wn[synset_id_1])
+        std::regex relation("^cls\\(([134]\\d{8}),(\\d+),(1\\d{8}),(\\d+),([tur])\\)\\.");
-      {
+        std::smatch relation_data;
-        for (auto mapping2 : wn[synset_id_2])
+        if (!std::regex_search(line, relation_data, relation))
+        {
+          continue;
+        }
+        std::pair<int, int> lookup1(std::stoi(relation_data[1]), std::stoi(relation_data[2]));
+        std::pair<int, int> lookup2(std::stoi(relation_data[3]), std::stoi(relation_data[4]));
+        std::string class_type = relation_data[5];
+        
+        std::string table_name;
+        if (class_type == "t")
+        {
+          table_name += "topicality";
+        } else if (class_type == "u")
+        {
+          table_name += "usage";
+        } else if (class_type == "r")
+        {
+          table_name += "regionality";
+        }
+        
+        std::list<int> leftJoin;
+        std::list<int> rightJoin;
+        
+        if ((lookup1.second == 0) && (wordsByWnid_.count(lookup1.first)))
        {
-          sqlite3_stmt* ppstmt;
+          std::transform(std::begin(wordsByWnid_.at(lookup1.first)), std::end(wordsByWnid_.at(lookup1.first)), std::back_inserter(leftJoin), [] (word* w) {
-          if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK)
+            return w->getId();
+          });
+        } else if (wordByWnidAndWnum_.count(lookup1)) {
+          leftJoin.push_back(wordByWnidAndWnum_.at(lookup1)->getId());
+        }
+        
+        if ((lookup2.second == 0) && (wordsByWnid_.count(lookup2.first)))
+        {
+          std::transform(std::begin(wordsByWnid_.at(lookup2.first)), std::end(wordsByWnid_.at(lookup2.first)), std::back_inserter(rightJoin), [] (word* w) {
+            return w->getId();
+          });
+        } else if (wordByWnidAndWnum_.count(lookup2)) {
+          rightJoin.push_back(wordByWnidAndWnum_.at(lookup2)->getId());
+        }
+        
+        for (int word1 : leftJoin)
+        {
+          for (int word2 : rightJoin)
          {
-            db_error(ppdb, query);
+            std::list<field> fields;
-          }
+            fields.emplace_back("term_id", word1);
+            fields.emplace_back("domain_id", word2);
          
-          sqlite3_bind_int(ppstmt, 1, mapping1.second);
+            db_.insertIntoTable(table_name, std::move(fields));
-          sqlite3_bind_int(ppstmt, 2, mapping2.second);
-          
-          if (sqlite3_step(ppstmt) != SQLITE_DONE)
-          {
-            db_error(ppdb, query);
          }
-          
-          sqlite3_finalize(ppstmt);
        }
      }
    }
-  }
-  
-  // mm table
-  {
-    std::ifstream wnmmfile(wnpref + "wn_mm.pl");
-    if (!wnmmfile.is_open())
-    {
-      std::cout << "Invalid WordNet data directory." << std::endl;
-      print_usage();
-    }
-    std::list<std::string> lines;
-    for (;;)
-    {
-      std::string line;
-      if (!getline(wnmmfile, line))
-      {
-        break;
-      }
    
-      if (line.back() == '\r')
+    void generator::readWordNetCausality()
+    {
+      std::list<std::string> lines(readFile(wordNetPath_ + "wn_cs.pl"));
+      progress ppgs("Writing causality...", lines.size());
+      for (auto line : lines)
      {
-        line.pop_back();
+        ppgs.update();
-      }
      
-      lines.push_back(line);
+        std::regex relation("^cs\\((2\\d{8}),(2\\d{8})\\)\\.");
+        std::smatch relation_data;
+        if (!std::regex_search(line, relation_data, relation))
+        {
+          continue;
+        }
+        int lookup1 = std::stoi(relation_data[1]);
+        int lookup2 = std::stoi(relation_data[2]);
+        
+        if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2))
+        {
+          notion& notion1 = *notionByWnid_.at(lookup1);
+          notion& notion2 = *notionByWnid_.at(lookup2);
+          
+          std::list<field> fields;
+          fields.emplace_back("effect_id", notion1.getId());
+          fields.emplace_back("cause_id", notion2.getId());
+          
+          db_.insertIntoTable("causality", std::move(fields));
+        }
+      }
    }
    
-    progress ppgs("Writing member meronyms...", lines.size());
+    void generator::readWordNetEntailment()
-    for (auto line : lines)
    {
-      ppgs.update();
+      std::list<std::string> lines(readFile(wordNetPath_ + "wn_ent.pl"));
-      
+      progress ppgs("Writing entailment...", lines.size());
-      std::regex relation("^mm\\((1\\d{8}),(1\\d{8})\\)\\.");
+      for (auto line : lines)
-      std::smatch relation_data;
-      if (!std::regex_search(line, relation_data, relation))
      {
-        continue;
+        ppgs.update();
-      }
      
-      int synset_id_1 = stoi(relation_data[1]);
+        std::regex relation("^ent\\((2\\d{8}),(2\\d{8})\\)\\.");
-      int synset_id_2 = stoi(relation_data[2]);
+        std::smatch relation_data;
-      std::string query("INSERT INTO member_meronymy (holonym_id, meronym_id) VALUES (?, ?)");
+        if (!std::regex_search(line, relation_data, relation))
-      
-      for (auto mapping1 : wn[synset_id_1])
-      {
-        for (auto mapping2 : wn[synset_id_2])
        {
-          sqlite3_stmt* ppstmt;
+          continue;
-          if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK)
+        }
-          {
-            db_error(ppdb, query);
+        int lookup1 = std::stoi(relation_data[1]);
-          }
+        int lookup2 = std::stoi(relation_data[2]);
-          
+        
-          sqlite3_bind_int(ppstmt, 1, mapping1.second);
+        if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2))
-          sqlite3_bind_int(ppstmt, 2, mapping2.second);
+        {
+          notion& notion1 = *notionByWnid_.at(lookup1);
+          notion& notion2 = *notionByWnid_.at(lookup2);
          
-          if (sqlite3_step(ppstmt) != SQLITE_DONE)
+          std::list<field> fields;
-          {
+          fields.emplace_back("given_id", notion1.getId());
-            db_error(ppdb, query);
+          fields.emplace_back("entailment_id", notion2.getId());
-          }
          
-          sqlite3_finalize(ppstmt);
+          db_.insertIntoTable("entailment", std::move(fields));
        }
      }
    }
-  }
+    
-  
+    void generator::readWordNetHypernymy()
-  // ms table
-  {
-    std::ifstream wnmsfile(wnpref + "wn_ms.pl");
-    if (!wnmsfile.is_open())
-    {
-      std::cout << "Invalid WordNet data directory." << std::endl;
-      print_usage();
-    }
-    std::list<std::string> lines;
-    for (;;)
    {
-      std::string line;
+      std::list<std::string> lines(readFile(wordNetPath_ + "wn_hyp.pl"));
-      if (!getline(wnmsfile, line))
+      progress ppgs("Writing hypernymy...", lines.size());
+      for (auto line : lines)
      {
-        break;
+        ppgs.update();
+      
+        std::regex relation("^hyp\\(([12]\\d{8}),([12]\\d{8})\\)\\.");
+        std::smatch relation_data;
+        if (!std::regex_search(line, relation_data, relation))
+        {
+          continue;
+        }
+        int lookup1 = std::stoi(relation_data[1]);
+        int lookup2 = std::stoi(relation_data[2]);
+        
+        if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2))
+        {
+          notion& notion1 = *notionByWnid_.at(lookup1);
+          notion& notion2 = *notionByWnid_.at(lookup2);
+          
+          std::list<field> fields;
+          fields.emplace_back("hyponym_id", notion1.getId());
+          fields.emplace_back("hypernym_id", notion2.getId());
+          
+          db_.insertIntoTable("hypernymy", std::move(fields));
+        }
      }
+    }
    
-      if (line.back() == '\r')
+    void generator::readWordNetInstantiation()
+    {
+      std::list<std::string> lines(readFile(wordNetPath_ + "wn_ins.pl"));
+      progress ppgs("Writing instantiation...", lines.size());
+      for (auto line : lines)
      {
-        line.pop_back();
+        ppgs.update();
-      }
      
-      lines.push_back(line);
+        std::regex relation("^ins\\((1\\d{8}),(1\\d{8})\\)\\.");
+        std::smatch relation_data;
+        if (!std::regex_search(line, relation_data, relation))
+        {
+          continue;
+        }
+        int lookup1 = std::stoi(relation_data[1]);
+        int lookup2 = std::stoi(relation_data[2]);
+        
+        if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2))
+        {
+          notion& notion1 = *notionByWnid_.at(lookup1);
+          notion& notion2 = *notionByWnid_.at(lookup2);
+          
+          std::list<field> fields;
+          fields.emplace_back("instance_id", notion1.getId());
+          fields.emplace_back("class_id", notion2.getId());
+          
+          db_.insertIntoTable("instantiation", std::move(fields));
+        }
+      }
    }
    
-    progress ppgs("Writing substance meronyms...", lines.size());
+    void generator::readWordNetMemberMeronymy()
-    for (auto line : lines)
    {
-      ppgs.update();
+      std::list<std::string> lines(readFile(wordNetPath_ + "wn_mm.pl"));
-      
+      progress ppgs("Writing member meronymy...", lines.size());
-      std::regex relation("^ms\\((1\\d{8}),(1\\d{8})\\)\\.");
+      for (auto line : lines)
-      std::smatch relation_data;
-      if (!std::regex_search(line, relation_data, relation))
      {
-        continue;
+        ppgs.update();
-      }
-      
-      int synset_id_1 = stoi(relation_data[1]);
-      int synset_id_2 = stoi(relation_data[2]);
-      std::string query("INSERT INTO substance_meronymy (holonym_id, meronym_id) VALUES (?, ?)");
      
-      for (auto mapping1 : wn[synset_id_1])
+        std::regex relation("^mm\\((1\\d{8}),(1\\d{8})\\)\\.");
-      {
+        std::smatch relation_data;
-        for (auto mapping2 : wn[synset_id_2])
+        if (!std::regex_search(line, relation_data, relation))
        {
-          sqlite3_stmt* ppstmt;
+          continue;
-          if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK)
+        }
-          {
-            db_error(ppdb, query);
+        int lookup1 = std::stoi(relation_data[1]);
-          }
+        int lookup2 = std::stoi(relation_data[2]);
+        
+        if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2))
+        {
+          notion& notion1 = *notionByWnid_.at(lookup1);
+          notion& notion2 = *notionByWnid_.at(lookup2);
          
-          sqlite3_bind_int(ppstmt, 1, mapping1.second);
+          std::list<field> fields;
-          sqlite3_bind_int(ppstmt, 2, mapping2.second);
+          fields.emplace_back("holonym_id", notion1.getId());
+          fields.emplace_back("meronym_id", notion2.getId());
          
-          if (sqlite3_step(ppstmt) != SQLITE_DONE)
+          db_.insertIntoTable("member_meronymy", std::move(fields));
-          {
-            db_error(ppdb, query);
-          }
-          
-          sqlite3_finalize(ppstmt);
        }
      }
    }
-  }
+    
-  
+    void generator::readWordNetPartMeronymy()
-  // mm table
-  {
-    std::ifstream wnmpfile(wnpref + "wn_mp.pl");
-    if (!wnmpfile.is_open())
-    {
-      std::cout << "Invalid WordNet data directory." << std::endl;
-      print_usage();
-    }
-    std::list<std::string> lines;
-    for (;;)
    {
-      std::string line;
+      std::list<std::string> lines(readFile(wordNetPath_ + "wn_mp.pl"));
-      if (!getline(wnmpfile, line))
+      progress ppgs("Writing part meronymy...", lines.size());
+      for (auto line : lines)
      {
-        break;
+        ppgs.update();
+      
+        std::regex relation("^mp\\((1\\d{8}),(1\\d{8})\\)\\.");
+        std::smatch relation_data;
+        if (!std::regex_search(line, relation_data, relation))
+        {
+          continue;
+        }
+        int lookup1 = std::stoi(relation_data[1]);
+        int lookup2 = std::stoi(relation_data[2]);
+        
+        if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2))
+        {
+          notion& notion1 = *notionByWnid_.at(lookup1);
+          notion& notion2 = *notionByWnid_.at(lookup2);
+          
+          std::list<field> fields;
+          fields.emplace_back("holonym_id", notion1.getId());
+          fields.emplace_back("meronym_id", notion2.getId());
+          
+          db_.insertIntoTable("part_meronymy", std::move(fields));
+        }
      }
+    }
    
-      if (line.back() == '\r')
+    void generator::readWordNetSubstanceMeronymy()
+    {
+      std::list<std::string> lines(readFile(wordNetPath_ + "wn_ms.pl"));
+      progress ppgs("Writing substance meronymy...", lines.size());
+      for (auto line : lines)
      {
-        line.pop_back();
+        ppgs.update();
-      }
      
-      lines.push_back(line);
+        std::regex relation("^ms\\((1\\d{8}),(1\\d{8})\\)\\.");
+        std::smatch relation_data;
+        if (!std::regex_search(line, relation_data, relation))
+        {
+          continue;
+        }
+        int lookup1 = std::stoi(relation_data[1]);
+        int lookup2 = std::stoi(relation_data[2]);
+        
+        if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2))
+        {
+          notion& notion1 = *notionByWnid_.at(lookup1);
+          notion& notion2 = *notionByWnid_.at(lookup2);
+          
+          std::list<field> fields;
+          fields.emplace_back("holonym_id", notion1.getId());
+          fields.emplace_back("meronym_id", notion2.getId());
+          
+          db_.insertIntoTable("substance_meronymy", std::move(fields));
+        }
+      }
    }
    
-    progress ppgs("Writing part meronyms...", lines.size());
+    void generator::readWordNetPertainymy()
-    for (auto line : lines)
    {
-      ppgs.update();
+      std::list<std::string> lines(readFile(wordNetPath_ + "wn_per.pl"));
-      
+      progress ppgs("Writing pertainymy and mannernymy...", lines.size());
-      std::regex relation("^mp\\((1\\d{8}),(1\\d{8})\\)\\.");
+      for (auto line : lines)
-      std::smatch relation_data;
-      if (!std::regex_search(line, relation_data, relation))
      {
-        continue;
+        ppgs.update();
-      }
-      
-      int synset_id_1 = stoi(relation_data[1]);
-      int synset_id_2 = stoi(relation_data[2]);
-      std::string query("INSERT INTO part_meronymy (holonym_id, meronym_id) VALUES (?, ?)");
      
-      for (auto mapping1 : wn[synset_id_1])
+        std::regex relation("^per\\(([34]\\d{8}),(\\d+),([13]\\d{8}),(\\d+)\\)\\.");
-      {
+        std::smatch relation_data;
-        for (auto mapping2 : wn[synset_id_2])
+        if (!std::regex_search(line, relation_data, relation))
        {
-          sqlite3_stmt* ppstmt;
+          continue;
-          if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK)
+        }
-          {
-            db_error(ppdb, query);
+        std::pair<int, int> lookup1(std::stoi(relation_data[1]), std::stoi(relation_data[2]));
-          }
+        std::pair<int, int> lookup2(std::stoi(relation_data[3]), std::stoi(relation_data[4]));
+        
+        if (wordByWnidAndWnum_.count(lookup1) && wordByWnidAndWnum_.count(lookup2))
+        {
+          word& word1 = *wordByWnidAndWnum_.at(lookup1);
+          word& word2 = *wordByWnidAndWnum_.at(lookup2);
          
-          sqlite3_bind_int(ppstmt, 1, mapping1.second);
+          if (word1.getNotion().getPartOfSpeech() == part_of_speech::adjective)
-          sqlite3_bind_int(ppstmt, 2, mapping2.second);
+          {
+            std::list<field> fields;
+            fields.emplace_back("pertainym_id", word1.getId());
+            fields.emplace_back("noun_id", word2.getId());
          
-          if (sqlite3_step(ppstmt) != SQLITE_DONE)
+            db_.insertIntoTable("pertainymy", std::move(fields));
+          } else if (word1.getNotion().getPartOfSpeech() == part_of_speech::adverb)
          {
-            db_error(ppdb, query);
+            std::list<field> fields;
-          }
+            fields.emplace_back("mannernym_id", word1.getId());
+            fields.emplace_back("adjective_id", word2.getId());
          
-          sqlite3_finalize(ppstmt);
+            db_.insertIntoTable("mannernymy", std::move(fields));
+          }
        }
      }
    }
-  }
-  
-  // per table
-  {
-    std::ifstream wnperfile(wnpref + "wn_per.pl");
-    if (!wnperfile.is_open())
-    {
-      std::cout << "Invalid WordNet data directory." << std::endl;
-      print_usage();
-    }
-    std::list<std::string> lines;
-    for (;;)
-    {
-      std::string line;
-      if (!getline(wnperfile, line))
-      {
-        break;
-      }
    
-      if (line.back() == '\r')
+    void generator::readWordNetSpecification()
+    {
+      std::list<std::string> lines(readFile(wordNetPath_ + "wn_sa.pl"));
+      progress ppgs("Writing specifications...", lines.size());
+      for (auto line : lines)
      {
-        line.pop_back();
+        ppgs.update();
+        std::regex relation("^sa\\(([23]\\d{8}),(\\d+),([23]\\d{8}),(\\d+)\\)\\."); // [23]: 9-digit verb/adjective synset ids, not a literal "23" prefix
+        std::smatch relation_data;
+        if (!std::regex_search(line, relation_data, relation))
+        {
+          continue; // not an sa(...) fact; ignore the line
+        }
+        std::pair<int, int> lookup1(std::stoi(relation_data[1]), std::stoi(relation_data[2])); // (synset id, word number) of the first word
+        std::pair<int, int> lookup2(std::stoi(relation_data[3]), std::stoi(relation_data[4])); // (synset id, word number) of the second word
+        
+        if (wordByWnidAndWnum_.count(lookup1) && wordByWnidAndWnum_.count(lookup2))
+        {
+          word& word1 = *wordByWnidAndWnum_.at(lookup1);
+          word& word2 = *wordByWnidAndWnum_.at(lookup2);
+          
+          std::list<field> fields;
+          fields.emplace_back("general_id", word1.getId());
+          fields.emplace_back("specific_id", word2.getId());
+          
+          db_.insertIntoTable("specification", std::move(fields));
+        }
      }
-      
-      lines.push_back(line);
    }
    
-    progress ppgs("Writing pertainyms and mannernyms...", lines.size());
+    void generator::readWordNetSimilarity()
-    for (auto line : lines)
    {
-      ppgs.update();
+      std::list<std::string> lines(readFile(wordNetPath_ + "wn_sim.pl"));
-      
+      progress ppgs("Writing adjective similarity...", lines.size());
-      std::regex relation("^per\\(([34]\\d{8}),(\\d+),([13]\\d{8}),(\\d+)\\)\\.");
+      for (auto line : lines)
-      std::smatch relation_data;
-      if (!std::regex_search(line, relation_data, relation))
      {
-        continue;
+        ppgs.update();
-      }
      
-      int synset_id_1 = stoi(relation_data[1]);
+        std::regex relation("^sim\\((3\\d{8}),(3\\d{8})\\)\\.");
-      int wnum_1 = stoi(relation_data[2]);
+        std::smatch relation_data;
-      int synset_id_2 = stoi(relation_data[3]);
+        if (!std::regex_search(line, relation_data, relation))
-      int wnum_2 = stoi(relation_data[4]);
-      std::string query;
-      switch (synset_id_1 / 100000000)
-      {
-        case 3: // Adjective
        {
-          // This is a pertainym, the second word should be a noun
+          continue;
-          // Technically it can be an adjective but we're ignoring that
-          if (synset_id_2 / 100000000 != 1)
-          {
-            continue;
-          }
-          
-          query = "INSERT INTO pertainymy (pertainym_id, noun_id) VALUES (?, ?)";
-          
-          break;
        }
+        int lookup1 = std::stoi(relation_data[1]);
+        int lookup2 = std::stoi(relation_data[2]);
        
-        case 4: // Adverb
+        if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2))
        {
-          // This is a mannernym, the second word should be an adjective
+          notion& notion1 = *notionByWnid_.at(lookup1);
-          if (synset_id_2 / 100000000 != 3)
+          notion& notion2 = *notionByWnid_.at(lookup2);
-          {
-            continue;
-          }
          
-          query = "INSERT INTO mannernymy (mannernym_id, adjective_id) VALUES (?, ?)";
+          std::list<field> fields;
+          fields.emplace_back("adjective_1_id", notion1.getId());
+          fields.emplace_back("adjective_2_id", notion2.getId());
          
-          break;
+          db_.insertIntoTable("similarity", std::move(fields));
        }
      }
-      
+    }
-      sqlite3_stmt* ppstmt;
-      if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
-      {
-        db_error(ppdb, query);
-      }
-    
-      sqlite3_bind_int(ppstmt, 1, wn[synset_id_1][wnum_1]);
-      sqlite3_bind_int(ppstmt, 2, wn[synset_id_2][wnum_2]);
    
-      if (sqlite3_step(ppstmt) != SQLITE_DONE)
+    std::list<std::string> generator::readFile(std::string path)
+    {
+      std::ifstream file(path);
+      if (!file)
      {
-        db_error(ppdb, query);
+        throw std::invalid_argument("Could not find file " + path);
      }
-    
-      sqlite3_finalize(ppstmt);
-    }
-  }
  
-  // sa table
+      std::list<std::string> lines;
-  {
-    std::ifstream wnsafile(wnpref + "wn_sa.pl");
-    if (!wnsafile.is_open())
-    {
-      std::cout << "Invalid WordNet data directory." << std::endl;
-      print_usage();
-    }
-    std::list<std::string> lines;
-    for (;;)
-    {
      std::string line;
-      if (!getline(wnsafile, line))
+      while (std::getline(file, line))
-      {
-        break;
-      }
-    
-      if (line.back() == '\r')
      {
-        line.pop_back();
+        if (!line.empty() && line.back() == '\r') // back() on a blank line is undefined behaviour
+        {
+          line.pop_back(); // strip the CR left over from CRLF line endings
+        }
+      
+        lines.push_back(std::move(line)); // getline() resets the buffer on the next iteration, so moving is safe
      }
      
-      lines.push_back(line);
+      return lines;
    }
    
-    progress ppgs("Writing specifications...", lines.size());
+    part_of_speech generator::partOfSpeechByWnid(int wnid) // Maps a WordNet synset ID to a part of speech; throws std::domain_error when the quotient below is not 1-4.
-    for (auto line : lines)
    {
-      ppgs.update();
+      switch (wnid / 100000000) // Integer quotient; for a nine-digit ID this is its leading digit.
-      
-      std::regex relation("^per\\((3\\d{8}),(\\d+),(3\\d{8}),(\\d+)\\)\\.");
-      std::smatch relation_data;
-      if (!std::regex_search(line, relation_data, relation))
-      {
-        continue;
-      }
-      
-      int synset_id_1 = stoi(relation_data[1]);
-      int wnum_1 = stoi(relation_data[2]);
-      int synset_id_2 = stoi(relation_data[3]);
-      int wnum_2 = stoi(relation_data[4]);
-      std::string query("INSERT INTO specification (general_id, specific_id) VALUES (?, ?)");
-      
-      sqlite3_stmt* ppstmt;
-      if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
      {
-        db_error(ppdb, query);
+        case 1: return part_of_speech::noun;
+        case 2: return part_of_speech::verb;
+        case 3: return part_of_speech::adjective;
+        case 4: return part_of_speech::adverb;
+        default: throw std::domain_error("Invalid WordNet synset ID: " + std::to_string(wnid)); // The message carries the offending ID.
      }
+    }
    
-      sqlite3_bind_int(ppstmt, 1, wn[synset_id_1][wnum_1]);
+    notion& generator::createNotion(part_of_speech partOfSpeech) // Appends a notion built from partOfSpeech to notions_ and returns a reference to it.
-      sqlite3_bind_int(ppstmt, 2, wn[synset_id_2][wnum_2]);
+    {
+      notions_.emplace_back(partOfSpeech);
+      // The new notion is not entered into notionByWnid_ here (contrast lookupOrCreateNotion).
+      return notions_.back();
+    }
    
-      if (sqlite3_step(ppstmt) != SQLITE_DONE)
+    notion& generator::lookupOrCreateNotion(int wnid) // Returns the notion registered for wnid, creating and registering it on first use.
+    {
+      if (!notionByWnid_.count(wnid))
      {
-        db_error(ppdb, query);
+        notions_.emplace_back(partOfSpeechByWnid(wnid), wnid); // partOfSpeechByWnid throws std::domain_error for an unrecognised ID.
+        notionByWnid_[wnid] = &notions_.back(); // NOTE(review): stores an element address; assumes notions_ keeps addresses stable as it grows (e.g. std::list) - confirm in generator.h.
      }
-    
+      
-      sqlite3_finalize(ppstmt);
+      return *notionByWnid_.at(wnid);
-    }
-  }
-  // sim table
-  {
-    std::ifstream wnsimfile(wnpref + "wn_sim.pl");
-    if (!wnsimfile.is_open())
-    {
-      std::cout << "Invalid WordNet data directory." << std::endl;
-      print_usage();
    }
+    
-    std::list<std::string> lines;
+    lemma& generator::lookupOrCreateLemma(std::string base_form) // Returns the lemma registered for base_form, creating and registering it on first use.
-    for (;;)
    {
-      std::string line;
+      if (!lemmaByBaseForm_.count(base_form))
-      if (!getline(wnsimfile, line))
      {
-        break;
+        lemmas_.emplace_back(lookupOrCreateForm(base_form)); // The new lemma is constructed from the form for the same text.
+        lemmaByBaseForm_[base_form] = &lemmas_.back(); // NOTE(review): stores an element address; assumes lemmas_ keeps addresses stable as it grows - confirm in generator.h.
      }
+      
+      return *lemmaByBaseForm_.at(base_form);
+    }
    
-      if (line.back() == '\r')
+    form& generator::lookupOrCreateForm(std::string text) // Returns the form registered for text, creating and registering it on first use.
+    {
+      if (!formByText_.count(text))
      {
-        line.pop_back();
+        forms_.emplace_back(text);
+        formByText_[text] = &forms_.back(); // NOTE(review): stores an element address; assumes forms_ keeps addresses stable as it grows - confirm in generator.h.
      }
      
-      lines.push_back(line);
+      return *formByText_[text]; // The key is present at this point, so operator[] does not insert.
    }
    
-    progress ppgs("Writing sense synonyms...", lines.size());
+    template <typename... Args> word& generator::createWord(Args&&... args) // Constructs a word in words_ from the forwarded arguments, indexes it, and returns a reference to it.
-    for (auto line : lines)
    {
-      ppgs.update();
+      words_.emplace_back(std::forward<Args>(args)...);
+      word& w = words_.back(); // The address of this element is stored in the indexes below.
      
-      std::regex relation("^sim\\((3\\d{8}),(3\\d{8})\\)\\.");
+      wordsByBaseForm_[w.getLemma().getBaseForm().getText()].insert(&w); // Index by the text of the lemma's base form.
-      std::smatch relation_data;
+      // Only words whose notion carries a WordNet ID are indexed by that ID.
-      if (!std::regex_search(line, relation_data, relation))
+      if (w.getNotion().hasWnid())
      {
-        continue;
+        wordsByWnid_[w.getNotion().getWnid()].insert(&w);
      }
      
-      int synset_id_1 = stoi(relation_data[1]);
+      return w;
-      int synset_id_2 = stoi(relation_data[2]);
+    }
-      std::string query("INSERT INTO adjective_synonymy (adjective_1_id, adjective_2_id) VALUES (?, ?)");
+    
+    group& generator::createGroup(xmlNodePtr top) // Builds a group from the XML element top, handling its SUBCLASSES, MEMBERS, THEMROLES and FRAMES children and recursing into VNSUBCLASS elements; returns the new group.
+    {
+      groups_.emplace_back();
+      group& grp = groups_.back(); // NOTE(review): held across the recursive createGroup calls below, which append to groups_; assumes groups_ keeps references stable (e.g. std::list) - confirm in generator.h.
      
-      for (auto mapping1 : wn[synset_id_1])
+      xmlChar* key; // Scratch pointer for attribute values returned by xmlGetProp; each value is released with xmlFree once copied.
+  
+      for (xmlNodePtr node = top->xmlChildrenNode; node != nullptr; node = node->next)
      {
-        for (auto mapping2 : wn[synset_id_2])
+        if (!xmlStrcmp(node->name, reinterpret_cast<const xmlChar*>("SUBCLASSES"))) // Nested subgroups: each VNSUBCLASS becomes a child group of grp.
        {
-          sqlite3_stmt* ppstmt;
+          for (xmlNodePtr subclass = node->xmlChildrenNode; subclass != nullptr; subclass = subclass->next)
-          if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK)
          {
-            db_error(ppdb, query);
+            if (!xmlStrcmp(subclass->name, reinterpret_cast<const xmlChar*>("VNSUBCLASS")))
+            {
+              try
+              {
+                group& subgrp = createGroup(subclass);
+                subgrp.setParent(grp);
+              } catch (const std::exception& e) // Re-thrown below as a nested std::logic_error that names the subgroup being parsed.
+              {
+                key = xmlGetProp(subclass, reinterpret_cast<const xmlChar*>("ID"));
+                // The ID attribute is only used to label the error; its absence gets a generic message.
+                if (key == nullptr)
+                {
+                  std::throw_with_nested(std::logic_error("Error parsing IDless subgroup"));
+                } else {
+                  std::string subgroupId(reinterpret_cast<const char*>(key));
+                  xmlFree(key);
+                  // key was copied and freed above, so nothing is leaked by throwing here.
+                  std::throw_with_nested(std::logic_error("Error parsing subgroup " + subgroupId));
+                }
+              }
+            }
          }
-          
+        } else if (!xmlStrcmp(node->name, reinterpret_cast<const xmlChar*>("MEMBERS"))) // Member verbs: attach existing word senses to grp, or create a new word when none match.
-          sqlite3_bind_int(ppstmt, 1, mapping1.second);
+        {
-          sqlite3_bind_int(ppstmt, 2, mapping2.second);
+          for (xmlNodePtr member = node->xmlChildrenNode; member != nullptr; member = member->next)
-          
-          if (sqlite3_step(ppstmt) != SQLITE_DONE)
          {
-            db_error(ppdb, query);
+            if (!xmlStrcmp(member->name, reinterpret_cast<const xmlChar*>("MEMBER")))
+            {
+              key = xmlGetProp(member, reinterpret_cast<const xmlChar*>("wn"));
+              std::string wnSenses(reinterpret_cast<const char*>(key)); // NOTE(review): key is not checked for nullptr here, nor at the other xmlGetProp calls in this function apart from the subgroup ID; presumably the input always carries these attributes - confirm.
+              xmlFree(key);
+              // The wn attribute is treated as a space-separated list of sense keys.
+              auto wnSenseKeys = split<std::list<std::string>>(wnSenses, " ");
+              if (!wnSenseKeys.empty())
+              {
+                std::list<std::string> tempKeys;
+                // Append "::" to every key before looking it up in wnSenseKeys_.
+                std::transform(std::begin(wnSenseKeys), std::end(wnSenseKeys), std::back_inserter(tempKeys), [] (std::string sense) {
+                  return sense + "::";
+                });
+                
+                std::list<std::string> filteredKeys;
+                // Keep only the keys that are present in wnSenseKeys_.
+                std::remove_copy_if(std::begin(tempKeys), std::end(tempKeys), std::back_inserter(filteredKeys), [&] (std::string sense) {
+                  return !wnSenseKeys_.count(sense);
+                });
+                
+                wnSenseKeys = std::move(filteredKeys);
+              }
+              
+              if (!wnSenseKeys.empty())
+              {
+                for (std::string sense : wnSenseKeys)
+                {
+                  word& wordSense = *wnSenseKeys_[sense]; // Every remaining key is known to be in wnSenseKeys_, so this does not insert.
+                  wordSense.setVerbGroup(grp);
+                }
+              } else { // No known sense key: create a fresh verb word from the member's name attribute.
+                key = xmlGetProp(member, reinterpret_cast<const xmlChar*>("name"));
+                std::string memberName(reinterpret_cast<const char*>(key));
+                xmlFree(key);
+                
+                notion& n = createNotion(part_of_speech::verb);
+                lemma& l = lookupOrCreateLemma(memberName);
+                word& w = createWord(n, l);
+                
+                w.setVerbGroup(grp);
+              }
+            }
          }
-          
+        } else if (!xmlStrcmp(node->name, reinterpret_cast<const xmlChar*>("THEMROLES"))) // Thematic roles: one role per THEMROLE, named by its type attribute.
-          sqlite3_reset(ppstmt);
+        {
-          sqlite3_clear_bindings(ppstmt);
+          for (xmlNodePtr roletopnode = node->xmlChildrenNode; roletopnode != nullptr; roletopnode = roletopnode->next)
-          
-          sqlite3_bind_int(ppstmt, 1, mapping2.second);
-          sqlite3_bind_int(ppstmt, 2, mapping1.second);
-          
-          if (sqlite3_step(ppstmt) != SQLITE_DONE)
          {
-            db_error(ppdb, query);
+            if (!xmlStrcmp(roletopnode->name, reinterpret_cast<const xmlChar*>("THEMROLE")))
+            {
+              role r;
+              
+              key = xmlGetProp(roletopnode, reinterpret_cast<const xmlChar*>("type"));
+              std::string roleName = reinterpret_cast<const char*>(key);
+              xmlFree(key);
+              // A SELRESTRS child, when present, supplies the role's selectional restrictions.
+              for (xmlNodePtr rolenode = roletopnode->xmlChildrenNode; rolenode != nullptr; rolenode = rolenode->next)
+              {
+                if (!xmlStrcmp(rolenode->name, reinterpret_cast<const xmlChar*>("SELRESTRS")))
+                {
+                  r.setSelrestrs(parseSelrestr(rolenode));
+                }
+              }
+              grp.addRole(roleName, std::move(r));
+            }
          }
+        } else if (!xmlStrcmp(node->name, reinterpret_cast<const xmlChar*>("FRAMES"))) // Frames: each FRAME's SYNTAX children are translated into a sequence of parts.
+        {
+          for (xmlNodePtr frametopnode = node->xmlChildrenNode; frametopnode != nullptr; frametopnode = frametopnode->next)
+          {
+            if (!xmlStrcmp(frametopnode->name, reinterpret_cast<const xmlChar*>("FRAME")))
+            {
+              frames_.emplace_back();
+              frame& fr = frames_.back(); // Parts are appended to this frame in document order.
          
-          sqlite3_finalize(ppstmt);
+              for (xmlNodePtr framenode = frametopnode->xmlChildrenNode; framenode != nullptr; framenode = framenode->next)
+              {
+                if (!xmlStrcmp(framenode->name, reinterpret_cast<const xmlChar*>("SYNTAX")))
+                {
+                  for (xmlNodePtr syntaxnode = framenode->xmlChildrenNode; syntaxnode != nullptr; syntaxnode = syntaxnode->next)
+                  {                
+                    if (!xmlStrcmp(syntaxnode->name, reinterpret_cast<const xmlChar*>("NP"))) // Noun phrase: role name from value, plus optional SYNRESTRS and SELRESTRS children.
+                    {
+                      key = xmlGetProp(syntaxnode, reinterpret_cast<const xmlChar*>("value"));
+                      std::string partRole = reinterpret_cast<const char*>(key);
+                      xmlFree(key);
+                  
+                      selrestr partSelrestrs;
+                      std::set<std::string> partSynrestrs;
+                      for (xmlNodePtr npnode = syntaxnode->xmlChildrenNode; npnode != nullptr; npnode = npnode->next)
+                      {
+                        if (!xmlStrcmp(npnode->name, reinterpret_cast<const xmlChar*>("SYNRESTRS"))) // Collect the type attribute of every SYNRESTR child.
+                        {
+                          for (xmlNodePtr synrestr = npnode->xmlChildrenNode; synrestr != nullptr; synrestr = synrestr->next)
+                          {
+                            if (!xmlStrcmp(synrestr->name, reinterpret_cast<const xmlChar*>("SYNRESTR")))
+                            {
+                              key = xmlGetProp(synrestr, reinterpret_cast<const xmlChar*>("type"));
+                              partSynrestrs.insert(reinterpret_cast<const char*>(key));
+                              xmlFree(key);
+                            }
+                          }
+                        }
+                  
+                        if (!xmlStrcmp(npnode->name, reinterpret_cast<const xmlChar*>("SELRESTRS")))
+                        {
+                          partSelrestrs = parseSelrestr(npnode);
+                        }
+                      }
+                      
+                      fr.push_back(part::createNounPhrase(std::move(partRole), std::move(partSelrestrs), std::move(partSynrestrs)));
+                    } else if (!xmlStrcmp(syntaxnode->name, reinterpret_cast<const xmlChar*>("VERB")))
+                    {
+                      fr.push_back(part::createVerb());
+                    } else if (!xmlStrcmp(syntaxnode->name, reinterpret_cast<const xmlChar*>("PREP"))) // Preposition: either literal choices from value, or SELRESTR types.
+                    {
+                      std::set<std::string> partChoices;
+                      bool partLiteral; // Assigned in both branches below: true exactly when the PREP element has a value attribute.
+                      
+                      if (xmlHasProp(syntaxnode, reinterpret_cast<const xmlChar*>("value")))
+                      {
+                        partLiteral = true;
+                        
+                        key = xmlGetProp(syntaxnode, reinterpret_cast<const xmlChar*>("value"));
+                        std::string choicesStr = reinterpret_cast<const char*>(key);
+                        xmlFree(key);
+                        // The value attribute is split on spaces into the set of choices.
+                        split(choicesStr, " ", std::inserter(partChoices, std::end(partChoices)));
+                      } else {
+                        partLiteral = false;
+                        // Without a value attribute, the choices are the type attributes of SELRESTR elements under SELRESTRS.
+                        for (xmlNodePtr npnode = syntaxnode->xmlChildrenNode; npnode != nullptr; npnode = npnode->next)
+                        {
+                          if (!xmlStrcmp(npnode->name, reinterpret_cast<const xmlChar*>("SELRESTRS")))
+                          {
+                            for (xmlNodePtr synrestr = npnode->xmlChildrenNode; synrestr != nullptr; synrestr = synrestr->next)
+                            {
+                              if (!xmlStrcmp(synrestr->name, reinterpret_cast<const xmlChar*>("SELRESTR")))
+                              {
+                                key = xmlGetProp(synrestr, reinterpret_cast<const xmlChar*>("type"));
+                                partChoices.insert(reinterpret_cast<const char*>(key));
+                                xmlFree(key);
+                              }
+                            }
+                          }
+                        }
+                      }
+                  
+                      fr.push_back(part::createPreposition(std::move(partChoices), partLiteral));
+                    } else if (!xmlStrcmp(syntaxnode->name, reinterpret_cast<const xmlChar*>("ADJ")))
+                    {
+                      fr.push_back(part::createAdjective());
+                    } else if (!xmlStrcmp(syntaxnode->name, reinterpret_cast<const xmlChar*>("ADV")))
+                    {
+                      fr.push_back(part::createAdverb());
+                    } else if (!xmlStrcmp(syntaxnode->name, reinterpret_cast<const xmlChar*>("LEX"))) // Literal token taken verbatim from the value attribute.
+                    {
+                      key = xmlGetProp(syntaxnode, reinterpret_cast<const xmlChar*>("value"));
+                      std::string literalValue = reinterpret_cast<const char*>(key);
+                      xmlFree(key);
+                      
+                      fr.push_back(part::createLiteral(literalValue));
+                    } else { // Any other node under SYNTAX is skipped.
+                      continue;
+                    }
+                  }
+                  grp.addFrame(fr); // Called once per SYNTAX element, after all of its parts have been appended.
+                }
+              }
+            }
+          }
        }
      }
-    }
-  }
-  
-  // syntax table
-  {
-    std::ifstream wnsyntaxfile(wnpref + "wn_syntax.pl");
-    if (!wnsyntaxfile.is_open())
-    {
-      std::cout << "Invalid WordNet data directory." << std::endl;
-      print_usage();
-    }
-    std::list<std::string> lines;
+      return grp;
-    for (;;)
-    {
-      std::string line;
-      if (!getline(wnsyntaxfile, line))
-      {
-        break;
-      }
-    
-      if (line.back() == '\r')
-      {
-        line.pop_back();
-      }
-      
-      lines.push_back(line);
    }
    
-    progress ppgs("Writing adjective syntax markers...", lines.size());
+    selrestr generator::parseSelrestr(xmlNodePtr top) // Converts a SELRESTRS or SELRESTR element into a selrestr; throws std::logic_error for any other element or for a SELRESTR lacking its Value or type attribute.
-    for (auto line : lines)
    {
-      ppgs.update();
+      xmlChar* key; // Scratch pointer for attribute values returned by xmlGetProp; released with xmlFree after use.
-      
+  
-      std::regex relation("^syntax\\((3\\d{8}),(\\d+),([ipa])p?\\)\\.");
+      if (!xmlStrcmp(top->name, reinterpret_cast<const xmlChar*>("SELRESTRS")))
-      std::smatch relation_data;
-      if (!std::regex_search(line, relation_data, relation))
-      {
-        continue;
-      }
-      
-      int synset_id = stoi(relation_data[1]);
-      int wnum = stoi(relation_data[2]);
-      std::string syn = relation_data[3];
-      std::string query("UPDATE adjectives SET position = ? WHERE adjective_id = ?");
-      
-      sqlite3_stmt* ppstmt;
-      if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK)
      {
-        db_error(ppdb, query);
+        if (xmlChildElementCount(top) == 0) // No element children: return a default-constructed selrestr.
-      }
+        {
-      
+          return {};
-      sqlite3_bind_text(ppstmt, 1, syn.c_str(), 1, SQLITE_TRANSIENT);
+        } else if (xmlChildElementCount(top) == 1) // Exactly one element child: the result is that child's restriction; the logic attribute is not consulted.
-      sqlite3_bind_int(ppstmt, 2, wn[synset_id][wnum]);
+        {
-      
+          return parseSelrestr(xmlFirstElementChild(top));
-      if (sqlite3_step(ppstmt) != SQLITE_DONE)
+        } else {
+          bool orlogic = false; // Stays false unless the element carries logic="or".
+          if (xmlHasProp(top, reinterpret_cast<const xmlChar*>("logic")))
+          {
+            key = xmlGetProp(top, reinterpret_cast<const xmlChar*>("logic"));
+            if (!xmlStrcmp(key, reinterpret_cast<const xmlChar*>("or")))
+            {
+              orlogic = true;
+            }
+            
+            xmlFree(key);
+          }
+  
+          std::list<selrestr> children;
+          for (xmlNodePtr selrestr = top->xmlChildrenNode; selrestr != nullptr; selrestr = selrestr->next) // The loop variable shadows the selrestr type inside the loop body only.
+          {
+            if (!xmlStrcmp(selrestr->name, reinterpret_cast<const xmlChar*>("SELRESTRS"))
+              || !xmlStrcmp(selrestr->name, reinterpret_cast<const xmlChar*>("SELRESTR")))
+            {
+              children.push_back(parseSelrestr(selrestr));
+            }
+          }
+          
+          return selrestr(children, orlogic);
+        }
+      } else if (!xmlStrcmp(top->name, reinterpret_cast<const xmlChar*>("SELRESTR")))
      {
-        db_error(ppdb, query);
+        if (!xmlHasProp(top, reinterpret_cast<const xmlChar*>("Value")) || !xmlHasProp(top, reinterpret_cast<const xmlChar*>("type"))) { throw std::logic_error("Badly formatted selrestr"); } // Guard: xmlGetProp returns nullptr for a missing attribute, and building a std::string from nullptr is undefined behaviour.
+        key = xmlGetProp(top, reinterpret_cast<const xmlChar*>("Value"));
+        bool selPos = (std::string(reinterpret_cast<const char*>(key)) == "+"); // Value="+" marks a positive restriction; anything else is negative.
+        xmlFree(key);
+        key = xmlGetProp(top, reinterpret_cast<const xmlChar*>("type"));
+        std::string selRestriction = reinterpret_cast<const char*>(key);
+        xmlFree(key);
+        return selrestr(selRestriction, selPos);
+      } else {
+        throw std::logic_error("Badly formatted selrestr");
      }
-      
-      sqlite3_finalize(ppstmt);
    }
-  }
+    
-  
+  };
-  sqlite3_close_v2(ppdb);
+};
-  
-  std::cout << "Done." << std::endl;
-}