diff options
Diffstat (limited to 'generator')
| -rw-r--r-- | generator/CMakeLists.txt | 6 | ||||
| -rw-r--r-- | generator/database.cpp | 173 | ||||
| -rw-r--r-- | generator/database.h | 73 | ||||
| -rw-r--r-- | generator/field.cpp | 193 | ||||
| -rw-r--r-- | generator/field.h | 76 | ||||
| -rw-r--r-- | generator/form.cpp | 53 | ||||
| -rw-r--r-- | generator/form.h | 71 | ||||
| -rw-r--r-- | generator/frame.cpp | 83 | ||||
| -rw-r--r-- | generator/frame.h | 59 | ||||
| -rw-r--r-- | generator/generator.cpp | 3145 | ||||
| -rw-r--r-- | generator/generator.h | 151 | ||||
| -rw-r--r-- | generator/group.cpp | 119 | ||||
| -rw-r--r-- | generator/group.h | 80 | ||||
| -rw-r--r-- | generator/lemma.cpp | 65 | ||||
| -rw-r--r-- | generator/lemma.h | 58 | ||||
| -rw-r--r-- | generator/main.cpp | 40 | ||||
| -rw-r--r-- | generator/notion.cpp | 85 | ||||
| -rw-r--r-- | generator/notion.h | 91 | ||||
| -rw-r--r-- | generator/part.cpp | 336 | ||||
| -rw-r--r-- | generator/part.h | 114 | ||||
| -rw-r--r-- | generator/progress.h | 78 | ||||
| -rw-r--r-- | generator/pronunciation.cpp | 87 | ||||
| -rw-r--r-- | generator/pronunciation.h | 82 | ||||
| -rw-r--r-- | generator/role.h | 35 | ||||
| -rw-r--r-- | generator/schema.sql | 352 | ||||
| -rw-r--r-- | generator/selrestr.cpp | 288 | ||||
| -rw-r--r-- | generator/selrestr.h | 88 | ||||
| -rw-r--r-- | generator/word.cpp | 77 | ||||
| -rw-r--r-- | generator/word.h | 110 |
29 files changed, 4018 insertions, 2250 deletions
| diff --git a/generator/CMakeLists.txt b/generator/CMakeLists.txt index 552526d..4f78eb8 100644 --- a/generator/CMakeLists.txt +++ b/generator/CMakeLists.txt | |||
| @@ -1,12 +1,12 @@ | |||
| 1 | cmake_minimum_required (VERSION 2.6) | 1 | cmake_minimum_required (VERSION 3.1) |
| 2 | project (generator) | 2 | project (generator) |
| 3 | 3 | ||
| 4 | find_package(PkgConfig) | 4 | find_package(PkgConfig) |
| 5 | pkg_check_modules(sqlite3 sqlite3 REQUIRED) | 5 | pkg_check_modules(sqlite3 sqlite3 REQUIRED) |
| 6 | find_package(libxml2 REQUIRED) | 6 | find_package(libxml2 REQUIRED) |
| 7 | 7 | ||
| 8 | include_directories(${sqlite3_INCLUDE_DIR} ${LIBXML2_INCLUDE_DIR} ../vendor/json/src) | 8 | include_directories(${sqlite3_INCLUDE_DIR} ${LIBXML2_INCLUDE_DIR} ../vendor/json) |
| 9 | add_executable(generator generator.cpp) | 9 | add_executable(generator notion.cpp word.cpp lemma.cpp form.cpp pronunciation.cpp group.cpp frame.cpp part.cpp selrestr.cpp database.cpp field.cpp generator.cpp main.cpp) |
| 10 | set_property(TARGET generator PROPERTY CXX_STANDARD 11) | 10 | set_property(TARGET generator PROPERTY CXX_STANDARD 11) |
| 11 | set_property(TARGET generator PROPERTY CXX_STANDARD_REQUIRED ON) | 11 | set_property(TARGET generator PROPERTY CXX_STANDARD_REQUIRED ON) |
| 12 | target_link_libraries(generator ${sqlite3_LIBRARIES} ${LIBXML2_LIBRARIES}) | 12 | target_link_libraries(generator ${sqlite3_LIBRARIES} ${LIBXML2_LIBRARIES}) |
| diff --git a/generator/database.cpp b/generator/database.cpp new file mode 100644 index 0000000..c7e4cfa --- /dev/null +++ b/generator/database.cpp | |||
| @@ -0,0 +1,173 @@ | |||
| 1 | #include "database.h" | ||
| 2 | #include <sqlite3.h> | ||
| 3 | #include <cassert> | ||
| 4 | #include <fstream> | ||
| 5 | #include <stdexcept> | ||
| 6 | #include <cstdio> | ||
| 7 | #include <sstream> | ||
| 8 | #include "field.h" | ||
| 9 | #include "../lib/util.h" | ||
| 10 | |||
| 11 | namespace verbly { | ||
| 12 | namespace generator { | ||
| 13 | |||
| 14 | sqlite3_error::sqlite3_error( | ||
| 15 | const std::string& what, | ||
| 16 | const std::string& db_err) : | ||
| 17 | what_(what + " (" + db_err + ")"), | ||
| 18 | db_err_(db_err) | ||
| 19 | { | ||
| 20 | } | ||
| 21 | |||
| 22 | const char* sqlite3_error::what() const noexcept | ||
| 23 | { | ||
| 24 | return what_.c_str(); | ||
| 25 | } | ||
| 26 | |||
| 27 | const char* sqlite3_error::db_err() const noexcept | ||
| 28 | { | ||
| 29 | return db_err_.c_str(); | ||
| 30 | } | ||
| 31 | |||
| 32 | database::database(std::string path) | ||
| 33 | { | ||
| 34 | // If there is already a file at this path, overwrite it. | ||
| 35 | if (std::ifstream(path)) | ||
| 36 | { | ||
| 37 | if (std::remove(path.c_str())) | ||
| 38 | { | ||
| 39 | throw std::logic_error("Could not overwrite file at path"); | ||
| 40 | } | ||
| 41 | } | ||
| 42 | |||
| 43 | if (sqlite3_open_v2(path.c_str(), &ppdb_, SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE, NULL) != SQLITE_OK) | ||
| 44 | { | ||
| 45 | // We still have to free the resources allocated. In the event that | ||
| 46 | // allocation failed, ppdb will be null and sqlite3_close_v2 will just | ||
| 47 | // ignore it. | ||
| 48 | std::string errmsg(sqlite3_errmsg(ppdb_)); | ||
| 49 | sqlite3_close_v2(ppdb_); | ||
| 50 | |||
| 51 | throw sqlite3_error("Could not create output datafile", errmsg); | ||
| 52 | } | ||
| 53 | } | ||
| 54 | |||
| 55 | database::database(database&& other) : database() | ||
| 56 | { | ||
| 57 | swap(*this, other); | ||
| 58 | } | ||
| 59 | |||
| 60 | database& database::operator=(database&& other) | ||
| 61 | { | ||
| 62 | swap(*this, other); | ||
| 63 | |||
| 64 | return *this; | ||
| 65 | } | ||
| 66 | |||
| 67 | void swap(database& first, database& second) | ||
| 68 | { | ||
| 69 | std::swap(first.ppdb_, second.ppdb_); | ||
| 70 | } | ||
| 71 | |||
| 72 | database::~database() | ||
| 73 | { | ||
| 74 | sqlite3_close_v2(ppdb_); | ||
| 75 | } | ||
| 76 | |||
| 77 | void database::runQuery(std::string query) | ||
| 78 | { | ||
| 79 | // This can only happen when doing bad things with move semantics. | ||
| 80 | assert(ppdb_ != nullptr); | ||
| 81 | |||
| 82 | sqlite3_stmt* ppstmt; | ||
| 83 | |||
| 84 | if (sqlite3_prepare_v2(ppdb_, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
| 85 | { | ||
| 86 | throw sqlite3_error("Error writing to database", sqlite3_errmsg(ppdb_)); | ||
| 87 | } | ||
| 88 | |||
| 89 | int result = sqlite3_step(ppstmt); | ||
| 90 | sqlite3_finalize(ppstmt); | ||
| 91 | |||
| 92 | if (result != SQLITE_DONE) | ||
| 93 | { | ||
| 94 | throw sqlite3_error("Error writing to database", sqlite3_errmsg(ppdb_)); | ||
| 95 | } | ||
| 96 | } | ||
| 97 | |||
| 98 | void database::insertIntoTable(std::string table, std::list<field> fields) | ||
| 99 | { | ||
| 100 | // This can only happen when doing bad things with move semantics. | ||
| 101 | assert(ppdb_ != nullptr); | ||
| 102 | |||
| 103 | // This shouldn't happen. | ||
| 104 | assert(!fields.empty()); | ||
| 105 | |||
| 106 | std::list<std::string> fieldNames; | ||
| 107 | std::list<std::string> qs; | ||
| 108 | for (field& f : fields) | ||
| 109 | { | ||
| 110 | fieldNames.push_back(f.getName()); | ||
| 111 | qs.push_back("?"); | ||
| 112 | } | ||
| 113 | |||
| 114 | std::ostringstream query; | ||
| 115 | query << "INSERT INTO "; | ||
| 116 | query << table; | ||
| 117 | query << " ("; | ||
| 118 | query << implode(std::begin(fieldNames), std::end(fieldNames), ", "); | ||
| 119 | query << ") VALUES ("; | ||
| 120 | query << implode(std::begin(qs), std::end(qs), ", "); | ||
| 121 | query << ")"; | ||
| 122 | |||
| 123 | std::string query_str = query.str(); | ||
| 124 | |||
| 125 | sqlite3_stmt* ppstmt; | ||
| 126 | |||
| 127 | if (sqlite3_prepare_v2(ppdb_, query_str.c_str(), query_str.length(), &ppstmt, NULL) != SQLITE_OK) | ||
| 128 | { | ||
| 129 | throw sqlite3_error("Error writing to database", sqlite3_errmsg(ppdb_)); | ||
| 130 | } | ||
| 131 | |||
| 132 | int i = 1; | ||
| 133 | for (field& f : fields) | ||
| 134 | { | ||
| 135 | switch (f.getType()) | ||
| 136 | { | ||
| 137 | case field::type::integer: | ||
| 138 | { | ||
| 139 | sqlite3_bind_int(ppstmt, i, f.getInteger()); | ||
| 140 | |||
| 141 | break; | ||
| 142 | } | ||
| 143 | |||
| 144 | case field::type::string: | ||
| 145 | { | ||
| 146 | sqlite3_bind_text(ppstmt, i, f.getString().c_str(), f.getString().length(), SQLITE_TRANSIENT); | ||
| 147 | |||
| 148 | break; | ||
| 149 | } | ||
| 150 | |||
| 151 | case field::type::invalid: | ||
| 152 | { | ||
| 153 | // Fields can only be invalid when doing bad things with move semantics. | ||
| 154 | assert(false); | ||
| 155 | |||
| 156 | break; | ||
| 157 | } | ||
| 158 | } | ||
| 159 | |||
| 160 | i++; | ||
| 161 | } | ||
| 162 | |||
| 163 | int result = sqlite3_step(ppstmt); | ||
| 164 | sqlite3_finalize(ppstmt); | ||
| 165 | |||
| 166 | if (result != SQLITE_DONE) | ||
| 167 | { | ||
| 168 | throw sqlite3_error("Error writing to database", sqlite3_errmsg(ppdb_)); | ||
| 169 | } | ||
| 170 | } | ||
| 171 | |||
| 172 | }; | ||
| 173 | }; | ||
| diff --git a/generator/database.h b/generator/database.h new file mode 100644 index 0000000..15cdff5 --- /dev/null +++ b/generator/database.h | |||
| @@ -0,0 +1,73 @@ | |||
#ifndef DATABASE_H_0B0A47D2
#define DATABASE_H_0B0A47D2

#include <string>
#include <exception>
#include <list>

struct sqlite3;

namespace verbly {
  namespace generator {

    class field;

    // Exception thrown when a SQLite operation fails. Carries both a
    // human-readable description and the underlying SQLite error string.
    class sqlite3_error : public std::exception {
    public:

      sqlite3_error(const std::string& what, const std::string& db_err);

      const char* what() const noexcept override;
      const char* db_err() const noexcept;

    private:
      std::string what_;
      std::string db_err_;

    };

    // Owns a SQLite connection to the output datafile. Movable but not
    // copyable; the connection is closed on destruction.
    class database {
    public:

      // Opens (overwriting, if necessary) the datafile at the given path.

      explicit database(std::string path);

      // Copying is disabled because this class owns the connection handle.

      database(const database& other) = delete;
      database& operator=(const database& other) = delete;

      // Moving transfers ownership of the connection handle.

      database(database&& other);
      database& operator=(database&& other);

      // Exchanges the connection handles of two databases.

      friend void swap(database& first, database& second);

      // Closes the connection, if one is open.

      ~database();

      // Executes a single SQL statement; throws sqlite3_error on failure.

      void runQuery(std::string query);

      // Inserts one row into the named table; throws sqlite3_error on failure.

      void insertIntoTable(std::string table, std::list<field> fields);

    private:

      // Produces an empty shell for the move operations to swap into.
      database()
      {
      }

      sqlite3* ppdb_ = nullptr;

    };

  };
};

#endif /* end of include guard: DATABASE_H_0B0A47D2 */
| diff --git a/generator/field.cpp b/generator/field.cpp new file mode 100644 index 0000000..84b2f91 --- /dev/null +++ b/generator/field.cpp | |||
| @@ -0,0 +1,193 @@ | |||
| 1 | #include "field.h" | ||
| 2 | #include <stdexcept> | ||
| 3 | #include <utility> | ||
| 4 | |||
| 5 | namespace verbly { | ||
| 6 | namespace generator { | ||
| 7 | |||
| 8 | field::field(const field& other) | ||
| 9 | { | ||
| 10 | type_ = other.type_; | ||
| 11 | name_ = other.name_; | ||
| 12 | |||
| 13 | switch (type_) | ||
| 14 | { | ||
| 15 | case type::integer: | ||
| 16 | { | ||
| 17 | integer_ = other.integer_; | ||
| 18 | |||
| 19 | break; | ||
| 20 | } | ||
| 21 | |||
| 22 | case type::string: | ||
| 23 | { | ||
| 24 | new(&string_) std::string(other.string_); | ||
| 25 | |||
| 26 | break; | ||
| 27 | } | ||
| 28 | |||
| 29 | case type::invalid: | ||
| 30 | { | ||
| 31 | break; | ||
| 32 | } | ||
| 33 | } | ||
| 34 | } | ||
| 35 | |||
| 36 | field::field(field&& other) : field() | ||
| 37 | { | ||
| 38 | swap(*this, other); | ||
| 39 | } | ||
| 40 | |||
| 41 | field& field::operator=(field other) | ||
| 42 | { | ||
| 43 | swap(*this, other); | ||
| 44 | |||
| 45 | return *this; | ||
| 46 | } | ||
| 47 | |||
| 48 | void swap(field& first, field& second) | ||
| 49 | { | ||
| 50 | using type = field::type; | ||
| 51 | |||
| 52 | type tempType = first.type_; | ||
| 53 | std::string tempName = std::move(first.name_); | ||
| 54 | int tempInteger; | ||
| 55 | std::string tempString; | ||
| 56 | |||
| 57 | switch (first.type_) | ||
| 58 | { | ||
| 59 | case type::integer: | ||
| 60 | { | ||
| 61 | tempInteger = first.integer_; | ||
| 62 | |||
| 63 | break; | ||
| 64 | } | ||
| 65 | |||
| 66 | case type::string: | ||
| 67 | { | ||
| 68 | tempString = std::move(tempString); | ||
| 69 | |||
| 70 | break; | ||
| 71 | } | ||
| 72 | |||
| 73 | case type::invalid: | ||
| 74 | { | ||
| 75 | break; | ||
| 76 | } | ||
| 77 | } | ||
| 78 | |||
| 79 | first.~field(); | ||
| 80 | |||
| 81 | first.type_ = second.type_; | ||
| 82 | first.name_ = std::move(second.name_); | ||
| 83 | |||
| 84 | switch (second.type_) | ||
| 85 | { | ||
| 86 | case type::integer: | ||
| 87 | { | ||
| 88 | first.integer_ = second.integer_; | ||
| 89 | |||
| 90 | break; | ||
| 91 | } | ||
| 92 | |||
| 93 | case type::string: | ||
| 94 | { | ||
| 95 | new(&first.string_) std::string(std::move(second.string_)); | ||
| 96 | |||
| 97 | break; | ||
| 98 | } | ||
| 99 | |||
| 100 | case type::invalid: | ||
| 101 | { | ||
| 102 | break; | ||
| 103 | } | ||
| 104 | } | ||
| 105 | |||
| 106 | second.~field(); | ||
| 107 | |||
| 108 | second.type_ = tempType; | ||
| 109 | second.name_ = std::move(tempName); | ||
| 110 | |||
| 111 | switch (tempType) | ||
| 112 | { | ||
| 113 | case type::integer: | ||
| 114 | { | ||
| 115 | second.integer_ = tempInteger; | ||
| 116 | |||
| 117 | break; | ||
| 118 | } | ||
| 119 | |||
| 120 | case type::string: | ||
| 121 | { | ||
| 122 | new(&second.string_) std::string(std::move(tempString)); | ||
| 123 | |||
| 124 | break; | ||
| 125 | } | ||
| 126 | |||
| 127 | case type::invalid: | ||
| 128 | { | ||
| 129 | break; | ||
| 130 | } | ||
| 131 | } | ||
| 132 | } | ||
| 133 | |||
| 134 | field::~field() | ||
| 135 | { | ||
| 136 | switch (type_) | ||
| 137 | { | ||
| 138 | case type::string: | ||
| 139 | { | ||
| 140 | using string_type = std::string; | ||
| 141 | string_.~string_type(); | ||
| 142 | |||
| 143 | break; | ||
| 144 | } | ||
| 145 | |||
| 146 | case type::integer: | ||
| 147 | case type::invalid: | ||
| 148 | { | ||
| 149 | break; | ||
| 150 | } | ||
| 151 | } | ||
| 152 | } | ||
| 153 | |||
| 154 | field::field( | ||
| 155 | std::string name, | ||
| 156 | int arg) : | ||
| 157 | type_(type::integer), | ||
| 158 | name_(name), | ||
| 159 | integer_(arg) | ||
| 160 | { | ||
| 161 | } | ||
| 162 | |||
| 163 | int field::getInteger() const | ||
| 164 | { | ||
| 165 | if (type_ != type::integer) | ||
| 166 | { | ||
| 167 | throw std::domain_error("field::getInteger called on non-integer field"); | ||
| 168 | } | ||
| 169 | |||
| 170 | return integer_; | ||
| 171 | } | ||
| 172 | |||
| 173 | field::field( | ||
| 174 | std::string name, | ||
| 175 | std::string arg) : | ||
| 176 | type_(type::string), | ||
| 177 | name_(name) | ||
| 178 | { | ||
| 179 | new(&string_) std::string(arg); | ||
| 180 | } | ||
| 181 | |||
| 182 | std::string field::getString() const | ||
| 183 | { | ||
| 184 | if (type_ != type::string) | ||
| 185 | { | ||
| 186 | throw std::domain_error("field::getString called on non-string field"); | ||
| 187 | } | ||
| 188 | |||
| 189 | return string_; | ||
| 190 | } | ||
| 191 | |||
| 192 | }; | ||
| 193 | }; | ||
| diff --git a/generator/field.h b/generator/field.h new file mode 100644 index 0000000..1fbabfc --- /dev/null +++ b/generator/field.h | |||
| @@ -0,0 +1,76 @@ | |||
#ifndef FIELD_H_CAE0B18E
#define FIELD_H_CAE0B18E

#include <string>

namespace verbly {
  namespace generator {

    // A single named column value for a database insert: a tagged union of
    // either an integer or a string, plus the column name.
    class field {
    public:
      enum class type {
        invalid,
        integer,
        string
      };

      // Copy and move constructors

      field(const field& other);
      field(field&& other);

      // Assignment (copy-and-swap: the argument is taken by value)

      field& operator=(field other);

      // Swap

      friend void swap(field& first, field& second);

      // Destructor

      ~field();

      // Generic accessors

      type getType() const
      {
        return type_;
      }

      std::string getName() const
      {
        return name_;
      }

      // Integer

      field(std::string name, int arg);

      int getInteger() const;

      // String

      field(std::string name, std::string arg);

      std::string getString() const;

    private:

      // A default-constructed field is in the invalid state; only move
      // construction and the swap machinery should observe it.
      field()
      {
      }

      // type_ tracks which union member is active; the string member's
      // lifetime is managed manually (placement new) in field.cpp.
      union {
        int integer_;
        std::string string_;
      };

      type type_ = type::invalid;
      std::string name_;
    };

  };
};

#endif /* end of include guard: FIELD_H_CAE0B18E */
| diff --git a/generator/form.cpp b/generator/form.cpp new file mode 100644 index 0000000..6be9d47 --- /dev/null +++ b/generator/form.cpp | |||
| @@ -0,0 +1,53 @@ | |||
| 1 | #include "form.h" | ||
| 2 | #include <algorithm> | ||
| 3 | #include <list> | ||
| 4 | #include "database.h" | ||
| 5 | #include "field.h" | ||
| 6 | #include "pronunciation.h" | ||
| 7 | |||
| 8 | namespace verbly { | ||
| 9 | namespace generator { | ||
| 10 | |||
| 11 | int form::nextId_ = 0; | ||
| 12 | |||
| 13 | form::form(std::string text) : | ||
| 14 | id_(nextId_++), | ||
| 15 | text_(text), | ||
| 16 | complexity_(std::count(std::begin(text), std::end(text), ' ') + 1), | ||
| 17 | proper_(std::any_of(std::begin(text), std::end(text), std::isupper)) | ||
| 18 | { | ||
| 19 | } | ||
| 20 | |||
| 21 | void form::addPronunciation(const pronunciation& p) | ||
| 22 | { | ||
| 23 | pronunciations_.insert(&p); | ||
| 24 | } | ||
| 25 | |||
| 26 | database& operator<<(database& db, const form& arg) | ||
| 27 | { | ||
| 28 | // Serialize the form first. | ||
| 29 | { | ||
| 30 | std::list<field> fields; | ||
| 31 | fields.emplace_back("form_id", arg.getId()); | ||
| 32 | fields.emplace_back("form", arg.getText()); | ||
| 33 | fields.emplace_back("complexity", arg.getComplexity()); | ||
| 34 | fields.emplace_back("proper", arg.isProper()); | ||
| 35 | |||
| 36 | db.insertIntoTable("forms", std::move(fields)); | ||
| 37 | } | ||
| 38 | |||
| 39 | // Then, serialize the form/pronunciation relationship. | ||
| 40 | for (const pronunciation* p : arg.getPronunciations()) | ||
| 41 | { | ||
| 42 | std::list<field> fields; | ||
| 43 | fields.emplace_back("form_id", arg.getId()); | ||
| 44 | fields.emplace_back("pronunciation_id", p->getId()); | ||
| 45 | |||
| 46 | db.insertIntoTable("forms_pronunciations", std::move(fields)); | ||
| 47 | } | ||
| 48 | |||
| 49 | return db; | ||
| 50 | } | ||
| 51 | |||
| 52 | }; | ||
| 53 | }; | ||
| diff --git a/generator/form.h b/generator/form.h new file mode 100644 index 0000000..5576035 --- /dev/null +++ b/generator/form.h | |||
| @@ -0,0 +1,71 @@ | |||
| 1 | #ifndef FORM_H_7EFBC970 | ||
| 2 | #define FORM_H_7EFBC970 | ||
| 3 | |||
| 4 | #include <string> | ||
| 5 | #include <set> | ||
| 6 | |||
| 7 | namespace verbly { | ||
| 8 | namespace generator { | ||
| 9 | |||
| 10 | class pronunciation; | ||
| 11 | class database; | ||
| 12 | |||
| 13 | class form { | ||
| 14 | public: | ||
| 15 | |||
| 16 | // Constructor | ||
| 17 | |||
| 18 | explicit form(std::string text); | ||
| 19 | |||
| 20 | // Mutators | ||
| 21 | |||
| 22 | void addPronunciation(const pronunciation& p); | ||
| 23 | |||
| 24 | // Accessors | ||
| 25 | |||
| 26 | int getId() const | ||
| 27 | { | ||
| 28 | return id_; | ||
| 29 | } | ||
| 30 | |||
| 31 | std::string getText() const | ||
| 32 | { | ||
| 33 | return text_; | ||
| 34 | } | ||
| 35 | |||
| 36 | int getComplexity() const | ||
| 37 | { | ||
| 38 | return complexity_; | ||
| 39 | } | ||
| 40 | |||
| 41 | bool isProper() const | ||
| 42 | { | ||
| 43 | return proper_; | ||
| 44 | } | ||
| 45 | |||
| 46 | std::set<const pronunciation*> getPronunciations() const | ||
| 47 | { | ||
| 48 | return pronunciations_; | ||
| 49 | } | ||
| 50 | |||
| 51 | private: | ||
| 52 | |||
| 53 | static int nextId_; | ||
| 54 | |||
| 55 | const int id_; | ||
| 56 | const std::string text_; | ||
| 57 | const int complexity_; | ||
| 58 | const bool proper_; | ||
| 59 | |||
| 60 | std::set<const pronunciation*> pronunciations_; | ||
| 61 | |||
| 62 | }; | ||
| 63 | |||
| 64 | // Serializer | ||
| 65 | |||
| 66 | database& operator<<(database& db, const form& arg); | ||
| 67 | |||
| 68 | }; | ||
| 69 | }; | ||
| 70 | |||
| 71 | #endif /* end of include guard: FORM_H_7EFBC970 */ | ||
| diff --git a/generator/frame.cpp b/generator/frame.cpp new file mode 100644 index 0000000..9f0653f --- /dev/null +++ b/generator/frame.cpp | |||
| @@ -0,0 +1,83 @@ | |||
| 1 | #include "frame.h" | ||
| 2 | #include "database.h" | ||
| 3 | #include "field.h" | ||
| 4 | |||
| 5 | namespace verbly { | ||
| 6 | namespace generator { | ||
| 7 | |||
| 8 | int frame::nextId_ = 0; | ||
| 9 | |||
| 10 | frame::frame() : id_(nextId_++) | ||
| 11 | { | ||
| 12 | } | ||
| 13 | |||
| 14 | void frame::push_back(part fp) | ||
| 15 | { | ||
| 16 | parts_.push_back(std::move(fp)); | ||
| 17 | } | ||
| 18 | |||
| 19 | database& operator<<(database& db, const frame& arg) | ||
| 20 | { | ||
| 21 | std::list<field> fields; | ||
| 22 | fields.emplace_back("frame_id", arg.getId()); | ||
| 23 | |||
| 24 | nlohmann::json jsonParts; | ||
| 25 | for (const part& p : arg) | ||
| 26 | { | ||
| 27 | nlohmann::json jsonPart; | ||
| 28 | jsonPart["type"] = static_cast<int>(p.getType()); | ||
| 29 | |||
| 30 | switch (p.getType()) | ||
| 31 | { | ||
| 32 | case part::type::noun_phrase: | ||
| 33 | { | ||
| 34 | jsonPart["role"] = p.getNounRole(); | ||
| 35 | jsonPart["selrestrs"] = p.getNounSelrestrs().toJson(); | ||
| 36 | jsonPart["synrestrs"] = p.getNounSynrestrs(); | ||
| 37 | |||
| 38 | break; | ||
| 39 | } | ||
| 40 | |||
| 41 | case part::type::preposition: | ||
| 42 | { | ||
| 43 | jsonPart["choices"] = p.getPrepositionChoices(); | ||
| 44 | jsonPart["literal"] = p.isPrepositionLiteral(); | ||
| 45 | |||
| 46 | break; | ||
| 47 | } | ||
| 48 | |||
| 49 | case part::type::literal: | ||
| 50 | { | ||
| 51 | jsonPart["value"] = p.getLiteralValue(); | ||
| 52 | |||
| 53 | break; | ||
| 54 | } | ||
| 55 | |||
| 56 | case part::type::verb: | ||
| 57 | case part::type::adjective: | ||
| 58 | case part::type::adverb: | ||
| 59 | { | ||
| 60 | break; | ||
| 61 | } | ||
| 62 | |||
| 63 | case part::type::invalid: | ||
| 64 | { | ||
| 65 | // Invalid parts should not be serialized. | ||
| 66 | assert(false); | ||
| 67 | |||
| 68 | break; | ||
| 69 | } | ||
| 70 | } | ||
| 71 | |||
| 72 | jsonParts.emplace_back(std::move(jsonPart)); | ||
| 73 | } | ||
| 74 | |||
| 75 | fields.emplace_back("data", jsonParts.dump()); | ||
| 76 | |||
| 77 | db.insertIntoTable("frames", std::move(fields)); | ||
| 78 | |||
| 79 | return db; | ||
| 80 | } | ||
| 81 | |||
| 82 | }; | ||
| 83 | }; | ||
| diff --git a/generator/frame.h b/generator/frame.h new file mode 100644 index 0000000..411ce6c --- /dev/null +++ b/generator/frame.h | |||
| @@ -0,0 +1,59 @@ | |||
| 1 | #ifndef FRAME_H_26770FF1 | ||
| 2 | #define FRAME_H_26770FF1 | ||
| 3 | |||
| 4 | #include <list> | ||
| 5 | #include "part.h" | ||
| 6 | |||
| 7 | namespace verbly { | ||
| 8 | namespace generator { | ||
| 9 | |||
| 10 | class database; | ||
| 11 | |||
| 12 | class frame { | ||
| 13 | public: | ||
| 14 | |||
| 15 | // Aliases | ||
| 16 | |||
| 17 | using const_iterator = std::list<part>::const_iterator; | ||
| 18 | |||
| 19 | // Constructor | ||
| 20 | |||
| 21 | frame(); | ||
| 22 | |||
| 23 | // Mutators | ||
| 24 | |||
| 25 | void push_back(part fp); | ||
| 26 | |||
| 27 | // Accessors | ||
| 28 | |||
| 29 | int getId() const | ||
| 30 | { | ||
| 31 | return id_; | ||
| 32 | } | ||
| 33 | |||
| 34 | const_iterator begin() const | ||
| 35 | { | ||
| 36 | return std::begin(parts_); | ||
| 37 | } | ||
| 38 | |||
| 39 | const_iterator end() const | ||
| 40 | { | ||
| 41 | return std::end(parts_); | ||
| 42 | } | ||
| 43 | |||
| 44 | private: | ||
| 45 | |||
| 46 | static int nextId_; | ||
| 47 | |||
| 48 | const int id_; | ||
| 49 | |||
| 50 | std::list<part> parts_; | ||
| 51 | |||
| 52 | }; | ||
| 53 | |||
| 54 | database& operator<<(database& db, const frame& arg); | ||
| 55 | |||
| 56 | }; | ||
| 57 | }; | ||
| 58 | |||
| 59 | #endif /* end of include guard: FRAME_H_26770FF1 */ | ||
| diff --git a/generator/generator.cpp b/generator/generator.cpp index 6a16467..d88cb31 100644 --- a/generator/generator.cpp +++ b/generator/generator.cpp | |||
| @@ -1,2320 +1,1477 @@ | |||
| 1 | #include <libxml/parser.h> | 1 | #include "generator.h" |
| 2 | #include <cassert> | ||
| 3 | #include <stdexcept> | ||
| 2 | #include <iostream> | 4 | #include <iostream> |
| 5 | #include <regex> | ||
| 3 | #include <dirent.h> | 6 | #include <dirent.h> |
| 4 | #include <set> | ||
| 5 | #include <map> | ||
| 6 | #include <string> | ||
| 7 | #include <vector> | ||
| 8 | #include <fstream> | 7 | #include <fstream> |
| 9 | #include <sqlite3.h> | 8 | #include "enums.h" |
| 10 | #include <sstream> | ||
| 11 | #include <regex> | ||
| 12 | #include <list> | ||
| 13 | #include <algorithm> | ||
| 14 | #include <json.hpp> | ||
| 15 | #include "progress.h" | 9 | #include "progress.h" |
| 10 | #include "selrestr.h" | ||
| 11 | #include "role.h" | ||
| 12 | #include "part.h" | ||
| 13 | #include "field.h" | ||
| 16 | #include "../lib/util.h" | 14 | #include "../lib/util.h" |
| 17 | 15 | ||
| 18 | using json = nlohmann::json; | 16 | namespace verbly { |
| 19 | 17 | namespace generator { | |
| 20 | struct verb_t { | ||
| 21 | std::string infinitive; | ||
| 22 | std::string past_tense; | ||
| 23 | std::string past_participle; | ||
| 24 | std::string ing_form; | ||
| 25 | std::string s_form; | ||
| 26 | int id; | ||
| 27 | }; | ||
| 28 | |||
| 29 | struct adjective_t { | ||
| 30 | std::string base; | ||
| 31 | std::string comparative; | ||
| 32 | std::string superlative; | ||
| 33 | }; | ||
| 34 | |||
| 35 | struct noun_t { | ||
| 36 | std::string singular; | ||
| 37 | std::string plural; | ||
| 38 | }; | ||
| 39 | |||
| 40 | struct selrestr_t { | ||
| 41 | enum class type_t { | ||
| 42 | singleton, | ||
| 43 | andlogic, | ||
| 44 | orlogic, | ||
| 45 | empty | ||
| 46 | }; | ||
| 47 | type_t type; | ||
| 48 | std::string restriction; | ||
| 49 | bool pos; | ||
| 50 | std::list<selrestr_t> subordinates; | ||
| 51 | }; | ||
| 52 | |||
| 53 | struct framepart_t { | ||
| 54 | enum class type_t { | ||
| 55 | np, | ||
| 56 | v, | ||
| 57 | pp, | ||
| 58 | adj, | ||
| 59 | adv, | ||
| 60 | lex | ||
| 61 | }; | ||
| 62 | type_t type; | ||
| 63 | std::string role; | ||
| 64 | selrestr_t selrestrs; | ||
| 65 | std::set<std::string> preprestrs; | ||
| 66 | std::set<std::string> synrestrs; | ||
| 67 | std::list<std::string> choices; | ||
| 68 | std::string lexval; | ||
| 69 | }; | ||
| 70 | |||
| 71 | struct group_t { | ||
| 72 | std::string id; | ||
| 73 | std::string parent; | ||
| 74 | std::set<std::string> members; | ||
| 75 | std::map<std::string, selrestr_t> roles; | ||
| 76 | std::list<std::list<framepart_t>> frames; | ||
| 77 | }; | ||
| 78 | |||
| 79 | struct pronunciation_t { | ||
| 80 | std::string phonemes; | ||
| 81 | std::string prerhyme; | ||
| 82 | std::string rhyme; | ||
| 83 | int syllables = 0; | ||
| 84 | std::string stress; | ||
| 85 | |||
| 86 | bool operator<(const pronunciation_t& other) const | ||
| 87 | { | ||
| 88 | return phonemes < other.phonemes; | ||
| 89 | } | ||
| 90 | }; | ||
| 91 | |||
| 92 | std::map<std::string, group_t> groups; | ||
| 93 | std::map<std::string, verb_t> verbs; | ||
| 94 | std::map<std::string, adjective_t> adjectives; | ||
| 95 | std::map<std::string, noun_t> nouns; | ||
| 96 | std::map<int, std::map<int, int>> wn; | ||
| 97 | std::map<int, int> images; | ||
| 98 | std::map<std::string, std::set<pronunciation_t>> pronunciations; | ||
| 99 | |||
| 100 | void print_usage() | ||
| 101 | { | ||
| 102 | std::cout << "Verbly Datafile Generator" << std::endl; | ||
| 103 | std::cout << "-------------------------" << std::endl; | ||
| 104 | std::cout << "Requires exactly six arguments." << std::endl; | ||
| 105 | std::cout << "1. The path to a VerbNet data directory." << std::endl; | ||
| 106 | std::cout << "2. The path to an AGID infl.txt file." << std::endl; | ||
| 107 | std::cout << "3. The path to a WordNet prolog data directory." << std::endl; | ||
| 108 | std::cout << "4. The path to a CMUDICT pronunciation file." << std::endl; | ||
| 109 | std::cout << "5. The path to an ImageNet urls.txt file." << std::endl; | ||
| 110 | std::cout << "6. Datafile output path." << std::endl; | ||
| 111 | |||
| 112 | exit(1); | ||
| 113 | } | ||
| 114 | |||
| 115 | void db_error(sqlite3* ppdb, std::string query) | ||
| 116 | { | ||
| 117 | std::cout << "Error writing to output database: " << sqlite3_errmsg(ppdb) << std::endl; | ||
| 118 | std::cout << query << std::endl; | ||
| 119 | sqlite3_close_v2(ppdb); | ||
| 120 | print_usage(); | ||
| 121 | } | ||
| 122 | |||
| 123 | json export_selrestrs(selrestr_t r) | ||
| 124 | { | ||
| 125 | if (r.type == selrestr_t::type_t::empty) | ||
| 126 | { | ||
| 127 | return {}; | ||
| 128 | } else if (r.type == selrestr_t::type_t::singleton) | ||
| 129 | { | ||
| 130 | json result; | ||
| 131 | result["type"] = r.restriction; | ||
| 132 | result["pos"] = r.pos; | ||
| 133 | return result; | ||
| 134 | } else { | ||
| 135 | json result; | ||
| 136 | if (r.type == selrestr_t::type_t::andlogic) | ||
| 137 | { | ||
| 138 | result["logic"] = "and"; | ||
| 139 | } else { | ||
| 140 | result["logic"] = "or"; | ||
| 141 | } | ||
| 142 | |||
| 143 | std::list<json> outlist; | ||
| 144 | std::transform(std::begin(r.subordinates), std::end(r.subordinates), std::back_inserter(outlist), &export_selrestrs); | ||
| 145 | result["children"] = outlist; | ||
| 146 | 18 | ||
| 147 | return result; | 19 | generator::generator( |
| 148 | } | 20 | std::string verbNetPath, |
| 149 | } | 21 | std::string agidPath, |
| 150 | 22 | std::string wordNetPath, | |
| 151 | selrestr_t parse_selrestrs(xmlNodePtr top, std::string filename) | 23 | std::string cmudictPath, |
| 152 | { | 24 | std::string imageNetPath, |
| 153 | selrestr_t r; | 25 | std::string outputPath) : |
| 154 | xmlChar* key; | 26 | verbNetPath_(verbNetPath), |
| 155 | 27 | agidPath_(agidPath), | |
| 156 | if (!xmlStrcmp(top->name, (const xmlChar*) "SELRESTRS")) | 28 | wordNetPath_(wordNetPath), |
| 157 | { | 29 | cmudictPath_(cmudictPath), |
| 158 | if (xmlChildElementCount(top) == 0) | 30 | imageNetPath_(imageNetPath), |
| 31 | db_(outputPath) | ||
| 159 | { | 32 | { |
| 160 | r.type = selrestr_t::type_t::empty; | 33 | // Ensure VerbNet directory exists |
| 161 | } else if (xmlChildElementCount(top) == 1) | 34 | DIR* dir; |
| 162 | { | 35 | if ((dir = opendir(verbNetPath_.c_str())) == nullptr) |
| 163 | r = parse_selrestrs(xmlFirstElementChild(top), filename); | ||
| 164 | } else { | ||
| 165 | r.type = selrestr_t::type_t::andlogic; | ||
| 166 | |||
| 167 | if (xmlHasProp(top, (const xmlChar*) "logic")) | ||
| 168 | { | 36 | { |
| 169 | key = xmlGetProp(top, (const xmlChar*) "logic"); | 37 | throw std::invalid_argument("Invalid VerbNet data directory"); |
| 170 | if (!xmlStrcmp(key, (const xmlChar*) "or")) | ||
| 171 | { | ||
| 172 | r.type = selrestr_t::type_t::orlogic; | ||
| 173 | } | ||
| 174 | xmlFree(key); | ||
| 175 | } | 38 | } |
| 176 | 39 | ||
| 177 | for (xmlNodePtr selrestr = top->xmlChildrenNode; selrestr != nullptr; selrestr = selrestr->next) | 40 | closedir(dir); |
| 41 | |||
| 42 | // Ensure AGID infl.txt exists | ||
| 43 | if (!std::ifstream(agidPath_)) | ||
| 178 | { | 44 | { |
| 179 | if (!xmlStrcmp(selrestr->name, (const xmlChar*) "SELRESTRS") || !xmlStrcmp(selrestr->name, (const xmlChar*) "SELRESTR")) | 45 | throw std::invalid_argument("AGID infl.txt file not found"); |
| 180 | { | ||
| 181 | r.subordinates.push_back(parse_selrestrs(selrestr, filename)); | ||
| 182 | } | ||
| 183 | } | 46 | } |
| 184 | } | 47 | |
| 185 | } else if (!xmlStrcmp(top->name, (const xmlChar*) "SELRESTR")) | 48 | // Add directory separator to WordNet path |
| 186 | { | 49 | if ((wordNetPath_.back() != '/') && (wordNetPath_.back() != '\\')) |
| 187 | r.type = selrestr_t::type_t::singleton; | ||
| 188 | |||
| 189 | key = xmlGetProp(top, (xmlChar*) "Value"); | ||
| 190 | r.pos = (std::string((const char*)key) == "+"); | ||
| 191 | xmlFree(key); | ||
| 192 | |||
| 193 | key = xmlGetProp(top, (xmlChar*) "type"); | ||
| 194 | r.restriction = (const char*) key; | ||
| 195 | xmlFree(key); | ||
| 196 | } else { | ||
| 197 | // Invalid | ||
| 198 | std::cout << "Bad VerbNet file format: " << filename << std::endl; | ||
| 199 | print_usage(); | ||
| 200 | } | ||
| 201 | |||
| 202 | return r; | ||
| 203 | } | ||
| 204 | |||
| 205 | group_t& parse_group(xmlNodePtr top, std::string filename) | ||
| 206 | { | ||
| 207 | xmlChar* key = xmlGetProp(top, (xmlChar*) "ID"); | ||
| 208 | if (key == 0) | ||
| 209 | { | ||
| 210 | std::cout << "Bad VerbNet file format: " << filename << std::endl; | ||
| 211 | print_usage(); | ||
| 212 | } | ||
| 213 | std::string vnid = (const char*)key; | ||
| 214 | vnid = vnid.substr(vnid.find_first_of("-")+1); | ||
| 215 | xmlFree(key); | ||
| 216 | |||
| 217 | group_t g; | ||
| 218 | g.id = vnid; | ||
| 219 | |||
| 220 | for (xmlNodePtr node = top->xmlChildrenNode; node != nullptr; node = node->next) | ||
| 221 | { | ||
| 222 | if (!xmlStrcmp(node->name, (const xmlChar*) "SUBCLASSES")) | ||
| 223 | { | ||
| 224 | for (xmlNodePtr subclass = node->xmlChildrenNode; subclass != nullptr; subclass = subclass->next) | ||
| 225 | { | 50 | { |
| 226 | if (!xmlStrcmp(subclass->name, (const xmlChar*) "VNSUBCLASS")) | 51 | wordNetPath_ += '/'; |
| 227 | { | ||
| 228 | auto& sg = parse_group(subclass, filename); | ||
| 229 | sg.parent = vnid; | ||
| 230 | |||
| 231 | for (auto member : sg.members) | ||
| 232 | { | ||
| 233 | g.members.insert(member); | ||
| 234 | } | ||
| 235 | |||
| 236 | // The schema requires that subclasses appear after role definitions, so we can do this now | ||
| 237 | for (auto role : g.roles) | ||
| 238 | { | ||
| 239 | if (sg.roles.count(role.first) == 0) | ||
| 240 | { | ||
| 241 | sg.roles[role.first] = role.second; | ||
| 242 | } | ||
| 243 | } | ||
| 244 | } | ||
| 245 | } | 52 | } |
| 246 | } else if (!xmlStrcmp(node->name, (const xmlChar*) "MEMBERS")) | 53 | |
| 247 | { | 54 | // Ensure WordNet tables exist |
| 248 | for (xmlNodePtr member = node->xmlChildrenNode; member != nullptr; member = member->next) | 55 | for (std::string table : { |
| 56 | "s", "sk", "ant", "at", "cls", "hyp", "ins", "mm", "mp", "ms", "per", "sa", "sim", "syntax" | ||
| 57 | }) | ||
| 249 | { | 58 | { |
| 250 | if (!xmlStrcmp(member->name, (const xmlChar*) "MEMBER")) | 59 | if (!std::ifstream(wordNetPath_ + "wn_" + table + ".pl")) |
| 251 | { | 60 | { |
| 252 | key = xmlGetProp(member, (xmlChar*) "name"); | 61 | throw std::invalid_argument("WordNet " + table + " table not found"); |
| 253 | g.members.insert((const char*)key); | ||
| 254 | xmlFree(key); | ||
| 255 | } | 62 | } |
| 256 | } | 63 | } |
| 257 | } else if (!xmlStrcmp(node->name, (const xmlChar*) "THEMROLES")) | 64 | |
| 258 | { | 65 | // Ensure CMUDICT file exists |
| 259 | for (xmlNodePtr role = node->xmlChildrenNode; role != nullptr; role = role->next) | 66 | if (!std::ifstream(cmudictPath_)) |
| 260 | { | 67 | { |
| 261 | if (!xmlStrcmp(role->name, (const xmlChar*) "THEMROLE")) | 68 | throw std::invalid_argument("CMUDICT file not found"); |
| 262 | { | ||
| 263 | selrestr_t r; | ||
| 264 | r.type = selrestr_t::type_t::empty; | ||
| 265 | |||
| 266 | key = xmlGetProp(role, (const xmlChar*) "type"); | ||
| 267 | std::string type = (const char*)key; | ||
| 268 | xmlFree(key); | ||
| 269 | |||
| 270 | for (xmlNodePtr rolenode = role->xmlChildrenNode; rolenode != nullptr; rolenode = rolenode->next) | ||
| 271 | { | ||
| 272 | if (!xmlStrcmp(rolenode->name, (const xmlChar*) "SELRESTRS")) | ||
| 273 | { | ||
| 274 | r = parse_selrestrs(rolenode, filename); | ||
| 275 | } | ||
| 276 | } | ||
| 277 | |||
| 278 | g.roles[type] = r; | ||
| 279 | } | ||
| 280 | } | 69 | } |
| 281 | } else if (!xmlStrcmp(node->name, (const xmlChar*) "FRAMES")) | 70 | |
| 282 | { | 71 | // Ensure ImageNet urls.txt exists |
| 283 | for (xmlNodePtr frame = node->xmlChildrenNode; frame != nullptr; frame = frame->next) | 72 | if (!std::ifstream(imageNetPath_)) |
| 284 | { | 73 | { |
| 285 | if (!xmlStrcmp(frame->name, (const xmlChar*) "FRAME")) | 74 | throw std::invalid_argument("ImageNet urls.txt file not found"); |
| 286 | { | ||
| 287 | std::list<framepart_t> f; | ||
| 288 | |||
| 289 | for (xmlNodePtr framenode = frame->xmlChildrenNode; framenode != nullptr; framenode = framenode->next) | ||
| 290 | { | ||
| 291 | if (!xmlStrcmp(framenode->name, (const xmlChar*) "SYNTAX")) | ||
| 292 | { | ||
| 293 | for (xmlNodePtr syntaxnode = framenode->xmlChildrenNode; syntaxnode != nullptr; syntaxnode = syntaxnode->next) | ||
| 294 | { | ||
| 295 | framepart_t fp; | ||
| 296 | |||
| 297 | if (!xmlStrcmp(syntaxnode->name, (const xmlChar*) "NP")) | ||
| 298 | { | ||
| 299 | fp.type = framepart_t::type_t::np; | ||
| 300 | |||
| 301 | key = xmlGetProp(syntaxnode, (xmlChar*) "value"); | ||
| 302 | fp.role = (const char*)key; | ||
| 303 | xmlFree(key); | ||
| 304 | |||
| 305 | fp.selrestrs.type = selrestr_t::type_t::empty; | ||
| 306 | |||
| 307 | for (xmlNodePtr npnode = syntaxnode->xmlChildrenNode; npnode != nullptr; npnode = npnode->next) | ||
| 308 | { | ||
| 309 | if (!xmlStrcmp(npnode->name, (const xmlChar*) "SYNRESTRS")) | ||
| 310 | { | ||
| 311 | for (xmlNodePtr synrestr = npnode->xmlChildrenNode; synrestr != nullptr; synrestr = synrestr->next) | ||
| 312 | { | ||
| 313 | if (!xmlStrcmp(synrestr->name, (const xmlChar*) "SYNRESTR")) | ||
| 314 | { | ||
| 315 | key = xmlGetProp(synrestr, (xmlChar*) "type"); | ||
| 316 | fp.synrestrs.insert(std::string((const char*)key)); | ||
| 317 | xmlFree(key); | ||
| 318 | } | ||
| 319 | } | ||
| 320 | } | ||
| 321 | |||
| 322 | if (!xmlStrcmp(npnode->name, (const xmlChar*) "SELRESTRS")) | ||
| 323 | { | ||
| 324 | fp.selrestrs = parse_selrestrs(npnode, filename); | ||
| 325 | } | ||
| 326 | } | ||
| 327 | } else if (!xmlStrcmp(syntaxnode->name, (xmlChar*) "VERB")) | ||
| 328 | { | ||
| 329 | fp.type = framepart_t::type_t::v; | ||
| 330 | } else if (!xmlStrcmp(syntaxnode->name, (xmlChar*) "PREP")) | ||
| 331 | { | ||
| 332 | fp.type = framepart_t::type_t::pp; | ||
| 333 | |||
| 334 | if (xmlHasProp(syntaxnode, (xmlChar*) "value")) | ||
| 335 | { | ||
| 336 | key = xmlGetProp(syntaxnode, (xmlChar*) "value"); | ||
| 337 | std::string choices = (const char*)key; | ||
| 338 | xmlFree(key); | ||
| 339 | |||
| 340 | fp.choices = verbly::split<std::list<std::string>>(choices, " "); | ||
| 341 | } | ||
| 342 | |||
| 343 | for (xmlNodePtr npnode = syntaxnode->xmlChildrenNode; npnode != nullptr; npnode = npnode->next) | ||
| 344 | { | ||
| 345 | if (!xmlStrcmp(npnode->name, (const xmlChar*) "SELRESTRS")) | ||
| 346 | { | ||
| 347 | for (xmlNodePtr synrestr = npnode->xmlChildrenNode; synrestr != nullptr; synrestr = synrestr->next) | ||
| 348 | { | ||
| 349 | if (!xmlStrcmp(synrestr->name, (const xmlChar*) "SELRESTR")) | ||
| 350 | { | ||
| 351 | key = xmlGetProp(synrestr, (xmlChar*) "type"); | ||
| 352 | fp.preprestrs.insert(std::string((const char*)key)); | ||
| 353 | xmlFree(key); | ||
| 354 | } | ||
| 355 | } | ||
| 356 | } | ||
| 357 | } | ||
| 358 | } else if (!xmlStrcmp(syntaxnode->name, (xmlChar*) "ADJ")) | ||
| 359 | { | ||
| 360 | fp.type = framepart_t::type_t::adj; | ||
| 361 | } else if (!xmlStrcmp(syntaxnode->name, (xmlChar*) "ADV")) | ||
| 362 | { | ||
| 363 | fp.type = framepart_t::type_t::adv; | ||
| 364 | } else if (!xmlStrcmp(syntaxnode->name, (xmlChar*) "LEX")) | ||
| 365 | { | ||
| 366 | fp.type = framepart_t::type_t::lex; | ||
| 367 | |||
| 368 | key = xmlGetProp(syntaxnode, (xmlChar*) "value"); | ||
| 369 | fp.lexval = (const char*)key; | ||
| 370 | xmlFree(key); | ||
| 371 | } else { | ||
| 372 | continue; | ||
| 373 | } | ||
| 374 | |||
| 375 | f.push_back(fp); | ||
| 376 | } | ||
| 377 | |||
| 378 | g.frames.push_back(f); | ||
| 379 | } | ||
| 380 | } | ||
| 381 | } | ||
| 382 | } | 75 | } |
| 383 | } | 76 | } |
| 384 | } | ||
| 385 | |||
| 386 | groups[vnid] = g; | ||
| 387 | |||
| 388 | return groups[vnid]; | ||
| 389 | } | ||
| 390 | |||
| 391 | int main(int argc, char** argv) | ||
| 392 | { | ||
| 393 | if (argc != 7) | ||
| 394 | { | ||
| 395 | print_usage(); | ||
| 396 | } | ||
| 397 | |||
| 398 | // VerbNet data | ||
| 399 | std::cout << "Reading verb frames..." << std::endl; | ||
| 400 | |||
| 401 | DIR* dir; | ||
| 402 | if ((dir = opendir(argv[1])) == nullptr) | ||
| 403 | { | ||
| 404 | std::cout << "Invalid VerbNet data directory." << std::endl; | ||
| 405 | |||
| 406 | print_usage(); | ||
| 407 | } | ||
| 408 | |||
| 409 | struct dirent* ent; | ||
| 410 | while ((ent = readdir(dir)) != nullptr) | ||
| 411 | { | ||
| 412 | std::string filename(argv[1]); | ||
| 413 | if (filename.back() != '/') | ||
| 414 | { | ||
| 415 | filename += '/'; | ||
| 416 | } | ||
| 417 | 77 | ||
| 418 | filename += ent->d_name; | 78 | void generator::run() |
| 419 | //std::cout << ent->d_name << std::endl; | ||
| 420 | |||
| 421 | if (filename.rfind(".xml") != filename.size() - 4) | ||
| 422 | { | ||
| 423 | continue; | ||
| 424 | } | ||
| 425 | |||
| 426 | xmlDocPtr doc = xmlParseFile(filename.c_str()); | ||
| 427 | if (doc == nullptr) | ||
| 428 | { | ||
| 429 | std::cout << "Error opening " << filename << std::endl; | ||
| 430 | print_usage(); | ||
| 431 | } | ||
| 432 | |||
| 433 | xmlNodePtr top = xmlDocGetRootElement(doc); | ||
| 434 | if ((top == nullptr) || (xmlStrcmp(top->name, (xmlChar*) "VNCLASS"))) | ||
| 435 | { | ||
| 436 | std::cout << "Bad VerbNet file format: " << filename << std::endl; | ||
| 437 | print_usage(); | ||
| 438 | } | ||
| 439 | |||
| 440 | parse_group(top, filename); | ||
| 441 | } | ||
| 442 | |||
| 443 | closedir(dir); | ||
| 444 | |||
| 445 | // Get verbs from AGID | ||
| 446 | std::cout << "Reading inflections..." << std::endl; | ||
| 447 | |||
| 448 | std::ifstream agidfile(argv[2]); | ||
| 449 | if (!agidfile.is_open()) | ||
| 450 | { | ||
| 451 | std::cout << "Could not open AGID file: " << argv[2] << std::endl; | ||
| 452 | print_usage(); | ||
| 453 | } | ||
| 454 | |||
| 455 | for (;;) | ||
| 456 | { | ||
| 457 | std::string line; | ||
| 458 | if (!getline(agidfile, line)) | ||
| 459 | { | ||
| 460 | break; | ||
| 461 | } | ||
| 462 | |||
| 463 | if (line.back() == '\r') | ||
| 464 | { | 79 | { |
| 465 | line.pop_back(); | 80 | // Create notions, words, lemmas, and forms from WordNet synsets |
| 466 | } | 81 | readWordNetSynsets(); |
| 467 | 82 | ||
| 468 | int divider = line.find_first_of(" "); | 83 | // Reads adjective positioning WordNet data |
| 469 | std::string word = line.substr(0, divider); | 84 | readAdjectivePositioning(); |
| 470 | line = line.substr(divider+1); | 85 | |
| 471 | char type = line[0]; | 86 | // Counts the number of URLs ImageNet has per notion |
| 472 | 87 | readImageNetUrls(); | |
| 473 | if (line[1] == '?') | 88 | |
| 474 | { | 89 | // Creates a word by WordNet sense key lookup table |
| 475 | line.erase(0, 4); | 90 | readWordNetSenseKeys(); |
| 476 | } else { | 91 | |
| 477 | line.erase(0, 3); | 92 | // Creates groups and frames from VerbNet data |
| 478 | } | 93 | readVerbNet(); |
| 479 | 94 | ||
| 480 | std::vector<std::string> forms; | 95 | // Creates forms and inflections from AGID. To reduce the amount of forms |
| 481 | while (!line.empty()) | 96 | // created, we do this after most lemmas that need inflecting have been |
| 482 | { | 97 | // created through other means, and then only generate forms for |
| 483 | std::string inflection; | 98 | // inflections of already-existing lemmas. The exception to this regards |
| 484 | if ((divider = line.find(" | ")) != std::string::npos) | 99 | // verb lemmas. If a verb lemma in AGID either does not exist yet, or does |
| 485 | { | 100 | // exist but is not related to any words that are related to verb notions, |
| 486 | inflection = line.substr(0, divider); | 101 | // then a notion and a word is generated and the form generation proceeds |
| 487 | line = line.substr(divider + 3); | 102 | // as usual. |
| 488 | } else { | 103 | readAgidInflections(); |
| 489 | inflection = line; | 104 | |
| 490 | line = ""; | 105 | // Reads in prepositions and the is_a relationship |
| 491 | } | 106 | readPrepositions(); |
| 492 | 107 | ||
| 493 | if ((divider = inflection.find_first_of(",?")) != std::string::npos) | 108 | // Creates pronunciations from CMUDICT. To reduce the amount of |
| 494 | { | 109 | // pronunciations created, we do this after all forms have been created, |
| 495 | inflection = inflection.substr(0, divider); | 110 | // and then only generate pronunciations for already-exisiting forms. |
| 496 | } | 111 | readCmudictPronunciations(); |
| 497 | 112 | ||
| 498 | forms.push_back(inflection); | 113 | // Writes the database schema |
| 114 | writeSchema(); | ||
| 115 | |||
| 116 | // Dumps data to the database | ||
| 117 | dumpObjects(); | ||
| 118 | |||
| 119 | // Populates the antonymy relationship from WordNet | ||
| 120 | readWordNetAntonymy(); | ||
| 121 | |||
| 122 | // Populates the variation relationship from WordNet | ||
| 123 | readWordNetVariation(); | ||
| 124 | |||
| 125 | // Populates the usage, topicality, and regionality relationships from | ||
| 126 | // WordNet | ||
| 127 | readWordNetClasses(); | ||
| 128 | |||
| 129 | // Populates the causality relationship from WordNet | ||
| 130 | readWordNetCausality(); | ||
| 131 | |||
| 132 | // Populates the entailment relationship from WordNet | ||
| 133 | readWordNetEntailment(); | ||
| 134 | |||
| 135 | // Populates the hypernymy relationship from WordNet | ||
| 136 | readWordNetHypernymy(); | ||
| 137 | |||
| 138 | // Populates the instantiation relationship from WordNet | ||
| 139 | readWordNetInstantiation(); | ||
| 140 | |||
| 141 | // Populates the member meronymy relationship from WordNet | ||
| 142 | readWordNetMemberMeronymy(); | ||
| 143 | |||
| 144 | // Populates the part meronymy relationship from WordNet | ||
| 145 | readWordNetPartMeronymy(); | ||
| 146 | |||
| 147 | // Populates the substance meronymy relationship from WordNet | ||
| 148 | readWordNetSubstanceMeronymy(); | ||
| 149 | |||
| 150 | // Populates the pertainymy and mannernymy relationships from WordNet | ||
| 151 | readWordNetPertainymy(); | ||
| 152 | |||
| 153 | // Populates the specification relationship from WordNet | ||
| 154 | readWordNetSpecification(); | ||
| 155 | |||
| 156 | // Populates the adjective similarity relationship from WordNet | ||
| 157 | readWordNetSimilarity(); | ||
| 158 | |||
| 159 | |||
| 160 | |||
| 161 | |||
| 162 | |||
| 163 | |||
| 164 | |||
| 165 | |||
| 499 | } | 166 | } |
| 500 | 167 | ||
| 501 | switch (type) | 168 | void generator::readWordNetSynsets() |
| 502 | { | 169 | { |
| 503 | case 'V': | 170 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_s.pl")); |
| 171 | progress ppgs("Reading synsets from WordNet...", lines.size()); | ||
| 172 | |||
| 173 | for (std::string line : lines) | ||
| 504 | { | 174 | { |
| 505 | verb_t v; | 175 | ppgs.update(); |
| 506 | v.infinitive = word; | 176 | |
| 507 | if (forms.size() == 4) | 177 | std::regex relation("^s\\(([1234]\\d{8}),(\\d+),'(.+)',\\w,\\d+,(\\d+)\\)\\.$"); |
| 508 | { | 178 | std::smatch relation_data; |
| 509 | v.past_tense = forms[0]; | 179 | if (!std::regex_search(line, relation_data, relation)) |
| 510 | v.past_participle = forms[1]; | 180 | { |
| 511 | v.ing_form = forms[2]; | 181 | continue; |
| 512 | v.s_form = forms[3]; | ||
| 513 | } else if (forms.size() == 3) | ||
| 514 | { | ||
| 515 | v.past_tense = forms[0]; | ||
| 516 | v.past_participle = forms[0]; | ||
| 517 | v.ing_form = forms[1]; | ||
| 518 | v.s_form = forms[2]; | ||
| 519 | } else if (forms.size() == 8) | ||
| 520 | { | ||
| 521 | // As of AGID 2014.08.11, this is only "to be" | ||
| 522 | v.past_tense = forms[0]; | ||
| 523 | v.past_participle = forms[2]; | ||
| 524 | v.ing_form = forms[3]; | ||
| 525 | v.s_form = forms[4]; | ||
| 526 | } else { | ||
| 527 | // Words that don't fit the cases above as of AGID 2014.08.11: | ||
| 528 | // - may and shall do not conjugate the way we want them to | ||
| 529 | // - methinks only has a past tense and is an outlier | ||
| 530 | // - wit has five forms, and is archaic/obscure enough that we can ignore it for now | ||
| 531 | std::cout << "Ignoring verb \"" << word << "\" due to non-standard number of forms." << std::endl; | ||
| 532 | } | 182 | } |
| 533 | 183 | ||
| 534 | verbs[word] = v; | 184 | int synset_id = std::stoi(relation_data[1]); |
| 535 | 185 | int wnum = std::stoi(relation_data[2]); | |
| 536 | break; | 186 | std::string text = relation_data[3]; |
| 537 | } | 187 | int tag_count = std::stoi(relation_data[4]); |
| 538 | 188 | size_t word_it; | |
| 539 | case 'A': | 189 | while ((word_it = text.find("''")) != std::string::npos) |
| 540 | { | ||
| 541 | adjective_t adj; | ||
| 542 | adj.base = word; | ||
| 543 | if (forms.size() == 2) | ||
| 544 | { | 190 | { |
| 545 | adj.comparative = forms[0]; | 191 | text.erase(word_it, 1); |
| 546 | adj.superlative = forms[1]; | ||
| 547 | } else { | ||
| 548 | // As of AGID 2014.08.11, this is only "only", which has only the form "onliest" | ||
| 549 | std::cout << "Ignoring adjective/adverb \"" << word << "\" due to non-standard number of forms." << std::endl; | ||
| 550 | } | 192 | } |
| 551 | 193 | ||
| 552 | adjectives[word] = adj; | 194 | // The WordNet data does contain duplicates, so we need to check that we |
| 553 | 195 | // haven't already created this word. | |
| 554 | break; | 196 | std::pair<int, int> lookup(synset_id, wnum); |
| 555 | } | 197 | if (!wordByWnidAndWnum_.count(lookup)) |
| 556 | |||
| 557 | case 'N': | ||
| 558 | { | ||
| 559 | noun_t n; | ||
| 560 | n.singular = word; | ||
| 561 | if (forms.size() == 1) | ||
| 562 | { | 198 | { |
| 563 | n.plural = forms[0]; | 199 | notion& synset = lookupOrCreateNotion(synset_id); |
| 564 | } else { | 200 | lemma& lex = lookupOrCreateLemma(text); |
| 565 | // As of AGID 2014.08.11, this is non-existent. | 201 | word& entry = createWord(synset, lex, tag_count); |
| 566 | std::cout << "Ignoring noun \"" << word << "\" due to non-standard number of forms." << std::endl; | 202 | |
| 203 | wordByWnidAndWnum_[lookup] = &entry; | ||
| 567 | } | 204 | } |
| 568 | |||
| 569 | nouns[word] = n; | ||
| 570 | |||
| 571 | break; | ||
| 572 | } | 205 | } |
| 573 | } | 206 | } |
| 574 | } | ||
| 575 | |||
| 576 | // Pronounciations | ||
| 577 | std::cout << "Reading pronunciations..." << std::endl; | ||
| 578 | |||
| 579 | std::ifstream pronfile(argv[4]); | ||
| 580 | if (!pronfile.is_open()) | ||
| 581 | { | ||
| 582 | std::cout << "Could not open CMUDICT file: " << argv[4] << std::endl; | ||
| 583 | print_usage(); | ||
| 584 | } | ||
| 585 | |||
| 586 | for (;;) | ||
| 587 | { | ||
| 588 | std::string line; | ||
| 589 | if (!getline(pronfile, line)) | ||
| 590 | { | ||
| 591 | break; | ||
| 592 | } | ||
| 593 | |||
| 594 | if (line.back() == '\r') | ||
| 595 | { | ||
| 596 | line.pop_back(); | ||
| 597 | } | ||
| 598 | 207 | ||
| 599 | std::regex phoneme("([A-Z][^ \\(]*)(?:\\(\\d+\\))? ([A-Z 0-9]+)"); | 208 | void generator::readAdjectivePositioning() |
| 600 | std::smatch phoneme_data; | ||
| 601 | if (std::regex_search(line, phoneme_data, phoneme)) | ||
| 602 | { | 209 | { |
| 603 | std::string canonical(phoneme_data[1]); | 210 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_syntax.pl")); |
| 604 | std::transform(std::begin(canonical), std::end(canonical), std::begin(canonical), ::tolower); | 211 | progress ppgs("Reading adjective positionings from WordNet...", lines.size()); |
| 605 | |||
| 606 | std::string phonemes = phoneme_data[2]; | ||
| 607 | auto phoneme_set = verbly::split<std::list<std::string>>(phonemes, " "); | ||
| 608 | auto phemstrt = std::find_if(std::begin(phoneme_set), std::end(phoneme_set), [] (std::string phoneme) { | ||
| 609 | return phoneme.find("1") != std::string::npos; | ||
| 610 | }); | ||
| 611 | 212 | ||
| 612 | pronunciation_t p; | 213 | for (std::string line : lines) |
| 613 | p.phonemes = phonemes; | ||
| 614 | |||
| 615 | // Rhyme detection | ||
| 616 | if (phemstrt != std::end(phoneme_set)) | ||
| 617 | { | 214 | { |
| 618 | std::stringstream rhymer; | 215 | ppgs.update(); |
| 619 | for (auto it = phemstrt; it != std::end(phoneme_set); it++) | ||
| 620 | { | ||
| 621 | std::string naked; | ||
| 622 | std::remove_copy_if(std::begin(*it), std::end(*it), std::back_inserter(naked), [] (char ch) { | ||
| 623 | return isdigit(ch); | ||
| 624 | }); | ||
| 625 | |||
| 626 | if (it != phemstrt) | ||
| 627 | { | ||
| 628 | rhymer << " "; | ||
| 629 | } | ||
| 630 | |||
| 631 | rhymer << naked; | ||
| 632 | } | ||
| 633 | 216 | ||
| 634 | p.rhyme = rhymer.str(); | 217 | std::regex relation("^syntax\\((3\\d{8}),(\\d+),([ipa])p?\\)\\."); |
| 635 | 218 | std::smatch relation_data; | |
| 636 | if (phemstrt != std::begin(phoneme_set)) | 219 | if (!std::regex_search(line, relation_data, relation)) |
| 637 | { | 220 | { |
| 638 | phemstrt--; | 221 | continue; |
| 639 | p.prerhyme = *phemstrt; | ||
| 640 | } else { | ||
| 641 | p.prerhyme = ""; | ||
| 642 | } | 222 | } |
| 643 | } else { | ||
| 644 | p.prerhyme = ""; | ||
| 645 | p.rhyme = ""; | ||
| 646 | } | ||
| 647 | 223 | ||
| 648 | // Syllable/stress | 224 | int synset_id = stoi(relation_data[1]); |
| 649 | for (auto phm : phoneme_set) | 225 | int wnum = stoi(relation_data[2]); |
| 650 | { | 226 | std::string adjpos_str = relation_data[3]; |
| 651 | if (isdigit(phm.back())) | ||
| 652 | { | ||
| 653 | // It's a vowel! | ||
| 654 | p.syllables++; | ||
| 655 | 227 | ||
| 656 | if (phm.back() == '1') | 228 | std::pair<int, int> lookup(synset_id, wnum); |
| 229 | if (wordByWnidAndWnum_.count(lookup)) | ||
| 230 | { | ||
| 231 | word& adj = *wordByWnidAndWnum_.at(lookup); | ||
| 232 | |||
| 233 | if (adjpos_str == "p") | ||
| 234 | { | ||
| 235 | adj.setAdjectivePosition(positioning::predicate); | ||
| 236 | } else if (adjpos_str == "a") | ||
| 237 | { | ||
| 238 | adj.setAdjectivePosition(positioning::attributive); | ||
| 239 | } else if (adjpos_str == "i") | ||
| 657 | { | 240 | { |
| 658 | p.stress.push_back('1'); | 241 | adj.setAdjectivePosition(positioning::postnominal); |
| 659 | } else { | 242 | } else { |
| 660 | p.stress.push_back('0'); | 243 | // Can't happen because of how we specified the regex. |
| 244 | assert(false); | ||
| 661 | } | 245 | } |
| 662 | } | 246 | } |
| 663 | } | 247 | } |
| 664 | |||
| 665 | pronunciations[canonical].insert(p); | ||
| 666 | } | ||
| 667 | } | ||
| 668 | |||
| 669 | // Images | ||
| 670 | std::cout << "Reading images..." << std::endl; | ||
| 671 | |||
| 672 | std::ifstream imagefile(argv[5]); | ||
| 673 | if (!imagefile.is_open()) | ||
| 674 | { | ||
| 675 | std::cout << "Could not open ImageNet file: " << argv[5] << std::endl; | ||
| 676 | print_usage(); | ||
| 677 | } | ||
| 678 | |||
| 679 | for (;;) | ||
| 680 | { | ||
| 681 | std::string line; | ||
| 682 | if (!getline(imagefile, line)) | ||
| 683 | { | ||
| 684 | break; | ||
| 685 | } | ||
| 686 | |||
| 687 | if (line.back() == '\r') | ||
| 688 | { | ||
| 689 | line.pop_back(); | ||
| 690 | } | ||
| 691 | |||
| 692 | std::string wnid_s = line.substr(1, 8); | ||
| 693 | int wnid = stoi(wnid_s) + 100000000; | ||
| 694 | images[wnid]++; | ||
| 695 | } | ||
| 696 | |||
| 697 | imagefile.close(); | ||
| 698 | |||
| 699 | // Start writing output | ||
| 700 | std::cout << "Writing schema..." << std::endl; | ||
| 701 | |||
| 702 | sqlite3* ppdb; | ||
| 703 | if (sqlite3_open_v2(argv[6], &ppdb, SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE, NULL) != SQLITE_OK) | ||
| 704 | { | ||
| 705 | std::cout << "Error opening output datafile: " << sqlite3_errmsg(ppdb) << std::endl; | ||
| 706 | print_usage(); | ||
| 707 | } | ||
| 708 | |||
| 709 | std::ifstream schemafile("schema.sql"); | ||
| 710 | if (!schemafile.is_open()) | ||
| 711 | { | ||
| 712 | std::cout << "Could not find schema file" << std::endl; | ||
| 713 | print_usage(); | ||
| 714 | } | ||
| 715 | |||
| 716 | std::stringstream schemabuilder; | ||
| 717 | for (;;) | ||
| 718 | { | ||
| 719 | std::string line; | ||
| 720 | if (!getline(schemafile, line)) | ||
| 721 | { | ||
| 722 | break; | ||
| 723 | } | ||
| 724 | |||
| 725 | if (line.back() == '\r') | ||
| 726 | { | ||
| 727 | line.pop_back(); | ||
| 728 | } | ||
| 729 | |||
| 730 | schemabuilder << line << std::endl; | ||
| 731 | } | ||
| 732 | |||
| 733 | std::string schema = schemabuilder.str(); | ||
| 734 | while (!schema.empty()) | ||
| 735 | { | ||
| 736 | std::string query; | ||
| 737 | int divider = schema.find(";"); | ||
| 738 | if (divider != std::string::npos) | ||
| 739 | { | ||
| 740 | query = schema.substr(0, divider+1); | ||
| 741 | schema = schema.substr(divider+2); | ||
| 742 | } else { | ||
| 743 | break; | ||
| 744 | } | 248 | } |
| 745 | 249 | ||
| 746 | sqlite3_stmt* schmstmt; | 250 | void generator::readImageNetUrls() |
| 747 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &schmstmt, NULL) != SQLITE_OK) | ||
| 748 | { | 251 | { |
| 749 | db_error(ppdb, query); | 252 | // The ImageNet datafile is so large that it is unreasonable and |
| 750 | } | 253 | // unnecessary to read it into memory; instead, we will parse each line as |
| 751 | 254 | // we read it. This has the caveat that we cannot display a progress bar. | |
| 752 | if (sqlite3_step(schmstmt) != SQLITE_DONE) | 255 | std::cout << "Reading image counts from ImageNet..." << std::endl; |
| 753 | { | ||
| 754 | db_error(ppdb, query); | ||
| 755 | } | ||
| 756 | |||
| 757 | sqlite3_finalize(schmstmt); | ||
| 758 | } | ||
| 759 | |||
| 760 | std::cout << "Writing prepositions..." << std::endl; | ||
| 761 | std::ifstream prepfile("prepositions.txt"); | ||
| 762 | if (!prepfile.is_open()) | ||
| 763 | { | ||
| 764 | std::cout << "Could not find prepositions file" << std::endl; | ||
| 765 | print_usage(); | ||
| 766 | } | ||
| 767 | |||
| 768 | for (;;) | ||
| 769 | { | ||
| 770 | std::string line; | ||
| 771 | if (!getline(prepfile, line)) | ||
| 772 | { | ||
| 773 | break; | ||
| 774 | } | ||
| 775 | |||
| 776 | if (line.back() == '\r') | ||
| 777 | { | ||
| 778 | line.pop_back(); | ||
| 779 | } | ||
| 780 | |||
| 781 | std::regex relation("^([^:]+): (.+)"); | ||
| 782 | std::smatch relation_data; | ||
| 783 | std::regex_search(line, relation_data, relation); | ||
| 784 | std::string prep = relation_data[1]; | ||
| 785 | std::list<std::string> groups = verbly::split<std::list<std::string>>(relation_data[2], ", "); | ||
| 786 | |||
| 787 | std::string query("INSERT INTO prepositions (form) VALUES (?)"); | ||
| 788 | sqlite3_stmt* ppstmt; | ||
| 789 | |||
| 790 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
| 791 | { | ||
| 792 | db_error(ppdb, query); | ||
| 793 | } | ||
| 794 | |||
| 795 | sqlite3_bind_text(ppstmt, 1, prep.c_str(), prep.length(), SQLITE_TRANSIENT); | ||
| 796 | |||
| 797 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | ||
| 798 | { | ||
| 799 | db_error(ppdb, query); | ||
| 800 | } | ||
| 801 | |||
| 802 | sqlite3_finalize(ppstmt); | ||
| 803 | |||
| 804 | query = "SELECT last_insert_rowid()"; | ||
| 805 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
| 806 | { | ||
| 807 | db_error(ppdb, query); | ||
| 808 | } | ||
| 809 | |||
| 810 | if (sqlite3_step(ppstmt) != SQLITE_ROW) | ||
| 811 | { | ||
| 812 | db_error(ppdb, query); | ||
| 813 | } | ||
| 814 | |||
| 815 | int rowid = sqlite3_column_int(ppstmt, 0); | ||
| 816 | sqlite3_finalize(ppstmt); | ||
| 817 | |||
| 818 | for (auto group : groups) | ||
| 819 | { | ||
| 820 | query = "INSERT INTO preposition_groups (preposition_id, groupname) VALUES (?, ?)"; | ||
| 821 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
| 822 | { | ||
| 823 | db_error(ppdb, query); | ||
| 824 | } | ||
| 825 | 256 | ||
| 826 | sqlite3_bind_int(ppstmt, 1, rowid); | 257 | std::ifstream file(imageNetPath_); |
| 827 | sqlite3_bind_text(ppstmt, 2, group.c_str(), group.length(), SQLITE_TRANSIENT); | 258 | if (!file) |
| 828 | |||
| 829 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | ||
| 830 | { | 259 | { |
| 831 | db_error(ppdb, query); | 260 | throw std::invalid_argument("Could not find file " + imageNetPath_); |
| 832 | } | 261 | } |
| 833 | |||
| 834 | sqlite3_finalize(ppstmt); | ||
| 835 | } | ||
| 836 | } | ||
| 837 | |||
| 838 | 262 | ||
| 839 | { | 263 | std::string line; |
| 840 | progress ppgs("Writing verbs...", verbs.size()); | 264 | while (std::getline(file, line)) |
| 841 | for (auto& mapping : verbs) | ||
| 842 | { | ||
| 843 | sqlite3_stmt* ppstmt; | ||
| 844 | std::string query("INSERT INTO verbs (infinitive, past_tense, past_participle, ing_form, s_form) VALUES (?, ?, ?, ?, ?)"); | ||
| 845 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
| 846 | { | ||
| 847 | db_error(ppdb, query); | ||
| 848 | } | ||
| 849 | |||
| 850 | sqlite3_bind_text(ppstmt, 1, mapping.second.infinitive.c_str(), mapping.second.infinitive.length(), SQLITE_TRANSIENT); | ||
| 851 | sqlite3_bind_text(ppstmt, 2, mapping.second.past_tense.c_str(), mapping.second.past_tense.length(), SQLITE_TRANSIENT); | ||
| 852 | sqlite3_bind_text(ppstmt, 3, mapping.second.past_participle.c_str(), mapping.second.past_participle.length(), SQLITE_TRANSIENT); | ||
| 853 | sqlite3_bind_text(ppstmt, 4, mapping.second.ing_form.c_str(), mapping.second.ing_form.length(), SQLITE_TRANSIENT); | ||
| 854 | sqlite3_bind_text(ppstmt, 5, mapping.second.s_form.c_str(), mapping.second.s_form.length(), SQLITE_TRANSIENT); | ||
| 855 | |||
| 856 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | ||
| 857 | { | ||
| 858 | db_error(ppdb, query); | ||
| 859 | } | ||
| 860 | |||
| 861 | sqlite3_finalize(ppstmt); | ||
| 862 | |||
| 863 | std::string canonical(mapping.second.infinitive); | ||
| 864 | std::transform(std::begin(canonical), std::end(canonical), std::begin(canonical), ::tolower); | ||
| 865 | if (pronunciations.count(canonical) == 1) | ||
| 866 | { | 265 | { |
| 867 | query = "SELECT last_insert_rowid()"; | 266 | if (line.back() == '\r') |
| 868 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
| 869 | { | 267 | { |
| 870 | db_error(ppdb, query); | 268 | line.pop_back(); |
| 871 | } | 269 | } |
| 872 | 270 | ||
| 873 | if (sqlite3_step(ppstmt) != SQLITE_ROW) | 271 | std::string wnid_s = line.substr(1, 8); |
| 272 | int wnid = stoi(wnid_s) + 100000000; | ||
| 273 | if (notionByWnid_.count(wnid)) | ||
| 874 | { | 274 | { |
| 875 | db_error(ppdb, query); | 275 | // We know that this notion has a wnid and is a noun. |
| 876 | } | 276 | notionByWnid_.at(wnid)->incrementNumOfImages(); |
| 877 | |||
| 878 | int rowid = sqlite3_column_int(ppstmt, 0); | ||
| 879 | |||
| 880 | sqlite3_finalize(ppstmt); | ||
| 881 | |||
| 882 | mapping.second.id = rowid; | ||
| 883 | |||
| 884 | for (auto pronunciation : pronunciations[canonical]) | ||
| 885 | { | ||
| 886 | if (!pronunciation.rhyme.empty()) | ||
| 887 | { | ||
| 888 | query = "INSERT INTO verb_pronunciations (verb_id, pronunciation, syllables, stress, prerhyme, rhyme) VALUES (?, ?, ?, ?, ?, ?)"; | ||
| 889 | } else { | ||
| 890 | query = "INSERT INTO verb_pronunciations (verb_id, pronunciation, syllables, stress) VALUES (?, ?, ?, ?)"; | ||
| 891 | } | ||
| 892 | |||
| 893 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
| 894 | { | ||
| 895 | db_error(ppdb, query); | ||
| 896 | } | ||
| 897 | |||
| 898 | sqlite3_bind_int(ppstmt, 1, rowid); | ||
| 899 | sqlite3_bind_text(ppstmt, 2, pronunciation.phonemes.c_str(), pronunciation.phonemes.length(), SQLITE_TRANSIENT); | ||
| 900 | sqlite3_bind_int(ppstmt, 3, pronunciation.syllables); | ||
| 901 | sqlite3_bind_text(ppstmt, 4, pronunciation.stress.c_str(), pronunciation.stress.length(), SQLITE_TRANSIENT); | ||
| 902 | |||
| 903 | if (!pronunciation.rhyme.empty()) | ||
| 904 | { | ||
| 905 | sqlite3_bind_text(ppstmt, 5, pronunciation.prerhyme.c_str(), pronunciation.prerhyme.length(), SQLITE_TRANSIENT); | ||
| 906 | sqlite3_bind_text(ppstmt, 6, pronunciation.rhyme.c_str(), pronunciation.rhyme.length(), SQLITE_TRANSIENT); | ||
| 907 | } | ||
| 908 | |||
| 909 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | ||
| 910 | { | ||
| 911 | db_error(ppdb, query); | ||
| 912 | } | ||
| 913 | |||
| 914 | sqlite3_finalize(ppstmt); | ||
| 915 | } | 277 | } |
| 916 | } | 278 | } |
| 917 | |||
| 918 | ppgs.update(); | ||
| 919 | } | 279 | } |
| 920 | } | 280 | |
| 921 | 281 | void generator::readWordNetSenseKeys() | |
| 922 | { | ||
| 923 | progress ppgs("Writing verb frames...", groups.size()); | ||
| 924 | for (auto& mapping : groups) | ||
| 925 | { | 282 | { |
| 926 | std::list<json> roledatal; | 283 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_sk.pl")); |
| 927 | std::transform(std::begin(mapping.second.roles), std::end(mapping.second.roles), std::back_inserter(roledatal), [] (std::pair<std::string, selrestr_t> r) { | 284 | progress ppgs("Reading sense keys from WordNet...", lines.size()); |
| 928 | json role; | ||
| 929 | role["type"] = r.first; | ||
| 930 | role["selrestrs"] = export_selrestrs(r.second); | ||
| 931 | |||
| 932 | return role; | ||
| 933 | }); | ||
| 934 | |||
| 935 | json roledata(roledatal); | ||
| 936 | std::string rdm = roledata.dump(); | ||
| 937 | |||
| 938 | sqlite3_stmt* ppstmt; | ||
| 939 | std::string query("INSERT INTO groups (data) VALUES (?)"); | ||
| 940 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
| 941 | { | ||
| 942 | db_error(ppdb, query); | ||
| 943 | } | ||
| 944 | |||
| 945 | sqlite3_bind_blob(ppstmt, 1, rdm.c_str(), rdm.size(), SQLITE_TRANSIENT); | ||
| 946 | |||
| 947 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | ||
| 948 | { | ||
| 949 | db_error(ppdb, query); | ||
| 950 | } | ||
| 951 | 285 | ||
| 952 | sqlite3_finalize(ppstmt); | 286 | for (std::string line : lines) |
| 953 | |||
| 954 | query = "SELECT last_insert_rowid()"; | ||
| 955 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
| 956 | { | ||
| 957 | db_error(ppdb, query); | ||
| 958 | } | ||
| 959 | |||
| 960 | if (sqlite3_step(ppstmt) != SQLITE_ROW) | ||
| 961 | { | ||
| 962 | db_error(ppdb, query); | ||
| 963 | } | ||
| 964 | |||
| 965 | int gid = sqlite3_column_int(ppstmt, 0); | ||
| 966 | sqlite3_finalize(ppstmt); | ||
| 967 | |||
| 968 | for (auto frame : mapping.second.frames) | ||
| 969 | { | 287 | { |
| 970 | std::list<json> fdatap; | 288 | ppgs.update(); |
| 971 | std::transform(std::begin(frame), std::end(frame), std::back_inserter(fdatap), [] (framepart_t& fp) { | ||
| 972 | json part; | ||
| 973 | |||
| 974 | switch (fp.type) | ||
| 975 | { | ||
| 976 | case framepart_t::type_t::np: | ||
| 977 | { | ||
| 978 | part["type"] = "np"; | ||
| 979 | part["role"] = fp.role; | ||
| 980 | part["selrestrs"] = export_selrestrs(fp.selrestrs); | ||
| 981 | part["synrestrs"] = fp.synrestrs; | ||
| 982 | |||
| 983 | break; | ||
| 984 | } | ||
| 985 | |||
| 986 | case framepart_t::type_t::pp: | ||
| 987 | { | ||
| 988 | part["type"] = "pp"; | ||
| 989 | part["values"] = fp.choices; | ||
| 990 | part["preprestrs"] = fp.preprestrs; | ||
| 991 | |||
| 992 | break; | ||
| 993 | } | ||
| 994 | |||
| 995 | case framepart_t::type_t::v: | ||
| 996 | { | ||
| 997 | part["type"] = "v"; | ||
| 998 | |||
| 999 | break; | ||
| 1000 | } | ||
| 1001 | |||
| 1002 | case framepart_t::type_t::adj: | ||
| 1003 | { | ||
| 1004 | part["type"] = "adj"; | ||
| 1005 | |||
| 1006 | break; | ||
| 1007 | } | ||
| 1008 | |||
| 1009 | case framepart_t::type_t::adv: | ||
| 1010 | { | ||
| 1011 | part["type"] = "adv"; | ||
| 1012 | |||
| 1013 | break; | ||
| 1014 | } | ||
| 1015 | |||
| 1016 | case framepart_t::type_t::lex: | ||
| 1017 | { | ||
| 1018 | part["type"] = "lex"; | ||
| 1019 | part["value"] = fp.lexval; | ||
| 1020 | |||
| 1021 | break; | ||
| 1022 | } | ||
| 1023 | } | ||
| 1024 | |||
| 1025 | return part; | ||
| 1026 | }); | ||
| 1027 | |||
| 1028 | json fdata(fdatap); | ||
| 1029 | std::string marshall = fdata.dump(); | ||
| 1030 | |||
| 1031 | query = "INSERT INTO frames (group_id, data) VALUES (?, ?)"; | ||
| 1032 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
| 1033 | { | ||
| 1034 | db_error(ppdb, query); | ||
| 1035 | } | ||
| 1036 | |||
| 1037 | sqlite3_bind_int(ppstmt, 1, gid); | ||
| 1038 | sqlite3_bind_blob(ppstmt, 2, marshall.c_str(), marshall.length(), SQLITE_TRANSIENT); | ||
| 1039 | 289 | ||
| 1040 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | 290 | // We only actually need to lookup verbs by sense key so we'll just |
| 291 | // ignore everything that isn't a verb. | ||
| 292 | std::regex relation("^sk\\((2\\d{8}),(\\d+),'(.+)'\\)\\.$"); | ||
| 293 | std::smatch relation_data; | ||
| 294 | if (!std::regex_search(line, relation_data, relation)) | ||
| 1041 | { | 295 | { |
| 1042 | db_error(ppdb, query); | 296 | continue; |
| 1043 | } | 297 | } |
| 298 | |||
| 299 | int synset_id = stoi(relation_data[1]); | ||
| 300 | int wnum = stoi(relation_data[2]); | ||
| 301 | std::string sense_key = relation_data[3]; | ||
| 1044 | 302 | ||
| 1045 | sqlite3_finalize(ppstmt); | 303 | // We are treating this mapping as injective, which is not entirely |
| 1046 | } | 304 | // accurate. First, the WordNet table contains duplicate rows, so those |
| 1047 | 305 | // need to be ignored. More importantly, a small number of sense keys | |
| 1048 | for (auto member : mapping.second.members) | 306 | // (one for each letter of the Latin alphabet, plus 9 other words) each |
| 1049 | { | 307 | // map to two different words in the same synset which differ only by |
| 1050 | if (verbs.count(member) == 1) | 308 | // capitalization. Luckily, none of these exceptions are verbs, so we |
| 309 | // can pretend that the mapping is injective. | ||
| 310 | if (!wnSenseKeys_.count(sense_key)) | ||
| 1051 | { | 311 | { |
| 1052 | auto& v = verbs[member]; | 312 | std::pair<int, int> lookup(synset_id, wnum); |
| 1053 | 313 | if (wordByWnidAndWnum_.count(lookup)) | |
| 1054 | query = "INSERT INTO verb_groups (verb_id, group_id) VALUES (?, ?)"; | ||
| 1055 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
| 1056 | { | ||
| 1057 | db_error(ppdb, query); | ||
| 1058 | } | ||
| 1059 | |||
| 1060 | sqlite3_bind_int(ppstmt, 1, v.id); | ||
| 1061 | sqlite3_bind_int(ppstmt, 2, gid); | ||
| 1062 | |||
| 1063 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | ||
| 1064 | { | 314 | { |
| 1065 | db_error(ppdb, query); | 315 | wnSenseKeys_[sense_key] = wordByWnidAndWnum_.at(lookup); |
| 1066 | } | 316 | } |
| 1067 | |||
| 1068 | sqlite3_finalize(ppstmt); | ||
| 1069 | } | 317 | } |
| 1070 | } | 318 | } |
| 1071 | |||
| 1072 | ppgs.update(); | ||
| 1073 | } | 319 | } |
| 1074 | } | 320 | |
| 1075 | 321 | void generator::readVerbNet() | |
| 1076 | // Get nouns/adjectives/adverbs from WordNet | ||
| 1077 | // Useful relations: | ||
| 1078 | // - s: master list | ||
| 1079 | // - ant: antonymy (e.g. happy/sad, sad/happy, happiness/sadness) | ||
| 1080 | // - at: variation (e.g. a measurement can be standard or nonstandard) | ||
| 1081 | // - der: derivation (e.g. happy/happily, happily/happy) | ||
| 1082 | // - hyp: hypernymy/hyponymy (e.g. color/red, color/blue) | ||
| 1083 | // - ins: instantiation (do we need this? let's see) | ||
| 1084 | // - mm: member meronymy/holonymy (e.g. family/mother, family/child) | ||
| 1085 | // - mp: part meronymy/holonymy (e.g. wheel/spoke, wheel/tire) | ||
| 1086 | // - ms: substance meronymy/holonymy (e.g. tire/rubber, doorstop/rubber) | ||
| 1087 | // - per: pertainymy (e.g. something that is Alaskan pertains to Alaska) | ||
| 1088 | // mannernymy (e.g. something done quickly is done in a manner that is quick) | ||
| 1089 | // - sa: specification (e.g. inaccurate (general) can mean imprecise or incorrect (specific)) | ||
| 1090 | // - sim: synonymy (e.g. cheerful/happy, happy/cheerful) | ||
| 1091 | // - syntax: positioning flags for some adjectives | ||
| 1092 | std::string wnpref {argv[3]}; | ||
| 1093 | if (wnpref.back() != '/') | ||
| 1094 | { | ||
| 1095 | wnpref += '/'; | ||
| 1096 | } | ||
| 1097 | |||
| 1098 | // s table | ||
| 1099 | { | ||
| 1100 | std::ifstream wnsfile(wnpref + "wn_s.pl"); | ||
| 1101 | if (!wnsfile.is_open()) | ||
| 1102 | { | 322 | { |
| 1103 | std::cout << "Invalid WordNet data directory." << std::endl; | 323 | std::cout << "Reading frames from VerbNet..." << std::endl; |
| 1104 | print_usage(); | ||
| 1105 | } | ||
| 1106 | 324 | ||
| 1107 | std::list<std::string> lines; | 325 | DIR* dir; |
| 1108 | for (;;) | 326 | if ((dir = opendir(verbNetPath_.c_str())) == nullptr) |
| 1109 | { | ||
| 1110 | std::string line; | ||
| 1111 | if (!getline(wnsfile, line)) | ||
| 1112 | { | 327 | { |
| 1113 | break; | 328 | throw std::invalid_argument("Invalid VerbNet data directory"); |
| 1114 | } | 329 | } |
| 1115 | 330 | ||
| 1116 | if (line.back() == '\r') | 331 | struct dirent* ent; |
| 1117 | { | 332 | while ((ent = readdir(dir)) != nullptr) |
| 1118 | line.pop_back(); | ||
| 1119 | } | ||
| 1120 | |||
| 1121 | lines.push_back(line); | ||
| 1122 | } | ||
| 1123 | |||
| 1124 | progress ppgs("Writing nouns, adjectives, and adverbs...", lines.size()); | ||
| 1125 | for (auto line : lines) | ||
| 1126 | { | ||
| 1127 | ppgs.update(); | ||
| 1128 | |||
| 1129 | std::regex relation("^s\\(([134]\\d{8}),(\\d+),'(.+)',\\w,\\d+,\\d+\\)\\.$"); | ||
| 1130 | std::smatch relation_data; | ||
| 1131 | if (!std::regex_search(line, relation_data, relation)) | ||
| 1132 | { | 333 | { |
| 1133 | continue; | 334 | std::string filename(verbNetPath_); |
| 1134 | } | 335 | |
| 336 | if (filename.back() != '/') | ||
| 337 | { | ||
| 338 | filename += '/'; | ||
| 339 | } | ||
| 1135 | 340 | ||
| 1136 | int synset_id = stoi(relation_data[1]); | 341 | filename += ent->d_name; |
| 1137 | int wnum = stoi(relation_data[2]); | ||
| 1138 | std::string word = relation_data[3]; | ||
| 1139 | size_t word_it; | ||
| 1140 | while ((word_it = word.find("''")) != std::string::npos) | ||
| 1141 | { | ||
| 1142 | word.erase(word_it, 1); | ||
| 1143 | } | ||
| 1144 | 342 | ||
| 1145 | std::string query; | 343 | if (filename.rfind(".xml") != filename.size() - 4) |
| 1146 | switch (synset_id / 100000000) | ||
| 1147 | { | ||
| 1148 | case 1: // Noun | ||
| 1149 | { | 344 | { |
| 1150 | if (nouns.count(word) == 1) | 345 | continue; |
| 1151 | { | ||
| 1152 | query = "INSERT INTO nouns (singular, proper, complexity, images, wnid, plural) VALUES (?, ?, ?, ?, ?, ?)"; | ||
| 1153 | } else { | ||
| 1154 | query = "INSERT INTO nouns (singular, proper, complexity, images, wnid) VALUES (?, ?, ?, ?, ?)"; | ||
| 1155 | } | ||
| 1156 | |||
| 1157 | break; | ||
| 1158 | } | 346 | } |
| 1159 | 347 | ||
| 1160 | case 2: // Verb | 348 | xmlDocPtr doc = xmlParseFile(filename.c_str()); |
| 349 | if (doc == nullptr) | ||
| 1161 | { | 350 | { |
| 1162 | // Ignore | 351 | throw std::logic_error("Error opening " + filename); |
| 1163 | |||
| 1164 | break; | ||
| 1165 | } | 352 | } |
| 1166 | 353 | ||
| 1167 | case 3: // Adjective | 354 | xmlNodePtr top = xmlDocGetRootElement(doc); |
| 355 | if ((top == nullptr) || (xmlStrcmp(top->name, reinterpret_cast<const xmlChar*>("VNCLASS")))) | ||
| 1168 | { | 356 | { |
| 1169 | if (adjectives.count(word) == 1) | 357 | throw std::logic_error("Bad VerbNet file format: " + filename); |
| 1170 | { | ||
| 1171 | query = "INSERT INTO adjectives (base_form, complexity, comparative, superlative) VALUES (?, ?, ?, ?)"; | ||
| 1172 | } else { | ||
| 1173 | query = "INSERT INTO adjectives (base_form, complexity) VALUES (?, ?)"; | ||
| 1174 | } | ||
| 1175 | |||
| 1176 | break; | ||
| 1177 | } | 358 | } |
| 1178 | 359 | ||
| 1179 | case 4: // Adverb | 360 | try |
| 1180 | { | 361 | { |
| 1181 | if (adjectives.count(word) == 1) | 362 | createGroup(top); |
| 1182 | { | 363 | } catch (const std::exception& e) |
| 1183 | query = "INSERT INTO adverbs (base_form, complexity, comparative, superlative) VALUES (?, ?, ?, ?)"; | 364 | { |
| 1184 | } else { | 365 | std::throw_with_nested(std::logic_error("Error parsing VerbNet file: " + filename)); |
| 1185 | query = "INSERT INTO adverbs (base_form, complexity) VALUES (?, ?)"; | ||
| 1186 | } | ||
| 1187 | |||
| 1188 | break; | ||
| 1189 | } | 366 | } |
| 1190 | } | 367 | } |
| 368 | |||
| 369 | closedir(dir); | ||
| 370 | } | ||
| 1191 | 371 | ||
| 1192 | sqlite3_stmt* ppstmt; | 372 | void generator::readAgidInflections() |
| 1193 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | 373 | { |
| 374 | std::list<std::string> lines(readFile(agidPath_)); | ||
| 375 | progress ppgs("Reading inflections from AGID...", lines.size()); | ||
| 376 | |||
| 377 | for (std::string line : lines) | ||
| 1194 | { | 378 | { |
| 1195 | db_error(ppdb, query); | 379 | ppgs.update(); |
| 1196 | } | 380 | |
| 381 | int divider = line.find_first_of(" "); | ||
| 382 | std::string infinitive = line.substr(0, divider); | ||
| 383 | line = line.substr(divider+1); | ||
| 384 | char type = line[0]; | ||
| 1197 | 385 | ||
| 1198 | sqlite3_bind_text(ppstmt, 1, word.c_str(), word.length(), SQLITE_TRANSIENT); | 386 | if (line[1] == '?') |
| 1199 | switch (synset_id / 100000000) | ||
| 1200 | { | ||
| 1201 | case 1: // Noun | ||
| 1202 | { | 387 | { |
| 1203 | sqlite3_bind_int(ppstmt, 2, (std::any_of(std::begin(word), std::end(word), [] (char ch) { | 388 | line.erase(0, 4); |
| 1204 | return isupper(ch); | 389 | } else { |
| 1205 | }) ? 1 : 0)); | 390 | line.erase(0, 3); |
| 1206 | |||
| 1207 | sqlite3_bind_int(ppstmt, 3, verbly::split<std::list<std::string>>(word, " ").size()); | ||
| 1208 | sqlite3_bind_int(ppstmt, 4, images[synset_id]); | ||
| 1209 | sqlite3_bind_int(ppstmt, 5, synset_id); | ||
| 1210 | |||
| 1211 | if (nouns.count(word) == 1) | ||
| 1212 | { | ||
| 1213 | sqlite3_bind_text(ppstmt, 6, nouns[word].plural.c_str(), nouns[word].plural.length(), SQLITE_TRANSIENT); | ||
| 1214 | } | ||
| 1215 | |||
| 1216 | break; | ||
| 1217 | } | 391 | } |
| 1218 | 392 | ||
| 1219 | case 3: // Adjective | 393 | if (!lemmaByBaseForm_.count(infinitive) && (type != 'V')) |
| 1220 | case 4: // Adverb | ||
| 1221 | { | 394 | { |
| 1222 | sqlite3_bind_int(ppstmt, 2, verbly::split<std::list<std::string>>(word, " ").size()); | 395 | continue; |
| 1223 | 396 | } | |
| 1224 | if (adjectives.count(word) == 1) | 397 | |
| 398 | lemma& curLemma = lookupOrCreateLemma(infinitive); | ||
| 399 | |||
| 400 | auto forms = split<std::vector<std::string>>(line, " | "); | ||
| 401 | for (std::string& inflForm : forms) | ||
| 402 | { | ||
| 403 | int sympos = inflForm.find_first_of(",?"); | ||
| 404 | if (sympos != std::string::npos) | ||
| 1225 | { | 405 | { |
| 1226 | sqlite3_bind_text(ppstmt, 3, adjectives[word].comparative.c_str(), adjectives[word].comparative.length(), SQLITE_TRANSIENT); | 406 | inflForm = inflForm.substr(0, sympos); |
| 1227 | sqlite3_bind_text(ppstmt, 4, adjectives[word].superlative.c_str(), adjectives[word].superlative.length(), SQLITE_TRANSIENT); | ||
| 1228 | } | 407 | } |
| 1229 | |||
| 1230 | break; | ||
| 1231 | } | 408 | } |
| 1232 | } | ||
| 1233 | 409 | ||
| 1234 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | 410 | switch (type) |
| 1235 | { | ||
| 1236 | db_error(ppdb, query); | ||
| 1237 | } | ||
| 1238 | |||
| 1239 | sqlite3_finalize(ppstmt); | ||
| 1240 | |||
| 1241 | query = "SELECT last_insert_rowid()"; | ||
| 1242 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
| 1243 | { | ||
| 1244 | db_error(ppdb, query); | ||
| 1245 | } | ||
| 1246 | |||
| 1247 | if (sqlite3_step(ppstmt) != SQLITE_ROW) | ||
| 1248 | { | ||
| 1249 | db_error(ppdb, query); | ||
| 1250 | } | ||
| 1251 | |||
| 1252 | int rowid = sqlite3_column_int(ppstmt, 0); | ||
| 1253 | wn[synset_id][wnum] = rowid; | ||
| 1254 | |||
| 1255 | sqlite3_finalize(ppstmt); | ||
| 1256 | |||
| 1257 | std::string canonical(word); | ||
| 1258 | std::transform(std::begin(canonical), std::end(canonical), std::begin(canonical), ::tolower); | ||
| 1259 | if (pronunciations.count(canonical) == 1) | ||
| 1260 | { | ||
| 1261 | for (auto pronunciation : pronunciations[canonical]) | ||
| 1262 | { | 411 | { |
| 1263 | switch (synset_id / 100000000) | 412 | case 'V': |
| 1264 | { | 413 | { |
| 1265 | case 1: // Noun | 414 | if (forms.size() == 4) |
| 1266 | { | 415 | { |
| 1267 | if (!pronunciation.rhyme.empty()) | 416 | curLemma.addInflection(inflection::past_tense, lookupOrCreateForm(forms[0])); |
| 1268 | { | 417 | curLemma.addInflection(inflection::past_participle, lookupOrCreateForm(forms[1])); |
| 1269 | query = "INSERT INTO noun_pronunciations (noun_id, pronunciation, syllables, stress, prerhyme, rhyme) VALUES (?, ?, ?, ?, ?, ?)"; | 418 | curLemma.addInflection(inflection::ing_form, lookupOrCreateForm(forms[2])); |
| 1270 | } else { | 419 | curLemma.addInflection(inflection::s_form, lookupOrCreateForm(forms[3])); |
| 1271 | query = "INSERT INTO noun_pronunciations (noun_id, pronunciation, syllables, stress) VALUES (?, ?, ?, ?)"; | 420 | } else if (forms.size() == 3) |
| 1272 | } | ||
| 1273 | |||
| 1274 | break; | ||
| 1275 | } | ||
| 1276 | |||
| 1277 | case 3: // Adjective | ||
| 1278 | { | 421 | { |
| 1279 | if (!pronunciation.rhyme.empty()) | 422 | curLemma.addInflection(inflection::past_tense, lookupOrCreateForm(forms[0])); |
| 1280 | { | 423 | curLemma.addInflection(inflection::past_participle, lookupOrCreateForm(forms[0])); |
| 1281 | query = "INSERT INTO adjective_pronunciations (adjective_id, pronunciation, syllables, stress, prerhyme, rhyme) VALUES (?, ?, ?, ?, ?, ?)"; | 424 | curLemma.addInflection(inflection::ing_form, lookupOrCreateForm(forms[1])); |
| 1282 | } else { | 425 | curLemma.addInflection(inflection::s_form, lookupOrCreateForm(forms[2])); |
| 1283 | query = "INSERT INTO adjective_pronunciations (adjective_id, pronunciation, syllables, stress) VALUES (?, ?, ?, ?)"; | 426 | } else if (forms.size() == 8) |
| 1284 | } | 427 | { |
| 1285 | 428 | // As of AGID 2014.08.11, this is only "to be" | |
| 1286 | break; | 429 | curLemma.addInflection(inflection::past_tense, lookupOrCreateForm(forms[0])); |
| 430 | curLemma.addInflection(inflection::past_participle, lookupOrCreateForm(forms[2])); | ||
| 431 | curLemma.addInflection(inflection::ing_form, lookupOrCreateForm(forms[3])); | ||
| 432 | curLemma.addInflection(inflection::s_form, lookupOrCreateForm(forms[4])); | ||
| 433 | } else { | ||
| 434 | // Words that don't fit the cases above as of AGID 2014.08.11: | ||
| 435 | // - may and shall do not conjugate the way we want them to | ||
| 436 | // - methinks only has a past tense and is an outlier | ||
| 437 | // - wit has five forms, and is archaic/obscure enough that we can ignore it for now | ||
| 438 | std::cout << " Ignoring verb \"" << infinitive << "\" due to non-standard number of forms." << std::endl; | ||
| 1287 | } | 439 | } |
| 1288 | 440 | ||
| 1289 | case 4: // Adverb | 441 | // For verbs in particular, we sometimes create a notion and a word |
| 442 | // from inflection data. Specifically, if there are not yet any | ||
| 443 | // verbs existing that have the same infinitive form. "Yet" means | ||
| 444 | // that this verb appears in the AGID data but not in either WordNet | ||
| 445 | // or VerbNet. | ||
| 446 | if (!wordsByBaseForm_.count(infinitive) | ||
| 447 | || !std::any_of(std::begin(wordsByBaseForm_.at(infinitive)), std::end(wordsByBaseForm_.at(infinitive)), [] (word* w) { | ||
| 448 | return w->getNotion().getPartOfSpeech() == part_of_speech::verb; | ||
| 449 | })) | ||
| 1290 | { | 450 | { |
| 1291 | if (!pronunciation.rhyme.empty()) | 451 | notion& n = createNotion(part_of_speech::verb); |
| 1292 | { | 452 | createWord(n, curLemma); |
| 1293 | query = "INSERT INTO adverb_pronunciations (adverb_id, pronunciation, syllables, stress, prerhyme, rhyme) VALUES (?, ?, ?, ?, ?, ?)"; | ||
| 1294 | } else { | ||
| 1295 | query = "INSERT INTO adverb_pronunciations (adverb_id, pronunciation, syllables, stress) VALUES (?, ?, ?, ?)"; | ||
| 1296 | } | ||
| 1297 | |||
| 1298 | break; | ||
| 1299 | } | 453 | } |
| 1300 | } | ||
| 1301 | |||
| 1302 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
| 1303 | { | ||
| 1304 | db_error(ppdb, query); | ||
| 1305 | } | ||
| 1306 | |||
| 1307 | sqlite3_bind_int(ppstmt, 1, rowid); | ||
| 1308 | sqlite3_bind_text(ppstmt, 2, pronunciation.phonemes.c_str(), pronunciation.phonemes.length(), SQLITE_TRANSIENT); | ||
| 1309 | sqlite3_bind_int(ppstmt, 3, pronunciation.syllables); | ||
| 1310 | sqlite3_bind_text(ppstmt, 4, pronunciation.stress.c_str(), pronunciation.stress.length(), SQLITE_TRANSIENT); | ||
| 1311 | |||
| 1312 | if (!pronunciation.rhyme.empty()) | ||
| 1313 | { | ||
| 1314 | sqlite3_bind_text(ppstmt, 5, pronunciation.prerhyme.c_str(), pronunciation.prerhyme.length(), SQLITE_TRANSIENT); | ||
| 1315 | sqlite3_bind_text(ppstmt, 6, pronunciation.rhyme.c_str(), pronunciation.rhyme.length(), SQLITE_TRANSIENT); | ||
| 1316 | } | ||
| 1317 | 454 | ||
| 1318 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | 455 | break; |
| 1319 | { | ||
| 1320 | db_error(ppdb, query); | ||
| 1321 | } | 456 | } |
| 1322 | |||
| 1323 | sqlite3_finalize(ppstmt); | ||
| 1324 | } | ||
| 1325 | } | ||
| 1326 | } | ||
| 1327 | } | ||
| 1328 | |||
| 1329 | // While we're working on s | ||
| 1330 | { | ||
| 1331 | progress ppgs("Writing word synonyms...", wn.size()); | ||
| 1332 | for (auto sense : wn) | ||
| 1333 | { | ||
| 1334 | ppgs.update(); | ||
| 1335 | 457 | ||
| 1336 | for (auto word1 : sense.second) | 458 | case 'A': |
| 1337 | { | ||
| 1338 | for (auto word2 : sense.second) | ||
| 1339 | { | ||
| 1340 | if (word1 != word2) | ||
| 1341 | { | 459 | { |
| 1342 | std::string query; | 460 | if (forms.size() == 2) |
| 1343 | switch (sense.first / 100000000) | ||
| 1344 | { | 461 | { |
| 1345 | case 1: // Noun | 462 | curLemma.addInflection(inflection::comparative, lookupOrCreateForm(forms[0])); |
| 1346 | { | 463 | curLemma.addInflection(inflection::superlative, lookupOrCreateForm(forms[1])); |
| 1347 | query = "INSERT INTO noun_synonymy (noun_1_id, noun_2_id) VALUES (?, ?)"; | 464 | } else { |
| 1348 | 465 | // As of AGID 2014.08.11, this is only "only", which has only the form "onliest" | |
| 1349 | break; | 466 | std::cout << " Ignoring adjective/adverb \"" << infinitive << "\" due to non-standard number of forms." << std::endl; |
| 1350 | } | 467 | } |
| 1351 | |||
| 1352 | case 2: // Verb | ||
| 1353 | { | ||
| 1354 | // Ignore | ||
| 1355 | |||
| 1356 | break; | ||
| 1357 | } | ||
| 1358 | |||
| 1359 | case 3: // Adjective | ||
| 1360 | { | ||
| 1361 | query = "INSERT INTO adjective_synonymy (adjective_1_id, adjective_2_id) VALUES (?, ?)"; | ||
| 1362 | 468 | ||
| 1363 | break; | 469 | break; |
| 1364 | } | 470 | } |
| 1365 | 471 | ||
| 1366 | case 4: // Adverb | 472 | case 'N': |
| 1367 | { | 473 | { |
| 1368 | query = "INSERT INTO adverb_synonymy (adverb_1_id, adverb_2_id) VALUES (?, ?)"; | 474 | if (forms.size() == 1) |
| 1369 | |||
| 1370 | break; | ||
| 1371 | } | ||
| 1372 | } | ||
| 1373 | |||
| 1374 | sqlite3_stmt* ppstmt; | ||
| 1375 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
| 1376 | { | ||
| 1377 | db_error(ppdb, query); | ||
| 1378 | } | ||
| 1379 | |||
| 1380 | sqlite3_bind_int(ppstmt, 1, word1.second); | ||
| 1381 | sqlite3_bind_int(ppstmt, 2, word2.second); | ||
| 1382 | |||
| 1383 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | ||
| 1384 | { | 475 | { |
| 1385 | db_error(ppdb, query); | 476 | curLemma.addInflection(inflection::plural, lookupOrCreateForm(forms[0])); |
| 477 | } else { | ||
| 478 | // As of AGID 2014.08.11, this is non-existent. | ||
| 479 | std::cout << " Ignoring noun \"" << infinitive << "\" due to non-standard number of forms." << std::endl; | ||
| 1386 | } | 480 | } |
| 1387 | 481 | ||
| 1388 | sqlite3_finalize(ppstmt); | 482 | break; |
| 1389 | } | 483 | } |
| 1390 | } | 484 | } |
| 1391 | } | 485 | } |
| 1392 | } | 486 | } |
| 1393 | } | ||
| 1394 | |||
| 1395 | // ant table | ||
| 1396 | { | ||
| 1397 | std::ifstream wnantfile(wnpref + "wn_ant.pl"); | ||
| 1398 | if (!wnantfile.is_open()) | ||
| 1399 | { | ||
| 1400 | std::cout << "Invalid WordNet data directory." << std::endl; | ||
| 1401 | print_usage(); | ||
| 1402 | } | ||
| 1403 | |||
| 1404 | std::list<std::string> lines; | ||
| 1405 | for (;;) | ||
| 1406 | { | ||
| 1407 | std::string line; | ||
| 1408 | if (!getline(wnantfile, line)) | ||
| 1409 | { | ||
| 1410 | break; | ||
| 1411 | } | ||
| 1412 | 487 | ||
| 1413 | if (line.back() == '\r') | 488 | void generator::readPrepositions() |
| 1414 | { | ||
| 1415 | line.pop_back(); | ||
| 1416 | } | ||
| 1417 | |||
| 1418 | lines.push_back(line); | ||
| 1419 | } | ||
| 1420 | |||
| 1421 | progress ppgs("Writing antonyms...", lines.size()); | ||
| 1422 | for (auto line : lines) | ||
| 1423 | { | 489 | { |
| 1424 | ppgs.update(); | 490 | std::list<std::string> lines(readFile("prepositions.txt")); |
| 491 | progress ppgs("Reading prepositions...", lines.size()); | ||
| 1425 | 492 | ||
| 1426 | std::regex relation("^ant\\(([134]\\d{8}),(\\d+),([134]\\d{8}),(\\d+)\\)\\."); | 493 | for (std::string line : lines) |
| 1427 | std::smatch relation_data; | ||
| 1428 | if (!std::regex_search(line, relation_data, relation)) | ||
| 1429 | { | ||
| 1430 | continue; | ||
| 1431 | } | ||
| 1432 | |||
| 1433 | int synset_id_1 = stoi(relation_data[1]); | ||
| 1434 | int wnum_1 = stoi(relation_data[2]); | ||
| 1435 | int synset_id_2 = stoi(relation_data[3]); | ||
| 1436 | int wnum_2 = stoi(relation_data[4]); | ||
| 1437 | |||
| 1438 | std::string query; | ||
| 1439 | switch (synset_id_1 / 100000000) | ||
| 1440 | { | 494 | { |
| 1441 | case 1: // Noun | 495 | ppgs.update(); |
| 1442 | { | ||
| 1443 | query = "INSERT INTO noun_antonymy (noun_1_id, noun_2_id) VALUES (?, ?)"; | ||
| 1444 | 496 | ||
| 1445 | break; | 497 | std::regex relation("^([^:]+): (.+)"); |
| 1446 | } | 498 | std::smatch relation_data; |
| 1447 | 499 | std::regex_search(line, relation_data, relation); | |
| 1448 | case 2: // Verb | 500 | std::string prep = relation_data[1]; |
| 1449 | { | 501 | auto groups = split<std::list<std::string>>(relation_data[2], ", "); |
| 1450 | // Ignore | ||
| 1451 | 502 | ||
| 1452 | break; | 503 | notion& n = createNotion(part_of_speech::preposition); |
| 1453 | } | 504 | lemma& l = lookupOrCreateLemma(prep); |
| 1454 | 505 | word& w = createWord(n, l); | |
| 1455 | case 3: // Adjective | ||
| 1456 | { | ||
| 1457 | query = "INSERT INTO adjective_antonymy (adjective_1_id, adjective_2_id) VALUES (?, ?)"; | ||
| 1458 | 506 | ||
| 1459 | break; | 507 | n.setPrepositionGroups(groups); |
| 1460 | } | ||
| 1461 | |||
| 1462 | case 4: // Adverb | ||
| 1463 | { | ||
| 1464 | query = "INSERT INTO adverb_antonymy (adverb_1_id, adverb_2_id) VALUES (?, ?)"; | ||
| 1465 | |||
| 1466 | break; | ||
| 1467 | } | ||
| 1468 | } | ||
| 1469 | |||
| 1470 | sqlite3_stmt* ppstmt; | ||
| 1471 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
| 1472 | { | ||
| 1473 | db_error(ppdb, query); | ||
| 1474 | } | ||
| 1475 | |||
| 1476 | sqlite3_bind_int(ppstmt, 1, wn[synset_id_1][wnum_1]); | ||
| 1477 | sqlite3_bind_int(ppstmt, 2, wn[synset_id_2][wnum_2]); | ||
| 1478 | |||
| 1479 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | ||
| 1480 | { | ||
| 1481 | db_error(ppdb, query); | ||
| 1482 | } | ||
| 1483 | |||
| 1484 | sqlite3_finalize(ppstmt); | ||
| 1485 | } | ||
| 1486 | } | ||
| 1487 | |||
| 1488 | // at table | ||
| 1489 | { | ||
| 1490 | std::ifstream wnatfile(wnpref + "wn_at.pl"); | ||
| 1491 | if (!wnatfile.is_open()) | ||
| 1492 | { | ||
| 1493 | std::cout << "Invalid WordNet data directory." << std::endl; | ||
| 1494 | print_usage(); | ||
| 1495 | } | ||
| 1496 | |||
| 1497 | std::list<std::string> lines; | ||
| 1498 | for (;;) | ||
| 1499 | { | ||
| 1500 | std::string line; | ||
| 1501 | if (!getline(wnatfile, line)) | ||
| 1502 | { | ||
| 1503 | break; | ||
| 1504 | } | 508 | } |
| 1505 | |||
| 1506 | if (line.back() == '\r') | ||
| 1507 | { | ||
| 1508 | line.pop_back(); | ||
| 1509 | } | ||
| 1510 | |||
| 1511 | lines.push_back(line); | ||
| 1512 | } | 509 | } |
| 1513 | 510 | ||
| 1514 | progress ppgs("Writing variations...", lines.size()); | 511 | void generator::readCmudictPronunciations() |
| 1515 | for (auto line : lines) | ||
| 1516 | { | 512 | { |
| 1517 | ppgs.update(); | 513 | std::list<std::string> lines(readFile(cmudictPath_)); |
| 514 | progress ppgs("Reading pronunciations from CMUDICT...", lines.size()); | ||
| 1518 | 515 | ||
| 1519 | std::regex relation("^at\\((1\\d{8}),(3\\d{8})\\)\\."); | 516 | for (std::string line : lines) |
| 1520 | std::smatch relation_data; | ||
| 1521 | if (!std::regex_search(line, relation_data, relation)) | ||
| 1522 | { | 517 | { |
| 1523 | continue; | 518 | ppgs.update(); |
| 1524 | } | 519 | |
| 1525 | 520 | std::regex phoneme("([A-Z][^ \\(]*)(?:\\(\\d+\\))? ([A-Z 0-9]+)"); | |
| 1526 | int synset_id_1 = stoi(relation_data[1]); | 521 | std::smatch phoneme_data; |
| 1527 | int synset_id_2 = stoi(relation_data[2]); | 522 | if (std::regex_search(line, phoneme_data, phoneme)) |
| 1528 | std::string query("INSERT INTO variation (noun_id, adjective_id) VALUES (?, ?)"); | ||
| 1529 | |||
| 1530 | for (auto mapping1 : wn[synset_id_1]) | ||
| 1531 | { | ||
| 1532 | for (auto mapping2 : wn[synset_id_2]) | ||
| 1533 | { | 523 | { |
| 1534 | sqlite3_stmt* ppstmt; | 524 | std::string canonical(phoneme_data[1]); |
| 1535 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK) | 525 | std::transform(std::begin(canonical), std::end(canonical), std::begin(canonical), ::tolower); |
| 1536 | { | ||
| 1537 | db_error(ppdb, query); | ||
| 1538 | } | ||
| 1539 | |||
| 1540 | sqlite3_bind_int(ppstmt, 1, mapping1.second); | ||
| 1541 | sqlite3_bind_int(ppstmt, 2, mapping2.second); | ||
| 1542 | 526 | ||
| 1543 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | 527 | if (!formByText_.count(canonical)) |
| 1544 | { | 528 | { |
| 1545 | db_error(ppdb, query); | 529 | continue; |
| 1546 | } | 530 | } |
| 1547 | 531 | ||
| 1548 | sqlite3_finalize(ppstmt); | 532 | std::string phonemes = phoneme_data[2]; |
| 533 | pronunciations_.emplace_back(phonemes); | ||
| 534 | pronunciation& p = pronunciations_.back(); | ||
| 535 | formByText_.at(canonical)->addPronunciation(p); | ||
| 1549 | } | 536 | } |
| 1550 | } | 537 | } |
| 1551 | } | 538 | } |
| 1552 | } | ||
| 1553 | |||
| 1554 | // der table | ||
| 1555 | { | ||
| 1556 | std::ifstream wnderfile(wnpref + "wn_der.pl"); | ||
| 1557 | if (!wnderfile.is_open()) | ||
| 1558 | { | ||
| 1559 | std::cout << "Invalid WordNet data directory." << std::endl; | ||
| 1560 | print_usage(); | ||
| 1561 | } | ||
| 1562 | 539 | ||
| 1563 | std::list<std::string> lines; | 540 | void generator::writeSchema() |
| 1564 | for (;;) | ||
| 1565 | { | 541 | { |
| 1566 | std::string line; | 542 | std::ifstream file("schema.sql"); |
| 1567 | if (!getline(wnderfile, line)) | 543 | if (!file) |
| 1568 | { | 544 | { |
| 1569 | break; | 545 | throw std::invalid_argument("Could not find database schema"); |
| 1570 | } | 546 | } |
| 1571 | 547 | ||
| 1572 | if (line.back() == '\r') | 548 | std::ostringstream schemaBuilder; |
| 549 | std::string line; | ||
| 550 | while (std::getline(file, line)) | ||
| 1573 | { | 551 | { |
| 1574 | line.pop_back(); | 552 | if (line.back() == '\r') |
| 553 | { | ||
| 554 | line.pop_back(); | ||
| 555 | } | ||
| 556 | |||
| 557 | schemaBuilder << line; | ||
| 1575 | } | 558 | } |
| 1576 | 559 | ||
| 1577 | lines.push_back(line); | 560 | std::string schema = schemaBuilder.str(); |
| 561 | auto queries = split<std::list<std::string>>(schema, ";"); | ||
| 562 | progress ppgs("Writing database schema...", queries.size()); | ||
| 563 | for (std::string query : queries) | ||
| 564 | { | ||
| 565 | if (!queries.empty()) | ||
| 566 | { | ||
| 567 | db_.runQuery(query); | ||
| 568 | } | ||
| 569 | |||
| 570 | ppgs.update(); | ||
| 571 | } | ||
| 1578 | } | 572 | } |
| 1579 | 573 | ||
| 1580 | progress ppgs("Writing morphological derivation...", lines.size()); | 574 | void generator::dumpObjects() |
| 1581 | for (auto line : lines) | ||
| 1582 | { | 575 | { |
| 1583 | ppgs.update(); | ||
| 1584 | |||
| 1585 | std::regex relation("^der\\(([134]\\d{8}),(\\d+),([134]\\d{8}),(\\d+)\\)\\."); | ||
| 1586 | std::smatch relation_data; | ||
| 1587 | if (!std::regex_search(line, relation_data, relation)) | ||
| 1588 | { | 576 | { |
| 1589 | continue; | 577 | progress ppgs("Writing notions...", notions_.size()); |
| 578 | |||
| 579 | for (notion& n : notions_) | ||
| 580 | { | ||
| 581 | db_ << n; | ||
| 582 | |||
| 583 | ppgs.update(); | ||
| 584 | } | ||
| 1590 | } | 585 | } |
| 1591 | 586 | ||
| 1592 | int synset_id_1 = stoi(relation_data[1]); | ||
| 1593 | int wnum_1 = stoi(relation_data[2]); | ||
| 1594 | int synset_id_2 = stoi(relation_data[3]); | ||
| 1595 | int wnum_2 = stoi(relation_data[4]); | ||
| 1596 | std::string query; | ||
| 1597 | switch (synset_id_1 / 100000000) | ||
| 1598 | { | 587 | { |
| 1599 | case 1: // Noun | 588 | progress ppgs("Writing words...", words_.size()); |
| 589 | |||
| 590 | for (word& w : words_) | ||
| 1600 | { | 591 | { |
| 1601 | switch (synset_id_2 / 100000000) | 592 | db_ << w; |
| 1602 | { | ||
| 1603 | case 1: // Noun | ||
| 1604 | { | ||
| 1605 | query = "INSERT INTO noun_noun_derivation (noun_1_id, noun_2_id) VALUES (?, ?)"; | ||
| 1606 | break; | ||
| 1607 | } | ||
| 1608 | |||
| 1609 | case 3: // Adjective | ||
| 1610 | { | ||
| 1611 | query = "INSERT INTO noun_adjective_derivation (noun_id, adjective_id) VALUES (?, ?)"; | ||
| 1612 | break; | ||
| 1613 | } | ||
| 1614 | |||
| 1615 | case 4: // Adverb | ||
| 1616 | { | ||
| 1617 | query = "INSERT INTO noun_adverb_derivation (noun_id, adverb_id) VALUES (?, ?)"; | ||
| 1618 | break; | ||
| 1619 | } | ||
| 1620 | } | ||
| 1621 | 593 | ||
| 1622 | break; | 594 | ppgs.update(); |
| 1623 | } | 595 | } |
| 596 | } | ||
| 597 | |||
| 598 | { | ||
| 599 | progress ppgs("Writing lemmas...", lemmas_.size()); | ||
| 1624 | 600 | ||
| 1625 | case 3: // Adjective | 601 | for (lemma& l : lemmas_) |
| 1626 | { | 602 | { |
| 1627 | switch (synset_id_2 / 100000000) | 603 | db_ << l; |
| 1628 | { | ||
| 1629 | case 1: // Noun | ||
| 1630 | { | ||
| 1631 | query = "INSERT INTO noun_adjective_derivation (adjective_id, noun_id) VALUES (?, ?)"; | ||
| 1632 | break; | ||
| 1633 | } | ||
| 1634 | |||
| 1635 | case 3: // Adjective | ||
| 1636 | { | ||
| 1637 | query = "INSERT INTO adjective_adjective_derivation (adjective_id, adjective_id) VALUES (?, ?)"; | ||
| 1638 | break; | ||
| 1639 | } | ||
| 1640 | |||
| 1641 | case 4: // Adverb | ||
| 1642 | { | ||
| 1643 | query = "INSERT INTO adjective_adverb_derivation (adjective_id, adverb_id) VALUES (?, ?)"; | ||
| 1644 | break; | ||
| 1645 | } | ||
| 1646 | } | ||
| 1647 | 604 | ||
| 1648 | break; | 605 | ppgs.update(); |
| 1649 | } | 606 | } |
| 607 | } | ||
| 608 | |||
| 609 | { | ||
| 610 | progress ppgs("Writing forms...", forms_.size()); | ||
| 1650 | 611 | ||
| 1651 | case 4: // Adverb | 612 | for (form& f : forms_) |
| 1652 | { | 613 | { |
| 1653 | switch (synset_id_2 / 100000000) | 614 | db_ << f; |
| 1654 | { | ||
| 1655 | case 1: // Noun | ||
| 1656 | { | ||
| 1657 | query = "INSERT INTO noun_adverb_derivation (adverb_id, noun_id) VALUES (?, ?)"; | ||
| 1658 | break; | ||
| 1659 | } | ||
| 1660 | |||
| 1661 | case 3: // Adjective | ||
| 1662 | { | ||
| 1663 | query = "INSERT INTO adjective_adverb_derivation (adverb_id, adjective_id) VALUES (?, ?)"; | ||
| 1664 | break; | ||
| 1665 | } | ||
| 1666 | |||
| 1667 | case 4: // Adverb | ||
| 1668 | { | ||
| 1669 | query = "INSERT INTO adverb_adverb_derivation (adverb_1_id, adverb_2_id) VALUES (?, ?)"; | ||
| 1670 | break; | ||
| 1671 | } | ||
| 1672 | } | ||
| 1673 | 615 | ||
| 1674 | break; | 616 | ppgs.update(); |
| 1675 | } | 617 | } |
| 1676 | } | 618 | } |
| 1677 | 619 | ||
| 1678 | sqlite3_stmt* ppstmt; | ||
| 1679 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK) | ||
| 1680 | { | 620 | { |
| 1681 | db_error(ppdb, query); | 621 | progress ppgs("Writing pronunciations...", pronunciations_.size()); |
| 622 | |||
| 623 | for (pronunciation& p : pronunciations_) | ||
| 624 | { | ||
| 625 | db_ << p; | ||
| 626 | |||
| 627 | ppgs.update(); | ||
| 628 | } | ||
| 1682 | } | 629 | } |
| 1683 | 630 | ||
| 1684 | sqlite3_bind_int(ppstmt, 1, wn[synset_id_1][wnum_1]); | ||
| 1685 | sqlite3_bind_int(ppstmt, 2, wn[synset_id_2][wnum_2]); | ||
| 1686 | |||
| 1687 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | ||
| 1688 | { | 631 | { |
| 1689 | db_error(ppdb, query); | 632 | progress ppgs("Writing verb groups...", groups_.size()); |
| 633 | |||
| 634 | for (group& g : groups_) | ||
| 635 | { | ||
| 636 | db_ << g; | ||
| 637 | |||
| 638 | ppgs.update(); | ||
| 639 | } | ||
| 1690 | } | 640 | } |
| 1691 | 641 | ||
| 1692 | sqlite3_finalize(ppstmt); | ||
| 1693 | } | ||
| 1694 | } | ||
| 1695 | |||
| 1696 | // hyp table | ||
| 1697 | { | ||
| 1698 | std::ifstream wnhypfile(wnpref + "wn_hyp.pl"); | ||
| 1699 | if (!wnhypfile.is_open()) | ||
| 1700 | { | ||
| 1701 | std::cout << "Invalid WordNet data directory." << std::endl; | ||
| 1702 | print_usage(); | ||
| 1703 | } | ||
| 1704 | |||
| 1705 | std::list<std::string> lines; | ||
| 1706 | for (;;) | ||
| 1707 | { | ||
| 1708 | std::string line; | ||
| 1709 | if (!getline(wnhypfile, line)) | ||
| 1710 | { | ||
| 1711 | break; | ||
| 1712 | } | ||
| 1713 | |||
| 1714 | if (line.back() == '\r') | ||
| 1715 | { | 642 | { |
| 1716 | line.pop_back(); | 643 | progress ppgs("Writing verb frames...", frames_.size()); |
| 644 | |||
| 645 | for (frame& f : frames_) | ||
| 646 | { | ||
| 647 | db_ << f; | ||
| 648 | |||
| 649 | ppgs.update(); | ||
| 650 | } | ||
| 1717 | } | 651 | } |
| 1718 | |||
| 1719 | lines.push_back(line); | ||
| 1720 | } | 652 | } |
| 1721 | 653 | ||
| 1722 | progress ppgs("Writing hypernyms...", lines.size()); | 654 | void generator::readWordNetAntonymy() |
| 1723 | for (auto line : lines) | ||
| 1724 | { | 655 | { |
| 1725 | ppgs.update(); | 656 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_ant.pl")); |
| 1726 | 657 | progress ppgs("Writing antonyms...", lines.size()); | |
| 1727 | std::regex relation("^hyp\\((1\\d{8}),(1\\d{8})\\)\\."); | 658 | for (auto line : lines) |
| 1728 | std::smatch relation_data; | ||
| 1729 | if (!std::regex_search(line, relation_data, relation)) | ||
| 1730 | { | 659 | { |
| 1731 | continue; | 660 | ppgs.update(); |
| 1732 | } | ||
| 1733 | |||
| 1734 | int synset_id_1 = stoi(relation_data[1]); | ||
| 1735 | int synset_id_2 = stoi(relation_data[2]); | ||
| 1736 | std::string query("INSERT INTO hypernymy (hyponym_id, hypernym_id) VALUES (?, ?)"); | ||
| 1737 | 661 | ||
| 1738 | for (auto mapping1 : wn[synset_id_1]) | 662 | std::regex relation("^ant\\(([134]\\d{8}),(\\d+),([134]\\d{8}),(\\d+)\\)\\."); |
| 1739 | { | 663 | std::smatch relation_data; |
| 1740 | for (auto mapping2 : wn[synset_id_2]) | 664 | if (!std::regex_search(line, relation_data, relation)) |
| 1741 | { | 665 | { |
| 1742 | sqlite3_stmt* ppstmt; | 666 | continue; |
| 1743 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK) | 667 | } |
| 1744 | { | 668 | |
| 1745 | db_error(ppdb, query); | 669 | std::pair<int, int> lookup1(std::stoi(relation_data[1]), std::stoi(relation_data[2])); |
| 1746 | } | 670 | std::pair<int, int> lookup2(std::stoi(relation_data[3]), std::stoi(relation_data[4])); |
| 1747 | 671 | ||
| 1748 | sqlite3_bind_int(ppstmt, 1, mapping1.second); | 672 | if (wordByWnidAndWnum_.count(lookup1) && wordByWnidAndWnum_.count(lookup2)) |
| 1749 | sqlite3_bind_int(ppstmt, 2, mapping2.second); | 673 | { |
| 674 | word& word1 = *wordByWnidAndWnum_.at(lookup1); | ||
| 675 | word& word2 = *wordByWnidAndWnum_.at(lookup2); | ||
| 1750 | 676 | ||
| 1751 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | 677 | std::list<field> fields; |
| 1752 | { | 678 | fields.emplace_back("antonym_1_id", word1.getId()); |
| 1753 | db_error(ppdb, query); | 679 | fields.emplace_back("antonym_2_id", word2.getId()); |
| 1754 | } | ||
| 1755 | 680 | ||
| 1756 | sqlite3_finalize(ppstmt); | 681 | db_.insertIntoTable("antonymy", std::move(fields)); |
| 1757 | } | 682 | } |
| 1758 | } | 683 | } |
| 1759 | } | 684 | } |
| 1760 | } | ||
| 1761 | |||
| 1762 | // ins table | ||
| 1763 | { | ||
| 1764 | std::ifstream wninsfile(wnpref + "wn_ins.pl"); | ||
| 1765 | if (!wninsfile.is_open()) | ||
| 1766 | { | ||
| 1767 | std::cout << "Invalid WordNet data directory." << std::endl; | ||
| 1768 | print_usage(); | ||
| 1769 | } | ||
| 1770 | |||
| 1771 | std::list<std::string> lines; | ||
| 1772 | for (;;) | ||
| 1773 | { | ||
| 1774 | std::string line; | ||
| 1775 | if (!getline(wninsfile, line)) | ||
| 1776 | { | ||
| 1777 | break; | ||
| 1778 | } | ||
| 1779 | 685 | ||
| 1780 | if (line.back() == '\r') | 686 | void generator::readWordNetVariation() |
| 687 | { | ||
| 688 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_at.pl")); | ||
| 689 | progress ppgs("Writing variation...", lines.size()); | ||
| 690 | for (auto line : lines) | ||
| 1781 | { | 691 | { |
| 1782 | line.pop_back(); | 692 | ppgs.update(); |
| 1783 | } | ||
| 1784 | 693 | ||
| 1785 | lines.push_back(line); | 694 | std::regex relation("^at\\((1\\d{8}),(3\\d{8})\\)\\."); |
| 695 | std::smatch relation_data; | ||
| 696 | if (!std::regex_search(line, relation_data, relation)) | ||
| 697 | { | ||
| 698 | continue; | ||
| 699 | } | ||
| 700 | |||
| 701 | int lookup1 = std::stoi(relation_data[1]); | ||
| 702 | int lookup2 = std::stoi(relation_data[2]); | ||
| 703 | |||
| 704 | if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2)) | ||
| 705 | { | ||
| 706 | notion& notion1 = *notionByWnid_.at(lookup1); | ||
| 707 | notion& notion2 = *notionByWnid_.at(lookup2); | ||
| 708 | |||
| 709 | std::list<field> fields; | ||
| 710 | fields.emplace_back("noun_id", notion1.getId()); | ||
| 711 | fields.emplace_back("adjective_id", notion2.getId()); | ||
| 712 | |||
| 713 | db_.insertIntoTable("variation", std::move(fields)); | ||
| 714 | } | ||
| 715 | } | ||
| 1786 | } | 716 | } |
| 1787 | 717 | ||
| 1788 | progress ppgs("Writing instantiations...", lines.size()); | 718 | void generator::readWordNetClasses() |
| 1789 | for (auto line : lines) | ||
| 1790 | { | 719 | { |
| 1791 | ppgs.update(); | 720 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_cls.pl")); |
| 1792 | 721 | progress ppgs("Writing usage, topicality, and regionality...", lines.size()); | |
| 1793 | std::regex relation("^ins\\((1\\d{8}),(1\\d{8})\\)\\."); | 722 | for (auto line : lines) |
| 1794 | std::smatch relation_data; | ||
| 1795 | if (!std::regex_search(line, relation_data, relation)) | ||
| 1796 | { | 723 | { |
| 1797 | continue; | 724 | ppgs.update(); |
| 1798 | } | ||
| 1799 | |||
| 1800 | int synset_id_1 = stoi(relation_data[1]); | ||
| 1801 | int synset_id_2 = stoi(relation_data[2]); | ||
| 1802 | std::string query("INSERT INTO instantiation (instance_id, class_id) VALUES (?, ?)"); | ||
| 1803 | 725 | ||
| 1804 | for (auto mapping1 : wn[synset_id_1]) | 726 | std::regex relation("^cls\\(([134]\\d{8}),(\\d+),(1\\d{8}),(\\d+),([tur])\\)\\."); |
| 1805 | { | 727 | std::smatch relation_data; |
| 1806 | for (auto mapping2 : wn[synset_id_2]) | 728 | if (!std::regex_search(line, relation_data, relation)) |
| 729 | { | ||
| 730 | continue; | ||
| 731 | } | ||
| 732 | |||
| 733 | std::pair<int, int> lookup1(std::stoi(relation_data[1]), std::stoi(relation_data[2])); | ||
| 734 | std::pair<int, int> lookup2(std::stoi(relation_data[3]), std::stoi(relation_data[4])); | ||
| 735 | std::string class_type = relation_data[5]; | ||
| 736 | |||
| 737 | std::string table_name; | ||
| 738 | if (class_type == "t") | ||
| 739 | { | ||
| 740 | table_name += "topicality"; | ||
| 741 | } else if (class_type == "u") | ||
| 742 | { | ||
| 743 | table_name += "usage"; | ||
| 744 | } else if (class_type == "r") | ||
| 745 | { | ||
| 746 | table_name += "regionality"; | ||
| 747 | } | ||
| 748 | |||
| 749 | std::list<int> leftJoin; | ||
| 750 | std::list<int> rightJoin; | ||
| 751 | |||
| 752 | if ((lookup1.second == 0) && (wordsByWnid_.count(lookup1.first))) | ||
| 1807 | { | 753 | { |
| 1808 | sqlite3_stmt* ppstmt; | 754 | std::transform(std::begin(wordsByWnid_.at(lookup1.first)), std::end(wordsByWnid_.at(lookup1.first)), std::back_inserter(leftJoin), [] (word* w) { |
| 1809 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK) | 755 | return w->getId(); |
| 756 | }); | ||
| 757 | } else if (wordByWnidAndWnum_.count(lookup1)) { | ||
| 758 | leftJoin.push_back(wordByWnidAndWnum_.at(lookup1)->getId()); | ||
| 759 | } | ||
| 760 | |||
| 761 | if ((lookup2.second == 0) && (wordsByWnid_.count(lookup2.first))) | ||
| 762 | { | ||
| 763 | std::transform(std::begin(wordsByWnid_.at(lookup2.first)), std::end(wordsByWnid_.at(lookup2.first)), std::back_inserter(rightJoin), [] (word* w) { | ||
| 764 | return w->getId(); | ||
| 765 | }); | ||
| 766 | } else if (wordByWnidAndWnum_.count(lookup2)) { | ||
| 767 | rightJoin.push_back(wordByWnidAndWnum_.at(lookup2)->getId()); | ||
| 768 | } | ||
| 769 | |||
| 770 | for (int word1 : leftJoin) | ||
| 771 | { | ||
| 772 | for (int word2 : rightJoin) | ||
| 1810 | { | 773 | { |
| 1811 | db_error(ppdb, query); | 774 | std::list<field> fields; |
| 1812 | } | 775 | fields.emplace_back("term_id", word1); |
| 776 | fields.emplace_back("domain_id", word2); | ||
| 1813 | 777 | ||
| 1814 | sqlite3_bind_int(ppstmt, 1, mapping1.second); | 778 | db_.insertIntoTable(table_name, std::move(fields)); |
| 1815 | sqlite3_bind_int(ppstmt, 2, mapping2.second); | ||
| 1816 | |||
| 1817 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | ||
| 1818 | { | ||
| 1819 | db_error(ppdb, query); | ||
| 1820 | } | 779 | } |
| 1821 | |||
| 1822 | sqlite3_finalize(ppstmt); | ||
| 1823 | } | 780 | } |
| 1824 | } | 781 | } |
| 1825 | } | 782 | } |
| 1826 | } | ||
| 1827 | |||
| 1828 | // mm table | ||
| 1829 | { | ||
| 1830 | std::ifstream wnmmfile(wnpref + "wn_mm.pl"); | ||
| 1831 | if (!wnmmfile.is_open()) | ||
| 1832 | { | ||
| 1833 | std::cout << "Invalid WordNet data directory." << std::endl; | ||
| 1834 | print_usage(); | ||
| 1835 | } | ||
| 1836 | |||
| 1837 | std::list<std::string> lines; | ||
| 1838 | for (;;) | ||
| 1839 | { | ||
| 1840 | std::string line; | ||
| 1841 | if (!getline(wnmmfile, line)) | ||
| 1842 | { | ||
| 1843 | break; | ||
| 1844 | } | ||
| 1845 | 783 | ||
| 1846 | if (line.back() == '\r') | 784 | void generator::readWordNetCausality() |
| 785 | { | ||
| 786 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_cs.pl")); | ||
| 787 | progress ppgs("Writing causality...", lines.size()); | ||
| 788 | for (auto line : lines) | ||
| 1847 | { | 789 | { |
| 1848 | line.pop_back(); | 790 | ppgs.update(); |
| 1849 | } | ||
| 1850 | 791 | ||
| 1851 | lines.push_back(line); | 792 | std::regex relation("^cs\\((2\\d{8}),(2\\d{8})\\)\\."); |
| 793 | std::smatch relation_data; | ||
| 794 | if (!std::regex_search(line, relation_data, relation)) | ||
| 795 | { | ||
| 796 | continue; | ||
| 797 | } | ||
| 798 | |||
| 799 | int lookup1 = std::stoi(relation_data[1]); | ||
| 800 | int lookup2 = std::stoi(relation_data[2]); | ||
| 801 | |||
| 802 | if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2)) | ||
| 803 | { | ||
| 804 | notion& notion1 = *notionByWnid_.at(lookup1); | ||
| 805 | notion& notion2 = *notionByWnid_.at(lookup2); | ||
| 806 | |||
| 807 | std::list<field> fields; | ||
| 808 | fields.emplace_back("effect_id", notion1.getId()); | ||
| 809 | fields.emplace_back("cause_id", notion2.getId()); | ||
| 810 | |||
| 811 | db_.insertIntoTable("causality", std::move(fields)); | ||
| 812 | } | ||
| 813 | } | ||
| 1852 | } | 814 | } |
| 1853 | 815 | ||
| 1854 | progress ppgs("Writing member meronyms...", lines.size()); | 816 | void generator::readWordNetEntailment() |
| 1855 | for (auto line : lines) | ||
| 1856 | { | 817 | { |
| 1857 | ppgs.update(); | 818 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_ent.pl")); |
| 1858 | 819 | progress ppgs("Writing entailment...", lines.size()); | |
| 1859 | std::regex relation("^mm\\((1\\d{8}),(1\\d{8})\\)\\."); | 820 | for (auto line : lines) |
| 1860 | std::smatch relation_data; | ||
| 1861 | if (!std::regex_search(line, relation_data, relation)) | ||
| 1862 | { | 821 | { |
| 1863 | continue; | 822 | ppgs.update(); |
| 1864 | } | ||
| 1865 | 823 | ||
| 1866 | int synset_id_1 = stoi(relation_data[1]); | 824 | std::regex relation("^ent\\((2\\d{8}),(2\\d{8})\\)\\."); |
| 1867 | int synset_id_2 = stoi(relation_data[2]); | 825 | std::smatch relation_data; |
| 1868 | std::string query("INSERT INTO member_meronymy (holonym_id, meronym_id) VALUES (?, ?)"); | 826 | if (!std::regex_search(line, relation_data, relation)) |
| 1869 | |||
| 1870 | for (auto mapping1 : wn[synset_id_1]) | ||
| 1871 | { | ||
| 1872 | for (auto mapping2 : wn[synset_id_2]) | ||
| 1873 | { | 827 | { |
| 1874 | sqlite3_stmt* ppstmt; | 828 | continue; |
| 1875 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK) | 829 | } |
| 1876 | { | 830 | |
| 1877 | db_error(ppdb, query); | 831 | int lookup1 = std::stoi(relation_data[1]); |
| 1878 | } | 832 | int lookup2 = std::stoi(relation_data[2]); |
| 1879 | 833 | ||
| 1880 | sqlite3_bind_int(ppstmt, 1, mapping1.second); | 834 | if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2)) |
| 1881 | sqlite3_bind_int(ppstmt, 2, mapping2.second); | 835 | { |
| 836 | notion& notion1 = *notionByWnid_.at(lookup1); | ||
| 837 | notion& notion2 = *notionByWnid_.at(lookup2); | ||
| 1882 | 838 | ||
| 1883 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | 839 | std::list<field> fields; |
| 1884 | { | 840 | fields.emplace_back("given_id", notion1.getId()); |
| 1885 | db_error(ppdb, query); | 841 | fields.emplace_back("entailment_id", notion2.getId()); |
| 1886 | } | ||
| 1887 | 842 | ||
| 1888 | sqlite3_finalize(ppstmt); | 843 | db_.insertIntoTable("entailment", std::move(fields)); |
| 1889 | } | 844 | } |
| 1890 | } | 845 | } |
| 1891 | } | 846 | } |
| 1892 | } | 847 | |
| 1893 | 848 | void generator::readWordNetHypernymy() | |
| 1894 | // ms table | ||
| 1895 | { | ||
| 1896 | std::ifstream wnmsfile(wnpref + "wn_ms.pl"); | ||
| 1897 | if (!wnmsfile.is_open()) | ||
| 1898 | { | ||
| 1899 | std::cout << "Invalid WordNet data directory." << std::endl; | ||
| 1900 | print_usage(); | ||
| 1901 | } | ||
| 1902 | |||
| 1903 | std::list<std::string> lines; | ||
| 1904 | for (;;) | ||
| 1905 | { | 849 | { |
| 1906 | std::string line; | 850 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_hyp.pl")); |
| 1907 | if (!getline(wnmsfile, line)) | 851 | progress ppgs("Writing hypernymy...", lines.size()); |
| 852 | for (auto line : lines) | ||
| 1908 | { | 853 | { |
| 1909 | break; | 854 | ppgs.update(); |
| 855 | |||
| 856 | std::regex relation("^hyp\\(([12]\\d{8}),([12]\\d{8})\\)\\."); | ||
| 857 | std::smatch relation_data; | ||
| 858 | if (!std::regex_search(line, relation_data, relation)) | ||
| 859 | { | ||
| 860 | continue; | ||
| 861 | } | ||
| 862 | |||
| 863 | int lookup1 = std::stoi(relation_data[1]); | ||
| 864 | int lookup2 = std::stoi(relation_data[2]); | ||
| 865 | |||
| 866 | if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2)) | ||
| 867 | { | ||
| 868 | notion& notion1 = *notionByWnid_.at(lookup1); | ||
| 869 | notion& notion2 = *notionByWnid_.at(lookup2); | ||
| 870 | |||
| 871 | std::list<field> fields; | ||
| 872 | fields.emplace_back("hyponym_id", notion1.getId()); | ||
| 873 | fields.emplace_back("hypernym_id", notion2.getId()); | ||
| 874 | |||
| 875 | db_.insertIntoTable("hypernymy", std::move(fields)); | ||
| 876 | } | ||
| 1910 | } | 877 | } |
| 878 | } | ||
| 1911 | 879 | ||
| 1912 | if (line.back() == '\r') | 880 | void generator::readWordNetInstantiation() |
| 881 | { | ||
| 882 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_ins.pl")); | ||
| 883 | progress ppgs("Writing instantiation...", lines.size()); | ||
| 884 | for (auto line : lines) | ||
| 1913 | { | 885 | { |
| 1914 | line.pop_back(); | 886 | ppgs.update(); |
| 1915 | } | ||
| 1916 | 887 | ||
| 1917 | lines.push_back(line); | 888 | std::regex relation("^ins\\((1\\d{8}),(1\\d{8})\\)\\."); |
| 889 | std::smatch relation_data; | ||
| 890 | if (!std::regex_search(line, relation_data, relation)) | ||
| 891 | { | ||
| 892 | continue; | ||
| 893 | } | ||
| 894 | |||
| 895 | int lookup1 = std::stoi(relation_data[1]); | ||
| 896 | int lookup2 = std::stoi(relation_data[2]); | ||
| 897 | |||
| 898 | if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2)) | ||
| 899 | { | ||
| 900 | notion& notion1 = *notionByWnid_.at(lookup1); | ||
| 901 | notion& notion2 = *notionByWnid_.at(lookup2); | ||
| 902 | |||
| 903 | std::list<field> fields; | ||
| 904 | fields.emplace_back("instance_id", notion1.getId()); | ||
| 905 | fields.emplace_back("class_id", notion2.getId()); | ||
| 906 | |||
| 907 | db_.insertIntoTable("instantiation", std::move(fields)); | ||
| 908 | } | ||
| 909 | } | ||
| 1918 | } | 910 | } |
| 1919 | 911 | ||
| 1920 | progress ppgs("Writing substance meronyms...", lines.size()); | 912 | void generator::readWordNetMemberMeronymy() |
| 1921 | for (auto line : lines) | ||
| 1922 | { | 913 | { |
| 1923 | ppgs.update(); | 914 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_mm.pl")); |
| 1924 | 915 | progress ppgs("Writing member meronymy...", lines.size()); | |
| 1925 | std::regex relation("^ms\\((1\\d{8}),(1\\d{8})\\)\\."); | 916 | for (auto line : lines) |
| 1926 | std::smatch relation_data; | ||
| 1927 | if (!std::regex_search(line, relation_data, relation)) | ||
| 1928 | { | 917 | { |
| 1929 | continue; | 918 | ppgs.update(); |
| 1930 | } | ||
| 1931 | |||
| 1932 | int synset_id_1 = stoi(relation_data[1]); | ||
| 1933 | int synset_id_2 = stoi(relation_data[2]); | ||
| 1934 | std::string query("INSERT INTO substance_meronymy (holonym_id, meronym_id) VALUES (?, ?)"); | ||
| 1935 | 919 | ||
| 1936 | for (auto mapping1 : wn[synset_id_1]) | 920 | std::regex relation("^mm\\((1\\d{8}),(1\\d{8})\\)\\."); |
| 1937 | { | 921 | std::smatch relation_data; |
| 1938 | for (auto mapping2 : wn[synset_id_2]) | 922 | if (!std::regex_search(line, relation_data, relation)) |
| 1939 | { | 923 | { |
| 1940 | sqlite3_stmt* ppstmt; | 924 | continue; |
| 1941 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK) | 925 | } |
| 1942 | { | 926 | |
| 1943 | db_error(ppdb, query); | 927 | int lookup1 = std::stoi(relation_data[1]); |
| 1944 | } | 928 | int lookup2 = std::stoi(relation_data[2]); |
| 929 | |||
| 930 | if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2)) | ||
| 931 | { | ||
| 932 | notion& notion1 = *notionByWnid_.at(lookup1); | ||
| 933 | notion& notion2 = *notionByWnid_.at(lookup2); | ||
| 1945 | 934 | ||
| 1946 | sqlite3_bind_int(ppstmt, 1, mapping1.second); | 935 | std::list<field> fields; |
| 1947 | sqlite3_bind_int(ppstmt, 2, mapping2.second); | 936 | fields.emplace_back("holonym_id", notion1.getId()); |
| 937 | fields.emplace_back("meronym_id", notion2.getId()); | ||
| 1948 | 938 | ||
| 1949 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | 939 | db_.insertIntoTable("member_meronymy", std::move(fields)); |
| 1950 | { | ||
| 1951 | db_error(ppdb, query); | ||
| 1952 | } | ||
| 1953 | |||
| 1954 | sqlite3_finalize(ppstmt); | ||
| 1955 | } | 940 | } |
| 1956 | } | 941 | } |
| 1957 | } | 942 | } |
| 1958 | } | 943 | |
| 1959 | 944 | void generator::readWordNetPartMeronymy() | |
| 1960 | // mm table | ||
| 1961 | { | ||
| 1962 | std::ifstream wnmpfile(wnpref + "wn_mp.pl"); | ||
| 1963 | if (!wnmpfile.is_open()) | ||
| 1964 | { | ||
| 1965 | std::cout << "Invalid WordNet data directory." << std::endl; | ||
| 1966 | print_usage(); | ||
| 1967 | } | ||
| 1968 | |||
| 1969 | std::list<std::string> lines; | ||
| 1970 | for (;;) | ||
| 1971 | { | 945 | { |
| 1972 | std::string line; | 946 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_mp.pl")); |
| 1973 | if (!getline(wnmpfile, line)) | 947 | progress ppgs("Writing part meronymy...", lines.size()); |
| 948 | for (auto line : lines) | ||
| 1974 | { | 949 | { |
| 1975 | break; | 950 | ppgs.update(); |
| 951 | |||
| 952 | std::regex relation("^mp\\((1\\d{8}),(1\\d{8})\\)\\."); | ||
| 953 | std::smatch relation_data; | ||
| 954 | if (!std::regex_search(line, relation_data, relation)) | ||
| 955 | { | ||
| 956 | continue; | ||
| 957 | } | ||
| 958 | |||
| 959 | int lookup1 = std::stoi(relation_data[1]); | ||
| 960 | int lookup2 = std::stoi(relation_data[2]); | ||
| 961 | |||
| 962 | if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2)) | ||
| 963 | { | ||
| 964 | notion& notion1 = *notionByWnid_.at(lookup1); | ||
| 965 | notion& notion2 = *notionByWnid_.at(lookup2); | ||
| 966 | |||
| 967 | std::list<field> fields; | ||
| 968 | fields.emplace_back("holonym_id", notion1.getId()); | ||
| 969 | fields.emplace_back("meronym_id", notion2.getId()); | ||
| 970 | |||
| 971 | db_.insertIntoTable("part_meronymy", std::move(fields)); | ||
| 972 | } | ||
| 1976 | } | 973 | } |
| 974 | } | ||
| 1977 | 975 | ||
| 1978 | if (line.back() == '\r') | 976 | void generator::readWordNetSubstanceMeronymy() |
| 977 | { | ||
| 978 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_ms.pl")); | ||
| 979 | progress ppgs("Writing substance meronymy...", lines.size()); | ||
| 980 | for (auto line : lines) | ||
| 1979 | { | 981 | { |
| 1980 | line.pop_back(); | 982 | ppgs.update(); |
| 1981 | } | ||
| 1982 | 983 | ||
| 1983 | lines.push_back(line); | 984 | std::regex relation("^ms\\((1\\d{8}),(1\\d{8})\\)\\."); |
| 985 | std::smatch relation_data; | ||
| 986 | if (!std::regex_search(line, relation_data, relation)) | ||
| 987 | { | ||
| 988 | continue; | ||
| 989 | } | ||
| 990 | |||
| 991 | int lookup1 = std::stoi(relation_data[1]); | ||
| 992 | int lookup2 = std::stoi(relation_data[2]); | ||
| 993 | |||
| 994 | if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2)) | ||
| 995 | { | ||
| 996 | notion& notion1 = *notionByWnid_.at(lookup1); | ||
| 997 | notion& notion2 = *notionByWnid_.at(lookup2); | ||
| 998 | |||
| 999 | std::list<field> fields; | ||
| 1000 | fields.emplace_back("holonym_id", notion1.getId()); | ||
| 1001 | fields.emplace_back("meronym_id", notion2.getId()); | ||
| 1002 | |||
| 1003 | db_.insertIntoTable("substance_meronymy", std::move(fields)); | ||
| 1004 | } | ||
| 1005 | } | ||
| 1984 | } | 1006 | } |
| 1985 | 1007 | ||
| 1986 | progress ppgs("Writing part meronyms...", lines.size()); | 1008 | void generator::readWordNetPertainymy() |
| 1987 | for (auto line : lines) | ||
| 1988 | { | 1009 | { |
| 1989 | ppgs.update(); | 1010 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_per.pl")); |
| 1990 | 1011 | progress ppgs("Writing pertainymy and mannernymy...", lines.size()); | |
| 1991 | std::regex relation("^mp\\((1\\d{8}),(1\\d{8})\\)\\."); | 1012 | for (auto line : lines) |
| 1992 | std::smatch relation_data; | ||
| 1993 | if (!std::regex_search(line, relation_data, relation)) | ||
| 1994 | { | 1013 | { |
| 1995 | continue; | 1014 | ppgs.update(); |
| 1996 | } | ||
| 1997 | |||
| 1998 | int synset_id_1 = stoi(relation_data[1]); | ||
| 1999 | int synset_id_2 = stoi(relation_data[2]); | ||
| 2000 | std::string query("INSERT INTO part_meronymy (holonym_id, meronym_id) VALUES (?, ?)"); | ||
| 2001 | 1015 | ||
| 2002 | for (auto mapping1 : wn[synset_id_1]) | 1016 | std::regex relation("^per\\(([34]\\d{8}),(\\d+),([13]\\d{8}),(\\d+)\\)\\."); |
| 2003 | { | 1017 | std::smatch relation_data; |
| 2004 | for (auto mapping2 : wn[synset_id_2]) | 1018 | if (!std::regex_search(line, relation_data, relation)) |
| 2005 | { | 1019 | { |
| 2006 | sqlite3_stmt* ppstmt; | 1020 | continue; |
| 2007 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK) | 1021 | } |
| 2008 | { | 1022 | |
| 2009 | db_error(ppdb, query); | 1023 | std::pair<int, int> lookup1(std::stoi(relation_data[1]), std::stoi(relation_data[2])); |
| 2010 | } | 1024 | std::pair<int, int> lookup2(std::stoi(relation_data[3]), std::stoi(relation_data[4])); |
| 1025 | |||
| 1026 | if (wordByWnidAndWnum_.count(lookup1) && wordByWnidAndWnum_.count(lookup2)) | ||
| 1027 | { | ||
| 1028 | word& word1 = *wordByWnidAndWnum_.at(lookup1); | ||
| 1029 | word& word2 = *wordByWnidAndWnum_.at(lookup2); | ||
| 2011 | 1030 | ||
| 2012 | sqlite3_bind_int(ppstmt, 1, mapping1.second); | 1031 | if (word1.getNotion().getPartOfSpeech() == part_of_speech::adjective) |
| 2013 | sqlite3_bind_int(ppstmt, 2, mapping2.second); | 1032 | { |
| 1033 | std::list<field> fields; | ||
| 1034 | fields.emplace_back("pertainym_id", word1.getId()); | ||
| 1035 | fields.emplace_back("noun_id", word2.getId()); | ||
| 2014 | 1036 | ||
| 2015 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | 1037 | db_.insertIntoTable("pertainymy", std::move(fields)); |
| 1038 | } else if (word1.getNotion().getPartOfSpeech() == part_of_speech::adverb) | ||
| 2016 | { | 1039 | { |
| 2017 | db_error(ppdb, query); | 1040 | std::list<field> fields; |
| 2018 | } | 1041 | fields.emplace_back("mannernym_id", word1.getId()); |
| 1042 | fields.emplace_back("adjective_id", word2.getId()); | ||
| 2019 | 1043 | ||
| 2020 | sqlite3_finalize(ppstmt); | 1044 | db_.insertIntoTable("mannernymy", std::move(fields)); |
| 1045 | } | ||
| 2021 | } | 1046 | } |
| 2022 | } | 1047 | } |
| 2023 | } | 1048 | } |
| 2024 | } | ||
| 2025 | |||
| 2026 | // per table | ||
| 2027 | { | ||
| 2028 | std::ifstream wnperfile(wnpref + "wn_per.pl"); | ||
| 2029 | if (!wnperfile.is_open()) | ||
| 2030 | { | ||
| 2031 | std::cout << "Invalid WordNet data directory." << std::endl; | ||
| 2032 | print_usage(); | ||
| 2033 | } | ||
| 2034 | |||
| 2035 | std::list<std::string> lines; | ||
| 2036 | for (;;) | ||
| 2037 | { | ||
| 2038 | std::string line; | ||
| 2039 | if (!getline(wnperfile, line)) | ||
| 2040 | { | ||
| 2041 | break; | ||
| 2042 | } | ||
| 2043 | 1049 | ||
| 2044 | if (line.back() == '\r') | 1050 | void generator::readWordNetSpecification() |
| 1051 | { | ||
| 1052 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_sa.pl")); | ||
| 1053 | progress ppgs("Writing specifications...", lines.size()); | ||
| 1054 | for (auto line : lines) | ||
| 2045 | { | 1055 | { |
| 2046 | line.pop_back(); | 1056 | ppgs.update(); |
| 1057 | |||
| 1058 | std::regex relation("^sa\\((23\\d{8}),(\\d+),(23\\d{8}),(\\d+)\\)\\."); | ||
| 1059 | std::smatch relation_data; | ||
| 1060 | if (!std::regex_search(line, relation_data, relation)) | ||
| 1061 | { | ||
| 1062 | continue; | ||
| 1063 | } | ||
| 1064 | |||
| 1065 | std::pair<int, int> lookup1(std::stoi(relation_data[1]), std::stoi(relation_data[2])); | ||
| 1066 | std::pair<int, int> lookup2(std::stoi(relation_data[3]), std::stoi(relation_data[4])); | ||
| 1067 | |||
| 1068 | if (wordByWnidAndWnum_.count(lookup1) && wordByWnidAndWnum_.count(lookup2)) | ||
| 1069 | { | ||
| 1070 | word& word1 = *wordByWnidAndWnum_.at(lookup1); | ||
| 1071 | word& word2 = *wordByWnidAndWnum_.at(lookup2); | ||
| 1072 | |||
| 1073 | std::list<field> fields; | ||
| 1074 | fields.emplace_back("general_id", word1.getId()); | ||
| 1075 | fields.emplace_back("specific_id", word2.getId()); | ||
| 1076 | |||
| 1077 | db_.insertIntoTable("specification", std::move(fields)); | ||
| 1078 | } | ||
| 2047 | } | 1079 | } |
| 2048 | |||
| 2049 | lines.push_back(line); | ||
| 2050 | } | 1080 | } |
| 2051 | 1081 | ||
| 2052 | progress ppgs("Writing pertainyms and mannernyms...", lines.size()); | 1082 | void generator::readWordNetSimilarity() |
| 2053 | for (auto line : lines) | ||
| 2054 | { | 1083 | { |
| 2055 | ppgs.update(); | 1084 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_sim.pl")); |
| 2056 | 1085 | progress ppgs("Writing adjective similarity...", lines.size()); | |
| 2057 | std::regex relation("^per\\(([34]\\d{8}),(\\d+),([13]\\d{8}),(\\d+)\\)\\."); | 1086 | for (auto line : lines) |
| 2058 | std::smatch relation_data; | ||
| 2059 | if (!std::regex_search(line, relation_data, relation)) | ||
| 2060 | { | 1087 | { |
| 2061 | continue; | 1088 | ppgs.update(); |
| 2062 | } | ||
| 2063 | 1089 | ||
| 2064 | int synset_id_1 = stoi(relation_data[1]); | 1090 | std::regex relation("^sim\\((3\\d{8}),(3\\d{8})\\)\\."); |
| 2065 | int wnum_1 = stoi(relation_data[2]); | 1091 | std::smatch relation_data; |
| 2066 | int synset_id_2 = stoi(relation_data[3]); | 1092 | if (!std::regex_search(line, relation_data, relation)) |
| 2067 | int wnum_2 = stoi(relation_data[4]); | ||
| 2068 | std::string query; | ||
| 2069 | switch (synset_id_1 / 100000000) | ||
| 2070 | { | ||
| 2071 | case 3: // Adjective | ||
| 2072 | { | 1093 | { |
| 2073 | // This is a pertainym, the second word should be a noun | 1094 | continue; |
| 2074 | // Technically it can be an adjective but we're ignoring that | ||
| 2075 | if (synset_id_2 / 100000000 != 1) | ||
| 2076 | { | ||
| 2077 | continue; | ||
| 2078 | } | ||
| 2079 | |||
| 2080 | query = "INSERT INTO pertainymy (pertainym_id, noun_id) VALUES (?, ?)"; | ||
| 2081 | |||
| 2082 | break; | ||
| 2083 | } | 1095 | } |
| 1096 | |||
| 1097 | int lookup1 = std::stoi(relation_data[1]); | ||
| 1098 | int lookup2 = std::stoi(relation_data[2]); | ||
| 2084 | 1099 | ||
| 2085 | case 4: // Adverb | 1100 | if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2)) |
| 2086 | { | 1101 | { |
| 2087 | // This is a mannernym, the second word should be an adjective | 1102 | notion& notion1 = *notionByWnid_.at(lookup1); |
| 2088 | if (synset_id_2 / 100000000 != 3) | 1103 | notion& notion2 = *notionByWnid_.at(lookup2); |
| 2089 | { | ||
| 2090 | continue; | ||
| 2091 | } | ||
| 2092 | 1104 | ||
| 2093 | query = "INSERT INTO mannernymy (mannernym_id, adjective_id) VALUES (?, ?)"; | 1105 | std::list<field> fields; |
| 1106 | fields.emplace_back("adjective_1_id", notion1.getId()); | ||
| 1107 | fields.emplace_back("adjective_2_id", notion2.getId()); | ||
| 2094 | 1108 | ||
| 2095 | break; | 1109 | db_.insertIntoTable("similarity", std::move(fields)); |
| 2096 | } | 1110 | } |
| 2097 | } | 1111 | } |
| 2098 | 1112 | } | |
| 2099 | sqlite3_stmt* ppstmt; | ||
| 2100 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
| 2101 | { | ||
| 2102 | db_error(ppdb, query); | ||
| 2103 | } | ||
| 2104 | |||
| 2105 | sqlite3_bind_int(ppstmt, 1, wn[synset_id_1][wnum_1]); | ||
| 2106 | sqlite3_bind_int(ppstmt, 2, wn[synset_id_2][wnum_2]); | ||
| 2107 | 1113 | ||
| 2108 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | 1114 | std::list<std::string> generator::readFile(std::string path) |
| 1115 | { | ||
| 1116 | std::ifstream file(path); | ||
| 1117 | if (!file) | ||
| 2109 | { | 1118 | { |
| 2110 | db_error(ppdb, query); | 1119 | throw std::invalid_argument("Could not find file " + path); |
| 2111 | } | 1120 | } |
| 2112 | |||
| 2113 | sqlite3_finalize(ppstmt); | ||
| 2114 | } | ||
| 2115 | } | ||
| 2116 | 1121 | ||
| 2117 | // sa table | 1122 | std::list<std::string> lines; |
| 2118 | { | ||
| 2119 | std::ifstream wnsafile(wnpref + "wn_sa.pl"); | ||
| 2120 | if (!wnsafile.is_open()) | ||
| 2121 | { | ||
| 2122 | std::cout << "Invalid WordNet data directory." << std::endl; | ||
| 2123 | print_usage(); | ||
| 2124 | } | ||
| 2125 | |||
| 2126 | std::list<std::string> lines; | ||
| 2127 | for (;;) | ||
| 2128 | { | ||
| 2129 | std::string line; | 1123 | std::string line; |
| 2130 | if (!getline(wnsafile, line)) | 1124 | while (std::getline(file, line)) |
| 2131 | { | ||
| 2132 | break; | ||
| 2133 | } | ||
| 2134 | |||
| 2135 | if (line.back() == '\r') | ||
| 2136 | { | 1125 | { |
| 2137 | line.pop_back(); | 1126 | if (line.back() == '\r') |
| 1127 | { | ||
| 1128 | line.pop_back(); | ||
| 1129 | } | ||
| 1130 | |||
| 1131 | lines.push_back(line); | ||
| 2138 | } | 1132 | } |
| 2139 | 1133 | ||
| 2140 | lines.push_back(line); | 1134 | return lines; |
| 2141 | } | 1135 | } |
| 2142 | 1136 | ||
| 2143 | progress ppgs("Writing specifications...", lines.size()); | 1137 | part_of_speech generator::partOfSpeechByWnid(int wnid) |
| 2144 | for (auto line : lines) | ||
| 2145 | { | 1138 | { |
| 2146 | ppgs.update(); | 1139 | switch (wnid / 100000000) |
| 2147 | |||
| 2148 | std::regex relation("^per\\((3\\d{8}),(\\d+),(3\\d{8}),(\\d+)\\)\\."); | ||
| 2149 | std::smatch relation_data; | ||
| 2150 | if (!std::regex_search(line, relation_data, relation)) | ||
| 2151 | { | ||
| 2152 | continue; | ||
| 2153 | } | ||
| 2154 | |||
| 2155 | int synset_id_1 = stoi(relation_data[1]); | ||
| 2156 | int wnum_1 = stoi(relation_data[2]); | ||
| 2157 | int synset_id_2 = stoi(relation_data[3]); | ||
| 2158 | int wnum_2 = stoi(relation_data[4]); | ||
| 2159 | std::string query("INSERT INTO specification (general_id, specific_id) VALUES (?, ?)"); | ||
| 2160 | |||
| 2161 | sqlite3_stmt* ppstmt; | ||
| 2162 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
| 2163 | { | 1140 | { |
| 2164 | db_error(ppdb, query); | 1141 | case 1: return part_of_speech::noun; |
| 1142 | case 2: return part_of_speech::verb; | ||
| 1143 | case 3: return part_of_speech::adjective; | ||
| 1144 | case 4: return part_of_speech::adverb; | ||
| 1145 | default: throw std::domain_error("Invalid WordNet synset ID: " + std::to_string(wnid)); | ||
| 2165 | } | 1146 | } |
| 1147 | } | ||
| 2166 | 1148 | ||
| 2167 | sqlite3_bind_int(ppstmt, 1, wn[synset_id_1][wnum_1]); | 1149 | notion& generator::createNotion(part_of_speech partOfSpeech) |
| 2168 | sqlite3_bind_int(ppstmt, 2, wn[synset_id_2][wnum_2]); | 1150 | { |
| 1151 | notions_.emplace_back(partOfSpeech); | ||
| 1152 | |||
| 1153 | return notions_.back(); | ||
| 1154 | } | ||
| 2169 | 1155 | ||
| 2170 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | 1156 | notion& generator::lookupOrCreateNotion(int wnid) |
| 1157 | { | ||
| 1158 | if (!notionByWnid_.count(wnid)) | ||
| 2171 | { | 1159 | { |
| 2172 | db_error(ppdb, query); | 1160 | notions_.emplace_back(partOfSpeechByWnid(wnid), wnid); |
| 1161 | notionByWnid_[wnid] = ¬ions_.back(); | ||
| 2173 | } | 1162 | } |
| 2174 | 1163 | ||
| 2175 | sqlite3_finalize(ppstmt); | 1164 | return *notionByWnid_.at(wnid); |
| 2176 | } | ||
| 2177 | } | ||
| 2178 | |||
| 2179 | // sim table | ||
| 2180 | { | ||
| 2181 | std::ifstream wnsimfile(wnpref + "wn_sim.pl"); | ||
| 2182 | if (!wnsimfile.is_open()) | ||
| 2183 | { | ||
| 2184 | std::cout << "Invalid WordNet data directory." << std::endl; | ||
| 2185 | print_usage(); | ||
| 2186 | } | 1165 | } |
| 2187 | 1166 | ||
| 2188 | std::list<std::string> lines; | 1167 | lemma& generator::lookupOrCreateLemma(std::string base_form) |
| 2189 | for (;;) | ||
| 2190 | { | 1168 | { |
| 2191 | std::string line; | 1169 | if (!lemmaByBaseForm_.count(base_form)) |
| 2192 | if (!getline(wnsimfile, line)) | ||
| 2193 | { | 1170 | { |
| 2194 | break; | 1171 | lemmas_.emplace_back(lookupOrCreateForm(base_form)); |
| 1172 | lemmaByBaseForm_[base_form] = &lemmas_.back(); | ||
| 2195 | } | 1173 | } |
| 1174 | |||
| 1175 | return *lemmaByBaseForm_.at(base_form); | ||
| 1176 | } | ||
| 2196 | 1177 | ||
| 2197 | if (line.back() == '\r') | 1178 | form& generator::lookupOrCreateForm(std::string text) |
| 1179 | { | ||
| 1180 | if (!formByText_.count(text)) | ||
| 2198 | { | 1181 | { |
| 2199 | line.pop_back(); | 1182 | forms_.emplace_back(text); |
| 1183 | formByText_[text] = &forms_.back(); | ||
| 2200 | } | 1184 | } |
| 2201 | 1185 | ||
| 2202 | lines.push_back(line); | 1186 | return *formByText_[text]; |
| 2203 | } | 1187 | } |
| 2204 | 1188 | ||
| 2205 | progress ppgs("Writing sense synonyms...", lines.size()); | 1189 | template <typename... Args> word& generator::createWord(Args&&... args) |
| 2206 | for (auto line : lines) | ||
| 2207 | { | 1190 | { |
| 2208 | ppgs.update(); | 1191 | words_.emplace_back(std::forward<Args>(args)...); |
| 1192 | word& w = words_.back(); | ||
| 2209 | 1193 | ||
| 2210 | std::regex relation("^sim\\((3\\d{8}),(3\\d{8})\\)\\."); | 1194 | wordsByBaseForm_[w.getLemma().getBaseForm().getText()].insert(&w); |
| 2211 | std::smatch relation_data; | 1195 | |
| 2212 | if (!std::regex_search(line, relation_data, relation)) | 1196 | if (w.getNotion().hasWnid()) |
| 2213 | { | 1197 | { |
| 2214 | continue; | 1198 | wordsByWnid_[w.getNotion().getWnid()].insert(&w); |
| 2215 | } | 1199 | } |
| 2216 | 1200 | ||
| 2217 | int synset_id_1 = stoi(relation_data[1]); | 1201 | return w; |
| 2218 | int synset_id_2 = stoi(relation_data[2]); | 1202 | } |
| 2219 | std::string query("INSERT INTO adjective_synonymy (adjective_1_id, adjective_2_id) VALUES (?, ?)"); | 1203 | |
| 1204 | group& generator::createGroup(xmlNodePtr top) | ||
| 1205 | { | ||
| 1206 | groups_.emplace_back(); | ||
| 1207 | group& grp = groups_.back(); | ||
| 2220 | 1208 | ||
| 2221 | for (auto mapping1 : wn[synset_id_1]) | 1209 | xmlChar* key; |
| 1210 | |||
| 1211 | for (xmlNodePtr node = top->xmlChildrenNode; node != nullptr; node = node->next) | ||
| 2222 | { | 1212 | { |
| 2223 | for (auto mapping2 : wn[synset_id_2]) | 1213 | if (!xmlStrcmp(node->name, reinterpret_cast<const xmlChar*>("SUBCLASSES"))) |
| 2224 | { | 1214 | { |
| 2225 | sqlite3_stmt* ppstmt; | 1215 | for (xmlNodePtr subclass = node->xmlChildrenNode; subclass != nullptr; subclass = subclass->next) |
| 2226 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK) | ||
| 2227 | { | 1216 | { |
| 2228 | db_error(ppdb, query); | 1217 | if (!xmlStrcmp(subclass->name, reinterpret_cast<const xmlChar*>("VNSUBCLASS"))) |
| 1218 | { | ||
| 1219 | try | ||
| 1220 | { | ||
| 1221 | group& subgrp = createGroup(subclass); | ||
| 1222 | subgrp.setParent(grp); | ||
| 1223 | } catch (const std::exception& e) | ||
| 1224 | { | ||
| 1225 | key = xmlGetProp(subclass, reinterpret_cast<const xmlChar*>("ID")); | ||
| 1226 | |||
| 1227 | if (key == nullptr) | ||
| 1228 | { | ||
| 1229 | std::throw_with_nested(std::logic_error("Error parsing IDless subgroup")); | ||
| 1230 | } else { | ||
| 1231 | std::string subgroupId(reinterpret_cast<const char*>(key)); | ||
| 1232 | xmlFree(key); | ||
| 1233 | |||
| 1234 | std::throw_with_nested(std::logic_error("Error parsing subgroup " + subgroupId)); | ||
| 1235 | } | ||
| 1236 | } | ||
| 1237 | } | ||
| 2229 | } | 1238 | } |
| 2230 | 1239 | } else if (!xmlStrcmp(node->name, reinterpret_cast<const xmlChar*>("MEMBERS"))) | |
| 2231 | sqlite3_bind_int(ppstmt, 1, mapping1.second); | 1240 | { |
| 2232 | sqlite3_bind_int(ppstmt, 2, mapping2.second); | 1241 | for (xmlNodePtr member = node->xmlChildrenNode; member != nullptr; member = member->next) |
| 2233 | |||
| 2234 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | ||
| 2235 | { | 1242 | { |
| 2236 | db_error(ppdb, query); | 1243 | if (!xmlStrcmp(member->name, reinterpret_cast<const xmlChar*>("MEMBER"))) |
| 1244 | { | ||
| 1245 | key = xmlGetProp(member, reinterpret_cast<const xmlChar*>("wn")); | ||
| 1246 | std::string wnSenses(reinterpret_cast<const char*>(key)); | ||
| 1247 | xmlFree(key); | ||
| 1248 | |||
| 1249 | auto wnSenseKeys = split<std::list<std::string>>(wnSenses, " "); | ||
| 1250 | if (!wnSenseKeys.empty()) | ||
| 1251 | { | ||
| 1252 | std::list<std::string> tempKeys; | ||
| 1253 | |||
| 1254 | std::transform(std::begin(wnSenseKeys), std::end(wnSenseKeys), std::back_inserter(tempKeys), [] (std::string sense) { | ||
| 1255 | return sense + "::"; | ||
| 1256 | }); | ||
| 1257 | |||
| 1258 | std::list<std::string> filteredKeys; | ||
| 1259 | |||
| 1260 | std::remove_copy_if(std::begin(tempKeys), std::end(tempKeys), std::back_inserter(filteredKeys), [&] (std::string sense) { | ||
| 1261 | return !wnSenseKeys_.count(sense); | ||
| 1262 | }); | ||
| 1263 | |||
| 1264 | wnSenseKeys = std::move(filteredKeys); | ||
| 1265 | } | ||
| 1266 | |||
| 1267 | if (!wnSenseKeys.empty()) | ||
| 1268 | { | ||
| 1269 | for (std::string sense : wnSenseKeys) | ||
| 1270 | { | ||
| 1271 | word& wordSense = *wnSenseKeys_[sense]; | ||
| 1272 | wordSense.setVerbGroup(grp); | ||
| 1273 | } | ||
| 1274 | } else { | ||
| 1275 | key = xmlGetProp(member, reinterpret_cast<const xmlChar*>("name")); | ||
| 1276 | std::string memberName(reinterpret_cast<const char*>(key)); | ||
| 1277 | xmlFree(key); | ||
| 1278 | |||
| 1279 | notion& n = createNotion(part_of_speech::verb); | ||
| 1280 | lemma& l = lookupOrCreateLemma(memberName); | ||
| 1281 | word& w = createWord(n, l); | ||
| 1282 | |||
| 1283 | w.setVerbGroup(grp); | ||
| 1284 | } | ||
| 1285 | } | ||
| 2237 | } | 1286 | } |
| 2238 | 1287 | } else if (!xmlStrcmp(node->name, reinterpret_cast<const xmlChar*>("THEMROLES"))) | |
| 2239 | sqlite3_reset(ppstmt); | 1288 | { |
| 2240 | sqlite3_clear_bindings(ppstmt); | 1289 | for (xmlNodePtr roletopnode = node->xmlChildrenNode; roletopnode != nullptr; roletopnode = roletopnode->next) |
| 2241 | |||
| 2242 | sqlite3_bind_int(ppstmt, 1, mapping2.second); | ||
| 2243 | sqlite3_bind_int(ppstmt, 2, mapping1.second); | ||
| 2244 | |||
| 2245 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | ||
| 2246 | { | 1290 | { |
| 2247 | db_error(ppdb, query); | 1291 | if (!xmlStrcmp(roletopnode->name, reinterpret_cast<const xmlChar*>("THEMROLE"))) |
| 1292 | { | ||
| 1293 | role r; | ||
| 1294 | |||
| 1295 | key = xmlGetProp(roletopnode, reinterpret_cast<const xmlChar*>("type")); | ||
| 1296 | std::string roleName = reinterpret_cast<const char*>(key); | ||
| 1297 | xmlFree(key); | ||
| 1298 | |||
| 1299 | for (xmlNodePtr rolenode = roletopnode->xmlChildrenNode; rolenode != nullptr; rolenode = rolenode->next) | ||
| 1300 | { | ||
| 1301 | if (!xmlStrcmp(rolenode->name, reinterpret_cast<const xmlChar*>("SELRESTRS"))) | ||
| 1302 | { | ||
| 1303 | r.setSelrestrs(parseSelrestr(rolenode)); | ||
| 1304 | } | ||
| 1305 | } | ||
| 1306 | |||
| 1307 | grp.addRole(roleName, std::move(r)); | ||
| 1308 | } | ||
| 2248 | } | 1309 | } |
| 1310 | } else if (!xmlStrcmp(node->name, reinterpret_cast<const xmlChar*>("FRAMES"))) | ||
| 1311 | { | ||
| 1312 | for (xmlNodePtr frametopnode = node->xmlChildrenNode; frametopnode != nullptr; frametopnode = frametopnode->next) | ||
| 1313 | { | ||
| 1314 | if (!xmlStrcmp(frametopnode->name, reinterpret_cast<const xmlChar*>("FRAME"))) | ||
| 1315 | { | ||
| 1316 | frames_.emplace_back(); | ||
| 1317 | frame& fr = frames_.back(); | ||
| 2249 | 1318 | ||
| 2250 | sqlite3_finalize(ppstmt); | 1319 | for (xmlNodePtr framenode = frametopnode->xmlChildrenNode; framenode != nullptr; framenode = framenode->next) |
| 1320 | { | ||
| 1321 | if (!xmlStrcmp(framenode->name, reinterpret_cast<const xmlChar*>("SYNTAX"))) | ||
| 1322 | { | ||
| 1323 | for (xmlNodePtr syntaxnode = framenode->xmlChildrenNode; syntaxnode != nullptr; syntaxnode = syntaxnode->next) | ||
| 1324 | { | ||
| 1325 | if (!xmlStrcmp(syntaxnode->name, reinterpret_cast<const xmlChar*>("NP"))) | ||
| 1326 | { | ||
| 1327 | key = xmlGetProp(syntaxnode, reinterpret_cast<const xmlChar*>("value")); | ||
| 1328 | std::string partRole = reinterpret_cast<const char*>(key); | ||
| 1329 | xmlFree(key); | ||
| 1330 | |||
| 1331 | selrestr partSelrestrs; | ||
| 1332 | std::set<std::string> partSynrestrs; | ||
| 1333 | |||
| 1334 | for (xmlNodePtr npnode = syntaxnode->xmlChildrenNode; npnode != nullptr; npnode = npnode->next) | ||
| 1335 | { | ||
| 1336 | if (!xmlStrcmp(npnode->name, reinterpret_cast<const xmlChar*>("SYNRESTRS"))) | ||
| 1337 | { | ||
| 1338 | for (xmlNodePtr synrestr = npnode->xmlChildrenNode; synrestr != nullptr; synrestr = synrestr->next) | ||
| 1339 | { | ||
| 1340 | if (!xmlStrcmp(synrestr->name, reinterpret_cast<const xmlChar*>("SYNRESTR"))) | ||
| 1341 | { | ||
| 1342 | key = xmlGetProp(synrestr, reinterpret_cast<const xmlChar*>("type")); | ||
| 1343 | partSynrestrs.insert(reinterpret_cast<const char*>(key)); | ||
| 1344 | xmlFree(key); | ||
| 1345 | } | ||
| 1346 | } | ||
| 1347 | } | ||
| 1348 | |||
| 1349 | if (!xmlStrcmp(npnode->name, reinterpret_cast<const xmlChar*>("SELRESTRS"))) | ||
| 1350 | { | ||
| 1351 | partSelrestrs = parseSelrestr(npnode); | ||
| 1352 | } | ||
| 1353 | } | ||
| 1354 | |||
| 1355 | fr.push_back(part::createNounPhrase(std::move(partRole), std::move(partSelrestrs), std::move(partSynrestrs))); | ||
| 1356 | } else if (!xmlStrcmp(syntaxnode->name, reinterpret_cast<const xmlChar*>("VERB"))) | ||
| 1357 | { | ||
| 1358 | fr.push_back(part::createVerb()); | ||
| 1359 | } else if (!xmlStrcmp(syntaxnode->name, reinterpret_cast<const xmlChar*>("PREP"))) | ||
| 1360 | { | ||
| 1361 | std::set<std::string> partChoices; | ||
| 1362 | bool partLiteral; | ||
| 1363 | |||
| 1364 | if (xmlHasProp(syntaxnode, reinterpret_cast<const xmlChar*>("value"))) | ||
| 1365 | { | ||
| 1366 | partLiteral = true; | ||
| 1367 | |||
| 1368 | key = xmlGetProp(syntaxnode, reinterpret_cast<const xmlChar*>("value")); | ||
| 1369 | std::string choicesStr = reinterpret_cast<const char*>(key); | ||
| 1370 | xmlFree(key); | ||
| 1371 | |||
| 1372 | split(choicesStr, " ", std::inserter(partChoices, std::end(partChoices))); | ||
| 1373 | } else { | ||
| 1374 | partLiteral = false; | ||
| 1375 | |||
| 1376 | for (xmlNodePtr npnode = syntaxnode->xmlChildrenNode; npnode != nullptr; npnode = npnode->next) | ||
| 1377 | { | ||
| 1378 | if (!xmlStrcmp(npnode->name, reinterpret_cast<const xmlChar*>("SELRESTRS"))) | ||
| 1379 | { | ||
| 1380 | for (xmlNodePtr synrestr = npnode->xmlChildrenNode; synrestr != nullptr; synrestr = synrestr->next) | ||
| 1381 | { | ||
| 1382 | if (!xmlStrcmp(synrestr->name, reinterpret_cast<const xmlChar*>("SELRESTR"))) | ||
| 1383 | { | ||
| 1384 | key = xmlGetProp(synrestr, reinterpret_cast<const xmlChar*>("type")); | ||
| 1385 | partChoices.insert(reinterpret_cast<const char*>(key)); | ||
| 1386 | xmlFree(key); | ||
| 1387 | } | ||
| 1388 | } | ||
| 1389 | } | ||
| 1390 | } | ||
| 1391 | } | ||
| 1392 | |||
| 1393 | fr.push_back(part::createPreposition(std::move(partChoices), partLiteral)); | ||
| 1394 | } else if (!xmlStrcmp(syntaxnode->name, reinterpret_cast<const xmlChar*>("ADJ"))) | ||
| 1395 | { | ||
| 1396 | fr.push_back(part::createAdjective()); | ||
| 1397 | } else if (!xmlStrcmp(syntaxnode->name, reinterpret_cast<const xmlChar*>("ADV"))) | ||
| 1398 | { | ||
| 1399 | fr.push_back(part::createAdverb()); | ||
| 1400 | } else if (!xmlStrcmp(syntaxnode->name, reinterpret_cast<const xmlChar*>("LEX"))) | ||
| 1401 | { | ||
| 1402 | key = xmlGetProp(syntaxnode, reinterpret_cast<const xmlChar*>("value")); | ||
| 1403 | std::string literalValue = reinterpret_cast<const char*>(key); | ||
| 1404 | xmlFree(key); | ||
| 1405 | |||
| 1406 | fr.push_back(part::createLiteral(literalValue)); | ||
| 1407 | } else { | ||
| 1408 | continue; | ||
| 1409 | } | ||
| 1410 | } | ||
| 1411 | |||
| 1412 | grp.addFrame(fr); | ||
| 1413 | } | ||
| 1414 | } | ||
| 1415 | } | ||
| 1416 | } | ||
| 2251 | } | 1417 | } |
| 2252 | } | 1418 | } |
| 2253 | } | ||
| 2254 | } | ||
| 2255 | |||
| 2256 | // syntax table | ||
| 2257 | { | ||
| 2258 | std::ifstream wnsyntaxfile(wnpref + "wn_syntax.pl"); | ||
| 2259 | if (!wnsyntaxfile.is_open()) | ||
| 2260 | { | ||
| 2261 | std::cout << "Invalid WordNet data directory." << std::endl; | ||
| 2262 | print_usage(); | ||
| 2263 | } | ||
| 2264 | 1419 | ||
| 2265 | std::list<std::string> lines; | 1420 | return grp; |
| 2266 | for (;;) | ||
| 2267 | { | ||
| 2268 | std::string line; | ||
| 2269 | if (!getline(wnsyntaxfile, line)) | ||
| 2270 | { | ||
| 2271 | break; | ||
| 2272 | } | ||
| 2273 | |||
| 2274 | if (line.back() == '\r') | ||
| 2275 | { | ||
| 2276 | line.pop_back(); | ||
| 2277 | } | ||
| 2278 | |||
| 2279 | lines.push_back(line); | ||
| 2280 | } | 1421 | } |
| 2281 | 1422 | ||
| 2282 | progress ppgs("Writing adjective syntax markers...", lines.size()); | 1423 | selrestr generator::parseSelrestr(xmlNodePtr top) |
| 2283 | for (auto line : lines) | ||
| 2284 | { | 1424 | { |
| 2285 | ppgs.update(); | 1425 | xmlChar* key; |
| 2286 | 1426 | ||
| 2287 | std::regex relation("^syntax\\((3\\d{8}),(\\d+),([ipa])p?\\)\\."); | 1427 | if (!xmlStrcmp(top->name, reinterpret_cast<const xmlChar*>("SELRESTRS"))) |
| 2288 | std::smatch relation_data; | ||
| 2289 | if (!std::regex_search(line, relation_data, relation)) | ||
| 2290 | { | ||
| 2291 | continue; | ||
| 2292 | } | ||
| 2293 | |||
| 2294 | int synset_id = stoi(relation_data[1]); | ||
| 2295 | int wnum = stoi(relation_data[2]); | ||
| 2296 | std::string syn = relation_data[3]; | ||
| 2297 | std::string query("UPDATE adjectives SET position = ? WHERE adjective_id = ?"); | ||
| 2298 | |||
| 2299 | sqlite3_stmt* ppstmt; | ||
| 2300 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK) | ||
| 2301 | { | 1428 | { |
| 2302 | db_error(ppdb, query); | 1429 | if (xmlChildElementCount(top) == 0) |
| 2303 | } | 1430 | { |
| 2304 | 1431 | return {}; | |
| 2305 | sqlite3_bind_text(ppstmt, 1, syn.c_str(), 1, SQLITE_TRANSIENT); | 1432 | } else if (xmlChildElementCount(top) == 1) |
| 2306 | sqlite3_bind_int(ppstmt, 2, wn[synset_id][wnum]); | 1433 | { |
| 2307 | 1434 | return parseSelrestr(xmlFirstElementChild(top)); | |
| 2308 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | 1435 | } else { |
| 1436 | bool orlogic = false; | ||
| 1437 | if (xmlHasProp(top, reinterpret_cast<const xmlChar*>("logic"))) | ||
| 1438 | { | ||
| 1439 | key = xmlGetProp(top, reinterpret_cast<const xmlChar*>("logic")); | ||
| 1440 | if (!xmlStrcmp(key, reinterpret_cast<const xmlChar*>("or"))) | ||
| 1441 | { | ||
| 1442 | orlogic = true; | ||
| 1443 | } | ||
| 1444 | |||
| 1445 | xmlFree(key); | ||
| 1446 | } | ||
| 1447 | |||
| 1448 | std::list<selrestr> children; | ||
| 1449 | for (xmlNodePtr selrestr = top->xmlChildrenNode; selrestr != nullptr; selrestr = selrestr->next) | ||
| 1450 | { | ||
| 1451 | if (!xmlStrcmp(selrestr->name, reinterpret_cast<const xmlChar*>("SELRESTRS")) | ||
| 1452 | || !xmlStrcmp(selrestr->name, reinterpret_cast<const xmlChar*>("SELRESTR"))) | ||
| 1453 | { | ||
| 1454 | children.push_back(parseSelrestr(selrestr)); | ||
| 1455 | } | ||
| 1456 | } | ||
| 1457 | |||
| 1458 | return selrestr(children, orlogic); | ||
| 1459 | } | ||
| 1460 | } else if (!xmlStrcmp(top->name, reinterpret_cast<const xmlChar*>("SELRESTR"))) | ||
| 2309 | { | 1461 | { |
| 2310 | db_error(ppdb, query); | 1462 | key = xmlGetProp(top, reinterpret_cast<const xmlChar*>("Value")); |
| 1463 | bool selPos = (std::string(reinterpret_cast<const char*>(key)) == "+"); | ||
| 1464 | xmlFree(key); | ||
| 1465 | |||
| 1466 | key = xmlGetProp(top, reinterpret_cast<const xmlChar*>("type")); | ||
| 1467 | std::string selRestriction = reinterpret_cast<const char*>(key); | ||
| 1468 | xmlFree(key); | ||
| 1469 | |||
| 1470 | return selrestr(selRestriction, selPos); | ||
| 1471 | } else { | ||
| 1472 | throw std::logic_error("Badly formatted selrestr"); | ||
| 2311 | } | 1473 | } |
| 2312 | |||
| 2313 | sqlite3_finalize(ppstmt); | ||
| 2314 | } | 1474 | } |
| 2315 | } | 1475 | |
| 2316 | 1476 | }; | |
| 2317 | sqlite3_close_v2(ppdb); | 1477 | }; |
| 2318 | |||
| 2319 | std::cout << "Done." << std::endl; | ||
| 2320 | } | ||
| diff --git a/generator/generator.h b/generator/generator.h new file mode 100644 index 0000000..e2a7404 --- /dev/null +++ b/generator/generator.h | |||
| @@ -0,0 +1,151 @@ | |||
| 1 | #ifndef GENERATOR_H_5B61CBC5 | ||
| 2 | #define GENERATOR_H_5B61CBC5 | ||
| 3 | |||
| 4 | #include <string> | ||
| 5 | #include <map> | ||
| 6 | #include <list> | ||
| 7 | #include <set> | ||
| 8 | #include <libxml/parser.h> | ||
| 9 | #include "database.h" | ||
| 10 | #include "notion.h" | ||
| 11 | #include "word.h" | ||
| 12 | #include "lemma.h" | ||
| 13 | #include "form.h" | ||
| 14 | #include "pronunciation.h" | ||
| 15 | #include "group.h" | ||
| 16 | #include "frame.h" | ||
| 17 | |||
| 18 | namespace verbly { | ||
| 19 | namespace generator { | ||
| 20 | |||
| 21 | enum class part_of_speech; | ||
| 22 | class selrestr; | ||
| 23 | |||
| 24 | class generator { | ||
| 25 | public: | ||
| 26 | |||
| 27 | // Constructor | ||
| 28 | |||
| 29 | generator( | ||
| 30 | std::string verbNetPath, | ||
| 31 | std::string agidPath, | ||
| 32 | std::string wordNetPath, | ||
| 33 | std::string cmudictPath, | ||
| 34 | std::string imageNetPath, | ||
| 35 | std::string outputPath); | ||
| 36 | |||
| 37 | // Action | ||
| 38 | |||
| 39 | void run(); | ||
| 40 | |||
| 41 | private: | ||
| 42 | |||
| 43 | // Subroutines | ||
| 44 | |||
| 45 | void readWordNetSynsets(); | ||
| 46 | |||
| 47 | void readAdjectivePositioning(); | ||
| 48 | |||
| 49 | void readImageNetUrls(); | ||
| 50 | |||
| 51 | void readWordNetSenseKeys(); | ||
| 52 | |||
| 53 | void readVerbNet(); | ||
| 54 | |||
| 55 | void readAgidInflections(); | ||
| 56 | |||
| 57 | void readPrepositions(); | ||
| 58 | |||
| 59 | void readCmudictPronunciations(); | ||
| 60 | |||
| 61 | void writeSchema(); | ||
| 62 | |||
| 63 | void dumpObjects(); | ||
| 64 | |||
| 65 | void readWordNetAntonymy(); | ||
| 66 | |||
| 67 | void readWordNetVariation(); | ||
| 68 | |||
| 69 | void readWordNetClasses(); | ||
| 70 | |||
| 71 | void readWordNetCausality(); | ||
| 72 | |||
| 73 | void readWordNetEntailment(); | ||
| 74 | |||
| 75 | void readWordNetHypernymy(); | ||
| 76 | |||
| 77 | void readWordNetInstantiation(); | ||
| 78 | |||
| 79 | void readWordNetMemberMeronymy(); | ||
| 80 | |||
| 81 | void readWordNetPartMeronymy(); | ||
| 82 | |||
| 83 | void readWordNetSubstanceMeronymy(); | ||
| 84 | |||
| 85 | void readWordNetPertainymy(); | ||
| 86 | |||
| 87 | void readWordNetSpecification(); | ||
| 88 | |||
| 89 | void readWordNetSimilarity(); | ||
| 90 | |||
| 91 | // Helpers | ||
| 92 | |||
| 93 | std::list<std::string> readFile(std::string path); | ||
| 94 | |||
| 95 | inline part_of_speech partOfSpeechByWnid(int wnid); | ||
| 96 | |||
| 97 | notion& createNotion(part_of_speech partOfSpeech); | ||
| 98 | |||
| 99 | notion& lookupOrCreateNotion(int wnid); | ||
| 100 | |||
| 101 | lemma& lookupOrCreateLemma(std::string base_form); | ||
| 102 | |||
| 103 | form& lookupOrCreateForm(std::string text); | ||
| 104 | |||
| 105 | template <typename... Args> word& createWord(Args&&... args); | ||
| 106 | |||
| 107 | group& createGroup(xmlNodePtr top); | ||
| 108 | |||
| 109 | selrestr parseSelrestr(xmlNodePtr top); | ||
| 110 | |||
| 111 | // Input | ||
| 112 | |||
| 113 | std::string verbNetPath_; | ||
| 114 | std::string agidPath_; | ||
| 115 | std::string wordNetPath_; | ||
| 116 | std::string cmudictPath_; | ||
| 117 | std::string imageNetPath_; | ||
| 118 | |||
| 119 | // Output | ||
| 120 | |||
| 121 | database db_; | ||
| 122 | |||
| 123 | // Data | ||
| 124 | |||
| 125 | std::list<notion> notions_; | ||
| 126 | std::list<word> words_; | ||
| 127 | std::list<lemma> lemmas_; | ||
| 128 | std::list<form> forms_; | ||
| 129 | std::list<pronunciation> pronunciations_; | ||
| 130 | std::list<frame> frames_; | ||
| 131 | std::list<group> groups_; | ||
| 132 | |||
| 133 | // Indexes | ||
| 134 | |||
| 135 | std::map<int, notion*> notionByWnid_; | ||
| 136 | std::map<int, std::set<word*>> wordsByWnid_; | ||
| 137 | std::map<std::pair<int, int>, word*> wordByWnidAndWnum_; | ||
| 138 | std::map<std::string, std::set<word*>> wordsByBaseForm_; | ||
| 139 | std::map<std::string, lemma*> lemmaByBaseForm_; | ||
| 140 | std::map<std::string, form*> formByText_; | ||
| 141 | |||
| 142 | // Caches | ||
| 143 | |||
| 144 | std::map<std::string, word*> wnSenseKeys_; | ||
| 145 | |||
| 146 | }; | ||
| 147 | |||
| 148 | }; | ||
| 149 | }; | ||
| 150 | |||
| 151 | #endif /* end of include guard: GENERATOR_H_5B61CBC5 */ | ||
| diff --git a/generator/group.cpp b/generator/group.cpp new file mode 100644 index 0000000..7cbd4c8 --- /dev/null +++ b/generator/group.cpp | |||
| @@ -0,0 +1,119 @@ | |||
| 1 | #include "group.h" | ||
| 2 | #include <stdexcept> | ||
| 3 | #include <list> | ||
| 4 | #include <json.hpp> | ||
| 5 | #include "database.h" | ||
| 6 | #include "field.h" | ||
| 7 | #include "frame.h" | ||
| 8 | |||
| 9 | namespace verbly { | ||
| 10 | namespace generator { | ||
| 11 | |||
| 12 | int group::nextId_ = 0; | ||
| 13 | |||
| 14 | group::group() : id_(nextId_++) | ||
| 15 | { | ||
| 16 | } | ||
| 17 | |||
| 18 | void group::setParent(const group& parent) | ||
| 19 | { | ||
| 20 | // Adding a group to itself is nonsensical. | ||
| 21 | assert(&parent != this); | ||
| 22 | |||
| 23 | parent_ = &parent; | ||
| 24 | } | ||
| 25 | |||
| 26 | void group::addRole(std::string name, role r) | ||
| 27 | { | ||
| 28 | roleNames_.insert(name); | ||
| 29 | roles_[name] = std::move(r); | ||
| 30 | } | ||
| 31 | |||
| 32 | void group::addFrame(const frame& f) | ||
| 33 | { | ||
| 34 | frames_.insert(&f); | ||
| 35 | } | ||
| 36 | |||
| 37 | std::set<std::string> group::getRoles() const | ||
| 38 | { | ||
| 39 | std::set<std::string> fullRoles = roleNames_; | ||
| 40 | |||
| 41 | if (hasParent()) | ||
| 42 | { | ||
| 43 | for (std::string name : getParent().getRoles()) | ||
| 44 | { | ||
| 45 | fullRoles.insert(name); | ||
| 46 | } | ||
| 47 | } | ||
| 48 | |||
| 49 | return fullRoles; | ||
| 50 | } | ||
| 51 | |||
| 52 | const role& group::getRole(std::string name) const | ||
| 53 | { | ||
| 54 | if (roles_.count(name)) | ||
| 55 | { | ||
| 56 | return roles_.at(name); | ||
| 57 | } else if (hasParent()) | ||
| 58 | { | ||
| 59 | return getParent().getRole(name); | ||
| 60 | } else { | ||
| 61 | throw std::invalid_argument("Specified role not found in verb group"); | ||
| 62 | } | ||
| 63 | } | ||
| 64 | |||
| 65 | std::set<const frame*> group::getFrames() const | ||
| 66 | { | ||
| 67 | std::set<const frame*> fullFrames = frames_; | ||
| 68 | |||
| 69 | if (hasParent()) | ||
| 70 | { | ||
| 71 | for (const frame* f : getParent().getFrames()) | ||
| 72 | { | ||
| 73 | fullFrames.insert(f); | ||
| 74 | } | ||
| 75 | } | ||
| 76 | |||
| 77 | return fullFrames; | ||
| 78 | } | ||
| 79 | |||
| 80 | database& operator<<(database& db, const group& arg) | ||
| 81 | { | ||
| 82 | // Serialize the group first | ||
| 83 | { | ||
| 84 | std::list<field> fields; | ||
| 85 | fields.emplace_back("group_id", arg.getId()); | ||
| 86 | |||
| 87 | nlohmann::json jsonRoles; | ||
| 88 | for (std::string name : arg.getRoles()) | ||
| 89 | { | ||
| 90 | const role& r = arg.getRole(name); | ||
| 91 | |||
| 92 | nlohmann::json jsonRole; | ||
| 93 | jsonRole["type"] = name; | ||
| 94 | jsonRole["selrestrs"] = r.getSelrestrs().toJson(); | ||
| 95 | |||
| 96 | jsonRoles.emplace_back(std::move(jsonRole)); | ||
| 97 | } | ||
| 98 | |||
| 99 | fields.emplace_back("data", jsonRoles.dump()); | ||
| 100 | |||
| 101 | db.insertIntoTable("groups", std::move(fields)); | ||
| 102 | } | ||
| 103 | |||
| 104 | // Then, serialize the group/frame relationship | ||
| 105 | for (const frame* f : arg.getFrames()) | ||
| 106 | { | ||
| 107 | std::list<field> fields; | ||
| 108 | |||
| 109 | fields.emplace_back("group_id", arg.getId()); | ||
| 110 | fields.emplace_back("frame_id", f->getId()); | ||
| 111 | |||
| 112 | db.insertIntoTable("groups_frames", std::move(fields)); | ||
| 113 | } | ||
| 114 | |||
| 115 | return db; | ||
| 116 | } | ||
| 117 | |||
| 118 | }; | ||
| 119 | }; | ||
| diff --git a/generator/group.h b/generator/group.h new file mode 100644 index 0000000..efb8c5d --- /dev/null +++ b/generator/group.h | |||
| @@ -0,0 +1,80 @@ | |||
#ifndef GROUP_H_EDAFB5DC
#define GROUP_H_EDAFB5DC

#include <map>
#include <set>
#include <string>
#include <cassert>
#include "role.h"

namespace verbly {
  namespace generator {

    class frame;
    class database;

    // A VerbNet-style verb class: a set of named thematic roles plus the
    // syntactic frames they can appear in. Groups form an inheritance
    // hierarchy; a child group exposes its parent's roles and frames in
    // addition to its own.
    class group {
    public:

      // Constructor

      group();

      // Mutators

      // Sets the parent group. Only a pointer is stored, so the parent must
      // outlive this group.
      void setParent(const group& parent);

      void addRole(std::string name, role r);

      // Registers a frame. Only a pointer is stored, so the frame must
      // outlive this group.
      void addFrame(const frame& f);

      // Accessors

      // Unique sequential identifier assigned at construction.
      int getId() const
      {
        return id_;
      }

      bool hasParent() const
      {
        return (parent_ != nullptr);
      }

      const group& getParent() const
      {
        // Calling code should always call hasParent first
        assert(parent_ != nullptr);

        return *parent_;
      }

      // Returns the names of all roles, including those inherited from the
      // parent chain.
      std::set<std::string> getRoles() const;

      // Returns the role with the given name, searching the parent chain;
      // throws std::invalid_argument if the role does not exist.
      const role& getRole(std::string name) const;

      // Returns all frames, including those inherited from the parent chain.
      std::set<const frame*> getFrames() const;

    private:

      static int nextId_;

      const int id_;

      const group* parent_ = nullptr;
      std::map<std::string, role> roles_;
      std::set<const frame*> frames_;

      // Caches

      // Names of the roles declared directly on this group; presumably kept
      // in sync with roles_ by addRole — confirm in group.cpp.
      std::set<std::string> roleNames_;

    };

    // Serializer

    database& operator<<(database& db, const group& arg);

  };
};

#endif /* end of include guard: GROUP_H_EDAFB5DC */
| diff --git a/generator/lemma.cpp b/generator/lemma.cpp new file mode 100644 index 0000000..e66b153 --- /dev/null +++ b/generator/lemma.cpp | |||
| @@ -0,0 +1,65 @@ | |||
| 1 | #include "lemma.h" | ||
| 2 | #include <list> | ||
| 3 | #include <cassert> | ||
| 4 | #include "field.h" | ||
| 5 | #include "database.h" | ||
| 6 | #include "form.h" | ||
| 7 | |||
| 8 | namespace verbly { | ||
| 9 | namespace generator { | ||
| 10 | |||
| 11 | int lemma::nextId_ = 0; | ||
| 12 | |||
| 13 | lemma::lemma(const form& baseForm) : | ||
| 14 | id_(nextId_++), | ||
| 15 | baseForm_(baseForm) | ||
| 16 | { | ||
| 17 | inflections_[inflection::base] = {&baseForm}; | ||
| 18 | } | ||
| 19 | |||
| 20 | void lemma::addInflection(inflection type, const form& f) | ||
| 21 | { | ||
| 22 | // There can only be one base form. | ||
| 23 | assert(type != inflection::base); | ||
| 24 | |||
| 25 | inflections_[type].insert(&f); | ||
| 26 | } | ||
| 27 | |||
| 28 | std::set<const form*> lemma::getInflections(inflection type) const | ||
| 29 | { | ||
| 30 | if (inflections_.count(type)) | ||
| 31 | { | ||
| 32 | return inflections_.at(type); | ||
| 33 | } else { | ||
| 34 | return {}; | ||
| 35 | } | ||
| 36 | } | ||
| 37 | |||
| 38 | database& operator<<(database& db, const lemma& arg) | ||
| 39 | { | ||
| 40 | for (inflection type : { | ||
| 41 | inflection::base, | ||
| 42 | inflection::plural, | ||
| 43 | inflection::comparative, | ||
| 44 | inflection::superlative, | ||
| 45 | inflection::past_tense, | ||
| 46 | inflection::past_participle, | ||
| 47 | inflection::ing_form, | ||
| 48 | inflection::s_form}) | ||
| 49 | { | ||
| 50 | for (const form* f : arg.getInflections(type)) | ||
| 51 | { | ||
| 52 | std::list<field> fields; | ||
| 53 | fields.emplace_back("lemma_id", arg.getId()); | ||
| 54 | fields.emplace_back("form_id", f->getId()); | ||
| 55 | fields.emplace_back("category", static_cast<int>(type)); | ||
| 56 | |||
| 57 | db.insertIntoTable("lemmas_forms", std::move(fields)); | ||
| 58 | } | ||
| 59 | } | ||
| 60 | |||
| 61 | return db; | ||
| 62 | } | ||
| 63 | |||
| 64 | }; | ||
| 65 | }; | ||
| diff --git a/generator/lemma.h b/generator/lemma.h new file mode 100644 index 0000000..6452e08 --- /dev/null +++ b/generator/lemma.h | |||
| @@ -0,0 +1,58 @@ | |||
#ifndef LEMMA_H_D73105A7
#define LEMMA_H_D73105A7

#include <string>
#include <map>
#include <set>
#include "enums.h"

namespace verbly {
  namespace generator {

    class database;
    class form;

    // A dictionary headword: a base form plus its inflected forms, keyed by
    // inflection category (plural, past tense, etc.).
    class lemma {
    public:

      // Constructors

      // Only pointers to forms are stored; every form passed in must outlive
      // this lemma.
      explicit lemma(const form& baseForm);

      // Mutators

      // Attaches an inflected form. type must not be inflection::base (the
      // base form is fixed at construction).
      void addInflection(inflection type, const form& f);

      // Accessors

      // Unique sequential identifier assigned at construction.
      int getId() const
      {
        return id_;
      }

      const form& getBaseForm() const
      {
        return baseForm_;
      }

      // Returns the forms registered for the given inflection, or an empty
      // set if there are none.
      std::set<const form*> getInflections(inflection type) const;

    private:

      static int nextId_;

      const int id_;
      const form& baseForm_;

      std::map<inflection, std::set<const form*>> inflections_;

    };

    // Serializer

    database& operator<<(database& db, const lemma& arg);

  };
};

#endif /* end of include guard: LEMMA_H_D73105A7 */
| diff --git a/generator/main.cpp b/generator/main.cpp new file mode 100644 index 0000000..827c963 --- /dev/null +++ b/generator/main.cpp | |||
| @@ -0,0 +1,40 @@ | |||
| 1 | #include <iostream> | ||
| 2 | #include <exception> | ||
| 3 | #include "generator.h" | ||
| 4 | |||
| 5 | void printUsage() | ||
| 6 | { | ||
| 7 | std::cout << "usage: generator verbnet agid wordnet cmudict imagenet output" << std::endl; | ||
| 8 | std::cout << "verbnet :: path to a VerbNet data directory" << std::endl; | ||
| 9 | std::cout << "agid :: path to an AGID infl.txt file" << std::endl; | ||
| 10 | std::cout << "wordnet :: path to a WordNet prolog data directory" << std::endl; | ||
| 11 | std::cout << "cmudict :: path to a CMUDICT pronunciation file" << std::endl; | ||
| 12 | std::cout << "imagenet :: path to an ImageNet urls.txt file" << std::endl; | ||
| 13 | std::cout << "output :: datafile output path" << std::endl; | ||
| 14 | } | ||
| 15 | |||
| 16 | int main(int argc, char** argv) | ||
| 17 | { | ||
| 18 | if (argc == 7) | ||
| 19 | { | ||
| 20 | try | ||
| 21 | { | ||
| 22 | verbly::generator::generator app(argv[1], argv[2], argv[3], argv[4], argv[5], argv[6]); | ||
| 23 | |||
| 24 | try | ||
| 25 | { | ||
| 26 | app.run(); | ||
| 27 | } catch (const std::exception& e) | ||
| 28 | { | ||
| 29 | std::cout << e.what() << std::endl; | ||
| 30 | } | ||
| 31 | } catch (const std::exception& e) | ||
| 32 | { | ||
| 33 | std::cout << e.what() << std::endl; | ||
| 34 | printUsage(); | ||
| 35 | } | ||
| 36 | } else { | ||
| 37 | std::cout << "verbly datafile generator" << std::endl; | ||
| 38 | printUsage(); | ||
| 39 | } | ||
| 40 | } | ||
| diff --git a/generator/notion.cpp b/generator/notion.cpp new file mode 100644 index 0000000..290d982 --- /dev/null +++ b/generator/notion.cpp | |||
| @@ -0,0 +1,85 @@ | |||
| 1 | #include "notion.h" | ||
| 2 | #include <string> | ||
| 3 | #include <list> | ||
| 4 | #include "database.h" | ||
| 5 | #include "field.h" | ||
| 6 | |||
| 7 | namespace verbly { | ||
| 8 | namespace generator { | ||
| 9 | |||
| 10 | int notion::nextId_ = 0; | ||
| 11 | |||
| 12 | notion::notion( | ||
| 13 | part_of_speech partOfSpeech) : | ||
| 14 | id_(nextId_++), | ||
| 15 | partOfSpeech_(partOfSpeech) | ||
| 16 | { | ||
| 17 | } | ||
| 18 | |||
| 19 | notion::notion( | ||
| 20 | part_of_speech partOfSpeech, | ||
| 21 | int wnid) : | ||
| 22 | id_(nextId_++), | ||
| 23 | partOfSpeech_(partOfSpeech), | ||
| 24 | wnid_(wnid), | ||
| 25 | hasWnid_(true) | ||
| 26 | { | ||
| 27 | } | ||
| 28 | |||
| 29 | void notion::incrementNumOfImages() | ||
| 30 | { | ||
| 31 | // Calling code should always call hasWnid and check that the notion is a noun first. | ||
| 32 | assert(hasWnid_ && (partOfSpeech_ == part_of_speech::noun)); | ||
| 33 | |||
| 34 | numOfImages_++; | ||
| 35 | } | ||
| 36 | |||
| 37 | void notion::setPrepositionGroups(std::list<std::string> groups) | ||
| 38 | { | ||
| 39 | // Calling code should always check that the notion is a preposition first. | ||
| 40 | assert(partOfSpeech_ == part_of_speech::preposition); | ||
| 41 | |||
| 42 | prepositionGroups_ = groups; | ||
| 43 | } | ||
| 44 | |||
| 45 | database& operator<<(database& db, const notion& arg) | ||
| 46 | { | ||
| 47 | // First, serialize the notion | ||
| 48 | { | ||
| 49 | std::list<field> fields; | ||
| 50 | |||
| 51 | fields.emplace_back("notion_id", arg.getId()); | ||
| 52 | fields.emplace_back("part_of_speech", static_cast<int>(arg.getPartOfSpeech())); | ||
| 53 | |||
| 54 | if (arg.hasWnid()) | ||
| 55 | { | ||
| 56 | fields.emplace_back("wnid", arg.getWnid()); | ||
| 57 | |||
| 58 | if (arg.getPartOfSpeech() == part_of_speech::noun) | ||
| 59 | { | ||
| 60 | fields.emplace_back("images", arg.getNumOfImages()); | ||
| 61 | } | ||
| 62 | } | ||
| 63 | |||
| 64 | db.insertIntoTable("notions", std::move(fields)); | ||
| 65 | } | ||
| 66 | |||
| 67 | // Next, serialize the is_a relationship if this is a preposition | ||
| 68 | if (arg.getPartOfSpeech() == part_of_speech::preposition) | ||
| 69 | { | ||
| 70 | for (std::string group : arg.getPrepositionGroups()) | ||
| 71 | { | ||
| 72 | std::list<field> fields; | ||
| 73 | |||
| 74 | fields.emplace_back("notion_id", arg.getId()); | ||
| 75 | fields.emplace_back("groupname", group); | ||
| 76 | |||
| 77 | db.insertIntoTable("is_a", std::move(fields)); | ||
| 78 | } | ||
| 79 | } | ||
| 80 | |||
| 81 | return db; | ||
| 82 | } | ||
| 83 | |||
| 84 | }; | ||
| 85 | }; | ||
| diff --git a/generator/notion.h b/generator/notion.h new file mode 100644 index 0000000..76210de --- /dev/null +++ b/generator/notion.h | |||
| @@ -0,0 +1,91 @@ | |||
#ifndef NOTION_H_221DE2BC
#define NOTION_H_221DE2BC

#include <cassert>
#include <list>
#include <string>
#include "enums.h"

namespace verbly {
  namespace generator {

    class database;

    // A word sense: a part of speech optionally tied to a WordNet synset id.
    // Nouns additionally track an ImageNet image count; prepositions track
    // the preposition groups they belong to.
    class notion {
    public:

      // Constructors

      // Notion with no WordNet synset (hasWnid() will be false).
      explicit notion(part_of_speech partOfSpeech);

      // Notion backed by a WordNet synset id.
      notion(part_of_speech partOfSpeech, int wnid);

      // Mutators

      // Increments the ImageNet image counter; only valid for WordNet nouns.
      void incrementNumOfImages();

      // Replaces the preposition group list; only valid for prepositions.
      void setPrepositionGroups(std::list<std::string> groups);

      // Accessors

      // Unique sequential identifier assigned at construction.
      int getId() const
      {
        return id_;
      }

      part_of_speech getPartOfSpeech() const
      {
        return partOfSpeech_;
      }

      bool hasWnid() const
      {
        return hasWnid_;
      }

      int getWnid() const
      {
        // Calling code should always call hasWnid first.
        assert(hasWnid_);

        return wnid_;
      }

      int getNumOfImages() const
      {
        // Calling code should always call hasWnid and check that the notion is a noun first.
        assert(hasWnid_ && (partOfSpeech_ == part_of_speech::noun));

        return numOfImages_;
      }

      std::list<std::string> getPrepositionGroups() const
      {
        // Calling code should always check that the notion is a preposition first.
        assert(partOfSpeech_ == part_of_speech::preposition);

        return prepositionGroups_;
      }

    private:

      static int nextId_;

      const int id_;
      const part_of_speech partOfSpeech_;
      const int wnid_ = 0;
      const bool hasWnid_ = false;

      int numOfImages_ = 0;
      std::list<std::string> prepositionGroups_;

    };

    // Serializer

    database& operator<<(database& db, const notion& arg);

  };
};

#endif /* end of include guard: NOTION_H_221DE2BC */
| diff --git a/generator/part.cpp b/generator/part.cpp new file mode 100644 index 0000000..dbd4e11 --- /dev/null +++ b/generator/part.cpp | |||
| @@ -0,0 +1,336 @@ | |||
| 1 | #include "part.h" | ||
| 2 | #include <stdexcept> | ||
| 3 | #include "selrestr.h" | ||
| 4 | |||
| 5 | namespace verbly { | ||
| 6 | namespace generator { | ||
| 7 | |||
| 8 | part part::createNounPhrase(std::string role, selrestr selrestrs, std::set<std::string> synrestrs) | ||
| 9 | { | ||
| 10 | part p(type::noun_phrase); | ||
| 11 | |||
| 12 | new(&p.noun_phrase_.role) std::string(std::move(role)); | ||
| 13 | new(&p.noun_phrase_.selrestrs) selrestr(std::move(selrestrs)); | ||
| 14 | new(&p.noun_phrase_.synrestrs) std::set<std::string>(std::move(synrestrs)); | ||
| 15 | |||
| 16 | return p; | ||
| 17 | } | ||
| 18 | |||
| 19 | part part::createVerb() | ||
| 20 | { | ||
| 21 | return part(type::verb); | ||
| 22 | } | ||
| 23 | |||
| 24 | part part::createPreposition(std::set<std::string> choices, bool literal) | ||
| 25 | { | ||
| 26 | part p(type::preposition); | ||
| 27 | |||
| 28 | new(&p.preposition_.choices) std::set<std::string>(std::move(choices)); | ||
| 29 | p.preposition_.literal = literal; | ||
| 30 | |||
| 31 | return p; | ||
| 32 | } | ||
| 33 | |||
| 34 | part part::createAdjective() | ||
| 35 | { | ||
| 36 | return part(type::adjective); | ||
| 37 | } | ||
| 38 | |||
| 39 | part part::createAdverb() | ||
| 40 | { | ||
| 41 | return part(type::adverb); | ||
| 42 | } | ||
| 43 | |||
| 44 | part part::createLiteral(std::string value) | ||
| 45 | { | ||
| 46 | part p(type::literal); | ||
| 47 | |||
| 48 | new(&p.literal_) std::string(std::move(value)); | ||
| 49 | |||
| 50 | return p; | ||
| 51 | } | ||
| 52 | |||
| 53 | part::part(const part& other) | ||
| 54 | { | ||
| 55 | type_ = other.type_; | ||
| 56 | |||
| 57 | switch (type_) | ||
| 58 | { | ||
| 59 | case type::noun_phrase: | ||
| 60 | { | ||
| 61 | new(&noun_phrase_.role) std::string(other.noun_phrase_.role); | ||
| 62 | new(&noun_phrase_.selrestrs) selrestr(other.noun_phrase_.selrestrs); | ||
| 63 | new(&noun_phrase_.synrestrs) std::set<std::string>(other.noun_phrase_.synrestrs); | ||
| 64 | |||
| 65 | break; | ||
| 66 | } | ||
| 67 | |||
| 68 | case type::preposition: | ||
| 69 | { | ||
| 70 | new(&preposition_.choices) std::set<std::string>(other.preposition_.choices); | ||
| 71 | preposition_.literal = other.preposition_.literal; | ||
| 72 | |||
| 73 | break; | ||
| 74 | } | ||
| 75 | |||
| 76 | case type::literal: | ||
| 77 | { | ||
| 78 | new(&literal_) std::string(other.literal_); | ||
| 79 | |||
| 80 | break; | ||
| 81 | } | ||
| 82 | |||
| 83 | case type::verb: | ||
| 84 | case type::adjective: | ||
| 85 | case type::adverb: | ||
| 86 | case type::invalid: | ||
| 87 | { | ||
| 88 | break; | ||
| 89 | } | ||
| 90 | } | ||
| 91 | } | ||
| 92 | |||
| 93 | part::part(part&& other) : part() | ||
| 94 | { | ||
| 95 | swap(*this, other); | ||
| 96 | } | ||
| 97 | |||
| 98 | part& part::operator=(part other) | ||
| 99 | { | ||
| 100 | swap(*this, other); | ||
| 101 | |||
| 102 | return *this; | ||
| 103 | } | ||
| 104 | |||
| 105 | void swap(part& first, part& second) | ||
| 106 | { | ||
| 107 | using type = part::type; | ||
| 108 | |||
| 109 | type tempType = first.type_; | ||
| 110 | std::string tempRole; | ||
| 111 | selrestr tempSelrestrs; | ||
| 112 | std::set<std::string> tempSynrestrs; | ||
| 113 | std::set<std::string> tempChoices; | ||
| 114 | bool tempPrepLiteral; | ||
| 115 | std::string tempLiteralValue; | ||
| 116 | |||
| 117 | switch (tempType) | ||
| 118 | { | ||
| 119 | case type::noun_phrase: | ||
| 120 | { | ||
| 121 | tempRole = std::move(first.noun_phrase_.role); | ||
| 122 | tempSelrestrs = std::move(first.noun_phrase_.selrestrs); | ||
| 123 | tempSynrestrs = std::move(first.noun_phrase_.synrestrs); | ||
| 124 | |||
| 125 | break; | ||
| 126 | } | ||
| 127 | |||
| 128 | case type::preposition: | ||
| 129 | { | ||
| 130 | tempChoices = std::move(first.preposition_.choices); | ||
| 131 | tempPrepLiteral = first.preposition_.literal; | ||
| 132 | |||
| 133 | break; | ||
| 134 | } | ||
| 135 | |||
| 136 | case type::literal: | ||
| 137 | { | ||
| 138 | tempLiteralValue = std::move(first.literal_); | ||
| 139 | |||
| 140 | break; | ||
| 141 | } | ||
| 142 | |||
| 143 | case type::verb: | ||
| 144 | case type::adjective: | ||
| 145 | case type::adverb: | ||
| 146 | case type::invalid: | ||
| 147 | { | ||
| 148 | break; | ||
| 149 | } | ||
| 150 | } | ||
| 151 | |||
| 152 | first.~part(); | ||
| 153 | |||
| 154 | first.type_ = second.type_; | ||
| 155 | |||
| 156 | switch (first.type_) | ||
| 157 | { | ||
| 158 | case type::noun_phrase: | ||
| 159 | { | ||
| 160 | new(&first.noun_phrase_.role) std::string(std::move(second.noun_phrase_.role)); | ||
| 161 | new(&first.noun_phrase_.selrestrs) selrestr(std::move(second.noun_phrase_.selrestrs)); | ||
| 162 | new(&first.noun_phrase_.synrestrs) std::set<std::string>(std::move(second.noun_phrase_.synrestrs)); | ||
| 163 | |||
| 164 | break; | ||
| 165 | } | ||
| 166 | |||
| 167 | case type::preposition: | ||
| 168 | { | ||
| 169 | new(&first.preposition_.choices) std::set<std::string>(std::move(second.preposition_.choices)); | ||
| 170 | first.preposition_.literal = second.preposition_.literal; | ||
| 171 | |||
| 172 | break; | ||
| 173 | } | ||
| 174 | |||
| 175 | case type::literal: | ||
| 176 | { | ||
| 177 | new(&first.literal_) std::string(std::move(second.literal_)); | ||
| 178 | |||
| 179 | break; | ||
| 180 | } | ||
| 181 | |||
| 182 | case type::verb: | ||
| 183 | case type::adjective: | ||
| 184 | case type::adverb: | ||
| 185 | case type::invalid: | ||
| 186 | { | ||
| 187 | break; | ||
| 188 | } | ||
| 189 | } | ||
| 190 | |||
| 191 | second.~part(); | ||
| 192 | |||
| 193 | second.type_ = tempType; | ||
| 194 | |||
| 195 | switch (second.type_) | ||
| 196 | { | ||
| 197 | case type::noun_phrase: | ||
| 198 | { | ||
| 199 | new(&second.noun_phrase_.role) std::string(std::move(tempRole)); | ||
| 200 | new(&second.noun_phrase_.selrestrs) selrestr(std::move(tempSelrestrs)); | ||
| 201 | new(&second.noun_phrase_.synrestrs) std::set<std::string>(std::move(tempSynrestrs)); | ||
| 202 | |||
| 203 | break; | ||
| 204 | } | ||
| 205 | |||
| 206 | case type::preposition: | ||
| 207 | { | ||
| 208 | new(&second.preposition_.choices) std::set<std::string>(std::move(tempChoices)); | ||
| 209 | second.preposition_.literal = tempPrepLiteral; | ||
| 210 | |||
| 211 | break; | ||
| 212 | } | ||
| 213 | |||
| 214 | case type::literal: | ||
| 215 | { | ||
| 216 | new(&second.literal_) std::string(std::move(tempLiteralValue)); | ||
| 217 | |||
| 218 | break; | ||
| 219 | } | ||
| 220 | |||
| 221 | case type::verb: | ||
| 222 | case type::adjective: | ||
| 223 | case type::adverb: | ||
| 224 | case type::invalid: | ||
| 225 | { | ||
| 226 | break; | ||
| 227 | } | ||
| 228 | } | ||
| 229 | } | ||
| 230 | |||
| 231 | part::~part() | ||
| 232 | { | ||
| 233 | switch (type_) | ||
| 234 | { | ||
| 235 | case type::noun_phrase: | ||
| 236 | { | ||
| 237 | using string_type = std::string; | ||
| 238 | using set_type = std::set<std::string>; | ||
| 239 | |||
| 240 | noun_phrase_.role.~string_type(); | ||
| 241 | noun_phrase_.selrestrs.~selrestr(); | ||
| 242 | noun_phrase_.synrestrs.~set_type(); | ||
| 243 | |||
| 244 | break; | ||
| 245 | } | ||
| 246 | |||
| 247 | case type::preposition: | ||
| 248 | { | ||
| 249 | using set_type = std::set<std::string>; | ||
| 250 | |||
| 251 | preposition_.choices.~set_type(); | ||
| 252 | |||
| 253 | break; | ||
| 254 | } | ||
| 255 | |||
| 256 | case type::literal: | ||
| 257 | { | ||
| 258 | using string_type = std::string; | ||
| 259 | |||
| 260 | literal_.~string_type(); | ||
| 261 | |||
| 262 | break; | ||
| 263 | } | ||
| 264 | |||
| 265 | case type::verb: | ||
| 266 | case type::adjective: | ||
| 267 | case type::adverb: | ||
| 268 | case type::invalid: | ||
| 269 | { | ||
| 270 | break; | ||
| 271 | } | ||
| 272 | } | ||
| 273 | } | ||
| 274 | |||
| 275 | std::string part::getNounRole() const | ||
| 276 | { | ||
| 277 | if (type_ == type::noun_phrase) | ||
| 278 | { | ||
| 279 | return noun_phrase_.role; | ||
| 280 | } else { | ||
| 281 | throw std::domain_error("part::getNounRole is only valid for noun phrase parts"); | ||
| 282 | } | ||
| 283 | } | ||
| 284 | |||
| 285 | selrestr part::getNounSelrestrs() const | ||
| 286 | { | ||
| 287 | if (type_ == type::noun_phrase) | ||
| 288 | { | ||
| 289 | return noun_phrase_.selrestrs; | ||
| 290 | } else { | ||
| 291 | throw std::domain_error("part::getNounSelrestrs is only valid for noun phrase parts"); | ||
| 292 | } | ||
| 293 | } | ||
| 294 | |||
| 295 | std::set<std::string> part::getNounSynrestrs() const | ||
| 296 | { | ||
| 297 | if (type_ == type::noun_phrase) | ||
| 298 | { | ||
| 299 | return noun_phrase_.synrestrs; | ||
| 300 | } else { | ||
| 301 | throw std::domain_error("part::getNounSynrestrs is only valid for noun phrase parts"); | ||
| 302 | } | ||
| 303 | } | ||
| 304 | |||
| 305 | std::set<std::string> part::getPrepositionChoices() const | ||
| 306 | { | ||
| 307 | if (type_ == type::preposition) | ||
| 308 | { | ||
| 309 | return preposition_.choices; | ||
| 310 | } else { | ||
| 311 | throw std::domain_error("part::getPrepositionChoices is only valid for preposition parts"); | ||
| 312 | } | ||
| 313 | } | ||
| 314 | |||
| 315 | bool part::isPrepositionLiteral() const | ||
| 316 | { | ||
| 317 | if (type_ == type::preposition) | ||
| 318 | { | ||
| 319 | return preposition_.literal; | ||
| 320 | } else { | ||
| 321 | throw std::domain_error("part::isPrepositionLiteral is only valid for preposition parts"); | ||
| 322 | } | ||
| 323 | } | ||
| 324 | |||
| 325 | std::string part::getLiteralValue() const | ||
| 326 | { | ||
| 327 | if (type_ == type::literal) | ||
| 328 | { | ||
| 329 | return literal_; | ||
| 330 | } else { | ||
| 331 | throw std::domain_error("part::getLiteralValue is only valid for literal parts"); | ||
| 332 | } | ||
| 333 | } | ||
| 334 | |||
| 335 | }; | ||
| 336 | }; | ||
| diff --git a/generator/part.h b/generator/part.h new file mode 100644 index 0000000..d044630 --- /dev/null +++ b/generator/part.h | |||
| @@ -0,0 +1,114 @@ | |||
#ifndef PART_H_FB54F361
#define PART_H_FB54F361

#include <string>
#include <set>
#include "selrestr.h"

namespace verbly {
  namespace generator {

    // One element of a syntactic frame, implemented as a hand-rolled tagged
    // union: the active alternative is tracked by type_, and the union
    // members' lifetimes are managed manually in part.cpp (placement new /
    // explicit destructor calls).
    class part {
    public:
      enum class type {
        invalid = -1,
        noun_phrase = 0,
        verb = 1,
        preposition = 2,
        adjective = 3,
        adverb = 4,
        literal = 5
      };

      // Static factories
      // Construction goes through these rather than public constructors so
      // the correct union member is initialized for each tag.

      static part createNounPhrase(std::string role, selrestr selrestrs, std::set<std::string> synrestrs);

      static part createVerb();

      static part createPreposition(std::set<std::string> choices, bool literal);

      static part createAdjective();

      static part createAdverb();

      static part createLiteral(std::string value);

      // Copy and move constructors

      part(const part& other);

      part(part&& other);

      // Assignment
      // By-value parameter: unified copy-and-swap assignment.

      part& operator=(part other);

      // Swap

      friend void swap(part& first, part& second);

      // Destructor
      // Non-trivial: destroys whichever union member is active.

      ~part();

      // General accessors

      type getType() const
      {
        return type_;
      }

      // Noun phrase accessors
      // Throw std::domain_error unless getType() == type::noun_phrase.

      std::string getNounRole() const;

      selrestr getNounSelrestrs() const;

      std::set<std::string> getNounSynrestrs() const;

      // Preposition accessors
      // Throw std::domain_error unless getType() == type::preposition.

      std::set<std::string> getPrepositionChoices() const;

      bool isPrepositionLiteral() const;

      // Literal accessors
      // Throws std::domain_error unless getType() == type::literal.

      std::string getLiteralValue() const;

    private:

      // Private constructors

      // Default: tag stays invalid, no union member is constructed.
      part()
      {
      }

      // Sets the tag only; factories construct the matching payload.
      part(type t) : type_(t)
      {
      }

      // Data

      union {
        struct {
          std::string role;
          selrestr selrestrs;
          std::set<std::string> synrestrs;
        } noun_phrase_;
        struct {
          std::set<std::string> choices;
          bool literal;
        } preposition_;
        std::string literal_;
      };

      type type_ = type::invalid;

    };

  };
};

#endif /* end of include guard: PART_H_FB54F361 */
| diff --git a/generator/progress.h b/generator/progress.h index 81f07a3..fcb680d 100644 --- a/generator/progress.h +++ b/generator/progress.h | |||
| @@ -3,48 +3,54 @@ | |||
| 3 | 3 | ||
| 4 | #include <string> | 4 | #include <string> |
| 5 | 5 | ||
| 6 | class progress { | 6 | namespace verbly { |
| 7 | private: | 7 | namespace generator { |
| 8 | std::string message; | ||
| 9 | int total; | ||
| 10 | int cur = 0; | ||
| 11 | int lprint = 0; | ||
| 12 | 8 | ||
| 13 | public: | 9 | class progress { |
| 14 | progress(std::string message, int total) : message(message), total(total) | 10 | private: |
| 15 | { | 11 | std::string message; |
| 16 | std::cout << message << " 0%" << std::flush; | 12 | int total; |
| 17 | } | 13 | int cur = 0; |
| 14 | int lprint = 0; | ||
| 18 | 15 | ||
| 19 | void update(int val) | 16 | public: |
| 20 | { | 17 | progress(std::string message, int total) : message(message), total(total) |
| 21 | if (val <= total) | 18 | { |
| 22 | { | 19 | std::cout << message << " 0%" << std::flush; |
| 23 | cur = val; | 20 | } |
| 24 | } else { | 21 | |
| 25 | cur = total; | 22 | void update(int val) |
| 26 | } | 23 | { |
| 24 | if (val <= total) | ||
| 25 | { | ||
| 26 | cur = val; | ||
| 27 | } else { | ||
| 28 | cur = total; | ||
| 29 | } | ||
| 27 | 30 | ||
| 28 | int pp = cur * 100 / total; | 31 | int pp = cur * 100 / total; |
| 29 | if (pp != lprint) | 32 | if (pp != lprint) |
| 30 | { | 33 | { |
| 31 | lprint = pp; | 34 | lprint = pp; |
| 32 | 35 | ||
| 33 | std::cout << "\b\b\b\b" << std::right; | 36 | std::cout << "\b\b\b\b" << std::right; |
| 34 | std::cout.width(3); | 37 | std::cout.width(3); |
| 35 | std::cout << pp << "%" << std::flush; | 38 | std::cout << pp << "%" << std::flush; |
| 36 | } | 39 | } |
| 37 | } | 40 | } |
| 41 | |||
| 42 | void update() | ||
| 43 | { | ||
| 44 | update(cur+1); | ||
| 45 | } | ||
| 38 | 46 | ||
| 39 | void update() | 47 | ~progress() |
| 40 | { | 48 | { |
| 41 | update(cur+1); | 49 | std::cout << "\b\b\b\b100%" << std::endl; |
| 42 | } | 50 | } |
| 51 | }; | ||
| 43 | 52 | ||
| 44 | ~progress() | 53 | }; |
| 45 | { | ||
| 46 | std::cout << "\b\b\b\b100%" << std::endl; | ||
| 47 | } | ||
| 48 | }; | 54 | }; |
| 49 | 55 | ||
| 50 | #endif /* end of include guard: PROGRESS_H_A34EF856 */ | 56 | #endif /* end of include guard: PROGRESS_H_A34EF856 */ |
| diff --git a/generator/pronunciation.cpp b/generator/pronunciation.cpp new file mode 100644 index 0000000..eb07607 --- /dev/null +++ b/generator/pronunciation.cpp | |||
| @@ -0,0 +1,87 @@ | |||
| 1 | #include "pronunciation.h" | ||
| 2 | #include <list> | ||
| 3 | #include <algorithm> | ||
| 4 | #include <cctype> | ||
| 5 | #include <iterator> | ||
| 6 | #include "database.h" | ||
| 7 | #include "field.h" | ||
| 8 | #include "../lib/util.h" | ||
| 9 | |||
| 10 | namespace verbly { | ||
| 11 | namespace generator { | ||
| 12 | |||
| 13 | int pronunciation::nextId_ = 0; | ||
| 14 | |||
| 15 | pronunciation::pronunciation(std::string phonemes) : | ||
| 16 | id_(nextId_++), | ||
| 17 | phonemes_(phonemes) | ||
| 18 | { | ||
| 19 | auto phonemeList = split<std::list<std::string>>(phonemes, " "); | ||
| 20 | |||
| 21 | auto rhymeStart = std::find_if(std::begin(phonemeList), std::end(phonemeList), [] (std::string phoneme) { | ||
| 22 | return phoneme.find("1") != std::string::npos; | ||
| 23 | }); | ||
| 24 | |||
| 25 | // Rhyme detection | ||
| 26 | if (rhymeStart != std::end(phonemeList)) | ||
| 27 | { | ||
| 28 | std::list<std::string> rhymePhonemes; | ||
| 29 | |||
| 30 | std::transform(rhymeStart, std::end(phonemeList), std::back_inserter(rhymePhonemes), [] (std::string phoneme) { | ||
| 31 | std::string naked; | ||
| 32 | |||
| 33 | std::remove_copy_if(std::begin(phoneme), std::end(phoneme), std::back_inserter(naked), [] (char ch) { | ||
| 34 | return std::isdigit(ch); | ||
| 35 | }); | ||
| 36 | |||
| 37 | return naked; | ||
| 38 | }); | ||
| 39 | |||
| 40 | rhyme_ = implode(std::begin(rhymePhonemes), std::end(rhymePhonemes), " "); | ||
| 41 | |||
| 42 | if (rhymeStart != std::begin(phonemeList)) | ||
| 43 | { | ||
| 44 | prerhyme_ = *std::prev(rhymeStart); | ||
| 45 | } | ||
| 46 | } | ||
| 47 | |||
| 48 | // Syllable/stress | ||
| 49 | for (std::string phoneme : phonemeList) | ||
| 50 | { | ||
| 51 | if (std::isdigit(phoneme.back())) | ||
| 52 | { | ||
| 53 | // It's a vowel! | ||
| 54 | syllables_++; | ||
| 55 | |||
| 56 | if (phoneme.back() == '1') | ||
| 57 | { | ||
| 58 | stress_.push_back('1'); | ||
| 59 | } else { | ||
| 60 | stress_.push_back('0'); | ||
| 61 | } | ||
| 62 | } | ||
| 63 | } | ||
| 64 | } | ||
| 65 | |||
| 66 | database& operator<<(database& db, const pronunciation& arg) | ||
| 67 | { | ||
| 68 | std::list<field> fields; | ||
| 69 | |||
| 70 | fields.emplace_back("pronunciation_id", arg.getId()); | ||
| 71 | fields.emplace_back("phonemes", arg.getPhonemes()); | ||
| 72 | fields.emplace_back("syllables", arg.getSyllables()); | ||
| 73 | fields.emplace_back("stress", arg.getStress()); | ||
| 74 | |||
| 75 | if (arg.hasRhyme()) | ||
| 76 | { | ||
| 77 | fields.emplace_back("rhyme", arg.getRhymePhonemes()); | ||
| 78 | fields.emplace_back("prerhyme", arg.getPrerhyme()); | ||
| 79 | } | ||
| 80 | |||
| 81 | db.insertIntoTable("pronunciations", std::move(fields)); | ||
| 82 | |||
| 83 | return db; | ||
| 84 | } | ||
| 85 | |||
| 86 | }; | ||
| 87 | }; | ||
| diff --git a/generator/pronunciation.h b/generator/pronunciation.h new file mode 100644 index 0000000..81be6c4 --- /dev/null +++ b/generator/pronunciation.h | |||
| @@ -0,0 +1,82 @@ | |||
| 1 | #ifndef PRONUNCIATION_H_584A08DD | ||
| 2 | #define PRONUNCIATION_H_584A08DD | ||
| 3 | |||
| 4 | #include <string> | ||
| 5 | #include <cassert> | ||
| 6 | |||
| 7 | namespace verbly { | ||
| 8 | namespace generator { | ||
| 9 | |||
| 10 | class database; | ||
| 11 | |||
// A single pronunciation of a word form, parsed from a space-delimited
// phoneme string. The constructor (defined in pronunciation.cpp) derives
// the rhyming part, the phoneme preceding it, the syllable count, and the
// stress pattern.
class pronunciation {
public:

  // Constructor

  explicit pronunciation(std::string phonemes);

  // Accessors

  // Unique sequential identifier assigned at construction.
  int getId() const
  {
    return id_;
  }

  // The full phoneme string this object was constructed with.
  std::string getPhonemes() const
  {
    return phonemes_;
  }

  // True when a rhyming part was extracted from the phonemes.
  bool hasRhyme() const
  {
    return !rhyme_.empty();
  }

  // The phonemes of the rhyming part, stress digits stripped.
  std::string getRhymePhonemes() const
  {
    // Calling code should always call hasRhyme first.
    assert(!rhyme_.empty());

    return rhyme_;
  }

  // The phoneme immediately before the rhyming part; may be empty when
  // the rhyme starts at the first phoneme.
  std::string getPrerhyme() const
  {
    // Calling code should always call hasRhyme first.
    assert(!rhyme_.empty());

    return prerhyme_;
  }

  // Number of syllables, counted as one per vowel phoneme.
  int getSyllables() const
  {
    return syllables_;
  }

  // One character per syllable: '1' where the vowel carries stress
  // marker 1, '0' otherwise.
  std::string getStress() const
  {
    return stress_;
  }

private:

  // Source of the sequential ids handed out by the constructor.
  // NOTE(review): not synchronized; assumes single-threaded generation.
  static int nextId_;

  const int id_;
  const std::string phonemes_;
  std::string rhyme_;    // empty when no rhyme was found
  std::string prerhyme_; // may be empty even when rhyme_ is set
  int syllables_ = 0;
  std::string stress_;

};
| 74 | |||
| 75 | // Serializer | ||
| 76 | |||
| 77 | database& operator<<(database& db, const pronunciation& arg); | ||
| 78 | |||
| 79 | }; | ||
| 80 | }; | ||
| 81 | |||
| 82 | #endif /* end of include guard: PRONUNCIATION_H_584A08DD */ | ||
| diff --git a/generator/role.h b/generator/role.h new file mode 100644 index 0000000..5fa68b8 --- /dev/null +++ b/generator/role.h | |||
| @@ -0,0 +1,35 @@ | |||
| 1 | #ifndef ROLE_H_249F9A9C | ||
| 2 | #define ROLE_H_249F9A9C | ||
| 3 | |||
#include "selrestr.h"
#include <utility>
| 5 | |||
| 6 | namespace verbly { | ||
| 7 | namespace generator { | ||
| 8 | |||
| 9 | class role { | ||
| 10 | public: | ||
| 11 | |||
| 12 | // Mutators | ||
| 13 | |||
| 14 | void setSelrestrs(selrestr selrestrs) | ||
| 15 | { | ||
| 16 | selrestrs_ = selrestrs; | ||
| 17 | } | ||
| 18 | |||
| 19 | // Accessors | ||
| 20 | |||
| 21 | const selrestr& getSelrestrs() const | ||
| 22 | { | ||
| 23 | return selrestrs_; | ||
| 24 | } | ||
| 25 | |||
| 26 | private: | ||
| 27 | |||
| 28 | selrestr selrestrs_; | ||
| 29 | |||
| 30 | }; | ||
| 31 | |||
| 32 | }; | ||
| 33 | }; | ||
| 34 | |||
| 35 | #endif /* end of include guard: ROLE_H_249F9A9C */ | ||
| diff --git a/generator/schema.sql b/generator/schema.sql index 410b536..c3e54d8 100644 --- a/generator/schema.sql +++ b/generator/schema.sql | |||
| @@ -1,286 +1,204 @@ | |||
| 1 | DROP TABLE IF EXISTS `verbs`; | 1 | CREATE TABLE `notions` ( |
| 2 | CREATE TABLE `verbs` ( | 2 | `notion_id` INTEGER PRIMARY KEY, |
| 3 | `verb_id` INTEGER PRIMARY KEY, | 3 | `part_of_speech` SMALLINT NOT NULL, |
| 4 | `infinitive` VARCHAR(32) NOT NULL, | 4 | `wnid` INTEGER, |
| 5 | `past_tense` VARCHAR(32) NOT NULL, | 5 | `images` INTEGER |
| 6 | `past_participle` VARCHAR(32) NOT NULL, | ||
| 7 | `ing_form` VARCHAR(32) NOT NULL, | ||
| 8 | `s_form` VARCHAR(32) NOT NULL | ||
| 9 | ); | 6 | ); |
| 10 | 7 | ||
| 11 | DROP TABLE IF EXISTS `groups`; | 8 | CREATE UNIQUE INDEX `notion_by_wnid` ON `notions`(`wnid`); |
| 12 | CREATE TABLE `groups` ( | ||
| 13 | `group_id` INTEGER PRIMARY KEY, | ||
| 14 | `data` BLOB NOT NULL | ||
| 15 | ); | ||
| 16 | |||
| 17 | DROP TABLE IF EXISTS `frames`; | ||
| 18 | CREATE TABLE `frames` ( | ||
| 19 | `frame_id` INTEGER PRIMARY KEY, | ||
| 20 | `group_id` INTEGER NOT NULL, | ||
| 21 | `data` BLOB NOT NULL, | ||
| 22 | FOREIGN KEY (`group_id`) REFERENCES `groups`(`group_id`) | ||
| 23 | ); | ||
| 24 | 9 | ||
| 25 | DROP TABLE IF EXISTS `verb_groups`; | ||
| 26 | CREATE TABLE `verb_groups` ( | ||
| 27 | `verb_id` INTEGER NOT NULL, | ||
| 28 | `group_id` INTEGER NOT NULL, | ||
| 29 | FOREIGN KEY (`verb_id`) REFERENCES `verbs`(`verb_id`), | ||
| 30 | FOREIGN KEY (`group_id`) REFERENCES `groups`(`group_id`) | ||
| 31 | ); | ||
| 32 | |||
| 33 | DROP TABLE IF EXISTS `adjectives`; | ||
| 34 | CREATE TABLE `adjectives` ( | ||
| 35 | `adjective_id` INTEGER PRIMARY KEY, | ||
| 36 | `base_form` VARCHAR(32) NOT NULL, | ||
| 37 | `comparative` VARCHAR(32), | ||
| 38 | `superlative` VARCHAR(32), | ||
| 39 | `position` CHAR(1), | ||
| 40 | `complexity` INTEGER NOT NULL | ||
| 41 | ); | ||
| 42 | |||
| 43 | DROP TABLE IF EXISTS `adverbs`; | ||
| 44 | CREATE TABLE `adverbs` ( | ||
| 45 | `adverb_id` INTEGER PRIMARY KEY, | ||
| 46 | `base_form` VARCHAR(32) NOT NULL, | ||
| 47 | `comparative` VARCHAR(32), | ||
| 48 | `superlative` VARCHAR(32), | ||
| 49 | `complexity` INTEGER NOT NULL | ||
| 50 | ); | ||
| 51 | |||
| 52 | DROP TABLE IF EXISTS `nouns`; | ||
| 53 | CREATE TABLE `nouns` ( | ||
| 54 | `noun_id` INTEGER PRIMARY KEY, | ||
| 55 | `singular` VARCHAR(32) NOT NULL, | ||
| 56 | `plural` VARCHAR(32), | ||
| 57 | `proper` INTEGER(1) NOT NULL, | ||
| 58 | `complexity` INTEGER NOT NULL, | ||
| 59 | `images` INTEGER NOT NULL, | ||
| 60 | `wnid` INTEGER NOT NULL | ||
| 61 | ); | ||
| 62 | |||
| 63 | DROP TABLE IF EXISTS `hypernymy`; | ||
| 64 | CREATE TABLE `hypernymy` ( | 10 | CREATE TABLE `hypernymy` ( |
| 65 | `hypernym_id` INTEGER NOT NULL, | 11 | `hypernym_id` INTEGER NOT NULL, |
| 66 | `hyponym_id` INTEGER NOT NULL, | 12 | `hyponym_id` INTEGER NOT NULL |
| 67 | FOREIGN KEY (`hypernym_id`) REFERENCES `nouns`(`noun_id`), | ||
| 68 | FOREIGN KEY (`hyponym_id`) REFERENCES `nouns`(`noun_id`) | ||
| 69 | ); | 13 | ); |
| 70 | 14 | ||
| 71 | DROP TABLE IF EXISTS `instantiation`; | 15 | CREATE INDEX `hyponym_of` ON `hypernymy`(`hypernym_id`); |
| 16 | CREATE INDEX `hypernym_of` ON `hypernymy`(`hyponym_id`); | ||
| 17 | |||
| 72 | CREATE TABLE `instantiation` ( | 18 | CREATE TABLE `instantiation` ( |
| 73 | `class_id` INTEGER NOT NULL, | 19 | `class_id` INTEGER NOT NULL, |
| 74 | `instance_id` INTEGER NOT NULL, | 20 | `instance_id` INTEGER NOT NULL |
| 75 | FOREIGN KEY (`class_id`) REFERENCES `nouns`(`noun_id`), | ||
| 76 | FOREIGN KEY (`instance_id`) REFERENCES `nouns`(`noun_id`) | ||
| 77 | ); | 21 | ); |
| 78 | 22 | ||
| 79 | DROP TABLE IF EXISTS `member_meronymy`; | 23 | CREATE INDEX `instance_of` ON `instantiation`(`class_id`); |
| 24 | CREATE INDEX `class_of` ON `instantiation`(`instance_id`); | ||
| 25 | |||
| 80 | CREATE TABLE `member_meronymy` ( | 26 | CREATE TABLE `member_meronymy` ( |
| 81 | `meronym_id` INTEGER NOT NULL, | 27 | `meronym_id` INTEGER NOT NULL, |
| 82 | `holonym_id` INTEGER NOT NULL, | 28 | `holonym_id` INTEGER NOT NULL |
| 83 | FOREIGN KEY (`meronym_id`) REFERENCES `nouns`(`noun_id`), | ||
| 84 | FOREIGN KEY (`holonym_id`) REFERENCES `nouns`(`noun_id`) | ||
| 85 | ); | 29 | ); |
| 86 | 30 | ||
| 87 | DROP TABLE IF EXISTS `part_meronymy`; | 31 | CREATE INDEX `member_holonym_of` ON `member_meronymy`(`meronym_id`); |
| 32 | CREATE INDEX `member_meronym_of` ON `member_meronymy`(`holonym_id`); | ||
| 33 | |||
| 88 | CREATE TABLE `part_meronymy` ( | 34 | CREATE TABLE `part_meronymy` ( |
| 89 | `meronym_id` INTEGER NOT NULL, | 35 | `meronym_id` INTEGER NOT NULL, |
| 90 | `holonym_id` INTEGER NOT NULL, | 36 | `holonym_id` INTEGER NOT NULL |
| 91 | FOREIGN KEY (`meronym_id`) REFERENCES `nouns`(`noun_id`), | ||
| 92 | FOREIGN KEY (`holonym_id`) REFERENCES `nouns`(`noun_id`) | ||
| 93 | ); | 37 | ); |
| 94 | 38 | ||
| 95 | DROP TABLE IF EXISTS `substance_meronymy`; | 39 | CREATE INDEX `part_holonym_of` ON `part_meronymy`(`meronym_id`); |
| 40 | CREATE INDEX `part_meronym_of` ON `part_meronymy`(`holonym_id`); | ||
| 41 | |||
| 96 | CREATE TABLE `substance_meronymy` ( | 42 | CREATE TABLE `substance_meronymy` ( |
| 97 | `meronym_id` INTEGER NOT NULL, | 43 | `meronym_id` INTEGER NOT NULL, |
| 98 | `holonym_id` INTEGER NOT NULL, | 44 | `holonym_id` INTEGER NOT NULL |
| 99 | FOREIGN KEY (`meronym_id`) REFERENCES `nouns`(`noun_id`), | ||
| 100 | FOREIGN KEY (`holonym_id`) REFERENCES `nouns`(`noun_id`) | ||
| 101 | ); | 45 | ); |
| 102 | 46 | ||
| 103 | DROP TABLE IF EXISTS `variation`; | 47 | CREATE INDEX `substance_holonym_of` ON `substance_meronymy`(`meronym_id`); |
| 48 | CREATE INDEX `substance_meronym_of` ON `substance_meronymy`(`holonym_id`); | ||
| 49 | |||
| 104 | CREATE TABLE `variation` ( | 50 | CREATE TABLE `variation` ( |
| 105 | `noun_id` INTEGER NOT NULL, | 51 | `noun_id` INTEGER NOT NULL, |
| 106 | `adjective_id` INTEGER NOT NULL, | 52 | `adjective_id` INTEGER NOT NULL |
| 107 | FOREIGN KEY (`noun_id`) REFERENCES `nouns`(`noun_id`), | ||
| 108 | FOREIGN KEY (`adjective_id`) REFERENCES `adjectives`(`adjective_id`) | ||
| 109 | ); | 53 | ); |
| 110 | 54 | ||
| 111 | DROP TABLE IF EXISTS `noun_antonymy`; | 55 | CREATE INDEX `variant_of` ON `variation`(`noun_id`); |
| 112 | CREATE TABLE `noun_antonymy` ( | 56 | CREATE INDEX `attribute_of` ON `variation`(`adjective_id`); |
| 113 | `noun_1_id` INTEGER NOT NULL, | ||
| 114 | `noun_2_id` INTEGER NOT NULL, | ||
| 115 | FOREIGN KEY (`noun_1_id`) REFERENCES `nouns`(`noun_id`), | ||
| 116 | FOREIGN KEY (`noun_2_id`) REFERENCES `nouns`(`noun_id`) | ||
| 117 | ); | ||
| 118 | 57 | ||
| 119 | DROP TABLE IF EXISTS `adjective_antonymy`; | 58 | CREATE TABLE `similarity` ( |
| 120 | CREATE TABLE `adjective_antonymy` ( | ||
| 121 | `adjective_1_id` INTEGER NOT NULL, | 59 | `adjective_1_id` INTEGER NOT NULL, |
| 122 | `adjective_2_id` INTEGER NOT NULL, | 60 | `adjective_2_id` INTEGER NOT NULL |
| 123 | FOREIGN KEY (`adjective_1_id`) REFERENCES `adjectives`(`adjective_id`), | 61 | ); |
| 124 | FOREIGN KEY (`adjective_2_id`) REFERENCES `adjectives`(`adjective_id`) | 62 | |
| 63 | CREATE INDEX `similar_to` ON `similarity`(`adjective_1_id`); | ||
| 64 | |||
| 65 | CREATE TABLE `is_a` ( | ||
| 66 | `notion_id` INTEGER NOT NULL, | ||
| 67 | `groupname` VARCHAR(32) NOT NULL | ||
| 125 | ); | 68 | ); |
| 126 | 69 | ||
| 127 | DROP TABLE IF EXISTS `adverb_antonymy`; | 70 | CREATE TABLE `entailment` ( |
| 128 | CREATE TABLE `adverb_antonymy` ( | 71 | `given_id` INTEGER NOT NULL, |
| 129 | `adverb_1_id` INTEGER NOT NULL, | 72 | `entailment_id` INTEGER NOT NULL |
| 130 | `adverb_2_id` INTEGER NOT NULL, | 73 | ); |
| 131 | FOREIGN KEY (`adverb_1_id`) REFERENCES `adverbs`(`adverb_id`), | 74 | |
| 132 | FOREIGN KEY (`adverb_2_id`) REFERENCES `adverbs`(`adverb_id`) | 75 | CREATE INDEX `entailment_of` ON `entailment`(`given_id`); |
| 76 | CREATE INDEX `entailed_by` ON `entailment`(`entailment_id`); | ||
| 77 | |||
| 78 | CREATE TABLE `causality` ( | ||
| 79 | `cause_id` INTEGER NOT NULL, | ||
| 80 | `effect_id` INTEGER NOT NULL | ||
| 81 | ); | ||
| 82 | |||
| 83 | CREATE INDEX `effect_of` ON `causality`(`cause_id`); | ||
| 84 | CREATE INDEX `cause_of` ON `causality`(`effect_id`); | ||
| 85 | |||
| 86 | CREATE TABLE `words` ( | ||
| 87 | `word_id` INTEGER PRIMARY KEY, | ||
| 88 | `notion_id` INTEGER NOT NULL, | ||
| 89 | `lemma_id` INTEGER NOT NULL, | ||
| 90 | `tag_count` INTEGER, | ||
| 91 | `position` SMALLINT, | ||
| 92 | `group_id` INTEGER | ||
| 93 | ); | ||
| 94 | |||
| 95 | CREATE INDEX `notion_words` ON `words`(`notion_id`); | ||
| 96 | CREATE INDEX `lemma_words` ON `words`(`lemma_id`); | ||
| 97 | CREATE INDEX `group_words` ON `words`(`group_id`); | ||
| 98 | |||
| 99 | CREATE TABLE `antonymy` ( | ||
| 100 | `antonym_1_id` INTEGER NOT NULL, | ||
| 101 | `antonym_2_id` INTEGER NOT NULL | ||
| 133 | ); | 102 | ); |
| 134 | 103 | ||
| 135 | DROP TABLE IF EXISTS `specification`; | 104 | CREATE INDEX `antonym_of` ON `antonymy`(`antonym_1_id`); |
| 105 | |||
| 136 | CREATE TABLE `specification` ( | 106 | CREATE TABLE `specification` ( |
| 137 | `general_id` INTEGER NOT NULL, | 107 | `general_id` INTEGER NOT NULL, |
| 138 | `specific_id` INTEGER NOT NULL, | 108 | `specific_id` INTEGER NOT NULL |
| 139 | FOREIGN KEY (`general_id`) REFERENCES `adjectives`(`adjective_id`), | ||
| 140 | FOREIGN KEY (`specific_id`) REFERENCES `adjectives`(`adjective_id`) | ||
| 141 | ); | 109 | ); |
| 142 | 110 | ||
| 143 | DROP TABLE IF EXISTS `pertainymy`; | 111 | CREATE INDEX `specification_of` ON `specification`(`general_id`); |
| 112 | CREATE INDEX `generalization_of` ON `specification`(`specific_id`); | ||
| 113 | |||
| 144 | CREATE TABLE `pertainymy` ( | 114 | CREATE TABLE `pertainymy` ( |
| 145 | `noun_id` INTEGER NOT NULL, | 115 | `noun_id` INTEGER NOT NULL, |
| 146 | `pertainym_id` INTEGER NOT NULL, | 116 | `pertainym_id` INTEGER NOT NULL |
| 147 | FOREIGN KEY (`noun_id`) REFERENCES `nouns`(`noun_id`), | ||
| 148 | FOREIGN KEY (`pertainym_id`) REFERENCES `adjectives`(`adjective_id`) | ||
| 149 | ); | 117 | ); |
| 150 | 118 | ||
| 151 | DROP TABLE IF EXISTS `mannernymy`; | 119 | CREATE INDEX `pertainym_of` ON `pertainymy`(`noun_id`); |
| 120 | CREATE INDEX `anti_pertainym_of` ON `pertainymy`(`pertainym_id`); | ||
| 121 | |||
| 152 | CREATE TABLE `mannernymy` ( | 122 | CREATE TABLE `mannernymy` ( |
| 153 | `adjective_id` INTEGER NOT NULL, | 123 | `adjective_id` INTEGER NOT NULL, |
| 154 | `mannernym_id` INTEGER NOT NULL, | 124 | `mannernym_id` INTEGER NOT NULL |
| 155 | FOREIGN KEY (`adjective_id`) REFERENCES `adjectives`(`adjective_id`), | ||
| 156 | FOREIGN KEY (`mannernym_id`) REFERENCES `adverbs`(`adverb_id`) | ||
| 157 | ); | 125 | ); |
| 158 | 126 | ||
| 159 | DROP TABLE IF EXISTS `noun_synonymy`; | 127 | CREATE INDEX `mannernym_of` ON `mannernymy`(`adjective_id`); |
| 160 | CREATE TABLE `noun_synonymy` ( | 128 | CREATE INDEX `anti_mannernym_of` ON `mannernymy`(`mannernym_id`); |
| 161 | `noun_1_id` INTEGER NOT NULL, | ||
| 162 | `noun_2_id` INTEGER NOT NULL, | ||
| 163 | FOREIGN KEY (`noun_1_id`) REFERENCES `nouns`(`nouns_id`), | ||
| 164 | FOREIGN KEY (`noun_2_id`) REFERENCES `nouns`(`nouns_id`) | ||
| 165 | ); | ||
| 166 | 129 | ||
| 167 | DROP TABLE IF EXISTS `adjective_synonymy`; | 130 | CREATE TABLE `usage` ( |
| 168 | CREATE TABLE `adjective_synonymy` ( | 131 | `domain_id` INTEGER NOT NULL, |
| 169 | `adjective_1_id` INTEGER NOT NULL, | 132 | `term_id` INTEGER NOT NULL |
| 170 | `adjective_2_id` INTEGER NOT NULL, | ||
| 171 | FOREIGN KEY (`adjective_1_id`) REFERENCES `adjectives`(`adjective_id`), | ||
| 172 | FOREIGN KEY (`adjective_2_id`) REFERENCES `adjectives`(`adjective_id`) | ||
| 173 | ); | 133 | ); |
| 174 | 134 | ||
| 175 | DROP TABLE IF EXISTS `adverb_synonymy`; | 135 | CREATE INDEX `usage_term_of` ON `usage`(`domain_id`); |
| 176 | CREATE TABLE `adverb_synonymy` ( | 136 | CREATE INDEX `usage_domain_of` ON `usage`(`term_id`); |
| 177 | `adverb_1_id` INTEGER NOT NULL, | ||
| 178 | `adverb_2_id` INTEGER NOT NULL, | ||
| 179 | FOREIGN KEY (`adverb_1_id`) REFERENCES `adverbs`(`adverb_id`), | ||
| 180 | FOREIGN KEY (`adverb_2_id`) REFERENCES `adverbs`(`adverb_id`) | ||
| 181 | ); | ||
| 182 | 137 | ||
| 183 | DROP TABLE IF EXISTS `noun_pronunciations`; | 138 | CREATE TABLE `topicality` ( |
| 184 | CREATE TABLE `noun_pronunciations` ( | 139 | `domain_id` INTEGER NOT NULL, |
| 185 | `noun_id` INTEGER NOT NULL, | 140 | `term_id` INTEGER NOT NULL |
| 186 | `pronunciation` VARCHAR(64) NOT NULL, | ||
| 187 | `prerhyme` VARCHAR(8), | ||
| 188 | `rhyme` VARCHAR(64), | ||
| 189 | `syllables` INT NOT NULL, | ||
| 190 | `stress` VARCHAR(64) NOT NULL, | ||
| 191 | FOREIGN KEY (`noun_id`) REFERENCES `nouns`(`noun_id`) | ||
| 192 | ); | 141 | ); |
| 193 | 142 | ||
| 194 | DROP TABLE IF EXISTS `verb_pronunciations`; | 143 | CREATE INDEX `topical_term_of` ON `topicality`(`domain_id`); |
| 195 | CREATE TABLE `verb_pronunciations` ( | 144 | CREATE INDEX `topical_domain_of` ON `topicality`(`term_id`); |
| 196 | `verb_id` INTEGER NOT NULL, | ||
| 197 | `pronunciation` VARCHAR(64) NOT NULL, | ||
| 198 | `prerhyme` VARCHAR(8), | ||
| 199 | `rhyme` VARCHAR(64), | ||
| 200 | `syllables` INT NOT NULL, | ||
| 201 | `stress` VARCHAR(64) NOT NULL, | ||
| 202 | FOREIGN KEY (`verb_id`) REFERENCES `verbs`(`verb_id`) | ||
| 203 | ); | ||
| 204 | 145 | ||
| 205 | DROP TABLE IF EXISTS `adjective_pronunciations`; | 146 | CREATE TABLE `regionality` ( |
| 206 | CREATE TABLE `adjective_pronunciations` ( | 147 | `domain_id` INTEGER NOT NULL, |
| 207 | `adjective_id` INTEGER NOT NULL, | 148 | `term_id` INTEGER NOT NULL |
| 208 | `pronunciation` VARCHAR(64) NOT NULL, | ||
| 209 | `prerhyme` VARCHAR(8), | ||
| 210 | `rhyme` VARCHAR(64), | ||
| 211 | `syllables` INT NOT NULL, | ||
| 212 | `stress` VARCHAR(64) NOT NULL, | ||
| 213 | FOREIGN KEY (`adjective_id`) REFERENCES `adjectives`(`adjective_id`) | ||
| 214 | ); | 149 | ); |
| 215 | 150 | ||
| 216 | DROP TABLE IF EXISTS `adverb_pronunciations`; | 151 | CREATE INDEX `regional_term_of` ON `regionality`(`domain_id`); |
| 217 | CREATE TABLE `adverb_pronunciations` ( | 152 | CREATE INDEX `regional_domain_of` ON `regionality`(`term_id`); |
| 218 | `adverb_id` INTEGER NOT NULL, | ||
| 219 | `pronunciation` VARCHAR(64) NOT NULL, | ||
| 220 | `prerhyme` VARCHAR(8), | ||
| 221 | `rhyme` VARCHAR(64), | ||
| 222 | `syllables` INT NOT NULL, | ||
| 223 | `stress` VARCHAR(64) NOT NULL, | ||
| 224 | FOREIGN KEY (`adverb_id`) REFERENCES `adverbs`(`adverb_id`) | ||
| 225 | ); | ||
| 226 | 153 | ||
| 227 | DROP TABLE IF EXISTS `noun_noun_derivation`; | 154 | CREATE TABLE `forms` ( |
| 228 | CREATE TABLE `noun_noun_derivation` ( | 155 | `form_id` INTEGER PRIMARY KEY, |
| 229 | `noun_1_id` INTEGER NOT NULL, | 156 | `form` VARCHAR(32) NOT NULL, |
| 230 | `noun_2_id` INTEGER NOT NULL, | 157 | `complexity` SMALLINT NOT NULL, |
| 231 | FOREIGN KEY (`noun_1_id`) REFERENCES `nouns`(`noun_id`), | 158 | `proper` SMALLINT NOT NULL |
| 232 | FOREIGN KEY (`noun_2_id`) REFERENCES `nouns`(`noun_id`) | ||
| 233 | ); | 159 | ); |
| 234 | 160 | ||
| 235 | DROP TABLE IF EXISTS `noun_adjective_derivation`; | 161 | CREATE UNIQUE INDEX `form_by_string` ON `forms`(`form`); |
| 236 | CREATE TABLE `noun_adjective_derivation` ( | ||
| 237 | `noun_id` INTEGER NOT NULL, | ||
| 238 | `adjective_id` INTEGER NOT NULL, | ||
| 239 | FOREIGN KEY (`noun_id`) REFERENCES `nouns`(`noun_id`), | ||
| 240 | FOREIGN KEY (`adjective_id`) REFERENCES `adjectives`(`adjective_id`) | ||
| 241 | ); | ||
| 242 | 162 | ||
| 243 | DROP TABLE IF EXISTS `noun_adverb_derivation`; | 163 | CREATE TABLE `lemmas_forms` ( |
| 244 | CREATE TABLE `noun_adverb_derivation` ( | 164 | `lemma_id` INTEGER NOT NULL, |
| 245 | `noun_id` INTEGER NOT NULL, | 165 | `form_id` INTEGER NOT NULL, |
| 246 | `adverb_id` INTEGER NOT NULL, | 166 | `category` SMALLINT NOT NULL |
| 247 | FOREIGN KEY (`noun_id`) REFERENCES `nouns`(`noun_id`), | ||
| 248 | FOREIGN KEY (`adverb_id`) REFERENCES `adverbs`(`adverb_id`) | ||
| 249 | ); | 167 | ); |
| 250 | 168 | ||
| 251 | DROP TABLE IF EXISTS `adjective_adjective_derivation`; | 169 | CREATE INDEX `form_of` ON `lemmas_forms`(`lemma_id`); |
| 252 | CREATE TABLE `adjective_adjective_derivation` ( | 170 | CREATE INDEX `lemma_of` ON `lemmas_forms`(`form_id`); |
| 253 | `adjective_1_id` INTEGER NOT NULL, | 171 | |
| 254 | `adjective_2_id` INTEGER NOT NULL, | 172 | CREATE TABLE `pronunciations` ( |
| 255 | FOREIGN KEY (`adjective_1_id`) REFERENCES `adjectives`(`adjective_id`), | 173 | `pronunciation_id` INTEGER PRIMARY KEY, |
| 256 | FOREIGN KEY (`adjective_2_id`) REFERENCES `adjectives`(`adjective_id`) | 174 | `phonemes` VARCHAR(64) NOT NULL, |
| 175 | `prerhyme` VARCHAR(8), | ||
| 176 | `rhyme` VARCHAR(64), | ||
| 177 | `syllables` INTEGER NOT NULL, | ||
| 178 | `stress` VARCHAR(64) NOT NULL | ||
| 257 | ); | 179 | ); |
| 258 | 180 | ||
| 259 | DROP TABLE IF EXISTS `adjective_adverb_derivation`; | 181 | CREATE TABLE `forms_pronunciations` ( |
| 260 | CREATE TABLE `adjective_adverb_derivation` ( | 182 | `form_id` INTEGER NOT NULL, |
| 261 | `adjective_id` INTEGER NOT NULL, | 183 | `pronunciation_id` INTEGER NOT NULL |
| 262 | `adverb_id` INTEGER NOT NULL, | ||
| 263 | FOREIGN KEY (`adjective_id`) REFERENCES `adjectives`(`adjective_id`), | ||
| 264 | FOREIGN KEY (`adverb_id`) REFERENCES `adverbs`(`adjective_id`) | ||
| 265 | ); | 184 | ); |
| 266 | 185 | ||
| 267 | DROP TABLE IF EXISTS `adverb_adverb_derivation`; | 186 | CREATE INDEX `pronunciation_of` ON `forms_pronunciations`(`form_id`); |
| 268 | CREATE TABLE `adverb_adverb_derivation` ( | 187 | CREATE INDEX `spelling_of` ON `forms_pronunciations`(`pronunciation_id`); |
| 269 | `adverb_1_id` INTEGER NOT NULL, | 188 | |
| 270 | `adverb_2_id` INTEGER NOT NULL, | 189 | CREATE TABLE `groups` ( |
| 271 | FOREIGN KEY (`adverb_1_id`) REFERENCES `adverbs`(`adverb_id`), | 190 | `group_id` INTEGER PRIMARY KEY, |
| 272 | FOREIGN KEY (`adverb_2_id`) REFERENCES `adverbs`(`adverb_id`) | 191 | `data` BLOB NOT NULL |
| 273 | ); | 192 | ); |
| 274 | 193 | ||
| 275 | DROP TABLE IF EXISTS `prepositions`; | 194 | CREATE TABLE `frames` ( |
| 276 | CREATE TABLE `prepositions` ( | 195 | `frame_id` INTEGER PRIMARY KEY, |
| 277 | `preposition_id` INTEGER PRIMARY KEY, | 196 | `data` BLOB NOT NULL |
| 278 | `form` VARCHAR(32) NOT NULL | ||
| 279 | ); | 197 | ); |
| 280 | 198 | ||
| 281 | DROP TABLE IF EXISTS `preposition_groups`; | 199 | CREATE TABLE `groups_frames` ( |
| 282 | CREATE TABLE `preposition_groups` ( | 200 | `group_id` INTEGER NOT NULL, |
| 283 | `preposition_id` INTEGER NOT NULL, | 201 | `frame_id` INTEGER NOT NULL |
| 284 | `groupname` VARCHAR(32) NOT NULL, | ||
| 285 | FOREIGN KEY (`preposition_id`) REFERENCES `prepositions`(`preposition_id`) | ||
| 286 | ); | 202 | ); |
| 203 | |||
| 204 | CREATE INDEX `frames_in` ON `groups_frames`(`group_id`); | ||
| diff --git a/generator/selrestr.cpp b/generator/selrestr.cpp new file mode 100644 index 0000000..8bdd3f6 --- /dev/null +++ b/generator/selrestr.cpp | |||
| @@ -0,0 +1,288 @@ | |||
| 1 | #include "selrestr.h" | ||
| 2 | |||
| 3 | namespace verbly { | ||
| 4 | namespace generator { | ||
| 5 | |||
// Copy constructor. The payload lives in a union, so the active member
// has to be constructed explicitly with placement new rather than copied
// memberwise.
selrestr::selrestr(const selrestr& other)
{
  type_ = other.type_;

  switch (type_)
  {
    case type::singleton:
    {
      singleton_.pos = other.singleton_.pos;
      new(&singleton_.restriction) std::string(other.singleton_.restriction);

      break;
    }

    case type::group:
    {
      new(&group_.children) std::list<selrestr>(other.group_.children);
      group_.orlogic = other.group_.orlogic;

      break;
    }

    case type::empty:
    {
      // No payload to copy.
      break;
    }
  }
}
| 34 | |||
// Move constructor: delegate to the empty state, then steal the other
// object's payload via the non-member swap.
selrestr::selrestr(selrestr&& other) : selrestr()
{
  swap(*this, other);
}
| 39 | |||
// Unified copy/move assignment (copy-and-swap): the parameter is taken by
// value, so the copy or move has already happened by the time we swap.
selrestr& selrestr::operator=(selrestr other)
{
  swap(*this, other);

  return *this;
}
| 46 | |||
// Non-member swap, used by the move constructor and the copy-and-swap
// assignment operator. Because the payloads live in a union, the active
// members cannot be std::swap'd directly: first's payload is moved into
// temporaries, then each object is destroyed and reconstructed in place
// with the other's payload via placement new.
void swap(selrestr& first, selrestr& second)
{
  using type = selrestr::type;

  // Move first's payload out into temporaries. tempPos/tempRestriction
  // are only set (and later read) when tempType is singleton;
  // tempChildren/tempOrlogic only when it is group.
  type tempType = first.type_;
  int tempPos;
  std::string tempRestriction;
  std::list<selrestr> tempChildren;
  bool tempOrlogic;

  switch (tempType)
  {
    case type::singleton:
    {
      tempPos = first.singleton_.pos;
      tempRestriction = std::move(first.singleton_.restriction);

      break;
    }

    case type::group:
    {
      tempChildren = std::move(first.group_.children);
      tempOrlogic = first.group_.orlogic;

      break;
    }

    case type::empty:
    {
      break;
    }
  }

  // Tear down first's active payload, then rebuild it from second's.
  first.~selrestr();

  first.type_ = second.type_;

  switch (first.type_)
  {
    case type::singleton:
    {
      first.singleton_.pos = second.singleton_.pos;
      new(&first.singleton_.restriction) std::string(std::move(second.singleton_.restriction));

      break;
    }

    case type::group:
    {
      new(&first.group_.children) std::list<selrestr>(std::move(second.group_.children));
      first.group_.orlogic = second.group_.orlogic;

      break;
    }

    case type::empty:
    {
      break;
    }
  }

  // Tear down second's active payload, then rebuild it from the saved
  // temporaries.
  second.~selrestr();

  second.type_ = tempType;

  switch (second.type_)
  {
    case type::singleton:
    {
      second.singleton_.pos = tempPos;
      new(&second.singleton_.restriction) std::string(std::move(tempRestriction));

      break;
    }

    case type::group:
    {
      new(&second.group_.children) std::list<selrestr>(std::move(tempChildren));
      second.group_.orlogic = tempOrlogic;

      break;
    }

    case type::empty:
    {
      break;
    }
  }
}
| 137 | |||
// Destructor: explicitly destroys whichever union member is active. The
// local type aliases keep the explicit destructor-call syntax simple for
// the std::string and std::list<selrestr> members.
selrestr::~selrestr()
{
  switch (type_)
  {
    case type::singleton:
    {
      using string_type = std::string;
      singleton_.restriction.~string_type();

      break;
    }

    case type::group:
    {
      using list_type = std::list<selrestr>;
      group_.children.~list_type();

      break;
    }

    case type::empty:
    {
      // No payload to destroy.
      break;
    }
  }
}
| 164 | |||
// Default constructor: an empty selrestr; no union member is active.
selrestr::selrestr() : type_(type::empty)
{
}
| 168 | |||
// Singleton constructor: a single named restriction plus its
// positive/negative flag. The string member is placement-new'd because it
// lives in the union.
selrestr::selrestr(
  std::string restriction,
  bool pos) :
  type_(type::singleton)
{
  new(&singleton_.restriction) std::string(std::move(restriction));
  singleton_.pos = pos;
}
| 177 | |||
| 178 | std::string selrestr::getRestriction() const | ||
| 179 | { | ||
| 180 | if (type_ == type::singleton) | ||
| 181 | { | ||
| 182 | return singleton_.restriction; | ||
| 183 | } else { | ||
| 184 | throw std::domain_error("Only singleton selrestrs have restrictions"); | ||
| 185 | } | ||
| 186 | } | ||
| 187 | |||
| 188 | bool selrestr::getPos() const | ||
| 189 | { | ||
| 190 | if (type_ == type::singleton) | ||
| 191 | { | ||
| 192 | return singleton_.pos; | ||
| 193 | } else { | ||
| 194 | throw std::domain_error("Only singleton selrestrs have positivity flags"); | ||
| 195 | } | ||
| 196 | } | ||
| 197 | |||
// Group constructor: a set of child selrestrs combined with "or" logic
// when orlogic is true, "and" logic otherwise. The list member is
// placement-new'd because it lives in the union.
selrestr::selrestr(
  std::list<selrestr> children,
  bool orlogic) :
  type_(type::group)
{
  new(&group_.children) std::list<selrestr>(std::move(children));
  group_.orlogic = orlogic;
}
| 206 | |||
| 207 | std::list<selrestr> selrestr::getChildren() const | ||
| 208 | { | ||
| 209 | if (type_ == type::group) | ||
| 210 | { | ||
| 211 | return group_.children; | ||
| 212 | } else { | ||
| 213 | throw std::domain_error("Only group selrestrs have children"); | ||
| 214 | } | ||
| 215 | } | ||
| 216 | |||
| 217 | std::list<selrestr>::const_iterator selrestr::begin() const | ||
| 218 | { | ||
| 219 | if (type_ == type::group) | ||
| 220 | { | ||
| 221 | return std::begin(group_.children); | ||
| 222 | } else { | ||
| 223 | throw std::domain_error("Only group selrestrs have children"); | ||
| 224 | } | ||
| 225 | } | ||
| 226 | |||
| 227 | std::list<selrestr>::const_iterator selrestr::end() const | ||
| 228 | { | ||
| 229 | if (type_ == type::group) | ||
| 230 | { | ||
| 231 | return std::end(group_.children); | ||
| 232 | } else { | ||
| 233 | throw std::domain_error("Only group selrestrs have children"); | ||
| 234 | } | ||
| 235 | } | ||
| 236 | |||
| 237 | bool selrestr::getOrlogic() const | ||
| 238 | { | ||
| 239 | if (type_ == type::group) | ||
| 240 | { | ||
| 241 | return group_.orlogic; | ||
| 242 | } else { | ||
| 243 | throw std::domain_error("Only group selrestrs have logic"); | ||
| 244 | } | ||
| 245 | } | ||
| 246 | |||
| 247 | nlohmann::json selrestr::toJson() const | ||
| 248 | { | ||
| 249 | switch (type_) | ||
| 250 | { | ||
| 251 | case type::empty: | ||
| 252 | { | ||
| 253 | return {}; | ||
| 254 | } | ||
| 255 | |||
| 256 | case type::singleton: | ||
| 257 | { | ||
| 258 | return { | ||
| 259 | {"type", singleton_.restriction}, | ||
| 260 | {"pos", singleton_.pos} | ||
| 261 | }; | ||
| 262 | } | ||
| 263 | |||
| 264 | case type::group: | ||
| 265 | { | ||
| 266 | std::string logic; | ||
| 267 | if (group_.orlogic) | ||
| 268 | { | ||
| 269 | logic = "or"; | ||
| 270 | } else { | ||
| 271 | logic = "and"; | ||
| 272 | } | ||
| 273 | |||
| 274 | std::list<nlohmann::json> children; | ||
| 275 | std::transform(std::begin(group_.children), std::end(group_.children), std::back_inserter(children), [] (const selrestr& child) { | ||
| 276 | return child.toJson(); | ||
| 277 | }); | ||
| 278 | |||
| 279 | return { | ||
| 280 | {"logic", logic}, | ||
| 281 | {"children", children} | ||
| 282 | }; | ||
| 283 | } | ||
| 284 | } | ||
| 285 | } | ||
| 286 | |||
| 287 | }; | ||
| 288 | }; | ||
| diff --git a/generator/selrestr.h b/generator/selrestr.h new file mode 100644 index 0000000..5000970 --- /dev/null +++ b/generator/selrestr.h | |||
| @@ -0,0 +1,88 @@ | |||
| 1 | #ifndef SELRESTR_H_50652FB7 | ||
| 2 | #define SELRESTR_H_50652FB7 | ||
| 3 | |||
| 4 | #include <list> | ||
| 5 | #include <string> | ||
| 6 | #include <json.hpp> | ||
| 7 | |||
| 8 | namespace verbly { | ||
| 9 | namespace generator { | ||
| 10 | |||
// A selectional restriction: either empty, a single named restriction
// with a positive/negative flag, or a group of child selrestrs combined
// with and/or logic. Implemented as a tagged union; all special member
// functions are defined out of line in selrestr.cpp (rule of five).
class selrestr {
public:
  // Discriminator for the union payload.
  enum class type {
    empty,
    singleton,
    group
  };

  // Copy and move constructors

  selrestr(const selrestr& other);
  selrestr(selrestr&& other);

  // Assignment

  // Unified copy/move assignment via copy-and-swap.
  selrestr& operator=(selrestr other);

  // Swap

  friend void swap(selrestr& first, selrestr& second);

  // Destructor

  ~selrestr();

  // Generic accessors

  // Which union member is currently active.
  type getType() const
  {
    return type_;
  }

  // Empty

  selrestr();

  // Singleton

  // A single named restriction; pos is its positive/negative flag.
  selrestr(std::string restriction, bool pos);

  // Throws std::domain_error unless getType() == type::singleton.
  std::string getRestriction() const;

  // Throws std::domain_error unless getType() == type::singleton.
  bool getPos() const;

  // Group

  // A group of children; orlogic selects "or" (true) vs "and" (false).
  selrestr(std::list<selrestr> children, bool orlogic);

  // These four throw std::domain_error unless getType() == type::group.
  std::list<selrestr> getChildren() const;

  std::list<selrestr>::const_iterator begin() const;

  std::list<selrestr>::const_iterator end() const;

  bool getOrlogic() const;

  // Helpers

  // Serializes to JSON; see selrestr.cpp for the exact layout.
  nlohmann::json toJson() const;

private:
  // Tagged union: type_ records the active member. The non-trivial
  // members are constructed/destroyed manually with placement new and
  // explicit destructor calls (see selrestr.cpp).
  union {
    struct {
      bool pos;
      std::string restriction;
    } singleton_;
    struct {
      std::list<selrestr> children;
      bool orlogic;
    } group_;
  };
  type type_;
};
| 84 | |||
| 85 | }; | ||
| 86 | }; | ||
| 87 | |||
| 88 | #endif /* end of include guard: SELRESTR_H_50652FB7 */ | ||
| diff --git a/generator/word.cpp b/generator/word.cpp new file mode 100644 index 0000000..8ba3ce2 --- /dev/null +++ b/generator/word.cpp | |||
| @@ -0,0 +1,77 @@ | |||
| 1 | #include "word.h" | ||
| 2 | #include <list> | ||
| 3 | #include <string> | ||
| 4 | #include "database.h" | ||
| 5 | #include "notion.h" | ||
| 6 | #include "lemma.h" | ||
| 7 | #include "field.h" | ||
| 8 | #include "group.h" | ||
| 9 | |||
| 10 | namespace verbly { | ||
| 11 | namespace generator { | ||
| 12 | |||
    // Next unique word id; each constructor consumes and increments it.
    int word::nextId_ = 0;
| 14 | |||
    // Constructs a word with no tag count; tagCount_/hasTagCount_ keep their
    // in-class defaults (0/false). The notion and lemma are held by
    // reference, so both must outlive this word.
    word::word(
      notion& n,
      lemma& l) :
      id_(nextId_++),
      notion_(n),
      lemma_(l)
    {
    }
| 23 | |||
    // Constructs a word that carries a tag count (corpus frequency data);
    // hasTagCount_ is latched true so serialization emits the column. The
    // notion and lemma are held by reference and must outlive this word.
    word::word(
      notion& n,
      lemma& l,
      int tagCount) :
      id_(nextId_++),
      notion_(n),
      lemma_(l),
      tagCount_(tagCount),
      hasTagCount_(true)
    {
    }
| 35 | |||
| 36 | void word::setAdjectivePosition(positioning adjectivePosition) | ||
| 37 | { | ||
| 38 | adjectivePosition_ = adjectivePosition; | ||
| 39 | } | ||
| 40 | |||
| 41 | void word::setVerbGroup(const group& verbGroup) | ||
| 42 | { | ||
| 43 | verbGroup_ = &verbGroup; | ||
| 44 | } | ||
| 45 | |||
| 46 | database& operator<<(database& db, const word& arg) | ||
| 47 | { | ||
| 48 | std::list<field> fields; | ||
| 49 | |||
| 50 | fields.emplace_back("word_id", arg.getId()); | ||
| 51 | fields.emplace_back("notion_id", arg.getNotion().getId()); | ||
| 52 | fields.emplace_back("lemma_id", arg.getLemma().getId()); | ||
| 53 | |||
| 54 | if (arg.hasTagCount()) | ||
| 55 | { | ||
| 56 | fields.emplace_back("tag_count", arg.getTagCount()); | ||
| 57 | } | ||
| 58 | |||
| 59 | if ((arg.getNotion().getPartOfSpeech() == part_of_speech::adjective) | ||
| 60 | && (arg.getAdjectivePosition() != positioning::undefined)) | ||
| 61 | { | ||
| 62 | fields.emplace_back("position", static_cast<int>(arg.getAdjectivePosition())); | ||
| 63 | } | ||
| 64 | |||
| 65 | if ((arg.getNotion().getPartOfSpeech() == part_of_speech::verb) | ||
| 66 | && (arg.hasVerbGroup())) | ||
| 67 | { | ||
| 68 | fields.emplace_back("group_id", arg.getVerbGroup().getId()); | ||
| 69 | } | ||
| 70 | |||
| 71 | db.insertIntoTable("words", std::move(fields)); | ||
| 72 | |||
| 73 | return db; | ||
| 74 | } | ||
| 75 | |||
| 76 | }; | ||
| 77 | }; | ||
| diff --git a/generator/word.h b/generator/word.h new file mode 100644 index 0000000..bfed586 --- /dev/null +++ b/generator/word.h | |||
| @@ -0,0 +1,110 @@ | |||
| 1 | #ifndef WORD_H_91F99D46 | ||
| 2 | #define WORD_H_91F99D46 | ||
| 3 | |||
| 4 | #include <cassert> | ||
| 5 | #include "enums.h" | ||
| 6 | |||
| 7 | namespace verbly { | ||
| 8 | namespace generator { | ||
| 9 | |||
| 10 | class notion; | ||
| 11 | class lemma; | ||
| 12 | class database; | ||
| 13 | class group; | ||
| 14 | |||
    // A word: the pairing of a notion (meaning) with a lemma (base form),
    // destined for one row of the "words" table. Holds its notion and lemma
    // by reference, so both must outlive the word. Ids are assigned from a
    // process-wide counter at construction.
    class word {
    public:

      // Constructors

      // Without corpus tag count data.
      word(notion& n, lemma& l);

      // With corpus tag count data (hasTagCount() becomes true).
      word(notion& n, lemma& l, int tagCount);

      // Mutators

      // Only meaningful for adjectives; serialized as "position".
      void setAdjectivePosition(positioning adjectivePosition);

      // Only meaningful for verbs; stores a non-owning pointer, so the
      // group must outlive this word. Serialized as "group_id".
      void setVerbGroup(const group& verbGroup);

      // Accessors

      int getId() const
      {
        return id_;
      }

      notion& getNotion()
      {
        return notion_;
      }

      const notion& getNotion() const
      {
        return notion_;
      }

      lemma& getLemma()
      {
        return lemma_;
      }

      const lemma& getLemma() const
      {
        return lemma_;
      }

      bool hasTagCount() const
      {
        return hasTagCount_;
      }

      int getTagCount() const
      {
        // Calling code should always call hasTagCount first.
        assert(hasTagCount_);

        return tagCount_;
      }

      // Returns positioning::undefined unless setAdjectivePosition was called.
      positioning getAdjectivePosition() const
      {
        return adjectivePosition_;
      }

      bool hasVerbGroup() const
      {
        return (verbGroup_ != nullptr);
      }

      const group& getVerbGroup() const
      {
        // Calling code should always call hasVerbGroup first.
        assert(verbGroup_ != nullptr);

        return *verbGroup_;
      }

    private:

      // Counter backing id_ assignment; defined in word.cpp.
      static int nextId_;

      const int id_;
      notion& notion_;
      lemma& lemma_;
      // Defaults apply when the two-argument constructor is used.
      const int tagCount_ = 0;
      const bool hasTagCount_ = false;

      positioning adjectivePosition_ = positioning::undefined;
      const group* verbGroup_ = nullptr;  // non-owning; see setVerbGroup

    };
| 102 | |||
| 103 | // Serializer | ||
| 104 | |||
| 105 | database& operator<<(database& db, const word& arg); | ||
| 106 | |||
| 107 | }; | ||
| 108 | }; | ||
| 109 | |||
| 110 | #endif /* end of include guard: WORD_H_91F99D46 */ | ||
