From 2c5ed155e8951f7f28b82ed7570295d5629f4770 Mon Sep 17 00:00:00 2001
From: Kelly Rauchenberger <fefferburbia@gmail.com>
Date: Sun, 22 Jan 2017 18:43:25 -0500
Subject: Fixed generator ignoring multiple inflection variants

Previously, the generator would recognize at most one form per
inflection per lemma; now, the generator adds all variants in AGID to
the database.
---
 generator/generator.cpp | 525 ++++++++++++++++++++++++------------------------
 1 file changed, 268 insertions(+), 257 deletions(-)

(limited to 'generator/generator.cpp')

diff --git a/generator/generator.cpp b/generator/generator.cpp
index 3dd2ce7..610a602 100644
--- a/generator/generator.cpp
+++ b/generator/generator.cpp
@@ -15,7 +15,7 @@
 
 namespace verbly {
   namespace generator {
-    
+
     generator::generator(
       std::string verbNetPath,
       std::string agidPath,
@@ -36,21 +36,21 @@ namespace verbly {
       {
         throw std::invalid_argument("Invalid VerbNet data directory");
       }
-      
+
       closedir(dir);
-      
+
       // Ensure AGID infl.txt exists
       if (!std::ifstream(agidPath_))
       {
         throw std::invalid_argument("AGID infl.txt file not found");
       }
-      
+
       // Add directory separator to WordNet path
       if ((wordNetPath_.back() != '/') && (wordNetPath_.back() != '\\'))
       {
         wordNetPath_ += '/';
       }
-      
+
       // Ensure WordNet tables exist
       for (std::string table : {
         "s", "sk", "ant", "at", "cls", "hyp", "ins", "mm", "mp", "ms", "per", "sa", "sim", "syntax"
@@ -61,37 +61,37 @@ namespace verbly {
           throw std::invalid_argument("WordNet " + table + " table not found");
         }
       }
-      
+
       // Ensure CMUDICT file exists
       if (!std::ifstream(cmudictPath_))
       {
         throw std::invalid_argument("CMUDICT file not found");
       }
-      
+
       // Ensure ImageNet urls.txt exists
       if (!std::ifstream(imageNetPath_))
       {
         throw std::invalid_argument("ImageNet urls.txt file not found");
       }
     }
-    
+
     void generator::run()
     {
       // Create notions, words, lemmas, and forms from WordNet synsets
       readWordNetSynsets();
-      
+
       // Reads adjective positioning WordNet data
       readAdjectivePositioning();
-      
-      // Counts the number of URLs ImageNet has per notion 
+
+      // Counts the number of URLs ImageNet has per notion
       readImageNetUrls();
-      
+
       // Creates a word by WordNet sense key lookup table
       readWordNetSenseKeys();
-      
+
       // Creates groups and frames from VerbNet data
       readVerbNet();
-      
+
       // Creates forms and inflections from AGID. To reduce the amount of forms
       // created, we do this after most lemmas that need inflecting have been
       // created through other means, and then only generate forms for
@@ -101,86 +101,78 @@ namespace verbly {
       // then a notion and a word is generated and the form generation proceeds
       // as usual.
       readAgidInflections();
-      
+
       // Reads in prepositions and the is_a relationship
       readPrepositions();
-      
+
       // Creates pronunciations from CMUDICT. To reduce the amount of
       // pronunciations created, we do this after all forms have been created,
       // and then only generate pronunciations for already-exisiting forms.
       readCmudictPronunciations();
-      
+
       // Writes the database schema
       writeSchema();
-      
+
       // Dumps data to the database
       dumpObjects();
-      
+
       // Populates the antonymy relationship from WordNet
       readWordNetAntonymy();
-      
+
       // Populates the variation relationship from WordNet
       readWordNetVariation();
-      
+
       // Populates the usage, topicality, and regionality relationships from
       // WordNet
       readWordNetClasses();
-      
+
       // Populates the causality relationship from WordNet
       readWordNetCausality();
-      
+
       // Populates the entailment relationship from WordNet
       readWordNetEntailment();
-      
+
       // Populates the hypernymy relationship from WordNet
       readWordNetHypernymy();
-      
+
       // Populates the instantiation relationship from WordNet
       readWordNetInstantiation();
-      
+
       // Populates the member meronymy relationship from WordNet
       readWordNetMemberMeronymy();
-      
+
       // Populates the part meronymy relationship from WordNet
       readWordNetPartMeronymy();
-      
+
       // Populates the substance meronymy relationship from WordNet
       readWordNetSubstanceMeronymy();
-      
+
       // Populates the pertainymy and mannernymy relationships from WordNet
       readWordNetPertainymy();
-      
+
       // Populates the specification relationship from WordNet
       readWordNetSpecification();
-      
+
       // Populates the adjective similarity relationship from WordNet
       readWordNetSimilarity();
-      
-      
-      
-      
-      
-      
-      
-      
     }
-    
+
     void generator::readWordNetSynsets()
     {
       std::list<std::string> lines(readFile(wordNetPath_ + "wn_s.pl"));
       progress ppgs("Reading synsets from WordNet...", lines.size());
-      
+
       for (std::string line : lines)
       {
         ppgs.update();
-        
+
         std::regex relation("^s\\(([1234]\\d{8}),(\\d+),'(.+)',\\w,\\d+,(\\d+)\\)\\.$");
         std::smatch relation_data;
         if (!std::regex_search(line, relation_data, relation))
         {
           continue;
         }
-    
+
         int synset_id = std::stoi(relation_data[1]);
         int wnum = std::stoi(relation_data[2]);
         std::string text = relation_data[3];
@@ -190,7 +182,7 @@ namespace verbly {
         {
           text.erase(word_it, 1);
         }
-        
+
         // The WordNet data does contain duplicates, so we need to check that we
         // haven't already created this word.
         std::pair<int, int> lookup(synset_id, wnum);
@@ -204,32 +196,32 @@ namespace verbly {
         }
       }
     }
-    
+
     void generator::readAdjectivePositioning()
     {
       std::list<std::string> lines(readFile(wordNetPath_ + "wn_syntax.pl"));
       progress ppgs("Reading adjective positionings from WordNet...", lines.size());
-      
+
       for (std::string line : lines)
       {
         ppgs.update();
-        
+
         std::regex relation("^syntax\\((3\\d{8}),(\\d+),([ipa])p?\\)\\.");
         std::smatch relation_data;
         if (!std::regex_search(line, relation_data, relation))
         {
           continue;
         }
-      
+
         int synset_id = stoi(relation_data[1]);
         int wnum = stoi(relation_data[2]);
         std::string adjpos_str = relation_data[3];
-        
+
         std::pair<int, int> lookup(synset_id, wnum);
         if (wordByWnidAndWnum_.count(lookup))
         {
           word& adj = *wordByWnidAndWnum_.at(lookup);
-          
+
           if (adjpos_str == "p")
           {
             adj.setAdjectivePosition(positioning::predicate);
@@ -246,20 +238,20 @@ namespace verbly {
         }
       }
     }
-    
+
     void generator::readImageNetUrls()
     {
       // The ImageNet datafile is so large that it is unreasonable and
       // unnecessary to read it into memory; instead, we will parse each line as
       // we read it. This has the caveat that we cannot display a progress bar.
       std::cout << "Reading image counts from ImageNet..." << std::endl;
-      
+
       std::ifstream file(imageNetPath_);
       if (!file)
       {
         throw std::invalid_argument("Could not find file " + imageNetPath_);
       }
-  
+
       std::string line;
       while (std::getline(file, line))
       {
@@ -267,7 +259,7 @@ namespace verbly {
         {
           line.pop_back();
         }
-      
+
         std::string wnid_s = line.substr(1, 8);
         int wnid = stoi(wnid_s) + 100000000;
         if (notionByWnid_.count(wnid))
@@ -277,16 +269,16 @@ namespace verbly {
         }
       }
     }
-    
+
     void generator::readWordNetSenseKeys()
     {
       std::list<std::string> lines(readFile(wordNetPath_ + "wn_sk.pl"));
       progress ppgs("Reading sense keys from WordNet...", lines.size());
-      
+
       for (std::string line : lines)
       {
         ppgs.update();
-        
+
         // We only actually need to lookup verbs by sense key so we'll just
         // ignore everything that isn't a verb.
         std::regex relation("^sk\\((2\\d{8}),(\\d+),'(.+)'\\)\\.$");
@@ -295,11 +287,11 @@ namespace verbly {
         {
           continue;
         }
-    
+
         int synset_id = stoi(relation_data[1]);
         int wnum = stoi(relation_data[2]);
         std::string sense_key = relation_data[3];
-        
+
         // We are treating this mapping as injective, which is not entirely
         // accurate. First, the WordNet table contains duplicate rows, so those
         // need to be ignored. More importantly, a small number of sense keys
@@ -317,17 +309,17 @@ namespace verbly {
         }
       }
     }
-    
+
     void generator::readVerbNet()
     {
       std::cout << "Reading frames from VerbNet..." << std::endl;
-  
+
       DIR* dir;
       if ((dir = opendir(verbNetPath_.c_str())) == nullptr)
       {
         throw std::invalid_argument("Invalid VerbNet data directory");
       }
-  
+
       struct dirent* ent;
       while ((ent = readdir(dir)) != nullptr)
       {
@@ -337,20 +329,20 @@ namespace verbly {
         {
           filename += '/';
         }
-    
+
         filename += ent->d_name;
-    
+
         if (filename.rfind(".xml") != filename.size() - 4)
         {
           continue;
         }
-    
+
         xmlDocPtr doc = xmlParseFile(filename.c_str());
         if (doc == nullptr)
         {
           throw std::logic_error("Error opening " + filename);
         }
-    
+
         xmlNodePtr top = xmlDocGetRootElement(doc);
         if ((top == nullptr) || (xmlStrcmp(top->name, reinterpret_cast<const xmlChar*>("VNCLASS"))))
         {
@@ -365,71 +357,81 @@ namespace verbly {
           std::throw_with_nested(std::logic_error("Error parsing VerbNet file: " + filename));
         }
       }
-  
+
       closedir(dir);
     }
-    
+
     void generator::readAgidInflections()
     {
       std::list<std::string> lines(readFile(agidPath_));
       progress ppgs("Reading inflections from AGID...", lines.size());
-      
+
       for (std::string line : lines)
       {
         ppgs.update();
-        
+
         int divider = line.find_first_of(" ");
         std::string infinitive = line.substr(0, divider);
         line = line.substr(divider+1);
         char type = line[0];
-    
+
         if (line[1] == '?')
         {
           line.erase(0, 4);
         } else {
           line.erase(0, 3);
         }
-        
+
         if (!lemmaByBaseForm_.count(infinitive) && (type != 'V'))
         {
           continue;
         }
-        
+
         lemma& curLemma = lookupOrCreateLemma(infinitive);
 
-        auto forms = split<std::vector<std::string>>(line, " | ");
-        for (std::string& inflForm : forms)
+        std::vector<std::list<std::string>> agidForms;
+        for (std::string inflForms : split<std::list<std::string>>(line, " | "))
         {
-          int sympos = inflForm.find_first_of(",?");
-          if (sympos != std::string::npos)
+          std::list<std::string> forms;
+
+          for (std::string inflForm : split<std::list<std::string>>(std::move(inflForms), ", "))
           {
-            inflForm = inflForm.substr(0, sympos);
+            int sympos = inflForm.find_first_of("~<!? ");
+            if (sympos != std::string::npos)
+            {
+              inflForm = inflForm.substr(0, sympos);
+            }
+
+            forms.push_back(std::move(inflForm));
           }
+
+          agidForms.push_back(std::move(forms));
         }
-    
+
+        std::map<inflection, std::list<std::string>> mappedForms;
         switch (type)
         {
           case 'V':
           {
-            if (forms.size() == 4)
+            if (agidForms.size() == 4)
             {
-              curLemma.addInflection(inflection::past_tense, lookupOrCreateForm(forms[0]));
-              curLemma.addInflection(inflection::past_participle, lookupOrCreateForm(forms[1]));
-              curLemma.addInflection(inflection::ing_form, lookupOrCreateForm(forms[2]));
-              curLemma.addInflection(inflection::s_form, lookupOrCreateForm(forms[3]));
-            } else if (forms.size() == 3)
+              mappedForms[inflection::past_tense] = agidForms[0];
+              mappedForms[inflection::past_participle] = agidForms[1];
+              mappedForms[inflection::ing_form] = agidForms[2];
+              mappedForms[inflection::s_form] = agidForms[3];
+            } else if (agidForms.size() == 3)
             {
-              curLemma.addInflection(inflection::past_tense, lookupOrCreateForm(forms[0]));
-              curLemma.addInflection(inflection::past_participle, lookupOrCreateForm(forms[0]));
-              curLemma.addInflection(inflection::ing_form, lookupOrCreateForm(forms[1]));
-              curLemma.addInflection(inflection::s_form, lookupOrCreateForm(forms[2]));
-            } else if (forms.size() == 8)
+              mappedForms[inflection::past_tense] = agidForms[0];
+              mappedForms[inflection::past_participle] = agidForms[0];
+              mappedForms[inflection::ing_form] = agidForms[1];
+              mappedForms[inflection::s_form] = agidForms[2];
+            } else if (agidForms.size() == 8)
             {
               // As of AGID 2014.08.11, this is only "to be"
-              curLemma.addInflection(inflection::past_tense, lookupOrCreateForm(forms[0]));
-              curLemma.addInflection(inflection::past_participle, lookupOrCreateForm(forms[2]));
-              curLemma.addInflection(inflection::ing_form, lookupOrCreateForm(forms[3]));
-              curLemma.addInflection(inflection::s_form, lookupOrCreateForm(forms[4]));
+              mappedForms[inflection::past_tense] = agidForms[0];
+              mappedForms[inflection::past_participle] = agidForms[2];
+              mappedForms[inflection::ing_form] = agidForms[3];
+              mappedForms[inflection::s_form] = agidForms[4];
             } else {
               // Words that don't fit the cases above as of AGID 2014.08.11:
               // - may and shall do not conjugate the way we want them to
@@ -437,7 +439,7 @@ namespace verbly {
               // - wit has five forms, and is archaic/obscure enough that we can ignore it for now
               std::cout << " Ignoring verb \"" << infinitive << "\" due to non-standard number of forms." << std::endl;
             }
-    
+
             // For verbs in particular, we sometimes create a notion and a word
             // from inflection data. Specifically, if there are not yet any
             // verbs existing that have the same infinitive form. "Yet" means
@@ -451,84 +453,93 @@ namespace verbly {
               notion& n = createNotion(part_of_speech::verb);
               createWord(n, curLemma);
             }
-        
+
             break;
           }
-      
+
           case 'A':
           {
-            if (forms.size() == 2)
+            if (agidForms.size() == 2)
             {
-              curLemma.addInflection(inflection::comparative, lookupOrCreateForm(forms[0]));
-              curLemma.addInflection(inflection::superlative, lookupOrCreateForm(forms[1]));
+              mappedForms[inflection::comparative] = agidForms[0];
+              mappedForms[inflection::superlative] = agidForms[1];
             } else {
               // As of AGID 2014.08.11, this is only "only", which has only the form "onliest"
               std::cout << " Ignoring adjective/adverb \"" << infinitive << "\" due to non-standard number of forms." << std::endl;
             }
-        
+
             break;
           }
-      
+
           case 'N':
           {
-            if (forms.size() == 1)
+            if (agidForms.size() == 1)
             {
-              curLemma.addInflection(inflection::plural, lookupOrCreateForm(forms[0]));
+              mappedForms[inflection::plural] = agidForms[0];
             } else {
               // As of AGID 2014.08.11, this is non-existent.
               std::cout << " Ignoring noun \"" << infinitive << "\" due to non-standard number of forms." << std::endl;
             }
-        
+
             break;
           }
         }
+
+        // Compile the forms we have mapped.
+        for (auto mapping : std::move(mappedForms))
+        {
+          for (std::string infl : std::move(mapping.second))
+          {
+            curLemma.addInflection(mapping.first, lookupOrCreateForm(std::move(infl)));
+          }
+        }
       }
     }
-    
+
     void generator::readPrepositions()
     {
       std::list<std::string> lines(readFile("prepositions.txt"));
       progress ppgs("Reading prepositions...", lines.size());
-      
+
       for (std::string line : lines)
       {
         ppgs.update();
-        
+
         std::regex relation("^([^:]+): (.+)");
         std::smatch relation_data;
         std::regex_search(line, relation_data, relation);
         std::string prep = relation_data[1];
         auto groups = split<std::list<std::string>>(relation_data[2], ", ");
-        
+
         notion& n = createNotion(part_of_speech::preposition);
         lemma& l = lookupOrCreateLemma(prep);
         word& w = createWord(n, l);
-        
+
         n.setPrepositionGroups(groups);
       }
     }
-    
+
     void generator::readCmudictPronunciations()
     {
       std::list<std::string> lines(readFile(cmudictPath_));
       progress ppgs("Reading pronunciations from CMUDICT...", lines.size());
-      
+
       for (std::string line : lines)
       {
         ppgs.update();
-        
+
         std::regex phoneme("([A-Z][^ \\(]*)(?:\\(\\d+\\))?  ([A-Z 0-9]+)");
         std::smatch phoneme_data;
         if (std::regex_search(line, phoneme_data, phoneme))
         {
           std::string canonical(phoneme_data[1]);
           std::transform(std::begin(canonical), std::end(canonical), std::begin(canonical), ::tolower);
-          
+
           if (!formByText_.count(canonical))
           {
             continue;
           }
-          
+
           std::string phonemes = phoneme_data[2];
           pronunciations_.emplace_back(phonemes);
           pronunciation& p = pronunciations_.back();
@@ -536,7 +547,7 @@ namespace verbly {
         }
       }
     }
-    
+
     void generator::writeSchema()
     {
       std::ifstream file("schema.sql");
@@ -544,7 +555,7 @@ namespace verbly {
       {
         throw std::invalid_argument("Could not find database schema");
       }
-  
+
       std::ostringstream schemaBuilder;
       std::string line;
       while (std::getline(file, line))
@@ -553,10 +564,10 @@ namespace verbly {
         {
           line.pop_back();
         }
-      
+
         schemaBuilder << line;
       }
-      
+
       std::string schema = schemaBuilder.str();
       auto queries = split<std::list<std::string>>(schema, ";");
       progress ppgs("Writing database schema...", queries.size());
@@ -566,91 +577,91 @@ namespace verbly {
         {
           db_.runQuery(query);
         }
-        
+
         ppgs.update();
       }
     }
-    
+
     void generator::dumpObjects()
     {
       {
         progress ppgs("Writing notions...", notions_.size());
-        
+
         for (notion& n : notions_)
         {
           db_ << n;
-          
+
           ppgs.update();
         }
       }
-      
+
       {
         progress ppgs("Writing words...", words_.size());
-        
+
         for (word& w : words_)
         {
           db_ << w;
-          
+
           ppgs.update();
         }
       }
-      
+
       {
         progress ppgs("Writing lemmas...", lemmas_.size());
-        
+
         for (lemma& l : lemmas_)
         {
           db_ << l;
-          
+
           ppgs.update();
         }
       }
-      
+
       {
         progress ppgs("Writing forms...", forms_.size());
-        
+
         for (form& f : forms_)
         {
           db_ << f;
-          
+
           ppgs.update();
         }
       }
-      
+
       {
         progress ppgs("Writing pronunciations...", pronunciations_.size());
-        
+
         for (pronunciation& p : pronunciations_)
         {
           db_ << p;
-          
+
           ppgs.update();
         }
       }
-      
+
       {
         progress ppgs("Writing verb groups...", groups_.size());
-        
+
         for (group& g : groups_)
         {
           db_ << g;
-          
+
           ppgs.update();
         }
       }
-      
+
       {
         progress ppgs("Writing verb frames...", frames_.size());
-        
+
         for (frame& f : frames_)
         {
           db_ << f;
-          
+
           ppgs.update();
         }
       }
     }
-    
+
     void generator::readWordNetAntonymy()
     {
       std::list<std::string> lines(readFile(wordNetPath_ + "wn_ant.pl"));
@@ -658,7 +669,7 @@ namespace verbly {
       for (auto line : lines)
       {
         ppgs.update();
-      
+
         std::regex relation("^ant\\(([134]\\d{8}),(\\d+),([134]\\d{8}),(\\d+)\\)\\.");
         std::smatch relation_data;
         if (!std::regex_search(line, relation_data, relation))
@@ -668,21 +679,21 @@ namespace verbly {
 
         std::pair<int, int> lookup1(std::stoi(relation_data[1]), std::stoi(relation_data[2]));
         std::pair<int, int> lookup2(std::stoi(relation_data[3]), std::stoi(relation_data[4]));
-        
+
         if (wordByWnidAndWnum_.count(lookup1) && wordByWnidAndWnum_.count(lookup2))
         {
           word& word1 = *wordByWnidAndWnum_.at(lookup1);
           word& word2 = *wordByWnidAndWnum_.at(lookup2);
-          
+
           std::list<field> fields;
           fields.emplace_back("antonym_1_id", word1.getId());
           fields.emplace_back("antonym_2_id", word2.getId());
-          
+
           db_.insertIntoTable("antonymy", std::move(fields));
         }
       }
     }
-    
+
     void generator::readWordNetVariation()
     {
       std::list<std::string> lines(readFile(wordNetPath_ + "wn_at.pl"));
@@ -690,7 +701,7 @@ namespace verbly {
       for (auto line : lines)
       {
         ppgs.update();
-      
+
         std::regex relation("^at\\((1\\d{8}),(3\\d{8})\\)\\.");
         std::smatch relation_data;
         if (!std::regex_search(line, relation_data, relation))
@@ -700,21 +711,21 @@ namespace verbly {
 
         int lookup1 = std::stoi(relation_data[1]);
         int lookup2 = std::stoi(relation_data[2]);
-        
+
         if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2))
         {
           notion& notion1 = *notionByWnid_.at(lookup1);
           notion& notion2 = *notionByWnid_.at(lookup2);
-          
+
           std::list<field> fields;
           fields.emplace_back("noun_id", notion1.getId());
           fields.emplace_back("adjective_id", notion2.getId());
-          
+
           db_.insertIntoTable("variation", std::move(fields));
         }
       }
     }
-    
+
     void generator::readWordNetClasses()
     {
       std::list<std::string> lines(readFile(wordNetPath_ + "wn_cls.pl"));
@@ -722,7 +733,7 @@ namespace verbly {
       for (auto line : lines)
       {
         ppgs.update();
-      
+
         std::regex relation("^cls\\(([134]\\d{8}),(\\d+),(1\\d{8}),(\\d+),([tur])\\)\\.");
         std::smatch relation_data;
         if (!std::regex_search(line, relation_data, relation))
@@ -733,7 +744,7 @@ namespace verbly {
         std::pair<int, int> lookup1(std::stoi(relation_data[1]), std::stoi(relation_data[2]));
         std::pair<int, int> lookup2(std::stoi(relation_data[3]), std::stoi(relation_data[4]));
         std::string class_type = relation_data[5];
-        
+
         std::string table_name;
         if (class_type == "t")
         {
@@ -745,10 +756,10 @@ namespace verbly {
         {
           table_name += "regionality";
         }
-        
+
         std::list<int> leftJoin;
         std::list<int> rightJoin;
-        
+
         if ((lookup1.second == 0) && (wordsByWnid_.count(lookup1.first)))
         {
           std::transform(std::begin(wordsByWnid_.at(lookup1.first)), std::end(wordsByWnid_.at(lookup1.first)), std::back_inserter(leftJoin), [] (word* w) {
@@ -757,7 +768,7 @@ namespace verbly {
         } else if (wordByWnidAndWnum_.count(lookup1)) {
           leftJoin.push_back(wordByWnidAndWnum_.at(lookup1)->getId());
         }
-        
+
         if ((lookup2.second == 0) && (wordsByWnid_.count(lookup2.first)))
         {
           std::transform(std::begin(wordsByWnid_.at(lookup2.first)), std::end(wordsByWnid_.at(lookup2.first)), std::back_inserter(rightJoin), [] (word* w) {
@@ -766,7 +777,7 @@ namespace verbly {
         } else if (wordByWnidAndWnum_.count(lookup2)) {
           rightJoin.push_back(wordByWnidAndWnum_.at(lookup2)->getId());
         }
-        
+
         for (int word1 : leftJoin)
         {
           for (int word2 : rightJoin)
@@ -774,13 +785,13 @@ namespace verbly {
             std::list<field> fields;
             fields.emplace_back("term_id", word1);
             fields.emplace_back("domain_id", word2);
-          
+
             db_.insertIntoTable(table_name, std::move(fields));
           }
         }
       }
     }
-    
+
     void generator::readWordNetCausality()
     {
       std::list<std::string> lines(readFile(wordNetPath_ + "wn_cs.pl"));
@@ -788,7 +799,7 @@ namespace verbly {
       for (auto line : lines)
       {
         ppgs.update();
-      
+
         std::regex relation("^cs\\((2\\d{8}),(2\\d{8})\\)\\.");
         std::smatch relation_data;
         if (!std::regex_search(line, relation_data, relation))
@@ -798,21 +809,21 @@ namespace verbly {
 
         int lookup1 = std::stoi(relation_data[1]);
         int lookup2 = std::stoi(relation_data[2]);
-        
+
         if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2))
         {
           notion& notion1 = *notionByWnid_.at(lookup1);
           notion& notion2 = *notionByWnid_.at(lookup2);
-          
+
           std::list<field> fields;
           fields.emplace_back("effect_id", notion1.getId());
           fields.emplace_back("cause_id", notion2.getId());
-          
+
           db_.insertIntoTable("causality", std::move(fields));
         }
       }
     }
-    
+
     void generator::readWordNetEntailment()
     {
       std::list<std::string> lines(readFile(wordNetPath_ + "wn_ent.pl"));
@@ -820,7 +831,7 @@ namespace verbly {
       for (auto line : lines)
       {
         ppgs.update();
-      
+
         std::regex relation("^ent\\((2\\d{8}),(2\\d{8})\\)\\.");
         std::smatch relation_data;
         if (!std::regex_search(line, relation_data, relation))
@@ -830,21 +841,21 @@ namespace verbly {
 
         int lookup1 = std::stoi(relation_data[1]);
         int lookup2 = std::stoi(relation_data[2]);
-        
+
         if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2))
         {
           notion& notion1 = *notionByWnid_.at(lookup1);
           notion& notion2 = *notionByWnid_.at(lookup2);
-          
+
           std::list<field> fields;
           fields.emplace_back("given_id", notion1.getId());
           fields.emplace_back("entailment_id", notion2.getId());
-          
+
           db_.insertIntoTable("entailment", std::move(fields));
         }
       }
     }
-    
+
     void generator::readWordNetHypernymy()
     {
       std::list<std::string> lines(readFile(wordNetPath_ + "wn_hyp.pl"));
@@ -852,7 +863,7 @@ namespace verbly {
       for (auto line : lines)
       {
         ppgs.update();
-      
+
         std::regex relation("^hyp\\(([12]\\d{8}),([12]\\d{8})\\)\\.");
         std::smatch relation_data;
         if (!std::regex_search(line, relation_data, relation))
@@ -862,21 +873,21 @@ namespace verbly {
 
         int lookup1 = std::stoi(relation_data[1]);
         int lookup2 = std::stoi(relation_data[2]);
-        
+
         if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2))
         {
           notion& notion1 = *notionByWnid_.at(lookup1);
           notion& notion2 = *notionByWnid_.at(lookup2);
-          
+
           std::list<field> fields;
           fields.emplace_back("hyponym_id", notion1.getId());
           fields.emplace_back("hypernym_id", notion2.getId());
-          
+
           db_.insertIntoTable("hypernymy", std::move(fields));
         }
       }
     }
-    
+
     void generator::readWordNetInstantiation()
     {
       std::list<std::string> lines(readFile(wordNetPath_ + "wn_ins.pl"));
@@ -884,7 +895,7 @@ namespace verbly {
       for (auto line : lines)
       {
         ppgs.update();
-      
+
         std::regex relation("^ins\\((1\\d{8}),(1\\d{8})\\)\\.");
         std::smatch relation_data;
         if (!std::regex_search(line, relation_data, relation))
@@ -894,21 +905,21 @@ namespace verbly {
 
         int lookup1 = std::stoi(relation_data[1]);
         int lookup2 = std::stoi(relation_data[2]);
-        
+
         if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2))
         {
           notion& notion1 = *notionByWnid_.at(lookup1);
           notion& notion2 = *notionByWnid_.at(lookup2);
-          
+
           std::list<field> fields;
           fields.emplace_back("instance_id", notion1.getId());
           fields.emplace_back("class_id", notion2.getId());
-          
+
           db_.insertIntoTable("instantiation", std::move(fields));
         }
       }
     }
-    
+
     void generator::readWordNetMemberMeronymy()
     {
       std::list<std::string> lines(readFile(wordNetPath_ + "wn_mm.pl"));
@@ -916,7 +927,7 @@ namespace verbly {
       for (auto line : lines)
       {
         ppgs.update();
-      
+
         std::regex relation("^mm\\((1\\d{8}),(1\\d{8})\\)\\.");
         std::smatch relation_data;
         if (!std::regex_search(line, relation_data, relation))
@@ -926,21 +937,21 @@ namespace verbly {
 
         int lookup1 = std::stoi(relation_data[1]);
         int lookup2 = std::stoi(relation_data[2]);
-        
+
         if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2))
         {
           notion& notion1 = *notionByWnid_.at(lookup1);
           notion& notion2 = *notionByWnid_.at(lookup2);
-          
+
           std::list<field> fields;
           fields.emplace_back("holonym_id", notion1.getId());
           fields.emplace_back("meronym_id", notion2.getId());
-          
+
           db_.insertIntoTable("member_meronymy", std::move(fields));
         }
       }
     }
-    
+
     void generator::readWordNetPartMeronymy()
     {
       std::list<std::string> lines(readFile(wordNetPath_ + "wn_mp.pl"));
@@ -948,7 +959,7 @@ namespace verbly {
       for (auto line : lines)
       {
         ppgs.update();
-      
+
         std::regex relation("^mp\\((1\\d{8}),(1\\d{8})\\)\\.");
         std::smatch relation_data;
         if (!std::regex_search(line, relation_data, relation))
@@ -958,21 +969,21 @@ namespace verbly {
 
         int lookup1 = std::stoi(relation_data[1]);
         int lookup2 = std::stoi(relation_data[2]);
-        
+
         if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2))
         {
           notion& notion1 = *notionByWnid_.at(lookup1);
           notion& notion2 = *notionByWnid_.at(lookup2);
-          
+
           std::list<field> fields;
           fields.emplace_back("holonym_id", notion1.getId());
           fields.emplace_back("meronym_id", notion2.getId());
-          
+
           db_.insertIntoTable("part_meronymy", std::move(fields));
         }
       }
     }
-    
+
     void generator::readWordNetSubstanceMeronymy()
     {
       std::list<std::string> lines(readFile(wordNetPath_ + "wn_ms.pl"));
@@ -980,7 +991,7 @@ namespace verbly {
       for (auto line : lines)
       {
         ppgs.update();
-      
+
         std::regex relation("^ms\\((1\\d{8}),(1\\d{8})\\)\\.");
         std::smatch relation_data;
         if (!std::regex_search(line, relation_data, relation))
@@ -990,21 +1001,21 @@ namespace verbly {
 
         int lookup1 = std::stoi(relation_data[1]);
         int lookup2 = std::stoi(relation_data[2]);
-        
+
         if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2))
         {
           notion& notion1 = *notionByWnid_.at(lookup1);
           notion& notion2 = *notionByWnid_.at(lookup2);
-          
+
           std::list<field> fields;
           fields.emplace_back("holonym_id", notion1.getId());
           fields.emplace_back("meronym_id", notion2.getId());
-          
+
           db_.insertIntoTable("substance_meronymy", std::move(fields));
         }
       }
     }
-    
+
     void generator::readWordNetPertainymy()
     {
       std::list<std::string> lines(readFile(wordNetPath_ + "wn_per.pl"));
@@ -1012,7 +1023,7 @@ namespace verbly {
       for (auto line : lines)
       {
         ppgs.update();
-      
+
         std::regex relation("^per\\(([34]\\d{8}),(\\d+),([13]\\d{8}),(\\d+)\\)\\.");
         std::smatch relation_data;
         if (!std::regex_search(line, relation_data, relation))
@@ -1022,31 +1033,31 @@ namespace verbly {
 
         std::pair<int, int> lookup1(std::stoi(relation_data[1]), std::stoi(relation_data[2]));
         std::pair<int, int> lookup2(std::stoi(relation_data[3]), std::stoi(relation_data[4]));
-        
+
         if (wordByWnidAndWnum_.count(lookup1) && wordByWnidAndWnum_.count(lookup2))
         {
           word& word1 = *wordByWnidAndWnum_.at(lookup1);
           word& word2 = *wordByWnidAndWnum_.at(lookup2);
-          
+
           if (word1.getNotion().getPartOfSpeech() == part_of_speech::adjective)
           {
             std::list<field> fields;
             fields.emplace_back("pertainym_id", word1.getId());
             fields.emplace_back("noun_id", word2.getId());
-          
+
             db_.insertIntoTable("pertainymy", std::move(fields));
           } else if (word1.getNotion().getPartOfSpeech() == part_of_speech::adverb)
           {
             std::list<field> fields;
             fields.emplace_back("mannernym_id", word1.getId());
             fields.emplace_back("adjective_id", word2.getId());
-          
+
             db_.insertIntoTable("mannernymy", std::move(fields));
           }
         }
       }
     }
-    
+
     void generator::readWordNetSpecification()
     {
       std::list<std::string> lines(readFile(wordNetPath_ + "wn_sa.pl"));
@@ -1064,21 +1075,21 @@ namespace verbly {
 
         std::pair<int, int> lookup1(std::stoi(relation_data[1]), std::stoi(relation_data[2]));
         std::pair<int, int> lookup2(std::stoi(relation_data[3]), std::stoi(relation_data[4]));
-        
+
         if (wordByWnidAndWnum_.count(lookup1) && wordByWnidAndWnum_.count(lookup2))
         {
           word& word1 = *wordByWnidAndWnum_.at(lookup1);
           word& word2 = *wordByWnidAndWnum_.at(lookup2);
-          
+
           std::list<field> fields;
           fields.emplace_back("general_id", word1.getId());
           fields.emplace_back("specific_id", word2.getId());
-          
+
           db_.insertIntoTable("specification", std::move(fields));
         }
       }
     }
-    
+
     void generator::readWordNetSimilarity()
     {
       std::list<std::string> lines(readFile(wordNetPath_ + "wn_sim.pl"));
@@ -1086,7 +1097,7 @@ namespace verbly {
       for (auto line : lines)
       {
         ppgs.update();
-      
+
         std::regex relation("^sim\\((3\\d{8}),(3\\d{8})\\)\\.");
         std::smatch relation_data;
         if (!std::regex_search(line, relation_data, relation))
@@ -1096,21 +1107,21 @@ namespace verbly {
 
         int lookup1 = std::stoi(relation_data[1]);
         int lookup2 = std::stoi(relation_data[2]);
-        
+
         if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2))
         {
           notion& notion1 = *notionByWnid_.at(lookup1);
           notion& notion2 = *notionByWnid_.at(lookup2);
-          
+
           std::list<field> fields;
           fields.emplace_back("adjective_1_id", notion1.getId());
           fields.emplace_back("adjective_2_id", notion2.getId());
-          
+
           db_.insertIntoTable("similarity", std::move(fields));
         }
       }
     }
-    
+
     std::list<std::string> generator::readFile(std::string path)
     {
       std::ifstream file(path);
@@ -1118,7 +1129,7 @@ namespace verbly {
       {
         throw std::invalid_argument("Could not find file " + path);
       }
-  
+
       std::list<std::string> lines;
       std::string line;
       while (std::getline(file, line))
@@ -1127,13 +1138,13 @@ namespace verbly {
         {
           line.pop_back();
         }
-      
+
         lines.push_back(line);
       }
-      
+
       return lines;
     }
-    
+
     part_of_speech generator::partOfSpeechByWnid(int wnid)
     {
       switch (wnid / 100000000)
@@ -1145,14 +1156,14 @@ namespace verbly {
         default: throw std::domain_error("Invalid WordNet synset ID: " + std::to_string(wnid));
       }
     }
-    
+
     notion& generator::createNotion(part_of_speech partOfSpeech)
     {
       notions_.emplace_back(partOfSpeech);
-      
+
       return notions_.back();
     }
-    
+
     notion& generator::lookupOrCreateNotion(int wnid)
     {
       if (!notionByWnid_.count(wnid))
@@ -1160,10 +1171,10 @@ namespace verbly {
         notions_.emplace_back(partOfSpeechByWnid(wnid), wnid);
         notionByWnid_[wnid] = &notions_.back();
       }
-      
+
       return *notionByWnid_.at(wnid);
     }
-    
+
     lemma& generator::lookupOrCreateLemma(std::string base_form)
     {
       if (!lemmaByBaseForm_.count(base_form))
@@ -1171,10 +1182,10 @@ namespace verbly {
         lemmas_.emplace_back(lookupOrCreateForm(base_form));
         lemmaByBaseForm_[base_form] = &lemmas_.back();
       }
-      
+
       return *lemmaByBaseForm_.at(base_form);
     }
-    
+
     form& generator::lookupOrCreateForm(std::string text)
     {
       if (!formByText_.count(text))
@@ -1182,32 +1193,32 @@ namespace verbly {
         forms_.emplace_back(text);
         formByText_[text] = &forms_.back();
       }
-      
+
       return *formByText_[text];
     }
-    
+
     template <typename... Args> word& generator::createWord(Args&&... args)
     {
       words_.emplace_back(std::forward<Args>(args)...);
       word& w = words_.back();
-      
+
       wordsByBaseForm_[w.getLemma().getBaseForm().getText()].insert(&w);
-      
+
       if (w.getNotion().hasWnid())
       {
         wordsByWnid_[w.getNotion().getWnid()].insert(&w);
       }
-      
+
       return w;
     }
-    
+
     group& generator::createGroup(xmlNodePtr top)
     {
       groups_.emplace_back();
       group& grp = groups_.back();
-      
+
       xmlChar* key;
-  
+
       for (xmlNodePtr node = top->xmlChildrenNode; node != nullptr; node = node->next)
       {
         if (!xmlStrcmp(node->name, reinterpret_cast<const xmlChar*>("SUBCLASSES")))
@@ -1223,14 +1234,14 @@ namespace verbly {
               } catch (const std::exception& e)
               {
                 key = xmlGetProp(subclass, reinterpret_cast<const xmlChar*>("ID"));
-                
+
                 if (key == nullptr)
                 {
                   std::throw_with_nested(std::logic_error("Error parsing IDless subgroup"));
                 } else {
                   std::string subgroupId(reinterpret_cast<const char*>(key));
                   xmlFree(key);
-                  
+
                   std::throw_with_nested(std::logic_error("Error parsing subgroup " + subgroupId));
                 }
               }
@@ -1245,25 +1256,25 @@ namespace verbly {
               key = xmlGetProp(member, reinterpret_cast<const xmlChar*>("wn"));
               std::string wnSenses(reinterpret_cast<const char*>(key));
               xmlFree(key);
-              
+
               auto wnSenseKeys = split<std::list<std::string>>(wnSenses, " ");
               if (!wnSenseKeys.empty())
               {
                 std::list<std::string> tempKeys;
-                
+
                 std::transform(std::begin(wnSenseKeys), std::end(wnSenseKeys), std::back_inserter(tempKeys), [] (std::string sense) {
                   return sense + "::";
                 });
-                
+
                 std::list<std::string> filteredKeys;
-                
+
                 std::remove_copy_if(std::begin(tempKeys), std::end(tempKeys), std::back_inserter(filteredKeys), [&] (std::string sense) {
                   return !wnSenseKeys_.count(sense);
                 });
-                
+
                 wnSenseKeys = std::move(filteredKeys);
               }
-              
+
               if (!wnSenseKeys.empty())
               {
                 for (std::string sense : wnSenseKeys)
@@ -1275,11 +1286,11 @@ namespace verbly {
                 key = xmlGetProp(member, reinterpret_cast<const xmlChar*>("name"));
                 std::string memberName(reinterpret_cast<const char*>(key));
                 xmlFree(key);
-                
+
                 notion& n = createNotion(part_of_speech::verb);
                 lemma& l = lookupOrCreateLemma(memberName);
                 word& w = createWord(n, l);
-                
+
                 w.setVerbGroup(grp);
               }
             }
@@ -1293,7 +1304,7 @@ namespace verbly {
               key = xmlGetProp(roletopnode, reinterpret_cast<const xmlChar*>("type"));
               std::string roleName = reinterpret_cast<const char*>(key);
               xmlFree(key);
-              
+
               selrestr roleSelrestrs;
               for (xmlNodePtr rolenode = roletopnode->xmlChildrenNode; rolenode != nullptr; rolenode = rolenode->next)
               {
@@ -1314,19 +1325,19 @@ namespace verbly {
             {
               frames_.emplace_back();
               frame& fr = frames_.back();
-          
+
               for (xmlNodePtr framenode = frametopnode->xmlChildrenNode; framenode != nullptr; framenode = framenode->next)
               {
                 if (!xmlStrcmp(framenode->name, reinterpret_cast<const xmlChar*>("SYNTAX")))
                 {
                   for (xmlNodePtr syntaxnode = framenode->xmlChildrenNode; syntaxnode != nullptr; syntaxnode = syntaxnode->next)
-                  {                
+                  {
                     if (!xmlStrcmp(syntaxnode->name, reinterpret_cast<const xmlChar*>("NP")))
                     {
                       key = xmlGetProp(syntaxnode, reinterpret_cast<const xmlChar*>("value"));
                       std::string partRole = reinterpret_cast<const char*>(key);
                       xmlFree(key);
-                  
+
                       selrestr partSelrestrs;
                       std::set<std::string> partSynrestrs;
 
@@ -1344,13 +1355,13 @@ namespace verbly {
                             }
                           }
                         }
-                  
+
                         if (!xmlStrcmp(npnode->name, reinterpret_cast<const xmlChar*>("SELRESTRS")))
                         {
                           partSelrestrs = parseSelrestr(npnode);
                         }
                       }
-                      
+
                       fr.push_back(part::createNounPhrase(std::move(partRole), std::move(partSelrestrs), std::move(partSynrestrs)));
                     } else if (!xmlStrcmp(syntaxnode->name, reinterpret_cast<const xmlChar*>("VERB")))
                     {
@@ -1359,11 +1370,11 @@ namespace verbly {
                     {
                       std::set<std::string> partChoices;
                       bool partLiteral;
-                      
+
                       if (xmlHasProp(syntaxnode, reinterpret_cast<const xmlChar*>("value")))
                       {
                         partLiteral = true;
-                        
+
                         key = xmlGetProp(syntaxnode, reinterpret_cast<const xmlChar*>("value"));
                         std::string choicesStr = reinterpret_cast<const char*>(key);
                         xmlFree(key);
@@ -1380,7 +1391,7 @@ namespace verbly {
                         }
                       } else {
                         partLiteral = false;
-                        
+
                         for (xmlNodePtr npnode = syntaxnode->xmlChildrenNode; npnode != nullptr; npnode = npnode->next)
                         {
                           if (!xmlStrcmp(npnode->name, reinterpret_cast<const xmlChar*>("SELRESTRS")))
@@ -1397,7 +1408,7 @@ namespace verbly {
                           }
                         }
                       }
-                  
+
                       fr.push_back(part::createPreposition(std::move(partChoices), partLiteral));
                     } else if (!xmlStrcmp(syntaxnode->name, reinterpret_cast<const xmlChar*>("ADJ")))
                     {
@@ -1410,7 +1421,7 @@ namespace verbly {
                       key = xmlGetProp(syntaxnode, reinterpret_cast<const xmlChar*>("value"));
                       std::string literalValue = reinterpret_cast<const char*>(key);
                       xmlFree(key);
-                      
+
                       fr.push_back(part::createLiteral(literalValue));
                     } else {
                       continue;
@@ -1427,11 +1438,11 @@ namespace verbly {
 
       return grp;
     }
-    
+
     selrestr generator::parseSelrestr(xmlNodePtr top)
     {
       xmlChar* key;
-  
+
       if (!xmlStrcmp(top->name, reinterpret_cast<const xmlChar*>("SELRESTRS")))
       {
         if (xmlChildElementCount(top) == 0)
@@ -1449,10 +1460,10 @@ namespace verbly {
             {
               orlogic = true;
             }
-            
+
             xmlFree(key);
           }
-  
+
           std::list<selrestr> children;
           for (xmlNodePtr selrestr = top->xmlChildrenNode; selrestr != nullptr; selrestr = selrestr->next)
           {
@@ -1462,7 +1473,7 @@ namespace verbly {
               children.push_back(parseSelrestr(selrestr));
             }
           }
-          
+
           return selrestr(children, orlogic);
         }
       } else if (!xmlStrcmp(top->name, reinterpret_cast<const xmlChar*>("SELRESTR")))
@@ -1474,12 +1485,12 @@ namespace verbly {
         key = xmlGetProp(top, reinterpret_cast<const xmlChar*>("type"));
         std::string selRestriction = reinterpret_cast<const char*>(key);
         xmlFree(key);
-        
+
         return selrestr(selRestriction, selPos);
       } else {
         throw std::logic_error("Badly formatted selrestr");
       }
     }
-    
+
   };
 };
-- 
cgit 1.4.1