From 6746da6edd7d9d50efe374eabbb79a3cac882d81 Mon Sep 17 00:00:00 2001 From: Kelly Rauchenberger Date: Mon, 16 Jan 2017 18:02:50 -0500 Subject: Started structural rewrite The new object structure was designed to build on the existing WordNet structure, while also adding in all of the data that we get from other sources. More information about this can be found on the project wiki. The generator has already been completely rewritten to generate a datafile that uses the new structure. In addition, a number of indexes are created, which does double the size of the datafile, but also allows for much faster lookups. Finally, the new generator is written modularly and is a lot more readable than the old one. The verbly interface to the new object structure has mostly been completed, but has not been tested fully. There is a completely new search API which utilizes a lot of operator overloading; documentation on how to use it should go up at some point. Token processing and verb frames are currently unimplemented. Source for these has been left in the repository for now. 
--- generator/generator.h | 151 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 151 insertions(+) create mode 100644 generator/generator.h (limited to 'generator/generator.h') diff --git a/generator/generator.h b/generator/generator.h new file mode 100644 index 0000000..e2a7404 --- /dev/null +++ b/generator/generator.h @@ -0,0 +1,151 @@ +#ifndef GENERATOR_H_5B61CBC5 +#define GENERATOR_H_5B61CBC5 + +#include +#include +#include +#include +#include +#include "database.h" +#include "notion.h" +#include "word.h" +#include "lemma.h" +#include "form.h" +#include "pronunciation.h" +#include "group.h" +#include "frame.h" + +namespace verbly { + namespace generator { + + enum class part_of_speech; + class selrestr; + + class generator { + public: + + // Constructor + + generator( + std::string verbNetPath, + std::string agidPath, + std::string wordNetPath, + std::string cmudictPath, + std::string imageNetPath, + std::string outputPath); + + // Action + + void run(); + + private: + + // Subroutines + + void readWordNetSynsets(); + + void readAdjectivePositioning(); + + void readImageNetUrls(); + + void readWordNetSenseKeys(); + + void readVerbNet(); + + void readAgidInflections(); + + void readPrepositions(); + + void readCmudictPronunciations(); + + void writeSchema(); + + void dumpObjects(); + + void readWordNetAntonymy(); + + void readWordNetVariation(); + + void readWordNetClasses(); + + void readWordNetCausality(); + + void readWordNetEntailment(); + + void readWordNetHypernymy(); + + void readWordNetInstantiation(); + + void readWordNetMemberMeronymy(); + + void readWordNetPartMeronymy(); + + void readWordNetSubstanceMeronymy(); + + void readWordNetPertainymy(); + + void readWordNetSpecification(); + + void readWordNetSimilarity(); + + // Helpers + + std::list readFile(std::string path); + + inline part_of_speech partOfSpeechByWnid(int wnid); + + notion& createNotion(part_of_speech partOfSpeech); + + notion& lookupOrCreateNotion(int wnid); + + lemma& 
lookupOrCreateLemma(std::string base_form); + + form& lookupOrCreateForm(std::string text); + + template word& createWord(Args&&... args); + + group& createGroup(xmlNodePtr top); + + selrestr parseSelrestr(xmlNodePtr top); + + // Input + + std::string verbNetPath_; + std::string agidPath_; + std::string wordNetPath_; + std::string cmudictPath_; + std::string imageNetPath_; + + // Output + + database db_; + + // Data + + std::list notions_; + std::list words_; + std::list lemmas_; + std::list
forms_; + std::list pronunciations_; + std::list frames_; + std::list groups_; + + // Indexes + + std::map notionByWnid_; + std::map> wordsByWnid_; + std::map, word*> wordByWnidAndWnum_; + std::map> wordsByBaseForm_; + std::map lemmaByBaseForm_; + std::map formByText_; + + // Caches + + std::map wnSenseKeys_; + + }; + + }; +}; + +#endif /* end of include guard: GENERATOR_H_5B61CBC5 */ -- cgit 1.4.1