From 4cbe7a42a685bc2449f1adb7c37144c9496eab5f Mon Sep 17 00:00:00 2001 From: Kelly Rauchenberger Date: Mon, 23 Jan 2017 11:59:23 -0500 Subject: Rewrote tokens --- CMakeLists.txt | 2 +- lib/token.cpp | 813 ++++++++++++++++++++++----------------------------------- lib/token.h | 236 +++++++---------- lib/verbly.h | 4 + 4 files changed, 405 insertions(+), 650 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8d42fdd..5c15e79 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -7,7 +7,7 @@ pkg_check_modules(sqlite3 sqlite3>=3.8.3 REQUIRED) set(CMAKE_BUILD_TYPE Debug) include_directories(vendor/json) -add_library(verbly lib/filter.cpp lib/field.cpp lib/notion.cpp lib/word.cpp lib/group.cpp lib/frame.cpp lib/lemma.cpp lib/form.cpp lib/pronunciation.cpp lib/statement.cpp lib/binding.cpp lib/database.cpp lib/selrestr.cpp lib/part.cpp) +add_library(verbly lib/filter.cpp lib/field.cpp lib/notion.cpp lib/word.cpp lib/group.cpp lib/frame.cpp lib/lemma.cpp lib/form.cpp lib/pronunciation.cpp lib/statement.cpp lib/binding.cpp lib/database.cpp lib/token.cpp lib/selrestr.cpp lib/part.cpp) set_property(TARGET verbly PROPERTY CXX_STANDARD 11) set_property(TARGET verbly PROPERTY CXX_STANDARD_REQUIRED ON) target_link_libraries(verbly ${sqlite3_LIBRARIES}) diff --git a/lib/token.cpp b/lib/token.cpp index 3cc77e5..769acad 100644 --- a/lib/token.cpp +++ b/lib/token.cpp @@ -1,645 +1,450 @@ -#include "verbly.h" +#include "token.h" +#include +#include "util.h" namespace verbly { - - token::type token::get_type() const - { - return _type; - } - - int token::get_extra() const - { - return _extra; - } - - void token::set_extra(int _arg) - { - _extra = _arg; - } - + token::token(const token& other) { - _type = other._type; - _extra = other._extra; - - switch (_type) + type_ = other.type_; + + switch (type_) { - case token::type::verb: + case type::word: { - new(&_verb._verb) verb(other._verb._verb); - _verb._infl = other._verb._infl; - - break; - } - - case token::type::noun: - { - new(&_noun._noun) noun(other._noun._noun); - _noun._infl = other._noun._infl; - + new(&word_.word_) word(other.word_.word_); + word_.category_ = other.word_.category_; + break; } - - case token::type::adjective: + + case type::literal: { - new(&_adjective._adjective) adjective(other._adjective._adjective); - _adjective._infl = other._adjective._infl; - + new(&literal_) std::string(other.literal_); + break; } - - case token::type::adverb: + + case type::part: { - new(&_adverb._adverb) adverb(other._adverb._adverb); - _adverb._infl = other._adverb._infl; - + new(&part_) part(other.part_); + break; } - - case token::type::preposition: + + case type::fillin: { - new(&_preposition._preposition) preposition(other._preposition._preposition); - + new(&fillin_) std::set(other.fillin_); + break; } - - case token::type::fillin: + + case type::utterance: { - _fillin._type = other._fillin._type; - + new(&utterance_) std::list(other.utterance_); + break; } - - case token::type::string: + } + } + + token::token(token&& other) : token() + { + swap(*this, other); + } + + token& token::operator=(token other) + { + swap(*this, other); + + return *this; + } + + void swap(token& first, token& second) + { + using type = token::type; + + type tempType = first.type_; + word tempWord; + inflection tempCategory; + std::string tempLiteral; + part tempPart; + std::set tempFillin; + std::list tempUtterance; + + switch (tempType) + { + case type::word: { - new(&_string._str) std::string(other._string._str); - + tempWord = std::move(first.word_.word_); + tempCategory = first.word_.category_; + break; } - - case token::type::utterance: + + case type::literal: { - new(&_utterance._utterance) std::list(other._utterance._utterance); - + tempLiteral = std::move(first.literal_); + break; } - } - } - - token& token::operator=(const token& other) - { - this->~token(); - - _type = other._type; - _extra = other._extra; - - switch (_type) - { - case token::type::verb: + + case type::part: { - new(&_verb._verb) verb(other._verb._verb); - _verb._infl = other._verb._infl; - + tempPart = std::move(first.part_); + break; } - - case token::type::noun: + + case type::fillin: { - new(&_noun._noun) noun(other._noun._noun); - _noun._infl = other._noun._infl; - + tempFillin = std::move(first.fillin_); + break; } - - case token::type::adjective: + + case type::utterance: { - new(&_adjective._adjective) adjective(other._adjective._adjective); - _adjective._infl = other._adjective._infl; - + tempUtterance = std::move(first.utterance_); + break; } - - case token::type::adverb: + } + + first.~token(); + + first.type_ = second.type_; + + switch (first.type_) + { + case type::word: { - new(&_adverb._adverb) adverb(other._adverb._adverb); - _adverb._infl = other._adverb._infl; - + new(&first.word_.word_) word(std::move(second.word_.word_)); + first.word_.category_ = second.word_.category_; + break; } - - case token::type::preposition: + + case type::literal: { - new(&_preposition._preposition) preposition(other._preposition._preposition); - + new(&first.literal_) std::string(std::move(second.literal_)); + break; } - - case token::type::fillin: + + case type::part: { - _fillin._type = other._fillin._type; - + new(&first.part_) part(std::move(second.part_)); + break; } - - case token::type::string: + + case type::fillin: { - new(&_string._str) std::string(other._string._str); - + new(&first.fillin_) std::set(std::move(second.fillin_)); + break; } - - case token::type::utterance: + + case type::utterance: { - new(&_utterance._utterance) std::list(other._utterance._utterance); - + new(&first.utterance_) std::list(std::move(second.utterance_)); + break; } } - - return *this; - } - - token::~token() - { - switch (_type) + + second.~token(); + + second.type_ = tempType; + + switch (second.type_) { - case token::type::verb: + case type::word: { - _verb._verb.~verb(); - + new(&second.word_.word_) word(std::move(tempWord)); + second.word_.category_ = tempCategory; + break; } - - case token::type::noun: + + case type::literal: { - _noun._noun.~noun(); - + new(&second.literal_) std::string(std::move(tempLiteral)); + break; } - - case token::type::adjective: + + case type::part: { - _adjective._adjective.~adjective(); - + new(&second.part_) part(std::move(tempPart)); + break; } - - case token::type::adverb: + + case type::fillin: { - _adverb._adverb.~adverb(); - + new(&second.fillin_) std::set(std::move(tempFillin)); + break; } - - case token::type::preposition: + + case type::utterance: { - _preposition._preposition.~preposition(); - + new(&second.utterance_) std::list(std::move(tempUtterance)); + break; } - - case token::type::fillin: + } + } + + token::~token() + { + switch (type_) + { + case type::word: { - // Nothing! - + word_.word_.~word(); + break; } - - case token::type::string: + + case type::literal: { using string_type = std::string; - _string._str.~string_type(); - + literal_.~string_type(); + + break; + } + + case type::part: + { + part_.~part(); + + break; + } + + case type::fillin: + { + using set_type = std::set; + fillin_.~set_type(); + break; } - - case token::type::utterance: + + case type::utterance: { using list_type = std::list; - _utterance._utterance.~list_type(); - + utterance_.~list_type(); + break; } } } - - bool token::is_complete() const + + bool token::isComplete() const { - if (_type == token::type::utterance) + switch (type_) { - return std::all_of(std::begin(_utterance._utterance), std::end(_utterance._utterance), [] (const token& tkn) { - return tkn.is_complete(); + case type::word: return true; + case type::literal: return true; + case type::part: return false; + case type::fillin: return false; + case type::utterance: return std::all_of(std::begin(utterance_), std::end(utterance_), [] (const token& tkn) { + return tkn.isComplete(); }); - } else if (_type == token::type::fillin) - { - return false; - } else { - return true; } } - + std::string token::compile() const { - switch (_type) + switch (type_) { - case token::type::verb: - { - switch (_verb._infl) - { - case token::verb_inflection::infinitive: return _verb._verb.infinitive_form(); - case token::verb_inflection::past_tense: return _verb._verb.past_tense_form(); - case token::verb_inflection::past_participle: return _verb._verb.past_participle_form(); - case token::verb_inflection::ing_form: return _verb._verb.ing_form(); - case token::verb_inflection::s_form: return _verb._verb.s_form(); - } - } - - case token::type::noun: - { - switch (_noun._infl) - { - case token::noun_inflection::singular: return _noun._noun.singular_form(); - case token::noun_inflection::plural: return _noun._noun.plural_form(); - } - } - - case token::type::adjective: - { - switch (_adjective._infl) - { - case token::adjective_inflection::base: return _adjective._adjective.base_form(); - case token::adjective_inflection::comparative: return _adjective._adjective.comparative_form(); - case token::adjective_inflection::superlative: return _adjective._adjective.superlative_form(); - } - } - - case token::type::adverb: + case type::word: return word_.word_.getInflections(word_.category_).front(); + case type::literal: return literal_; + case type::part: throw std::domain_error("Cannot compile incomplete token"); + case type::fillin: throw std::domain_error("Cannot compile incomplete token"); + + case type::utterance: { - switch (_adverb._infl) + std::list compiled; + for (const token& tkn : utterance_) { - case token::adverb_inflection::base: return _adverb._adverb.base_form(); - case token::adverb_inflection::comparative: return _adverb._adverb.comparative_form(); - case token::adverb_inflection::superlative: return _adverb._adverb.superlative_form(); + compiled.push_back(tkn.compile()); } - } - - case token::type::preposition: return _preposition._preposition.get_form(); - case token::type::string: return _string._str; - - case token::type::fillin: - { - throw std::runtime_error("Cannot compile a fillin token."); - } - - case token::type::utterance: - { - std::list compiled; - std::transform(std::begin(_utterance._utterance), std::end(_utterance._utterance), std::back_inserter(compiled), [] (token tkn) { - return tkn.compile(); - }); - - return verbly::implode(std::begin(compiled), std::end(compiled), " "); + + return implode(std::begin(compiled), std::end(compiled), " "); } } } - - token::token(verb _verb) : _type(type::verb) - { - new(&this->_verb._verb) verb(_verb); - this->_verb._infl = verb_inflection::infinitive; - } - - token::token(verb _verb, verb_inflection _infl) : token(_verb) - { - this->_verb._infl = _infl; - } - - token& token::operator=(verb _verb) - { - *this = token{_verb}; - - return *this; - } - - verb token::get_verb() const - { - assert(_type == type::verb); - - return _verb._verb; - } - - void token::set_verb(verb _verb) - { - assert(_type == type::verb); - - this->_verb._verb = _verb; - } - - token::verb_inflection token::get_verb_inflection() const - { - assert(_type == type::verb); - - return _verb._infl; - } - - void token::set_verb_inflection(verb_inflection _infl) - { - assert(_type == type::verb); - - _verb._infl = _infl; - } - - token::token(noun _noun) : _type(type::noun) - { - new(&this->_noun._noun) noun(_noun); - this->_noun._infl = noun_inflection::singular; - } - - token::token(noun _noun, noun_inflection _infl) : token(_noun) - { - this->_noun._infl = _infl; - } - - token& token::operator=(noun _noun) - { - *this = token{_noun}; - - return *this; - } - - noun token::get_noun() const - { - assert(_type == type::noun); - - return _noun._noun; - } - - void token::set_noun(noun _noun) - { - assert(_type == type::noun); - - this->_noun._noun = _noun; - } - - token::noun_inflection token::get_noun_inflection() const - { - assert(_type == type::noun); - - return _noun._infl; - } - - void token::set_noun_inflection(noun_inflection _infl) - { - assert(_type == type::noun); - - _noun._infl = _infl; - } - - token::token(adjective _adjective) : _type(type::adjective) - { - new(&this->_adjective._adjective) adjective(_adjective); - this->_adjective._infl = adjective_inflection::base; - } - - token::token(adjective _adjective, adjective_inflection _infl) : token(_adjective) - { - this->_adjective._infl = _infl; - } - - token& token::operator=(adjective _adjective) - { - *this = token{_adjective}; - - return *this; - } - - adjective token::get_adjective() const - { - assert(_type == type::adjective); - - return _adjective._adjective; - } - - void token::set_adjective(adjective _adjective) - { - assert(_type == type::adjective); - - this->_adjective._adjective = _adjective; - } - - token::adjective_inflection token::get_adjective_inflection() const - { - assert(_type == type::adjective); - - return _adjective._infl; - } - - void token::set_adjective_inflection(adjective_inflection _infl) - { - assert(_type == type::adjective); - - _adjective._infl = _infl; - } - - token::token(adverb _adverb) : _type(type::adverb) - { - new(&this->_adverb._adverb) adverb(_adverb); - this->_adverb._infl = adverb_inflection::base; - } - - token::token(adverb _adverb, adverb_inflection _infl) : token(_adverb) - { - this->_adverb._infl = _infl; - } - - token& token::operator=(adverb _adverb) - { - *this = token{_adverb}; - - return *this; - } - - adverb token::get_adverb() const + + token::token(word arg, inflection category) : type_(type::word) { - assert(_type == type::adverb); - - return _adverb._adverb; + new(&word_.word_) word(std::move(arg)); + word_.category_ = category; } - - void token::set_adverb(adverb _adverb) + + const word& token::getWord() const { - assert(_type == type::adverb); - - this->_adverb._adverb = _adverb; + if (type_ != type::word) + { + throw std::domain_error("Token is not a word"); + } + + return word_.word_; } - - token::adverb_inflection token::get_adverb_inflection() const + + token token::inflect(inflection category) const { - assert(_type == type::adverb); - - return _adverb._infl; + if (type_ != type::word) + { + throw std::domain_error("Token is not a word"); + } + + return token(word_.word_, category); } - - void token::set_adverb_inflection(adverb_inflection _infl) + + token::token(std::string arg) : type_(type::literal) { - assert(_type == type::adverb); - - _adverb._infl = _infl; + new(&literal_) std::string(std::move(arg)); } - - token::token(preposition _preposition) : _type(type::preposition) + + token::token(const char* arg) : token(std::string(arg)) { - new(&this->_preposition._preposition) preposition(_preposition); } - - token& token::operator=(preposition _preposition) + + std::string token::getLiteral() const { - *this = token{_preposition}; - - return *this; + if (type_ != type::literal) + { + throw std::domain_error("Token is not a literal"); + } + + return literal_; } - - preposition token::get_preposition() const + + token::token(part arg) : type_(type::part) { - assert(_type == type::preposition); - - return _preposition._preposition; + new(&part_) part(std::move(arg)); } - - void token::set_preposition(preposition _preposition) + + part token::getPart() const { - assert(_type == type::preposition); - - this->_preposition._preposition = _preposition; + if (type_ != type::part) + { + throw std::domain_error("Token is not a part"); + } + + return part_; } - - token::token(fillin_type _ft) : _type(type::fillin) + + token::token(std::set synrestrs) : type_(type::fillin) { - _fillin._type = _ft; + new(&fillin_) std::set(std::move(synrestrs)); } - - token& token::operator=(fillin_type _ft) + + const std::set& token::getSynrestrs() const { - *this = token{_ft}; - - return *this; + if (type_ != type::fillin) + { + throw std::domain_error("Token is not a fillin"); + } + + return fillin_; } - - token::fillin_type token::get_fillin_type() const + + bool token::hasSynrestr(std::string synrestr) const { - assert(_type == type::fillin); - - return _fillin._type; + if (type_ != type::fillin) + { + throw std::domain_error("Token is not a fillin"); + } + + return (fillin_.count(synrestr) == 1); } - - void token::set_fillin_type(fillin_type _ft) + + void token::addSynrestr(std::string synrestr) { - assert(_type == type::fillin); - - _fillin._type = _ft; + if (type_ != type::fillin) + { + throw std::domain_error("Token is not a fillin"); + } + + fillin_.insert(std::move(synrestr)); } - - token::token() : _type(type::utterance) + + token::token() : type_(type::utterance) { - new(&_utterance._utterance) std::list(); + new(&utterance_) std::list(); } - - token::token(std::initializer_list _init) : _type(type::utterance) + + token::token(std::vector parts) : type_(type::utterance) { - new(&_utterance._utterance) std::list(_init); + new(&utterance_) std::list(std::begin(parts), std::end(parts)); } - + token::iterator token::begin() { - assert(_type == type::utterance); - - return _utterance._utterance.begin(); - } - - token::iterator token::end() - { - assert(_type == type::utterance); - - return _utterance._utterance.end(); - } - - token& token::operator<<(token _tkn) - { - assert(_type == type::utterance); - - _utterance._utterance.push_back(_tkn); - - return *this; - } - - void token::push_back(token _tkn) - { - assert(_type == type::utterance); - - _utterance._utterance.push_back(_tkn); - } - - void token::insert(iterator before, token _tkn) - { - assert(_type == type::utterance); - - _utterance._utterance.insert(before, _tkn); + if (type_ != type::utterance) + { + throw std::domain_error("Token is not an utterance"); + } + + return std::begin(utterance_); } - - void token::replace(iterator torepl, token _tkn) + + token::const_iterator token::begin() const { - assert(_type == type::utterance); - - _utterance._utterance.insert(torepl, _tkn); - _utterance._utterance.erase(torepl); + if (type_ != type::utterance) + { + throw std::domain_error("Token is not an utterance"); + } + + return std::begin(utterance_); } - - void token::erase(iterator toer) + + token::iterator token::end() { - assert(_type == type::utterance); - - _utterance._utterance.erase(toer); + if (type_ != type::utterance) + { + throw std::domain_error("Token is not an utterance"); + } + + return std::end(utterance_); } - - token::token(std::string _str) : _type(type::string) + + token::const_iterator token::end() const { - new(&_string._str) std::string(_str); + if (type_ != type::utterance) + { + throw std::domain_error("Token is not an utterance"); + } + + return std::end(utterance_); } - - token& token::operator=(std::string _str) + + token& token::operator<<(token arg) { - *this = token{_str}; - + if (type_ != type::utterance) + { + throw std::domain_error("Token is not an utterance"); + } + + utterance_.push_back(std::move(arg)); + return *this; } - - std::string token::get_string() const - { - assert(_type == type::string); - - return _string._str; - } - - void token::set_string(std::string _str) - { - assert(_type == type::string); - - _string._str = _str; - } - - std::ostream& operator<<(std::ostream& os, token::type _type) + + std::ostream& operator<<(std::ostream& os, token::type type) { - switch (_type) + switch (type) { - case token::type::verb: return os << "verb"; - case token::type::noun: return os << "noun"; - case token::type::adjective: return os << "adjective"; - case token::type::adverb: return os << "adverb"; - case token::type::preposition: return os << "preposition"; + case token::type::word: return os << "word"; + case token::type::literal: return os << "literal"; + case token::type::part: return os << "part"; case token::type::fillin: return os << "fillin"; case token::type::utterance: return os << "utterance"; - case token::type::string: return os << "string"; } } - + }; diff --git a/lib/token.h b/lib/token.h index ff3c37b..e7f8c28 100644 --- a/lib/token.h +++ b/lib/token.h @@ -1,170 +1,116 @@ #ifndef TOKEN_H_AD62C505 #define TOKEN_H_AD62C505 +#include +#include +#include +#include +#include "enums.h" +#include "word.h" +#include "part.h" + namespace verbly { - + class token { public: enum class type { - verb, - noun, - adjective, - adverb, - preposition, + word, + literal, + part, fillin, - utterance, - string - }; - - enum class verb_inflection { - infinitive, - past_tense, - past_participle, - s_form, - ing_form - }; - - enum class noun_inflection { - singular, - plural - }; - - enum class adjective_inflection { - base, - comparative, - superlative - }; - - enum class adverb_inflection { - base, - comparative, - superlative - }; - - enum class fillin_type { - generic, - noun_phrase, - adjective_phrase, - adverb_phrase, - participle_phrase, - infinitive_phrase + utterance }; - - type get_type() const; - - int get_extra() const; - void set_extra(int _arg); - + + // Copy & move constructors + token(const token& other); - token& operator=(const token& other); + token(token&& other); + + // Assignment operator + + token& operator=(token other); + + // Swap + + friend void swap(token& first, token& second); + + // Destructor + ~token(); - - bool is_complete() const; + + // Accessors + + type getType() const + { + return type_; + } + + bool isComplete() const; + std::string compile() const; - - // Verb - token(verb _verb); - token(verb _verb, verb_inflection _infl); - token& operator=(verb _verb); - verb get_verb() const; - void set_verb(verb _verb); - verb_inflection get_verb_inflection() const; - void set_verb_inflection(verb_inflection _infl); - - // Noun - token(noun _noun); - token(noun _noun, noun_inflection _infl); - token& operator=(noun _noun); - noun get_noun() const; - void set_noun(noun _noun); - noun_inflection get_noun_inflection() const; - void set_noun_inflection(noun_inflection _infl); - - // Adjective - token(adjective _adjective); - token(adjective _adjective, adjective_inflection _infl); - token& operator=(adjective _adjective); - adjective get_adjective() const; - void set_adjective(adjective _adjective); - adjective_inflection get_adjective_inflection() const; - void set_adjective_inflection(adjective_inflection _infl); - - // Adverb - token(adverb _adverb); - token(adverb _adverb, adverb_inflection _infl); - token& operator=(adverb _adverb); - adverb get_adverb() const; - void set_adverb(adverb _adverb); - adverb_inflection get_adverb_inflection() const; - void set_adverb_inflection(adverb_inflection _infl); - - // Preposition - token(preposition _preposition); - token& operator=(preposition _preposition); - preposition get_preposition() const; - void set_preposition(preposition _preposition); - + + // Word + + token(word arg, inflection category = inflection::base); + + const word& getWord() const; + + token inflect(inflection category) const; + + // Literal + + token(std::string arg); + token(const char* arg); + + std::string getLiteral() const; + + // Part + + token(part arg); + + part getPart() const; + // Fillin - token(fillin_type _ft); - token& operator=(fillin_type _ft); - fillin_type get_fillin_type() const; - void set_fillin_type(fillin_type _ft); - + + token(std::set synrestrs); + + const std::set& getSynrestrs() const; + + bool hasSynrestr(std::string synrestr) const; + + void addSynrestr(std::string synrestr); + // Utterance - typedef std::list::iterator iterator; - + + using iterator = std::list::iterator; + using const_iterator = std::list::const_iterator; + token(); - token(std::initializer_list _init); + token(std::vector parts); + iterator begin(); + const_iterator begin() const; + iterator end(); - token& operator<<(token _tkn); - void push_back(token _tkn); - void insert(iterator before, token _tkn); - void replace(iterator torepl, token _tkn); - void erase(iterator toer); - - // String - token(std::string _str); - token& operator=(std::string _str); - std::string get_string() const; - void set_string(std::string _str); - + const_iterator end() const; + + token& operator<<(token arg); + private: - type _type; - int _extra = 0; union { struct { - verb _verb; - verb_inflection _infl; - } _verb; - struct { - noun _noun; - noun_inflection _infl; - } _noun; - struct { - adjective _adjective; - adjective_inflection _infl; - } _adjective; - struct { - adverb _adverb; - adverb_inflection _infl; - } _adverb; - struct { - preposition _preposition; - } _preposition; - struct { - fillin_type _type; - } _fillin; - struct { - std::string _str; - } _string; - struct { - std::list _utterance; - } _utterance; + word word_; + inflection category_; + } word_; + std::string literal_; + part part_; + std::set fillin_; + std::list utterance_; }; + type type_; }; - - std::ostream& operator<<(std::ostream& os, token::type _type); + + std::ostream& operator<<(std::ostream& os, token::type type); }; diff --git a/lib/verbly.h b/lib/verbly.h index 6dfc01a..d8875b3 100644 --- a/lib/verbly.h +++ b/lib/verbly.h @@ -13,5 +13,9 @@ #include "lemma.h" #include "form.h" #include "pronunciation.h" +#include "token.h" +#include "selrestr.h" +#include "part.h" +#include "role.h" #endif /* end of include guard: VERBLY_H_5B39CE50 */ -- cgit 1.4.1