From 8455e8badc80aa018a982102ffff71d5a6b1940c Mon Sep 17 00:00:00 2001
From: Kelly Rauchenberger
Date: Wed, 9 Mar 2016 23:30:14 -0500
Subject: Started verbly rewrite

verbly is intended to be a general use natural language generation library.
Here, I'm using it to simply generate random verbs or adjectives. A schema
for the sqlite database is provided, and for testing I manually added data.
A generator program is being written that will generate a database from
WordNet, VerbNet, PropBank, and AGID data.
---
Review notes (not part of the commit message):
  - #include targets and template arguments, which this copy of the patch
    had lost, are restored.
  - The "index" lines still carry the blob ids of the files as originally
    committed; they do not describe the contents below.

 adjective.h |  29 +++++
 c++14.h     |  51 ++++++++
 data.h      | 302 ++++++++++++++++++++++++++++++++++++++++++++++
 token.h     | 389 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 verb.h      |  87 ++++++++++++++
 verbly.h    |  10 ++
 6 files changed, 868 insertions(+)
 create mode 100644 adjective.h
 create mode 100644 c++14.h
 create mode 100644 data.h
 create mode 100644 token.h
 create mode 100644 verb.h
 create mode 100644 verbly.h

diff --git a/adjective.h b/adjective.h
new file mode 100644
index 0000000..58c490e
--- /dev/null
+++ b/adjective.h
@@ -0,0 +1,29 @@
+#ifndef ADJECTIVE_H_87B3FB75
+#define ADJECTIVE_H_87B3FB75
+
+#include <string>
+
+namespace verbly {
+
+  /**
+   * One adjective loaded from the verbly database.
+   *
+   * The id is the row's adjective_id column and value is the adjective's
+   * text.
+   */
+  class adjective {
+    private:
+      int id;
+
+    public:
+      std::string value;
+
+      adjective(int id) : id(id)
+      {
+
+      }
+  };
+
+}
+
+#endif /* end of include guard: ADJECTIVE_H_87B3FB75 */
diff --git a/c++14.h b/c++14.h
new file mode 100644
index 0000000..b3efbe2
--- /dev/null
+++ b/c++14.h
@@ -0,0 +1,51 @@
+#ifndef CXX14_H_VERBLY_MAKE_UNIQUE
+#define CXX14_H_VERBLY_MAKE_UNIQUE
+
+#include <cstddef>
+#include <memory>
+#include <type_traits>
+#include <utility>
+
+// Backport of std::make_unique for C++11 toolchains. Standard libraries in
+// C++14 mode or later already define it, and declaring it a second time
+// would make every call ambiguous, so the backport is only compiled for
+// older language levels.
+#if (__cplusplus < 201402L) && !(defined(_MSVC_LANG) && (_MSVC_LANG >= 201402L))
+
+namespace std {
+  template<class T> struct _Unique_if {
+    typedef unique_ptr<T> _Single_object;
+  };
+
+  template<class T> struct _Unique_if<T[]> {
+    typedef unique_ptr<T[]> _Unknown_bound;
+  };
+
+  template<class T, size_t N> struct _Unique_if<T[N]> {
+    typedef void _Known_bound;
+  };
+
+  // Single object: forwards the arguments to T's constructor.
+  template<class T, class... Args>
+    typename _Unique_if<T>::_Single_object
+    make_unique(Args&&... args) {
+      return unique_ptr<T>(new T(std::forward<Args>(args)...));
+    }
+
+  // Array of unknown bound: value-initializes n elements.
+  template<class T>
+    typename _Unique_if<T>::_Unknown_bound
+    make_unique(size_t n) {
+      typedef typename remove_extent<T>::type U;
+      return unique_ptr<T>(new U[n]());
+    }
+
+  // Arrays of known bound are rejected at compile time.
+  template<class T, class... Args>
+    typename _Unique_if<T>::_Known_bound
+    make_unique(Args&&...) = delete;
+}
+
+#endif /* pre-C++14 language level */
+
+#endif /* end of include guard: CXX14_H_VERBLY_MAKE_UNIQUE */
diff --git a/data.h b/data.h
new file mode 100644
index 0000000..2c23c15
--- /dev/null
+++ b/data.h
@@ -0,0 +1,302 @@
+#ifndef DATA_H_C4AEC3DD
+#define DATA_H_C4AEC3DD
+
+#include "verb.h"
+#include "adjective.h"
+#include <sqlite3.h>
+#include <list>
+#include <memory>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+#include <utility>
+
+namespace verbly {
+
+  /**
+   * Read-only connection to a verbly SQLite database.
+   *
+   * A data object owns its connection: it cannot be copied, and moving it
+   * hands the connection over. The query objects returned by verbs() and
+   * adjectives() hold a reference to the data object that created them, so
+   * they must not outlive it.
+   */
+  class data {
+    private:
+      sqlite3* ppdb = nullptr;
+
+      // Deleter that finalizes a prepared statement.
+      struct stmt_finalizer {
+        void operator()(sqlite3_stmt* ppstmt) const
+        {
+          sqlite3_finalize(ppstmt);
+        }
+      };
+
+      // Owning statement handle: the statement is finalized on every exit
+      // path, including an exception thrown while rows are being copied.
+      typedef std::unique_ptr<sqlite3_stmt, stmt_finalizer> stmt_ptr;
+
+      // Compiles query against this connection.
+      // Throws std::runtime_error with SQLite's message on failure.
+      stmt_ptr prepare(const std::string& query) const
+      {
+        sqlite3_stmt* ppstmt = nullptr;
+        if (sqlite3_prepare_v2(ppdb, query.c_str(), static_cast<int>(query.length()), &ppstmt, nullptr) != SQLITE_OK)
+        {
+          throw std::runtime_error(sqlite3_errmsg(ppdb));
+        }
+
+        return stmt_ptr(ppstmt);
+      }
+
+      // Reads a text column of the current row. sqlite3_column_text returns
+      // a null pointer for a NULL value, which std::string cannot be built
+      // from, so NULL is mapped to the empty string.
+      static std::string column_string(sqlite3_stmt* ppstmt, int column)
+      {
+        const unsigned char* text = sqlite3_column_text(ppstmt, column);
+        if (text == nullptr)
+        {
+          return std::string();
+        }
+
+        return std::string(reinterpret_cast<const char*>(text));
+      }
+
+      // Checks the code that ended a sqlite3_step loop: anything other than
+      // SQLITE_DONE means the statement stopped on an error.
+      void require_done(int result) const
+      {
+        if (result != SQLITE_DONE)
+        {
+          throw std::runtime_error(sqlite3_errmsg(ppdb));
+        }
+      }
+
+    public:
+      /**
+       * Builder for a query over the verbs table.
+       */
+      class verb_query {
+        public:
+          const static int unlimited = -1;
+
+        private:
+          const data& database;
+          int m_limit = unlimited;
+          bool m_random = false;
+
+        public:
+          verb_query(const data& database) : database(database)
+          {
+
+          }
+
+          // Caps the number of rows. Anything other than a positive count
+          // or unlimited is ignored and the previous limit is kept.
+          verb_query& limit(int m_limit)
+          {
+            if ((m_limit > 0) || (m_limit == unlimited))
+            {
+              this->m_limit = m_limit;
+            }
+
+            return *this;
+          }
+
+          // Chooses whether the rows are returned in random order.
+          verb_query& random(bool m_random)
+          {
+            this->m_random = m_random;
+
+            return *this;
+          }
+
+          /**
+           * Runs the query and returns one verb per row.
+           *
+           * @throws std::runtime_error if the statement cannot be prepared
+           *         or fails while it is being stepped.
+           */
+          std::list<verb> run() const
+          {
+            std::stringstream construct;
+            construct << "SELECT verb_id, infinitive, past_tense, past_participle, ing_form, s_form FROM verbs";
+
+            if (m_random)
+            {
+              construct << " ORDER BY RANDOM()";
+            }
+
+            if (m_limit != unlimited)
+            {
+              construct << " LIMIT " << m_limit;
+            }
+
+            stmt_ptr ppstmt = database.prepare(construct.str());
+
+            std::list<verb> output;
+            int result;
+            while ((result = sqlite3_step(ppstmt.get())) == SQLITE_ROW)
+            {
+              verb tnc {sqlite3_column_int(ppstmt.get(), 0)};
+              tnc.infinitive = column_string(ppstmt.get(), 1);
+              tnc.past_tense = column_string(ppstmt.get(), 2);
+              tnc.past_participle = column_string(ppstmt.get(), 3);
+              tnc.ing_form = column_string(ppstmt.get(), 4);
+              tnc.s_form = column_string(ppstmt.get(), 5);
+
+              output.push_back(std::move(tnc));
+            }
+
+            database.require_done(result);
+
+            return output;
+          }
+
+      };
+
+      /**
+       * Builder for a query over the adjectives table.
+       */
+      class adjective_query {
+        public:
+          const static int unlimited = -1;
+
+        private:
+          const data& database;
+          int m_limit = unlimited;
+          bool m_random = false;
+
+        public:
+          adjective_query(const data& database) : database(database)
+          {
+
+          }
+
+          // Caps the number of rows. Anything other than a positive count
+          // or unlimited is ignored and the previous limit is kept.
+          adjective_query& limit(int m_limit)
+          {
+            if ((m_limit > 0) || (m_limit == unlimited))
+            {
+              this->m_limit = m_limit;
+            }
+
+            return *this;
+          }
+
+          // Chooses whether the rows are returned in random order.
+          adjective_query& random(bool m_random)
+          {
+            this->m_random = m_random;
+
+            return *this;
+          }
+
+          /**
+           * Runs the query and returns one adjective per row.
+           *
+           * @throws std::runtime_error if the statement cannot be prepared
+           *         or fails while it is being stepped.
+           */
+          std::list<adjective> run() const
+          {
+            std::stringstream construct;
+            construct << "SELECT adjective_id, adjective FROM adjectives";
+
+            if (m_random)
+            {
+              construct << " ORDER BY RANDOM()";
+            }
+
+            if (m_limit != unlimited)
+            {
+              construct << " LIMIT " << m_limit;
+            }
+
+            stmt_ptr ppstmt = database.prepare(construct.str());
+
+            std::list<adjective> output;
+            int result;
+            while ((result = sqlite3_step(ppstmt.get())) == SQLITE_ROW)
+            {
+              adjective tnc {sqlite3_column_int(ppstmt.get(), 0)};
+              tnc.value = column_string(ppstmt.get(), 1);
+
+              output.push_back(std::move(tnc));
+            }
+
+            database.require_done(result);
+
+            return output;
+          }
+
+      };
+
+      /**
+       * Opens the database file read-only.
+       *
+       * @throws std::invalid_argument with SQLite's message if it cannot
+       *         be opened.
+       */
+      data(std::string datafile)
+      {
+        if (sqlite3_open_v2(datafile.c_str(), &ppdb, SQLITE_OPEN_READONLY, nullptr) != SQLITE_OK)
+        {
+          // SQLite normally hands back a handle even when opening fails,
+          // and the destructor never runs for a constructor that throws,
+          // so the message is copied and the handle released here.
+          std::string message(sqlite3_errmsg(ppdb));
+          sqlite3_close_v2(ppdb);
+          ppdb = nullptr;
+
+          throw std::invalid_argument(message);
+        }
+      }
+
+      data(const data& other) = delete;
+      data& operator=(const data& other) = delete;
+
+      // Takes over other's connection and leaves other empty, so that the
+      // connection is closed exactly once.
+      data(data&& other) noexcept : ppdb(other.ppdb)
+      {
+        other.ppdb = nullptr;
+      }
+
+      // Closes the connection held so far, then takes over other's.
+      data& operator=(data&& other) noexcept
+      {
+        if (this != &other)
+        {
+          sqlite3_close_v2(ppdb);
+          ppdb = other.ppdb;
+          other.ppdb = nullptr;
+        }
+
+        return *this;
+      }
+
+      ~data()
+      {
+        // Closing a null handle is a harmless no-op.
+        sqlite3_close_v2(ppdb);
+      }
+
+      verb_query verbs() const
+      {
+        return verb_query(*this);
+      }
+
+      adjective_query adjectives() const
+      {
+        return adjective_query(*this);
+      }
+
+  };
+
+}
+
+#endif /* end of include guard: DATA_H_C4AEC3DD */
diff --git a/token.h b/token.h
new file mode 100644
index 0000000..bbe7c2d
--- /dev/null
+++ b/token.h
@@ -0,0 +1,389 @@
+#ifndef TOKEN_H_AD62C505
+#define TOKEN_H_AD62C505
+
+#include <algorithm>
+#include <initializer_list>
+#include <iterator>
+#include <list>
+#include <memory>
+#include <sstream>
+#include <string>
+#include <utility>
+#include "verb.h"
+
+namespace verbly {
+
+  enum class type {
+    verb,
+    fillin,
+    string,
+    utterance
+  };
+
+  class selrestr {
+  };
+
+  class synrestr {
+  };
+
+  enum class fillin_type {
+    noun_phrase,
+    participle_phrase,
+    adjective
+  };
+
+  /**
+   * Abstract base class of everything an utterance is built from.
+   *
+   * Some members in this header share their name with a namespace-level
+   * type (type, verb, fillin_type). Inside the classes those types are
+   * always written with the verbly:: qualifier, so each name keeps a single
+   * meaning throughout the class.
+   */
+  class token {
+    protected:
+      // General
+      verbly::type type;
+
+      token(verbly::type type) : type(type)
+      {
+
+      }
+
+    public:
+      // Tokens are deleted through token pointers, which is only
+      // well-defined with a virtual destructor.
+      virtual ~token() = default;
+
+      verbly::type token_type() const
+      {
+        return type;
+      }
+
+      // Whether compile() can currently produce this token's text.
+      virtual bool complete() const = 0;
+
+      // Renders the token as text.
+      virtual std::string compile() const = 0;
+
+      // Returns a new heap-allocated copy that the caller must delete.
+      virtual token* copy() const = 0;
+  };
+
+  /**
+   * Token that renders one verb in a selectable conjugation.
+   *
+   * Only a pointer to the verb is kept, so the verb has to outlive the
+   * token and every copy made of it.
+   */
+  class verb_token : public token {
+    private:
+      // Verb
+      const verbly::verb* m_verb;
+      conjugation verb_infl = conjugation::infinitive;
+
+    public:
+      verb_token(const verbly::verb& verb) : token(verbly::type::verb), m_verb(&verb)
+      {
+
+      }
+
+      const verbly::verb& verb() const
+      {
+        return *m_verb;
+      }
+
+      // Selects the conjugation used by compile().
+      verb_token& conjugate(conjugation infl)
+      {
+        verb_infl = infl;
+        return *this;
+      }
+
+      bool complete() const override
+      {
+        return true;
+      }
+
+      std::string compile() const override
+      {
+        return m_verb->conjugate(verb_infl);
+      }
+
+      token* copy() const override
+      {
+        return new verb_token(*this);
+      }
+  };
+
+  /**
+   * Ordered sequence of owned tokens. It compiles to the texts of its
+   * tokens joined by single spaces.
+   */
+  class utterance_token : public token {
+    private:
+      // Utterance
+      std::list<std::unique_ptr<token>> utterance;
+
+    public:
+      typedef std::list<std::unique_ptr<token>>::iterator iterator;
+      typedef std::list<std::unique_ptr<token>>::const_iterator const_iterator;
+      /*class iterator {
+        private:
+          friend class utterance_token;
+
+          std::list<std::unique_ptr<token>>::iterator it;
+
+        public:
+          iterator(std::list<std::unique_ptr<token>>::iterator it) : it(it)
+          {
+
+          }
+
+          iterator& operator++()
+          {
+            ++it;
+            return *this;
+          }
+
+          iterator& operator--()
+          {
+            --it;
+            return *this;
+          }
+
+          bool operator==(const iterator& other) const
+          {
+            return it == other.it;
+          }
+
+          bool operator!=(const iterator& other) const
+          {
+            return it != other.it;
+          }
+
+          token* operator*()
+          {
+            return *it->get();
+          }
+
+          token* operator->()
+          {
+            return *it->get();
+          }
+      };*/
+
+      // Takes ownership of every pointer in tkns.
+      utterance_token(std::initializer_list<token*> tkns) : token(verbly::type::utterance)
+      {
+        for (auto tkn : tkns)
+        {
+          utterance.push_back(std::unique_ptr<token>(tkn));
+        }
+      }
+
+      utterance_token(const utterance_token& other) : token(verbly::type::utterance)
+      {
+        for (auto& tkn : other.utterance)
+        {
+          utterance.push_back(std::unique_ptr<token>(tkn->copy()));
+        }
+      }
+
+      utterance_token(utterance_token&& other) : token(verbly::type::utterance), utterance(std::move(other.utterance))
+      {
+
+      }
+
+      utterance_token& operator=(const utterance_token& other)
+      {
+        // The copies are built before the old contents are dropped.
+        // Clearing first would also empty the source when an utterance is
+        // assigned to itself.
+        std::list<std::unique_ptr<token>> copies;
+        for (auto& tkn : other.utterance)
+        {
+          copies.push_back(std::unique_ptr<token>(tkn->copy()));
+        }
+
+        utterance = std::move(copies);
+
+        return *this;
+      }
+
+      utterance_token& operator=(utterance_token&& other)
+      {
+        utterance = std::move(other.utterance);
+
+        return *this;
+      }
+
+      iterator begin()
+      {
+        return std::begin(utterance);
+      }
+
+      iterator end()
+      {
+        return std::end(utterance);
+      }
+
+      // The list is const here, so only const iterators can be handed out.
+      const_iterator begin() const
+      {
+        return std::begin(utterance);
+      }
+
+      const_iterator end() const
+      {
+        return std::end(utterance);
+      }
+
+      void erase(iterator it)
+      {
+        utterance.erase(it);
+      }
+
+      bool complete() const override
+      {
+        return std::all_of(std::begin(utterance), std::end(utterance), [] (const std::unique_ptr<token>& tkn) {
+          return tkn->complete();
+        });
+      }
+
+      // Returns the literal text "Could not compile!" as soon as a token
+      // reports that it is not complete.
+      std::string compile() const override
+      {
+        std::stringstream result;
+        for (auto& t : utterance)
+        {
+          if (t->complete())
+          {
+            result << t->compile() << " ";
+          } else {
+            return "Could not compile!";
+          }
+        }
+
+        std::string output = result.str();
+        if (!output.empty())
+        {
+          // Drop the space written after the last token.
+          output.pop_back();
+        }
+
+        return output;
+      }
+
+      token* copy() const override
+      {
+        return new utterance_token(*this);
+      }
+  };
+
+  /**
+   * Placeholder for a phrase that has not been filled in: it never reports
+   * itself complete and compiles to an empty string.
+   */
+  class fillin_token : public token {
+    private:
+      // Fillin
+      std::string m_theme;
+      verbly::fillin_type m_fillin_type;
+
+    public:
+      fillin_token(verbly::fillin_type ft) : token(verbly::type::fillin), m_fillin_type(ft)
+      {
+
+      }
+
+/*      void synrestrs(std::initializer_list<synrestr> ins)
+      {
+        m_synrestrs = std::set<synrestr>(ins);
+      }
+
+      std::set<synrestr>& synrestrs()
+      {
+        return m_synrestrs;
+      }
+
+      void selrestrs(std::initializer_list<selrestr> ins)
+      {
+        m_selrestrs = std::set<selrestr>(ins);
+      }
+
+      std::set<selrestr>& selrestrs()
+      {
+        return m_selrestrs;
+      }*/
+
+      // Sets the theme. A reference is returned, like the other chaining
+      // setters in this library, so a chained call keeps acting on this
+      // token instead of on a temporary copy.
+      fillin_token& theme(std::string theme)
+      {
+        m_theme = std::move(theme);
+
+        return *this;
+      }
+
+      std::string& theme()
+      {
+        return m_theme;
+      }
+
+      verbly::fillin_type fillin_type() const
+      {
+        return m_fillin_type;
+      }
+
+      bool complete() const override
+      {
+        return false;
+      }
+
+      std::string compile() const override
+      {
+        return "";
+      }
+
+      token* copy() const override
+      {
+        return new fillin_token(*this);
+      }
+  };
+
+  /**
+   * Token holding fixed text, which it compiles to unchanged.
+   */
+  class string_token : public token {
+    private:
+      // String
+      std::string str;
+
+    public:
+      string_token(std::string str) : token(verbly::type::string), str(std::move(str))
+      {
+
+      }
+
+      bool complete() const override
+      {
+        return true;
+      }
+
+      std::string compile() const override
+      {
+        return str;
+      }
+
+      token* copy() const override
+      {
+        return new string_token(*this);
+      }
+  };
+
+}
+
+#endif /* end of include guard: TOKEN_H_AD62C505 */
diff --git a/verb.h b/verb.h
new file mode 100644
index 0000000..42c8dc2
--- /dev/null
+++ b/verb.h
@@ -0,0 +1,87 @@
+#ifndef VERB_H_BCC929AD
+#define VERB_H_BCC929AD
+
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+namespace verbly {
+
+  /*class frame_part {
+
+  };
+
+  class frame {
+    private:
+      std::list<frame_part> content;
+      std::map<std::string, std::vector<std::list<frame_part>::iterator>> predicates;
+
+    public:
+      frame(std::list<frame_part> content) : content(content)
+      {
+
+      }
+
+      std::unique_ptr<token> make_utterance() const
+      {
+
+      }
+  };*/
+
+  /**
+   * The inflected forms that verb::conjugate can select.
+   */
+  enum class conjugation {
+    present_participle,
+    past_participle,
+    infinitive
+  };
+
+  /**
+   * One verb loaded from the verbly database, with its inflected forms
+   * stored as plain strings.
+   */
+  class verb {
+    private:
+      int id;
+
+    public:
+      verb(int id) : id(id)
+      {
+
+      }
+
+      std::string infinitive;
+      std::string past_tense;
+      std::string past_participle;
+      std::string ing_form;
+      std::string s_form;
+      //std::vector<frame> frames;
+
+      /**
+       * Returns the stored form of this verb for the given conjugation.
+       *
+       * @throws std::invalid_argument if infl is not a conjugation enumerator.
+       */
+      std::string conjugate(conjugation infl) const
+      {
+        switch (infl)
+        {
+          case conjugation::infinitive: return infinitive;
+          case conjugation::past_participle: return past_participle;
+          case conjugation::present_participle: return ing_form;
+        }
+
+        // Flowing off the end of a value-returning function is undefined
+        // behaviour, so an out-of-range enum value is reported instead.
+        throw std::invalid_argument("unknown conjugation");
+      }
+  };
+
+}
+
+// token.h starts by including this header; including it back from here
+// means that verb.h on its own also provides the token classes.
+#include "token.h"
+
+#endif /* end of include guard: VERB_H_BCC929AD */
diff --git a/verbly.h b/verbly.h
new file mode 100644
index 0000000..139d8f8
--- /dev/null
+++ b/verbly.h
@@ -0,0 +1,10 @@
+#ifndef VERBLY_H_5B39CE50
+#define VERBLY_H_5B39CE50
+
+#include "c++14.h"
+#include "token.h"
+#include "verb.h"
+#include "adjective.h"
+#include "data.h"
+
+#endif /* end of include guard: VERBLY_H_5B39CE50 */
-- 
cgit 1.4.1