Started structural rewrite

The new object structure was designed to build on the existing WordNet structure, while also adding in all of the data that we get from other sources. More information about this can be found on the project wiki. The generator has already been completely rewritten to generate a datafile that uses the new structure. In addition, a number of indexes are created, which doubles the size of the datafile but also allows for much faster lookups. Finally, the new generator is written modularly and is a lot more readable than the old one. The verbly interface to the new object structure has mostly been completed, but has not been fully tested. There is a completely new search API which utilizes a lot of operator overloading; documentation on how to use it should go up at some point. Token processing and verb frames are currently unimplemented. The source for these has been left in the repository for now.
author: Kelly Rauchenberger <fefferburbia@gmail.com> 2017-01-16 18:02:50 -0500
committer: Kelly Rauchenberger <fefferburbia@gmail.com> 2017-01-16 18:02:50 -0500
commit: 6746da6edd7d9d50efe374eabbb79a3cac882d81 (patch)
tree: ff20917e08b08d36b9541c1371106596e7bec442 /lib/word.cpp
parent: 4af7e55733098ca42f75a4ffaca1b0f6bab4dd36 (diff)
download: verbly-6746da6edd7d9d50efe374eabbb79a3cac882d81.tar.gz
verbly-6746da6edd7d9d50efe374eabbb79a3cac882d81.tar.bz2
verbly-6746da6edd7d9d50efe374eabbb79a3cac882d81.zip
1 files changed, 86 insertions, 34 deletions
diff --git a/lib/word.cpp b/lib/word.cpp
index 49e34a1..3edf2d2 100644
--- a/lib/word.cpp
+++ b/lib/word.cpp

@@ -1,60 +1,112 @@
-#include "verbly.h"
+#include "word.h"
-#include <algorithm>
+#include <sqlite3.h>
+#include "form.h"
+#include "util.h"
+#include "database.h"
+#include "query.h"
 namespace verbly {
  
-  rhyme::rhyme(std::string prerhyme, std::string phonemes) : _prerhyme(prerhyme), _rhyme(phonemes)
+  const object word::objectType = object::word;
-  {
-    
-  }
  
-  std::string rhyme::get_prerhyme() const
+  const std::list<std::string> word::select = {"word_id", "notion_id", "lemma_id", "tag_count", "position", "group_id"};
-  {
-    return _prerhyme;
-  }
  
-  std::string rhyme::get_rhyme() const
+  const field word::id = field::integerField(object::word, "word_id");
-  {
+  const field word::tagCount = field::integerField(object::word, "tag_count", true);
-    return _rhyme;
+  const field word::adjectivePosition = field::integerField(object::word, "position", true);
-  }
+  
+  const field word::notion = field::joinField(object::word, "notion_id", object::notion);
+  const field word::lemma = field::joinField(object::word, "lemma_id", object::lemma);
+  const field word::group = field::joinField(object::word, "group_id", object::group, true);
+  
+  const field word::antonyms = field::selfJoin(object::word, "word_id", "antonymy", "antonym_2_id", "antonym_1_id");
+  
+  const field word::specifications = field::selfJoin(object::word, "word_id", "specification", "general_id", "specific_id");
+  const field word::generalizations = field::selfJoin(object::word, "word_id", "specification", "specific_id", "general_id");
  
-  bool rhyme::operator==(const rhyme& other) const
+  const field word::pertainyms = field::selfJoin(object::word, "word_id", "pertainymy", "noun_id", "pertainym_id");
+  const field word::antiPertainyms = field::selfJoin(object::word, "word_id", "pertainymy", "pertainym_id", "noun_id");
+  
+  const field word::mannernyms = field::selfJoin(object::word, "word_id", "mannernymy", "adjective_id", "mannernym_id");
+  const field word::antiMannernyms = field::selfJoin(object::word, "word_id", "mannernymy", "mannernym_id", "adjective_id");
+  
+  const field word::usageTerms = field::selfJoin(object::word, "word_id", "usage", "domain_id", "term_id");
+  const field word::usageDomains = field::selfJoin(object::word, "word_id", "usage", "term_id", "domain_id");
+  
+  const field word::topicalTerms = field::selfJoin(object::word, "word_id", "topicality", "domain_id", "term_id");
+  const field word::topicalDomains = field::selfJoin(object::word, "word_id", "topicality", "term_id", "domain_id");
+  
+  const field word::regionalTerms = field::selfJoin(object::word, "word_id", "regionality", "domain_id", "term_id");
+  const field word::regionalDomains = field::selfJoin(object::word, "word_id", "regionality", "term_id", "domain_id");
+  
+  word::word(const database& db, sqlite3_stmt* row) : db_(&db), valid_(true)
  {
-    return std::tie(_prerhyme, _rhyme) == std::tie(other._prerhyme, other._rhyme);
+    id_ = sqlite3_column_int(row, 0);
+    notionId_ = sqlite3_column_int(row, 1);
+    lemmaId_ = sqlite3_column_int(row, 2);
+    
+    if (sqlite3_column_type(row, 3) != SQLITE_NULL)
+    {
+      hasTagCount_ = true;
+      tagCount_ = sqlite3_column_int(row, 3);
+    }
+    
+    if (sqlite3_column_type(row, 4) != SQLITE_NULL)
+    {
+      adjectivePosition_ = static_cast<positioning>(sqlite3_column_int(row, 4));
+    }
+    
+    if (sqlite3_column_type(row, 5) != SQLITE_NULL)
+    {
+      hasGroup_ = true;
+      groupId_ = sqlite3_column_int(row, 5);
+    }
  }
  
-  word::word()
+  const notion& word::getNotion() const
  {
+    if (!valid_)
+    {
+      throw std::domain_error("Bad access to uninitialized word");
+    }
+    
+    if (!notion_)
+    {
+      notion_ = db_->notions(notion::id == notionId_).first();
+    }
    
+    return notion_;
  }
  
-  word::word(const data& _data, int _id) : _data(&_data), _id(_id), _valid(true)
+  const lemma& word::getLemma() const
  {
+    if (!valid_)
+    {
+      throw std::domain_error("Bad access to uninitialized word");
+    }
    
+    if (!lemma_)
+    {
+      lemma_ = db_->lemmas(lemma::id == lemmaId_).first();
+    }
+    
+    return lemma_;
  }
  
-  std::list<rhyme> word::get_rhymes() const
+  std::string word::getBaseForm() const
  {
-    assert(_valid == true);
+    return getLemma().getBaseForm().getText();
-    
-    return rhymes;
  }
  
-  bool word::starts_with_vowel_sound() const
+  std::list<std::string> word::getInflections(inflection category) const
  {
-    assert(_valid == true);
+    std::list<std::string> result;
-    
+    for (const form& infl : getLemma().getInflections(category))
-    if (pronunciations.size() > 0)
    {
-      return std::any_of(std::begin(pronunciations), std::end(pronunciations), [] (std::list<std::string> phonemes) {
+      result.push_back(infl.getText());
-        return (phonemes.front().find_first_of("012") != std::string::npos);
-      });
-    } else {
-      // If the word is not in CMUDICT, fall back to checking whether the first letter is a vowel
-      // Not perfect but will work in most cases
-      char ch = tolower(base_form().front());
-      return (ch == 'a') || (ch == 'e') || (ch == 'i') || (ch == 'o') || (ch == 'u');
    }
+    return result;
  }
  
 };
author	Kelly Rauchenberger <fefferburbia@gmail.com>	2017-01-16 18:02:50 -0500
committer	Kelly Rauchenberger <fefferburbia@gmail.com>	2017-01-16 18:02:50 -0500
commit	6746da6edd7d9d50efe374eabbb79a3cac882d81 (patch)
tree	ff20917e08b08d36b9541c1371106596e7bec442 /lib/word.cpp
parent	4af7e55733098ca42f75a4ffaca1b0f6bab4dd36 (diff)
download	verbly-6746da6edd7d9d50efe374eabbb79a3cac882d81.tar.gz verbly-6746da6edd7d9d50efe374eabbb79a3cac882d81.tar.bz2 verbly-6746da6edd7d9d50efe374eabbb79a3cac882d81.zip

diff --git a/lib/word.cpp b/lib/word.cpp index 49e34a1..3edf2d2 100644 --- a/lib/word.cpp +++ b/lib/word.cpp
@@ -1,60 +1,112 @@
1	#include "verbly.h"	1	#include "word.h"
2	#include <algorithm>	2	#include <sqlite3.h>
		3	#include "form.h"
		4	#include "util.h"
		5	#include "database.h"
		6	#include "query.h"
3		7
4	namespace verbly {	8	namespace verbly {
5		9
6	rhyme::rhyme(std::string prerhyme, std::string phonemes) : _prerhyme(prerhyme), _rhyme(phonemes)	10	const object word::objectType = object::word;
7	{
8
9	}
10		11
11	std::string rhyme::get_prerhyme() const	12	const std::list<std::string> word::select = {"word_id", "notion_id", "lemma_id", "tag_count", "position", "group_id"};
12	{
13	return _prerhyme;
14	}
15		13
16	std::string rhyme::get_rhyme() const	14	const field word::id = field::integerField(object::word, "word_id");
17	{	15	const field word::tagCount = field::integerField(object::word, "tag_count", true);
18	return _rhyme;	16	const field word::adjectivePosition = field::integerField(object::word, "position", true);
19	}	17
		18	const field word::notion = field::joinField(object::word, "notion_id", object::notion);
		19	const field word::lemma = field::joinField(object::word, "lemma_id", object::lemma);
		20	const field word::group = field::joinField(object::word, "group_id", object::group, true);
		21
		22	const field word::antonyms = field::selfJoin(object::word, "word_id", "antonymy", "antonym_2_id", "antonym_1_id");
		23
		24	const field word::specifications = field::selfJoin(object::word, "word_id", "specification", "general_id", "specific_id");
		25	const field word::generalizations = field::selfJoin(object::word, "word_id", "specification", "specific_id", "general_id");
20		26
21	bool rhyme::operator==(const rhyme& other) const	27	const field word::pertainyms = field::selfJoin(object::word, "word_id", "pertainymy", "noun_id", "pertainym_id");
		28	const field word::antiPertainyms = field::selfJoin(object::word, "word_id", "pertainymy", "pertainym_id", "noun_id");
		29
		30	const field word::mannernyms = field::selfJoin(object::word, "word_id", "mannernymy", "adjective_id", "mannernym_id");
		31	const field word::antiMannernyms = field::selfJoin(object::word, "word_id", "mannernymy", "mannernym_id", "adjective_id");
		32
		33	const field word::usageTerms = field::selfJoin(object::word, "word_id", "usage", "domain_id", "term_id");
		34	const field word::usageDomains = field::selfJoin(object::word, "word_id", "usage", "term_id", "domain_id");
		35
		36	const field word::topicalTerms = field::selfJoin(object::word, "word_id", "topicality", "domain_id", "term_id");
		37	const field word::topicalDomains = field::selfJoin(object::word, "word_id", "topicality", "term_id", "domain_id");
		38
		39	const field word::regionalTerms = field::selfJoin(object::word, "word_id", "regionality", "domain_id", "term_id");
		40	const field word::regionalDomains = field::selfJoin(object::word, "word_id", "regionality", "term_id", "domain_id");
		41
		42	word::word(const database& db, sqlite3_stmt* row) : db_(&db), valid_(true)
22	{	43	{
23	return std::tie(_prerhyme, _rhyme) == std::tie(other._prerhyme, other._rhyme);	44	id_ = sqlite3_column_int(row, 0);
		45	notionId_ = sqlite3_column_int(row, 1);
		46	lemmaId_ = sqlite3_column_int(row, 2);
		47
		48	if (sqlite3_column_type(row, 3) != SQLITE_NULL)
		49	{
		50	hasTagCount_ = true;
		51	tagCount_ = sqlite3_column_int(row, 3);
		52	}
		53
		54	if (sqlite3_column_type(row, 4) != SQLITE_NULL)
		55	{
		56	adjectivePosition_ = static_cast<positioning>(sqlite3_column_int(row, 4));
		57	}
		58
		59	if (sqlite3_column_type(row, 5) != SQLITE_NULL)
		60	{
		61	hasGroup_ = true;
		62	groupId_ = sqlite3_column_int(row, 5);
		63	}
24	}	64	}
25		65
26	word::word()	66	const notion& word::getNotion() const
27	{	67	{
		68	if (!valid_)
		69	{
		70	throw std::domain_error("Bad access to uninitialized word");
		71	}
		72
		73	if (!notion_)
		74	{
		75	notion_ = db_->notions(notion::id == notionId_).first();
		76	}
28		77
		78	return notion_;
29	}	79	}
30		80
31	word::word(const data& _data, int _id) : _data(&_data), _id(_id), _valid(true)	81	const lemma& word::getLemma() const
32	{	82	{
		83	if (!valid_)
		84	{
		85	throw std::domain_error("Bad access to uninitialized word");
		86	}
33		87
		88	if (!lemma_)
		89	{
		90	lemma_ = db_->lemmas(lemma::id == lemmaId_).first();
		91	}
		92
		93	return lemma_;
34	}	94	}
35		95
36	std::list<rhyme> word::get_rhymes() const	96	std::string word::getBaseForm() const
37	{	97	{
38	assert(_valid == true);	98	return getLemma().getBaseForm().getText();
39
40	return rhymes;
41	}	99	}
42		100
43	bool word::starts_with_vowel_sound() const	101	std::list<std::string> word::getInflections(inflection category) const
44	{	102	{
45	assert(_valid == true);	103	std::list<std::string> result;
46		104	for (const form& infl : getLemma().getInflections(category))
47	if (pronunciations.size() > 0)
48	{	105	{
49	return std::any_of(std::begin(pronunciations), std::end(pronunciations), [] (std::list<std::string> phonemes) {	106	result.push_back(infl.getText());
50	return (phonemes.front().find_first_of("012") != std::string::npos);
51	});
52	} else {
53	// If the word is not in CMUDICT, fall back to checking whether the first letter is a vowel
54	// Not perfect but will work in most cases
55	char ch = tolower(base_form().front());
56	return (ch == 'a') \|\| (ch == 'e') \|\| (ch == 'i') \|\| (ch == 'o') \|\| (ch == 'u');
57	}	107	}
		108
		109	return result;
58	}	110	}
59		111
60	};	112	};