summary refs log tree commit diff stats
diff options
context:
space:
mode:
author: Kelly Rauchenberger <fefferburbia@gmail.com>, 2017-01-16 18:02:50 -0500
committer: Kelly Rauchenberger <fefferburbia@gmail.com>, 2017-01-16 18:02:50 -0500
commit: 6746da6edd7d9d50efe374eabbb79a3cac882d81 (patch)
tree: ff20917e08b08d36b9541c1371106596e7bec442
parent: 4af7e55733098ca42f75a4ffaca1b0f6bab4dd36 (diff)
downloadverbly-6746da6edd7d9d50efe374eabbb79a3cac882d81.tar.gz
verbly-6746da6edd7d9d50efe374eabbb79a3cac882d81.tar.bz2
verbly-6746da6edd7d9d50efe374eabbb79a3cac882d81.zip
Started structural rewrite
The new object structure was designed to build on the existing WordNet
structure, while also adding in all of the data that we get from other sources.
More information about this can be found on the project wiki.

The generator has already been completely rewritten to generate a
datafile that uses the new structure. In addition, a number of indexes
are created, which does double the size of the datafile, but also allows
for much faster lookups. Finally, the new generator is written modularly
and is a lot more readable than the old one.

The verbly interface to the new object structure has mostly been
completed, but has not been tested fully. There is a completely new
search API which utilizes a lot of operator overloading; documentation
on how to use it should go up at some point.

Token processing and verb frames are currently unimplemented. Source for
these has been left in the repository for now.
-rw-r--r--CMakeLists.txt4
-rw-r--r--generator/CMakeLists.txt6
-rw-r--r--generator/database.cpp173
-rw-r--r--generator/database.h73
-rw-r--r--generator/field.cpp193
-rw-r--r--generator/field.h76
-rw-r--r--generator/form.cpp53
-rw-r--r--generator/form.h71
-rw-r--r--generator/frame.cpp83
-rw-r--r--generator/frame.h59
-rw-r--r--generator/generator.cpp3145
-rw-r--r--generator/generator.h151
-rw-r--r--generator/group.cpp119
-rw-r--r--generator/group.h80
-rw-r--r--generator/lemma.cpp65
-rw-r--r--generator/lemma.h58
-rw-r--r--generator/main.cpp40
-rw-r--r--generator/notion.cpp85
-rw-r--r--generator/notion.h91
-rw-r--r--generator/part.cpp336
-rw-r--r--generator/part.h114
-rw-r--r--generator/progress.h78
-rw-r--r--generator/pronunciation.cpp87
-rw-r--r--generator/pronunciation.h82
-rw-r--r--generator/role.h35
-rw-r--r--generator/schema.sql352
-rw-r--r--generator/selrestr.cpp288
-rw-r--r--generator/selrestr.h88
-rw-r--r--generator/word.cpp77
-rw-r--r--generator/word.h110
-rw-r--r--lib/adjective.cpp113
-rw-r--r--lib/adjective.h51
-rw-r--r--lib/adjective_query.cpp1072
-rw-r--r--lib/adjective_query.h112
-rw-r--r--lib/adverb.cpp71
-rw-r--r--lib/adverb.h35
-rw-r--r--lib/adverb_query.cpp758
-rw-r--r--lib/adverb_query.h86
-rw-r--r--lib/binding.cpp180
-rw-r--r--lib/binding.h70
-rw-r--r--lib/data.cpp177
-rw-r--r--lib/data.h380
-rw-r--r--lib/database.cpp79
-rw-r--r--lib/database.h73
-rw-r--r--lib/enums.h45
-rw-r--r--lib/field.cpp91
-rw-r--r--lib/field.h306
-rw-r--r--lib/filter.cpp1365
-rw-r--r--lib/filter.h143
-rw-r--r--lib/form.cpp53
-rw-r--r--lib/form.h149
-rw-r--r--lib/frame.cpp317
-rw-r--r--lib/frame.h178
-rw-r--r--lib/group.cpp43
-rw-r--r--lib/group.h87
-rw-r--r--lib/lemma.cpp69
-rw-r--r--lib/lemma.h120
-rw-r--r--lib/notion.cpp94
-rw-r--r--lib/notion.h200
-rw-r--r--lib/noun.cpp221
-rw-r--r--lib/noun.h55
-rw-r--r--lib/noun_query.cpp2013
-rw-r--r--lib/noun_query.h180
-rw-r--r--lib/preposition.cpp107
-rw-r--r--lib/preposition.h38
-rw-r--r--lib/pronunciation.cpp69
-rw-r--r--lib/pronunciation.h163
-rw-r--r--lib/query.h123
-rw-r--r--lib/statement.cpp806
-rw-r--r--lib/statement.h272
-rw-r--r--lib/util.h24
-rw-r--r--lib/verb.cpp64
-rw-r--r--lib/verb.h34
-rw-r--r--lib/verb_query.cpp315
-rw-r--r--lib/verb_query.h45
-rw-r--r--lib/verbly.h36
-rw-r--r--lib/word.cpp120
-rw-r--r--lib/word.h193
78 files changed, 8971 insertions, 8696 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 9c39d99..61fcce2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt
@@ -4,8 +4,10 @@ project (verbly)
4find_package(PkgConfig) 4find_package(PkgConfig)
5pkg_check_modules(sqlite3 sqlite3>=3.8.3 REQUIRED) 5pkg_check_modules(sqlite3 sqlite3>=3.8.3 REQUIRED)
6 6
7set(CMAKE_BUILD_TYPE Debug)
8
7include_directories(vendor/json) 9include_directories(vendor/json)
8add_library(verbly lib/data.cpp lib/adjective.cpp lib/noun.cpp lib/verb.cpp lib/adverb.cpp lib/token.cpp lib/word.cpp lib/frame.cpp lib/preposition.cpp lib/adjective_query.cpp lib/adverb_query.cpp lib/noun_query.cpp lib/verb_query.cpp lib/frame_query.cpp) 10add_library(verbly lib/filter.cpp lib/field.cpp lib/notion.cpp lib/word.cpp lib/group.cpp lib/frame.cpp lib/lemma.cpp lib/form.cpp lib/pronunciation.cpp lib/statement.cpp lib/binding.cpp lib/database.cpp)
9set_property(TARGET verbly PROPERTY CXX_STANDARD 11) 11set_property(TARGET verbly PROPERTY CXX_STANDARD 11)
10set_property(TARGET verbly PROPERTY CXX_STANDARD_REQUIRED ON) 12set_property(TARGET verbly PROPERTY CXX_STANDARD_REQUIRED ON)
11target_link_libraries(verbly ${sqlite3_LIBRARIES}) 13target_link_libraries(verbly ${sqlite3_LIBRARIES})
diff --git a/generator/CMakeLists.txt b/generator/CMakeLists.txt index 552526d..4f78eb8 100644 --- a/generator/CMakeLists.txt +++ b/generator/CMakeLists.txt
@@ -1,12 +1,12 @@
1cmake_minimum_required (VERSION 2.6) 1cmake_minimum_required (VERSION 3.1)
2project (generator) 2project (generator)
3 3
4find_package(PkgConfig) 4find_package(PkgConfig)
5pkg_check_modules(sqlite3 sqlite3 REQUIRED) 5pkg_check_modules(sqlite3 sqlite3 REQUIRED)
6find_package(libxml2 REQUIRED) 6find_package(libxml2 REQUIRED)
7 7
8include_directories(${sqlite3_INCLUDE_DIR} ${LIBXML2_INCLUDE_DIR} ../vendor/json/src) 8include_directories(${sqlite3_INCLUDE_DIR} ${LIBXML2_INCLUDE_DIR} ../vendor/json)
9add_executable(generator generator.cpp) 9add_executable(generator notion.cpp word.cpp lemma.cpp form.cpp pronunciation.cpp group.cpp frame.cpp part.cpp selrestr.cpp database.cpp field.cpp generator.cpp main.cpp)
10set_property(TARGET generator PROPERTY CXX_STANDARD 11) 10set_property(TARGET generator PROPERTY CXX_STANDARD 11)
11set_property(TARGET generator PROPERTY CXX_STANDARD_REQUIRED ON) 11set_property(TARGET generator PROPERTY CXX_STANDARD_REQUIRED ON)
12target_link_libraries(generator ${sqlite3_LIBRARIES} ${LIBXML2_LIBRARIES}) 12target_link_libraries(generator ${sqlite3_LIBRARIES} ${LIBXML2_LIBRARIES})
diff --git a/generator/database.cpp b/generator/database.cpp new file mode 100644 index 0000000..c7e4cfa --- /dev/null +++ b/generator/database.cpp
@@ -0,0 +1,173 @@
1#include "database.h"
2#include <sqlite3.h>
3#include <cassert>
4#include <fstream>
5#include <stdexcept>
6#include <cstdio>
7#include <sstream>
8#include "field.h"
9#include "../lib/util.h"
10
11namespace verbly {
12 namespace generator {
13
14 sqlite3_error::sqlite3_error(
15 const std::string& what,
16 const std::string& db_err) :
17 what_(what + " (" + db_err + ")"),
18 db_err_(db_err)
19 {
20 }
21
22 const char* sqlite3_error::what() const noexcept
23 {
24 return what_.c_str();
25 }
26
27 const char* sqlite3_error::db_err() const noexcept
28 {
29 return db_err_.c_str();
30 }
31
32 database::database(std::string path)
33 {
34 // If there is already a file at this path, overwrite it.
35 if (std::ifstream(path))
36 {
37 if (std::remove(path.c_str()))
38 {
39 throw std::logic_error("Could not overwrite file at path");
40 }
41 }
42
43 if (sqlite3_open_v2(path.c_str(), &ppdb_, SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE, NULL) != SQLITE_OK)
44 {
45 // We still have to free the resources allocated. In the event that
46 // allocation failed, ppdb will be null and sqlite3_close_v2 will just
47 // ignore it.
48 std::string errmsg(sqlite3_errmsg(ppdb_));
49 sqlite3_close_v2(ppdb_);
50
51 throw sqlite3_error("Could not create output datafile", errmsg);
52 }
53 }
54
55 database::database(database&& other) : database()
56 {
57 swap(*this, other);
58 }
59
60 database& database::operator=(database&& other)
61 {
62 swap(*this, other);
63
64 return *this;
65 }
66
67 void swap(database& first, database& second)
68 {
69 std::swap(first.ppdb_, second.ppdb_);
70 }
71
72 database::~database()
73 {
74 sqlite3_close_v2(ppdb_);
75 }
76
77 void database::runQuery(std::string query)
78 {
79 // This can only happen when doing bad things with move semantics.
80 assert(ppdb_ != nullptr);
81
82 sqlite3_stmt* ppstmt;
83
84 if (sqlite3_prepare_v2(ppdb_, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
85 {
86 throw sqlite3_error("Error writing to database", sqlite3_errmsg(ppdb_));
87 }
88
89 int result = sqlite3_step(ppstmt);
90 sqlite3_finalize(ppstmt);
91
92 if (result != SQLITE_DONE)
93 {
94 throw sqlite3_error("Error writing to database", sqlite3_errmsg(ppdb_));
95 }
96 }
97
98 void database::insertIntoTable(std::string table, std::list<field> fields)
99 {
100 // This can only happen when doing bad things with move semantics.
101 assert(ppdb_ != nullptr);
102
103 // This shouldn't happen.
104 assert(!fields.empty());
105
106 std::list<std::string> fieldNames;
107 std::list<std::string> qs;
108 for (field& f : fields)
109 {
110 fieldNames.push_back(f.getName());
111 qs.push_back("?");
112 }
113
114 std::ostringstream query;
115 query << "INSERT INTO ";
116 query << table;
117 query << " (";
118 query << implode(std::begin(fieldNames), std::end(fieldNames), ", ");
119 query << ") VALUES (";
120 query << implode(std::begin(qs), std::end(qs), ", ");
121 query << ")";
122
123 std::string query_str = query.str();
124
125 sqlite3_stmt* ppstmt;
126
127 if (sqlite3_prepare_v2(ppdb_, query_str.c_str(), query_str.length(), &ppstmt, NULL) != SQLITE_OK)
128 {
129 throw sqlite3_error("Error writing to database", sqlite3_errmsg(ppdb_));
130 }
131
132 int i = 1;
133 for (field& f : fields)
134 {
135 switch (f.getType())
136 {
137 case field::type::integer:
138 {
139 sqlite3_bind_int(ppstmt, i, f.getInteger());
140
141 break;
142 }
143
144 case field::type::string:
145 {
146 sqlite3_bind_text(ppstmt, i, f.getString().c_str(), f.getString().length(), SQLITE_TRANSIENT);
147
148 break;
149 }
150
151 case field::type::invalid:
152 {
153 // Fields can only be invalid when doing bad things with move semantics.
154 assert(false);
155
156 break;
157 }
158 }
159
160 i++;
161 }
162
163 int result = sqlite3_step(ppstmt);
164 sqlite3_finalize(ppstmt);
165
166 if (result != SQLITE_DONE)
167 {
168 throw sqlite3_error("Error writing to database", sqlite3_errmsg(ppdb_));
169 }
170 }
171
172 };
173};
diff --git a/generator/database.h b/generator/database.h new file mode 100644 index 0000000..15cdff5 --- /dev/null +++ b/generator/database.h
@@ -0,0 +1,73 @@
1#ifndef DATABASE_H_0B0A47D2
2#define DATABASE_H_0B0A47D2
3
4#include <string>
5#include <exception>
6#include <list>
7
8struct sqlite3;
9
10namespace verbly {
11 namespace generator {
12
13 class field;
14
// Exception reported when a SQLite operation fails. It carries two strings:
// a combined message exposed through what(), and the database error text on
// its own, exposed through db_err().
class sqlite3_error : public std::exception {
public:

  // `what` describes the failed operation; `db_err` is the database's own
  // error text.
  sqlite3_error(const std::string& what, const std::string& db_err);

  // Combined message.
  const char* what() const noexcept override;

  // Database error text only.
  const char* db_err() const noexcept;

private:

  std::string what_;
  std::string db_err_;

};
28
29 class database {
30 public:
31
32 // Constructor
33
34 explicit database(std::string path);
35
36 // Disable copying
37
38 database(const database& other) = delete;
39 database& operator=(const database& other) = delete;
40
41 // Move constructor and move assignment
42
43 database(database&& other);
44 database& operator=(database&& other);
45
46 // Swap
47
48 friend void swap(database& first, database& second);
49
50 // Destructor
51
52 ~database();
53
54 // Actions
55
56 void runQuery(std::string query);
57
58 void insertIntoTable(std::string table, std::list<field> fields);
59
60 private:
61
62 database()
63 {
64 }
65
66 sqlite3* ppdb_ = nullptr;
67
68 };
69
70 };
71};
72
73#endif /* end of include guard: DATABASE_H_0B0A47D2 */
diff --git a/generator/field.cpp b/generator/field.cpp new file mode 100644 index 0000000..84b2f91 --- /dev/null +++ b/generator/field.cpp
@@ -0,0 +1,193 @@
1#include "field.h"
2#include <stdexcept>
3#include <utility>
4
5namespace verbly {
6 namespace generator {
7
8 field::field(const field& other)
9 {
10 type_ = other.type_;
11 name_ = other.name_;
12
13 switch (type_)
14 {
15 case type::integer:
16 {
17 integer_ = other.integer_;
18
19 break;
20 }
21
22 case type::string:
23 {
24 new(&string_) std::string(other.string_);
25
26 break;
27 }
28
29 case type::invalid:
30 {
31 break;
32 }
33 }
34 }
35
36 field::field(field&& other) : field()
37 {
38 swap(*this, other);
39 }
40
41 field& field::operator=(field other)
42 {
43 swap(*this, other);
44
45 return *this;
46 }
47
48 void swap(field& first, field& second)
49 {
50 using type = field::type;
51
52 type tempType = first.type_;
53 std::string tempName = std::move(first.name_);
54 int tempInteger;
55 std::string tempString;
56
57 switch (first.type_)
58 {
59 case type::integer:
60 {
61 tempInteger = first.integer_;
62
63 break;
64 }
65
66 case type::string:
67 {
68 tempString = std::move(tempString);
69
70 break;
71 }
72
73 case type::invalid:
74 {
75 break;
76 }
77 }
78
79 first.~field();
80
81 first.type_ = second.type_;
82 first.name_ = std::move(second.name_);
83
84 switch (second.type_)
85 {
86 case type::integer:
87 {
88 first.integer_ = second.integer_;
89
90 break;
91 }
92
93 case type::string:
94 {
95 new(&first.string_) std::string(std::move(second.string_));
96
97 break;
98 }
99
100 case type::invalid:
101 {
102 break;
103 }
104 }
105
106 second.~field();
107
108 second.type_ = tempType;
109 second.name_ = std::move(tempName);
110
111 switch (tempType)
112 {
113 case type::integer:
114 {
115 second.integer_ = tempInteger;
116
117 break;
118 }
119
120 case type::string:
121 {
122 new(&second.string_) std::string(std::move(tempString));
123
124 break;
125 }
126
127 case type::invalid:
128 {
129 break;
130 }
131 }
132 }
133
134 field::~field()
135 {
136 switch (type_)
137 {
138 case type::string:
139 {
140 using string_type = std::string;
141 string_.~string_type();
142
143 break;
144 }
145
146 case type::integer:
147 case type::invalid:
148 {
149 break;
150 }
151 }
152 }
153
154 field::field(
155 std::string name,
156 int arg) :
157 type_(type::integer),
158 name_(name),
159 integer_(arg)
160 {
161 }
162
163 int field::getInteger() const
164 {
165 if (type_ != type::integer)
166 {
167 throw std::domain_error("field::getInteger called on non-integer field");
168 }
169
170 return integer_;
171 }
172
173 field::field(
174 std::string name,
175 std::string arg) :
176 type_(type::string),
177 name_(name)
178 {
179 new(&string_) std::string(arg);
180 }
181
182 std::string field::getString() const
183 {
184 if (type_ != type::string)
185 {
186 throw std::domain_error("field::getString called on non-string field");
187 }
188
189 return string_;
190 }
191
192 };
193};
diff --git a/generator/field.h b/generator/field.h new file mode 100644 index 0000000..1fbabfc --- /dev/null +++ b/generator/field.h
@@ -0,0 +1,76 @@
1#ifndef BINDING_H_CAE0B18E
2#define BINDING_H_CAE0B18E
3
4#include <string>
5
6namespace verbly {
7 namespace generator {
8
// A named value destined for a database column. The value is either an int
// or a std::string, stored in an anonymous union; type_ records which member
// is currently active.
class field {
public:

  // Tag identifying the active union member. `invalid` means neither member
  // is active, which is the state the private default constructor produces.
  enum class type {
    invalid,
    integer,
    string
  };

  // Integer-valued field
  field(std::string name, int arg);

  // String-valued field
  field(std::string name, std::string arg);

  // Copying and moving
  field(const field& other);
  field(field&& other);

  // Assignment; the by-value parameter serves copies and moves alike
  field& operator=(field other);

  // Exchanges the complete state of two fields
  friend void swap(field& first, field& second);

  ~field();

  // Which kind of value is held
  type getType() const { return type_; }

  // Column name
  std::string getName() const { return name_; }

  // Typed accessors for the held value
  int getInteger() const;
  std::string getString() const;

private:

  // Produces an invalid field with no active union member. It cannot be
  // defaulted, because the union contains a std::string.
  field()
  {
  }

  union {
    int integer_;
    std::string string_;
  };

  type type_ = type::invalid;
  std::string name_;
};
72
73 };
74};
75
76#endif /* end of include guard: BINDING_H_CAE0B18E */
diff --git a/generator/form.cpp b/generator/form.cpp new file mode 100644 index 0000000..6be9d47 --- /dev/null +++ b/generator/form.cpp
@@ -0,0 +1,53 @@
#include "form.h"
#include <algorithm>
#include <cctype>
#include <list>
#include "database.h"
#include "field.h"
#include "pronunciation.h"
7
8namespace verbly {
9 namespace generator {
10
11 int form::nextId_ = 0;
12
13 form::form(std::string text) :
14 id_(nextId_++),
15 text_(text),
16 complexity_(std::count(std::begin(text), std::end(text), ' ') + 1),
17 proper_(std::any_of(std::begin(text), std::end(text), std::isupper))
18 {
19 }
20
21 void form::addPronunciation(const pronunciation& p)
22 {
23 pronunciations_.insert(&p);
24 }
25
26 database& operator<<(database& db, const form& arg)
27 {
28 // Serialize the form first.
29 {
30 std::list<field> fields;
31 fields.emplace_back("form_id", arg.getId());
32 fields.emplace_back("form", arg.getText());
33 fields.emplace_back("complexity", arg.getComplexity());
34 fields.emplace_back("proper", arg.isProper());
35
36 db.insertIntoTable("forms", std::move(fields));
37 }
38
39 // Then, serialize the form/pronunciation relationship.
40 for (const pronunciation* p : arg.getPronunciations())
41 {
42 std::list<field> fields;
43 fields.emplace_back("form_id", arg.getId());
44 fields.emplace_back("pronunciation_id", p->getId());
45
46 db.insertIntoTable("forms_pronunciations", std::move(fields));
47 }
48
49 return db;
50 }
51
52 };
53};
diff --git a/generator/form.h b/generator/form.h new file mode 100644 index 0000000..5576035 --- /dev/null +++ b/generator/form.h
@@ -0,0 +1,71 @@
1#ifndef FORM_H_7EFBC970
2#define FORM_H_7EFBC970
3
4#include <string>
5#include <set>
6
7namespace verbly {
8 namespace generator {
9
10 class pronunciation;
11 class database;
12
13 class form {
14 public:
15
16 // Constructor
17
18 explicit form(std::string text);
19
20 // Mutators
21
22 void addPronunciation(const pronunciation& p);
23
24 // Accessors
25
26 int getId() const
27 {
28 return id_;
29 }
30
31 std::string getText() const
32 {
33 return text_;
34 }
35
36 int getComplexity() const
37 {
38 return complexity_;
39 }
40
41 bool isProper() const
42 {
43 return proper_;
44 }
45
46 std::set<const pronunciation*> getPronunciations() const
47 {
48 return pronunciations_;
49 }
50
51 private:
52
53 static int nextId_;
54
55 const int id_;
56 const std::string text_;
57 const int complexity_;
58 const bool proper_;
59
60 std::set<const pronunciation*> pronunciations_;
61
62 };
63
64 // Serializer
65
66 database& operator<<(database& db, const form& arg);
67
68 };
69};
70
71#endif /* end of include guard: FORM_H_7EFBC970 */
diff --git a/generator/frame.cpp b/generator/frame.cpp new file mode 100644 index 0000000..9f0653f --- /dev/null +++ b/generator/frame.cpp
@@ -0,0 +1,83 @@
1#include "frame.h"
2#include "database.h"
3#include "field.h"
4
5namespace verbly {
6 namespace generator {
7
8 int frame::nextId_ = 0;
9
10 frame::frame() : id_(nextId_++)
11 {
12 }
13
14 void frame::push_back(part fp)
15 {
16 parts_.push_back(std::move(fp));
17 }
18
19 database& operator<<(database& db, const frame& arg)
20 {
21 std::list<field> fields;
22 fields.emplace_back("frame_id", arg.getId());
23
24 nlohmann::json jsonParts;
25 for (const part& p : arg)
26 {
27 nlohmann::json jsonPart;
28 jsonPart["type"] = static_cast<int>(p.getType());
29
30 switch (p.getType())
31 {
32 case part::type::noun_phrase:
33 {
34 jsonPart["role"] = p.getNounRole();
35 jsonPart["selrestrs"] = p.getNounSelrestrs().toJson();
36 jsonPart["synrestrs"] = p.getNounSynrestrs();
37
38 break;
39 }
40
41 case part::type::preposition:
42 {
43 jsonPart["choices"] = p.getPrepositionChoices();
44 jsonPart["literal"] = p.isPrepositionLiteral();
45
46 break;
47 }
48
49 case part::type::literal:
50 {
51 jsonPart["value"] = p.getLiteralValue();
52
53 break;
54 }
55
56 case part::type::verb:
57 case part::type::adjective:
58 case part::type::adverb:
59 {
60 break;
61 }
62
63 case part::type::invalid:
64 {
65 // Invalid parts should not be serialized.
66 assert(false);
67
68 break;
69 }
70 }
71
72 jsonParts.emplace_back(std::move(jsonPart));
73 }
74
75 fields.emplace_back("data", jsonParts.dump());
76
77 db.insertIntoTable("frames", std::move(fields));
78
79 return db;
80 }
81
82 };
83};
diff --git a/generator/frame.h b/generator/frame.h new file mode 100644 index 0000000..411ce6c --- /dev/null +++ b/generator/frame.h
@@ -0,0 +1,59 @@
1#ifndef FRAME_H_26770FF1
2#define FRAME_H_26770FF1
3
4#include <list>
5#include "part.h"
6
7namespace verbly {
8 namespace generator {
9
10 class database;
11
12 class frame {
13 public:
14
15 // Aliases
16
17 using const_iterator = std::list<part>::const_iterator;
18
19 // Constructor
20
21 frame();
22
23 // Mutators
24
25 void push_back(part fp);
26
27 // Accessors
28
29 int getId() const
30 {
31 return id_;
32 }
33
34 const_iterator begin() const
35 {
36 return std::begin(parts_);
37 }
38
39 const_iterator end() const
40 {
41 return std::end(parts_);
42 }
43
44 private:
45
46 static int nextId_;
47
48 const int id_;
49
50 std::list<part> parts_;
51
52 };
53
54 database& operator<<(database& db, const frame& arg);
55
56 };
57};
58
59#endif /* end of include guard: FRAME_H_26770FF1 */
diff --git a/generator/generator.cpp b/generator/generator.cpp index 6a16467..d88cb31 100644 --- a/generator/generator.cpp +++ b/generator/generator.cpp
@@ -1,2320 +1,1477 @@
1#include <libxml/parser.h> 1#include "generator.h"
2#include <cassert>
3#include <stdexcept>
2#include <iostream> 4#include <iostream>
5#include <regex>
3#include <dirent.h> 6#include <dirent.h>
4#include <set>
5#include <map>
6#include <string>
7#include <vector>
8#include <fstream> 7#include <fstream>
9#include <sqlite3.h> 8#include "enums.h"
10#include <sstream>
11#include <regex>
12#include <list>
13#include <algorithm>
14#include <json.hpp>
15#include "progress.h" 9#include "progress.h"
10#include "selrestr.h"
11#include "role.h"
12#include "part.h"
13#include "field.h"
16#include "../lib/util.h" 14#include "../lib/util.h"
17 15
18using json = nlohmann::json; 16namespace verbly {
19 17 namespace generator {
20struct verb_t {
21 std::string infinitive;
22 std::string past_tense;
23 std::string past_participle;
24 std::string ing_form;
25 std::string s_form;
26 int id;
27};
28
29struct adjective_t {
30 std::string base;
31 std::string comparative;
32 std::string superlative;
33};
34
35struct noun_t {
36 std::string singular;
37 std::string plural;
38};
39
40struct selrestr_t {
41 enum class type_t {
42 singleton,
43 andlogic,
44 orlogic,
45 empty
46 };
47 type_t type;
48 std::string restriction;
49 bool pos;
50 std::list<selrestr_t> subordinates;
51};
52
53struct framepart_t {
54 enum class type_t {
55 np,
56 v,
57 pp,
58 adj,
59 adv,
60 lex
61 };
62 type_t type;
63 std::string role;
64 selrestr_t selrestrs;
65 std::set<std::string> preprestrs;
66 std::set<std::string> synrestrs;
67 std::list<std::string> choices;
68 std::string lexval;
69};
70
71struct group_t {
72 std::string id;
73 std::string parent;
74 std::set<std::string> members;
75 std::map<std::string, selrestr_t> roles;
76 std::list<std::list<framepart_t>> frames;
77};
78
79struct pronunciation_t {
80 std::string phonemes;
81 std::string prerhyme;
82 std::string rhyme;
83 int syllables = 0;
84 std::string stress;
85
86 bool operator<(const pronunciation_t& other) const
87 {
88 return phonemes < other.phonemes;
89 }
90};
91
92std::map<std::string, group_t> groups;
93std::map<std::string, verb_t> verbs;
94std::map<std::string, adjective_t> adjectives;
95std::map<std::string, noun_t> nouns;
96std::map<int, std::map<int, int>> wn;
97std::map<int, int> images;
98std::map<std::string, std::set<pronunciation_t>> pronunciations;
99
100void print_usage()
101{
102 std::cout << "Verbly Datafile Generator" << std::endl;
103 std::cout << "-------------------------" << std::endl;
104 std::cout << "Requires exactly six arguments." << std::endl;
105 std::cout << "1. The path to a VerbNet data directory." << std::endl;
106 std::cout << "2. The path to an AGID infl.txt file." << std::endl;
107 std::cout << "3. The path to a WordNet prolog data directory." << std::endl;
108 std::cout << "4. The path to a CMUDICT pronunciation file." << std::endl;
109 std::cout << "5. The path to an ImageNet urls.txt file." << std::endl;
110 std::cout << "6. Datafile output path." << std::endl;
111
112 exit(1);
113}
114
115void db_error(sqlite3* ppdb, std::string query)
116{
117 std::cout << "Error writing to output database: " << sqlite3_errmsg(ppdb) << std::endl;
118 std::cout << query << std::endl;
119 sqlite3_close_v2(ppdb);
120 print_usage();
121}
122
123json export_selrestrs(selrestr_t r)
124{
125 if (r.type == selrestr_t::type_t::empty)
126 {
127 return {};
128 } else if (r.type == selrestr_t::type_t::singleton)
129 {
130 json result;
131 result["type"] = r.restriction;
132 result["pos"] = r.pos;
133 return result;
134 } else {
135 json result;
136 if (r.type == selrestr_t::type_t::andlogic)
137 {
138 result["logic"] = "and";
139 } else {
140 result["logic"] = "or";
141 }
142
143 std::list<json> outlist;
144 std::transform(std::begin(r.subordinates), std::end(r.subordinates), std::back_inserter(outlist), &export_selrestrs);
145 result["children"] = outlist;
146 18
147 return result; 19 generator::generator(
148 } 20 std::string verbNetPath,
149} 21 std::string agidPath,
150 22 std::string wordNetPath,
151selrestr_t parse_selrestrs(xmlNodePtr top, std::string filename) 23 std::string cmudictPath,
152{ 24 std::string imageNetPath,
153 selrestr_t r; 25 std::string outputPath) :
154 xmlChar* key; 26 verbNetPath_(verbNetPath),
155 27 agidPath_(agidPath),
156 if (!xmlStrcmp(top->name, (const xmlChar*) "SELRESTRS")) 28 wordNetPath_(wordNetPath),
157 { 29 cmudictPath_(cmudictPath),
158 if (xmlChildElementCount(top) == 0) 30 imageNetPath_(imageNetPath),
31 db_(outputPath)
159 { 32 {
160 r.type = selrestr_t::type_t::empty; 33 // Ensure VerbNet directory exists
161 } else if (xmlChildElementCount(top) == 1) 34 DIR* dir;
162 { 35 if ((dir = opendir(verbNetPath_.c_str())) == nullptr)
163 r = parse_selrestrs(xmlFirstElementChild(top), filename);
164 } else {
165 r.type = selrestr_t::type_t::andlogic;
166
167 if (xmlHasProp(top, (const xmlChar*) "logic"))
168 { 36 {
169 key = xmlGetProp(top, (const xmlChar*) "logic"); 37 throw std::invalid_argument("Invalid VerbNet data directory");
170 if (!xmlStrcmp(key, (const xmlChar*) "or"))
171 {
172 r.type = selrestr_t::type_t::orlogic;
173 }
174 xmlFree(key);
175 } 38 }
176 39
177 for (xmlNodePtr selrestr = top->xmlChildrenNode; selrestr != nullptr; selrestr = selrestr->next) 40 closedir(dir);
41
42 // Ensure AGID infl.txt exists
43 if (!std::ifstream(agidPath_))
178 { 44 {
179 if (!xmlStrcmp(selrestr->name, (const xmlChar*) "SELRESTRS") || !xmlStrcmp(selrestr->name, (const xmlChar*) "SELRESTR")) 45 throw std::invalid_argument("AGID infl.txt file not found");
180 {
181 r.subordinates.push_back(parse_selrestrs(selrestr, filename));
182 }
183 } 46 }
184 } 47
185 } else if (!xmlStrcmp(top->name, (const xmlChar*) "SELRESTR")) 48 // Add directory separator to WordNet path
186 { 49 if ((wordNetPath_.back() != '/') && (wordNetPath_.back() != '\\'))
187 r.type = selrestr_t::type_t::singleton;
188
189 key = xmlGetProp(top, (xmlChar*) "Value");
190 r.pos = (std::string((const char*)key) == "+");
191 xmlFree(key);
192
193 key = xmlGetProp(top, (xmlChar*) "type");
194 r.restriction = (const char*) key;
195 xmlFree(key);
196 } else {
197 // Invalid
198 std::cout << "Bad VerbNet file format: " << filename << std::endl;
199 print_usage();
200 }
201
202 return r;
203}
204
205group_t& parse_group(xmlNodePtr top, std::string filename)
206{
207 xmlChar* key = xmlGetProp(top, (xmlChar*) "ID");
208 if (key == 0)
209 {
210 std::cout << "Bad VerbNet file format: " << filename << std::endl;
211 print_usage();
212 }
213 std::string vnid = (const char*)key;
214 vnid = vnid.substr(vnid.find_first_of("-")+1);
215 xmlFree(key);
216
217 group_t g;
218 g.id = vnid;
219
220 for (xmlNodePtr node = top->xmlChildrenNode; node != nullptr; node = node->next)
221 {
222 if (!xmlStrcmp(node->name, (const xmlChar*) "SUBCLASSES"))
223 {
224 for (xmlNodePtr subclass = node->xmlChildrenNode; subclass != nullptr; subclass = subclass->next)
225 { 50 {
226 if (!xmlStrcmp(subclass->name, (const xmlChar*) "VNSUBCLASS")) 51 wordNetPath_ += '/';
227 {
228 auto& sg = parse_group(subclass, filename);
229 sg.parent = vnid;
230
231 for (auto member : sg.members)
232 {
233 g.members.insert(member);
234 }
235
236 // The schema requires that subclasses appear after role definitions, so we can do this now
237 for (auto role : g.roles)
238 {
239 if (sg.roles.count(role.first) == 0)
240 {
241 sg.roles[role.first] = role.second;
242 }
243 }
244 }
245 } 52 }
246 } else if (!xmlStrcmp(node->name, (const xmlChar*) "MEMBERS")) 53
247 { 54 // Ensure WordNet tables exist
248 for (xmlNodePtr member = node->xmlChildrenNode; member != nullptr; member = member->next) 55 for (std::string table : {
56 "s", "sk", "ant", "at", "cls", "hyp", "ins", "mm", "mp", "ms", "per", "sa", "sim", "syntax"
57 })
249 { 58 {
250 if (!xmlStrcmp(member->name, (const xmlChar*) "MEMBER")) 59 if (!std::ifstream(wordNetPath_ + "wn_" + table + ".pl"))
251 { 60 {
252 key = xmlGetProp(member, (xmlChar*) "name"); 61 throw std::invalid_argument("WordNet " + table + " table not found");
253 g.members.insert((const char*)key);
254 xmlFree(key);
255 } 62 }
256 } 63 }
257 } else if (!xmlStrcmp(node->name, (const xmlChar*) "THEMROLES")) 64
258 { 65 // Ensure CMUDICT file exists
259 for (xmlNodePtr role = node->xmlChildrenNode; role != nullptr; role = role->next) 66 if (!std::ifstream(cmudictPath_))
260 { 67 {
261 if (!xmlStrcmp(role->name, (const xmlChar*) "THEMROLE")) 68 throw std::invalid_argument("CMUDICT file not found");
262 {
263 selrestr_t r;
264 r.type = selrestr_t::type_t::empty;
265
266 key = xmlGetProp(role, (const xmlChar*) "type");
267 std::string type = (const char*)key;
268 xmlFree(key);
269
270 for (xmlNodePtr rolenode = role->xmlChildrenNode; rolenode != nullptr; rolenode = rolenode->next)
271 {
272 if (!xmlStrcmp(rolenode->name, (const xmlChar*) "SELRESTRS"))
273 {
274 r = parse_selrestrs(rolenode, filename);
275 }
276 }
277
278 g.roles[type] = r;
279 }
280 } 69 }
281 } else if (!xmlStrcmp(node->name, (const xmlChar*) "FRAMES")) 70
282 { 71 // Ensure ImageNet urls.txt exists
283 for (xmlNodePtr frame = node->xmlChildrenNode; frame != nullptr; frame = frame->next) 72 if (!std::ifstream(imageNetPath_))
284 { 73 {
285 if (!xmlStrcmp(frame->name, (const xmlChar*) "FRAME")) 74 throw std::invalid_argument("ImageNet urls.txt file not found");
286 {
287 std::list<framepart_t> f;
288
289 for (xmlNodePtr framenode = frame->xmlChildrenNode; framenode != nullptr; framenode = framenode->next)
290 {
291 if (!xmlStrcmp(framenode->name, (const xmlChar*) "SYNTAX"))
292 {
293 for (xmlNodePtr syntaxnode = framenode->xmlChildrenNode; syntaxnode != nullptr; syntaxnode = syntaxnode->next)
294 {
295 framepart_t fp;
296
297 if (!xmlStrcmp(syntaxnode->name, (const xmlChar*) "NP"))
298 {
299 fp.type = framepart_t::type_t::np;
300
301 key = xmlGetProp(syntaxnode, (xmlChar*) "value");
302 fp.role = (const char*)key;
303 xmlFree(key);
304
305 fp.selrestrs.type = selrestr_t::type_t::empty;
306
307 for (xmlNodePtr npnode = syntaxnode->xmlChildrenNode; npnode != nullptr; npnode = npnode->next)
308 {
309 if (!xmlStrcmp(npnode->name, (const xmlChar*) "SYNRESTRS"))
310 {
311 for (xmlNodePtr synrestr = npnode->xmlChildrenNode; synrestr != nullptr; synrestr = synrestr->next)
312 {
313 if (!xmlStrcmp(synrestr->name, (const xmlChar*) "SYNRESTR"))
314 {
315 key = xmlGetProp(synrestr, (xmlChar*) "type");
316 fp.synrestrs.insert(std::string((const char*)key));
317 xmlFree(key);
318 }
319 }
320 }
321
322 if (!xmlStrcmp(npnode->name, (const xmlChar*) "SELRESTRS"))
323 {
324 fp.selrestrs = parse_selrestrs(npnode, filename);
325 }
326 }
327 } else if (!xmlStrcmp(syntaxnode->name, (xmlChar*) "VERB"))
328 {
329 fp.type = framepart_t::type_t::v;
330 } else if (!xmlStrcmp(syntaxnode->name, (xmlChar*) "PREP"))
331 {
332 fp.type = framepart_t::type_t::pp;
333
334 if (xmlHasProp(syntaxnode, (xmlChar*) "value"))
335 {
336 key = xmlGetProp(syntaxnode, (xmlChar*) "value");
337 std::string choices = (const char*)key;
338 xmlFree(key);
339
340 fp.choices = verbly::split<std::list<std::string>>(choices, " ");
341 }
342
343 for (xmlNodePtr npnode = syntaxnode->xmlChildrenNode; npnode != nullptr; npnode = npnode->next)
344 {
345 if (!xmlStrcmp(npnode->name, (const xmlChar*) "SELRESTRS"))
346 {
347 for (xmlNodePtr synrestr = npnode->xmlChildrenNode; synrestr != nullptr; synrestr = synrestr->next)
348 {
349 if (!xmlStrcmp(synrestr->name, (const xmlChar*) "SELRESTR"))
350 {
351 key = xmlGetProp(synrestr, (xmlChar*) "type");
352 fp.preprestrs.insert(std::string((const char*)key));
353 xmlFree(key);
354 }
355 }
356 }
357 }
358 } else if (!xmlStrcmp(syntaxnode->name, (xmlChar*) "ADJ"))
359 {
360 fp.type = framepart_t::type_t::adj;
361 } else if (!xmlStrcmp(syntaxnode->name, (xmlChar*) "ADV"))
362 {
363 fp.type = framepart_t::type_t::adv;
364 } else if (!xmlStrcmp(syntaxnode->name, (xmlChar*) "LEX"))
365 {
366 fp.type = framepart_t::type_t::lex;
367
368 key = xmlGetProp(syntaxnode, (xmlChar*) "value");
369 fp.lexval = (const char*)key;
370 xmlFree(key);
371 } else {
372 continue;
373 }
374
375 f.push_back(fp);
376 }
377
378 g.frames.push_back(f);
379 }
380 }
381 }
382 } 75 }
383 } 76 }
384 }
385
386 groups[vnid] = g;
387
388 return groups[vnid];
389}
390
391int main(int argc, char** argv)
392{
393 if (argc != 7)
394 {
395 print_usage();
396 }
397
398 // VerbNet data
399 std::cout << "Reading verb frames..." << std::endl;
400
401 DIR* dir;
402 if ((dir = opendir(argv[1])) == nullptr)
403 {
404 std::cout << "Invalid VerbNet data directory." << std::endl;
405
406 print_usage();
407 }
408
409 struct dirent* ent;
410 while ((ent = readdir(dir)) != nullptr)
411 {
412 std::string filename(argv[1]);
413 if (filename.back() != '/')
414 {
415 filename += '/';
416 }
417 77
418 filename += ent->d_name; 78 void generator::run()
419 //std::cout << ent->d_name << std::endl;
420
421 if (filename.rfind(".xml") != filename.size() - 4)
422 {
423 continue;
424 }
425
426 xmlDocPtr doc = xmlParseFile(filename.c_str());
427 if (doc == nullptr)
428 {
429 std::cout << "Error opening " << filename << std::endl;
430 print_usage();
431 }
432
433 xmlNodePtr top = xmlDocGetRootElement(doc);
434 if ((top == nullptr) || (xmlStrcmp(top->name, (xmlChar*) "VNCLASS")))
435 {
436 std::cout << "Bad VerbNet file format: " << filename << std::endl;
437 print_usage();
438 }
439
440 parse_group(top, filename);
441 }
442
443 closedir(dir);
444
445 // Get verbs from AGID
446 std::cout << "Reading inflections..." << std::endl;
447
448 std::ifstream agidfile(argv[2]);
449 if (!agidfile.is_open())
450 {
451 std::cout << "Could not open AGID file: " << argv[2] << std::endl;
452 print_usage();
453 }
454
455 for (;;)
456 {
457 std::string line;
458 if (!getline(agidfile, line))
459 {
460 break;
461 }
462
463 if (line.back() == '\r')
464 { 79 {
465 line.pop_back(); 80 // Create notions, words, lemmas, and forms from WordNet synsets
466 } 81 readWordNetSynsets();
467 82
468 int divider = line.find_first_of(" "); 83 // Reads adjective positioning WordNet data
469 std::string word = line.substr(0, divider); 84 readAdjectivePositioning();
470 line = line.substr(divider+1); 85
471 char type = line[0]; 86 // Counts the number of URLs ImageNet has per notion
472 87 readImageNetUrls();
473 if (line[1] == '?') 88
474 { 89 // Creates a word by WordNet sense key lookup table
475 line.erase(0, 4); 90 readWordNetSenseKeys();
476 } else { 91
477 line.erase(0, 3); 92 // Creates groups and frames from VerbNet data
478 } 93 readVerbNet();
479 94
480 std::vector<std::string> forms; 95 // Creates forms and inflections from AGID. To reduce the amount of forms
481 while (!line.empty()) 96 // created, we do this after most lemmas that need inflecting have been
482 { 97 // created through other means, and then only generate forms for
483 std::string inflection; 98 // inflections of already-existing lemmas. The exception to this regards
484 if ((divider = line.find(" | ")) != std::string::npos) 99 // verb lemmas. If a verb lemma in AGID either does not exist yet, or does
485 { 100 // exist but is not related to any words that are related to verb notions,
486 inflection = line.substr(0, divider); 101 // then a notion and a word are generated and the form generation proceeds
487 line = line.substr(divider + 3); 102 // as usual.
488 } else { 103 readAgidInflections();
489 inflection = line; 104
490 line = ""; 105 // Reads in prepositions and the is_a relationship
491 } 106 readPrepositions();
492 107
493 if ((divider = inflection.find_first_of(",?")) != std::string::npos) 108 // Creates pronunciations from CMUDICT. To reduce the amount of
494 { 109 // pronunciations created, we do this after all forms have been created,
495 inflection = inflection.substr(0, divider); 110 // and then only generate pronunciations for already-existing forms.
496 } 111 readCmudictPronunciations();
497 112
498 forms.push_back(inflection); 113 // Writes the database schema
114 writeSchema();
115
116 // Dumps data to the database
117 dumpObjects();
118
119 // Populates the antonymy relationship from WordNet
120 readWordNetAntonymy();
121
122 // Populates the variation relationship from WordNet
123 readWordNetVariation();
124
125 // Populates the usage, topicality, and regionality relationships from
126 // WordNet
127 readWordNetClasses();
128
129 // Populates the causality relationship from WordNet
130 readWordNetCausality();
131
132 // Populates the entailment relationship from WordNet
133 readWordNetEntailment();
134
135 // Populates the hypernymy relationship from WordNet
136 readWordNetHypernymy();
137
138 // Populates the instantiation relationship from WordNet
139 readWordNetInstantiation();
140
141 // Populates the member meronymy relationship from WordNet
142 readWordNetMemberMeronymy();
143
144 // Populates the part meronymy relationship from WordNet
145 readWordNetPartMeronymy();
146
147 // Populates the substance meronymy relationship from WordNet
148 readWordNetSubstanceMeronymy();
149
150 // Populates the pertainymy and mannernymy relationships from WordNet
151 readWordNetPertainymy();
152
153 // Populates the specification relationship from WordNet
154 readWordNetSpecification();
155
156 // Populates the adjective similarity relationship from WordNet
157 readWordNetSimilarity();
158
159
160
161
162
163
164
165
499 } 166 }
500 167
501 switch (type) 168 void generator::readWordNetSynsets()
502 { 169 {
503 case 'V': 170 std::list<std::string> lines(readFile(wordNetPath_ + "wn_s.pl"));
171 progress ppgs("Reading synsets from WordNet...", lines.size());
172
173 for (std::string line : lines)
504 { 174 {
505 verb_t v; 175 ppgs.update();
506 v.infinitive = word; 176
507 if (forms.size() == 4) 177 std::regex relation("^s\\(([1234]\\d{8}),(\\d+),'(.+)',\\w,\\d+,(\\d+)\\)\\.$");
508 { 178 std::smatch relation_data;
509 v.past_tense = forms[0]; 179 if (!std::regex_search(line, relation_data, relation))
510 v.past_participle = forms[1]; 180 {
511 v.ing_form = forms[2]; 181 continue;
512 v.s_form = forms[3];
513 } else if (forms.size() == 3)
514 {
515 v.past_tense = forms[0];
516 v.past_participle = forms[0];
517 v.ing_form = forms[1];
518 v.s_form = forms[2];
519 } else if (forms.size() == 8)
520 {
521 // As of AGID 2014.08.11, this is only "to be"
522 v.past_tense = forms[0];
523 v.past_participle = forms[2];
524 v.ing_form = forms[3];
525 v.s_form = forms[4];
526 } else {
527 // Words that don't fit the cases above as of AGID 2014.08.11:
528 // - may and shall do not conjugate the way we want them to
529 // - methinks only has a past tense and is an outlier
530 // - wit has five forms, and is archaic/obscure enough that we can ignore it for now
531 std::cout << "Ignoring verb \"" << word << "\" due to non-standard number of forms." << std::endl;
532 } 182 }
533 183
534 verbs[word] = v; 184 int synset_id = std::stoi(relation_data[1]);
535 185 int wnum = std::stoi(relation_data[2]);
536 break; 186 std::string text = relation_data[3];
537 } 187 int tag_count = std::stoi(relation_data[4]);
538 188 size_t word_it;
539 case 'A': 189 while ((word_it = text.find("''")) != std::string::npos)
540 {
541 adjective_t adj;
542 adj.base = word;
543 if (forms.size() == 2)
544 { 190 {
545 adj.comparative = forms[0]; 191 text.erase(word_it, 1);
546 adj.superlative = forms[1];
547 } else {
548 // As of AGID 2014.08.11, this is only "only", which has only the form "onliest"
549 std::cout << "Ignoring adjective/adverb \"" << word << "\" due to non-standard number of forms." << std::endl;
550 } 192 }
551 193
552 adjectives[word] = adj; 194 // The WordNet data does contain duplicates, so we need to check that we
553 195 // haven't already created this word.
554 break; 196 std::pair<int, int> lookup(synset_id, wnum);
555 } 197 if (!wordByWnidAndWnum_.count(lookup))
556
557 case 'N':
558 {
559 noun_t n;
560 n.singular = word;
561 if (forms.size() == 1)
562 { 198 {
563 n.plural = forms[0]; 199 notion& synset = lookupOrCreateNotion(synset_id);
564 } else { 200 lemma& lex = lookupOrCreateLemma(text);
565 // As of AGID 2014.08.11, this is non-existent. 201 word& entry = createWord(synset, lex, tag_count);
566 std::cout << "Ignoring noun \"" << word << "\" due to non-standard number of forms." << std::endl; 202
203 wordByWnidAndWnum_[lookup] = &entry;
567 } 204 }
568
569 nouns[word] = n;
570
571 break;
572 } 205 }
573 } 206 }
574 }
575
576 // Pronunciations
577 std::cout << "Reading pronunciations..." << std::endl;
578
579 std::ifstream pronfile(argv[4]);
580 if (!pronfile.is_open())
581 {
582 std::cout << "Could not open CMUDICT file: " << argv[4] << std::endl;
583 print_usage();
584 }
585
586 for (;;)
587 {
588 std::string line;
589 if (!getline(pronfile, line))
590 {
591 break;
592 }
593
594 if (line.back() == '\r')
595 {
596 line.pop_back();
597 }
598 207
599 std::regex phoneme("([A-Z][^ \\(]*)(?:\\(\\d+\\))? ([A-Z 0-9]+)"); 208 void generator::readAdjectivePositioning()
600 std::smatch phoneme_data;
601 if (std::regex_search(line, phoneme_data, phoneme))
602 { 209 {
603 std::string canonical(phoneme_data[1]); 210 std::list<std::string> lines(readFile(wordNetPath_ + "wn_syntax.pl"));
604 std::transform(std::begin(canonical), std::end(canonical), std::begin(canonical), ::tolower); 211 progress ppgs("Reading adjective positionings from WordNet...", lines.size());
605
606 std::string phonemes = phoneme_data[2];
607 auto phoneme_set = verbly::split<std::list<std::string>>(phonemes, " ");
608 auto phemstrt = std::find_if(std::begin(phoneme_set), std::end(phoneme_set), [] (std::string phoneme) {
609 return phoneme.find("1") != std::string::npos;
610 });
611 212
612 pronunciation_t p; 213 for (std::string line : lines)
613 p.phonemes = phonemes;
614
615 // Rhyme detection
616 if (phemstrt != std::end(phoneme_set))
617 { 214 {
618 std::stringstream rhymer; 215 ppgs.update();
619 for (auto it = phemstrt; it != std::end(phoneme_set); it++)
620 {
621 std::string naked;
622 std::remove_copy_if(std::begin(*it), std::end(*it), std::back_inserter(naked), [] (char ch) {
623 return isdigit(ch);
624 });
625
626 if (it != phemstrt)
627 {
628 rhymer << " ";
629 }
630
631 rhymer << naked;
632 }
633 216
634 p.rhyme = rhymer.str(); 217 std::regex relation("^syntax\\((3\\d{8}),(\\d+),([ipa])p?\\)\\.");
635 218 std::smatch relation_data;
636 if (phemstrt != std::begin(phoneme_set)) 219 if (!std::regex_search(line, relation_data, relation))
637 { 220 {
638 phemstrt--; 221 continue;
639 p.prerhyme = *phemstrt;
640 } else {
641 p.prerhyme = "";
642 } 222 }
643 } else {
644 p.prerhyme = "";
645 p.rhyme = "";
646 }
647 223
648 // Syllable/stress 224 int synset_id = stoi(relation_data[1]);
649 for (auto phm : phoneme_set) 225 int wnum = stoi(relation_data[2]);
650 { 226 std::string adjpos_str = relation_data[3];
651 if (isdigit(phm.back()))
652 {
653 // It's a vowel!
654 p.syllables++;
655 227
656 if (phm.back() == '1') 228 std::pair<int, int> lookup(synset_id, wnum);
229 if (wordByWnidAndWnum_.count(lookup))
230 {
231 word& adj = *wordByWnidAndWnum_.at(lookup);
232
233 if (adjpos_str == "p")
234 {
235 adj.setAdjectivePosition(positioning::predicate);
236 } else if (adjpos_str == "a")
237 {
238 adj.setAdjectivePosition(positioning::attributive);
239 } else if (adjpos_str == "i")
657 { 240 {
658 p.stress.push_back('1'); 241 adj.setAdjectivePosition(positioning::postnominal);
659 } else { 242 } else {
660 p.stress.push_back('0'); 243 // Can't happen because of how we specified the regex.
244 assert(false);
661 } 245 }
662 } 246 }
663 } 247 }
664
665 pronunciations[canonical].insert(p);
666 }
667 }
668
669 // Images
670 std::cout << "Reading images..." << std::endl;
671
672 std::ifstream imagefile(argv[5]);
673 if (!imagefile.is_open())
674 {
675 std::cout << "Could not open ImageNet file: " << argv[5] << std::endl;
676 print_usage();
677 }
678
679 for (;;)
680 {
681 std::string line;
682 if (!getline(imagefile, line))
683 {
684 break;
685 }
686
687 if (line.back() == '\r')
688 {
689 line.pop_back();
690 }
691
692 std::string wnid_s = line.substr(1, 8);
693 int wnid = stoi(wnid_s) + 100000000;
694 images[wnid]++;
695 }
696
697 imagefile.close();
698
699 // Start writing output
700 std::cout << "Writing schema..." << std::endl;
701
702 sqlite3* ppdb;
703 if (sqlite3_open_v2(argv[6], &ppdb, SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE, NULL) != SQLITE_OK)
704 {
705 std::cout << "Error opening output datafile: " << sqlite3_errmsg(ppdb) << std::endl;
706 print_usage();
707 }
708
709 std::ifstream schemafile("schema.sql");
710 if (!schemafile.is_open())
711 {
712 std::cout << "Could not find schema file" << std::endl;
713 print_usage();
714 }
715
716 std::stringstream schemabuilder;
717 for (;;)
718 {
719 std::string line;
720 if (!getline(schemafile, line))
721 {
722 break;
723 }
724
725 if (line.back() == '\r')
726 {
727 line.pop_back();
728 }
729
730 schemabuilder << line << std::endl;
731 }
732
733 std::string schema = schemabuilder.str();
734 while (!schema.empty())
735 {
736 std::string query;
737 int divider = schema.find(";");
738 if (divider != std::string::npos)
739 {
740 query = schema.substr(0, divider+1);
741 schema = schema.substr(divider+2);
742 } else {
743 break;
744 } 248 }
745 249
746 sqlite3_stmt* schmstmt; 250 void generator::readImageNetUrls()
747 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &schmstmt, NULL) != SQLITE_OK)
748 { 251 {
749 db_error(ppdb, query); 252 // The ImageNet datafile is so large that it is unreasonable and
750 } 253 // unnecessary to read it into memory; instead, we will parse each line as
751 254 // we read it. This has the caveat that we cannot display a progress bar.
752 if (sqlite3_step(schmstmt) != SQLITE_DONE) 255 std::cout << "Reading image counts from ImageNet..." << std::endl;
753 {
754 db_error(ppdb, query);
755 }
756
757 sqlite3_finalize(schmstmt);
758 }
759
760 std::cout << "Writing prepositions..." << std::endl;
761 std::ifstream prepfile("prepositions.txt");
762 if (!prepfile.is_open())
763 {
764 std::cout << "Could not find prepositions file" << std::endl;
765 print_usage();
766 }
767
768 for (;;)
769 {
770 std::string line;
771 if (!getline(prepfile, line))
772 {
773 break;
774 }
775
776 if (line.back() == '\r')
777 {
778 line.pop_back();
779 }
780
781 std::regex relation("^([^:]+): (.+)");
782 std::smatch relation_data;
783 std::regex_search(line, relation_data, relation);
784 std::string prep = relation_data[1];
785 std::list<std::string> groups = verbly::split<std::list<std::string>>(relation_data[2], ", ");
786
787 std::string query("INSERT INTO prepositions (form) VALUES (?)");
788 sqlite3_stmt* ppstmt;
789
790 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
791 {
792 db_error(ppdb, query);
793 }
794
795 sqlite3_bind_text(ppstmt, 1, prep.c_str(), prep.length(), SQLITE_TRANSIENT);
796
797 if (sqlite3_step(ppstmt) != SQLITE_DONE)
798 {
799 db_error(ppdb, query);
800 }
801
802 sqlite3_finalize(ppstmt);
803
804 query = "SELECT last_insert_rowid()";
805 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
806 {
807 db_error(ppdb, query);
808 }
809
810 if (sqlite3_step(ppstmt) != SQLITE_ROW)
811 {
812 db_error(ppdb, query);
813 }
814
815 int rowid = sqlite3_column_int(ppstmt, 0);
816 sqlite3_finalize(ppstmt);
817
818 for (auto group : groups)
819 {
820 query = "INSERT INTO preposition_groups (preposition_id, groupname) VALUES (?, ?)";
821 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
822 {
823 db_error(ppdb, query);
824 }
825 256
826 sqlite3_bind_int(ppstmt, 1, rowid); 257 std::ifstream file(imageNetPath_);
827 sqlite3_bind_text(ppstmt, 2, group.c_str(), group.length(), SQLITE_TRANSIENT); 258 if (!file)
828
829 if (sqlite3_step(ppstmt) != SQLITE_DONE)
830 { 259 {
831 db_error(ppdb, query); 260 throw std::invalid_argument("Could not find file " + imageNetPath_);
832 } 261 }
833
834 sqlite3_finalize(ppstmt);
835 }
836 }
837
838 262
839 { 263 std::string line;
840 progress ppgs("Writing verbs...", verbs.size()); 264 while (std::getline(file, line))
841 for (auto& mapping : verbs)
842 {
843 sqlite3_stmt* ppstmt;
844 std::string query("INSERT INTO verbs (infinitive, past_tense, past_participle, ing_form, s_form) VALUES (?, ?, ?, ?, ?)");
845 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
846 {
847 db_error(ppdb, query);
848 }
849
850 sqlite3_bind_text(ppstmt, 1, mapping.second.infinitive.c_str(), mapping.second.infinitive.length(), SQLITE_TRANSIENT);
851 sqlite3_bind_text(ppstmt, 2, mapping.second.past_tense.c_str(), mapping.second.past_tense.length(), SQLITE_TRANSIENT);
852 sqlite3_bind_text(ppstmt, 3, mapping.second.past_participle.c_str(), mapping.second.past_participle.length(), SQLITE_TRANSIENT);
853 sqlite3_bind_text(ppstmt, 4, mapping.second.ing_form.c_str(), mapping.second.ing_form.length(), SQLITE_TRANSIENT);
854 sqlite3_bind_text(ppstmt, 5, mapping.second.s_form.c_str(), mapping.second.s_form.length(), SQLITE_TRANSIENT);
855
856 if (sqlite3_step(ppstmt) != SQLITE_DONE)
857 {
858 db_error(ppdb, query);
859 }
860
861 sqlite3_finalize(ppstmt);
862
863 std::string canonical(mapping.second.infinitive);
864 std::transform(std::begin(canonical), std::end(canonical), std::begin(canonical), ::tolower);
865 if (pronunciations.count(canonical) == 1)
866 { 265 {
867 query = "SELECT last_insert_rowid()"; 266 if (line.back() == '\r')
868 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
869 { 267 {
870 db_error(ppdb, query); 268 line.pop_back();
871 } 269 }
872 270
873 if (sqlite3_step(ppstmt) != SQLITE_ROW) 271 std::string wnid_s = line.substr(1, 8);
272 int wnid = stoi(wnid_s) + 100000000;
273 if (notionByWnid_.count(wnid))
874 { 274 {
875 db_error(ppdb, query); 275 // We know that this notion has a wnid and is a noun.
876 } 276 notionByWnid_.at(wnid)->incrementNumOfImages();
877
878 int rowid = sqlite3_column_int(ppstmt, 0);
879
880 sqlite3_finalize(ppstmt);
881
882 mapping.second.id = rowid;
883
884 for (auto pronunciation : pronunciations[canonical])
885 {
886 if (!pronunciation.rhyme.empty())
887 {
888 query = "INSERT INTO verb_pronunciations (verb_id, pronunciation, syllables, stress, prerhyme, rhyme) VALUES (?, ?, ?, ?, ?, ?)";
889 } else {
890 query = "INSERT INTO verb_pronunciations (verb_id, pronunciation, syllables, stress) VALUES (?, ?, ?, ?)";
891 }
892
893 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
894 {
895 db_error(ppdb, query);
896 }
897
898 sqlite3_bind_int(ppstmt, 1, rowid);
899 sqlite3_bind_text(ppstmt, 2, pronunciation.phonemes.c_str(), pronunciation.phonemes.length(), SQLITE_TRANSIENT);
900 sqlite3_bind_int(ppstmt, 3, pronunciation.syllables);
901 sqlite3_bind_text(ppstmt, 4, pronunciation.stress.c_str(), pronunciation.stress.length(), SQLITE_TRANSIENT);
902
903 if (!pronunciation.rhyme.empty())
904 {
905 sqlite3_bind_text(ppstmt, 5, pronunciation.prerhyme.c_str(), pronunciation.prerhyme.length(), SQLITE_TRANSIENT);
906 sqlite3_bind_text(ppstmt, 6, pronunciation.rhyme.c_str(), pronunciation.rhyme.length(), SQLITE_TRANSIENT);
907 }
908
909 if (sqlite3_step(ppstmt) != SQLITE_DONE)
910 {
911 db_error(ppdb, query);
912 }
913
914 sqlite3_finalize(ppstmt);
915 } 277 }
916 } 278 }
917
918 ppgs.update();
919 } 279 }
920 } 280
921 281 void generator::readWordNetSenseKeys()
922 {
923 progress ppgs("Writing verb frames...", groups.size());
924 for (auto& mapping : groups)
925 { 282 {
926 std::list<json> roledatal; 283 std::list<std::string> lines(readFile(wordNetPath_ + "wn_sk.pl"));
927 std::transform(std::begin(mapping.second.roles), std::end(mapping.second.roles), std::back_inserter(roledatal), [] (std::pair<std::string, selrestr_t> r) { 284 progress ppgs("Reading sense keys from WordNet...", lines.size());
928 json role;
929 role["type"] = r.first;
930 role["selrestrs"] = export_selrestrs(r.second);
931
932 return role;
933 });
934
935 json roledata(roledatal);
936 std::string rdm = roledata.dump();
937
938 sqlite3_stmt* ppstmt;
939 std::string query("INSERT INTO groups (data) VALUES (?)");
940 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
941 {
942 db_error(ppdb, query);
943 }
944
945 sqlite3_bind_blob(ppstmt, 1, rdm.c_str(), rdm.size(), SQLITE_TRANSIENT);
946
947 if (sqlite3_step(ppstmt) != SQLITE_DONE)
948 {
949 db_error(ppdb, query);
950 }
951 285
952 sqlite3_finalize(ppstmt); 286 for (std::string line : lines)
953
954 query = "SELECT last_insert_rowid()";
955 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
956 {
957 db_error(ppdb, query);
958 }
959
960 if (sqlite3_step(ppstmt) != SQLITE_ROW)
961 {
962 db_error(ppdb, query);
963 }
964
965 int gid = sqlite3_column_int(ppstmt, 0);
966 sqlite3_finalize(ppstmt);
967
968 for (auto frame : mapping.second.frames)
969 { 287 {
970 std::list<json> fdatap; 288 ppgs.update();
971 std::transform(std::begin(frame), std::end(frame), std::back_inserter(fdatap), [] (framepart_t& fp) {
972 json part;
973
974 switch (fp.type)
975 {
976 case framepart_t::type_t::np:
977 {
978 part["type"] = "np";
979 part["role"] = fp.role;
980 part["selrestrs"] = export_selrestrs(fp.selrestrs);
981 part["synrestrs"] = fp.synrestrs;
982
983 break;
984 }
985
986 case framepart_t::type_t::pp:
987 {
988 part["type"] = "pp";
989 part["values"] = fp.choices;
990 part["preprestrs"] = fp.preprestrs;
991
992 break;
993 }
994
995 case framepart_t::type_t::v:
996 {
997 part["type"] = "v";
998
999 break;
1000 }
1001
1002 case framepart_t::type_t::adj:
1003 {
1004 part["type"] = "adj";
1005
1006 break;
1007 }
1008
1009 case framepart_t::type_t::adv:
1010 {
1011 part["type"] = "adv";
1012
1013 break;
1014 }
1015
1016 case framepart_t::type_t::lex:
1017 {
1018 part["type"] = "lex";
1019 part["value"] = fp.lexval;
1020
1021 break;
1022 }
1023 }
1024
1025 return part;
1026 });
1027
1028 json fdata(fdatap);
1029 std::string marshall = fdata.dump();
1030
1031 query = "INSERT INTO frames (group_id, data) VALUES (?, ?)";
1032 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
1033 {
1034 db_error(ppdb, query);
1035 }
1036
1037 sqlite3_bind_int(ppstmt, 1, gid);
1038 sqlite3_bind_blob(ppstmt, 2, marshall.c_str(), marshall.length(), SQLITE_TRANSIENT);
1039 289
1040 if (sqlite3_step(ppstmt) != SQLITE_DONE) 290 // We only actually need to lookup verbs by sense key so we'll just
291 // ignore everything that isn't a verb.
292 std::regex relation("^sk\\((2\\d{8}),(\\d+),'(.+)'\\)\\.$");
293 std::smatch relation_data;
294 if (!std::regex_search(line, relation_data, relation))
1041 { 295 {
1042 db_error(ppdb, query); 296 continue;
1043 } 297 }
298
299 int synset_id = stoi(relation_data[1]);
300 int wnum = stoi(relation_data[2]);
301 std::string sense_key = relation_data[3];
1044 302
1045 sqlite3_finalize(ppstmt); 303 // We are treating this mapping as injective, which is not entirely
1046 } 304 // accurate. First, the WordNet table contains duplicate rows, so those
1047 305 // need to be ignored. More importantly, a small number of sense keys
1048 for (auto member : mapping.second.members) 306 // (one for each letter of the Latin alphabet, plus 9 other words) each
1049 { 307 // map to two different words in the same synset which differ only by
1050 if (verbs.count(member) == 1) 308 // capitalization. Luckily, none of these exceptions are verbs, so we
309 // can pretend that the mapping is injective.
310 if (!wnSenseKeys_.count(sense_key))
1051 { 311 {
1052 auto& v = verbs[member]; 312 std::pair<int, int> lookup(synset_id, wnum);
1053 313 if (wordByWnidAndWnum_.count(lookup))
1054 query = "INSERT INTO verb_groups (verb_id, group_id) VALUES (?, ?)";
1055 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
1056 {
1057 db_error(ppdb, query);
1058 }
1059
1060 sqlite3_bind_int(ppstmt, 1, v.id);
1061 sqlite3_bind_int(ppstmt, 2, gid);
1062
1063 if (sqlite3_step(ppstmt) != SQLITE_DONE)
1064 { 314 {
1065 db_error(ppdb, query); 315 wnSenseKeys_[sense_key] = wordByWnidAndWnum_.at(lookup);
1066 } 316 }
1067
1068 sqlite3_finalize(ppstmt);
1069 } 317 }
1070 } 318 }
1071
1072 ppgs.update();
1073 } 319 }
1074 } 320
1075 321 void generator::readVerbNet()
1076 // Get nouns/adjectives/adverbs from WordNet
1077 // Useful relations:
1078 // - s: master list
1079 // - ant: antonymy (e.g. happy/sad, sad/happy, happiness/sadness)
1080 // - at: variation (e.g. a measurement can be standard or nonstandard)
1081 // - der: derivation (e.g. happy/happily, happily/happy)
1082 // - hyp: hypernymy/hyponymy (e.g. color/red, color/blue)
1083 // - ins: instantiation (do we need this? let's see)
1084 // - mm: member meronymy/holonymy (e.g. family/mother, family/child)
1085 // - mp: part meronymy/holonymy (e.g. wheel/spoke, wheel/tire)
1086 // - ms: substance meronymy/holonymy (e.g. tire/rubber, doorstop/rubber)
1087 // - per: pertainymy (e.g. something that is Alaskan pertains to Alaska)
1088 // mannernymy (e.g. something done quickly is done in a manner that is quick)
1089 // - sa: specification (e.g. inaccurate (general) can mean imprecise or incorrect (specific))
1090 // - sim: synonymy (e.g. cheerful/happy, happy/cheerful)
1091 // - syntax: positioning flags for some adjectives
1092 std::string wnpref {argv[3]};
1093 if (wnpref.back() != '/')
1094 {
1095 wnpref += '/';
1096 }
1097
1098 // s table
1099 {
1100 std::ifstream wnsfile(wnpref + "wn_s.pl");
1101 if (!wnsfile.is_open())
1102 { 322 {
1103 std::cout << "Invalid WordNet data directory." << std::endl; 323 std::cout << "Reading frames from VerbNet..." << std::endl;
1104 print_usage();
1105 }
1106 324
1107 std::list<std::string> lines; 325 DIR* dir;
1108 for (;;) 326 if ((dir = opendir(verbNetPath_.c_str())) == nullptr)
1109 {
1110 std::string line;
1111 if (!getline(wnsfile, line))
1112 { 327 {
1113 break; 328 throw std::invalid_argument("Invalid VerbNet data directory");
1114 } 329 }
1115 330
1116 if (line.back() == '\r') 331 struct dirent* ent;
1117 { 332 while ((ent = readdir(dir)) != nullptr)
1118 line.pop_back();
1119 }
1120
1121 lines.push_back(line);
1122 }
1123
1124 progress ppgs("Writing nouns, adjectives, and adverbs...", lines.size());
1125 for (auto line : lines)
1126 {
1127 ppgs.update();
1128
1129 std::regex relation("^s\\(([134]\\d{8}),(\\d+),'(.+)',\\w,\\d+,\\d+\\)\\.$");
1130 std::smatch relation_data;
1131 if (!std::regex_search(line, relation_data, relation))
1132 { 333 {
1133 continue; 334 std::string filename(verbNetPath_);
1134 } 335
336 if (filename.back() != '/')
337 {
338 filename += '/';
339 }
1135 340
1136 int synset_id = stoi(relation_data[1]); 341 filename += ent->d_name;
1137 int wnum = stoi(relation_data[2]);
1138 std::string word = relation_data[3];
1139 size_t word_it;
1140 while ((word_it = word.find("''")) != std::string::npos)
1141 {
1142 word.erase(word_it, 1);
1143 }
1144 342
1145 std::string query; 343 if (filename.rfind(".xml") != filename.size() - 4)
1146 switch (synset_id / 100000000)
1147 {
1148 case 1: // Noun
1149 { 344 {
1150 if (nouns.count(word) == 1) 345 continue;
1151 {
1152 query = "INSERT INTO nouns (singular, proper, complexity, images, wnid, plural) VALUES (?, ?, ?, ?, ?, ?)";
1153 } else {
1154 query = "INSERT INTO nouns (singular, proper, complexity, images, wnid) VALUES (?, ?, ?, ?, ?)";
1155 }
1156
1157 break;
1158 } 346 }
1159 347
1160 case 2: // Verb 348 xmlDocPtr doc = xmlParseFile(filename.c_str());
349 if (doc == nullptr)
1161 { 350 {
1162 // Ignore 351 throw std::logic_error("Error opening " + filename);
1163
1164 break;
1165 } 352 }
1166 353
1167 case 3: // Adjective 354 xmlNodePtr top = xmlDocGetRootElement(doc);
355 if ((top == nullptr) || (xmlStrcmp(top->name, reinterpret_cast<const xmlChar*>("VNCLASS"))))
1168 { 356 {
1169 if (adjectives.count(word) == 1) 357 throw std::logic_error("Bad VerbNet file format: " + filename);
1170 {
1171 query = "INSERT INTO adjectives (base_form, complexity, comparative, superlative) VALUES (?, ?, ?, ?)";
1172 } else {
1173 query = "INSERT INTO adjectives (base_form, complexity) VALUES (?, ?)";
1174 }
1175
1176 break;
1177 } 358 }
1178 359
1179 case 4: // Adverb 360 try
1180 { 361 {
1181 if (adjectives.count(word) == 1) 362 createGroup(top);
1182 { 363 } catch (const std::exception& e)
1183 query = "INSERT INTO adverbs (base_form, complexity, comparative, superlative) VALUES (?, ?, ?, ?)"; 364 {
1184 } else { 365 std::throw_with_nested(std::logic_error("Error parsing VerbNet file: " + filename));
1185 query = "INSERT INTO adverbs (base_form, complexity) VALUES (?, ?)";
1186 }
1187
1188 break;
1189 } 366 }
1190 } 367 }
368
369 closedir(dir);
370 }
1191 371
1192 sqlite3_stmt* ppstmt; 372 void generator::readAgidInflections()
1193 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) 373 {
374 std::list<std::string> lines(readFile(agidPath_));
375 progress ppgs("Reading inflections from AGID...", lines.size());
376
377 for (std::string line : lines)
1194 { 378 {
1195 db_error(ppdb, query); 379 ppgs.update();
1196 } 380
381 int divider = line.find_first_of(" ");
382 std::string infinitive = line.substr(0, divider);
383 line = line.substr(divider+1);
384 char type = line[0];
1197 385
1198 sqlite3_bind_text(ppstmt, 1, word.c_str(), word.length(), SQLITE_TRANSIENT); 386 if (line[1] == '?')
1199 switch (synset_id / 100000000)
1200 {
1201 case 1: // Noun
1202 { 387 {
1203 sqlite3_bind_int(ppstmt, 2, (std::any_of(std::begin(word), std::end(word), [] (char ch) { 388 line.erase(0, 4);
1204 return isupper(ch); 389 } else {
1205 }) ? 1 : 0)); 390 line.erase(0, 3);
1206
1207 sqlite3_bind_int(ppstmt, 3, verbly::split<std::list<std::string>>(word, " ").size());
1208 sqlite3_bind_int(ppstmt, 4, images[synset_id]);
1209 sqlite3_bind_int(ppstmt, 5, synset_id);
1210
1211 if (nouns.count(word) == 1)
1212 {
1213 sqlite3_bind_text(ppstmt, 6, nouns[word].plural.c_str(), nouns[word].plural.length(), SQLITE_TRANSIENT);
1214 }
1215
1216 break;
1217 } 391 }
1218 392
1219 case 3: // Adjective 393 if (!lemmaByBaseForm_.count(infinitive) && (type != 'V'))
1220 case 4: // Adverb
1221 { 394 {
1222 sqlite3_bind_int(ppstmt, 2, verbly::split<std::list<std::string>>(word, " ").size()); 395 continue;
1223 396 }
1224 if (adjectives.count(word) == 1) 397
398 lemma& curLemma = lookupOrCreateLemma(infinitive);
399
400 auto forms = split<std::vector<std::string>>(line, " | ");
401 for (std::string& inflForm : forms)
402 {
403 int sympos = inflForm.find_first_of(",?");
404 if (sympos != std::string::npos)
1225 { 405 {
1226 sqlite3_bind_text(ppstmt, 3, adjectives[word].comparative.c_str(), adjectives[word].comparative.length(), SQLITE_TRANSIENT); 406 inflForm = inflForm.substr(0, sympos);
1227 sqlite3_bind_text(ppstmt, 4, adjectives[word].superlative.c_str(), adjectives[word].superlative.length(), SQLITE_TRANSIENT);
1228 } 407 }
1229
1230 break;
1231 } 408 }
1232 }
1233 409
1234 if (sqlite3_step(ppstmt) != SQLITE_DONE) 410 switch (type)
1235 {
1236 db_error(ppdb, query);
1237 }
1238
1239 sqlite3_finalize(ppstmt);
1240
1241 query = "SELECT last_insert_rowid()";
1242 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
1243 {
1244 db_error(ppdb, query);
1245 }
1246
1247 if (sqlite3_step(ppstmt) != SQLITE_ROW)
1248 {
1249 db_error(ppdb, query);
1250 }
1251
1252 int rowid = sqlite3_column_int(ppstmt, 0);
1253 wn[synset_id][wnum] = rowid;
1254
1255 sqlite3_finalize(ppstmt);
1256
1257 std::string canonical(word);
1258 std::transform(std::begin(canonical), std::end(canonical), std::begin(canonical), ::tolower);
1259 if (pronunciations.count(canonical) == 1)
1260 {
1261 for (auto pronunciation : pronunciations[canonical])
1262 { 411 {
1263 switch (synset_id / 100000000) 412 case 'V':
1264 { 413 {
1265 case 1: // Noun 414 if (forms.size() == 4)
1266 { 415 {
1267 if (!pronunciation.rhyme.empty()) 416 curLemma.addInflection(inflection::past_tense, lookupOrCreateForm(forms[0]));
1268 { 417 curLemma.addInflection(inflection::past_participle, lookupOrCreateForm(forms[1]));
1269 query = "INSERT INTO noun_pronunciations (noun_id, pronunciation, syllables, stress, prerhyme, rhyme) VALUES (?, ?, ?, ?, ?, ?)"; 418 curLemma.addInflection(inflection::ing_form, lookupOrCreateForm(forms[2]));
1270 } else { 419 curLemma.addInflection(inflection::s_form, lookupOrCreateForm(forms[3]));
1271 query = "INSERT INTO noun_pronunciations (noun_id, pronunciation, syllables, stress) VALUES (?, ?, ?, ?)"; 420 } else if (forms.size() == 3)
1272 }
1273
1274 break;
1275 }
1276
1277 case 3: // Adjective
1278 { 421 {
1279 if (!pronunciation.rhyme.empty()) 422 curLemma.addInflection(inflection::past_tense, lookupOrCreateForm(forms[0]));
1280 { 423 curLemma.addInflection(inflection::past_participle, lookupOrCreateForm(forms[0]));
1281 query = "INSERT INTO adjective_pronunciations (adjective_id, pronunciation, syllables, stress, prerhyme, rhyme) VALUES (?, ?, ?, ?, ?, ?)"; 424 curLemma.addInflection(inflection::ing_form, lookupOrCreateForm(forms[1]));
1282 } else { 425 curLemma.addInflection(inflection::s_form, lookupOrCreateForm(forms[2]));
1283 query = "INSERT INTO adjective_pronunciations (adjective_id, pronunciation, syllables, stress) VALUES (?, ?, ?, ?)"; 426 } else if (forms.size() == 8)
1284 } 427 {
1285 428 // As of AGID 2014.08.11, this is only "to be"
1286 break; 429 curLemma.addInflection(inflection::past_tense, lookupOrCreateForm(forms[0]));
430 curLemma.addInflection(inflection::past_participle, lookupOrCreateForm(forms[2]));
431 curLemma.addInflection(inflection::ing_form, lookupOrCreateForm(forms[3]));
432 curLemma.addInflection(inflection::s_form, lookupOrCreateForm(forms[4]));
433 } else {
434 // Words that don't fit the cases above as of AGID 2014.08.11:
435 // - may and shall do not conjugate the way we want them to
436 // - methinks only has a past tense and is an outlier
437 // - wit has five forms, and is archaic/obscure enough that we can ignore it for now
438 std::cout << " Ignoring verb \"" << infinitive << "\" due to non-standard number of forms." << std::endl;
1287 } 439 }
1288 440
1289 case 4: // Adverb 441 // For verbs in particular, we sometimes create a notion and a word
442 // from inflection data. Specifically, if there are not yet any
443 // verbs existing that have the same infinitive form. "Yet" means
444 // that this verb appears in the AGID data but not in either WordNet
445 // or VerbNet.
446 if (!wordsByBaseForm_.count(infinitive)
447 || !std::any_of(std::begin(wordsByBaseForm_.at(infinitive)), std::end(wordsByBaseForm_.at(infinitive)), [] (word* w) {
448 return w->getNotion().getPartOfSpeech() == part_of_speech::verb;
449 }))
1290 { 450 {
1291 if (!pronunciation.rhyme.empty()) 451 notion& n = createNotion(part_of_speech::verb);
1292 { 452 createWord(n, curLemma);
1293 query = "INSERT INTO adverb_pronunciations (adverb_id, pronunciation, syllables, stress, prerhyme, rhyme) VALUES (?, ?, ?, ?, ?, ?)";
1294 } else {
1295 query = "INSERT INTO adverb_pronunciations (adverb_id, pronunciation, syllables, stress) VALUES (?, ?, ?, ?)";
1296 }
1297
1298 break;
1299 } 453 }
1300 }
1301
1302 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
1303 {
1304 db_error(ppdb, query);
1305 }
1306
1307 sqlite3_bind_int(ppstmt, 1, rowid);
1308 sqlite3_bind_text(ppstmt, 2, pronunciation.phonemes.c_str(), pronunciation.phonemes.length(), SQLITE_TRANSIENT);
1309 sqlite3_bind_int(ppstmt, 3, pronunciation.syllables);
1310 sqlite3_bind_text(ppstmt, 4, pronunciation.stress.c_str(), pronunciation.stress.length(), SQLITE_TRANSIENT);
1311
1312 if (!pronunciation.rhyme.empty())
1313 {
1314 sqlite3_bind_text(ppstmt, 5, pronunciation.prerhyme.c_str(), pronunciation.prerhyme.length(), SQLITE_TRANSIENT);
1315 sqlite3_bind_text(ppstmt, 6, pronunciation.rhyme.c_str(), pronunciation.rhyme.length(), SQLITE_TRANSIENT);
1316 }
1317 454
1318 if (sqlite3_step(ppstmt) != SQLITE_DONE) 455 break;
1319 {
1320 db_error(ppdb, query);
1321 } 456 }
1322
1323 sqlite3_finalize(ppstmt);
1324 }
1325 }
1326 }
1327 }
1328
1329 // While we're working on s
1330 {
1331 progress ppgs("Writing word synonyms...", wn.size());
1332 for (auto sense : wn)
1333 {
1334 ppgs.update();
1335 457
1336 for (auto word1 : sense.second) 458 case 'A':
1337 {
1338 for (auto word2 : sense.second)
1339 {
1340 if (word1 != word2)
1341 { 459 {
1342 std::string query; 460 if (forms.size() == 2)
1343 switch (sense.first / 100000000)
1344 { 461 {
1345 case 1: // Noun 462 curLemma.addInflection(inflection::comparative, lookupOrCreateForm(forms[0]));
1346 { 463 curLemma.addInflection(inflection::superlative, lookupOrCreateForm(forms[1]));
1347 query = "INSERT INTO noun_synonymy (noun_1_id, noun_2_id) VALUES (?, ?)"; 464 } else {
1348 465 // As of AGID 2014.08.11, this is only "only", which has only the form "onliest"
1349 break; 466 std::cout << " Ignoring adjective/adverb \"" << infinitive << "\" due to non-standard number of forms." << std::endl;
1350 } 467 }
1351
1352 case 2: // Verb
1353 {
1354 // Ignore
1355
1356 break;
1357 }
1358
1359 case 3: // Adjective
1360 {
1361 query = "INSERT INTO adjective_synonymy (adjective_1_id, adjective_2_id) VALUES (?, ?)";
1362 468
1363 break; 469 break;
1364 } 470 }
1365 471
1366 case 4: // Adverb 472 case 'N':
1367 { 473 {
1368 query = "INSERT INTO adverb_synonymy (adverb_1_id, adverb_2_id) VALUES (?, ?)"; 474 if (forms.size() == 1)
1369
1370 break;
1371 }
1372 }
1373
1374 sqlite3_stmt* ppstmt;
1375 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
1376 {
1377 db_error(ppdb, query);
1378 }
1379
1380 sqlite3_bind_int(ppstmt, 1, word1.second);
1381 sqlite3_bind_int(ppstmt, 2, word2.second);
1382
1383 if (sqlite3_step(ppstmt) != SQLITE_DONE)
1384 { 475 {
1385 db_error(ppdb, query); 476 curLemma.addInflection(inflection::plural, lookupOrCreateForm(forms[0]));
477 } else {
478 // As of AGID 2014.08.11, this is non-existent.
479 std::cout << " Ignoring noun \"" << infinitive << "\" due to non-standard number of forms." << std::endl;
1386 } 480 }
1387 481
1388 sqlite3_finalize(ppstmt); 482 break;
1389 } 483 }
1390 } 484 }
1391 } 485 }
1392 } 486 }
1393 }
1394
1395 // ant table
1396 {
1397 std::ifstream wnantfile(wnpref + "wn_ant.pl");
1398 if (!wnantfile.is_open())
1399 {
1400 std::cout << "Invalid WordNet data directory." << std::endl;
1401 print_usage();
1402 }
1403
1404 std::list<std::string> lines;
1405 for (;;)
1406 {
1407 std::string line;
1408 if (!getline(wnantfile, line))
1409 {
1410 break;
1411 }
1412 487
1413 if (line.back() == '\r') 488 void generator::readPrepositions()
1414 {
1415 line.pop_back();
1416 }
1417
1418 lines.push_back(line);
1419 }
1420
1421 progress ppgs("Writing antonyms...", lines.size());
1422 for (auto line : lines)
1423 { 489 {
1424 ppgs.update(); 490 std::list<std::string> lines(readFile("prepositions.txt"));
491 progress ppgs("Reading prepositions...", lines.size());
1425 492
1426 std::regex relation("^ant\\(([134]\\d{8}),(\\d+),([134]\\d{8}),(\\d+)\\)\\."); 493 for (std::string line : lines)
1427 std::smatch relation_data;
1428 if (!std::regex_search(line, relation_data, relation))
1429 {
1430 continue;
1431 }
1432
1433 int synset_id_1 = stoi(relation_data[1]);
1434 int wnum_1 = stoi(relation_data[2]);
1435 int synset_id_2 = stoi(relation_data[3]);
1436 int wnum_2 = stoi(relation_data[4]);
1437
1438 std::string query;
1439 switch (synset_id_1 / 100000000)
1440 { 494 {
1441 case 1: // Noun 495 ppgs.update();
1442 {
1443 query = "INSERT INTO noun_antonymy (noun_1_id, noun_2_id) VALUES (?, ?)";
1444 496
1445 break; 497 std::regex relation("^([^:]+): (.+)");
1446 } 498 std::smatch relation_data;
1447 499 std::regex_search(line, relation_data, relation);
1448 case 2: // Verb 500 std::string prep = relation_data[1];
1449 { 501 auto groups = split<std::list<std::string>>(relation_data[2], ", ");
1450 // Ignore
1451 502
1452 break; 503 notion& n = createNotion(part_of_speech::preposition);
1453 } 504 lemma& l = lookupOrCreateLemma(prep);
1454 505 word& w = createWord(n, l);
1455 case 3: // Adjective
1456 {
1457 query = "INSERT INTO adjective_antonymy (adjective_1_id, adjective_2_id) VALUES (?, ?)";
1458 506
1459 break; 507 n.setPrepositionGroups(groups);
1460 }
1461
1462 case 4: // Adverb
1463 {
1464 query = "INSERT INTO adverb_antonymy (adverb_1_id, adverb_2_id) VALUES (?, ?)";
1465
1466 break;
1467 }
1468 }
1469
1470 sqlite3_stmt* ppstmt;
1471 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
1472 {
1473 db_error(ppdb, query);
1474 }
1475
1476 sqlite3_bind_int(ppstmt, 1, wn[synset_id_1][wnum_1]);
1477 sqlite3_bind_int(ppstmt, 2, wn[synset_id_2][wnum_2]);
1478
1479 if (sqlite3_step(ppstmt) != SQLITE_DONE)
1480 {
1481 db_error(ppdb, query);
1482 }
1483
1484 sqlite3_finalize(ppstmt);
1485 }
1486 }
1487
1488 // at table
1489 {
1490 std::ifstream wnatfile(wnpref + "wn_at.pl");
1491 if (!wnatfile.is_open())
1492 {
1493 std::cout << "Invalid WordNet data directory." << std::endl;
1494 print_usage();
1495 }
1496
1497 std::list<std::string> lines;
1498 for (;;)
1499 {
1500 std::string line;
1501 if (!getline(wnatfile, line))
1502 {
1503 break;
1504 } 508 }
1505
1506 if (line.back() == '\r')
1507 {
1508 line.pop_back();
1509 }
1510
1511 lines.push_back(line);
1512 } 509 }
1513 510
1514 progress ppgs("Writing variations...", lines.size()); 511 void generator::readCmudictPronunciations()
1515 for (auto line : lines)
1516 { 512 {
1517 ppgs.update(); 513 std::list<std::string> lines(readFile(cmudictPath_));
514 progress ppgs("Reading pronunciations from CMUDICT...", lines.size());
1518 515
1519 std::regex relation("^at\\((1\\d{8}),(3\\d{8})\\)\\."); 516 for (std::string line : lines)
1520 std::smatch relation_data;
1521 if (!std::regex_search(line, relation_data, relation))
1522 { 517 {
1523 continue; 518 ppgs.update();
1524 } 519
1525 520 std::regex phoneme("([A-Z][^ \\(]*)(?:\\(\\d+\\))? ([A-Z 0-9]+)");
1526 int synset_id_1 = stoi(relation_data[1]); 521 std::smatch phoneme_data;
1527 int synset_id_2 = stoi(relation_data[2]); 522 if (std::regex_search(line, phoneme_data, phoneme))
1528 std::string query("INSERT INTO variation (noun_id, adjective_id) VALUES (?, ?)");
1529
1530 for (auto mapping1 : wn[synset_id_1])
1531 {
1532 for (auto mapping2 : wn[synset_id_2])
1533 { 523 {
1534 sqlite3_stmt* ppstmt; 524 std::string canonical(phoneme_data[1]);
1535 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK) 525 std::transform(std::begin(canonical), std::end(canonical), std::begin(canonical), ::tolower);
1536 {
1537 db_error(ppdb, query);
1538 }
1539
1540 sqlite3_bind_int(ppstmt, 1, mapping1.second);
1541 sqlite3_bind_int(ppstmt, 2, mapping2.second);
1542 526
1543 if (sqlite3_step(ppstmt) != SQLITE_DONE) 527 if (!formByText_.count(canonical))
1544 { 528 {
1545 db_error(ppdb, query); 529 continue;
1546 } 530 }
1547 531
1548 sqlite3_finalize(ppstmt); 532 std::string phonemes = phoneme_data[2];
533 pronunciations_.emplace_back(phonemes);
534 pronunciation& p = pronunciations_.back();
535 formByText_.at(canonical)->addPronunciation(p);
1549 } 536 }
1550 } 537 }
1551 } 538 }
1552 }
1553
1554 // der table
1555 {
1556 std::ifstream wnderfile(wnpref + "wn_der.pl");
1557 if (!wnderfile.is_open())
1558 {
1559 std::cout << "Invalid WordNet data directory." << std::endl;
1560 print_usage();
1561 }
1562 539
1563 std::list<std::string> lines; 540 void generator::writeSchema()
1564 for (;;)
1565 { 541 {
1566 std::string line; 542 std::ifstream file("schema.sql");
1567 if (!getline(wnderfile, line)) 543 if (!file)
1568 { 544 {
1569 break; 545 throw std::invalid_argument("Could not find database schema");
1570 } 546 }
1571 547
1572 if (line.back() == '\r') 548 std::ostringstream schemaBuilder;
549 std::string line;
550 while (std::getline(file, line))
1573 { 551 {
1574 line.pop_back(); 552 if (line.back() == '\r')
553 {
554 line.pop_back();
555 }
556
557 schemaBuilder << line;
1575 } 558 }
1576 559
1577 lines.push_back(line); 560 std::string schema = schemaBuilder.str();
561 auto queries = split<std::list<std::string>>(schema, ";");
562 progress ppgs("Writing database schema...", queries.size());
563 for (std::string query : queries)
564 {
565 if (!queries.empty())
566 {
567 db_.runQuery(query);
568 }
569
570 ppgs.update();
571 }
1578 } 572 }
1579 573
1580 progress ppgs("Writing morphological derivation...", lines.size()); 574 void generator::dumpObjects()
1581 for (auto line : lines)
1582 { 575 {
1583 ppgs.update();
1584
1585 std::regex relation("^der\\(([134]\\d{8}),(\\d+),([134]\\d{8}),(\\d+)\\)\\.");
1586 std::smatch relation_data;
1587 if (!std::regex_search(line, relation_data, relation))
1588 { 576 {
1589 continue; 577 progress ppgs("Writing notions...", notions_.size());
578
579 for (notion& n : notions_)
580 {
581 db_ << n;
582
583 ppgs.update();
584 }
1590 } 585 }
1591 586
1592 int synset_id_1 = stoi(relation_data[1]);
1593 int wnum_1 = stoi(relation_data[2]);
1594 int synset_id_2 = stoi(relation_data[3]);
1595 int wnum_2 = stoi(relation_data[4]);
1596 std::string query;
1597 switch (synset_id_1 / 100000000)
1598 { 587 {
1599 case 1: // Noun 588 progress ppgs("Writing words...", words_.size());
589
590 for (word& w : words_)
1600 { 591 {
1601 switch (synset_id_2 / 100000000) 592 db_ << w;
1602 {
1603 case 1: // Noun
1604 {
1605 query = "INSERT INTO noun_noun_derivation (noun_1_id, noun_2_id) VALUES (?, ?)";
1606 break;
1607 }
1608
1609 case 3: // Adjective
1610 {
1611 query = "INSERT INTO noun_adjective_derivation (noun_id, adjective_id) VALUES (?, ?)";
1612 break;
1613 }
1614
1615 case 4: // Adverb
1616 {
1617 query = "INSERT INTO noun_adverb_derivation (noun_id, adverb_id) VALUES (?, ?)";
1618 break;
1619 }
1620 }
1621 593
1622 break; 594 ppgs.update();
1623 } 595 }
596 }
597
598 {
599 progress ppgs("Writing lemmas...", lemmas_.size());
1624 600
1625 case 3: // Adjective 601 for (lemma& l : lemmas_)
1626 { 602 {
1627 switch (synset_id_2 / 100000000) 603 db_ << l;
1628 {
1629 case 1: // Noun
1630 {
1631 query = "INSERT INTO noun_adjective_derivation (adjective_id, noun_id) VALUES (?, ?)";
1632 break;
1633 }
1634
1635 case 3: // Adjective
1636 {
1637 query = "INSERT INTO adjective_adjective_derivation (adjective_id, adjective_id) VALUES (?, ?)";
1638 break;
1639 }
1640
1641 case 4: // Adverb
1642 {
1643 query = "INSERT INTO adjective_adverb_derivation (adjective_id, adverb_id) VALUES (?, ?)";
1644 break;
1645 }
1646 }
1647 604
1648 break; 605 ppgs.update();
1649 } 606 }
607 }
608
609 {
610 progress ppgs("Writing forms...", forms_.size());
1650 611
1651 case 4: // Adverb 612 for (form& f : forms_)
1652 { 613 {
1653 switch (synset_id_2 / 100000000) 614 db_ << f;
1654 {
1655 case 1: // Noun
1656 {
1657 query = "INSERT INTO noun_adverb_derivation (adverb_id, noun_id) VALUES (?, ?)";
1658 break;
1659 }
1660
1661 case 3: // Adjective
1662 {
1663 query = "INSERT INTO adjective_adverb_derivation (adverb_id, adjective_id) VALUES (?, ?)";
1664 break;
1665 }
1666
1667 case 4: // Adverb
1668 {
1669 query = "INSERT INTO adverb_adverb_derivation (adverb_1_id, adverb_2_id) VALUES (?, ?)";
1670 break;
1671 }
1672 }
1673 615
1674 break; 616 ppgs.update();
1675 } 617 }
1676 } 618 }
1677 619
1678 sqlite3_stmt* ppstmt;
1679 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK)
1680 { 620 {
1681 db_error(ppdb, query); 621 progress ppgs("Writing pronunciations...", pronunciations_.size());
622
623 for (pronunciation& p : pronunciations_)
624 {
625 db_ << p;
626
627 ppgs.update();
628 }
1682 } 629 }
1683 630
1684 sqlite3_bind_int(ppstmt, 1, wn[synset_id_1][wnum_1]);
1685 sqlite3_bind_int(ppstmt, 2, wn[synset_id_2][wnum_2]);
1686
1687 if (sqlite3_step(ppstmt) != SQLITE_DONE)
1688 { 631 {
1689 db_error(ppdb, query); 632 progress ppgs("Writing verb groups...", groups_.size());
633
634 for (group& g : groups_)
635 {
636 db_ << g;
637
638 ppgs.update();
639 }
1690 } 640 }
1691 641
1692 sqlite3_finalize(ppstmt);
1693 }
1694 }
1695
1696 // hyp table
1697 {
1698 std::ifstream wnhypfile(wnpref + "wn_hyp.pl");
1699 if (!wnhypfile.is_open())
1700 {
1701 std::cout << "Invalid WordNet data directory." << std::endl;
1702 print_usage();
1703 }
1704
1705 std::list<std::string> lines;
1706 for (;;)
1707 {
1708 std::string line;
1709 if (!getline(wnhypfile, line))
1710 {
1711 break;
1712 }
1713
1714 if (line.back() == '\r')
1715 { 642 {
1716 line.pop_back(); 643 progress ppgs("Writing verb frames...", frames_.size());
644
645 for (frame& f : frames_)
646 {
647 db_ << f;
648
649 ppgs.update();
650 }
1717 } 651 }
1718
1719 lines.push_back(line);
1720 } 652 }
1721 653
1722 progress ppgs("Writing hypernyms...", lines.size()); 654 void generator::readWordNetAntonymy()
1723 for (auto line : lines)
1724 { 655 {
1725 ppgs.update(); 656 std::list<std::string> lines(readFile(wordNetPath_ + "wn_ant.pl"));
1726 657 progress ppgs("Writing antonyms...", lines.size());
1727 std::regex relation("^hyp\\((1\\d{8}),(1\\d{8})\\)\\."); 658 for (auto line : lines)
1728 std::smatch relation_data;
1729 if (!std::regex_search(line, relation_data, relation))
1730 { 659 {
1731 continue; 660 ppgs.update();
1732 }
1733
1734 int synset_id_1 = stoi(relation_data[1]);
1735 int synset_id_2 = stoi(relation_data[2]);
1736 std::string query("INSERT INTO hypernymy (hyponym_id, hypernym_id) VALUES (?, ?)");
1737 661
1738 for (auto mapping1 : wn[synset_id_1]) 662 std::regex relation("^ant\\(([134]\\d{8}),(\\d+),([134]\\d{8}),(\\d+)\\)\\.");
1739 { 663 std::smatch relation_data;
1740 for (auto mapping2 : wn[synset_id_2]) 664 if (!std::regex_search(line, relation_data, relation))
1741 { 665 {
1742 sqlite3_stmt* ppstmt; 666 continue;
1743 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK) 667 }
1744 { 668
1745 db_error(ppdb, query); 669 std::pair<int, int> lookup1(std::stoi(relation_data[1]), std::stoi(relation_data[2]));
1746 } 670 std::pair<int, int> lookup2(std::stoi(relation_data[3]), std::stoi(relation_data[4]));
1747 671
1748 sqlite3_bind_int(ppstmt, 1, mapping1.second); 672 if (wordByWnidAndWnum_.count(lookup1) && wordByWnidAndWnum_.count(lookup2))
1749 sqlite3_bind_int(ppstmt, 2, mapping2.second); 673 {
674 word& word1 = *wordByWnidAndWnum_.at(lookup1);
675 word& word2 = *wordByWnidAndWnum_.at(lookup2);
1750 676
1751 if (sqlite3_step(ppstmt) != SQLITE_DONE) 677 std::list<field> fields;
1752 { 678 fields.emplace_back("antonym_1_id", word1.getId());
1753 db_error(ppdb, query); 679 fields.emplace_back("antonym_2_id", word2.getId());
1754 }
1755 680
1756 sqlite3_finalize(ppstmt); 681 db_.insertIntoTable("antonymy", std::move(fields));
1757 } 682 }
1758 } 683 }
1759 } 684 }
1760 }
1761
1762 // ins table
1763 {
1764 std::ifstream wninsfile(wnpref + "wn_ins.pl");
1765 if (!wninsfile.is_open())
1766 {
1767 std::cout << "Invalid WordNet data directory." << std::endl;
1768 print_usage();
1769 }
1770
1771 std::list<std::string> lines;
1772 for (;;)
1773 {
1774 std::string line;
1775 if (!getline(wninsfile, line))
1776 {
1777 break;
1778 }
1779 685
1780 if (line.back() == '\r') 686 void generator::readWordNetVariation()
687 {
688 std::list<std::string> lines(readFile(wordNetPath_ + "wn_at.pl"));
689 progress ppgs("Writing variation...", lines.size());
690 for (auto line : lines)
1781 { 691 {
1782 line.pop_back(); 692 ppgs.update();
1783 }
1784 693
1785 lines.push_back(line); 694 std::regex relation("^at\\((1\\d{8}),(3\\d{8})\\)\\.");
695 std::smatch relation_data;
696 if (!std::regex_search(line, relation_data, relation))
697 {
698 continue;
699 }
700
701 int lookup1 = std::stoi(relation_data[1]);
702 int lookup2 = std::stoi(relation_data[2]);
703
704 if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2))
705 {
706 notion& notion1 = *notionByWnid_.at(lookup1);
707 notion& notion2 = *notionByWnid_.at(lookup2);
708
709 std::list<field> fields;
710 fields.emplace_back("noun_id", notion1.getId());
711 fields.emplace_back("adjective_id", notion2.getId());
712
713 db_.insertIntoTable("variation", std::move(fields));
714 }
715 }
1786 } 716 }
1787 717
1788 progress ppgs("Writing instantiations...", lines.size()); 718 void generator::readWordNetClasses()
1789 for (auto line : lines)
1790 { 719 {
1791 ppgs.update(); 720 std::list<std::string> lines(readFile(wordNetPath_ + "wn_cls.pl"));
1792 721 progress ppgs("Writing usage, topicality, and regionality...", lines.size());
1793 std::regex relation("^ins\\((1\\d{8}),(1\\d{8})\\)\\."); 722 for (auto line : lines)
1794 std::smatch relation_data;
1795 if (!std::regex_search(line, relation_data, relation))
1796 { 723 {
1797 continue; 724 ppgs.update();
1798 }
1799
1800 int synset_id_1 = stoi(relation_data[1]);
1801 int synset_id_2 = stoi(relation_data[2]);
1802 std::string query("INSERT INTO instantiation (instance_id, class_id) VALUES (?, ?)");
1803 725
1804 for (auto mapping1 : wn[synset_id_1]) 726 std::regex relation("^cls\\(([134]\\d{8}),(\\d+),(1\\d{8}),(\\d+),([tur])\\)\\.");
1805 { 727 std::smatch relation_data;
1806 for (auto mapping2 : wn[synset_id_2]) 728 if (!std::regex_search(line, relation_data, relation))
729 {
730 continue;
731 }
732
733 std::pair<int, int> lookup1(std::stoi(relation_data[1]), std::stoi(relation_data[2]));
734 std::pair<int, int> lookup2(std::stoi(relation_data[3]), std::stoi(relation_data[4]));
735 std::string class_type = relation_data[5];
736
737 std::string table_name;
738 if (class_type == "t")
739 {
740 table_name += "topicality";
741 } else if (class_type == "u")
742 {
743 table_name += "usage";
744 } else if (class_type == "r")
745 {
746 table_name += "regionality";
747 }
748
749 std::list<int> leftJoin;
750 std::list<int> rightJoin;
751
752 if ((lookup1.second == 0) && (wordsByWnid_.count(lookup1.first)))
1807 { 753 {
1808 sqlite3_stmt* ppstmt; 754 std::transform(std::begin(wordsByWnid_.at(lookup1.first)), std::end(wordsByWnid_.at(lookup1.first)), std::back_inserter(leftJoin), [] (word* w) {
1809 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK) 755 return w->getId();
756 });
757 } else if (wordByWnidAndWnum_.count(lookup1)) {
758 leftJoin.push_back(wordByWnidAndWnum_.at(lookup1)->getId());
759 }
760
761 if ((lookup2.second == 0) && (wordsByWnid_.count(lookup2.first)))
762 {
763 std::transform(std::begin(wordsByWnid_.at(lookup2.first)), std::end(wordsByWnid_.at(lookup2.first)), std::back_inserter(rightJoin), [] (word* w) {
764 return w->getId();
765 });
766 } else if (wordByWnidAndWnum_.count(lookup2)) {
767 rightJoin.push_back(wordByWnidAndWnum_.at(lookup2)->getId());
768 }
769
770 for (int word1 : leftJoin)
771 {
772 for (int word2 : rightJoin)
1810 { 773 {
1811 db_error(ppdb, query); 774 std::list<field> fields;
1812 } 775 fields.emplace_back("term_id", word1);
776 fields.emplace_back("domain_id", word2);
1813 777
1814 sqlite3_bind_int(ppstmt, 1, mapping1.second); 778 db_.insertIntoTable(table_name, std::move(fields));
1815 sqlite3_bind_int(ppstmt, 2, mapping2.second);
1816
1817 if (sqlite3_step(ppstmt) != SQLITE_DONE)
1818 {
1819 db_error(ppdb, query);
1820 } 779 }
1821
1822 sqlite3_finalize(ppstmt);
1823 } 780 }
1824 } 781 }
1825 } 782 }
1826 }
1827
1828 // mm table
1829 {
1830 std::ifstream wnmmfile(wnpref + "wn_mm.pl");
1831 if (!wnmmfile.is_open())
1832 {
1833 std::cout << "Invalid WordNet data directory." << std::endl;
1834 print_usage();
1835 }
1836
1837 std::list<std::string> lines;
1838 for (;;)
1839 {
1840 std::string line;
1841 if (!getline(wnmmfile, line))
1842 {
1843 break;
1844 }
1845 783
1846 if (line.back() == '\r') 784 void generator::readWordNetCausality()
785 {
786 std::list<std::string> lines(readFile(wordNetPath_ + "wn_cs.pl"));
787 progress ppgs("Writing causality...", lines.size());
788 for (auto line : lines)
1847 { 789 {
1848 line.pop_back(); 790 ppgs.update();
1849 }
1850 791
1851 lines.push_back(line); 792 std::regex relation("^cs\\((2\\d{8}),(2\\d{8})\\)\\.");
793 std::smatch relation_data;
794 if (!std::regex_search(line, relation_data, relation))
795 {
796 continue;
797 }
798
799 int lookup1 = std::stoi(relation_data[1]);
800 int lookup2 = std::stoi(relation_data[2]);
801
802 if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2))
803 {
804 notion& notion1 = *notionByWnid_.at(lookup1);
805 notion& notion2 = *notionByWnid_.at(lookup2);
806
807 std::list<field> fields;
808 fields.emplace_back("effect_id", notion1.getId());
809 fields.emplace_back("cause_id", notion2.getId());
810
811 db_.insertIntoTable("causality", std::move(fields));
812 }
813 }
1852 } 814 }
1853 815
1854 progress ppgs("Writing member meronyms...", lines.size()); 816 void generator::readWordNetEntailment()
1855 for (auto line : lines)
1856 { 817 {
1857 ppgs.update(); 818 std::list<std::string> lines(readFile(wordNetPath_ + "wn_ent.pl"));
1858 819 progress ppgs("Writing entailment...", lines.size());
1859 std::regex relation("^mm\\((1\\d{8}),(1\\d{8})\\)\\."); 820 for (auto line : lines)
1860 std::smatch relation_data;
1861 if (!std::regex_search(line, relation_data, relation))
1862 { 821 {
1863 continue; 822 ppgs.update();
1864 }
1865 823
1866 int synset_id_1 = stoi(relation_data[1]); 824 std::regex relation("^ent\\((2\\d{8}),(2\\d{8})\\)\\.");
1867 int synset_id_2 = stoi(relation_data[2]); 825 std::smatch relation_data;
1868 std::string query("INSERT INTO member_meronymy (holonym_id, meronym_id) VALUES (?, ?)"); 826 if (!std::regex_search(line, relation_data, relation))
1869
1870 for (auto mapping1 : wn[synset_id_1])
1871 {
1872 for (auto mapping2 : wn[synset_id_2])
1873 { 827 {
1874 sqlite3_stmt* ppstmt; 828 continue;
1875 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK) 829 }
1876 { 830
1877 db_error(ppdb, query); 831 int lookup1 = std::stoi(relation_data[1]);
1878 } 832 int lookup2 = std::stoi(relation_data[2]);
1879 833
1880 sqlite3_bind_int(ppstmt, 1, mapping1.second); 834 if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2))
1881 sqlite3_bind_int(ppstmt, 2, mapping2.second); 835 {
836 notion& notion1 = *notionByWnid_.at(lookup1);
837 notion& notion2 = *notionByWnid_.at(lookup2);
1882 838
1883 if (sqlite3_step(ppstmt) != SQLITE_DONE) 839 std::list<field> fields;
1884 { 840 fields.emplace_back("given_id", notion1.getId());
1885 db_error(ppdb, query); 841 fields.emplace_back("entailment_id", notion2.getId());
1886 }
1887 842
1888 sqlite3_finalize(ppstmt); 843 db_.insertIntoTable("entailment", std::move(fields));
1889 } 844 }
1890 } 845 }
1891 } 846 }
1892 } 847
1893 848 void generator::readWordNetHypernymy()
1894 // ms table
1895 {
1896 std::ifstream wnmsfile(wnpref + "wn_ms.pl");
1897 if (!wnmsfile.is_open())
1898 {
1899 std::cout << "Invalid WordNet data directory." << std::endl;
1900 print_usage();
1901 }
1902
1903 std::list<std::string> lines;
1904 for (;;)
1905 { 849 {
1906 std::string line; 850 std::list<std::string> lines(readFile(wordNetPath_ + "wn_hyp.pl"));
1907 if (!getline(wnmsfile, line)) 851 progress ppgs("Writing hypernymy...", lines.size());
852 for (auto line : lines)
1908 { 853 {
1909 break; 854 ppgs.update();
855
856 std::regex relation("^hyp\\(([12]\\d{8}),([12]\\d{8})\\)\\.");
857 std::smatch relation_data;
858 if (!std::regex_search(line, relation_data, relation))
859 {
860 continue;
861 }
862
863 int lookup1 = std::stoi(relation_data[1]);
864 int lookup2 = std::stoi(relation_data[2]);
865
866 if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2))
867 {
868 notion& notion1 = *notionByWnid_.at(lookup1);
869 notion& notion2 = *notionByWnid_.at(lookup2);
870
871 std::list<field> fields;
872 fields.emplace_back("hyponym_id", notion1.getId());
873 fields.emplace_back("hypernym_id", notion2.getId());
874
875 db_.insertIntoTable("hypernymy", std::move(fields));
876 }
1910 } 877 }
878 }
1911 879
1912 if (line.back() == '\r') 880 void generator::readWordNetInstantiation()
881 {
882 std::list<std::string> lines(readFile(wordNetPath_ + "wn_ins.pl"));
883 progress ppgs("Writing instantiation...", lines.size());
884 for (auto line : lines)
1913 { 885 {
1914 line.pop_back(); 886 ppgs.update();
1915 }
1916 887
1917 lines.push_back(line); 888 std::regex relation("^ins\\((1\\d{8}),(1\\d{8})\\)\\.");
889 std::smatch relation_data;
890 if (!std::regex_search(line, relation_data, relation))
891 {
892 continue;
893 }
894
895 int lookup1 = std::stoi(relation_data[1]);
896 int lookup2 = std::stoi(relation_data[2]);
897
898 if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2))
899 {
900 notion& notion1 = *notionByWnid_.at(lookup1);
901 notion& notion2 = *notionByWnid_.at(lookup2);
902
903 std::list<field> fields;
904 fields.emplace_back("instance_id", notion1.getId());
905 fields.emplace_back("class_id", notion2.getId());
906
907 db_.insertIntoTable("instantiation", std::move(fields));
908 }
909 }
1918 } 910 }
1919 911
1920 progress ppgs("Writing substance meronyms...", lines.size()); 912 void generator::readWordNetMemberMeronymy()
1921 for (auto line : lines)
1922 { 913 {
1923 ppgs.update(); 914 std::list<std::string> lines(readFile(wordNetPath_ + "wn_mm.pl"));
1924 915 progress ppgs("Writing member meronymy...", lines.size());
1925 std::regex relation("^ms\\((1\\d{8}),(1\\d{8})\\)\\."); 916 for (auto line : lines)
1926 std::smatch relation_data;
1927 if (!std::regex_search(line, relation_data, relation))
1928 { 917 {
1929 continue; 918 ppgs.update();
1930 }
1931
1932 int synset_id_1 = stoi(relation_data[1]);
1933 int synset_id_2 = stoi(relation_data[2]);
1934 std::string query("INSERT INTO substance_meronymy (holonym_id, meronym_id) VALUES (?, ?)");
1935 919
1936 for (auto mapping1 : wn[synset_id_1]) 920 std::regex relation("^mm\\((1\\d{8}),(1\\d{8})\\)\\.");
1937 { 921 std::smatch relation_data;
1938 for (auto mapping2 : wn[synset_id_2]) 922 if (!std::regex_search(line, relation_data, relation))
1939 { 923 {
1940 sqlite3_stmt* ppstmt; 924 continue;
1941 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK) 925 }
1942 { 926
1943 db_error(ppdb, query); 927 int lookup1 = std::stoi(relation_data[1]);
1944 } 928 int lookup2 = std::stoi(relation_data[2]);
929
930 if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2))
931 {
932 notion& notion1 = *notionByWnid_.at(lookup1);
933 notion& notion2 = *notionByWnid_.at(lookup2);
1945 934
1946 sqlite3_bind_int(ppstmt, 1, mapping1.second); 935 std::list<field> fields;
1947 sqlite3_bind_int(ppstmt, 2, mapping2.second); 936 fields.emplace_back("holonym_id", notion1.getId());
937 fields.emplace_back("meronym_id", notion2.getId());
1948 938
1949 if (sqlite3_step(ppstmt) != SQLITE_DONE) 939 db_.insertIntoTable("member_meronymy", std::move(fields));
1950 {
1951 db_error(ppdb, query);
1952 }
1953
1954 sqlite3_finalize(ppstmt);
1955 } 940 }
1956 } 941 }
1957 } 942 }
1958 } 943
1959 944 void generator::readWordNetPartMeronymy()
1960 // mm table
1961 {
1962 std::ifstream wnmpfile(wnpref + "wn_mp.pl");
1963 if (!wnmpfile.is_open())
1964 {
1965 std::cout << "Invalid WordNet data directory." << std::endl;
1966 print_usage();
1967 }
1968
1969 std::list<std::string> lines;
1970 for (;;)
1971 { 945 {
1972 std::string line; 946 std::list<std::string> lines(readFile(wordNetPath_ + "wn_mp.pl"));
1973 if (!getline(wnmpfile, line)) 947 progress ppgs("Writing part meronymy...", lines.size());
948 for (auto line : lines)
1974 { 949 {
1975 break; 950 ppgs.update();
951
952 std::regex relation("^mp\\((1\\d{8}),(1\\d{8})\\)\\.");
953 std::smatch relation_data;
954 if (!std::regex_search(line, relation_data, relation))
955 {
956 continue;
957 }
958
959 int lookup1 = std::stoi(relation_data[1]);
960 int lookup2 = std::stoi(relation_data[2]);
961
962 if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2))
963 {
964 notion& notion1 = *notionByWnid_.at(lookup1);
965 notion& notion2 = *notionByWnid_.at(lookup2);
966
967 std::list<field> fields;
968 fields.emplace_back("holonym_id", notion1.getId());
969 fields.emplace_back("meronym_id", notion2.getId());
970
971 db_.insertIntoTable("part_meronymy", std::move(fields));
972 }
1976 } 973 }
974 }
1977 975
1978 if (line.back() == '\r') 976 void generator::readWordNetSubstanceMeronymy()
977 {
978 std::list<std::string> lines(readFile(wordNetPath_ + "wn_ms.pl"));
979 progress ppgs("Writing substance meronymy...", lines.size());
980 for (auto line : lines)
1979 { 981 {
1980 line.pop_back(); 982 ppgs.update();
1981 }
1982 983
1983 lines.push_back(line); 984 std::regex relation("^ms\\((1\\d{8}),(1\\d{8})\\)\\.");
985 std::smatch relation_data;
986 if (!std::regex_search(line, relation_data, relation))
987 {
988 continue;
989 }
990
991 int lookup1 = std::stoi(relation_data[1]);
992 int lookup2 = std::stoi(relation_data[2]);
993
994 if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2))
995 {
996 notion& notion1 = *notionByWnid_.at(lookup1);
997 notion& notion2 = *notionByWnid_.at(lookup2);
998
999 std::list<field> fields;
1000 fields.emplace_back("holonym_id", notion1.getId());
1001 fields.emplace_back("meronym_id", notion2.getId());
1002
1003 db_.insertIntoTable("substance_meronymy", std::move(fields));
1004 }
1005 }
1984 } 1006 }
1985 1007
1986 progress ppgs("Writing part meronyms...", lines.size()); 1008 void generator::readWordNetPertainymy()
1987 for (auto line : lines)
1988 { 1009 {
1989 ppgs.update(); 1010 std::list<std::string> lines(readFile(wordNetPath_ + "wn_per.pl"));
1990 1011 progress ppgs("Writing pertainymy and mannernymy...", lines.size());
1991 std::regex relation("^mp\\((1\\d{8}),(1\\d{8})\\)\\."); 1012 for (auto line : lines)
1992 std::smatch relation_data;
1993 if (!std::regex_search(line, relation_data, relation))
1994 { 1013 {
1995 continue; 1014 ppgs.update();
1996 }
1997
1998 int synset_id_1 = stoi(relation_data[1]);
1999 int synset_id_2 = stoi(relation_data[2]);
2000 std::string query("INSERT INTO part_meronymy (holonym_id, meronym_id) VALUES (?, ?)");
2001 1015
2002 for (auto mapping1 : wn[synset_id_1]) 1016 std::regex relation("^per\\(([34]\\d{8}),(\\d+),([13]\\d{8}),(\\d+)\\)\\.");
2003 { 1017 std::smatch relation_data;
2004 for (auto mapping2 : wn[synset_id_2]) 1018 if (!std::regex_search(line, relation_data, relation))
2005 { 1019 {
2006 sqlite3_stmt* ppstmt; 1020 continue;
2007 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK) 1021 }
2008 { 1022
2009 db_error(ppdb, query); 1023 std::pair<int, int> lookup1(std::stoi(relation_data[1]), std::stoi(relation_data[2]));
2010 } 1024 std::pair<int, int> lookup2(std::stoi(relation_data[3]), std::stoi(relation_data[4]));
1025
1026 if (wordByWnidAndWnum_.count(lookup1) && wordByWnidAndWnum_.count(lookup2))
1027 {
1028 word& word1 = *wordByWnidAndWnum_.at(lookup1);
1029 word& word2 = *wordByWnidAndWnum_.at(lookup2);
2011 1030
2012 sqlite3_bind_int(ppstmt, 1, mapping1.second); 1031 if (word1.getNotion().getPartOfSpeech() == part_of_speech::adjective)
2013 sqlite3_bind_int(ppstmt, 2, mapping2.second); 1032 {
1033 std::list<field> fields;
1034 fields.emplace_back("pertainym_id", word1.getId());
1035 fields.emplace_back("noun_id", word2.getId());
2014 1036
2015 if (sqlite3_step(ppstmt) != SQLITE_DONE) 1037 db_.insertIntoTable("pertainymy", std::move(fields));
1038 } else if (word1.getNotion().getPartOfSpeech() == part_of_speech::adverb)
2016 { 1039 {
2017 db_error(ppdb, query); 1040 std::list<field> fields;
2018 } 1041 fields.emplace_back("mannernym_id", word1.getId());
1042 fields.emplace_back("adjective_id", word2.getId());
2019 1043
2020 sqlite3_finalize(ppstmt); 1044 db_.insertIntoTable("mannernymy", std::move(fields));
1045 }
2021 } 1046 }
2022 } 1047 }
2023 } 1048 }
2024 }
2025
2026 // per table
2027 {
2028 std::ifstream wnperfile(wnpref + "wn_per.pl");
2029 if (!wnperfile.is_open())
2030 {
2031 std::cout << "Invalid WordNet data directory." << std::endl;
2032 print_usage();
2033 }
2034
2035 std::list<std::string> lines;
2036 for (;;)
2037 {
2038 std::string line;
2039 if (!getline(wnperfile, line))
2040 {
2041 break;
2042 }
2043 1049
2044 if (line.back() == '\r') 1050 void generator::readWordNetSpecification()
1051 {
1052 std::list<std::string> lines(readFile(wordNetPath_ + "wn_sa.pl"));
1053 progress ppgs("Writing specifications...", lines.size());
1054 for (auto line : lines)
2045 { 1055 {
2046 line.pop_back(); 1056 ppgs.update();
1057
1058 std::regex relation("^sa\\((23\\d{8}),(\\d+),(23\\d{8}),(\\d+)\\)\\.");
1059 std::smatch relation_data;
1060 if (!std::regex_search(line, relation_data, relation))
1061 {
1062 continue;
1063 }
1064
1065 std::pair<int, int> lookup1(std::stoi(relation_data[1]), std::stoi(relation_data[2]));
1066 std::pair<int, int> lookup2(std::stoi(relation_data[3]), std::stoi(relation_data[4]));
1067
1068 if (wordByWnidAndWnum_.count(lookup1) && wordByWnidAndWnum_.count(lookup2))
1069 {
1070 word& word1 = *wordByWnidAndWnum_.at(lookup1);
1071 word& word2 = *wordByWnidAndWnum_.at(lookup2);
1072
1073 std::list<field> fields;
1074 fields.emplace_back("general_id", word1.getId());
1075 fields.emplace_back("specific_id", word2.getId());
1076
1077 db_.insertIntoTable("specification", std::move(fields));
1078 }
2047 } 1079 }
2048
2049 lines.push_back(line);
2050 } 1080 }
2051 1081
2052 progress ppgs("Writing pertainyms and mannernyms...", lines.size()); 1082 void generator::readWordNetSimilarity()
2053 for (auto line : lines)
2054 { 1083 {
2055 ppgs.update(); 1084 std::list<std::string> lines(readFile(wordNetPath_ + "wn_sim.pl"));
2056 1085 progress ppgs("Writing adjective similarity...", lines.size());
2057 std::regex relation("^per\\(([34]\\d{8}),(\\d+),([13]\\d{8}),(\\d+)\\)\\."); 1086 for (auto line : lines)
2058 std::smatch relation_data;
2059 if (!std::regex_search(line, relation_data, relation))
2060 { 1087 {
2061 continue; 1088 ppgs.update();
2062 }
2063 1089
2064 int synset_id_1 = stoi(relation_data[1]); 1090 std::regex relation("^sim\\((3\\d{8}),(3\\d{8})\\)\\.");
2065 int wnum_1 = stoi(relation_data[2]); 1091 std::smatch relation_data;
2066 int synset_id_2 = stoi(relation_data[3]); 1092 if (!std::regex_search(line, relation_data, relation))
2067 int wnum_2 = stoi(relation_data[4]);
2068 std::string query;
2069 switch (synset_id_1 / 100000000)
2070 {
2071 case 3: // Adjective
2072 { 1093 {
2073 // This is a pertainym, the second word should be a noun 1094 continue;
2074 // Technically it can be an adjective but we're ignoring that
2075 if (synset_id_2 / 100000000 != 1)
2076 {
2077 continue;
2078 }
2079
2080 query = "INSERT INTO pertainymy (pertainym_id, noun_id) VALUES (?, ?)";
2081
2082 break;
2083 } 1095 }
1096
1097 int lookup1 = std::stoi(relation_data[1]);
1098 int lookup2 = std::stoi(relation_data[2]);
2084 1099
2085 case 4: // Adverb 1100 if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2))
2086 { 1101 {
2087 // This is a mannernym, the second word should be an adjective 1102 notion& notion1 = *notionByWnid_.at(lookup1);
2088 if (synset_id_2 / 100000000 != 3) 1103 notion& notion2 = *notionByWnid_.at(lookup2);
2089 {
2090 continue;
2091 }
2092 1104
2093 query = "INSERT INTO mannernymy (mannernym_id, adjective_id) VALUES (?, ?)"; 1105 std::list<field> fields;
1106 fields.emplace_back("adjective_1_id", notion1.getId());
1107 fields.emplace_back("adjective_2_id", notion2.getId());
2094 1108
2095 break; 1109 db_.insertIntoTable("similarity", std::move(fields));
2096 } 1110 }
2097 } 1111 }
2098 1112 }
2099 sqlite3_stmt* ppstmt;
2100 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
2101 {
2102 db_error(ppdb, query);
2103 }
2104
2105 sqlite3_bind_int(ppstmt, 1, wn[synset_id_1][wnum_1]);
2106 sqlite3_bind_int(ppstmt, 2, wn[synset_id_2][wnum_2]);
2107 1113
2108 if (sqlite3_step(ppstmt) != SQLITE_DONE) 1114 std::list<std::string> generator::readFile(std::string path)
1115 {
1116 std::ifstream file(path);
1117 if (!file)
2109 { 1118 {
2110 db_error(ppdb, query); 1119 throw std::invalid_argument("Could not find file " + path);
2111 } 1120 }
2112
2113 sqlite3_finalize(ppstmt);
2114 }
2115 }
2116 1121
2117 // sa table 1122 std::list<std::string> lines;
2118 {
2119 std::ifstream wnsafile(wnpref + "wn_sa.pl");
2120 if (!wnsafile.is_open())
2121 {
2122 std::cout << "Invalid WordNet data directory." << std::endl;
2123 print_usage();
2124 }
2125
2126 std::list<std::string> lines;
2127 for (;;)
2128 {
2129 std::string line; 1123 std::string line;
2130 if (!getline(wnsafile, line)) 1124 while (std::getline(file, line))
2131 {
2132 break;
2133 }
2134
2135 if (line.back() == '\r')
2136 { 1125 {
2137 line.pop_back(); 1126 if (line.back() == '\r')
1127 {
1128 line.pop_back();
1129 }
1130
1131 lines.push_back(line);
2138 } 1132 }
2139 1133
2140 lines.push_back(line); 1134 return lines;
2141 } 1135 }
2142 1136
2143 progress ppgs("Writing specifications...", lines.size()); 1137 part_of_speech generator::partOfSpeechByWnid(int wnid)
2144 for (auto line : lines)
2145 { 1138 {
2146 ppgs.update(); 1139 switch (wnid / 100000000)
2147
2148 std::regex relation("^per\\((3\\d{8}),(\\d+),(3\\d{8}),(\\d+)\\)\\.");
2149 std::smatch relation_data;
2150 if (!std::regex_search(line, relation_data, relation))
2151 {
2152 continue;
2153 }
2154
2155 int synset_id_1 = stoi(relation_data[1]);
2156 int wnum_1 = stoi(relation_data[2]);
2157 int synset_id_2 = stoi(relation_data[3]);
2158 int wnum_2 = stoi(relation_data[4]);
2159 std::string query("INSERT INTO specification (general_id, specific_id) VALUES (?, ?)");
2160
2161 sqlite3_stmt* ppstmt;
2162 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
2163 { 1140 {
2164 db_error(ppdb, query); 1141 case 1: return part_of_speech::noun;
1142 case 2: return part_of_speech::verb;
1143 case 3: return part_of_speech::adjective;
1144 case 4: return part_of_speech::adverb;
1145 default: throw std::domain_error("Invalid WordNet synset ID: " + std::to_string(wnid));
2165 } 1146 }
1147 }
2166 1148
2167 sqlite3_bind_int(ppstmt, 1, wn[synset_id_1][wnum_1]); 1149 notion& generator::createNotion(part_of_speech partOfSpeech)
2168 sqlite3_bind_int(ppstmt, 2, wn[synset_id_2][wnum_2]); 1150 {
1151 notions_.emplace_back(partOfSpeech);
1152
1153 return notions_.back();
1154 }
2169 1155
2170 if (sqlite3_step(ppstmt) != SQLITE_DONE) 1156 notion& generator::lookupOrCreateNotion(int wnid)
1157 {
1158 if (!notionByWnid_.count(wnid))
2171 { 1159 {
2172 db_error(ppdb, query); 1160 notions_.emplace_back(partOfSpeechByWnid(wnid), wnid);
1161 notionByWnid_[wnid] = &notions_.back();
2173 } 1162 }
2174 1163
2175 sqlite3_finalize(ppstmt); 1164 return *notionByWnid_.at(wnid);
2176 }
2177 }
2178
2179 // sim table
2180 {
2181 std::ifstream wnsimfile(wnpref + "wn_sim.pl");
2182 if (!wnsimfile.is_open())
2183 {
2184 std::cout << "Invalid WordNet data directory." << std::endl;
2185 print_usage();
2186 } 1165 }
2187 1166
2188 std::list<std::string> lines; 1167 lemma& generator::lookupOrCreateLemma(std::string base_form)
2189 for (;;)
2190 { 1168 {
2191 std::string line; 1169 if (!lemmaByBaseForm_.count(base_form))
2192 if (!getline(wnsimfile, line))
2193 { 1170 {
2194 break; 1171 lemmas_.emplace_back(lookupOrCreateForm(base_form));
1172 lemmaByBaseForm_[base_form] = &lemmas_.back();
2195 } 1173 }
1174
1175 return *lemmaByBaseForm_.at(base_form);
1176 }
2196 1177
2197 if (line.back() == '\r') 1178 form& generator::lookupOrCreateForm(std::string text)
1179 {
1180 if (!formByText_.count(text))
2198 { 1181 {
2199 line.pop_back(); 1182 forms_.emplace_back(text);
1183 formByText_[text] = &forms_.back();
2200 } 1184 }
2201 1185
2202 lines.push_back(line); 1186 return *formByText_[text];
2203 } 1187 }
2204 1188
2205 progress ppgs("Writing sense synonyms...", lines.size()); 1189 template <typename... Args> word& generator::createWord(Args&&... args)
2206 for (auto line : lines)
2207 { 1190 {
2208 ppgs.update(); 1191 words_.emplace_back(std::forward<Args>(args)...);
1192 word& w = words_.back();
2209 1193
2210 std::regex relation("^sim\\((3\\d{8}),(3\\d{8})\\)\\."); 1194 wordsByBaseForm_[w.getLemma().getBaseForm().getText()].insert(&w);
2211 std::smatch relation_data; 1195
2212 if (!std::regex_search(line, relation_data, relation)) 1196 if (w.getNotion().hasWnid())
2213 { 1197 {
2214 continue; 1198 wordsByWnid_[w.getNotion().getWnid()].insert(&w);
2215 } 1199 }
2216 1200
2217 int synset_id_1 = stoi(relation_data[1]); 1201 return w;
2218 int synset_id_2 = stoi(relation_data[2]); 1202 }
2219 std::string query("INSERT INTO adjective_synonymy (adjective_1_id, adjective_2_id) VALUES (?, ?)"); 1203
1204 group& generator::createGroup(xmlNodePtr top)
1205 {
1206 groups_.emplace_back();
1207 group& grp = groups_.back();
2220 1208
2221 for (auto mapping1 : wn[synset_id_1]) 1209 xmlChar* key;
1210
1211 for (xmlNodePtr node = top->xmlChildrenNode; node != nullptr; node = node->next)
2222 { 1212 {
2223 for (auto mapping2 : wn[synset_id_2]) 1213 if (!xmlStrcmp(node->name, reinterpret_cast<const xmlChar*>("SUBCLASSES")))
2224 { 1214 {
2225 sqlite3_stmt* ppstmt; 1215 for (xmlNodePtr subclass = node->xmlChildrenNode; subclass != nullptr; subclass = subclass->next)
2226 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK)
2227 { 1216 {
2228 db_error(ppdb, query); 1217 if (!xmlStrcmp(subclass->name, reinterpret_cast<const xmlChar*>("VNSUBCLASS")))
1218 {
1219 try
1220 {
1221 group& subgrp = createGroup(subclass);
1222 subgrp.setParent(grp);
1223 } catch (const std::exception& e)
1224 {
1225 key = xmlGetProp(subclass, reinterpret_cast<const xmlChar*>("ID"));
1226
1227 if (key == nullptr)
1228 {
1229 std::throw_with_nested(std::logic_error("Error parsing IDless subgroup"));
1230 } else {
1231 std::string subgroupId(reinterpret_cast<const char*>(key));
1232 xmlFree(key);
1233
1234 std::throw_with_nested(std::logic_error("Error parsing subgroup " + subgroupId));
1235 }
1236 }
1237 }
2229 } 1238 }
2230 1239 } else if (!xmlStrcmp(node->name, reinterpret_cast<const xmlChar*>("MEMBERS")))
2231 sqlite3_bind_int(ppstmt, 1, mapping1.second); 1240 {
2232 sqlite3_bind_int(ppstmt, 2, mapping2.second); 1241 for (xmlNodePtr member = node->xmlChildrenNode; member != nullptr; member = member->next)
2233
2234 if (sqlite3_step(ppstmt) != SQLITE_DONE)
2235 { 1242 {
2236 db_error(ppdb, query); 1243 if (!xmlStrcmp(member->name, reinterpret_cast<const xmlChar*>("MEMBER")))
1244 {
1245 key = xmlGetProp(member, reinterpret_cast<const xmlChar*>("wn"));
1246 std::string wnSenses(reinterpret_cast<const char*>(key));
1247 xmlFree(key);
1248
1249 auto wnSenseKeys = split<std::list<std::string>>(wnSenses, " ");
1250 if (!wnSenseKeys.empty())
1251 {
1252 std::list<std::string> tempKeys;
1253
1254 std::transform(std::begin(wnSenseKeys), std::end(wnSenseKeys), std::back_inserter(tempKeys), [] (std::string sense) {
1255 return sense + "::";
1256 });
1257
1258 std::list<std::string> filteredKeys;
1259
1260 std::remove_copy_if(std::begin(tempKeys), std::end(tempKeys), std::back_inserter(filteredKeys), [&] (std::string sense) {
1261 return !wnSenseKeys_.count(sense);
1262 });
1263
1264 wnSenseKeys = std::move(filteredKeys);
1265 }
1266
1267 if (!wnSenseKeys.empty())
1268 {
1269 for (std::string sense : wnSenseKeys)
1270 {
1271 word& wordSense = *wnSenseKeys_[sense];
1272 wordSense.setVerbGroup(grp);
1273 }
1274 } else {
1275 key = xmlGetProp(member, reinterpret_cast<const xmlChar*>("name"));
1276 std::string memberName(reinterpret_cast<const char*>(key));
1277 xmlFree(key);
1278
1279 notion& n = createNotion(part_of_speech::verb);
1280 lemma& l = lookupOrCreateLemma(memberName);
1281 word& w = createWord(n, l);
1282
1283 w.setVerbGroup(grp);
1284 }
1285 }
2237 } 1286 }
2238 1287 } else if (!xmlStrcmp(node->name, reinterpret_cast<const xmlChar*>("THEMROLES")))
2239 sqlite3_reset(ppstmt); 1288 {
2240 sqlite3_clear_bindings(ppstmt); 1289 for (xmlNodePtr roletopnode = node->xmlChildrenNode; roletopnode != nullptr; roletopnode = roletopnode->next)
2241
2242 sqlite3_bind_int(ppstmt, 1, mapping2.second);
2243 sqlite3_bind_int(ppstmt, 2, mapping1.second);
2244
2245 if (sqlite3_step(ppstmt) != SQLITE_DONE)
2246 { 1290 {
2247 db_error(ppdb, query); 1291 if (!xmlStrcmp(roletopnode->name, reinterpret_cast<const xmlChar*>("THEMROLE")))
1292 {
1293 role r;
1294
1295 key = xmlGetProp(roletopnode, reinterpret_cast<const xmlChar*>("type"));
1296 std::string roleName = reinterpret_cast<const char*>(key);
1297 xmlFree(key);
1298
1299 for (xmlNodePtr rolenode = roletopnode->xmlChildrenNode; rolenode != nullptr; rolenode = rolenode->next)
1300 {
1301 if (!xmlStrcmp(rolenode->name, reinterpret_cast<const xmlChar*>("SELRESTRS")))
1302 {
1303 r.setSelrestrs(parseSelrestr(rolenode));
1304 }
1305 }
1306
1307 grp.addRole(roleName, std::move(r));
1308 }
2248 } 1309 }
1310 } else if (!xmlStrcmp(node->name, reinterpret_cast<const xmlChar*>("FRAMES")))
1311 {
1312 for (xmlNodePtr frametopnode = node->xmlChildrenNode; frametopnode != nullptr; frametopnode = frametopnode->next)
1313 {
1314 if (!xmlStrcmp(frametopnode->name, reinterpret_cast<const xmlChar*>("FRAME")))
1315 {
1316 frames_.emplace_back();
1317 frame& fr = frames_.back();
2249 1318
2250 sqlite3_finalize(ppstmt); 1319 for (xmlNodePtr framenode = frametopnode->xmlChildrenNode; framenode != nullptr; framenode = framenode->next)
1320 {
1321 if (!xmlStrcmp(framenode->name, reinterpret_cast<const xmlChar*>("SYNTAX")))
1322 {
1323 for (xmlNodePtr syntaxnode = framenode->xmlChildrenNode; syntaxnode != nullptr; syntaxnode = syntaxnode->next)
1324 {
1325 if (!xmlStrcmp(syntaxnode->name, reinterpret_cast<const xmlChar*>("NP")))
1326 {
1327 key = xmlGetProp(syntaxnode, reinterpret_cast<const xmlChar*>("value"));
1328 std::string partRole = reinterpret_cast<const char*>(key);
1329 xmlFree(key);
1330
1331 selrestr partSelrestrs;
1332 std::set<std::string> partSynrestrs;
1333
1334 for (xmlNodePtr npnode = syntaxnode->xmlChildrenNode; npnode != nullptr; npnode = npnode->next)
1335 {
1336 if (!xmlStrcmp(npnode->name, reinterpret_cast<const xmlChar*>("SYNRESTRS")))
1337 {
1338 for (xmlNodePtr synrestr = npnode->xmlChildrenNode; synrestr != nullptr; synrestr = synrestr->next)
1339 {
1340 if (!xmlStrcmp(synrestr->name, reinterpret_cast<const xmlChar*>("SYNRESTR")))
1341 {
1342 key = xmlGetProp(synrestr, reinterpret_cast<const xmlChar*>("type"));
1343 partSynrestrs.insert(reinterpret_cast<const char*>(key));
1344 xmlFree(key);
1345 }
1346 }
1347 }
1348
1349 if (!xmlStrcmp(npnode->name, reinterpret_cast<const xmlChar*>("SELRESTRS")))
1350 {
1351 partSelrestrs = parseSelrestr(npnode);
1352 }
1353 }
1354
1355 fr.push_back(part::createNounPhrase(std::move(partRole), std::move(partSelrestrs), std::move(partSynrestrs)));
1356 } else if (!xmlStrcmp(syntaxnode->name, reinterpret_cast<const xmlChar*>("VERB")))
1357 {
1358 fr.push_back(part::createVerb());
1359 } else if (!xmlStrcmp(syntaxnode->name, reinterpret_cast<const xmlChar*>("PREP")))
1360 {
1361 std::set<std::string> partChoices;
1362 bool partLiteral;
1363
1364 if (xmlHasProp(syntaxnode, reinterpret_cast<const xmlChar*>("value")))
1365 {
1366 partLiteral = true;
1367
1368 key = xmlGetProp(syntaxnode, reinterpret_cast<const xmlChar*>("value"));
1369 std::string choicesStr = reinterpret_cast<const char*>(key);
1370 xmlFree(key);
1371
1372 split(choicesStr, " ", std::inserter(partChoices, std::end(partChoices)));
1373 } else {
1374 partLiteral = false;
1375
1376 for (xmlNodePtr npnode = syntaxnode->xmlChildrenNode; npnode != nullptr; npnode = npnode->next)
1377 {
1378 if (!xmlStrcmp(npnode->name, reinterpret_cast<const xmlChar*>("SELRESTRS")))
1379 {
1380 for (xmlNodePtr synrestr = npnode->xmlChildrenNode; synrestr != nullptr; synrestr = synrestr->next)
1381 {
1382 if (!xmlStrcmp(synrestr->name, reinterpret_cast<const xmlChar*>("SELRESTR")))
1383 {
1384 key = xmlGetProp(synrestr, reinterpret_cast<const xmlChar*>("type"));
1385 partChoices.insert(reinterpret_cast<const char*>(key));
1386 xmlFree(key);
1387 }
1388 }
1389 }
1390 }
1391 }
1392
1393 fr.push_back(part::createPreposition(std::move(partChoices), partLiteral));
1394 } else if (!xmlStrcmp(syntaxnode->name, reinterpret_cast<const xmlChar*>("ADJ")))
1395 {
1396 fr.push_back(part::createAdjective());
1397 } else if (!xmlStrcmp(syntaxnode->name, reinterpret_cast<const xmlChar*>("ADV")))
1398 {
1399 fr.push_back(part::createAdverb());
1400 } else if (!xmlStrcmp(syntaxnode->name, reinterpret_cast<const xmlChar*>("LEX")))
1401 {
1402 key = xmlGetProp(syntaxnode, reinterpret_cast<const xmlChar*>("value"));
1403 std::string literalValue = reinterpret_cast<const char*>(key);
1404 xmlFree(key);
1405
1406 fr.push_back(part::createLiteral(literalValue));
1407 } else {
1408 continue;
1409 }
1410 }
1411
1412 grp.addFrame(fr);
1413 }
1414 }
1415 }
1416 }
2251 } 1417 }
2252 } 1418 }
2253 }
2254 }
2255
2256 // syntax table
2257 {
2258 std::ifstream wnsyntaxfile(wnpref + "wn_syntax.pl");
2259 if (!wnsyntaxfile.is_open())
2260 {
2261 std::cout << "Invalid WordNet data directory." << std::endl;
2262 print_usage();
2263 }
2264 1419
2265 std::list<std::string> lines; 1420 return grp;
2266 for (;;)
2267 {
2268 std::string line;
2269 if (!getline(wnsyntaxfile, line))
2270 {
2271 break;
2272 }
2273
2274 if (line.back() == '\r')
2275 {
2276 line.pop_back();
2277 }
2278
2279 lines.push_back(line);
2280 } 1421 }
2281 1422
2282 progress ppgs("Writing adjective syntax markers...", lines.size()); 1423 selrestr generator::parseSelrestr(xmlNodePtr top)
2283 for (auto line : lines)
2284 { 1424 {
2285 ppgs.update(); 1425 xmlChar* key;
2286 1426
2287 std::regex relation("^syntax\\((3\\d{8}),(\\d+),([ipa])p?\\)\\."); 1427 if (!xmlStrcmp(top->name, reinterpret_cast<const xmlChar*>("SELRESTRS")))
2288 std::smatch relation_data;
2289 if (!std::regex_search(line, relation_data, relation))
2290 {
2291 continue;
2292 }
2293
2294 int synset_id = stoi(relation_data[1]);
2295 int wnum = stoi(relation_data[2]);
2296 std::string syn = relation_data[3];
2297 std::string query("UPDATE adjectives SET position = ? WHERE adjective_id = ?");
2298
2299 sqlite3_stmt* ppstmt;
2300 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK)
2301 { 1428 {
2302 db_error(ppdb, query); 1429 if (xmlChildElementCount(top) == 0)
2303 } 1430 {
2304 1431 return {};
2305 sqlite3_bind_text(ppstmt, 1, syn.c_str(), 1, SQLITE_TRANSIENT); 1432 } else if (xmlChildElementCount(top) == 1)
2306 sqlite3_bind_int(ppstmt, 2, wn[synset_id][wnum]); 1433 {
2307 1434 return parseSelrestr(xmlFirstElementChild(top));
2308 if (sqlite3_step(ppstmt) != SQLITE_DONE) 1435 } else {
1436 bool orlogic = false;
1437 if (xmlHasProp(top, reinterpret_cast<const xmlChar*>("logic")))
1438 {
1439 key = xmlGetProp(top, reinterpret_cast<const xmlChar*>("logic"));
1440 if (!xmlStrcmp(key, reinterpret_cast<const xmlChar*>("or")))
1441 {
1442 orlogic = true;
1443 }
1444
1445 xmlFree(key);
1446 }
1447
1448 std::list<selrestr> children;
1449 for (xmlNodePtr selrestr = top->xmlChildrenNode; selrestr != nullptr; selrestr = selrestr->next)
1450 {
1451 if (!xmlStrcmp(selrestr->name, reinterpret_cast<const xmlChar*>("SELRESTRS"))
1452 || !xmlStrcmp(selrestr->name, reinterpret_cast<const xmlChar*>("SELRESTR")))
1453 {
1454 children.push_back(parseSelrestr(selrestr));
1455 }
1456 }
1457
1458 return selrestr(children, orlogic);
1459 }
1460 } else if (!xmlStrcmp(top->name, reinterpret_cast<const xmlChar*>("SELRESTR")))
2309 { 1461 {
2310 db_error(ppdb, query); 1462 key = xmlGetProp(top, reinterpret_cast<const xmlChar*>("Value"));
1463 bool selPos = (std::string(reinterpret_cast<const char*>(key)) == "+");
1464 xmlFree(key);
1465
1466 key = xmlGetProp(top, reinterpret_cast<const xmlChar*>("type"));
1467 std::string selRestriction = reinterpret_cast<const char*>(key);
1468 xmlFree(key);
1469
1470 return selrestr(selRestriction, selPos);
1471 } else {
1472 throw std::logic_error("Badly formatted selrestr");
2311 } 1473 }
2312
2313 sqlite3_finalize(ppstmt);
2314 } 1474 }
2315 } 1475
2316 1476 };
2317 sqlite3_close_v2(ppdb); 1477};
2318
2319 std::cout << "Done." << std::endl;
2320}
diff --git a/generator/generator.h b/generator/generator.h new file mode 100644 index 0000000..e2a7404 --- /dev/null +++ b/generator/generator.h
@@ -0,0 +1,151 @@
1#ifndef GENERATOR_H_5B61CBC5
2#define GENERATOR_H_5B61CBC5
3
4#include <string>
5#include <map>
6#include <list>
7#include <set>
8#include <libxml/parser.h>
9#include "database.h"
10#include "notion.h"
11#include "word.h"
12#include "lemma.h"
13#include "form.h"
14#include "pronunciation.h"
15#include "group.h"
16#include "frame.h"
17
18namespace verbly {
19 namespace generator {
20
21 enum class part_of_speech;
22 class selrestr;
23
24 class generator {
25 public:
26
27 // Constructor
28
29 generator(
30 std::string verbNetPath,
31 std::string agidPath,
32 std::string wordNetPath,
33 std::string cmudictPath,
34 std::string imageNetPath,
35 std::string outputPath);
36
37 // Action
38
39 void run();
40
41 private:
42
43 // Subroutines
44
45 void readWordNetSynsets();
46
47 void readAdjectivePositioning();
48
49 void readImageNetUrls();
50
51 void readWordNetSenseKeys();
52
53 void readVerbNet();
54
55 void readAgidInflections();
56
57 void readPrepositions();
58
59 void readCmudictPronunciations();
60
61 void writeSchema();
62
63 void dumpObjects();
64
65 void readWordNetAntonymy();
66
67 void readWordNetVariation();
68
69 void readWordNetClasses();
70
71 void readWordNetCausality();
72
73 void readWordNetEntailment();
74
75 void readWordNetHypernymy();
76
77 void readWordNetInstantiation();
78
79 void readWordNetMemberMeronymy();
80
81 void readWordNetPartMeronymy();
82
83 void readWordNetSubstanceMeronymy();
84
85 void readWordNetPertainymy();
86
87 void readWordNetSpecification();
88
89 void readWordNetSimilarity();
90
91 // Helpers
92
93 std::list<std::string> readFile(std::string path);
94
95 inline part_of_speech partOfSpeechByWnid(int wnid);
96
97 notion& createNotion(part_of_speech partOfSpeech);
98
99 notion& lookupOrCreateNotion(int wnid);
100
101 lemma& lookupOrCreateLemma(std::string base_form);
102
103 form& lookupOrCreateForm(std::string text);
104
105 template <typename... Args> word& createWord(Args&&... args);
106
107 group& createGroup(xmlNodePtr top);
108
109 selrestr parseSelrestr(xmlNodePtr top);
110
111 // Input
112
113 std::string verbNetPath_;
114 std::string agidPath_;
115 std::string wordNetPath_;
116 std::string cmudictPath_;
117 std::string imageNetPath_;
118
119 // Output
120
121 database db_;
122
123 // Data
124
125 std::list<notion> notions_;
126 std::list<word> words_;
127 std::list<lemma> lemmas_;
128 std::list<form> forms_;
129 std::list<pronunciation> pronunciations_;
130 std::list<frame> frames_;
131 std::list<group> groups_;
132
133 // Indexes
134
135 std::map<int, notion*> notionByWnid_;
136 std::map<int, std::set<word*>> wordsByWnid_;
137 std::map<std::pair<int, int>, word*> wordByWnidAndWnum_;
138 std::map<std::string, std::set<word*>> wordsByBaseForm_;
139 std::map<std::string, lemma*> lemmaByBaseForm_;
140 std::map<std::string, form*> formByText_;
141
142 // Caches
143
144 std::map<std::string, word*> wnSenseKeys_;
145
146 };
147
148 };
149};
150
151#endif /* end of include guard: GENERATOR_H_5B61CBC5 */
diff --git a/generator/group.cpp b/generator/group.cpp new file mode 100644 index 0000000..7cbd4c8 --- /dev/null +++ b/generator/group.cpp
@@ -0,0 +1,119 @@
1#include "group.h"
2#include <stdexcept>
3#include <list>
4#include <json.hpp>
5#include "database.h"
6#include "field.h"
7#include "frame.h"
8
9namespace verbly {
10 namespace generator {
11
12 int group::nextId_ = 0;
13
14 group::group() : id_(nextId_++)
15 {
16 }
17
18 void group::setParent(const group& parent)
19 {
20 // Adding a group to itself is nonsensical.
21 assert(&parent != this);
22
23 parent_ = &parent;
24 }
25
26 void group::addRole(std::string name, role r)
27 {
28 roleNames_.insert(name);
29 roles_[name] = std::move(r);
30 }
31
32 void group::addFrame(const frame& f)
33 {
34 frames_.insert(&f);
35 }
36
37 std::set<std::string> group::getRoles() const
38 {
39 std::set<std::string> fullRoles = roleNames_;
40
41 if (hasParent())
42 {
43 for (std::string name : getParent().getRoles())
44 {
45 fullRoles.insert(name);
46 }
47 }
48
49 return fullRoles;
50 }
51
52 const role& group::getRole(std::string name) const
53 {
54 if (roles_.count(name))
55 {
56 return roles_.at(name);
57 } else if (hasParent())
58 {
59 return getParent().getRole(name);
60 } else {
61 throw std::invalid_argument("Specified role not found in verb group");
62 }
63 }
64
65 std::set<const frame*> group::getFrames() const
66 {
67 std::set<const frame*> fullFrames = frames_;
68
69 if (hasParent())
70 {
71 for (const frame* f : getParent().getFrames())
72 {
73 fullFrames.insert(f);
74 }
75 }
76
77 return fullFrames;
78 }
79
80 database& operator<<(database& db, const group& arg)
81 {
82 // Serialize the group first
83 {
84 std::list<field> fields;
85 fields.emplace_back("group_id", arg.getId());
86
87 nlohmann::json jsonRoles;
88 for (std::string name : arg.getRoles())
89 {
90 const role& r = arg.getRole(name);
91
92 nlohmann::json jsonRole;
93 jsonRole["type"] = name;
94 jsonRole["selrestrs"] = r.getSelrestrs().toJson();
95
96 jsonRoles.emplace_back(std::move(jsonRole));
97 }
98
99 fields.emplace_back("data", jsonRoles.dump());
100
101 db.insertIntoTable("groups", std::move(fields));
102 }
103
104 // Then, serialize the group/frame relationship
105 for (const frame* f : arg.getFrames())
106 {
107 std::list<field> fields;
108
109 fields.emplace_back("group_id", arg.getId());
110 fields.emplace_back("frame_id", f->getId());
111
112 db.insertIntoTable("groups_frames", std::move(fields));
113 }
114
115 return db;
116 }
117
118 };
119};
diff --git a/generator/group.h b/generator/group.h new file mode 100644 index 0000000..efb8c5d --- /dev/null +++ b/generator/group.h
@@ -0,0 +1,80 @@
1#ifndef GROUP_H_EDAFB5DC
2#define GROUP_H_EDAFB5DC
3
4#include <map>
5#include <set>
6#include <string>
7#include <cassert>
8#include "role.h"
9
10namespace verbly {
11 namespace generator {
12
13 class frame;
14 class database;
15
16 class group {
17 public:
18
19 // Constructor
20
21 group();
22
23 // Mutators
24
25 void setParent(const group& parent);
26
27 void addRole(std::string name, role r);
28
29 void addFrame(const frame& f);
30
31 // Accessors
32
33 int getId() const
34 {
35 return id_;
36 }
37
38 bool hasParent() const
39 {
40 return (parent_ != nullptr);
41 }
42
43 const group& getParent() const
44 {
45 // Calling code should always call hasParent first
46 assert(parent_ != nullptr);
47
48 return *parent_;
49 }
50
51 std::set<std::string> getRoles() const;
52
53 const role& getRole(std::string name) const;
54
55 std::set<const frame*> getFrames() const;
56
57 private:
58
59 static int nextId_;
60
61 const int id_;
62
63 const group* parent_ = nullptr;
64 std::map<std::string, role> roles_;
65 std::set<const frame*> frames_;
66
67 // Caches
68
69 std::set<std::string> roleNames_;
70
71 };
72
73 // Serializer
74
75 database& operator<<(database& db, const group& arg);
76
77 };
78};
79
80#endif /* end of include guard: GROUP_H_EDAFB5DC */
diff --git a/generator/lemma.cpp b/generator/lemma.cpp new file mode 100644 index 0000000..e66b153 --- /dev/null +++ b/generator/lemma.cpp
@@ -0,0 +1,65 @@
1#include "lemma.h"
2#include <list>
3#include <cassert>
4#include "field.h"
5#include "database.h"
6#include "form.h"
7
8namespace verbly {
9 namespace generator {
10
11 int lemma::nextId_ = 0;
12
13 lemma::lemma(const form& baseForm) :
14 id_(nextId_++),
15 baseForm_(baseForm)
16 {
17 inflections_[inflection::base] = {&baseForm};
18 }
19
20 void lemma::addInflection(inflection type, const form& f)
21 {
22 // There can only be one base form.
23 assert(type != inflection::base);
24
25 inflections_[type].insert(&f);
26 }
27
28 std::set<const form*> lemma::getInflections(inflection type) const
29 {
30 if (inflections_.count(type))
31 {
32 return inflections_.at(type);
33 } else {
34 return {};
35 }
36 }
37
38 database& operator<<(database& db, const lemma& arg)
39 {
40 for (inflection type : {
41 inflection::base,
42 inflection::plural,
43 inflection::comparative,
44 inflection::superlative,
45 inflection::past_tense,
46 inflection::past_participle,
47 inflection::ing_form,
48 inflection::s_form})
49 {
50 for (const form* f : arg.getInflections(type))
51 {
52 std::list<field> fields;
53 fields.emplace_back("lemma_id", arg.getId());
54 fields.emplace_back("form_id", f->getId());
55 fields.emplace_back("category", static_cast<int>(type));
56
57 db.insertIntoTable("lemmas_forms", std::move(fields));
58 }
59 }
60
61 return db;
62 }
63
64 };
65};
diff --git a/generator/lemma.h b/generator/lemma.h new file mode 100644 index 0000000..6452e08 --- /dev/null +++ b/generator/lemma.h
@@ -0,0 +1,58 @@
1#ifndef LEMMA_H_D73105A7
2#define LEMMA_H_D73105A7
3
4#include <string>
5#include <map>
6#include <set>
7#include "enums.h"
8
9namespace verbly {
10 namespace generator {
11
12 class database;
13 class form;
14
15 class lemma {
16 public:
17
18 // Constructors
19
20 explicit lemma(const form& baseForm);
21
22 // Mutators
23
24 void addInflection(inflection type, const form& f);
25
26 // Accessors
27
28 int getId() const
29 {
30 return id_;
31 }
32
33 const form& getBaseForm() const
34 {
35 return baseForm_;
36 }
37
38 std::set<const form*> getInflections(inflection type) const;
39
40 private:
41
42 static int nextId_;
43
44 const int id_;
45 const form& baseForm_;
46
47 std::map<inflection, std::set<const form*>> inflections_;
48
49 };
50
51 // Serializer
52
53 database& operator<<(database& db, const lemma& arg);
54
55 };
56};
57
58#endif /* end of include guard: LEMMA_H_D73105A7 */
diff --git a/generator/main.cpp b/generator/main.cpp new file mode 100644 index 0000000..827c963 --- /dev/null +++ b/generator/main.cpp
@@ -0,0 +1,40 @@
1#include <iostream>
2#include <exception>
3#include "generator.h"
4
// Writes the command-line synopsis, followed by a one-line description of
// each of the six positional arguments, to standard output.
void printUsage()
{
  static const char* const usageLines[] = {
    "usage: generator verbnet agid wordnet cmudict imagenet output",
    "verbnet :: path to a VerbNet data directory",
    "agid :: path to an AGID infl.txt file",
    "wordnet :: path to a WordNet prolog data directory",
    "cmudict :: path to a CMUDICT pronunciation file",
    "imagenet :: path to an ImageNet urls.txt file",
    "output :: datafile output path"
  };

  for (const char* line : usageLines)
  {
    std::cout << line << std::endl;
  }
}
15
16int main(int argc, char** argv)
17{
18 if (argc == 7)
19 {
20 try
21 {
22 verbly::generator::generator app(argv[1], argv[2], argv[3], argv[4], argv[5], argv[6]);
23
24 try
25 {
26 app.run();
27 } catch (const std::exception& e)
28 {
29 std::cout << e.what() << std::endl;
30 }
31 } catch (const std::exception& e)
32 {
33 std::cout << e.what() << std::endl;
34 printUsage();
35 }
36 } else {
37 std::cout << "verbly datafile generator" << std::endl;
38 printUsage();
39 }
40}
diff --git a/generator/notion.cpp b/generator/notion.cpp new file mode 100644 index 0000000..290d982 --- /dev/null +++ b/generator/notion.cpp
@@ -0,0 +1,85 @@
1#include "notion.h"
2#include <string>
3#include <list>
4#include "database.h"
5#include "field.h"
6
7namespace verbly {
8 namespace generator {
9
10 int notion::nextId_ = 0;
11
12 notion::notion(
13 part_of_speech partOfSpeech) :
14 id_(nextId_++),
15 partOfSpeech_(partOfSpeech)
16 {
17 }
18
19 notion::notion(
20 part_of_speech partOfSpeech,
21 int wnid) :
22 id_(nextId_++),
23 partOfSpeech_(partOfSpeech),
24 wnid_(wnid),
25 hasWnid_(true)
26 {
27 }
28
29 void notion::incrementNumOfImages()
30 {
31 // Calling code should always call hasWnid and check that the notion is a noun first.
32 assert(hasWnid_ && (partOfSpeech_ == part_of_speech::noun));
33
34 numOfImages_++;
35 }
36
37 void notion::setPrepositionGroups(std::list<std::string> groups)
38 {
39 // Calling code should always check that the notion is a preposition first.
40 assert(partOfSpeech_ == part_of_speech::preposition);
41
42 prepositionGroups_ = groups;
43 }
44
45 database& operator<<(database& db, const notion& arg)
46 {
47 // First, serialize the notion
48 {
49 std::list<field> fields;
50
51 fields.emplace_back("notion_id", arg.getId());
52 fields.emplace_back("part_of_speech", static_cast<int>(arg.getPartOfSpeech()));
53
54 if (arg.hasWnid())
55 {
56 fields.emplace_back("wnid", arg.getWnid());
57
58 if (arg.getPartOfSpeech() == part_of_speech::noun)
59 {
60 fields.emplace_back("images", arg.getNumOfImages());
61 }
62 }
63
64 db.insertIntoTable("notions", std::move(fields));
65 }
66
67 // Next, serialize the is_a relationship if this is a preposition
68 if (arg.getPartOfSpeech() == part_of_speech::preposition)
69 {
70 for (std::string group : arg.getPrepositionGroups())
71 {
72 std::list<field> fields;
73
74 fields.emplace_back("notion_id", arg.getId());
75 fields.emplace_back("groupname", group);
76
77 db.insertIntoTable("is_a", std::move(fields));
78 }
79 }
80
81 return db;
82 }
83
84 };
85};
diff --git a/generator/notion.h b/generator/notion.h new file mode 100644 index 0000000..76210de --- /dev/null +++ b/generator/notion.h
@@ -0,0 +1,91 @@
1#ifndef NOTION_H_221DE2BC
2#define NOTION_H_221DE2BC
3
4#include <cassert>
5#include <list>
6#include <string>
7#include "enums.h"
8
9namespace verbly {
10 namespace generator {
11
12 class database;
13
14 class notion {
15 public:
16
17 // Constructors
18
19 explicit notion(part_of_speech partOfSpeech);
20
21 notion(part_of_speech partOfSpeech, int wnid);
22
23 // Mutators
24
25 void incrementNumOfImages();
26
27 void setPrepositionGroups(std::list<std::string> groups);
28
29 // Accessors
30
31 int getId() const
32 {
33 return id_;
34 }
35
36 part_of_speech getPartOfSpeech() const
37 {
38 return partOfSpeech_;
39 }
40
41 bool hasWnid() const
42 {
43 return hasWnid_;
44 }
45
46 int getWnid() const
47 {
48 // Calling code should always call hasWnid first.
49 assert(hasWnid_);
50
51 return wnid_;
52 }
53
54 int getNumOfImages() const
55 {
56 // Calling code should always call hasWnid and check that the notion is a noun first.
57 assert(hasWnid_ && (partOfSpeech_ == part_of_speech::noun));
58
59 return numOfImages_;
60 }
61
62 std::list<std::string> getPrepositionGroups() const
63 {
64 // Calling code should always check that the notion is a preposition first.
65 assert(partOfSpeech_ == part_of_speech::preposition);
66
67 return prepositionGroups_;
68 }
69
70 private:
71
72 static int nextId_;
73
74 const int id_;
75 const part_of_speech partOfSpeech_;
76 const int wnid_ = 0;
77 const bool hasWnid_ = false;
78
79 int numOfImages_ = 0;
80 std::list<std::string> prepositionGroups_;
81
82 };
83
84 // Serializer
85
86 database& operator<<(database& db, const notion& arg);
87
88 };
89};
90
91#endif /* end of include guard: NOTION_H_221DE2BC */
diff --git a/generator/part.cpp b/generator/part.cpp new file mode 100644 index 0000000..dbd4e11 --- /dev/null +++ b/generator/part.cpp
@@ -0,0 +1,336 @@
1#include "part.h"
2#include <stdexcept>
3#include "selrestr.h"
4
5namespace verbly {
6 namespace generator {
7
8 part part::createNounPhrase(std::string role, selrestr selrestrs, std::set<std::string> synrestrs)
9 {
10 part p(type::noun_phrase);
11
12 new(&p.noun_phrase_.role) std::string(std::move(role));
13 new(&p.noun_phrase_.selrestrs) selrestr(std::move(selrestrs));
14 new(&p.noun_phrase_.synrestrs) std::set<std::string>(std::move(synrestrs));
15
16 return p;
17 }
18
19 part part::createVerb()
20 {
21 return part(type::verb);
22 }
23
24 part part::createPreposition(std::set<std::string> choices, bool literal)
25 {
26 part p(type::preposition);
27
28 new(&p.preposition_.choices) std::set<std::string>(std::move(choices));
29 p.preposition_.literal = literal;
30
31 return p;
32 }
33
34 part part::createAdjective()
35 {
36 return part(type::adjective);
37 }
38
39 part part::createAdverb()
40 {
41 return part(type::adverb);
42 }
43
44 part part::createLiteral(std::string value)
45 {
46 part p(type::literal);
47
48 new(&p.literal_) std::string(std::move(value));
49
50 return p;
51 }
52
53 part::part(const part& other)
54 {
55 type_ = other.type_;
56
57 switch (type_)
58 {
59 case type::noun_phrase:
60 {
61 new(&noun_phrase_.role) std::string(other.noun_phrase_.role);
62 new(&noun_phrase_.selrestrs) selrestr(other.noun_phrase_.selrestrs);
63 new(&noun_phrase_.synrestrs) std::set<std::string>(other.noun_phrase_.synrestrs);
64
65 break;
66 }
67
68 case type::preposition:
69 {
70 new(&preposition_.choices) std::set<std::string>(other.preposition_.choices);
71 preposition_.literal = other.preposition_.literal;
72
73 break;
74 }
75
76 case type::literal:
77 {
78 new(&literal_) std::string(other.literal_);
79
80 break;
81 }
82
83 case type::verb:
84 case type::adjective:
85 case type::adverb:
86 case type::invalid:
87 {
88 break;
89 }
90 }
91 }
92
93 part::part(part&& other) : part()
94 {
95 swap(*this, other);
96 }
97
98 part& part::operator=(part other)
99 {
100 swap(*this, other);
101
102 return *this;
103 }
104
105 void swap(part& first, part& second)
106 {
107 using type = part::type;
108
109 type tempType = first.type_;
110 std::string tempRole;
111 selrestr tempSelrestrs;
112 std::set<std::string> tempSynrestrs;
113 std::set<std::string> tempChoices;
114 bool tempPrepLiteral;
115 std::string tempLiteralValue;
116
117 switch (tempType)
118 {
119 case type::noun_phrase:
120 {
121 tempRole = std::move(first.noun_phrase_.role);
122 tempSelrestrs = std::move(first.noun_phrase_.selrestrs);
123 tempSynrestrs = std::move(first.noun_phrase_.synrestrs);
124
125 break;
126 }
127
128 case type::preposition:
129 {
130 tempChoices = std::move(first.preposition_.choices);
131 tempPrepLiteral = first.preposition_.literal;
132
133 break;
134 }
135
136 case type::literal:
137 {
138 tempLiteralValue = std::move(first.literal_);
139
140 break;
141 }
142
143 case type::verb:
144 case type::adjective:
145 case type::adverb:
146 case type::invalid:
147 {
148 break;
149 }
150 }
151
152 first.~part();
153
154 first.type_ = second.type_;
155
156 switch (first.type_)
157 {
158 case type::noun_phrase:
159 {
160 new(&first.noun_phrase_.role) std::string(std::move(second.noun_phrase_.role));
161 new(&first.noun_phrase_.selrestrs) selrestr(std::move(second.noun_phrase_.selrestrs));
162 new(&first.noun_phrase_.synrestrs) std::set<std::string>(std::move(second.noun_phrase_.synrestrs));
163
164 break;
165 }
166
167 case type::preposition:
168 {
169 new(&first.preposition_.choices) std::set<std::string>(std::move(second.preposition_.choices));
170 first.preposition_.literal = second.preposition_.literal;
171
172 break;
173 }
174
175 case type::literal:
176 {
177 new(&first.literal_) std::string(std::move(second.literal_));
178
179 break;
180 }
181
182 case type::verb:
183 case type::adjective:
184 case type::adverb:
185 case type::invalid:
186 {
187 break;
188 }
189 }
190
191 second.~part();
192
193 second.type_ = tempType;
194
195 switch (second.type_)
196 {
197 case type::noun_phrase:
198 {
199 new(&second.noun_phrase_.role) std::string(std::move(tempRole));
200 new(&second.noun_phrase_.selrestrs) selrestr(std::move(tempSelrestrs));
201 new(&second.noun_phrase_.synrestrs) std::set<std::string>(std::move(tempSynrestrs));
202
203 break;
204 }
205
206 case type::preposition:
207 {
208 new(&second.preposition_.choices) std::set<std::string>(std::move(tempChoices));
209 second.preposition_.literal = tempPrepLiteral;
210
211 break;
212 }
213
214 case type::literal:
215 {
216 new(&second.literal_) std::string(std::move(tempLiteralValue));
217
218 break;
219 }
220
221 case type::verb:
222 case type::adjective:
223 case type::adverb:
224 case type::invalid:
225 {
226 break;
227 }
228 }
229 }
230
231 part::~part()
232 {
233 switch (type_)
234 {
235 case type::noun_phrase:
236 {
237 using string_type = std::string;
238 using set_type = std::set<std::string>;
239
240 noun_phrase_.role.~string_type();
241 noun_phrase_.selrestrs.~selrestr();
242 noun_phrase_.synrestrs.~set_type();
243
244 break;
245 }
246
247 case type::preposition:
248 {
249 using set_type = std::set<std::string>;
250
251 preposition_.choices.~set_type();
252
253 break;
254 }
255
256 case type::literal:
257 {
258 using string_type = std::string;
259
260 literal_.~string_type();
261
262 break;
263 }
264
265 case type::verb:
266 case type::adjective:
267 case type::adverb:
268 case type::invalid:
269 {
270 break;
271 }
272 }
273 }
274
275 std::string part::getNounRole() const
276 {
277 if (type_ == type::noun_phrase)
278 {
279 return noun_phrase_.role;
280 } else {
281 throw std::domain_error("part::getNounRole is only valid for noun phrase parts");
282 }
283 }
284
285 selrestr part::getNounSelrestrs() const
286 {
287 if (type_ == type::noun_phrase)
288 {
289 return noun_phrase_.selrestrs;
290 } else {
291 throw std::domain_error("part::getNounSelrestrs is only valid for noun phrase parts");
292 }
293 }
294
295 std::set<std::string> part::getNounSynrestrs() const
296 {
297 if (type_ == type::noun_phrase)
298 {
299 return noun_phrase_.synrestrs;
300 } else {
301 throw std::domain_error("part::getNounSynrestrs is only valid for noun phrase parts");
302 }
303 }
304
305 std::set<std::string> part::getPrepositionChoices() const
306 {
307 if (type_ == type::preposition)
308 {
309 return preposition_.choices;
310 } else {
311 throw std::domain_error("part::getPrepositionChoices is only valid for preposition parts");
312 }
313 }
314
315 bool part::isPrepositionLiteral() const
316 {
317 if (type_ == type::preposition)
318 {
319 return preposition_.literal;
320 } else {
321 throw std::domain_error("part::isPrepositionLiteral is only valid for preposition parts");
322 }
323 }
324
325 std::string part::getLiteralValue() const
326 {
327 if (type_ == type::literal)
328 {
329 return literal_;
330 } else {
331 throw std::domain_error("part::getLiteralValue is only valid for literal parts");
332 }
333 }
334
335 };
336};
diff --git a/generator/part.h b/generator/part.h new file mode 100644 index 0000000..d044630 --- /dev/null +++ b/generator/part.h
@@ -0,0 +1,114 @@
1#ifndef PART_H_FB54F361
2#define PART_H_FB54F361
3
4#include <string>
5#include <set>
6#include "selrestr.h"
7
8namespace verbly {
9 namespace generator {
10
11 class part {
12 public:
13 enum class type {
14 invalid = -1,
15 noun_phrase = 0,
16 verb = 1,
17 preposition = 2,
18 adjective = 3,
19 adverb = 4,
20 literal = 5
21 };
22
23 // Static factories
24
25 static part createNounPhrase(std::string role, selrestr selrestrs, std::set<std::string> synrestrs);
26
27 static part createVerb();
28
29 static part createPreposition(std::set<std::string> choices, bool literal);
30
31 static part createAdjective();
32
33 static part createAdverb();
34
35 static part createLiteral(std::string value);
36
37 // Copy and move constructors
38
39 part(const part& other);
40
41 part(part&& other);
42
43 // Assignment
44
45 part& operator=(part other);
46
47 // Swap
48
49 friend void swap(part& first, part& second);
50
51 // Destructor
52
53 ~part();
54
55 // General accessors
56
57 type getType() const
58 {
59 return type_;
60 }
61
62 // Noun phrase accessors
63
64 std::string getNounRole() const;
65
66 selrestr getNounSelrestrs() const;
67
68 std::set<std::string> getNounSynrestrs() const;
69
70 // Preposition accessors
71
72 std::set<std::string> getPrepositionChoices() const;
73
74 bool isPrepositionLiteral() const;
75
76 // Literal accessors
77
78 std::string getLiteralValue() const;
79
80 private:
81
82 // Private constructors
83
84 part()
85 {
86 }
87
88 part(type t) : type_(t)
89 {
90 }
91
92 // Data
93
94 union {
95 struct {
96 std::string role;
97 selrestr selrestrs;
98 std::set<std::string> synrestrs;
99 } noun_phrase_;
100 struct {
101 std::set<std::string> choices;
102 bool literal;
103 } preposition_;
104 std::string literal_;
105 };
106
107 type type_ = type::invalid;
108
109 };
110
111 };
112};
113
114#endif /* end of include guard: PART_H_FB54F361 */
diff --git a/generator/progress.h b/generator/progress.h index 81f07a3..fcb680d 100644 --- a/generator/progress.h +++ b/generator/progress.h
@@ -3,48 +3,54 @@
3 3
4#include <string> 4#include <string>
5 5
6class progress { 6namespace verbly {
7 private: 7 namespace generator {
8 std::string message;
9 int total;
10 int cur = 0;
11 int lprint = 0;
12 8
13 public: 9 class progress {
14 progress(std::string message, int total) : message(message), total(total) 10 private:
15 { 11 std::string message;
16 std::cout << message << " 0%" << std::flush; 12 int total;
17 } 13 int cur = 0;
14 int lprint = 0;
18 15
19 void update(int val) 16 public:
20 { 17 progress(std::string message, int total) : message(message), total(total)
21 if (val <= total) 18 {
22 { 19 std::cout << message << " 0%" << std::flush;
23 cur = val; 20 }
24 } else { 21
25 cur = total; 22 void update(int val)
26 } 23 {
24 if (val <= total)
25 {
26 cur = val;
27 } else {
28 cur = total;
29 }
27 30
28 int pp = cur * 100 / total; 31 int pp = cur * 100 / total;
29 if (pp != lprint) 32 if (pp != lprint)
30 { 33 {
31 lprint = pp; 34 lprint = pp;
32 35
33 std::cout << "\b\b\b\b" << std::right; 36 std::cout << "\b\b\b\b" << std::right;
34 std::cout.width(3); 37 std::cout.width(3);
35 std::cout << pp << "%" << std::flush; 38 std::cout << pp << "%" << std::flush;
36 } 39 }
37 } 40 }
41
42 void update()
43 {
44 update(cur+1);
45 }
38 46
39 void update() 47 ~progress()
40 { 48 {
41 update(cur+1); 49 std::cout << "\b\b\b\b100%" << std::endl;
42 } 50 }
51 };
43 52
44 ~progress() 53 };
45 {
46 std::cout << "\b\b\b\b100%" << std::endl;
47 }
48}; 54};
49 55
50#endif /* end of include guard: PROGRESS_H_A34EF856 */ 56#endif /* end of include guard: PROGRESS_H_A34EF856 */
diff --git a/generator/pronunciation.cpp b/generator/pronunciation.cpp new file mode 100644 index 0000000..eb07607 --- /dev/null +++ b/generator/pronunciation.cpp
@@ -0,0 +1,87 @@
1#include "pronunciation.h"
2#include <list>
3#include <algorithm>
4#include <cctype>
5#include <iterator>
6#include "database.h"
7#include "field.h"
8#include "../lib/util.h"
9
10namespace verbly {
11 namespace generator {
12
13 int pronunciation::nextId_ = 0;
14
15 pronunciation::pronunciation(std::string phonemes) :
16 id_(nextId_++),
17 phonemes_(phonemes)
18 {
19 auto phonemeList = split<std::list<std::string>>(phonemes, " ");
20
21 auto rhymeStart = std::find_if(std::begin(phonemeList), std::end(phonemeList), [] (std::string phoneme) {
22 return phoneme.find("1") != std::string::npos;
23 });
24
25 // Rhyme detection
26 if (rhymeStart != std::end(phonemeList))
27 {
28 std::list<std::string> rhymePhonemes;
29
30 std::transform(rhymeStart, std::end(phonemeList), std::back_inserter(rhymePhonemes), [] (std::string phoneme) {
31 std::string naked;
32
33 std::remove_copy_if(std::begin(phoneme), std::end(phoneme), std::back_inserter(naked), [] (char ch) {
34 return std::isdigit(ch);
35 });
36
37 return naked;
38 });
39
40 rhyme_ = implode(std::begin(rhymePhonemes), std::end(rhymePhonemes), " ");
41
42 if (rhymeStart != std::begin(phonemeList))
43 {
44 prerhyme_ = *std::prev(rhymeStart);
45 }
46 }
47
48 // Syllable/stress
49 for (std::string phoneme : phonemeList)
50 {
51 if (std::isdigit(phoneme.back()))
52 {
53 // It's a vowel!
54 syllables_++;
55
56 if (phoneme.back() == '1')
57 {
58 stress_.push_back('1');
59 } else {
60 stress_.push_back('0');
61 }
62 }
63 }
64 }
65
66 database& operator<<(database& db, const pronunciation& arg)
67 {
68 std::list<field> fields;
69
70 fields.emplace_back("pronunciation_id", arg.getId());
71 fields.emplace_back("phonemes", arg.getPhonemes());
72 fields.emplace_back("syllables", arg.getSyllables());
73 fields.emplace_back("stress", arg.getStress());
74
75 if (arg.hasRhyme())
76 {
77 fields.emplace_back("rhyme", arg.getRhymePhonemes());
78 fields.emplace_back("prerhyme", arg.getPrerhyme());
79 }
80
81 db.insertIntoTable("pronunciations", std::move(fields));
82
83 return db;
84 }
85
86 };
87};
diff --git a/generator/pronunciation.h b/generator/pronunciation.h new file mode 100644 index 0000000..81be6c4 --- /dev/null +++ b/generator/pronunciation.h
@@ -0,0 +1,82 @@
1#ifndef PRONUNCIATION_H_584A08DD
2#define PRONUNCIATION_H_584A08DD
3
4#include <string>
5#include <cassert>
6
7namespace verbly {
8 namespace generator {
9
10 class database;
11
// A pronunciation: the raw phoneme string plus the rhyme, prerhyme, syllable
// count and stress pattern stored alongside it.
class pronunciation {
public:

  // Constructor

  explicit pronunciation(std::string phonemes);

  // Accessors

  // Identifier of this pronunciation.
  int getId() const { return id_; }

  // A copy of the phoneme string given at construction.
  std::string getPhonemes() const { return phonemes_; }

  // True when the stored rhyme is non-empty.
  bool hasRhyme() const { return !rhyme_.empty(); }

  // A copy of the rhyme; asserts that there is one.
  std::string getRhymePhonemes() const
  {
    // Calling code should always call hasRhyme first.
    assert(!rhyme_.empty());

    return rhyme_;
  }

  // A copy of the prerhyme; asserts that there is a rhyme.
  std::string getPrerhyme() const
  {
    // Calling code should always call hasRhyme first.
    assert(!rhyme_.empty());

    return prerhyme_;
  }

  int getSyllables() const { return syllables_; }

  std::string getStress() const { return stress_; }

private:

  // Shared counter declared here and defined in the implementation file.
  static int nextId_;

  const int id_;
  const std::string phonemes_;
  std::string rhyme_;
  std::string prerhyme_;
  int syllables_ = 0;
  std::string stress_;

};
74
75 // Serializer
76
77 database& operator<<(database& db, const pronunciation& arg);
78
79 };
80};
81
82#endif /* end of include guard: PRONUNCIATION_H_584A08DD */
diff --git a/generator/role.h b/generator/role.h new file mode 100644 index 0000000..5fa68b8 --- /dev/null +++ b/generator/role.h
@@ -0,0 +1,35 @@
1#ifndef ROLE_H_249F9A9C
2#define ROLE_H_249F9A9C
3
4#include "selrestr.h"
5
6namespace verbly {
7 namespace generator {
8
9 class role {
10 public:
11
12 // Mutators
13
14 void setSelrestrs(selrestr selrestrs)
15 {
16 selrestrs_ = selrestrs;
17 }
18
19 // Accessors
20
21 const selrestr& getSelrestrs() const
22 {
23 return selrestrs_;
24 }
25
26 private:
27
28 selrestr selrestrs_;
29
30 };
31
32 };
33};
34
35#endif /* end of include guard: ROLE_H_249F9A9C */
diff --git a/generator/schema.sql b/generator/schema.sql index 410b536..c3e54d8 100644 --- a/generator/schema.sql +++ b/generator/schema.sql
@@ -1,286 +1,204 @@
1DROP TABLE IF EXISTS `verbs`; 1CREATE TABLE `notions` (
2CREATE TABLE `verbs` ( 2 `notion_id` INTEGER PRIMARY KEY,
3 `verb_id` INTEGER PRIMARY KEY, 3 `part_of_speech` SMALLINT NOT NULL,
4 `infinitive` VARCHAR(32) NOT NULL, 4 `wnid` INTEGER,
5 `past_tense` VARCHAR(32) NOT NULL, 5 `images` INTEGER
6 `past_participle` VARCHAR(32) NOT NULL,
7 `ing_form` VARCHAR(32) NOT NULL,
8 `s_form` VARCHAR(32) NOT NULL
9); 6);
10 7
11DROP TABLE IF EXISTS `groups`; 8CREATE UNIQUE INDEX `notion_by_wnid` ON `notions`(`wnid`);
12CREATE TABLE `groups` (
13 `group_id` INTEGER PRIMARY KEY,
14 `data` BLOB NOT NULL
15);
16
17DROP TABLE IF EXISTS `frames`;
18CREATE TABLE `frames` (
19 `frame_id` INTEGER PRIMARY KEY,
20 `group_id` INTEGER NOT NULL,
21 `data` BLOB NOT NULL,
22 FOREIGN KEY (`group_id`) REFERENCES `groups`(`group_id`)
23);
24 9
25DROP TABLE IF EXISTS `verb_groups`;
26CREATE TABLE `verb_groups` (
27 `verb_id` INTEGER NOT NULL,
28 `group_id` INTEGER NOT NULL,
29 FOREIGN KEY (`verb_id`) REFERENCES `verbs`(`verb_id`),
30 FOREIGN KEY (`group_id`) REFERENCES `groups`(`group_id`)
31);
32
33DROP TABLE IF EXISTS `adjectives`;
34CREATE TABLE `adjectives` (
35 `adjective_id` INTEGER PRIMARY KEY,
36 `base_form` VARCHAR(32) NOT NULL,
37 `comparative` VARCHAR(32),
38 `superlative` VARCHAR(32),
39 `position` CHAR(1),
40 `complexity` INTEGER NOT NULL
41);
42
43DROP TABLE IF EXISTS `adverbs`;
44CREATE TABLE `adverbs` (
45 `adverb_id` INTEGER PRIMARY KEY,
46 `base_form` VARCHAR(32) NOT NULL,
47 `comparative` VARCHAR(32),
48 `superlative` VARCHAR(32),
49 `complexity` INTEGER NOT NULL
50);
51
52DROP TABLE IF EXISTS `nouns`;
53CREATE TABLE `nouns` (
54 `noun_id` INTEGER PRIMARY KEY,
55 `singular` VARCHAR(32) NOT NULL,
56 `plural` VARCHAR(32),
57 `proper` INTEGER(1) NOT NULL,
58 `complexity` INTEGER NOT NULL,
59 `images` INTEGER NOT NULL,
60 `wnid` INTEGER NOT NULL
61);
62
63DROP TABLE IF EXISTS `hypernymy`;
64CREATE TABLE `hypernymy` ( 10CREATE TABLE `hypernymy` (
65 `hypernym_id` INTEGER NOT NULL, 11 `hypernym_id` INTEGER NOT NULL,
66 `hyponym_id` INTEGER NOT NULL, 12 `hyponym_id` INTEGER NOT NULL
67 FOREIGN KEY (`hypernym_id`) REFERENCES `nouns`(`noun_id`),
68 FOREIGN KEY (`hyponym_id`) REFERENCES `nouns`(`noun_id`)
69); 13);
70 14
71DROP TABLE IF EXISTS `instantiation`; 15CREATE INDEX `hyponym_of` ON `hypernymy`(`hypernym_id`);
16CREATE INDEX `hypernym_of` ON `hypernymy`(`hyponym_id`);
17
72CREATE TABLE `instantiation` ( 18CREATE TABLE `instantiation` (
73 `class_id` INTEGER NOT NULL, 19 `class_id` INTEGER NOT NULL,
74 `instance_id` INTEGER NOT NULL, 20 `instance_id` INTEGER NOT NULL
75 FOREIGN KEY (`class_id`) REFERENCES `nouns`(`noun_id`),
76 FOREIGN KEY (`instance_id`) REFERENCES `nouns`(`noun_id`)
77); 21);
78 22
79DROP TABLE IF EXISTS `member_meronymy`; 23CREATE INDEX `instance_of` ON `instantiation`(`class_id`);
24CREATE INDEX `class_of` ON `instantiation`(`instance_id`);
25
80CREATE TABLE `member_meronymy` ( 26CREATE TABLE `member_meronymy` (
81 `meronym_id` INTEGER NOT NULL, 27 `meronym_id` INTEGER NOT NULL,
82 `holonym_id` INTEGER NOT NULL, 28 `holonym_id` INTEGER NOT NULL
83 FOREIGN KEY (`meronym_id`) REFERENCES `nouns`(`noun_id`),
84 FOREIGN KEY (`holonym_id`) REFERENCES `nouns`(`noun_id`)
85); 29);
86 30
87DROP TABLE IF EXISTS `part_meronymy`; 31CREATE INDEX `member_holonym_of` ON `member_meronymy`(`meronym_id`);
32CREATE INDEX `member_meronym_of` ON `member_meronymy`(`holonym_id`);
33
88CREATE TABLE `part_meronymy` ( 34CREATE TABLE `part_meronymy` (
89 `meronym_id` INTEGER NOT NULL, 35 `meronym_id` INTEGER NOT NULL,
90 `holonym_id` INTEGER NOT NULL, 36 `holonym_id` INTEGER NOT NULL
91 FOREIGN KEY (`meronym_id`) REFERENCES `nouns`(`noun_id`),
92 FOREIGN KEY (`holonym_id`) REFERENCES `nouns`(`noun_id`)
93); 37);
94 38
95DROP TABLE IF EXISTS `substance_meronymy`; 39CREATE INDEX `part_holonym_of` ON `part_meronymy`(`meronym_id`);
40CREATE INDEX `part_meronym_of` ON `part_meronymy`(`holonym_id`);
41
96CREATE TABLE `substance_meronymy` ( 42CREATE TABLE `substance_meronymy` (
97 `meronym_id` INTEGER NOT NULL, 43 `meronym_id` INTEGER NOT NULL,
98 `holonym_id` INTEGER NOT NULL, 44 `holonym_id` INTEGER NOT NULL
99 FOREIGN KEY (`meronym_id`) REFERENCES `nouns`(`noun_id`),
100 FOREIGN KEY (`holonym_id`) REFERENCES `nouns`(`noun_id`)
101); 45);
102 46
103DROP TABLE IF EXISTS `variation`; 47CREATE INDEX `substance_holonym_of` ON `substance_meronymy`(`meronym_id`);
48CREATE INDEX `substance_meronym_of` ON `substance_meronymy`(`holonym_id`);
49
104CREATE TABLE `variation` ( 50CREATE TABLE `variation` (
105 `noun_id` INTEGER NOT NULL, 51 `noun_id` INTEGER NOT NULL,
106 `adjective_id` INTEGER NOT NULL, 52 `adjective_id` INTEGER NOT NULL
107 FOREIGN KEY (`noun_id`) REFERENCES `nouns`(`noun_id`),
108 FOREIGN KEY (`adjective_id`) REFERENCES `adjectives`(`adjective_id`)
109); 53);
110 54
111DROP TABLE IF EXISTS `noun_antonymy`; 55CREATE INDEX `variant_of` ON `variation`(`noun_id`);
112CREATE TABLE `noun_antonymy` ( 56CREATE INDEX `attribute_of` ON `variation`(`adjective_id`);
113 `noun_1_id` INTEGER NOT NULL,
114 `noun_2_id` INTEGER NOT NULL,
115 FOREIGN KEY (`noun_1_id`) REFERENCES `nouns`(`noun_id`),
116 FOREIGN KEY (`noun_2_id`) REFERENCES `nouns`(`noun_id`)
117);
118 57
119DROP TABLE IF EXISTS `adjective_antonymy`; 58CREATE TABLE `similarity` (
120CREATE TABLE `adjective_antonymy` (
121 `adjective_1_id` INTEGER NOT NULL, 59 `adjective_1_id` INTEGER NOT NULL,
122 `adjective_2_id` INTEGER NOT NULL, 60 `adjective_2_id` INTEGER NOT NULL
123 FOREIGN KEY (`adjective_1_id`) REFERENCES `adjectives`(`adjective_id`), 61);
124 FOREIGN KEY (`adjective_2_id`) REFERENCES `adjectives`(`adjective_id`) 62
63CREATE INDEX `similar_to` ON `similarity`(`adjective_1_id`);
64
65CREATE TABLE `is_a` (
66 `notion_id` INTEGER NOT NULL,
67 `groupname` VARCHAR(32) NOT NULL
125); 68);
126 69
127DROP TABLE IF EXISTS `adverb_antonymy`; 70CREATE TABLE `entailment` (
128CREATE TABLE `adverb_antonymy` ( 71 `given_id` INTEGER NOT NULL,
129 `adverb_1_id` INTEGER NOT NULL, 72 `entailment_id` INTEGER NOT NULL
130 `adverb_2_id` INTEGER NOT NULL, 73);
131 FOREIGN KEY (`adverb_1_id`) REFERENCES `adverbs`(`adverb_id`), 74
132 FOREIGN KEY (`adverb_2_id`) REFERENCES `adverbs`(`adverb_id`) 75CREATE INDEX `entailment_of` ON `entailment`(`given_id`);
76CREATE INDEX `entailed_by` ON `entailment`(`entailment_id`);
77
78CREATE TABLE `causality` (
79 `cause_id` INTEGER NOT NULL,
80 `effect_id` INTEGER NOT NULL
81);
82
83CREATE INDEX `effect_of` ON `causality`(`cause_id`);
84CREATE INDEX `cause_of` ON `causality`(`effect_id`);
85
86CREATE TABLE `words` (
87 `word_id` INTEGER PRIMARY KEY,
88 `notion_id` INTEGER NOT NULL,
89 `lemma_id` INTEGER NOT NULL,
90 `tag_count` INTEGER,
91 `position` SMALLINT,
92 `group_id` INTEGER
93);
94
95CREATE INDEX `notion_words` ON `words`(`notion_id`);
96CREATE INDEX `lemma_words` ON `words`(`lemma_id`);
97CREATE INDEX `group_words` ON `words`(`group_id`);
98
99CREATE TABLE `antonymy` (
100 `antonym_1_id` INTEGER NOT NULL,
101 `antonym_2_id` INTEGER NOT NULL
133); 102);
134 103
135DROP TABLE IF EXISTS `specification`; 104CREATE INDEX `antonym_of` ON `antonymy`(`antonym_1_id`);
105
136CREATE TABLE `specification` ( 106CREATE TABLE `specification` (
137 `general_id` INTEGER NOT NULL, 107 `general_id` INTEGER NOT NULL,
138 `specific_id` INTEGER NOT NULL, 108 `specific_id` INTEGER NOT NULL
139 FOREIGN KEY (`general_id`) REFERENCES `adjectives`(`adjective_id`),
140 FOREIGN KEY (`specific_id`) REFERENCES `adjectives`(`adjective_id`)
141); 109);
142 110
143DROP TABLE IF EXISTS `pertainymy`; 111CREATE INDEX `specification_of` ON `specification`(`general_id`);
112CREATE INDEX `generalization_of` ON `specification`(`specific_id`);
113
144CREATE TABLE `pertainymy` ( 114CREATE TABLE `pertainymy` (
145 `noun_id` INTEGER NOT NULL, 115 `noun_id` INTEGER NOT NULL,
146 `pertainym_id` INTEGER NOT NULL, 116 `pertainym_id` INTEGER NOT NULL
147 FOREIGN KEY (`noun_id`) REFERENCES `nouns`(`noun_id`),
148 FOREIGN KEY (`pertainym_id`) REFERENCES `adjectives`(`adjective_id`)
149); 117);
150 118
151DROP TABLE IF EXISTS `mannernymy`; 119CREATE INDEX `pertainym_of` ON `pertainymy`(`noun_id`);
120CREATE INDEX `anti_pertainym_of` ON `pertainymy`(`pertainym_id`);
121
152CREATE TABLE `mannernymy` ( 122CREATE TABLE `mannernymy` (
153 `adjective_id` INTEGER NOT NULL, 123 `adjective_id` INTEGER NOT NULL,
154 `mannernym_id` INTEGER NOT NULL, 124 `mannernym_id` INTEGER NOT NULL
155 FOREIGN KEY (`adjective_id`) REFERENCES `adjectives`(`adjective_id`),
156 FOREIGN KEY (`mannernym_id`) REFERENCES `adverbs`(`adverb_id`)
157); 125);
158 126
159DROP TABLE IF EXISTS `noun_synonymy`; 127CREATE INDEX `mannernym_of` ON `mannernymy`(`adjective_id`);
160CREATE TABLE `noun_synonymy` ( 128CREATE INDEX `anti_mannernym_of` ON `mannernymy`(`mannernym_id`);
161 `noun_1_id` INTEGER NOT NULL,
162 `noun_2_id` INTEGER NOT NULL,
163 FOREIGN KEY (`noun_1_id`) REFERENCES `nouns`(`nouns_id`),
164 FOREIGN KEY (`noun_2_id`) REFERENCES `nouns`(`nouns_id`)
165);
166 129
167DROP TABLE IF EXISTS `adjective_synonymy`; 130CREATE TABLE `usage` (
168CREATE TABLE `adjective_synonymy` ( 131 `domain_id` INTEGER NOT NULL,
169 `adjective_1_id` INTEGER NOT NULL, 132 `term_id` INTEGER NOT NULL
170 `adjective_2_id` INTEGER NOT NULL,
171 FOREIGN KEY (`adjective_1_id`) REFERENCES `adjectives`(`adjective_id`),
172 FOREIGN KEY (`adjective_2_id`) REFERENCES `adjectives`(`adjective_id`)
173); 133);
174 134
175DROP TABLE IF EXISTS `adverb_synonymy`; 135CREATE INDEX `usage_term_of` ON `usage`(`domain_id`);
176CREATE TABLE `adverb_synonymy` ( 136CREATE INDEX `usage_domain_of` ON `usage`(`term_id`);
177 `adverb_1_id` INTEGER NOT NULL,
178 `adverb_2_id` INTEGER NOT NULL,
179 FOREIGN KEY (`adverb_1_id`) REFERENCES `adverbs`(`adverb_id`),
180 FOREIGN KEY (`adverb_2_id`) REFERENCES `adverbs`(`adverb_id`)
181);
182 137
183DROP TABLE IF EXISTS `noun_pronunciations`; 138CREATE TABLE `topicality` (
184CREATE TABLE `noun_pronunciations` ( 139 `domain_id` INTEGER NOT NULL,
185 `noun_id` INTEGER NOT NULL, 140 `term_id` INTEGER NOT NULL
186 `pronunciation` VARCHAR(64) NOT NULL,
187 `prerhyme` VARCHAR(8),
188 `rhyme` VARCHAR(64),
189 `syllables` INT NOT NULL,
190 `stress` VARCHAR(64) NOT NULL,
191 FOREIGN KEY (`noun_id`) REFERENCES `nouns`(`noun_id`)
192); 141);
193 142
194DROP TABLE IF EXISTS `verb_pronunciations`; 143CREATE INDEX `topical_term_of` ON `topicality`(`domain_id`);
195CREATE TABLE `verb_pronunciations` ( 144CREATE INDEX `topical_domain_of` ON `topicality`(`term_id`);
196 `verb_id` INTEGER NOT NULL,
197 `pronunciation` VARCHAR(64) NOT NULL,
198 `prerhyme` VARCHAR(8),
199 `rhyme` VARCHAR(64),
200 `syllables` INT NOT NULL,
201 `stress` VARCHAR(64) NOT NULL,
202 FOREIGN KEY (`verb_id`) REFERENCES `verbs`(`verb_id`)
203);
204 145
205DROP TABLE IF EXISTS `adjective_pronunciations`; 146CREATE TABLE `regionality` (
206CREATE TABLE `adjective_pronunciations` ( 147 `domain_id` INTEGER NOT NULL,
207 `adjective_id` INTEGER NOT NULL, 148 `term_id` INTEGER NOT NULL
208 `pronunciation` VARCHAR(64) NOT NULL,
209 `prerhyme` VARCHAR(8),
210 `rhyme` VARCHAR(64),
211 `syllables` INT NOT NULL,
212 `stress` VARCHAR(64) NOT NULL,
213 FOREIGN KEY (`adjective_id`) REFERENCES `adjectives`(`adjective_id`)
214); 149);
215 150
216DROP TABLE IF EXISTS `adverb_pronunciations`; 151CREATE INDEX `regional_term_of` ON `regionality`(`domain_id`);
217CREATE TABLE `adverb_pronunciations` ( 152CREATE INDEX `regional_domain_of` ON `regionality`(`term_id`);
218 `adverb_id` INTEGER NOT NULL,
219 `pronunciation` VARCHAR(64) NOT NULL,
220 `prerhyme` VARCHAR(8),
221 `rhyme` VARCHAR(64),
222 `syllables` INT NOT NULL,
223 `stress` VARCHAR(64) NOT NULL,
224 FOREIGN KEY (`adverb_id`) REFERENCES `adverbs`(`adverb_id`)
225);
226 153
227DROP TABLE IF EXISTS `noun_noun_derivation`; 154CREATE TABLE `forms` (
228CREATE TABLE `noun_noun_derivation` ( 155 `form_id` INTEGER PRIMARY KEY,
229 `noun_1_id` INTEGER NOT NULL, 156 `form` VARCHAR(32) NOT NULL,
230 `noun_2_id` INTEGER NOT NULL, 157 `complexity` SMALLINT NOT NULL,
231 FOREIGN KEY (`noun_1_id`) REFERENCES `nouns`(`noun_id`), 158 `proper` SMALLINT NOT NULL
232 FOREIGN KEY (`noun_2_id`) REFERENCES `nouns`(`noun_id`)
233); 159);
234 160
235DROP TABLE IF EXISTS `noun_adjective_derivation`; 161CREATE UNIQUE INDEX `form_by_string` ON `forms`(`form`);
236CREATE TABLE `noun_adjective_derivation` (
237 `noun_id` INTEGER NOT NULL,
238 `adjective_id` INTEGER NOT NULL,
239 FOREIGN KEY (`noun_id`) REFERENCES `nouns`(`noun_id`),
240 FOREIGN KEY (`adjective_id`) REFERENCES `adjectives`(`adjective_id`)
241);
242 162
243DROP TABLE IF EXISTS `noun_adverb_derivation`; 163CREATE TABLE `lemmas_forms` (
244CREATE TABLE `noun_adverb_derivation` ( 164 `lemma_id` INTEGER NOT NULL,
245 `noun_id` INTEGER NOT NULL, 165 `form_id` INTEGER NOT NULL,
246 `adverb_id` INTEGER NOT NULL, 166 `category` SMALLINT NOT NULL
247 FOREIGN KEY (`noun_id`) REFERENCES `nouns`(`noun_id`),
248 FOREIGN KEY (`adverb_id`) REFERENCES `adverbs`(`adverb_id`)
249); 167);
250 168
251DROP TABLE IF EXISTS `adjective_adjective_derivation`; 169CREATE INDEX `form_of` ON `lemmas_forms`(`lemma_id`);
252CREATE TABLE `adjective_adjective_derivation` ( 170CREATE INDEX `lemma_of` ON `lemmas_forms`(`form_id`);
253 `adjective_1_id` INTEGER NOT NULL, 171
254 `adjective_2_id` INTEGER NOT NULL, 172CREATE TABLE `pronunciations` (
255 FOREIGN KEY (`adjective_1_id`) REFERENCES `adjectives`(`adjective_id`), 173 `pronunciation_id` INTEGER PRIMARY KEY,
256 FOREIGN KEY (`adjective_2_id`) REFERENCES `adjectives`(`adjective_id`) 174 `phonemes` VARCHAR(64) NOT NULL,
175 `prerhyme` VARCHAR(8),
176 `rhyme` VARCHAR(64),
177 `syllables` INTEGER NOT NULL,
178 `stress` VARCHAR(64) NOT NULL
257); 179);
258 180
259DROP TABLE IF EXISTS `adjective_adverb_derivation`; 181CREATE TABLE `forms_pronunciations` (
260CREATE TABLE `adjective_adverb_derivation` ( 182 `form_id` INTEGER NOT NULL,
261 `adjective_id` INTEGER NOT NULL, 183 `pronunciation_id` INTEGER NOT NULL
262 `adverb_id` INTEGER NOT NULL,
263 FOREIGN KEY (`adjective_id`) REFERENCES `adjectives`(`adjective_id`),
264 FOREIGN KEY (`adverb_id`) REFERENCES `adverbs`(`adjective_id`)
265); 184);
266 185
267DROP TABLE IF EXISTS `adverb_adverb_derivation`; 186CREATE INDEX `pronunciation_of` ON `forms_pronunciations`(`form_id`);
268CREATE TABLE `adverb_adverb_derivation` ( 187CREATE INDEX `spelling_of` ON `forms_pronunciations`(`pronunciation_id`);
269 `adverb_1_id` INTEGER NOT NULL, 188
270 `adverb_2_id` INTEGER NOT NULL, 189CREATE TABLE `groups` (
271 FOREIGN KEY (`adverb_1_id`) REFERENCES `adverbs`(`adverb_id`), 190 `group_id` INTEGER PRIMARY KEY,
272 FOREIGN KEY (`adverb_2_id`) REFERENCES `adverbs`(`adverb_id`) 191 `data` BLOB NOT NULL
273); 192);
274 193
275DROP TABLE IF EXISTS `prepositions`; 194CREATE TABLE `frames` (
276CREATE TABLE `prepositions` ( 195 `frame_id` INTEGER PRIMARY KEY,
277 `preposition_id` INTEGER PRIMARY KEY, 196 `data` BLOB NOT NULL
278 `form` VARCHAR(32) NOT NULL
279); 197);
280 198
281DROP TABLE IF EXISTS `preposition_groups`; 199CREATE TABLE `groups_frames` (
282CREATE TABLE `preposition_groups` ( 200 `group_id` INTEGER NOT NULL,
283 `preposition_id` INTEGER NOT NULL, 201 `frame_id` INTEGER NOT NULL
284 `groupname` VARCHAR(32) NOT NULL,
285 FOREIGN KEY (`preposition_id`) REFERENCES `prepositions`(`preposition_id`)
286); 202);
203
204CREATE INDEX `frames_in` ON `groups_frames`(`group_id`);
diff --git a/generator/selrestr.cpp b/generator/selrestr.cpp new file mode 100644 index 0000000..8bdd3f6 --- /dev/null +++ b/generator/selrestr.cpp
@@ -0,0 +1,288 @@
1#include "selrestr.h"
2
3namespace verbly {
4 namespace generator {
5
6 selrestr::selrestr(const selrestr& other)
7 {
8 type_ = other.type_;
9
10 switch (type_)
11 {
12 case type::singleton:
13 {
14 singleton_.pos = other.singleton_.pos;
15 new(&singleton_.restriction) std::string(other.singleton_.restriction);
16
17 break;
18 }
19
20 case type::group:
21 {
22 new(&group_.children) std::list<selrestr>(other.group_.children);
23 group_.orlogic = other.group_.orlogic;
24
25 break;
26 }
27
28 case type::empty:
29 {
30 break;
31 }
32 }
33 }
34
35 selrestr::selrestr(selrestr&& other) : selrestr()
36 {
37 swap(*this, other);
38 }
39
40 selrestr& selrestr::operator=(selrestr other)
41 {
42 swap(*this, other);
43
44 return *this;
45 }
46
47 void swap(selrestr& first, selrestr& second)
48 {
49 using type = selrestr::type;
50
51 type tempType = first.type_;
52 int tempPos;
53 std::string tempRestriction;
54 std::list<selrestr> tempChildren;
55 bool tempOrlogic;
56
57 switch (tempType)
58 {
59 case type::singleton:
60 {
61 tempPos = first.singleton_.pos;
62 tempRestriction = std::move(first.singleton_.restriction);
63
64 break;
65 }
66
67 case type::group:
68 {
69 tempChildren = std::move(first.group_.children);
70 tempOrlogic = first.group_.orlogic;
71
72 break;
73 }
74
75 case type::empty:
76 {
77 break;
78 }
79 }
80
81 first.~selrestr();
82
83 first.type_ = second.type_;
84
85 switch (first.type_)
86 {
87 case type::singleton:
88 {
89 first.singleton_.pos = second.singleton_.pos;
90 new(&first.singleton_.restriction) std::string(std::move(second.singleton_.restriction));
91
92 break;
93 }
94
95 case type::group:
96 {
97 new(&first.group_.children) std::list<selrestr>(std::move(second.group_.children));
98 first.group_.orlogic = second.group_.orlogic;
99
100 break;
101 }
102
103 case type::empty:
104 {
105 break;
106 }
107 }
108
109 second.~selrestr();
110
111 second.type_ = tempType;
112
113 switch (second.type_)
114 {
115 case type::singleton:
116 {
117 second.singleton_.pos = tempPos;
118 new(&second.singleton_.restriction) std::string(std::move(tempRestriction));
119
120 break;
121 }
122
123 case type::group:
124 {
125 new(&second.group_.children) std::list<selrestr>(std::move(tempChildren));
126 second.group_.orlogic = tempOrlogic;
127
128 break;
129 }
130
131 case type::empty:
132 {
133 break;
134 }
135 }
136 }
137
138 selrestr::~selrestr()
139 {
140 switch (type_)
141 {
142 case type::singleton:
143 {
144 using string_type = std::string;
145 singleton_.restriction.~string_type();
146
147 break;
148 }
149
150 case type::group:
151 {
152 using list_type = std::list<selrestr>;
153 group_.children.~list_type();
154
155 break;
156 }
157
158 case type::empty:
159 {
160 break;
161 }
162 }
163 }
164
165 selrestr::selrestr() : type_(type::empty)
166 {
167 }
168
169 selrestr::selrestr(
170 std::string restriction,
171 bool pos) :
172 type_(type::singleton)
173 {
174 new(&singleton_.restriction) std::string(std::move(restriction));
175 singleton_.pos = pos;
176 }
177
178 std::string selrestr::getRestriction() const
179 {
180 if (type_ == type::singleton)
181 {
182 return singleton_.restriction;
183 } else {
184 throw std::domain_error("Only singleton selrestrs have restrictions");
185 }
186 }
187
188 bool selrestr::getPos() const
189 {
190 if (type_ == type::singleton)
191 {
192 return singleton_.pos;
193 } else {
194 throw std::domain_error("Only singleton selrestrs have positivity flags");
195 }
196 }
197
198 selrestr::selrestr(
199 std::list<selrestr> children,
200 bool orlogic) :
201 type_(type::group)
202 {
203 new(&group_.children) std::list<selrestr>(std::move(children));
204 group_.orlogic = orlogic;
205 }
206
207 std::list<selrestr> selrestr::getChildren() const
208 {
209 if (type_ == type::group)
210 {
211 return group_.children;
212 } else {
213 throw std::domain_error("Only group selrestrs have children");
214 }
215 }
216
217 std::list<selrestr>::const_iterator selrestr::begin() const
218 {
219 if (type_ == type::group)
220 {
221 return std::begin(group_.children);
222 } else {
223 throw std::domain_error("Only group selrestrs have children");
224 }
225 }
226
227 std::list<selrestr>::const_iterator selrestr::end() const
228 {
229 if (type_ == type::group)
230 {
231 return std::end(group_.children);
232 } else {
233 throw std::domain_error("Only group selrestrs have children");
234 }
235 }
236
237 bool selrestr::getOrlogic() const
238 {
239 if (type_ == type::group)
240 {
241 return group_.orlogic;
242 } else {
243 throw std::domain_error("Only group selrestrs have logic");
244 }
245 }
246
247 nlohmann::json selrestr::toJson() const
248 {
249 switch (type_)
250 {
251 case type::empty:
252 {
253 return {};
254 }
255
256 case type::singleton:
257 {
258 return {
259 {"type", singleton_.restriction},
260 {"pos", singleton_.pos}
261 };
262 }
263
264 case type::group:
265 {
266 std::string logic;
267 if (group_.orlogic)
268 {
269 logic = "or";
270 } else {
271 logic = "and";
272 }
273
274 std::list<nlohmann::json> children;
275 std::transform(std::begin(group_.children), std::end(group_.children), std::back_inserter(children), [] (const selrestr& child) {
276 return child.toJson();
277 });
278
279 return {
280 {"logic", logic},
281 {"children", children}
282 };
283 }
284 }
285 }
286
287 };
288};
diff --git a/generator/selrestr.h b/generator/selrestr.h new file mode 100644 index 0000000..5000970 --- /dev/null +++ b/generator/selrestr.h
@@ -0,0 +1,88 @@
1#ifndef SELRESTR_H_50652FB7
2#define SELRESTR_H_50652FB7
3
4#include <list>
5#include <string>
6#include <json.hpp>
7
8namespace verbly {
9 namespace generator {
10
11 class selrestr {
12 public:
13 enum class type {
14 empty,
15 singleton,
16 group
17 };
18
19 // Copy and move constructors
20
21 selrestr(const selrestr& other);
22 selrestr(selrestr&& other);
23
24 // Assignment
25
26 selrestr& operator=(selrestr other);
27
28 // Swap
29
30 friend void swap(selrestr& first, selrestr& second);
31
32 // Destructor
33
34 ~selrestr();
35
36 // Generic accessors
37
38 type getType() const
39 {
40 return type_;
41 }
42
43 // Empty
44
45 selrestr();
46
47 // Singleton
48
49 selrestr(std::string restriction, bool pos);
50
51 std::string getRestriction() const;
52
53 bool getPos() const;
54
55 // Group
56
57 selrestr(std::list<selrestr> children, bool orlogic);
58
59 std::list<selrestr> getChildren() const;
60
61 std::list<selrestr>::const_iterator begin() const;
62
63 std::list<selrestr>::const_iterator end() const;
64
65 bool getOrlogic() const;
66
67 // Helpers
68
69 nlohmann::json toJson() const;
70
71 private:
72 union {
73 struct {
74 bool pos;
75 std::string restriction;
76 } singleton_;
77 struct {
78 std::list<selrestr> children;
79 bool orlogic;
80 } group_;
81 };
82 type type_;
83 };
84
85 };
86};
87
88#endif /* end of include guard: SELRESTR_H_50652FB7 */
diff --git a/generator/word.cpp b/generator/word.cpp new file mode 100644 index 0000000..8ba3ce2 --- /dev/null +++ b/generator/word.cpp
@@ -0,0 +1,77 @@
1#include "word.h"
2#include <list>
3#include <string>
4#include "database.h"
5#include "notion.h"
6#include "lemma.h"
7#include "field.h"
8#include "group.h"
9
10namespace verbly {
11 namespace generator {
12
13 int word::nextId_ = 0;
14
15 word::word(
16 notion& n,
17 lemma& l) :
18 id_(nextId_++),
19 notion_(n),
20 lemma_(l)
21 {
22 }
23
24 word::word(
25 notion& n,
26 lemma& l,
27 int tagCount) :
28 id_(nextId_++),
29 notion_(n),
30 lemma_(l),
31 tagCount_(tagCount),
32 hasTagCount_(true)
33 {
34 }
35
36 void word::setAdjectivePosition(positioning adjectivePosition)
37 {
38 adjectivePosition_ = adjectivePosition;
39 }
40
41 void word::setVerbGroup(const group& verbGroup)
42 {
43 verbGroup_ = &verbGroup;
44 }
45
46 database& operator<<(database& db, const word& arg)
47 {
48 std::list<field> fields;
49
50 fields.emplace_back("word_id", arg.getId());
51 fields.emplace_back("notion_id", arg.getNotion().getId());
52 fields.emplace_back("lemma_id", arg.getLemma().getId());
53
54 if (arg.hasTagCount())
55 {
56 fields.emplace_back("tag_count", arg.getTagCount());
57 }
58
59 if ((arg.getNotion().getPartOfSpeech() == part_of_speech::adjective)
60 && (arg.getAdjectivePosition() != positioning::undefined))
61 {
62 fields.emplace_back("position", static_cast<int>(arg.getAdjectivePosition()));
63 }
64
65 if ((arg.getNotion().getPartOfSpeech() == part_of_speech::verb)
66 && (arg.hasVerbGroup()))
67 {
68 fields.emplace_back("group_id", arg.getVerbGroup().getId());
69 }
70
71 db.insertIntoTable("words", std::move(fields));
72
73 return db;
74 }
75
76 };
77};
diff --git a/generator/word.h b/generator/word.h new file mode 100644 index 0000000..bfed586 --- /dev/null +++ b/generator/word.h
@@ -0,0 +1,110 @@
1#ifndef WORD_H_91F99D46
2#define WORD_H_91F99D46
3
4#include <cassert>
5#include "enums.h"
6
7namespace verbly {
8 namespace generator {
9
10 class notion;
11 class lemma;
12 class database;
13 class group;
14
15 class word {
16 public:
17
18 // Constructors
19
20 word(notion& n, lemma& l);
21
22 word(notion& n, lemma& l, int tagCount);
23
24 // Mutators
25
26 void setAdjectivePosition(positioning adjectivePosition);
27
28 void setVerbGroup(const group& verbGroup);
29
30 // Accessors
31
32 int getId() const
33 {
34 return id_;
35 }
36
37 notion& getNotion()
38 {
39 return notion_;
40 }
41
42 const notion& getNotion() const
43 {
44 return notion_;
45 }
46
47 lemma& getLemma()
48 {
49 return lemma_;
50 }
51
52 const lemma& getLemma() const
53 {
54 return lemma_;
55 }
56
57 bool hasTagCount() const
58 {
59 return hasTagCount_;
60 }
61
62 int getTagCount() const
63 {
64 // Calling code should always call hasTagCount first.
65 assert(hasTagCount_);
66
67 return tagCount_;
68 }
69
70 positioning getAdjectivePosition() const
71 {
72 return adjectivePosition_;
73 }
74
75 bool hasVerbGroup() const
76 {
77 return (verbGroup_ != nullptr);
78 }
79
80 const group& getVerbGroup() const
81 {
82 // Calling code should always call hasVerbGroup first.
83 assert(verbGroup_ != nullptr);
84
85 return *verbGroup_;
86 }
87
88 private:
89
90 static int nextId_;
91
92 const int id_;
93 notion& notion_;
94 lemma& lemma_;
95 const int tagCount_ = 0;
96 const bool hasTagCount_ = false;
97
98 positioning adjectivePosition_ = positioning::undefined;
99 const group* verbGroup_ = nullptr;
100
101 };
102
103 // Serializer
104
105 database& operator<<(database& db, const word& arg);
106
107 };
108};
109
110#endif /* end of include guard: WORD_H_91F99D46 */
diff --git a/lib/adjective.cpp b/lib/adjective.cpp deleted file mode 100644 index ba8254a..0000000 --- a/lib/adjective.cpp +++ /dev/null
@@ -1,113 +0,0 @@
1#include "verbly.h"
2
3namespace verbly {
4
5 adjective::adjective()
6 {
7
8 }
9
10 adjective::adjective(const data& _data, int _id) : word(_data, _id)
11 {
12
13 }
14
15 std::string adjective::base_form() const
16 {
17 assert(_valid == true);
18
19 return _base_form;
20 }
21
22 std::string adjective::comparative_form() const
23 {
24 assert(_valid == true);
25
26 return _comparative_form;
27 }
28
29 std::string adjective::superlative_form() const
30 {
31 assert(_valid == true);
32
33 return _superlative_form;
34 }
35
36 adjective::positioning adjective::position() const
37 {
38 assert(_valid == true);
39
40 return _position;
41 }
42
43 bool adjective::has_comparative_form() const
44 {
45 assert(_valid == true);
46
47 return !_comparative_form.empty();
48 }
49
50 bool adjective::has_superlative_form() const
51 {
52 assert(_valid == true);
53
54 return !_superlative_form.empty();
55 }
56
57 bool adjective::has_position() const
58 {
59 assert(_valid == true);
60
61 return _position != adjective::positioning::undefined;
62 }
63
64 adjective_query adjective::antonyms() const
65 {
66 assert(_valid == true);
67
68 return _data->adjectives().antonym_of(*this);
69 }
70
71 adjective_query adjective::synonyms() const
72 {
73 assert(_valid == true);
74
75 return _data->adjectives().synonym_of(*this);
76 }
77
78 adjective_query adjective::generalizations() const
79 {
80 assert(_valid == true);
81
82 return _data->adjectives().generalization_of(*this);
83 }
84
85 adjective_query adjective::specifications() const
86 {
87 assert(_valid == true);
88
89 return _data->adjectives().specification_of(*this);
90 }
91
92 noun_query adjective::anti_pertainyms() const
93 {
94 assert(_valid == true);
95
96 return _data->nouns().anti_pertainym_of(*this);
97 }
98
99 adverb_query adjective::mannernyms() const
100 {
101 assert(_valid == true);
102
103 return _data->adverbs().mannernym_of(*this);
104 }
105
106 noun_query adjective::attributes() const
107 {
108 assert(_valid == true);
109
110 return _data->nouns().attribute_of(*this);
111 }
112
113};
diff --git a/lib/adjective.h b/lib/adjective.h deleted file mode 100644 index a6eb293..0000000 --- a/lib/adjective.h +++ /dev/null
@@ -1,51 +0,0 @@
1#ifndef ADJECTIVE_H_87B3FB75
2#define ADJECTIVE_H_87B3FB75
3
4namespace verbly {
5
6 class adjective_query;
7 class adverb_query;
8 class noun_query;
9
10 class adjective : public word {
11 public:
12 enum class positioning {
13 undefined,
14 predicate,
15 attributive,
16 postnominal
17 };
18
19 private:
20 std::string _base_form;
21 std::string _comparative_form;
22 std::string _superlative_form;
23 positioning _position = positioning::undefined;
24
25 friend class adjective_query;
26
27 public:
28 adjective();
29 adjective(const data& _data, int _id);
30
31 std::string base_form() const;
32 std::string comparative_form() const;
33 std::string superlative_form() const;
34 positioning position() const;
35
36 bool has_comparative_form() const;
37 bool has_superlative_form() const;
38 bool has_position() const;
39
40 adjective_query antonyms() const;
41 adjective_query synonyms() const;
42 adjective_query generalizations() const;
43 adjective_query specifications() const;
44 noun_query anti_pertainyms() const;
45 adverb_query mannernyms() const;
46 noun_query attributes() const;
47 };
48
49};
50
51#endif /* end of include guard: ADJECTIVE_H_87B3FB75 */
diff --git a/lib/adjective_query.cpp b/lib/adjective_query.cpp deleted file mode 100644 index 90ccef4..0000000 --- a/lib/adjective_query.cpp +++ /dev/null
@@ -1,1072 +0,0 @@
1#include "verbly.h"
2
3namespace verbly {
4
5 adjective_query::adjective_query(const data& _data) : _data(_data)
6 {
7
8 }
9
10 adjective_query& adjective_query::limit(int _limit)
11 {
12 if ((_limit > 0) || (_limit == unlimited))
13 {
14 this->_limit = _limit;
15 }
16
17 return *this;
18 }
19
20 adjective_query& adjective_query::random()
21 {
22 this->_random = true;
23
24 return *this;
25 }
26
27 adjective_query& adjective_query::except(const adjective& _word)
28 {
29 _except.push_back(_word);
30
31 return *this;
32 }
33
34 adjective_query& adjective_query::rhymes_with(const word& _word)
35 {
36 for (auto rhyme : _word.get_rhymes())
37 {
38 _rhymes.push_back(rhyme);
39 }
40
41 if (dynamic_cast<const adjective*>(&_word) != nullptr)
42 {
43 _except.push_back(dynamic_cast<const adjective&>(_word));
44 }
45
46 return *this;
47 }
48
49 adjective_query& adjective_query::rhymes_with(rhyme _r)
50 {
51 _rhymes.push_back(_r);
52
53 return *this;
54 }
55
56 adjective_query& adjective_query::has_pronunciation()
57 {
58 this->_has_prn = true;
59
60 return *this;
61 }
62
63 adjective_query& adjective_query::has_rhyming_noun()
64 {
65 _has_rhyming_noun = true;
66
67 return *this;
68 }
69
70 adjective_query& adjective_query::has_rhyming_adjective()
71 {
72 _has_rhyming_adjective = true;
73
74 return *this;
75 }
76
77 adjective_query& adjective_query::has_rhyming_adverb()
78 {
79 _has_rhyming_adverb = true;
80
81 return *this;
82 }
83
84 adjective_query& adjective_query::has_rhyming_verb()
85 {
86 _has_rhyming_verb = true;
87
88 return *this;
89 }
90
91 adjective_query& adjective_query::with_stress(filter<std::vector<bool>> _arg)
92 {
93 _stress = _arg;
94
95 return *this;
96 }
97
98 adjective_query& adjective_query::with_prefix(filter<std::string> _f)
99 {
100 _f.clean();
101 _with_prefix = _f;
102
103 return *this;
104 }
105
106 adjective_query& adjective_query::with_suffix(filter<std::string> _f)
107 {
108 _f.clean();
109 _with_suffix = _f;
110
111 return *this;
112 }
113
114 adjective_query& adjective_query::with_complexity(int _arg)
115 {
116 _with_complexity = _arg;
117
118 return *this;
119 }
120
121 adjective_query& adjective_query::requires_comparative_form()
122 {
123 _requires_comparative_form = true;
124
125 return *this;
126 }
127
128 adjective_query& adjective_query::requires_superlative_form()
129 {
130 _requires_superlative_form = true;
131
132 return *this;
133 }
134
135 adjective_query& adjective_query::position(adjective::positioning pos)
136 {
137 _position = pos;
138
139 return *this;
140 }
141
142 adjective_query& adjective_query::is_variant()
143 {
144 this->_is_variant = true;
145
146 return *this;
147 }
148
149 adjective_query& adjective_query::variant_of(filter<noun> _f)
150 {
151 _f.clean();
152 _variant_of = _f;
153
154 return *this;
155 }
156
157 adjective_query& adjective_query::has_antonyms()
158 {
159 this->_is_antonymic = true;
160
161 return *this;
162 }
163
164 adjective_query& adjective_query::antonym_of(filter<adjective> _f)
165 {
166 _f.clean();
167 _antonym_of = _f;
168
169 return *this;
170 }
171
172 adjective_query& adjective_query::has_synonyms()
173 {
174 this->_is_synonymic = true;
175
176 return *this;
177 }
178
179 adjective_query& adjective_query::synonym_of(filter<adjective> _f)
180 {
181 _f.clean();
182 _synonym_of = _f;
183
184 return *this;
185 }
186
187 adjective_query& adjective_query::is_generalization()
188 {
189 this->_is_generalization = true;
190
191 return *this;
192 }
193
194 adjective_query& adjective_query::generalization_of(filter<adjective> _f)
195 {
196 _f.clean();
197 _generalization_of = _f;
198
199 return *this;
200 }
201
202 adjective_query& adjective_query::is_specification()
203 {
204 this->_is_specification = true;
205
206 return *this;
207 }
208
209 adjective_query& adjective_query::specification_of(filter<adjective> _f)
210 {
211 _f.clean();
212 _specification_of = _f;
213
214 return *this;
215 }
216
217 adjective_query& adjective_query::is_pertainymic()
218 {
219 this->_is_pertainymic = true;
220
221 return *this;
222 }
223
224 adjective_query& adjective_query::pertainym_of(filter<noun> _f)
225 {
226 _f.clean();
227 _pertainym_of = _f;
228
229 return *this;
230 }
231
232 adjective_query& adjective_query::is_mannernymic()
233 {
234 this->_is_mannernymic = true;
235
236 return *this;
237 }
238
239 adjective_query& adjective_query::anti_mannernym_of(filter<adverb> _f)
240 {
241 _f.clean();
242 _anti_mannernym_of = _f;
243
244 return *this;
245 }
246 /*
247 adjective_query& adjective_query::derived_from(const word& _w)
248 {
249 if (dynamic_cast<const adjective*>(&_w) != nullptr)
250 {
251 _derived_from_adjective.push_back(dynamic_cast<const adjective&>(_w));
252 } else if (dynamic_cast<const adverb*>(&_w) != nullptr)
253 {
254 _derived_from_adverb.push_back(dynamic_cast<const adverb&>(_w));
255 } else if (dynamic_cast<const noun*>(&_w) != nullptr)
256 {
257 _derived_from_noun.push_back(dynamic_cast<const noun&>(_w));
258 }
259
260 return *this;
261 }
262
263 adjective_query& adjective_query::not_derived_from(const word& _w)
264 {
265 if (dynamic_cast<const adjective*>(&_w) != nullptr)
266 {
267 _not_derived_from_adjective.push_back(dynamic_cast<const adjective&>(_w));
268 } else if (dynamic_cast<const adverb*>(&_w) != nullptr)
269 {
270 _not_derived_from_adverb.push_back(dynamic_cast<const adverb&>(_w));
271 } else if (dynamic_cast<const noun*>(&_w) != nullptr)
272 {
273 _not_derived_from_noun.push_back(dynamic_cast<const noun&>(_w));
274 }
275
276 return *this;
277 }
278 */
279 std::list<adjective> adjective_query::run() const
280 {
281 std::stringstream construct;
282 construct << "SELECT adjective_id, base_form, comparative, superlative, position FROM adjectives";
283 std::list<std::string> conditions;
284 std::list<binding> bindings;
285
286 if (_has_prn)
287 {
288 conditions.push_back("adjective_id IN (SELECT adjective_id FROM adjective_pronunciations)");
289 }
290
291 if (!_rhymes.empty())
292 {
293 std::list<std::string> clauses(_rhymes.size(), "(prerhyme != ? AND rhyme = ?)");
294 std::string cond = "adjective_id IN (SELECT adjective_id FROM adjective_pronunciations WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
295 conditions.push_back(cond);
296
297 for (auto rhy : _rhymes)
298 {
299 bindings.emplace_back(rhy.get_prerhyme());
300 bindings.emplace_back(rhy.get_rhyme());
301 }
302 }
303
304 if (_has_rhyming_noun)
305 {
306 conditions.push_back("adjective_id IN (SELECT a.adjective_id FROM adjectives AS a INNER JOIN adjective_pronunciations AS curp ON curp.adjective_id = a.adjective_id INNER JOIN noun_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)");
307 }
308
309 if (_has_rhyming_adjective)
310 {
311 conditions.push_back("adjective_id IN (SELECT a.adjective_id FROM adjectives AS a INNER JOIN adjective_pronunciations AS curp ON curp.adjective_id = a.adjective_id INNER JOIN adjective_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme AND rhmp.adjective_id != curp.adjective_id)");
312 }
313
314 if (_has_rhyming_adverb)
315 {
316 conditions.push_back("adjective_id IN (SELECT a.adjective_id FROM adjectives AS a INNER JOIN adjective_pronunciations AS curp ON curp.adjective_id = a.adjective_id INNER JOIN adverb_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)");
317 }
318
319 if (_has_rhyming_verb)
320 {
321 conditions.push_back("adjective_id IN (SELECT a.adjective_id FROM adjectives AS a INNER JOIN adjective_pronunciations AS curp ON curp.adjective_id = a.adjective_id INNER JOIN verb_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)");
322 }
323
324 for (auto except : _except)
325 {
326 conditions.push_back("adjective_id != ?");
327 bindings.emplace_back(except._id);
328 }
329
330 if (_requires_comparative_form)
331 {
332 conditions.push_back("comparative IS NOT NULL");
333 }
334
335 if (_requires_superlative_form)
336 {
337 conditions.push_back("superlative IS NOT NULL");
338 }
339
340 switch (_position)
341 {
342 case adjective::positioning::predicate: conditions.push_back("position = 'p'"); break;
343 case adjective::positioning::attributive: conditions.push_back("position = 'a'"); break;
344 case adjective::positioning::postnominal: conditions.push_back("position = 'i'"); break;
345 case adjective::positioning::undefined: break;
346 }
347
348 if (!_stress.empty())
349 {
350 std::stringstream cond;
351 if (_stress.get_notlogic())
352 {
353 cond << "adjective_id NOT IN";
354 } else {
355 cond << "adjective_id IN";
356 }
357
358 cond << "(SELECT adjective_id FROM adjective_pronunciations WHERE ";
359
360 std::function<std::string (filter<std::vector<bool>>, bool)> recur = [&] (filter<std::vector<bool>> f, bool notlogic) -> std::string {
361 switch (f.get_type())
362 {
363 case filter<std::vector<bool>>::type::singleton:
364 {
365 std::ostringstream _val;
366 for (auto syl : f.get_elem())
367 {
368 if (syl)
369 {
370 _val << "1";
371 } else {
372 _val << "0";
373 }
374 }
375
376 bindings.emplace_back(_val.str());
377
378 if (notlogic == f.get_notlogic())
379 {
380 return "stress = ?";
381 } else {
382 return "stress != ?";
383 }
384 }
385
386 case filter<std::vector<bool>>::type::group:
387 {
388 bool truelogic = notlogic != f.get_notlogic();
389
390 std::list<std::string> clauses;
391 std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<std::vector<bool>> f2) {
392 return recur(f2, truelogic);
393 });
394
395 if (truelogic == f.get_orlogic())
396 {
397 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")";
398 } else {
399 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
400 }
401 }
402 }
403 };
404
405 cond << recur(_stress, _stress.get_notlogic());
406 cond << ")";
407 conditions.push_back(cond.str());
408 }
409
410 if (!_with_prefix.empty())
411 {
412 std::function<std::string (filter<std::string>, bool)> recur = [&] (filter<std::string> f, bool notlogic) -> std::string {
413 switch (f.get_type())
414 {
415 case filter<std::string>::type::singleton:
416 {
417 bindings.emplace_back(f.get_elem() + "%");
418
419 if (notlogic == f.get_notlogic())
420 {
421 return "base_form LIKE ?";
422 } else {
423 return "base_form NOT LIKE ?";
424 }
425 }
426
427 case filter<std::string>::type::group:
428 {
429 bool truelogic = notlogic != f.get_notlogic();
430
431 std::list<std::string> clauses;
432 std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<std::string> f2) {
433 return recur(f2, truelogic);
434 });
435
436 if (truelogic == f.get_orlogic())
437 {
438 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")";
439 } else {
440 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
441 }
442 }
443 }
444 };
445
446 conditions.push_back(recur(_with_prefix, false));
447 }
448
449 if (!_with_suffix.empty())
450 {
451 std::function<std::string (filter<std::string>, bool)> recur = [&] (filter<std::string> f, bool notlogic) -> std::string {
452 switch (f.get_type())
453 {
454 case filter<std::string>::type::singleton:
455 {
456 bindings.emplace_back("%" + f.get_elem());
457
458 if (notlogic == f.get_notlogic())
459 {
460 return "base_form LIKE ?";
461 } else {
462 return "base_form NOT LIKE ?";
463 }
464 }
465
466 case filter<std::string>::type::group:
467 {
468 bool truelogic = notlogic != f.get_notlogic();
469
470 std::list<std::string> clauses;
471 std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<std::string> f2) {
472 return recur(f2, truelogic);
473 });
474
475 if (truelogic == f.get_orlogic())
476 {
477 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")";
478 } else {
479 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
480 }
481 }
482 }
483 };
484
485 conditions.push_back(recur(_with_suffix, false));
486 }
487
488 if (_with_complexity != unlimited)
489 {
490 conditions.push_back("complexity = ?");
491 bindings.emplace_back(_with_complexity);
492 }
493
494 if (_is_variant)
495 {
496 conditions.push_back("adjective_id IN (SELECT adjective_id FROM variation)");
497 }
498
499 if (!_variant_of.empty())
500 {
501 std::stringstream cond;
502 if (_variant_of.get_notlogic())
503 {
504 cond << "adjective_id NOT IN";
505 } else {
506 cond << "adjective_id IN";
507 }
508
509 cond << "(SELECT adjective_id FROM variation WHERE ";
510
511 std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string {
512 switch (f.get_type())
513 {
514 case filter<noun>::type::singleton:
515 {
516 bindings.emplace_back(f.get_elem()._id);
517
518 if (notlogic == f.get_notlogic())
519 {
520 return "noun_id = ?";
521 } else {
522 return "noun_id != ?";
523 }
524 }
525
526 case filter<noun>::type::group:
527 {
528 bool truelogic = notlogic != f.get_notlogic();
529
530 std::list<std::string> clauses;
531 std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) {
532 return recur(f2, truelogic);
533 });
534
535 if (truelogic == f.get_orlogic())
536 {
537 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")";
538 } else {
539 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
540 }
541 }
542 }
543 };
544
545 cond << recur(_variant_of, _variant_of.get_notlogic());
546 cond << ")";
547 conditions.push_back(cond.str());
548 }
549
550 if (_is_antonymic)
551 {
552 conditions.push_back("adjective_id IN (SELECT adjective_2_id FROM adjective_antonymy)");
553 }
554
555 if (!_antonym_of.empty())
556 {
557 std::stringstream cond;
558 if (_antonym_of.get_notlogic())
559 {
560 cond << "adjective_id NOT IN";
561 } else {
562 cond << "adjective_id IN";
563 }
564
565 cond << "(SELECT adjective_2_id FROM adjective_antonymy WHERE ";
566
567 std::function<std::string (filter<adjective>, bool)> recur = [&] (filter<adjective> f, bool notlogic) -> std::string {
568 switch (f.get_type())
569 {
570 case filter<adjective>::type::singleton:
571 {
572 bindings.emplace_back(f.get_elem()._id);
573
574 if (notlogic == f.get_notlogic())
575 {
576 return "adjective_1_id = ?";
577 } else {
578 return "adjective_1_id != ?";
579 }
580 }
581
582 case filter<adjective>::type::group:
583 {
584 bool truelogic = notlogic != f.get_notlogic();
585
586 std::list<std::string> clauses;
587 std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<adjective> f2) {
588 return recur(f2, truelogic);
589 });
590
591 if (truelogic == f.get_orlogic())
592 {
593 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")";
594 } else {
595 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
596 }
597 }
598 }
599 };
600
601 cond << recur(_antonym_of, _antonym_of.get_notlogic());
602 cond << ")";
603 conditions.push_back(cond.str());
604 }
605
606 if (_is_synonymic)
607 {
608 conditions.push_back("adjective_id IN (SELECT adjective_2_id FROM adjective_synonymy)");
609 }
610
611 if (!_synonym_of.empty())
612 {
613 std::stringstream cond;
614 if (_synonym_of.get_notlogic())
615 {
616 cond << "adjective_id NOT IN";
617 } else {
618 cond << "adjective_id IN";
619 }
620
621 cond << "(SELECT adjective_2_id FROM adjective_synonymy WHERE ";
622
623 std::function<std::string (filter<adjective>, bool)> recur = [&] (filter<adjective> f, bool notlogic) -> std::string {
624 switch (f.get_type())
625 {
626 case filter<adjective>::type::singleton:
627 {
628 bindings.emplace_back(f.get_elem()._id);
629
630 if (notlogic == f.get_notlogic())
631 {
632 return "adjective_1_id = ?";
633 } else {
634 return "adjective_1_id != ?";
635 }
636 }
637
638 case filter<adjective>::type::group:
639 {
640 bool truelogic = notlogic != f.get_notlogic();
641
642 std::list<std::string> clauses;
643 std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<adjective> f2) {
644 return recur(f2, truelogic);
645 });
646
647 if (truelogic == f.get_orlogic())
648 {
649 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")";
650 } else {
651 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
652 }
653 }
654 }
655 };
656
657 cond << recur(_synonym_of, _synonym_of.get_notlogic());
658 cond << ")";
659 conditions.push_back(cond.str());
660 }
661
662 if (_is_generalization)
663 {
664 conditions.push_back("adjective_id IN (SELECT general_id FROM specification)");
665 }
666
667 if (!_generalization_of.empty())
668 {
669 std::stringstream cond;
670 if (_generalization_of.get_notlogic())
671 {
672 cond << "adjective_id NOT IN";
673 } else {
674 cond << "adjective_id IN";
675 }
676
677 cond << "(SELECT general_id FROM specification WHERE ";
678
679 std::function<std::string (filter<adjective>, bool)> recur = [&] (filter<adjective> f, bool notlogic) -> std::string {
680 switch (f.get_type())
681 {
682 case filter<adjective>::type::singleton:
683 {
684 bindings.emplace_back(f.get_elem()._id);
685
686 if (notlogic == f.get_notlogic())
687 {
688 return "specific_id = ?";
689 } else {
690 return "specific_id != ?";
691 }
692 }
693
694 case filter<adjective>::type::group:
695 {
696 bool truelogic = notlogic != f.get_notlogic();
697
698 std::list<std::string> clauses;
699 std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<adjective> f2) {
700 return recur(f2, truelogic);
701 });
702
703 if (truelogic == f.get_orlogic())
704 {
705 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")";
706 } else {
707 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
708 }
709 }
710 }
711 };
712
713 cond << recur(_generalization_of, _generalization_of.get_notlogic());
714 cond << ")";
715 conditions.push_back(cond.str());
716 }
717
718 if (_is_specification)
719 {
720 conditions.push_back("adjective_id IN (SELECT specific_id FROM specification)");
721 }
722
723 if (!_specification_of.empty())
724 {
725 std::stringstream cond;
726 if (_specification_of.get_notlogic())
727 {
728 cond << "adjective_id NOT IN";
729 } else {
730 cond << "adjective_id IN";
731 }
732
733 cond << "(SELECT specific_id FROM specification WHERE ";
734
735 std::function<std::string (filter<adjective>, bool)> recur = [&] (filter<adjective> f, bool notlogic) -> std::string {
736 switch (f.get_type())
737 {
738 case filter<adjective>::type::singleton:
739 {
740 bindings.emplace_back(f.get_elem()._id);
741
742 if (notlogic == f.get_notlogic())
743 {
744 return "general_id = ?";
745 } else {
746 return "general_id != ?";
747 }
748 }
749
750 case filter<adjective>::type::group:
751 {
752 bool truelogic = notlogic != f.get_notlogic();
753
754 std::list<std::string> clauses;
755 std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<adjective> f2) {
756 return recur(f2, truelogic);
757 });
758
759 if (truelogic == f.get_orlogic())
760 {
761 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")";
762 } else {
763 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
764 }
765 }
766 }
767 };
768
769 cond << recur(_specification_of, _specification_of.get_notlogic());
770 cond << ")";
771 conditions.push_back(cond.str());
772 }
773
774 if (_is_pertainymic)
775 {
776 conditions.push_back("adjective_id IN (SELECT pertainym_id FROM pertainymy)");
777 }
778
779 if (!_pertainym_of.empty())
780 {
781 std::stringstream cond;
782 if (_pertainym_of.get_notlogic())
783 {
784 cond << "adjective_id NOT IN";
785 } else {
786 cond << "adjective_id IN";
787 }
788
789 cond << "(SELECT pertainym_id FROM pertainymy WHERE ";
790
791 std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string {
792 switch (f.get_type())
793 {
794 case filter<noun>::type::singleton:
795 {
796 bindings.emplace_back(f.get_elem()._id);
797
798 if (notlogic == f.get_notlogic())
799 {
800 return "noun_id = ?";
801 } else {
802 return "noun_id != ?";
803 }
804 }
805
806 case filter<noun>::type::group:
807 {
808 bool truelogic = notlogic != f.get_notlogic();
809
810 std::list<std::string> clauses;
811 std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) {
812 return recur(f2, truelogic);
813 });
814
815 if (truelogic == f.get_orlogic())
816 {
817 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")";
818 } else {
819 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
820 }
821 }
822 }
823 };
824
825 cond << recur(_pertainym_of, _pertainym_of.get_notlogic());
826 cond << ")";
827 conditions.push_back(cond.str());
828 }
829
830 if (_is_mannernymic)
831 {
832 conditions.push_back("adjective_id IN (SELECT adjective_id FROM mannernymy)");
833 }
834
835 if (!_anti_mannernym_of.empty())
836 {
837 std::stringstream cond;
838 if (_anti_mannernym_of.get_notlogic())
839 {
840 cond << "adjective_id NOT IN";
841 } else {
842 cond << "adjective_id IN";
843 }
844
845 cond << "(SELECT adjective_id FROM mannernymy WHERE ";
846
847 std::function<std::string (filter<adverb>, bool)> recur = [&] (filter<adverb> f, bool notlogic) -> std::string {
848 switch (f.get_type())
849 {
850 case filter<adverb>::type::singleton:
851 {
852 bindings.emplace_back(f.get_elem()._id);
853
854 if (notlogic == f.get_notlogic())
855 {
856 return "mannernym_id = ?";
857 } else {
858 return "mannernym_id != ?";
859 }
860 }
861
862 case filter<adverb>::type::group:
863 {
864 bool truelogic = notlogic != f.get_notlogic();
865
866 std::list<std::string> clauses;
867 std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<adverb> f2) {
868 return recur(f2, truelogic);
869 });
870
871 if (truelogic == f.get_orlogic())
872 {
873 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")";
874 } else {
875 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
876 }
877 }
878 }
879 };
880
881 cond << recur(_anti_mannernym_of, _anti_mannernym_of.get_notlogic());
882 cond << ")";
883 conditions.push_back(cond.str());
884 }
885/*
886 if (!_derived_from_adjective.empty())
887 {
888 std::list<std::string> clauses(_derived_from_adjective.size(), "adjective_2_id = @DERADJ");
889 std::string cond = "adjective_id IN (SELECT adjective_1_id FROM adjective_adjective_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
890 conditions.push_back(cond);
891 }
892
893 if (!_not_derived_from_adjective.empty())
894 {
895 std::list<std::string> clauses(_not_derived_from_adjective.size(), "adjective_2_id = @NDERADJ");
896 std::string cond = "adjective_id NOT IN (SELECT adjective_1_id FROM adjective_adjective_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
897 conditions.push_back(cond);
898 }
899
900 if (!_derived_from_adverb.empty())
901 {
902 std::list<std::string> clauses(_derived_from_adverb.size(), "adverb_id = @DERADV");
903 std::string cond = "adjective_id IN (SELECT adjective_id FROM adjective_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
904 conditions.push_back(cond);
905 }
906
907 if (!_not_derived_from_adverb.empty())
908 {
909 std::list<std::string> clauses(_not_derived_from_adverb.size(), "adverb_id = @NDERADV");
910 std::string cond = "adjective_id NOT IN (SELECT adjective_id FROM adjective_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
911 conditions.push_back(cond);
912 }
913
914 if (!_derived_from_noun.empty())
915 {
916 std::list<std::string> clauses(_derived_from_noun.size(), "noun_id = @DERN");
917 std::string cond = "adjective_id IN (SELECT adjective_id FROM noun_adjective_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
918 conditions.push_back(cond);
919 }
920
921 if (!_not_derived_from_noun.empty())
922 {
923 std::list<std::string> clauses(_not_derived_from_noun.size(), "noun_id = @NDERN");
924 std::string cond = "adjective_id NOT IN (SELECT adjective_id FROM noun_adjective_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
925 conditions.push_back(cond);
926 }*/
927
928 if (!conditions.empty())
929 {
930 construct << " WHERE ";
931 construct << verbly::implode(std::begin(conditions), std::end(conditions), " AND ");
932 }
933
934 if (_random)
935 {
936 construct << " ORDER BY RANDOM()";
937 }
938
939 if (_limit != unlimited)
940 {
941 construct << " LIMIT " << _limit;
942 }
943
944 sqlite3_stmt* ppstmt;
945 std::string query = construct.str();
946 if (sqlite3_prepare_v2(_data.ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
947 {
948 throw std::runtime_error(sqlite3_errmsg(_data.ppdb));
949 }
950
951 int i = 1;
952 for (auto& binding : bindings)
953 {
954 switch (binding.get_type())
955 {
956 case binding::type::integer:
957 {
958 sqlite3_bind_int(ppstmt, i, binding.get_integer());
959
960 break;
961 }
962
963 case binding::type::string:
964 {
965 sqlite3_bind_text(ppstmt, i, binding.get_string().c_str(), binding.get_string().length(), SQLITE_TRANSIENT);
966
967 break;
968 }
969 }
970
971 i++;
972 }
973
974 /*
975 for (auto adj : _derived_from_adjective)
976 {
977 sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@DERADJ"), adj._id);
978 }
979
980 for (auto adj : _not_derived_from_adjective)
981 {
982 sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@NDERADJ"), adj._id);
983 }
984
985 for (auto adv : _derived_from_adverb)
986 {
987 sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@DERADV"), adv._id);
988 }
989
990 for (auto adv : _not_derived_from_adverb)
991 {
992 sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@NDERADV"), adv._id);
993 }
994
995 for (auto n : _derived_from_noun)
996 {
997 sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@DERN"), n._id);
998 }
999
1000 for (auto n : _not_derived_from_noun)
1001 {
1002 sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@NDERN"), n._id);
1003 }
1004*/
1005 std::list<adjective> output;
1006 while (sqlite3_step(ppstmt) == SQLITE_ROW)
1007 {
1008 adjective tnc {_data, sqlite3_column_int(ppstmt, 0)};
1009 tnc._base_form = std::string(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 1)));
1010
1011 if (sqlite3_column_type(ppstmt, 2) != SQLITE_NULL)
1012 {
1013 tnc._comparative_form = std::string(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 2)));
1014 }
1015
1016 if (sqlite3_column_type(ppstmt, 3) != SQLITE_NULL)
1017 {
1018 tnc._superlative_form = std::string(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 3)));
1019 }
1020
1021 if (sqlite3_column_type(ppstmt, 4) != SQLITE_NULL)
1022 {
1023 std::string adjpos(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 4)));
1024 if (adjpos == "p")
1025 {
1026 tnc._position = adjective::positioning::predicate;
1027 } else if (adjpos == "a")
1028 {
1029 tnc._position = adjective::positioning::attributive;
1030 } else if (adjpos == "i")
1031 {
1032 tnc._position = adjective::positioning::postnominal;
1033 }
1034 }
1035
1036 output.push_back(tnc);
1037 }
1038
1039 sqlite3_finalize(ppstmt);
1040
1041 for (auto& adjective : output)
1042 {
1043 query = "SELECT pronunciation, prerhyme, rhyme FROM adjective_pronunciations WHERE adjective_id = ?";
1044 if (sqlite3_prepare_v2(_data.ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
1045 {
1046 throw std::runtime_error(sqlite3_errmsg(_data.ppdb));
1047 }
1048
1049 sqlite3_bind_int(ppstmt, 1, adjective._id);
1050
1051 while (sqlite3_step(ppstmt) == SQLITE_ROW)
1052 {
1053 std::string pronunciation(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 0)));
1054 auto phonemes = verbly::split<std::list<std::string>>(pronunciation, " ");
1055
1056 adjective.pronunciations.push_back(phonemes);
1057
1058 if ((sqlite3_column_type(ppstmt, 1) != SQLITE_NULL) && (sqlite3_column_type(ppstmt, 2) != SQLITE_NULL))
1059 {
1060 std::string prerhyme(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 1)));
1061 std::string rhyming(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 2)));
1062 adjective.rhymes.emplace_back(prerhyme, rhyming);
1063 }
1064 }
1065
1066 sqlite3_finalize(ppstmt);
1067 }
1068
1069 return output;
1070 }
1071
1072};
diff --git a/lib/adjective_query.h b/lib/adjective_query.h deleted file mode 100644 index e6a6609..0000000 --- a/lib/adjective_query.h +++ /dev/null
@@ -1,112 +0,0 @@
1#ifndef ADJECTIVE_QUERY_H_05E590FD
2#define ADJECTIVE_QUERY_H_05E590FD
3
4namespace verbly {
5
6 class adjective_query {
7 public:
8 adjective_query(const data& _data);
9
10 adjective_query& limit(int _limit);
11 adjective_query& random();
12 adjective_query& except(const adjective& _word);
13 adjective_query& rhymes_with(const word& _word);
14 adjective_query& rhymes_with(rhyme _r);
15 adjective_query& has_pronunciation();
16 adjective_query& has_rhyming_noun();
17 adjective_query& has_rhyming_adjective();
18 adjective_query& has_rhyming_adverb();
19 adjective_query& has_rhyming_verb();
20 adjective_query& with_stress(filter<std::vector<bool>> _arg);
21
22 adjective_query& requires_comparative_form();
23 adjective_query& requires_superlative_form();
24 adjective_query& position(adjective::positioning pos);
25
26 adjective_query& with_prefix(filter<std::string> _f);
27 adjective_query& with_suffix(filter<std::string> _f);
28
29 adjective_query& with_complexity(int _arg);
30
31 adjective_query& is_variant();
32 adjective_query& variant_of(filter<noun> _f);
33
34 adjective_query& has_antonyms();
35 adjective_query& antonym_of(filter<adjective> _f);
36
37 adjective_query& has_synonyms();
38 adjective_query& synonym_of(filter<adjective> _f);
39
40 adjective_query& is_generalization();
41 adjective_query& generalization_of(filter<adjective> _f);
42
43 adjective_query& is_specification();
44 adjective_query& specification_of(filter<adjective> _f);
45
46 adjective_query& is_pertainymic();
47 adjective_query& pertainym_of(filter<noun> _f);
48
49 adjective_query& is_mannernymic();
50 adjective_query& anti_mannernym_of(filter<adverb> _f);
51
52/* adjective_query& derived_from(const word& _w);
53 adjective_query& not_derived_from(const word& _w);*/
54
55 std::list<adjective> run() const;
56
57 const static int unlimited = -1;
58
59 protected:
60 const data& _data;
61 int _limit = unlimited;
62 bool _random = false;
63 std::list<rhyme> _rhymes;
64 std::list<adjective> _except;
65 bool _has_prn = false;
66 bool _has_rhyming_noun = false;
67 bool _has_rhyming_adjective = false;
68 bool _has_rhyming_adverb = false;
69 bool _has_rhyming_verb = false;
70 filter<std::vector<bool>> _stress;
71
72 bool _requires_comparative_form = false;
73 bool _requires_superlative_form = false;
74 adjective::positioning _position = adjective::positioning::undefined;
75
76 filter<std::string> _with_prefix;
77 filter<std::string> _with_suffix;
78
79 int _with_complexity = unlimited;
80
81 bool _is_variant = false;
82 filter<noun> _variant_of;
83
84 bool _is_antonymic = false;
85 filter<adjective> _antonym_of;
86
87 bool _is_synonymic = false;
88 filter<adjective> _synonym_of;
89
90 bool _is_generalization = false;
91 filter<adjective> _generalization_of;
92
93 bool _is_specification = false;
94 filter<adjective> _specification_of;
95
96 bool _is_pertainymic = false;
97 filter<noun> _pertainym_of;
98
99 bool _is_mannernymic = false;
100 filter<adverb> _anti_mannernym_of;
101
102/* std::list<adjective> _derived_from_adjective;
103 std::list<adjective> _not_derived_from_adjective;
104 std::list<adverb> _derived_from_adverb;
105 std::list<adverb> _not_derived_from_adverb;
106 std::list<noun> _derived_from_noun;
107 std::list<noun> _not_derived_from_noun;*/
108 };
109
110};
111
112#endif /* end of include guard: ADJECTIVE_QUERY_H_05E590FD */
diff --git a/lib/adverb.cpp b/lib/adverb.cpp deleted file mode 100644 index 442574e..0000000 --- a/lib/adverb.cpp +++ /dev/null
@@ -1,71 +0,0 @@
1#include "verbly.h"
2
3namespace verbly {
4
5 adverb::adverb()
6 {
7
8 }
9
10 adverb::adverb(const data& _data, int _id) : word(_data, _id)
11 {
12
13 }
14
15 std::string adverb::base_form() const
16 {
17 assert(_valid == true);
18
19 return _base_form;
20 }
21
22 std::string adverb::comparative_form() const
23 {
24 assert(_valid == true);
25
26 return _comparative_form;
27 }
28
29 std::string adverb::superlative_form() const
30 {
31 assert(_valid == true);
32
33 return _superlative_form;
34 }
35
36 bool adverb::has_comparative_form() const
37 {
38 assert(_valid == true);
39
40 return !_comparative_form.empty();
41 }
42
43 bool adverb::has_superlative_form() const
44 {
45 assert(_valid == true);
46
47 return !_superlative_form.empty();
48 }
49
50 adverb_query adverb::antonyms() const
51 {
52 assert(_valid == true);
53
54 return _data->adverbs().antonym_of(*this);
55 }
56
57 adverb_query adverb::synonyms() const
58 {
59 assert(_valid == true);
60
61 return _data->adverbs().synonym_of(*this);
62 }
63
64 adjective_query adverb::anti_mannernyms() const
65 {
66 assert(_valid == true);
67
68 return _data->adjectives().anti_mannernym_of(*this);
69 }
70
71};
diff --git a/lib/adverb.h b/lib/adverb.h deleted file mode 100644 index 56d4e28..0000000 --- a/lib/adverb.h +++ /dev/null
@@ -1,35 +0,0 @@
1#ifndef ADVERB_H_86F8302F
2#define ADVERB_H_86F8302F
3
4namespace verbly {
5
6 class adverb : public word {
7 private:
8 std::string _base_form;
9 std::string _comparative_form;
10 std::string _superlative_form;
11
12 friend class adverb_query;
13
14 public:
15 adverb();
16 adverb(const data& _data, int _id);
17
18 std::string base_form() const;
19 std::string comparative_form() const;
20 std::string superlative_form() const;
21
22 bool has_comparative_form() const;
23 bool has_superlative_form() const;
24
25 adverb_query antonyms() const;
26 adverb_query synonyms() const;
27 adjective_query anti_mannernyms() const;
28
29 adverb_query& derived_from(const word& _w);
30 adverb_query& not_derived_from(const word& _w);
31 };
32
33};
34
35#endif /* end of include guard: ADVERB_H_86F8302F */
diff --git a/lib/adverb_query.cpp b/lib/adverb_query.cpp deleted file mode 100644 index 3e62bb7..0000000 --- a/lib/adverb_query.cpp +++ /dev/null
@@ -1,758 +0,0 @@
1#include "verbly.h"
2
3namespace verbly {
4
5 adverb_query::adverb_query(const data& _data) : _data(_data)
6 {
7
8 }
9
10 adverb_query& adverb_query::limit(int _limit)
11 {
12 if ((_limit > 0) || (_limit == unlimited))
13 {
14 this->_limit = _limit;
15 }
16
17 return *this;
18 }
19
20 adverb_query& adverb_query::random()
21 {
22 this->_random = true;
23
24 return *this;
25 }
26
27 adverb_query& adverb_query::except(const adverb& _word)
28 {
29 _except.push_back(_word);
30
31 return *this;
32 }
33
34 adverb_query& adverb_query::rhymes_with(const word& _word)
35 {
36 for (auto rhyme : _word.get_rhymes())
37 {
38 _rhymes.push_back(rhyme);
39 }
40
41 if (dynamic_cast<const adverb*>(&_word) != nullptr)
42 {
43 _except.push_back(dynamic_cast<const adverb&>(_word));
44 }
45
46 return *this;
47 }
48
49 adverb_query& adverb_query::rhymes_with(rhyme _r)
50 {
51 _rhymes.push_back(_r);
52
53 return *this;
54 }
55
56 adverb_query& adverb_query::has_pronunciation()
57 {
58 this->_has_prn = true;
59
60 return *this;
61 }
62
63 adverb_query& adverb_query::has_rhyming_noun()
64 {
65 _has_rhyming_noun = true;
66
67 return *this;
68 }
69
70 adverb_query& adverb_query::has_rhyming_adjective()
71 {
72 _has_rhyming_adjective = true;
73
74 return *this;
75 }
76
77 adverb_query& adverb_query::has_rhyming_adverb()
78 {
79 _has_rhyming_adverb = true;
80
81 return *this;
82 }
83
84 adverb_query& adverb_query::has_rhyming_verb()
85 {
86 _has_rhyming_verb = true;
87
88 return *this;
89 }
90
91 adverb_query& adverb_query::requires_comparative_form()
92 {
93 _requires_comparative_form = true;
94
95 return *this;
96 }
97
98 adverb_query& adverb_query::requires_superlative_form()
99 {
100 _requires_superlative_form = true;
101
102 return *this;
103 }
104
105 adverb_query& adverb_query::with_stress(filter<std::vector<bool>> _arg)
106 {
107 _stress = _arg;
108
109 return *this;
110 }
111
112 adverb_query& adverb_query::with_prefix(filter<std::string> _f)
113 {
114 _f.clean();
115 _with_prefix = _f;
116
117 return *this;
118 }
119
120 adverb_query& adverb_query::with_suffix(filter<std::string> _f)
121 {
122 _f.clean();
123 _with_suffix = _f;
124
125 return *this;
126 }
127
128 adverb_query& adverb_query::with_complexity(int _arg)
129 {
130 _with_complexity = _arg;
131
132 return *this;
133 }
134
135 adverb_query& adverb_query::has_antonyms()
136 {
137 _has_antonyms = true;
138
139 return *this;
140 }
141
142 adverb_query& adverb_query::antonym_of(filter<adverb> _f)
143 {
144 _f.clean();
145 _antonym_of = _f;
146
147 return *this;
148 }
149
150 adverb_query& adverb_query::has_synonyms()
151 {
152 _has_synonyms = true;
153
154 return *this;
155 }
156
157 adverb_query& adverb_query::synonym_of(filter<adverb> _f)
158 {
159 _f.clean();
160 _synonym_of = _f;
161
162 return *this;
163 }
164
165 adverb_query& adverb_query::is_mannernymic()
166 {
167 _is_mannernymic = true;
168
169 return *this;
170 }
171
172 adverb_query& adverb_query::mannernym_of(filter<adjective> _f)
173 {
174 _f.clean();
175 _mannernym_of = _f;
176
177 return *this;
178 }
179 /*
180 adverb_query& adverb_query::derived_from(const word& _w)
181 {
182 if (dynamic_cast<const adjective*>(&_w) != nullptr)
183 {
184 _derived_from_adjective.push_back(dynamic_cast<const adjective&>(_w));
185 } else if (dynamic_cast<const adverb*>(&_w) != nullptr)
186 {
187 _derived_from_adverb.push_back(dynamic_cast<const adverb&>(_w));
188 } else if (dynamic_cast<const noun*>(&_w) != nullptr)
189 {
190 _derived_from_noun.push_back(dynamic_cast<const noun&>(_w));
191 }
192
193 return *this;
194 }
195
196 adverb_query& adverb_query::not_derived_from(const word& _w)
197 {
198 if (dynamic_cast<const adjective*>(&_w) != nullptr)
199 {
200 _not_derived_from_adjective.push_back(dynamic_cast<const adjective&>(_w));
201 } else if (dynamic_cast<const adverb*>(&_w) != nullptr)
202 {
203 _not_derived_from_adverb.push_back(dynamic_cast<const adverb&>(_w));
204 } else if (dynamic_cast<const noun*>(&_w) != nullptr)
205 {
206 _not_derived_from_noun.push_back(dynamic_cast<const noun&>(_w));
207 }
208
209 return *this;
210 }
211 */
212 std::list<adverb> adverb_query::run() const
213 {
214 std::stringstream construct;
215 construct << "SELECT adverb_id, base_form, comparative, superlative FROM adverbs";
216 std::list<std::string> conditions;
217 std::list<binding> bindings;
218
219 if (_has_prn)
220 {
221 conditions.push_back("adverb_id IN (SELECT adverb_id FROM adverb_pronunciations)");
222 }
223
224 if (!_rhymes.empty())
225 {
226 std::list<std::string> clauses(_rhymes.size(), "(prerhyme != ? AND rhyme = ?)");
227 std::string cond = "adverb_id IN (SELECT adverb_id FROM adverb_pronunciations WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
228 conditions.push_back(cond);
229
230 for (auto rhy : _rhymes)
231 {
232 bindings.emplace_back(rhy.get_prerhyme());
233 bindings.emplace_back(rhy.get_rhyme());
234 }
235 }
236
237 if (_has_rhyming_noun)
238 {
239 conditions.push_back("adverb_id IN (SELECT a.adverb_id FROM adverbs AS a INNER JOIN adverb_pronunciations AS curp ON curp.noun_id = a.adverb_id INNER JOIN noun_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)");
240 }
241
242 if (_has_rhyming_adjective)
243 {
244 conditions.push_back("adverb_id IN (SELECT a.adverb_id FROM adverbs AS a INNER JOIN adverb_pronunciations AS curp ON curp.noun_id = a.adverb_id INNER JOIN adjective_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)");
245 }
246
247 if (_has_rhyming_adverb)
248 {
249 conditions.push_back("adverb_id IN (SELECT a.adverb_id FROM adverbs AS a INNER JOIN adverb_pronunciations AS curp ON curp.noun_id = a.adverb_id INNER JOIN adverb_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme AND rhmp.adverb_id != curp.adverb_id)");
250 }
251
252 if (_has_rhyming_verb)
253 {
254 conditions.push_back("adverb_id IN (SELECT a.adverb_id FROM adverbs AS a INNER JOIN adverb_pronunciations AS curp ON curp.noun_id = a.adverb_id INNER JOIN verb_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)");
255 }
256
257 for (auto except : _except)
258 {
259 conditions.push_back("adverb_id != ?");
260 bindings.emplace_back(except._id);
261 }
262
263 if (_requires_comparative_form)
264 {
265 conditions.push_back("comparative IS NOT NULL");
266 }
267
268 if (_requires_superlative_form)
269 {
270 conditions.push_back("superlative IS NOT NULL");
271 }
272
273 if (!_stress.empty())
274 {
275 std::stringstream cond;
276 if (_stress.get_notlogic())
277 {
278 cond << "adverb_id NOT IN";
279 } else {
280 cond << "adverb_id IN";
281 }
282
283 cond << "(SELECT adverb_id FROM adverb_pronunciations WHERE ";
284
285 std::function<std::string (filter<std::vector<bool>>, bool)> recur = [&] (filter<std::vector<bool>> f, bool notlogic) -> std::string {
286 switch (f.get_type())
287 {
288 case filter<std::vector<bool>>::type::singleton:
289 {
290 std::ostringstream _val;
291 for (auto syl : f.get_elem())
292 {
293 if (syl)
294 {
295 _val << "1";
296 } else {
297 _val << "0";
298 }
299 }
300
301 bindings.emplace_back(_val.str());
302
303 if (notlogic == f.get_notlogic())
304 {
305 return "stress = ?";
306 } else {
307 return "stress != ?";
308 }
309 }
310
311 case filter<std::vector<bool>>::type::group:
312 {
313 bool truelogic = notlogic != f.get_notlogic();
314
315 std::list<std::string> clauses;
316 std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<std::vector<bool>> f2) {
317 return recur(f2, truelogic);
318 });
319
320 if (truelogic == f.get_orlogic())
321 {
322 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")";
323 } else {
324 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
325 }
326 }
327 }
328 };
329
330 cond << recur(_stress, _stress.get_notlogic());
331 cond << ")";
332 conditions.push_back(cond.str());
333 }
334
335 if (!_with_prefix.empty())
336 {
337 std::function<std::string (filter<std::string>, bool)> recur = [&] (filter<std::string> f, bool notlogic) -> std::string {
338 switch (f.get_type())
339 {
340 case filter<std::string>::type::singleton:
341 {
342 bindings.emplace_back(f.get_elem() + "%");
343
344 if (notlogic == f.get_notlogic())
345 {
346 return "base_form LIKE ?";
347 } else {
348 return "base_form NOT LIKE ?";
349 }
350 }
351
352 case filter<std::string>::type::group:
353 {
354 bool truelogic = notlogic != f.get_notlogic();
355
356 std::list<std::string> clauses;
357 std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<std::string> f2) {
358 return recur(f2, truelogic);
359 });
360
361 if (truelogic == f.get_orlogic())
362 {
363 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")";
364 } else {
365 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
366 }
367 }
368 }
369 };
370
371 conditions.push_back(recur(_with_prefix, false));
372 }
373
374 if (!_with_suffix.empty())
375 {
376 std::function<std::string (filter<std::string>, bool)> recur = [&] (filter<std::string> f, bool notlogic) -> std::string {
377 switch (f.get_type())
378 {
379 case filter<std::string>::type::singleton:
380 {
381 bindings.emplace_back("%" + f.get_elem());
382
383 if (notlogic == f.get_notlogic())
384 {
385 return "base_form LIKE ?";
386 } else {
387 return "base_form NOT LIKE ?";
388 }
389 }
390
391 case filter<std::string>::type::group:
392 {
393 bool truelogic = notlogic != f.get_notlogic();
394
395 std::list<std::string> clauses;
396 std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<std::string> f2) {
397 return recur(f2, truelogic);
398 });
399
400 if (truelogic == f.get_orlogic())
401 {
402 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")";
403 } else {
404 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
405 }
406 }
407 }
408 };
409
410 conditions.push_back(recur(_with_suffix, false));
411 }
412
413 if (_with_complexity != unlimited)
414 {
415 conditions.push_back("complexity = ?");
416 bindings.emplace_back(_with_complexity);
417 }
418
419 if (_has_antonyms)
420 {
421 conditions.push_back("adverb_id IN (SELECT adverb_2_id FROM adverb_antonymy)");
422 }
423
424 if (!_antonym_of.empty())
425 {
426 std::stringstream cond;
427 if (_antonym_of.get_notlogic())
428 {
429 cond << "adverb_id NOT IN";
430 } else {
431 cond << "adverb_id IN";
432 }
433
434 cond << "(SELECT adverb_2_id FROM adverb_antonymy WHERE ";
435
436 std::function<std::string (filter<adverb>, bool)> recur = [&] (filter<adverb> f, bool notlogic) -> std::string {
437 switch (f.get_type())
438 {
439 case filter<adverb>::type::singleton:
440 {
441 bindings.emplace_back(f.get_elem()._id);
442
443 if (notlogic == f.get_notlogic())
444 {
445 return "adverb_1_id = ?";
446 } else {
447 return "adverb_1_id != ?";
448 }
449 }
450
451 case filter<adverb>::type::group:
452 {
453 bool truelogic = notlogic != f.get_notlogic();
454
455 std::list<std::string> clauses;
456 std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<adverb> f2) {
457 return recur(f2, truelogic);
458 });
459
460 if (truelogic == f.get_orlogic())
461 {
462 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")";
463 } else {
464 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
465 }
466 }
467 }
468 };
469
470 cond << recur(_antonym_of, _antonym_of.get_notlogic());
471 cond << ")";
472 conditions.push_back(cond.str());
473 }
474
475 if (_has_synonyms)
476 {
477 conditions.push_back("adverb_id IN (SELECT adverb_2_id FROM adverb_synonymy)");
478 }
479
480 if (!_synonym_of.empty())
481 {
482 std::stringstream cond;
483 if (_antonym_of.get_notlogic())
484 {
485 cond << "adverb_id NOT IN";
486 } else {
487 cond << "adverb_id IN";
488 }
489
490 cond << "(SELECT adverb_2_id FROM adverb_synonymy WHERE ";
491
492 std::function<std::string (filter<adverb>, bool)> recur = [&] (filter<adverb> f, bool notlogic) -> std::string {
493 switch (f.get_type())
494 {
495 case filter<adverb>::type::singleton:
496 {
497 bindings.emplace_back(f.get_elem()._id);
498
499 if (notlogic == f.get_notlogic())
500 {
501 return "adverb_1_id = ?";
502 } else {
503 return "adverb_1_id != ?";
504 }
505 }
506
507 case filter<adverb>::type::group:
508 {
509 bool truelogic = notlogic != f.get_notlogic();
510
511 std::list<std::string> clauses;
512 std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<adverb> f2) {
513 return recur(f2, truelogic);
514 });
515
516 if (truelogic == f.get_orlogic())
517 {
518 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")";
519 } else {
520 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
521 }
522 }
523 }
524 };
525
526 cond << recur(_synonym_of, _synonym_of.get_notlogic());
527 cond << ")";
528 conditions.push_back(cond.str());
529 }
530
531 if (_is_mannernymic)
532 {
533 conditions.push_back("adverb_id IN (SELECT mannernym_id FROM mannernymy)");
534 }
535
536 if (!_mannernym_of.empty())
537 {
538 std::stringstream cond;
539 if (_antonym_of.get_notlogic())
540 {
541 cond << "adverb_id NOT IN";
542 } else {
543 cond << "adverb_id IN";
544 }
545
546 cond << "(SELECT mannernym_id FROM mannernymy WHERE ";
547
548 std::function<std::string (filter<adjective>, bool)> recur = [&] (filter<adjective> f, bool notlogic) -> std::string {
549 switch (f.get_type())
550 {
551 case filter<adjective>::type::singleton:
552 {
553 bindings.emplace_back(f.get_elem()._id);
554
555 if (notlogic == f.get_notlogic())
556 {
557 return "adjective_id = ?";
558 } else {
559 return "adjective_id != ?";
560 }
561 }
562
563 case filter<adjective>::type::group:
564 {
565 bool truelogic = notlogic != f.get_notlogic();
566
567 std::list<std::string> clauses;
568 std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<adjective> f2) {
569 return recur(f2, truelogic);
570 });
571
572 if (truelogic == f.get_orlogic())
573 {
574 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")";
575 } else {
576 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
577 }
578 }
579 }
580 };
581
582 cond << recur(_mannernym_of, _mannernym_of.get_notlogic());
583 cond << ")";
584 conditions.push_back(cond.str());
585 }
586
587/* if (!_derived_from_adjective.empty())
588 {
589 std::list<std::string> clauses(_derived_from_adjective.size(), "adjective_id = @DERADJ");
590 std::string cond = "adverb_id IN (SELECT adverb_id FROM adjective_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
591 conditions.push_back(cond);
592 }
593
594 if (!_not_derived_from_adjective.empty())
595 {
596 std::list<std::string> clauses(_not_derived_from_adjective.size(), "adjective_id = @NDERADJ");
597 std::string cond = "adverb_id NOT IN (SELECT adverb_id FROM adjective_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
598 conditions.push_back(cond);
599 }
600
601 if (!_derived_from_adverb.empty())
602 {
603 std::list<std::string> clauses(_derived_from_adverb.size(), "adverb_2_id = @DERADV");
604 std::string cond = "adverb_id IN (SELECT adverb_1_id FROM adverb_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
605 conditions.push_back(cond);
606 }
607
608 if (!_not_derived_from_adverb.empty())
609 {
610 std::list<std::string> clauses(_not_derived_from_adverb.size(), "adverb_2_id = @NDERADV");
611 std::string cond = "adverb_id NOT IN (SELECT adverb_1_id FROM adverb_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
612 conditions.push_back(cond);
613 }
614
615 if (!_derived_from_noun.empty())
616 {
617 std::list<std::string> clauses(_derived_from_noun.size(), "noun_id = @DERN");
618 std::string cond = "adverb_id IN (SELECT adverb_id FROM noun_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
619 conditions.push_back(cond);
620 }
621
622 if (!_not_derived_from_noun.empty())
623 {
624 std::list<std::string> clauses(_not_derived_from_noun.size(), "noun_id = @NDERN");
625 std::string cond = "adverb_id NOT IN (SELECT adverb_id FROM noun_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
626 conditions.push_back(cond);
627 }*/
628
629 if (!conditions.empty())
630 {
631 construct << " WHERE ";
632 construct << verbly::implode(std::begin(conditions), std::end(conditions), " AND ");
633 }
634
635 if (_random)
636 {
637 construct << " ORDER BY RANDOM()";
638 }
639
640 if (_limit != unlimited)
641 {
642 construct << " LIMIT " << _limit;
643 }
644
645 sqlite3_stmt* ppstmt;
646 std::string query = construct.str();
647 if (sqlite3_prepare_v2(_data.ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
648 {
649 throw std::runtime_error(sqlite3_errmsg(_data.ppdb));
650 }
651
652 int i = 1;
653 for (auto& binding : bindings)
654 {
655 switch (binding.get_type())
656 {
657 case binding::type::integer:
658 {
659 sqlite3_bind_int(ppstmt, i, binding.get_integer());
660
661 break;
662 }
663
664 case binding::type::string:
665 {
666 sqlite3_bind_text(ppstmt, i, binding.get_string().c_str(), binding.get_string().length(), SQLITE_TRANSIENT);
667
668 break;
669 }
670 }
671
672 i++;
673 }
674
675 /*
676 for (auto adj : _derived_from_adjective)
677 {
678 sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@DERADJ"), adj._id);
679 }
680
681 for (auto adj : _not_derived_from_adjective)
682 {
683 sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@NDERADJ"), adj._id);
684 }
685
686 for (auto adv : _derived_from_adverb)
687 {
688 sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@DERADV"), adv._id);
689 }
690
691 for (auto adv : _not_derived_from_adverb)
692 {
693 sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@NDERADV"), adv._id);
694 }
695
696 for (auto n : _derived_from_noun)
697 {
698 sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@DERN"), n._id);
699 }
700
701 for (auto n : _not_derived_from_noun)
702 {
703 sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@NDERN"), n._id);
704 }*/
705
706 std::list<adverb> output;
707 while (sqlite3_step(ppstmt) == SQLITE_ROW)
708 {
709 adverb tnc {_data, sqlite3_column_int(ppstmt, 0)};
710 tnc._base_form = std::string(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 1)));
711
712 if (sqlite3_column_type(ppstmt, 2) != SQLITE_NULL)
713 {
714 tnc._comparative_form = std::string(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 2)));
715 }
716
717 if (sqlite3_column_type(ppstmt, 3) != SQLITE_NULL)
718 {
719 tnc._superlative_form = std::string(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 3)));
720 }
721
722 output.push_back(tnc);
723 }
724
725 sqlite3_finalize(ppstmt);
726
727 for (auto& adverb : output)
728 {
729 query = "SELECT pronunciation, prerhyme, rhyme FROM adverb_pronunciations WHERE adverb_id = ?";
730 if (sqlite3_prepare_v2(_data.ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
731 {
732 throw std::runtime_error(sqlite3_errmsg(_data.ppdb));
733 }
734
735 sqlite3_bind_int(ppstmt, 1, adverb._id);
736
737 while (sqlite3_step(ppstmt) == SQLITE_ROW)
738 {
739 std::string pronunciation(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 0)));
740 auto phonemes = verbly::split<std::list<std::string>>(pronunciation, " ");
741
742 adverb.pronunciations.push_back(phonemes);
743
744 if ((sqlite3_column_type(ppstmt, 1) != SQLITE_NULL) && (sqlite3_column_type(ppstmt, 2) != SQLITE_NULL))
745 {
746 std::string prerhyme(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 1)));
747 std::string rhyming(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 2)));
748 adverb.rhymes.emplace_back(prerhyme, rhyming);
749 }
750 }
751
752 sqlite3_finalize(ppstmt);
753 }
754
755 return output;
756 }
757
758};
diff --git a/lib/adverb_query.h b/lib/adverb_query.h deleted file mode 100644 index 30e7400..0000000 --- a/lib/adverb_query.h +++ /dev/null
@@ -1,86 +0,0 @@
1#ifndef ADVERB_QUERY_H_CA13CCDD
2#define ADVERB_QUERY_H_CA13CCDD
3
4namespace verbly {
5
6 class adverb_query {
7 public:
8 adverb_query(const data& _data);
9
10 adverb_query& limit(int _limit);
11 adverb_query& random();
12 adverb_query& except(const adverb& _word);
13 adverb_query& rhymes_with(const word& _word);
14 adverb_query& rhymes_with(rhyme _r);
15 adverb_query& has_pronunciation();
16 adverb_query& has_rhyming_noun();
17 adverb_query& has_rhyming_adjective();
18 adverb_query& has_rhyming_adverb();
19 adverb_query& has_rhyming_verb();
20 adverb_query& with_stress(filter<std::vector<bool>> _arg);
21
22 adverb_query& requires_comparative_form();
23 adverb_query& requires_superlative_form();
24
25 adverb_query& with_prefix(filter<std::string> _f);
26 adverb_query& with_suffix(filter<std::string> _f);
27
28 adverb_query& with_complexity(int _arg);
29
30 adverb_query& has_antonyms();
31 adverb_query& antonym_of(filter<adverb> _f);
32
33 adverb_query& has_synonyms();
34 adverb_query& synonym_of(filter<adverb> _f);
35
36 adverb_query& is_mannernymic();
37 adverb_query& mannernym_of(filter<adjective> _f);
38
39/* adverb_query& derived_from(const word& _w);
40 adverb_query& not_derived_from(const word& _w);*/
41
42 std::list<adverb> run() const;
43
44 const static int unlimited = -1;
45
46 private:
47 const data& _data;
48 int _limit = unlimited;
49 bool _random = false;
50 std::list<rhyme> _rhymes;
51 std::list<adverb> _except;
52 bool _has_prn = false;
53 bool _has_rhyming_noun = false;
54 bool _has_rhyming_adjective = false;
55 bool _has_rhyming_adverb = false;
56 bool _has_rhyming_verb = false;
57 filter<std::vector<bool>> _stress;
58
59 bool _requires_comparative_form = false;
60 bool _requires_superlative_form = false;
61
62 filter<std::string> _with_prefix;
63 filter<std::string> _with_suffix;
64
65 int _with_complexity = unlimited;
66
67 bool _has_antonyms = false;
68 filter<adverb> _antonym_of;
69
70 bool _has_synonyms = false;
71 filter<adverb> _synonym_of;
72
73 bool _is_mannernymic = false;
74 filter<adjective> _mannernym_of;
75
76/* std::list<adjective> _derived_from_adjective;
77 std::list<adjective> _not_derived_from_adjective;
78 std::list<adverb> _derived_from_adverb;
79 std::list<adverb> _not_derived_from_adverb;
80 std::list<noun> _derived_from_noun;
81 std::list<noun> _not_derived_from_noun;*/
82 };
83
84};
85
86#endif /* end of include guard: ADVERB_QUERY_H_CA13CCDD */
diff --git a/lib/binding.cpp b/lib/binding.cpp new file mode 100644 index 0000000..349cd6f --- /dev/null +++ b/lib/binding.cpp
@@ -0,0 +1,180 @@
1#include "binding.h"
2#include <stdexcept>
3#include <utility>
4
5namespace verbly {
6
7 binding::binding(const binding& other)
8 {
9 type_ = other.type_;
10
11 switch (type_)
12 {
13 case type::integer:
14 {
15 integer_ = other.integer_;
16
17 break;
18 }
19
20 case type::string:
21 {
22 new(&string_) std::string(other.string_);
23
24 break;
25 }
26
27 case type::invalid:
28 {
29 break;
30 }
31 }
32 }
33
34 binding::binding(binding&& other) : binding()
35 {
36 swap(*this, other);
37 }
38
39 binding& binding::operator=(binding other)
40 {
41 swap(*this, other);
42
43 return *this;
44 }
45
46 void swap(binding& first, binding& second)
47 {
48 using type = binding::type;
49
50 type tempType = first.type_;
51 int tempInteger;
52 std::string tempString;
53
54 switch (first.type_)
55 {
56 case type::integer:
57 {
58 tempInteger = first.integer_;
59
60 break;
61 }
62
63 case type::string:
64 {
65 tempString = std::move(tempString);
66
67 break;
68 }
69
70 case type::invalid:
71 {
72 break;
73 }
74 }
75
76 first.~binding();
77
78 first.type_ = second.type_;
79
80 switch (second.type_)
81 {
82 case type::integer:
83 {
84 first.integer_ = second.integer_;
85
86 break;
87 }
88
89 case type::string:
90 {
91 new(&first.string_) std::string(std::move(second.string_));
92
93 break;
94 }
95
96 case type::invalid:
97 {
98 break;
99 }
100 }
101
102 second.~binding();
103
104 second.type_ = tempType;
105
106 switch (tempType)
107 {
108 case type::integer:
109 {
110 second.integer_ = tempInteger;
111
112 break;
113 }
114
115 case type::string:
116 {
117 new(&second.string_) std::string(std::move(tempString));
118
119 break;
120 }
121
122 case type::invalid:
123 {
124 break;
125 }
126 }
127 }
128
129 binding::~binding()
130 {
131 switch (type_)
132 {
133 case type::string:
134 {
135 using string_type = std::string;
136 string_.~string_type();
137
138 break;
139 }
140
141 case type::integer:
142 case type::invalid:
143 {
144 break;
145 }
146 }
147 }
148
149 binding::binding(int arg) :
150 type_(type::integer),
151 integer_(arg)
152 {
153 }
154
155 int binding::getInteger() const
156 {
157 if (type_ != type::integer)
158 {
159 throw std::domain_error("binding::getInteger called on non-integer binding");
160 }
161
162 return integer_;
163 }
164
165 binding::binding(std::string arg) : type_(type::string)
166 {
167 new(&string_) std::string(arg);
168 }
169
170 std::string binding::getString() const
171 {
172 if (type_ != type::string)
173 {
174 throw std::domain_error("binding::getString called on non-string binding");
175 }
176
177 return string_;
178 }
179
180};
diff --git a/lib/binding.h b/lib/binding.h new file mode 100644 index 0000000..7fbe20e --- /dev/null +++ b/lib/binding.h
@@ -0,0 +1,70 @@
1#ifndef BINDING_H_CAE0B18E
2#define BINDING_H_CAE0B18E
3
4#include <string>
5
6namespace verbly {
7
/**
 * A tagged union holding either an int, a std::string, or nothing (invalid).
 *
 * NOTE(review): presumably these are the values bound to SQL statement
 * parameters, as with the binding class this one replaces - confirm against
 * the query code.
 */
class binding {
public:
  // Tag identifying which union member, if any, is active.
  enum class type {
    invalid,
    integer,
    string
  };

  // Default constructor: leaves the binding in the invalid state.

  binding()
  {
  }

  // Copy and move constructors

  binding(const binding& other);
  binding(binding&& other);

  // Assignment (takes its argument by value)

  binding& operator=(binding other);

  // Swap

  friend void swap(binding& first, binding& second);

  // Destructor

  ~binding();

  // Generic accessors

  type getType() const
  {
    return type_;
  }

  // Integer

  binding(int arg);

  // Throws std::domain_error when the binding is not an integer.
  int getInteger() const;

  // String

  binding(std::string arg);

  // Throws std::domain_error when the binding is not a string.
  std::string getString() const;

private:

  // Payload storage; type_ records which member is live.
  union {
    int integer_;
    std::string string_;
  };

  type type_ = type::invalid;
};
67
68};
69
70#endif /* end of include guard: BINDING_H_CAE0B18E */
diff --git a/lib/data.cpp b/lib/data.cpp deleted file mode 100644 index db42487..0000000 --- a/lib/data.cpp +++ /dev/null
@@ -1,177 +0,0 @@
1#include "verbly.h"
2
3namespace verbly {
4
5 data::data(std::string datafile)
6 {
7 if (sqlite3_open_v2(datafile.c_str(), &ppdb, SQLITE_OPEN_READONLY, NULL) != SQLITE_OK)
8 {
9 throw std::invalid_argument(sqlite3_errmsg(ppdb));
10 }
11 }
12
13 data::data(data&& other)
14 {
15 ppdb = other.ppdb;
16 }
17
18 data& data::operator=(data&& other)
19 {
20 ppdb = other.ppdb;
21
22 return *this;
23 }
24
25 data::~data()
26 {
27 sqlite3_close_v2(ppdb);
28 }
29
30 verb_query data::verbs() const
31 {
32 return verb_query(*this);
33 }
34
35 adjective_query data::adjectives() const
36 {
37 return adjective_query(*this);
38 }
39
40 adverb_query data::adverbs() const
41 {
42 return adverb_query(*this);
43 }
44
45 noun_query data::nouns() const
46 {
47 return noun_query(*this);
48 }
49
50 frame_query data::frames() const
51 {
52 return frame_query(*this);
53 }
54
55 preposition_query data::prepositions() const
56 {
57 return preposition_query(*this);
58 }
59
60 binding::type binding::get_type() const
61 {
62 return _type;
63 }
64
65 binding::binding(const binding& other)
66 {
67 _type = other._type;
68
69 switch (_type)
70 {
71 case type::integer:
72 {
73 _integer = other._integer;
74
75 break;
76 }
77
78 case type::string:
79 {
80 new(&_string) std::string(other._string);
81
82 break;
83 }
84 }
85 }
86
87 binding::~binding()
88 {
89 switch (_type)
90 {
91 case type::string:
92 {
93 using string_type = std::string;
94 _string.~string_type();
95
96 break;
97 }
98 }
99 }
100
101 binding& binding::operator=(const binding& other)
102 {
103 this->~binding();
104
105 _type = other._type;
106
107 switch (_type)
108 {
109 case type::integer:
110 {
111 _integer = other._integer;
112
113 break;
114 }
115
116 case type::string:
117 {
118 new(&_string) std::string(other._string);
119
120 break;
121 }
122 }
123
124 return *this;
125 }
126
127 binding::binding(int _arg)
128 {
129 _type = type::integer;
130 _integer = _arg;
131 }
132
133 int binding::get_integer() const
134 {
135 assert(_type == type::integer);
136
137 return _integer;
138 }
139
140 void binding::set_integer(int _arg)
141 {
142 *this = binding(_arg);
143 }
144
145 binding& binding::operator=(int _arg)
146 {
147 *this = binding(_arg);
148
149 return *this;
150 }
151
152 binding::binding(std::string _arg)
153 {
154 _type = type::string;
155 new(&_string) std::string(_arg);
156 }
157
158 std::string binding::get_string() const
159 {
160 assert(_type == type::string);
161
162 return _string;
163 }
164
165 void binding::set_string(std::string _arg)
166 {
167 *this = binding(_arg);
168 }
169
170 binding& binding::operator=(std::string _arg)
171 {
172 *this = binding(_arg);
173
174 return *this;
175 }
176
177};
diff --git a/lib/data.h b/lib/data.h deleted file mode 100644 index b8b12b9..0000000 --- a/lib/data.h +++ /dev/null
@@ -1,380 +0,0 @@
1#ifndef DATA_H_C4AEC3DD
2#define DATA_H_C4AEC3DD
3
4#include <sqlite3.h>
5
6namespace verbly {
7
8 class data;
9 class word;
10 class adjective;
11 class noun;
12 class verb;
13 class adverb;
14 class frame;
15 class adjective_query;
16 class adverb_query;
17 class noun_query;
18 class verb_query;
19 class frame_query;
20 class preposition_query;
21
22 class data {
23 private:
24 sqlite3* ppdb;
25
26 friend class adjective_query;
27 friend class noun_query;
28 friend class verb_query;
29 friend class adverb_query;
30 friend class frame_query;
31 friend class preposition_query;
32
33 public:
34 data(std::string datafile);
35
36 data(const data& other) = delete;
37 data& operator=(const data& other) = delete;
38
39 data(data&& other);
40 data& operator=(data&& other);
41
42 ~data();
43
44 verb_query verbs() const;
45 adjective_query adjectives() const;
46 adverb_query adverbs() const;
47 noun_query nouns() const;
48 frame_query frames() const;
49 preposition_query prepositions() const;
50
51 };
52
/**
 * A boolean expression tree over values of type T.
 *
 * A filter is either a singleton (one T) or a group (a list of child
 * filters). Both kinds carry a negation flag (notlogic); a group also carries
 * an orlogic flag. The query builders join a non-negated group's children
 * with AND when orlogic is false and with OR when it is true.
 *
 * The payload lives in a union, so construction and destruction of the active
 * member are done by hand with placement new and explicit destructor calls.
 */
template <class T>
class filter {
public:
  enum class type {
    singleton,
    group
  };

  typedef filter<T> value_type;

  type get_type() const
  {
    return _type;
  }

  // Copy constructor: duplicates the active union member of `other`.
  filter(const filter<T>& other)
  {
    _type = other._type;
    _notlogic = other._notlogic;

    switch (_type)
    {
      case type::singleton:
      {
        new(&_singleton.elem) T(other._singleton.elem);

        break;
      }

      case type::group:
      {
        new(&_group.elems) std::list<filter<T>>(other._group.elems);
        _group.orlogic = other._group.orlogic;

        break;
      }
    }
  }

  // Copy assignment: destroys the current payload and rebuilds from `other`.
  //
  // `other` must not be a descendant of *this, because it would be destroyed
  // along with our children before being read; copy it to a local first, as
  // clean() does.
  filter<T>& operator=(const filter<T>& other)
  {
    // Self-assignment must be a no-op: otherwise the payload would be
    // destroyed and then copy-constructed from its own destroyed storage.
    if (this == &other)
    {
      return *this;
    }

    this->~filter();

    _type = other._type;
    _notlogic = other._notlogic;

    switch (_type)
    {
      case type::singleton:
      {
        new(&_singleton.elem) T(other._singleton.elem);

        break;
      }

      case type::group:
      {
        new(&_group.elems) std::list<filter<T>>(other._group.elems);
        _group.orlogic = other._group.orlogic;

        break;
      }
    }

    return *this;
  }

  ~filter()
  {
    switch (_type)
    {
      case type::singleton:
      {
        _singleton.elem.~T();

        break;
      }

      case type::group:
      {
        using list_type = std::list<filter<T>>;
        _group.elems.~list_type();

        break;
      }
    }
  }

  bool get_notlogic() const
  {
    return _notlogic;
  }

  void set_notlogic(bool _nl)
  {
    _notlogic = _nl;
  }

  // Returns every singleton value in the tree, in traversal order.
  std::list<T> inorder_flatten() const
  {
    std::list<T> result;

    if (_type == type::singleton)
    {
      result.push_back(_singleton.elem);
    } else if (_type == type::group)
    {
      // Iterate by reference: copying each child would deep-copy its subtree.
      for (const auto& elem : _group.elems)
      {
        auto l = elem.inorder_flatten();
        result.insert(std::end(result), std::begin(l), std::end(l));
      }
    }

    return result;
  }

  // Returns the set of distinct singleton values in the tree.
  std::set<T> uniq_flatten() const
  {
    std::set<T> result;

    if (_type == type::singleton)
    {
      result.insert(_singleton.elem);
    } else if (_type == type::group)
    {
      for (const auto& elem : _group.elems)
      {
        auto l = elem.uniq_flatten();
        result.insert(std::begin(l), std::end(l));
      }
    }

    return result;
  }

  // Simplifies the tree: drops empty child groups and replaces any group that
  // has exactly one child with that child, combining the two negation flags.
  void clean()
  {
    if (_type == type::group)
    {
      std::list<typename std::list<filter<T>>::iterator> toremove;
      for (auto it = _group.elems.begin(); it != _group.elems.end(); ++it)
      {
        it->clean();

        if (it->get_type() == type::group)
        {
          if (it->_group.elems.size() == 0)
          {
            toremove.push_back(it);
          } else if (it->_group.elems.size() == 1)
          {
            bool truelogic = it->_notlogic != it->_group.elems.front()._notlogic;
            // Copy the child out first; assigning it directly would destroy
            // it while it is still being read.
            filter<T> e = it->_group.elems.front();
            *it = e;
            it->_notlogic = truelogic;
          }
        }
      }

      for (auto rem : toremove)
      {
        _group.elems.erase(rem);
      }

      if (_group.elems.size() == 1)
      {
        bool truelogic = _notlogic != _group.elems.front()._notlogic;
        filter<T> e = _group.elems.front();
        *this = e;
        _notlogic = truelogic;
      }
    }
  }

  // Singleton
  filter(T _elem, bool _notlogic = false) : _type(type::singleton)
  {
    new(&_singleton.elem) T(_elem);
    this->_notlogic = _notlogic;
  }

  // Assigning a bare value turns this filter into a singleton holding it.
  filter<T>& operator=(T _elem)
  {
    // Parentheses are required: brace-initialisation would pick the
    // initializer_list constructor and build a one-element group instead.
    *this = filter<T>(_elem);

    return *this;
  }

  T get_elem() const
  {
    assert(_type == type::singleton);

    return _singleton.elem;
  }

  void set_elem(T _elem)
  {
    assert(_type == type::singleton);

    _singleton.elem = _elem;
  }

  // Group
  typedef typename std::list<filter<T>>::iterator iterator;

  // Default constructor: an empty group with orlogic false.
  filter() : _type(type::group)
  {
    new(&_group.elems) std::list<filter<T>>();
    _group.orlogic = false;
  }

  filter(std::initializer_list<filter<T>> _init) : _type(type::group)
  {
    new(&_group.elems) std::list<filter<T>>(_init);
    _group.orlogic = false;
  }

  iterator begin()
  {
    assert(_type == type::group);

    return _group.elems.begin();
  }

  iterator end()
  {
    assert(_type == type::group);

    return _group.elems.end();
  }

  // Appends a child and returns *this so that appends can be chained.
  filter<T>& operator<<(filter<T> _elem)
  {
    assert(_type == type::group);

    _group.elems.push_back(_elem);

    return *this;
  }

  void push_back(filter<T> _elem)
  {
    assert(_type == type::group);

    _group.elems.push_back(_elem);
  }

  bool get_orlogic() const
  {
    assert(_type == type::group);

    return _group.orlogic;
  }

  void set_orlogic(bool _ol)
  {
    assert(_type == type::group);

    _group.orlogic = _ol;
  }

  // A singleton is never empty; a group is empty when it has no children.
  bool empty() const
  {
    if (_type == type::group)
    {
      return _group.elems.empty();
    } else {
      return false;
    }
  }

  int size() const
  {
    assert(_type == type::group);

    return static_cast<int>(_group.elems.size());
  }

private:
  type _type;
  bool _notlogic = false;
  union {
    struct {
      T elem;
    } _singleton;
    struct {
      std::list<filter<T>> elems;
      bool orlogic;
    } _group;
  };
};
345
/**
 * A tagged union holding either an int or a std::string.
 *
 * The query code binds these to SQLite statement parameters according to
 * get_type().
 */
class binding {
public:
  // Tag identifying which union member is active.
  enum class type {
    integer,
    string
  };

  type get_type() const;
  binding(const binding& other);
  ~binding();
  binding& operator=(const binding& other);

  // Integer
  binding(int _arg);
  int get_integer() const;
  void set_integer(int _arg);
  binding& operator=(int _arg);

  // String
  binding(std::string _arg);
  std::string get_string() const;
  void set_string(std::string _arg);
  binding& operator=(std::string _arg);

private:
  // Payload storage; _type records which member is live.
  union {
    int _integer;
    std::string _string;
  };
  type _type;
};
377
378};
379
380#endif /* end of include guard: DATA_H_C4AEC3DD */
diff --git a/lib/database.cpp b/lib/database.cpp new file mode 100644 index 0000000..351b93d --- /dev/null +++ b/lib/database.cpp
@@ -0,0 +1,79 @@
1#include "database.h"
2#include <sqlite3.h>
3#include <stdexcept>
4#include "query.h"
5
6namespace verbly {
7
8 database::database(std::string path)
9 {
10 if (sqlite3_open_v2(path.c_str(), &ppdb_, SQLITE_OPEN_READONLY, NULL) != SQLITE_OK)
11 {
12 // We still have to free the resources allocated. In the event that
13 // allocation failed, ppdb will be null and sqlite3_close_v2 will just
14 // ignore it.
15 std::string errmsg(sqlite3_errmsg(ppdb_));
16 sqlite3_close_v2(ppdb_);
17
18 throw database_error("Could not open verbly datafile", errmsg);
19 }
20 }
21
22 database::database(database&& other) : database()
23 {
24 swap(*this, other);
25 }
26
27 database& database::operator=(database&& other)
28 {
29 swap(*this, other);
30
31 return *this;
32 }
33
34 void swap(database& first, database& second)
35 {
36 std::swap(first.ppdb_, second.ppdb_);
37 }
38
39 database::~database()
40 {
41 sqlite3_close_v2(ppdb_);
42 }
43
44 query<notion> database::notions(filter where, bool random, int limit) const
45 {
46 return query<notion>(*this, ppdb_, std::move(where), random, limit);
47 }
48
49 query<word> database::words(filter where, bool random, int limit) const
50 {
51 return query<word>(*this, ppdb_, std::move(where), random, limit);
52 }
53
54 query<group> database::groups(filter where, bool random, int limit) const
55 {
56 return query<group>(*this, ppdb_, std::move(where), random, limit);
57 }
58
59 query<frame> database::frames(filter where, bool random, int limit) const
60 {
61 return query<frame>(*this, ppdb_, std::move(where), random, limit);
62 }
63
64 query<lemma> database::lemmas(filter where, bool random, int limit) const
65 {
66 return query<lemma>(*this, ppdb_, std::move(where), random, limit);
67 }
68
69 query<form> database::forms(filter where, bool random, int limit) const
70 {
71 return query<form>(*this, ppdb_, std::move(where), random, limit);
72 }
73
74 query<pronunciation> database::pronunciations(filter where, bool random, int limit) const
75 {
76 return query<pronunciation>(*this, ppdb_, std::move(where), random, limit);
77 }
78
79};
diff --git a/lib/database.h b/lib/database.h new file mode 100644 index 0000000..d68c40b --- /dev/null +++ b/lib/database.h
@@ -0,0 +1,73 @@
1#ifndef DATABASE_H_0B0A47D2
2#define DATABASE_H_0B0A47D2
3
4#include <string>
5#include <exception>
6#include <list>
7#include "notion.h"
8#include "word.h"
9#include "group.h"
10#include "frame.h"
11#include "lemma.h"
12#include "form.h"
13#include "pronunciation.h"
14
15struct sqlite3;
16
17namespace verbly {
18
19 template <typename Object>
20 class query;
21
22 class database {
23 public:
24
25 // Constructor
26
27 explicit database(std::string path);
28
29 // Disable copying
30
31 database(const database& other) = delete;
32 database& operator=(const database& other) = delete;
33
34 // Move constructor and move assignment
35
36 database(database&& other);
37 database& operator=(database&& other);
38
39 // Swap
40
41 friend void swap(database& first, database& second);
42
43 // Destructor
44
45 ~database();
46
47 // Queries
48
49 query<notion> notions(filter where, bool random = true, int limit = 1) const;
50
51 query<word> words(filter where, bool random = true, int limit = 1) const;
52
53 query<group> groups(filter where, bool random = true, int limit = 1) const;
54
55 query<frame> frames(filter where, bool random = true, int limit = 1) const;
56
57 query<lemma> lemmas(filter where, bool random = true, int limit = 1) const;
58
59 query<form> forms(filter where, bool random = true, int limit = 1) const;
60
61 query<pronunciation> pronunciations(filter where, bool random = true, int limit = 1) const;
62
63 private:
64
65 database() = default;
66
67 sqlite3* ppdb_ = nullptr;
68
69 };
70
71};
72
73#endif /* end of include guard: DATABASE_H_0B0A47D2 */
diff --git a/lib/enums.h b/lib/enums.h new file mode 100644 index 0000000..b37be7b --- /dev/null +++ b/lib/enums.h
@@ -0,0 +1,45 @@
1#ifndef ENUMS_H_260BA847
2#define ENUMS_H_260BA847
3
4namespace verbly {
5
// Part-of-speech categories. The numeric values are spelled out because
// field::operator==(part_of_speech) casts them to int when building a filter.
enum class part_of_speech {
  noun        = 0,
  adjective   = 1,
  adverb      = 2,
  verb        = 3,
  preposition = 4
};
13
// Adjective positioning. `undefined` is the only negative value; the others
// count up from zero.
enum class positioning {
  undefined   = -1,
  predicate   = 0,
  attributive = 1,
  postnominal = 2
};
20
// Inflection categories, numbered from zero with explicit values.
enum class inflection {
  base            = 0,
  plural          = 1,
  comparative     = 2,
  superlative     = 3,
  past_tense      = 4,
  past_participle = 5,
  ing_form        = 6,
  s_form          = 7
};
31
// The kinds of entity a field can belong to. `undefined` is the value used
// when a field is described by an explicit table name rather than an object.
enum class object {
  undefined     = -1,
  notion        = 0,
  word          = 1,
  group         = 2,
  frame         = 3,
  lemma         = 4,
  form          = 5,
  pronunciation = 6
};
42
43};
44
45#endif /* end of include guard: ENUMS_H_260BA847 */
diff --git a/lib/field.cpp b/lib/field.cpp new file mode 100644 index 0000000..d7adbb3 --- /dev/null +++ b/lib/field.cpp
@@ -0,0 +1,91 @@
1#include "field.h"
2#include "filter.h"
3
4namespace verbly {
5
6 filter field::operator==(int value) const
7 {
8 return filter(*this, filter::comparison::int_equals, value);
9 }
10
11 filter field::operator!=(int value) const
12 {
13 return filter(*this, filter::comparison::int_does_not_equal, value);
14 }
15
16 filter field::operator<(int value) const
17 {
18 return filter(*this, filter::comparison::int_is_less_than, value);
19 }
20
21 filter field::operator<=(int value) const
22 {
23 return filter(*this, filter::comparison::int_is_at_most, value);
24 }
25
26 filter field::operator>(int value) const
27 {
28 return filter(*this, filter::comparison::int_is_greater_than, value);
29 }
30
31 filter field::operator>=(int value) const
32 {
33 return filter(*this, filter::comparison::int_is_at_least, value);
34 }
35
36 filter field::operator==(part_of_speech value) const
37 {
38 return filter(*this, filter::comparison::int_equals, static_cast<int>(value));
39 }
40
41 filter field::operator==(positioning value) const
42 {
43 return filter(*this, filter::comparison::int_equals, static_cast<int>(value));
44 }
45
46 filter field::operator==(inflection value) const
47 {
48 return filter(*this, filter::comparison::int_equals, static_cast<int>(value));
49 }
50
51 filter field::operator==(bool value) const
52 {
53 return filter(*this, filter::comparison::boolean_equals, value);
54 }
55
56 filter field::operator==(std::string value) const
57 {
58 return filter(*this, filter::comparison::string_equals, std::move(value));
59 }
60
61 filter field::operator!=(std::string value) const
62 {
63 return filter(*this, filter::comparison::string_does_not_equal, std::move(value));
64 }
65
66 filter field::operator%=(std::string value) const
67 {
68 return filter(*this, filter::comparison::string_is_like, std::move(value));
69 }
70
71 field::operator filter() const
72 {
73 return filter(*this, filter::comparison::is_not_null);
74 }
75
76 filter field::operator!() const
77 {
78 return filter(*this, filter::comparison::is_null);
79 }
80
81 filter field::operator%=(filter joinCondition) const
82 {
83 if (type_ == type::hierarchal_join)
84 {
85 return filter(*this, filter::comparison::hierarchally_matches, std::move(joinCondition));
86 } else {
87 return filter(*this, filter::comparison::matches, std::move(joinCondition));
88 }
89 }
90
91};
diff --git a/lib/field.h b/lib/field.h new file mode 100644 index 0000000..30c62be --- /dev/null +++ b/lib/field.h
@@ -0,0 +1,306 @@
1#ifndef FIELD_H_43258321
2#define FIELD_H_43258321
3
4#include "enums.h"
5#include <stdexcept>
6#include <tuple>
7
8namespace verbly {
9
10 class filter;
11
12 class field {
13 public:
14 enum class type {
15 undefined,
16 string,
17 integer,
18 boolean,
19 join,
20 join_through,
21 hierarchal_join
22 };
23
24 // Default constructor
25
26 field()
27 {
28 }
29
30 // Static factories
31
32 static field stringField(
33 object obj,
34 const char* name,
35 bool nullable = false)
36 {
37 return field(obj, type::string, name, nullable);
38 }
39
40 static field stringField(
41 const char* table,
42 const char* name,
43 bool nullable = false)
44 {
45 return field(object::undefined, type::string, name, nullable, table);
46 }
47
48 static field integerField(
49 object obj,
50 const char* name,
51 bool nullable = false)
52 {
53 return field(obj, type::integer, name, nullable);
54 }
55
56 static field integerField(
57 const char* table,
58 const char* name,
59 bool nullable = false)
60 {
61 return field(object::undefined, type::integer, name, nullable, table);
62 }
63
64 static field booleanField(
65 object obj,
66 const char* name,
67 bool nullable = false)
68 {
69 return field(obj, type::boolean, name, nullable);
70 }
71
72 static field booleanField(
73 const char* table,
74 const char* name,
75 bool nullable = false)
76 {
77 return field(object::undefined, type::boolean, name, nullable, table);
78 }
79
80 static field joinField(
81 object obj,
82 const char* name,
83 object joinWith,
84 bool nullable = false)
85 {
86 return field(obj, type::join, name, nullable, 0, joinWith);
87 }
88
89 static field joinField(
90 object obj,
91 const char* name,
92 const char* table,
93 bool nullable = false)
94 {
95 return field(obj, type::join, name, nullable, table);
96 }
97
98 static field joinThrough(
99 object obj,
100 const char* name,
101 object joinWith,
102 const char* joinTable,
103 const char* foreignColumn)
104 {
105 return field(obj, type::join_through, name, true, joinTable, joinWith, foreignColumn, name, foreignColumn);
106 }
107
108 static field joinThrough(
109 object obj,
110 const char* name,
111 object joinWith,
112 const char* joinTable,
113 const char* foreignColumn,
114 const char* joinColumn,
115 const char* foreignJoinColumn)
116 {
117 return field(obj, type::join_through, name, true, joinTable, joinWith, foreignColumn, joinColumn, foreignJoinColumn);
118 }
119
120 static field selfJoin(
121 object obj,
122 const char* name,
123 const char* joinTable,
124 const char* joinColumn,
125 const char* foreignJoinColumn)
126 {
127 return field(obj, type::join_through, name, true, joinTable, obj, name, joinColumn, foreignJoinColumn);
128 }
129
130 static field hierarchalSelfJoin(
131 object obj,
132 const char* name,
133 const char* joinTable,
134 const char* joinColumn,
135 const char* foreignJoinColumn)
136 {
137 return field(obj, type::hierarchal_join, name, true, joinTable, obj, name, joinColumn, foreignJoinColumn);
138 }
139
140 // Accessors
141
142 object getObject() const
143 {
144 return object_;
145 }
146
147 type getType() const
148 {
149 return type_;
150 }
151
152 bool isJoin() const
153 {
154 return ((type_ == type::join) || (type_ == type::join_through) || (type_ == type::hierarchal_join));
155 }
156
157 const char* getColumn() const
158 {
159 return column_;
160 }
161
162 bool isNullable() const
163 {
164 return nullable_;
165 }
166
167 bool hasTable() const
168 {
169 return (table_ != 0);
170 }
171
172 const char* getTable() const
173 {
174 return table_;
175 }
176
177 // Joins
178
179 object getJoinObject() const
180 {
181 // We ignore hierarchal joins because they are always self joins.
182 return ((type_ == type::join) || (type_ == type::join_through))
183 ? joinObject_
184 : throw std::domain_error("Non-join fields don't have join objects");
185 }
186
187 // Many-to-many joins
188
189 const char* getForeignColumn() const
190 {
191 // We ignore hierarchal joins because they are always self joins.
192 return (type_ == type::join_through)
193 ? foreignColumn_
194 : throw std::domain_error("Only many-to-many join fields have a foreign column");
195 }
196
197 const char* getJoinColumn() const
198 {
199 return ((type_ == type::join_through) || (type_ == type::hierarchal_join))
200 ? joinColumn_
201 : throw std::domain_error("Only many-to-many join fields have a join column");
202 }
203
204 const char* getForeignJoinColumn() const
205 {
206 return ((type_ == type::join_through) || (type_ == type::hierarchal_join))
207 ? foreignJoinColumn_
208 : throw std::domain_error("Only many-to-many join fields have a foreign join column");
209 }
210
211 // Ordering
212
213 bool operator<(const field& other) const
214 {
215 // For the most part, (object, column) uniquely identifies fields.
216 // However, there do exist a number of relationships from an object to
217 // itself, such as notion hypernymy/hyponymy. Hypernymy and hyponymy have
218 // the same object (notion), the same column (notion_id), and the same
219 // table (hypernymy); however, they have different join columns.
220 return std::tie(object_, column_, table_, joinColumn_) < std::tie(other.object_, other.column_, other.table_, other.joinColumn_);
221 }
222
223 // Equality
224
225 bool operator==(const field& other) const
226 {
227 // For the most part, (object, column) uniquely identifies fields.
228 // However, there do exist a number of relationships from an object to
229 // itself, such as notion hypernymy/hyponymy. Hypernymy and hyponymy have
230 // the same object (notion), the same column (notion_id), and the same
231 // table (hypernymy); however, they have different join columns.
232 return std::tie(object_, column_, table_, joinColumn_) == std::tie(other.object_, other.column_, other.table_, other.joinColumn_);
233 }
234
235 // Filter construction
236
237 filter operator==(int value) const; // Integer equality
238 filter operator!=(int value) const; // Integer inequality
239 filter operator<(int value) const; // Integer is less than
240 filter operator<=(int value) const; // Integer is at most
241 filter operator>(int value) const; // Integer is greater than
242 filter operator>=(int value) const; // Integer is at least
243
244 filter operator==(part_of_speech value) const; // Part of speech equality
245 filter operator==(positioning value) const; // Adjective positioning equality
246 filter operator==(inflection value) const; // Inflection category equality
247
248 filter operator==(bool value) const; // Boolean equality
249
250 filter operator==(std::string value) const; // String equality
251 filter operator!=(std::string value) const; // String inequality
252 filter operator%=(std::string value) const; // String matching
253
254 operator filter() const; // Non-nullity
255 filter operator!() const; // Nullity
256
257 filter operator%=(filter joinCondition) const; // Join
258
259 private:
260
261 // Constructor
262
263 field(
264 object obj,
265 type datatype,
266 const char* column,
267 bool nullable = false,
268 const char* table = 0,
269 object joinObject = object::undefined,
270 const char* foreignColumn = 0,
271 const char* joinColumn = 0,
272 const char* foreignJoinColumn = 0) :
273 object_(obj),
274 type_(datatype),
275 column_(column),
276 nullable_(nullable),
277 table_(table),
278 joinObject_(joinObject),
279 foreignColumn_(foreignColumn),
280 joinColumn_(joinColumn),
281 foreignJoinColumn_(foreignJoinColumn)
282 {
283 }
284
285 // General
286 object object_ = object::undefined;
287 type type_ = type::undefined;
288 const char* column_ = 0;
289 const char* table_ = 0;
290
291 // Non-joins and belongs-to joins
292 bool nullable_ = false;
293
294 // Joins
295 object joinObject_ = object::undefined;
296
297 // Many-to-many joins
298 const char* foreignColumn_ = 0;
299 const char* joinColumn_ = 0;
300 const char* foreignJoinColumn_ = 0;
301
302 };
303
304};
305
306#endif /* end of include guard: FIELD_H_43258321 */
diff --git a/lib/filter.cpp b/lib/filter.cpp new file mode 100644 index 0000000..959fa05 --- /dev/null +++ b/lib/filter.cpp
@@ -0,0 +1,1365 @@
1#include "filter.h"
2#include <stdexcept>
3#include <map>
4#include "notion.h"
5#include "word.h"
6#include "group.h"
7#include "frame.h"
8#include "lemma.h"
9#include "form.h"
10#include "pronunciation.h"
11
12namespace verbly {
13
14 filter::filter(const filter& other)
15 {
16 type_ = other.type_;
17
18 switch (type_)
19 {
20 case type::empty:
21 {
22 break;
23 }
24
25 case type::singleton:
26 {
27 new(&singleton_.filterField) field(other.singleton_.filterField);
28 singleton_.filterType = other.singleton_.filterType;
29
30 switch (singleton_.filterType)
31 {
32 case comparison::int_equals:
33 case comparison::int_does_not_equal:
34 case comparison::int_is_at_least:
35 case comparison::int_is_greater_than:
36 case comparison::int_is_at_most:
37 case comparison::int_is_less_than:
38 {
39 singleton_.intValue = other.singleton_.intValue;
40
41 break;
42 }
43
44 case comparison::boolean_equals:
45 {
46 singleton_.boolValue = other.singleton_.boolValue;
47
48 break;
49 }
50
51 case comparison::string_equals:
52 case comparison::string_does_not_equal:
53 case comparison::string_is_like:
54 case comparison::string_is_not_like:
55 {
56 new(&singleton_.stringValue) std::string(other.singleton_.stringValue);
57
58 break;
59 }
60
61 case comparison::is_null:
62 case comparison::is_not_null:
63 {
64 break;
65 }
66
67 case comparison::matches:
68 case comparison::does_not_match:
69 case comparison::hierarchally_matches:
70 case comparison::does_not_hierarchally_match:
71 {
72 new(&singleton_.join) std::unique_ptr<filter>(new filter(*other.singleton_.join));
73
74 break;
75 }
76 }
77
78 break;
79 }
80
81 case type::group:
82 {
83 new(&group_.children) std::list<filter>(other.group_.children);
84 group_.orlogic = other.group_.orlogic;
85
86 break;
87 }
88 }
89 }
90
91 filter::filter(filter&& other) : filter()
92 {
93 swap(*this, other);
94 }
95
96 filter& filter::operator=(filter other)
97 {
98 swap(*this, other);
99
100 return *this;
101 }
102
103 void swap(filter& first, filter& second)
104 {
105 using type = filter::type;
106 using comparison = filter::comparison;
107
108 type tempType = first.type_;
109 field tempField;
110 comparison tempComparison;
111 std::unique_ptr<filter> tempJoin;
112 std::string tempStringValue;
113 int tempIntValue;
114 bool tempBoolValue;
115 std::list<filter> tempChildren;
116 bool tempOrlogic;
117
118 switch (tempType)
119 {
120 case type::empty:
121 {
122 break;
123 }
124
125 case type::singleton:
126 {
127 tempField = std::move(first.singleton_.filterField);
128 tempComparison = first.singleton_.filterType;
129
130 switch (tempComparison)
131 {
132 case comparison::int_equals:
133 case comparison::int_does_not_equal:
134 case comparison::int_is_at_least:
135 case comparison::int_is_greater_than:
136 case comparison::int_is_at_most:
137 case comparison::int_is_less_than:
138 {
139 tempIntValue = first.singleton_.intValue;
140
141 break;
142 }
143
144 case comparison::boolean_equals:
145 {
146 tempBoolValue = first.singleton_.boolValue;
147
148 break;
149 }
150
151 case comparison::string_equals:
152 case comparison::string_does_not_equal:
153 case comparison::string_is_like:
154 case comparison::string_is_not_like:
155 {
156 tempStringValue = std::move(first.singleton_.stringValue);
157
158 break;
159 }
160
161 case comparison::is_null:
162 case comparison::is_not_null:
163 {
164 break;
165 }
166
167 case comparison::matches:
168 case comparison::does_not_match:
169 case comparison::hierarchally_matches:
170 case comparison::does_not_hierarchally_match:
171 {
172 tempJoin = std::move(first.singleton_.join);
173
174 break;
175 }
176 }
177
178 break;
179 }
180
181 case type::group:
182 {
183 tempChildren = std::move(first.group_.children);
184 tempOrlogic = first.group_.orlogic;
185
186 break;
187 }
188 }
189
190 first.~filter();
191
192 first.type_ = second.type_;
193
194 switch (first.type_)
195 {
196 case type::empty:
197 {
198 break;
199 }
200
201 case type::singleton:
202 {
203 new(&first.singleton_.filterField) field(std::move(second.singleton_.filterField));
204 first.singleton_.filterType = second.singleton_.filterType;
205
206 switch (first.singleton_.filterType)
207 {
208 case comparison::int_equals:
209 case comparison::int_does_not_equal:
210 case comparison::int_is_at_least:
211 case comparison::int_is_greater_than:
212 case comparison::int_is_at_most:
213 case comparison::int_is_less_than:
214 {
215 first.singleton_.intValue = second.singleton_.intValue;
216
217 break;
218 }
219
220 case comparison::boolean_equals:
221 {
222 first.singleton_.boolValue = second.singleton_.boolValue;
223
224 break;
225 }
226
227 case comparison::string_equals:
228 case comparison::string_does_not_equal:
229 case comparison::string_is_like:
230 case comparison::string_is_not_like:
231 {
232 new(&first.singleton_.stringValue) std::string(std::move(second.singleton_.stringValue));
233
234 break;
235 }
236
237 case comparison::is_null:
238 case comparison::is_not_null:
239 {
240 break;
241 }
242
243 case comparison::matches:
244 case comparison::does_not_match:
245 case comparison::hierarchally_matches:
246 case comparison::does_not_hierarchally_match:
247 {
248 new(&first.singleton_.join) std::unique_ptr<filter>(std::move(second.singleton_.join));
249
250 break;
251 }
252 }
253
254 break;
255 }
256
257 case type::group:
258 {
259 new(&first.group_.children) std::list<filter>(std::move(second.group_.children));
260 first.group_.orlogic = second.group_.orlogic;
261
262 break;
263 }
264 }
265
266 second.~filter();
267
268 second.type_ = tempType;
269
270 switch (second.type_)
271 {
272 case type::empty:
273 {
274 break;
275 }
276
277 case type::singleton:
278 {
279 new(&second.singleton_.filterField) field(std::move(tempField));
280 second.singleton_.filterType = tempComparison;
281
282 switch (second.singleton_.filterType)
283 {
284 case comparison::int_equals:
285 case comparison::int_does_not_equal:
286 case comparison::int_is_at_least:
287 case comparison::int_is_greater_than:
288 case comparison::int_is_at_most:
289 case comparison::int_is_less_than:
290 {
291 second.singleton_.intValue = tempIntValue;
292
293 break;
294 }
295
296 case comparison::boolean_equals:
297 {
298 second.singleton_.boolValue = tempBoolValue;
299
300 break;
301 }
302
303 case comparison::string_equals:
304 case comparison::string_does_not_equal:
305 case comparison::string_is_like:
306 case comparison::string_is_not_like:
307 {
308 new(&second.singleton_.stringValue) std::string(std::move(tempStringValue));
309
310 break;
311 }
312
313 case comparison::is_null:
314 case comparison::is_not_null:
315 {
316 break;
317 }
318
319 case comparison::matches:
320 case comparison::does_not_match:
321 case comparison::hierarchally_matches:
322 case comparison::does_not_hierarchally_match:
323 {
324 new(&second.singleton_.join) std::unique_ptr<filter>(std::move(tempJoin));
325
326 break;
327 }
328 }
329
330 break;
331 }
332
333 case type::group:
334 {
335 new(&second.group_.children) std::list<filter>(std::move(tempChildren));
336 second.group_.orlogic = tempOrlogic;
337
338 break;
339 }
340 }
341 }
342
343 filter::~filter()
344 {
345 switch (type_)
346 {
347 case type::empty:
348 {
349 break;
350 }
351
352 case type::singleton:
353 {
354 singleton_.filterField.~field();
355
356 switch (singleton_.filterType)
357 {
358 case comparison::int_equals:
359 case comparison::int_does_not_equal:
360 case comparison::int_is_at_least:
361 case comparison::int_is_greater_than:
362 case comparison::int_is_at_most:
363 case comparison::int_is_less_than:
364 case comparison::boolean_equals:
365 case comparison::is_null:
366 case comparison::is_not_null:
367 {
368 break;
369 }
370
371 case comparison::string_equals:
372 case comparison::string_does_not_equal:
373 case comparison::string_is_like:
374 case comparison::string_is_not_like:
375 {
376 using string_type = std::string;
377
378 singleton_.stringValue.~string_type();
379
380 break;
381 }
382
383 case comparison::matches:
384 case comparison::does_not_match:
385 case comparison::hierarchally_matches:
386 case comparison::does_not_hierarchally_match:
387 {
388 using ptr_type = std::unique_ptr<filter>;
389
390 singleton_.join.~ptr_type();
391
392 break;
393 }
394 }
395
396 break;
397 }
398
399 case type::group:
400 {
401 using list_type = std::list<filter>;
402
403 group_.children.~list_type();
404
405 break;
406 }
407 }
408 }
409
410 filter::filter()
411 {
412 }
413
414 filter::filter(
415 field filterField,
416 comparison filterType,
417 int filterValue) :
418 type_(type::singleton)
419 {
420 if (filterField.getType() == field::type::integer)
421 {
422 switch (filterType)
423 {
424 case comparison::int_equals:
425 case comparison::int_does_not_equal:
426 case comparison::int_is_at_least:
427 case comparison::int_is_greater_than:
428 case comparison::int_is_at_most:
429 case comparison::int_is_less_than:
430 {
431 new(&singleton_.filterField) field(std::move(filterField));
432 singleton_.filterType = filterType;
433 singleton_.intValue = filterValue;
434
435 break;
436 }
437
438 case comparison::boolean_equals:
439 case comparison::string_equals:
440 case comparison::string_does_not_equal:
441 case comparison::string_is_like:
442 case comparison::string_is_not_like:
443 case comparison::is_null:
444 case comparison::is_not_null:
445 case comparison::matches:
446 case comparison::does_not_match:
447 case comparison::hierarchally_matches:
448 case comparison::does_not_hierarchally_match:
449 {
450 throw std::invalid_argument("Invalid comparison for integer field");
451 }
452 }
453 } else {
454 throw std::domain_error("Cannot match a non-integer field against an integer value");
455 }
456 }
457
458 filter::filter(
459 field filterField,
460 comparison filterType,
461 std::string filterValue) :
462 type_(type::singleton)
463 {
464 if (filterField.getType() == field::type::string)
465 {
466 switch (filterType)
467 {
468 case comparison::string_equals:
469 case comparison::string_does_not_equal:
470 case comparison::string_is_like:
471 case comparison::string_is_not_like:
472 {
473 new(&singleton_.filterField) field(std::move(filterField));
474 singleton_.filterType = filterType;
475 new(&singleton_.stringValue) std::string(std::move(filterValue));
476
477 break;
478 }
479
480 case comparison::int_equals:
481 case comparison::int_does_not_equal:
482 case comparison::int_is_at_least:
483 case comparison::int_is_greater_than:
484 case comparison::int_is_at_most:
485 case comparison::int_is_less_than:
486 case comparison::boolean_equals:
487 case comparison::is_null:
488 case comparison::is_not_null:
489 case comparison::matches:
490 case comparison::does_not_match:
491 case comparison::hierarchally_matches:
492 case comparison::does_not_hierarchally_match:
493 {
494 throw std::invalid_argument("Invalid comparison for string field");
495 }
496 }
497 } else {
498 throw std::domain_error("Cannot match a non-string field against an string value");
499 }
500 }
501
502 filter::filter(
503 field filterField,
504 comparison filterType,
505 bool filterValue) :
506 type_(type::singleton)
507 {
508 if (filterField.getType() == field::type::boolean)
509 {
510 switch (filterType)
511 {
512 case comparison::boolean_equals:
513 {
514 new(&singleton_.filterField) field(std::move(filterField));
515 singleton_.filterType = filterType;
516 singleton_.boolValue = filterValue;
517
518 break;
519 }
520
521 case comparison::string_equals:
522 case comparison::string_does_not_equal:
523 case comparison::string_is_like:
524 case comparison::string_is_not_like:
525 case comparison::int_equals:
526 case comparison::int_does_not_equal:
527 case comparison::int_is_at_least:
528 case comparison::int_is_greater_than:
529 case comparison::int_is_at_most:
530 case comparison::int_is_less_than:
531 case comparison::is_null:
532 case comparison::is_not_null:
533 case comparison::matches:
534 case comparison::does_not_match:
535 case comparison::hierarchally_matches:
536 case comparison::does_not_hierarchally_match:
537 {
538 throw std::invalid_argument("Invalid comparison for boolean field");
539 }
540 }
541 } else {
542 throw std::domain_error("Cannot match a non-boolean field against a boolean value");
543 }
544 }
545
546 filter::filter(
547 field filterField,
548 comparison filterType) :
549 type_(type::singleton)
550 {
551 if (filterField.isNullable())
552 {
553 switch (filterType)
554 {
555 case comparison::is_null:
556 case comparison::is_not_null:
557 {
558 new(&singleton_.filterField) field(std::move(filterField));
559 singleton_.filterType = filterType;
560
561 break;
562 }
563
564 case comparison::string_equals:
565 case comparison::string_does_not_equal:
566 case comparison::string_is_like:
567 case comparison::string_is_not_like:
568 case comparison::int_equals:
569 case comparison::int_does_not_equal:
570 case comparison::int_is_at_least:
571 case comparison::int_is_greater_than:
572 case comparison::int_is_at_most:
573 case comparison::int_is_less_than:
574 case comparison::boolean_equals:
575 case comparison::matches:
576 case comparison::does_not_match:
577 case comparison::hierarchally_matches:
578 case comparison::does_not_hierarchally_match:
579 {
580 throw std::invalid_argument("Incorrect constructor for given comparison");
581 }
582 }
583 } else {
584 throw std::domain_error("Cannot check nullity/non-nullity of non-nullable field");
585 }
586 }
587
588 filter::filter(
589 field joinOn,
590 comparison filterType,
591 filter joinCondition) :
592 type_(type::singleton)
593 {
594 switch (joinOn.getType())
595 {
596 case field::type::join:
597 case field::type::join_through:
598 {
599 switch (filterType)
600 {
601 case comparison::matches:
602 case comparison::does_not_match:
603 {
604 new(&singleton_.filterField) field(std::move(joinOn));
605 singleton_.filterType = filterType;
606 new(&singleton_.join) std::unique_ptr<filter>(new filter(joinCondition.normalize(singleton_.filterField.getJoinObject())));
607
608 break;
609 }
610
611 case comparison::int_equals:
612 case comparison::int_does_not_equal:
613 case comparison::int_is_at_least:
614 case comparison::int_is_greater_than:
615 case comparison::int_is_at_most:
616 case comparison::int_is_less_than:
617 case comparison::boolean_equals:
618 case comparison::string_equals:
619 case comparison::string_does_not_equal:
620 case comparison::string_is_like:
621 case comparison::string_is_not_like:
622 case comparison::is_null:
623 case comparison::is_not_null:
624 case comparison::hierarchally_matches:
625 case comparison::does_not_hierarchally_match:
626 {
627 throw std::invalid_argument("Incorrect constructor for given comparison");
628 }
629 }
630
631 break;
632 }
633
634 case field::type::hierarchal_join:
635 {
636 switch (filterType)
637 {
638 case comparison::hierarchally_matches:
639 case comparison::does_not_hierarchally_match:
640 {
641 new(&singleton_.filterField) field(std::move(joinOn));
642 singleton_.filterType = filterType;
643 new(&singleton_.join) std::unique_ptr<filter>(new filter(joinCondition.normalize(singleton_.filterField.getObject())));
644
645 break;
646 }
647
648 case comparison::int_equals:
649 case comparison::int_does_not_equal:
650 case comparison::int_is_at_least:
651 case comparison::int_is_greater_than:
652 case comparison::int_is_at_most:
653 case comparison::int_is_less_than:
654 case comparison::boolean_equals:
655 case comparison::string_equals:
656 case comparison::string_does_not_equal:
657 case comparison::string_is_like:
658 case comparison::string_is_not_like:
659 case comparison::is_null:
660 case comparison::is_not_null:
661 case comparison::matches:
662 case comparison::does_not_match:
663 {
664 throw std::invalid_argument("Incorrect constructor for given comparison");
665 }
666 }
667
668 break;
669 }
670
671 case field::type::undefined:
672 case field::type::string:
673 case field::type::integer:
674 case field::type::boolean:
675 {
676 throw std::domain_error("Matching field must be a join field");
677 }
678 }
679 }
680
681 field filter::getField() const
682 {
683 if (type_ == type::singleton)
684 {
685 return singleton_.filterField;
686 } else {
687 throw std::domain_error("This filter does not have a field");
688 }
689 }
690
691 filter::comparison filter::getComparison() const
692 {
693 if (type_ == type::singleton)
694 {
695 return singleton_.filterType;
696 } else {
697 throw std::domain_error("This filter does not have a comparison");
698 }
699 }
700
701 filter filter::getJoinCondition() const
702 {
703 if (type_ == type::singleton)
704 {
705 switch (singleton_.filterType)
706 {
707 case comparison::matches:
708 case comparison::does_not_match:
709 case comparison::hierarchally_matches:
710 case comparison::does_not_hierarchally_match:
711 {
712 return *singleton_.join;
713 }
714
715 case comparison::string_equals:
716 case comparison::string_does_not_equal:
717 case comparison::string_is_like:
718 case comparison::string_is_not_like:
719 case comparison::int_equals:
720 case comparison::int_does_not_equal:
721 case comparison::int_is_at_least:
722 case comparison::int_is_greater_than:
723 case comparison::int_is_at_most:
724 case comparison::int_is_less_than:
725 case comparison::boolean_equals:
726 case comparison::is_null:
727 case comparison::is_not_null:
728 {
729 throw std::domain_error("This filter does not have a join condition");
730 }
731 }
732 } else {
733 throw std::domain_error("This filter does not have a join condition");
734 }
735 }
736
737 std::string filter::getStringArgument() const
738 {
739 if (type_ == type::singleton)
740 {
741 switch (singleton_.filterType)
742 {
743 case comparison::string_equals:
744 case comparison::string_does_not_equal:
745 case comparison::string_is_like:
746 case comparison::string_is_not_like:
747 {
748 return singleton_.stringValue;
749 }
750
751 case comparison::int_equals:
752 case comparison::int_does_not_equal:
753 case comparison::int_is_at_least:
754 case comparison::int_is_greater_than:
755 case comparison::int_is_at_most:
756 case comparison::int_is_less_than:
757 case comparison::boolean_equals:
758 case comparison::is_null:
759 case comparison::is_not_null:
760 case comparison::matches:
761 case comparison::does_not_match:
762 case comparison::hierarchally_matches:
763 case comparison::does_not_hierarchally_match:
764 {
765 throw std::domain_error("This filter does not have a string argument");
766 }
767 }
768 } else {
769 throw std::domain_error("This filter does not have a string argument");
770 }
771 }
772
773 int filter::getIntegerArgument() const
774 {
775 if (type_ == type::singleton)
776 {
777 switch (singleton_.filterType)
778 {
779 case comparison::int_equals:
780 case comparison::int_does_not_equal:
781 case comparison::int_is_at_least:
782 case comparison::int_is_greater_than:
783 case comparison::int_is_at_most:
784 case comparison::int_is_less_than:
785 {
786 return singleton_.intValue;
787 }
788
789 case comparison::string_equals:
790 case comparison::string_does_not_equal:
791 case comparison::string_is_like:
792 case comparison::string_is_not_like:
793 case comparison::boolean_equals:
794 case comparison::is_null:
795 case comparison::is_not_null:
796 case comparison::matches:
797 case comparison::does_not_match:
798 case comparison::hierarchally_matches:
799 case comparison::does_not_hierarchally_match:
800 {
801 throw std::domain_error("This filter does not have an integer argument");
802 }
803 }
804 } else {
805 throw std::domain_error("This filter does not have an integer argument");
806 }
807 }
808
809 bool filter::getBooleanArgument() const
810 {
811 if ((type_ == type::singleton) && (singleton_.filterType == comparison::boolean_equals))
812 {
813 return singleton_.boolValue;
814 } else {
815 throw std::domain_error("This filter does not have a boolean argument");
816 }
817 }
818
819 filter::filter(bool orlogic) : type_(type::group)
820 {
821 new(&group_.children) std::list<filter>();
822 group_.orlogic = orlogic;
823 }
824
825 bool filter::getOrlogic() const
826 {
827 if (type_ == type::group)
828 {
829 return group_.orlogic;
830 } else {
831 throw std::domain_error("This filter is not a group filter");
832 }
833 }
834
835 filter filter::operator+(filter condition) const
836 {
837 filter result(*this);
838 result += std::move(condition);
839
840 return result;
841 }
842
843 filter& filter::operator+=(filter condition)
844 {
845 if (type_ == type::group)
846 {
847 group_.children.push_back(std::move(condition));
848
849 return *this;
850 } else {
851 throw std::domain_error("Children can only be added to group filters");
852 }
853 }
854
855 filter::const_iterator filter::begin() const
856 {
857 if (type_ == type::group)
858 {
859 return std::begin(group_.children);
860 } else {
861 throw std::domain_error("This filter has no children");
862 }
863 }
864
865 filter::const_iterator filter::end() const
866 {
867 if (type_ == type::group)
868 {
869 return std::end(group_.children);
870 } else {
871 throw std::domain_error("This filter has no children");
872 }
873 }
874
875 filter filter::operator!() const
876 {
877 switch (type_)
878 {
879 case type::empty:
880 {
881 return {};
882 }
883
884 case type::singleton:
885 {
886 switch (singleton_.filterType)
887 {
888 case comparison::int_equals:
889 {
890 return filter(singleton_.filterField, comparison::int_does_not_equal, singleton_.intValue);
891 }
892
893 case comparison::int_does_not_equal:
894 {
895 return filter(singleton_.filterField, comparison::int_equals, singleton_.intValue);
896 }
897
898 case comparison::int_is_at_least:
899 {
900 return filter(singleton_.filterField, comparison::int_is_less_than, singleton_.intValue);
901 }
902
903 case comparison::int_is_greater_than:
904 {
905 return filter(singleton_.filterField, comparison::int_is_at_most, singleton_.intValue);
906 }
907
908 case comparison::int_is_at_most:
909 {
910 return filter(singleton_.filterField, comparison::int_is_greater_than, singleton_.intValue);
911 }
912
913 case comparison::int_is_less_than:
914 {
915 return filter(singleton_.filterField, comparison::int_is_at_least, singleton_.intValue);
916 }
917
918 case comparison::boolean_equals:
919 {
920 return filter(singleton_.filterField, comparison::boolean_equals, !singleton_.boolValue);
921 }
922
923 case comparison::string_equals:
924 {
925 return filter(singleton_.filterField, comparison::string_does_not_equal, singleton_.stringValue);
926 }
927
928 case comparison::string_does_not_equal:
929 {
930 return filter(singleton_.filterField, comparison::string_equals, singleton_.stringValue);
931 }
932
933 case comparison::string_is_like:
934 {
935 return filter(singleton_.filterField, comparison::string_is_not_like, singleton_.stringValue);
936 }
937
938 case comparison::string_is_not_like:
939 {
940 return filter(singleton_.filterField, comparison::string_is_like, singleton_.stringValue);
941 }
942
943 case comparison::is_null:
944 {
945 return filter(singleton_.filterField, comparison::is_not_null);
946 }
947
948 case comparison::is_not_null:
949 {
950 return filter(singleton_.filterField, comparison::is_null);
951 }
952
953 case comparison::matches:
954 {
955 return filter(singleton_.filterField, comparison::does_not_match, *singleton_.join);
956 }
957
958 case comparison::does_not_match:
959 {
960 return filter(singleton_.filterField, comparison::matches, *singleton_.join);
961 }
962
963 case comparison::hierarchally_matches:
964 {
965 return filter(singleton_.filterField, comparison::does_not_hierarchally_match, *singleton_.join);
966 }
967
968 case comparison::does_not_hierarchally_match:
969 {
970 return filter(singleton_.filterField, comparison::hierarchally_matches, *singleton_.join);
971 }
972 }
973 }
974
975 case type::group:
976 {
977 filter result(!group_.orlogic);
978
979 for (const filter& child : group_.children)
980 {
981 result += !child;
982 }
983
984 return result;
985 }
986 }
987 }
988
989 filter& filter::operator&=(filter condition)
990 {
991 return (*this = (*this && std::move(condition)));
992 }
993
994 filter& filter::operator|=(filter condition)
995 {
996 return (*this = (*this || std::move(condition)));
997 }
998
999 filter filter::operator&&(filter condition) const
1000 {
1001 switch (type_)
1002 {
1003 case type::empty:
1004 {
1005 return condition;
1006 }
1007
1008 case type::singleton:
1009 {
1010 filter result(false);
1011 result.group_.children.push_back(*this);
1012 result.group_.children.push_back(std::move(condition));
1013
1014 return result;
1015 }
1016
1017 case type::group:
1018 {
1019 if (group_.orlogic)
1020 {
1021 filter result(false);
1022 result.group_.children.push_back(*this);
1023 result.group_.children.push_back(std::move(condition));
1024
1025 return result;
1026 } else {
1027 filter result(*this);
1028 result.group_.children.push_back(std::move(condition));
1029
1030 return result;
1031 }
1032 }
1033 }
1034 }
1035
1036 filter filter::operator||(filter condition) const
1037 {
1038 switch (type_)
1039 {
1040 case type::empty:
1041 {
1042 return condition;
1043 }
1044
1045 case type::singleton:
1046 {
1047 filter result(true);
1048 result.group_.children.push_back(*this);
1049 result.group_.children.push_back(std::move(condition));
1050
1051 return result;
1052 }
1053
1054 case type::group:
1055 {
1056 if (!group_.orlogic)
1057 {
1058 filter result(true);
1059 result.group_.children.push_back(*this);
1060 result.group_.children.push_back(std::move(condition));
1061
1062 return result;
1063 } else {
1064 filter result(*this);
1065 result.group_.children.push_back(std::move(condition));
1066
1067 return result;
1068 }
1069 }
1070 }
1071 }
1072
1073 filter filter::normalize(object context) const
1074 {
1075 {
1076 switch (type_)
1077 {
1078 case type::empty:
1079 {
1080 return *this;
1081 }
1082
1083 case type::singleton:
1084 {
1085 // First, switch on the normalized context, and then switch on the
1086 // current context. We recursively recontextualize by using the
1087 // current filter as a subquery for a join such that the context of
1088 // the subquery is one step closer to the context of the current
1089 // filter, and then letting the filter constructor normalize the
1090 // subquery.
1091 switch (context)
1092 {
1093 case object::undefined:
1094 {
1095 // An undefined object indicates no participation in
1096 // recontexualization.
1097 return *this;
1098 }
1099
1100 case object::notion:
1101 {
1102 switch (singleton_.filterField.getObject())
1103 {
1104 case object::undefined:
1105 case object::notion:
1106 {
1107 return *this;
1108 }
1109
1110 case object::word:
1111 case object::group:
1112 case object::frame:
1113 case object::lemma:
1114 case object::form:
1115 case object::pronunciation:
1116 {
1117 return (verbly::notion::word %= *this);
1118 }
1119 }
1120 }
1121
1122 case object::word:
1123 {
1124 switch (singleton_.filterField.getObject())
1125 {
1126 case object::notion:
1127 {
1128 return (verbly::word::notion %= *this);
1129 }
1130
1131 case object::undefined:
1132 case object::word:
1133 {
1134 return *this;
1135 }
1136
1137 case object::group:
1138 case object::frame:
1139 {
1140 return (verbly::word::group %= *this);
1141 }
1142
1143 case object::lemma:
1144 case object::form:
1145 case object::pronunciation:
1146 {
1147 return (verbly::word::lemma %= *this);
1148 }
1149 }
1150
1151 case object::group:
1152 {
1153 switch (singleton_.filterField.getObject())
1154 {
1155 case object::undefined:
1156 case object::group:
1157 {
1158 return *this;
1159 }
1160
1161 case object::notion:
1162 case object::word:
1163 case object::lemma:
1164 case object::form:
1165 case object::pronunciation:
1166 {
1167 return (verbly::group::word %= *this);
1168 }
1169
1170 case object::frame:
1171 {
1172 return (verbly::group::frame %= *this);
1173 }
1174 }
1175 }
1176
1177 case object::frame:
1178 {
1179 switch (singleton_.filterField.getObject())
1180 {
1181 case object::undefined:
1182 case object::frame:
1183 {
1184 return *this;
1185 }
1186
1187 case object::notion:
1188 case object::word:
1189 case object::group:
1190 case object::lemma:
1191 case object::form:
1192 case object::pronunciation:
1193 {
1194 return (verbly::frame::group %= *this);
1195 }
1196 }
1197 }
1198
1199 case object::lemma:
1200 {
1201 switch (singleton_.filterField.getObject())
1202 {
1203 case object::notion:
1204 case object::word:
1205 case object::group:
1206 case object::frame:
1207 {
1208 return verbly::lemma::word %= *this;
1209 }
1210
1211 case object::undefined:
1212 case object::lemma:
1213 {
1214 return *this;
1215 }
1216
1217 case object::form:
1218 case object::pronunciation:
1219 {
1220 return (verbly::lemma::form(inflection::base) %= *this);
1221 }
1222 }
1223 }
1224
1225 case object::form:
1226 {
1227 switch (singleton_.filterField.getObject())
1228 {
1229 case object::notion:
1230 case object::word:
1231 case object::group:
1232 case object::frame:
1233 case object::lemma:
1234 {
1235 return verbly::form::lemma(inflection::base) %= *this;
1236 }
1237
1238 case object::undefined:
1239 case object::form:
1240 {
1241 return *this;
1242 }
1243
1244 case object::pronunciation:
1245 {
1246 return (verbly::form::pronunciation %= *this);
1247 }
1248 }
1249 }
1250
1251 case object::pronunciation:
1252 {
1253 switch (singleton_.filterField.getObject())
1254 {
1255 case object::notion:
1256 case object::word:
1257 case object::group:
1258 case object::frame:
1259 case object::lemma:
1260 case object::form:
1261 {
1262 return verbly::pronunciation::form %= *this;
1263 }
1264
1265 case object::undefined:
1266 case object::pronunciation:
1267 {
1268 return *this;
1269 }
1270 }
1271 }
1272 }
1273 }
1274 }
1275
1276 case type::group:
1277 {
1278 filter result(group_.orlogic);
1279 std::map<field, filter> joins;
1280
1281 for (const filter& child : group_.children)
1282 {
1283 filter normalized = child.normalize(context);
1284
1285 // Notably, this does not attempt to merge hierarchal matches.
1286 switch (normalized.getType())
1287 {
1288 case type::singleton:
1289 {
1290 switch (normalized.getComparison())
1291 {
1292 case comparison::matches:
1293 {
1294 if (!joins.count(normalized.singleton_.filterField))
1295 {
1296 joins[normalized.getField()] = filter(group_.orlogic);
1297 }
1298
1299 joins.at(normalized.getField()) += std::move(*normalized.singleton_.join);
1300
1301 break;
1302 }
1303
1304 case comparison::does_not_match:
1305 {
1306 if (!joins.count(normalized.singleton_.filterField))
1307 {
1308 joins[normalized.getField()] = filter(group_.orlogic);
1309 }
1310
1311 joins.at(normalized.getField()) += !*normalized.singleton_.join;
1312
1313 break;
1314 }
1315
1316 case comparison::int_equals:
1317 case comparison::int_does_not_equal:
1318 case comparison::int_is_at_least:
1319 case comparison::int_is_greater_than:
1320 case comparison::int_is_at_most:
1321 case comparison::int_is_less_than:
1322 case comparison::boolean_equals:
1323 case comparison::string_equals:
1324 case comparison::string_does_not_equal:
1325 case comparison::string_is_like:
1326 case comparison::string_is_not_like:
1327 case comparison::is_null:
1328 case comparison::is_not_null:
1329 case comparison::hierarchally_matches:
1330 case comparison::does_not_hierarchally_match:
1331 {
1332 result += std::move(normalized);
1333
1334 break;
1335 }
1336 }
1337
1338 break;
1339 }
1340
1341 case type::group:
1342 case type::empty:
1343 {
1344 result += std::move(normalized);
1345
1346 break;
1347 }
1348 }
1349 }
1350
1351 for (auto& mapping : joins)
1352 {
1353 const field& joinOn = mapping.first;
1354 filter& joinCondition = mapping.second;
1355
1356 result += (joinOn %= joinCondition.normalize(joinOn.getJoinObject()));
1357 }
1358
1359 return result;
1360 }
1361 }
1362 }
1363 }
1364
1365};
diff --git a/lib/filter.h b/lib/filter.h new file mode 100644 index 0000000..d213d7a --- /dev/null +++ b/lib/filter.h
@@ -0,0 +1,143 @@
1#ifndef FILTER_H_932BA9C6
2#define FILTER_H_932BA9C6
3
4#include <list>
5#include <string>
6#include <memory>
7#include "field.h"
8#include "enums.h"
9
10namespace verbly {
11
12 class filter {
13 public:
14 enum class type {
15 empty,
16 singleton,
17 group
18 };
19
20 enum class comparison {
21 int_equals,
22 int_does_not_equal,
23 int_is_at_least,
24 int_is_greater_than,
25 int_is_at_most,
26 int_is_less_than,
27 boolean_equals,
28 string_equals,
29 string_does_not_equal,
30 string_is_like,
31 string_is_not_like,
32 is_null,
33 is_not_null,
34 matches,
35 does_not_match,
36 hierarchally_matches,
37 does_not_hierarchally_match
38 };
39
40 // Copy and move constructors
41
42 filter(const filter& other);
43 filter(filter&& other);
44
45 // Assignment
46
47 filter& operator=(filter other);
48
49 // Swap
50
51 friend void swap(filter& first, filter& second);
52
53 // Destructor
54
55 ~filter();
56
57 // Accessors
58
59 type getType() const
60 {
61 return type_;
62 }
63
64 // Empty
65
66 filter();
67
68 // Singleton
69
70 filter(field filterField, comparison filterType, int filterValue);
71 filter(field filterField, comparison filterType, std::string filterValue);
72 filter(field filterField, comparison filterType, bool filterValue);
73 filter(field filterField, comparison filterType);
74 filter(field joinOn, comparison filterType, filter joinCondition);
75
76 field getField() const;
77
78 comparison getComparison() const;
79
80 filter getJoinCondition() const;
81
82 std::string getStringArgument() const;
83
84 int getIntegerArgument() const;
85
86 bool getBooleanArgument() const;
87
88 // Group
89
90 explicit filter(bool orlogic);
91
92 bool getOrlogic() const;
93
94 filter operator+(filter condition) const;
95
96 filter& operator+=(filter condition);
97
98 using const_iterator = std::list<filter>::const_iterator;
99
100 const_iterator begin() const;
101
102 const_iterator end() const;
103
104 // Negation
105
106 filter operator!() const;
107
108 // Groupifying
109
110 filter operator&&(filter condition) const;
111 filter operator||(filter condition) const;
112
113 filter& operator&=(filter condition);
114 filter& operator|=(filter condition);
115
116 // Utility
117
118 filter normalize(object context) const;
119
120 private:
121 union {
122 struct {
123 field filterField;
124 comparison filterType;
125 union {
126 std::unique_ptr<filter> join;
127 std::string stringValue;
128 int intValue;
129 bool boolValue;
130 };
131 } singleton_;
132 struct {
133 std::list<filter> children;
134 bool orlogic;
135 } group_;
136 };
137 type type_ = type::empty;
138
139 };
140
141};
142
143#endif /* end of include guard: FILTER_H_932BA9C6 */
diff --git a/lib/form.cpp b/lib/form.cpp new file mode 100644 index 0000000..8ba3bd7 --- /dev/null +++ b/lib/form.cpp
@@ -0,0 +1,53 @@
1#include "form.h"
2#include <sqlite3.h>
3#include "filter.h"
4#include "pronunciation.h"
5#include "database.h"
6#include "query.h"
7
8namespace verbly {
9
10 const object form::objectType = object::form;
11
12 const std::list<std::string> form::select = {"form_id", "form", "complexity", "proper"};
13
14 const field form::id = field::integerField(object::form, "form_id");
15 const field form::text = field::stringField(object::form, "form");
16 const field form::complexity = field::integerField(object::form, "complexity");
17 const field form::proper = field::booleanField(object::form, "proper");
18
19 const field form::pronunciation = field::joinThrough(object::form, "form_id", object::pronunciation, "forms_pronunciations", "pronunciation_id");
20
21 const field form::lemmaJoin = field::joinField(object::form, "form_id", object::lemma);
22 const field form::inflectionCategory = field::integerField("lemmas_forms", "category");
23
24 form::form(const database& db, sqlite3_stmt* row) : db_(&db), valid_(true)
25 {
26 id_ = sqlite3_column_int(row, 0);
27 text_ = std::string(reinterpret_cast<const char*>(sqlite3_column_text(row, 1)));
28 complexity_ = sqlite3_column_int(row, 2);
29 proper_ = (sqlite3_column_int(row, 3) == 1);
30 }
31
32 filter operator%=(form::inflection_field check, filter joinCondition)
33 {
34 return (form::lemmaJoin %= (joinCondition && (form::inflectionCategory == check.getCategory())));
35 }
36
37 const std::vector<pronunciation>& form::getPronunciations() const
38 {
39 if (!valid_)
40 {
41 throw std::domain_error("Bad access to uninitialized form");
42 }
43
44 if (!initializedPronunciations_)
45 {
46 pronunciations_ = db_->pronunciations(pronunciation::form %= *this, false, -1).all();
47 initializedPronunciations_ = true;
48 }
49
50 return pronunciations_;
51 }
52
53};
diff --git a/lib/form.h b/lib/form.h new file mode 100644 index 0000000..c6a1353 --- /dev/null +++ b/lib/form.h
@@ -0,0 +1,149 @@
1#ifndef FORM_H_3A6C962C
2#define FORM_H_3A6C962C
3
4#include <list>
5#include <vector>
6#include <string>
7#include <stdexcept>
8#include "field.h"
9#include "filter.h"
10
11struct sqlite3_stmt;
12
13namespace verbly {
14
15 class pronunciation;
16 class database;
17
18 class form {
19 public:
20
21 // Default constructor
22
23 form() = default;
24
25 // Construct from database
26
27 form(const database& db, sqlite3_stmt* row);
28
29 // Accessors
30
31 operator bool() const
32 {
33 return valid_;
34 }
35
36 int getId() const
37 {
38 if (!valid_)
39 {
40 throw std::domain_error("Bad access to uninitialized form");
41 }
42
43 return id_;
44 }
45
46 std::string getText() const
47 {
48 if (!valid_)
49 {
50 throw std::domain_error("Bad access to uninitialized form");
51 }
52
53 return text_;
54 }
55
56 int getComplexity() const
57 {
58 if (!valid_)
59 {
60 throw std::domain_error("Bad access to uninitialized form");
61 }
62
63 return complexity_;
64 }
65
66 bool isProper() const
67 {
68 if (!valid_)
69 {
70 throw std::domain_error("Bad access to uninitialized form");
71 }
72
73 return proper_;
74 }
75
76 const std::vector<pronunciation>& getPronunciations() const;
77
78 // Type info
79
80 static const object objectType;
81
82 static const std::list<std::string> select;
83
84 // Query fields
85
86 static const field id;
87 static const field text;
88 static const field complexity;
89 static const field proper;
90
91 operator filter() const
92 {
93 if (!valid_)
94 {
95 throw std::domain_error("Bad access to uninitialized form");
96 }
97
98 return (id == id_);
99 }
100
101 // Relationships to other objects
102
103 static const field pronunciation;
104
105 class inflection_field {
106 public:
107
108 inflection_field(inflection category) : category_(category)
109 {
110 }
111
112 const inflection getCategory() const
113 {
114 return category_;
115 }
116
117 private:
118
119 const inflection category_;
120 };
121
122 static const inflection_field lemma(inflection category)
123 {
124 return inflection_field(category);
125 }
126
127 friend filter operator%=(form::inflection_field check, filter joinCondition);
128
129 private:
130 bool valid_ = false;
131
132 int id_;
133 std::string text_;
134 int complexity_ ;
135 bool proper_;
136
137 const database* db_;
138
139 mutable bool initializedPronunciations_ = false;
140 mutable std::vector<class pronunciation> pronunciations_;
141
142 static const field lemmaJoin;
143 static const field inflectionCategory;
144
145 };
146
147};
148
149#endif /* end of include guard: FORM_H_3A6C962C */
diff --git a/lib/frame.cpp b/lib/frame.cpp index ccec81b..bc3f842 100644 --- a/lib/frame.cpp +++ b/lib/frame.cpp
@@ -1,320 +1,21 @@
1#include "verbly.h" 1#include "frame.h"
2#include <sqlite3.h>
2 3
3namespace verbly { 4namespace verbly {
4 5
5 frame::selrestr::type frame::selrestr::get_type() const 6 const object frame::objectType = object::frame;
6 {
7 return _type;
8 }
9
10 frame::selrestr::selrestr(const selrestr& other)
11 {
12 _type = other._type;
13
14 switch (_type)
15 {
16 case frame::selrestr::type::singleton:
17 {
18 _singleton.pos = other._singleton.pos;
19 new(&_singleton.restriction) std::string(other._singleton.restriction);
20
21 break;
22 }
23
24 case frame::selrestr::type::group:
25 {
26 new(&_group.children) std::list<selrestr>(other._group.children);
27 _group.orlogic = other._group.orlogic;
28
29 break;
30 }
31
32 case frame::selrestr::type::empty:
33 {
34 // Nothing!
35
36 break;
37 }
38 }
39 }
40
41 frame::selrestr::~selrestr()
42 {
43 switch (_type)
44 {
45 case frame::selrestr::type::singleton:
46 {
47 using string_type = std::string;
48 _singleton.restriction.~string_type();
49
50 break;
51 }
52
53 case frame::selrestr::type::group:
54 {
55 using list_type = std::list<selrestr>;
56 _group.children.~list_type();
57
58 break;
59 }
60
61 case frame::selrestr::type::empty:
62 {
63 // Nothing!
64
65 break;
66 }
67 }
68 }
69
70 frame::selrestr& frame::selrestr::operator=(const selrestr& other)
71 {
72 this->~selrestr();
73
74 _type = other._type;
75
76 switch (_type)
77 {
78 case frame::selrestr::type::singleton:
79 {
80 _singleton.pos = other._singleton.pos;
81 new(&_singleton.restriction) std::string(other._singleton.restriction);
82
83 break;
84 }
85
86 case frame::selrestr::type::group:
87 {
88 new(&_group.children) std::list<selrestr>(other._group.children);
89 _group.orlogic = other._group.orlogic;
90
91 break;
92 }
93
94 case frame::selrestr::type::empty:
95 {
96 // Nothing!
97
98 break;
99 }
100 }
101
102 return *this;
103 }
104
105 frame::selrestr::selrestr() : _type(frame::selrestr::type::empty)
106 {
107
108 }
109
110 frame::selrestr::selrestr(std::string restriction, bool pos) : _type(frame::selrestr::type::singleton)
111 {
112 new(&_singleton.restriction) std::string(restriction);
113 _singleton.pos = pos;
114 }
115
116 std::string frame::selrestr::get_restriction() const
117 {
118 assert(_type == frame::selrestr::type::singleton);
119
120 return _singleton.restriction;
121 }
122
123 bool frame::selrestr::get_pos() const
124 {
125 assert(_type == frame::selrestr::type::singleton);
126
127 return _singleton.pos;
128 }
129
130 frame::selrestr::selrestr(std::list<selrestr> children, bool orlogic) : _type(frame::selrestr::type::group)
131 {
132 new(&_group.children) std::list<selrestr>(children);
133 _group.orlogic = orlogic;
134 }
135
136 std::list<frame::selrestr> frame::selrestr::get_children() const
137 {
138 assert(_type == frame::selrestr::type::group);
139
140 return _group.children;
141 }
142
143 std::list<frame::selrestr>::const_iterator frame::selrestr::begin() const
144 {
145 assert(_type == frame::selrestr::type::group);
146
147 return _group.children.begin();
148 }
149
150 std::list<frame::selrestr>::const_iterator frame::selrestr::end() const
151 {
152 assert(_type == frame::selrestr::type::group);
153
154 return _group.children.end();
155 }
156
157 bool frame::selrestr::get_orlogic() const
158 {
159 assert(_type == frame::selrestr::type::group);
160
161 return _group.orlogic;
162 }
163
164 frame::part::type frame::part::get_type() const
165 {
166 return _type;
167 }
168
169 frame::part::part()
170 {
171
172 }
173 7
174 frame::part::part(const part& other) 8 const std::list<std::string> frame::select = {"frame_id", "data"};
175 {
176 _type = other._type;
177
178 switch (_type)
179 {
180 case frame::part::type::noun_phrase:
181 {
182 new(&_noun_phrase.role) std::string(other._noun_phrase.role);
183 new(&_noun_phrase.selrestrs) selrestr(other._noun_phrase.selrestrs);
184 new(&_noun_phrase.synrestrs) std::set<std::string>(other._noun_phrase.synrestrs);
185
186 break;
187 }
188
189 case frame::part::type::literal_preposition:
190 {
191 new(&_literal_preposition.choices) std::vector<std::string>(other._literal_preposition.choices);
192
193 break;
194 }
195
196 case frame::part::type::selection_preposition:
197 {
198 new(&_selection_preposition.preprestrs) std::vector<std::string>(other._selection_preposition.preprestrs);
199
200 break;
201 }
202
203 case frame::part::type::literal:
204 {
205 new(&_literal.lexval) std::string(other._literal.lexval);
206
207 break;
208 }
209
210 default:
211 {
212 // Nothing!
213
214 break;
215 }
216 }
217 }
218 9
219 frame::part::~part() 10 const field frame::id = field::integerField(object::frame, "frame_id");
220 {
221 switch (_type)
222 {
223 case frame::part::type::noun_phrase:
224 {
225 using string_type = std::string;
226 using set_type = std::set<std::string>;
227
228 _noun_phrase.role.~string_type();
229 _noun_phrase.selrestrs.~selrestr();
230 _noun_phrase.synrestrs.~set_type();
231
232 break;
233 }
234
235 case frame::part::type::literal_preposition:
236 {
237 using vector_type = std::vector<std::string>;
238 _literal_preposition.choices.~vector_type();
239
240 break;
241 }
242
243 case frame::part::type::selection_preposition:
244 {
245 using vector_type = std::vector<std::string>;
246 _selection_preposition.preprestrs.~vector_type();
247
248 break;
249 }
250
251 case frame::part::type::literal:
252 {
253 using string_type = std::string;
254 _literal.lexval.~string_type();
255
256 break;
257 }
258
259 default:
260 {
261 // Nothing!
262
263 break;
264 }
265 }
266 }
267 11
268 std::string frame::part::get_role() const 12 const field frame::group = field::joinThrough(object::frame, "frame_id", object::group, "groups_frames", "group_id");
269 {
270 assert(_type == frame::part::type::noun_phrase);
271
272 return _noun_phrase.role;
273 }
274 13
275 frame::selrestr frame::part::get_selrestrs() const 14 frame::frame(const database& db, sqlite3_stmt* row) : db_(&db), valid_(true)
276 { 15 {
277 assert(_type == frame::part::type::noun_phrase); 16 id_ = sqlite3_column_int(row, 0);
278 17
279 return _noun_phrase.selrestrs; 18 // TODO: Initialize frame data from row.
280 }
281
282 std::set<std::string> frame::part::get_synrestrs() const
283 {
284 assert(_type == frame::part::type::noun_phrase);
285
286 return _noun_phrase.synrestrs;
287 }
288
289 std::vector<std::string> frame::part::get_choices() const
290 {
291 assert(_type == frame::part::type::literal_preposition);
292
293 return _literal_preposition.choices;
294 }
295
296 std::vector<std::string> frame::part::get_preprestrs() const
297 {
298 assert(_type == frame::part::type::selection_preposition);
299
300 return _selection_preposition.preprestrs;
301 }
302
303 std::string frame::part::get_literal() const
304 {
305 assert(_type == frame::part::type::literal);
306
307 return _literal.lexval;
308 }
309
310 std::vector<frame::part> frame::parts() const
311 {
312 return _parts;
313 }
314
315 std::map<std::string, frame::selrestr> frame::roles() const
316 {
317 return _roles;
318 } 19 }
319 20
320}; 21};
diff --git a/lib/frame.h b/lib/frame.h index fa57e1b..68a4346 100644 --- a/lib/frame.h +++ b/lib/frame.h
@@ -1,118 +1,78 @@
1#ifndef FRAME_H_9A5D90FE 1#ifndef FRAME_H_EA29065A
2#define FRAME_H_9A5D90FE 2#define FRAME_H_EA29065A
3
4#include <stdexcept>
5#include <list>
6#include "field.h"
7#include "filter.h"
8
9struct sqlite3_stmt;
3 10
4namespace verbly { 11namespace verbly {
5 12
6 class frame_query; 13 class database;
7 14
8 class frame { 15 class frame {
9 public: 16 public:
10 class selrestr { 17
11 public: 18 // Default constructor
12 enum class type { 19
13 empty, 20 frame() = default;
14 singleton, 21
15 group 22 // Construct from database
16 }; 23
17 24 frame(const database& db, sqlite3_stmt* row);
18 type get_type() const; 25
19 selrestr(const selrestr& other); 26 // Accessors
20 ~selrestr(); 27
21 selrestr& operator=(const selrestr& other); 28 operator bool() const
22 29 {
23 // Empty 30 return valid_;
24 selrestr(); 31 }
25 32
26 // Singleton 33 int getId() const
27 selrestr(std::string restriction, bool pos); 34 {
28 std::string get_restriction() const; 35 if (!valid_)
29 bool get_pos() const; 36 {
30 37 throw std::domain_error("Bad access to uninitialized frame");
31 // Group 38 }
32 selrestr(std::list<selrestr> children, bool orlogic);
33 std::list<selrestr> get_children() const;
34 std::list<selrestr>::const_iterator begin() const;
35 std::list<selrestr>::const_iterator end() const;
36 bool get_orlogic() const;
37
38 private:
39 union {
40 struct {
41 bool pos;
42 std::string restriction;
43 } _singleton;
44 struct {
45 std::list<selrestr> children;
46 bool orlogic;
47 } _group;
48 };
49 type _type;
50 };
51 39
52 class part { 40 return id_;
53 public: 41 }
54 enum class type { 42
55 noun_phrase, 43 // Type info
56 verb, 44
57 literal_preposition, 45 static const object objectType;
58 selection_preposition, 46
59 adjective, 47 static const std::list<std::string> select;
60 adverb, 48
61 literal 49 // Query fields
62 }; 50
63 51 static const field id;
64 type get_type() const; 52
65 part(const part& other); 53 operator filter() const
66 ~part(); 54 {
67 55 if (!valid_)
68 // Noun phrase 56 {
69 std::string get_role() const; 57 throw std::domain_error("Bad access to uninitialized frame");
70 selrestr get_selrestrs() const; 58 }
71 std::set<std::string> get_synrestrs() const;
72
73 // Literal preposition
74 std::vector<std::string> get_choices() const;
75
76 // Selection preposition
77 std::vector<std::string> get_preprestrs() const;
78
79 // Literal
80 std::string get_literal() const;
81
82 private:
83 friend class frame_query;
84
85 part();
86
87 union {
88 struct {
89 std::string role;
90 selrestr selrestrs;
91 std::set<std::string> synrestrs;
92 } _noun_phrase;
93 struct {
94 std::vector<std::string> choices;
95 } _literal_preposition;
96 struct {
97 std::vector<std::string> preprestrs;
98 } _selection_preposition;
99 struct {
100 std::string lexval;
101 } _literal;
102 };
103 type _type;
104 };
105 59
106 std::vector<part> parts() const; 60 return (id == id_);
107 std::map<std::string, selrestr> roles() const; 61 }
108 62
109 private: 63 // Relationships to other objects
110 friend class frame_query; 64
111 65 static const field group;
112 std::vector<part> _parts; 66
113 std::map<std::string, selrestr> _roles; 67 private:
68 bool valid_ = false;
69
70 int id_;
71
72 const database* db_;
73
114 }; 74 };
115 75
116}; 76};
117 77
118#endif /* end of include guard: FRAME_H_9A5D90FE */ 78#endif /* end of include guard: FRAME_H_EA29065A */
diff --git a/lib/group.cpp b/lib/group.cpp new file mode 100644 index 0000000..8b6d985 --- /dev/null +++ b/lib/group.cpp
@@ -0,0 +1,43 @@
1#include "group.h"
2#include <sqlite3.h>
3#include "frame.h"
4#include "database.h"
5#include "query.h"
6
7namespace verbly {
8
9 const object group::objectType = object::group;
10
11 const std::list<std::string> group::select = {"group_id", "data"};
12
13 const field group::id = field::integerField(object::group, "group_id");
14
15 const field group::frame = field::joinThrough(object::group, "group_id", object::frame, "groups_frames", "frame_id");
16 const field group::word = field::joinField(object::group, "group_id", object::word);
17
18 group::group(const database& db, sqlite3_stmt* row) : db_(&db), valid_(true)
19 {
20 id_ = sqlite3_column_int(row, 0);
21
22 // TODO: Initialize role data from row.
23 }
24
25 const std::vector<frame>& group::getFrames() const
26 {
27 if (!valid_)
28 {
29 throw std::domain_error("Bad access to uninitialized group");
30 }
31
32 if (!initializedFrames_)
33 {
34 frames_ = db_->frames(frame::group %= *this, false, -1).all();
35
36 initializedFrames_ = true;
37 }
38
39 return frames_;
40 }
41
42};
43
diff --git a/lib/group.h b/lib/group.h new file mode 100644 index 0000000..dd53503 --- /dev/null +++ b/lib/group.h
@@ -0,0 +1,87 @@
1#ifndef GROUP_H_BD6933C0
2#define GROUP_H_BD6933C0
3
4#include <stdexcept>
5#include <list>
6#include <vector>
7#include "field.h"
8#include "filter.h"
9
10struct sqlite3_stmt;
11
12namespace verbly {
13
14 class database;
15 class frame;
16
17 class group {
18 public:
19
20 // Default constructor
21
22 group() = default;
23
24 // Construct from database
25
26 group(const database& db, sqlite3_stmt* row);
27
28 // Accessors
29
30 operator bool() const
31 {
32 return valid_;
33 }
34
35 int getId() const
36 {
37 if (!valid_)
38 {
39 throw std::domain_error("Bad access to uninitialized group");
40 }
41
42 return id_;
43 }
44
45 const std::vector<frame>& getFrames() const;
46
47 // Type info
48
49 static const object objectType;
50
51 static const std::list<std::string> select;
52
53 // Query fields
54
55 static const field id;
56
57 operator filter() const
58 {
59 if (!valid_)
60 {
61 throw std::domain_error("Bad access to uninitialized group");
62 }
63
64 return (id == id_);
65 }
66
67 // Relationships to other objects
68
69 static const field frame;
70
71 static const field word;
72
73 private:
74 bool valid_ = false;
75
76 int id_;
77
78 const database* db_;
79
80 mutable bool initializedFrames_ = false;
81 mutable std::vector<class frame> frames_;
82
83 };
84
85};
86
87#endif /* end of include guard: GROUP_H_BD6933C0 */
diff --git a/lib/lemma.cpp b/lib/lemma.cpp new file mode 100644 index 0000000..f9e9fcc --- /dev/null +++ b/lib/lemma.cpp
@@ -0,0 +1,69 @@
1#include "lemma.h"
2#include <sqlite3.h>
3#include "database.h"
4#include "query.h"
5
6namespace verbly {
7
8 const object lemma::objectType = object::lemma;
9
10 const std::list<std::string> lemma::select = {"lemma_id"};
11
12 const field lemma::id = field::integerField(object::lemma, "lemma_id");
13
14 const field lemma::word = field::joinField(object::lemma, "lemma_id", object::word);
15
16 const field lemma::formJoin = field::joinField(object::lemma, "form_id", object::form);
17 const field lemma::inflectionCategory = field::integerField(object::lemma, "category");
18
19 filter operator%=(lemma::inflection_field check, filter joinCondition)
20 {
21 return (lemma::formJoin %= joinCondition) && (lemma::inflectionCategory == check.getCategory());
22 }
23
24 lemma::lemma(const database& db, sqlite3_stmt* row) : db_(&db), valid_(true)
25 {
26 id_ = sqlite3_column_int(row, 0);
27 }
28
29 const form& lemma::getBaseForm() const
30 {
31 if (!valid_)
32 {
33 throw std::domain_error("Bad access to uninitialized lemma");
34 }
35
36 if (!forms_.count(inflection::base))
37 {
38 initializeForm(inflection::base);
39 }
40
41 return forms_.at(inflection::base).front();
42 }
43
44 bool lemma::hasInflection(inflection category) const
45 {
46 return !getInflections(category).empty();
47 }
48
49 const std::vector<form>& lemma::getInflections(inflection category) const
50 {
51 if (!valid_)
52 {
53 throw std::domain_error("Bad access to uninitialized lemma");
54 }
55
56 if (!forms_.count(category))
57 {
58 initializeForm(category);
59 }
60
61 return forms_.at(category);
62 }
63
64 void lemma::initializeForm(inflection infl) const
65 {
66 forms_[infl] = db_->forms(form::lemma(infl) %= *this, false, -1).all();
67 }
68
69};
diff --git a/lib/lemma.h b/lib/lemma.h new file mode 100644 index 0000000..9a07f16 --- /dev/null +++ b/lib/lemma.h
@@ -0,0 +1,120 @@
1#ifndef LEMMA_H_0A180D30
2#define LEMMA_H_0A180D30
3
4#include <stdexcept>
5#include <vector>
6#include <list>
7#include <map>
8#include "field.h"
9#include "enums.h"
10#include "filter.h"
11
12struct sqlite3_stmt;
13
14namespace verbly {
15
16 class form;
17 class database;
18
19 class lemma {
20 public:
21
22 // Default constructor
23
24 lemma() = default;
25
26 // Construct from database
27
28 lemma(const database& db, sqlite3_stmt* row);
29
30 // Accessors
31
32 operator bool() const
33 {
34 return valid_;
35 }
36
37 int getId() const
38 {
39 if (!valid_)
40 {
41 throw std::domain_error("Bad access to uninitialized lemma");
42 }
43
44 return id_;
45 }
46
47 const form& getBaseForm() const;
48
49 bool hasInflection(inflection category) const;
50
51 const std::vector<form>& getInflections(inflection category) const;
52
53 // Type info
54
55 static const object objectType;
56
57 static const std::list<std::string> select;
58
59 // Query fields
60
61 static const field id;
62
63 operator filter() const
64 {
65 if (!valid_)
66 {
67 throw std::domain_error("Bad access to uninitialized lemma");
68 }
69
70 return (id == id_);
71 }
72
73 // Relationships to other objects
74
75 static const field word;
76
77 class inflection_field {
78 public:
79
80 inflection_field(inflection category) : category_(category)
81 {
82 }
83
84 const inflection getCategory() const
85 {
86 return category_;
87 }
88
89 private:
90
91 const inflection category_;
92 };
93
94 static const inflection_field form(inflection category)
95 {
96 return inflection_field(category);
97 }
98
99 friend filter operator%=(lemma::inflection_field check, filter joinCondition);
100
101 private:
102
103 void initializeForm(inflection category) const;
104
105 bool valid_ = false;
106
107 int id_;
108
109 mutable std::map<inflection, std::vector<class form>> forms_;
110
111 const database* db_;
112
113 static const field formJoin;
114 static const field inflectionCategory;
115
116 };
117
118};
119
120#endif /* end of include guard: LEMMA_H_0A180D30 */
diff --git a/lib/notion.cpp b/lib/notion.cpp new file mode 100644 index 0000000..16794d3 --- /dev/null +++ b/lib/notion.cpp
@@ -0,0 +1,94 @@
1#include "notion.h"
2#include <sqlite3.h>
3#include <sstream>
4
5namespace verbly {
6
7 const object notion::objectType = object::notion;
8
9 const std::list<std::string> notion::select = {"notion_id", "part_of_speech", "wnid", "images"};
10
11 const field notion::id = field::integerField(object::notion, "notion_id");
12 const field notion::partOfSpeech = field::integerField(object::notion, "part_of_speech");
13 const field notion::wnid = field::integerField(object::notion, "wnid", true);
14 const field notion::numOfImages = field::integerField(object::notion, "images", true);
15
16 const field notion::word = field::joinField(object::notion, "word_id", object::word);
17
18 const field notion::hypernyms = field::selfJoin(object::notion, "notion_id", "hypernymy", "hyponym_id", "hypernym_id");
19 const field notion::hyponyms = field::selfJoin(object::notion, "notion_id", "hypernymy", "hypernym_id", "hyponym_id");
20
21 const field notion::fullHypernyms = field::hierarchalSelfJoin(object::notion, "notion_id", "hypernymy", "hyponym_id", "hypernym_id");
22 const field notion::fullHyponyms = field::hierarchalSelfJoin(object::notion, "notion_id", "hypernymy", "hypernym_id", "hyponym_id");
23
24 const field notion::instances = field::selfJoin(object::notion, "notion_id", "instantiation", "class_id", "instance_id");
25 const field notion::classes = field::selfJoin(object::notion, "notion_id", "instantiation", "instance_id", "class_id");
26
27 const field notion::memberMeronyms = field::selfJoin(object::notion, "notion_id", "member_meronymy", "holonym_id", "meronym_id");
28 const field notion::memberHolonyms = field::selfJoin(object::notion, "notion_id", "member_meronymy", "meronym_id", "holonym_id");
29
30 const field notion::fullMemberMeronyms = field::hierarchalSelfJoin(object::notion, "notion_id", "member_meronymy", "holonym_id", "meronym_id");
31 const field notion::fullMemberHolonyms = field::hierarchalSelfJoin(object::notion, "notion_id", "member_meronymy", "meronym_id", "holonym_id");
32
33 const field notion::partMeronyms = field::selfJoin(object::notion, "notion_id", "part_meronymy", "holonym_id", "meronym_id");
34 const field notion::partHolonyms = field::selfJoin(object::notion, "notion_id", "part_meronymy", "meronym_id", "holonym_id");
35
36 const field notion::fullPartMeronyms = field::hierarchalSelfJoin(object::notion, "notion_id", "part_meronymy", "holonym_id", "meronym_id");
37 const field notion::fullPartHolonyms = field::hierarchalSelfJoin(object::notion, "notion_id", "part_meronymy", "meronym_id", "holonym_id");
38
39 const field notion::substanceMeronyms = field::selfJoin(object::notion, "notion_id", "substance_meronymy", "holonym_id", "meronym_id");
40 const field notion::substanceHolonyms = field::selfJoin(object::notion, "notion_id", "substance_meronymy", "meronym_id", "holonym_id");
41
42 const field notion::fullSubstanceMeronyms = field::hierarchalSelfJoin(object::notion, "notion_id", "substance_meronymy", "holonym_id", "meronym_id");
43 const field notion::fullSubstanceHolonyms = field::hierarchalSelfJoin(object::notion, "notion_id", "substance_meronymy", "meronym_id", "holonym_id");
44
45 const field notion::variants = field::selfJoin(object::notion, "notion_id", "variation", "noun_id", "adjective_id");
46 const field notion::attributes = field::selfJoin(object::notion, "notion_id", "variation", "adjective_id", "noun_id");
47
48 const field notion::similarAdjectives = field::selfJoin(object::notion, "notion_id", "similarity", "adjective_2_id", "adjective_1_id");
49
50 const field notion::entails = field::selfJoin(object::notion, "notion_id", "entailment", "given_id", "entailment_id");
51 const field notion::entailedBy = field::selfJoin(object::notion, "notion_id", "entailment", "entailment_id", "given_id");
52
53 const field notion::causes = field::selfJoin(object::notion, "notion_id", "causality", "effect_id", "cause_id");
54 const field notion::effects = field::selfJoin(object::notion, "notion_id", "causality", "cause_id", "effect_id");
55
56 const notion::preposition_group_field prepositionGroup = {};
57
58 const field notion::preposition_group_field::isA = field::joinField(object::notion, "notion_id", "is_a");
59 const field notion::preposition_group_field::groupNameField = field::stringField("is_a", "groupname");
60
61 notion::notion(const database& db, sqlite3_stmt* row) : db_(&db), valid_(true)
62 {
63 id_ = sqlite3_column_int(row, 0);
64 partOfSpeech_ = static_cast<part_of_speech>(sqlite3_column_int(row, 1));
65
66 if (sqlite3_column_type(row, 2) != SQLITE_NULL)
67 {
68 hasWnid_ = true;
69 wnid_ = sqlite3_column_int(row, 2);
70 }
71
72 if (sqlite3_column_type(row, 3) != SQLITE_NULL)
73 {
74 hasNumOfImages_ = true;
75 numOfImages_ = sqlite3_column_int(row, 3);
76 }
77 }
78
79 std::string notion::getImageNetUrl() const
80 {
81 std::stringstream url;
82 url << "http://www.image-net.org/api/text/imagenet.synset.geturls?wnid=n";
83 url.width(8);
84 url.fill('0');
85 url << (getWnid() % 100000000);
86 return url.str();
87 }
88
89 filter notion::preposition_group_field::operator==(std::string groupName) const
90 {
91 return (isA %= (groupNameField == groupName));
92 }
93
94};
diff --git a/lib/notion.h b/lib/notion.h new file mode 100644 index 0000000..a180d73 --- /dev/null +++ b/lib/notion.h
@@ -0,0 +1,200 @@
1#ifndef NOTION_H_FD1C7646
2#define NOTION_H_FD1C7646
3
4#include <stdexcept>
5#include <string>
6#include "field.h"
7#include "filter.h"
8
9struct sqlite3_stmt;
10
11namespace verbly {
12
13 class database;
14
15 class notion {
16 public:
17
18 // Default constructor
19
20 notion() = default;
21
22 // Construct from database
23
24 notion(const database& db, sqlite3_stmt* row);
25
26 // Accessors
27
28 operator bool() const
29 {
30 return valid_;
31 }
32
33 int getId() const
34 {
35 if (!valid_)
36 {
37 throw std::domain_error("Bad access to uninitialized notion");
38 }
39
40 return id_;
41 }
42
43 part_of_speech getPartOfSpeech() const
44 {
45 if (!valid_)
46 {
47 throw std::domain_error("Bad access to uninitialized notion");
48 }
49
50 return partOfSpeech_;
51 }
52
53 bool hasWnid() const
54 {
55 if (!valid_)
56 {
57 throw std::domain_error("Bad access to uninitialized notion");
58 }
59
60 return hasWnid_;
61 }
62
63 int getWnid() const
64 {
65 if (!valid_)
66 {
67 throw std::domain_error("Bad access to uninitialized notion");
68 }
69
70 if (!hasWnid_)
71 {
72 throw std::domain_error("Notion has no wnid");
73 }
74
75 return wnid_;
76 }
77
78 bool hasNumOfImages() const
79 {
80 if (!valid_)
81 {
82 throw std::domain_error("Bad access to uninitialized notion");
83 }
84
85 return hasNumOfImages_;
86 }
87
88 int getNumOfImages() const
89 {
90 if (!valid_)
91 {
92 throw std::domain_error("Bad access to uninitialized notion");
93 }
94
95 if (!hasNumOfImages_)
96 {
97 throw std::domain_error("Notion does not have a number of images");
98 }
99
100 return numOfImages_;
101 }
102
103 // Convenience
104
105 std::string getImageNetUrl() const;
106
107 // Type info
108
109 static const object objectType;
110
111 static const std::list<std::string> select;
112
113 // Query fields
114
115 static const field id;
116 static const field partOfSpeech;
117 static const field wnid;
118 static const field numOfImages;
119
120 operator filter() const
121 {
122 return (id == id_);
123 }
124
125 // Relationships with other objects
126
127 static const field word;
128
129 // Relationships with self
130
131 static const field hypernyms;
132 static const field hyponyms;
133
134 static const field fullHypernyms;
135 static const field fullHyponyms;
136
137 static const field instances;
138 static const field classes;
139
140 static const field memberMeronyms;
141 static const field memberHolonyms;
142
143 static const field fullMemberMeronyms;
144 static const field fullMemberHolonyms;
145
146 static const field partMeronyms;
147 static const field partHolonyms;
148
149 static const field fullPartMeronyms;
150 static const field fullPartHolonyms;
151
152 static const field substanceMeronyms;
153 static const field substanceHolonyms;
154
155 static const field fullSubstanceMeronyms;
156 static const field fullSubstanceHolonyms;
157
158 static const field variants;
159 static const field attributes;
160
161 static const field similarAdjectives;
162
163 static const field entails;
164 static const field entailedBy;
165
166 static const field causes;
167 static const field effects;
168
169 // Preposition group relationship
170
171 class preposition_group_field {
172 public:
173
174 filter operator==(std::string groupName) const;
175
176 private:
177
178 static const field isA;
179 static const field groupNameField;
180 };
181
182 static const preposition_group_field prepositionGroup;
183
184 private:
185 bool valid_ = false;
186
187 int id_;
188 part_of_speech partOfSpeech_;
189 bool hasWnid_ = false;
190 int wnid_;
191 bool hasNumOfImages_ = false;
192 int numOfImages_;
193
194 const database* db_;
195
196 };
197
198};
199
200#endif /* end of include guard: NOTION_H_FD1C7646 */
diff --git a/lib/noun.cpp b/lib/noun.cpp deleted file mode 100644 index d8b34c9..0000000 --- a/lib/noun.cpp +++ /dev/null
@@ -1,221 +0,0 @@
1#include "verbly.h"
2#include <set>
3#include <iostream>
4
5namespace verbly {
6
7 noun::noun()
8 {
9
10 }
11
12 noun::noun(const data& _data, int _id) : word(_data, _id)
13 {
14
15 }
16
17 std::string noun::base_form() const
18 {
19 assert(_valid == true);
20
21 return _singular;
22 }
23
24 std::string noun::singular_form() const
25 {
26 assert(_valid == true);
27
28 return _singular;
29 }
30
31 std::string noun::plural_form() const
32 {
33 assert(_valid == true);
34
35 return _plural;
36 }
37
38 int noun::wnid() const
39 {
40 assert(_valid == true);
41
42 return _wnid;
43 }
44
45 bool noun::has_plural_form() const
46 {
47 assert(_valid == true);
48
49 return !_plural.empty();
50 }
51
52 noun_query noun::hypernyms() const
53 {
54 assert(_valid == true);
55
56 return _data->nouns().hypernym_of(*this);
57 }
58
59 noun_query noun::full_hypernyms() const
60 {
61 assert(_valid == true);
62
63 return _data->nouns().full_hypernym_of(*this);
64 }
65
66 noun_query noun::hyponyms() const
67 {
68 assert(_valid == true);
69
70 return _data->nouns().hyponym_of(*this);
71 }
72
73 noun_query noun::full_hyponyms() const
74 {
75 assert(_valid == true);
76
77 return _data->nouns().full_hyponym_of(*this);
78 }
79
80 noun_query noun::part_meronyms() const
81 {
82 assert(_valid == true);
83
84 return _data->nouns().part_meronym_of(*this);
85 }
86
87 noun_query noun::full_part_meronyms() const
88 {
89 assert(_valid == true);
90
91 return _data->nouns().full_part_meronym_of(*this);
92 }
93
94 noun_query noun::part_holonyms() const
95 {
96 assert(_valid == true);
97
98 return _data->nouns().part_holonym_of(*this);
99 }
100
101 noun_query noun::full_part_holonyms() const
102 {
103 assert(_valid == true);
104
105 return _data->nouns().full_part_holonym_of(*this);
106 }
107
108 noun_query noun::substance_meronyms() const
109 {
110 assert(_valid == true);
111
112 return _data->nouns().substance_meronym_of(*this);
113 }
114
115 noun_query noun::full_substance_meronyms() const
116 {
117 assert(_valid == true);
118
119 return _data->nouns().full_substance_meronym_of(*this);
120 }
121
122 noun_query noun::substance_holonyms() const
123 {
124 assert(_valid == true);
125
126 return _data->nouns().substance_holonym_of(*this);
127 }
128
129 noun_query noun::full_substance_holonyms() const
130 {
131 assert(_valid == true);
132
133 return _data->nouns().full_substance_holonym_of(*this);
134 }
135
136 noun_query noun::member_meronyms() const
137 {
138 assert(_valid == true);
139
140 return _data->nouns().member_meronym_of(*this);
141 }
142
143 noun_query noun::full_member_meronyms() const
144 {
145 assert(_valid == true);
146
147 return _data->nouns().full_member_meronym_of(*this);
148 }
149
150 noun_query noun::member_holonyms() const
151 {
152 assert(_valid == true);
153
154 return _data->nouns().member_holonym_of(*this);
155 }
156
157 noun_query noun::full_member_holonyms() const
158 {
159 assert(_valid == true);
160
161 return _data->nouns().full_member_holonym_of(*this);
162 }
163
164 noun_query noun::classes() const
165 {
166 assert(_valid == true);
167
168 return _data->nouns().class_of(*this);
169 }
170
171 noun_query noun::instances() const
172 {
173 assert(_valid == true);
174
175 return _data->nouns().instance_of(*this);
176 }
177
178 noun_query noun::synonyms() const
179 {
180 assert(_valid == true);
181
182 return _data->nouns().synonym_of(*this);
183 }
184
185 noun_query noun::antonyms() const
186 {
187 assert(_valid == true);
188
189 return _data->nouns().antonym_of(*this);
190 }
191
192 adjective_query noun::pertainyms() const
193 {
194 assert(_valid == true);
195
196 return _data->adjectives().pertainym_of(*this);
197 }
198
199 adjective_query noun::variations() const
200 {
201 assert(_valid == true);
202
203 return _data->adjectives().variant_of(*this);
204 }
205
206 std::string noun::imagenet_url() const
207 {
208 std::stringstream url;
209 url << "http://www.image-net.org/api/text/imagenet.synset.geturls?wnid=n";
210 url.width(8);
211 url.fill('0');
212 url << (_wnid % 100000000);
213 return url.str();
214 }
215
216 bool noun::operator<(const noun& other) const
217 {
218 return _id < other._id;
219 }
220
221};
diff --git a/lib/noun.h b/lib/noun.h deleted file mode 100644 index bd71e57..0000000 --- a/lib/noun.h +++ /dev/null
@@ -1,55 +0,0 @@
1#ifndef NOUN_H_24A03C83
2#define NOUN_H_24A03C83
3
4namespace verbly {
5
6 class noun : public word {
7 private:
8 std::string _singular;
9 std::string _plural;
10 int _wnid;
11
12 friend class noun_query;
13
14 public:
15 noun();
16 noun(const data& _data, int _id);
17
18 std::string base_form() const;
19 std::string singular_form() const;
20 std::string plural_form() const;
21 int wnid() const;
22
23 bool has_plural_form() const;
24
25 noun_query hypernyms() const;
26 noun_query full_hypernyms() const;
27 noun_query hyponyms() const;
28 noun_query full_hyponyms() const;
29 noun_query part_meronyms() const;
30 noun_query full_part_meronyms() const;
31 noun_query part_holonyms() const;
32 noun_query full_part_holonyms() const;
33 noun_query substance_meronyms() const;
34 noun_query full_substance_meronyms() const;
35 noun_query substance_holonyms() const;
36 noun_query full_substance_holonyms() const;
37 noun_query member_meronyms() const;
38 noun_query full_member_meronyms() const;
39 noun_query member_holonyms() const;
40 noun_query full_member_holonyms() const;
41 noun_query classes() const;
42 noun_query instances() const;
43 noun_query synonyms() const;
44 noun_query antonyms() const;
45 adjective_query pertainyms() const;
46 adjective_query variations() const;
47
48 std::string imagenet_url() const;
49
50 bool operator<(const noun& other) const;
51 };
52
53};
54
55#endif /* end of include guard: NOUN_H_24A03C83 */
diff --git a/lib/noun_query.cpp b/lib/noun_query.cpp deleted file mode 100644 index 8648227..0000000 --- a/lib/noun_query.cpp +++ /dev/null
@@ -1,2013 +0,0 @@
1#include "verbly.h"
2
3namespace verbly {
4
5 noun_query::noun_query(const data& _data) : _data(_data)
6 {
7
8 }
9
10 noun_query& noun_query::limit(int _limit)
11 {
12 if ((_limit > 0) || (_limit == unlimited))
13 {
14 this->_limit = _limit;
15 }
16
17 return *this;
18 }
19
20 noun_query& noun_query::random()
21 {
22 this->_random = true;
23
24 return *this;
25 }
26
27 noun_query& noun_query::except(const noun& _word)
28 {
29 _except.push_back(_word);
30
31 return *this;
32 }
33
34 noun_query& noun_query::rhymes_with(const word& _word)
35 {
36 for (auto rhyme : _word.get_rhymes())
37 {
38 _rhymes.push_back(rhyme);
39 }
40
41 if (dynamic_cast<const noun*>(&_word) != nullptr)
42 {
43 _except.push_back(dynamic_cast<const noun&>(_word));
44 }
45
46 return *this;
47 }
48
49 noun_query& noun_query::rhymes_with(rhyme _r)
50 {
51 _rhymes.push_back(_r);
52
53 return *this;
54 }
55
56 noun_query& noun_query::has_pronunciation()
57 {
58 this->_has_prn = true;
59
60 return *this;
61 }
62
63 noun_query& noun_query::has_rhyming_noun()
64 {
65 _has_rhyming_noun = true;
66
67 return *this;
68 }
69
70 noun_query& noun_query::has_rhyming_adjective()
71 {
72 _has_rhyming_adjective = true;
73
74 return *this;
75 }
76
77 noun_query& noun_query::has_rhyming_adverb()
78 {
79 _has_rhyming_adverb = true;
80
81 return *this;
82 }
83
84 noun_query& noun_query::has_rhyming_verb()
85 {
86 _has_rhyming_verb = true;
87
88 return *this;
89 }
90
91 noun_query& noun_query::with_stress(filter<std::vector<bool>> _arg)
92 {
93 _stress = _arg;
94
95 return *this;
96 }
97
98 noun_query& noun_query::with_singular_form(std::string _arg)
99 {
100 _with_singular_form.push_back(_arg);
101
102 return *this;
103 }
104
105 noun_query& noun_query::with_prefix(filter<std::string> _f)
106 {
107 _f.clean();
108 _with_prefix = _f;
109
110 return *this;
111 }
112
113 noun_query& noun_query::with_suffix(filter<std::string> _f)
114 {
115 _f.clean();
116 _with_suffix = _f;
117
118 return *this;
119 }
120
121 noun_query& noun_query::requires_plural_form()
122 {
123 _requires_plural_form = true;
124
125 return *this;
126 }
127
128 noun_query& noun_query::with_complexity(int _arg)
129 {
130 _with_complexity = _arg;
131
132 return *this;
133 }
134
135 noun_query& noun_query::is_hypernym()
136 {
137 _is_hypernym = true;
138
139 return *this;
140 }
141
142 noun_query& noun_query::hypernym_of(filter<noun> _f)
143 {
144 _f.clean();
145 _hypernym_of = _f;
146
147 return *this;
148 }
149
150 noun_query& noun_query::full_hypernym_of(filter<noun> _f)
151 {
152 _f.clean();
153 _full_hypernym_of = _f;
154
155 return *this;
156 }
157
158 noun_query& noun_query::is_hyponym()
159 {
160 _is_hyponym = true;
161
162 return *this;
163 }
164
165 noun_query& noun_query::hyponym_of(filter<noun> _f)
166 {
167 _f.clean();
168 _hyponym_of = _f;
169
170 return *this;
171 }
172
173 noun_query& noun_query::full_hyponym_of(filter<noun> _f)
174 {
175 _f.clean();
176 _full_hyponym_of = _f;
177
178 return *this;
179 }
180
181 noun_query& noun_query::is_part_meronym()
182 {
183 _is_part_meronym = true;
184
185 return *this;
186 }
187
188 noun_query& noun_query::part_meronym_of(filter<noun> _f)
189 {
190 _f.clean();
191 _part_meronym_of = _f;
192
193 return *this;
194 }
195
196 noun_query& noun_query::full_part_meronym_of(filter<noun> _f)
197 {
198 _f.clean();
199 _full_part_meronym_of = _f;
200
201 return *this;
202 }
203
204 noun_query& noun_query::is_part_holonym()
205 {
206 _is_part_holonym = true;
207
208 return *this;
209 }
210
211 noun_query& noun_query::part_holonym_of(filter<noun> _f)
212 {
213 _f.clean();
214 _part_holonym_of = _f;
215
216 return *this;
217 }
218
219 noun_query& noun_query::full_part_holonym_of(filter<noun> _f)
220 {
221 _f.clean();
222 _full_part_holonym_of = _f;
223
224 return *this;
225 }
226
227 noun_query& noun_query::is_substance_meronym()
228 {
229 _is_substance_meronym = true;
230
231 return *this;
232 }
233
234 noun_query& noun_query::substance_meronym_of(filter<noun> _f)
235 {
236 _f.clean();
237 _substance_meronym_of = _f;
238
239 return *this;
240 }
241
242 noun_query& noun_query::full_substance_meronym_of(filter<noun> _f)
243 {
244 _f.clean();
245 _full_substance_meronym_of = _f;
246
247 return *this;
248 }
249
250 noun_query& noun_query::is_substance_holonym()
251 {
252 _is_substance_holonym = true;
253
254 return *this;
255 }
256
257 noun_query& noun_query::substance_holonym_of(filter<noun> _f)
258 {
259 _f.clean();
260 _substance_holonym_of = _f;
261
262 return *this;
263 }
264
265 noun_query& noun_query::full_substance_holonym_of(filter<noun> _f)
266 {
267 _f.clean();
268 _full_substance_holonym_of = _f;
269
270 return *this;
271 }
272
273 noun_query& noun_query::is_member_meronym()
274 {
275 _is_member_meronym = true;
276
277 return *this;
278 }
279
280 noun_query& noun_query::member_meronym_of(filter<noun> _f)
281 {
282 _f.clean();
283 _member_meronym_of = _f;
284
285 return *this;
286 }
287
288 noun_query& noun_query::full_member_meronym_of(filter<noun> _f)
289 {
290 _f.clean();
291 _full_member_meronym_of = _f;
292
293 return *this;
294 }
295
296 noun_query& noun_query::is_member_holonym()
297 {
298 _is_member_holonym = true;
299
300 return *this;
301 }
302
303 noun_query& noun_query::member_holonym_of(filter<noun> _f)
304 {
305 _f.clean();
306 _member_holonym_of = _f;
307
308 return *this;
309 }
310
311 noun_query& noun_query::full_member_holonym_of(filter<noun> _f)
312 {
313 _f.clean();
314 _full_member_holonym_of = _f;
315
316 return *this;
317 }
318
319 noun_query& noun_query::is_proper()
320 {
321 _is_proper = true;
322
323 return *this;
324 }
325
326 noun_query& noun_query::is_not_proper()
327 {
328 _is_not_proper = true;
329
330 return *this;
331 }
332
333 noun_query& noun_query::is_instance()
334 {
335 _is_instance = true;
336
337 return *this;
338 }
339
340 noun_query& noun_query::instance_of(filter<noun> _f)
341 {
342 _f.clean();
343 _instance_of = _f;
344
345 return *this;
346 }
347
348 noun_query& noun_query::is_class()
349 {
350 _is_class = true;
351
352 return *this;
353 }
354
355 noun_query& noun_query::class_of(filter<noun> _f)
356 {
357 _f.clean();
358 _class_of = _f;
359
360 return *this;
361 }
362
363 noun_query& noun_query::has_synonyms()
364 {
365 _has_synonyms = true;
366
367 return *this;
368 }
369
370 noun_query& noun_query::synonym_of(filter<noun> _f)
371 {
372 _f.clean();
373 _synonym_of = _f;
374
375 return *this;
376 }
377
378 noun_query& noun_query::has_antonyms()
379 {
380 _has_antonyms = true;
381
382 return *this;
383 }
384
385 noun_query& noun_query::antonym_of(filter<noun> _f)
386 {
387 _f.clean();
388 _antonym_of = _f;
389
390 return *this;
391 }
392
393 noun_query& noun_query::has_pertainym()
394 {
395 _has_pertainym = true;
396
397 return *this;
398 }
399
400 noun_query& noun_query::anti_pertainym_of(filter<adjective> _f)
401 {
402 _f.clean();
403 _anti_pertainym_of = _f;
404
405 return *this;
406 }
407
408 noun_query& noun_query::is_attribute()
409 {
410 _is_attribute = true;
411
412 return *this;
413 }
414
415 noun_query& noun_query::attribute_of(filter<adjective> _f)
416 {
417 _f.clean();
418 _attribute_of = _f;
419
420 return *this;
421 }
422
423 noun_query& noun_query::at_least_n_images(int _arg)
424 {
425 _at_least_n_images = _arg;
426
427 return *this;
428 }
429
430 noun_query& noun_query::with_wnid(int _arg)
431 {
432 _with_wnid.insert(_arg);
433
434 return *this;
435 }
436
437 /*
438 noun_query& noun_query::derived_from(const word& _w)
439 {
440 if (dynamic_cast<const adjective*>(&_w) != nullptr)
441 {
442 _derived_from_adjective.push_back(dynamic_cast<const adjective&>(_w));
443 } else if (dynamic_cast<const adverb*>(&_w) != nullptr)
444 {
445 _derived_from_adverb.push_back(dynamic_cast<const adverb&>(_w));
446 } else if (dynamic_cast<const noun*>(&_w) != nullptr)
447 {
448 _derived_from_noun.push_back(dynamic_cast<const noun&>(_w));
449 }
450
451 return *this;
452 }
453
454 noun_query& noun_query::not_derived_from(const word& _w)
455 {
456 if (dynamic_cast<const adjective*>(&_w) != nullptr)
457 {
458 _not_derived_from_adjective.push_back(dynamic_cast<const adjective&>(_w));
459 } else if (dynamic_cast<const adverb*>(&_w) != nullptr)
460 {
461 _not_derived_from_adverb.push_back(dynamic_cast<const adverb&>(_w));
462 } else if (dynamic_cast<const noun*>(&_w) != nullptr)
463 {
464 _not_derived_from_noun.push_back(dynamic_cast<const noun&>(_w));
465 }
466
467 return *this;
468 }*/
469
470 std::list<noun> noun_query::run() const
471 {
472 std::stringstream construct;
473
474 if (!_full_hypernym_of.empty() || !_full_hyponym_of.empty() || !_full_part_meronym_of.empty() || !_full_part_holonym_of.empty() || !_full_substance_meronym_of.empty() || !_full_substance_holonym_of.empty() || !_full_member_meronym_of.empty() || !_full_member_holonym_of.empty())
475 {
476 construct << "WITH RECURSIVE ";
477
478 std::list<std::string> ctes;
479
480 for (auto hyponym : _full_hypernym_of.uniq_flatten())
481 {
482 ctes.push_back("hypernym_tree_" + std::to_string(hyponym._id) + " AS (SELECT hypernym_id FROM hypernymy WHERE hyponym_id = " + std::to_string(hyponym._id) + " UNION SELECT h.hypernym_id FROM hypernym_tree_" + std::to_string(hyponym._id) + " AS t INNER JOIN hypernymy AS h ON t.hypernym_id = h.hyponym_id)");
483 }
484
485 for (auto hypernym : _full_hyponym_of.uniq_flatten())
486 {
487 ctes.push_back("hyponym_tree_" + std::to_string(hypernym._id) + " AS (SELECT hyponym_id FROM hypernymy WHERE hypernym_id = " + std::to_string(hypernym._id) + " UNION SELECT h.hyponym_id FROM hyponym_tree_" + std::to_string(hypernym._id) + " AS t INNER JOIN hypernymy AS h ON t.hyponym_id = h.hypernym_id)");
488 }
489
490 for (auto holonym : _full_part_meronym_of.uniq_flatten())
491 {
492 ctes.push_back("part_meronym_tree_" + std::to_string(holonym._id) + " AS (SELECT meronym_id FROM part_meronymy WHERE holonym_id = " + std::to_string(holonym._id) + " UNION SELECT h.meronym_id FROM part_meronym_tree_" + std::to_string(holonym._id) + " AS t INNER JOIN part_meronymy AS h ON t.meronym_id = h.holonym_id)");
493 }
494
495 for (auto meronym : _full_part_holonym_of.uniq_flatten())
496 {
497 ctes.push_back("part_holonym_tree_" + std::to_string(meronym._id) + " AS (SELECT holonym_id FROM part_meronymy WHERE meronym_id = " + std::to_string(meronym._id) + " UNION SELECT h.holonym_id FROM part_holonym_tree_" + std::to_string(meronym._id) + " AS t INNER JOIN part_meronymy AS h ON t.holonym_id = h.meronym_id)");
498 }
499
500 for (auto holonym : _full_substance_meronym_of.uniq_flatten())
501 {
502 ctes.push_back("substance_meronym_tree_" + std::to_string(holonym._id) + " AS (SELECT meronym_id FROM substance_meronymy WHERE holonym_id = " + std::to_string(holonym._id) + " UNION SELECT h.meronym_id FROM substance_meronym_tree_" + std::to_string(holonym._id) + " AS t INNER JOIN substance_meronymy AS h ON t.meronym_id = h.holonym_id)");
503 }
504
505 for (auto meronym : _full_substance_holonym_of.uniq_flatten())
506 {
507 ctes.push_back("substance_holonym_tree_" + std::to_string(meronym._id) + " AS (SELECT holonym_id FROM substance_meronymy WHERE meronym_id = " + std::to_string(meronym._id) + " UNION SELECT h.holonym_id FROM substance_holonym_tree_" + std::to_string(meronym._id) + " AS t INNER JOIN substance_meronymy AS h ON t.holonym_id = h.meronym_id)");
508 }
509
510 for (auto holonym : _full_member_meronym_of.uniq_flatten())
511 {
512 ctes.push_back("member_meronym_tree_" + std::to_string(holonym._id) + " AS (SELECT meronym_id FROM member_meronymy WHERE holonym_id = " + std::to_string(holonym._id) + " UNION SELECT h.meronym_id FROM member_meronym_tree_" + std::to_string(holonym._id) + " AS t INNER JOIN member_meronymy AS h ON t.meronym_id = h.holonym_id)");
513 }
514
515 for (auto meronym : _full_member_holonym_of.uniq_flatten())
516 {
517 ctes.push_back("member_holonym_tree_" + std::to_string(meronym._id) + " AS (SELECT holonym_id FROM member_meronymy WHERE meronym_id = " + std::to_string(meronym._id) + " UNION SELECT h.holonym_id FROM member_holonym_tree_" + std::to_string(meronym._id) + " AS t INNER JOIN member_meronymy AS h ON t.holonym_id = h.meronym_id)");
518 }
519
520 construct << verbly::implode(std::begin(ctes), std::end(ctes), ", ");
521 construct << " ";
522 }
523
524 construct << "SELECT noun_id, singular, plural, wnid FROM nouns";
525 std::list<std::string> conditions;
526 std::list<binding> bindings;
527
528 if (_has_prn)
529 {
530 conditions.push_back("noun_id IN (SELECT noun_id FROM noun_pronunciations)");
531 }
532
533 if (!_rhymes.empty())
534 {
535 std::list<std::string> clauses(_rhymes.size(), "(prerhyme != ? AND rhyme = ?)");
536 std::string cond = "noun_id IN (SELECT noun_id FROM noun_pronunciations WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
537 conditions.push_back(cond);
538
539 for (auto rhy : _rhymes)
540 {
541 bindings.emplace_back(rhy.get_prerhyme());
542 bindings.emplace_back(rhy.get_rhyme());
543 }
544 }
545
546 if (_has_rhyming_noun)
547 {
548 conditions.push_back("noun_id IN (SELECT a.noun_id FROM nouns AS a INNER JOIN noun_pronunciations AS curp ON curp.noun_id = a.noun_id INNER JOIN noun_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme AND rhmp.noun_id != curp.noun_id)");
549 }
550
551 if (_has_rhyming_adjective)
552 {
553 conditions.push_back("noun_id IN (SELECT a.noun_id FROM nouns AS a INNER JOIN noun_pronunciations AS curp ON curp.noun_id = a.noun_id INNER JOIN adjective_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)");
554 }
555
556 if (_has_rhyming_adverb)
557 {
558 conditions.push_back("noun_id IN (SELECT a.noun_id FROM nouns AS a INNER JOIN noun_pronunciations AS curp ON curp.noun_id = a.noun_id INNER JOIN adverb_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)");
559 }
560
561 if (_has_rhyming_verb)
562 {
563 conditions.push_back("noun_id IN (SELECT a.noun_id FROM nouns AS a INNER JOIN noun_pronunciations AS curp ON curp.noun_id = a.noun_id INNER JOIN verb_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)");
564 }
565
566 if (!_stress.empty())
567 {
568 std::stringstream cond;
569 if (_stress.get_notlogic())
570 {
571 cond << "noun_id NOT IN";
572 } else {
573 cond << "noun_id IN";
574 }
575
576 cond << "(SELECT noun_id FROM noun_pronunciations WHERE ";
577
578 std::function<std::string (filter<std::vector<bool>>, bool)> recur = [&] (filter<std::vector<bool>> f, bool notlogic) -> std::string {
579 switch (f.get_type())
580 {
581 case filter<std::vector<bool>>::type::singleton:
582 {
583 std::ostringstream _val;
584 for (auto syl : f.get_elem())
585 {
586 if (syl)
587 {
588 _val << "1";
589 } else {
590 _val << "0";
591 }
592 }
593
594 bindings.emplace_back(_val.str());
595
596 if (notlogic == f.get_notlogic())
597 {
598 return "stress = ?";
599 } else {
600 return "stress != ?";
601 }
602 }
603
604 case filter<std::vector<bool>>::type::group:
605 {
606 bool truelogic = notlogic != f.get_notlogic();
607
608 std::list<std::string> clauses;
609 std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<std::vector<bool>> f2) {
610 return recur(f2, truelogic);
611 });
612
613 if (truelogic == f.get_orlogic())
614 {
615 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")";
616 } else {
617 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
618 }
619 }
620 }
621 };
622
623 cond << recur(_stress, _stress.get_notlogic());
624 cond << ")";
625 conditions.push_back(cond.str());
626 }
627
628 for (auto except : _except)
629 {
630 conditions.push_back("noun_id != ?");
631 bindings.emplace_back(except._id);
632 }
633
634 if (!_with_singular_form.empty())
635 {
636 std::list<std::string> clauses(_with_singular_form.size(), "singular = ?");
637 std::string cond = "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
638 conditions.push_back(cond);
639
640 for (auto form : _with_singular_form)
641 {
642 bindings.emplace_back(form);
643 }
644 }
645
646 if (_requires_plural_form)
647 {
648 conditions.push_back("plural IS NOT NULL");
649 }
650
651 if (!_with_prefix.empty())
652 {
653 std::function<std::string (filter<std::string>, bool)> recur = [&] (filter<std::string> f, bool notlogic) -> std::string {
654 switch (f.get_type())
655 {
656 case filter<std::string>::type::singleton:
657 {
658 bindings.emplace_back(f.get_elem() + "%");
659
660 if (notlogic == f.get_notlogic())
661 {
662 return "singular LIKE ?";
663 } else {
664 return "singular NOT LIKE ?";
665 }
666 }
667
668 case filter<std::string>::type::group:
669 {
670 bool truelogic = notlogic != f.get_notlogic();
671
672 std::list<std::string> clauses;
673 std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<std::string> f2) {
674 return recur(f2, truelogic);
675 });
676
677 if (truelogic == f.get_orlogic())
678 {
679 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")";
680 } else {
681 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
682 }
683 }
684 }
685 };
686
687 conditions.push_back(recur(_with_prefix, false));
688 }
689
690 if (!_with_suffix.empty())
691 {
692 std::function<std::string (filter<std::string>, bool)> recur = [&] (filter<std::string> f, bool notlogic) -> std::string {
693 switch (f.get_type())
694 {
695 case filter<std::string>::type::singleton:
696 {
697 bindings.emplace_back("%" + f.get_elem());
698
699 if (notlogic == f.get_notlogic())
700 {
701 return "singular LIKE ?";
702 } else {
703 return "singular NOT LIKE ?";
704 }
705 }
706
707 case filter<std::string>::type::group:
708 {
709 bool truelogic = notlogic != f.get_notlogic();
710
711 std::list<std::string> clauses;
712 std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<std::string> f2) {
713 return recur(f2, truelogic);
714 });
715
716 if (truelogic == f.get_orlogic())
717 {
718 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")";
719 } else {
720 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
721 }
722 }
723 }
724 };
725
726 conditions.push_back(recur(_with_suffix, false));
727 }
728
729 if (_with_complexity != unlimited)
730 {
731 conditions.push_back("complexity = ?");
732 bindings.emplace_back(_with_complexity);
733 }
734
735 if (_is_hypernym)
736 {
737 conditions.push_back("noun_id IN (SELECT hypernym_id FROM hypernymy)");
738 }
739
740 if (!_hypernym_of.empty())
741 {
742 std::stringstream cond;
743 if (_hypernym_of.get_notlogic())
744 {
745 cond << "noun_id NOT IN";
746 } else {
747 cond << "noun_id IN";
748 }
749
750 cond << "(SELECT hypernym_id FROM hypernymy WHERE ";
751
752 std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string {
753 switch (f.get_type())
754 {
755 case filter<noun>::type::singleton:
756 {
757 bindings.emplace_back(f.get_elem()._id);
758
759 if (notlogic == f.get_notlogic())
760 {
761 return "hyponym_id = ?";
762 } else {
763 return "hyponym_id != ?";
764 }
765 }
766
767 case filter<noun>::type::group:
768 {
769 bool truelogic = notlogic != f.get_notlogic();
770
771 std::list<std::string> clauses;
772 std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) {
773 return recur(f2, truelogic);
774 });
775
776 if (truelogic == f.get_orlogic())
777 {
778 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")";
779 } else {
780 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
781 }
782 }
783 }
784 };
785
786 cond << recur(_hypernym_of, _hypernym_of.get_notlogic());
787 cond << ")";
788 conditions.push_back(cond.str());
789 }
790
791 if (!_full_hypernym_of.empty())
792 {
793 std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string {
794 switch (f.get_type())
795 {
796 case filter<noun>::type::singleton:
797 {
798 if (notlogic == f.get_notlogic())
799 {
800 return "noun_id IN (SELECT hypernym_id FROM hypernym_tree_" + std::to_string(f.get_elem()._id) + ")";
801 } else {
802 return "noun_id NOT IN (SELECT hypernym_id FROM hypernym_tree_" + std::to_string(f.get_elem()._id) + ")";
803 }
804 }
805
806 case filter<noun>::type::group:
807 {
808 bool truelogic = notlogic != f.get_notlogic();
809
810 std::list<std::string> clauses;
811 std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) {
812 return recur(f2, truelogic);
813 });
814
815 if (truelogic == f.get_orlogic())
816 {
817 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")";
818 } else {
819 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
820 }
821 }
822 }
823 };
824
825 conditions.push_back(recur(_full_hypernym_of, false));
826 }
827
828 if (!_full_hyponym_of.empty())
829 {
830 std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string {
831 switch (f.get_type())
832 {
833 case filter<noun>::type::singleton:
834 {
835 if (notlogic == f.get_notlogic())
836 {
837 return "noun_id IN (SELECT hyponym_id FROM hyponym_tree_" + std::to_string(f.get_elem()._id) + ")";
838 } else {
839 return "noun_id NOT IN (SELECT hyponym_id FROM hyponym_tree_" + std::to_string(f.get_elem()._id) + ")";
840 }
841 }
842
843 case filter<noun>::type::group:
844 {
845 bool truelogic = notlogic != f.get_notlogic();
846
847 std::list<std::string> clauses;
848 std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) {
849 return recur(f2, truelogic);
850 });
851
852 if (truelogic == f.get_orlogic())
853 {
854 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")";
855 } else {
856 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
857 }
858 }
859 }
860 };
861
862 conditions.push_back(recur(_full_hyponym_of, false));
863 }
864
865 if (_is_hyponym)
866 {
867 conditions.push_back("noun_id IN (SELECT hyponym_id FROM hypernymy)");
868 }
869
870 if (!_hyponym_of.empty())
871 {
872 std::stringstream cond;
873 if (_hyponym_of.get_notlogic())
874 {
875 cond << "noun_id NOT IN";
876 } else {
877 cond << "noun_id IN";
878 }
879
880 cond << "(SELECT hyponym_id FROM hypernymy WHERE ";
881
882 std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string {
883 switch (f.get_type())
884 {
885 case filter<noun>::type::singleton:
886 {
887 bindings.emplace_back(f.get_elem()._id);
888
889 if (notlogic == f.get_notlogic())
890 {
891 return "hypernym_id = ?";
892 } else {
893 return "hypernym_id != ?";
894 }
895 }
896
897 case filter<noun>::type::group:
898 {
899 bool truelogic = notlogic != f.get_notlogic();
900
901 std::list<std::string> clauses;
902 std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) {
903 return recur(f2, truelogic);
904 });
905
906 if (truelogic == f.get_orlogic())
907 {
908 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")";
909 } else {
910 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
911 }
912 }
913 }
914 };
915
916 cond << recur(_hyponym_of, _hyponym_of.get_notlogic());
917 cond << ")";
918 conditions.push_back(cond.str());
919 }
920
921 if (_is_part_meronym)
922 {
923 conditions.push_back("noun_id IN (SELECT meronym_id FROM part_meronymy)");
924 }
925
926 if (!_part_meronym_of.empty())
927 {
928 std::stringstream cond;
929 if (_part_meronym_of.get_notlogic())
930 {
931 cond << "noun_id NOT IN";
932 } else {
933 cond << "noun_id IN";
934 }
935
936 cond << "(SELECT meronym_id FROM part_meronymy WHERE ";
937
938 std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string {
939 switch (f.get_type())
940 {
941 case filter<noun>::type::singleton:
942 {
943 bindings.emplace_back(f.get_elem()._id);
944
945 if (notlogic == f.get_notlogic())
946 {
947 return "holonym_id = ?";
948 } else {
949 return "holonym_id != ?";
950 }
951 }
952
953 case filter<noun>::type::group:
954 {
955 bool truelogic = notlogic != f.get_notlogic();
956
957 std::list<std::string> clauses;
958 std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) {
959 return recur(f2, truelogic);
960 });
961
962 if (truelogic == f.get_orlogic())
963 {
964 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")";
965 } else {
966 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
967 }
968 }
969 }
970 };
971
972 cond << recur(_part_meronym_of, _part_meronym_of.get_notlogic());
973 cond << ")";
974 conditions.push_back(cond.str());
975 }
976
977 if (!_full_part_meronym_of.empty())
978 {
979 std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string {
980 switch (f.get_type())
981 {
982 case filter<noun>::type::singleton:
983 {
984 if (notlogic == f.get_notlogic())
985 {
986 return "noun_id IN (SELECT meronym_id FROM part_meronym_tree_" + std::to_string(f.get_elem()._id) + ")";
987 } else {
988 return "noun_id NOT IN (SELECT meronym_id FROM part_meronym_tree_" + std::to_string(f.get_elem()._id) + ")";
989 }
990 }
991
992 case filter<noun>::type::group:
993 {
994 bool truelogic = notlogic != f.get_notlogic();
995
996 std::list<std::string> clauses;
997 std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) {
998 return recur(f2, truelogic);
999 });
1000
1001 if (truelogic == f.get_orlogic())
1002 {
1003 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")";
1004 } else {
1005 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
1006 }
1007 }
1008 }
1009 };
1010
1011 conditions.push_back(recur(_full_part_meronym_of, false));
1012 }
1013
1014 if (_is_part_holonym)
1015 {
1016 conditions.push_back("noun_id IN (SELECT holonym_id FROM part_meronymy)");
1017 }
1018
1019 if (!_part_holonym_of.empty())
1020 {
1021 std::stringstream cond;
1022 if (_part_holonym_of.get_notlogic())
1023 {
1024 cond << "noun_id NOT IN";
1025 } else {
1026 cond << "noun_id IN";
1027 }
1028
1029 cond << "(SELECT holonym_id FROM part_meronymy WHERE ";
1030
1031 std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string {
1032 switch (f.get_type())
1033 {
1034 case filter<noun>::type::singleton:
1035 {
1036 bindings.emplace_back(f.get_elem()._id);
1037
1038 if (notlogic == f.get_notlogic())
1039 {
1040 return "meronym_id = ?";
1041 } else {
1042 return "meronym_id != ?";
1043 }
1044 }
1045
1046 case filter<noun>::type::group:
1047 {
1048 bool truelogic = notlogic != f.get_notlogic();
1049
1050 std::list<std::string> clauses;
1051 std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) {
1052 return recur(f2, truelogic);
1053 });
1054
1055 if (truelogic == f.get_orlogic())
1056 {
1057 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")";
1058 } else {
1059 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
1060 }
1061 }
1062 }
1063 };
1064
1065 cond << recur(_part_holonym_of, _part_holonym_of.get_notlogic());
1066 cond << ")";
1067 conditions.push_back(cond.str());
1068 }
1069
1070 if (!_full_part_holonym_of.empty())
1071 {
1072 std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string {
1073 switch (f.get_type())
1074 {
1075 case filter<noun>::type::singleton:
1076 {
1077 if (notlogic == f.get_notlogic())
1078 {
1079 return "noun_id IN (SELECT holonym_id FROM part_holonym_tree_" + std::to_string(f.get_elem()._id) + ")";
1080 } else {
1081 return "noun_id NOT IN (SELECT holonym_id FROM part_holonym_tree_" + std::to_string(f.get_elem()._id) + ")";
1082 }
1083 }
1084
1085 case filter<noun>::type::group:
1086 {
1087 bool truelogic = notlogic != f.get_notlogic();
1088
1089 std::list<std::string> clauses;
1090 std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) {
1091 return recur(f2, truelogic);
1092 });
1093
1094 if (truelogic == f.get_orlogic())
1095 {
1096 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")";
1097 } else {
1098 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
1099 }
1100 }
1101 }
1102 };
1103
1104 conditions.push_back(recur(_full_part_holonym_of, false));
1105 }
1106
1107 if (_is_substance_meronym)
1108 {
1109 conditions.push_back("noun_id IN (SELECT meronym_id FROM substance_meronymy)");
1110 }
1111
1112 if (!_substance_meronym_of.empty())
1113 {
1114 std::stringstream cond;
1115 if (_substance_meronym_of.get_notlogic())
1116 {
1117 cond << "noun_id NOT IN";
1118 } else {
1119 cond << "noun_id IN";
1120 }
1121
1122 cond << "(SELECT meronym_id FROM substance_meronymy WHERE ";
1123
1124 std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string {
1125 switch (f.get_type())
1126 {
1127 case filter<noun>::type::singleton:
1128 {
1129 bindings.emplace_back(f.get_elem()._id);
1130
1131 if (notlogic == f.get_notlogic())
1132 {
1133 return "holonym_id = ?";
1134 } else {
1135 return "holonym_id != ?";
1136 }
1137 }
1138
1139 case filter<noun>::type::group:
1140 {
1141 bool truelogic = notlogic != f.get_notlogic();
1142
1143 std::list<std::string> clauses;
1144 std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) {
1145 return recur(f2, truelogic);
1146 });
1147
1148 if (truelogic == f.get_orlogic())
1149 {
1150 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")";
1151 } else {
1152 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
1153 }
1154 }
1155 }
1156 };
1157
1158 cond << recur(_substance_meronym_of, _substance_meronym_of.get_notlogic());
1159 cond << ")";
1160 conditions.push_back(cond.str());
1161 }
1162
1163 if (!_full_substance_meronym_of.empty())
1164 {
1165 std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string {
1166 switch (f.get_type())
1167 {
1168 case filter<noun>::type::singleton:
1169 {
1170 if (notlogic == f.get_notlogic())
1171 {
1172 return "noun_id IN (SELECT meronym_id FROM substance_meronym_tree_" + std::to_string(f.get_elem()._id) + ")";
1173 } else {
1174 return "noun_id NOT IN (SELECT meronym_id FROM substance_meronym_tree_" + std::to_string(f.get_elem()._id) + ")";
1175 }
1176 }
1177
1178 case filter<noun>::type::group:
1179 {
1180 bool truelogic = notlogic != f.get_notlogic();
1181
1182 std::list<std::string> clauses;
1183 std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) {
1184 return recur(f2, truelogic);
1185 });
1186
1187 if (truelogic == f.get_orlogic())
1188 {
1189 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")";
1190 } else {
1191 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
1192 }
1193 }
1194 }
1195 };
1196
1197 conditions.push_back(recur(_full_substance_meronym_of, false));
1198 }
1199
1200 if (_is_substance_holonym)
1201 {
1202 conditions.push_back("noun_id IN (SELECT holonym_id FROM substance_meronymy)");
1203 }
1204
1205 if (!_substance_holonym_of.empty())
1206 {
1207 std::stringstream cond;
1208 if (_substance_holonym_of.get_notlogic())
1209 {
1210 cond << "noun_id NOT IN";
1211 } else {
1212 cond << "noun_id IN";
1213 }
1214
1215 cond << "(SELECT holonym_id FROM substance_meronymy WHERE ";
1216
1217 std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string {
1218 switch (f.get_type())
1219 {
1220 case filter<noun>::type::singleton:
1221 {
1222 bindings.emplace_back(f.get_elem()._id);
1223
1224 if (notlogic == f.get_notlogic())
1225 {
1226 return "meronym_id = ?";
1227 } else {
1228 return "meronym_id != ?";
1229 }
1230 }
1231
1232 case filter<noun>::type::group:
1233 {
1234 bool truelogic = notlogic != f.get_notlogic();
1235
1236 std::list<std::string> clauses;
1237 std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) {
1238 return recur(f2, truelogic);
1239 });
1240
1241 if (truelogic == f.get_orlogic())
1242 {
1243 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")";
1244 } else {
1245 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
1246 }
1247 }
1248 }
1249 };
1250
1251 cond << recur(_substance_holonym_of, _substance_holonym_of.get_notlogic());
1252 cond << ")";
1253 conditions.push_back(cond.str());
1254 }
1255
1256 if (!_full_substance_holonym_of.empty())
1257 {
1258 std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string {
1259 switch (f.get_type())
1260 {
1261 case filter<noun>::type::singleton:
1262 {
1263 if (notlogic == f.get_notlogic())
1264 {
1265 return "noun_id IN (SELECT holonym_id FROM substance_holonym_tree_" + std::to_string(f.get_elem()._id) + ")";
1266 } else {
1267 return "noun_id NOT IN (SELECT holonym_id FROM substance_holonym_tree_" + std::to_string(f.get_elem()._id) + ")";
1268 }
1269 }
1270
1271 case filter<noun>::type::group:
1272 {
1273 bool truelogic = notlogic != f.get_notlogic();
1274
1275 std::list<std::string> clauses;
1276 std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) {
1277 return recur(f2, truelogic);
1278 });
1279
1280 if (truelogic == f.get_orlogic())
1281 {
1282 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")";
1283 } else {
1284 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
1285 }
1286 }
1287 }
1288 };
1289
1290 conditions.push_back(recur(_full_substance_holonym_of, false));
1291 }
1292
1293 if (_is_member_meronym)
1294 {
1295 conditions.push_back("noun_id IN (SELECT meronym_id FROM member_meronymy)");
1296 }
1297
1298 if (!_member_meronym_of.empty())
1299 {
1300 std::stringstream cond;
1301 if (_member_meronym_of.get_notlogic())
1302 {
1303 cond << "noun_id NOT IN";
1304 } else {
1305 cond << "noun_id IN";
1306 }
1307
1308 cond << "(SELECT meronym_id FROM member_meronymy WHERE ";
1309
1310 std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string {
1311 switch (f.get_type())
1312 {
1313 case filter<noun>::type::singleton:
1314 {
1315 bindings.emplace_back(f.get_elem()._id);
1316
1317 if (notlogic == f.get_notlogic())
1318 {
1319 return "holonym_id = ?";
1320 } else {
1321 return "holonym_id != ?";
1322 }
1323 }
1324
1325 case filter<noun>::type::group:
1326 {
1327 bool truelogic = notlogic != f.get_notlogic();
1328
1329 std::list<std::string> clauses;
1330 std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) {
1331 return recur(f2, truelogic);
1332 });
1333
1334 if (truelogic == f.get_orlogic())
1335 {
1336 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")";
1337 } else {
1338 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
1339 }
1340 }
1341 }
1342 };
1343
1344 cond << recur(_member_meronym_of, _member_meronym_of.get_notlogic());
1345 cond << ")";
1346 conditions.push_back(cond.str());
1347 }
1348
1349 if (!_full_member_meronym_of.empty())
1350 {
1351 std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string {
1352 switch (f.get_type())
1353 {
1354 case filter<noun>::type::singleton:
1355 {
1356 if (notlogic == f.get_notlogic())
1357 {
1358 return "noun_id IN (SELECT meronym_id FROM member_meronym_tree_" + std::to_string(f.get_elem()._id) + ")";
1359 } else {
1360 return "noun_id NOT IN (SELECT meronym_id FROM member_meronym_tree_" + std::to_string(f.get_elem()._id) + ")";
1361 }
1362 }
1363
1364 case filter<noun>::type::group:
1365 {
1366 bool truelogic = notlogic != f.get_notlogic();
1367
1368 std::list<std::string> clauses;
1369 std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) {
1370 return recur(f2, truelogic);
1371 });
1372
1373 if (truelogic == f.get_orlogic())
1374 {
1375 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")";
1376 } else {
1377 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
1378 }
1379 }
1380 }
1381 };
1382
1383 conditions.push_back(recur(_full_member_meronym_of, false));
1384 }
1385
1386 if (_is_member_holonym)
1387 {
1388 conditions.push_back("noun_id IN (SELECT holonym_id FROM member_meronym)");
1389 }
1390
1391 if (!_member_holonym_of.empty())
1392 {
1393 std::stringstream cond;
1394 if (_member_holonym_of.get_notlogic())
1395 {
1396 cond << "noun_id NOT IN";
1397 } else {
1398 cond << "noun_id IN";
1399 }
1400
1401 cond << "(SELECT holonym_id FROM member_meronymy WHERE ";
1402
1403 std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string {
1404 switch (f.get_type())
1405 {
1406 case filter<noun>::type::singleton:
1407 {
1408 bindings.emplace_back(f.get_elem()._id);
1409
1410 if (notlogic == f.get_notlogic())
1411 {
1412 return "meronym_id = ?";
1413 } else {
1414 return "meronym_id != ?";
1415 }
1416 }
1417
1418 case filter<noun>::type::group:
1419 {
1420 bool truelogic = notlogic != f.get_notlogic();
1421
1422 std::list<std::string> clauses;
1423 std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) {
1424 return recur(f2, truelogic);
1425 });
1426
1427 if (truelogic == f.get_orlogic())
1428 {
1429 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")";
1430 } else {
1431 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
1432 }
1433 }
1434 }
1435 };
1436
1437 cond << recur(_member_holonym_of, _member_holonym_of.get_notlogic());
1438 cond << ")";
1439 conditions.push_back(cond.str());
1440 }
1441
1442 if (!_full_member_holonym_of.empty())
1443 {
1444 std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string {
1445 switch (f.get_type())
1446 {
1447 case filter<noun>::type::singleton:
1448 {
1449 if (notlogic == f.get_notlogic())
1450 {
1451 return "noun_id IN (SELECT holonym_id FROM member_holonym_tree_" + std::to_string(f.get_elem()._id) + ")";
1452 } else {
1453 return "noun_id NOT IN (SELECT holonym_id FROM member_holonym_tree_" + std::to_string(f.get_elem()._id) + ")";
1454 }
1455 }
1456
1457 case filter<noun>::type::group:
1458 {
1459 bool truelogic = notlogic != f.get_notlogic();
1460
1461 std::list<std::string> clauses;
1462 std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) {
1463 return recur(f2, truelogic);
1464 });
1465
1466 if (truelogic == f.get_orlogic())
1467 {
1468 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")";
1469 } else {
1470 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
1471 }
1472 }
1473 }
1474 };
1475
1476 conditions.push_back(recur(_full_member_holonym_of, false));
1477 }
1478
1479 if (_is_proper)
1480 {
1481 conditions.push_back("proper = 1");
1482 }
1483
1484 if (_is_not_proper)
1485 {
1486 conditions.push_back("proper = 0");
1487 }
1488
1489 if (_is_instance)
1490 {
1491 conditions.push_back("noun_id IN (SELECT instance_id FROM instantiation)");
1492 }
1493
1494 if (!_instance_of.empty())
1495 {
1496 std::stringstream cond;
1497 if (_instance_of.get_notlogic())
1498 {
1499 cond << "noun_id NOT IN";
1500 } else {
1501 cond << "noun_id IN";
1502 }
1503
1504 cond << "(SELECT instance_id FROM instantiation WHERE ";
1505
1506 std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string {
1507 switch (f.get_type())
1508 {
1509 case filter<noun>::type::singleton:
1510 {
1511 bindings.emplace_back(f.get_elem()._id);
1512
1513 if (notlogic == f.get_notlogic())
1514 {
1515 return "class_id = ?";
1516 } else {
1517 return "class_id != ?";
1518 }
1519 }
1520
1521 case filter<noun>::type::group:
1522 {
1523 bool truelogic = notlogic != f.get_notlogic();
1524
1525 std::list<std::string> clauses;
1526 std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) {
1527 return recur(f2, truelogic);
1528 });
1529
1530 if (truelogic == f.get_orlogic())
1531 {
1532 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")";
1533 } else {
1534 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
1535 }
1536 }
1537 }
1538 };
1539
1540 cond << recur(_instance_of, _instance_of.get_notlogic());
1541 cond << ")";
1542 conditions.push_back(cond.str());
1543 }
1544
1545 if (_is_class)
1546 {
1547 conditions.push_back("noun_id IN (SELECT class_id FROM instantiation)");
1548 }
1549
1550 if (!_class_of.empty())
1551 {
1552 std::stringstream cond;
1553 if (_class_of.get_notlogic())
1554 {
1555 cond << "noun_id NOT IN";
1556 } else {
1557 cond << "noun_id IN";
1558 }
1559
1560 cond << "(SELECT class_id FROM instantiation WHERE ";
1561
1562 std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string {
1563 switch (f.get_type())
1564 {
1565 case filter<noun>::type::singleton:
1566 {
1567 bindings.emplace_back(f.get_elem()._id);
1568
1569 if (notlogic == f.get_notlogic())
1570 {
1571 return "instance_id = ?";
1572 } else {
1573 return "instance_id != ?";
1574 }
1575 }
1576
1577 case filter<noun>::type::group:
1578 {
1579 bool truelogic = notlogic != f.get_notlogic();
1580
1581 std::list<std::string> clauses;
1582 std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) {
1583 return recur(f2, truelogic);
1584 });
1585
1586 if (truelogic == f.get_orlogic())
1587 {
1588 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")";
1589 } else {
1590 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
1591 }
1592 }
1593 }
1594 };
1595
1596 cond << recur(_class_of, _class_of.get_notlogic());
1597 cond << ")";
1598 conditions.push_back(cond.str());
1599 }
1600
1601 if (_has_synonyms)
1602 {
1603 conditions.push_back("noun_id IN (SELECT noun_2_id FROM noun_synonymy)");
1604 }
1605
1606 if (!_synonym_of.empty())
1607 {
1608 std::stringstream cond;
1609 if (_synonym_of.get_notlogic())
1610 {
1611 cond << "noun_id NOT IN";
1612 } else {
1613 cond << "noun_id IN";
1614 }
1615
1616 cond << "(SELECT noun_2_id FROM noun_synonymy WHERE ";
1617
1618 std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string {
1619 switch (f.get_type())
1620 {
1621 case filter<noun>::type::singleton:
1622 {
1623 bindings.emplace_back(f.get_elem()._id);
1624
1625 if (notlogic == f.get_notlogic())
1626 {
1627 return "noun_1_id = ?";
1628 } else {
1629 return "noun_1_id != ?";
1630 }
1631 }
1632
1633 case filter<noun>::type::group:
1634 {
1635 bool truelogic = notlogic != f.get_notlogic();
1636
1637 std::list<std::string> clauses;
1638 std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) {
1639 return recur(f2, truelogic);
1640 });
1641
1642 if (truelogic == f.get_orlogic())
1643 {
1644 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")";
1645 } else {
1646 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
1647 }
1648 }
1649 }
1650 };
1651
1652 cond << recur(_synonym_of, _synonym_of.get_notlogic());
1653 cond << ")";
1654 conditions.push_back(cond.str());
1655 }
1656
1657 if (_has_antonyms)
1658 {
1659 conditions.push_back("noun_id IN (SELECT noun_2_id FROM noun_antonymy)");
1660 }
1661
1662 if (!_antonym_of.empty())
1663 {
1664 std::stringstream cond;
1665 if (_antonym_of.get_notlogic())
1666 {
1667 cond << "noun_id NOT IN";
1668 } else {
1669 cond << "noun_id IN";
1670 }
1671
1672 cond << "(SELECT noun_2_id FROM noun_antonymy WHERE ";
1673
1674 std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string {
1675 switch (f.get_type())
1676 {
1677 case filter<noun>::type::singleton:
1678 {
1679 bindings.emplace_back(f.get_elem()._id);
1680
1681 if (notlogic == f.get_notlogic())
1682 {
1683 return "noun_1_id = ?";
1684 } else {
1685 return "noun_1_id != ?";
1686 }
1687 }
1688
1689 case filter<noun>::type::group:
1690 {
1691 bool truelogic = notlogic != f.get_notlogic();
1692
1693 std::list<std::string> clauses;
1694 std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) {
1695 return recur(f2, truelogic);
1696 });
1697
1698 if (truelogic == f.get_orlogic())
1699 {
1700 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")";
1701 } else {
1702 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
1703 }
1704 }
1705 }
1706 };
1707
1708 cond << recur(_antonym_of, _antonym_of.get_notlogic());
1709 cond << ")";
1710 conditions.push_back(cond.str());
1711 }
1712
1713 if (_has_pertainym)
1714 {
1715 conditions.push_back("noun_id IN (SELECT noun_id FROM pertainymy)");
1716 }
1717
1718 if (!_anti_pertainym_of.empty())
1719 {
1720 std::stringstream cond;
1721 if (_anti_pertainym_of.get_notlogic())
1722 {
1723 cond << "noun_id NOT IN";
1724 } else {
1725 cond << "noun_id IN";
1726 }
1727
1728 cond << "(SELECT noun_id FROM pertainymy WHERE ";
1729
1730 std::function<std::string (filter<adjective>, bool)> recur = [&] (filter<adjective> f, bool notlogic) -> std::string {
1731 switch (f.get_type())
1732 {
1733 case filter<adjective>::type::singleton:
1734 {
1735 bindings.emplace_back(f.get_elem()._id);
1736
1737 if (notlogic == f.get_notlogic())
1738 {
1739 return "pertainym_id = ?";
1740 } else {
1741 return "pertainym_id != ?";
1742 }
1743 }
1744
1745 case filter<adjective>::type::group:
1746 {
1747 bool truelogic = notlogic != f.get_notlogic();
1748
1749 std::list<std::string> clauses;
1750 std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<adjective> f2) {
1751 return recur(f2, truelogic);
1752 });
1753
1754 if (truelogic == f.get_orlogic())
1755 {
1756 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")";
1757 } else {
1758 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
1759 }
1760 }
1761 }
1762 };
1763
1764 cond << recur(_anti_pertainym_of, _anti_pertainym_of.get_notlogic());
1765 cond << ")";
1766 conditions.push_back(cond.str());
1767 }
1768
1769 if (_is_attribute)
1770 {
1771 conditions.push_back("noun_id IN (SELECT noun_id FROM variation)");
1772 }
1773
1774 if (!_attribute_of.empty())
1775 {
1776 std::stringstream cond;
1777 if (_attribute_of.get_notlogic())
1778 {
1779 cond << "noun_id NOT IN";
1780 } else {
1781 cond << "noun_id IN";
1782 }
1783
1784 cond << "(SELECT noun_id FROM variation WHERE ";
1785
1786 std::function<std::string (filter<adjective>, bool)> recur = [&] (filter<adjective> f, bool notlogic) -> std::string {
1787 switch (f.get_type())
1788 {
1789 case filter<adjective>::type::singleton:
1790 {
1791 bindings.emplace_back(f.get_elem()._id);
1792
1793 if (notlogic == f.get_notlogic())
1794 {
1795 return "adjective_id = ?";
1796 } else {
1797 return "adjective_id != ?";
1798 }
1799 }
1800
1801 case filter<adjective>::type::group:
1802 {
1803 bool truelogic = notlogic != f.get_notlogic();
1804
1805 std::list<std::string> clauses;
1806 std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<adjective> f2) {
1807 return recur(f2, truelogic);
1808 });
1809
1810 if (truelogic == f.get_orlogic())
1811 {
1812 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")";
1813 } else {
1814 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
1815 }
1816 }
1817 }
1818 };
1819
1820 cond << recur(_attribute_of, _attribute_of.get_notlogic());
1821 cond << ")";
1822 conditions.push_back(cond.str());
1823 }
1824
1825 if (_at_least_n_images != unlimited)
1826 {
1827 conditions.push_back("images >= ?");
1828 bindings.emplace_back(_at_least_n_images);
1829 }
1830
1831 if (!_with_wnid.empty())
1832 {
1833 std::vector<std::string> clauses(_with_wnid.size(), "wnid = ?");
1834 std::string cond = verbly::implode(std::begin(clauses), std::end(clauses), " OR ");
1835 conditions.push_back("(" + cond + ")");
1836
1837 for (auto wnid : _with_wnid)
1838 {
1839 bindings.emplace_back(wnid);
1840 }
1841 }
1842
1843 /*
1844 if (!_derived_from_adjective.empty())
1845 {
1846 std::list<std::string> clauses(_derived_from_adjective.size(), "adjective_id = @DERADJ");
1847 std::string cond = "noun_id IN (SELECT noun_id FROM noun_adjective_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
1848 conditions.push_back(cond);
1849 }
1850
1851 if (!_not_derived_from_adjective.empty())
1852 {
1853 std::list<std::string> clauses(_not_derived_from_adjective.size(), "adjective_id = @NDERADJ");
1854 std::string cond = "noun_id NOT IN (SELECT noun_id FROM noun_adjective_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
1855 conditions.push_back(cond);
1856 }
1857
1858 if (!_derived_from_adverb.empty())
1859 {
1860 std::list<std::string> clauses(_derived_from_adverb.size(), "adverb_id = @DERADV");
1861 std::string cond = "noun_id IN (SELECT noun_id FROM noun_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
1862 conditions.push_back(cond);
1863 }
1864
1865 if (!_not_derived_from_adverb.empty())
1866 {
1867 std::list<std::string> clauses(_not_derived_from_adverb.size(), "adverb_id = @NDERADV");
1868 std::string cond = "noun_id NOT IN (SELECT noun_id FROM noun_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
1869 conditions.push_back(cond);
1870 }
1871
1872 if (!_derived_from_noun.empty())
1873 {
1874 std::list<std::string> clauses(_derived_from_noun.size(), "noun_2_id = @DERN");
1875 std::string cond = "noun_id IN (SELECT noun_1_id FROM noun_noun_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
1876 conditions.push_back(cond);
1877 }
1878
1879 if (!_not_derived_from_noun.empty())
1880 {
1881 std::list<std::string> clauses(_not_derived_from_noun.size(), "noun_2_id = @NDERN");
1882 std::string cond = "noun_id NOT IN (SELECT noun_1_id FROM noun_noun_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
1883 conditions.push_back(cond);
1884 }
1885 */
1886 if (!conditions.empty())
1887 {
1888 construct << " WHERE ";
1889 construct << verbly::implode(std::begin(conditions), std::end(conditions), " AND ");
1890 }
1891
1892 if (_random)
1893 {
1894 construct << " ORDER BY RANDOM()";
1895 }
1896
1897 if (_limit != unlimited)
1898 {
1899 construct << " LIMIT " << _limit;
1900 }
1901
1902 sqlite3_stmt* ppstmt;
1903 std::string query = construct.str();
1904 if (sqlite3_prepare_v2(_data.ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
1905 {
1906 throw std::runtime_error(sqlite3_errmsg(_data.ppdb));
1907 }
1908
1909 int i = 1;
1910 for (auto& binding : bindings)
1911 {
1912 switch (binding.get_type())
1913 {
1914 case binding::type::integer:
1915 {
1916 sqlite3_bind_int(ppstmt, i, binding.get_integer());
1917
1918 break;
1919 }
1920
1921 case binding::type::string:
1922 {
1923 sqlite3_bind_text(ppstmt, i, binding.get_string().c_str(), binding.get_string().length(), SQLITE_TRANSIENT);
1924
1925 break;
1926 }
1927 }
1928
1929 i++;
1930 }
1931
1932 /*
1933 for (auto adj : _derived_from_adjective)
1934 {
1935 sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@DERADJ"), adj._id);
1936 }
1937
1938 for (auto adj : _not_derived_from_adjective)
1939 {
1940 sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@NDERADJ"), adj._id);
1941 }
1942
1943 for (auto adv : _derived_from_adverb)
1944 {
1945 sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@DERADV"), adv._id);
1946 }
1947
1948 for (auto adv : _not_derived_from_adverb)
1949 {
1950 sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@NDERADV"), adv._id);
1951 }
1952
1953 for (auto n : _derived_from_noun)
1954 {
1955 sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@DERN"), n._id);
1956 }
1957
1958 for (auto n : _not_derived_from_noun)
1959 {
1960 sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@NDERN"), n._id);
1961 }
1962*/
1963 std::list<noun> output;
1964 while (sqlite3_step(ppstmt) == SQLITE_ROW)
1965 {
1966 noun tnc {_data, sqlite3_column_int(ppstmt, 0)};
1967 tnc._singular = std::string(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 1)));
1968
1969 if (sqlite3_column_type(ppstmt, 2) != SQLITE_NULL)
1970 {
1971 tnc._plural = std::string(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 2)));
1972 }
1973
1974 tnc._wnid = sqlite3_column_int(ppstmt, 3);
1975
1976 output.push_back(tnc);
1977 }
1978
1979 sqlite3_finalize(ppstmt);
1980
1981 for (auto& noun : output)
1982 {
1983 query = "SELECT pronunciation, prerhyme, rhyme FROM noun_pronunciations WHERE noun_id = ?";
1984 if (sqlite3_prepare_v2(_data.ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
1985 {
1986 throw std::runtime_error(sqlite3_errmsg(_data.ppdb));
1987 }
1988
1989 sqlite3_bind_int(ppstmt, 1, noun._id);
1990
1991 while (sqlite3_step(ppstmt) == SQLITE_ROW)
1992 {
1993 std::string pronunciation(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 0)));
1994 auto phonemes = verbly::split<std::list<std::string>>(pronunciation, " ");
1995
1996 noun.pronunciations.push_back(phonemes);
1997
1998 if ((sqlite3_column_type(ppstmt, 1) != SQLITE_NULL) && (sqlite3_column_type(ppstmt, 2) != SQLITE_NULL))
1999 {
2000 std::string prerhyme(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 1)));
2001 std::string rhyming(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 2)));
2002
2003 noun.rhymes.emplace_back(prerhyme, rhyming);
2004 }
2005 }
2006
2007 sqlite3_finalize(ppstmt);
2008 }
2009
2010 return output;
2011 }
2012
2013};
diff --git a/lib/noun_query.h b/lib/noun_query.h deleted file mode 100644 index 74df260..0000000 --- a/lib/noun_query.h +++ /dev/null
@@ -1,180 +0,0 @@
1#ifndef NOUN_QUERY_H_5DE51DD7
2#define NOUN_QUERY_H_5DE51DD7
3
4namespace verbly {
5
// Accumulates search criteria for nouns and executes the search with run().
// Every option method is declared to return noun_query&; their definitions are
// not in this header (presumably they return *this for chaining - confirm).
6 class noun_query {
7 public:
// Keeps a reference to the data object (see the _data member below).
8 noun_query(const data& _data);
9
10 noun_query& limit(int _limit);
11 noun_query& random();
12 noun_query& except(const noun& _word);
13 noun_query& rhymes_with(const word& _word);
14 noun_query& rhymes_with(rhyme _r);
15 noun_query& has_pronunciation();
16 noun_query& has_rhyming_noun();
17 noun_query& has_rhyming_adjective();
18 noun_query& has_rhyming_adverb();
19 noun_query& has_rhyming_verb();
20 noun_query& with_stress(filter<std::vector<bool>> _arg);
21
22 noun_query& with_singular_form(std::string _arg);
23 noun_query& with_prefix(filter<std::string> _f);
24 noun_query& with_suffix(filter<std::string> _f);
25
26 noun_query& requires_plural_form();
27
28 noun_query& with_complexity(int _arg);
29
// Relationship criteria: each family has a flag-style method plus one or two
// methods taking a filter<noun> of related nouns.
30 noun_query& is_hypernym();
31 noun_query& hypernym_of(filter<noun> _f);
32 noun_query& full_hypernym_of(filter<noun> _f);
33
34 noun_query& is_hyponym();
35 noun_query& hyponym_of(filter<noun> _f);
36 noun_query& full_hyponym_of(filter<noun> _f);
37
38 noun_query& is_part_meronym();
39 noun_query& part_meronym_of(filter<noun> _f);
40 noun_query& full_part_meronym_of(filter<noun> _f);
41
42 noun_query& is_part_holonym();
43 noun_query& part_holonym_of(filter<noun> _f);
44 noun_query& full_part_holonym_of(filter<noun> _f);
45
46 noun_query& is_substance_meronym();
47 noun_query& substance_meronym_of(filter<noun> _f);
48 noun_query& full_substance_meronym_of(filter<noun> _f);
49
50 noun_query& is_substance_holonym();
51 noun_query& substance_holonym_of(filter<noun> _f);
52 noun_query& full_substance_holonym_of(filter<noun> _f);
53
54 noun_query& is_member_meronym();
55 noun_query& member_meronym_of(filter<noun> _f);
56 noun_query& full_member_meronym_of(filter<noun> _f);
57
58 noun_query& is_member_holonym();
59 noun_query& member_holonym_of(filter<noun> _f);
60 noun_query& full_member_holonym_of(filter<noun> _f);
61
62 noun_query& is_proper();
63 noun_query& is_not_proper();
64
65 noun_query& is_instance();
66 noun_query& instance_of(filter<noun> _f);
67
68 noun_query& is_class();
69 noun_query& class_of(filter<noun> _f);
70
71 noun_query& has_synonyms();
72 noun_query& synonym_of(filter<noun> _f);
73
74 noun_query& has_antonyms();
75 noun_query& antonym_of(filter<noun> _f);
76
// The two methods below take adjective filters rather than noun filters.
77 noun_query& has_pertainym();
78 noun_query& anti_pertainym_of(filter<adjective> _f);
79
80 noun_query& is_attribute();
81 noun_query& attribute_of(filter<adjective> _f);
82
83 noun_query& at_least_n_images(int _arg);
84 noun_query& with_wnid(int _arg);
85
// Derivation criteria are disabled (commented out) here and in the private
// members at the bottom of the class.
86/* noun_query& derived_from(const word& _w);
87 noun_query& not_derived_from(const word& _w);*/
88
// Executes the accumulated query and returns the matching nouns by value.
89 std::list<noun> run() const;
90
// Sentinel default for the int-valued options below (_limit, _with_complexity,
// _at_least_n_images), marking them as "not set".
91 const static int unlimited = -1;
92
93 private:
// Held by reference, so the data object must outlive this query.
94 const data& _data;
95 int _limit = unlimited;
96 bool _random = false;
97 std::list<rhyme> _rhymes;
98 std::list<noun> _except;
99 bool _has_prn = false;
100 bool _has_rhyming_noun = false;
101 bool _has_rhyming_adjective = false;
102 bool _has_rhyming_adverb = false;
103 bool _has_rhyming_verb = false;
104 filter<std::vector<bool>> _stress;
105
106 std::list<std::string> _with_singular_form;
107 filter<std::string> _with_prefix;
108 filter<std::string> _with_suffix;
109
110 int _with_complexity = unlimited;
111
112 bool _requires_plural_form = false;
113
114 bool _is_hypernym = false;
115 filter<noun> _hypernym_of;
116 filter<noun> _full_hypernym_of;
117
118 bool _is_hyponym = false;
119 filter<noun> _hyponym_of;
120 filter<noun> _full_hyponym_of;
121
122 bool _is_part_meronym = false;
123 filter<noun> _part_meronym_of;
124 filter<noun> _full_part_meronym_of;
125
126 bool _is_substance_meronym = false;
127 filter<noun> _substance_meronym_of;
128 filter<noun> _full_substance_meronym_of;
129
130 bool _is_member_meronym = false;
131 filter<noun> _member_meronym_of;
132 filter<noun> _full_member_meronym_of;
133
134 bool _is_part_holonym = false;
135 filter<noun> _part_holonym_of;
136 filter<noun> _full_part_holonym_of;
137
138 bool _is_substance_holonym = false;
139 filter<noun> _substance_holonym_of;
140 filter<noun> _full_substance_holonym_of;
141
142 bool _is_member_holonym = false;
143 filter<noun> _member_holonym_of;
144 filter<noun> _full_member_holonym_of;
145
146 bool _is_proper = false;
147 bool _is_not_proper = false;
148
149 bool _is_instance = false;
150 filter<noun> _instance_of;
151
152 bool _is_class = false;
153 filter<noun> _class_of;
154
155 bool _has_synonyms = false;
156 filter<noun> _synonym_of;
157
158 bool _has_antonyms = false;
159 filter<noun> _antonym_of;
160
161 bool _has_pertainym = false;
162 filter<adjective> _anti_pertainym_of;
163
164 bool _is_attribute = false;
165 filter<adjective> _attribute_of;
166
167 int _at_least_n_images = unlimited;
// A set, so each wnid value is stored at most once.
168 std::set<int> _with_wnid;
169
170/* std::list<adjective> _derived_from_adjective;
171 std::list<adjective> _not_derived_from_adjective;
172 std::list<adverb> _derived_from_adverb;
173 std::list<adverb> _not_derived_from_adverb;
174 std::list<noun> _derived_from_noun;
175 std::list<noun> _not_derived_from_noun;*/
176 };
177
178};
179
180#endif /* end of include guard: NOUN_QUERY_H_5DE51DD7 */
diff --git a/lib/preposition.cpp b/lib/preposition.cpp deleted file mode 100644 index cea9165..0000000 --- a/lib/preposition.cpp +++ /dev/null
@@ -1,107 +0,0 @@
1#include "verbly.h"
2
3namespace verbly {
4
5 std::string preposition::get_form() const
6 {
7 return form;
8 }
9
10 preposition_query::preposition_query(const data& _data) : _data(_data)
11 {
12
13 }
14
15 preposition_query& preposition_query::limit(int _limit)
16 {
17 this->_limit = _limit;
18
19 return *this;
20 }
21
22 preposition_query& preposition_query::random()
23 {
24 _random = true;
25
26 return *this;
27 }
28
29 preposition_query& preposition_query::in_group(std::string _arg)
30 {
31 _in_group.push_back(_arg);
32
33 return *this;
34 }
35
36 std::list<preposition> preposition_query::run() const
37 {
38 std::stringstream construct;
39 construct << "SELECT form FROM prepositions";
40 std::list<binding> bindings;
41
42 if (!_in_group.empty())
43 {
44 std::list<std::string> clauses(_in_group.size(), "groupname = ?");
45 construct << " WHERE preposition_id IN (SELECT preposition_id FROM preposition_groups WHERE ";
46 construct << verbly::implode(std::begin(clauses), std::end(clauses), " OR ");
47 construct << ")";
48
49 for (auto g : _in_group)
50 {
51 bindings.emplace_back(g);
52 }
53 }
54
55 if (_random)
56 {
57 construct << " ORDER BY RANDOM()";
58 }
59
60 if (_limit != unlimited)
61 {
62 construct << " LIMIT " << _limit;
63 }
64
65 sqlite3_stmt* ppstmt;
66 std::string query = construct.str();
67 if (sqlite3_prepare_v2(_data.ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
68 {
69 throw std::runtime_error(sqlite3_errmsg(_data.ppdb));
70 }
71
72 int i = 1;
73 for (auto& binding : bindings)
74 {
75 switch (binding.get_type())
76 {
77 case binding::type::integer:
78 {
79 sqlite3_bind_int(ppstmt, i, binding.get_integer());
80
81 break;
82 }
83
84 case binding::type::string:
85 {
86 sqlite3_bind_text(ppstmt, i, binding.get_string().c_str(), binding.get_string().length(), SQLITE_TRANSIENT);
87
88 break;
89 }
90 }
91
92 i++;
93 }
94
95 std::list<preposition> output;
96 while (sqlite3_step(ppstmt) == SQLITE_ROW)
97 {
98 preposition pp;
99 pp.form = std::string(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 0)));
100
101 output.push_back(pp);
102 }
103
104 return output;
105 }
106
107};
diff --git a/lib/preposition.h b/lib/preposition.h deleted file mode 100644 index 89f24fa..0000000 --- a/lib/preposition.h +++ /dev/null
@@ -1,38 +0,0 @@
1#ifndef PREPOSITION_H_FF908021
2#define PREPOSITION_H_FF908021
3
4namespace verbly {
5
6 class preposition_query;
7
// Value object holding one preposition's form string. Only preposition_query
// (a friend) can write the private `form` member.
8 class preposition {
9 public:
// Returns the stored form by value.
10 std::string get_form() const;
11
12 private:
// Grants preposition_query access to `form`.
13 friend class preposition_query;
14
15 std::string form;
16 };
17
// Collects search options for prepositions and executes the search with run().
18 class preposition_query {
19 public:
// Keeps a reference to the data object (see the _data member below).
20 preposition_query(const data& _data);
21
// Option methods; each is declared to return preposition_query&.
22 preposition_query& limit(int _limit);
23 preposition_query& random();
24 preposition_query& in_group(std::string _arg);
25
// Executes the query and returns the matching prepositions by value.
26 std::list<preposition> run() const;
27
// Sentinel default for _limit, marking it as "not set".
28 const static int unlimited = -1;
29 private:
// Held by reference, so the data object must outlive this query.
30 const data& _data;
31 int _limit = unlimited;
32 bool _random = false;
33 std::list<std::string> _in_group;
34 };
35
36};
37
38#endif /* end of include guard: PREPOSITION_H_FF908021 */
diff --git a/lib/pronunciation.cpp b/lib/pronunciation.cpp new file mode 100644 index 0000000..f5b742f --- /dev/null +++ b/lib/pronunciation.cpp
@@ -0,0 +1,69 @@
1#include "pronunciation.h"
2#include <sqlite3.h>
3#include "form.h"
4#include "lemma.h"
5#include "word.h"
6#include "util.h"
7
8namespace verbly {
9
// Definitions of pronunciation's static type info and query fields.
10 const object pronunciation::objectType = object::pronunciation;
11
// Column names for loading a pronunciation. The order matches the column
// indexes 0..5 that the row constructor in this file reads.
12 const std::list<std::string> pronunciation::select = {"pronunciation_id", "phonemes", "syllables", "stress", "prerhyme", "rhyme"};
13
14 const field pronunciation::id = field::integerField(object::pronunciation, "pronunciation_id");
15 const field pronunciation::numOfSyllables = field::integerField(object::pronunciation, "syllables");
16 const field pronunciation::stress = field::stringField(object::pronunciation, "stress");
17
// Relationship to form objects via "forms_pronunciations"
// (NOTE(review): exact join semantics are defined by field::joinThrough - confirm there).
18 const field pronunciation::form = field::joinThrough(object::pronunciation, "pronunciation_id", object::form, "forms_pronunciations", "form_id");
19
// NOTE(review): the trailing `true` presumably marks these columns as nullable - confirm in field.h.
20 const field pronunciation::prerhyme = field::stringField(object::pronunciation, "prerhyme", true);
21 const field pronunciation::rhyme = field::stringField(object::pronunciation, "rhyme", true);
22
23 pronunciation::pronunciation(const database& db, sqlite3_stmt* row) : db_(&db), valid_(true)
24 {
25 id_ = sqlite3_column_int(row, 0);
26
27 std::string phonemesStr(reinterpret_cast<const char*>(sqlite3_column_text(row, 1)));
28 phonemes_ = split<std::vector<std::string>>(phonemesStr, " ");
29
30 syllables_ = sqlite3_column_int(row, 2);
31 stress_ = std::string(reinterpret_cast<const char*>(sqlite3_column_text(row, 3)));
32
33 if (sqlite3_column_type(row, 5) != SQLITE_NULL)
34 {
35 hasRhyme_ = true;
36
37 prerhyme_ = std::string(reinterpret_cast<const char*>(sqlite3_column_text(row, 4)));
38 rhyme_ = std::string(reinterpret_cast<const char*>(sqlite3_column_text(row, 5)));
39 }
40 }
41
42 filter pronunciation::rhymesWith(const pronunciation& arg)
43 {
44 return (prerhyme != arg.getPrerhyme()) && (rhyme == arg.getRhyme());
45 }
46
47 /*filter pronunciation::rhymesWith(const class form& arg)
48 {
49 filter result;
50
51 for (const pronunciation& p : arg.getPronunciations())
52 {
53 result |= rhymesWith(p);
54 }
55
56 return result;
57 }
58
59 filter pronunciation::rhymesWith(const lemma& arg)
60 {
61 return rhymesWith(arg.getBaseForm());
62 }
63
64 filter pronunciation::rhymesWith(const word& arg)
65 {
66 return rhymesWith(arg.getLemma());
67 }*/
68
69};
diff --git a/lib/pronunciation.h b/lib/pronunciation.h new file mode 100644 index 0000000..c7a1d4d --- /dev/null +++ b/lib/pronunciation.h
@@ -0,0 +1,163 @@
1#ifndef PRONUNCIATION_H_C68F86B0
2#define PRONUNCIATION_H_C68F86B0
3
4#include <stdexcept>
5#include <vector>
6#include <string>
7#include "field.h"
8#include "filter.h"
9
10struct sqlite3_stmt;
11
12namespace verbly {
13
14 class form;
15 class lemma;
16 class word;
17 class database;
18
19 class pronunciation {
20 public:
21
22 // Default constructor
23
24 pronunciation() = default;
25
26 // Construct from database
27
28 pronunciation(const database& db, sqlite3_stmt* row);
29
30 // Accessors
31
32 operator bool() const
33 {
34 return valid_;
35 }
36
37 int getId() const
38 {
39 if (!valid_)
40 {
41 throw std::domain_error("Bad access to uninitialized pronunciation");
42 }
43
44 return id_;
45 }
46
47 const std::vector<std::string>& getPhonemes() const
48 {
49 if (!valid_)
50 {
51 throw std::domain_error("Bad access to uninitialized pronunciation");
52 }
53
54 return phonemes_;
55 }
56
57 int getSyllables() const
58 {
59 if (!valid_)
60 {
61 throw std::domain_error("Bad access to uninitialized pronunciation");
62 }
63
64 return syllables_;
65 }
66
67 std::string getStress() const
68 {
69 if (!valid_)
70 {
71 throw std::domain_error("Bad access to uninitialized pronunciation");
72 }
73
74 return stress_;
75 }
76
77 bool hasRhyme() const
78 {
79 if (!valid_)
80 {
81 throw std::domain_error("Bad access to uninitialized pronunciation");
82 }
83
84 return hasRhyme_;
85 }
86
87 std::string getPrerhyme() const
88 {
89 if (!valid_)
90 {
91 throw std::domain_error("Bad access to uninitialized pronunciation");
92 }
93
94 if (!hasRhyme_)
95 {
96 throw std::domain_error("This pronunciation has no rhyme");
97 }
98
99 return prerhyme_;
100 }
101
102 std::string getRhyme() const
103 {
104 if (!valid_)
105 {
106 throw std::domain_error("Bad access to uninitialized pronunciation");
107 }
108
109 if (!hasRhyme_)
110 {
111 throw std::domain_error("This pronunciation has no rhyme");
112 }
113
114 return rhyme_;
115 }
116
117 // Type info
118
119 static const object objectType;
120
121 static const std::list<std::string> select;
122
123 // Query fields
124
125 static const field id;
126 static const field numOfSyllables;
127 static const field stress;
128
129 operator filter() const
130 {
131 return (id == id_);
132 }
133
134 static filter rhymesWith(const pronunciation& arg);
135 static filter rhymesWith(const class form& arg);
136 static filter rhymesWith(const lemma& arg);
137 static filter rhymesWith(const word& arg);
138
139 // Relationships to other objects
140
141 static const field form;
142
143 private:
144 bool valid_ = false;
145
146 int id_;
147 std::vector<std::string> phonemes_;
148 int syllables_;
149 std::string stress_;
150 bool hasRhyme_ = false;
151 std::string prerhyme_;
152 std::string rhyme_;
153
154 const database* db_;
155
156 static const field prerhyme;
157 static const field rhyme;
158
159 };
160
161};
162
163#endif /* end of include guard: PRONUNCIATION_H_C68F86B0 */
diff --git a/lib/query.h b/lib/query.h new file mode 100644 index 0000000..e31be3d --- /dev/null +++ b/lib/query.h
@@ -0,0 +1,123 @@
1#ifndef QUERY_H_7CC5284C
2#define QUERY_H_7CC5284C
3
4#include <vector>
5#include <stdexcept>
6#include <string>
7#include <list>
8#include <sqlite3.h>
9#include <iostream>
10#include "statement.h"
11#include "binding.h"
12
13namespace verbly {
14
// Error raised for database failures. what() is "<msg> (<sqlMsg>)", i.e. the
// caller's description followed by the SQL error text in parentheses.
class database_error : public std::logic_error {
public:

  database_error(std::string msg, std::string sqlMsg)
    : std::logic_error(describe(msg, sqlMsg))
  {
  }

private:

  // Joins the description and the SQL error text into the final message.
  static std::string describe(const std::string& msg, const std::string& sqlMsg)
  {
    std::string text = msg;
    text += " (";
    text += sqlMsg;
    text += ")";

    return text;
  }
};
22
23 template <typename Object>
24 class query {
25 public:
26
27 query(const database& db, sqlite3* ppdb, filter queryFilter, bool random, int limit) : db_(&db)
28 {
29 statement stmt(Object::objectType, std::move(queryFilter));
30
31 std::string queryString = stmt.getQueryString(Object::select, random, limit);
32 std::list<binding> bindings = stmt.getBindings();
33
34 std::cout << queryString << std::endl;
35
36 if (sqlite3_prepare_v2(ppdb, queryString.c_str(), queryString.length(), &ppstmt_, NULL) != SQLITE_OK)
37 {
38 std::string errorMsg = sqlite3_errmsg(ppdb);
39 sqlite3_finalize(ppstmt_);
40
41 throw database_error("Error preparing query", errorMsg);
42 }
43
44 int i = 1;
45 for (const binding& value : bindings)
46 {
47 switch (value.getType())
48 {
49 case binding::type::integer:
50 {
51 if (sqlite3_bind_int(ppstmt_, i, value.getInteger()) != SQLITE_OK)
52 {
53 std::string errorMsg = sqlite3_errmsg(ppdb);
54 sqlite3_finalize(ppstmt_);
55
56 throw database_error("Error binding value to query", errorMsg);
57 }
58
59 break;
60 }
61
62 case binding::type::string:
63 {
64 if (sqlite3_bind_text(ppstmt_, i, value.getString().c_str(), value.getString().length(), SQLITE_TRANSIENT) != SQLITE_OK)
65 {
66 std::string errorMsg = sqlite3_errmsg(ppdb);
67 sqlite3_finalize(ppstmt_);
68
69 throw database_error("Error binding value to query", errorMsg);
70 }
71
72 break;
73 }
74
75 case binding::type::invalid:
76 {
77 throw std::logic_error("Cannot use invalid bindings");
78 }
79 }
80
81 i++;
82 }
83 }
84
85 ~query()
86 {
87 sqlite3_finalize(ppstmt_);
88 }
89
90 std::vector<Object> all() const
91 {
92 std::vector<Object> result;
93
94 while (sqlite3_step(ppstmt_) == SQLITE_ROW)
95 {
96 result.emplace_back(*db_, ppstmt_);
97 }
98
99 sqlite3_reset(ppstmt_);
100
101 return result;
102 }
103
104 Object first() const
105 {
106 std::vector<Object> results = all();
107 if (!results.empty())
108 {
109 return results.front();
110 } else {
111 throw std::logic_error("query returned empty dataset");
112 }
113 }
114
115 private:
116 const database* db_;
117 sqlite3_stmt* ppstmt_;
118
119 };
120
121};
122
123#endif /* end of include guard: QUERY_H_7CC5284C */
diff --git a/lib/statement.cpp b/lib/statement.cpp new file mode 100644 index 0000000..52fa00d --- /dev/null +++ b/lib/statement.cpp
@@ -0,0 +1,806 @@
1#include "statement.h"
2#include <sstream>
3#include <utility>
4#include "filter.h"
5#include "util.h"
6#include "notion.h"
7#include "word.h"
8#include "group.h"
9#include "frame.h"
10#include "lemma.h"
11#include "form.h"
12#include "pronunciation.h"
13
14namespace verbly {
15
// Public entry point: builds a statement for the given object context.
//
// Delegates to the table-name constructor, using the table that
// getTableForContext() associates with `context` and the filter normalized
// against that same context.
statement::statement(
  object context,
  filter queryFilter) :
    statement(getTableForContext(context), queryFilter.normalize(context))
{
}
22
23 std::string statement::getQueryString(std::list<std::string> select, bool random, int limit) const
24 {
25 std::stringstream queryStream;
26
27 if (!withs_.empty())
28 {
29 queryStream << "WITH RECURSIVE ";
30
31 std::list<std::string> ctes;
32 for (const with& cte : withs_)
33 {
34 std::stringstream cteStream;
35 cteStream << cte.getIdentifier();
36 cteStream << " AS (SELECT ";
37 cteStream << cte.getTopTable();
38 cteStream << ".* FROM ";
39 cteStream << cte.getTableForId(cte.getTopTable());
40 cteStream << " AS ";
41 cteStream << cte.getTopTable();
42
43 for (const join& j : cte.getJoins())
44 {
45 cteStream << " ";
46 cteStream << j;
47 }
48
49 if (cte.getCondition().getType() != condition::type::empty)
50 {
51 cteStream << " WHERE ";
52 cteStream << cte.getCondition().toSql();
53 }
54
55 cteStream << " UNION SELECT l.* FROM ";
56 cteStream << cte.getIdentifier();
57 cteStream << " AS t INNER JOIN ";
58 cteStream << cte.getField().getTable();
59 cteStream << " AS j ON t.";
60 cteStream << cte.getField().getColumn();
61 cteStream << " = j.";
62 cteStream << cte.getField().getForeignJoinColumn();
63 cteStream << " INNER JOIN ";
64 cteStream << cte.getTableForId(cte.getTopTable());
65 cteStream << " AS l ON j.";
66 cteStream << cte.getField().getJoinColumn();
67 cteStream << " = l.";
68 cteStream << cte.getField().getColumn();
69 cteStream << ")";
70
71 ctes.push_back(cteStream.str());
72 }
73
74 queryStream << implode(std::begin(ctes), std::end(ctes), ", ");
75 queryStream << " ";
76 }
77
78 std::list<std::string> realSelect;
79 for (std::string& s : select)
80 {
81 realSelect.push_back(topTable_ + "." + s);
82 }
83
84 queryStream << "SELECT ";
85 queryStream << implode(std::begin(realSelect), std::end(realSelect), ", ");
86 queryStream << " FROM ";
87 queryStream << tables_.at(topTable_);
88 queryStream << " AS ";
89 queryStream << topTable_;
90
91 for (const join& j : joins_)
92 {
93 queryStream << " ";
94 queryStream << j;
95 }
96
97 if (topCondition_.getType() != condition::type::empty)
98 {
99 queryStream << " WHERE ";
100 queryStream << topCondition_.toSql();
101 }
102
103 if (random)
104 {
105 queryStream << " ORDER BY RANDOM()";
106 }
107
108 if (limit > 0)
109 {
110 queryStream << " LIMIT ";
111 queryStream << limit;
112 }
113
114 return queryStream.str();
115 }
116
117 std::list<binding> statement::getBindings() const
118 {
119 std::list<binding> result;
120
121 for (const with& w : withs_)
122 {
123 for (binding value : w.getCondition().flattenBindings())
124 {
125 result.push_back(std::move(value));
126 }
127 }
128
129 for (binding value : topCondition_.flattenBindings())
130 {
131 result.push_back(std::move(value));
132 }
133
134 return result;
135 }
136
// Builds a statement over a raw table name.
//
// nextTableId / nextWithId seed the counters used to generate unique table
// aliases and CTE names; sub-statements receive the parent's current values.
//
// Both instantiateTable() and parseFilter() are member functions invoked from
// the initializer list, so they run while the object is still being
// constructed. They touch the counters, tables_, joins_ and withs_, which
// therefore have to be declared before topTable_ / topCondition_ in the class.
statement::statement(
  std::string tableName,
  filter clause,
  int nextTableId,
  int nextWithId) :
    nextTableId_(nextTableId),
    nextWithId_(nextWithId),
    topTable_(instantiateTable(std::move(tableName))),
    topCondition_(parseFilter(std::move(clause)))
{
}
148
// Converts a filter into a condition over this statement's tables.
//
// Besides returning the condition, this registers whatever the filter needs in
// order to be evaluated: joins are appended to joins_, recursive CTEs to
// withs_, and new table aliases are created through instantiateTable().
//
// Returns an empty condition for empty filters, for fields of undefined type,
// and for groups whose children all turned out empty.
// Throws std::logic_error when a join-style comparison is applied to a plain
// (string / integer / boolean) field.
statement::condition statement::parseFilter(filter clause)
{
  switch (clause.getType())
  {
    case filter::type::empty:
    {
      return {};
    }

    case filter::type::singleton:
    {
      switch (clause.getField().getType())
      {
        case field::type::undefined:
        {
          return {};
        }

        // Plain columns: the comparison is applied directly to the column on
        // the top table.
        case field::type::string:
        case field::type::integer:
        case field::type::boolean:
        {
          switch (clause.getComparison())
          {
            case filter::comparison::is_null:
            {
              return condition(topTable_, clause.getField().getColumn(), true);
            }

            case filter::comparison::is_not_null:
            {
              return condition(topTable_, clause.getField().getColumn(), false);
            }

            case filter::comparison::int_equals:
            {
              return condition(topTable_, clause.getField().getColumn(), condition::comparison::equals, clause.getIntegerArgument());
            }

            case filter::comparison::int_does_not_equal:
            {
              return condition(topTable_, clause.getField().getColumn(), condition::comparison::does_not_equal, clause.getIntegerArgument());
            }

            case filter::comparison::int_is_at_least:
            {
              return condition(topTable_, clause.getField().getColumn(), condition::comparison::is_at_least, clause.getIntegerArgument());
            }

            case filter::comparison::int_is_greater_than:
            {
              return condition(topTable_, clause.getField().getColumn(), condition::comparison::is_greater_than, clause.getIntegerArgument());
            }

            case filter::comparison::int_is_at_most:
            {
              return condition(topTable_, clause.getField().getColumn(), condition::comparison::is_at_most, clause.getIntegerArgument());
            }

            case filter::comparison::int_is_less_than:
            {
              return condition(topTable_, clause.getField().getColumn(), condition::comparison::is_less_than, clause.getIntegerArgument());
            }

            // Booleans are compared as the integers 1 and 0.
            case filter::comparison::boolean_equals:
            {
              return condition(topTable_, clause.getField().getColumn(), condition::comparison::equals, clause.getBooleanArgument() ? 1 : 0);
            }

            case filter::comparison::string_equals:
            {
              return condition(topTable_, clause.getField().getColumn(), condition::comparison::equals, clause.getStringArgument());
            }

            case filter::comparison::string_does_not_equal:
            {
              return condition(topTable_, clause.getField().getColumn(), condition::comparison::does_not_equal, clause.getStringArgument());
            }

            case filter::comparison::string_is_like:
            {
              return condition(topTable_, clause.getField().getColumn(), condition::comparison::is_like, clause.getStringArgument());
            }

            case filter::comparison::string_is_not_like:
            {
              return condition(topTable_, clause.getField().getColumn(), condition::comparison::is_not_like, clause.getStringArgument());
            }

            // Join-style comparisons make no sense on a plain column.
            case filter::comparison::matches:
            case filter::comparison::does_not_match:
            case filter::comparison::hierarchally_matches:
            case filter::comparison::does_not_hierarchally_match:
            {
              throw std::logic_error("Invalid comparison type for field");
            }
          }
        }

        // Direct join: the joined table shares the field's column with the
        // top table.
        case field::type::join:
        {
          std::string joinTableName;
          if (clause.getField().hasTable())
          {
            joinTableName = clause.getField().getTable();
          } else {
            joinTableName = getTableForContext(clause.getField().getJoinObject());
          }

          // The nested filter is parsed as its own statement; passing the
          // current counters keeps its aliases distinct from ours.
          statement joinStmt(
            joinTableName,
            clause.getJoinCondition().normalize(clause.getField().getJoinObject()),
            nextTableId_,
            nextWithId_);

          std::string joinTable = joinStmt.topTable_;
          condition curCond = integrate(std::move(joinStmt));

          // "Does not match" is expressed as a LEFT JOIN plus a check that
          // the joined column is NULL.
          bool outer = false;
          if (clause.getComparison() == filter::comparison::does_not_match)
          {
            outer = true;

            curCond &= condition(joinTable, clause.getField().getColumn(), true);
          }

          joins_.emplace_back(outer, joinTableName, topTable_, clause.getField().getColumn(), joinTable, clause.getField().getColumn());

          return curCond;
        }

        // Join through an intermediate table: top table -> through table ->
        // joined object's table.
        case field::type::join_through:
        {
          statement joinStmt(
            getTableForContext(clause.getField().getJoinObject()),
            clause.getJoinCondition().normalize(clause.getField().getJoinObject()),
            nextTableId_,
            nextWithId_);

          std::string joinTable = joinStmt.topTable_;
          // NOTE(review): the through-table alias is created before
          // integrate() overwrites nextTableId_ with the sub-statement's
          // counter -- confirm the alias cannot collide with one allocated
          // inside joinStmt.
          std::string throughTable = instantiateTable(clause.getField().getTable());
          condition curCond = integrate(std::move(joinStmt));

          bool outer = false;
          if (clause.getComparison() == filter::comparison::does_not_match)
          {
            outer = true;

            curCond &= condition(throughTable, clause.getField().getJoinColumn(), true);
          }

          joins_.emplace_back(outer, clause.getField().getTable(), topTable_, clause.getField().getColumn(), throughTable, clause.getField().getJoinColumn());
          joins_.emplace_back(false, getTableForContext(clause.getField().getJoinObject()), throughTable, clause.getField().getForeignJoinColumn(), joinTable, clause.getField().getForeignColumn());

          return curCond;
        }

        // Hierarchal join: the nested filter becomes a recursive CTE that the
        // top table is joined against.
        case field::type::hierarchal_join:
        {
          std::string withName = std::string(clause.getField().getTable()) + "_tree_" + std::to_string(nextWithId_++);
          std::string withInstName = instantiateTable(withName);

          bool outer = false;
          if (clause.getComparison() == filter::comparison::does_not_hierarchally_match)
          {
            outer = true;
          }

          joins_.emplace_back(outer, withName, topTable_, clause.getField().getColumn(), withInstName, clause.getField().getColumn());

          statement withStmt(
            getTableForContext(clause.getField().getObject()),
            clause.getJoinCondition().normalize(clause.getField().getObject()),
            nextTableId_,
            nextWithId_);

          // CTEs required by the nested statement are hoisted so that they
          // are listed ahead of the CTE created here.
          for (auto& w : withStmt.withs_)
          {
            withs_.push_back(std::move(w));
          }

          nextTableId_ = withStmt.nextTableId_;
          nextWithId_ = withStmt.nextWithId_;

          // The rest of the nested statement (tables, joins, condition) is
          // moved into the CTE description rather than merged into this one.
          withs_.emplace_back(
            withName,
            clause.getField(),
            std::move(withStmt.tables_),
            std::move(withStmt.topTable_),
            std::move(withStmt.topCondition_),
            std::move(withStmt.joins_));

          // For the negated form, only rows with no partner in the CTE pass.
          if (clause.getComparison() == filter::comparison::does_not_hierarchally_match)
          {
            return condition(withInstName, clause.getField().getColumn(), true);
          } else {
            return {};
          }
        }
      }
    }

    case filter::type::group:
    {
      condition grp(clause.getOrlogic());

      // Children that produce no condition are left out of the group.
      for (const filter& child : clause)
      {
        condition newChild = parseFilter(child);
        if (newChild.getType() != condition::type::empty)
        {
          grp += std::move(newChild);
        }
      }

      // A group without any remaining children collapses to "no condition".
      if (grp.getChildren().empty())
      {
        grp = {};
      }

      return grp;
    }
  }
}
373
374 std::string statement::instantiateTable(std::string name)
375 {
376 std::string identifier = name + "_" + std::to_string(nextTableId_++);
377 tables_[identifier] = name;
378
379 return identifier;
380 }
381
382 statement::condition statement::integrate(statement subStmt)
383 {
384 for (auto& mapping : subStmt.tables_)
385 {
386 tables_[mapping.first] = mapping.second;
387 }
388
389 for (auto& j : subStmt.joins_)
390 {
391 joins_.push_back(j);
392 }
393
394 for (auto& w : subStmt.withs_)
395 {
396 withs_.push_back(w);
397 }
398
399 nextTableId_ = subStmt.nextTableId_;
400 nextWithId_ = subStmt.nextWithId_;
401
402 return subStmt.topCondition_;
403 }
404
405 std::ostream& operator<<(std::ostream& oss, const statement::join& j)
406 {
407 if (j.isOuterJoin())
408 {
409 oss << "LEFT";
410 } else {
411 oss << "INNER";
412 }
413
414 return oss
415 << " JOIN "
416 << j.getForeignTableName()
417 << " AS "
418 << j.getForeignTable()
419 << " ON "
420 << j.getForeignTable()
421 << "."
422 << j.getForeignColumn()
423 << " = "
424 << j.getJoinTable()
425 << "."
426 << j.getJoinColumn();
427 }
428
429 statement::condition::condition(const condition& other)
430 {
431 type_ = other.type_;
432
433 switch (type_)
434 {
435 case type::empty:
436 {
437 break;
438 }
439
440 case type::singleton:
441 {
442 new(&singleton_.table_) std::string(other.singleton_.table_);
443 new(&singleton_.column_) std::string(other.singleton_.column_);
444 singleton_.comparison_ = other.singleton_.comparison_;
445 new(&singleton_.value_) binding(other.singleton_.value_);
446
447 break;
448 }
449
450 case type::group:
451 {
452 new(&group_.children_) std::list<condition>(other.group_.children_);
453 group_.orlogic_ = other.group_.orlogic_;
454
455 break;
456 }
457 }
458 }
459
// Move constructor: starts out as an empty condition and swaps contents with
// `other`, which is therefore left empty.
statement::condition::condition(condition&& other) : condition()
{
  swap(*this, other);
}
464
// Assignment via copy-and-swap: the argument is taken by value (copied or
// moved by the caller) and its contents are swapped into *this; the old
// contents are destroyed together with the parameter.
statement::condition& statement::condition::operator=(condition other)
{
  swap(*this, other);

  return *this;
}
471
// Exchanges the contents of two conditions.
//
// Because the payload lives in a union, the swap is done by hand in three
// steps: (1) move first's active members into temporaries, (2) destroy first
// and rebuild it from second's active members, (3) destroy second and rebuild
// it from the temporaries. The order of these steps matters.
void swap(statement::condition& first, statement::condition& second)
{
  using type = statement::condition::type;
  using condition = statement::condition;

  // Step 1: stash first's state. Only the temporaries belonging to first's
  // active alternative are assigned; the others are never read.
  type tempType = first.type_;
  std::string tempTable;
  std::string tempColumn;
  condition::comparison tempComparison;
  binding tempBinding;
  std::list<condition> tempChildren;
  bool tempOrlogic;

  switch (tempType)
  {
    case type::empty:
    {
      break;
    }

    case type::singleton:
    {
      tempTable = std::move(first.singleton_.table_);
      tempColumn = std::move(first.singleton_.column_);
      tempComparison = first.singleton_.comparison_;
      tempBinding = std::move(first.singleton_.value_);

      break;
    }

    case type::group:
    {
      tempChildren = std::move(first.group_.children_);
      tempOrlogic = first.group_.orlogic_;

      break;
    }
  }

  // Step 2: tear down first's (moved-from) members, then construct second's
  // alternative in first's storage.
  first.~condition();

  first.type_ = second.type_;

  switch (first.type_)
  {
    case type::empty:
    {
      break;
    }

    case type::singleton:
    {
      new(&first.singleton_.table_) std::string(std::move(second.singleton_.table_));
      new(&first.singleton_.column_) std::string(std::move(second.singleton_.column_));
      first.singleton_.comparison_ = second.singleton_.comparison_;
      new(&first.singleton_.value_) binding(std::move(second.singleton_.value_));

      break;
    }

    case type::group:
    {
      new(&first.group_.children_) std::list<condition>(std::move(second.group_.children_));
      first.group_.orlogic_ = second.group_.orlogic_;

      break;
    }
  }

  // Step 3: tear down second's (moved-from) members, then construct first's
  // original alternative in second's storage from the temporaries.
  second.~condition();

  second.type_ = tempType;

  switch (second.type_)
  {
    case type::empty:
    {
      break;
    }

    case type::singleton:
    {
      new(&second.singleton_.table_) std::string(std::move(tempTable));
      new(&second.singleton_.column_) std::string(std::move(tempColumn));
      second.singleton_.comparison_ = tempComparison;
      new(&second.singleton_.value_) binding(std::move(tempBinding));

      break;
    }

    case type::group:
    {
      new(&second.group_.children_) std::list<condition>(std::move(tempChildren));
      second.group_.orlogic_ = tempOrlogic;

      break;
    }
  }
}
571
// Destructor: explicitly destroys the non-trivial members of whichever union
// alternative is active according to type_. The empty alternative owns
// nothing.
statement::condition::~condition()
{
  switch (type_)
  {
    case type::empty:
    {
      break;
    }

    case type::singleton:
    {
      // Alias so that the destructor can be named in the explicit call.
      using string_type = std::string;

      singleton_.table_.~string_type();
      singleton_.column_.~string_type();
      singleton_.value_.~binding();

      break;
    }

    case type::group:
    {
      // Alias so that the destructor can be named in the explicit call.
      using list_type = std::list<condition>;

      group_.children_.~list_type();

      break;
    }
  }
}
602
// Creates an empty condition; no union member is constructed.
statement::condition::condition() : type_(type::empty)
{
}
606
607 statement::condition::condition(
608 std::string table,
609 std::string column,
610 bool isNull) :
611 type_(type::singleton)
612 {
613 new(&singleton_.table_) std::string(std::move(table));
614 new(&singleton_.column_) std::string(std::move(column));
615
616 if (isNull)
617 {
618 singleton_.comparison_ = comparison::is_null;
619 } else {
620 singleton_.comparison_ = comparison::is_not_null;
621 }
622 }
623
// Creates a singleton condition comparing `table.column` against `value`
// using `comp`. All members of the singleton alternative are constructed in
// place inside the union.
statement::condition::condition(
  std::string table,
  std::string column,
  comparison comp,
  binding value) :
    type_(type::singleton)
{
  new(&singleton_.table_) std::string(std::move(table));
  new(&singleton_.column_) std::string(std::move(column));
  singleton_.comparison_ = comp;
  new(&singleton_.value_) binding(std::move(value));
}
636
637 std::string statement::condition::toSql() const
638 {
639 switch (type_)
640 {
641 case type::empty:
642 {
643 return "";
644 }
645
646 case type::singleton:
647 {
648 switch (singleton_.comparison_)
649 {
650 case comparison::equals:
651 {
652 return singleton_.table_ + "." + singleton_.column_ + " = ?";
653 }
654
655 case comparison::does_not_equal:
656 {
657 return singleton_.table_ + "." + singleton_.column_ + " != ?";
658 }
659
660 case comparison::is_greater_than:
661 {
662 return singleton_.table_ + "." + singleton_.column_ + " > ?";
663 }
664
665 case comparison::is_at_most:
666 {
667 return singleton_.table_ + "." + singleton_.column_ + " <= ?";
668 }
669
670 case comparison::is_less_than:
671 {
672 return singleton_.table_ + "." + singleton_.column_ + " < ?";
673 }
674
675 case comparison::is_at_least:
676 {
677 return singleton_.table_ + "." + singleton_.column_ + " >= ?";
678 }
679
680 case comparison::is_like:
681 {
682 return singleton_.table_ + "." + singleton_.column_ + " LIKE ?";
683 }
684
685 case comparison::is_not_like:
686 {
687 return singleton_.table_ + "." + singleton_.column_ + " NOT LIKE ?";
688 }
689
690 case comparison::is_not_null:
691 {
692 return singleton_.table_ + "." + singleton_.column_ + " IS NOT NULL";
693 }
694
695 case comparison::is_null:
696 {
697 return singleton_.table_ + "." + singleton_.column_ + " IS NULL";
698 }
699 }
700 }
701
702 case type::group:
703 {
704 std::list<std::string> clauses;
705 for (const condition& cond : group_.children_)
706 {
707 clauses.push_back(cond.toSql());
708 }
709
710 return implode(std::begin(clauses), std::end(clauses), group_.orlogic_ ? " OR " : " AND ");
711 }
712 }
713 }
714
715 std::list<binding> statement::condition::flattenBindings() const
716 {
717 switch (type_)
718 {
719 case type::empty:
720 {
721 return {};
722 }
723
724 case type::singleton:
725 {
726 return {singleton_.value_};
727 }
728
729 case type::group:
730 {
731 std::list<binding> bindings;
732 for (const condition& cond : group_.children_)
733 {
734 for (binding value : cond.flattenBindings())
735 {
736 bindings.push_back(std::move(value));
737 }
738 }
739
740 return bindings;
741 }
742 }
743 }
744
// Creates a group condition with no children. `orlogic` selects how the
// children are combined when rendered: OR when true, AND when false.
statement::condition::condition(bool orlogic) : type_(type::group)
{
  new(&group_.children_) std::list<condition>();
  group_.orlogic_ = orlogic;
}
750
751 statement::condition& statement::condition::operator+=(condition n)
752 {
753 if (type_ == type::group)
754 {
755 group_.children_.push_back(std::move(n));
756
757 return *this;
758 } else {
759 throw std::domain_error("Cannot add condition to non-group condition");
760 }
761 }
762
763 statement::condition& statement::condition::operator&=(condition n)
764 {
765 switch (type_)
766 {
767 case type::empty:
768 {
769 *this = std::move(n);
770
771 break;
772 }
773
774 case type::singleton:
775 {
776 condition grp(false);
777 grp += *this;
778 grp += std::move(n);
779
780 *this = grp;
781
782 break;
783 }
784
785 case type::group:
786 {
787 *this += std::move(n);
788
789 break;
790 }
791 }
792
793 return *this;
794 }
795
796 const std::list<statement::condition>& statement::condition::getChildren() const
797 {
798 if (type_ == type::group)
799 {
800 return group_.children_;
801 } else {
802 throw std::domain_error("Cannot get children of non-group condition");
803 }
804 }
805
806};
diff --git a/lib/statement.h b/lib/statement.h new file mode 100644 index 0000000..a528d60 --- /dev/null +++ b/lib/statement.h
@@ -0,0 +1,272 @@
1#ifndef STATEMENT_H_29F51659
2#define STATEMENT_H_29F51659
3
4#include <string>
5#include <list>
6#include <map>
7#include <set>
8#include "binding.h"
9#include "enums.h"
10#include "field.h"
11#include "filter.h"
12
13namespace verbly {
14
15 class filter;
16
// Builds the SQL text and the bound parameter values for a query described by
// an object context and a filter.
class statement {
public:

  // Builds the statement for `context`, restricted by `queryFilter`.
  statement(object context, filter queryFilter);

  // Renders the SQL. `select` lists the columns of the top table to return,
  // `random` adds "ORDER BY RANDOM()", and a positive `limit` adds a LIMIT
  // clause.
  std::string getQueryString(std::list<std::string> select, bool random, int limit) const;

  // Values for the "?" placeholders of getQueryString(), in placeholder order.
  std::list<binding> getBindings() const;

private:

  // One JOIN clause. When streamed it renders as
  //   "<LEFT|INNER> JOIN foreignTableName AS foreignTable
  //      ON foreignTable.foreignColumn = joinTable.joinColumn".
  class join {
  public:

    join(
      bool outer,
      std::string foreignTableName,
      std::string joinTable,
      std::string joinColumn,
      std::string foreignTable,
      std::string foreignColumn) :
        outer_(outer),
        foreignTableName_(std::move(foreignTableName)),
        joinTable_(std::move(joinTable)),
        joinColumn_(std::move(joinColumn)),
        foreignTable_(std::move(foreignTable)),
        foreignColumn_(std::move(foreignColumn))
    {
    }

    // True for a LEFT join, false for an INNER join.
    bool isOuterJoin() const
    {
      return outer_;
    }

    // Name of the table being joined in.
    const std::string& getForeignTableName() const
    {
      return foreignTableName_;
    }

    // Alias of the table the join attaches to.
    const std::string& getJoinTable() const
    {
      return joinTable_;
    }

    // Column on the table the join attaches to.
    const std::string& getJoinColumn() const
    {
      return joinColumn_;
    }

    // Alias given to the table being joined in.
    const std::string& getForeignTable() const
    {
      return foreignTable_;
    }

    // Column on the table being joined in.
    const std::string& getForeignColumn() const
    {
      return foreignColumn_;
    }

  private:
    bool outer_ = false;
    std::string foreignTableName_;
    std::string joinTable_;
    std::string joinColumn_;
    std::string foreignTable_;
    std::string foreignColumn_;

  };

  friend std::ostream& operator<<(std::ostream& oss, const join& j);

  // A WHERE-clause expression: nothing at all (empty), one column test
  // (singleton), or a list of sub-conditions combined with AND or OR (group).
  // The payload is stored in a union discriminated by type_, which is why the
  // copy/move/destroy operations are all written by hand.
  class condition {
  public:
    enum class type {
      empty,
      singleton,
      group
    };

    // Column tests a singleton can perform; toSql() maps each to its SQL form.
    enum class comparison {
      equals,
      does_not_equal,
      is_greater_than,
      is_at_most,
      is_less_than,
      is_at_least,
      is_like,
      is_not_like,
      is_not_null,
      is_null
    };

    // Copy and move constructors

    condition(const condition& other);
    condition(condition&& other);

    // Assignment

    condition& operator=(condition other);

    // Swap

    friend void swap(condition& first, condition& second);

    // Destructor

    ~condition();

    // Accessors

    type getType() const
    {
      return type_;
    }

    // Empty

    condition();

    // Singleton

    // IS NULL (isNull == true) or IS NOT NULL (isNull == false) test.
    condition(std::string table, std::string column, bool isNull);

    // Comparison of a column against a bound value.
    condition(std::string table, std::string column, comparison comp, binding value);

    // Group

    // Empty group; children are OR-ed when orlogic is true, AND-ed otherwise.
    explicit condition(bool orlogic);

    // Appends a child; only valid on groups.
    condition& operator+=(condition n);

    // Combines this condition with another one.
    condition& operator&=(condition n);

    // Children of a group; only valid on groups.
    const std::list<condition>& getChildren() const;

    // Utility

    // SQL text of the condition, with "?" placeholders for bound values.
    std::string toSql() const;

    // Bound values of this condition and all of its descendants.
    std::list<binding> flattenBindings() const;

  private:
    union {
      struct {
        std::string table_;
        std::string column_;
        comparison comparison_;
        binding value_;
      } singleton_;
      struct {
        std::list<condition> children_;
        bool orlogic_;
      } group_;
    };
    type type_;
  };

  friend void swap(condition& first, condition& second);

  // Description of one recursive common table expression: its name, the field
  // it is built for, and the tables, joins and condition of the statement
  // that seeds it.
  class with {
  public:

    with(
      std::string identifier,
      field f,
      std::map<std::string, std::string> tables,
      std::string topTable,
      condition where,
      std::list<join> joins) :
        identifier_(std::move(identifier)),
        field_(f),
        tables_(std::move(tables)),
        topTable_(std::move(topTable)),
        topCondition_(std::move(where)),
        joins_(std::move(joins))
    {
    }

    // Name of the CTE.
    const std::string& getIdentifier() const
    {
      return identifier_;
    }

    field getField() const
    {
      return field_;
    }

    // Table name behind an alias; std::map::at throws std::out_of_range for
    // an unknown alias.
    std::string getTableForId(std::string identifier) const
    {
      return tables_.at(identifier);
    }

    // Alias of the seeding statement's top table.
    const std::string& getTopTable() const
    {
      return topTable_;
    }

    const condition& getCondition() const
    {
      return topCondition_;
    }

    const std::list<join>& getJoins() const
    {
      return joins_;
    }

  private:
    std::string identifier_;
    field field_;
    std::map<std::string, std::string> tables_;
    std::string topTable_;
    condition topCondition_;
    std::list<join> joins_;

  };

  // Maps an object context to the name of its table.
  // Throws std::domain_error for a context that has no table.
  static constexpr const char* getTableForContext(object context)
  {
    return (context == object::notion) ? "notions"
      : (context == object::word) ? "words"
      : (context == object::group) ? "groups"
      : (context == object::frame) ? "frames"
      : (context == object::lemma) ? "lemmas_forms"
      : (context == object::form) ? "forms"
      : (context == object::pronunciation) ? "pronunciations"
      : throw std::domain_error("Provided context has no associated table");
  }

  static const std::list<field> getSelectForContext(object context);

  // Builds a statement directly over a table name; the id arguments seed the
  // alias / CTE counters.
  statement(std::string tableName, filter clause, int nextTableId = 0, int nextWithId = 0);

  // Converts a filter into a condition, registering the joins and CTEs it
  // requires.
  condition parseFilter(filter queryFilter);

  // Allocates a new alias for the named table and records it in tables_.
  std::string instantiateTable(std::string name);

  // Merges a sub-statement's tables, joins and CTEs into this statement and
  // returns its condition.
  condition integrate(statement subStmt);

  // Counters for generating table aliases and CTE names.
  int nextTableId_;
  int nextWithId_;

  // NOTE: the constructor calls instantiateTable() and parseFilter() from its
  // initializer list; the declaration order below is what guarantees that the
  // members those functions touch already exist at that point.
  std::map<std::string, std::string> tables_; // alias -> table name
  std::string topTable_;
  std::list<join> joins_;
  std::list<with> withs_;
  condition topCondition_;

};
269
270};
271
272#endif /* end of include guard: STATEMENT_H_29F51659 */
diff --git a/lib/util.h b/lib/util.h index fb5fe67..b74b050 100644 --- a/lib/util.h +++ b/lib/util.h
@@ -1,6 +1,10 @@
1#ifndef UTIL_H_15DDCA2D 1#ifndef UTIL_H_15DDCA2D
2#define UTIL_H_15DDCA2D 2#define UTIL_H_15DDCA2D
3 3
4#include <string>
5#include <sstream>
6#include <iterator>
7
4namespace verbly { 8namespace verbly {
5 9
6 template <class InputIterator> 10 template <class InputIterator>
@@ -21,25 +25,33 @@ namespace verbly {
21 return result.str(); 25 return result.str();
22 } 26 }
23 27
24 template <class Container> 28 template <class OutputIterator>
25 Container split(std::string input, std::string delimiter) 29 void split(std::string input, std::string delimiter, OutputIterator out)
26 { 30 {
27 Container result;
28
29 while (!input.empty()) 31 while (!input.empty())
30 { 32 {
31 int divider = input.find(delimiter); 33 int divider = input.find(delimiter);
32 if (divider == std::string::npos) 34 if (divider == std::string::npos)
33 { 35 {
34 result.push_back(input); 36 *out = input;
37 out++;
35 38
36 input = ""; 39 input = "";
37 } else { 40 } else {
38 result.push_back(input.substr(0, divider)); 41 *out = input.substr(0, divider);
42 out++;
39 43
40 input = input.substr(divider+delimiter.length()); 44 input = input.substr(divider+delimiter.length());
41 } 45 }
42 } 46 }
47 }
48
49 template <class Container>
50 Container split(std::string input, std::string delimiter)
51 {
52 Container result;
53
54 split(input, delimiter, std::back_inserter(result));
43 55
44 return result; 56 return result;
45 } 57 }
diff --git a/lib/verb.cpp b/lib/verb.cpp deleted file mode 100644 index 1f45d53..0000000 --- a/lib/verb.cpp +++ /dev/null
@@ -1,64 +0,0 @@
1#include "verbly.h"
2
namespace verbly {

  // Default constructor; does nothing itself.
  verb::verb()
  {

  }

  // Forwards the data source and id to the word base class.
  verb::verb(const data& _data, int _id) : word(_data, _id)
  {

  }

  // Returns the infinitive; identical to infinitive_form().
  // Asserts that the verb is valid.
  std::string verb::base_form() const
  {
    assert(_valid == true);

    return _infinitive;
  }

  // Returns the stored infinitive. Asserts that the verb is valid.
  std::string verb::infinitive_form() const
  {
    assert(_valid == true);

    return _infinitive;
  }

  // Returns the stored past tense. Asserts that the verb is valid.
  std::string verb::past_tense_form() const
  {
    assert(_valid == true);

    return _past_tense;
  }

  // Returns the stored past participle. Asserts that the verb is valid.
  std::string verb::past_participle_form() const
  {
    assert(_valid == true);

    return _past_participle;
  }

  // Returns the stored -ing form. Asserts that the verb is valid.
  std::string verb::ing_form() const
  {
    assert(_valid == true);

    return _ing_form;
  }

  // Returns the stored -s form. Asserts that the verb is valid.
  std::string verb::s_form() const
  {
    assert(_valid == true);

    return _s_form;
  }

  // Returns a frame query restricted to this verb. Asserts that the verb is
  // valid.
  frame_query verb::frames() const
  {
    assert(_valid == true);

    return _data->frames().for_verb(*this);
  }

};
diff --git a/lib/verb.h b/lib/verb.h deleted file mode 100644 index 7a2486e..0000000 --- a/lib/verb.h +++ /dev/null
@@ -1,34 +0,0 @@
1#ifndef VERB_H_BCC929AD
2#define VERB_H_BCC929AD
3
namespace verbly {

  class frame_query;

  // A word that additionally stores five inflected forms and can look up its
  // frames.
  class verb : public word {
    private:
      std::string _infinitive;
      std::string _past_tense;
      std::string _past_participle;
      std::string _ing_form;
      std::string _s_form;

      // verb_query is granted access to the private form fields.
      friend class verb_query;

    public:
      verb();
      verb(const data& _data, int _id);

      // Accessors for the individual forms; base_form() returns the
      // infinitive.
      std::string base_form() const;
      std::string infinitive_form() const;
      std::string past_tense_form() const;
      std::string past_participle_form() const;
      std::string ing_form() const;
      std::string s_form() const;

      // Query for the frames associated with this verb.
      frame_query frames() const;
  };

};
33
34#endif /* end of include guard: VERB_H_BCC929AD */
diff --git a/lib/verb_query.cpp b/lib/verb_query.cpp deleted file mode 100644 index 4e6c253..0000000 --- a/lib/verb_query.cpp +++ /dev/null
@@ -1,315 +0,0 @@
1#include "verbly.h"
2
3namespace verbly {
4
5 verb_query::verb_query(const data& _data) : _data(_data)
6 {
7
8 }
9
10 verb_query& verb_query::limit(int _limit)
11 {
12 if ((_limit > 0) || (_limit == unlimited))
13 {
14 this->_limit = _limit;
15 }
16
17 return *this;
18 }
19
20 verb_query& verb_query::random()
21 {
22 this->_random = true;
23
24 return *this;
25 }
26
27 verb_query& verb_query::except(const verb& _word)
28 {
29 _except.push_back(_word);
30
31 return *this;
32 }
33
34 verb_query& verb_query::rhymes_with(const word& _word)
35 {
36 for (auto rhyme : _word.get_rhymes())
37 {
38 _rhymes.push_back(rhyme);
39 }
40
41 if (dynamic_cast<const verb*>(&_word) != nullptr)
42 {
43 _except.push_back(dynamic_cast<const verb&>(_word));
44 }
45
46 return *this;
47 }
48
49 verb_query& verb_query::rhymes_with(rhyme _r)
50 {
51 _rhymes.push_back(_r);
52
53 return *this;
54 }
55
56 verb_query& verb_query::has_pronunciation()
57 {
58 this->_has_prn = true;
59
60 return *this;
61 }
62
63 verb_query& verb_query::has_rhyming_noun()
64 {
65 _has_rhyming_noun = true;
66
67 return *this;
68 }
69
70 verb_query& verb_query::has_rhyming_adjective()
71 {
72 _has_rhyming_adjective = true;
73
74 return *this;
75 }
76
77 verb_query& verb_query::has_rhyming_adverb()
78 {
79 _has_rhyming_adverb = true;
80
81 return *this;
82 }
83
84 verb_query& verb_query::has_rhyming_verb()
85 {
86 _has_rhyming_verb = true;
87
88 return *this;
89 }
90
91 verb_query& verb_query::with_stress(filter<std::vector<bool>> _arg)
92 {
93 _stress = _arg;
94
95 return *this;
96 }
97
98 verb_query& verb_query::has_frames()
99 {
100 this->_has_frames = true;
101
102 return *this;
103 }
104
105 std::list<verb> verb_query::run() const
106 {
107 std::stringstream construct;
108 construct << "SELECT verb_id, infinitive, past_tense, past_participle, ing_form, s_form FROM verbs";
109 std::list<std::string> conditions;
110 std::list<binding> bindings;
111
112 if (_has_prn)
113 {
114 conditions.push_back("verb_id IN (SELECT verb_id FROM verb_pronunciations)");
115 }
116
117 if (!_rhymes.empty())
118 {
119 std::list<std::string> clauses(_rhymes.size(), "(prerhyme != ? AND rhyme = ?)");
120 std::string cond = "verb_id IN (SELECT verb_id FROM verb_pronunciations WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
121 conditions.push_back(cond);
122
123 for (auto rhy : _rhymes)
124 {
125 bindings.emplace_back(rhy.get_prerhyme());
126 bindings.emplace_back(rhy.get_rhyme());
127 }
128 }
129
130 if (_has_rhyming_noun)
131 {
132 conditions.push_back("verb_id IN (SELECT a.verb_id FROM verbs AS a INNER JOIN verb_pronunciations AS curp ON curp.noun_id = a.adverb_id INNER JOIN noun_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)");
133 }
134
135 if (_has_rhyming_adjective)
136 {
137 conditions.push_back("verb_id IN (SELECT a.verb_id FROM verbs AS a INNER JOIN verb_pronunciations AS curp ON curp.noun_id = a.adverb_id INNER JOIN adjective_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)");
138 }
139
140 if (_has_rhyming_adverb)
141 {
142 conditions.push_back("verb_id IN (SELECT a.verb_id FROM verbs AS a INNER JOIN verb_pronunciations AS curp ON curp.noun_id = a.adverb_id INNER JOIN adverb_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)");
143 }
144
145 if (_has_rhyming_verb)
146 {
147 conditions.push_back("verb_id IN (SELECT a.verb_id FROM verbs AS a INNER JOIN verb_pronunciations AS curp ON curp.noun_id = a.adverb_id INNER JOIN verb_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme AND rhmp.verb_id != curp.verb_id)");
148 }
149
150 if (!_stress.empty())
151 {
152 std::stringstream cond;
153 if (_stress.get_notlogic())
154 {
155 cond << "verb_id NOT IN";
156 } else {
157 cond << "verb_id IN";
158 }
159
160 cond << "(SELECT verb_id FROM verb_pronunciations WHERE ";
161
162 std::function<std::string (filter<std::vector<bool>>, bool)> recur = [&] (filter<std::vector<bool>> f, bool notlogic) -> std::string {
163 switch (f.get_type())
164 {
165 case filter<std::vector<bool>>::type::singleton:
166 {
167 std::ostringstream _val;
168 for (auto syl : f.get_elem())
169 {
170 if (syl)
171 {
172 _val << "1";
173 } else {
174 _val << "0";
175 }
176 }
177
178 bindings.emplace_back(_val.str());
179
180 if (notlogic == f.get_notlogic())
181 {
182 return "stress = ?";
183 } else {
184 return "stress != ?";
185 }
186 }
187
188 case filter<std::vector<bool>>::type::group:
189 {
190 bool truelogic = notlogic != f.get_notlogic();
191
192 std::list<std::string> clauses;
193 std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<std::vector<bool>> f2) {
194 return recur(f2, truelogic);
195 });
196
197 if (truelogic == f.get_orlogic())
198 {
199 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")";
200 } else {
201 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
202 }
203 }
204 }
205 };
206
207 cond << recur(_stress, _stress.get_notlogic());
208 cond << ")";
209 conditions.push_back(cond.str());
210 }
211
212 for (auto except : _except)
213 {
214 conditions.push_back("verb_id != ?");
215 bindings.emplace_back(except._id);
216 }
217
218 if (!_has_frames)
219 {
220 conditions.push_back("verb_id IN (SELECT verb_id FROM verb_groups)");
221 }
222
223 if (!conditions.empty())
224 {
225 construct << " WHERE ";
226 construct << verbly::implode(std::begin(conditions), std::end(conditions), " AND ");
227 }
228
229 if (_random)
230 {
231 construct << " ORDER BY RANDOM()";
232 }
233
234 if (_limit != unlimited)
235 {
236 construct << " LIMIT " << _limit;
237 }
238
239 sqlite3_stmt* ppstmt;
240 std::string query = construct.str();
241 if (sqlite3_prepare_v2(_data.ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
242 {
243 throw std::runtime_error(sqlite3_errmsg(_data.ppdb));
244 }
245
246 int i = 1;
247 for (auto& binding : bindings)
248 {
249 switch (binding.get_type())
250 {
251 case binding::type::integer:
252 {
253 sqlite3_bind_int(ppstmt, i, binding.get_integer());
254
255 break;
256 }
257
258 case binding::type::string:
259 {
260 sqlite3_bind_text(ppstmt, i, binding.get_string().c_str(), binding.get_string().length(), SQLITE_TRANSIENT);
261
262 break;
263 }
264 }
265
266 i++;
267 }
268
269 std::list<verb> output;
270 while (sqlite3_step(ppstmt) == SQLITE_ROW)
271 {
272 verb tnc {_data, sqlite3_column_int(ppstmt, 0)};
273 tnc._infinitive = std::string(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 1)));
274 tnc._past_tense = std::string(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 2)));
275 tnc._past_participle = std::string(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 3)));
276 tnc._ing_form = std::string(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 4)));
277 tnc._s_form = std::string(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 5)));
278
279 output.push_back(tnc);
280 }
281
282 sqlite3_finalize(ppstmt);
283
284 for (auto& verb : output)
285 {
286 query = "SELECT pronunciation, prerhyme, rhyme FROM verb_pronunciations WHERE verb_id = ?";
287 if (sqlite3_prepare_v2(_data.ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
288 {
289 throw std::runtime_error(sqlite3_errmsg(_data.ppdb));
290 }
291
292 sqlite3_bind_int(ppstmt, 1, verb._id);
293
294 while (sqlite3_step(ppstmt) == SQLITE_ROW)
295 {
296 std::string pronunciation(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 0)));
297 auto phonemes = verbly::split<std::list<std::string>>(pronunciation, " ");
298
299 verb.pronunciations.push_back(phonemes);
300
301 if ((sqlite3_column_type(ppstmt, 1) != SQLITE_NULL) && (sqlite3_column_type(ppstmt, 2) != SQLITE_NULL))
302 {
303 std::string prerhyme(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 1)));
304 std::string rhyming(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 2)));
305 verb.rhymes.emplace_back(prerhyme, rhyming);
306 }
307 }
308
309 sqlite3_finalize(ppstmt);
310 }
311
312 return output;
313 }
314
315};
diff --git a/lib/verb_query.h b/lib/verb_query.h deleted file mode 100644 index 566ae37..0000000 --- a/lib/verb_query.h +++ /dev/null
@@ -1,45 +0,0 @@
1#ifndef VERB_QUERY_H_34E5A679
2#define VERB_QUERY_H_34E5A679
3
4namespace verbly {
5
6 class verb_query {
7 public:
8 verb_query(const data& _data);
9
10 verb_query& limit(int _limit);
11 verb_query& random();
12 verb_query& except(const verb& _word);
13 verb_query& rhymes_with(const word& _word);
14 verb_query& rhymes_with(rhyme _r);
15 verb_query& has_pronunciation();
16 verb_query& has_rhyming_noun();
17 verb_query& has_rhyming_adjective();
18 verb_query& has_rhyming_adverb();
19 verb_query& has_rhyming_verb();
20 verb_query& with_stress(filter<std::vector<bool>> _arg);
21
22 verb_query& has_frames();
23
24 std::list<verb> run() const;
25
26 const static int unlimited = -1;
27
28 private:
29 const data& _data;
30 int _limit = unlimited;
31 bool _random = false;
32 std::list<rhyme> _rhymes;
33 std::list<verb> _except;
34 bool _has_prn = false;
35 bool _has_frames = false;
36 bool _has_rhyming_noun = false;
37 bool _has_rhyming_adjective = false;
38 bool _has_rhyming_adverb = false;
39 bool _has_rhyming_verb = false;
40 filter<std::vector<bool>> _stress;
41 };
42
43};
44
45#endif /* end of include guard: VERB_QUERY_H_34E5A679 */
diff --git a/lib/verbly.h b/lib/verbly.h index cfaf5bc..6dfc01a 100644 --- a/lib/verbly.h +++ b/lib/verbly.h
@@ -1,35 +1,17 @@
1#ifndef VERBLY_H_5B39CE50 1#ifndef VERBLY_H_5B39CE50
2#define VERBLY_H_5B39CE50 2#define VERBLY_H_5B39CE50
3 3
4#include <string>
5#include <list>
6#include <sstream>
7#include <algorithm>
8#include <cassert>
9#include <set>
10#include <stdexcept>
11#include <vector>
12#include <map>
13#include <iterator>
14#include <sstream>
15#include <functional>
16#include <iostream>
17#include <new>
18
19#include "util.h" 4#include "util.h"
20#include "data.h" 5#include "database.h"
6#include "filter.h"
7#include "field.h"
8#include "query.h"
9#include "notion.h"
21#include "word.h" 10#include "word.h"
22#include "verb.h" 11#include "group.h"
23#include "adverb.h"
24#include "adjective.h"
25#include "noun.h"
26#include "frame.h" 12#include "frame.h"
27#include "preposition.h" 13#include "lemma.h"
28#include "token.h" 14#include "form.h"
29#include "noun_query.h" 15#include "pronunciation.h"
30#include "adverb_query.h"
31#include "adjective_query.h"
32#include "verb_query.h"
33#include "frame_query.h"
34 16
35#endif /* end of include guard: VERBLY_H_5B39CE50 */ 17#endif /* end of include guard: VERBLY_H_5B39CE50 */
diff --git a/lib/word.cpp b/lib/word.cpp index 49e34a1..3edf2d2 100644 --- a/lib/word.cpp +++ b/lib/word.cpp
@@ -1,60 +1,112 @@
1#include "verbly.h" 1#include "word.h"
2#include <algorithm> 2#include <sqlite3.h>
3#include "form.h"
4#include "util.h"
5#include "database.h"
6#include "query.h"
3 7
4namespace verbly { 8namespace verbly {
5 9
6 rhyme::rhyme(std::string prerhyme, std::string phonemes) : _prerhyme(prerhyme), _rhyme(phonemes) 10 const object word::objectType = object::word;
7 {
8
9 }
10 11
11 std::string rhyme::get_prerhyme() const 12 const std::list<std::string> word::select = {"word_id", "notion_id", "lemma_id", "tag_count", "position", "group_id"};
12 {
13 return _prerhyme;
14 }
15 13
16 std::string rhyme::get_rhyme() const 14 const field word::id = field::integerField(object::word, "word_id");
17 { 15 const field word::tagCount = field::integerField(object::word, "tag_count", true);
18 return _rhyme; 16 const field word::adjectivePosition = field::integerField(object::word, "position", true);
19 } 17
18 const field word::notion = field::joinField(object::word, "notion_id", object::notion);
19 const field word::lemma = field::joinField(object::word, "lemma_id", object::lemma);
20 const field word::group = field::joinField(object::word, "group_id", object::group, true);
21
22 const field word::antonyms = field::selfJoin(object::word, "word_id", "antonymy", "antonym_2_id", "antonym_1_id");
23
24 const field word::specifications = field::selfJoin(object::word, "word_id", "specification", "general_id", "specific_id");
25 const field word::generalizations = field::selfJoin(object::word, "word_id", "specification", "specific_id", "general_id");
20 26
21 bool rhyme::operator==(const rhyme& other) const 27 const field word::pertainyms = field::selfJoin(object::word, "word_id", "pertainymy", "noun_id", "pertainym_id");
28 const field word::antiPertainyms = field::selfJoin(object::word, "word_id", "pertainymy", "pertainym_id", "noun_id");
29
30 const field word::mannernyms = field::selfJoin(object::word, "word_id", "mannernymy", "adjective_id", "mannernym_id");
31 const field word::antiMannernyms = field::selfJoin(object::word, "word_id", "mannernymy", "mannernym_id", "adjective_id");
32
33 const field word::usageTerms = field::selfJoin(object::word, "word_id", "usage", "domain_id", "term_id");
34 const field word::usageDomains = field::selfJoin(object::word, "word_id", "usage", "term_id", "domain_id");
35
36 const field word::topicalTerms = field::selfJoin(object::word, "word_id", "topicality", "domain_id", "term_id");
37 const field word::topicalDomains = field::selfJoin(object::word, "word_id", "topicality", "term_id", "domain_id");
38
39 const field word::regionalTerms = field::selfJoin(object::word, "word_id", "regionality", "domain_id", "term_id");
40 const field word::regionalDomains = field::selfJoin(object::word, "word_id", "regionality", "term_id", "domain_id");
41
42 word::word(const database& db, sqlite3_stmt* row) : db_(&db), valid_(true)
22 { 43 {
23 return std::tie(_prerhyme, _rhyme) == std::tie(other._prerhyme, other._rhyme); 44 id_ = sqlite3_column_int(row, 0);
45 notionId_ = sqlite3_column_int(row, 1);
46 lemmaId_ = sqlite3_column_int(row, 2);
47
48 if (sqlite3_column_type(row, 3) != SQLITE_NULL)
49 {
50 hasTagCount_ = true;
51 tagCount_ = sqlite3_column_int(row, 3);
52 }
53
54 if (sqlite3_column_type(row, 4) != SQLITE_NULL)
55 {
56 adjectivePosition_ = static_cast<positioning>(sqlite3_column_int(row, 4));
57 }
58
59 if (sqlite3_column_type(row, 5) != SQLITE_NULL)
60 {
61 hasGroup_ = true;
62 groupId_ = sqlite3_column_int(row, 5);
63 }
24 } 64 }
25 65
26 word::word() 66 const notion& word::getNotion() const
27 { 67 {
68 if (!valid_)
69 {
70 throw std::domain_error("Bad access to uninitialized word");
71 }
72
73 if (!notion_)
74 {
75 notion_ = db_->notions(notion::id == notionId_).first();
76 }
28 77
78 return notion_;
29 } 79 }
30 80
31 word::word(const data& _data, int _id) : _data(&_data), _id(_id), _valid(true) 81 const lemma& word::getLemma() const
32 { 82 {
83 if (!valid_)
84 {
85 throw std::domain_error("Bad access to uninitialized word");
86 }
33 87
88 if (!lemma_)
89 {
90 lemma_ = db_->lemmas(lemma::id == lemmaId_).first();
91 }
92
93 return lemma_;
34 } 94 }
35 95
36 std::list<rhyme> word::get_rhymes() const 96 std::string word::getBaseForm() const
37 { 97 {
38 assert(_valid == true); 98 return getLemma().getBaseForm().getText();
39
40 return rhymes;
41 } 99 }
42 100
43 bool word::starts_with_vowel_sound() const 101 std::list<std::string> word::getInflections(inflection category) const
44 { 102 {
45 assert(_valid == true); 103 std::list<std::string> result;
46 104 for (const form& infl : getLemma().getInflections(category))
47 if (pronunciations.size() > 0)
48 { 105 {
49 return std::any_of(std::begin(pronunciations), std::end(pronunciations), [] (std::list<std::string> phonemes) { 106 result.push_back(infl.getText());
50 return (phonemes.front().find_first_of("012") != std::string::npos);
51 });
52 } else {
53 // If the word is not in CMUDICT, fall back to checking whether the first letter is a vowel
54 // Not perfect but will work in most cases
55 char ch = tolower(base_form().front());
56 return (ch == 'a') || (ch == 'e') || (ch == 'i') || (ch == 'o') || (ch == 'u');
57 } 107 }
108
109 return result;
58 } 110 }
59 111
60}; 112};
diff --git a/lib/word.h b/lib/word.h index 08797a3..f71dad9 100644 --- a/lib/word.h +++ b/lib/word.h
@@ -1,48 +1,173 @@
1#ifndef WORD_H_8FC89498 1#ifndef WORD_H_DF91B1B4
2#define WORD_H_8FC89498 2#define WORD_H_DF91B1B4
3
4#include <stdexcept>
5#include <map>
6#include "field.h"
7#include "filter.h"
8#include "notion.h"
9#include "lemma.h"
10#include "group.h"
11
12struct sqlite3_stmt;
3 13
4namespace verbly { 14namespace verbly {
5 15
6 class rhyme { 16 class database;
7 public: 17
8 rhyme(std::string prerhyme, std::string phonemes); 18 class word {
19 public:
20
21 // Default constructor
22
23 word() = default;
24
25 // Construct from database
26
27 word(const database& db, sqlite3_stmt* row);
28
29 // Accessors
30
31 operator bool() const
32 {
33 return valid_;
34 }
35
36 int getId() const
37 {
38 if (!valid_)
39 {
40 throw std::domain_error("Bad access to uninitialized word");
41 }
9 42
10 std::string get_prerhyme() const; 43 return id_;
11 std::string get_rhyme() const; 44 }
45
46 bool hasTagCount() const
47 {
48 if (!valid_)
49 {
50 throw std::domain_error("Bad access to uninitialized word");
51 }
12 52
13 bool operator==(const rhyme& other) const; 53 return hasTagCount_;
54 }
55
56 int getTagCount() const
57 {
58 if (!valid_)
59 {
60 throw std::domain_error("Bad access to uninitialized word");
61 }
14 62
15 private: 63 if (!hasTagCount_)
16 std::string _prerhyme; 64 {
17 std::string _rhyme; 65 throw std::domain_error("Word has no tag count");
18 }; 66 }
19
20 class word {
21 protected:
22 const data* _data;
23 int _id;
24 bool _valid = false;
25 67
26 std::list<std::list<std::string>> pronunciations; 68 return tagCount_;
27 std::list<rhyme> rhymes; 69 }
70
71 bool hasAdjectivePositioning() const
72 {
73 if (!valid_)
74 {
75 throw std::domain_error("Bad access to uninitialized word");
76 }
28 77
29 word(); 78 return (adjectivePosition_ != positioning::undefined);
30 word(const data& _data, int _id); 79 }
80
81 positioning getAdjectivePosition() const
82 {
83 if (!valid_)
84 {
85 throw std::domain_error("Bad access to uninitialized word");
86 }
31 87
32 friend class adjective_query; 88 if (adjectivePosition_ == positioning::undefined)
33 friend class verb_query; 89 {
34 friend class noun_query; 90 throw std::domain_error("Word has no adjective position");
35 friend class adverb_query; 91 }
36 friend class frame_query;
37 friend class preposition_query;
38
39 public:
40 virtual std::string base_form() const = 0;
41 92
42 std::list<rhyme> get_rhymes() const; 93 return adjectivePosition_;
43 bool starts_with_vowel_sound() const; 94 }
95
96 const notion& getNotion() const;
97
98 const lemma& getLemma() const;
99
100 // Convenience accessors
101
102 std::string getBaseForm() const;
103
104 std::list<std::string> getInflections(inflection infl) const;
105
106 // Type info
107
108 static const object objectType;
109
110 static const std::list<std::string> select;
111
112 // Query fields
113
114 static const field id;
115 static const field tagCount;
116 static const field adjectivePosition;
117
118 operator filter() const
119 {
120 return (id == id_);
121 }
122
123 // Relationships with other objects
124
125 static const field notion;
126 static const field lemma;
127 static const field group;
128
129 // Relationships with self
130
131 static const field antonyms;
132
133 static const field specifications;
134 static const field generalizations;
135
136 static const field pertainyms;
137 static const field antiPertainyms;
138
139 static const field mannernyms;
140 static const field antiMannernyms;
141
142 static const field usageTerms;
143 static const field usageDomains;
144
145 static const field topicalTerms;
146 static const field topicalDomains;
147
148 static const field regionalTerms;
149 static const field regionalDomains;
150
151 private:
152 bool valid_ = false;
153
154 int id_;
155 bool hasTagCount_ = false;
156 int tagCount_;
157 positioning adjectivePosition_ = positioning::undefined;
158 int notionId_;
159 int lemmaId_;
160 bool hasGroup_ = false;
161 int groupId_;
162
163 const database* db_;
164
165 mutable class notion notion_;
166 mutable class lemma lemma_;
167 mutable class group group_;
168
44 }; 169 };
45 170
46}; 171};
47 172
48#endif /* end of include guard: WORD_H_8FC89498 */ 173#endif /* end of include guard: WORD_H_DF91B1B4 */