diff options
author | Kelly Rauchenberger <fefferburbia@gmail.com> | 2017-01-16 18:02:50 -0500 |
---|---|---|
committer | Kelly Rauchenberger <fefferburbia@gmail.com> | 2017-01-16 18:02:50 -0500 |
commit | 6746da6edd7d9d50efe374eabbb79a3cac882d81 (patch) | |
tree | ff20917e08b08d36b9541c1371106596e7bec442 | |
parent | 4af7e55733098ca42f75a4ffaca1b0f6bab4dd36 (diff) | |
download | verbly-6746da6edd7d9d50efe374eabbb79a3cac882d81.tar.gz verbly-6746da6edd7d9d50efe374eabbb79a3cac882d81.tar.bz2 verbly-6746da6edd7d9d50efe374eabbb79a3cac882d81.zip |
Started structural rewrite
The new object structure was designed to build on the existing WordNet structure, while also adding in all of the data that we get from other sources. More information about this can be found on the project wiki. The generator has already been completely rewritten to generate a datafile that uses the new structure. In addition, a number of indexes are created, which does double the size of the datafile, but also allows for much faster lookups. Finally, the new generator is written modularly and is a lot more readable than the old one. The verbly interface to the new object structure has mostly been completed, but has not been tested fully. There is a completely new search API which utilizes a lot of operator overloading; documentation on how to use it should go up at some point. Token processing and verb frames are currently unimplemented. The source for these has been left in the repository for now.
78 files changed, 8971 insertions, 8696 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 9c39d99..61fcce2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt | |||
@@ -4,8 +4,10 @@ project (verbly) | |||
4 | find_package(PkgConfig) | 4 | find_package(PkgConfig) |
5 | pkg_check_modules(sqlite3 sqlite3>=3.8.3 REQUIRED) | 5 | pkg_check_modules(sqlite3 sqlite3>=3.8.3 REQUIRED) |
6 | 6 | ||
7 | set(CMAKE_BUILD_TYPE Debug) | ||
8 | |||
7 | include_directories(vendor/json) | 9 | include_directories(vendor/json) |
8 | add_library(verbly lib/data.cpp lib/adjective.cpp lib/noun.cpp lib/verb.cpp lib/adverb.cpp lib/token.cpp lib/word.cpp lib/frame.cpp lib/preposition.cpp lib/adjective_query.cpp lib/adverb_query.cpp lib/noun_query.cpp lib/verb_query.cpp lib/frame_query.cpp) | 10 | add_library(verbly lib/filter.cpp lib/field.cpp lib/notion.cpp lib/word.cpp lib/group.cpp lib/frame.cpp lib/lemma.cpp lib/form.cpp lib/pronunciation.cpp lib/statement.cpp lib/binding.cpp lib/database.cpp) |
9 | set_property(TARGET verbly PROPERTY CXX_STANDARD 11) | 11 | set_property(TARGET verbly PROPERTY CXX_STANDARD 11) |
10 | set_property(TARGET verbly PROPERTY CXX_STANDARD_REQUIRED ON) | 12 | set_property(TARGET verbly PROPERTY CXX_STANDARD_REQUIRED ON) |
11 | target_link_libraries(verbly ${sqlite3_LIBRARIES}) | 13 | target_link_libraries(verbly ${sqlite3_LIBRARIES}) |
diff --git a/generator/CMakeLists.txt b/generator/CMakeLists.txt index 552526d..4f78eb8 100644 --- a/generator/CMakeLists.txt +++ b/generator/CMakeLists.txt | |||
@@ -1,12 +1,12 @@ | |||
1 | cmake_minimum_required (VERSION 2.6) | 1 | cmake_minimum_required (VERSION 3.1) |
2 | project (generator) | 2 | project (generator) |
3 | 3 | ||
4 | find_package(PkgConfig) | 4 | find_package(PkgConfig) |
5 | pkg_check_modules(sqlite3 sqlite3 REQUIRED) | 5 | pkg_check_modules(sqlite3 sqlite3 REQUIRED) |
6 | find_package(libxml2 REQUIRED) | 6 | find_package(libxml2 REQUIRED) |
7 | 7 | ||
8 | include_directories(${sqlite3_INCLUDE_DIR} ${LIBXML2_INCLUDE_DIR} ../vendor/json/src) | 8 | include_directories(${sqlite3_INCLUDE_DIR} ${LIBXML2_INCLUDE_DIR} ../vendor/json) |
9 | add_executable(generator generator.cpp) | 9 | add_executable(generator notion.cpp word.cpp lemma.cpp form.cpp pronunciation.cpp group.cpp frame.cpp part.cpp selrestr.cpp database.cpp field.cpp generator.cpp main.cpp) |
10 | set_property(TARGET generator PROPERTY CXX_STANDARD 11) | 10 | set_property(TARGET generator PROPERTY CXX_STANDARD 11) |
11 | set_property(TARGET generator PROPERTY CXX_STANDARD_REQUIRED ON) | 11 | set_property(TARGET generator PROPERTY CXX_STANDARD_REQUIRED ON) |
12 | target_link_libraries(generator ${sqlite3_LIBRARIES} ${LIBXML2_LIBRARIES}) | 12 | target_link_libraries(generator ${sqlite3_LIBRARIES} ${LIBXML2_LIBRARIES}) |
diff --git a/generator/database.cpp b/generator/database.cpp new file mode 100644 index 0000000..c7e4cfa --- /dev/null +++ b/generator/database.cpp | |||
@@ -0,0 +1,173 @@ | |||
1 | #include "database.h" | ||
2 | #include <sqlite3.h> | ||
3 | #include <cassert> | ||
4 | #include <fstream> | ||
5 | #include <stdexcept> | ||
6 | #include <cstdio> | ||
7 | #include <sstream> | ||
8 | #include "field.h" | ||
9 | #include "../lib/util.h" | ||
10 | |||
11 | namespace verbly { | ||
12 | namespace generator { | ||
13 | |||
14 | sqlite3_error::sqlite3_error( | ||
15 | const std::string& what, | ||
16 | const std::string& db_err) : | ||
17 | what_(what + " (" + db_err + ")"), | ||
18 | db_err_(db_err) | ||
19 | { | ||
20 | } | ||
21 | |||
22 | const char* sqlite3_error::what() const noexcept | ||
23 | { | ||
24 | return what_.c_str(); | ||
25 | } | ||
26 | |||
27 | const char* sqlite3_error::db_err() const noexcept | ||
28 | { | ||
29 | return db_err_.c_str(); | ||
30 | } | ||
31 | |||
32 | database::database(std::string path) | ||
33 | { | ||
34 | // If there is already a file at this path, overwrite it. | ||
35 | if (std::ifstream(path)) | ||
36 | { | ||
37 | if (std::remove(path.c_str())) | ||
38 | { | ||
39 | throw std::logic_error("Could not overwrite file at path"); | ||
40 | } | ||
41 | } | ||
42 | |||
43 | if (sqlite3_open_v2(path.c_str(), &ppdb_, SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE, NULL) != SQLITE_OK) | ||
44 | { | ||
45 | // We still have to free the resources allocated. In the event that | ||
46 | // allocation failed, ppdb will be null and sqlite3_close_v2 will just | ||
47 | // ignore it. | ||
48 | std::string errmsg(sqlite3_errmsg(ppdb_)); | ||
49 | sqlite3_close_v2(ppdb_); | ||
50 | |||
51 | throw sqlite3_error("Could not create output datafile", errmsg); | ||
52 | } | ||
53 | } | ||
54 | |||
55 | database::database(database&& other) : database() | ||
56 | { | ||
57 | swap(*this, other); | ||
58 | } | ||
59 | |||
60 | database& database::operator=(database&& other) | ||
61 | { | ||
62 | swap(*this, other); | ||
63 | |||
64 | return *this; | ||
65 | } | ||
66 | |||
67 | void swap(database& first, database& second) | ||
68 | { | ||
69 | std::swap(first.ppdb_, second.ppdb_); | ||
70 | } | ||
71 | |||
72 | database::~database() | ||
73 | { | ||
74 | sqlite3_close_v2(ppdb_); | ||
75 | } | ||
76 | |||
77 | void database::runQuery(std::string query) | ||
78 | { | ||
79 | // This can only happen when doing bad things with move semantics. | ||
80 | assert(ppdb_ != nullptr); | ||
81 | |||
82 | sqlite3_stmt* ppstmt; | ||
83 | |||
84 | if (sqlite3_prepare_v2(ppdb_, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
85 | { | ||
86 | throw sqlite3_error("Error writing to database", sqlite3_errmsg(ppdb_)); | ||
87 | } | ||
88 | |||
89 | int result = sqlite3_step(ppstmt); | ||
90 | sqlite3_finalize(ppstmt); | ||
91 | |||
92 | if (result != SQLITE_DONE) | ||
93 | { | ||
94 | throw sqlite3_error("Error writing to database", sqlite3_errmsg(ppdb_)); | ||
95 | } | ||
96 | } | ||
97 | |||
98 | void database::insertIntoTable(std::string table, std::list<field> fields) | ||
99 | { | ||
100 | // This can only happen when doing bad things with move semantics. | ||
101 | assert(ppdb_ != nullptr); | ||
102 | |||
103 | // This shouldn't happen. | ||
104 | assert(!fields.empty()); | ||
105 | |||
106 | std::list<std::string> fieldNames; | ||
107 | std::list<std::string> qs; | ||
108 | for (field& f : fields) | ||
109 | { | ||
110 | fieldNames.push_back(f.getName()); | ||
111 | qs.push_back("?"); | ||
112 | } | ||
113 | |||
114 | std::ostringstream query; | ||
115 | query << "INSERT INTO "; | ||
116 | query << table; | ||
117 | query << " ("; | ||
118 | query << implode(std::begin(fieldNames), std::end(fieldNames), ", "); | ||
119 | query << ") VALUES ("; | ||
120 | query << implode(std::begin(qs), std::end(qs), ", "); | ||
121 | query << ")"; | ||
122 | |||
123 | std::string query_str = query.str(); | ||
124 | |||
125 | sqlite3_stmt* ppstmt; | ||
126 | |||
127 | if (sqlite3_prepare_v2(ppdb_, query_str.c_str(), query_str.length(), &ppstmt, NULL) != SQLITE_OK) | ||
128 | { | ||
129 | throw sqlite3_error("Error writing to database", sqlite3_errmsg(ppdb_)); | ||
130 | } | ||
131 | |||
132 | int i = 1; | ||
133 | for (field& f : fields) | ||
134 | { | ||
135 | switch (f.getType()) | ||
136 | { | ||
137 | case field::type::integer: | ||
138 | { | ||
139 | sqlite3_bind_int(ppstmt, i, f.getInteger()); | ||
140 | |||
141 | break; | ||
142 | } | ||
143 | |||
144 | case field::type::string: | ||
145 | { | ||
146 | sqlite3_bind_text(ppstmt, i, f.getString().c_str(), f.getString().length(), SQLITE_TRANSIENT); | ||
147 | |||
148 | break; | ||
149 | } | ||
150 | |||
151 | case field::type::invalid: | ||
152 | { | ||
153 | // Fields can only be invalid when doing bad things with move semantics. | ||
154 | assert(false); | ||
155 | |||
156 | break; | ||
157 | } | ||
158 | } | ||
159 | |||
160 | i++; | ||
161 | } | ||
162 | |||
163 | int result = sqlite3_step(ppstmt); | ||
164 | sqlite3_finalize(ppstmt); | ||
165 | |||
166 | if (result != SQLITE_DONE) | ||
167 | { | ||
168 | throw sqlite3_error("Error writing to database", sqlite3_errmsg(ppdb_)); | ||
169 | } | ||
170 | } | ||
171 | |||
172 | }; | ||
173 | }; | ||
diff --git a/generator/database.h b/generator/database.h new file mode 100644 index 0000000..15cdff5 --- /dev/null +++ b/generator/database.h | |||
@@ -0,0 +1,73 @@ | |||
1 | #ifndef DATABASE_H_0B0A47D2 | ||
2 | #define DATABASE_H_0B0A47D2 | ||
3 | |||
4 | #include <string> | ||
5 | #include <exception> | ||
6 | #include <list> | ||
7 | |||
8 | struct sqlite3; | ||
9 | |||
10 | namespace verbly { | ||
11 | namespace generator { | ||
12 | |||
13 | class field; | ||
14 | |||
// Exception type thrown by the generator's database wrapper when a SQLite
// call fails. It carries both a combined description and the database message
// on its own.
class sqlite3_error : public std::exception {
public:

  // `what` describes the failed operation; `db_err` is the message obtained
  // from the database.
  sqlite3_error(const std::string& what, const std::string& db_err);

  // Combined description.
  const char* what() const noexcept override;

  // Database message only.
  const char* db_err() const noexcept;

private:

  std::string what_;
  std::string db_err_;

};
28 | |||
29 | class database { | ||
30 | public: | ||
31 | |||
32 | // Constructor | ||
33 | |||
34 | explicit database(std::string path); | ||
35 | |||
36 | // Disable copying | ||
37 | |||
38 | database(const database& other) = delete; | ||
39 | database& operator=(const database& other) = delete; | ||
40 | |||
41 | // Move constructor and move assignment | ||
42 | |||
43 | database(database&& other); | ||
44 | database& operator=(database&& other); | ||
45 | |||
46 | // Swap | ||
47 | |||
48 | friend void swap(database& first, database& second); | ||
49 | |||
50 | // Destructor | ||
51 | |||
52 | ~database(); | ||
53 | |||
54 | // Actions | ||
55 | |||
56 | void runQuery(std::string query); | ||
57 | |||
58 | void insertIntoTable(std::string table, std::list<field> fields); | ||
59 | |||
60 | private: | ||
61 | |||
62 | database() | ||
63 | { | ||
64 | } | ||
65 | |||
66 | sqlite3* ppdb_ = nullptr; | ||
67 | |||
68 | }; | ||
69 | |||
70 | }; | ||
71 | }; | ||
72 | |||
73 | #endif /* end of include guard: DATABASE_H_0B0A47D2 */ | ||
diff --git a/generator/field.cpp b/generator/field.cpp new file mode 100644 index 0000000..84b2f91 --- /dev/null +++ b/generator/field.cpp | |||
@@ -0,0 +1,193 @@ | |||
1 | #include "field.h" | ||
2 | #include <stdexcept> | ||
3 | #include <utility> | ||
4 | |||
5 | namespace verbly { | ||
6 | namespace generator { | ||
7 | |||
8 | field::field(const field& other) | ||
9 | { | ||
10 | type_ = other.type_; | ||
11 | name_ = other.name_; | ||
12 | |||
13 | switch (type_) | ||
14 | { | ||
15 | case type::integer: | ||
16 | { | ||
17 | integer_ = other.integer_; | ||
18 | |||
19 | break; | ||
20 | } | ||
21 | |||
22 | case type::string: | ||
23 | { | ||
24 | new(&string_) std::string(other.string_); | ||
25 | |||
26 | break; | ||
27 | } | ||
28 | |||
29 | case type::invalid: | ||
30 | { | ||
31 | break; | ||
32 | } | ||
33 | } | ||
34 | } | ||
35 | |||
36 | field::field(field&& other) : field() | ||
37 | { | ||
38 | swap(*this, other); | ||
39 | } | ||
40 | |||
41 | field& field::operator=(field other) | ||
42 | { | ||
43 | swap(*this, other); | ||
44 | |||
45 | return *this; | ||
46 | } | ||
47 | |||
48 | void swap(field& first, field& second) | ||
49 | { | ||
50 | using type = field::type; | ||
51 | |||
52 | type tempType = first.type_; | ||
53 | std::string tempName = std::move(first.name_); | ||
54 | int tempInteger; | ||
55 | std::string tempString; | ||
56 | |||
57 | switch (first.type_) | ||
58 | { | ||
59 | case type::integer: | ||
60 | { | ||
61 | tempInteger = first.integer_; | ||
62 | |||
63 | break; | ||
64 | } | ||
65 | |||
66 | case type::string: | ||
67 | { | ||
68 | tempString = std::move(tempString); | ||
69 | |||
70 | break; | ||
71 | } | ||
72 | |||
73 | case type::invalid: | ||
74 | { | ||
75 | break; | ||
76 | } | ||
77 | } | ||
78 | |||
79 | first.~field(); | ||
80 | |||
81 | first.type_ = second.type_; | ||
82 | first.name_ = std::move(second.name_); | ||
83 | |||
84 | switch (second.type_) | ||
85 | { | ||
86 | case type::integer: | ||
87 | { | ||
88 | first.integer_ = second.integer_; | ||
89 | |||
90 | break; | ||
91 | } | ||
92 | |||
93 | case type::string: | ||
94 | { | ||
95 | new(&first.string_) std::string(std::move(second.string_)); | ||
96 | |||
97 | break; | ||
98 | } | ||
99 | |||
100 | case type::invalid: | ||
101 | { | ||
102 | break; | ||
103 | } | ||
104 | } | ||
105 | |||
106 | second.~field(); | ||
107 | |||
108 | second.type_ = tempType; | ||
109 | second.name_ = std::move(tempName); | ||
110 | |||
111 | switch (tempType) | ||
112 | { | ||
113 | case type::integer: | ||
114 | { | ||
115 | second.integer_ = tempInteger; | ||
116 | |||
117 | break; | ||
118 | } | ||
119 | |||
120 | case type::string: | ||
121 | { | ||
122 | new(&second.string_) std::string(std::move(tempString)); | ||
123 | |||
124 | break; | ||
125 | } | ||
126 | |||
127 | case type::invalid: | ||
128 | { | ||
129 | break; | ||
130 | } | ||
131 | } | ||
132 | } | ||
133 | |||
134 | field::~field() | ||
135 | { | ||
136 | switch (type_) | ||
137 | { | ||
138 | case type::string: | ||
139 | { | ||
140 | using string_type = std::string; | ||
141 | string_.~string_type(); | ||
142 | |||
143 | break; | ||
144 | } | ||
145 | |||
146 | case type::integer: | ||
147 | case type::invalid: | ||
148 | { | ||
149 | break; | ||
150 | } | ||
151 | } | ||
152 | } | ||
153 | |||
154 | field::field( | ||
155 | std::string name, | ||
156 | int arg) : | ||
157 | type_(type::integer), | ||
158 | name_(name), | ||
159 | integer_(arg) | ||
160 | { | ||
161 | } | ||
162 | |||
163 | int field::getInteger() const | ||
164 | { | ||
165 | if (type_ != type::integer) | ||
166 | { | ||
167 | throw std::domain_error("field::getInteger called on non-integer field"); | ||
168 | } | ||
169 | |||
170 | return integer_; | ||
171 | } | ||
172 | |||
173 | field::field( | ||
174 | std::string name, | ||
175 | std::string arg) : | ||
176 | type_(type::string), | ||
177 | name_(name) | ||
178 | { | ||
179 | new(&string_) std::string(arg); | ||
180 | } | ||
181 | |||
182 | std::string field::getString() const | ||
183 | { | ||
184 | if (type_ != type::string) | ||
185 | { | ||
186 | throw std::domain_error("field::getString called on non-string field"); | ||
187 | } | ||
188 | |||
189 | return string_; | ||
190 | } | ||
191 | |||
192 | }; | ||
193 | }; | ||
diff --git a/generator/field.h b/generator/field.h new file mode 100644 index 0000000..1fbabfc --- /dev/null +++ b/generator/field.h | |||
@@ -0,0 +1,76 @@ | |||
1 | #ifndef BINDING_H_CAE0B18E | ||
2 | #define BINDING_H_CAE0B18E | ||
3 | |||
4 | #include <string> | ||
5 | |||
6 | namespace verbly { | ||
7 | namespace generator { | ||
8 | |||
// A named value destined for one column of a database row. The value is
// either an integer or a string; the type tag records which union member is
// active.
class field {
public:

  // Which payload, if any, the field currently holds.
  enum class type {
    invalid,
    integer,
    string
  };

  // Copy and move constructors
  field(const field& other);
  field(field&& other);

  // Assignment (the argument is taken by value)
  field& operator=(field other);

  // Swap
  friend void swap(field& first, field& second);

  // Destructor
  ~field();

  // Generic accessors

  type getType() const { return type_; }

  std::string getName() const { return name_; }

  // Integer fields; getInteger throws on any other type of field.

  field(std::string name, int arg);

  int getInteger() const;

  // String fields; getString throws on any other type of field.

  field(std::string name, std::string arg);

  std::string getString() const;

private:

  // Creates an invalid field with no active payload. The body must stay
  // user-provided because the union has a member with a non-trivial
  // constructor.
  field()
  {
  }

  union {
    int integer_;
    std::string string_;
  };

  type type_ = type::invalid;
  std::string name_;
};
72 | |||
73 | }; | ||
74 | }; | ||
75 | |||
76 | #endif /* end of include guard: BINDING_H_CAE0B18E */ | ||
diff --git a/generator/form.cpp b/generator/form.cpp new file mode 100644 index 0000000..6be9d47 --- /dev/null +++ b/generator/form.cpp | |||
@@ -0,0 +1,53 @@ | |||
1 | #include "form.h" | ||
2 | #include <algorithm> | ||
3 | #include <list> | ||
4 | #include "database.h" | ||
5 | #include "field.h" | ||
6 | #include "pronunciation.h" | ||
7 | |||
8 | namespace verbly { | ||
9 | namespace generator { | ||
10 | |||
11 | int form::nextId_ = 0; | ||
12 | |||
13 | form::form(std::string text) : | ||
14 | id_(nextId_++), | ||
15 | text_(text), | ||
16 | complexity_(std::count(std::begin(text), std::end(text), ' ') + 1), | ||
17 | proper_(std::any_of(std::begin(text), std::end(text), std::isupper)) | ||
18 | { | ||
19 | } | ||
20 | |||
21 | void form::addPronunciation(const pronunciation& p) | ||
22 | { | ||
23 | pronunciations_.insert(&p); | ||
24 | } | ||
25 | |||
26 | database& operator<<(database& db, const form& arg) | ||
27 | { | ||
28 | // Serialize the form first. | ||
29 | { | ||
30 | std::list<field> fields; | ||
31 | fields.emplace_back("form_id", arg.getId()); | ||
32 | fields.emplace_back("form", arg.getText()); | ||
33 | fields.emplace_back("complexity", arg.getComplexity()); | ||
34 | fields.emplace_back("proper", arg.isProper()); | ||
35 | |||
36 | db.insertIntoTable("forms", std::move(fields)); | ||
37 | } | ||
38 | |||
39 | // Then, serialize the form/pronunciation relationship. | ||
40 | for (const pronunciation* p : arg.getPronunciations()) | ||
41 | { | ||
42 | std::list<field> fields; | ||
43 | fields.emplace_back("form_id", arg.getId()); | ||
44 | fields.emplace_back("pronunciation_id", p->getId()); | ||
45 | |||
46 | db.insertIntoTable("forms_pronunciations", std::move(fields)); | ||
47 | } | ||
48 | |||
49 | return db; | ||
50 | } | ||
51 | |||
52 | }; | ||
53 | }; | ||
diff --git a/generator/form.h b/generator/form.h new file mode 100644 index 0000000..5576035 --- /dev/null +++ b/generator/form.h | |||
@@ -0,0 +1,71 @@ | |||
1 | #ifndef FORM_H_7EFBC970 | ||
2 | #define FORM_H_7EFBC970 | ||
3 | |||
4 | #include <string> | ||
5 | #include <set> | ||
6 | |||
7 | namespace verbly { | ||
8 | namespace generator { | ||
9 | |||
10 | class pronunciation; | ||
11 | class database; | ||
12 | |||
13 | class form { | ||
14 | public: | ||
15 | |||
16 | // Constructor | ||
17 | |||
18 | explicit form(std::string text); | ||
19 | |||
20 | // Mutators | ||
21 | |||
22 | void addPronunciation(const pronunciation& p); | ||
23 | |||
24 | // Accessors | ||
25 | |||
26 | int getId() const | ||
27 | { | ||
28 | return id_; | ||
29 | } | ||
30 | |||
31 | std::string getText() const | ||
32 | { | ||
33 | return text_; | ||
34 | } | ||
35 | |||
36 | int getComplexity() const | ||
37 | { | ||
38 | return complexity_; | ||
39 | } | ||
40 | |||
41 | bool isProper() const | ||
42 | { | ||
43 | return proper_; | ||
44 | } | ||
45 | |||
46 | std::set<const pronunciation*> getPronunciations() const | ||
47 | { | ||
48 | return pronunciations_; | ||
49 | } | ||
50 | |||
51 | private: | ||
52 | |||
53 | static int nextId_; | ||
54 | |||
55 | const int id_; | ||
56 | const std::string text_; | ||
57 | const int complexity_; | ||
58 | const bool proper_; | ||
59 | |||
60 | std::set<const pronunciation*> pronunciations_; | ||
61 | |||
62 | }; | ||
63 | |||
64 | // Serializer | ||
65 | |||
66 | database& operator<<(database& db, const form& arg); | ||
67 | |||
68 | }; | ||
69 | }; | ||
70 | |||
71 | #endif /* end of include guard: FORM_H_7EFBC970 */ | ||
diff --git a/generator/frame.cpp b/generator/frame.cpp new file mode 100644 index 0000000..9f0653f --- /dev/null +++ b/generator/frame.cpp | |||
@@ -0,0 +1,83 @@ | |||
1 | #include "frame.h" | ||
2 | #include "database.h" | ||
3 | #include "field.h" | ||
4 | |||
5 | namespace verbly { | ||
6 | namespace generator { | ||
7 | |||
8 | int frame::nextId_ = 0; | ||
9 | |||
10 | frame::frame() : id_(nextId_++) | ||
11 | { | ||
12 | } | ||
13 | |||
14 | void frame::push_back(part fp) | ||
15 | { | ||
16 | parts_.push_back(std::move(fp)); | ||
17 | } | ||
18 | |||
19 | database& operator<<(database& db, const frame& arg) | ||
20 | { | ||
21 | std::list<field> fields; | ||
22 | fields.emplace_back("frame_id", arg.getId()); | ||
23 | |||
24 | nlohmann::json jsonParts; | ||
25 | for (const part& p : arg) | ||
26 | { | ||
27 | nlohmann::json jsonPart; | ||
28 | jsonPart["type"] = static_cast<int>(p.getType()); | ||
29 | |||
30 | switch (p.getType()) | ||
31 | { | ||
32 | case part::type::noun_phrase: | ||
33 | { | ||
34 | jsonPart["role"] = p.getNounRole(); | ||
35 | jsonPart["selrestrs"] = p.getNounSelrestrs().toJson(); | ||
36 | jsonPart["synrestrs"] = p.getNounSynrestrs(); | ||
37 | |||
38 | break; | ||
39 | } | ||
40 | |||
41 | case part::type::preposition: | ||
42 | { | ||
43 | jsonPart["choices"] = p.getPrepositionChoices(); | ||
44 | jsonPart["literal"] = p.isPrepositionLiteral(); | ||
45 | |||
46 | break; | ||
47 | } | ||
48 | |||
49 | case part::type::literal: | ||
50 | { | ||
51 | jsonPart["value"] = p.getLiteralValue(); | ||
52 | |||
53 | break; | ||
54 | } | ||
55 | |||
56 | case part::type::verb: | ||
57 | case part::type::adjective: | ||
58 | case part::type::adverb: | ||
59 | { | ||
60 | break; | ||
61 | } | ||
62 | |||
63 | case part::type::invalid: | ||
64 | { | ||
65 | // Invalid parts should not be serialized. | ||
66 | assert(false); | ||
67 | |||
68 | break; | ||
69 | } | ||
70 | } | ||
71 | |||
72 | jsonParts.emplace_back(std::move(jsonPart)); | ||
73 | } | ||
74 | |||
75 | fields.emplace_back("data", jsonParts.dump()); | ||
76 | |||
77 | db.insertIntoTable("frames", std::move(fields)); | ||
78 | |||
79 | return db; | ||
80 | } | ||
81 | |||
82 | }; | ||
83 | }; | ||
diff --git a/generator/frame.h b/generator/frame.h new file mode 100644 index 0000000..411ce6c --- /dev/null +++ b/generator/frame.h | |||
@@ -0,0 +1,59 @@ | |||
1 | #ifndef FRAME_H_26770FF1 | ||
2 | #define FRAME_H_26770FF1 | ||
3 | |||
4 | #include <list> | ||
5 | #include "part.h" | ||
6 | |||
7 | namespace verbly { | ||
8 | namespace generator { | ||
9 | |||
10 | class database; | ||
11 | |||
12 | class frame { | ||
13 | public: | ||
14 | |||
15 | // Aliases | ||
16 | |||
17 | using const_iterator = std::list<part>::const_iterator; | ||
18 | |||
19 | // Constructor | ||
20 | |||
21 | frame(); | ||
22 | |||
23 | // Mutators | ||
24 | |||
25 | void push_back(part fp); | ||
26 | |||
27 | // Accessors | ||
28 | |||
29 | int getId() const | ||
30 | { | ||
31 | return id_; | ||
32 | } | ||
33 | |||
34 | const_iterator begin() const | ||
35 | { | ||
36 | return std::begin(parts_); | ||
37 | } | ||
38 | |||
39 | const_iterator end() const | ||
40 | { | ||
41 | return std::end(parts_); | ||
42 | } | ||
43 | |||
44 | private: | ||
45 | |||
46 | static int nextId_; | ||
47 | |||
48 | const int id_; | ||
49 | |||
50 | std::list<part> parts_; | ||
51 | |||
52 | }; | ||
53 | |||
54 | database& operator<<(database& db, const frame& arg); | ||
55 | |||
56 | }; | ||
57 | }; | ||
58 | |||
59 | #endif /* end of include guard: FRAME_H_26770FF1 */ | ||
diff --git a/generator/generator.cpp b/generator/generator.cpp index 6a16467..d88cb31 100644 --- a/generator/generator.cpp +++ b/generator/generator.cpp | |||
@@ -1,2320 +1,1477 @@ | |||
1 | #include <libxml/parser.h> | 1 | #include "generator.h" |
2 | #include <cassert> | ||
3 | #include <stdexcept> | ||
2 | #include <iostream> | 4 | #include <iostream> |
5 | #include <regex> | ||
3 | #include <dirent.h> | 6 | #include <dirent.h> |
4 | #include <set> | ||
5 | #include <map> | ||
6 | #include <string> | ||
7 | #include <vector> | ||
8 | #include <fstream> | 7 | #include <fstream> |
9 | #include <sqlite3.h> | 8 | #include "enums.h" |
10 | #include <sstream> | ||
11 | #include <regex> | ||
12 | #include <list> | ||
13 | #include <algorithm> | ||
14 | #include <json.hpp> | ||
15 | #include "progress.h" | 9 | #include "progress.h" |
10 | #include "selrestr.h" | ||
11 | #include "role.h" | ||
12 | #include "part.h" | ||
13 | #include "field.h" | ||
16 | #include "../lib/util.h" | 14 | #include "../lib/util.h" |
17 | 15 | ||
18 | using json = nlohmann::json; | 16 | namespace verbly { |
19 | 17 | namespace generator { | |
20 | struct verb_t { | ||
21 | std::string infinitive; | ||
22 | std::string past_tense; | ||
23 | std::string past_participle; | ||
24 | std::string ing_form; | ||
25 | std::string s_form; | ||
26 | int id; | ||
27 | }; | ||
28 | |||
29 | struct adjective_t { | ||
30 | std::string base; | ||
31 | std::string comparative; | ||
32 | std::string superlative; | ||
33 | }; | ||
34 | |||
35 | struct noun_t { | ||
36 | std::string singular; | ||
37 | std::string plural; | ||
38 | }; | ||
39 | |||
40 | struct selrestr_t { | ||
41 | enum class type_t { | ||
42 | singleton, | ||
43 | andlogic, | ||
44 | orlogic, | ||
45 | empty | ||
46 | }; | ||
47 | type_t type; | ||
48 | std::string restriction; | ||
49 | bool pos; | ||
50 | std::list<selrestr_t> subordinates; | ||
51 | }; | ||
52 | |||
53 | struct framepart_t { | ||
54 | enum class type_t { | ||
55 | np, | ||
56 | v, | ||
57 | pp, | ||
58 | adj, | ||
59 | adv, | ||
60 | lex | ||
61 | }; | ||
62 | type_t type; | ||
63 | std::string role; | ||
64 | selrestr_t selrestrs; | ||
65 | std::set<std::string> preprestrs; | ||
66 | std::set<std::string> synrestrs; | ||
67 | std::list<std::string> choices; | ||
68 | std::string lexval; | ||
69 | }; | ||
70 | |||
71 | struct group_t { | ||
72 | std::string id; | ||
73 | std::string parent; | ||
74 | std::set<std::string> members; | ||
75 | std::map<std::string, selrestr_t> roles; | ||
76 | std::list<std::list<framepart_t>> frames; | ||
77 | }; | ||
78 | |||
79 | struct pronunciation_t { | ||
80 | std::string phonemes; | ||
81 | std::string prerhyme; | ||
82 | std::string rhyme; | ||
83 | int syllables = 0; | ||
84 | std::string stress; | ||
85 | |||
86 | bool operator<(const pronunciation_t& other) const | ||
87 | { | ||
88 | return phonemes < other.phonemes; | ||
89 | } | ||
90 | }; | ||
91 | |||
92 | std::map<std::string, group_t> groups; | ||
93 | std::map<std::string, verb_t> verbs; | ||
94 | std::map<std::string, adjective_t> adjectives; | ||
95 | std::map<std::string, noun_t> nouns; | ||
96 | std::map<int, std::map<int, int>> wn; | ||
97 | std::map<int, int> images; | ||
98 | std::map<std::string, std::set<pronunciation_t>> pronunciations; | ||
99 | |||
100 | void print_usage() | ||
101 | { | ||
102 | std::cout << "Verbly Datafile Generator" << std::endl; | ||
103 | std::cout << "-------------------------" << std::endl; | ||
104 | std::cout << "Requires exactly six arguments." << std::endl; | ||
105 | std::cout << "1. The path to a VerbNet data directory." << std::endl; | ||
106 | std::cout << "2. The path to an AGID infl.txt file." << std::endl; | ||
107 | std::cout << "3. The path to a WordNet prolog data directory." << std::endl; | ||
108 | std::cout << "4. The path to a CMUDICT pronunciation file." << std::endl; | ||
109 | std::cout << "5. The path to an ImageNet urls.txt file." << std::endl; | ||
110 | std::cout << "6. Datafile output path." << std::endl; | ||
111 | |||
112 | exit(1); | ||
113 | } | ||
114 | |||
115 | void db_error(sqlite3* ppdb, std::string query) | ||
116 | { | ||
117 | std::cout << "Error writing to output database: " << sqlite3_errmsg(ppdb) << std::endl; | ||
118 | std::cout << query << std::endl; | ||
119 | sqlite3_close_v2(ppdb); | ||
120 | print_usage(); | ||
121 | } | ||
122 | |||
123 | json export_selrestrs(selrestr_t r) | ||
124 | { | ||
125 | if (r.type == selrestr_t::type_t::empty) | ||
126 | { | ||
127 | return {}; | ||
128 | } else if (r.type == selrestr_t::type_t::singleton) | ||
129 | { | ||
130 | json result; | ||
131 | result["type"] = r.restriction; | ||
132 | result["pos"] = r.pos; | ||
133 | return result; | ||
134 | } else { | ||
135 | json result; | ||
136 | if (r.type == selrestr_t::type_t::andlogic) | ||
137 | { | ||
138 | result["logic"] = "and"; | ||
139 | } else { | ||
140 | result["logic"] = "or"; | ||
141 | } | ||
142 | |||
143 | std::list<json> outlist; | ||
144 | std::transform(std::begin(r.subordinates), std::end(r.subordinates), std::back_inserter(outlist), &export_selrestrs); | ||
145 | result["children"] = outlist; | ||
146 | 18 | ||
147 | return result; | 19 | generator::generator( |
148 | } | 20 | std::string verbNetPath, |
149 | } | 21 | std::string agidPath, |
150 | 22 | std::string wordNetPath, | |
151 | selrestr_t parse_selrestrs(xmlNodePtr top, std::string filename) | 23 | std::string cmudictPath, |
152 | { | 24 | std::string imageNetPath, |
153 | selrestr_t r; | 25 | std::string outputPath) : |
154 | xmlChar* key; | 26 | verbNetPath_(verbNetPath), |
155 | 27 | agidPath_(agidPath), | |
156 | if (!xmlStrcmp(top->name, (const xmlChar*) "SELRESTRS")) | 28 | wordNetPath_(wordNetPath), |
157 | { | 29 | cmudictPath_(cmudictPath), |
158 | if (xmlChildElementCount(top) == 0) | 30 | imageNetPath_(imageNetPath), |
31 | db_(outputPath) | ||
159 | { | 32 | { |
160 | r.type = selrestr_t::type_t::empty; | 33 | // Ensure VerbNet directory exists |
161 | } else if (xmlChildElementCount(top) == 1) | 34 | DIR* dir; |
162 | { | 35 | if ((dir = opendir(verbNetPath_.c_str())) == nullptr) |
163 | r = parse_selrestrs(xmlFirstElementChild(top), filename); | ||
164 | } else { | ||
165 | r.type = selrestr_t::type_t::andlogic; | ||
166 | |||
167 | if (xmlHasProp(top, (const xmlChar*) "logic")) | ||
168 | { | 36 | { |
169 | key = xmlGetProp(top, (const xmlChar*) "logic"); | 37 | throw std::invalid_argument("Invalid VerbNet data directory"); |
170 | if (!xmlStrcmp(key, (const xmlChar*) "or")) | ||
171 | { | ||
172 | r.type = selrestr_t::type_t::orlogic; | ||
173 | } | ||
174 | xmlFree(key); | ||
175 | } | 38 | } |
176 | 39 | ||
177 | for (xmlNodePtr selrestr = top->xmlChildrenNode; selrestr != nullptr; selrestr = selrestr->next) | 40 | closedir(dir); |
41 | |||
42 | // Ensure AGID infl.txt exists | ||
43 | if (!std::ifstream(agidPath_)) | ||
178 | { | 44 | { |
179 | if (!xmlStrcmp(selrestr->name, (const xmlChar*) "SELRESTRS") || !xmlStrcmp(selrestr->name, (const xmlChar*) "SELRESTR")) | 45 | throw std::invalid_argument("AGID infl.txt file not found"); |
180 | { | ||
181 | r.subordinates.push_back(parse_selrestrs(selrestr, filename)); | ||
182 | } | ||
183 | } | 46 | } |
184 | } | 47 | |
185 | } else if (!xmlStrcmp(top->name, (const xmlChar*) "SELRESTR")) | 48 | // Add directory separator to WordNet path |
186 | { | 49 | if ((wordNetPath_.back() != '/') && (wordNetPath_.back() != '\\')) |
187 | r.type = selrestr_t::type_t::singleton; | ||
188 | |||
189 | key = xmlGetProp(top, (xmlChar*) "Value"); | ||
190 | r.pos = (std::string((const char*)key) == "+"); | ||
191 | xmlFree(key); | ||
192 | |||
193 | key = xmlGetProp(top, (xmlChar*) "type"); | ||
194 | r.restriction = (const char*) key; | ||
195 | xmlFree(key); | ||
196 | } else { | ||
197 | // Invalid | ||
198 | std::cout << "Bad VerbNet file format: " << filename << std::endl; | ||
199 | print_usage(); | ||
200 | } | ||
201 | |||
202 | return r; | ||
203 | } | ||
204 | |||
205 | group_t& parse_group(xmlNodePtr top, std::string filename) | ||
206 | { | ||
207 | xmlChar* key = xmlGetProp(top, (xmlChar*) "ID"); | ||
208 | if (key == 0) | ||
209 | { | ||
210 | std::cout << "Bad VerbNet file format: " << filename << std::endl; | ||
211 | print_usage(); | ||
212 | } | ||
213 | std::string vnid = (const char*)key; | ||
214 | vnid = vnid.substr(vnid.find_first_of("-")+1); | ||
215 | xmlFree(key); | ||
216 | |||
217 | group_t g; | ||
218 | g.id = vnid; | ||
219 | |||
220 | for (xmlNodePtr node = top->xmlChildrenNode; node != nullptr; node = node->next) | ||
221 | { | ||
222 | if (!xmlStrcmp(node->name, (const xmlChar*) "SUBCLASSES")) | ||
223 | { | ||
224 | for (xmlNodePtr subclass = node->xmlChildrenNode; subclass != nullptr; subclass = subclass->next) | ||
225 | { | 50 | { |
226 | if (!xmlStrcmp(subclass->name, (const xmlChar*) "VNSUBCLASS")) | 51 | wordNetPath_ += '/'; |
227 | { | ||
228 | auto& sg = parse_group(subclass, filename); | ||
229 | sg.parent = vnid; | ||
230 | |||
231 | for (auto member : sg.members) | ||
232 | { | ||
233 | g.members.insert(member); | ||
234 | } | ||
235 | |||
236 | // The schema requires that subclasses appear after role definitions, so we can do this now | ||
237 | for (auto role : g.roles) | ||
238 | { | ||
239 | if (sg.roles.count(role.first) == 0) | ||
240 | { | ||
241 | sg.roles[role.first] = role.second; | ||
242 | } | ||
243 | } | ||
244 | } | ||
245 | } | 52 | } |
246 | } else if (!xmlStrcmp(node->name, (const xmlChar*) "MEMBERS")) | 53 | |
247 | { | 54 | // Ensure WordNet tables exist |
248 | for (xmlNodePtr member = node->xmlChildrenNode; member != nullptr; member = member->next) | 55 | for (std::string table : { |
56 | "s", "sk", "ant", "at", "cls", "hyp", "ins", "mm", "mp", "ms", "per", "sa", "sim", "syntax" | ||
57 | }) | ||
249 | { | 58 | { |
250 | if (!xmlStrcmp(member->name, (const xmlChar*) "MEMBER")) | 59 | if (!std::ifstream(wordNetPath_ + "wn_" + table + ".pl")) |
251 | { | 60 | { |
252 | key = xmlGetProp(member, (xmlChar*) "name"); | 61 | throw std::invalid_argument("WordNet " + table + " table not found"); |
253 | g.members.insert((const char*)key); | ||
254 | xmlFree(key); | ||
255 | } | 62 | } |
256 | } | 63 | } |
257 | } else if (!xmlStrcmp(node->name, (const xmlChar*) "THEMROLES")) | 64 | |
258 | { | 65 | // Ensure CMUDICT file exists |
259 | for (xmlNodePtr role = node->xmlChildrenNode; role != nullptr; role = role->next) | 66 | if (!std::ifstream(cmudictPath_)) |
260 | { | 67 | { |
261 | if (!xmlStrcmp(role->name, (const xmlChar*) "THEMROLE")) | 68 | throw std::invalid_argument("CMUDICT file not found"); |
262 | { | ||
263 | selrestr_t r; | ||
264 | r.type = selrestr_t::type_t::empty; | ||
265 | |||
266 | key = xmlGetProp(role, (const xmlChar*) "type"); | ||
267 | std::string type = (const char*)key; | ||
268 | xmlFree(key); | ||
269 | |||
270 | for (xmlNodePtr rolenode = role->xmlChildrenNode; rolenode != nullptr; rolenode = rolenode->next) | ||
271 | { | ||
272 | if (!xmlStrcmp(rolenode->name, (const xmlChar*) "SELRESTRS")) | ||
273 | { | ||
274 | r = parse_selrestrs(rolenode, filename); | ||
275 | } | ||
276 | } | ||
277 | |||
278 | g.roles[type] = r; | ||
279 | } | ||
280 | } | 69 | } |
281 | } else if (!xmlStrcmp(node->name, (const xmlChar*) "FRAMES")) | 70 | |
282 | { | 71 | // Ensure ImageNet urls.txt exists |
283 | for (xmlNodePtr frame = node->xmlChildrenNode; frame != nullptr; frame = frame->next) | 72 | if (!std::ifstream(imageNetPath_)) |
284 | { | 73 | { |
285 | if (!xmlStrcmp(frame->name, (const xmlChar*) "FRAME")) | 74 | throw std::invalid_argument("ImageNet urls.txt file not found"); |
286 | { | ||
287 | std::list<framepart_t> f; | ||
288 | |||
289 | for (xmlNodePtr framenode = frame->xmlChildrenNode; framenode != nullptr; framenode = framenode->next) | ||
290 | { | ||
291 | if (!xmlStrcmp(framenode->name, (const xmlChar*) "SYNTAX")) | ||
292 | { | ||
293 | for (xmlNodePtr syntaxnode = framenode->xmlChildrenNode; syntaxnode != nullptr; syntaxnode = syntaxnode->next) | ||
294 | { | ||
295 | framepart_t fp; | ||
296 | |||
297 | if (!xmlStrcmp(syntaxnode->name, (const xmlChar*) "NP")) | ||
298 | { | ||
299 | fp.type = framepart_t::type_t::np; | ||
300 | |||
301 | key = xmlGetProp(syntaxnode, (xmlChar*) "value"); | ||
302 | fp.role = (const char*)key; | ||
303 | xmlFree(key); | ||
304 | |||
305 | fp.selrestrs.type = selrestr_t::type_t::empty; | ||
306 | |||
307 | for (xmlNodePtr npnode = syntaxnode->xmlChildrenNode; npnode != nullptr; npnode = npnode->next) | ||
308 | { | ||
309 | if (!xmlStrcmp(npnode->name, (const xmlChar*) "SYNRESTRS")) | ||
310 | { | ||
311 | for (xmlNodePtr synrestr = npnode->xmlChildrenNode; synrestr != nullptr; synrestr = synrestr->next) | ||
312 | { | ||
313 | if (!xmlStrcmp(synrestr->name, (const xmlChar*) "SYNRESTR")) | ||
314 | { | ||
315 | key = xmlGetProp(synrestr, (xmlChar*) "type"); | ||
316 | fp.synrestrs.insert(std::string((const char*)key)); | ||
317 | xmlFree(key); | ||
318 | } | ||
319 | } | ||
320 | } | ||
321 | |||
322 | if (!xmlStrcmp(npnode->name, (const xmlChar*) "SELRESTRS")) | ||
323 | { | ||
324 | fp.selrestrs = parse_selrestrs(npnode, filename); | ||
325 | } | ||
326 | } | ||
327 | } else if (!xmlStrcmp(syntaxnode->name, (xmlChar*) "VERB")) | ||
328 | { | ||
329 | fp.type = framepart_t::type_t::v; | ||
330 | } else if (!xmlStrcmp(syntaxnode->name, (xmlChar*) "PREP")) | ||
331 | { | ||
332 | fp.type = framepart_t::type_t::pp; | ||
333 | |||
334 | if (xmlHasProp(syntaxnode, (xmlChar*) "value")) | ||
335 | { | ||
336 | key = xmlGetProp(syntaxnode, (xmlChar*) "value"); | ||
337 | std::string choices = (const char*)key; | ||
338 | xmlFree(key); | ||
339 | |||
340 | fp.choices = verbly::split<std::list<std::string>>(choices, " "); | ||
341 | } | ||
342 | |||
343 | for (xmlNodePtr npnode = syntaxnode->xmlChildrenNode; npnode != nullptr; npnode = npnode->next) | ||
344 | { | ||
345 | if (!xmlStrcmp(npnode->name, (const xmlChar*) "SELRESTRS")) | ||
346 | { | ||
347 | for (xmlNodePtr synrestr = npnode->xmlChildrenNode; synrestr != nullptr; synrestr = synrestr->next) | ||
348 | { | ||
349 | if (!xmlStrcmp(synrestr->name, (const xmlChar*) "SELRESTR")) | ||
350 | { | ||
351 | key = xmlGetProp(synrestr, (xmlChar*) "type"); | ||
352 | fp.preprestrs.insert(std::string((const char*)key)); | ||
353 | xmlFree(key); | ||
354 | } | ||
355 | } | ||
356 | } | ||
357 | } | ||
358 | } else if (!xmlStrcmp(syntaxnode->name, (xmlChar*) "ADJ")) | ||
359 | { | ||
360 | fp.type = framepart_t::type_t::adj; | ||
361 | } else if (!xmlStrcmp(syntaxnode->name, (xmlChar*) "ADV")) | ||
362 | { | ||
363 | fp.type = framepart_t::type_t::adv; | ||
364 | } else if (!xmlStrcmp(syntaxnode->name, (xmlChar*) "LEX")) | ||
365 | { | ||
366 | fp.type = framepart_t::type_t::lex; | ||
367 | |||
368 | key = xmlGetProp(syntaxnode, (xmlChar*) "value"); | ||
369 | fp.lexval = (const char*)key; | ||
370 | xmlFree(key); | ||
371 | } else { | ||
372 | continue; | ||
373 | } | ||
374 | |||
375 | f.push_back(fp); | ||
376 | } | ||
377 | |||
378 | g.frames.push_back(f); | ||
379 | } | ||
380 | } | ||
381 | } | ||
382 | } | 75 | } |
383 | } | 76 | } |
384 | } | ||
385 | |||
386 | groups[vnid] = g; | ||
387 | |||
388 | return groups[vnid]; | ||
389 | } | ||
390 | |||
391 | int main(int argc, char** argv) | ||
392 | { | ||
393 | if (argc != 7) | ||
394 | { | ||
395 | print_usage(); | ||
396 | } | ||
397 | |||
398 | // VerbNet data | ||
399 | std::cout << "Reading verb frames..." << std::endl; | ||
400 | |||
401 | DIR* dir; | ||
402 | if ((dir = opendir(argv[1])) == nullptr) | ||
403 | { | ||
404 | std::cout << "Invalid VerbNet data directory." << std::endl; | ||
405 | |||
406 | print_usage(); | ||
407 | } | ||
408 | |||
409 | struct dirent* ent; | ||
410 | while ((ent = readdir(dir)) != nullptr) | ||
411 | { | ||
412 | std::string filename(argv[1]); | ||
413 | if (filename.back() != '/') | ||
414 | { | ||
415 | filename += '/'; | ||
416 | } | ||
417 | 77 | ||
418 | filename += ent->d_name; | 78 | void generator::run() |
419 | //std::cout << ent->d_name << std::endl; | ||
420 | |||
421 | if (filename.rfind(".xml") != filename.size() - 4) | ||
422 | { | ||
423 | continue; | ||
424 | } | ||
425 | |||
426 | xmlDocPtr doc = xmlParseFile(filename.c_str()); | ||
427 | if (doc == nullptr) | ||
428 | { | ||
429 | std::cout << "Error opening " << filename << std::endl; | ||
430 | print_usage(); | ||
431 | } | ||
432 | |||
433 | xmlNodePtr top = xmlDocGetRootElement(doc); | ||
434 | if ((top == nullptr) || (xmlStrcmp(top->name, (xmlChar*) "VNCLASS"))) | ||
435 | { | ||
436 | std::cout << "Bad VerbNet file format: " << filename << std::endl; | ||
437 | print_usage(); | ||
438 | } | ||
439 | |||
440 | parse_group(top, filename); | ||
441 | } | ||
442 | |||
443 | closedir(dir); | ||
444 | |||
445 | // Get verbs from AGID | ||
446 | std::cout << "Reading inflections..." << std::endl; | ||
447 | |||
448 | std::ifstream agidfile(argv[2]); | ||
449 | if (!agidfile.is_open()) | ||
450 | { | ||
451 | std::cout << "Could not open AGID file: " << argv[2] << std::endl; | ||
452 | print_usage(); | ||
453 | } | ||
454 | |||
455 | for (;;) | ||
456 | { | ||
457 | std::string line; | ||
458 | if (!getline(agidfile, line)) | ||
459 | { | ||
460 | break; | ||
461 | } | ||
462 | |||
463 | if (line.back() == '\r') | ||
464 | { | 79 | { |
465 | line.pop_back(); | 80 | // Create notions, words, lemmas, and forms from WordNet synsets |
466 | } | 81 | readWordNetSynsets(); |
467 | 82 | ||
468 | int divider = line.find_first_of(" "); | 83 | // Reads adjective positioning WordNet data |
469 | std::string word = line.substr(0, divider); | 84 | readAdjectivePositioning(); |
470 | line = line.substr(divider+1); | 85 | |
471 | char type = line[0]; | 86 | // Counts the number of URLs ImageNet has per notion |
472 | 87 | readImageNetUrls(); | |
473 | if (line[1] == '?') | 88 | |
474 | { | 89 | // Creates a word by WordNet sense key lookup table |
475 | line.erase(0, 4); | 90 | readWordNetSenseKeys(); |
476 | } else { | 91 | |
477 | line.erase(0, 3); | 92 | // Creates groups and frames from VerbNet data |
478 | } | 93 | readVerbNet(); |
479 | 94 | ||
480 | std::vector<std::string> forms; | 95 | // Creates forms and inflections from AGID. To reduce the amount of forms |
481 | while (!line.empty()) | 96 | // created, we do this after most lemmas that need inflecting have been |
482 | { | 97 | // created through other means, and then only generate forms for |
483 | std::string inflection; | 98 | // inflections of already-existing lemmas. The exception to this regards |
484 | if ((divider = line.find(" | ")) != std::string::npos) | 99 | // verb lemmas. If a verb lemma in AGID either does not exist yet, or does |
485 | { | 100 | // exist but is not related to any words that are related to verb notions, |
486 | inflection = line.substr(0, divider); | 101 | // then a notion and a word is generated and the form generation proceeds |
487 | line = line.substr(divider + 3); | 102 | // as usual. |
488 | } else { | 103 | readAgidInflections(); |
489 | inflection = line; | 104 | |
490 | line = ""; | 105 | // Reads in prepositions and the is_a relationship |
491 | } | 106 | readPrepositions(); |
492 | 107 | ||
493 | if ((divider = inflection.find_first_of(",?")) != std::string::npos) | 108 | // Creates pronunciations from CMUDICT. To reduce the amount of |
494 | { | 109 | // pronunciations created, we do this after all forms have been created, |
495 | inflection = inflection.substr(0, divider); | 110 | // and then only generate pronunciations for already-existing forms. |
496 | } | 111 | readCmudictPronunciations(); |
497 | 112 | ||
498 | forms.push_back(inflection); | 113 | // Writes the database schema |
114 | writeSchema(); | ||
115 | |||
116 | // Dumps data to the database | ||
117 | dumpObjects(); | ||
118 | |||
119 | // Populates the antonymy relationship from WordNet | ||
120 | readWordNetAntonymy(); | ||
121 | |||
122 | // Populates the variation relationship from WordNet | ||
123 | readWordNetVariation(); | ||
124 | |||
125 | // Populates the usage, topicality, and regionality relationships from | ||
126 | // WordNet | ||
127 | readWordNetClasses(); | ||
128 | |||
129 | // Populates the causality relationship from WordNet | ||
130 | readWordNetCausality(); | ||
131 | |||
132 | // Populates the entailment relationship from WordNet | ||
133 | readWordNetEntailment(); | ||
134 | |||
135 | // Populates the hypernymy relationship from WordNet | ||
136 | readWordNetHypernymy(); | ||
137 | |||
138 | // Populates the instantiation relationship from WordNet | ||
139 | readWordNetInstantiation(); | ||
140 | |||
141 | // Populates the member meronymy relationship from WordNet | ||
142 | readWordNetMemberMeronymy(); | ||
143 | |||
144 | // Populates the part meronymy relationship from WordNet | ||
145 | readWordNetPartMeronymy(); | ||
146 | |||
147 | // Populates the substance meronymy relationship from WordNet | ||
148 | readWordNetSubstanceMeronymy(); | ||
149 | |||
150 | // Populates the pertainymy and mannernymy relationships from WordNet | ||
151 | readWordNetPertainymy(); | ||
152 | |||
153 | // Populates the specification relationship from WordNet | ||
154 | readWordNetSpecification(); | ||
155 | |||
156 | // Populates the adjective similarity relationship from WordNet | ||
157 | readWordNetSimilarity(); | ||
158 | |||
159 | |||
160 | |||
161 | |||
162 | |||
163 | |||
164 | |||
165 | |||
499 | } | 166 | } |
500 | 167 | ||
501 | switch (type) | 168 | void generator::readWordNetSynsets() |
502 | { | 169 | { |
503 | case 'V': | 170 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_s.pl")); |
171 | progress ppgs("Reading synsets from WordNet...", lines.size()); | ||
172 | |||
173 | for (std::string line : lines) | ||
504 | { | 174 | { |
505 | verb_t v; | 175 | ppgs.update(); |
506 | v.infinitive = word; | 176 | |
507 | if (forms.size() == 4) | 177 | std::regex relation("^s\\(([1234]\\d{8}),(\\d+),'(.+)',\\w,\\d+,(\\d+)\\)\\.$"); |
508 | { | 178 | std::smatch relation_data; |
509 | v.past_tense = forms[0]; | 179 | if (!std::regex_search(line, relation_data, relation)) |
510 | v.past_participle = forms[1]; | 180 | { |
511 | v.ing_form = forms[2]; | 181 | continue; |
512 | v.s_form = forms[3]; | ||
513 | } else if (forms.size() == 3) | ||
514 | { | ||
515 | v.past_tense = forms[0]; | ||
516 | v.past_participle = forms[0]; | ||
517 | v.ing_form = forms[1]; | ||
518 | v.s_form = forms[2]; | ||
519 | } else if (forms.size() == 8) | ||
520 | { | ||
521 | // As of AGID 2014.08.11, this is only "to be" | ||
522 | v.past_tense = forms[0]; | ||
523 | v.past_participle = forms[2]; | ||
524 | v.ing_form = forms[3]; | ||
525 | v.s_form = forms[4]; | ||
526 | } else { | ||
527 | // Words that don't fit the cases above as of AGID 2014.08.11: | ||
528 | // - may and shall do not conjugate the way we want them to | ||
529 | // - methinks only has a past tense and is an outlier | ||
530 | // - wit has five forms, and is archaic/obscure enough that we can ignore it for now | ||
531 | std::cout << "Ignoring verb \"" << word << "\" due to non-standard number of forms." << std::endl; | ||
532 | } | 182 | } |
533 | 183 | ||
534 | verbs[word] = v; | 184 | int synset_id = std::stoi(relation_data[1]); |
535 | 185 | int wnum = std::stoi(relation_data[2]); | |
536 | break; | 186 | std::string text = relation_data[3]; |
537 | } | 187 | int tag_count = std::stoi(relation_data[4]); |
538 | 188 | size_t word_it; | |
539 | case 'A': | 189 | while ((word_it = text.find("''")) != std::string::npos) |
540 | { | ||
541 | adjective_t adj; | ||
542 | adj.base = word; | ||
543 | if (forms.size() == 2) | ||
544 | { | 190 | { |
545 | adj.comparative = forms[0]; | 191 | text.erase(word_it, 1); |
546 | adj.superlative = forms[1]; | ||
547 | } else { | ||
548 | // As of AGID 2014.08.11, this is only "only", which has only the form "onliest" | ||
549 | std::cout << "Ignoring adjective/adverb \"" << word << "\" due to non-standard number of forms." << std::endl; | ||
550 | } | 192 | } |
551 | 193 | ||
552 | adjectives[word] = adj; | 194 | // The WordNet data does contain duplicates, so we need to check that we |
553 | 195 | // haven't already created this word. | |
554 | break; | 196 | std::pair<int, int> lookup(synset_id, wnum); |
555 | } | 197 | if (!wordByWnidAndWnum_.count(lookup)) |
556 | |||
557 | case 'N': | ||
558 | { | ||
559 | noun_t n; | ||
560 | n.singular = word; | ||
561 | if (forms.size() == 1) | ||
562 | { | 198 | { |
563 | n.plural = forms[0]; | 199 | notion& synset = lookupOrCreateNotion(synset_id); |
564 | } else { | 200 | lemma& lex = lookupOrCreateLemma(text); |
565 | // As of AGID 2014.08.11, this is non-existent. | 201 | word& entry = createWord(synset, lex, tag_count); |
566 | std::cout << "Ignoring noun \"" << word << "\" due to non-standard number of forms." << std::endl; | 202 | |
203 | wordByWnidAndWnum_[lookup] = &entry; | ||
567 | } | 204 | } |
568 | |||
569 | nouns[word] = n; | ||
570 | |||
571 | break; | ||
572 | } | 205 | } |
573 | } | 206 | } |
574 | } | ||
575 | |||
576 | // Pronunciations | ||
577 | std::cout << "Reading pronunciations..." << std::endl; | ||
578 | |||
579 | std::ifstream pronfile(argv[4]); | ||
580 | if (!pronfile.is_open()) | ||
581 | { | ||
582 | std::cout << "Could not open CMUDICT file: " << argv[4] << std::endl; | ||
583 | print_usage(); | ||
584 | } | ||
585 | |||
586 | for (;;) | ||
587 | { | ||
588 | std::string line; | ||
589 | if (!getline(pronfile, line)) | ||
590 | { | ||
591 | break; | ||
592 | } | ||
593 | |||
594 | if (line.back() == '\r') | ||
595 | { | ||
596 | line.pop_back(); | ||
597 | } | ||
598 | 207 | ||
599 | std::regex phoneme("([A-Z][^ \\(]*)(?:\\(\\d+\\))? ([A-Z 0-9]+)"); | 208 | void generator::readAdjectivePositioning() |
600 | std::smatch phoneme_data; | ||
601 | if (std::regex_search(line, phoneme_data, phoneme)) | ||
602 | { | 209 | { |
603 | std::string canonical(phoneme_data[1]); | 210 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_syntax.pl")); |
604 | std::transform(std::begin(canonical), std::end(canonical), std::begin(canonical), ::tolower); | 211 | progress ppgs("Reading adjective positionings from WordNet...", lines.size()); |
605 | |||
606 | std::string phonemes = phoneme_data[2]; | ||
607 | auto phoneme_set = verbly::split<std::list<std::string>>(phonemes, " "); | ||
608 | auto phemstrt = std::find_if(std::begin(phoneme_set), std::end(phoneme_set), [] (std::string phoneme) { | ||
609 | return phoneme.find("1") != std::string::npos; | ||
610 | }); | ||
611 | 212 | ||
612 | pronunciation_t p; | 213 | for (std::string line : lines) |
613 | p.phonemes = phonemes; | ||
614 | |||
615 | // Rhyme detection | ||
616 | if (phemstrt != std::end(phoneme_set)) | ||
617 | { | 214 | { |
618 | std::stringstream rhymer; | 215 | ppgs.update(); |
619 | for (auto it = phemstrt; it != std::end(phoneme_set); it++) | ||
620 | { | ||
621 | std::string naked; | ||
622 | std::remove_copy_if(std::begin(*it), std::end(*it), std::back_inserter(naked), [] (char ch) { | ||
623 | return isdigit(ch); | ||
624 | }); | ||
625 | |||
626 | if (it != phemstrt) | ||
627 | { | ||
628 | rhymer << " "; | ||
629 | } | ||
630 | |||
631 | rhymer << naked; | ||
632 | } | ||
633 | 216 | ||
634 | p.rhyme = rhymer.str(); | 217 | std::regex relation("^syntax\\((3\\d{8}),(\\d+),([ipa])p?\\)\\."); |
635 | 218 | std::smatch relation_data; | |
636 | if (phemstrt != std::begin(phoneme_set)) | 219 | if (!std::regex_search(line, relation_data, relation)) |
637 | { | 220 | { |
638 | phemstrt--; | 221 | continue; |
639 | p.prerhyme = *phemstrt; | ||
640 | } else { | ||
641 | p.prerhyme = ""; | ||
642 | } | 222 | } |
643 | } else { | ||
644 | p.prerhyme = ""; | ||
645 | p.rhyme = ""; | ||
646 | } | ||
647 | 223 | ||
648 | // Syllable/stress | 224 | int synset_id = stoi(relation_data[1]); |
649 | for (auto phm : phoneme_set) | 225 | int wnum = stoi(relation_data[2]); |
650 | { | 226 | std::string adjpos_str = relation_data[3]; |
651 | if (isdigit(phm.back())) | ||
652 | { | ||
653 | // It's a vowel! | ||
654 | p.syllables++; | ||
655 | 227 | ||
656 | if (phm.back() == '1') | 228 | std::pair<int, int> lookup(synset_id, wnum); |
229 | if (wordByWnidAndWnum_.count(lookup)) | ||
230 | { | ||
231 | word& adj = *wordByWnidAndWnum_.at(lookup); | ||
232 | |||
233 | if (adjpos_str == "p") | ||
234 | { | ||
235 | adj.setAdjectivePosition(positioning::predicate); | ||
236 | } else if (adjpos_str == "a") | ||
237 | { | ||
238 | adj.setAdjectivePosition(positioning::attributive); | ||
239 | } else if (adjpos_str == "i") | ||
657 | { | 240 | { |
658 | p.stress.push_back('1'); | 241 | adj.setAdjectivePosition(positioning::postnominal); |
659 | } else { | 242 | } else { |
660 | p.stress.push_back('0'); | 243 | // Can't happen because of how we specified the regex. |
244 | assert(false); | ||
661 | } | 245 | } |
662 | } | 246 | } |
663 | } | 247 | } |
664 | |||
665 | pronunciations[canonical].insert(p); | ||
666 | } | ||
667 | } | ||
668 | |||
669 | // Images | ||
670 | std::cout << "Reading images..." << std::endl; | ||
671 | |||
672 | std::ifstream imagefile(argv[5]); | ||
673 | if (!imagefile.is_open()) | ||
674 | { | ||
675 | std::cout << "Could not open ImageNet file: " << argv[5] << std::endl; | ||
676 | print_usage(); | ||
677 | } | ||
678 | |||
679 | for (;;) | ||
680 | { | ||
681 | std::string line; | ||
682 | if (!getline(imagefile, line)) | ||
683 | { | ||
684 | break; | ||
685 | } | ||
686 | |||
687 | if (line.back() == '\r') | ||
688 | { | ||
689 | line.pop_back(); | ||
690 | } | ||
691 | |||
692 | std::string wnid_s = line.substr(1, 8); | ||
693 | int wnid = stoi(wnid_s) + 100000000; | ||
694 | images[wnid]++; | ||
695 | } | ||
696 | |||
697 | imagefile.close(); | ||
698 | |||
699 | // Start writing output | ||
700 | std::cout << "Writing schema..." << std::endl; | ||
701 | |||
702 | sqlite3* ppdb; | ||
703 | if (sqlite3_open_v2(argv[6], &ppdb, SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE, NULL) != SQLITE_OK) | ||
704 | { | ||
705 | std::cout << "Error opening output datafile: " << sqlite3_errmsg(ppdb) << std::endl; | ||
706 | print_usage(); | ||
707 | } | ||
708 | |||
709 | std::ifstream schemafile("schema.sql"); | ||
710 | if (!schemafile.is_open()) | ||
711 | { | ||
712 | std::cout << "Could not find schema file" << std::endl; | ||
713 | print_usage(); | ||
714 | } | ||
715 | |||
716 | std::stringstream schemabuilder; | ||
717 | for (;;) | ||
718 | { | ||
719 | std::string line; | ||
720 | if (!getline(schemafile, line)) | ||
721 | { | ||
722 | break; | ||
723 | } | ||
724 | |||
725 | if (line.back() == '\r') | ||
726 | { | ||
727 | line.pop_back(); | ||
728 | } | ||
729 | |||
730 | schemabuilder << line << std::endl; | ||
731 | } | ||
732 | |||
733 | std::string schema = schemabuilder.str(); | ||
734 | while (!schema.empty()) | ||
735 | { | ||
736 | std::string query; | ||
737 | int divider = schema.find(";"); | ||
738 | if (divider != std::string::npos) | ||
739 | { | ||
740 | query = schema.substr(0, divider+1); | ||
741 | schema = schema.substr(divider+2); | ||
742 | } else { | ||
743 | break; | ||
744 | } | 248 | } |
745 | 249 | ||
746 | sqlite3_stmt* schmstmt; | 250 | void generator::readImageNetUrls() |
747 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &schmstmt, NULL) != SQLITE_OK) | ||
748 | { | 251 | { |
749 | db_error(ppdb, query); | 252 | // The ImageNet datafile is so large that it is unreasonable and |
750 | } | 253 | // unnecessary to read it into memory; instead, we will parse each line as |
751 | 254 | // we read it. This has the caveat that we cannot display a progress bar. | |
752 | if (sqlite3_step(schmstmt) != SQLITE_DONE) | 255 | std::cout << "Reading image counts from ImageNet..." << std::endl; |
753 | { | ||
754 | db_error(ppdb, query); | ||
755 | } | ||
756 | |||
757 | sqlite3_finalize(schmstmt); | ||
758 | } | ||
759 | |||
760 | std::cout << "Writing prepositions..." << std::endl; | ||
761 | std::ifstream prepfile("prepositions.txt"); | ||
762 | if (!prepfile.is_open()) | ||
763 | { | ||
764 | std::cout << "Could not find prepositions file" << std::endl; | ||
765 | print_usage(); | ||
766 | } | ||
767 | |||
768 | for (;;) | ||
769 | { | ||
770 | std::string line; | ||
771 | if (!getline(prepfile, line)) | ||
772 | { | ||
773 | break; | ||
774 | } | ||
775 | |||
776 | if (line.back() == '\r') | ||
777 | { | ||
778 | line.pop_back(); | ||
779 | } | ||
780 | |||
781 | std::regex relation("^([^:]+): (.+)"); | ||
782 | std::smatch relation_data; | ||
783 | std::regex_search(line, relation_data, relation); | ||
784 | std::string prep = relation_data[1]; | ||
785 | std::list<std::string> groups = verbly::split<std::list<std::string>>(relation_data[2], ", "); | ||
786 | |||
787 | std::string query("INSERT INTO prepositions (form) VALUES (?)"); | ||
788 | sqlite3_stmt* ppstmt; | ||
789 | |||
790 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
791 | { | ||
792 | db_error(ppdb, query); | ||
793 | } | ||
794 | |||
795 | sqlite3_bind_text(ppstmt, 1, prep.c_str(), prep.length(), SQLITE_TRANSIENT); | ||
796 | |||
797 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | ||
798 | { | ||
799 | db_error(ppdb, query); | ||
800 | } | ||
801 | |||
802 | sqlite3_finalize(ppstmt); | ||
803 | |||
804 | query = "SELECT last_insert_rowid()"; | ||
805 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
806 | { | ||
807 | db_error(ppdb, query); | ||
808 | } | ||
809 | |||
810 | if (sqlite3_step(ppstmt) != SQLITE_ROW) | ||
811 | { | ||
812 | db_error(ppdb, query); | ||
813 | } | ||
814 | |||
815 | int rowid = sqlite3_column_int(ppstmt, 0); | ||
816 | sqlite3_finalize(ppstmt); | ||
817 | |||
818 | for (auto group : groups) | ||
819 | { | ||
820 | query = "INSERT INTO preposition_groups (preposition_id, groupname) VALUES (?, ?)"; | ||
821 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
822 | { | ||
823 | db_error(ppdb, query); | ||
824 | } | ||
825 | 256 | ||
826 | sqlite3_bind_int(ppstmt, 1, rowid); | 257 | std::ifstream file(imageNetPath_); |
827 | sqlite3_bind_text(ppstmt, 2, group.c_str(), group.length(), SQLITE_TRANSIENT); | 258 | if (!file) |
828 | |||
829 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | ||
830 | { | 259 | { |
831 | db_error(ppdb, query); | 260 | throw std::invalid_argument("Could not find file " + imageNetPath_); |
832 | } | 261 | } |
833 | |||
834 | sqlite3_finalize(ppstmt); | ||
835 | } | ||
836 | } | ||
837 | |||
838 | 262 | ||
839 | { | 263 | std::string line; |
840 | progress ppgs("Writing verbs...", verbs.size()); | 264 | while (std::getline(file, line)) |
841 | for (auto& mapping : verbs) | ||
842 | { | ||
843 | sqlite3_stmt* ppstmt; | ||
844 | std::string query("INSERT INTO verbs (infinitive, past_tense, past_participle, ing_form, s_form) VALUES (?, ?, ?, ?, ?)"); | ||
845 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
846 | { | ||
847 | db_error(ppdb, query); | ||
848 | } | ||
849 | |||
850 | sqlite3_bind_text(ppstmt, 1, mapping.second.infinitive.c_str(), mapping.second.infinitive.length(), SQLITE_TRANSIENT); | ||
851 | sqlite3_bind_text(ppstmt, 2, mapping.second.past_tense.c_str(), mapping.second.past_tense.length(), SQLITE_TRANSIENT); | ||
852 | sqlite3_bind_text(ppstmt, 3, mapping.second.past_participle.c_str(), mapping.second.past_participle.length(), SQLITE_TRANSIENT); | ||
853 | sqlite3_bind_text(ppstmt, 4, mapping.second.ing_form.c_str(), mapping.second.ing_form.length(), SQLITE_TRANSIENT); | ||
854 | sqlite3_bind_text(ppstmt, 5, mapping.second.s_form.c_str(), mapping.second.s_form.length(), SQLITE_TRANSIENT); | ||
855 | |||
856 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | ||
857 | { | ||
858 | db_error(ppdb, query); | ||
859 | } | ||
860 | |||
861 | sqlite3_finalize(ppstmt); | ||
862 | |||
863 | std::string canonical(mapping.second.infinitive); | ||
864 | std::transform(std::begin(canonical), std::end(canonical), std::begin(canonical), ::tolower); | ||
865 | if (pronunciations.count(canonical) == 1) | ||
866 | { | 265 | { |
867 | query = "SELECT last_insert_rowid()"; | 266 | if (line.back() == '\r') |
868 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
869 | { | 267 | { |
870 | db_error(ppdb, query); | 268 | line.pop_back(); |
871 | } | 269 | } |
872 | 270 | ||
873 | if (sqlite3_step(ppstmt) != SQLITE_ROW) | 271 | std::string wnid_s = line.substr(1, 8); |
272 | int wnid = stoi(wnid_s) + 100000000; | ||
273 | if (notionByWnid_.count(wnid)) | ||
874 | { | 274 | { |
875 | db_error(ppdb, query); | 275 | // We know that this notion has a wnid and is a noun. |
876 | } | 276 | notionByWnid_.at(wnid)->incrementNumOfImages(); |
877 | |||
878 | int rowid = sqlite3_column_int(ppstmt, 0); | ||
879 | |||
880 | sqlite3_finalize(ppstmt); | ||
881 | |||
882 | mapping.second.id = rowid; | ||
883 | |||
884 | for (auto pronunciation : pronunciations[canonical]) | ||
885 | { | ||
886 | if (!pronunciation.rhyme.empty()) | ||
887 | { | ||
888 | query = "INSERT INTO verb_pronunciations (verb_id, pronunciation, syllables, stress, prerhyme, rhyme) VALUES (?, ?, ?, ?, ?, ?)"; | ||
889 | } else { | ||
890 | query = "INSERT INTO verb_pronunciations (verb_id, pronunciation, syllables, stress) VALUES (?, ?, ?, ?)"; | ||
891 | } | ||
892 | |||
893 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
894 | { | ||
895 | db_error(ppdb, query); | ||
896 | } | ||
897 | |||
898 | sqlite3_bind_int(ppstmt, 1, rowid); | ||
899 | sqlite3_bind_text(ppstmt, 2, pronunciation.phonemes.c_str(), pronunciation.phonemes.length(), SQLITE_TRANSIENT); | ||
900 | sqlite3_bind_int(ppstmt, 3, pronunciation.syllables); | ||
901 | sqlite3_bind_text(ppstmt, 4, pronunciation.stress.c_str(), pronunciation.stress.length(), SQLITE_TRANSIENT); | ||
902 | |||
903 | if (!pronunciation.rhyme.empty()) | ||
904 | { | ||
905 | sqlite3_bind_text(ppstmt, 5, pronunciation.prerhyme.c_str(), pronunciation.prerhyme.length(), SQLITE_TRANSIENT); | ||
906 | sqlite3_bind_text(ppstmt, 6, pronunciation.rhyme.c_str(), pronunciation.rhyme.length(), SQLITE_TRANSIENT); | ||
907 | } | ||
908 | |||
909 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | ||
910 | { | ||
911 | db_error(ppdb, query); | ||
912 | } | ||
913 | |||
914 | sqlite3_finalize(ppstmt); | ||
915 | } | 277 | } |
916 | } | 278 | } |
917 | |||
918 | ppgs.update(); | ||
919 | } | 279 | } |
920 | } | 280 | |
921 | 281 | void generator::readWordNetSenseKeys() | |
922 | { | ||
923 | progress ppgs("Writing verb frames...", groups.size()); | ||
924 | for (auto& mapping : groups) | ||
925 | { | 282 | { |
926 | std::list<json> roledatal; | 283 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_sk.pl")); |
927 | std::transform(std::begin(mapping.second.roles), std::end(mapping.second.roles), std::back_inserter(roledatal), [] (std::pair<std::string, selrestr_t> r) { | 284 | progress ppgs("Reading sense keys from WordNet...", lines.size()); |
928 | json role; | ||
929 | role["type"] = r.first; | ||
930 | role["selrestrs"] = export_selrestrs(r.second); | ||
931 | |||
932 | return role; | ||
933 | }); | ||
934 | |||
935 | json roledata(roledatal); | ||
936 | std::string rdm = roledata.dump(); | ||
937 | |||
938 | sqlite3_stmt* ppstmt; | ||
939 | std::string query("INSERT INTO groups (data) VALUES (?)"); | ||
940 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
941 | { | ||
942 | db_error(ppdb, query); | ||
943 | } | ||
944 | |||
945 | sqlite3_bind_blob(ppstmt, 1, rdm.c_str(), rdm.size(), SQLITE_TRANSIENT); | ||
946 | |||
947 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | ||
948 | { | ||
949 | db_error(ppdb, query); | ||
950 | } | ||
951 | 285 | ||
952 | sqlite3_finalize(ppstmt); | 286 | for (std::string line : lines) |
953 | |||
954 | query = "SELECT last_insert_rowid()"; | ||
955 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
956 | { | ||
957 | db_error(ppdb, query); | ||
958 | } | ||
959 | |||
960 | if (sqlite3_step(ppstmt) != SQLITE_ROW) | ||
961 | { | ||
962 | db_error(ppdb, query); | ||
963 | } | ||
964 | |||
965 | int gid = sqlite3_column_int(ppstmt, 0); | ||
966 | sqlite3_finalize(ppstmt); | ||
967 | |||
968 | for (auto frame : mapping.second.frames) | ||
969 | { | 287 | { |
970 | std::list<json> fdatap; | 288 | ppgs.update(); |
971 | std::transform(std::begin(frame), std::end(frame), std::back_inserter(fdatap), [] (framepart_t& fp) { | ||
972 | json part; | ||
973 | |||
974 | switch (fp.type) | ||
975 | { | ||
976 | case framepart_t::type_t::np: | ||
977 | { | ||
978 | part["type"] = "np"; | ||
979 | part["role"] = fp.role; | ||
980 | part["selrestrs"] = export_selrestrs(fp.selrestrs); | ||
981 | part["synrestrs"] = fp.synrestrs; | ||
982 | |||
983 | break; | ||
984 | } | ||
985 | |||
986 | case framepart_t::type_t::pp: | ||
987 | { | ||
988 | part["type"] = "pp"; | ||
989 | part["values"] = fp.choices; | ||
990 | part["preprestrs"] = fp.preprestrs; | ||
991 | |||
992 | break; | ||
993 | } | ||
994 | |||
995 | case framepart_t::type_t::v: | ||
996 | { | ||
997 | part["type"] = "v"; | ||
998 | |||
999 | break; | ||
1000 | } | ||
1001 | |||
1002 | case framepart_t::type_t::adj: | ||
1003 | { | ||
1004 | part["type"] = "adj"; | ||
1005 | |||
1006 | break; | ||
1007 | } | ||
1008 | |||
1009 | case framepart_t::type_t::adv: | ||
1010 | { | ||
1011 | part["type"] = "adv"; | ||
1012 | |||
1013 | break; | ||
1014 | } | ||
1015 | |||
1016 | case framepart_t::type_t::lex: | ||
1017 | { | ||
1018 | part["type"] = "lex"; | ||
1019 | part["value"] = fp.lexval; | ||
1020 | |||
1021 | break; | ||
1022 | } | ||
1023 | } | ||
1024 | |||
1025 | return part; | ||
1026 | }); | ||
1027 | |||
1028 | json fdata(fdatap); | ||
1029 | std::string marshall = fdata.dump(); | ||
1030 | |||
1031 | query = "INSERT INTO frames (group_id, data) VALUES (?, ?)"; | ||
1032 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
1033 | { | ||
1034 | db_error(ppdb, query); | ||
1035 | } | ||
1036 | |||
1037 | sqlite3_bind_int(ppstmt, 1, gid); | ||
1038 | sqlite3_bind_blob(ppstmt, 2, marshall.c_str(), marshall.length(), SQLITE_TRANSIENT); | ||
1039 | 289 | ||
1040 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | 290 | // We only actually need to lookup verbs by sense key so we'll just |
291 | // ignore everything that isn't a verb. | ||
292 | std::regex relation("^sk\\((2\\d{8}),(\\d+),'(.+)'\\)\\.$"); | ||
293 | std::smatch relation_data; | ||
294 | if (!std::regex_search(line, relation_data, relation)) | ||
1041 | { | 295 | { |
1042 | db_error(ppdb, query); | 296 | continue; |
1043 | } | 297 | } |
298 | |||
299 | int synset_id = stoi(relation_data[1]); | ||
300 | int wnum = stoi(relation_data[2]); | ||
301 | std::string sense_key = relation_data[3]; | ||
1044 | 302 | ||
1045 | sqlite3_finalize(ppstmt); | 303 | // We are treating this mapping as injective, which is not entirely |
1046 | } | 304 | // accurate. First, the WordNet table contains duplicate rows, so those |
1047 | 305 | // need to be ignored. More importantly, a small number of sense keys | |
1048 | for (auto member : mapping.second.members) | 306 | // (one for each letter of the Latin alphabet, plus 9 other words) each |
1049 | { | 307 | // map to two different words in the same synset which differ only by |
1050 | if (verbs.count(member) == 1) | 308 | // capitalization. Luckily, none of these exceptions are verbs, so we |
309 | // can pretend that the mapping is injective. | ||
310 | if (!wnSenseKeys_.count(sense_key)) | ||
1051 | { | 311 | { |
1052 | auto& v = verbs[member]; | 312 | std::pair<int, int> lookup(synset_id, wnum); |
1053 | 313 | if (wordByWnidAndWnum_.count(lookup)) | |
1054 | query = "INSERT INTO verb_groups (verb_id, group_id) VALUES (?, ?)"; | ||
1055 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
1056 | { | ||
1057 | db_error(ppdb, query); | ||
1058 | } | ||
1059 | |||
1060 | sqlite3_bind_int(ppstmt, 1, v.id); | ||
1061 | sqlite3_bind_int(ppstmt, 2, gid); | ||
1062 | |||
1063 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | ||
1064 | { | 314 | { |
1065 | db_error(ppdb, query); | 315 | wnSenseKeys_[sense_key] = wordByWnidAndWnum_.at(lookup); |
1066 | } | 316 | } |
1067 | |||
1068 | sqlite3_finalize(ppstmt); | ||
1069 | } | 317 | } |
1070 | } | 318 | } |
1071 | |||
1072 | ppgs.update(); | ||
1073 | } | 319 | } |
1074 | } | 320 | |
1075 | 321 | void generator::readVerbNet() | |
1076 | // Get nouns/adjectives/adverbs from WordNet | ||
1077 | // Useful relations: | ||
1078 | // - s: master list | ||
1079 | // - ant: antonymy (e.g. happy/sad, sad/happy, happiness/sadness) | ||
1080 | // - at: variation (e.g. a measurement can be standard or nonstandard) | ||
1081 | // - der: derivation (e.g. happy/happily, happily/happy) | ||
1082 | // - hyp: hypernymy/hyponymy (e.g. color/red, color/blue) | ||
1083 | // - ins: instantiation (do we need this? let's see) | ||
1084 | // - mm: member meronymy/holonymy (e.g. family/mother, family/child) | ||
1085 | // - mp: part meronymy/holonymy (e.g. wheel/spoke, wheel/tire) | ||
1086 | // - ms: substance meronymy/holonymy (e.g. tire/rubber, doorstop/rubber) | ||
1087 | // - per: pertainymy (e.g. something that is Alaskan pertains to Alaska) | ||
1088 | // mannernymy (e.g. something done quickly is done in a manner that is quick) | ||
1089 | // - sa: specification (e.g. inaccurate (general) can mean imprecise or incorrect (specific)) | ||
1090 | // - sim: synonymy (e.g. cheerful/happy, happy/cheerful) | ||
1091 | // - syntax: positioning flags for some adjectives | ||
1092 | std::string wnpref {argv[3]}; | ||
1093 | if (wnpref.back() != '/') | ||
1094 | { | ||
1095 | wnpref += '/'; | ||
1096 | } | ||
1097 | |||
1098 | // s table | ||
1099 | { | ||
1100 | std::ifstream wnsfile(wnpref + "wn_s.pl"); | ||
1101 | if (!wnsfile.is_open()) | ||
1102 | { | 322 | { |
1103 | std::cout << "Invalid WordNet data directory." << std::endl; | 323 | std::cout << "Reading frames from VerbNet..." << std::endl; |
1104 | print_usage(); | ||
1105 | } | ||
1106 | 324 | ||
1107 | std::list<std::string> lines; | 325 | DIR* dir; |
1108 | for (;;) | 326 | if ((dir = opendir(verbNetPath_.c_str())) == nullptr) |
1109 | { | ||
1110 | std::string line; | ||
1111 | if (!getline(wnsfile, line)) | ||
1112 | { | 327 | { |
1113 | break; | 328 | throw std::invalid_argument("Invalid VerbNet data directory"); |
1114 | } | 329 | } |
1115 | 330 | ||
1116 | if (line.back() == '\r') | 331 | struct dirent* ent; |
1117 | { | 332 | while ((ent = readdir(dir)) != nullptr) |
1118 | line.pop_back(); | ||
1119 | } | ||
1120 | |||
1121 | lines.push_back(line); | ||
1122 | } | ||
1123 | |||
1124 | progress ppgs("Writing nouns, adjectives, and adverbs...", lines.size()); | ||
1125 | for (auto line : lines) | ||
1126 | { | ||
1127 | ppgs.update(); | ||
1128 | |||
1129 | std::regex relation("^s\\(([134]\\d{8}),(\\d+),'(.+)',\\w,\\d+,\\d+\\)\\.$"); | ||
1130 | std::smatch relation_data; | ||
1131 | if (!std::regex_search(line, relation_data, relation)) | ||
1132 | { | 333 | { |
1133 | continue; | 334 | std::string filename(verbNetPath_); |
1134 | } | 335 | |
336 | if (filename.back() != '/') | ||
337 | { | ||
338 | filename += '/'; | ||
339 | } | ||
1135 | 340 | ||
1136 | int synset_id = stoi(relation_data[1]); | 341 | filename += ent->d_name; |
1137 | int wnum = stoi(relation_data[2]); | ||
1138 | std::string word = relation_data[3]; | ||
1139 | size_t word_it; | ||
1140 | while ((word_it = word.find("''")) != std::string::npos) | ||
1141 | { | ||
1142 | word.erase(word_it, 1); | ||
1143 | } | ||
1144 | 342 | ||
1145 | std::string query; | 343 | if (filename.rfind(".xml") != filename.size() - 4) |
1146 | switch (synset_id / 100000000) | ||
1147 | { | ||
1148 | case 1: // Noun | ||
1149 | { | 344 | { |
1150 | if (nouns.count(word) == 1) | 345 | continue; |
1151 | { | ||
1152 | query = "INSERT INTO nouns (singular, proper, complexity, images, wnid, plural) VALUES (?, ?, ?, ?, ?, ?)"; | ||
1153 | } else { | ||
1154 | query = "INSERT INTO nouns (singular, proper, complexity, images, wnid) VALUES (?, ?, ?, ?, ?)"; | ||
1155 | } | ||
1156 | |||
1157 | break; | ||
1158 | } | 346 | } |
1159 | 347 | ||
1160 | case 2: // Verb | 348 | xmlDocPtr doc = xmlParseFile(filename.c_str()); |
349 | if (doc == nullptr) | ||
1161 | { | 350 | { |
1162 | // Ignore | 351 | throw std::logic_error("Error opening " + filename); |
1163 | |||
1164 | break; | ||
1165 | } | 352 | } |
1166 | 353 | ||
1167 | case 3: // Adjective | 354 | xmlNodePtr top = xmlDocGetRootElement(doc); |
355 | if ((top == nullptr) || (xmlStrcmp(top->name, reinterpret_cast<const xmlChar*>("VNCLASS")))) | ||
1168 | { | 356 | { |
1169 | if (adjectives.count(word) == 1) | 357 | throw std::logic_error("Bad VerbNet file format: " + filename); |
1170 | { | ||
1171 | query = "INSERT INTO adjectives (base_form, complexity, comparative, superlative) VALUES (?, ?, ?, ?)"; | ||
1172 | } else { | ||
1173 | query = "INSERT INTO adjectives (base_form, complexity) VALUES (?, ?)"; | ||
1174 | } | ||
1175 | |||
1176 | break; | ||
1177 | } | 358 | } |
1178 | 359 | ||
1179 | case 4: // Adverb | 360 | try |
1180 | { | 361 | { |
1181 | if (adjectives.count(word) == 1) | 362 | createGroup(top); |
1182 | { | 363 | } catch (const std::exception& e) |
1183 | query = "INSERT INTO adverbs (base_form, complexity, comparative, superlative) VALUES (?, ?, ?, ?)"; | 364 | { |
1184 | } else { | 365 | std::throw_with_nested(std::logic_error("Error parsing VerbNet file: " + filename)); |
1185 | query = "INSERT INTO adverbs (base_form, complexity) VALUES (?, ?)"; | ||
1186 | } | ||
1187 | |||
1188 | break; | ||
1189 | } | 366 | } |
1190 | } | 367 | } |
368 | |||
369 | closedir(dir); | ||
370 | } | ||
1191 | 371 | ||
1192 | sqlite3_stmt* ppstmt; | 372 | void generator::readAgidInflections() |
1193 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | 373 | { |
374 | std::list<std::string> lines(readFile(agidPath_)); | ||
375 | progress ppgs("Reading inflections from AGID...", lines.size()); | ||
376 | |||
377 | for (std::string line : lines) | ||
1194 | { | 378 | { |
1195 | db_error(ppdb, query); | 379 | ppgs.update(); |
1196 | } | 380 | |
381 | int divider = line.find_first_of(" "); | ||
382 | std::string infinitive = line.substr(0, divider); | ||
383 | line = line.substr(divider+1); | ||
384 | char type = line[0]; | ||
1197 | 385 | ||
1198 | sqlite3_bind_text(ppstmt, 1, word.c_str(), word.length(), SQLITE_TRANSIENT); | 386 | if (line[1] == '?') |
1199 | switch (synset_id / 100000000) | ||
1200 | { | ||
1201 | case 1: // Noun | ||
1202 | { | 387 | { |
1203 | sqlite3_bind_int(ppstmt, 2, (std::any_of(std::begin(word), std::end(word), [] (char ch) { | 388 | line.erase(0, 4); |
1204 | return isupper(ch); | 389 | } else { |
1205 | }) ? 1 : 0)); | 390 | line.erase(0, 3); |
1206 | |||
1207 | sqlite3_bind_int(ppstmt, 3, verbly::split<std::list<std::string>>(word, " ").size()); | ||
1208 | sqlite3_bind_int(ppstmt, 4, images[synset_id]); | ||
1209 | sqlite3_bind_int(ppstmt, 5, synset_id); | ||
1210 | |||
1211 | if (nouns.count(word) == 1) | ||
1212 | { | ||
1213 | sqlite3_bind_text(ppstmt, 6, nouns[word].plural.c_str(), nouns[word].plural.length(), SQLITE_TRANSIENT); | ||
1214 | } | ||
1215 | |||
1216 | break; | ||
1217 | } | 391 | } |
1218 | 392 | ||
1219 | case 3: // Adjective | 393 | if (!lemmaByBaseForm_.count(infinitive) && (type != 'V')) |
1220 | case 4: // Adverb | ||
1221 | { | 394 | { |
1222 | sqlite3_bind_int(ppstmt, 2, verbly::split<std::list<std::string>>(word, " ").size()); | 395 | continue; |
1223 | 396 | } | |
1224 | if (adjectives.count(word) == 1) | 397 | |
398 | lemma& curLemma = lookupOrCreateLemma(infinitive); | ||
399 | |||
400 | auto forms = split<std::vector<std::string>>(line, " | "); | ||
401 | for (std::string& inflForm : forms) | ||
402 | { | ||
403 | int sympos = inflForm.find_first_of(",?"); | ||
404 | if (sympos != std::string::npos) | ||
1225 | { | 405 | { |
1226 | sqlite3_bind_text(ppstmt, 3, adjectives[word].comparative.c_str(), adjectives[word].comparative.length(), SQLITE_TRANSIENT); | 406 | inflForm = inflForm.substr(0, sympos); |
1227 | sqlite3_bind_text(ppstmt, 4, adjectives[word].superlative.c_str(), adjectives[word].superlative.length(), SQLITE_TRANSIENT); | ||
1228 | } | 407 | } |
1229 | |||
1230 | break; | ||
1231 | } | 408 | } |
1232 | } | ||
1233 | 409 | ||
1234 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | 410 | switch (type) |
1235 | { | ||
1236 | db_error(ppdb, query); | ||
1237 | } | ||
1238 | |||
1239 | sqlite3_finalize(ppstmt); | ||
1240 | |||
1241 | query = "SELECT last_insert_rowid()"; | ||
1242 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
1243 | { | ||
1244 | db_error(ppdb, query); | ||
1245 | } | ||
1246 | |||
1247 | if (sqlite3_step(ppstmt) != SQLITE_ROW) | ||
1248 | { | ||
1249 | db_error(ppdb, query); | ||
1250 | } | ||
1251 | |||
1252 | int rowid = sqlite3_column_int(ppstmt, 0); | ||
1253 | wn[synset_id][wnum] = rowid; | ||
1254 | |||
1255 | sqlite3_finalize(ppstmt); | ||
1256 | |||
1257 | std::string canonical(word); | ||
1258 | std::transform(std::begin(canonical), std::end(canonical), std::begin(canonical), ::tolower); | ||
1259 | if (pronunciations.count(canonical) == 1) | ||
1260 | { | ||
1261 | for (auto pronunciation : pronunciations[canonical]) | ||
1262 | { | 411 | { |
1263 | switch (synset_id / 100000000) | 412 | case 'V': |
1264 | { | 413 | { |
1265 | case 1: // Noun | 414 | if (forms.size() == 4) |
1266 | { | 415 | { |
1267 | if (!pronunciation.rhyme.empty()) | 416 | curLemma.addInflection(inflection::past_tense, lookupOrCreateForm(forms[0])); |
1268 | { | 417 | curLemma.addInflection(inflection::past_participle, lookupOrCreateForm(forms[1])); |
1269 | query = "INSERT INTO noun_pronunciations (noun_id, pronunciation, syllables, stress, prerhyme, rhyme) VALUES (?, ?, ?, ?, ?, ?)"; | 418 | curLemma.addInflection(inflection::ing_form, lookupOrCreateForm(forms[2])); |
1270 | } else { | 419 | curLemma.addInflection(inflection::s_form, lookupOrCreateForm(forms[3])); |
1271 | query = "INSERT INTO noun_pronunciations (noun_id, pronunciation, syllables, stress) VALUES (?, ?, ?, ?)"; | 420 | } else if (forms.size() == 3) |
1272 | } | ||
1273 | |||
1274 | break; | ||
1275 | } | ||
1276 | |||
1277 | case 3: // Adjective | ||
1278 | { | 421 | { |
1279 | if (!pronunciation.rhyme.empty()) | 422 | curLemma.addInflection(inflection::past_tense, lookupOrCreateForm(forms[0])); |
1280 | { | 423 | curLemma.addInflection(inflection::past_participle, lookupOrCreateForm(forms[0])); |
1281 | query = "INSERT INTO adjective_pronunciations (adjective_id, pronunciation, syllables, stress, prerhyme, rhyme) VALUES (?, ?, ?, ?, ?, ?)"; | 424 | curLemma.addInflection(inflection::ing_form, lookupOrCreateForm(forms[1])); |
1282 | } else { | 425 | curLemma.addInflection(inflection::s_form, lookupOrCreateForm(forms[2])); |
1283 | query = "INSERT INTO adjective_pronunciations (adjective_id, pronunciation, syllables, stress) VALUES (?, ?, ?, ?)"; | 426 | } else if (forms.size() == 8) |
1284 | } | 427 | { |
1285 | 428 | // As of AGID 2014.08.11, this is only "to be" | |
1286 | break; | 429 | curLemma.addInflection(inflection::past_tense, lookupOrCreateForm(forms[0])); |
430 | curLemma.addInflection(inflection::past_participle, lookupOrCreateForm(forms[2])); | ||
431 | curLemma.addInflection(inflection::ing_form, lookupOrCreateForm(forms[3])); | ||
432 | curLemma.addInflection(inflection::s_form, lookupOrCreateForm(forms[4])); | ||
433 | } else { | ||
434 | // Words that don't fit the cases above as of AGID 2014.08.11: | ||
435 | // - may and shall do not conjugate the way we want them to | ||
436 | // - methinks only has a past tense and is an outlier | ||
437 | // - wit has five forms, and is archaic/obscure enough that we can ignore it for now | ||
438 | std::cout << " Ignoring verb \"" << infinitive << "\" due to non-standard number of forms." << std::endl; | ||
1287 | } | 439 | } |
1288 | 440 | ||
1289 | case 4: // Adverb | 441 | // For verbs in particular, we sometimes create a notion and a word |
442 | // from inflection data. Specifically, if there are not yet any | ||
443 | // verbs existing that have the same infinitive form. "Yet" means | ||
444 | // that this verb appears in the AGID data but not in either WordNet | ||
445 | // or VerbNet. | ||
446 | if (!wordsByBaseForm_.count(infinitive) | ||
447 | || !std::any_of(std::begin(wordsByBaseForm_.at(infinitive)), std::end(wordsByBaseForm_.at(infinitive)), [] (word* w) { | ||
448 | return w->getNotion().getPartOfSpeech() == part_of_speech::verb; | ||
449 | })) | ||
1290 | { | 450 | { |
1291 | if (!pronunciation.rhyme.empty()) | 451 | notion& n = createNotion(part_of_speech::verb); |
1292 | { | 452 | createWord(n, curLemma); |
1293 | query = "INSERT INTO adverb_pronunciations (adverb_id, pronunciation, syllables, stress, prerhyme, rhyme) VALUES (?, ?, ?, ?, ?, ?)"; | ||
1294 | } else { | ||
1295 | query = "INSERT INTO adverb_pronunciations (adverb_id, pronunciation, syllables, stress) VALUES (?, ?, ?, ?)"; | ||
1296 | } | ||
1297 | |||
1298 | break; | ||
1299 | } | 453 | } |
1300 | } | ||
1301 | |||
1302 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
1303 | { | ||
1304 | db_error(ppdb, query); | ||
1305 | } | ||
1306 | |||
1307 | sqlite3_bind_int(ppstmt, 1, rowid); | ||
1308 | sqlite3_bind_text(ppstmt, 2, pronunciation.phonemes.c_str(), pronunciation.phonemes.length(), SQLITE_TRANSIENT); | ||
1309 | sqlite3_bind_int(ppstmt, 3, pronunciation.syllables); | ||
1310 | sqlite3_bind_text(ppstmt, 4, pronunciation.stress.c_str(), pronunciation.stress.length(), SQLITE_TRANSIENT); | ||
1311 | |||
1312 | if (!pronunciation.rhyme.empty()) | ||
1313 | { | ||
1314 | sqlite3_bind_text(ppstmt, 5, pronunciation.prerhyme.c_str(), pronunciation.prerhyme.length(), SQLITE_TRANSIENT); | ||
1315 | sqlite3_bind_text(ppstmt, 6, pronunciation.rhyme.c_str(), pronunciation.rhyme.length(), SQLITE_TRANSIENT); | ||
1316 | } | ||
1317 | 454 | ||
1318 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | 455 | break; |
1319 | { | ||
1320 | db_error(ppdb, query); | ||
1321 | } | 456 | } |
1322 | |||
1323 | sqlite3_finalize(ppstmt); | ||
1324 | } | ||
1325 | } | ||
1326 | } | ||
1327 | } | ||
1328 | |||
1329 | // While we're working on s | ||
1330 | { | ||
1331 | progress ppgs("Writing word synonyms...", wn.size()); | ||
1332 | for (auto sense : wn) | ||
1333 | { | ||
1334 | ppgs.update(); | ||
1335 | 457 | ||
1336 | for (auto word1 : sense.second) | 458 | case 'A': |
1337 | { | ||
1338 | for (auto word2 : sense.second) | ||
1339 | { | ||
1340 | if (word1 != word2) | ||
1341 | { | 459 | { |
1342 | std::string query; | 460 | if (forms.size() == 2) |
1343 | switch (sense.first / 100000000) | ||
1344 | { | 461 | { |
1345 | case 1: // Noun | 462 | curLemma.addInflection(inflection::comparative, lookupOrCreateForm(forms[0])); |
1346 | { | 463 | curLemma.addInflection(inflection::superlative, lookupOrCreateForm(forms[1])); |
1347 | query = "INSERT INTO noun_synonymy (noun_1_id, noun_2_id) VALUES (?, ?)"; | 464 | } else { |
1348 | 465 | // As of AGID 2014.08.11, this is only "only", which has only the form "onliest" | |
1349 | break; | 466 | std::cout << " Ignoring adjective/adverb \"" << infinitive << "\" due to non-standard number of forms." << std::endl; |
1350 | } | 467 | } |
1351 | |||
1352 | case 2: // Verb | ||
1353 | { | ||
1354 | // Ignore | ||
1355 | |||
1356 | break; | ||
1357 | } | ||
1358 | |||
1359 | case 3: // Adjective | ||
1360 | { | ||
1361 | query = "INSERT INTO adjective_synonymy (adjective_1_id, adjective_2_id) VALUES (?, ?)"; | ||
1362 | 468 | ||
1363 | break; | 469 | break; |
1364 | } | 470 | } |
1365 | 471 | ||
1366 | case 4: // Adverb | 472 | case 'N': |
1367 | { | 473 | { |
1368 | query = "INSERT INTO adverb_synonymy (adverb_1_id, adverb_2_id) VALUES (?, ?)"; | 474 | if (forms.size() == 1) |
1369 | |||
1370 | break; | ||
1371 | } | ||
1372 | } | ||
1373 | |||
1374 | sqlite3_stmt* ppstmt; | ||
1375 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
1376 | { | ||
1377 | db_error(ppdb, query); | ||
1378 | } | ||
1379 | |||
1380 | sqlite3_bind_int(ppstmt, 1, word1.second); | ||
1381 | sqlite3_bind_int(ppstmt, 2, word2.second); | ||
1382 | |||
1383 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | ||
1384 | { | 475 | { |
1385 | db_error(ppdb, query); | 476 | curLemma.addInflection(inflection::plural, lookupOrCreateForm(forms[0])); |
477 | } else { | ||
478 | // As of AGID 2014.08.11, this is non-existent. | ||
479 | std::cout << " Ignoring noun \"" << infinitive << "\" due to non-standard number of forms." << std::endl; | ||
1386 | } | 480 | } |
1387 | 481 | ||
1388 | sqlite3_finalize(ppstmt); | 482 | break; |
1389 | } | 483 | } |
1390 | } | 484 | } |
1391 | } | 485 | } |
1392 | } | 486 | } |
1393 | } | ||
1394 | |||
1395 | // ant table | ||
1396 | { | ||
1397 | std::ifstream wnantfile(wnpref + "wn_ant.pl"); | ||
1398 | if (!wnantfile.is_open()) | ||
1399 | { | ||
1400 | std::cout << "Invalid WordNet data directory." << std::endl; | ||
1401 | print_usage(); | ||
1402 | } | ||
1403 | |||
1404 | std::list<std::string> lines; | ||
1405 | for (;;) | ||
1406 | { | ||
1407 | std::string line; | ||
1408 | if (!getline(wnantfile, line)) | ||
1409 | { | ||
1410 | break; | ||
1411 | } | ||
1412 | 487 | ||
1413 | if (line.back() == '\r') | 488 | void generator::readPrepositions() |
1414 | { | ||
1415 | line.pop_back(); | ||
1416 | } | ||
1417 | |||
1418 | lines.push_back(line); | ||
1419 | } | ||
1420 | |||
1421 | progress ppgs("Writing antonyms...", lines.size()); | ||
1422 | for (auto line : lines) | ||
1423 | { | 489 | { |
1424 | ppgs.update(); | 490 | std::list<std::string> lines(readFile("prepositions.txt")); |
491 | progress ppgs("Reading prepositions...", lines.size()); | ||
1425 | 492 | ||
1426 | std::regex relation("^ant\\(([134]\\d{8}),(\\d+),([134]\\d{8}),(\\d+)\\)\\."); | 493 | for (std::string line : lines) |
1427 | std::smatch relation_data; | ||
1428 | if (!std::regex_search(line, relation_data, relation)) | ||
1429 | { | ||
1430 | continue; | ||
1431 | } | ||
1432 | |||
1433 | int synset_id_1 = stoi(relation_data[1]); | ||
1434 | int wnum_1 = stoi(relation_data[2]); | ||
1435 | int synset_id_2 = stoi(relation_data[3]); | ||
1436 | int wnum_2 = stoi(relation_data[4]); | ||
1437 | |||
1438 | std::string query; | ||
1439 | switch (synset_id_1 / 100000000) | ||
1440 | { | 494 | { |
1441 | case 1: // Noun | 495 | ppgs.update(); |
1442 | { | ||
1443 | query = "INSERT INTO noun_antonymy (noun_1_id, noun_2_id) VALUES (?, ?)"; | ||
1444 | 496 | ||
1445 | break; | 497 | std::regex relation("^([^:]+): (.+)"); |
1446 | } | 498 | std::smatch relation_data; |
1447 | 499 | std::regex_search(line, relation_data, relation); | |
1448 | case 2: // Verb | 500 | std::string prep = relation_data[1]; |
1449 | { | 501 | auto groups = split<std::list<std::string>>(relation_data[2], ", "); |
1450 | // Ignore | ||
1451 | 502 | ||
1452 | break; | 503 | notion& n = createNotion(part_of_speech::preposition); |
1453 | } | 504 | lemma& l = lookupOrCreateLemma(prep); |
1454 | 505 | word& w = createWord(n, l); | |
1455 | case 3: // Adjective | ||
1456 | { | ||
1457 | query = "INSERT INTO adjective_antonymy (adjective_1_id, adjective_2_id) VALUES (?, ?)"; | ||
1458 | 506 | ||
1459 | break; | 507 | n.setPrepositionGroups(groups); |
1460 | } | ||
1461 | |||
1462 | case 4: // Adverb | ||
1463 | { | ||
1464 | query = "INSERT INTO adverb_antonymy (adverb_1_id, adverb_2_id) VALUES (?, ?)"; | ||
1465 | |||
1466 | break; | ||
1467 | } | ||
1468 | } | ||
1469 | |||
1470 | sqlite3_stmt* ppstmt; | ||
1471 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
1472 | { | ||
1473 | db_error(ppdb, query); | ||
1474 | } | ||
1475 | |||
1476 | sqlite3_bind_int(ppstmt, 1, wn[synset_id_1][wnum_1]); | ||
1477 | sqlite3_bind_int(ppstmt, 2, wn[synset_id_2][wnum_2]); | ||
1478 | |||
1479 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | ||
1480 | { | ||
1481 | db_error(ppdb, query); | ||
1482 | } | ||
1483 | |||
1484 | sqlite3_finalize(ppstmt); | ||
1485 | } | ||
1486 | } | ||
1487 | |||
1488 | // at table | ||
1489 | { | ||
1490 | std::ifstream wnatfile(wnpref + "wn_at.pl"); | ||
1491 | if (!wnatfile.is_open()) | ||
1492 | { | ||
1493 | std::cout << "Invalid WordNet data directory." << std::endl; | ||
1494 | print_usage(); | ||
1495 | } | ||
1496 | |||
1497 | std::list<std::string> lines; | ||
1498 | for (;;) | ||
1499 | { | ||
1500 | std::string line; | ||
1501 | if (!getline(wnatfile, line)) | ||
1502 | { | ||
1503 | break; | ||
1504 | } | 508 | } |
1505 | |||
1506 | if (line.back() == '\r') | ||
1507 | { | ||
1508 | line.pop_back(); | ||
1509 | } | ||
1510 | |||
1511 | lines.push_back(line); | ||
1512 | } | 509 | } |
1513 | 510 | ||
1514 | progress ppgs("Writing variations...", lines.size()); | 511 | void generator::readCmudictPronunciations() |
1515 | for (auto line : lines) | ||
1516 | { | 512 | { |
1517 | ppgs.update(); | 513 | std::list<std::string> lines(readFile(cmudictPath_)); |
514 | progress ppgs("Reading pronunciations from CMUDICT...", lines.size()); | ||
1518 | 515 | ||
1519 | std::regex relation("^at\\((1\\d{8}),(3\\d{8})\\)\\."); | 516 | for (std::string line : lines) |
1520 | std::smatch relation_data; | ||
1521 | if (!std::regex_search(line, relation_data, relation)) | ||
1522 | { | 517 | { |
1523 | continue; | 518 | ppgs.update(); |
1524 | } | 519 | |
1525 | 520 | std::regex phoneme("([A-Z][^ \\(]*)(?:\\(\\d+\\))? ([A-Z 0-9]+)"); | |
1526 | int synset_id_1 = stoi(relation_data[1]); | 521 | std::smatch phoneme_data; |
1527 | int synset_id_2 = stoi(relation_data[2]); | 522 | if (std::regex_search(line, phoneme_data, phoneme)) |
1528 | std::string query("INSERT INTO variation (noun_id, adjective_id) VALUES (?, ?)"); | ||
1529 | |||
1530 | for (auto mapping1 : wn[synset_id_1]) | ||
1531 | { | ||
1532 | for (auto mapping2 : wn[synset_id_2]) | ||
1533 | { | 523 | { |
1534 | sqlite3_stmt* ppstmt; | 524 | std::string canonical(phoneme_data[1]); |
1535 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK) | 525 | std::transform(std::begin(canonical), std::end(canonical), std::begin(canonical), ::tolower); |
1536 | { | ||
1537 | db_error(ppdb, query); | ||
1538 | } | ||
1539 | |||
1540 | sqlite3_bind_int(ppstmt, 1, mapping1.second); | ||
1541 | sqlite3_bind_int(ppstmt, 2, mapping2.second); | ||
1542 | 526 | ||
1543 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | 527 | if (!formByText_.count(canonical)) |
1544 | { | 528 | { |
1545 | db_error(ppdb, query); | 529 | continue; |
1546 | } | 530 | } |
1547 | 531 | ||
1548 | sqlite3_finalize(ppstmt); | 532 | std::string phonemes = phoneme_data[2]; |
533 | pronunciations_.emplace_back(phonemes); | ||
534 | pronunciation& p = pronunciations_.back(); | ||
535 | formByText_.at(canonical)->addPronunciation(p); | ||
1549 | } | 536 | } |
1550 | } | 537 | } |
1551 | } | 538 | } |
1552 | } | ||
1553 | |||
1554 | // der table | ||
1555 | { | ||
1556 | std::ifstream wnderfile(wnpref + "wn_der.pl"); | ||
1557 | if (!wnderfile.is_open()) | ||
1558 | { | ||
1559 | std::cout << "Invalid WordNet data directory." << std::endl; | ||
1560 | print_usage(); | ||
1561 | } | ||
1562 | 539 | ||
1563 | std::list<std::string> lines; | 540 | void generator::writeSchema() |
1564 | for (;;) | ||
1565 | { | 541 | { |
1566 | std::string line; | 542 | std::ifstream file("schema.sql"); |
1567 | if (!getline(wnderfile, line)) | 543 | if (!file) |
1568 | { | 544 | { |
1569 | break; | 545 | throw std::invalid_argument("Could not find database schema"); |
1570 | } | 546 | } |
1571 | 547 | ||
1572 | if (line.back() == '\r') | 548 | std::ostringstream schemaBuilder; |
549 | std::string line; | ||
550 | while (std::getline(file, line)) | ||
1573 | { | 551 | { |
1574 | line.pop_back(); | 552 | if (line.back() == '\r') |
553 | { | ||
554 | line.pop_back(); | ||
555 | } | ||
556 | |||
557 | schemaBuilder << line; | ||
1575 | } | 558 | } |
1576 | 559 | ||
1577 | lines.push_back(line); | 560 | std::string schema = schemaBuilder.str(); |
561 | auto queries = split<std::list<std::string>>(schema, ";"); | ||
562 | progress ppgs("Writing database schema...", queries.size()); | ||
563 | for (std::string query : queries) | ||
564 | { | ||
565 | if (!queries.empty()) | ||
566 | { | ||
567 | db_.runQuery(query); | ||
568 | } | ||
569 | |||
570 | ppgs.update(); | ||
571 | } | ||
1578 | } | 572 | } |
1579 | 573 | ||
1580 | progress ppgs("Writing morphological derivation...", lines.size()); | 574 | void generator::dumpObjects() |
1581 | for (auto line : lines) | ||
1582 | { | 575 | { |
1583 | ppgs.update(); | ||
1584 | |||
1585 | std::regex relation("^der\\(([134]\\d{8}),(\\d+),([134]\\d{8}),(\\d+)\\)\\."); | ||
1586 | std::smatch relation_data; | ||
1587 | if (!std::regex_search(line, relation_data, relation)) | ||
1588 | { | 576 | { |
1589 | continue; | 577 | progress ppgs("Writing notions...", notions_.size()); |
578 | |||
579 | for (notion& n : notions_) | ||
580 | { | ||
581 | db_ << n; | ||
582 | |||
583 | ppgs.update(); | ||
584 | } | ||
1590 | } | 585 | } |
1591 | 586 | ||
1592 | int synset_id_1 = stoi(relation_data[1]); | ||
1593 | int wnum_1 = stoi(relation_data[2]); | ||
1594 | int synset_id_2 = stoi(relation_data[3]); | ||
1595 | int wnum_2 = stoi(relation_data[4]); | ||
1596 | std::string query; | ||
1597 | switch (synset_id_1 / 100000000) | ||
1598 | { | 587 | { |
1599 | case 1: // Noun | 588 | progress ppgs("Writing words...", words_.size()); |
589 | |||
590 | for (word& w : words_) | ||
1600 | { | 591 | { |
1601 | switch (synset_id_2 / 100000000) | 592 | db_ << w; |
1602 | { | ||
1603 | case 1: // Noun | ||
1604 | { | ||
1605 | query = "INSERT INTO noun_noun_derivation (noun_1_id, noun_2_id) VALUES (?, ?)"; | ||
1606 | break; | ||
1607 | } | ||
1608 | |||
1609 | case 3: // Adjective | ||
1610 | { | ||
1611 | query = "INSERT INTO noun_adjective_derivation (noun_id, adjective_id) VALUES (?, ?)"; | ||
1612 | break; | ||
1613 | } | ||
1614 | |||
1615 | case 4: // Adverb | ||
1616 | { | ||
1617 | query = "INSERT INTO noun_adverb_derivation (noun_id, adverb_id) VALUES (?, ?)"; | ||
1618 | break; | ||
1619 | } | ||
1620 | } | ||
1621 | 593 | ||
1622 | break; | 594 | ppgs.update(); |
1623 | } | 595 | } |
596 | } | ||
597 | |||
598 | { | ||
599 | progress ppgs("Writing lemmas...", lemmas_.size()); | ||
1624 | 600 | ||
1625 | case 3: // Adjective | 601 | for (lemma& l : lemmas_) |
1626 | { | 602 | { |
1627 | switch (synset_id_2 / 100000000) | 603 | db_ << l; |
1628 | { | ||
1629 | case 1: // Noun | ||
1630 | { | ||
1631 | query = "INSERT INTO noun_adjective_derivation (adjective_id, noun_id) VALUES (?, ?)"; | ||
1632 | break; | ||
1633 | } | ||
1634 | |||
1635 | case 3: // Adjective | ||
1636 | { | ||
1637 | query = "INSERT INTO adjective_adjective_derivation (adjective_id, adjective_id) VALUES (?, ?)"; | ||
1638 | break; | ||
1639 | } | ||
1640 | |||
1641 | case 4: // Adverb | ||
1642 | { | ||
1643 | query = "INSERT INTO adjective_adverb_derivation (adjective_id, adverb_id) VALUES (?, ?)"; | ||
1644 | break; | ||
1645 | } | ||
1646 | } | ||
1647 | 604 | ||
1648 | break; | 605 | ppgs.update(); |
1649 | } | 606 | } |
607 | } | ||
608 | |||
609 | { | ||
610 | progress ppgs("Writing forms...", forms_.size()); | ||
1650 | 611 | ||
1651 | case 4: // Adverb | 612 | for (form& f : forms_) |
1652 | { | 613 | { |
1653 | switch (synset_id_2 / 100000000) | 614 | db_ << f; |
1654 | { | ||
1655 | case 1: // Noun | ||
1656 | { | ||
1657 | query = "INSERT INTO noun_adverb_derivation (adverb_id, noun_id) VALUES (?, ?)"; | ||
1658 | break; | ||
1659 | } | ||
1660 | |||
1661 | case 3: // Adjective | ||
1662 | { | ||
1663 | query = "INSERT INTO adjective_adverb_derivation (adverb_id, adjective_id) VALUES (?, ?)"; | ||
1664 | break; | ||
1665 | } | ||
1666 | |||
1667 | case 4: // Adverb | ||
1668 | { | ||
1669 | query = "INSERT INTO adverb_adverb_derivation (adverb_1_id, adverb_2_id) VALUES (?, ?)"; | ||
1670 | break; | ||
1671 | } | ||
1672 | } | ||
1673 | 615 | ||
1674 | break; | 616 | ppgs.update(); |
1675 | } | 617 | } |
1676 | } | 618 | } |
1677 | 619 | ||
1678 | sqlite3_stmt* ppstmt; | ||
1679 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK) | ||
1680 | { | 620 | { |
1681 | db_error(ppdb, query); | 621 | progress ppgs("Writing pronunciations...", pronunciations_.size()); |
622 | |||
623 | for (pronunciation& p : pronunciations_) | ||
624 | { | ||
625 | db_ << p; | ||
626 | |||
627 | ppgs.update(); | ||
628 | } | ||
1682 | } | 629 | } |
1683 | 630 | ||
1684 | sqlite3_bind_int(ppstmt, 1, wn[synset_id_1][wnum_1]); | ||
1685 | sqlite3_bind_int(ppstmt, 2, wn[synset_id_2][wnum_2]); | ||
1686 | |||
1687 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | ||
1688 | { | 631 | { |
1689 | db_error(ppdb, query); | 632 | progress ppgs("Writing verb groups...", groups_.size()); |
633 | |||
634 | for (group& g : groups_) | ||
635 | { | ||
636 | db_ << g; | ||
637 | |||
638 | ppgs.update(); | ||
639 | } | ||
1690 | } | 640 | } |
1691 | 641 | ||
1692 | sqlite3_finalize(ppstmt); | ||
1693 | } | ||
1694 | } | ||
1695 | |||
1696 | // hyp table | ||
1697 | { | ||
1698 | std::ifstream wnhypfile(wnpref + "wn_hyp.pl"); | ||
1699 | if (!wnhypfile.is_open()) | ||
1700 | { | ||
1701 | std::cout << "Invalid WordNet data directory." << std::endl; | ||
1702 | print_usage(); | ||
1703 | } | ||
1704 | |||
1705 | std::list<std::string> lines; | ||
1706 | for (;;) | ||
1707 | { | ||
1708 | std::string line; | ||
1709 | if (!getline(wnhypfile, line)) | ||
1710 | { | ||
1711 | break; | ||
1712 | } | ||
1713 | |||
1714 | if (line.back() == '\r') | ||
1715 | { | 642 | { |
1716 | line.pop_back(); | 643 | progress ppgs("Writing verb frames...", frames_.size()); |
644 | |||
645 | for (frame& f : frames_) | ||
646 | { | ||
647 | db_ << f; | ||
648 | |||
649 | ppgs.update(); | ||
650 | } | ||
1717 | } | 651 | } |
1718 | |||
1719 | lines.push_back(line); | ||
1720 | } | 652 | } |
1721 | 653 | ||
1722 | progress ppgs("Writing hypernyms...", lines.size()); | 654 | void generator::readWordNetAntonymy() |
1723 | for (auto line : lines) | ||
1724 | { | 655 | { |
1725 | ppgs.update(); | 656 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_ant.pl")); |
1726 | 657 | progress ppgs("Writing antonyms...", lines.size()); | |
1727 | std::regex relation("^hyp\\((1\\d{8}),(1\\d{8})\\)\\."); | 658 | for (auto line : lines) |
1728 | std::smatch relation_data; | ||
1729 | if (!std::regex_search(line, relation_data, relation)) | ||
1730 | { | 659 | { |
1731 | continue; | 660 | ppgs.update(); |
1732 | } | ||
1733 | |||
1734 | int synset_id_1 = stoi(relation_data[1]); | ||
1735 | int synset_id_2 = stoi(relation_data[2]); | ||
1736 | std::string query("INSERT INTO hypernymy (hyponym_id, hypernym_id) VALUES (?, ?)"); | ||
1737 | 661 | ||
1738 | for (auto mapping1 : wn[synset_id_1]) | 662 | std::regex relation("^ant\\(([134]\\d{8}),(\\d+),([134]\\d{8}),(\\d+)\\)\\."); |
1739 | { | 663 | std::smatch relation_data; |
1740 | for (auto mapping2 : wn[synset_id_2]) | 664 | if (!std::regex_search(line, relation_data, relation)) |
1741 | { | 665 | { |
1742 | sqlite3_stmt* ppstmt; | 666 | continue; |
1743 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK) | 667 | } |
1744 | { | 668 | |
1745 | db_error(ppdb, query); | 669 | std::pair<int, int> lookup1(std::stoi(relation_data[1]), std::stoi(relation_data[2])); |
1746 | } | 670 | std::pair<int, int> lookup2(std::stoi(relation_data[3]), std::stoi(relation_data[4])); |
1747 | 671 | ||
1748 | sqlite3_bind_int(ppstmt, 1, mapping1.second); | 672 | if (wordByWnidAndWnum_.count(lookup1) && wordByWnidAndWnum_.count(lookup2)) |
1749 | sqlite3_bind_int(ppstmt, 2, mapping2.second); | 673 | { |
674 | word& word1 = *wordByWnidAndWnum_.at(lookup1); | ||
675 | word& word2 = *wordByWnidAndWnum_.at(lookup2); | ||
1750 | 676 | ||
1751 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | 677 | std::list<field> fields; |
1752 | { | 678 | fields.emplace_back("antonym_1_id", word1.getId()); |
1753 | db_error(ppdb, query); | 679 | fields.emplace_back("antonym_2_id", word2.getId()); |
1754 | } | ||
1755 | 680 | ||
1756 | sqlite3_finalize(ppstmt); | 681 | db_.insertIntoTable("antonymy", std::move(fields)); |
1757 | } | 682 | } |
1758 | } | 683 | } |
1759 | } | 684 | } |
1760 | } | ||
1761 | |||
1762 | // ins table | ||
1763 | { | ||
1764 | std::ifstream wninsfile(wnpref + "wn_ins.pl"); | ||
1765 | if (!wninsfile.is_open()) | ||
1766 | { | ||
1767 | std::cout << "Invalid WordNet data directory." << std::endl; | ||
1768 | print_usage(); | ||
1769 | } | ||
1770 | |||
1771 | std::list<std::string> lines; | ||
1772 | for (;;) | ||
1773 | { | ||
1774 | std::string line; | ||
1775 | if (!getline(wninsfile, line)) | ||
1776 | { | ||
1777 | break; | ||
1778 | } | ||
1779 | 685 | ||
1780 | if (line.back() == '\r') | 686 | void generator::readWordNetVariation() |
687 | { | ||
688 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_at.pl")); | ||
689 | progress ppgs("Writing variation...", lines.size()); | ||
690 | for (auto line : lines) | ||
1781 | { | 691 | { |
1782 | line.pop_back(); | 692 | ppgs.update(); |
1783 | } | ||
1784 | 693 | ||
1785 | lines.push_back(line); | 694 | std::regex relation("^at\\((1\\d{8}),(3\\d{8})\\)\\."); |
695 | std::smatch relation_data; | ||
696 | if (!std::regex_search(line, relation_data, relation)) | ||
697 | { | ||
698 | continue; | ||
699 | } | ||
700 | |||
701 | int lookup1 = std::stoi(relation_data[1]); | ||
702 | int lookup2 = std::stoi(relation_data[2]); | ||
703 | |||
704 | if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2)) | ||
705 | { | ||
706 | notion& notion1 = *notionByWnid_.at(lookup1); | ||
707 | notion& notion2 = *notionByWnid_.at(lookup2); | ||
708 | |||
709 | std::list<field> fields; | ||
710 | fields.emplace_back("noun_id", notion1.getId()); | ||
711 | fields.emplace_back("adjective_id", notion2.getId()); | ||
712 | |||
713 | db_.insertIntoTable("variation", std::move(fields)); | ||
714 | } | ||
715 | } | ||
1786 | } | 716 | } |
1787 | 717 | ||
1788 | progress ppgs("Writing instantiations...", lines.size()); | 718 | void generator::readWordNetClasses() |
1789 | for (auto line : lines) | ||
1790 | { | 719 | { |
1791 | ppgs.update(); | 720 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_cls.pl")); |
1792 | 721 | progress ppgs("Writing usage, topicality, and regionality...", lines.size()); | |
1793 | std::regex relation("^ins\\((1\\d{8}),(1\\d{8})\\)\\."); | 722 | for (auto line : lines) |
1794 | std::smatch relation_data; | ||
1795 | if (!std::regex_search(line, relation_data, relation)) | ||
1796 | { | 723 | { |
1797 | continue; | 724 | ppgs.update(); |
1798 | } | ||
1799 | |||
1800 | int synset_id_1 = stoi(relation_data[1]); | ||
1801 | int synset_id_2 = stoi(relation_data[2]); | ||
1802 | std::string query("INSERT INTO instantiation (instance_id, class_id) VALUES (?, ?)"); | ||
1803 | 725 | ||
1804 | for (auto mapping1 : wn[synset_id_1]) | 726 | std::regex relation("^cls\\(([134]\\d{8}),(\\d+),(1\\d{8}),(\\d+),([tur])\\)\\."); |
1805 | { | 727 | std::smatch relation_data; |
1806 | for (auto mapping2 : wn[synset_id_2]) | 728 | if (!std::regex_search(line, relation_data, relation)) |
729 | { | ||
730 | continue; | ||
731 | } | ||
732 | |||
733 | std::pair<int, int> lookup1(std::stoi(relation_data[1]), std::stoi(relation_data[2])); | ||
734 | std::pair<int, int> lookup2(std::stoi(relation_data[3]), std::stoi(relation_data[4])); | ||
735 | std::string class_type = relation_data[5]; | ||
736 | |||
737 | std::string table_name; | ||
738 | if (class_type == "t") | ||
739 | { | ||
740 | table_name += "topicality"; | ||
741 | } else if (class_type == "u") | ||
742 | { | ||
743 | table_name += "usage"; | ||
744 | } else if (class_type == "r") | ||
745 | { | ||
746 | table_name += "regionality"; | ||
747 | } | ||
748 | |||
749 | std::list<int> leftJoin; | ||
750 | std::list<int> rightJoin; | ||
751 | |||
752 | if ((lookup1.second == 0) && (wordsByWnid_.count(lookup1.first))) | ||
1807 | { | 753 | { |
1808 | sqlite3_stmt* ppstmt; | 754 | std::transform(std::begin(wordsByWnid_.at(lookup1.first)), std::end(wordsByWnid_.at(lookup1.first)), std::back_inserter(leftJoin), [] (word* w) { |
1809 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK) | 755 | return w->getId(); |
756 | }); | ||
757 | } else if (wordByWnidAndWnum_.count(lookup1)) { | ||
758 | leftJoin.push_back(wordByWnidAndWnum_.at(lookup1)->getId()); | ||
759 | } | ||
760 | |||
761 | if ((lookup2.second == 0) && (wordsByWnid_.count(lookup2.first))) | ||
762 | { | ||
763 | std::transform(std::begin(wordsByWnid_.at(lookup2.first)), std::end(wordsByWnid_.at(lookup2.first)), std::back_inserter(rightJoin), [] (word* w) { | ||
764 | return w->getId(); | ||
765 | }); | ||
766 | } else if (wordByWnidAndWnum_.count(lookup2)) { | ||
767 | rightJoin.push_back(wordByWnidAndWnum_.at(lookup2)->getId()); | ||
768 | } | ||
769 | |||
770 | for (int word1 : leftJoin) | ||
771 | { | ||
772 | for (int word2 : rightJoin) | ||
1810 | { | 773 | { |
1811 | db_error(ppdb, query); | 774 | std::list<field> fields; |
1812 | } | 775 | fields.emplace_back("term_id", word1); |
776 | fields.emplace_back("domain_id", word2); | ||
1813 | 777 | ||
1814 | sqlite3_bind_int(ppstmt, 1, mapping1.second); | 778 | db_.insertIntoTable(table_name, std::move(fields)); |
1815 | sqlite3_bind_int(ppstmt, 2, mapping2.second); | ||
1816 | |||
1817 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | ||
1818 | { | ||
1819 | db_error(ppdb, query); | ||
1820 | } | 779 | } |
1821 | |||
1822 | sqlite3_finalize(ppstmt); | ||
1823 | } | 780 | } |
1824 | } | 781 | } |
1825 | } | 782 | } |
1826 | } | ||
1827 | |||
1828 | // mm table | ||
1829 | { | ||
1830 | std::ifstream wnmmfile(wnpref + "wn_mm.pl"); | ||
1831 | if (!wnmmfile.is_open()) | ||
1832 | { | ||
1833 | std::cout << "Invalid WordNet data directory." << std::endl; | ||
1834 | print_usage(); | ||
1835 | } | ||
1836 | |||
1837 | std::list<std::string> lines; | ||
1838 | for (;;) | ||
1839 | { | ||
1840 | std::string line; | ||
1841 | if (!getline(wnmmfile, line)) | ||
1842 | { | ||
1843 | break; | ||
1844 | } | ||
1845 | 783 | ||
1846 | if (line.back() == '\r') | 784 | void generator::readWordNetCausality() |
785 | { | ||
786 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_cs.pl")); | ||
787 | progress ppgs("Writing causality...", lines.size()); | ||
788 | for (auto line : lines) | ||
1847 | { | 789 | { |
1848 | line.pop_back(); | 790 | ppgs.update(); |
1849 | } | ||
1850 | 791 | ||
1851 | lines.push_back(line); | 792 | std::regex relation("^cs\\((2\\d{8}),(2\\d{8})\\)\\."); |
793 | std::smatch relation_data; | ||
794 | if (!std::regex_search(line, relation_data, relation)) | ||
795 | { | ||
796 | continue; | ||
797 | } | ||
798 | |||
799 | int lookup1 = std::stoi(relation_data[1]); | ||
800 | int lookup2 = std::stoi(relation_data[2]); | ||
801 | |||
802 | if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2)) | ||
803 | { | ||
804 | notion& notion1 = *notionByWnid_.at(lookup1); | ||
805 | notion& notion2 = *notionByWnid_.at(lookup2); | ||
806 | |||
807 | std::list<field> fields; | ||
808 | fields.emplace_back("effect_id", notion1.getId()); | ||
809 | fields.emplace_back("cause_id", notion2.getId()); | ||
810 | |||
811 | db_.insertIntoTable("causality", std::move(fields)); | ||
812 | } | ||
813 | } | ||
1852 | } | 814 | } |
1853 | 815 | ||
1854 | progress ppgs("Writing member meronyms...", lines.size()); | 816 | void generator::readWordNetEntailment() |
1855 | for (auto line : lines) | ||
1856 | { | 817 | { |
1857 | ppgs.update(); | 818 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_ent.pl")); |
1858 | 819 | progress ppgs("Writing entailment...", lines.size()); | |
1859 | std::regex relation("^mm\\((1\\d{8}),(1\\d{8})\\)\\."); | 820 | for (auto line : lines) |
1860 | std::smatch relation_data; | ||
1861 | if (!std::regex_search(line, relation_data, relation)) | ||
1862 | { | 821 | { |
1863 | continue; | 822 | ppgs.update(); |
1864 | } | ||
1865 | 823 | ||
1866 | int synset_id_1 = stoi(relation_data[1]); | 824 | std::regex relation("^ent\\((2\\d{8}),(2\\d{8})\\)\\."); |
1867 | int synset_id_2 = stoi(relation_data[2]); | 825 | std::smatch relation_data; |
1868 | std::string query("INSERT INTO member_meronymy (holonym_id, meronym_id) VALUES (?, ?)"); | 826 | if (!std::regex_search(line, relation_data, relation)) |
1869 | |||
1870 | for (auto mapping1 : wn[synset_id_1]) | ||
1871 | { | ||
1872 | for (auto mapping2 : wn[synset_id_2]) | ||
1873 | { | 827 | { |
1874 | sqlite3_stmt* ppstmt; | 828 | continue; |
1875 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK) | 829 | } |
1876 | { | 830 | |
1877 | db_error(ppdb, query); | 831 | int lookup1 = std::stoi(relation_data[1]); |
1878 | } | 832 | int lookup2 = std::stoi(relation_data[2]); |
1879 | 833 | ||
1880 | sqlite3_bind_int(ppstmt, 1, mapping1.second); | 834 | if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2)) |
1881 | sqlite3_bind_int(ppstmt, 2, mapping2.second); | 835 | { |
836 | notion& notion1 = *notionByWnid_.at(lookup1); | ||
837 | notion& notion2 = *notionByWnid_.at(lookup2); | ||
1882 | 838 | ||
1883 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | 839 | std::list<field> fields; |
1884 | { | 840 | fields.emplace_back("given_id", notion1.getId()); |
1885 | db_error(ppdb, query); | 841 | fields.emplace_back("entailment_id", notion2.getId()); |
1886 | } | ||
1887 | 842 | ||
1888 | sqlite3_finalize(ppstmt); | 843 | db_.insertIntoTable("entailment", std::move(fields)); |
1889 | } | 844 | } |
1890 | } | 845 | } |
1891 | } | 846 | } |
1892 | } | 847 | |
1893 | 848 | void generator::readWordNetHypernymy() | |
1894 | // ms table | ||
1895 | { | ||
1896 | std::ifstream wnmsfile(wnpref + "wn_ms.pl"); | ||
1897 | if (!wnmsfile.is_open()) | ||
1898 | { | ||
1899 | std::cout << "Invalid WordNet data directory." << std::endl; | ||
1900 | print_usage(); | ||
1901 | } | ||
1902 | |||
1903 | std::list<std::string> lines; | ||
1904 | for (;;) | ||
1905 | { | 849 | { |
1906 | std::string line; | 850 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_hyp.pl")); |
1907 | if (!getline(wnmsfile, line)) | 851 | progress ppgs("Writing hypernymy...", lines.size()); |
852 | for (auto line : lines) | ||
1908 | { | 853 | { |
1909 | break; | 854 | ppgs.update(); |
855 | |||
856 | std::regex relation("^hyp\\(([12]\\d{8}),([12]\\d{8})\\)\\."); | ||
857 | std::smatch relation_data; | ||
858 | if (!std::regex_search(line, relation_data, relation)) | ||
859 | { | ||
860 | continue; | ||
861 | } | ||
862 | |||
863 | int lookup1 = std::stoi(relation_data[1]); | ||
864 | int lookup2 = std::stoi(relation_data[2]); | ||
865 | |||
866 | if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2)) | ||
867 | { | ||
868 | notion& notion1 = *notionByWnid_.at(lookup1); | ||
869 | notion& notion2 = *notionByWnid_.at(lookup2); | ||
870 | |||
871 | std::list<field> fields; | ||
872 | fields.emplace_back("hyponym_id", notion1.getId()); | ||
873 | fields.emplace_back("hypernym_id", notion2.getId()); | ||
874 | |||
875 | db_.insertIntoTable("hypernymy", std::move(fields)); | ||
876 | } | ||
1910 | } | 877 | } |
878 | } | ||
1911 | 879 | ||
1912 | if (line.back() == '\r') | 880 | void generator::readWordNetInstantiation() |
881 | { | ||
882 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_ins.pl")); | ||
883 | progress ppgs("Writing instantiation...", lines.size()); | ||
884 | for (auto line : lines) | ||
1913 | { | 885 | { |
1914 | line.pop_back(); | 886 | ppgs.update(); |
1915 | } | ||
1916 | 887 | ||
1917 | lines.push_back(line); | 888 | std::regex relation("^ins\\((1\\d{8}),(1\\d{8})\\)\\."); |
889 | std::smatch relation_data; | ||
890 | if (!std::regex_search(line, relation_data, relation)) | ||
891 | { | ||
892 | continue; | ||
893 | } | ||
894 | |||
895 | int lookup1 = std::stoi(relation_data[1]); | ||
896 | int lookup2 = std::stoi(relation_data[2]); | ||
897 | |||
898 | if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2)) | ||
899 | { | ||
900 | notion& notion1 = *notionByWnid_.at(lookup1); | ||
901 | notion& notion2 = *notionByWnid_.at(lookup2); | ||
902 | |||
903 | std::list<field> fields; | ||
904 | fields.emplace_back("instance_id", notion1.getId()); | ||
905 | fields.emplace_back("class_id", notion2.getId()); | ||
906 | |||
907 | db_.insertIntoTable("instantiation", std::move(fields)); | ||
908 | } | ||
909 | } | ||
1918 | } | 910 | } |
1919 | 911 | ||
1920 | progress ppgs("Writing substance meronyms...", lines.size()); | 912 | void generator::readWordNetMemberMeronymy() |
1921 | for (auto line : lines) | ||
1922 | { | 913 | { |
1923 | ppgs.update(); | 914 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_mm.pl")); |
1924 | 915 | progress ppgs("Writing member meronymy...", lines.size()); | |
1925 | std::regex relation("^ms\\((1\\d{8}),(1\\d{8})\\)\\."); | 916 | for (auto line : lines) |
1926 | std::smatch relation_data; | ||
1927 | if (!std::regex_search(line, relation_data, relation)) | ||
1928 | { | 917 | { |
1929 | continue; | 918 | ppgs.update(); |
1930 | } | ||
1931 | |||
1932 | int synset_id_1 = stoi(relation_data[1]); | ||
1933 | int synset_id_2 = stoi(relation_data[2]); | ||
1934 | std::string query("INSERT INTO substance_meronymy (holonym_id, meronym_id) VALUES (?, ?)"); | ||
1935 | 919 | ||
1936 | for (auto mapping1 : wn[synset_id_1]) | 920 | std::regex relation("^mm\\((1\\d{8}),(1\\d{8})\\)\\."); |
1937 | { | 921 | std::smatch relation_data; |
1938 | for (auto mapping2 : wn[synset_id_2]) | 922 | if (!std::regex_search(line, relation_data, relation)) |
1939 | { | 923 | { |
1940 | sqlite3_stmt* ppstmt; | 924 | continue; |
1941 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK) | 925 | } |
1942 | { | 926 | |
1943 | db_error(ppdb, query); | 927 | int lookup1 = std::stoi(relation_data[1]); |
1944 | } | 928 | int lookup2 = std::stoi(relation_data[2]); |
929 | |||
930 | if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2)) | ||
931 | { | ||
932 | notion& notion1 = *notionByWnid_.at(lookup1); | ||
933 | notion& notion2 = *notionByWnid_.at(lookup2); | ||
1945 | 934 | ||
1946 | sqlite3_bind_int(ppstmt, 1, mapping1.second); | 935 | std::list<field> fields; |
1947 | sqlite3_bind_int(ppstmt, 2, mapping2.second); | 936 | fields.emplace_back("holonym_id", notion1.getId()); |
937 | fields.emplace_back("meronym_id", notion2.getId()); | ||
1948 | 938 | ||
1949 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | 939 | db_.insertIntoTable("member_meronymy", std::move(fields)); |
1950 | { | ||
1951 | db_error(ppdb, query); | ||
1952 | } | ||
1953 | |||
1954 | sqlite3_finalize(ppstmt); | ||
1955 | } | 940 | } |
1956 | } | 941 | } |
1957 | } | 942 | } |
1958 | } | 943 | |
1959 | 944 | void generator::readWordNetPartMeronymy() | |
1960 | // mm table | ||
1961 | { | ||
1962 | std::ifstream wnmpfile(wnpref + "wn_mp.pl"); | ||
1963 | if (!wnmpfile.is_open()) | ||
1964 | { | ||
1965 | std::cout << "Invalid WordNet data directory." << std::endl; | ||
1966 | print_usage(); | ||
1967 | } | ||
1968 | |||
1969 | std::list<std::string> lines; | ||
1970 | for (;;) | ||
1971 | { | 945 | { |
1972 | std::string line; | 946 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_mp.pl")); |
1973 | if (!getline(wnmpfile, line)) | 947 | progress ppgs("Writing part meronymy...", lines.size()); |
948 | for (auto line : lines) | ||
1974 | { | 949 | { |
1975 | break; | 950 | ppgs.update(); |
951 | |||
952 | std::regex relation("^mp\\((1\\d{8}),(1\\d{8})\\)\\."); | ||
953 | std::smatch relation_data; | ||
954 | if (!std::regex_search(line, relation_data, relation)) | ||
955 | { | ||
956 | continue; | ||
957 | } | ||
958 | |||
959 | int lookup1 = std::stoi(relation_data[1]); | ||
960 | int lookup2 = std::stoi(relation_data[2]); | ||
961 | |||
962 | if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2)) | ||
963 | { | ||
964 | notion& notion1 = *notionByWnid_.at(lookup1); | ||
965 | notion& notion2 = *notionByWnid_.at(lookup2); | ||
966 | |||
967 | std::list<field> fields; | ||
968 | fields.emplace_back("holonym_id", notion1.getId()); | ||
969 | fields.emplace_back("meronym_id", notion2.getId()); | ||
970 | |||
971 | db_.insertIntoTable("part_meronymy", std::move(fields)); | ||
972 | } | ||
1976 | } | 973 | } |
974 | } | ||
1977 | 975 | ||
1978 | if (line.back() == '\r') | 976 | void generator::readWordNetSubstanceMeronymy() |
977 | { | ||
978 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_ms.pl")); | ||
979 | progress ppgs("Writing substance meronymy...", lines.size()); | ||
980 | for (auto line : lines) | ||
1979 | { | 981 | { |
1980 | line.pop_back(); | 982 | ppgs.update(); |
1981 | } | ||
1982 | 983 | ||
1983 | lines.push_back(line); | 984 | std::regex relation("^ms\\((1\\d{8}),(1\\d{8})\\)\\."); |
985 | std::smatch relation_data; | ||
986 | if (!std::regex_search(line, relation_data, relation)) | ||
987 | { | ||
988 | continue; | ||
989 | } | ||
990 | |||
991 | int lookup1 = std::stoi(relation_data[1]); | ||
992 | int lookup2 = std::stoi(relation_data[2]); | ||
993 | |||
994 | if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2)) | ||
995 | { | ||
996 | notion& notion1 = *notionByWnid_.at(lookup1); | ||
997 | notion& notion2 = *notionByWnid_.at(lookup2); | ||
998 | |||
999 | std::list<field> fields; | ||
1000 | fields.emplace_back("holonym_id", notion1.getId()); | ||
1001 | fields.emplace_back("meronym_id", notion2.getId()); | ||
1002 | |||
1003 | db_.insertIntoTable("substance_meronymy", std::move(fields)); | ||
1004 | } | ||
1005 | } | ||
1984 | } | 1006 | } |
1985 | 1007 | ||
1986 | progress ppgs("Writing part meronyms...", lines.size()); | 1008 | void generator::readWordNetPertainymy() |
1987 | for (auto line : lines) | ||
1988 | { | 1009 | { |
1989 | ppgs.update(); | 1010 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_per.pl")); |
1990 | 1011 | progress ppgs("Writing pertainymy and mannernymy...", lines.size()); | |
1991 | std::regex relation("^mp\\((1\\d{8}),(1\\d{8})\\)\\."); | 1012 | for (auto line : lines) |
1992 | std::smatch relation_data; | ||
1993 | if (!std::regex_search(line, relation_data, relation)) | ||
1994 | { | 1013 | { |
1995 | continue; | 1014 | ppgs.update(); |
1996 | } | ||
1997 | |||
1998 | int synset_id_1 = stoi(relation_data[1]); | ||
1999 | int synset_id_2 = stoi(relation_data[2]); | ||
2000 | std::string query("INSERT INTO part_meronymy (holonym_id, meronym_id) VALUES (?, ?)"); | ||
2001 | 1015 | ||
2002 | for (auto mapping1 : wn[synset_id_1]) | 1016 | std::regex relation("^per\\(([34]\\d{8}),(\\d+),([13]\\d{8}),(\\d+)\\)\\."); |
2003 | { | 1017 | std::smatch relation_data; |
2004 | for (auto mapping2 : wn[synset_id_2]) | 1018 | if (!std::regex_search(line, relation_data, relation)) |
2005 | { | 1019 | { |
2006 | sqlite3_stmt* ppstmt; | 1020 | continue; |
2007 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK) | 1021 | } |
2008 | { | 1022 | |
2009 | db_error(ppdb, query); | 1023 | std::pair<int, int> lookup1(std::stoi(relation_data[1]), std::stoi(relation_data[2])); |
2010 | } | 1024 | std::pair<int, int> lookup2(std::stoi(relation_data[3]), std::stoi(relation_data[4])); |
1025 | |||
1026 | if (wordByWnidAndWnum_.count(lookup1) && wordByWnidAndWnum_.count(lookup2)) | ||
1027 | { | ||
1028 | word& word1 = *wordByWnidAndWnum_.at(lookup1); | ||
1029 | word& word2 = *wordByWnidAndWnum_.at(lookup2); | ||
2011 | 1030 | ||
2012 | sqlite3_bind_int(ppstmt, 1, mapping1.second); | 1031 | if (word1.getNotion().getPartOfSpeech() == part_of_speech::adjective) |
2013 | sqlite3_bind_int(ppstmt, 2, mapping2.second); | 1032 | { |
1033 | std::list<field> fields; | ||
1034 | fields.emplace_back("pertainym_id", word1.getId()); | ||
1035 | fields.emplace_back("noun_id", word2.getId()); | ||
2014 | 1036 | ||
2015 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | 1037 | db_.insertIntoTable("pertainymy", std::move(fields)); |
1038 | } else if (word1.getNotion().getPartOfSpeech() == part_of_speech::adverb) | ||
2016 | { | 1039 | { |
2017 | db_error(ppdb, query); | 1040 | std::list<field> fields; |
2018 | } | 1041 | fields.emplace_back("mannernym_id", word1.getId()); |
1042 | fields.emplace_back("adjective_id", word2.getId()); | ||
2019 | 1043 | ||
2020 | sqlite3_finalize(ppstmt); | 1044 | db_.insertIntoTable("mannernymy", std::move(fields)); |
1045 | } | ||
2021 | } | 1046 | } |
2022 | } | 1047 | } |
2023 | } | 1048 | } |
2024 | } | ||
2025 | |||
2026 | // per table | ||
2027 | { | ||
2028 | std::ifstream wnperfile(wnpref + "wn_per.pl"); | ||
2029 | if (!wnperfile.is_open()) | ||
2030 | { | ||
2031 | std::cout << "Invalid WordNet data directory." << std::endl; | ||
2032 | print_usage(); | ||
2033 | } | ||
2034 | |||
2035 | std::list<std::string> lines; | ||
2036 | for (;;) | ||
2037 | { | ||
2038 | std::string line; | ||
2039 | if (!getline(wnperfile, line)) | ||
2040 | { | ||
2041 | break; | ||
2042 | } | ||
2043 | 1049 | ||
2044 | if (line.back() == '\r') | 1050 | void generator::readWordNetSpecification() |
1051 | { | ||
1052 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_sa.pl")); | ||
1053 | progress ppgs("Writing specifications...", lines.size()); | ||
1054 | for (auto line : lines) | ||
2045 | { | 1055 | { |
2046 | line.pop_back(); | 1056 | ppgs.update(); |
1057 | |||
1058 | std::regex relation("^sa\\((23\\d{8}),(\\d+),(23\\d{8}),(\\d+)\\)\\."); | ||
1059 | std::smatch relation_data; | ||
1060 | if (!std::regex_search(line, relation_data, relation)) | ||
1061 | { | ||
1062 | continue; | ||
1063 | } | ||
1064 | |||
1065 | std::pair<int, int> lookup1(std::stoi(relation_data[1]), std::stoi(relation_data[2])); | ||
1066 | std::pair<int, int> lookup2(std::stoi(relation_data[3]), std::stoi(relation_data[4])); | ||
1067 | |||
1068 | if (wordByWnidAndWnum_.count(lookup1) && wordByWnidAndWnum_.count(lookup2)) | ||
1069 | { | ||
1070 | word& word1 = *wordByWnidAndWnum_.at(lookup1); | ||
1071 | word& word2 = *wordByWnidAndWnum_.at(lookup2); | ||
1072 | |||
1073 | std::list<field> fields; | ||
1074 | fields.emplace_back("general_id", word1.getId()); | ||
1075 | fields.emplace_back("specific_id", word2.getId()); | ||
1076 | |||
1077 | db_.insertIntoTable("specification", std::move(fields)); | ||
1078 | } | ||
2047 | } | 1079 | } |
2048 | |||
2049 | lines.push_back(line); | ||
2050 | } | 1080 | } |
2051 | 1081 | ||
2052 | progress ppgs("Writing pertainyms and mannernyms...", lines.size()); | 1082 | void generator::readWordNetSimilarity() |
2053 | for (auto line : lines) | ||
2054 | { | 1083 | { |
2055 | ppgs.update(); | 1084 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_sim.pl")); |
2056 | 1085 | progress ppgs("Writing adjective similarity...", lines.size()); | |
2057 | std::regex relation("^per\\(([34]\\d{8}),(\\d+),([13]\\d{8}),(\\d+)\\)\\."); | 1086 | for (auto line : lines) |
2058 | std::smatch relation_data; | ||
2059 | if (!std::regex_search(line, relation_data, relation)) | ||
2060 | { | 1087 | { |
2061 | continue; | 1088 | ppgs.update(); |
2062 | } | ||
2063 | 1089 | ||
2064 | int synset_id_1 = stoi(relation_data[1]); | 1090 | std::regex relation("^sim\\((3\\d{8}),(3\\d{8})\\)\\."); |
2065 | int wnum_1 = stoi(relation_data[2]); | 1091 | std::smatch relation_data; |
2066 | int synset_id_2 = stoi(relation_data[3]); | 1092 | if (!std::regex_search(line, relation_data, relation)) |
2067 | int wnum_2 = stoi(relation_data[4]); | ||
2068 | std::string query; | ||
2069 | switch (synset_id_1 / 100000000) | ||
2070 | { | ||
2071 | case 3: // Adjective | ||
2072 | { | 1093 | { |
2073 | // This is a pertainym, the second word should be a noun | 1094 | continue; |
2074 | // Technically it can be an adjective but we're ignoring that | ||
2075 | if (synset_id_2 / 100000000 != 1) | ||
2076 | { | ||
2077 | continue; | ||
2078 | } | ||
2079 | |||
2080 | query = "INSERT INTO pertainymy (pertainym_id, noun_id) VALUES (?, ?)"; | ||
2081 | |||
2082 | break; | ||
2083 | } | 1095 | } |
1096 | |||
1097 | int lookup1 = std::stoi(relation_data[1]); | ||
1098 | int lookup2 = std::stoi(relation_data[2]); | ||
2084 | 1099 | ||
2085 | case 4: // Adverb | 1100 | if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2)) |
2086 | { | 1101 | { |
2087 | // This is a mannernym, the second word should be an adjective | 1102 | notion& notion1 = *notionByWnid_.at(lookup1); |
2088 | if (synset_id_2 / 100000000 != 3) | 1103 | notion& notion2 = *notionByWnid_.at(lookup2); |
2089 | { | ||
2090 | continue; | ||
2091 | } | ||
2092 | 1104 | ||
2093 | query = "INSERT INTO mannernymy (mannernym_id, adjective_id) VALUES (?, ?)"; | 1105 | std::list<field> fields; |
1106 | fields.emplace_back("adjective_1_id", notion1.getId()); | ||
1107 | fields.emplace_back("adjective_2_id", notion2.getId()); | ||
2094 | 1108 | ||
2095 | break; | 1109 | db_.insertIntoTable("similarity", std::move(fields)); |
2096 | } | 1110 | } |
2097 | } | 1111 | } |
2098 | 1112 | } | |
2099 | sqlite3_stmt* ppstmt; | ||
2100 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
2101 | { | ||
2102 | db_error(ppdb, query); | ||
2103 | } | ||
2104 | |||
2105 | sqlite3_bind_int(ppstmt, 1, wn[synset_id_1][wnum_1]); | ||
2106 | sqlite3_bind_int(ppstmt, 2, wn[synset_id_2][wnum_2]); | ||
2107 | 1113 | ||
2108 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | 1114 | std::list<std::string> generator::readFile(std::string path) |
1115 | { | ||
1116 | std::ifstream file(path); | ||
1117 | if (!file) | ||
2109 | { | 1118 | { |
2110 | db_error(ppdb, query); | 1119 | throw std::invalid_argument("Could not find file " + path); |
2111 | } | 1120 | } |
2112 | |||
2113 | sqlite3_finalize(ppstmt); | ||
2114 | } | ||
2115 | } | ||
2116 | 1121 | ||
2117 | // sa table | 1122 | std::list<std::string> lines; |
2118 | { | ||
2119 | std::ifstream wnsafile(wnpref + "wn_sa.pl"); | ||
2120 | if (!wnsafile.is_open()) | ||
2121 | { | ||
2122 | std::cout << "Invalid WordNet data directory." << std::endl; | ||
2123 | print_usage(); | ||
2124 | } | ||
2125 | |||
2126 | std::list<std::string> lines; | ||
2127 | for (;;) | ||
2128 | { | ||
2129 | std::string line; | 1123 | std::string line; |
2130 | if (!getline(wnsafile, line)) | 1124 | while (std::getline(file, line)) |
2131 | { | ||
2132 | break; | ||
2133 | } | ||
2134 | |||
2135 | if (line.back() == '\r') | ||
2136 | { | 1125 | { |
2137 | line.pop_back(); | 1126 | if (line.back() == '\r') |
1127 | { | ||
1128 | line.pop_back(); | ||
1129 | } | ||
1130 | |||
1131 | lines.push_back(line); | ||
2138 | } | 1132 | } |
2139 | 1133 | ||
2140 | lines.push_back(line); | 1134 | return lines; |
2141 | } | 1135 | } |
2142 | 1136 | ||
2143 | progress ppgs("Writing specifications...", lines.size()); | 1137 | part_of_speech generator::partOfSpeechByWnid(int wnid) |
2144 | for (auto line : lines) | ||
2145 | { | 1138 | { |
2146 | ppgs.update(); | 1139 | switch (wnid / 100000000) |
2147 | |||
2148 | std::regex relation("^per\\((3\\d{8}),(\\d+),(3\\d{8}),(\\d+)\\)\\."); | ||
2149 | std::smatch relation_data; | ||
2150 | if (!std::regex_search(line, relation_data, relation)) | ||
2151 | { | ||
2152 | continue; | ||
2153 | } | ||
2154 | |||
2155 | int synset_id_1 = stoi(relation_data[1]); | ||
2156 | int wnum_1 = stoi(relation_data[2]); | ||
2157 | int synset_id_2 = stoi(relation_data[3]); | ||
2158 | int wnum_2 = stoi(relation_data[4]); | ||
2159 | std::string query("INSERT INTO specification (general_id, specific_id) VALUES (?, ?)"); | ||
2160 | |||
2161 | sqlite3_stmt* ppstmt; | ||
2162 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
2163 | { | 1140 | { |
2164 | db_error(ppdb, query); | 1141 | case 1: return part_of_speech::noun; |
1142 | case 2: return part_of_speech::verb; | ||
1143 | case 3: return part_of_speech::adjective; | ||
1144 | case 4: return part_of_speech::adverb; | ||
1145 | default: throw std::domain_error("Invalid WordNet synset ID: " + std::to_string(wnid)); | ||
2165 | } | 1146 | } |
1147 | } | ||
2166 | 1148 | ||
2167 | sqlite3_bind_int(ppstmt, 1, wn[synset_id_1][wnum_1]); | 1149 | notion& generator::createNotion(part_of_speech partOfSpeech) |
2168 | sqlite3_bind_int(ppstmt, 2, wn[synset_id_2][wnum_2]); | 1150 | { |
1151 | notions_.emplace_back(partOfSpeech); | ||
1152 | |||
1153 | return notions_.back(); | ||
1154 | } | ||
2169 | 1155 | ||
2170 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | 1156 | notion& generator::lookupOrCreateNotion(int wnid) |
1157 | { | ||
1158 | if (!notionByWnid_.count(wnid)) | ||
2171 | { | 1159 | { |
2172 | db_error(ppdb, query); | 1160 | notions_.emplace_back(partOfSpeechByWnid(wnid), wnid); |
1161 | notionByWnid_[wnid] = ¬ions_.back(); | ||
2173 | } | 1162 | } |
2174 | 1163 | ||
2175 | sqlite3_finalize(ppstmt); | 1164 | return *notionByWnid_.at(wnid); |
2176 | } | ||
2177 | } | ||
2178 | |||
2179 | // sim table | ||
2180 | { | ||
2181 | std::ifstream wnsimfile(wnpref + "wn_sim.pl"); | ||
2182 | if (!wnsimfile.is_open()) | ||
2183 | { | ||
2184 | std::cout << "Invalid WordNet data directory." << std::endl; | ||
2185 | print_usage(); | ||
2186 | } | 1165 | } |
2187 | 1166 | ||
2188 | std::list<std::string> lines; | 1167 | lemma& generator::lookupOrCreateLemma(std::string base_form) |
2189 | for (;;) | ||
2190 | { | 1168 | { |
2191 | std::string line; | 1169 | if (!lemmaByBaseForm_.count(base_form)) |
2192 | if (!getline(wnsimfile, line)) | ||
2193 | { | 1170 | { |
2194 | break; | 1171 | lemmas_.emplace_back(lookupOrCreateForm(base_form)); |
1172 | lemmaByBaseForm_[base_form] = &lemmas_.back(); | ||
2195 | } | 1173 | } |
1174 | |||
1175 | return *lemmaByBaseForm_.at(base_form); | ||
1176 | } | ||
2196 | 1177 | ||
2197 | if (line.back() == '\r') | 1178 | form& generator::lookupOrCreateForm(std::string text) |
1179 | { | ||
1180 | if (!formByText_.count(text)) | ||
2198 | { | 1181 | { |
2199 | line.pop_back(); | 1182 | forms_.emplace_back(text); |
1183 | formByText_[text] = &forms_.back(); | ||
2200 | } | 1184 | } |
2201 | 1185 | ||
2202 | lines.push_back(line); | 1186 | return *formByText_[text]; |
2203 | } | 1187 | } |
2204 | 1188 | ||
2205 | progress ppgs("Writing sense synonyms...", lines.size()); | 1189 | template <typename... Args> word& generator::createWord(Args&&... args) |
2206 | for (auto line : lines) | ||
2207 | { | 1190 | { |
2208 | ppgs.update(); | 1191 | words_.emplace_back(std::forward<Args>(args)...); |
1192 | word& w = words_.back(); | ||
2209 | 1193 | ||
2210 | std::regex relation("^sim\\((3\\d{8}),(3\\d{8})\\)\\."); | 1194 | wordsByBaseForm_[w.getLemma().getBaseForm().getText()].insert(&w); |
2211 | std::smatch relation_data; | 1195 | |
2212 | if (!std::regex_search(line, relation_data, relation)) | 1196 | if (w.getNotion().hasWnid()) |
2213 | { | 1197 | { |
2214 | continue; | 1198 | wordsByWnid_[w.getNotion().getWnid()].insert(&w); |
2215 | } | 1199 | } |
2216 | 1200 | ||
2217 | int synset_id_1 = stoi(relation_data[1]); | 1201 | return w; |
2218 | int synset_id_2 = stoi(relation_data[2]); | 1202 | } |
2219 | std::string query("INSERT INTO adjective_synonymy (adjective_1_id, adjective_2_id) VALUES (?, ?)"); | 1203 | |
1204 | group& generator::createGroup(xmlNodePtr top) | ||
1205 | { | ||
1206 | groups_.emplace_back(); | ||
1207 | group& grp = groups_.back(); | ||
2220 | 1208 | ||
2221 | for (auto mapping1 : wn[synset_id_1]) | 1209 | xmlChar* key; |
1210 | |||
1211 | for (xmlNodePtr node = top->xmlChildrenNode; node != nullptr; node = node->next) | ||
2222 | { | 1212 | { |
2223 | for (auto mapping2 : wn[synset_id_2]) | 1213 | if (!xmlStrcmp(node->name, reinterpret_cast<const xmlChar*>("SUBCLASSES"))) |
2224 | { | 1214 | { |
2225 | sqlite3_stmt* ppstmt; | 1215 | for (xmlNodePtr subclass = node->xmlChildrenNode; subclass != nullptr; subclass = subclass->next) |
2226 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK) | ||
2227 | { | 1216 | { |
2228 | db_error(ppdb, query); | 1217 | if (!xmlStrcmp(subclass->name, reinterpret_cast<const xmlChar*>("VNSUBCLASS"))) |
1218 | { | ||
1219 | try | ||
1220 | { | ||
1221 | group& subgrp = createGroup(subclass); | ||
1222 | subgrp.setParent(grp); | ||
1223 | } catch (const std::exception& e) | ||
1224 | { | ||
1225 | key = xmlGetProp(subclass, reinterpret_cast<const xmlChar*>("ID")); | ||
1226 | |||
1227 | if (key == nullptr) | ||
1228 | { | ||
1229 | std::throw_with_nested(std::logic_error("Error parsing IDless subgroup")); | ||
1230 | } else { | ||
1231 | std::string subgroupId(reinterpret_cast<const char*>(key)); | ||
1232 | xmlFree(key); | ||
1233 | |||
1234 | std::throw_with_nested(std::logic_error("Error parsing subgroup " + subgroupId)); | ||
1235 | } | ||
1236 | } | ||
1237 | } | ||
2229 | } | 1238 | } |
2230 | 1239 | } else if (!xmlStrcmp(node->name, reinterpret_cast<const xmlChar*>("MEMBERS"))) | |
2231 | sqlite3_bind_int(ppstmt, 1, mapping1.second); | 1240 | { |
2232 | sqlite3_bind_int(ppstmt, 2, mapping2.second); | 1241 | for (xmlNodePtr member = node->xmlChildrenNode; member != nullptr; member = member->next) |
2233 | |||
2234 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | ||
2235 | { | 1242 | { |
2236 | db_error(ppdb, query); | 1243 | if (!xmlStrcmp(member->name, reinterpret_cast<const xmlChar*>("MEMBER"))) |
1244 | { | ||
1245 | key = xmlGetProp(member, reinterpret_cast<const xmlChar*>("wn")); | ||
1246 | std::string wnSenses(reinterpret_cast<const char*>(key)); | ||
1247 | xmlFree(key); | ||
1248 | |||
1249 | auto wnSenseKeys = split<std::list<std::string>>(wnSenses, " "); | ||
1250 | if (!wnSenseKeys.empty()) | ||
1251 | { | ||
1252 | std::list<std::string> tempKeys; | ||
1253 | |||
1254 | std::transform(std::begin(wnSenseKeys), std::end(wnSenseKeys), std::back_inserter(tempKeys), [] (std::string sense) { | ||
1255 | return sense + "::"; | ||
1256 | }); | ||
1257 | |||
1258 | std::list<std::string> filteredKeys; | ||
1259 | |||
1260 | std::remove_copy_if(std::begin(tempKeys), std::end(tempKeys), std::back_inserter(filteredKeys), [&] (std::string sense) { | ||
1261 | return !wnSenseKeys_.count(sense); | ||
1262 | }); | ||
1263 | |||
1264 | wnSenseKeys = std::move(filteredKeys); | ||
1265 | } | ||
1266 | |||
1267 | if (!wnSenseKeys.empty()) | ||
1268 | { | ||
1269 | for (std::string sense : wnSenseKeys) | ||
1270 | { | ||
1271 | word& wordSense = *wnSenseKeys_[sense]; | ||
1272 | wordSense.setVerbGroup(grp); | ||
1273 | } | ||
1274 | } else { | ||
1275 | key = xmlGetProp(member, reinterpret_cast<const xmlChar*>("name")); | ||
1276 | std::string memberName(reinterpret_cast<const char*>(key)); | ||
1277 | xmlFree(key); | ||
1278 | |||
1279 | notion& n = createNotion(part_of_speech::verb); | ||
1280 | lemma& l = lookupOrCreateLemma(memberName); | ||
1281 | word& w = createWord(n, l); | ||
1282 | |||
1283 | w.setVerbGroup(grp); | ||
1284 | } | ||
1285 | } | ||
2237 | } | 1286 | } |
2238 | 1287 | } else if (!xmlStrcmp(node->name, reinterpret_cast<const xmlChar*>("THEMROLES"))) | |
2239 | sqlite3_reset(ppstmt); | 1288 | { |
2240 | sqlite3_clear_bindings(ppstmt); | 1289 | for (xmlNodePtr roletopnode = node->xmlChildrenNode; roletopnode != nullptr; roletopnode = roletopnode->next) |
2241 | |||
2242 | sqlite3_bind_int(ppstmt, 1, mapping2.second); | ||
2243 | sqlite3_bind_int(ppstmt, 2, mapping1.second); | ||
2244 | |||
2245 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | ||
2246 | { | 1290 | { |
2247 | db_error(ppdb, query); | 1291 | if (!xmlStrcmp(roletopnode->name, reinterpret_cast<const xmlChar*>("THEMROLE"))) |
1292 | { | ||
1293 | role r; | ||
1294 | |||
1295 | key = xmlGetProp(roletopnode, reinterpret_cast<const xmlChar*>("type")); | ||
1296 | std::string roleName = reinterpret_cast<const char*>(key); | ||
1297 | xmlFree(key); | ||
1298 | |||
1299 | for (xmlNodePtr rolenode = roletopnode->xmlChildrenNode; rolenode != nullptr; rolenode = rolenode->next) | ||
1300 | { | ||
1301 | if (!xmlStrcmp(rolenode->name, reinterpret_cast<const xmlChar*>("SELRESTRS"))) | ||
1302 | { | ||
1303 | r.setSelrestrs(parseSelrestr(rolenode)); | ||
1304 | } | ||
1305 | } | ||
1306 | |||
1307 | grp.addRole(roleName, std::move(r)); | ||
1308 | } | ||
2248 | } | 1309 | } |
1310 | } else if (!xmlStrcmp(node->name, reinterpret_cast<const xmlChar*>("FRAMES"))) | ||
1311 | { | ||
1312 | for (xmlNodePtr frametopnode = node->xmlChildrenNode; frametopnode != nullptr; frametopnode = frametopnode->next) | ||
1313 | { | ||
1314 | if (!xmlStrcmp(frametopnode->name, reinterpret_cast<const xmlChar*>("FRAME"))) | ||
1315 | { | ||
1316 | frames_.emplace_back(); | ||
1317 | frame& fr = frames_.back(); | ||
2249 | 1318 | ||
2250 | sqlite3_finalize(ppstmt); | 1319 | for (xmlNodePtr framenode = frametopnode->xmlChildrenNode; framenode != nullptr; framenode = framenode->next) |
1320 | { | ||
1321 | if (!xmlStrcmp(framenode->name, reinterpret_cast<const xmlChar*>("SYNTAX"))) | ||
1322 | { | ||
1323 | for (xmlNodePtr syntaxnode = framenode->xmlChildrenNode; syntaxnode != nullptr; syntaxnode = syntaxnode->next) | ||
1324 | { | ||
1325 | if (!xmlStrcmp(syntaxnode->name, reinterpret_cast<const xmlChar*>("NP"))) | ||
1326 | { | ||
1327 | key = xmlGetProp(syntaxnode, reinterpret_cast<const xmlChar*>("value")); | ||
1328 | std::string partRole = reinterpret_cast<const char*>(key); | ||
1329 | xmlFree(key); | ||
1330 | |||
1331 | selrestr partSelrestrs; | ||
1332 | std::set<std::string> partSynrestrs; | ||
1333 | |||
1334 | for (xmlNodePtr npnode = syntaxnode->xmlChildrenNode; npnode != nullptr; npnode = npnode->next) | ||
1335 | { | ||
1336 | if (!xmlStrcmp(npnode->name, reinterpret_cast<const xmlChar*>("SYNRESTRS"))) | ||
1337 | { | ||
1338 | for (xmlNodePtr synrestr = npnode->xmlChildrenNode; synrestr != nullptr; synrestr = synrestr->next) | ||
1339 | { | ||
1340 | if (!xmlStrcmp(synrestr->name, reinterpret_cast<const xmlChar*>("SYNRESTR"))) | ||
1341 | { | ||
1342 | key = xmlGetProp(synrestr, reinterpret_cast<const xmlChar*>("type")); | ||
1343 | partSynrestrs.insert(reinterpret_cast<const char*>(key)); | ||
1344 | xmlFree(key); | ||
1345 | } | ||
1346 | } | ||
1347 | } | ||
1348 | |||
1349 | if (!xmlStrcmp(npnode->name, reinterpret_cast<const xmlChar*>("SELRESTRS"))) | ||
1350 | { | ||
1351 | partSelrestrs = parseSelrestr(npnode); | ||
1352 | } | ||
1353 | } | ||
1354 | |||
1355 | fr.push_back(part::createNounPhrase(std::move(partRole), std::move(partSelrestrs), std::move(partSynrestrs))); | ||
1356 | } else if (!xmlStrcmp(syntaxnode->name, reinterpret_cast<const xmlChar*>("VERB"))) | ||
1357 | { | ||
1358 | fr.push_back(part::createVerb()); | ||
1359 | } else if (!xmlStrcmp(syntaxnode->name, reinterpret_cast<const xmlChar*>("PREP"))) | ||
1360 | { | ||
1361 | std::set<std::string> partChoices; | ||
1362 | bool partLiteral; | ||
1363 | |||
1364 | if (xmlHasProp(syntaxnode, reinterpret_cast<const xmlChar*>("value"))) | ||
1365 | { | ||
1366 | partLiteral = true; | ||
1367 | |||
1368 | key = xmlGetProp(syntaxnode, reinterpret_cast<const xmlChar*>("value")); | ||
1369 | std::string choicesStr = reinterpret_cast<const char*>(key); | ||
1370 | xmlFree(key); | ||
1371 | |||
1372 | split(choicesStr, " ", std::inserter(partChoices, std::end(partChoices))); | ||
1373 | } else { | ||
1374 | partLiteral = false; | ||
1375 | |||
1376 | for (xmlNodePtr npnode = syntaxnode->xmlChildrenNode; npnode != nullptr; npnode = npnode->next) | ||
1377 | { | ||
1378 | if (!xmlStrcmp(npnode->name, reinterpret_cast<const xmlChar*>("SELRESTRS"))) | ||
1379 | { | ||
1380 | for (xmlNodePtr synrestr = npnode->xmlChildrenNode; synrestr != nullptr; synrestr = synrestr->next) | ||
1381 | { | ||
1382 | if (!xmlStrcmp(synrestr->name, reinterpret_cast<const xmlChar*>("SELRESTR"))) | ||
1383 | { | ||
1384 | key = xmlGetProp(synrestr, reinterpret_cast<const xmlChar*>("type")); | ||
1385 | partChoices.insert(reinterpret_cast<const char*>(key)); | ||
1386 | xmlFree(key); | ||
1387 | } | ||
1388 | } | ||
1389 | } | ||
1390 | } | ||
1391 | } | ||
1392 | |||
1393 | fr.push_back(part::createPreposition(std::move(partChoices), partLiteral)); | ||
1394 | } else if (!xmlStrcmp(syntaxnode->name, reinterpret_cast<const xmlChar*>("ADJ"))) | ||
1395 | { | ||
1396 | fr.push_back(part::createAdjective()); | ||
1397 | } else if (!xmlStrcmp(syntaxnode->name, reinterpret_cast<const xmlChar*>("ADV"))) | ||
1398 | { | ||
1399 | fr.push_back(part::createAdverb()); | ||
1400 | } else if (!xmlStrcmp(syntaxnode->name, reinterpret_cast<const xmlChar*>("LEX"))) | ||
1401 | { | ||
1402 | key = xmlGetProp(syntaxnode, reinterpret_cast<const xmlChar*>("value")); | ||
1403 | std::string literalValue = reinterpret_cast<const char*>(key); | ||
1404 | xmlFree(key); | ||
1405 | |||
1406 | fr.push_back(part::createLiteral(literalValue)); | ||
1407 | } else { | ||
1408 | continue; | ||
1409 | } | ||
1410 | } | ||
1411 | |||
1412 | grp.addFrame(fr); | ||
1413 | } | ||
1414 | } | ||
1415 | } | ||
1416 | } | ||
2251 | } | 1417 | } |
2252 | } | 1418 | } |
2253 | } | ||
2254 | } | ||
2255 | |||
2256 | // syntax table | ||
2257 | { | ||
2258 | std::ifstream wnsyntaxfile(wnpref + "wn_syntax.pl"); | ||
2259 | if (!wnsyntaxfile.is_open()) | ||
2260 | { | ||
2261 | std::cout << "Invalid WordNet data directory." << std::endl; | ||
2262 | print_usage(); | ||
2263 | } | ||
2264 | 1419 | ||
2265 | std::list<std::string> lines; | 1420 | return grp; |
2266 | for (;;) | ||
2267 | { | ||
2268 | std::string line; | ||
2269 | if (!getline(wnsyntaxfile, line)) | ||
2270 | { | ||
2271 | break; | ||
2272 | } | ||
2273 | |||
2274 | if (line.back() == '\r') | ||
2275 | { | ||
2276 | line.pop_back(); | ||
2277 | } | ||
2278 | |||
2279 | lines.push_back(line); | ||
2280 | } | 1421 | } |
2281 | 1422 | ||
2282 | progress ppgs("Writing adjective syntax markers...", lines.size()); | 1423 | selrestr generator::parseSelrestr(xmlNodePtr top) |
2283 | for (auto line : lines) | ||
2284 | { | 1424 | { |
2285 | ppgs.update(); | 1425 | xmlChar* key; |
2286 | 1426 | ||
2287 | std::regex relation("^syntax\\((3\\d{8}),(\\d+),([ipa])p?\\)\\."); | 1427 | if (!xmlStrcmp(top->name, reinterpret_cast<const xmlChar*>("SELRESTRS"))) |
2288 | std::smatch relation_data; | ||
2289 | if (!std::regex_search(line, relation_data, relation)) | ||
2290 | { | ||
2291 | continue; | ||
2292 | } | ||
2293 | |||
2294 | int synset_id = stoi(relation_data[1]); | ||
2295 | int wnum = stoi(relation_data[2]); | ||
2296 | std::string syn = relation_data[3]; | ||
2297 | std::string query("UPDATE adjectives SET position = ? WHERE adjective_id = ?"); | ||
2298 | |||
2299 | sqlite3_stmt* ppstmt; | ||
2300 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK) | ||
2301 | { | 1428 | { |
2302 | db_error(ppdb, query); | 1429 | if (xmlChildElementCount(top) == 0) |
2303 | } | 1430 | { |
2304 | 1431 | return {}; | |
2305 | sqlite3_bind_text(ppstmt, 1, syn.c_str(), 1, SQLITE_TRANSIENT); | 1432 | } else if (xmlChildElementCount(top) == 1) |
2306 | sqlite3_bind_int(ppstmt, 2, wn[synset_id][wnum]); | 1433 | { |
2307 | 1434 | return parseSelrestr(xmlFirstElementChild(top)); | |
2308 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | 1435 | } else { |
1436 | bool orlogic = false; | ||
1437 | if (xmlHasProp(top, reinterpret_cast<const xmlChar*>("logic"))) | ||
1438 | { | ||
1439 | key = xmlGetProp(top, reinterpret_cast<const xmlChar*>("logic")); | ||
1440 | if (!xmlStrcmp(key, reinterpret_cast<const xmlChar*>("or"))) | ||
1441 | { | ||
1442 | orlogic = true; | ||
1443 | } | ||
1444 | |||
1445 | xmlFree(key); | ||
1446 | } | ||
1447 | |||
1448 | std::list<selrestr> children; | ||
1449 | for (xmlNodePtr selrestr = top->xmlChildrenNode; selrestr != nullptr; selrestr = selrestr->next) | ||
1450 | { | ||
1451 | if (!xmlStrcmp(selrestr->name, reinterpret_cast<const xmlChar*>("SELRESTRS")) | ||
1452 | || !xmlStrcmp(selrestr->name, reinterpret_cast<const xmlChar*>("SELRESTR"))) | ||
1453 | { | ||
1454 | children.push_back(parseSelrestr(selrestr)); | ||
1455 | } | ||
1456 | } | ||
1457 | |||
1458 | return selrestr(children, orlogic); | ||
1459 | } | ||
1460 | } else if (!xmlStrcmp(top->name, reinterpret_cast<const xmlChar*>("SELRESTR"))) | ||
2309 | { | 1461 | { |
2310 | db_error(ppdb, query); | 1462 | key = xmlGetProp(top, reinterpret_cast<const xmlChar*>("Value")); |
1463 | bool selPos = (std::string(reinterpret_cast<const char*>(key)) == "+"); | ||
1464 | xmlFree(key); | ||
1465 | |||
1466 | key = xmlGetProp(top, reinterpret_cast<const xmlChar*>("type")); | ||
1467 | std::string selRestriction = reinterpret_cast<const char*>(key); | ||
1468 | xmlFree(key); | ||
1469 | |||
1470 | return selrestr(selRestriction, selPos); | ||
1471 | } else { | ||
1472 | throw std::logic_error("Badly formatted selrestr"); | ||
2311 | } | 1473 | } |
2312 | |||
2313 | sqlite3_finalize(ppstmt); | ||
2314 | } | 1474 | } |
2315 | } | 1475 | |
2316 | 1476 | }; | |
2317 | sqlite3_close_v2(ppdb); | 1477 | }; |
2318 | |||
2319 | std::cout << "Done." << std::endl; | ||
2320 | } | ||
diff --git a/generator/generator.h b/generator/generator.h new file mode 100644 index 0000000..e2a7404 --- /dev/null +++ b/generator/generator.h | |||
@@ -0,0 +1,151 @@ | |||
1 | #ifndef GENERATOR_H_5B61CBC5 | ||
2 | #define GENERATOR_H_5B61CBC5 | ||
3 | |||
4 | #include <string> | ||
5 | #include <map> | ||
6 | #include <list> | ||
7 | #include <set> | ||
8 | #include <libxml/parser.h> | ||
9 | #include "database.h" | ||
10 | #include "notion.h" | ||
11 | #include "word.h" | ||
12 | #include "lemma.h" | ||
13 | #include "form.h" | ||
14 | #include "pronunciation.h" | ||
15 | #include "group.h" | ||
16 | #include "frame.h" | ||
17 | |||
18 | namespace verbly { | ||
19 | namespace generator { | ||
20 | |||
21 | enum class part_of_speech; | ||
22 | class selrestr; | ||
23 | |||
24 | class generator { | ||
25 | public: | ||
26 | |||
27 | // Constructor | ||
28 | |||
29 | generator( | ||
30 | std::string verbNetPath, | ||
31 | std::string agidPath, | ||
32 | std::string wordNetPath, | ||
33 | std::string cmudictPath, | ||
34 | std::string imageNetPath, | ||
35 | std::string outputPath); | ||
36 | |||
37 | // Action | ||
38 | |||
39 | void run(); | ||
40 | |||
41 | private: | ||
42 | |||
43 | // Subroutines | ||
44 | |||
45 | void readWordNetSynsets(); | ||
46 | |||
47 | void readAdjectivePositioning(); | ||
48 | |||
49 | void readImageNetUrls(); | ||
50 | |||
51 | void readWordNetSenseKeys(); | ||
52 | |||
53 | void readVerbNet(); | ||
54 | |||
55 | void readAgidInflections(); | ||
56 | |||
57 | void readPrepositions(); | ||
58 | |||
59 | void readCmudictPronunciations(); | ||
60 | |||
61 | void writeSchema(); | ||
62 | |||
63 | void dumpObjects(); | ||
64 | |||
65 | void readWordNetAntonymy(); | ||
66 | |||
67 | void readWordNetVariation(); | ||
68 | |||
69 | void readWordNetClasses(); | ||
70 | |||
71 | void readWordNetCausality(); | ||
72 | |||
73 | void readWordNetEntailment(); | ||
74 | |||
75 | void readWordNetHypernymy(); | ||
76 | |||
77 | void readWordNetInstantiation(); | ||
78 | |||
79 | void readWordNetMemberMeronymy(); | ||
80 | |||
81 | void readWordNetPartMeronymy(); | ||
82 | |||
83 | void readWordNetSubstanceMeronymy(); | ||
84 | |||
85 | void readWordNetPertainymy(); | ||
86 | |||
87 | void readWordNetSpecification(); | ||
88 | |||
89 | void readWordNetSimilarity(); | ||
90 | |||
91 | // Helpers | ||
92 | |||
93 | std::list<std::string> readFile(std::string path); | ||
94 | |||
95 | inline part_of_speech partOfSpeechByWnid(int wnid); | ||
96 | |||
97 | notion& createNotion(part_of_speech partOfSpeech); | ||
98 | |||
99 | notion& lookupOrCreateNotion(int wnid); | ||
100 | |||
101 | lemma& lookupOrCreateLemma(std::string base_form); | ||
102 | |||
103 | form& lookupOrCreateForm(std::string text); | ||
104 | |||
105 | template <typename... Args> word& createWord(Args&&... args); | ||
106 | |||
107 | group& createGroup(xmlNodePtr top); | ||
108 | |||
109 | selrestr parseSelrestr(xmlNodePtr top); | ||
110 | |||
111 | // Input | ||
112 | |||
113 | std::string verbNetPath_; | ||
114 | std::string agidPath_; | ||
115 | std::string wordNetPath_; | ||
116 | std::string cmudictPath_; | ||
117 | std::string imageNetPath_; | ||
118 | |||
119 | // Output | ||
120 | |||
121 | database db_; | ||
122 | |||
123 | // Data | ||
124 | |||
125 | std::list<notion> notions_; | ||
126 | std::list<word> words_; | ||
127 | std::list<lemma> lemmas_; | ||
128 | std::list<form> forms_; | ||
129 | std::list<pronunciation> pronunciations_; | ||
130 | std::list<frame> frames_; | ||
131 | std::list<group> groups_; | ||
132 | |||
133 | // Indexes | ||
134 | |||
135 | std::map<int, notion*> notionByWnid_; | ||
136 | std::map<int, std::set<word*>> wordsByWnid_; | ||
137 | std::map<std::pair<int, int>, word*> wordByWnidAndWnum_; | ||
138 | std::map<std::string, std::set<word*>> wordsByBaseForm_; | ||
139 | std::map<std::string, lemma*> lemmaByBaseForm_; | ||
140 | std::map<std::string, form*> formByText_; | ||
141 | |||
142 | // Caches | ||
143 | |||
144 | std::map<std::string, word*> wnSenseKeys_; | ||
145 | |||
146 | }; | ||
147 | |||
148 | }; | ||
149 | }; | ||
150 | |||
151 | #endif /* end of include guard: GENERATOR_H_5B61CBC5 */ | ||
diff --git a/generator/group.cpp b/generator/group.cpp new file mode 100644 index 0000000..7cbd4c8 --- /dev/null +++ b/generator/group.cpp | |||
@@ -0,0 +1,119 @@ | |||
1 | #include "group.h" | ||
2 | #include <stdexcept> | ||
3 | #include <list> | ||
4 | #include <json.hpp> | ||
5 | #include "database.h" | ||
6 | #include "field.h" | ||
7 | #include "frame.h" | ||
8 | |||
9 | namespace verbly { | ||
10 | namespace generator { | ||
11 | |||
12 | int group::nextId_ = 0; | ||
13 | |||
14 | group::group() : id_(nextId_++) | ||
15 | { | ||
16 | } | ||
17 | |||
18 | void group::setParent(const group& parent) | ||
19 | { | ||
20 | // Adding a group to itself is nonsensical. | ||
21 | assert(&parent != this); | ||
22 | |||
23 | parent_ = &parent; | ||
24 | } | ||
25 | |||
26 | void group::addRole(std::string name, role r) | ||
27 | { | ||
28 | roleNames_.insert(name); | ||
29 | roles_[name] = std::move(r); | ||
30 | } | ||
31 | |||
32 | void group::addFrame(const frame& f) | ||
33 | { | ||
34 | frames_.insert(&f); | ||
35 | } | ||
36 | |||
37 | std::set<std::string> group::getRoles() const | ||
38 | { | ||
39 | std::set<std::string> fullRoles = roleNames_; | ||
40 | |||
41 | if (hasParent()) | ||
42 | { | ||
43 | for (std::string name : getParent().getRoles()) | ||
44 | { | ||
45 | fullRoles.insert(name); | ||
46 | } | ||
47 | } | ||
48 | |||
49 | return fullRoles; | ||
50 | } | ||
51 | |||
52 | const role& group::getRole(std::string name) const | ||
53 | { | ||
54 | if (roles_.count(name)) | ||
55 | { | ||
56 | return roles_.at(name); | ||
57 | } else if (hasParent()) | ||
58 | { | ||
59 | return getParent().getRole(name); | ||
60 | } else { | ||
61 | throw std::invalid_argument("Specified role not found in verb group"); | ||
62 | } | ||
63 | } | ||
64 | |||
65 | std::set<const frame*> group::getFrames() const | ||
66 | { | ||
67 | std::set<const frame*> fullFrames = frames_; | ||
68 | |||
69 | if (hasParent()) | ||
70 | { | ||
71 | for (const frame* f : getParent().getFrames()) | ||
72 | { | ||
73 | fullFrames.insert(f); | ||
74 | } | ||
75 | } | ||
76 | |||
77 | return fullFrames; | ||
78 | } | ||
79 | |||
80 | database& operator<<(database& db, const group& arg) | ||
81 | { | ||
82 | // Serialize the group first | ||
83 | { | ||
84 | std::list<field> fields; | ||
85 | fields.emplace_back("group_id", arg.getId()); | ||
86 | |||
87 | nlohmann::json jsonRoles; | ||
88 | for (std::string name : arg.getRoles()) | ||
89 | { | ||
90 | const role& r = arg.getRole(name); | ||
91 | |||
92 | nlohmann::json jsonRole; | ||
93 | jsonRole["type"] = name; | ||
94 | jsonRole["selrestrs"] = r.getSelrestrs().toJson(); | ||
95 | |||
96 | jsonRoles.emplace_back(std::move(jsonRole)); | ||
97 | } | ||
98 | |||
99 | fields.emplace_back("data", jsonRoles.dump()); | ||
100 | |||
101 | db.insertIntoTable("groups", std::move(fields)); | ||
102 | } | ||
103 | |||
104 | // Then, serialize the group/frame relationship | ||
105 | for (const frame* f : arg.getFrames()) | ||
106 | { | ||
107 | std::list<field> fields; | ||
108 | |||
109 | fields.emplace_back("group_id", arg.getId()); | ||
110 | fields.emplace_back("frame_id", f->getId()); | ||
111 | |||
112 | db.insertIntoTable("groups_frames", std::move(fields)); | ||
113 | } | ||
114 | |||
115 | return db; | ||
116 | } | ||
117 | |||
118 | }; | ||
119 | }; | ||
diff --git a/generator/group.h b/generator/group.h new file mode 100644 index 0000000..efb8c5d --- /dev/null +++ b/generator/group.h | |||
@@ -0,0 +1,80 @@ | |||
1 | #ifndef GROUP_H_EDAFB5DC | ||
2 | #define GROUP_H_EDAFB5DC | ||
3 | |||
4 | #include <map> | ||
5 | #include <set> | ||
6 | #include <string> | ||
7 | #include <cassert> | ||
8 | #include "role.h" | ||
9 | |||
10 | namespace verbly { | ||
11 | namespace generator { | ||
12 | |||
13 | class frame; | ||
14 | class database; | ||
15 | |||
16 | class group { | ||
17 | public: | ||
18 | |||
19 | // Constructor | ||
20 | |||
21 | group(); | ||
22 | |||
23 | // Mutators | ||
24 | |||
25 | void setParent(const group& parent); | ||
26 | |||
27 | void addRole(std::string name, role r); | ||
28 | |||
29 | void addFrame(const frame& f); | ||
30 | |||
31 | // Accessors | ||
32 | |||
33 | int getId() const | ||
34 | { | ||
35 | return id_; | ||
36 | } | ||
37 | |||
38 | bool hasParent() const | ||
39 | { | ||
40 | return (parent_ != nullptr); | ||
41 | } | ||
42 | |||
43 | const group& getParent() const | ||
44 | { | ||
45 | // Calling code should always call hasParent first | ||
46 | assert(parent_ != nullptr); | ||
47 | |||
48 | return *parent_; | ||
49 | } | ||
50 | |||
51 | std::set<std::string> getRoles() const; | ||
52 | |||
53 | const role& getRole(std::string name) const; | ||
54 | |||
55 | std::set<const frame*> getFrames() const; | ||
56 | |||
57 | private: | ||
58 | |||
59 | static int nextId_; | ||
60 | |||
61 | const int id_; | ||
62 | |||
63 | const group* parent_ = nullptr; | ||
64 | std::map<std::string, role> roles_; | ||
65 | std::set<const frame*> frames_; | ||
66 | |||
67 | // Caches | ||
68 | |||
69 | std::set<std::string> roleNames_; | ||
70 | |||
71 | }; | ||
72 | |||
73 | // Serializer | ||
74 | |||
75 | database& operator<<(database& db, const group& arg); | ||
76 | |||
77 | }; | ||
78 | }; | ||
79 | |||
80 | #endif /* end of include guard: GROUP_H_EDAFB5DC */ | ||
diff --git a/generator/lemma.cpp b/generator/lemma.cpp new file mode 100644 index 0000000..e66b153 --- /dev/null +++ b/generator/lemma.cpp | |||
@@ -0,0 +1,65 @@ | |||
1 | #include "lemma.h" | ||
2 | #include <list> | ||
3 | #include <cassert> | ||
4 | #include "field.h" | ||
5 | #include "database.h" | ||
6 | #include "form.h" | ||
7 | |||
8 | namespace verbly { | ||
9 | namespace generator { | ||
10 | |||
11 | int lemma::nextId_ = 0; | ||
12 | |||
13 | lemma::lemma(const form& baseForm) : | ||
14 | id_(nextId_++), | ||
15 | baseForm_(baseForm) | ||
16 | { | ||
17 | inflections_[inflection::base] = {&baseForm}; | ||
18 | } | ||
19 | |||
20 | void lemma::addInflection(inflection type, const form& f) | ||
21 | { | ||
22 | // There can only be one base form. | ||
23 | assert(type != inflection::base); | ||
24 | |||
25 | inflections_[type].insert(&f); | ||
26 | } | ||
27 | |||
28 | std::set<const form*> lemma::getInflections(inflection type) const | ||
29 | { | ||
30 | if (inflections_.count(type)) | ||
31 | { | ||
32 | return inflections_.at(type); | ||
33 | } else { | ||
34 | return {}; | ||
35 | } | ||
36 | } | ||
37 | |||
38 | database& operator<<(database& db, const lemma& arg) | ||
39 | { | ||
40 | for (inflection type : { | ||
41 | inflection::base, | ||
42 | inflection::plural, | ||
43 | inflection::comparative, | ||
44 | inflection::superlative, | ||
45 | inflection::past_tense, | ||
46 | inflection::past_participle, | ||
47 | inflection::ing_form, | ||
48 | inflection::s_form}) | ||
49 | { | ||
50 | for (const form* f : arg.getInflections(type)) | ||
51 | { | ||
52 | std::list<field> fields; | ||
53 | fields.emplace_back("lemma_id", arg.getId()); | ||
54 | fields.emplace_back("form_id", f->getId()); | ||
55 | fields.emplace_back("category", static_cast<int>(type)); | ||
56 | |||
57 | db.insertIntoTable("lemmas_forms", std::move(fields)); | ||
58 | } | ||
59 | } | ||
60 | |||
61 | return db; | ||
62 | } | ||
63 | |||
64 | }; | ||
65 | }; | ||
diff --git a/generator/lemma.h b/generator/lemma.h new file mode 100644 index 0000000..6452e08 --- /dev/null +++ b/generator/lemma.h | |||
@@ -0,0 +1,58 @@ | |||
1 | #ifndef LEMMA_H_D73105A7 | ||
2 | #define LEMMA_H_D73105A7 | ||
3 | |||
4 | #include <string> | ||
5 | #include <map> | ||
6 | #include <set> | ||
7 | #include "enums.h" | ||
8 | |||
9 | namespace verbly { | ||
10 | namespace generator { | ||
11 | |||
12 | class database; | ||
13 | class form; | ||
14 | |||
15 | class lemma { | ||
16 | public: | ||
17 | |||
18 | // Constructors | ||
19 | |||
20 | explicit lemma(const form& baseForm); | ||
21 | |||
22 | // Mutators | ||
23 | |||
24 | void addInflection(inflection type, const form& f); | ||
25 | |||
26 | // Accessors | ||
27 | |||
28 | int getId() const | ||
29 | { | ||
30 | return id_; | ||
31 | } | ||
32 | |||
33 | const form& getBaseForm() const | ||
34 | { | ||
35 | return baseForm_; | ||
36 | } | ||
37 | |||
38 | std::set<const form*> getInflections(inflection type) const; | ||
39 | |||
40 | private: | ||
41 | |||
42 | static int nextId_; | ||
43 | |||
44 | const int id_; | ||
45 | const form& baseForm_; | ||
46 | |||
47 | std::map<inflection, std::set<const form*>> inflections_; | ||
48 | |||
49 | }; | ||
50 | |||
51 | // Serializer | ||
52 | |||
53 | database& operator<<(database& db, const lemma& arg); | ||
54 | |||
55 | }; | ||
56 | }; | ||
57 | |||
58 | #endif /* end of include guard: LEMMA_H_D73105A7 */ | ||
diff --git a/generator/main.cpp b/generator/main.cpp new file mode 100644 index 0000000..827c963 --- /dev/null +++ b/generator/main.cpp | |||
@@ -0,0 +1,40 @@ | |||
1 | #include <iostream> | ||
2 | #include <exception> | ||
3 | #include "generator.h" | ||
4 | |||
// Prints the command-line synopsis and a one-line description of every
// positional argument to standard output.
void printUsage()
{
  static const char* const usageLines[] = {
    "usage: generator verbnet agid wordnet cmudict imagenet output",
    "verbnet :: path to a VerbNet data directory",
    "agid :: path to an AGID infl.txt file",
    "wordnet :: path to a WordNet prolog data directory",
    "cmudict :: path to a CMUDICT pronunciation file",
    "imagenet :: path to an ImageNet urls.txt file",
    "output :: datafile output path"
  };

  for (const char* line : usageLines)
  {
    std::cout << line << std::endl;
  }
}
15 | |||
16 | int main(int argc, char** argv) | ||
17 | { | ||
18 | if (argc == 7) | ||
19 | { | ||
20 | try | ||
21 | { | ||
22 | verbly::generator::generator app(argv[1], argv[2], argv[3], argv[4], argv[5], argv[6]); | ||
23 | |||
24 | try | ||
25 | { | ||
26 | app.run(); | ||
27 | } catch (const std::exception& e) | ||
28 | { | ||
29 | std::cout << e.what() << std::endl; | ||
30 | } | ||
31 | } catch (const std::exception& e) | ||
32 | { | ||
33 | std::cout << e.what() << std::endl; | ||
34 | printUsage(); | ||
35 | } | ||
36 | } else { | ||
37 | std::cout << "verbly datafile generator" << std::endl; | ||
38 | printUsage(); | ||
39 | } | ||
40 | } | ||
diff --git a/generator/notion.cpp b/generator/notion.cpp new file mode 100644 index 0000000..290d982 --- /dev/null +++ b/generator/notion.cpp | |||
@@ -0,0 +1,85 @@ | |||
1 | #include "notion.h" | ||
2 | #include <string> | ||
3 | #include <list> | ||
4 | #include "database.h" | ||
5 | #include "field.h" | ||
6 | |||
7 | namespace verbly { | ||
8 | namespace generator { | ||
9 | |||
10 | int notion::nextId_ = 0; | ||
11 | |||
12 | notion::notion( | ||
13 | part_of_speech partOfSpeech) : | ||
14 | id_(nextId_++), | ||
15 | partOfSpeech_(partOfSpeech) | ||
16 | { | ||
17 | } | ||
18 | |||
19 | notion::notion( | ||
20 | part_of_speech partOfSpeech, | ||
21 | int wnid) : | ||
22 | id_(nextId_++), | ||
23 | partOfSpeech_(partOfSpeech), | ||
24 | wnid_(wnid), | ||
25 | hasWnid_(true) | ||
26 | { | ||
27 | } | ||
28 | |||
29 | void notion::incrementNumOfImages() | ||
30 | { | ||
31 | // Calling code should always call hasWnid and check that the notion is a noun first. | ||
32 | assert(hasWnid_ && (partOfSpeech_ == part_of_speech::noun)); | ||
33 | |||
34 | numOfImages_++; | ||
35 | } | ||
36 | |||
37 | void notion::setPrepositionGroups(std::list<std::string> groups) | ||
38 | { | ||
39 | // Calling code should always check that the notion is a preposition first. | ||
40 | assert(partOfSpeech_ == part_of_speech::preposition); | ||
41 | |||
42 | prepositionGroups_ = groups; | ||
43 | } | ||
44 | |||
45 | database& operator<<(database& db, const notion& arg) | ||
46 | { | ||
47 | // First, serialize the notion | ||
48 | { | ||
49 | std::list<field> fields; | ||
50 | |||
51 | fields.emplace_back("notion_id", arg.getId()); | ||
52 | fields.emplace_back("part_of_speech", static_cast<int>(arg.getPartOfSpeech())); | ||
53 | |||
54 | if (arg.hasWnid()) | ||
55 | { | ||
56 | fields.emplace_back("wnid", arg.getWnid()); | ||
57 | |||
58 | if (arg.getPartOfSpeech() == part_of_speech::noun) | ||
59 | { | ||
60 | fields.emplace_back("images", arg.getNumOfImages()); | ||
61 | } | ||
62 | } | ||
63 | |||
64 | db.insertIntoTable("notions", std::move(fields)); | ||
65 | } | ||
66 | |||
67 | // Next, serialize the is_a relationship if this is a preposition | ||
68 | if (arg.getPartOfSpeech() == part_of_speech::preposition) | ||
69 | { | ||
70 | for (std::string group : arg.getPrepositionGroups()) | ||
71 | { | ||
72 | std::list<field> fields; | ||
73 | |||
74 | fields.emplace_back("notion_id", arg.getId()); | ||
75 | fields.emplace_back("groupname", group); | ||
76 | |||
77 | db.insertIntoTable("is_a", std::move(fields)); | ||
78 | } | ||
79 | } | ||
80 | |||
81 | return db; | ||
82 | } | ||
83 | |||
84 | }; | ||
85 | }; | ||
diff --git a/generator/notion.h b/generator/notion.h new file mode 100644 index 0000000..76210de --- /dev/null +++ b/generator/notion.h | |||
@@ -0,0 +1,91 @@ | |||
1 | #ifndef NOTION_H_221DE2BC | ||
2 | #define NOTION_H_221DE2BC | ||
3 | |||
4 | #include <cassert> | ||
5 | #include <list> | ||
6 | #include <string> | ||
7 | #include "enums.h" | ||
8 | |||
9 | namespace verbly { | ||
10 | namespace generator { | ||
11 | |||
12 | class database; | ||
13 | |||
14 | class notion { | ||
15 | public: | ||
16 | |||
17 | // Constructors | ||
18 | |||
19 | explicit notion(part_of_speech partOfSpeech); | ||
20 | |||
21 | notion(part_of_speech partOfSpeech, int wnid); | ||
22 | |||
23 | // Mutators | ||
24 | |||
25 | void incrementNumOfImages(); | ||
26 | |||
27 | void setPrepositionGroups(std::list<std::string> groups); | ||
28 | |||
29 | // Accessors | ||
30 | |||
31 | int getId() const | ||
32 | { | ||
33 | return id_; | ||
34 | } | ||
35 | |||
36 | part_of_speech getPartOfSpeech() const | ||
37 | { | ||
38 | return partOfSpeech_; | ||
39 | } | ||
40 | |||
41 | bool hasWnid() const | ||
42 | { | ||
43 | return hasWnid_; | ||
44 | } | ||
45 | |||
46 | int getWnid() const | ||
47 | { | ||
48 | // Calling code should always call hasWnid first. | ||
49 | assert(hasWnid_); | ||
50 | |||
51 | return wnid_; | ||
52 | } | ||
53 | |||
54 | int getNumOfImages() const | ||
55 | { | ||
56 | // Calling code should always call hasWnid and check that the notion is a noun first. | ||
57 | assert(hasWnid_ && (partOfSpeech_ == part_of_speech::noun)); | ||
58 | |||
59 | return numOfImages_; | ||
60 | } | ||
61 | |||
62 | std::list<std::string> getPrepositionGroups() const | ||
63 | { | ||
64 | // Calling code should always check that the notion is a preposition first. | ||
65 | assert(partOfSpeech_ == part_of_speech::preposition); | ||
66 | |||
67 | return prepositionGroups_; | ||
68 | } | ||
69 | |||
70 | private: | ||
71 | |||
72 | static int nextId_; | ||
73 | |||
74 | const int id_; | ||
75 | const part_of_speech partOfSpeech_; | ||
76 | const int wnid_ = 0; | ||
77 | const bool hasWnid_ = false; | ||
78 | |||
79 | int numOfImages_ = 0; | ||
80 | std::list<std::string> prepositionGroups_; | ||
81 | |||
82 | }; | ||
83 | |||
84 | // Serializer | ||
85 | |||
86 | database& operator<<(database& db, const notion& arg); | ||
87 | |||
88 | }; | ||
89 | }; | ||
90 | |||
91 | #endif /* end of include guard: NOTION_H_221DE2BC */ | ||
diff --git a/generator/part.cpp b/generator/part.cpp new file mode 100644 index 0000000..dbd4e11 --- /dev/null +++ b/generator/part.cpp | |||
@@ -0,0 +1,336 @@ | |||
1 | #include "part.h" | ||
2 | #include <stdexcept> | ||
3 | #include "selrestr.h" | ||
4 | |||
5 | namespace verbly { | ||
6 | namespace generator { | ||
7 | |||
8 | part part::createNounPhrase(std::string role, selrestr selrestrs, std::set<std::string> synrestrs) | ||
9 | { | ||
10 | part p(type::noun_phrase); | ||
11 | |||
12 | new(&p.noun_phrase_.role) std::string(std::move(role)); | ||
13 | new(&p.noun_phrase_.selrestrs) selrestr(std::move(selrestrs)); | ||
14 | new(&p.noun_phrase_.synrestrs) std::set<std::string>(std::move(synrestrs)); | ||
15 | |||
16 | return p; | ||
17 | } | ||
18 | |||
19 | part part::createVerb() | ||
20 | { | ||
21 | return part(type::verb); | ||
22 | } | ||
23 | |||
24 | part part::createPreposition(std::set<std::string> choices, bool literal) | ||
25 | { | ||
26 | part p(type::preposition); | ||
27 | |||
28 | new(&p.preposition_.choices) std::set<std::string>(std::move(choices)); | ||
29 | p.preposition_.literal = literal; | ||
30 | |||
31 | return p; | ||
32 | } | ||
33 | |||
34 | part part::createAdjective() | ||
35 | { | ||
36 | return part(type::adjective); | ||
37 | } | ||
38 | |||
39 | part part::createAdverb() | ||
40 | { | ||
41 | return part(type::adverb); | ||
42 | } | ||
43 | |||
44 | part part::createLiteral(std::string value) | ||
45 | { | ||
46 | part p(type::literal); | ||
47 | |||
48 | new(&p.literal_) std::string(std::move(value)); | ||
49 | |||
50 | return p; | ||
51 | } | ||
52 | |||
53 | part::part(const part& other) | ||
54 | { | ||
55 | type_ = other.type_; | ||
56 | |||
57 | switch (type_) | ||
58 | { | ||
59 | case type::noun_phrase: | ||
60 | { | ||
61 | new(&noun_phrase_.role) std::string(other.noun_phrase_.role); | ||
62 | new(&noun_phrase_.selrestrs) selrestr(other.noun_phrase_.selrestrs); | ||
63 | new(&noun_phrase_.synrestrs) std::set<std::string>(other.noun_phrase_.synrestrs); | ||
64 | |||
65 | break; | ||
66 | } | ||
67 | |||
68 | case type::preposition: | ||
69 | { | ||
70 | new(&preposition_.choices) std::set<std::string>(other.preposition_.choices); | ||
71 | preposition_.literal = other.preposition_.literal; | ||
72 | |||
73 | break; | ||
74 | } | ||
75 | |||
76 | case type::literal: | ||
77 | { | ||
78 | new(&literal_) std::string(other.literal_); | ||
79 | |||
80 | break; | ||
81 | } | ||
82 | |||
83 | case type::verb: | ||
84 | case type::adjective: | ||
85 | case type::adverb: | ||
86 | case type::invalid: | ||
87 | { | ||
88 | break; | ||
89 | } | ||
90 | } | ||
91 | } | ||
92 | |||
93 | part::part(part&& other) : part() | ||
94 | { | ||
95 | swap(*this, other); | ||
96 | } | ||
97 | |||
98 | part& part::operator=(part other) | ||
99 | { | ||
100 | swap(*this, other); | ||
101 | |||
102 | return *this; | ||
103 | } | ||
104 | |||
105 | void swap(part& first, part& second) | ||
106 | { | ||
107 | using type = part::type; | ||
108 | |||
109 | type tempType = first.type_; | ||
110 | std::string tempRole; | ||
111 | selrestr tempSelrestrs; | ||
112 | std::set<std::string> tempSynrestrs; | ||
113 | std::set<std::string> tempChoices; | ||
114 | bool tempPrepLiteral; | ||
115 | std::string tempLiteralValue; | ||
116 | |||
117 | switch (tempType) | ||
118 | { | ||
119 | case type::noun_phrase: | ||
120 | { | ||
121 | tempRole = std::move(first.noun_phrase_.role); | ||
122 | tempSelrestrs = std::move(first.noun_phrase_.selrestrs); | ||
123 | tempSynrestrs = std::move(first.noun_phrase_.synrestrs); | ||
124 | |||
125 | break; | ||
126 | } | ||
127 | |||
128 | case type::preposition: | ||
129 | { | ||
130 | tempChoices = std::move(first.preposition_.choices); | ||
131 | tempPrepLiteral = first.preposition_.literal; | ||
132 | |||
133 | break; | ||
134 | } | ||
135 | |||
136 | case type::literal: | ||
137 | { | ||
138 | tempLiteralValue = std::move(first.literal_); | ||
139 | |||
140 | break; | ||
141 | } | ||
142 | |||
143 | case type::verb: | ||
144 | case type::adjective: | ||
145 | case type::adverb: | ||
146 | case type::invalid: | ||
147 | { | ||
148 | break; | ||
149 | } | ||
150 | } | ||
151 | |||
152 | first.~part(); | ||
153 | |||
154 | first.type_ = second.type_; | ||
155 | |||
156 | switch (first.type_) | ||
157 | { | ||
158 | case type::noun_phrase: | ||
159 | { | ||
160 | new(&first.noun_phrase_.role) std::string(std::move(second.noun_phrase_.role)); | ||
161 | new(&first.noun_phrase_.selrestrs) selrestr(std::move(second.noun_phrase_.selrestrs)); | ||
162 | new(&first.noun_phrase_.synrestrs) std::set<std::string>(std::move(second.noun_phrase_.synrestrs)); | ||
163 | |||
164 | break; | ||
165 | } | ||
166 | |||
167 | case type::preposition: | ||
168 | { | ||
169 | new(&first.preposition_.choices) std::set<std::string>(std::move(second.preposition_.choices)); | ||
170 | first.preposition_.literal = second.preposition_.literal; | ||
171 | |||
172 | break; | ||
173 | } | ||
174 | |||
175 | case type::literal: | ||
176 | { | ||
177 | new(&first.literal_) std::string(std::move(second.literal_)); | ||
178 | |||
179 | break; | ||
180 | } | ||
181 | |||
182 | case type::verb: | ||
183 | case type::adjective: | ||
184 | case type::adverb: | ||
185 | case type::invalid: | ||
186 | { | ||
187 | break; | ||
188 | } | ||
189 | } | ||
190 | |||
191 | second.~part(); | ||
192 | |||
193 | second.type_ = tempType; | ||
194 | |||
195 | switch (second.type_) | ||
196 | { | ||
197 | case type::noun_phrase: | ||
198 | { | ||
199 | new(&second.noun_phrase_.role) std::string(std::move(tempRole)); | ||
200 | new(&second.noun_phrase_.selrestrs) selrestr(std::move(tempSelrestrs)); | ||
201 | new(&second.noun_phrase_.synrestrs) std::set<std::string>(std::move(tempSynrestrs)); | ||
202 | |||
203 | break; | ||
204 | } | ||
205 | |||
206 | case type::preposition: | ||
207 | { | ||
208 | new(&second.preposition_.choices) std::set<std::string>(std::move(tempChoices)); | ||
209 | second.preposition_.literal = tempPrepLiteral; | ||
210 | |||
211 | break; | ||
212 | } | ||
213 | |||
214 | case type::literal: | ||
215 | { | ||
216 | new(&second.literal_) std::string(std::move(tempLiteralValue)); | ||
217 | |||
218 | break; | ||
219 | } | ||
220 | |||
221 | case type::verb: | ||
222 | case type::adjective: | ||
223 | case type::adverb: | ||
224 | case type::invalid: | ||
225 | { | ||
226 | break; | ||
227 | } | ||
228 | } | ||
229 | } | ||
230 | |||
231 | part::~part() | ||
232 | { | ||
233 | switch (type_) | ||
234 | { | ||
235 | case type::noun_phrase: | ||
236 | { | ||
237 | using string_type = std::string; | ||
238 | using set_type = std::set<std::string>; | ||
239 | |||
240 | noun_phrase_.role.~string_type(); | ||
241 | noun_phrase_.selrestrs.~selrestr(); | ||
242 | noun_phrase_.synrestrs.~set_type(); | ||
243 | |||
244 | break; | ||
245 | } | ||
246 | |||
247 | case type::preposition: | ||
248 | { | ||
249 | using set_type = std::set<std::string>; | ||
250 | |||
251 | preposition_.choices.~set_type(); | ||
252 | |||
253 | break; | ||
254 | } | ||
255 | |||
256 | case type::literal: | ||
257 | { | ||
258 | using string_type = std::string; | ||
259 | |||
260 | literal_.~string_type(); | ||
261 | |||
262 | break; | ||
263 | } | ||
264 | |||
265 | case type::verb: | ||
266 | case type::adjective: | ||
267 | case type::adverb: | ||
268 | case type::invalid: | ||
269 | { | ||
270 | break; | ||
271 | } | ||
272 | } | ||
273 | } | ||
274 | |||
275 | std::string part::getNounRole() const | ||
276 | { | ||
277 | if (type_ == type::noun_phrase) | ||
278 | { | ||
279 | return noun_phrase_.role; | ||
280 | } else { | ||
281 | throw std::domain_error("part::getNounRole is only valid for noun phrase parts"); | ||
282 | } | ||
283 | } | ||
284 | |||
285 | selrestr part::getNounSelrestrs() const | ||
286 | { | ||
287 | if (type_ == type::noun_phrase) | ||
288 | { | ||
289 | return noun_phrase_.selrestrs; | ||
290 | } else { | ||
291 | throw std::domain_error("part::getNounSelrestrs is only valid for noun phrase parts"); | ||
292 | } | ||
293 | } | ||
294 | |||
295 | std::set<std::string> part::getNounSynrestrs() const | ||
296 | { | ||
297 | if (type_ == type::noun_phrase) | ||
298 | { | ||
299 | return noun_phrase_.synrestrs; | ||
300 | } else { | ||
301 | throw std::domain_error("part::getNounSynrestrs is only valid for noun phrase parts"); | ||
302 | } | ||
303 | } | ||
304 | |||
305 | std::set<std::string> part::getPrepositionChoices() const | ||
306 | { | ||
307 | if (type_ == type::preposition) | ||
308 | { | ||
309 | return preposition_.choices; | ||
310 | } else { | ||
311 | throw std::domain_error("part::getPrepositionChoices is only valid for preposition parts"); | ||
312 | } | ||
313 | } | ||
314 | |||
315 | bool part::isPrepositionLiteral() const | ||
316 | { | ||
317 | if (type_ == type::preposition) | ||
318 | { | ||
319 | return preposition_.literal; | ||
320 | } else { | ||
321 | throw std::domain_error("part::isPrepositionLiteral is only valid for preposition parts"); | ||
322 | } | ||
323 | } | ||
324 | |||
325 | std::string part::getLiteralValue() const | ||
326 | { | ||
327 | if (type_ == type::literal) | ||
328 | { | ||
329 | return literal_; | ||
330 | } else { | ||
331 | throw std::domain_error("part::getLiteralValue is only valid for literal parts"); | ||
332 | } | ||
333 | } | ||
334 | |||
335 | }; | ||
336 | }; | ||
diff --git a/generator/part.h b/generator/part.h new file mode 100644 index 0000000..d044630 --- /dev/null +++ b/generator/part.h | |||
@@ -0,0 +1,114 @@ | |||
1 | #ifndef PART_H_FB54F361 | ||
2 | #define PART_H_FB54F361 | ||
3 | |||
4 | #include <string> | ||
5 | #include <set> | ||
6 | #include "selrestr.h" | ||
7 | |||
8 | namespace verbly { | ||
9 | namespace generator { | ||
10 | |||
11 | class part { | ||
12 | public: | ||
13 | enum class type { | ||
14 | invalid = -1, | ||
15 | noun_phrase = 0, | ||
16 | verb = 1, | ||
17 | preposition = 2, | ||
18 | adjective = 3, | ||
19 | adverb = 4, | ||
20 | literal = 5 | ||
21 | }; | ||
22 | |||
23 | // Static factories | ||
24 | |||
25 | static part createNounPhrase(std::string role, selrestr selrestrs, std::set<std::string> synrestrs); | ||
26 | |||
27 | static part createVerb(); | ||
28 | |||
29 | static part createPreposition(std::set<std::string> choices, bool literal); | ||
30 | |||
31 | static part createAdjective(); | ||
32 | |||
33 | static part createAdverb(); | ||
34 | |||
35 | static part createLiteral(std::string value); | ||
36 | |||
37 | // Copy and move constructors | ||
38 | |||
39 | part(const part& other); | ||
40 | |||
41 | part(part&& other); | ||
42 | |||
43 | // Assignment | ||
44 | |||
45 | part& operator=(part other); | ||
46 | |||
47 | // Swap | ||
48 | |||
49 | friend void swap(part& first, part& second); | ||
50 | |||
51 | // Destructor | ||
52 | |||
53 | ~part(); | ||
54 | |||
55 | // General accessors | ||
56 | |||
57 | type getType() const | ||
58 | { | ||
59 | return type_; | ||
60 | } | ||
61 | |||
62 | // Noun phrase accessors | ||
63 | |||
64 | std::string getNounRole() const; | ||
65 | |||
66 | selrestr getNounSelrestrs() const; | ||
67 | |||
68 | std::set<std::string> getNounSynrestrs() const; | ||
69 | |||
70 | // Preposition accessors | ||
71 | |||
72 | std::set<std::string> getPrepositionChoices() const; | ||
73 | |||
74 | bool isPrepositionLiteral() const; | ||
75 | |||
76 | // Literal accessors | ||
77 | |||
78 | std::string getLiteralValue() const; | ||
79 | |||
80 | private: | ||
81 | |||
82 | // Private constructors | ||
83 | |||
84 | part() | ||
85 | { | ||
86 | } | ||
87 | |||
88 | part(type t) : type_(t) | ||
89 | { | ||
90 | } | ||
91 | |||
92 | // Data | ||
93 | |||
94 | union { | ||
95 | struct { | ||
96 | std::string role; | ||
97 | selrestr selrestrs; | ||
98 | std::set<std::string> synrestrs; | ||
99 | } noun_phrase_; | ||
100 | struct { | ||
101 | std::set<std::string> choices; | ||
102 | bool literal; | ||
103 | } preposition_; | ||
104 | std::string literal_; | ||
105 | }; | ||
106 | |||
107 | type type_ = type::invalid; | ||
108 | |||
109 | }; | ||
110 | |||
111 | }; | ||
112 | }; | ||
113 | |||
114 | #endif /* end of include guard: PART_H_FB54F361 */ | ||
diff --git a/generator/progress.h b/generator/progress.h index 81f07a3..fcb680d 100644 --- a/generator/progress.h +++ b/generator/progress.h | |||
@@ -3,48 +3,54 @@ | |||
3 | 3 | ||
#include <iostream>
#include <string>
5 | 5 | ||
namespace verbly {
  namespace generator {

    // Console progress indicator. Prints the message followed by a percentage
    // on construction, rewrites the percentage in place (using backspaces) as
    // update() is called, and prints 100% plus a newline on destruction.
    class progress {
    private:
      std::string message_;
      int total_;
      int cur_ = 0;
      int lprint_ = 0; // last percentage written to the console

      // Current completion as a whole percentage. A non-positive total means
      // there is nothing to do, which is reported as complete rather than
      // dividing by zero. The product is computed in 64 bits so large totals
      // do not overflow.
      int percent() const
      {
        if (total_ <= 0)
        {
          return 100;
        }

        return static_cast<int>(static_cast<long long>(cur_) * 100 / total_);
      }

    public:
      // message: label printed before the percentage.
      // total: value of update() that corresponds to 100%.
      progress(std::string message, int total) : message_(message), total_(total)
      {
        std::cout << message_ << " 0%" << std::flush;
      }

      // Sets the current position (capped at total) and reprints the
      // percentage if it changed since the last print.
      void update(int val)
      {
        if (val <= total_)
        {
          cur_ = val;
        } else {
          cur_ = total_;
        }

        int pp = percent();
        if (pp != lprint_)
        {
          lprint_ = pp;

          std::cout << "\b\b\b\b" << std::right;
          std::cout.width(3);
          std::cout << pp << "%" << std::flush;
        }
      }

      // Advances the current position by one.
      void update()
      {
        update(cur_+1);
      }

      ~progress()
      {
        std::cout << "\b\b\b\b100%" << std::endl;
      }
    };

  };
};
49 | 55 | ||
50 | #endif /* end of include guard: PROGRESS_H_A34EF856 */ | 56 | #endif /* end of include guard: PROGRESS_H_A34EF856 */ |
diff --git a/generator/pronunciation.cpp b/generator/pronunciation.cpp new file mode 100644 index 0000000..eb07607 --- /dev/null +++ b/generator/pronunciation.cpp | |||
@@ -0,0 +1,87 @@ | |||
1 | #include "pronunciation.h" | ||
2 | #include <list> | ||
3 | #include <algorithm> | ||
4 | #include <cctype> | ||
5 | #include <iterator> | ||
6 | #include "database.h" | ||
7 | #include "field.h" | ||
8 | #include "../lib/util.h" | ||
9 | |||
10 | namespace verbly { | ||
11 | namespace generator { | ||
12 | |||
13 | int pronunciation::nextId_ = 0; | ||
14 | |||
15 | pronunciation::pronunciation(std::string phonemes) : | ||
16 | id_(nextId_++), | ||
17 | phonemes_(phonemes) | ||
18 | { | ||
19 | auto phonemeList = split<std::list<std::string>>(phonemes, " "); | ||
20 | |||
21 | auto rhymeStart = std::find_if(std::begin(phonemeList), std::end(phonemeList), [] (std::string phoneme) { | ||
22 | return phoneme.find("1") != std::string::npos; | ||
23 | }); | ||
24 | |||
25 | // Rhyme detection | ||
26 | if (rhymeStart != std::end(phonemeList)) | ||
27 | { | ||
28 | std::list<std::string> rhymePhonemes; | ||
29 | |||
30 | std::transform(rhymeStart, std::end(phonemeList), std::back_inserter(rhymePhonemes), [] (std::string phoneme) { | ||
31 | std::string naked; | ||
32 | |||
33 | std::remove_copy_if(std::begin(phoneme), std::end(phoneme), std::back_inserter(naked), [] (char ch) { | ||
34 | return std::isdigit(ch); | ||
35 | }); | ||
36 | |||
37 | return naked; | ||
38 | }); | ||
39 | |||
40 | rhyme_ = implode(std::begin(rhymePhonemes), std::end(rhymePhonemes), " "); | ||
41 | |||
42 | if (rhymeStart != std::begin(phonemeList)) | ||
43 | { | ||
44 | prerhyme_ = *std::prev(rhymeStart); | ||
45 | } | ||
46 | } | ||
47 | |||
48 | // Syllable/stress | ||
49 | for (std::string phoneme : phonemeList) | ||
50 | { | ||
51 | if (std::isdigit(phoneme.back())) | ||
52 | { | ||
53 | // It's a vowel! | ||
54 | syllables_++; | ||
55 | |||
56 | if (phoneme.back() == '1') | ||
57 | { | ||
58 | stress_.push_back('1'); | ||
59 | } else { | ||
60 | stress_.push_back('0'); | ||
61 | } | ||
62 | } | ||
63 | } | ||
64 | } | ||
65 | |||
66 | database& operator<<(database& db, const pronunciation& arg) | ||
67 | { | ||
68 | std::list<field> fields; | ||
69 | |||
70 | fields.emplace_back("pronunciation_id", arg.getId()); | ||
71 | fields.emplace_back("phonemes", arg.getPhonemes()); | ||
72 | fields.emplace_back("syllables", arg.getSyllables()); | ||
73 | fields.emplace_back("stress", arg.getStress()); | ||
74 | |||
75 | if (arg.hasRhyme()) | ||
76 | { | ||
77 | fields.emplace_back("rhyme", arg.getRhymePhonemes()); | ||
78 | fields.emplace_back("prerhyme", arg.getPrerhyme()); | ||
79 | } | ||
80 | |||
81 | db.insertIntoTable("pronunciations", std::move(fields)); | ||
82 | |||
83 | return db; | ||
84 | } | ||
85 | |||
86 | }; | ||
87 | }; | ||
diff --git a/generator/pronunciation.h b/generator/pronunciation.h new file mode 100644 index 0000000..81be6c4 --- /dev/null +++ b/generator/pronunciation.h | |||
@@ -0,0 +1,82 @@ | |||
1 | #ifndef PRONUNCIATION_H_584A08DD | ||
2 | #define PRONUNCIATION_H_584A08DD | ||
3 | |||
4 | #include <string> | ||
5 | #include <cassert> | ||
6 | |||
7 | namespace verbly { | ||
8 | namespace generator { | ||
9 | |||
10 | class database; | ||
11 | |||
// A pronunciation: the raw phoneme string plus the rhyme, pre-rhyme,
// syllable count and stress pattern derived from it by the constructor.
class pronunciation {
public:

  // Constructor

  explicit pronunciation(std::string phonemes);

  // Accessors

  // Identifier assigned to this pronunciation at construction.
  int getId() const
  {
    return id_;
  }

  // The phoneme string exactly as passed to the constructor.
  std::string getPhonemes() const
  {
    return phonemes_;
  }

  // True when a non-empty rhyme was derived.
  bool hasRhyme() const
  {
    return !rhyme_.empty();
  }

  // The rhyme phonemes. Check hasRhyme() before calling.
  std::string getRhymePhonemes() const
  {
    assert(!rhyme_.empty());

    return rhyme_;
  }

  // The phoneme preceding the rhyme (may be empty). Check hasRhyme() before
  // calling.
  std::string getPrerhyme() const
  {
    assert(!rhyme_.empty());

    return prerhyme_;
  }

  int getSyllables() const
  {
    return syllables_;
  }

  // One '0' or '1' character per syllable.
  std::string getStress() const
  {
    return stress_;
  }

private:

  // Source of identifiers for new pronunciations.
  static int nextId_;

  const int id_;
  const std::string phonemes_;
  std::string rhyme_;
  std::string prerhyme_;
  int syllables_ = 0;
  std::string stress_;

};
74 | |||
75 | // Serializer | ||
76 | |||
77 | database& operator<<(database& db, const pronunciation& arg); | ||
78 | |||
79 | }; | ||
80 | }; | ||
81 | |||
82 | #endif /* end of include guard: PRONUNCIATION_H_584A08DD */ | ||
diff --git a/generator/role.h b/generator/role.h new file mode 100644 index 0000000..5fa68b8 --- /dev/null +++ b/generator/role.h | |||
@@ -0,0 +1,35 @@ | |||
1 | #ifndef ROLE_H_249F9A9C | ||
2 | #define ROLE_H_249F9A9C | ||
3 | |||
4 | #include "selrestr.h" | ||
5 | |||
6 | namespace verbly { | ||
7 | namespace generator { | ||
8 | |||
9 | class role { | ||
10 | public: | ||
11 | |||
12 | // Mutators | ||
13 | |||
14 | void setSelrestrs(selrestr selrestrs) | ||
15 | { | ||
16 | selrestrs_ = selrestrs; | ||
17 | } | ||
18 | |||
19 | // Accessors | ||
20 | |||
21 | const selrestr& getSelrestrs() const | ||
22 | { | ||
23 | return selrestrs_; | ||
24 | } | ||
25 | |||
26 | private: | ||
27 | |||
28 | selrestr selrestrs_; | ||
29 | |||
30 | }; | ||
31 | |||
32 | }; | ||
33 | }; | ||
34 | |||
35 | #endif /* end of include guard: ROLE_H_249F9A9C */ | ||
diff --git a/generator/schema.sql b/generator/schema.sql index 410b536..c3e54d8 100644 --- a/generator/schema.sql +++ b/generator/schema.sql | |||
@@ -1,286 +1,204 @@ | |||
1 | DROP TABLE IF EXISTS `verbs`; | 1 | CREATE TABLE `notions` ( |
2 | CREATE TABLE `verbs` ( | 2 | `notion_id` INTEGER PRIMARY KEY, |
3 | `verb_id` INTEGER PRIMARY KEY, | 3 | `part_of_speech` SMALLINT NOT NULL, |
4 | `infinitive` VARCHAR(32) NOT NULL, | 4 | `wnid` INTEGER, |
5 | `past_tense` VARCHAR(32) NOT NULL, | 5 | `images` INTEGER |
6 | `past_participle` VARCHAR(32) NOT NULL, | ||
7 | `ing_form` VARCHAR(32) NOT NULL, | ||
8 | `s_form` VARCHAR(32) NOT NULL | ||
9 | ); | 6 | ); |
10 | 7 | ||
11 | DROP TABLE IF EXISTS `groups`; | 8 | CREATE UNIQUE INDEX `notion_by_wnid` ON `notions`(`wnid`); |
12 | CREATE TABLE `groups` ( | ||
13 | `group_id` INTEGER PRIMARY KEY, | ||
14 | `data` BLOB NOT NULL | ||
15 | ); | ||
16 | |||
17 | DROP TABLE IF EXISTS `frames`; | ||
18 | CREATE TABLE `frames` ( | ||
19 | `frame_id` INTEGER PRIMARY KEY, | ||
20 | `group_id` INTEGER NOT NULL, | ||
21 | `data` BLOB NOT NULL, | ||
22 | FOREIGN KEY (`group_id`) REFERENCES `groups`(`group_id`) | ||
23 | ); | ||
24 | 9 | ||
25 | DROP TABLE IF EXISTS `verb_groups`; | ||
26 | CREATE TABLE `verb_groups` ( | ||
27 | `verb_id` INTEGER NOT NULL, | ||
28 | `group_id` INTEGER NOT NULL, | ||
29 | FOREIGN KEY (`verb_id`) REFERENCES `verbs`(`verb_id`), | ||
30 | FOREIGN KEY (`group_id`) REFERENCES `groups`(`group_id`) | ||
31 | ); | ||
32 | |||
33 | DROP TABLE IF EXISTS `adjectives`; | ||
34 | CREATE TABLE `adjectives` ( | ||
35 | `adjective_id` INTEGER PRIMARY KEY, | ||
36 | `base_form` VARCHAR(32) NOT NULL, | ||
37 | `comparative` VARCHAR(32), | ||
38 | `superlative` VARCHAR(32), | ||
39 | `position` CHAR(1), | ||
40 | `complexity` INTEGER NOT NULL | ||
41 | ); | ||
42 | |||
43 | DROP TABLE IF EXISTS `adverbs`; | ||
44 | CREATE TABLE `adverbs` ( | ||
45 | `adverb_id` INTEGER PRIMARY KEY, | ||
46 | `base_form` VARCHAR(32) NOT NULL, | ||
47 | `comparative` VARCHAR(32), | ||
48 | `superlative` VARCHAR(32), | ||
49 | `complexity` INTEGER NOT NULL | ||
50 | ); | ||
51 | |||
52 | DROP TABLE IF EXISTS `nouns`; | ||
53 | CREATE TABLE `nouns` ( | ||
54 | `noun_id` INTEGER PRIMARY KEY, | ||
55 | `singular` VARCHAR(32) NOT NULL, | ||
56 | `plural` VARCHAR(32), | ||
57 | `proper` INTEGER(1) NOT NULL, | ||
58 | `complexity` INTEGER NOT NULL, | ||
59 | `images` INTEGER NOT NULL, | ||
60 | `wnid` INTEGER NOT NULL | ||
61 | ); | ||
62 | |||
63 | DROP TABLE IF EXISTS `hypernymy`; | ||
64 | CREATE TABLE `hypernymy` ( | 10 | CREATE TABLE `hypernymy` ( |
65 | `hypernym_id` INTEGER NOT NULL, | 11 | `hypernym_id` INTEGER NOT NULL, |
66 | `hyponym_id` INTEGER NOT NULL, | 12 | `hyponym_id` INTEGER NOT NULL |
67 | FOREIGN KEY (`hypernym_id`) REFERENCES `nouns`(`noun_id`), | ||
68 | FOREIGN KEY (`hyponym_id`) REFERENCES `nouns`(`noun_id`) | ||
69 | ); | 13 | ); |
70 | 14 | ||
71 | DROP TABLE IF EXISTS `instantiation`; | 15 | CREATE INDEX `hyponym_of` ON `hypernymy`(`hypernym_id`); |
16 | CREATE INDEX `hypernym_of` ON `hypernymy`(`hyponym_id`); | ||
17 | |||
72 | CREATE TABLE `instantiation` ( | 18 | CREATE TABLE `instantiation` ( |
73 | `class_id` INTEGER NOT NULL, | 19 | `class_id` INTEGER NOT NULL, |
74 | `instance_id` INTEGER NOT NULL, | 20 | `instance_id` INTEGER NOT NULL |
75 | FOREIGN KEY (`class_id`) REFERENCES `nouns`(`noun_id`), | ||
76 | FOREIGN KEY (`instance_id`) REFERENCES `nouns`(`noun_id`) | ||
77 | ); | 21 | ); |
78 | 22 | ||
79 | DROP TABLE IF EXISTS `member_meronymy`; | 23 | CREATE INDEX `instance_of` ON `instantiation`(`class_id`); |
24 | CREATE INDEX `class_of` ON `instantiation`(`instance_id`); | ||
25 | |||
80 | CREATE TABLE `member_meronymy` ( | 26 | CREATE TABLE `member_meronymy` ( |
81 | `meronym_id` INTEGER NOT NULL, | 27 | `meronym_id` INTEGER NOT NULL, |
82 | `holonym_id` INTEGER NOT NULL, | 28 | `holonym_id` INTEGER NOT NULL |
83 | FOREIGN KEY (`meronym_id`) REFERENCES `nouns`(`noun_id`), | ||
84 | FOREIGN KEY (`holonym_id`) REFERENCES `nouns`(`noun_id`) | ||
85 | ); | 29 | ); |
86 | 30 | ||
87 | DROP TABLE IF EXISTS `part_meronymy`; | 31 | CREATE INDEX `member_holonym_of` ON `member_meronymy`(`meronym_id`); |
32 | CREATE INDEX `member_meronym_of` ON `member_meronymy`(`holonym_id`); | ||
33 | |||
88 | CREATE TABLE `part_meronymy` ( | 34 | CREATE TABLE `part_meronymy` ( |
89 | `meronym_id` INTEGER NOT NULL, | 35 | `meronym_id` INTEGER NOT NULL, |
90 | `holonym_id` INTEGER NOT NULL, | 36 | `holonym_id` INTEGER NOT NULL |
91 | FOREIGN KEY (`meronym_id`) REFERENCES `nouns`(`noun_id`), | ||
92 | FOREIGN KEY (`holonym_id`) REFERENCES `nouns`(`noun_id`) | ||
93 | ); | 37 | ); |
94 | 38 | ||
95 | DROP TABLE IF EXISTS `substance_meronymy`; | 39 | CREATE INDEX `part_holonym_of` ON `part_meronymy`(`meronym_id`); |
40 | CREATE INDEX `part_meronym_of` ON `part_meronymy`(`holonym_id`); | ||
41 | |||
96 | CREATE TABLE `substance_meronymy` ( | 42 | CREATE TABLE `substance_meronymy` ( |
97 | `meronym_id` INTEGER NOT NULL, | 43 | `meronym_id` INTEGER NOT NULL, |
98 | `holonym_id` INTEGER NOT NULL, | 44 | `holonym_id` INTEGER NOT NULL |
99 | FOREIGN KEY (`meronym_id`) REFERENCES `nouns`(`noun_id`), | ||
100 | FOREIGN KEY (`holonym_id`) REFERENCES `nouns`(`noun_id`) | ||
101 | ); | 45 | ); |
102 | 46 | ||
103 | DROP TABLE IF EXISTS `variation`; | 47 | CREATE INDEX `substance_holonym_of` ON `substance_meronymy`(`meronym_id`); |
48 | CREATE INDEX `substance_meronym_of` ON `substance_meronymy`(`holonym_id`); | ||
49 | |||
104 | CREATE TABLE `variation` ( | 50 | CREATE TABLE `variation` ( |
105 | `noun_id` INTEGER NOT NULL, | 51 | `noun_id` INTEGER NOT NULL, |
106 | `adjective_id` INTEGER NOT NULL, | 52 | `adjective_id` INTEGER NOT NULL |
107 | FOREIGN KEY (`noun_id`) REFERENCES `nouns`(`noun_id`), | ||
108 | FOREIGN KEY (`adjective_id`) REFERENCES `adjectives`(`adjective_id`) | ||
109 | ); | 53 | ); |
110 | 54 | ||
111 | DROP TABLE IF EXISTS `noun_antonymy`; | 55 | CREATE INDEX `variant_of` ON `variation`(`noun_id`); |
112 | CREATE TABLE `noun_antonymy` ( | 56 | CREATE INDEX `attribute_of` ON `variation`(`adjective_id`); |
113 | `noun_1_id` INTEGER NOT NULL, | ||
114 | `noun_2_id` INTEGER NOT NULL, | ||
115 | FOREIGN KEY (`noun_1_id`) REFERENCES `nouns`(`noun_id`), | ||
116 | FOREIGN KEY (`noun_2_id`) REFERENCES `nouns`(`noun_id`) | ||
117 | ); | ||
118 | 57 | ||
119 | DROP TABLE IF EXISTS `adjective_antonymy`; | 58 | CREATE TABLE `similarity` ( |
120 | CREATE TABLE `adjective_antonymy` ( | ||
121 | `adjective_1_id` INTEGER NOT NULL, | 59 | `adjective_1_id` INTEGER NOT NULL, |
122 | `adjective_2_id` INTEGER NOT NULL, | 60 | `adjective_2_id` INTEGER NOT NULL |
123 | FOREIGN KEY (`adjective_1_id`) REFERENCES `adjectives`(`adjective_id`), | 61 | ); |
124 | FOREIGN KEY (`adjective_2_id`) REFERENCES `adjectives`(`adjective_id`) | 62 | |
63 | CREATE INDEX `similar_to` ON `similarity`(`adjective_1_id`); | ||
64 | |||
65 | CREATE TABLE `is_a` ( | ||
66 | `notion_id` INTEGER NOT NULL, | ||
67 | `groupname` VARCHAR(32) NOT NULL | ||
125 | ); | 68 | ); |
126 | 69 | ||
127 | DROP TABLE IF EXISTS `adverb_antonymy`; | 70 | CREATE TABLE `entailment` ( |
128 | CREATE TABLE `adverb_antonymy` ( | 71 | `given_id` INTEGER NOT NULL, |
129 | `adverb_1_id` INTEGER NOT NULL, | 72 | `entailment_id` INTEGER NOT NULL |
130 | `adverb_2_id` INTEGER NOT NULL, | 73 | ); |
131 | FOREIGN KEY (`adverb_1_id`) REFERENCES `adverbs`(`adverb_id`), | 74 | |
132 | FOREIGN KEY (`adverb_2_id`) REFERENCES `adverbs`(`adverb_id`) | 75 | CREATE INDEX `entailment_of` ON `entailment`(`given_id`); |
76 | CREATE INDEX `entailed_by` ON `entailment`(`entailment_id`); | ||
77 | |||
78 | CREATE TABLE `causality` ( | ||
79 | `cause_id` INTEGER NOT NULL, | ||
80 | `effect_id` INTEGER NOT NULL | ||
81 | ); | ||
82 | |||
83 | CREATE INDEX `effect_of` ON `causality`(`cause_id`); | ||
84 | CREATE INDEX `cause_of` ON `causality`(`effect_id`); | ||
85 | |||
86 | CREATE TABLE `words` ( | ||
87 | `word_id` INTEGER PRIMARY KEY, | ||
88 | `notion_id` INTEGER NOT NULL, | ||
89 | `lemma_id` INTEGER NOT NULL, | ||
90 | `tag_count` INTEGER, | ||
91 | `position` SMALLINT, | ||
92 | `group_id` INTEGER | ||
93 | ); | ||
94 | |||
95 | CREATE INDEX `notion_words` ON `words`(`notion_id`); | ||
96 | CREATE INDEX `lemma_words` ON `words`(`lemma_id`); | ||
97 | CREATE INDEX `group_words` ON `words`(`group_id`); | ||
98 | |||
99 | CREATE TABLE `antonymy` ( | ||
100 | `antonym_1_id` INTEGER NOT NULL, | ||
101 | `antonym_2_id` INTEGER NOT NULL | ||
133 | ); | 102 | ); |
134 | 103 | ||
135 | DROP TABLE IF EXISTS `specification`; | 104 | CREATE INDEX `antonym_of` ON `antonymy`(`antonym_1_id`); |
105 | |||
136 | CREATE TABLE `specification` ( | 106 | CREATE TABLE `specification` ( |
137 | `general_id` INTEGER NOT NULL, | 107 | `general_id` INTEGER NOT NULL, |
138 | `specific_id` INTEGER NOT NULL, | 108 | `specific_id` INTEGER NOT NULL |
139 | FOREIGN KEY (`general_id`) REFERENCES `adjectives`(`adjective_id`), | ||
140 | FOREIGN KEY (`specific_id`) REFERENCES `adjectives`(`adjective_id`) | ||
141 | ); | 109 | ); |
142 | 110 | ||
143 | DROP TABLE IF EXISTS `pertainymy`; | 111 | CREATE INDEX `specification_of` ON `specification`(`general_id`); |
112 | CREATE INDEX `generalization_of` ON `specification`(`specific_id`); | ||
113 | |||
144 | CREATE TABLE `pertainymy` ( | 114 | CREATE TABLE `pertainymy` ( |
145 | `noun_id` INTEGER NOT NULL, | 115 | `noun_id` INTEGER NOT NULL, |
146 | `pertainym_id` INTEGER NOT NULL, | 116 | `pertainym_id` INTEGER NOT NULL |
147 | FOREIGN KEY (`noun_id`) REFERENCES `nouns`(`noun_id`), | ||
148 | FOREIGN KEY (`pertainym_id`) REFERENCES `adjectives`(`adjective_id`) | ||
149 | ); | 117 | ); |
150 | 118 | ||
151 | DROP TABLE IF EXISTS `mannernymy`; | 119 | CREATE INDEX `pertainym_of` ON `pertainymy`(`noun_id`); |
120 | CREATE INDEX `anti_pertainym_of` ON `pertainymy`(`pertainym_id`); | ||
121 | |||
152 | CREATE TABLE `mannernymy` ( | 122 | CREATE TABLE `mannernymy` ( |
153 | `adjective_id` INTEGER NOT NULL, | 123 | `adjective_id` INTEGER NOT NULL, |
154 | `mannernym_id` INTEGER NOT NULL, | 124 | `mannernym_id` INTEGER NOT NULL |
155 | FOREIGN KEY (`adjective_id`) REFERENCES `adjectives`(`adjective_id`), | ||
156 | FOREIGN KEY (`mannernym_id`) REFERENCES `adverbs`(`adverb_id`) | ||
157 | ); | 125 | ); |
158 | 126 | ||
159 | DROP TABLE IF EXISTS `noun_synonymy`; | 127 | CREATE INDEX `mannernym_of` ON `mannernymy`(`adjective_id`); |
160 | CREATE TABLE `noun_synonymy` ( | 128 | CREATE INDEX `anti_mannernym_of` ON `mannernymy`(`mannernym_id`); |
161 | `noun_1_id` INTEGER NOT NULL, | ||
162 | `noun_2_id` INTEGER NOT NULL, | ||
163 | FOREIGN KEY (`noun_1_id`) REFERENCES `nouns`(`nouns_id`), | ||
164 | FOREIGN KEY (`noun_2_id`) REFERENCES `nouns`(`nouns_id`) | ||
165 | ); | ||
166 | 129 | ||
167 | DROP TABLE IF EXISTS `adjective_synonymy`; | 130 | CREATE TABLE `usage` ( |
168 | CREATE TABLE `adjective_synonymy` ( | 131 | `domain_id` INTEGER NOT NULL, |
169 | `adjective_1_id` INTEGER NOT NULL, | 132 | `term_id` INTEGER NOT NULL |
170 | `adjective_2_id` INTEGER NOT NULL, | ||
171 | FOREIGN KEY (`adjective_1_id`) REFERENCES `adjectives`(`adjective_id`), | ||
172 | FOREIGN KEY (`adjective_2_id`) REFERENCES `adjectives`(`adjective_id`) | ||
173 | ); | 133 | ); |
174 | 134 | ||
175 | DROP TABLE IF EXISTS `adverb_synonymy`; | 135 | CREATE INDEX `usage_term_of` ON `usage`(`domain_id`); |
176 | CREATE TABLE `adverb_synonymy` ( | 136 | CREATE INDEX `usage_domain_of` ON `usage`(`term_id`); |
177 | `adverb_1_id` INTEGER NOT NULL, | ||
178 | `adverb_2_id` INTEGER NOT NULL, | ||
179 | FOREIGN KEY (`adverb_1_id`) REFERENCES `adverbs`(`adverb_id`), | ||
180 | FOREIGN KEY (`adverb_2_id`) REFERENCES `adverbs`(`adverb_id`) | ||
181 | ); | ||
182 | 137 | ||
183 | DROP TABLE IF EXISTS `noun_pronunciations`; | 138 | CREATE TABLE `topicality` ( |
184 | CREATE TABLE `noun_pronunciations` ( | 139 | `domain_id` INTEGER NOT NULL, |
185 | `noun_id` INTEGER NOT NULL, | 140 | `term_id` INTEGER NOT NULL |
186 | `pronunciation` VARCHAR(64) NOT NULL, | ||
187 | `prerhyme` VARCHAR(8), | ||
188 | `rhyme` VARCHAR(64), | ||
189 | `syllables` INT NOT NULL, | ||
190 | `stress` VARCHAR(64) NOT NULL, | ||
191 | FOREIGN KEY (`noun_id`) REFERENCES `nouns`(`noun_id`) | ||
192 | ); | 141 | ); |
193 | 142 | ||
194 | DROP TABLE IF EXISTS `verb_pronunciations`; | 143 | CREATE INDEX `topical_term_of` ON `topicality`(`domain_id`); |
195 | CREATE TABLE `verb_pronunciations` ( | 144 | CREATE INDEX `topical_domain_of` ON `topicality`(`term_id`); |
196 | `verb_id` INTEGER NOT NULL, | ||
197 | `pronunciation` VARCHAR(64) NOT NULL, | ||
198 | `prerhyme` VARCHAR(8), | ||
199 | `rhyme` VARCHAR(64), | ||
200 | `syllables` INT NOT NULL, | ||
201 | `stress` VARCHAR(64) NOT NULL, | ||
202 | FOREIGN KEY (`verb_id`) REFERENCES `verbs`(`verb_id`) | ||
203 | ); | ||
204 | 145 | ||
205 | DROP TABLE IF EXISTS `adjective_pronunciations`; | 146 | CREATE TABLE `regionality` ( |
206 | CREATE TABLE `adjective_pronunciations` ( | 147 | `domain_id` INTEGER NOT NULL, |
207 | `adjective_id` INTEGER NOT NULL, | 148 | `term_id` INTEGER NOT NULL |
208 | `pronunciation` VARCHAR(64) NOT NULL, | ||
209 | `prerhyme` VARCHAR(8), | ||
210 | `rhyme` VARCHAR(64), | ||
211 | `syllables` INT NOT NULL, | ||
212 | `stress` VARCHAR(64) NOT NULL, | ||
213 | FOREIGN KEY (`adjective_id`) REFERENCES `adjectives`(`adjective_id`) | ||
214 | ); | 149 | ); |
215 | 150 | ||
216 | DROP TABLE IF EXISTS `adverb_pronunciations`; | 151 | CREATE INDEX `regional_term_of` ON `regionality`(`domain_id`); |
217 | CREATE TABLE `adverb_pronunciations` ( | 152 | CREATE INDEX `regional_domain_of` ON `regionality`(`term_id`); |
218 | `adverb_id` INTEGER NOT NULL, | ||
219 | `pronunciation` VARCHAR(64) NOT NULL, | ||
220 | `prerhyme` VARCHAR(8), | ||
221 | `rhyme` VARCHAR(64), | ||
222 | `syllables` INT NOT NULL, | ||
223 | `stress` VARCHAR(64) NOT NULL, | ||
224 | FOREIGN KEY (`adverb_id`) REFERENCES `adverbs`(`adverb_id`) | ||
225 | ); | ||
226 | 153 | ||
227 | DROP TABLE IF EXISTS `noun_noun_derivation`; | 154 | CREATE TABLE `forms` ( |
228 | CREATE TABLE `noun_noun_derivation` ( | 155 | `form_id` INTEGER PRIMARY KEY, |
229 | `noun_1_id` INTEGER NOT NULL, | 156 | `form` VARCHAR(32) NOT NULL, |
230 | `noun_2_id` INTEGER NOT NULL, | 157 | `complexity` SMALLINT NOT NULL, |
231 | FOREIGN KEY (`noun_1_id`) REFERENCES `nouns`(`noun_id`), | 158 | `proper` SMALLINT NOT NULL |
232 | FOREIGN KEY (`noun_2_id`) REFERENCES `nouns`(`noun_id`) | ||
233 | ); | 159 | ); |
234 | 160 | ||
235 | DROP TABLE IF EXISTS `noun_adjective_derivation`; | 161 | CREATE UNIQUE INDEX `form_by_string` ON `forms`(`form`); |
236 | CREATE TABLE `noun_adjective_derivation` ( | ||
237 | `noun_id` INTEGER NOT NULL, | ||
238 | `adjective_id` INTEGER NOT NULL, | ||
239 | FOREIGN KEY (`noun_id`) REFERENCES `nouns`(`noun_id`), | ||
240 | FOREIGN KEY (`adjective_id`) REFERENCES `adjectives`(`adjective_id`) | ||
241 | ); | ||
242 | 162 | ||
243 | DROP TABLE IF EXISTS `noun_adverb_derivation`; | 163 | CREATE TABLE `lemmas_forms` ( |
244 | CREATE TABLE `noun_adverb_derivation` ( | 164 | `lemma_id` INTEGER NOT NULL, |
245 | `noun_id` INTEGER NOT NULL, | 165 | `form_id` INTEGER NOT NULL, |
246 | `adverb_id` INTEGER NOT NULL, | 166 | `category` SMALLINT NOT NULL |
247 | FOREIGN KEY (`noun_id`) REFERENCES `nouns`(`noun_id`), | ||
248 | FOREIGN KEY (`adverb_id`) REFERENCES `adverbs`(`adverb_id`) | ||
249 | ); | 167 | ); |
250 | 168 | ||
251 | DROP TABLE IF EXISTS `adjective_adjective_derivation`; | 169 | CREATE INDEX `form_of` ON `lemmas_forms`(`lemma_id`); |
252 | CREATE TABLE `adjective_adjective_derivation` ( | 170 | CREATE INDEX `lemma_of` ON `lemmas_forms`(`form_id`); |
253 | `adjective_1_id` INTEGER NOT NULL, | 171 | |
254 | `adjective_2_id` INTEGER NOT NULL, | 172 | CREATE TABLE `pronunciations` ( |
255 | FOREIGN KEY (`adjective_1_id`) REFERENCES `adjectives`(`adjective_id`), | 173 | `pronunciation_id` INTEGER PRIMARY KEY, |
256 | FOREIGN KEY (`adjective_2_id`) REFERENCES `adjectives`(`adjective_id`) | 174 | `phonemes` VARCHAR(64) NOT NULL, |
175 | `prerhyme` VARCHAR(8), | ||
176 | `rhyme` VARCHAR(64), | ||
177 | `syllables` INTEGER NOT NULL, | ||
178 | `stress` VARCHAR(64) NOT NULL | ||
257 | ); | 179 | ); |
258 | 180 | ||
259 | DROP TABLE IF EXISTS `adjective_adverb_derivation`; | 181 | CREATE TABLE `forms_pronunciations` ( |
260 | CREATE TABLE `adjective_adverb_derivation` ( | 182 | `form_id` INTEGER NOT NULL, |
261 | `adjective_id` INTEGER NOT NULL, | 183 | `pronunciation_id` INTEGER NOT NULL |
262 | `adverb_id` INTEGER NOT NULL, | ||
263 | FOREIGN KEY (`adjective_id`) REFERENCES `adjectives`(`adjective_id`), | ||
264 | FOREIGN KEY (`adverb_id`) REFERENCES `adverbs`(`adjective_id`) | ||
265 | ); | 184 | ); |
266 | 185 | ||
267 | DROP TABLE IF EXISTS `adverb_adverb_derivation`; | 186 | CREATE INDEX `pronunciation_of` ON `forms_pronunciations`(`form_id`); |
268 | CREATE TABLE `adverb_adverb_derivation` ( | 187 | CREATE INDEX `spelling_of` ON `forms_pronunciations`(`pronunciation_id`); |
269 | `adverb_1_id` INTEGER NOT NULL, | 188 | |
270 | `adverb_2_id` INTEGER NOT NULL, | 189 | CREATE TABLE `groups` ( |
271 | FOREIGN KEY (`adverb_1_id`) REFERENCES `adverbs`(`adverb_id`), | 190 | `group_id` INTEGER PRIMARY KEY, |
272 | FOREIGN KEY (`adverb_2_id`) REFERENCES `adverbs`(`adverb_id`) | 191 | `data` BLOB NOT NULL |
273 | ); | 192 | ); |
274 | 193 | ||
275 | DROP TABLE IF EXISTS `prepositions`; | 194 | CREATE TABLE `frames` ( |
276 | CREATE TABLE `prepositions` ( | 195 | `frame_id` INTEGER PRIMARY KEY, |
277 | `preposition_id` INTEGER PRIMARY KEY, | 196 | `data` BLOB NOT NULL |
278 | `form` VARCHAR(32) NOT NULL | ||
279 | ); | 197 | ); |
280 | 198 | ||
281 | DROP TABLE IF EXISTS `preposition_groups`; | 199 | CREATE TABLE `groups_frames` ( |
282 | CREATE TABLE `preposition_groups` ( | 200 | `group_id` INTEGER NOT NULL, |
283 | `preposition_id` INTEGER NOT NULL, | 201 | `frame_id` INTEGER NOT NULL |
284 | `groupname` VARCHAR(32) NOT NULL, | ||
285 | FOREIGN KEY (`preposition_id`) REFERENCES `prepositions`(`preposition_id`) | ||
286 | ); | 202 | ); |
203 | |||
204 | CREATE INDEX `frames_in` ON `groups_frames`(`group_id`); | ||
diff --git a/generator/selrestr.cpp b/generator/selrestr.cpp new file mode 100644 index 0000000..8bdd3f6 --- /dev/null +++ b/generator/selrestr.cpp | |||
@@ -0,0 +1,288 @@ | |||
1 | #include "selrestr.h" | ||
2 | |||
3 | namespace verbly { | ||
4 | namespace generator { | ||
5 | |||
6 | selrestr::selrestr(const selrestr& other) | ||
7 | { | ||
8 | type_ = other.type_; | ||
9 | |||
10 | switch (type_) | ||
11 | { | ||
12 | case type::singleton: | ||
13 | { | ||
14 | singleton_.pos = other.singleton_.pos; | ||
15 | new(&singleton_.restriction) std::string(other.singleton_.restriction); | ||
16 | |||
17 | break; | ||
18 | } | ||
19 | |||
20 | case type::group: | ||
21 | { | ||
22 | new(&group_.children) std::list<selrestr>(other.group_.children); | ||
23 | group_.orlogic = other.group_.orlogic; | ||
24 | |||
25 | break; | ||
26 | } | ||
27 | |||
28 | case type::empty: | ||
29 | { | ||
30 | break; | ||
31 | } | ||
32 | } | ||
33 | } | ||
34 | |||
35 | selrestr::selrestr(selrestr&& other) : selrestr() | ||
36 | { | ||
37 | swap(*this, other); | ||
38 | } | ||
39 | |||
40 | selrestr& selrestr::operator=(selrestr other) | ||
41 | { | ||
42 | swap(*this, other); | ||
43 | |||
44 | return *this; | ||
45 | } | ||
46 | |||
47 | void swap(selrestr& first, selrestr& second) | ||
48 | { | ||
49 | using type = selrestr::type; | ||
50 | |||
51 | type tempType = first.type_; | ||
52 | int tempPos; | ||
53 | std::string tempRestriction; | ||
54 | std::list<selrestr> tempChildren; | ||
55 | bool tempOrlogic; | ||
56 | |||
57 | switch (tempType) | ||
58 | { | ||
59 | case type::singleton: | ||
60 | { | ||
61 | tempPos = first.singleton_.pos; | ||
62 | tempRestriction = std::move(first.singleton_.restriction); | ||
63 | |||
64 | break; | ||
65 | } | ||
66 | |||
67 | case type::group: | ||
68 | { | ||
69 | tempChildren = std::move(first.group_.children); | ||
70 | tempOrlogic = first.group_.orlogic; | ||
71 | |||
72 | break; | ||
73 | } | ||
74 | |||
75 | case type::empty: | ||
76 | { | ||
77 | break; | ||
78 | } | ||
79 | } | ||
80 | |||
81 | first.~selrestr(); | ||
82 | |||
83 | first.type_ = second.type_; | ||
84 | |||
85 | switch (first.type_) | ||
86 | { | ||
87 | case type::singleton: | ||
88 | { | ||
89 | first.singleton_.pos = second.singleton_.pos; | ||
90 | new(&first.singleton_.restriction) std::string(std::move(second.singleton_.restriction)); | ||
91 | |||
92 | break; | ||
93 | } | ||
94 | |||
95 | case type::group: | ||
96 | { | ||
97 | new(&first.group_.children) std::list<selrestr>(std::move(second.group_.children)); | ||
98 | first.group_.orlogic = second.group_.orlogic; | ||
99 | |||
100 | break; | ||
101 | } | ||
102 | |||
103 | case type::empty: | ||
104 | { | ||
105 | break; | ||
106 | } | ||
107 | } | ||
108 | |||
109 | second.~selrestr(); | ||
110 | |||
111 | second.type_ = tempType; | ||
112 | |||
113 | switch (second.type_) | ||
114 | { | ||
115 | case type::singleton: | ||
116 | { | ||
117 | second.singleton_.pos = tempPos; | ||
118 | new(&second.singleton_.restriction) std::string(std::move(tempRestriction)); | ||
119 | |||
120 | break; | ||
121 | } | ||
122 | |||
123 | case type::group: | ||
124 | { | ||
125 | new(&second.group_.children) std::list<selrestr>(std::move(tempChildren)); | ||
126 | second.group_.orlogic = tempOrlogic; | ||
127 | |||
128 | break; | ||
129 | } | ||
130 | |||
131 | case type::empty: | ||
132 | { | ||
133 | break; | ||
134 | } | ||
135 | } | ||
136 | } | ||
137 | |||
138 | selrestr::~selrestr() | ||
139 | { | ||
140 | switch (type_) | ||
141 | { | ||
142 | case type::singleton: | ||
143 | { | ||
144 | using string_type = std::string; | ||
145 | singleton_.restriction.~string_type(); | ||
146 | |||
147 | break; | ||
148 | } | ||
149 | |||
150 | case type::group: | ||
151 | { | ||
152 | using list_type = std::list<selrestr>; | ||
153 | group_.children.~list_type(); | ||
154 | |||
155 | break; | ||
156 | } | ||
157 | |||
158 | case type::empty: | ||
159 | { | ||
160 | break; | ||
161 | } | ||
162 | } | ||
163 | } | ||
164 | |||
165 | selrestr::selrestr() : type_(type::empty) | ||
166 | { | ||
167 | } | ||
168 | |||
169 | selrestr::selrestr( | ||
170 | std::string restriction, | ||
171 | bool pos) : | ||
172 | type_(type::singleton) | ||
173 | { | ||
174 | new(&singleton_.restriction) std::string(std::move(restriction)); | ||
175 | singleton_.pos = pos; | ||
176 | } | ||
177 | |||
178 | std::string selrestr::getRestriction() const | ||
179 | { | ||
180 | if (type_ == type::singleton) | ||
181 | { | ||
182 | return singleton_.restriction; | ||
183 | } else { | ||
184 | throw std::domain_error("Only singleton selrestrs have restrictions"); | ||
185 | } | ||
186 | } | ||
187 | |||
188 | bool selrestr::getPos() const | ||
189 | { | ||
190 | if (type_ == type::singleton) | ||
191 | { | ||
192 | return singleton_.pos; | ||
193 | } else { | ||
194 | throw std::domain_error("Only singleton selrestrs have positivity flags"); | ||
195 | } | ||
196 | } | ||
197 | |||
198 | selrestr::selrestr( | ||
199 | std::list<selrestr> children, | ||
200 | bool orlogic) : | ||
201 | type_(type::group) | ||
202 | { | ||
203 | new(&group_.children) std::list<selrestr>(std::move(children)); | ||
204 | group_.orlogic = orlogic; | ||
205 | } | ||
206 | |||
207 | std::list<selrestr> selrestr::getChildren() const | ||
208 | { | ||
209 | if (type_ == type::group) | ||
210 | { | ||
211 | return group_.children; | ||
212 | } else { | ||
213 | throw std::domain_error("Only group selrestrs have children"); | ||
214 | } | ||
215 | } | ||
216 | |||
217 | std::list<selrestr>::const_iterator selrestr::begin() const | ||
218 | { | ||
219 | if (type_ == type::group) | ||
220 | { | ||
221 | return std::begin(group_.children); | ||
222 | } else { | ||
223 | throw std::domain_error("Only group selrestrs have children"); | ||
224 | } | ||
225 | } | ||
226 | |||
227 | std::list<selrestr>::const_iterator selrestr::end() const | ||
228 | { | ||
229 | if (type_ == type::group) | ||
230 | { | ||
231 | return std::end(group_.children); | ||
232 | } else { | ||
233 | throw std::domain_error("Only group selrestrs have children"); | ||
234 | } | ||
235 | } | ||
236 | |||
237 | bool selrestr::getOrlogic() const | ||
238 | { | ||
239 | if (type_ == type::group) | ||
240 | { | ||
241 | return group_.orlogic; | ||
242 | } else { | ||
243 | throw std::domain_error("Only group selrestrs have logic"); | ||
244 | } | ||
245 | } | ||
246 | |||
247 | nlohmann::json selrestr::toJson() const | ||
248 | { | ||
249 | switch (type_) | ||
250 | { | ||
251 | case type::empty: | ||
252 | { | ||
253 | return {}; | ||
254 | } | ||
255 | |||
256 | case type::singleton: | ||
257 | { | ||
258 | return { | ||
259 | {"type", singleton_.restriction}, | ||
260 | {"pos", singleton_.pos} | ||
261 | }; | ||
262 | } | ||
263 | |||
264 | case type::group: | ||
265 | { | ||
266 | std::string logic; | ||
267 | if (group_.orlogic) | ||
268 | { | ||
269 | logic = "or"; | ||
270 | } else { | ||
271 | logic = "and"; | ||
272 | } | ||
273 | |||
274 | std::list<nlohmann::json> children; | ||
275 | std::transform(std::begin(group_.children), std::end(group_.children), std::back_inserter(children), [] (const selrestr& child) { | ||
276 | return child.toJson(); | ||
277 | }); | ||
278 | |||
279 | return { | ||
280 | {"logic", logic}, | ||
281 | {"children", children} | ||
282 | }; | ||
283 | } | ||
284 | } | ||
285 | } | ||
286 | |||
287 | }; | ||
288 | }; | ||
diff --git a/generator/selrestr.h b/generator/selrestr.h new file mode 100644 index 0000000..5000970 --- /dev/null +++ b/generator/selrestr.h | |||
@@ -0,0 +1,88 @@ | |||
1 | #ifndef SELRESTR_H_50652FB7 | ||
2 | #define SELRESTR_H_50652FB7 | ||
3 | |||
4 | #include <list> | ||
5 | #include <string> | ||
6 | #include <json.hpp> | ||
7 | |||
8 | namespace verbly { | ||
9 | namespace generator { | ||
10 | |||
11 | class selrestr { | ||
12 | public: | ||
13 | enum class type { | ||
14 | empty, | ||
15 | singleton, | ||
16 | group | ||
17 | }; | ||
18 | |||
19 | // Copy and move constructors | ||
20 | |||
21 | selrestr(const selrestr& other); | ||
22 | selrestr(selrestr&& other); | ||
23 | |||
24 | // Assignment | ||
25 | |||
26 | selrestr& operator=(selrestr other); | ||
27 | |||
28 | // Swap | ||
29 | |||
30 | friend void swap(selrestr& first, selrestr& second); | ||
31 | |||
32 | // Destructor | ||
33 | |||
34 | ~selrestr(); | ||
35 | |||
36 | // Generic accessors | ||
37 | |||
38 | type getType() const | ||
39 | { | ||
40 | return type_; | ||
41 | } | ||
42 | |||
43 | // Empty | ||
44 | |||
45 | selrestr(); | ||
46 | |||
47 | // Singleton | ||
48 | |||
49 | selrestr(std::string restriction, bool pos); | ||
50 | |||
51 | std::string getRestriction() const; | ||
52 | |||
53 | bool getPos() const; | ||
54 | |||
55 | // Group | ||
56 | |||
57 | selrestr(std::list<selrestr> children, bool orlogic); | ||
58 | |||
59 | std::list<selrestr> getChildren() const; | ||
60 | |||
61 | std::list<selrestr>::const_iterator begin() const; | ||
62 | |||
63 | std::list<selrestr>::const_iterator end() const; | ||
64 | |||
65 | bool getOrlogic() const; | ||
66 | |||
67 | // Helpers | ||
68 | |||
69 | nlohmann::json toJson() const; | ||
70 | |||
71 | private: | ||
72 | union { | ||
73 | struct { | ||
74 | bool pos; | ||
75 | std::string restriction; | ||
76 | } singleton_; | ||
77 | struct { | ||
78 | std::list<selrestr> children; | ||
79 | bool orlogic; | ||
80 | } group_; | ||
81 | }; | ||
82 | type type_; | ||
83 | }; | ||
84 | |||
85 | }; | ||
86 | }; | ||
87 | |||
88 | #endif /* end of include guard: SELRESTR_H_50652FB7 */ | ||
diff --git a/generator/word.cpp b/generator/word.cpp new file mode 100644 index 0000000..8ba3ce2 --- /dev/null +++ b/generator/word.cpp | |||
@@ -0,0 +1,77 @@ | |||
1 | #include "word.h" | ||
2 | #include <list> | ||
3 | #include <string> | ||
4 | #include "database.h" | ||
5 | #include "notion.h" | ||
6 | #include "lemma.h" | ||
7 | #include "field.h" | ||
8 | #include "group.h" | ||
9 | |||
10 | namespace verbly { | ||
11 | namespace generator { | ||
12 | |||
13 | int word::nextId_ = 0; | ||
14 | |||
15 | word::word( | ||
16 | notion& n, | ||
17 | lemma& l) : | ||
18 | id_(nextId_++), | ||
19 | notion_(n), | ||
20 | lemma_(l) | ||
21 | { | ||
22 | } | ||
23 | |||
24 | word::word( | ||
25 | notion& n, | ||
26 | lemma& l, | ||
27 | int tagCount) : | ||
28 | id_(nextId_++), | ||
29 | notion_(n), | ||
30 | lemma_(l), | ||
31 | tagCount_(tagCount), | ||
32 | hasTagCount_(true) | ||
33 | { | ||
34 | } | ||
35 | |||
36 | void word::setAdjectivePosition(positioning adjectivePosition) | ||
37 | { | ||
38 | adjectivePosition_ = adjectivePosition; | ||
39 | } | ||
40 | |||
41 | void word::setVerbGroup(const group& verbGroup) | ||
42 | { | ||
43 | verbGroup_ = &verbGroup; | ||
44 | } | ||
45 | |||
46 | database& operator<<(database& db, const word& arg) | ||
47 | { | ||
48 | std::list<field> fields; | ||
49 | |||
50 | fields.emplace_back("word_id", arg.getId()); | ||
51 | fields.emplace_back("notion_id", arg.getNotion().getId()); | ||
52 | fields.emplace_back("lemma_id", arg.getLemma().getId()); | ||
53 | |||
54 | if (arg.hasTagCount()) | ||
55 | { | ||
56 | fields.emplace_back("tag_count", arg.getTagCount()); | ||
57 | } | ||
58 | |||
59 | if ((arg.getNotion().getPartOfSpeech() == part_of_speech::adjective) | ||
60 | && (arg.getAdjectivePosition() != positioning::undefined)) | ||
61 | { | ||
62 | fields.emplace_back("position", static_cast<int>(arg.getAdjectivePosition())); | ||
63 | } | ||
64 | |||
65 | if ((arg.getNotion().getPartOfSpeech() == part_of_speech::verb) | ||
66 | && (arg.hasVerbGroup())) | ||
67 | { | ||
68 | fields.emplace_back("group_id", arg.getVerbGroup().getId()); | ||
69 | } | ||
70 | |||
71 | db.insertIntoTable("words", std::move(fields)); | ||
72 | |||
73 | return db; | ||
74 | } | ||
75 | |||
76 | }; | ||
77 | }; | ||
diff --git a/generator/word.h b/generator/word.h new file mode 100644 index 0000000..bfed586 --- /dev/null +++ b/generator/word.h | |||
@@ -0,0 +1,110 @@ | |||
1 | #ifndef WORD_H_91F99D46 | ||
2 | #define WORD_H_91F99D46 | ||
3 | |||
4 | #include <cassert> | ||
5 | #include "enums.h" | ||
6 | |||
7 | namespace verbly { | ||
8 | namespace generator { | ||
9 | |||
10 | class notion; | ||
11 | class lemma; | ||
12 | class database; | ||
13 | class group; | ||
14 | |||
15 | class word { | ||
16 | public: | ||
17 | |||
18 | // Constructors | ||
19 | |||
20 | word(notion& n, lemma& l); | ||
21 | |||
22 | word(notion& n, lemma& l, int tagCount); | ||
23 | |||
24 | // Mutators | ||
25 | |||
26 | void setAdjectivePosition(positioning adjectivePosition); | ||
27 | |||
28 | void setVerbGroup(const group& verbGroup); | ||
29 | |||
30 | // Accessors | ||
31 | |||
32 | int getId() const | ||
33 | { | ||
34 | return id_; | ||
35 | } | ||
36 | |||
37 | notion& getNotion() | ||
38 | { | ||
39 | return notion_; | ||
40 | } | ||
41 | |||
42 | const notion& getNotion() const | ||
43 | { | ||
44 | return notion_; | ||
45 | } | ||
46 | |||
47 | lemma& getLemma() | ||
48 | { | ||
49 | return lemma_; | ||
50 | } | ||
51 | |||
52 | const lemma& getLemma() const | ||
53 | { | ||
54 | return lemma_; | ||
55 | } | ||
56 | |||
57 | bool hasTagCount() const | ||
58 | { | ||
59 | return hasTagCount_; | ||
60 | } | ||
61 | |||
62 | int getTagCount() const | ||
63 | { | ||
64 | // Calling code should always call hasTagCount first. | ||
65 | assert(hasTagCount_); | ||
66 | |||
67 | return tagCount_; | ||
68 | } | ||
69 | |||
70 | positioning getAdjectivePosition() const | ||
71 | { | ||
72 | return adjectivePosition_; | ||
73 | } | ||
74 | |||
75 | bool hasVerbGroup() const | ||
76 | { | ||
77 | return (verbGroup_ != nullptr); | ||
78 | } | ||
79 | |||
80 | const group& getVerbGroup() const | ||
81 | { | ||
82 | // Calling code should always call hasVerbGroup first. | ||
83 | assert(verbGroup_ != nullptr); | ||
84 | |||
85 | return *verbGroup_; | ||
86 | } | ||
87 | |||
88 | private: | ||
89 | |||
90 | static int nextId_; | ||
91 | |||
92 | const int id_; | ||
93 | notion& notion_; | ||
94 | lemma& lemma_; | ||
95 | const int tagCount_ = 0; | ||
96 | const bool hasTagCount_ = false; | ||
97 | |||
98 | positioning adjectivePosition_ = positioning::undefined; | ||
99 | const group* verbGroup_ = nullptr; | ||
100 | |||
101 | }; | ||
102 | |||
103 | // Serializer | ||
104 | |||
105 | database& operator<<(database& db, const word& arg); | ||
106 | |||
107 | }; | ||
108 | }; | ||
109 | |||
110 | #endif /* end of include guard: WORD_H_91F99D46 */ | ||
diff --git a/lib/adjective.cpp b/lib/adjective.cpp deleted file mode 100644 index ba8254a..0000000 --- a/lib/adjective.cpp +++ /dev/null | |||
@@ -1,113 +0,0 @@ | |||
1 | #include "verbly.h" | ||
2 | |||
3 | namespace verbly { | ||
4 | |||
5 | adjective::adjective() | ||
6 | { | ||
7 | |||
8 | } | ||
9 | |||
10 | adjective::adjective(const data& _data, int _id) : word(_data, _id) | ||
11 | { | ||
12 | |||
13 | } | ||
14 | |||
15 | std::string adjective::base_form() const | ||
16 | { | ||
17 | assert(_valid == true); | ||
18 | |||
19 | return _base_form; | ||
20 | } | ||
21 | |||
22 | std::string adjective::comparative_form() const | ||
23 | { | ||
24 | assert(_valid == true); | ||
25 | |||
26 | return _comparative_form; | ||
27 | } | ||
28 | |||
29 | std::string adjective::superlative_form() const | ||
30 | { | ||
31 | assert(_valid == true); | ||
32 | |||
33 | return _superlative_form; | ||
34 | } | ||
35 | |||
36 | adjective::positioning adjective::position() const | ||
37 | { | ||
38 | assert(_valid == true); | ||
39 | |||
40 | return _position; | ||
41 | } | ||
42 | |||
43 | bool adjective::has_comparative_form() const | ||
44 | { | ||
45 | assert(_valid == true); | ||
46 | |||
47 | return !_comparative_form.empty(); | ||
48 | } | ||
49 | |||
50 | bool adjective::has_superlative_form() const | ||
51 | { | ||
52 | assert(_valid == true); | ||
53 | |||
54 | return !_superlative_form.empty(); | ||
55 | } | ||
56 | |||
57 | bool adjective::has_position() const | ||
58 | { | ||
59 | assert(_valid == true); | ||
60 | |||
61 | return _position != adjective::positioning::undefined; | ||
62 | } | ||
63 | |||
64 | adjective_query adjective::antonyms() const | ||
65 | { | ||
66 | assert(_valid == true); | ||
67 | |||
68 | return _data->adjectives().antonym_of(*this); | ||
69 | } | ||
70 | |||
71 | adjective_query adjective::synonyms() const | ||
72 | { | ||
73 | assert(_valid == true); | ||
74 | |||
75 | return _data->adjectives().synonym_of(*this); | ||
76 | } | ||
77 | |||
78 | adjective_query adjective::generalizations() const | ||
79 | { | ||
80 | assert(_valid == true); | ||
81 | |||
82 | return _data->adjectives().generalization_of(*this); | ||
83 | } | ||
84 | |||
85 | adjective_query adjective::specifications() const | ||
86 | { | ||
87 | assert(_valid == true); | ||
88 | |||
89 | return _data->adjectives().specification_of(*this); | ||
90 | } | ||
91 | |||
92 | noun_query adjective::anti_pertainyms() const | ||
93 | { | ||
94 | assert(_valid == true); | ||
95 | |||
96 | return _data->nouns().anti_pertainym_of(*this); | ||
97 | } | ||
98 | |||
99 | adverb_query adjective::mannernyms() const | ||
100 | { | ||
101 | assert(_valid == true); | ||
102 | |||
103 | return _data->adverbs().mannernym_of(*this); | ||
104 | } | ||
105 | |||
106 | noun_query adjective::attributes() const | ||
107 | { | ||
108 | assert(_valid == true); | ||
109 | |||
110 | return _data->nouns().attribute_of(*this); | ||
111 | } | ||
112 | |||
113 | }; | ||
diff --git a/lib/adjective.h b/lib/adjective.h deleted file mode 100644 index a6eb293..0000000 --- a/lib/adjective.h +++ /dev/null | |||
@@ -1,51 +0,0 @@ | |||
1 | #ifndef ADJECTIVE_H_87B3FB75 | ||
2 | #define ADJECTIVE_H_87B3FB75 | ||
3 | |||
4 | namespace verbly { | ||
5 | |||
6 | class adjective_query; | ||
7 | class adverb_query; | ||
8 | class noun_query; | ||
9 | |||
10 | class adjective : public word { | ||
11 | public: | ||
12 | enum class positioning { | ||
13 | undefined, | ||
14 | predicate, | ||
15 | attributive, | ||
16 | postnominal | ||
17 | }; | ||
18 | |||
19 | private: | ||
20 | std::string _base_form; | ||
21 | std::string _comparative_form; | ||
22 | std::string _superlative_form; | ||
23 | positioning _position = positioning::undefined; | ||
24 | |||
25 | friend class adjective_query; | ||
26 | |||
27 | public: | ||
28 | adjective(); | ||
29 | adjective(const data& _data, int _id); | ||
30 | |||
31 | std::string base_form() const; | ||
32 | std::string comparative_form() const; | ||
33 | std::string superlative_form() const; | ||
34 | positioning position() const; | ||
35 | |||
36 | bool has_comparative_form() const; | ||
37 | bool has_superlative_form() const; | ||
38 | bool has_position() const; | ||
39 | |||
40 | adjective_query antonyms() const; | ||
41 | adjective_query synonyms() const; | ||
42 | adjective_query generalizations() const; | ||
43 | adjective_query specifications() const; | ||
44 | noun_query anti_pertainyms() const; | ||
45 | adverb_query mannernyms() const; | ||
46 | noun_query attributes() const; | ||
47 | }; | ||
48 | |||
49 | }; | ||
50 | |||
51 | #endif /* end of include guard: ADJECTIVE_H_87B3FB75 */ | ||
diff --git a/lib/adjective_query.cpp b/lib/adjective_query.cpp deleted file mode 100644 index 90ccef4..0000000 --- a/lib/adjective_query.cpp +++ /dev/null | |||
@@ -1,1072 +0,0 @@ | |||
1 | #include "verbly.h" | ||
2 | |||
3 | namespace verbly { | ||
4 | |||
5 | adjective_query::adjective_query(const data& _data) : _data(_data) | ||
6 | { | ||
7 | |||
8 | } | ||
9 | |||
10 | adjective_query& adjective_query::limit(int _limit) | ||
11 | { | ||
12 | if ((_limit > 0) || (_limit == unlimited)) | ||
13 | { | ||
14 | this->_limit = _limit; | ||
15 | } | ||
16 | |||
17 | return *this; | ||
18 | } | ||
19 | |||
20 | adjective_query& adjective_query::random() | ||
21 | { | ||
22 | this->_random = true; | ||
23 | |||
24 | return *this; | ||
25 | } | ||
26 | |||
27 | adjective_query& adjective_query::except(const adjective& _word) | ||
28 | { | ||
29 | _except.push_back(_word); | ||
30 | |||
31 | return *this; | ||
32 | } | ||
33 | |||
34 | adjective_query& adjective_query::rhymes_with(const word& _word) | ||
35 | { | ||
36 | for (auto rhyme : _word.get_rhymes()) | ||
37 | { | ||
38 | _rhymes.push_back(rhyme); | ||
39 | } | ||
40 | |||
41 | if (dynamic_cast<const adjective*>(&_word) != nullptr) | ||
42 | { | ||
43 | _except.push_back(dynamic_cast<const adjective&>(_word)); | ||
44 | } | ||
45 | |||
46 | return *this; | ||
47 | } | ||
48 | |||
49 | adjective_query& adjective_query::rhymes_with(rhyme _r) | ||
50 | { | ||
51 | _rhymes.push_back(_r); | ||
52 | |||
53 | return *this; | ||
54 | } | ||
55 | |||
56 | adjective_query& adjective_query::has_pronunciation() | ||
57 | { | ||
58 | this->_has_prn = true; | ||
59 | |||
60 | return *this; | ||
61 | } | ||
62 | |||
63 | adjective_query& adjective_query::has_rhyming_noun() | ||
64 | { | ||
65 | _has_rhyming_noun = true; | ||
66 | |||
67 | return *this; | ||
68 | } | ||
69 | |||
70 | adjective_query& adjective_query::has_rhyming_adjective() | ||
71 | { | ||
72 | _has_rhyming_adjective = true; | ||
73 | |||
74 | return *this; | ||
75 | } | ||
76 | |||
77 | adjective_query& adjective_query::has_rhyming_adverb() | ||
78 | { | ||
79 | _has_rhyming_adverb = true; | ||
80 | |||
81 | return *this; | ||
82 | } | ||
83 | |||
84 | adjective_query& adjective_query::has_rhyming_verb() | ||
85 | { | ||
86 | _has_rhyming_verb = true; | ||
87 | |||
88 | return *this; | ||
89 | } | ||
90 | |||
91 | adjective_query& adjective_query::with_stress(filter<std::vector<bool>> _arg) | ||
92 | { | ||
93 | _stress = _arg; | ||
94 | |||
95 | return *this; | ||
96 | } | ||
97 | |||
98 | adjective_query& adjective_query::with_prefix(filter<std::string> _f) | ||
99 | { | ||
100 | _f.clean(); | ||
101 | _with_prefix = _f; | ||
102 | |||
103 | return *this; | ||
104 | } | ||
105 | |||
106 | adjective_query& adjective_query::with_suffix(filter<std::string> _f) | ||
107 | { | ||
108 | _f.clean(); | ||
109 | _with_suffix = _f; | ||
110 | |||
111 | return *this; | ||
112 | } | ||
113 | |||
114 | adjective_query& adjective_query::with_complexity(int _arg) | ||
115 | { | ||
116 | _with_complexity = _arg; | ||
117 | |||
118 | return *this; | ||
119 | } | ||
120 | |||
121 | adjective_query& adjective_query::requires_comparative_form() | ||
122 | { | ||
123 | _requires_comparative_form = true; | ||
124 | |||
125 | return *this; | ||
126 | } | ||
127 | |||
128 | adjective_query& adjective_query::requires_superlative_form() | ||
129 | { | ||
130 | _requires_superlative_form = true; | ||
131 | |||
132 | return *this; | ||
133 | } | ||
134 | |||
135 | adjective_query& adjective_query::position(adjective::positioning pos) | ||
136 | { | ||
137 | _position = pos; | ||
138 | |||
139 | return *this; | ||
140 | } | ||
141 | |||
142 | adjective_query& adjective_query::is_variant() | ||
143 | { | ||
144 | this->_is_variant = true; | ||
145 | |||
146 | return *this; | ||
147 | } | ||
148 | |||
149 | adjective_query& adjective_query::variant_of(filter<noun> _f) | ||
150 | { | ||
151 | _f.clean(); | ||
152 | _variant_of = _f; | ||
153 | |||
154 | return *this; | ||
155 | } | ||
156 | |||
157 | adjective_query& adjective_query::has_antonyms() | ||
158 | { | ||
159 | this->_is_antonymic = true; | ||
160 | |||
161 | return *this; | ||
162 | } | ||
163 | |||
164 | adjective_query& adjective_query::antonym_of(filter<adjective> _f) | ||
165 | { | ||
166 | _f.clean(); | ||
167 | _antonym_of = _f; | ||
168 | |||
169 | return *this; | ||
170 | } | ||
171 | |||
172 | adjective_query& adjective_query::has_synonyms() | ||
173 | { | ||
174 | this->_is_synonymic = true; | ||
175 | |||
176 | return *this; | ||
177 | } | ||
178 | |||
179 | adjective_query& adjective_query::synonym_of(filter<adjective> _f) | ||
180 | { | ||
181 | _f.clean(); | ||
182 | _synonym_of = _f; | ||
183 | |||
184 | return *this; | ||
185 | } | ||
186 | |||
187 | adjective_query& adjective_query::is_generalization() | ||
188 | { | ||
189 | this->_is_generalization = true; | ||
190 | |||
191 | return *this; | ||
192 | } | ||
193 | |||
194 | adjective_query& adjective_query::generalization_of(filter<adjective> _f) | ||
195 | { | ||
196 | _f.clean(); | ||
197 | _generalization_of = _f; | ||
198 | |||
199 | return *this; | ||
200 | } | ||
201 | |||
202 | adjective_query& adjective_query::is_specification() | ||
203 | { | ||
204 | this->_is_specification = true; | ||
205 | |||
206 | return *this; | ||
207 | } | ||
208 | |||
209 | adjective_query& adjective_query::specification_of(filter<adjective> _f) | ||
210 | { | ||
211 | _f.clean(); | ||
212 | _specification_of = _f; | ||
213 | |||
214 | return *this; | ||
215 | } | ||
216 | |||
217 | adjective_query& adjective_query::is_pertainymic() | ||
218 | { | ||
219 | this->_is_pertainymic = true; | ||
220 | |||
221 | return *this; | ||
222 | } | ||
223 | |||
224 | adjective_query& adjective_query::pertainym_of(filter<noun> _f) | ||
225 | { | ||
226 | _f.clean(); | ||
227 | _pertainym_of = _f; | ||
228 | |||
229 | return *this; | ||
230 | } | ||
231 | |||
232 | adjective_query& adjective_query::is_mannernymic() | ||
233 | { | ||
234 | this->_is_mannernymic = true; | ||
235 | |||
236 | return *this; | ||
237 | } | ||
238 | |||
239 | adjective_query& adjective_query::anti_mannernym_of(filter<adverb> _f) | ||
240 | { | ||
241 | _f.clean(); | ||
242 | _anti_mannernym_of = _f; | ||
243 | |||
244 | return *this; | ||
245 | } | ||
246 | /* | ||
247 | adjective_query& adjective_query::derived_from(const word& _w) | ||
248 | { | ||
249 | if (dynamic_cast<const adjective*>(&_w) != nullptr) | ||
250 | { | ||
251 | _derived_from_adjective.push_back(dynamic_cast<const adjective&>(_w)); | ||
252 | } else if (dynamic_cast<const adverb*>(&_w) != nullptr) | ||
253 | { | ||
254 | _derived_from_adverb.push_back(dynamic_cast<const adverb&>(_w)); | ||
255 | } else if (dynamic_cast<const noun*>(&_w) != nullptr) | ||
256 | { | ||
257 | _derived_from_noun.push_back(dynamic_cast<const noun&>(_w)); | ||
258 | } | ||
259 | |||
260 | return *this; | ||
261 | } | ||
262 | |||
263 | adjective_query& adjective_query::not_derived_from(const word& _w) | ||
264 | { | ||
265 | if (dynamic_cast<const adjective*>(&_w) != nullptr) | ||
266 | { | ||
267 | _not_derived_from_adjective.push_back(dynamic_cast<const adjective&>(_w)); | ||
268 | } else if (dynamic_cast<const adverb*>(&_w) != nullptr) | ||
269 | { | ||
270 | _not_derived_from_adverb.push_back(dynamic_cast<const adverb&>(_w)); | ||
271 | } else if (dynamic_cast<const noun*>(&_w) != nullptr) | ||
272 | { | ||
273 | _not_derived_from_noun.push_back(dynamic_cast<const noun&>(_w)); | ||
274 | } | ||
275 | |||
276 | return *this; | ||
277 | } | ||
278 | */ | ||
279 | std::list<adjective> adjective_query::run() const | ||
280 | { | ||
281 | std::stringstream construct; | ||
282 | construct << "SELECT adjective_id, base_form, comparative, superlative, position FROM adjectives"; | ||
283 | std::list<std::string> conditions; | ||
284 | std::list<binding> bindings; | ||
285 | |||
286 | if (_has_prn) | ||
287 | { | ||
288 | conditions.push_back("adjective_id IN (SELECT adjective_id FROM adjective_pronunciations)"); | ||
289 | } | ||
290 | |||
291 | if (!_rhymes.empty()) | ||
292 | { | ||
293 | std::list<std::string> clauses(_rhymes.size(), "(prerhyme != ? AND rhyme = ?)"); | ||
294 | std::string cond = "adjective_id IN (SELECT adjective_id FROM adjective_pronunciations WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
295 | conditions.push_back(cond); | ||
296 | |||
297 | for (auto rhy : _rhymes) | ||
298 | { | ||
299 | bindings.emplace_back(rhy.get_prerhyme()); | ||
300 | bindings.emplace_back(rhy.get_rhyme()); | ||
301 | } | ||
302 | } | ||
303 | |||
304 | if (_has_rhyming_noun) | ||
305 | { | ||
306 | conditions.push_back("adjective_id IN (SELECT a.adjective_id FROM adjectives AS a INNER JOIN adjective_pronunciations AS curp ON curp.adjective_id = a.adjective_id INNER JOIN noun_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)"); | ||
307 | } | ||
308 | |||
309 | if (_has_rhyming_adjective) | ||
310 | { | ||
311 | conditions.push_back("adjective_id IN (SELECT a.adjective_id FROM adjectives AS a INNER JOIN adjective_pronunciations AS curp ON curp.adjective_id = a.adjective_id INNER JOIN adjective_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme AND rhmp.adjective_id != curp.adjective_id)"); | ||
312 | } | ||
313 | |||
314 | if (_has_rhyming_adverb) | ||
315 | { | ||
316 | conditions.push_back("adjective_id IN (SELECT a.adjective_id FROM adjectives AS a INNER JOIN adjective_pronunciations AS curp ON curp.adjective_id = a.adjective_id INNER JOIN adverb_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)"); | ||
317 | } | ||
318 | |||
319 | if (_has_rhyming_verb) | ||
320 | { | ||
321 | conditions.push_back("adjective_id IN (SELECT a.adjective_id FROM adjectives AS a INNER JOIN adjective_pronunciations AS curp ON curp.adjective_id = a.adjective_id INNER JOIN verb_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)"); | ||
322 | } | ||
323 | |||
324 | for (auto except : _except) | ||
325 | { | ||
326 | conditions.push_back("adjective_id != ?"); | ||
327 | bindings.emplace_back(except._id); | ||
328 | } | ||
329 | |||
330 | if (_requires_comparative_form) | ||
331 | { | ||
332 | conditions.push_back("comparative IS NOT NULL"); | ||
333 | } | ||
334 | |||
335 | if (_requires_superlative_form) | ||
336 | { | ||
337 | conditions.push_back("superlative IS NOT NULL"); | ||
338 | } | ||
339 | |||
340 | switch (_position) | ||
341 | { | ||
342 | case adjective::positioning::predicate: conditions.push_back("position = 'p'"); break; | ||
343 | case adjective::positioning::attributive: conditions.push_back("position = 'a'"); break; | ||
344 | case adjective::positioning::postnominal: conditions.push_back("position = 'i'"); break; | ||
345 | case adjective::positioning::undefined: break; | ||
346 | } | ||
347 | |||
348 | if (!_stress.empty()) | ||
349 | { | ||
350 | std::stringstream cond; | ||
351 | if (_stress.get_notlogic()) | ||
352 | { | ||
353 | cond << "adjective_id NOT IN"; | ||
354 | } else { | ||
355 | cond << "adjective_id IN"; | ||
356 | } | ||
357 | |||
358 | cond << "(SELECT adjective_id FROM adjective_pronunciations WHERE "; | ||
359 | |||
360 | std::function<std::string (filter<std::vector<bool>>, bool)> recur = [&] (filter<std::vector<bool>> f, bool notlogic) -> std::string { | ||
361 | switch (f.get_type()) | ||
362 | { | ||
363 | case filter<std::vector<bool>>::type::singleton: | ||
364 | { | ||
365 | std::ostringstream _val; | ||
366 | for (auto syl : f.get_elem()) | ||
367 | { | ||
368 | if (syl) | ||
369 | { | ||
370 | _val << "1"; | ||
371 | } else { | ||
372 | _val << "0"; | ||
373 | } | ||
374 | } | ||
375 | |||
376 | bindings.emplace_back(_val.str()); | ||
377 | |||
378 | if (notlogic == f.get_notlogic()) | ||
379 | { | ||
380 | return "stress = ?"; | ||
381 | } else { | ||
382 | return "stress != ?"; | ||
383 | } | ||
384 | } | ||
385 | |||
386 | case filter<std::vector<bool>>::type::group: | ||
387 | { | ||
388 | bool truelogic = notlogic != f.get_notlogic(); | ||
389 | |||
390 | std::list<std::string> clauses; | ||
391 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<std::vector<bool>> f2) { | ||
392 | return recur(f2, truelogic); | ||
393 | }); | ||
394 | |||
395 | if (truelogic == f.get_orlogic()) | ||
396 | { | ||
397 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
398 | } else { | ||
399 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
400 | } | ||
401 | } | ||
402 | } | ||
403 | }; | ||
404 | |||
405 | cond << recur(_stress, _stress.get_notlogic()); | ||
406 | cond << ")"; | ||
407 | conditions.push_back(cond.str()); | ||
408 | } | ||
409 | |||
410 | if (!_with_prefix.empty()) | ||
411 | { | ||
412 | std::function<std::string (filter<std::string>, bool)> recur = [&] (filter<std::string> f, bool notlogic) -> std::string { | ||
413 | switch (f.get_type()) | ||
414 | { | ||
415 | case filter<std::string>::type::singleton: | ||
416 | { | ||
417 | bindings.emplace_back(f.get_elem() + "%"); | ||
418 | |||
419 | if (notlogic == f.get_notlogic()) | ||
420 | { | ||
421 | return "base_form LIKE ?"; | ||
422 | } else { | ||
423 | return "base_form NOT LIKE ?"; | ||
424 | } | ||
425 | } | ||
426 | |||
427 | case filter<std::string>::type::group: | ||
428 | { | ||
429 | bool truelogic = notlogic != f.get_notlogic(); | ||
430 | |||
431 | std::list<std::string> clauses; | ||
432 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<std::string> f2) { | ||
433 | return recur(f2, truelogic); | ||
434 | }); | ||
435 | |||
436 | if (truelogic == f.get_orlogic()) | ||
437 | { | ||
438 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
439 | } else { | ||
440 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
441 | } | ||
442 | } | ||
443 | } | ||
444 | }; | ||
445 | |||
446 | conditions.push_back(recur(_with_prefix, false)); | ||
447 | } | ||
448 | |||
449 | if (!_with_suffix.empty()) | ||
450 | { | ||
451 | std::function<std::string (filter<std::string>, bool)> recur = [&] (filter<std::string> f, bool notlogic) -> std::string { | ||
452 | switch (f.get_type()) | ||
453 | { | ||
454 | case filter<std::string>::type::singleton: | ||
455 | { | ||
456 | bindings.emplace_back("%" + f.get_elem()); | ||
457 | |||
458 | if (notlogic == f.get_notlogic()) | ||
459 | { | ||
460 | return "base_form LIKE ?"; | ||
461 | } else { | ||
462 | return "base_form NOT LIKE ?"; | ||
463 | } | ||
464 | } | ||
465 | |||
466 | case filter<std::string>::type::group: | ||
467 | { | ||
468 | bool truelogic = notlogic != f.get_notlogic(); | ||
469 | |||
470 | std::list<std::string> clauses; | ||
471 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<std::string> f2) { | ||
472 | return recur(f2, truelogic); | ||
473 | }); | ||
474 | |||
475 | if (truelogic == f.get_orlogic()) | ||
476 | { | ||
477 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
478 | } else { | ||
479 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
480 | } | ||
481 | } | ||
482 | } | ||
483 | }; | ||
484 | |||
485 | conditions.push_back(recur(_with_suffix, false)); | ||
486 | } | ||
487 | |||
488 | if (_with_complexity != unlimited) | ||
489 | { | ||
490 | conditions.push_back("complexity = ?"); | ||
491 | bindings.emplace_back(_with_complexity); | ||
492 | } | ||
493 | |||
494 | if (_is_variant) | ||
495 | { | ||
496 | conditions.push_back("adjective_id IN (SELECT adjective_id FROM variation)"); | ||
497 | } | ||
498 | |||
499 | if (!_variant_of.empty()) | ||
500 | { | ||
501 | std::stringstream cond; | ||
502 | if (_variant_of.get_notlogic()) | ||
503 | { | ||
504 | cond << "adjective_id NOT IN"; | ||
505 | } else { | ||
506 | cond << "adjective_id IN"; | ||
507 | } | ||
508 | |||
509 | cond << "(SELECT adjective_id FROM variation WHERE "; | ||
510 | |||
511 | std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string { | ||
512 | switch (f.get_type()) | ||
513 | { | ||
514 | case filter<noun>::type::singleton: | ||
515 | { | ||
516 | bindings.emplace_back(f.get_elem()._id); | ||
517 | |||
518 | if (notlogic == f.get_notlogic()) | ||
519 | { | ||
520 | return "noun_id = ?"; | ||
521 | } else { | ||
522 | return "noun_id != ?"; | ||
523 | } | ||
524 | } | ||
525 | |||
526 | case filter<noun>::type::group: | ||
527 | { | ||
528 | bool truelogic = notlogic != f.get_notlogic(); | ||
529 | |||
530 | std::list<std::string> clauses; | ||
531 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) { | ||
532 | return recur(f2, truelogic); | ||
533 | }); | ||
534 | |||
535 | if (truelogic == f.get_orlogic()) | ||
536 | { | ||
537 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
538 | } else { | ||
539 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
540 | } | ||
541 | } | ||
542 | } | ||
543 | }; | ||
544 | |||
545 | cond << recur(_variant_of, _variant_of.get_notlogic()); | ||
546 | cond << ")"; | ||
547 | conditions.push_back(cond.str()); | ||
548 | } | ||
549 | |||
550 | if (_is_antonymic) | ||
551 | { | ||
552 | conditions.push_back("adjective_id IN (SELECT adjective_2_id FROM adjective_antonymy)"); | ||
553 | } | ||
554 | |||
555 | if (!_antonym_of.empty()) | ||
556 | { | ||
557 | std::stringstream cond; | ||
558 | if (_antonym_of.get_notlogic()) | ||
559 | { | ||
560 | cond << "adjective_id NOT IN"; | ||
561 | } else { | ||
562 | cond << "adjective_id IN"; | ||
563 | } | ||
564 | |||
565 | cond << "(SELECT adjective_2_id FROM adjective_antonymy WHERE "; | ||
566 | |||
567 | std::function<std::string (filter<adjective>, bool)> recur = [&] (filter<adjective> f, bool notlogic) -> std::string { | ||
568 | switch (f.get_type()) | ||
569 | { | ||
570 | case filter<adjective>::type::singleton: | ||
571 | { | ||
572 | bindings.emplace_back(f.get_elem()._id); | ||
573 | |||
574 | if (notlogic == f.get_notlogic()) | ||
575 | { | ||
576 | return "adjective_1_id = ?"; | ||
577 | } else { | ||
578 | return "adjective_1_id != ?"; | ||
579 | } | ||
580 | } | ||
581 | |||
582 | case filter<adjective>::type::group: | ||
583 | { | ||
584 | bool truelogic = notlogic != f.get_notlogic(); | ||
585 | |||
586 | std::list<std::string> clauses; | ||
587 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<adjective> f2) { | ||
588 | return recur(f2, truelogic); | ||
589 | }); | ||
590 | |||
591 | if (truelogic == f.get_orlogic()) | ||
592 | { | ||
593 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
594 | } else { | ||
595 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
596 | } | ||
597 | } | ||
598 | } | ||
599 | }; | ||
600 | |||
601 | cond << recur(_antonym_of, _antonym_of.get_notlogic()); | ||
602 | cond << ")"; | ||
603 | conditions.push_back(cond.str()); | ||
604 | } | ||
605 | |||
606 | if (_is_synonymic) | ||
607 | { | ||
608 | conditions.push_back("adjective_id IN (SELECT adjective_2_id FROM adjective_synonymy)"); | ||
609 | } | ||
610 | |||
611 | if (!_synonym_of.empty()) | ||
612 | { | ||
613 | std::stringstream cond; | ||
614 | if (_synonym_of.get_notlogic()) | ||
615 | { | ||
616 | cond << "adjective_id NOT IN"; | ||
617 | } else { | ||
618 | cond << "adjective_id IN"; | ||
619 | } | ||
620 | |||
621 | cond << "(SELECT adjective_2_id FROM adjective_synonymy WHERE "; | ||
622 | |||
623 | std::function<std::string (filter<adjective>, bool)> recur = [&] (filter<adjective> f, bool notlogic) -> std::string { | ||
624 | switch (f.get_type()) | ||
625 | { | ||
626 | case filter<adjective>::type::singleton: | ||
627 | { | ||
628 | bindings.emplace_back(f.get_elem()._id); | ||
629 | |||
630 | if (notlogic == f.get_notlogic()) | ||
631 | { | ||
632 | return "adjective_1_id = ?"; | ||
633 | } else { | ||
634 | return "adjective_1_id != ?"; | ||
635 | } | ||
636 | } | ||
637 | |||
638 | case filter<adjective>::type::group: | ||
639 | { | ||
640 | bool truelogic = notlogic != f.get_notlogic(); | ||
641 | |||
642 | std::list<std::string> clauses; | ||
643 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<adjective> f2) { | ||
644 | return recur(f2, truelogic); | ||
645 | }); | ||
646 | |||
647 | if (truelogic == f.get_orlogic()) | ||
648 | { | ||
649 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
650 | } else { | ||
651 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
652 | } | ||
653 | } | ||
654 | } | ||
655 | }; | ||
656 | |||
657 | cond << recur(_synonym_of, _synonym_of.get_notlogic()); | ||
658 | cond << ")"; | ||
659 | conditions.push_back(cond.str()); | ||
660 | } | ||
661 | |||
662 | if (_is_generalization) | ||
663 | { | ||
664 | conditions.push_back("adjective_id IN (SELECT general_id FROM specification)"); | ||
665 | } | ||
666 | |||
667 | if (!_generalization_of.empty()) | ||
668 | { | ||
669 | std::stringstream cond; | ||
670 | if (_generalization_of.get_notlogic()) | ||
671 | { | ||
672 | cond << "adjective_id NOT IN"; | ||
673 | } else { | ||
674 | cond << "adjective_id IN"; | ||
675 | } | ||
676 | |||
677 | cond << "(SELECT general_id FROM specification WHERE "; | ||
678 | |||
679 | std::function<std::string (filter<adjective>, bool)> recur = [&] (filter<adjective> f, bool notlogic) -> std::string { | ||
680 | switch (f.get_type()) | ||
681 | { | ||
682 | case filter<adjective>::type::singleton: | ||
683 | { | ||
684 | bindings.emplace_back(f.get_elem()._id); | ||
685 | |||
686 | if (notlogic == f.get_notlogic()) | ||
687 | { | ||
688 | return "specific_id = ?"; | ||
689 | } else { | ||
690 | return "specific_id != ?"; | ||
691 | } | ||
692 | } | ||
693 | |||
694 | case filter<adjective>::type::group: | ||
695 | { | ||
696 | bool truelogic = notlogic != f.get_notlogic(); | ||
697 | |||
698 | std::list<std::string> clauses; | ||
699 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<adjective> f2) { | ||
700 | return recur(f2, truelogic); | ||
701 | }); | ||
702 | |||
703 | if (truelogic == f.get_orlogic()) | ||
704 | { | ||
705 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
706 | } else { | ||
707 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
708 | } | ||
709 | } | ||
710 | } | ||
711 | }; | ||
712 | |||
713 | cond << recur(_generalization_of, _generalization_of.get_notlogic()); | ||
714 | cond << ")"; | ||
715 | conditions.push_back(cond.str()); | ||
716 | } | ||
717 | |||
718 | if (_is_specification) | ||
719 | { | ||
720 | conditions.push_back("adjective_id IN (SELECT specific_id FROM specification)"); | ||
721 | } | ||
722 | |||
723 | if (!_specification_of.empty()) | ||
724 | { | ||
725 | std::stringstream cond; | ||
726 | if (_specification_of.get_notlogic()) | ||
727 | { | ||
728 | cond << "adjective_id NOT IN"; | ||
729 | } else { | ||
730 | cond << "adjective_id IN"; | ||
731 | } | ||
732 | |||
733 | cond << "(SELECT specific_id FROM specification WHERE "; | ||
734 | |||
735 | std::function<std::string (filter<adjective>, bool)> recur = [&] (filter<adjective> f, bool notlogic) -> std::string { | ||
736 | switch (f.get_type()) | ||
737 | { | ||
738 | case filter<adjective>::type::singleton: | ||
739 | { | ||
740 | bindings.emplace_back(f.get_elem()._id); | ||
741 | |||
742 | if (notlogic == f.get_notlogic()) | ||
743 | { | ||
744 | return "general_id = ?"; | ||
745 | } else { | ||
746 | return "general_id != ?"; | ||
747 | } | ||
748 | } | ||
749 | |||
750 | case filter<adjective>::type::group: | ||
751 | { | ||
752 | bool truelogic = notlogic != f.get_notlogic(); | ||
753 | |||
754 | std::list<std::string> clauses; | ||
755 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<adjective> f2) { | ||
756 | return recur(f2, truelogic); | ||
757 | }); | ||
758 | |||
759 | if (truelogic == f.get_orlogic()) | ||
760 | { | ||
761 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
762 | } else { | ||
763 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
764 | } | ||
765 | } | ||
766 | } | ||
767 | }; | ||
768 | |||
769 | cond << recur(_specification_of, _specification_of.get_notlogic()); | ||
770 | cond << ")"; | ||
771 | conditions.push_back(cond.str()); | ||
772 | } | ||
773 | |||
774 | if (_is_pertainymic) | ||
775 | { | ||
776 | conditions.push_back("adjective_id IN (SELECT pertainym_id FROM pertainymy)"); | ||
777 | } | ||
778 | |||
779 | if (!_pertainym_of.empty()) | ||
780 | { | ||
781 | std::stringstream cond; | ||
782 | if (_pertainym_of.get_notlogic()) | ||
783 | { | ||
784 | cond << "adjective_id NOT IN"; | ||
785 | } else { | ||
786 | cond << "adjective_id IN"; | ||
787 | } | ||
788 | |||
789 | cond << "(SELECT pertainym_id FROM pertainymy WHERE "; | ||
790 | |||
791 | std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string { | ||
792 | switch (f.get_type()) | ||
793 | { | ||
794 | case filter<noun>::type::singleton: | ||
795 | { | ||
796 | bindings.emplace_back(f.get_elem()._id); | ||
797 | |||
798 | if (notlogic == f.get_notlogic()) | ||
799 | { | ||
800 | return "noun_id = ?"; | ||
801 | } else { | ||
802 | return "noun_id != ?"; | ||
803 | } | ||
804 | } | ||
805 | |||
806 | case filter<noun>::type::group: | ||
807 | { | ||
808 | bool truelogic = notlogic != f.get_notlogic(); | ||
809 | |||
810 | std::list<std::string> clauses; | ||
811 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) { | ||
812 | return recur(f2, truelogic); | ||
813 | }); | ||
814 | |||
815 | if (truelogic == f.get_orlogic()) | ||
816 | { | ||
817 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
818 | } else { | ||
819 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
820 | } | ||
821 | } | ||
822 | } | ||
823 | }; | ||
824 | |||
825 | cond << recur(_pertainym_of, _pertainym_of.get_notlogic()); | ||
826 | cond << ")"; | ||
827 | conditions.push_back(cond.str()); | ||
828 | } | ||
829 | |||
830 | if (_is_mannernymic) | ||
831 | { | ||
832 | conditions.push_back("adjective_id IN (SELECT adjective_id FROM mannernymy)"); | ||
833 | } | ||
834 | |||
835 | if (!_anti_mannernym_of.empty()) | ||
836 | { | ||
837 | std::stringstream cond; | ||
838 | if (_anti_mannernym_of.get_notlogic()) | ||
839 | { | ||
840 | cond << "adjective_id NOT IN"; | ||
841 | } else { | ||
842 | cond << "adjective_id IN"; | ||
843 | } | ||
844 | |||
845 | cond << "(SELECT adjective_id FROM mannernymy WHERE "; | ||
846 | |||
847 | std::function<std::string (filter<adverb>, bool)> recur = [&] (filter<adverb> f, bool notlogic) -> std::string { | ||
848 | switch (f.get_type()) | ||
849 | { | ||
850 | case filter<adverb>::type::singleton: | ||
851 | { | ||
852 | bindings.emplace_back(f.get_elem()._id); | ||
853 | |||
854 | if (notlogic == f.get_notlogic()) | ||
855 | { | ||
856 | return "mannernym_id = ?"; | ||
857 | } else { | ||
858 | return "mannernym_id != ?"; | ||
859 | } | ||
860 | } | ||
861 | |||
862 | case filter<adverb>::type::group: | ||
863 | { | ||
864 | bool truelogic = notlogic != f.get_notlogic(); | ||
865 | |||
866 | std::list<std::string> clauses; | ||
867 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<adverb> f2) { | ||
868 | return recur(f2, truelogic); | ||
869 | }); | ||
870 | |||
871 | if (truelogic == f.get_orlogic()) | ||
872 | { | ||
873 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
874 | } else { | ||
875 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
876 | } | ||
877 | } | ||
878 | } | ||
879 | }; | ||
880 | |||
881 | cond << recur(_anti_mannernym_of, _anti_mannernym_of.get_notlogic()); | ||
882 | cond << ")"; | ||
883 | conditions.push_back(cond.str()); | ||
884 | } | ||
885 | /* | ||
886 | if (!_derived_from_adjective.empty()) | ||
887 | { | ||
888 | std::list<std::string> clauses(_derived_from_adjective.size(), "adjective_2_id = @DERADJ"); | ||
889 | std::string cond = "adjective_id IN (SELECT adjective_1_id FROM adjective_adjective_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
890 | conditions.push_back(cond); | ||
891 | } | ||
892 | |||
893 | if (!_not_derived_from_adjective.empty()) | ||
894 | { | ||
895 | std::list<std::string> clauses(_not_derived_from_adjective.size(), "adjective_2_id = @NDERADJ"); | ||
896 | std::string cond = "adjective_id NOT IN (SELECT adjective_1_id FROM adjective_adjective_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
897 | conditions.push_back(cond); | ||
898 | } | ||
899 | |||
900 | if (!_derived_from_adverb.empty()) | ||
901 | { | ||
902 | std::list<std::string> clauses(_derived_from_adverb.size(), "adverb_id = @DERADV"); | ||
903 | std::string cond = "adjective_id IN (SELECT adjective_id FROM adjective_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
904 | conditions.push_back(cond); | ||
905 | } | ||
906 | |||
907 | if (!_not_derived_from_adverb.empty()) | ||
908 | { | ||
909 | std::list<std::string> clauses(_not_derived_from_adverb.size(), "adverb_id = @NDERADV"); | ||
910 | std::string cond = "adjective_id NOT IN (SELECT adjective_id FROM adjective_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
911 | conditions.push_back(cond); | ||
912 | } | ||
913 | |||
914 | if (!_derived_from_noun.empty()) | ||
915 | { | ||
916 | std::list<std::string> clauses(_derived_from_noun.size(), "noun_id = @DERN"); | ||
917 | std::string cond = "adjective_id IN (SELECT adjective_id FROM noun_adjective_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
918 | conditions.push_back(cond); | ||
919 | } | ||
920 | |||
921 | if (!_not_derived_from_noun.empty()) | ||
922 | { | ||
923 | std::list<std::string> clauses(_not_derived_from_noun.size(), "noun_id = @NDERN"); | ||
924 | std::string cond = "adjective_id NOT IN (SELECT adjective_id FROM noun_adjective_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
925 | conditions.push_back(cond); | ||
926 | }*/ | ||
927 | |||
928 | if (!conditions.empty()) | ||
929 | { | ||
930 | construct << " WHERE "; | ||
931 | construct << verbly::implode(std::begin(conditions), std::end(conditions), " AND "); | ||
932 | } | ||
933 | |||
934 | if (_random) | ||
935 | { | ||
936 | construct << " ORDER BY RANDOM()"; | ||
937 | } | ||
938 | |||
939 | if (_limit != unlimited) | ||
940 | { | ||
941 | construct << " LIMIT " << _limit; | ||
942 | } | ||
943 | |||
944 | sqlite3_stmt* ppstmt; | ||
945 | std::string query = construct.str(); | ||
946 | if (sqlite3_prepare_v2(_data.ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
947 | { | ||
948 | throw std::runtime_error(sqlite3_errmsg(_data.ppdb)); | ||
949 | } | ||
950 | |||
951 | int i = 1; | ||
952 | for (auto& binding : bindings) | ||
953 | { | ||
954 | switch (binding.get_type()) | ||
955 | { | ||
956 | case binding::type::integer: | ||
957 | { | ||
958 | sqlite3_bind_int(ppstmt, i, binding.get_integer()); | ||
959 | |||
960 | break; | ||
961 | } | ||
962 | |||
963 | case binding::type::string: | ||
964 | { | ||
965 | sqlite3_bind_text(ppstmt, i, binding.get_string().c_str(), binding.get_string().length(), SQLITE_TRANSIENT); | ||
966 | |||
967 | break; | ||
968 | } | ||
969 | } | ||
970 | |||
971 | i++; | ||
972 | } | ||
973 | |||
974 | /* | ||
975 | for (auto adj : _derived_from_adjective) | ||
976 | { | ||
977 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@DERADJ"), adj._id); | ||
978 | } | ||
979 | |||
980 | for (auto adj : _not_derived_from_adjective) | ||
981 | { | ||
982 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@NDERADJ"), adj._id); | ||
983 | } | ||
984 | |||
985 | for (auto adv : _derived_from_adverb) | ||
986 | { | ||
987 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@DERADV"), adv._id); | ||
988 | } | ||
989 | |||
990 | for (auto adv : _not_derived_from_adverb) | ||
991 | { | ||
992 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@NDERADV"), adv._id); | ||
993 | } | ||
994 | |||
995 | for (auto n : _derived_from_noun) | ||
996 | { | ||
997 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@DERN"), n._id); | ||
998 | } | ||
999 | |||
1000 | for (auto n : _not_derived_from_noun) | ||
1001 | { | ||
1002 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@NDERN"), n._id); | ||
1003 | } | ||
1004 | */ | ||
1005 | std::list<adjective> output; | ||
1006 | while (sqlite3_step(ppstmt) == SQLITE_ROW) | ||
1007 | { | ||
1008 | adjective tnc {_data, sqlite3_column_int(ppstmt, 0)}; | ||
1009 | tnc._base_form = std::string(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 1))); | ||
1010 | |||
1011 | if (sqlite3_column_type(ppstmt, 2) != SQLITE_NULL) | ||
1012 | { | ||
1013 | tnc._comparative_form = std::string(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 2))); | ||
1014 | } | ||
1015 | |||
1016 | if (sqlite3_column_type(ppstmt, 3) != SQLITE_NULL) | ||
1017 | { | ||
1018 | tnc._superlative_form = std::string(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 3))); | ||
1019 | } | ||
1020 | |||
1021 | if (sqlite3_column_type(ppstmt, 4) != SQLITE_NULL) | ||
1022 | { | ||
1023 | std::string adjpos(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 4))); | ||
1024 | if (adjpos == "p") | ||
1025 | { | ||
1026 | tnc._position = adjective::positioning::predicate; | ||
1027 | } else if (adjpos == "a") | ||
1028 | { | ||
1029 | tnc._position = adjective::positioning::attributive; | ||
1030 | } else if (adjpos == "i") | ||
1031 | { | ||
1032 | tnc._position = adjective::positioning::postnominal; | ||
1033 | } | ||
1034 | } | ||
1035 | |||
1036 | output.push_back(tnc); | ||
1037 | } | ||
1038 | |||
1039 | sqlite3_finalize(ppstmt); | ||
1040 | |||
1041 | for (auto& adjective : output) | ||
1042 | { | ||
1043 | query = "SELECT pronunciation, prerhyme, rhyme FROM adjective_pronunciations WHERE adjective_id = ?"; | ||
1044 | if (sqlite3_prepare_v2(_data.ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
1045 | { | ||
1046 | throw std::runtime_error(sqlite3_errmsg(_data.ppdb)); | ||
1047 | } | ||
1048 | |||
1049 | sqlite3_bind_int(ppstmt, 1, adjective._id); | ||
1050 | |||
1051 | while (sqlite3_step(ppstmt) == SQLITE_ROW) | ||
1052 | { | ||
1053 | std::string pronunciation(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 0))); | ||
1054 | auto phonemes = verbly::split<std::list<std::string>>(pronunciation, " "); | ||
1055 | |||
1056 | adjective.pronunciations.push_back(phonemes); | ||
1057 | |||
1058 | if ((sqlite3_column_type(ppstmt, 1) != SQLITE_NULL) && (sqlite3_column_type(ppstmt, 2) != SQLITE_NULL)) | ||
1059 | { | ||
1060 | std::string prerhyme(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 1))); | ||
1061 | std::string rhyming(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 2))); | ||
1062 | adjective.rhymes.emplace_back(prerhyme, rhyming); | ||
1063 | } | ||
1064 | } | ||
1065 | |||
1066 | sqlite3_finalize(ppstmt); | ||
1067 | } | ||
1068 | |||
1069 | return output; | ||
1070 | } | ||
1071 | |||
1072 | }; | ||
diff --git a/lib/adjective_query.h b/lib/adjective_query.h deleted file mode 100644 index e6a6609..0000000 --- a/lib/adjective_query.h +++ /dev/null | |||
@@ -1,112 +0,0 @@ | |||
1 | #ifndef ADJECTIVE_QUERY_H_05E590FD | ||
2 | #define ADJECTIVE_QUERY_H_05E590FD | ||
3 | |||
4 | namespace verbly { | ||
5 | |||
6 | class adjective_query { | ||
7 | public: | ||
8 | adjective_query(const data& _data); | ||
9 | |||
10 | adjective_query& limit(int _limit); | ||
11 | adjective_query& random(); | ||
12 | adjective_query& except(const adjective& _word); | ||
13 | adjective_query& rhymes_with(const word& _word); | ||
14 | adjective_query& rhymes_with(rhyme _r); | ||
15 | adjective_query& has_pronunciation(); | ||
16 | adjective_query& has_rhyming_noun(); | ||
17 | adjective_query& has_rhyming_adjective(); | ||
18 | adjective_query& has_rhyming_adverb(); | ||
19 | adjective_query& has_rhyming_verb(); | ||
20 | adjective_query& with_stress(filter<std::vector<bool>> _arg); | ||
21 | |||
22 | adjective_query& requires_comparative_form(); | ||
23 | adjective_query& requires_superlative_form(); | ||
24 | adjective_query& position(adjective::positioning pos); | ||
25 | |||
26 | adjective_query& with_prefix(filter<std::string> _f); | ||
27 | adjective_query& with_suffix(filter<std::string> _f); | ||
28 | |||
29 | adjective_query& with_complexity(int _arg); | ||
30 | |||
31 | adjective_query& is_variant(); | ||
32 | adjective_query& variant_of(filter<noun> _f); | ||
33 | |||
34 | adjective_query& has_antonyms(); | ||
35 | adjective_query& antonym_of(filter<adjective> _f); | ||
36 | |||
37 | adjective_query& has_synonyms(); | ||
38 | adjective_query& synonym_of(filter<adjective> _f); | ||
39 | |||
40 | adjective_query& is_generalization(); | ||
41 | adjective_query& generalization_of(filter<adjective> _f); | ||
42 | |||
43 | adjective_query& is_specification(); | ||
44 | adjective_query& specification_of(filter<adjective> _f); | ||
45 | |||
46 | adjective_query& is_pertainymic(); | ||
47 | adjective_query& pertainym_of(filter<noun> _f); | ||
48 | |||
49 | adjective_query& is_mannernymic(); | ||
50 | adjective_query& anti_mannernym_of(filter<adverb> _f); | ||
51 | |||
52 | /* adjective_query& derived_from(const word& _w); | ||
53 | adjective_query& not_derived_from(const word& _w);*/ | ||
54 | |||
55 | std::list<adjective> run() const; | ||
56 | |||
57 | const static int unlimited = -1; | ||
58 | |||
59 | protected: | ||
60 | const data& _data; | ||
61 | int _limit = unlimited; | ||
62 | bool _random = false; | ||
63 | std::list<rhyme> _rhymes; | ||
64 | std::list<adjective> _except; | ||
65 | bool _has_prn = false; | ||
66 | bool _has_rhyming_noun = false; | ||
67 | bool _has_rhyming_adjective = false; | ||
68 | bool _has_rhyming_adverb = false; | ||
69 | bool _has_rhyming_verb = false; | ||
70 | filter<std::vector<bool>> _stress; | ||
71 | |||
72 | bool _requires_comparative_form = false; | ||
73 | bool _requires_superlative_form = false; | ||
74 | adjective::positioning _position = adjective::positioning::undefined; | ||
75 | |||
76 | filter<std::string> _with_prefix; | ||
77 | filter<std::string> _with_suffix; | ||
78 | |||
79 | int _with_complexity = unlimited; | ||
80 | |||
81 | bool _is_variant = false; | ||
82 | filter<noun> _variant_of; | ||
83 | |||
84 | bool _is_antonymic = false; | ||
85 | filter<adjective> _antonym_of; | ||
86 | |||
87 | bool _is_synonymic = false; | ||
88 | filter<adjective> _synonym_of; | ||
89 | |||
90 | bool _is_generalization = false; | ||
91 | filter<adjective> _generalization_of; | ||
92 | |||
93 | bool _is_specification = false; | ||
94 | filter<adjective> _specification_of; | ||
95 | |||
96 | bool _is_pertainymic = false; | ||
97 | filter<noun> _pertainym_of; | ||
98 | |||
99 | bool _is_mannernymic = false; | ||
100 | filter<adverb> _anti_mannernym_of; | ||
101 | |||
102 | /* std::list<adjective> _derived_from_adjective; | ||
103 | std::list<adjective> _not_derived_from_adjective; | ||
104 | std::list<adverb> _derived_from_adverb; | ||
105 | std::list<adverb> _not_derived_from_adverb; | ||
106 | std::list<noun> _derived_from_noun; | ||
107 | std::list<noun> _not_derived_from_noun;*/ | ||
108 | }; | ||
109 | |||
110 | }; | ||
111 | |||
112 | #endif /* end of include guard: ADJECTIVE_QUERY_H_05E590FD */ | ||
diff --git a/lib/adverb.cpp b/lib/adverb.cpp deleted file mode 100644 index 442574e..0000000 --- a/lib/adverb.cpp +++ /dev/null | |||
@@ -1,71 +0,0 @@ | |||
1 | #include "verbly.h" | ||
2 | |||
3 | namespace verbly { | ||
4 | |||
5 | adverb::adverb() | ||
6 | { | ||
7 | |||
8 | } | ||
9 | |||
10 | adverb::adverb(const data& _data, int _id) : word(_data, _id) | ||
11 | { | ||
12 | |||
13 | } | ||
14 | |||
15 | std::string adverb::base_form() const | ||
16 | { | ||
17 | assert(_valid == true); | ||
18 | |||
19 | return _base_form; | ||
20 | } | ||
21 | |||
22 | std::string adverb::comparative_form() const | ||
23 | { | ||
24 | assert(_valid == true); | ||
25 | |||
26 | return _comparative_form; | ||
27 | } | ||
28 | |||
29 | std::string adverb::superlative_form() const | ||
30 | { | ||
31 | assert(_valid == true); | ||
32 | |||
33 | return _superlative_form; | ||
34 | } | ||
35 | |||
36 | bool adverb::has_comparative_form() const | ||
37 | { | ||
38 | assert(_valid == true); | ||
39 | |||
40 | return !_comparative_form.empty(); | ||
41 | } | ||
42 | |||
43 | bool adverb::has_superlative_form() const | ||
44 | { | ||
45 | assert(_valid == true); | ||
46 | |||
47 | return !_superlative_form.empty(); | ||
48 | } | ||
49 | |||
50 | adverb_query adverb::antonyms() const | ||
51 | { | ||
52 | assert(_valid == true); | ||
53 | |||
54 | return _data->adverbs().antonym_of(*this); | ||
55 | } | ||
56 | |||
57 | adverb_query adverb::synonyms() const | ||
58 | { | ||
59 | assert(_valid == true); | ||
60 | |||
61 | return _data->adverbs().synonym_of(*this); | ||
62 | } | ||
63 | |||
64 | adjective_query adverb::anti_mannernyms() const | ||
65 | { | ||
66 | assert(_valid == true); | ||
67 | |||
68 | return _data->adjectives().anti_mannernym_of(*this); | ||
69 | } | ||
70 | |||
71 | }; | ||
diff --git a/lib/adverb.h b/lib/adverb.h deleted file mode 100644 index 56d4e28..0000000 --- a/lib/adverb.h +++ /dev/null | |||
@@ -1,35 +0,0 @@ | |||
1 | #ifndef ADVERB_H_86F8302F | ||
2 | #define ADVERB_H_86F8302F | ||
3 | |||
4 | namespace verbly { | ||
5 | |||
6 | class adverb : public word { | ||
7 | private: | ||
8 | std::string _base_form; | ||
9 | std::string _comparative_form; | ||
10 | std::string _superlative_form; | ||
11 | |||
12 | friend class adverb_query; | ||
13 | |||
14 | public: | ||
15 | adverb(); | ||
16 | adverb(const data& _data, int _id); | ||
17 | |||
18 | std::string base_form() const; | ||
19 | std::string comparative_form() const; | ||
20 | std::string superlative_form() const; | ||
21 | |||
22 | bool has_comparative_form() const; | ||
23 | bool has_superlative_form() const; | ||
24 | |||
25 | adverb_query antonyms() const; | ||
26 | adverb_query synonyms() const; | ||
27 | adjective_query anti_mannernyms() const; | ||
28 | |||
29 | adverb_query& derived_from(const word& _w); | ||
30 | adverb_query& not_derived_from(const word& _w); | ||
31 | }; | ||
32 | |||
33 | }; | ||
34 | |||
35 | #endif /* end of include guard: ADVERB_H_86F8302F */ | ||
diff --git a/lib/adverb_query.cpp b/lib/adverb_query.cpp deleted file mode 100644 index 3e62bb7..0000000 --- a/lib/adverb_query.cpp +++ /dev/null | |||
@@ -1,758 +0,0 @@ | |||
1 | #include "verbly.h" | ||
2 | |||
3 | namespace verbly { | ||
4 | |||
5 | adverb_query::adverb_query(const data& _data) : _data(_data) | ||
6 | { | ||
7 | |||
8 | } | ||
9 | |||
10 | adverb_query& adverb_query::limit(int _limit) | ||
11 | { | ||
12 | if ((_limit > 0) || (_limit == unlimited)) | ||
13 | { | ||
14 | this->_limit = _limit; | ||
15 | } | ||
16 | |||
17 | return *this; | ||
18 | } | ||
19 | |||
20 | adverb_query& adverb_query::random() | ||
21 | { | ||
22 | this->_random = true; | ||
23 | |||
24 | return *this; | ||
25 | } | ||
26 | |||
27 | adverb_query& adverb_query::except(const adverb& _word) | ||
28 | { | ||
29 | _except.push_back(_word); | ||
30 | |||
31 | return *this; | ||
32 | } | ||
33 | |||
34 | adverb_query& adverb_query::rhymes_with(const word& _word) | ||
35 | { | ||
36 | for (auto rhyme : _word.get_rhymes()) | ||
37 | { | ||
38 | _rhymes.push_back(rhyme); | ||
39 | } | ||
40 | |||
41 | if (dynamic_cast<const adverb*>(&_word) != nullptr) | ||
42 | { | ||
43 | _except.push_back(dynamic_cast<const adverb&>(_word)); | ||
44 | } | ||
45 | |||
46 | return *this; | ||
47 | } | ||
48 | |||
49 | adverb_query& adverb_query::rhymes_with(rhyme _r) | ||
50 | { | ||
51 | _rhymes.push_back(_r); | ||
52 | |||
53 | return *this; | ||
54 | } | ||
55 | |||
56 | adverb_query& adverb_query::has_pronunciation() | ||
57 | { | ||
58 | this->_has_prn = true; | ||
59 | |||
60 | return *this; | ||
61 | } | ||
62 | |||
63 | adverb_query& adverb_query::has_rhyming_noun() | ||
64 | { | ||
65 | _has_rhyming_noun = true; | ||
66 | |||
67 | return *this; | ||
68 | } | ||
69 | |||
70 | adverb_query& adverb_query::has_rhyming_adjective() | ||
71 | { | ||
72 | _has_rhyming_adjective = true; | ||
73 | |||
74 | return *this; | ||
75 | } | ||
76 | |||
77 | adverb_query& adverb_query::has_rhyming_adverb() | ||
78 | { | ||
79 | _has_rhyming_adverb = true; | ||
80 | |||
81 | return *this; | ||
82 | } | ||
83 | |||
84 | adverb_query& adverb_query::has_rhyming_verb() | ||
85 | { | ||
86 | _has_rhyming_verb = true; | ||
87 | |||
88 | return *this; | ||
89 | } | ||
90 | |||
91 | adverb_query& adverb_query::requires_comparative_form() | ||
92 | { | ||
93 | _requires_comparative_form = true; | ||
94 | |||
95 | return *this; | ||
96 | } | ||
97 | |||
98 | adverb_query& adverb_query::requires_superlative_form() | ||
99 | { | ||
100 | _requires_superlative_form = true; | ||
101 | |||
102 | return *this; | ||
103 | } | ||
104 | |||
105 | adverb_query& adverb_query::with_stress(filter<std::vector<bool>> _arg) | ||
106 | { | ||
107 | _stress = _arg; | ||
108 | |||
109 | return *this; | ||
110 | } | ||
111 | |||
112 | adverb_query& adverb_query::with_prefix(filter<std::string> _f) | ||
113 | { | ||
114 | _f.clean(); | ||
115 | _with_prefix = _f; | ||
116 | |||
117 | return *this; | ||
118 | } | ||
119 | |||
120 | adverb_query& adverb_query::with_suffix(filter<std::string> _f) | ||
121 | { | ||
122 | _f.clean(); | ||
123 | _with_suffix = _f; | ||
124 | |||
125 | return *this; | ||
126 | } | ||
127 | |||
128 | adverb_query& adverb_query::with_complexity(int _arg) | ||
129 | { | ||
130 | _with_complexity = _arg; | ||
131 | |||
132 | return *this; | ||
133 | } | ||
134 | |||
135 | adverb_query& adverb_query::has_antonyms() | ||
136 | { | ||
137 | _has_antonyms = true; | ||
138 | |||
139 | return *this; | ||
140 | } | ||
141 | |||
142 | adverb_query& adverb_query::antonym_of(filter<adverb> _f) | ||
143 | { | ||
144 | _f.clean(); | ||
145 | _antonym_of = _f; | ||
146 | |||
147 | return *this; | ||
148 | } | ||
149 | |||
150 | adverb_query& adverb_query::has_synonyms() | ||
151 | { | ||
152 | _has_synonyms = true; | ||
153 | |||
154 | return *this; | ||
155 | } | ||
156 | |||
157 | adverb_query& adverb_query::synonym_of(filter<adverb> _f) | ||
158 | { | ||
159 | _f.clean(); | ||
160 | _synonym_of = _f; | ||
161 | |||
162 | return *this; | ||
163 | } | ||
164 | |||
165 | adverb_query& adverb_query::is_mannernymic() | ||
166 | { | ||
167 | _is_mannernymic = true; | ||
168 | |||
169 | return *this; | ||
170 | } | ||
171 | |||
172 | adverb_query& adverb_query::mannernym_of(filter<adjective> _f) | ||
173 | { | ||
174 | _f.clean(); | ||
175 | _mannernym_of = _f; | ||
176 | |||
177 | return *this; | ||
178 | } | ||
179 | /* | ||
180 | adverb_query& adverb_query::derived_from(const word& _w) | ||
181 | { | ||
182 | if (dynamic_cast<const adjective*>(&_w) != nullptr) | ||
183 | { | ||
184 | _derived_from_adjective.push_back(dynamic_cast<const adjective&>(_w)); | ||
185 | } else if (dynamic_cast<const adverb*>(&_w) != nullptr) | ||
186 | { | ||
187 | _derived_from_adverb.push_back(dynamic_cast<const adverb&>(_w)); | ||
188 | } else if (dynamic_cast<const noun*>(&_w) != nullptr) | ||
189 | { | ||
190 | _derived_from_noun.push_back(dynamic_cast<const noun&>(_w)); | ||
191 | } | ||
192 | |||
193 | return *this; | ||
194 | } | ||
195 | |||
196 | adverb_query& adverb_query::not_derived_from(const word& _w) | ||
197 | { | ||
198 | if (dynamic_cast<const adjective*>(&_w) != nullptr) | ||
199 | { | ||
200 | _not_derived_from_adjective.push_back(dynamic_cast<const adjective&>(_w)); | ||
201 | } else if (dynamic_cast<const adverb*>(&_w) != nullptr) | ||
202 | { | ||
203 | _not_derived_from_adverb.push_back(dynamic_cast<const adverb&>(_w)); | ||
204 | } else if (dynamic_cast<const noun*>(&_w) != nullptr) | ||
205 | { | ||
206 | _not_derived_from_noun.push_back(dynamic_cast<const noun&>(_w)); | ||
207 | } | ||
208 | |||
209 | return *this; | ||
210 | } | ||
211 | */ | ||
212 | std::list<adverb> adverb_query::run() const | ||
213 | { | ||
214 | std::stringstream construct; | ||
215 | construct << "SELECT adverb_id, base_form, comparative, superlative FROM adverbs"; | ||
216 | std::list<std::string> conditions; | ||
217 | std::list<binding> bindings; | ||
218 | |||
219 | if (_has_prn) | ||
220 | { | ||
221 | conditions.push_back("adverb_id IN (SELECT adverb_id FROM adverb_pronunciations)"); | ||
222 | } | ||
223 | |||
224 | if (!_rhymes.empty()) | ||
225 | { | ||
226 | std::list<std::string> clauses(_rhymes.size(), "(prerhyme != ? AND rhyme = ?)"); | ||
227 | std::string cond = "adverb_id IN (SELECT adverb_id FROM adverb_pronunciations WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
228 | conditions.push_back(cond); | ||
229 | |||
230 | for (auto rhy : _rhymes) | ||
231 | { | ||
232 | bindings.emplace_back(rhy.get_prerhyme()); | ||
233 | bindings.emplace_back(rhy.get_rhyme()); | ||
234 | } | ||
235 | } | ||
236 | |||
237 | if (_has_rhyming_noun) | ||
238 | { | ||
239 | conditions.push_back("adverb_id IN (SELECT a.adverb_id FROM adverbs AS a INNER JOIN adverb_pronunciations AS curp ON curp.noun_id = a.adverb_id INNER JOIN noun_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)"); | ||
240 | } | ||
241 | |||
242 | if (_has_rhyming_adjective) | ||
243 | { | ||
244 | conditions.push_back("adverb_id IN (SELECT a.adverb_id FROM adverbs AS a INNER JOIN adverb_pronunciations AS curp ON curp.noun_id = a.adverb_id INNER JOIN adjective_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)"); | ||
245 | } | ||
246 | |||
247 | if (_has_rhyming_adverb) | ||
248 | { | ||
249 | conditions.push_back("adverb_id IN (SELECT a.adverb_id FROM adverbs AS a INNER JOIN adverb_pronunciations AS curp ON curp.noun_id = a.adverb_id INNER JOIN adverb_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme AND rhmp.adverb_id != curp.adverb_id)"); | ||
250 | } | ||
251 | |||
252 | if (_has_rhyming_verb) | ||
253 | { | ||
254 | conditions.push_back("adverb_id IN (SELECT a.adverb_id FROM adverbs AS a INNER JOIN adverb_pronunciations AS curp ON curp.noun_id = a.adverb_id INNER JOIN verb_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)"); | ||
255 | } | ||
256 | |||
257 | for (auto except : _except) | ||
258 | { | ||
259 | conditions.push_back("adverb_id != ?"); | ||
260 | bindings.emplace_back(except._id); | ||
261 | } | ||
262 | |||
263 | if (_requires_comparative_form) | ||
264 | { | ||
265 | conditions.push_back("comparative IS NOT NULL"); | ||
266 | } | ||
267 | |||
268 | if (_requires_superlative_form) | ||
269 | { | ||
270 | conditions.push_back("superlative IS NOT NULL"); | ||
271 | } | ||
272 | |||
273 | if (!_stress.empty()) | ||
274 | { | ||
275 | std::stringstream cond; | ||
276 | if (_stress.get_notlogic()) | ||
277 | { | ||
278 | cond << "adverb_id NOT IN"; | ||
279 | } else { | ||
280 | cond << "adverb_id IN"; | ||
281 | } | ||
282 | |||
283 | cond << "(SELECT adverb_id FROM adverb_pronunciations WHERE "; | ||
284 | |||
285 | std::function<std::string (filter<std::vector<bool>>, bool)> recur = [&] (filter<std::vector<bool>> f, bool notlogic) -> std::string { | ||
286 | switch (f.get_type()) | ||
287 | { | ||
288 | case filter<std::vector<bool>>::type::singleton: | ||
289 | { | ||
290 | std::ostringstream _val; | ||
291 | for (auto syl : f.get_elem()) | ||
292 | { | ||
293 | if (syl) | ||
294 | { | ||
295 | _val << "1"; | ||
296 | } else { | ||
297 | _val << "0"; | ||
298 | } | ||
299 | } | ||
300 | |||
301 | bindings.emplace_back(_val.str()); | ||
302 | |||
303 | if (notlogic == f.get_notlogic()) | ||
304 | { | ||
305 | return "stress = ?"; | ||
306 | } else { | ||
307 | return "stress != ?"; | ||
308 | } | ||
309 | } | ||
310 | |||
311 | case filter<std::vector<bool>>::type::group: | ||
312 | { | ||
313 | bool truelogic = notlogic != f.get_notlogic(); | ||
314 | |||
315 | std::list<std::string> clauses; | ||
316 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<std::vector<bool>> f2) { | ||
317 | return recur(f2, truelogic); | ||
318 | }); | ||
319 | |||
320 | if (truelogic == f.get_orlogic()) | ||
321 | { | ||
322 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
323 | } else { | ||
324 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
325 | } | ||
326 | } | ||
327 | } | ||
328 | }; | ||
329 | |||
330 | cond << recur(_stress, _stress.get_notlogic()); | ||
331 | cond << ")"; | ||
332 | conditions.push_back(cond.str()); | ||
333 | } | ||
334 | |||
335 | if (!_with_prefix.empty()) | ||
336 | { | ||
337 | std::function<std::string (filter<std::string>, bool)> recur = [&] (filter<std::string> f, bool notlogic) -> std::string { | ||
338 | switch (f.get_type()) | ||
339 | { | ||
340 | case filter<std::string>::type::singleton: | ||
341 | { | ||
342 | bindings.emplace_back(f.get_elem() + "%"); | ||
343 | |||
344 | if (notlogic == f.get_notlogic()) | ||
345 | { | ||
346 | return "base_form LIKE ?"; | ||
347 | } else { | ||
348 | return "base_form NOT LIKE ?"; | ||
349 | } | ||
350 | } | ||
351 | |||
352 | case filter<std::string>::type::group: | ||
353 | { | ||
354 | bool truelogic = notlogic != f.get_notlogic(); | ||
355 | |||
356 | std::list<std::string> clauses; | ||
357 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<std::string> f2) { | ||
358 | return recur(f2, truelogic); | ||
359 | }); | ||
360 | |||
361 | if (truelogic == f.get_orlogic()) | ||
362 | { | ||
363 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
364 | } else { | ||
365 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
366 | } | ||
367 | } | ||
368 | } | ||
369 | }; | ||
370 | |||
371 | conditions.push_back(recur(_with_prefix, false)); | ||
372 | } | ||
373 | |||
374 | if (!_with_suffix.empty()) | ||
375 | { | ||
376 | std::function<std::string (filter<std::string>, bool)> recur = [&] (filter<std::string> f, bool notlogic) -> std::string { | ||
377 | switch (f.get_type()) | ||
378 | { | ||
379 | case filter<std::string>::type::singleton: | ||
380 | { | ||
381 | bindings.emplace_back("%" + f.get_elem()); | ||
382 | |||
383 | if (notlogic == f.get_notlogic()) | ||
384 | { | ||
385 | return "base_form LIKE ?"; | ||
386 | } else { | ||
387 | return "base_form NOT LIKE ?"; | ||
388 | } | ||
389 | } | ||
390 | |||
391 | case filter<std::string>::type::group: | ||
392 | { | ||
393 | bool truelogic = notlogic != f.get_notlogic(); | ||
394 | |||
395 | std::list<std::string> clauses; | ||
396 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<std::string> f2) { | ||
397 | return recur(f2, truelogic); | ||
398 | }); | ||
399 | |||
400 | if (truelogic == f.get_orlogic()) | ||
401 | { | ||
402 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
403 | } else { | ||
404 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
405 | } | ||
406 | } | ||
407 | } | ||
408 | }; | ||
409 | |||
410 | conditions.push_back(recur(_with_suffix, false)); | ||
411 | } | ||
412 | |||
413 | if (_with_complexity != unlimited) | ||
414 | { | ||
415 | conditions.push_back("complexity = ?"); | ||
416 | bindings.emplace_back(_with_complexity); | ||
417 | } | ||
418 | |||
419 | if (_has_antonyms) | ||
420 | { | ||
421 | conditions.push_back("adverb_id IN (SELECT adverb_2_id FROM adverb_antonymy)"); | ||
422 | } | ||
423 | |||
424 | if (!_antonym_of.empty()) | ||
425 | { | ||
426 | std::stringstream cond; | ||
427 | if (_antonym_of.get_notlogic()) | ||
428 | { | ||
429 | cond << "adverb_id NOT IN"; | ||
430 | } else { | ||
431 | cond << "adverb_id IN"; | ||
432 | } | ||
433 | |||
434 | cond << "(SELECT adverb_2_id FROM adverb_antonymy WHERE "; | ||
435 | |||
436 | std::function<std::string (filter<adverb>, bool)> recur = [&] (filter<adverb> f, bool notlogic) -> std::string { | ||
437 | switch (f.get_type()) | ||
438 | { | ||
439 | case filter<adverb>::type::singleton: | ||
440 | { | ||
441 | bindings.emplace_back(f.get_elem()._id); | ||
442 | |||
443 | if (notlogic == f.get_notlogic()) | ||
444 | { | ||
445 | return "adverb_1_id = ?"; | ||
446 | } else { | ||
447 | return "adverb_1_id != ?"; | ||
448 | } | ||
449 | } | ||
450 | |||
451 | case filter<adverb>::type::group: | ||
452 | { | ||
453 | bool truelogic = notlogic != f.get_notlogic(); | ||
454 | |||
455 | std::list<std::string> clauses; | ||
456 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<adverb> f2) { | ||
457 | return recur(f2, truelogic); | ||
458 | }); | ||
459 | |||
460 | if (truelogic == f.get_orlogic()) | ||
461 | { | ||
462 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
463 | } else { | ||
464 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
465 | } | ||
466 | } | ||
467 | } | ||
468 | }; | ||
469 | |||
470 | cond << recur(_antonym_of, _antonym_of.get_notlogic()); | ||
471 | cond << ")"; | ||
472 | conditions.push_back(cond.str()); | ||
473 | } | ||
474 | |||
475 | if (_has_synonyms) | ||
476 | { | ||
477 | conditions.push_back("adverb_id IN (SELECT adverb_2_id FROM adverb_synonymy)"); | ||
478 | } | ||
479 | |||
480 | if (!_synonym_of.empty()) | ||
481 | { | ||
482 | std::stringstream cond; | ||
483 | if (_antonym_of.get_notlogic()) | ||
484 | { | ||
485 | cond << "adverb_id NOT IN"; | ||
486 | } else { | ||
487 | cond << "adverb_id IN"; | ||
488 | } | ||
489 | |||
490 | cond << "(SELECT adverb_2_id FROM adverb_synonymy WHERE "; | ||
491 | |||
492 | std::function<std::string (filter<adverb>, bool)> recur = [&] (filter<adverb> f, bool notlogic) -> std::string { | ||
493 | switch (f.get_type()) | ||
494 | { | ||
495 | case filter<adverb>::type::singleton: | ||
496 | { | ||
497 | bindings.emplace_back(f.get_elem()._id); | ||
498 | |||
499 | if (notlogic == f.get_notlogic()) | ||
500 | { | ||
501 | return "adverb_1_id = ?"; | ||
502 | } else { | ||
503 | return "adverb_1_id != ?"; | ||
504 | } | ||
505 | } | ||
506 | |||
507 | case filter<adverb>::type::group: | ||
508 | { | ||
509 | bool truelogic = notlogic != f.get_notlogic(); | ||
510 | |||
511 | std::list<std::string> clauses; | ||
512 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<adverb> f2) { | ||
513 | return recur(f2, truelogic); | ||
514 | }); | ||
515 | |||
516 | if (truelogic == f.get_orlogic()) | ||
517 | { | ||
518 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
519 | } else { | ||
520 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
521 | } | ||
522 | } | ||
523 | } | ||
524 | }; | ||
525 | |||
526 | cond << recur(_synonym_of, _synonym_of.get_notlogic()); | ||
527 | cond << ")"; | ||
528 | conditions.push_back(cond.str()); | ||
529 | } | ||
530 | |||
531 | if (_is_mannernymic) | ||
532 | { | ||
533 | conditions.push_back("adverb_id IN (SELECT mannernym_id FROM mannernymy)"); | ||
534 | } | ||
535 | |||
536 | if (!_mannernym_of.empty()) | ||
537 | { | ||
538 | std::stringstream cond; | ||
539 | if (_antonym_of.get_notlogic()) | ||
540 | { | ||
541 | cond << "adverb_id NOT IN"; | ||
542 | } else { | ||
543 | cond << "adverb_id IN"; | ||
544 | } | ||
545 | |||
546 | cond << "(SELECT mannernym_id FROM mannernymy WHERE "; | ||
547 | |||
548 | std::function<std::string (filter<adjective>, bool)> recur = [&] (filter<adjective> f, bool notlogic) -> std::string { | ||
549 | switch (f.get_type()) | ||
550 | { | ||
551 | case filter<adjective>::type::singleton: | ||
552 | { | ||
553 | bindings.emplace_back(f.get_elem()._id); | ||
554 | |||
555 | if (notlogic == f.get_notlogic()) | ||
556 | { | ||
557 | return "adjective_id = ?"; | ||
558 | } else { | ||
559 | return "adjective_id != ?"; | ||
560 | } | ||
561 | } | ||
562 | |||
563 | case filter<adjective>::type::group: | ||
564 | { | ||
565 | bool truelogic = notlogic != f.get_notlogic(); | ||
566 | |||
567 | std::list<std::string> clauses; | ||
568 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<adjective> f2) { | ||
569 | return recur(f2, truelogic); | ||
570 | }); | ||
571 | |||
572 | if (truelogic == f.get_orlogic()) | ||
573 | { | ||
574 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
575 | } else { | ||
576 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
577 | } | ||
578 | } | ||
579 | } | ||
580 | }; | ||
581 | |||
582 | cond << recur(_mannernym_of, _mannernym_of.get_notlogic()); | ||
583 | cond << ")"; | ||
584 | conditions.push_back(cond.str()); | ||
585 | } | ||
586 | |||
587 | /* if (!_derived_from_adjective.empty()) | ||
588 | { | ||
589 | std::list<std::string> clauses(_derived_from_adjective.size(), "adjective_id = @DERADJ"); | ||
590 | std::string cond = "adverb_id IN (SELECT adverb_id FROM adjective_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
591 | conditions.push_back(cond); | ||
592 | } | ||
593 | |||
594 | if (!_not_derived_from_adjective.empty()) | ||
595 | { | ||
596 | std::list<std::string> clauses(_not_derived_from_adjective.size(), "adjective_id = @NDERADJ"); | ||
597 | std::string cond = "adverb_id NOT IN (SELECT adverb_id FROM adjective_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
598 | conditions.push_back(cond); | ||
599 | } | ||
600 | |||
601 | if (!_derived_from_adverb.empty()) | ||
602 | { | ||
603 | std::list<std::string> clauses(_derived_from_adverb.size(), "adverb_2_id = @DERADV"); | ||
604 | std::string cond = "adverb_id IN (SELECT adverb_1_id FROM adverb_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
605 | conditions.push_back(cond); | ||
606 | } | ||
607 | |||
608 | if (!_not_derived_from_adverb.empty()) | ||
609 | { | ||
610 | std::list<std::string> clauses(_not_derived_from_adverb.size(), "adverb_2_id = @NDERADV"); | ||
611 | std::string cond = "adverb_id NOT IN (SELECT adverb_1_id FROM adverb_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
612 | conditions.push_back(cond); | ||
613 | } | ||
614 | |||
615 | if (!_derived_from_noun.empty()) | ||
616 | { | ||
617 | std::list<std::string> clauses(_derived_from_noun.size(), "noun_id = @DERN"); | ||
618 | std::string cond = "adverb_id IN (SELECT adverb_id FROM noun_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
619 | conditions.push_back(cond); | ||
620 | } | ||
621 | |||
622 | if (!_not_derived_from_noun.empty()) | ||
623 | { | ||
624 | std::list<std::string> clauses(_not_derived_from_noun.size(), "noun_id = @NDERN"); | ||
625 | std::string cond = "adverb_id NOT IN (SELECT adverb_id FROM noun_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
626 | conditions.push_back(cond); | ||
627 | }*/ | ||
628 | |||
629 | if (!conditions.empty()) | ||
630 | { | ||
631 | construct << " WHERE "; | ||
632 | construct << verbly::implode(std::begin(conditions), std::end(conditions), " AND "); | ||
633 | } | ||
634 | |||
635 | if (_random) | ||
636 | { | ||
637 | construct << " ORDER BY RANDOM()"; | ||
638 | } | ||
639 | |||
640 | if (_limit != unlimited) | ||
641 | { | ||
642 | construct << " LIMIT " << _limit; | ||
643 | } | ||
644 | |||
645 | sqlite3_stmt* ppstmt; | ||
646 | std::string query = construct.str(); | ||
647 | if (sqlite3_prepare_v2(_data.ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
648 | { | ||
649 | throw std::runtime_error(sqlite3_errmsg(_data.ppdb)); | ||
650 | } | ||
651 | |||
652 | int i = 1; | ||
653 | for (auto& binding : bindings) | ||
654 | { | ||
655 | switch (binding.get_type()) | ||
656 | { | ||
657 | case binding::type::integer: | ||
658 | { | ||
659 | sqlite3_bind_int(ppstmt, i, binding.get_integer()); | ||
660 | |||
661 | break; | ||
662 | } | ||
663 | |||
664 | case binding::type::string: | ||
665 | { | ||
666 | sqlite3_bind_text(ppstmt, i, binding.get_string().c_str(), binding.get_string().length(), SQLITE_TRANSIENT); | ||
667 | |||
668 | break; | ||
669 | } | ||
670 | } | ||
671 | |||
672 | i++; | ||
673 | } | ||
674 | |||
675 | /* | ||
676 | for (auto adj : _derived_from_adjective) | ||
677 | { | ||
678 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@DERADJ"), adj._id); | ||
679 | } | ||
680 | |||
681 | for (auto adj : _not_derived_from_adjective) | ||
682 | { | ||
683 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@NDERADJ"), adj._id); | ||
684 | } | ||
685 | |||
686 | for (auto adv : _derived_from_adverb) | ||
687 | { | ||
688 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@DERADV"), adv._id); | ||
689 | } | ||
690 | |||
691 | for (auto adv : _not_derived_from_adverb) | ||
692 | { | ||
693 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@NDERADV"), adv._id); | ||
694 | } | ||
695 | |||
696 | for (auto n : _derived_from_noun) | ||
697 | { | ||
698 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@DERN"), n._id); | ||
699 | } | ||
700 | |||
701 | for (auto n : _not_derived_from_noun) | ||
702 | { | ||
703 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@NDERN"), n._id); | ||
704 | }*/ | ||
705 | |||
706 | std::list<adverb> output; | ||
707 | while (sqlite3_step(ppstmt) == SQLITE_ROW) | ||
708 | { | ||
709 | adverb tnc {_data, sqlite3_column_int(ppstmt, 0)}; | ||
710 | tnc._base_form = std::string(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 1))); | ||
711 | |||
712 | if (sqlite3_column_type(ppstmt, 2) != SQLITE_NULL) | ||
713 | { | ||
714 | tnc._comparative_form = std::string(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 2))); | ||
715 | } | ||
716 | |||
717 | if (sqlite3_column_type(ppstmt, 3) != SQLITE_NULL) | ||
718 | { | ||
719 | tnc._superlative_form = std::string(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 3))); | ||
720 | } | ||
721 | |||
722 | output.push_back(tnc); | ||
723 | } | ||
724 | |||
725 | sqlite3_finalize(ppstmt); | ||
726 | |||
727 | for (auto& adverb : output) | ||
728 | { | ||
729 | query = "SELECT pronunciation, prerhyme, rhyme FROM adverb_pronunciations WHERE adverb_id = ?"; | ||
730 | if (sqlite3_prepare_v2(_data.ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
731 | { | ||
732 | throw std::runtime_error(sqlite3_errmsg(_data.ppdb)); | ||
733 | } | ||
734 | |||
735 | sqlite3_bind_int(ppstmt, 1, adverb._id); | ||
736 | |||
737 | while (sqlite3_step(ppstmt) == SQLITE_ROW) | ||
738 | { | ||
739 | std::string pronunciation(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 0))); | ||
740 | auto phonemes = verbly::split<std::list<std::string>>(pronunciation, " "); | ||
741 | |||
742 | adverb.pronunciations.push_back(phonemes); | ||
743 | |||
744 | if ((sqlite3_column_type(ppstmt, 1) != SQLITE_NULL) && (sqlite3_column_type(ppstmt, 2) != SQLITE_NULL)) | ||
745 | { | ||
746 | std::string prerhyme(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 1))); | ||
747 | std::string rhyming(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 2))); | ||
748 | adverb.rhymes.emplace_back(prerhyme, rhyming); | ||
749 | } | ||
750 | } | ||
751 | |||
752 | sqlite3_finalize(ppstmt); | ||
753 | } | ||
754 | |||
755 | return output; | ||
756 | } | ||
757 | |||
758 | }; | ||
diff --git a/lib/adverb_query.h b/lib/adverb_query.h deleted file mode 100644 index 30e7400..0000000 --- a/lib/adverb_query.h +++ /dev/null | |||
@@ -1,86 +0,0 @@ | |||
1 | #ifndef ADVERB_QUERY_H_CA13CCDD | ||
2 | #define ADVERB_QUERY_H_CA13CCDD | ||
3 | |||
4 | namespace verbly { | ||
5 | |||
6 | class adverb_query { | ||
7 | public: | ||
8 | adverb_query(const data& _data); | ||
9 | |||
10 | adverb_query& limit(int _limit); | ||
11 | adverb_query& random(); | ||
12 | adverb_query& except(const adverb& _word); | ||
13 | adverb_query& rhymes_with(const word& _word); | ||
14 | adverb_query& rhymes_with(rhyme _r); | ||
15 | adverb_query& has_pronunciation(); | ||
16 | adverb_query& has_rhyming_noun(); | ||
17 | adverb_query& has_rhyming_adjective(); | ||
18 | adverb_query& has_rhyming_adverb(); | ||
19 | adverb_query& has_rhyming_verb(); | ||
20 | adverb_query& with_stress(filter<std::vector<bool>> _arg); | ||
21 | |||
22 | adverb_query& requires_comparative_form(); | ||
23 | adverb_query& requires_superlative_form(); | ||
24 | |||
25 | adverb_query& with_prefix(filter<std::string> _f); | ||
26 | adverb_query& with_suffix(filter<std::string> _f); | ||
27 | |||
28 | adverb_query& with_complexity(int _arg); | ||
29 | |||
30 | adverb_query& has_antonyms(); | ||
31 | adverb_query& antonym_of(filter<adverb> _f); | ||
32 | |||
33 | adverb_query& has_synonyms(); | ||
34 | adverb_query& synonym_of(filter<adverb> _f); | ||
35 | |||
36 | adverb_query& is_mannernymic(); | ||
37 | adverb_query& mannernym_of(filter<adjective> _f); | ||
38 | |||
39 | /* adverb_query& derived_from(const word& _w); | ||
40 | adverb_query& not_derived_from(const word& _w);*/ | ||
41 | |||
42 | std::list<adverb> run() const; | ||
43 | |||
44 | const static int unlimited = -1; | ||
45 | |||
46 | private: | ||
47 | const data& _data; | ||
48 | int _limit = unlimited; | ||
49 | bool _random = false; | ||
50 | std::list<rhyme> _rhymes; | ||
51 | std::list<adverb> _except; | ||
52 | bool _has_prn = false; | ||
53 | bool _has_rhyming_noun = false; | ||
54 | bool _has_rhyming_adjective = false; | ||
55 | bool _has_rhyming_adverb = false; | ||
56 | bool _has_rhyming_verb = false; | ||
57 | filter<std::vector<bool>> _stress; | ||
58 | |||
59 | bool _requires_comparative_form = false; | ||
60 | bool _requires_superlative_form = false; | ||
61 | |||
62 | filter<std::string> _with_prefix; | ||
63 | filter<std::string> _with_suffix; | ||
64 | |||
65 | int _with_complexity = unlimited; | ||
66 | |||
67 | bool _has_antonyms = false; | ||
68 | filter<adverb> _antonym_of; | ||
69 | |||
70 | bool _has_synonyms = false; | ||
71 | filter<adverb> _synonym_of; | ||
72 | |||
73 | bool _is_mannernymic = false; | ||
74 | filter<adjective> _mannernym_of; | ||
75 | |||
76 | /* std::list<adjective> _derived_from_adjective; | ||
77 | std::list<adjective> _not_derived_from_adjective; | ||
78 | std::list<adverb> _derived_from_adverb; | ||
79 | std::list<adverb> _not_derived_from_adverb; | ||
80 | std::list<noun> _derived_from_noun; | ||
81 | std::list<noun> _not_derived_from_noun;*/ | ||
82 | }; | ||
83 | |||
84 | }; | ||
85 | |||
86 | #endif /* end of include guard: ADVERB_QUERY_H_CA13CCDD */ | ||
diff --git a/lib/binding.cpp b/lib/binding.cpp new file mode 100644 index 0000000..349cd6f --- /dev/null +++ b/lib/binding.cpp | |||
@@ -0,0 +1,180 @@ | |||
1 | #include "binding.h" | ||
2 | #include <stdexcept> | ||
3 | #include <utility> | ||
4 | |||
5 | namespace verbly { | ||
6 | |||
7 | binding::binding(const binding& other) | ||
8 | { | ||
9 | type_ = other.type_; | ||
10 | |||
11 | switch (type_) | ||
12 | { | ||
13 | case type::integer: | ||
14 | { | ||
15 | integer_ = other.integer_; | ||
16 | |||
17 | break; | ||
18 | } | ||
19 | |||
20 | case type::string: | ||
21 | { | ||
22 | new(&string_) std::string(other.string_); | ||
23 | |||
24 | break; | ||
25 | } | ||
26 | |||
27 | case type::invalid: | ||
28 | { | ||
29 | break; | ||
30 | } | ||
31 | } | ||
32 | } | ||
33 | |||
34 | binding::binding(binding&& other) : binding() | ||
35 | { | ||
36 | swap(*this, other); | ||
37 | } | ||
38 | |||
39 | binding& binding::operator=(binding other) | ||
40 | { | ||
41 | swap(*this, other); | ||
42 | |||
43 | return *this; | ||
44 | } | ||
45 | |||
46 | void swap(binding& first, binding& second) | ||
47 | { | ||
48 | using type = binding::type; | ||
49 | |||
50 | type tempType = first.type_; | ||
51 | int tempInteger; | ||
52 | std::string tempString; | ||
53 | |||
54 | switch (first.type_) | ||
55 | { | ||
56 | case type::integer: | ||
57 | { | ||
58 | tempInteger = first.integer_; | ||
59 | |||
60 | break; | ||
61 | } | ||
62 | |||
63 | case type::string: | ||
64 | { | ||
65 | tempString = std::move(tempString); | ||
66 | |||
67 | break; | ||
68 | } | ||
69 | |||
70 | case type::invalid: | ||
71 | { | ||
72 | break; | ||
73 | } | ||
74 | } | ||
75 | |||
76 | first.~binding(); | ||
77 | |||
78 | first.type_ = second.type_; | ||
79 | |||
80 | switch (second.type_) | ||
81 | { | ||
82 | case type::integer: | ||
83 | { | ||
84 | first.integer_ = second.integer_; | ||
85 | |||
86 | break; | ||
87 | } | ||
88 | |||
89 | case type::string: | ||
90 | { | ||
91 | new(&first.string_) std::string(std::move(second.string_)); | ||
92 | |||
93 | break; | ||
94 | } | ||
95 | |||
96 | case type::invalid: | ||
97 | { | ||
98 | break; | ||
99 | } | ||
100 | } | ||
101 | |||
102 | second.~binding(); | ||
103 | |||
104 | second.type_ = tempType; | ||
105 | |||
106 | switch (tempType) | ||
107 | { | ||
108 | case type::integer: | ||
109 | { | ||
110 | second.integer_ = tempInteger; | ||
111 | |||
112 | break; | ||
113 | } | ||
114 | |||
115 | case type::string: | ||
116 | { | ||
117 | new(&second.string_) std::string(std::move(tempString)); | ||
118 | |||
119 | break; | ||
120 | } | ||
121 | |||
122 | case type::invalid: | ||
123 | { | ||
124 | break; | ||
125 | } | ||
126 | } | ||
127 | } | ||
128 | |||
129 | binding::~binding() | ||
130 | { | ||
131 | switch (type_) | ||
132 | { | ||
133 | case type::string: | ||
134 | { | ||
135 | using string_type = std::string; | ||
136 | string_.~string_type(); | ||
137 | |||
138 | break; | ||
139 | } | ||
140 | |||
141 | case type::integer: | ||
142 | case type::invalid: | ||
143 | { | ||
144 | break; | ||
145 | } | ||
146 | } | ||
147 | } | ||
148 | |||
149 | binding::binding(int arg) : | ||
150 | type_(type::integer), | ||
151 | integer_(arg) | ||
152 | { | ||
153 | } | ||
154 | |||
155 | int binding::getInteger() const | ||
156 | { | ||
157 | if (type_ != type::integer) | ||
158 | { | ||
159 | throw std::domain_error("binding::getInteger called on non-integer binding"); | ||
160 | } | ||
161 | |||
162 | return integer_; | ||
163 | } | ||
164 | |||
165 | binding::binding(std::string arg) : type_(type::string) | ||
166 | { | ||
167 | new(&string_) std::string(arg); | ||
168 | } | ||
169 | |||
170 | std::string binding::getString() const | ||
171 | { | ||
172 | if (type_ != type::string) | ||
173 | { | ||
174 | throw std::domain_error("binding::getString called on non-string binding"); | ||
175 | } | ||
176 | |||
177 | return string_; | ||
178 | } | ||
179 | |||
180 | }; | ||
diff --git a/lib/binding.h b/lib/binding.h new file mode 100644 index 0000000..7fbe20e --- /dev/null +++ b/lib/binding.h | |||
@@ -0,0 +1,70 @@ | |||
1 | #ifndef BINDING_H_CAE0B18E | ||
2 | #define BINDING_H_CAE0B18E | ||
3 | |||
4 | #include <string> | ||
5 | |||
6 | namespace verbly { | ||
7 | |||
// A tagged value that is either an integer, a string, or nothing (invalid).
//
// The payload is kept in a union and type_ records which member, if any, is
// currently active. The member functions declared here are defined outside
// the class.
class binding {
public:

  // Tag identifying the active payload.
  enum class type {
    invalid,
    integer,
    string
  };

  // Construction. A default-constructed binding is invalid; the int and
  // std::string constructors are non-explicit, so those values convert to a
  // binding implicitly.

  binding()
  {
  }

  binding(int arg);

  binding(std::string arg);

  binding(const binding& other);

  binding(binding&& other);

  // Assignment takes its argument by value, so one operator serves both
  // copy and move assignment.

  binding& operator=(binding other);

  friend void swap(binding& first, binding& second);

  ~binding();

  // Accessors

  type getType() const
  {
    return type_;
  }

  int getInteger() const;

  std::string getString() const;

private:

  // Storage for the payload; which member is live is given by type_.
  union {
    int integer_;
    std::string string_;
  };

  type type_ = type::invalid;
};
67 | |||
68 | }; | ||
69 | |||
70 | #endif /* end of include guard: BINDING_H_CAE0B18E */ | ||
diff --git a/lib/data.cpp b/lib/data.cpp deleted file mode 100644 index db42487..0000000 --- a/lib/data.cpp +++ /dev/null | |||
@@ -1,177 +0,0 @@ | |||
1 | #include "verbly.h" | ||
2 | |||
3 | namespace verbly { | ||
4 | |||
5 | data::data(std::string datafile) | ||
6 | { | ||
7 | if (sqlite3_open_v2(datafile.c_str(), &ppdb, SQLITE_OPEN_READONLY, NULL) != SQLITE_OK) | ||
8 | { | ||
9 | throw std::invalid_argument(sqlite3_errmsg(ppdb)); | ||
10 | } | ||
11 | } | ||
12 | |||
13 | data::data(data&& other) | ||
14 | { | ||
15 | ppdb = other.ppdb; | ||
16 | } | ||
17 | |||
18 | data& data::operator=(data&& other) | ||
19 | { | ||
20 | ppdb = other.ppdb; | ||
21 | |||
22 | return *this; | ||
23 | } | ||
24 | |||
25 | data::~data() | ||
26 | { | ||
27 | sqlite3_close_v2(ppdb); | ||
28 | } | ||
29 | |||
30 | verb_query data::verbs() const | ||
31 | { | ||
32 | return verb_query(*this); | ||
33 | } | ||
34 | |||
35 | adjective_query data::adjectives() const | ||
36 | { | ||
37 | return adjective_query(*this); | ||
38 | } | ||
39 | |||
40 | adverb_query data::adverbs() const | ||
41 | { | ||
42 | return adverb_query(*this); | ||
43 | } | ||
44 | |||
45 | noun_query data::nouns() const | ||
46 | { | ||
47 | return noun_query(*this); | ||
48 | } | ||
49 | |||
50 | frame_query data::frames() const | ||
51 | { | ||
52 | return frame_query(*this); | ||
53 | } | ||
54 | |||
55 | preposition_query data::prepositions() const | ||
56 | { | ||
57 | return preposition_query(*this); | ||
58 | } | ||
59 | |||
60 | binding::type binding::get_type() const | ||
61 | { | ||
62 | return _type; | ||
63 | } | ||
64 | |||
65 | binding::binding(const binding& other) | ||
66 | { | ||
67 | _type = other._type; | ||
68 | |||
69 | switch (_type) | ||
70 | { | ||
71 | case type::integer: | ||
72 | { | ||
73 | _integer = other._integer; | ||
74 | |||
75 | break; | ||
76 | } | ||
77 | |||
78 | case type::string: | ||
79 | { | ||
80 | new(&_string) std::string(other._string); | ||
81 | |||
82 | break; | ||
83 | } | ||
84 | } | ||
85 | } | ||
86 | |||
87 | binding::~binding() | ||
88 | { | ||
89 | switch (_type) | ||
90 | { | ||
91 | case type::string: | ||
92 | { | ||
93 | using string_type = std::string; | ||
94 | _string.~string_type(); | ||
95 | |||
96 | break; | ||
97 | } | ||
98 | } | ||
99 | } | ||
100 | |||
101 | binding& binding::operator=(const binding& other) | ||
102 | { | ||
103 | this->~binding(); | ||
104 | |||
105 | _type = other._type; | ||
106 | |||
107 | switch (_type) | ||
108 | { | ||
109 | case type::integer: | ||
110 | { | ||
111 | _integer = other._integer; | ||
112 | |||
113 | break; | ||
114 | } | ||
115 | |||
116 | case type::string: | ||
117 | { | ||
118 | new(&_string) std::string(other._string); | ||
119 | |||
120 | break; | ||
121 | } | ||
122 | } | ||
123 | |||
124 | return *this; | ||
125 | } | ||
126 | |||
127 | binding::binding(int _arg) | ||
128 | { | ||
129 | _type = type::integer; | ||
130 | _integer = _arg; | ||
131 | } | ||
132 | |||
133 | int binding::get_integer() const | ||
134 | { | ||
135 | assert(_type == type::integer); | ||
136 | |||
137 | return _integer; | ||
138 | } | ||
139 | |||
140 | void binding::set_integer(int _arg) | ||
141 | { | ||
142 | *this = binding(_arg); | ||
143 | } | ||
144 | |||
145 | binding& binding::operator=(int _arg) | ||
146 | { | ||
147 | *this = binding(_arg); | ||
148 | |||
149 | return *this; | ||
150 | } | ||
151 | |||
152 | binding::binding(std::string _arg) | ||
153 | { | ||
154 | _type = type::string; | ||
155 | new(&_string) std::string(_arg); | ||
156 | } | ||
157 | |||
158 | std::string binding::get_string() const | ||
159 | { | ||
160 | assert(_type == type::string); | ||
161 | |||
162 | return _string; | ||
163 | } | ||
164 | |||
165 | void binding::set_string(std::string _arg) | ||
166 | { | ||
167 | *this = binding(_arg); | ||
168 | } | ||
169 | |||
170 | binding& binding::operator=(std::string _arg) | ||
171 | { | ||
172 | *this = binding(_arg); | ||
173 | |||
174 | return *this; | ||
175 | } | ||
176 | |||
177 | }; | ||
diff --git a/lib/data.h b/lib/data.h deleted file mode 100644 index b8b12b9..0000000 --- a/lib/data.h +++ /dev/null | |||
@@ -1,380 +0,0 @@ | |||
1 | #ifndef DATA_H_C4AEC3DD | ||
2 | #define DATA_H_C4AEC3DD | ||
3 | |||
4 | #include <sqlite3.h> | ||
5 | |||
6 | namespace verbly { | ||
7 | |||
8 | class data; | ||
9 | class word; | ||
10 | class adjective; | ||
11 | class noun; | ||
12 | class verb; | ||
13 | class adverb; | ||
14 | class frame; | ||
15 | class adjective_query; | ||
16 | class adverb_query; | ||
17 | class noun_query; | ||
18 | class verb_query; | ||
19 | class frame_query; | ||
20 | class preposition_query; | ||
21 | |||
22 | class data { | ||
23 | private: | ||
24 | sqlite3* ppdb; | ||
25 | |||
26 | friend class adjective_query; | ||
27 | friend class noun_query; | ||
28 | friend class verb_query; | ||
29 | friend class adverb_query; | ||
30 | friend class frame_query; | ||
31 | friend class preposition_query; | ||
32 | |||
33 | public: | ||
34 | data(std::string datafile); | ||
35 | |||
36 | data(const data& other) = delete; | ||
37 | data& operator=(const data& other) = delete; | ||
38 | |||
39 | data(data&& other); | ||
40 | data& operator=(data&& other); | ||
41 | |||
42 | ~data(); | ||
43 | |||
44 | verb_query verbs() const; | ||
45 | adjective_query adjectives() const; | ||
46 | adverb_query adverbs() const; | ||
47 | noun_query nouns() const; | ||
48 | frame_query frames() const; | ||
49 | preposition_query prepositions() const; | ||
50 | |||
51 | }; | ||
52 | |||
// A boolean filter expression over values of type T.
//
// A filter is either a singleton wrapping one T, or a group holding a list
// of child filters. Every filter carries a "not" flag and every group also
// carries an "or" flag; how the flags are interpreted is left to the code
// that consumes the filter.
//
// The payload lives in a union, so construction and destruction of the
// active member are done by hand; _type records which member is active.
template <class T>
class filter {
public:
  enum class type {
    singleton,
    group
  };

  typedef filter<T> value_type;
  typedef typename std::list<filter<T>>::iterator iterator;

  // ---------------------------------------------------------------------
  // Common interface
  // ---------------------------------------------------------------------

  type get_type() const
  {
    return _type;
  }

  filter(const filter<T>& other) : _type(other._type), _notlogic(other._notlogic)
  {
    copy_payload(other);
  }

  // Copy assignment. Safe for self-assignment and for the case where other
  // is a (possibly nested) child of this filter.
  filter<T>& operator=(const filter<T>& other)
  {
    if (this == &other)
    {
      return *this;
    }

    // If we are a group, other may live inside our own child list. Park the
    // children in a local list so they stay alive (std::list::swap does not
    // move or invalidate the elements) until the copy below has finished.
    std::list<filter<T>> retired;
    if (_type == type::group)
    {
      retired.swap(_group.elems);
    }

    destroy_payload();

    _type = other._type;
    _notlogic = other._notlogic;
    copy_payload(other);

    return *this;
  }

  ~filter()
  {
    destroy_payload();
  }

  bool get_notlogic() const
  {
    return _notlogic;
  }

  void set_notlogic(bool _nl)
  {
    _notlogic = _nl;
  }

  // Returns every wrapped element in left-to-right order, duplicates kept.
  std::list<T> inorder_flatten() const
  {
    std::list<T> result;

    if (_type == type::singleton)
    {
      result.push_back(_singleton.elem);
    } else {
      for (const filter<T>& child : _group.elems)
      {
        // splice moves the child's nodes over without copying elements.
        result.splice(std::end(result), child.inorder_flatten());
      }
    }

    return result;
  }

  // Returns the set of distinct wrapped elements. Requires T to be usable
  // as a std::set key.
  std::set<T> uniq_flatten() const
  {
    std::set<T> result;

    if (_type == type::singleton)
    {
      result.insert(_singleton.elem);
    } else {
      for (const filter<T>& child : _group.elems)
      {
        const std::set<T> nested = child.uniq_flatten();
        result.insert(std::begin(nested), std::end(nested));
      }
    }

    return result;
  }

  // Simplifies a group in place: children are cleaned first, empty child
  // groups are removed, and any group left with exactly one child is
  // replaced by that child (with the two "not" flags combined). A singleton
  // is left untouched.
  void clean()
  {
    if (_type != type::group)
    {
      return;
    }

    for (auto it = _group.elems.begin(); it != _group.elems.end(); )
    {
      it->clean();

      if (it->_type == type::group)
      {
        if (it->_group.elems.empty())
        {
          it = _group.elems.erase(it);

          continue;
        }

        if (it->_group.elems.size() == 1)
        {
          it->collapse_single_child();
        }
      }

      ++it;
    }

    if (_group.elems.size() == 1)
    {
      collapse_single_child();
    }
  }

  // ---------------------------------------------------------------------
  // Singleton interface
  // ---------------------------------------------------------------------

  // Creates a singleton wrapping _elem. Deliberately not explicit, so a T
  // converts to a filter<T> implicitly.
  filter(T _elem, bool _notlogic = false) : _type(type::singleton)
  {
    new(&_singleton.elem) T(_elem);
    this->_notlogic = _notlogic;
  }

  // Turns this filter into a singleton wrapping _elem.
  filter<T>& operator=(T _elem)
  {
    // Parentheses are required here: with braces, list-initialization
    // prefers the initializer_list constructor and would build a group
    // containing one singleton instead of a singleton.
    *this = filter<T>(_elem);

    return *this;
  }

  T get_elem() const
  {
    assert(_type == type::singleton);

    return _singleton.elem;
  }

  void set_elem(T _elem)
  {
    assert(_type == type::singleton);

    _singleton.elem = _elem;
  }

  // ---------------------------------------------------------------------
  // Group interface
  // ---------------------------------------------------------------------

  // Creates an empty group with the "or" flag cleared.
  filter() : _type(type::group)
  {
    new(&_group.elems) std::list<filter<T>>();
    _group.orlogic = false;
  }

  // Creates a group holding the given children, with the "or" flag cleared.
  filter(std::initializer_list<filter<T>> _init) : _type(type::group)
  {
    new(&_group.elems) std::list<filter<T>>(_init);
    _group.orlogic = false;
  }

  iterator begin()
  {
    assert(_type == type::group);

    return _group.elems.begin();
  }

  iterator end()
  {
    assert(_type == type::group);

    return _group.elems.end();
  }

  filter<T>& operator<<(filter<T> _elem)
  {
    assert(_type == type::group);

    _group.elems.push_back(_elem);

    return *this;
  }

  void push_back(filter<T> _elem)
  {
    assert(_type == type::group);

    _group.elems.push_back(_elem);
  }

  bool get_orlogic() const
  {
    assert(_type == type::group);

    return _group.orlogic;
  }

  void set_orlogic(bool _ol)
  {
    assert(_type == type::group);

    _group.orlogic = _ol;
  }

  // True only for a group without children; a singleton is never empty.
  bool empty() const
  {
    if (_type == type::group)
    {
      return _group.elems.empty();
    } else {
      return false;
    }
  }

  int size() const
  {
    assert(_type == type::group);

    return static_cast<int>(_group.elems.size());
  }

private:
  // Payload layouts. They are named member types rather than unnamed
  // structs defined inside the anonymous union, because declaring types
  // inside an anonymous union is a compiler extension.
  struct singleton_data {
    T elem;
  };

  struct group_data {
    std::list<filter<T>> elems;
    bool orlogic;
  };

  // Constructs the payload selected by _type as a copy of other's payload.
  // _type must already equal other._type and no payload may be alive.
  void copy_payload(const filter<T>& other)
  {
    if (_type == type::singleton)
    {
      new(&_singleton.elem) T(other._singleton.elem);
    } else {
      new(&_group.elems) std::list<filter<T>>(other._group.elems);
      _group.orlogic = other._group.orlogic;
    }
  }

  // Destroys whichever payload _type says is alive.
  void destroy_payload()
  {
    if (_type == type::singleton)
    {
      _singleton.elem.~T();
    } else {
      using list_type = std::list<filter<T>>;
      _group.elems.~list_type();
    }
  }

  // Replaces this group, which must have exactly one child, with that
  // child. The resulting "not" flag is the XOR of the two original flags.
  void collapse_single_child()
  {
    const bool truelogic = _notlogic != _group.elems.front()._notlogic;

    // Assigning from our own child is fine: operator= keeps the old
    // children alive until the copy is complete.
    *this = _group.elems.front();
    _notlogic = truelogic;
  }

  type _type;
  bool _notlogic = false;
  union {
    singleton_data _singleton;
    group_data _group;
  };
};
345 | |||
// A tagged value that is either an integer or a string.
//
// The payload is kept in a union and _type records which member is active.
// There is no default constructor: a binding is always created from an int,
// a std::string or another binding. The member functions are defined
// outside the class.
class binding {
public:
  // Tag identifying the active payload.
  enum class type {
    integer,
    string
  };

  // Construction and destruction. The int and std::string constructors are
  // non-explicit, so those values convert to a binding implicitly.
  binding(int _arg);
  binding(std::string _arg);
  binding(const binding& other);
  ~binding();

  // Assignment from another binding or directly from a value.
  binding& operator=(const binding& other);
  binding& operator=(int _arg);
  binding& operator=(std::string _arg);

  type get_type() const;

  // Integer access
  int get_integer() const;
  void set_integer(int _arg);

  // String access
  std::string get_string() const;
  void set_string(std::string _arg);

private:
  // Storage for the payload; which member is live is given by _type.
  union {
    int _integer;
    std::string _string;
  };
  type _type;
};
377 | |||
378 | }; | ||
379 | |||
380 | #endif /* end of include guard: DATA_H_C4AEC3DD */ | ||
diff --git a/lib/database.cpp b/lib/database.cpp new file mode 100644 index 0000000..351b93d --- /dev/null +++ b/lib/database.cpp | |||
@@ -0,0 +1,79 @@ | |||
1 | #include "database.h" | ||
2 | #include <sqlite3.h> | ||
3 | #include <stdexcept> | ||
4 | #include "query.h" | ||
5 | |||
6 | namespace verbly { | ||
7 | |||
8 | database::database(std::string path) | ||
9 | { | ||
10 | if (sqlite3_open_v2(path.c_str(), &ppdb_, SQLITE_OPEN_READONLY, NULL) != SQLITE_OK) | ||
11 | { | ||
12 | // We still have to free the resources allocated. In the event that | ||
13 | // allocation failed, ppdb will be null and sqlite3_close_v2 will just | ||
14 | // ignore it. | ||
15 | std::string errmsg(sqlite3_errmsg(ppdb_)); | ||
16 | sqlite3_close_v2(ppdb_); | ||
17 | |||
18 | throw database_error("Could not open verbly datafile", errmsg); | ||
19 | } | ||
20 | } | ||
21 | |||
22 | database::database(database&& other) : database() | ||
23 | { | ||
24 | swap(*this, other); | ||
25 | } | ||
26 | |||
27 | database& database::operator=(database&& other) | ||
28 | { | ||
29 | swap(*this, other); | ||
30 | |||
31 | return *this; | ||
32 | } | ||
33 | |||
34 | void swap(database& first, database& second) | ||
35 | { | ||
36 | std::swap(first.ppdb_, second.ppdb_); | ||
37 | } | ||
38 | |||
39 | database::~database() | ||
40 | { | ||
41 | sqlite3_close_v2(ppdb_); | ||
42 | } | ||
43 | |||
44 | query<notion> database::notions(filter where, bool random, int limit) const | ||
45 | { | ||
46 | return query<notion>(*this, ppdb_, std::move(where), random, limit); | ||
47 | } | ||
48 | |||
49 | query<word> database::words(filter where, bool random, int limit) const | ||
50 | { | ||
51 | return query<word>(*this, ppdb_, std::move(where), random, limit); | ||
52 | } | ||
53 | |||
54 | query<group> database::groups(filter where, bool random, int limit) const | ||
55 | { | ||
56 | return query<group>(*this, ppdb_, std::move(where), random, limit); | ||
57 | } | ||
58 | |||
59 | query<frame> database::frames(filter where, bool random, int limit) const | ||
60 | { | ||
61 | return query<frame>(*this, ppdb_, std::move(where), random, limit); | ||
62 | } | ||
63 | |||
64 | query<lemma> database::lemmas(filter where, bool random, int limit) const | ||
65 | { | ||
66 | return query<lemma>(*this, ppdb_, std::move(where), random, limit); | ||
67 | } | ||
68 | |||
69 | query<form> database::forms(filter where, bool random, int limit) const | ||
70 | { | ||
71 | return query<form>(*this, ppdb_, std::move(where), random, limit); | ||
72 | } | ||
73 | |||
74 | query<pronunciation> database::pronunciations(filter where, bool random, int limit) const | ||
75 | { | ||
76 | return query<pronunciation>(*this, ppdb_, std::move(where), random, limit); | ||
77 | } | ||
78 | |||
79 | }; | ||
diff --git a/lib/database.h b/lib/database.h new file mode 100644 index 0000000..d68c40b --- /dev/null +++ b/lib/database.h | |||
@@ -0,0 +1,73 @@ | |||
1 | #ifndef DATABASE_H_0B0A47D2 | ||
2 | #define DATABASE_H_0B0A47D2 | ||
3 | |||
4 | #include <string> | ||
5 | #include <exception> | ||
6 | #include <list> | ||
7 | #include "notion.h" | ||
8 | #include "word.h" | ||
9 | #include "group.h" | ||
10 | #include "frame.h" | ||
11 | #include "lemma.h" | ||
12 | #include "form.h" | ||
13 | #include "pronunciation.h" | ||
14 | |||
15 | struct sqlite3; | ||
16 | |||
17 | namespace verbly { | ||
18 | |||
19 | template <typename Object> | ||
20 | class query; | ||
21 | |||
22 | class database { | ||
23 | public: | ||
24 | |||
25 | // Constructor | ||
26 | |||
27 | explicit database(std::string path); | ||
28 | |||
29 | // Disable copying | ||
30 | |||
31 | database(const database& other) = delete; | ||
32 | database& operator=(const database& other) = delete; | ||
33 | |||
34 | // Move constructor and move assignment | ||
35 | |||
36 | database(database&& other); | ||
37 | database& operator=(database&& other); | ||
38 | |||
39 | // Swap | ||
40 | |||
41 | friend void swap(database& first, database& second); | ||
42 | |||
43 | // Destructor | ||
44 | |||
45 | ~database(); | ||
46 | |||
47 | // Queries | ||
48 | |||
49 | query<notion> notions(filter where, bool random = true, int limit = 1) const; | ||
50 | |||
51 | query<word> words(filter where, bool random = true, int limit = 1) const; | ||
52 | |||
53 | query<group> groups(filter where, bool random = true, int limit = 1) const; | ||
54 | |||
55 | query<frame> frames(filter where, bool random = true, int limit = 1) const; | ||
56 | |||
57 | query<lemma> lemmas(filter where, bool random = true, int limit = 1) const; | ||
58 | |||
59 | query<form> forms(filter where, bool random = true, int limit = 1) const; | ||
60 | |||
61 | query<pronunciation> pronunciations(filter where, bool random = true, int limit = 1) const; | ||
62 | |||
63 | private: | ||
64 | |||
65 | database() = default; | ||
66 | |||
67 | sqlite3* ppdb_ = nullptr; | ||
68 | |||
69 | }; | ||
70 | |||
71 | }; | ||
72 | |||
73 | #endif /* end of include guard: DATABASE_H_0B0A47D2 */ | ||
diff --git a/lib/enums.h b/lib/enums.h new file mode 100644 index 0000000..b37be7b --- /dev/null +++ b/lib/enums.h | |||
@@ -0,0 +1,45 @@ | |||
1 | #ifndef ENUMS_H_260BA847 | ||
2 | #define ENUMS_H_260BA847 | ||
3 | |||
namespace verbly {

  // Every enumerator below has an explicit numeric value. Other code converts
  // these enums to int with static_cast, so the numbers are part of the
  // contract and must not be reordered.
  // NOTE(review): presumably these match values stored in the datafile - confirm.

  // Grammatical category of a word.
  enum class part_of_speech {
    noun = 0,
    adjective = 1,
    adverb = 2,
    verb = 3,
    preposition = 4
  };

  // Adjective positioning; `undefined` (-1) marks the absence of a value.
  enum class positioning {
    undefined = -1,
    predicate = 0,
    attributive = 1,
    postnominal = 2
  };

  // Inflection category of a word form.
  enum class inflection {
    base = 0,
    plural = 1,
    comparative = 2,
    superlative = 3,
    past_tense = 4,
    past_participle = 5,
    ing_form = 6,
    s_form = 7
  };

  // The kinds of object a field or query can refer to; `undefined` (-1)
  // means no object.
  enum class object {
    undefined = -1,
    notion = 0,
    word = 1,
    group = 2,
    frame = 3,
    lemma = 4,
    form = 5,
    pronunciation = 6
  };

};
44 | |||
45 | #endif /* end of include guard: ENUMS_H_260BA847 */ | ||
diff --git a/lib/field.cpp b/lib/field.cpp new file mode 100644 index 0000000..d7adbb3 --- /dev/null +++ b/lib/field.cpp | |||
@@ -0,0 +1,91 @@ | |||
1 | #include "field.h" | ||
2 | #include "filter.h" | ||
3 | |||
4 | namespace verbly { | ||
5 | |||
6 | filter field::operator==(int value) const | ||
7 | { | ||
8 | return filter(*this, filter::comparison::int_equals, value); | ||
9 | } | ||
10 | |||
11 | filter field::operator!=(int value) const | ||
12 | { | ||
13 | return filter(*this, filter::comparison::int_does_not_equal, value); | ||
14 | } | ||
15 | |||
16 | filter field::operator<(int value) const | ||
17 | { | ||
18 | return filter(*this, filter::comparison::int_is_less_than, value); | ||
19 | } | ||
20 | |||
21 | filter field::operator<=(int value) const | ||
22 | { | ||
23 | return filter(*this, filter::comparison::int_is_at_most, value); | ||
24 | } | ||
25 | |||
26 | filter field::operator>(int value) const | ||
27 | { | ||
28 | return filter(*this, filter::comparison::int_is_greater_than, value); | ||
29 | } | ||
30 | |||
31 | filter field::operator>=(int value) const | ||
32 | { | ||
33 | return filter(*this, filter::comparison::int_is_at_least, value); | ||
34 | } | ||
35 | |||
36 | filter field::operator==(part_of_speech value) const | ||
37 | { | ||
38 | return filter(*this, filter::comparison::int_equals, static_cast<int>(value)); | ||
39 | } | ||
40 | |||
41 | filter field::operator==(positioning value) const | ||
42 | { | ||
43 | return filter(*this, filter::comparison::int_equals, static_cast<int>(value)); | ||
44 | } | ||
45 | |||
46 | filter field::operator==(inflection value) const | ||
47 | { | ||
48 | return filter(*this, filter::comparison::int_equals, static_cast<int>(value)); | ||
49 | } | ||
50 | |||
51 | filter field::operator==(bool value) const | ||
52 | { | ||
53 | return filter(*this, filter::comparison::boolean_equals, value); | ||
54 | } | ||
55 | |||
56 | filter field::operator==(std::string value) const | ||
57 | { | ||
58 | return filter(*this, filter::comparison::string_equals, std::move(value)); | ||
59 | } | ||
60 | |||
61 | filter field::operator!=(std::string value) const | ||
62 | { | ||
63 | return filter(*this, filter::comparison::string_does_not_equal, std::move(value)); | ||
64 | } | ||
65 | |||
66 | filter field::operator%=(std::string value) const | ||
67 | { | ||
68 | return filter(*this, filter::comparison::string_is_like, std::move(value)); | ||
69 | } | ||
70 | |||
71 | field::operator filter() const | ||
72 | { | ||
73 | return filter(*this, filter::comparison::is_not_null); | ||
74 | } | ||
75 | |||
76 | filter field::operator!() const | ||
77 | { | ||
78 | return filter(*this, filter::comparison::is_null); | ||
79 | } | ||
80 | |||
81 | filter field::operator%=(filter joinCondition) const | ||
82 | { | ||
83 | if (type_ == type::hierarchal_join) | ||
84 | { | ||
85 | return filter(*this, filter::comparison::hierarchally_matches, std::move(joinCondition)); | ||
86 | } else { | ||
87 | return filter(*this, filter::comparison::matches, std::move(joinCondition)); | ||
88 | } | ||
89 | } | ||
90 | |||
91 | }; | ||
diff --git a/lib/field.h b/lib/field.h new file mode 100644 index 0000000..30c62be --- /dev/null +++ b/lib/field.h | |||
@@ -0,0 +1,306 @@ | |||
1 | #ifndef FIELD_H_43258321 | ||
2 | #define FIELD_H_43258321 | ||
3 | |||
#include "enums.h"
#include <cstring>
#include <stdexcept>
#include <string>
#include <tuple>
7 | |||
8 | namespace verbly { | ||
9 | |||
10 | class filter; | ||
11 | |||
12 | class field { | ||
13 | public: | ||
14 | enum class type { | ||
15 | undefined, | ||
16 | string, | ||
17 | integer, | ||
18 | boolean, | ||
19 | join, | ||
20 | join_through, | ||
21 | hierarchal_join | ||
22 | }; | ||
23 | |||
24 | // Default constructor | ||
25 | |||
26 | field() | ||
27 | { | ||
28 | } | ||
29 | |||
30 | // Static factories | ||
31 | |||
32 | static field stringField( | ||
33 | object obj, | ||
34 | const char* name, | ||
35 | bool nullable = false) | ||
36 | { | ||
37 | return field(obj, type::string, name, nullable); | ||
38 | } | ||
39 | |||
40 | static field stringField( | ||
41 | const char* table, | ||
42 | const char* name, | ||
43 | bool nullable = false) | ||
44 | { | ||
45 | return field(object::undefined, type::string, name, nullable, table); | ||
46 | } | ||
47 | |||
48 | static field integerField( | ||
49 | object obj, | ||
50 | const char* name, | ||
51 | bool nullable = false) | ||
52 | { | ||
53 | return field(obj, type::integer, name, nullable); | ||
54 | } | ||
55 | |||
56 | static field integerField( | ||
57 | const char* table, | ||
58 | const char* name, | ||
59 | bool nullable = false) | ||
60 | { | ||
61 | return field(object::undefined, type::integer, name, nullable, table); | ||
62 | } | ||
63 | |||
64 | static field booleanField( | ||
65 | object obj, | ||
66 | const char* name, | ||
67 | bool nullable = false) | ||
68 | { | ||
69 | return field(obj, type::boolean, name, nullable); | ||
70 | } | ||
71 | |||
72 | static field booleanField( | ||
73 | const char* table, | ||
74 | const char* name, | ||
75 | bool nullable = false) | ||
76 | { | ||
77 | return field(object::undefined, type::boolean, name, nullable, table); | ||
78 | } | ||
79 | |||
80 | static field joinField( | ||
81 | object obj, | ||
82 | const char* name, | ||
83 | object joinWith, | ||
84 | bool nullable = false) | ||
85 | { | ||
86 | return field(obj, type::join, name, nullable, 0, joinWith); | ||
87 | } | ||
88 | |||
89 | static field joinField( | ||
90 | object obj, | ||
91 | const char* name, | ||
92 | const char* table, | ||
93 | bool nullable = false) | ||
94 | { | ||
95 | return field(obj, type::join, name, nullable, table); | ||
96 | } | ||
97 | |||
98 | static field joinThrough( | ||
99 | object obj, | ||
100 | const char* name, | ||
101 | object joinWith, | ||
102 | const char* joinTable, | ||
103 | const char* foreignColumn) | ||
104 | { | ||
105 | return field(obj, type::join_through, name, true, joinTable, joinWith, foreignColumn, name, foreignColumn); | ||
106 | } | ||
107 | |||
108 | static field joinThrough( | ||
109 | object obj, | ||
110 | const char* name, | ||
111 | object joinWith, | ||
112 | const char* joinTable, | ||
113 | const char* foreignColumn, | ||
114 | const char* joinColumn, | ||
115 | const char* foreignJoinColumn) | ||
116 | { | ||
117 | return field(obj, type::join_through, name, true, joinTable, joinWith, foreignColumn, joinColumn, foreignJoinColumn); | ||
118 | } | ||
119 | |||
120 | static field selfJoin( | ||
121 | object obj, | ||
122 | const char* name, | ||
123 | const char* joinTable, | ||
124 | const char* joinColumn, | ||
125 | const char* foreignJoinColumn) | ||
126 | { | ||
127 | return field(obj, type::join_through, name, true, joinTable, obj, name, joinColumn, foreignJoinColumn); | ||
128 | } | ||
129 | |||
130 | static field hierarchalSelfJoin( | ||
131 | object obj, | ||
132 | const char* name, | ||
133 | const char* joinTable, | ||
134 | const char* joinColumn, | ||
135 | const char* foreignJoinColumn) | ||
136 | { | ||
137 | return field(obj, type::hierarchal_join, name, true, joinTable, obj, name, joinColumn, foreignJoinColumn); | ||
138 | } | ||
139 | |||
140 | // Accessors | ||
141 | |||
142 | object getObject() const | ||
143 | { | ||
144 | return object_; | ||
145 | } | ||
146 | |||
147 | type getType() const | ||
148 | { | ||
149 | return type_; | ||
150 | } | ||
151 | |||
152 | bool isJoin() const | ||
153 | { | ||
154 | return ((type_ == type::join) || (type_ == type::join_through) || (type_ == type::hierarchal_join)); | ||
155 | } | ||
156 | |||
157 | const char* getColumn() const | ||
158 | { | ||
159 | return column_; | ||
160 | } | ||
161 | |||
162 | bool isNullable() const | ||
163 | { | ||
164 | return nullable_; | ||
165 | } | ||
166 | |||
167 | bool hasTable() const | ||
168 | { | ||
169 | return (table_ != 0); | ||
170 | } | ||
171 | |||
172 | const char* getTable() const | ||
173 | { | ||
174 | return table_; | ||
175 | } | ||
176 | |||
177 | // Joins | ||
178 | |||
179 | object getJoinObject() const | ||
180 | { | ||
181 | // We ignore hierarchal joins because they are always self joins. | ||
182 | return ((type_ == type::join) || (type_ == type::join_through)) | ||
183 | ? joinObject_ | ||
184 | : throw std::domain_error("Non-join fields don't have join objects"); | ||
185 | } | ||
186 | |||
187 | // Many-to-many joins | ||
188 | |||
189 | const char* getForeignColumn() const | ||
190 | { | ||
191 | // We ignore hierarchal joins because they are always self joins. | ||
192 | return (type_ == type::join_through) | ||
193 | ? foreignColumn_ | ||
194 | : throw std::domain_error("Only many-to-many join fields have a foreign column"); | ||
195 | } | ||
196 | |||
197 | const char* getJoinColumn() const | ||
198 | { | ||
199 | return ((type_ == type::join_through) || (type_ == type::hierarchal_join)) | ||
200 | ? joinColumn_ | ||
201 | : throw std::domain_error("Only many-to-many join fields have a join column"); | ||
202 | } | ||
203 | |||
204 | const char* getForeignJoinColumn() const | ||
205 | { | ||
206 | return ((type_ == type::join_through) || (type_ == type::hierarchal_join)) | ||
207 | ? foreignJoinColumn_ | ||
208 | : throw std::domain_error("Only many-to-many join fields have a foreign join column"); | ||
209 | } | ||
210 | |||
211 | // Ordering | ||
212 | |||
213 | bool operator<(const field& other) const | ||
214 | { | ||
215 | // For the most part, (object, column) uniquely identifies fields. | ||
216 | // However, there do exist a number of relationships from an object to | ||
217 | // itself, such as notion hypernymy/hyponymy. Hypernymy and hyponymy have | ||
218 | // the same object (notion), the same column (notion_id), and the same | ||
219 | // table (hypernymy); however, they have different join columns. | ||
220 | return std::tie(object_, column_, table_, joinColumn_) < std::tie(other.object_, other.column_, other.table_, other.joinColumn_); | ||
221 | } | ||
222 | |||
223 | // Equality | ||
224 | |||
225 | bool operator==(const field& other) const | ||
226 | { | ||
227 | // For the most part, (object, column) uniquely identifies fields. | ||
228 | // However, there do exist a number of relationships from an object to | ||
229 | // itself, such as notion hypernymy/hyponymy. Hypernymy and hyponymy have | ||
230 | // the same object (notion), the same column (notion_id), and the same | ||
231 | // table (hypernymy); however, they have different join columns. | ||
232 | return std::tie(object_, column_, table_, joinColumn_) == std::tie(other.object_, other.column_, other.table_, other.joinColumn_); | ||
233 | } | ||
234 | |||
235 | // Filter construction | ||
236 | |||
237 | filter operator==(int value) const; // Integer equality | ||
238 | filter operator!=(int value) const; // Integer inequality | ||
239 | filter operator<(int value) const; // Integer is less than | ||
240 | filter operator<=(int value) const; // Integer is at most | ||
241 | filter operator>(int value) const; // Integer is greater than | ||
242 | filter operator>=(int value) const; // Integer is at least | ||
243 | |||
244 | filter operator==(part_of_speech value) const; // Part of speech equality | ||
245 | filter operator==(positioning value) const; // Adjective positioning equality | ||
246 | filter operator==(inflection value) const; // Inflection category equality | ||
247 | |||
248 | filter operator==(bool value) const; // Boolean equality | ||
249 | |||
250 | filter operator==(std::string value) const; // String equality | ||
251 | filter operator!=(std::string value) const; // String inequality | ||
252 | filter operator%=(std::string value) const; // String matching | ||
253 | |||
254 | operator filter() const; // Non-nullity | ||
255 | filter operator!() const; // Nullity | ||
256 | |||
257 | filter operator%=(filter joinCondition) const; // Join | ||
258 | |||
259 | private: | ||
260 | |||
261 | // Constructor | ||
262 | |||
263 | field( | ||
264 | object obj, | ||
265 | type datatype, | ||
266 | const char* column, | ||
267 | bool nullable = false, | ||
268 | const char* table = 0, | ||
269 | object joinObject = object::undefined, | ||
270 | const char* foreignColumn = 0, | ||
271 | const char* joinColumn = 0, | ||
272 | const char* foreignJoinColumn = 0) : | ||
273 | object_(obj), | ||
274 | type_(datatype), | ||
275 | column_(column), | ||
276 | nullable_(nullable), | ||
277 | table_(table), | ||
278 | joinObject_(joinObject), | ||
279 | foreignColumn_(foreignColumn), | ||
280 | joinColumn_(joinColumn), | ||
281 | foreignJoinColumn_(foreignJoinColumn) | ||
282 | { | ||
283 | } | ||
284 | |||
285 | // General | ||
286 | object object_ = object::undefined; | ||
287 | type type_ = type::undefined; | ||
288 | const char* column_ = 0; | ||
289 | const char* table_ = 0; | ||
290 | |||
291 | // Non-joins and belongs-to joins | ||
292 | bool nullable_ = false; | ||
293 | |||
294 | // Joins | ||
295 | object joinObject_ = object::undefined; | ||
296 | |||
297 | // Many-to-many joins | ||
298 | const char* foreignColumn_ = 0; | ||
299 | const char* joinColumn_ = 0; | ||
300 | const char* foreignJoinColumn_ = 0; | ||
301 | |||
302 | }; | ||
303 | |||
304 | }; | ||
305 | |||
306 | #endif /* end of include guard: FIELD_H_43258321 */ | ||
diff --git a/lib/filter.cpp b/lib/filter.cpp new file mode 100644 index 0000000..959fa05 --- /dev/null +++ b/lib/filter.cpp | |||
@@ -0,0 +1,1365 @@ | |||
1 | #include "filter.h" | ||
2 | #include <stdexcept> | ||
3 | #include <map> | ||
4 | #include "notion.h" | ||
5 | #include "word.h" | ||
6 | #include "group.h" | ||
7 | #include "frame.h" | ||
8 | #include "lemma.h" | ||
9 | #include "form.h" | ||
10 | #include "pronunciation.h" | ||
11 | |||
12 | namespace verbly { | ||
13 | |||
14 | filter::filter(const filter& other) | ||
15 | { | ||
16 | type_ = other.type_; | ||
17 | |||
18 | switch (type_) | ||
19 | { | ||
20 | case type::empty: | ||
21 | { | ||
22 | break; | ||
23 | } | ||
24 | |||
25 | case type::singleton: | ||
26 | { | ||
27 | new(&singleton_.filterField) field(other.singleton_.filterField); | ||
28 | singleton_.filterType = other.singleton_.filterType; | ||
29 | |||
30 | switch (singleton_.filterType) | ||
31 | { | ||
32 | case comparison::int_equals: | ||
33 | case comparison::int_does_not_equal: | ||
34 | case comparison::int_is_at_least: | ||
35 | case comparison::int_is_greater_than: | ||
36 | case comparison::int_is_at_most: | ||
37 | case comparison::int_is_less_than: | ||
38 | { | ||
39 | singleton_.intValue = other.singleton_.intValue; | ||
40 | |||
41 | break; | ||
42 | } | ||
43 | |||
44 | case comparison::boolean_equals: | ||
45 | { | ||
46 | singleton_.boolValue = other.singleton_.boolValue; | ||
47 | |||
48 | break; | ||
49 | } | ||
50 | |||
51 | case comparison::string_equals: | ||
52 | case comparison::string_does_not_equal: | ||
53 | case comparison::string_is_like: | ||
54 | case comparison::string_is_not_like: | ||
55 | { | ||
56 | new(&singleton_.stringValue) std::string(other.singleton_.stringValue); | ||
57 | |||
58 | break; | ||
59 | } | ||
60 | |||
61 | case comparison::is_null: | ||
62 | case comparison::is_not_null: | ||
63 | { | ||
64 | break; | ||
65 | } | ||
66 | |||
67 | case comparison::matches: | ||
68 | case comparison::does_not_match: | ||
69 | case comparison::hierarchally_matches: | ||
70 | case comparison::does_not_hierarchally_match: | ||
71 | { | ||
72 | new(&singleton_.join) std::unique_ptr<filter>(new filter(*other.singleton_.join)); | ||
73 | |||
74 | break; | ||
75 | } | ||
76 | } | ||
77 | |||
78 | break; | ||
79 | } | ||
80 | |||
81 | case type::group: | ||
82 | { | ||
83 | new(&group_.children) std::list<filter>(other.group_.children); | ||
84 | group_.orlogic = other.group_.orlogic; | ||
85 | |||
86 | break; | ||
87 | } | ||
88 | } | ||
89 | } | ||
90 | |||
91 | filter::filter(filter&& other) : filter() | ||
92 | { | ||
93 | swap(*this, other); | ||
94 | } | ||
95 | |||
96 | filter& filter::operator=(filter other) | ||
97 | { | ||
98 | swap(*this, other); | ||
99 | |||
100 | return *this; | ||
101 | } | ||
102 | |||
103 | void swap(filter& first, filter& second) | ||
104 | { | ||
105 | using type = filter::type; | ||
106 | using comparison = filter::comparison; | ||
107 | |||
108 | type tempType = first.type_; | ||
109 | field tempField; | ||
110 | comparison tempComparison; | ||
111 | std::unique_ptr<filter> tempJoin; | ||
112 | std::string tempStringValue; | ||
113 | int tempIntValue; | ||
114 | bool tempBoolValue; | ||
115 | std::list<filter> tempChildren; | ||
116 | bool tempOrlogic; | ||
117 | |||
118 | switch (tempType) | ||
119 | { | ||
120 | case type::empty: | ||
121 | { | ||
122 | break; | ||
123 | } | ||
124 | |||
125 | case type::singleton: | ||
126 | { | ||
127 | tempField = std::move(first.singleton_.filterField); | ||
128 | tempComparison = first.singleton_.filterType; | ||
129 | |||
130 | switch (tempComparison) | ||
131 | { | ||
132 | case comparison::int_equals: | ||
133 | case comparison::int_does_not_equal: | ||
134 | case comparison::int_is_at_least: | ||
135 | case comparison::int_is_greater_than: | ||
136 | case comparison::int_is_at_most: | ||
137 | case comparison::int_is_less_than: | ||
138 | { | ||
139 | tempIntValue = first.singleton_.intValue; | ||
140 | |||
141 | break; | ||
142 | } | ||
143 | |||
144 | case comparison::boolean_equals: | ||
145 | { | ||
146 | tempBoolValue = first.singleton_.boolValue; | ||
147 | |||
148 | break; | ||
149 | } | ||
150 | |||
151 | case comparison::string_equals: | ||
152 | case comparison::string_does_not_equal: | ||
153 | case comparison::string_is_like: | ||
154 | case comparison::string_is_not_like: | ||
155 | { | ||
156 | tempStringValue = std::move(first.singleton_.stringValue); | ||
157 | |||
158 | break; | ||
159 | } | ||
160 | |||
161 | case comparison::is_null: | ||
162 | case comparison::is_not_null: | ||
163 | { | ||
164 | break; | ||
165 | } | ||
166 | |||
167 | case comparison::matches: | ||
168 | case comparison::does_not_match: | ||
169 | case comparison::hierarchally_matches: | ||
170 | case comparison::does_not_hierarchally_match: | ||
171 | { | ||
172 | tempJoin = std::move(first.singleton_.join); | ||
173 | |||
174 | break; | ||
175 | } | ||
176 | } | ||
177 | |||
178 | break; | ||
179 | } | ||
180 | |||
181 | case type::group: | ||
182 | { | ||
183 | tempChildren = std::move(first.group_.children); | ||
184 | tempOrlogic = first.group_.orlogic; | ||
185 | |||
186 | break; | ||
187 | } | ||
188 | } | ||
189 | |||
190 | first.~filter(); | ||
191 | |||
192 | first.type_ = second.type_; | ||
193 | |||
194 | switch (first.type_) | ||
195 | { | ||
196 | case type::empty: | ||
197 | { | ||
198 | break; | ||
199 | } | ||
200 | |||
201 | case type::singleton: | ||
202 | { | ||
203 | new(&first.singleton_.filterField) field(std::move(second.singleton_.filterField)); | ||
204 | first.singleton_.filterType = second.singleton_.filterType; | ||
205 | |||
206 | switch (first.singleton_.filterType) | ||
207 | { | ||
208 | case comparison::int_equals: | ||
209 | case comparison::int_does_not_equal: | ||
210 | case comparison::int_is_at_least: | ||
211 | case comparison::int_is_greater_than: | ||
212 | case comparison::int_is_at_most: | ||
213 | case comparison::int_is_less_than: | ||
214 | { | ||
215 | first.singleton_.intValue = second.singleton_.intValue; | ||
216 | |||
217 | break; | ||
218 | } | ||
219 | |||
220 | case comparison::boolean_equals: | ||
221 | { | ||
222 | first.singleton_.boolValue = second.singleton_.boolValue; | ||
223 | |||
224 | break; | ||
225 | } | ||
226 | |||
227 | case comparison::string_equals: | ||
228 | case comparison::string_does_not_equal: | ||
229 | case comparison::string_is_like: | ||
230 | case comparison::string_is_not_like: | ||
231 | { | ||
232 | new(&first.singleton_.stringValue) std::string(std::move(second.singleton_.stringValue)); | ||
233 | |||
234 | break; | ||
235 | } | ||
236 | |||
237 | case comparison::is_null: | ||
238 | case comparison::is_not_null: | ||
239 | { | ||
240 | break; | ||
241 | } | ||
242 | |||
243 | case comparison::matches: | ||
244 | case comparison::does_not_match: | ||
245 | case comparison::hierarchally_matches: | ||
246 | case comparison::does_not_hierarchally_match: | ||
247 | { | ||
248 | new(&first.singleton_.join) std::unique_ptr<filter>(std::move(second.singleton_.join)); | ||
249 | |||
250 | break; | ||
251 | } | ||
252 | } | ||
253 | |||
254 | break; | ||
255 | } | ||
256 | |||
257 | case type::group: | ||
258 | { | ||
259 | new(&first.group_.children) std::list<filter>(std::move(second.group_.children)); | ||
260 | first.group_.orlogic = second.group_.orlogic; | ||
261 | |||
262 | break; | ||
263 | } | ||
264 | } | ||
265 | |||
266 | second.~filter(); | ||
267 | |||
268 | second.type_ = tempType; | ||
269 | |||
270 | switch (second.type_) | ||
271 | { | ||
272 | case type::empty: | ||
273 | { | ||
274 | break; | ||
275 | } | ||
276 | |||
277 | case type::singleton: | ||
278 | { | ||
279 | new(&second.singleton_.filterField) field(std::move(tempField)); | ||
280 | second.singleton_.filterType = tempComparison; | ||
281 | |||
282 | switch (second.singleton_.filterType) | ||
283 | { | ||
284 | case comparison::int_equals: | ||
285 | case comparison::int_does_not_equal: | ||
286 | case comparison::int_is_at_least: | ||
287 | case comparison::int_is_greater_than: | ||
288 | case comparison::int_is_at_most: | ||
289 | case comparison::int_is_less_than: | ||
290 | { | ||
291 | second.singleton_.intValue = tempIntValue; | ||
292 | |||
293 | break; | ||
294 | } | ||
295 | |||
296 | case comparison::boolean_equals: | ||
297 | { | ||
298 | second.singleton_.boolValue = tempBoolValue; | ||
299 | |||
300 | break; | ||
301 | } | ||
302 | |||
303 | case comparison::string_equals: | ||
304 | case comparison::string_does_not_equal: | ||
305 | case comparison::string_is_like: | ||
306 | case comparison::string_is_not_like: | ||
307 | { | ||
308 | new(&second.singleton_.stringValue) std::string(std::move(tempStringValue)); | ||
309 | |||
310 | break; | ||
311 | } | ||
312 | |||
313 | case comparison::is_null: | ||
314 | case comparison::is_not_null: | ||
315 | { | ||
316 | break; | ||
317 | } | ||
318 | |||
319 | case comparison::matches: | ||
320 | case comparison::does_not_match: | ||
321 | case comparison::hierarchally_matches: | ||
322 | case comparison::does_not_hierarchally_match: | ||
323 | { | ||
324 | new(&second.singleton_.join) std::unique_ptr<filter>(std::move(tempJoin)); | ||
325 | |||
326 | break; | ||
327 | } | ||
328 | } | ||
329 | |||
330 | break; | ||
331 | } | ||
332 | |||
333 | case type::group: | ||
334 | { | ||
335 | new(&second.group_.children) std::list<filter>(std::move(tempChildren)); | ||
336 | second.group_.orlogic = tempOrlogic; | ||
337 | |||
338 | break; | ||
339 | } | ||
340 | } | ||
341 | } | ||
342 | |||
343 | filter::~filter() | ||
344 | { | ||
345 | switch (type_) | ||
346 | { | ||
347 | case type::empty: | ||
348 | { | ||
349 | break; | ||
350 | } | ||
351 | |||
352 | case type::singleton: | ||
353 | { | ||
354 | singleton_.filterField.~field(); | ||
355 | |||
356 | switch (singleton_.filterType) | ||
357 | { | ||
358 | case comparison::int_equals: | ||
359 | case comparison::int_does_not_equal: | ||
360 | case comparison::int_is_at_least: | ||
361 | case comparison::int_is_greater_than: | ||
362 | case comparison::int_is_at_most: | ||
363 | case comparison::int_is_less_than: | ||
364 | case comparison::boolean_equals: | ||
365 | case comparison::is_null: | ||
366 | case comparison::is_not_null: | ||
367 | { | ||
368 | break; | ||
369 | } | ||
370 | |||
371 | case comparison::string_equals: | ||
372 | case comparison::string_does_not_equal: | ||
373 | case comparison::string_is_like: | ||
374 | case comparison::string_is_not_like: | ||
375 | { | ||
376 | using string_type = std::string; | ||
377 | |||
378 | singleton_.stringValue.~string_type(); | ||
379 | |||
380 | break; | ||
381 | } | ||
382 | |||
383 | case comparison::matches: | ||
384 | case comparison::does_not_match: | ||
385 | case comparison::hierarchally_matches: | ||
386 | case comparison::does_not_hierarchally_match: | ||
387 | { | ||
388 | using ptr_type = std::unique_ptr<filter>; | ||
389 | |||
390 | singleton_.join.~ptr_type(); | ||
391 | |||
392 | break; | ||
393 | } | ||
394 | } | ||
395 | |||
396 | break; | ||
397 | } | ||
398 | |||
399 | case type::group: | ||
400 | { | ||
401 | using list_type = std::list<filter>; | ||
402 | |||
403 | group_.children.~list_type(); | ||
404 | |||
405 | break; | ||
406 | } | ||
407 | } | ||
408 | } | ||
409 | |||
// Default constructor. The body is intentionally empty: no union member is
// constructed here. The move constructor delegates to this constructor
// before swapping.
// NOTE(review): presumably type_ gets type::empty from an in-class
// initializer in filter.h - confirm. Keep the explicit empty body; the class
// holds a union with non-trivial members, so `= default` is not a drop-in
// replacement.
410 | filter::filter() | ||
411 | { | ||
412 | } | ||
413 | |||
414 | filter::filter( | ||
415 | field filterField, | ||
416 | comparison filterType, | ||
417 | int filterValue) : | ||
418 | type_(type::singleton) | ||
419 | { | ||
420 | if (filterField.getType() == field::type::integer) | ||
421 | { | ||
422 | switch (filterType) | ||
423 | { | ||
424 | case comparison::int_equals: | ||
425 | case comparison::int_does_not_equal: | ||
426 | case comparison::int_is_at_least: | ||
427 | case comparison::int_is_greater_than: | ||
428 | case comparison::int_is_at_most: | ||
429 | case comparison::int_is_less_than: | ||
430 | { | ||
431 | new(&singleton_.filterField) field(std::move(filterField)); | ||
432 | singleton_.filterType = filterType; | ||
433 | singleton_.intValue = filterValue; | ||
434 | |||
435 | break; | ||
436 | } | ||
437 | |||
438 | case comparison::boolean_equals: | ||
439 | case comparison::string_equals: | ||
440 | case comparison::string_does_not_equal: | ||
441 | case comparison::string_is_like: | ||
442 | case comparison::string_is_not_like: | ||
443 | case comparison::is_null: | ||
444 | case comparison::is_not_null: | ||
445 | case comparison::matches: | ||
446 | case comparison::does_not_match: | ||
447 | case comparison::hierarchally_matches: | ||
448 | case comparison::does_not_hierarchally_match: | ||
449 | { | ||
450 | throw std::invalid_argument("Invalid comparison for integer field"); | ||
451 | } | ||
452 | } | ||
453 | } else { | ||
454 | throw std::domain_error("Cannot match a non-integer field against an integer value"); | ||
455 | } | ||
456 | } | ||
457 | |||
458 | filter::filter( | ||
459 | field filterField, | ||
460 | comparison filterType, | ||
461 | std::string filterValue) : | ||
462 | type_(type::singleton) | ||
463 | { | ||
464 | if (filterField.getType() == field::type::string) | ||
465 | { | ||
466 | switch (filterType) | ||
467 | { | ||
468 | case comparison::string_equals: | ||
469 | case comparison::string_does_not_equal: | ||
470 | case comparison::string_is_like: | ||
471 | case comparison::string_is_not_like: | ||
472 | { | ||
473 | new(&singleton_.filterField) field(std::move(filterField)); | ||
474 | singleton_.filterType = filterType; | ||
475 | new(&singleton_.stringValue) std::string(std::move(filterValue)); | ||
476 | |||
477 | break; | ||
478 | } | ||
479 | |||
480 | case comparison::int_equals: | ||
481 | case comparison::int_does_not_equal: | ||
482 | case comparison::int_is_at_least: | ||
483 | case comparison::int_is_greater_than: | ||
484 | case comparison::int_is_at_most: | ||
485 | case comparison::int_is_less_than: | ||
486 | case comparison::boolean_equals: | ||
487 | case comparison::is_null: | ||
488 | case comparison::is_not_null: | ||
489 | case comparison::matches: | ||
490 | case comparison::does_not_match: | ||
491 | case comparison::hierarchally_matches: | ||
492 | case comparison::does_not_hierarchally_match: | ||
493 | { | ||
494 | throw std::invalid_argument("Invalid comparison for string field"); | ||
495 | } | ||
496 | } | ||
497 | } else { | ||
498 | throw std::domain_error("Cannot match a non-string field against an string value"); | ||
499 | } | ||
500 | } | ||
501 | |||
502 | filter::filter( | ||
503 | field filterField, | ||
504 | comparison filterType, | ||
505 | bool filterValue) : | ||
506 | type_(type::singleton) | ||
507 | { | ||
508 | if (filterField.getType() == field::type::boolean) | ||
509 | { | ||
510 | switch (filterType) | ||
511 | { | ||
512 | case comparison::boolean_equals: | ||
513 | { | ||
514 | new(&singleton_.filterField) field(std::move(filterField)); | ||
515 | singleton_.filterType = filterType; | ||
516 | singleton_.boolValue = filterValue; | ||
517 | |||
518 | break; | ||
519 | } | ||
520 | |||
521 | case comparison::string_equals: | ||
522 | case comparison::string_does_not_equal: | ||
523 | case comparison::string_is_like: | ||
524 | case comparison::string_is_not_like: | ||
525 | case comparison::int_equals: | ||
526 | case comparison::int_does_not_equal: | ||
527 | case comparison::int_is_at_least: | ||
528 | case comparison::int_is_greater_than: | ||
529 | case comparison::int_is_at_most: | ||
530 | case comparison::int_is_less_than: | ||
531 | case comparison::is_null: | ||
532 | case comparison::is_not_null: | ||
533 | case comparison::matches: | ||
534 | case comparison::does_not_match: | ||
535 | case comparison::hierarchally_matches: | ||
536 | case comparison::does_not_hierarchally_match: | ||
537 | { | ||
538 | throw std::invalid_argument("Invalid comparison for boolean field"); | ||
539 | } | ||
540 | } | ||
541 | } else { | ||
542 | throw std::domain_error("Cannot match a non-boolean field against a boolean value"); | ||
543 | } | ||
544 | } | ||
545 | |||
546 | filter::filter( | ||
547 | field filterField, | ||
548 | comparison filterType) : | ||
549 | type_(type::singleton) | ||
550 | { | ||
551 | if (filterField.isNullable()) | ||
552 | { | ||
553 | switch (filterType) | ||
554 | { | ||
555 | case comparison::is_null: | ||
556 | case comparison::is_not_null: | ||
557 | { | ||
558 | new(&singleton_.filterField) field(std::move(filterField)); | ||
559 | singleton_.filterType = filterType; | ||
560 | |||
561 | break; | ||
562 | } | ||
563 | |||
564 | case comparison::string_equals: | ||
565 | case comparison::string_does_not_equal: | ||
566 | case comparison::string_is_like: | ||
567 | case comparison::string_is_not_like: | ||
568 | case comparison::int_equals: | ||
569 | case comparison::int_does_not_equal: | ||
570 | case comparison::int_is_at_least: | ||
571 | case comparison::int_is_greater_than: | ||
572 | case comparison::int_is_at_most: | ||
573 | case comparison::int_is_less_than: | ||
574 | case comparison::boolean_equals: | ||
575 | case comparison::matches: | ||
576 | case comparison::does_not_match: | ||
577 | case comparison::hierarchally_matches: | ||
578 | case comparison::does_not_hierarchally_match: | ||
579 | { | ||
580 | throw std::invalid_argument("Incorrect constructor for given comparison"); | ||
581 | } | ||
582 | } | ||
583 | } else { | ||
584 | throw std::domain_error("Cannot check nullity/non-nullity of non-nullable field"); | ||
585 | } | ||
586 | } | ||
587 | |||
588 | filter::filter( | ||
589 | field joinOn, | ||
590 | comparison filterType, | ||
591 | filter joinCondition) : | ||
592 | type_(type::singleton) | ||
593 | { | ||
594 | switch (joinOn.getType()) | ||
595 | { | ||
596 | case field::type::join: | ||
597 | case field::type::join_through: | ||
598 | { | ||
599 | switch (filterType) | ||
600 | { | ||
601 | case comparison::matches: | ||
602 | case comparison::does_not_match: | ||
603 | { | ||
604 | new(&singleton_.filterField) field(std::move(joinOn)); | ||
605 | singleton_.filterType = filterType; | ||
606 | new(&singleton_.join) std::unique_ptr<filter>(new filter(joinCondition.normalize(singleton_.filterField.getJoinObject()))); | ||
607 | |||
608 | break; | ||
609 | } | ||
610 | |||
611 | case comparison::int_equals: | ||
612 | case comparison::int_does_not_equal: | ||
613 | case comparison::int_is_at_least: | ||
614 | case comparison::int_is_greater_than: | ||
615 | case comparison::int_is_at_most: | ||
616 | case comparison::int_is_less_than: | ||
617 | case comparison::boolean_equals: | ||
618 | case comparison::string_equals: | ||
619 | case comparison::string_does_not_equal: | ||
620 | case comparison::string_is_like: | ||
621 | case comparison::string_is_not_like: | ||
622 | case comparison::is_null: | ||
623 | case comparison::is_not_null: | ||
624 | case comparison::hierarchally_matches: | ||
625 | case comparison::does_not_hierarchally_match: | ||
626 | { | ||
627 | throw std::invalid_argument("Incorrect constructor for given comparison"); | ||
628 | } | ||
629 | } | ||
630 | |||
631 | break; | ||
632 | } | ||
633 | |||
634 | case field::type::hierarchal_join: | ||
635 | { | ||
636 | switch (filterType) | ||
637 | { | ||
638 | case comparison::hierarchally_matches: | ||
639 | case comparison::does_not_hierarchally_match: | ||
640 | { | ||
641 | new(&singleton_.filterField) field(std::move(joinOn)); | ||
642 | singleton_.filterType = filterType; | ||
643 | new(&singleton_.join) std::unique_ptr<filter>(new filter(joinCondition.normalize(singleton_.filterField.getObject()))); | ||
644 | |||
645 | break; | ||
646 | } | ||
647 | |||
648 | case comparison::int_equals: | ||
649 | case comparison::int_does_not_equal: | ||
650 | case comparison::int_is_at_least: | ||
651 | case comparison::int_is_greater_than: | ||
652 | case comparison::int_is_at_most: | ||
653 | case comparison::int_is_less_than: | ||
654 | case comparison::boolean_equals: | ||
655 | case comparison::string_equals: | ||
656 | case comparison::string_does_not_equal: | ||
657 | case comparison::string_is_like: | ||
658 | case comparison::string_is_not_like: | ||
659 | case comparison::is_null: | ||
660 | case comparison::is_not_null: | ||
661 | case comparison::matches: | ||
662 | case comparison::does_not_match: | ||
663 | { | ||
664 | throw std::invalid_argument("Incorrect constructor for given comparison"); | ||
665 | } | ||
666 | } | ||
667 | |||
668 | break; | ||
669 | } | ||
670 | |||
671 | case field::type::undefined: | ||
672 | case field::type::string: | ||
673 | case field::type::integer: | ||
674 | case field::type::boolean: | ||
675 | { | ||
676 | throw std::domain_error("Matching field must be a join field"); | ||
677 | } | ||
678 | } | ||
679 | } | ||
680 | |||
681 | field filter::getField() const | ||
682 | { | ||
683 | if (type_ == type::singleton) | ||
684 | { | ||
685 | return singleton_.filterField; | ||
686 | } else { | ||
687 | throw std::domain_error("This filter does not have a field"); | ||
688 | } | ||
689 | } | ||
690 | |||
691 | filter::comparison filter::getComparison() const | ||
692 | { | ||
693 | if (type_ == type::singleton) | ||
694 | { | ||
695 | return singleton_.filterType; | ||
696 | } else { | ||
697 | throw std::domain_error("This filter does not have a comparison"); | ||
698 | } | ||
699 | } | ||
700 | |||
701 | filter filter::getJoinCondition() const | ||
702 | { | ||
703 | if (type_ == type::singleton) | ||
704 | { | ||
705 | switch (singleton_.filterType) | ||
706 | { | ||
707 | case comparison::matches: | ||
708 | case comparison::does_not_match: | ||
709 | case comparison::hierarchally_matches: | ||
710 | case comparison::does_not_hierarchally_match: | ||
711 | { | ||
712 | return *singleton_.join; | ||
713 | } | ||
714 | |||
715 | case comparison::string_equals: | ||
716 | case comparison::string_does_not_equal: | ||
717 | case comparison::string_is_like: | ||
718 | case comparison::string_is_not_like: | ||
719 | case comparison::int_equals: | ||
720 | case comparison::int_does_not_equal: | ||
721 | case comparison::int_is_at_least: | ||
722 | case comparison::int_is_greater_than: | ||
723 | case comparison::int_is_at_most: | ||
724 | case comparison::int_is_less_than: | ||
725 | case comparison::boolean_equals: | ||
726 | case comparison::is_null: | ||
727 | case comparison::is_not_null: | ||
728 | { | ||
729 | throw std::domain_error("This filter does not have a join condition"); | ||
730 | } | ||
731 | } | ||
732 | } else { | ||
733 | throw std::domain_error("This filter does not have a join condition"); | ||
734 | } | ||
735 | } | ||
736 | |||
737 | std::string filter::getStringArgument() const | ||
738 | { | ||
739 | if (type_ == type::singleton) | ||
740 | { | ||
741 | switch (singleton_.filterType) | ||
742 | { | ||
743 | case comparison::string_equals: | ||
744 | case comparison::string_does_not_equal: | ||
745 | case comparison::string_is_like: | ||
746 | case comparison::string_is_not_like: | ||
747 | { | ||
748 | return singleton_.stringValue; | ||
749 | } | ||
750 | |||
751 | case comparison::int_equals: | ||
752 | case comparison::int_does_not_equal: | ||
753 | case comparison::int_is_at_least: | ||
754 | case comparison::int_is_greater_than: | ||
755 | case comparison::int_is_at_most: | ||
756 | case comparison::int_is_less_than: | ||
757 | case comparison::boolean_equals: | ||
758 | case comparison::is_null: | ||
759 | case comparison::is_not_null: | ||
760 | case comparison::matches: | ||
761 | case comparison::does_not_match: | ||
762 | case comparison::hierarchally_matches: | ||
763 | case comparison::does_not_hierarchally_match: | ||
764 | { | ||
765 | throw std::domain_error("This filter does not have a string argument"); | ||
766 | } | ||
767 | } | ||
768 | } else { | ||
769 | throw std::domain_error("This filter does not have a string argument"); | ||
770 | } | ||
771 | } | ||
772 | |||
773 | int filter::getIntegerArgument() const | ||
774 | { | ||
775 | if (type_ == type::singleton) | ||
776 | { | ||
777 | switch (singleton_.filterType) | ||
778 | { | ||
779 | case comparison::int_equals: | ||
780 | case comparison::int_does_not_equal: | ||
781 | case comparison::int_is_at_least: | ||
782 | case comparison::int_is_greater_than: | ||
783 | case comparison::int_is_at_most: | ||
784 | case comparison::int_is_less_than: | ||
785 | { | ||
786 | return singleton_.intValue; | ||
787 | } | ||
788 | |||
789 | case comparison::string_equals: | ||
790 | case comparison::string_does_not_equal: | ||
791 | case comparison::string_is_like: | ||
792 | case comparison::string_is_not_like: | ||
793 | case comparison::boolean_equals: | ||
794 | case comparison::is_null: | ||
795 | case comparison::is_not_null: | ||
796 | case comparison::matches: | ||
797 | case comparison::does_not_match: | ||
798 | case comparison::hierarchally_matches: | ||
799 | case comparison::does_not_hierarchally_match: | ||
800 | { | ||
801 | throw std::domain_error("This filter does not have an integer argument"); | ||
802 | } | ||
803 | } | ||
804 | } else { | ||
805 | throw std::domain_error("This filter does not have an integer argument"); | ||
806 | } | ||
807 | } | ||
808 | |||
809 | bool filter::getBooleanArgument() const | ||
810 | { | ||
811 | if ((type_ == type::singleton) && (singleton_.filterType == comparison::boolean_equals)) | ||
812 | { | ||
813 | return singleton_.boolValue; | ||
814 | } else { | ||
815 | throw std::domain_error("This filter does not have a boolean argument"); | ||
816 | } | ||
817 | } | ||
818 | |||
819 | filter::filter(bool orlogic) : type_(type::group) | ||
820 | { | ||
821 | new(&group_.children) std::list<filter>(); | ||
822 | group_.orlogic = orlogic; | ||
823 | } | ||
824 | |||
825 | bool filter::getOrlogic() const | ||
826 | { | ||
827 | if (type_ == type::group) | ||
828 | { | ||
829 | return group_.orlogic; | ||
830 | } else { | ||
831 | throw std::domain_error("This filter is not a group filter"); | ||
832 | } | ||
833 | } | ||
834 | |||
835 | filter filter::operator+(filter condition) const | ||
836 | { | ||
837 | filter result(*this); | ||
838 | result += std::move(condition); | ||
839 | |||
840 | return result; | ||
841 | } | ||
842 | |||
843 | filter& filter::operator+=(filter condition) | ||
844 | { | ||
845 | if (type_ == type::group) | ||
846 | { | ||
847 | group_.children.push_back(std::move(condition)); | ||
848 | |||
849 | return *this; | ||
850 | } else { | ||
851 | throw std::domain_error("Children can only be added to group filters"); | ||
852 | } | ||
853 | } | ||
854 | |||
855 | filter::const_iterator filter::begin() const | ||
856 | { | ||
857 | if (type_ == type::group) | ||
858 | { | ||
859 | return std::begin(group_.children); | ||
860 | } else { | ||
861 | throw std::domain_error("This filter has no children"); | ||
862 | } | ||
863 | } | ||
864 | |||
865 | filter::const_iterator filter::end() const | ||
866 | { | ||
867 | if (type_ == type::group) | ||
868 | { | ||
869 | return std::end(group_.children); | ||
870 | } else { | ||
871 | throw std::domain_error("This filter has no children"); | ||
872 | } | ||
873 | } | ||
874 | |||
875 | filter filter::operator!() const | ||
876 | { | ||
877 | switch (type_) | ||
878 | { | ||
879 | case type::empty: | ||
880 | { | ||
881 | return {}; | ||
882 | } | ||
883 | |||
884 | case type::singleton: | ||
885 | { | ||
886 | switch (singleton_.filterType) | ||
887 | { | ||
888 | case comparison::int_equals: | ||
889 | { | ||
890 | return filter(singleton_.filterField, comparison::int_does_not_equal, singleton_.intValue); | ||
891 | } | ||
892 | |||
893 | case comparison::int_does_not_equal: | ||
894 | { | ||
895 | return filter(singleton_.filterField, comparison::int_equals, singleton_.intValue); | ||
896 | } | ||
897 | |||
898 | case comparison::int_is_at_least: | ||
899 | { | ||
900 | return filter(singleton_.filterField, comparison::int_is_less_than, singleton_.intValue); | ||
901 | } | ||
902 | |||
903 | case comparison::int_is_greater_than: | ||
904 | { | ||
905 | return filter(singleton_.filterField, comparison::int_is_at_most, singleton_.intValue); | ||
906 | } | ||
907 | |||
908 | case comparison::int_is_at_most: | ||
909 | { | ||
910 | return filter(singleton_.filterField, comparison::int_is_greater_than, singleton_.intValue); | ||
911 | } | ||
912 | |||
913 | case comparison::int_is_less_than: | ||
914 | { | ||
915 | return filter(singleton_.filterField, comparison::int_is_at_least, singleton_.intValue); | ||
916 | } | ||
917 | |||
918 | case comparison::boolean_equals: | ||
919 | { | ||
920 | return filter(singleton_.filterField, comparison::boolean_equals, !singleton_.boolValue); | ||
921 | } | ||
922 | |||
923 | case comparison::string_equals: | ||
924 | { | ||
925 | return filter(singleton_.filterField, comparison::string_does_not_equal, singleton_.stringValue); | ||
926 | } | ||
927 | |||
928 | case comparison::string_does_not_equal: | ||
929 | { | ||
930 | return filter(singleton_.filterField, comparison::string_equals, singleton_.stringValue); | ||
931 | } | ||
932 | |||
933 | case comparison::string_is_like: | ||
934 | { | ||
935 | return filter(singleton_.filterField, comparison::string_is_not_like, singleton_.stringValue); | ||
936 | } | ||
937 | |||
938 | case comparison::string_is_not_like: | ||
939 | { | ||
940 | return filter(singleton_.filterField, comparison::string_is_like, singleton_.stringValue); | ||
941 | } | ||
942 | |||
943 | case comparison::is_null: | ||
944 | { | ||
945 | return filter(singleton_.filterField, comparison::is_not_null); | ||
946 | } | ||
947 | |||
948 | case comparison::is_not_null: | ||
949 | { | ||
950 | return filter(singleton_.filterField, comparison::is_null); | ||
951 | } | ||
952 | |||
953 | case comparison::matches: | ||
954 | { | ||
955 | return filter(singleton_.filterField, comparison::does_not_match, *singleton_.join); | ||
956 | } | ||
957 | |||
958 | case comparison::does_not_match: | ||
959 | { | ||
960 | return filter(singleton_.filterField, comparison::matches, *singleton_.join); | ||
961 | } | ||
962 | |||
963 | case comparison::hierarchally_matches: | ||
964 | { | ||
965 | return filter(singleton_.filterField, comparison::does_not_hierarchally_match, *singleton_.join); | ||
966 | } | ||
967 | |||
968 | case comparison::does_not_hierarchally_match: | ||
969 | { | ||
970 | return filter(singleton_.filterField, comparison::hierarchally_matches, *singleton_.join); | ||
971 | } | ||
972 | } | ||
973 | } | ||
974 | |||
975 | case type::group: | ||
976 | { | ||
977 | filter result(!group_.orlogic); | ||
978 | |||
979 | for (const filter& child : group_.children) | ||
980 | { | ||
981 | result += !child; | ||
982 | } | ||
983 | |||
984 | return result; | ||
985 | } | ||
986 | } | ||
987 | } | ||
988 | |||
989 | filter& filter::operator&=(filter condition) | ||
990 | { | ||
991 | return (*this = (*this && std::move(condition))); | ||
992 | } | ||
993 | |||
994 | filter& filter::operator|=(filter condition) | ||
995 | { | ||
996 | return (*this = (*this || std::move(condition))); | ||
997 | } | ||
998 | |||
999 | filter filter::operator&&(filter condition) const | ||
1000 | { | ||
1001 | switch (type_) | ||
1002 | { | ||
1003 | case type::empty: | ||
1004 | { | ||
1005 | return condition; | ||
1006 | } | ||
1007 | |||
1008 | case type::singleton: | ||
1009 | { | ||
1010 | filter result(false); | ||
1011 | result.group_.children.push_back(*this); | ||
1012 | result.group_.children.push_back(std::move(condition)); | ||
1013 | |||
1014 | return result; | ||
1015 | } | ||
1016 | |||
1017 | case type::group: | ||
1018 | { | ||
1019 | if (group_.orlogic) | ||
1020 | { | ||
1021 | filter result(false); | ||
1022 | result.group_.children.push_back(*this); | ||
1023 | result.group_.children.push_back(std::move(condition)); | ||
1024 | |||
1025 | return result; | ||
1026 | } else { | ||
1027 | filter result(*this); | ||
1028 | result.group_.children.push_back(std::move(condition)); | ||
1029 | |||
1030 | return result; | ||
1031 | } | ||
1032 | } | ||
1033 | } | ||
1034 | } | ||
1035 | |||
1036 | filter filter::operator||(filter condition) const | ||
1037 | { | ||
1038 | switch (type_) | ||
1039 | { | ||
1040 | case type::empty: | ||
1041 | { | ||
1042 | return condition; | ||
1043 | } | ||
1044 | |||
1045 | case type::singleton: | ||
1046 | { | ||
1047 | filter result(true); | ||
1048 | result.group_.children.push_back(*this); | ||
1049 | result.group_.children.push_back(std::move(condition)); | ||
1050 | |||
1051 | return result; | ||
1052 | } | ||
1053 | |||
1054 | case type::group: | ||
1055 | { | ||
1056 | if (!group_.orlogic) | ||
1057 | { | ||
1058 | filter result(true); | ||
1059 | result.group_.children.push_back(*this); | ||
1060 | result.group_.children.push_back(std::move(condition)); | ||
1061 | |||
1062 | return result; | ||
1063 | } else { | ||
1064 | filter result(*this); | ||
1065 | result.group_.children.push_back(std::move(condition)); | ||
1066 | |||
1067 | return result; | ||
1068 | } | ||
1069 | } | ||
1070 | } | ||
1071 | } | ||
1072 | |||
1073 | filter filter::normalize(object context) const | ||
1074 | { | ||
1075 | { | ||
1076 | switch (type_) | ||
1077 | { | ||
1078 | case type::empty: | ||
1079 | { | ||
1080 | return *this; | ||
1081 | } | ||
1082 | |||
1083 | case type::singleton: | ||
1084 | { | ||
1085 | // First, switch on the normalized context, and then switch on the | ||
1086 | // current context. We recursively recontextualize by using the | ||
1087 | // current filter as a subquery for a join such that the context of | ||
1088 | // the subquery is one step closer to the context of the current | ||
1089 | // filter, and then letting the filter constructor normalize the | ||
1090 | // subquery. | ||
1091 | switch (context) | ||
1092 | { | ||
1093 | case object::undefined: | ||
1094 | { | ||
1095 | // An undefined object indicates no participation in | ||
1096 | // recontexualization. | ||
1097 | return *this; | ||
1098 | } | ||
1099 | |||
1100 | case object::notion: | ||
1101 | { | ||
1102 | switch (singleton_.filterField.getObject()) | ||
1103 | { | ||
1104 | case object::undefined: | ||
1105 | case object::notion: | ||
1106 | { | ||
1107 | return *this; | ||
1108 | } | ||
1109 | |||
1110 | case object::word: | ||
1111 | case object::group: | ||
1112 | case object::frame: | ||
1113 | case object::lemma: | ||
1114 | case object::form: | ||
1115 | case object::pronunciation: | ||
1116 | { | ||
1117 | return (verbly::notion::word %= *this); | ||
1118 | } | ||
1119 | } | ||
1120 | } | ||
1121 | |||
1122 | case object::word: | ||
1123 | { | ||
1124 | switch (singleton_.filterField.getObject()) | ||
1125 | { | ||
1126 | case object::notion: | ||
1127 | { | ||
1128 | return (verbly::word::notion %= *this); | ||
1129 | } | ||
1130 | |||
1131 | case object::undefined: | ||
1132 | case object::word: | ||
1133 | { | ||
1134 | return *this; | ||
1135 | } | ||
1136 | |||
1137 | case object::group: | ||
1138 | case object::frame: | ||
1139 | { | ||
1140 | return (verbly::word::group %= *this); | ||
1141 | } | ||
1142 | |||
1143 | case object::lemma: | ||
1144 | case object::form: | ||
1145 | case object::pronunciation: | ||
1146 | { | ||
1147 | return (verbly::word::lemma %= *this); | ||
1148 | } | ||
1149 | } | ||
1150 | |||
1151 | case object::group: | ||
1152 | { | ||
1153 | switch (singleton_.filterField.getObject()) | ||
1154 | { | ||
1155 | case object::undefined: | ||
1156 | case object::group: | ||
1157 | { | ||
1158 | return *this; | ||
1159 | } | ||
1160 | |||
1161 | case object::notion: | ||
1162 | case object::word: | ||
1163 | case object::lemma: | ||
1164 | case object::form: | ||
1165 | case object::pronunciation: | ||
1166 | { | ||
1167 | return (verbly::group::word %= *this); | ||
1168 | } | ||
1169 | |||
1170 | case object::frame: | ||
1171 | { | ||
1172 | return (verbly::group::frame %= *this); | ||
1173 | } | ||
1174 | } | ||
1175 | } | ||
1176 | |||
1177 | case object::frame: | ||
1178 | { | ||
1179 | switch (singleton_.filterField.getObject()) | ||
1180 | { | ||
1181 | case object::undefined: | ||
1182 | case object::frame: | ||
1183 | { | ||
1184 | return *this; | ||
1185 | } | ||
1186 | |||
1187 | case object::notion: | ||
1188 | case object::word: | ||
1189 | case object::group: | ||
1190 | case object::lemma: | ||
1191 | case object::form: | ||
1192 | case object::pronunciation: | ||
1193 | { | ||
1194 | return (verbly::frame::group %= *this); | ||
1195 | } | ||
1196 | } | ||
1197 | } | ||
1198 | |||
1199 | case object::lemma: | ||
1200 | { | ||
1201 | switch (singleton_.filterField.getObject()) | ||
1202 | { | ||
1203 | case object::notion: | ||
1204 | case object::word: | ||
1205 | case object::group: | ||
1206 | case object::frame: | ||
1207 | { | ||
1208 | return verbly::lemma::word %= *this; | ||
1209 | } | ||
1210 | |||
1211 | case object::undefined: | ||
1212 | case object::lemma: | ||
1213 | { | ||
1214 | return *this; | ||
1215 | } | ||
1216 | |||
1217 | case object::form: | ||
1218 | case object::pronunciation: | ||
1219 | { | ||
1220 | return (verbly::lemma::form(inflection::base) %= *this); | ||
1221 | } | ||
1222 | } | ||
1223 | } | ||
1224 | |||
1225 | case object::form: | ||
1226 | { | ||
1227 | switch (singleton_.filterField.getObject()) | ||
1228 | { | ||
1229 | case object::notion: | ||
1230 | case object::word: | ||
1231 | case object::group: | ||
1232 | case object::frame: | ||
1233 | case object::lemma: | ||
1234 | { | ||
1235 | return verbly::form::lemma(inflection::base) %= *this; | ||
1236 | } | ||
1237 | |||
1238 | case object::undefined: | ||
1239 | case object::form: | ||
1240 | { | ||
1241 | return *this; | ||
1242 | } | ||
1243 | |||
1244 | case object::pronunciation: | ||
1245 | { | ||
1246 | return (verbly::form::pronunciation %= *this); | ||
1247 | } | ||
1248 | } | ||
1249 | } | ||
1250 | |||
1251 | case object::pronunciation: | ||
1252 | { | ||
1253 | switch (singleton_.filterField.getObject()) | ||
1254 | { | ||
1255 | case object::notion: | ||
1256 | case object::word: | ||
1257 | case object::group: | ||
1258 | case object::frame: | ||
1259 | case object::lemma: | ||
1260 | case object::form: | ||
1261 | { | ||
1262 | return verbly::pronunciation::form %= *this; | ||
1263 | } | ||
1264 | |||
1265 | case object::undefined: | ||
1266 | case object::pronunciation: | ||
1267 | { | ||
1268 | return *this; | ||
1269 | } | ||
1270 | } | ||
1271 | } | ||
1272 | } | ||
1273 | } | ||
1274 | } | ||
1275 | |||
1276 | case type::group: | ||
1277 | { | ||
1278 | filter result(group_.orlogic); | ||
1279 | std::map<field, filter> joins; | ||
1280 | |||
1281 | for (const filter& child : group_.children) | ||
1282 | { | ||
1283 | filter normalized = child.normalize(context); | ||
1284 | |||
1285 | // Notably, this does not attempt to merge hierarchal matches. | ||
1286 | switch (normalized.getType()) | ||
1287 | { | ||
1288 | case type::singleton: | ||
1289 | { | ||
1290 | switch (normalized.getComparison()) | ||
1291 | { | ||
1292 | case comparison::matches: | ||
1293 | { | ||
1294 | if (!joins.count(normalized.singleton_.filterField)) | ||
1295 | { | ||
1296 | joins[normalized.getField()] = filter(group_.orlogic); | ||
1297 | } | ||
1298 | |||
1299 | joins.at(normalized.getField()) += std::move(*normalized.singleton_.join); | ||
1300 | |||
1301 | break; | ||
1302 | } | ||
1303 | |||
1304 | case comparison::does_not_match: | ||
1305 | { | ||
1306 | if (!joins.count(normalized.singleton_.filterField)) | ||
1307 | { | ||
1308 | joins[normalized.getField()] = filter(group_.orlogic); | ||
1309 | } | ||
1310 | |||
1311 | joins.at(normalized.getField()) += !*normalized.singleton_.join; | ||
1312 | |||
1313 | break; | ||
1314 | } | ||
1315 | |||
1316 | case comparison::int_equals: | ||
1317 | case comparison::int_does_not_equal: | ||
1318 | case comparison::int_is_at_least: | ||
1319 | case comparison::int_is_greater_than: | ||
1320 | case comparison::int_is_at_most: | ||
1321 | case comparison::int_is_less_than: | ||
1322 | case comparison::boolean_equals: | ||
1323 | case comparison::string_equals: | ||
1324 | case comparison::string_does_not_equal: | ||
1325 | case comparison::string_is_like: | ||
1326 | case comparison::string_is_not_like: | ||
1327 | case comparison::is_null: | ||
1328 | case comparison::is_not_null: | ||
1329 | case comparison::hierarchally_matches: | ||
1330 | case comparison::does_not_hierarchally_match: | ||
1331 | { | ||
1332 | result += std::move(normalized); | ||
1333 | |||
1334 | break; | ||
1335 | } | ||
1336 | } | ||
1337 | |||
1338 | break; | ||
1339 | } | ||
1340 | |||
1341 | case type::group: | ||
1342 | case type::empty: | ||
1343 | { | ||
1344 | result += std::move(normalized); | ||
1345 | |||
1346 | break; | ||
1347 | } | ||
1348 | } | ||
1349 | } | ||
1350 | |||
1351 | for (auto& mapping : joins) | ||
1352 | { | ||
1353 | const field& joinOn = mapping.first; | ||
1354 | filter& joinCondition = mapping.second; | ||
1355 | |||
1356 | result += (joinOn %= joinCondition.normalize(joinOn.getJoinObject())); | ||
1357 | } | ||
1358 | |||
1359 | return result; | ||
1360 | } | ||
1361 | } | ||
1362 | } | ||
1363 | } | ||
1364 | |||
1365 | }; | ||
diff --git a/lib/filter.h b/lib/filter.h new file mode 100644 index 0000000..d213d7a --- /dev/null +++ b/lib/filter.h | |||
@@ -0,0 +1,143 @@ | |||
1 | #ifndef FILTER_H_932BA9C6 | ||
2 | #define FILTER_H_932BA9C6 | ||
3 | |||
4 | #include <list> | ||
5 | #include <string> | ||
6 | #include <memory> | ||
7 | #include "field.h" | ||
8 | #include "enums.h" | ||
9 | |||
10 | namespace verbly { | ||
11 | |||
// A query condition. A filter is one of three things, recorded in type_:
//  - empty:     no condition at all;
//  - singleton: one comparison on one field (optionally carrying a constant
//               or a nested filter for joins);
//  - group:     a list of child filters plus an orlogic flag.
// The payload lives in an anonymous union, so its members are constructed
// and destroyed manually by the out-of-line special member functions.
class filter {
public:
  // Which member of the payload union is active.
  enum class type {
    empty,
    singleton,
    group
  };

  // The comparison a singleton filter performs. Each constructor below
  // accepts only the enumerators that fit its argument type.
  enum class comparison {
    int_equals,
    int_does_not_equal,
    int_is_at_least,
    int_is_greater_than,
    int_is_at_most,
    int_is_less_than,
    boolean_equals,
    string_equals,
    string_does_not_equal,
    string_is_like,
    string_is_not_like,
    is_null,
    is_not_null,
    matches,
    does_not_match,
    hierarchally_matches,
    does_not_hierarchally_match
  };

  // Copy and move constructors

  filter(const filter& other);
  filter(filter&& other);

  // Assignment

  // Takes its argument by value, so it serves both copy and move assignment.
  filter& operator=(filter other);

  // Swap

  friend void swap(filter& first, filter& second);

  // Destructor

  ~filter();

  // Accessors

  type getType() const
  {
    return type_;
  }

  // Empty

  filter();

  // Singleton

  // Each of these throws std::domain_error when the field's kind does not
  // suit the constructor, and std::invalid_argument when filterType does not
  // suit the argument type.
  filter(field filterField, comparison filterType, int filterValue);
  filter(field filterField, comparison filterType, std::string filterValue);
  filter(field filterField, comparison filterType, bool filterValue);
  // Nullity test (is_null / is_not_null); the field must be nullable.
  filter(field filterField, comparison filterType);
  // Join test; joinCondition is normalized before it is stored.
  filter(field joinOn, comparison filterType, filter joinCondition);

  // The accessors below throw std::domain_error when the filter does not
  // hold the requested piece of data.

  field getField() const;

  comparison getComparison() const;

  filter getJoinCondition() const;

  std::string getStringArgument() const;

  int getIntegerArgument() const;

  bool getBooleanArgument() const;

  // Group

  // Creates a group with no children. explicit, so a bool never converts to
  // a filter implicitly.
  explicit filter(bool orlogic);

  bool getOrlogic() const;

  // Append a child; both throw std::domain_error on non-group filters.
  filter operator+(filter condition) const;

  filter& operator+=(filter condition);

  using const_iterator = std::list<filter>::const_iterator;

  // Iterate over a group's children; both throw std::domain_error on
  // non-group filters.
  const_iterator begin() const;

  const_iterator end() const;

  // Negation

  filter operator!() const;

  // Groupifying

  // && builds a group with orlogic == false, || one with orlogic == true.
  // An empty left-hand side yields the right-hand side unchanged.
  filter operator&&(filter condition) const;
  filter operator||(filter condition) const;

  filter& operator&=(filter condition);
  filter& operator|=(filter condition);

  // Utility

  // Rewrites the filter so it applies to objects of type context, wrapping
  // conditions on other objects in joins.
  filter normalize(object context) const;

private:
  // Payload; type_ says which of singleton_ / group_ (if any) is active.
  union {
    struct {
      field filterField;
      comparison filterType;
      // Which member is active follows from filterType; none is active for
      // is_null / is_not_null.
      union {
        std::unique_ptr<filter> join;
        std::string stringValue;
        int intValue;
        bool boolValue;
      };
    } singleton_;
    struct {
      std::list<filter> children;
      bool orlogic;
    } group_;
  };
  type type_ = type::empty;

};
140 | |||
141 | }; | ||
142 | |||
143 | #endif /* end of include guard: FILTER_H_932BA9C6 */ | ||
diff --git a/lib/form.cpp b/lib/form.cpp new file mode 100644 index 0000000..8ba3bd7 --- /dev/null +++ b/lib/form.cpp | |||
@@ -0,0 +1,53 @@ | |||
1 | #include "form.h" | ||
2 | #include <sqlite3.h> | ||
3 | #include "filter.h" | ||
4 | #include "pronunciation.h" | ||
5 | #include "database.h" | ||
6 | #include "query.h" | ||
7 | |||
8 | namespace verbly { | ||
9 | |||
10 | const object form::objectType = object::form; | ||
11 | |||
12 | const std::list<std::string> form::select = {"form_id", "form", "complexity", "proper"}; | ||
13 | |||
14 | const field form::id = field::integerField(object::form, "form_id"); | ||
15 | const field form::text = field::stringField(object::form, "form"); | ||
16 | const field form::complexity = field::integerField(object::form, "complexity"); | ||
17 | const field form::proper = field::booleanField(object::form, "proper"); | ||
18 | |||
19 | const field form::pronunciation = field::joinThrough(object::form, "form_id", object::pronunciation, "forms_pronunciations", "pronunciation_id"); | ||
20 | |||
21 | const field form::lemmaJoin = field::joinField(object::form, "form_id", object::lemma); | ||
22 | const field form::inflectionCategory = field::integerField("lemmas_forms", "category"); | ||
23 | |||
24 | form::form(const database& db, sqlite3_stmt* row) : db_(&db), valid_(true) | ||
25 | { | ||
26 | id_ = sqlite3_column_int(row, 0); | ||
27 | text_ = std::string(reinterpret_cast<const char*>(sqlite3_column_text(row, 1))); | ||
28 | complexity_ = sqlite3_column_int(row, 2); | ||
29 | proper_ = (sqlite3_column_int(row, 3) == 1); | ||
30 | } | ||
31 | |||
32 | filter operator%=(form::inflection_field check, filter joinCondition) | ||
33 | { | ||
34 | return (form::lemmaJoin %= (joinCondition && (form::inflectionCategory == check.getCategory()))); | ||
35 | } | ||
36 | |||
37 | const std::vector<pronunciation>& form::getPronunciations() const | ||
38 | { | ||
39 | if (!valid_) | ||
40 | { | ||
41 | throw std::domain_error("Bad access to uninitialized form"); | ||
42 | } | ||
43 | |||
44 | if (!initializedPronunciations_) | ||
45 | { | ||
46 | pronunciations_ = db_->pronunciations(pronunciation::form %= *this, false, -1).all(); | ||
47 | initializedPronunciations_ = true; | ||
48 | } | ||
49 | |||
50 | return pronunciations_; | ||
51 | } | ||
52 | |||
53 | }; | ||
diff --git a/lib/form.h b/lib/form.h new file mode 100644 index 0000000..c6a1353 --- /dev/null +++ b/lib/form.h | |||
@@ -0,0 +1,149 @@ | |||
1 | #ifndef FORM_H_3A6C962C | ||
2 | #define FORM_H_3A6C962C | ||
3 | |||
4 | #include <list> | ||
5 | #include <vector> | ||
6 | #include <string> | ||
7 | #include <stdexcept> | ||
8 | #include "field.h" | ||
9 | #include "filter.h" | ||
10 | |||
11 | struct sqlite3_stmt; | ||
12 | |||
13 | namespace verbly { | ||
14 | |||
15 | class pronunciation; | ||
16 | class database; | ||
17 | |||
18 | class form { | ||
19 | public: | ||
20 | |||
21 | // Default constructor | ||
22 | |||
23 | form() = default; | ||
24 | |||
25 | // Construct from database | ||
26 | |||
27 | form(const database& db, sqlite3_stmt* row); | ||
28 | |||
29 | // Accessors | ||
30 | |||
31 | operator bool() const | ||
32 | { | ||
33 | return valid_; | ||
34 | } | ||
35 | |||
36 | int getId() const | ||
37 | { | ||
38 | if (!valid_) | ||
39 | { | ||
40 | throw std::domain_error("Bad access to uninitialized form"); | ||
41 | } | ||
42 | |||
43 | return id_; | ||
44 | } | ||
45 | |||
46 | std::string getText() const | ||
47 | { | ||
48 | if (!valid_) | ||
49 | { | ||
50 | throw std::domain_error("Bad access to uninitialized form"); | ||
51 | } | ||
52 | |||
53 | return text_; | ||
54 | } | ||
55 | |||
56 | int getComplexity() const | ||
57 | { | ||
58 | if (!valid_) | ||
59 | { | ||
60 | throw std::domain_error("Bad access to uninitialized form"); | ||
61 | } | ||
62 | |||
63 | return complexity_; | ||
64 | } | ||
65 | |||
66 | bool isProper() const | ||
67 | { | ||
68 | if (!valid_) | ||
69 | { | ||
70 | throw std::domain_error("Bad access to uninitialized form"); | ||
71 | } | ||
72 | |||
73 | return proper_; | ||
74 | } | ||
75 | |||
76 | const std::vector<pronunciation>& getPronunciations() const; | ||
77 | |||
78 | // Type info | ||
79 | |||
80 | static const object objectType; | ||
81 | |||
82 | static const std::list<std::string> select; | ||
83 | |||
84 | // Query fields | ||
85 | |||
86 | static const field id; | ||
87 | static const field text; | ||
88 | static const field complexity; | ||
89 | static const field proper; | ||
90 | |||
91 | operator filter() const | ||
92 | { | ||
93 | if (!valid_) | ||
94 | { | ||
95 | throw std::domain_error("Bad access to uninitialized form"); | ||
96 | } | ||
97 | |||
98 | return (id == id_); | ||
99 | } | ||
100 | |||
101 | // Relationships to other objects | ||
102 | |||
103 | static const field pronunciation; | ||
104 | |||
105 | class inflection_field { | ||
106 | public: | ||
107 | |||
108 | inflection_field(inflection category) : category_(category) | ||
109 | { | ||
110 | } | ||
111 | |||
112 | const inflection getCategory() const | ||
113 | { | ||
114 | return category_; | ||
115 | } | ||
116 | |||
117 | private: | ||
118 | |||
119 | const inflection category_; | ||
120 | }; | ||
121 | |||
122 | static const inflection_field lemma(inflection category) | ||
123 | { | ||
124 | return inflection_field(category); | ||
125 | } | ||
126 | |||
127 | friend filter operator%=(form::inflection_field check, filter joinCondition); | ||
128 | |||
129 | private: | ||
130 | bool valid_ = false; | ||
131 | |||
132 | int id_; | ||
133 | std::string text_; | ||
134 | int complexity_ ; | ||
135 | bool proper_; | ||
136 | |||
137 | const database* db_; | ||
138 | |||
139 | mutable bool initializedPronunciations_ = false; | ||
140 | mutable std::vector<class pronunciation> pronunciations_; | ||
141 | |||
142 | static const field lemmaJoin; | ||
143 | static const field inflectionCategory; | ||
144 | |||
145 | }; | ||
146 | |||
147 | }; | ||
148 | |||
149 | #endif /* end of include guard: FORM_H_3A6C962C */ | ||
diff --git a/lib/frame.cpp b/lib/frame.cpp index ccec81b..bc3f842 100644 --- a/lib/frame.cpp +++ b/lib/frame.cpp | |||
@@ -1,320 +1,21 @@ | |||
1 | #include "verbly.h" | 1 | #include "frame.h" |
2 | #include <sqlite3.h> | ||
2 | 3 | ||
3 | namespace verbly { | 4 | namespace verbly { |
4 | 5 | ||
5 | frame::selrestr::type frame::selrestr::get_type() const | 6 | const object frame::objectType = object::frame; |
6 | { | ||
7 | return _type; | ||
8 | } | ||
9 | |||
10 | frame::selrestr::selrestr(const selrestr& other) | ||
11 | { | ||
12 | _type = other._type; | ||
13 | |||
14 | switch (_type) | ||
15 | { | ||
16 | case frame::selrestr::type::singleton: | ||
17 | { | ||
18 | _singleton.pos = other._singleton.pos; | ||
19 | new(&_singleton.restriction) std::string(other._singleton.restriction); | ||
20 | |||
21 | break; | ||
22 | } | ||
23 | |||
24 | case frame::selrestr::type::group: | ||
25 | { | ||
26 | new(&_group.children) std::list<selrestr>(other._group.children); | ||
27 | _group.orlogic = other._group.orlogic; | ||
28 | |||
29 | break; | ||
30 | } | ||
31 | |||
32 | case frame::selrestr::type::empty: | ||
33 | { | ||
34 | // Nothing! | ||
35 | |||
36 | break; | ||
37 | } | ||
38 | } | ||
39 | } | ||
40 | |||
41 | frame::selrestr::~selrestr() | ||
42 | { | ||
43 | switch (_type) | ||
44 | { | ||
45 | case frame::selrestr::type::singleton: | ||
46 | { | ||
47 | using string_type = std::string; | ||
48 | _singleton.restriction.~string_type(); | ||
49 | |||
50 | break; | ||
51 | } | ||
52 | |||
53 | case frame::selrestr::type::group: | ||
54 | { | ||
55 | using list_type = std::list<selrestr>; | ||
56 | _group.children.~list_type(); | ||
57 | |||
58 | break; | ||
59 | } | ||
60 | |||
61 | case frame::selrestr::type::empty: | ||
62 | { | ||
63 | // Nothing! | ||
64 | |||
65 | break; | ||
66 | } | ||
67 | } | ||
68 | } | ||
69 | |||
70 | frame::selrestr& frame::selrestr::operator=(const selrestr& other) | ||
71 | { | ||
72 | this->~selrestr(); | ||
73 | |||
74 | _type = other._type; | ||
75 | |||
76 | switch (_type) | ||
77 | { | ||
78 | case frame::selrestr::type::singleton: | ||
79 | { | ||
80 | _singleton.pos = other._singleton.pos; | ||
81 | new(&_singleton.restriction) std::string(other._singleton.restriction); | ||
82 | |||
83 | break; | ||
84 | } | ||
85 | |||
86 | case frame::selrestr::type::group: | ||
87 | { | ||
88 | new(&_group.children) std::list<selrestr>(other._group.children); | ||
89 | _group.orlogic = other._group.orlogic; | ||
90 | |||
91 | break; | ||
92 | } | ||
93 | |||
94 | case frame::selrestr::type::empty: | ||
95 | { | ||
96 | // Nothing! | ||
97 | |||
98 | break; | ||
99 | } | ||
100 | } | ||
101 | |||
102 | return *this; | ||
103 | } | ||
104 | |||
105 | frame::selrestr::selrestr() : _type(frame::selrestr::type::empty) | ||
106 | { | ||
107 | |||
108 | } | ||
109 | |||
110 | frame::selrestr::selrestr(std::string restriction, bool pos) : _type(frame::selrestr::type::singleton) | ||
111 | { | ||
112 | new(&_singleton.restriction) std::string(restriction); | ||
113 | _singleton.pos = pos; | ||
114 | } | ||
115 | |||
116 | std::string frame::selrestr::get_restriction() const | ||
117 | { | ||
118 | assert(_type == frame::selrestr::type::singleton); | ||
119 | |||
120 | return _singleton.restriction; | ||
121 | } | ||
122 | |||
123 | bool frame::selrestr::get_pos() const | ||
124 | { | ||
125 | assert(_type == frame::selrestr::type::singleton); | ||
126 | |||
127 | return _singleton.pos; | ||
128 | } | ||
129 | |||
130 | frame::selrestr::selrestr(std::list<selrestr> children, bool orlogic) : _type(frame::selrestr::type::group) | ||
131 | { | ||
132 | new(&_group.children) std::list<selrestr>(children); | ||
133 | _group.orlogic = orlogic; | ||
134 | } | ||
135 | |||
136 | std::list<frame::selrestr> frame::selrestr::get_children() const | ||
137 | { | ||
138 | assert(_type == frame::selrestr::type::group); | ||
139 | |||
140 | return _group.children; | ||
141 | } | ||
142 | |||
143 | std::list<frame::selrestr>::const_iterator frame::selrestr::begin() const | ||
144 | { | ||
145 | assert(_type == frame::selrestr::type::group); | ||
146 | |||
147 | return _group.children.begin(); | ||
148 | } | ||
149 | |||
150 | std::list<frame::selrestr>::const_iterator frame::selrestr::end() const | ||
151 | { | ||
152 | assert(_type == frame::selrestr::type::group); | ||
153 | |||
154 | return _group.children.end(); | ||
155 | } | ||
156 | |||
157 | bool frame::selrestr::get_orlogic() const | ||
158 | { | ||
159 | assert(_type == frame::selrestr::type::group); | ||
160 | |||
161 | return _group.orlogic; | ||
162 | } | ||
163 | |||
164 | frame::part::type frame::part::get_type() const | ||
165 | { | ||
166 | return _type; | ||
167 | } | ||
168 | |||
169 | frame::part::part() | ||
170 | { | ||
171 | |||
172 | } | ||
173 | 7 | ||
174 | frame::part::part(const part& other) | 8 | const std::list<std::string> frame::select = {"frame_id", "data"}; |
175 | { | ||
176 | _type = other._type; | ||
177 | |||
178 | switch (_type) | ||
179 | { | ||
180 | case frame::part::type::noun_phrase: | ||
181 | { | ||
182 | new(&_noun_phrase.role) std::string(other._noun_phrase.role); | ||
183 | new(&_noun_phrase.selrestrs) selrestr(other._noun_phrase.selrestrs); | ||
184 | new(&_noun_phrase.synrestrs) std::set<std::string>(other._noun_phrase.synrestrs); | ||
185 | |||
186 | break; | ||
187 | } | ||
188 | |||
189 | case frame::part::type::literal_preposition: | ||
190 | { | ||
191 | new(&_literal_preposition.choices) std::vector<std::string>(other._literal_preposition.choices); | ||
192 | |||
193 | break; | ||
194 | } | ||
195 | |||
196 | case frame::part::type::selection_preposition: | ||
197 | { | ||
198 | new(&_selection_preposition.preprestrs) std::vector<std::string>(other._selection_preposition.preprestrs); | ||
199 | |||
200 | break; | ||
201 | } | ||
202 | |||
203 | case frame::part::type::literal: | ||
204 | { | ||
205 | new(&_literal.lexval) std::string(other._literal.lexval); | ||
206 | |||
207 | break; | ||
208 | } | ||
209 | |||
210 | default: | ||
211 | { | ||
212 | // Nothing! | ||
213 | |||
214 | break; | ||
215 | } | ||
216 | } | ||
217 | } | ||
218 | 9 | ||
219 | frame::part::~part() | 10 | const field frame::id = field::integerField(object::frame, "frame_id"); |
220 | { | ||
221 | switch (_type) | ||
222 | { | ||
223 | case frame::part::type::noun_phrase: | ||
224 | { | ||
225 | using string_type = std::string; | ||
226 | using set_type = std::set<std::string>; | ||
227 | |||
228 | _noun_phrase.role.~string_type(); | ||
229 | _noun_phrase.selrestrs.~selrestr(); | ||
230 | _noun_phrase.synrestrs.~set_type(); | ||
231 | |||
232 | break; | ||
233 | } | ||
234 | |||
235 | case frame::part::type::literal_preposition: | ||
236 | { | ||
237 | using vector_type = std::vector<std::string>; | ||
238 | _literal_preposition.choices.~vector_type(); | ||
239 | |||
240 | break; | ||
241 | } | ||
242 | |||
243 | case frame::part::type::selection_preposition: | ||
244 | { | ||
245 | using vector_type = std::vector<std::string>; | ||
246 | _selection_preposition.preprestrs.~vector_type(); | ||
247 | |||
248 | break; | ||
249 | } | ||
250 | |||
251 | case frame::part::type::literal: | ||
252 | { | ||
253 | using string_type = std::string; | ||
254 | _literal.lexval.~string_type(); | ||
255 | |||
256 | break; | ||
257 | } | ||
258 | |||
259 | default: | ||
260 | { | ||
261 | // Nothing! | ||
262 | |||
263 | break; | ||
264 | } | ||
265 | } | ||
266 | } | ||
267 | 11 | ||
268 | std::string frame::part::get_role() const | 12 | const field frame::group = field::joinThrough(object::frame, "frame_id", object::group, "groups_frames", "group_id"); |
269 | { | ||
270 | assert(_type == frame::part::type::noun_phrase); | ||
271 | |||
272 | return _noun_phrase.role; | ||
273 | } | ||
274 | 13 | ||
275 | frame::selrestr frame::part::get_selrestrs() const | 14 | frame::frame(const database& db, sqlite3_stmt* row) : db_(&db), valid_(true) |
276 | { | 15 | { |
277 | assert(_type == frame::part::type::noun_phrase); | 16 | id_ = sqlite3_column_int(row, 0); |
278 | 17 | ||
279 | return _noun_phrase.selrestrs; | 18 | // TODO: Initialize frame data from row. |
280 | } | ||
281 | |||
282 | std::set<std::string> frame::part::get_synrestrs() const | ||
283 | { | ||
284 | assert(_type == frame::part::type::noun_phrase); | ||
285 | |||
286 | return _noun_phrase.synrestrs; | ||
287 | } | ||
288 | |||
289 | std::vector<std::string> frame::part::get_choices() const | ||
290 | { | ||
291 | assert(_type == frame::part::type::literal_preposition); | ||
292 | |||
293 | return _literal_preposition.choices; | ||
294 | } | ||
295 | |||
296 | std::vector<std::string> frame::part::get_preprestrs() const | ||
297 | { | ||
298 | assert(_type == frame::part::type::selection_preposition); | ||
299 | |||
300 | return _selection_preposition.preprestrs; | ||
301 | } | ||
302 | |||
303 | std::string frame::part::get_literal() const | ||
304 | { | ||
305 | assert(_type == frame::part::type::literal); | ||
306 | |||
307 | return _literal.lexval; | ||
308 | } | ||
309 | |||
310 | std::vector<frame::part> frame::parts() const | ||
311 | { | ||
312 | return _parts; | ||
313 | } | ||
314 | |||
315 | std::map<std::string, frame::selrestr> frame::roles() const | ||
316 | { | ||
317 | return _roles; | ||
318 | } | 19 | } |
319 | 20 | ||
320 | }; | 21 | }; |
diff --git a/lib/frame.h b/lib/frame.h index fa57e1b..68a4346 100644 --- a/lib/frame.h +++ b/lib/frame.h | |||
@@ -1,118 +1,78 @@ | |||
1 | #ifndef FRAME_H_9A5D90FE | 1 | #ifndef FRAME_H_EA29065A |
2 | #define FRAME_H_9A5D90FE | 2 | #define FRAME_H_EA29065A |
3 | |||
4 | #include <stdexcept> | ||
5 | #include <list> | ||
6 | #include "field.h" | ||
7 | #include "filter.h" | ||
8 | |||
9 | struct sqlite3_stmt; | ||
3 | 10 | ||
4 | namespace verbly { | 11 | namespace verbly { |
5 | 12 | ||
6 | class frame_query; | 13 | class database; |
7 | 14 | ||
8 | class frame { | 15 | class frame { |
9 | public: | 16 | public: |
10 | class selrestr { | 17 | |
11 | public: | 18 | // Default constructor |
12 | enum class type { | 19 | |
13 | empty, | 20 | frame() = default; |
14 | singleton, | 21 | |
15 | group | 22 | // Construct from database |
16 | }; | 23 | |
17 | 24 | frame(const database& db, sqlite3_stmt* row); | |
18 | type get_type() const; | 25 | |
19 | selrestr(const selrestr& other); | 26 | // Accessors |
20 | ~selrestr(); | 27 | |
21 | selrestr& operator=(const selrestr& other); | 28 | operator bool() const |
22 | 29 | { | |
23 | // Empty | 30 | return valid_; |
24 | selrestr(); | 31 | } |
25 | 32 | ||
26 | // Singleton | 33 | int getId() const |
27 | selrestr(std::string restriction, bool pos); | 34 | { |
28 | std::string get_restriction() const; | 35 | if (!valid_) |
29 | bool get_pos() const; | 36 | { |
30 | 37 | throw std::domain_error("Bad access to uninitialized frame"); | |
31 | // Group | 38 | } |
32 | selrestr(std::list<selrestr> children, bool orlogic); | ||
33 | std::list<selrestr> get_children() const; | ||
34 | std::list<selrestr>::const_iterator begin() const; | ||
35 | std::list<selrestr>::const_iterator end() const; | ||
36 | bool get_orlogic() const; | ||
37 | |||
38 | private: | ||
39 | union { | ||
40 | struct { | ||
41 | bool pos; | ||
42 | std::string restriction; | ||
43 | } _singleton; | ||
44 | struct { | ||
45 | std::list<selrestr> children; | ||
46 | bool orlogic; | ||
47 | } _group; | ||
48 | }; | ||
49 | type _type; | ||
50 | }; | ||
51 | 39 | ||
52 | class part { | 40 | return id_; |
53 | public: | 41 | } |
54 | enum class type { | 42 | |
55 | noun_phrase, | 43 | // Type info |
56 | verb, | 44 | |
57 | literal_preposition, | 45 | static const object objectType; |
58 | selection_preposition, | 46 | |
59 | adjective, | 47 | static const std::list<std::string> select; |
60 | adverb, | 48 | |
61 | literal | 49 | // Query fields |
62 | }; | 50 | |
63 | 51 | static const field id; | |
64 | type get_type() const; | 52 | |
65 | part(const part& other); | 53 | operator filter() const |
66 | ~part(); | 54 | { |
67 | 55 | if (!valid_) | |
68 | // Noun phrase | 56 | { |
69 | std::string get_role() const; | 57 | throw std::domain_error("Bad access to uninitialized frame"); |
70 | selrestr get_selrestrs() const; | 58 | } |
71 | std::set<std::string> get_synrestrs() const; | ||
72 | |||
73 | // Literal preposition | ||
74 | std::vector<std::string> get_choices() const; | ||
75 | |||
76 | // Selection preposition | ||
77 | std::vector<std::string> get_preprestrs() const; | ||
78 | |||
79 | // Literal | ||
80 | std::string get_literal() const; | ||
81 | |||
82 | private: | ||
83 | friend class frame_query; | ||
84 | |||
85 | part(); | ||
86 | |||
87 | union { | ||
88 | struct { | ||
89 | std::string role; | ||
90 | selrestr selrestrs; | ||
91 | std::set<std::string> synrestrs; | ||
92 | } _noun_phrase; | ||
93 | struct { | ||
94 | std::vector<std::string> choices; | ||
95 | } _literal_preposition; | ||
96 | struct { | ||
97 | std::vector<std::string> preprestrs; | ||
98 | } _selection_preposition; | ||
99 | struct { | ||
100 | std::string lexval; | ||
101 | } _literal; | ||
102 | }; | ||
103 | type _type; | ||
104 | }; | ||
105 | 59 | ||
106 | std::vector<part> parts() const; | 60 | return (id == id_); |
107 | std::map<std::string, selrestr> roles() const; | 61 | } |
108 | 62 | ||
109 | private: | 63 | // Relationships to other objects |
110 | friend class frame_query; | 64 | |
111 | 65 | static const field group; | |
112 | std::vector<part> _parts; | 66 | |
113 | std::map<std::string, selrestr> _roles; | 67 | private: |
68 | bool valid_ = false; | ||
69 | |||
70 | int id_; | ||
71 | |||
72 | const database* db_; | ||
73 | |||
114 | }; | 74 | }; |
115 | 75 | ||
116 | }; | 76 | }; |
117 | 77 | ||
118 | #endif /* end of include guard: FRAME_H_9A5D90FE */ | 78 | #endif /* end of include guard: FRAME_H_EA29065A */ |
diff --git a/lib/group.cpp b/lib/group.cpp new file mode 100644 index 0000000..8b6d985 --- /dev/null +++ b/lib/group.cpp | |||
@@ -0,0 +1,43 @@ | |||
1 | #include "group.h" | ||
2 | #include <sqlite3.h> | ||
3 | #include "frame.h" | ||
4 | #include "database.h" | ||
5 | #include "query.h" | ||
6 | |||
7 | namespace verbly { | ||
8 | |||
9 | const object group::objectType = object::group; | ||
10 | |||
11 | const std::list<std::string> group::select = {"group_id", "data"}; | ||
12 | |||
13 | const field group::id = field::integerField(object::group, "group_id"); | ||
14 | |||
15 | const field group::frame = field::joinThrough(object::group, "group_id", object::frame, "groups_frames", "frame_id"); | ||
16 | const field group::word = field::joinField(object::group, "group_id", object::word); | ||
17 | |||
18 | group::group(const database& db, sqlite3_stmt* row) : db_(&db), valid_(true) | ||
19 | { | ||
20 | id_ = sqlite3_column_int(row, 0); | ||
21 | |||
22 | // TODO: Initialize role data from row. | ||
23 | } | ||
24 | |||
25 | const std::vector<frame>& group::getFrames() const | ||
26 | { | ||
27 | if (!valid_) | ||
28 | { | ||
29 | throw std::domain_error("Bad access to uninitialized group"); | ||
30 | } | ||
31 | |||
32 | if (!initializedFrames_) | ||
33 | { | ||
34 | frames_ = db_->frames(frame::group %= *this, false, -1).all(); | ||
35 | |||
36 | initializedFrames_ = true; | ||
37 | } | ||
38 | |||
39 | return frames_; | ||
40 | } | ||
41 | |||
42 | }; | ||
43 | |||
diff --git a/lib/group.h b/lib/group.h new file mode 100644 index 0000000..dd53503 --- /dev/null +++ b/lib/group.h | |||
@@ -0,0 +1,87 @@ | |||
1 | #ifndef GROUP_H_BD6933C0 | ||
2 | #define GROUP_H_BD6933C0 | ||
3 | |||
4 | #include <stdexcept> | ||
5 | #include <list> | ||
6 | #include <vector> | ||
7 | #include "field.h" | ||
8 | #include "filter.h" | ||
9 | |||
10 | struct sqlite3_stmt; | ||
11 | |||
12 | namespace verbly { | ||
13 | |||
14 | class database; | ||
15 | class frame; | ||
16 | |||
17 | class group { | ||
18 | public: | ||
19 | |||
20 | // Default constructor | ||
21 | |||
22 | group() = default; | ||
23 | |||
24 | // Construct from database | ||
25 | |||
26 | group(const database& db, sqlite3_stmt* row); | ||
27 | |||
28 | // Accessors | ||
29 | |||
30 | operator bool() const | ||
31 | { | ||
32 | return valid_; | ||
33 | } | ||
34 | |||
35 | int getId() const | ||
36 | { | ||
37 | if (!valid_) | ||
38 | { | ||
39 | throw std::domain_error("Bad access to uninitialized group"); | ||
40 | } | ||
41 | |||
42 | return id_; | ||
43 | } | ||
44 | |||
45 | const std::vector<frame>& getFrames() const; | ||
46 | |||
47 | // Type info | ||
48 | |||
49 | static const object objectType; | ||
50 | |||
51 | static const std::list<std::string> select; | ||
52 | |||
53 | // Query fields | ||
54 | |||
55 | static const field id; | ||
56 | |||
57 | operator filter() const | ||
58 | { | ||
59 | if (!valid_) | ||
60 | { | ||
61 | throw std::domain_error("Bad access to uninitialized group"); | ||
62 | } | ||
63 | |||
64 | return (id == id_); | ||
65 | } | ||
66 | |||
67 | // Relationships to other objects | ||
68 | |||
69 | static const field frame; | ||
70 | |||
71 | static const field word; | ||
72 | |||
73 | private: | ||
74 | bool valid_ = false; | ||
75 | |||
76 | int id_; | ||
77 | |||
78 | const database* db_; | ||
79 | |||
80 | mutable bool initializedFrames_ = false; | ||
81 | mutable std::vector<class frame> frames_; | ||
82 | |||
83 | }; | ||
84 | |||
85 | }; | ||
86 | |||
87 | #endif /* end of include guard: GROUP_H_BD6933C0 */ | ||
diff --git a/lib/lemma.cpp b/lib/lemma.cpp new file mode 100644 index 0000000..f9e9fcc --- /dev/null +++ b/lib/lemma.cpp | |||
@@ -0,0 +1,69 @@ | |||
1 | #include "lemma.h" | ||
2 | #include <sqlite3.h> | ||
3 | #include "database.h" | ||
4 | #include "query.h" | ||
5 | |||
6 | namespace verbly { | ||
7 | |||
8 | const object lemma::objectType = object::lemma; | ||
9 | |||
10 | const std::list<std::string> lemma::select = {"lemma_id"}; | ||
11 | |||
12 | const field lemma::id = field::integerField(object::lemma, "lemma_id"); | ||
13 | |||
14 | const field lemma::word = field::joinField(object::lemma, "lemma_id", object::word); | ||
15 | |||
16 | const field lemma::formJoin = field::joinField(object::lemma, "form_id", object::form); | ||
17 | const field lemma::inflectionCategory = field::integerField(object::lemma, "category"); | ||
18 | |||
19 | filter operator%=(lemma::inflection_field check, filter joinCondition) | ||
20 | { | ||
21 | return (lemma::formJoin %= joinCondition) && (lemma::inflectionCategory == check.getCategory()); | ||
22 | } | ||
23 | |||
24 | lemma::lemma(const database& db, sqlite3_stmt* row) : db_(&db), valid_(true) | ||
25 | { | ||
26 | id_ = sqlite3_column_int(row, 0); | ||
27 | } | ||
28 | |||
29 | const form& lemma::getBaseForm() const | ||
30 | { | ||
31 | if (!valid_) | ||
32 | { | ||
33 | throw std::domain_error("Bad access to uninitialized lemma"); | ||
34 | } | ||
35 | |||
36 | if (!forms_.count(inflection::base)) | ||
37 | { | ||
38 | initializeForm(inflection::base); | ||
39 | } | ||
40 | |||
41 | return forms_.at(inflection::base).front(); | ||
42 | } | ||
43 | |||
44 | bool lemma::hasInflection(inflection category) const | ||
45 | { | ||
46 | return !getInflections(category).empty(); | ||
47 | } | ||
48 | |||
49 | const std::vector<form>& lemma::getInflections(inflection category) const | ||
50 | { | ||
51 | if (!valid_) | ||
52 | { | ||
53 | throw std::domain_error("Bad access to uninitialized lemma"); | ||
54 | } | ||
55 | |||
56 | if (!forms_.count(category)) | ||
57 | { | ||
58 | initializeForm(category); | ||
59 | } | ||
60 | |||
61 | return forms_.at(category); | ||
62 | } | ||
63 | |||
64 | void lemma::initializeForm(inflection infl) const | ||
65 | { | ||
66 | forms_[infl] = db_->forms(form::lemma(infl) %= *this, false, -1).all(); | ||
67 | } | ||
68 | |||
69 | }; | ||
diff --git a/lib/lemma.h b/lib/lemma.h new file mode 100644 index 0000000..9a07f16 --- /dev/null +++ b/lib/lemma.h | |||
@@ -0,0 +1,120 @@ | |||
1 | #ifndef LEMMA_H_0A180D30 | ||
2 | #define LEMMA_H_0A180D30 | ||
3 | |||
4 | #include <stdexcept> | ||
5 | #include <vector> | ||
6 | #include <list> | ||
7 | #include <map> | ||
8 | #include "field.h" | ||
9 | #include "enums.h" | ||
10 | #include "filter.h" | ||
11 | |||
12 | struct sqlite3_stmt; | ||
13 | |||
14 | namespace verbly { | ||
15 | |||
16 | class form; | ||
17 | class database; | ||
18 | |||
19 | class lemma { | ||
20 | public: | ||
21 | |||
22 | // Default constructor | ||
23 | |||
24 | lemma() = default; | ||
25 | |||
26 | // Construct from database | ||
27 | |||
28 | lemma(const database& db, sqlite3_stmt* row); | ||
29 | |||
30 | // Accessors | ||
31 | |||
32 | operator bool() const | ||
33 | { | ||
34 | return valid_; | ||
35 | } | ||
36 | |||
37 | int getId() const | ||
38 | { | ||
39 | if (!valid_) | ||
40 | { | ||
41 | throw std::domain_error("Bad access to uninitialized lemma"); | ||
42 | } | ||
43 | |||
44 | return id_; | ||
45 | } | ||
46 | |||
47 | const form& getBaseForm() const; | ||
48 | |||
49 | bool hasInflection(inflection category) const; | ||
50 | |||
51 | const std::vector<form>& getInflections(inflection category) const; | ||
52 | |||
53 | // Type info | ||
54 | |||
55 | static const object objectType; | ||
56 | |||
57 | static const std::list<std::string> select; | ||
58 | |||
59 | // Query fields | ||
60 | |||
61 | static const field id; | ||
62 | |||
63 | operator filter() const | ||
64 | { | ||
65 | if (!valid_) | ||
66 | { | ||
67 | throw std::domain_error("Bad access to uninitialized lemma"); | ||
68 | } | ||
69 | |||
70 | return (id == id_); | ||
71 | } | ||
72 | |||
73 | // Relationships to other objects | ||
74 | |||
75 | static const field word; | ||
76 | |||
77 | class inflection_field { | ||
78 | public: | ||
79 | |||
80 | inflection_field(inflection category) : category_(category) | ||
81 | { | ||
82 | } | ||
83 | |||
84 | const inflection getCategory() const | ||
85 | { | ||
86 | return category_; | ||
87 | } | ||
88 | |||
89 | private: | ||
90 | |||
91 | const inflection category_; | ||
92 | }; | ||
93 | |||
94 | static const inflection_field form(inflection category) | ||
95 | { | ||
96 | return inflection_field(category); | ||
97 | } | ||
98 | |||
99 | friend filter operator%=(lemma::inflection_field check, filter joinCondition); | ||
100 | |||
101 | private: | ||
102 | |||
103 | void initializeForm(inflection category) const; | ||
104 | |||
105 | bool valid_ = false; | ||
106 | |||
107 | int id_; | ||
108 | |||
109 | mutable std::map<inflection, std::vector<class form>> forms_; | ||
110 | |||
111 | const database* db_; | ||
112 | |||
113 | static const field formJoin; | ||
114 | static const field inflectionCategory; | ||
115 | |||
116 | }; | ||
117 | |||
118 | }; | ||
119 | |||
120 | #endif /* end of include guard: LEMMA_H_0A180D30 */ | ||
diff --git a/lib/notion.cpp b/lib/notion.cpp new file mode 100644 index 0000000..16794d3 --- /dev/null +++ b/lib/notion.cpp | |||
@@ -0,0 +1,94 @@ | |||
1 | #include "notion.h" | ||
2 | #include <sqlite3.h> | ||
3 | #include <sstream> | ||
4 | |||
5 | namespace verbly { | ||
6 | |||
7 | const object notion::objectType = object::notion; | ||
8 | |||
9 | const std::list<std::string> notion::select = {"notion_id", "part_of_speech", "wnid", "images"}; | ||
10 | |||
11 | const field notion::id = field::integerField(object::notion, "notion_id"); | ||
12 | const field notion::partOfSpeech = field::integerField(object::notion, "part_of_speech"); | ||
13 | const field notion::wnid = field::integerField(object::notion, "wnid", true); | ||
14 | const field notion::numOfImages = field::integerField(object::notion, "images", true); | ||
15 | |||
16 | const field notion::word = field::joinField(object::notion, "word_id", object::word); | ||
17 | |||
18 | const field notion::hypernyms = field::selfJoin(object::notion, "notion_id", "hypernymy", "hyponym_id", "hypernym_id"); | ||
19 | const field notion::hyponyms = field::selfJoin(object::notion, "notion_id", "hypernymy", "hypernym_id", "hyponym_id"); | ||
20 | |||
21 | const field notion::fullHypernyms = field::hierarchalSelfJoin(object::notion, "notion_id", "hypernymy", "hyponym_id", "hypernym_id"); | ||
22 | const field notion::fullHyponyms = field::hierarchalSelfJoin(object::notion, "notion_id", "hypernymy", "hypernym_id", "hyponym_id"); | ||
23 | |||
24 | const field notion::instances = field::selfJoin(object::notion, "notion_id", "instantiation", "class_id", "instance_id"); | ||
25 | const field notion::classes = field::selfJoin(object::notion, "notion_id", "instantiation", "instance_id", "class_id"); | ||
26 | |||
27 | const field notion::memberMeronyms = field::selfJoin(object::notion, "notion_id", "member_meronymy", "holonym_id", "meronym_id"); | ||
28 | const field notion::memberHolonyms = field::selfJoin(object::notion, "notion_id", "member_meronymy", "meronym_id", "holonym_id"); | ||
29 | |||
30 | const field notion::fullMemberMeronyms = field::hierarchalSelfJoin(object::notion, "notion_id", "member_meronymy", "holonym_id", "meronym_id"); | ||
31 | const field notion::fullMemberHolonyms = field::hierarchalSelfJoin(object::notion, "notion_id", "member_meronymy", "meronym_id", "holonym_id"); | ||
32 | |||
33 | const field notion::partMeronyms = field::selfJoin(object::notion, "notion_id", "part_meronymy", "holonym_id", "meronym_id"); | ||
34 | const field notion::partHolonyms = field::selfJoin(object::notion, "notion_id", "part_meronymy", "meronym_id", "holonym_id"); | ||
35 | |||
36 | const field notion::fullPartMeronyms = field::hierarchalSelfJoin(object::notion, "notion_id", "part_meronymy", "holonym_id", "meronym_id"); | ||
37 | const field notion::fullPartHolonyms = field::hierarchalSelfJoin(object::notion, "notion_id", "part_meronymy", "meronym_id", "holonym_id"); | ||
38 | |||
39 | const field notion::substanceMeronyms = field::selfJoin(object::notion, "notion_id", "substance_meronymy", "holonym_id", "meronym_id"); | ||
40 | const field notion::substanceHolonyms = field::selfJoin(object::notion, "notion_id", "substance_meronymy", "meronym_id", "holonym_id"); | ||
41 | |||
42 | const field notion::fullSubstanceMeronyms = field::hierarchalSelfJoin(object::notion, "notion_id", "substance_meronymy", "holonym_id", "meronym_id"); | ||
43 | const field notion::fullSubstanceHolonyms = field::hierarchalSelfJoin(object::notion, "notion_id", "substance_meronymy", "meronym_id", "holonym_id"); | ||
44 | |||
45 | const field notion::variants = field::selfJoin(object::notion, "notion_id", "variation", "noun_id", "adjective_id"); | ||
46 | const field notion::attributes = field::selfJoin(object::notion, "notion_id", "variation", "adjective_id", "noun_id"); | ||
47 | |||
48 | const field notion::similarAdjectives = field::selfJoin(object::notion, "notion_id", "similarity", "adjective_2_id", "adjective_1_id"); | ||
49 | |||
50 | const field notion::entails = field::selfJoin(object::notion, "notion_id", "entailment", "given_id", "entailment_id"); | ||
51 | const field notion::entailedBy = field::selfJoin(object::notion, "notion_id", "entailment", "entailment_id", "given_id"); | ||
52 | |||
53 | const field notion::causes = field::selfJoin(object::notion, "notion_id", "causality", "effect_id", "cause_id"); | ||
54 | const field notion::effects = field::selfJoin(object::notion, "notion_id", "causality", "cause_id", "effect_id"); | ||
55 | |||
56 | const notion::preposition_group_field prepositionGroup = {}; | ||
57 | |||
58 | const field notion::preposition_group_field::isA = field::joinField(object::notion, "notion_id", "is_a"); | ||
59 | const field notion::preposition_group_field::groupNameField = field::stringField("is_a", "groupname"); | ||
60 | |||
61 | notion::notion(const database& db, sqlite3_stmt* row) : db_(&db), valid_(true) | ||
62 | { | ||
63 | id_ = sqlite3_column_int(row, 0); | ||
64 | partOfSpeech_ = static_cast<part_of_speech>(sqlite3_column_int(row, 1)); | ||
65 | |||
66 | if (sqlite3_column_type(row, 2) != SQLITE_NULL) | ||
67 | { | ||
68 | hasWnid_ = true; | ||
69 | wnid_ = sqlite3_column_int(row, 2); | ||
70 | } | ||
71 | |||
72 | if (sqlite3_column_type(row, 3) != SQLITE_NULL) | ||
73 | { | ||
74 | hasNumOfImages_ = true; | ||
75 | numOfImages_ = sqlite3_column_int(row, 3); | ||
76 | } | ||
77 | } | ||
78 | |||
79 | std::string notion::getImageNetUrl() const | ||
80 | { | ||
81 | std::stringstream url; | ||
82 | url << "http://www.image-net.org/api/text/imagenet.synset.geturls?wnid=n"; | ||
83 | url.width(8); | ||
84 | url.fill('0'); | ||
85 | url << (getWnid() % 100000000); | ||
86 | return url.str(); | ||
87 | } | ||
88 | |||
89 | filter notion::preposition_group_field::operator==(std::string groupName) const | ||
90 | { | ||
91 | return (isA %= (groupNameField == groupName)); | ||
92 | } | ||
93 | |||
94 | }; | ||
diff --git a/lib/notion.h b/lib/notion.h new file mode 100644 index 0000000..a180d73 --- /dev/null +++ b/lib/notion.h | |||
@@ -0,0 +1,200 @@ | |||
1 | #ifndef NOTION_H_FD1C7646 | ||
2 | #define NOTION_H_FD1C7646 | ||
3 | |||
4 | #include <stdexcept> | ||
5 | #include <string> | ||
6 | #include "field.h" | ||
7 | #include "filter.h" | ||
8 | |||
9 | struct sqlite3_stmt; | ||
10 | |||
11 | namespace verbly { | ||
12 | |||
13 | class database; | ||
14 | |||
15 | class notion { | ||
16 | public: | ||
17 | |||
18 | // Default constructor | ||
19 | |||
20 | notion() = default; | ||
21 | |||
22 | // Construct from database | ||
23 | |||
24 | notion(const database& db, sqlite3_stmt* row); | ||
25 | |||
26 | // Accessors | ||
27 | |||
28 | operator bool() const | ||
29 | { | ||
30 | return valid_; | ||
31 | } | ||
32 | |||
33 | int getId() const | ||
34 | { | ||
35 | if (!valid_) | ||
36 | { | ||
37 | throw std::domain_error("Bad access to uninitialized notion"); | ||
38 | } | ||
39 | |||
40 | return id_; | ||
41 | } | ||
42 | |||
43 | part_of_speech getPartOfSpeech() const | ||
44 | { | ||
45 | if (!valid_) | ||
46 | { | ||
47 | throw std::domain_error("Bad access to uninitialized notion"); | ||
48 | } | ||
49 | |||
50 | return partOfSpeech_; | ||
51 | } | ||
52 | |||
53 | bool hasWnid() const | ||
54 | { | ||
55 | if (!valid_) | ||
56 | { | ||
57 | throw std::domain_error("Bad access to uninitialized notion"); | ||
58 | } | ||
59 | |||
60 | return hasWnid_; | ||
61 | } | ||
62 | |||
63 | int getWnid() const | ||
64 | { | ||
65 | if (!valid_) | ||
66 | { | ||
67 | throw std::domain_error("Bad access to uninitialized notion"); | ||
68 | } | ||
69 | |||
70 | if (!hasWnid_) | ||
71 | { | ||
72 | throw std::domain_error("Notion has no wnid"); | ||
73 | } | ||
74 | |||
75 | return wnid_; | ||
76 | } | ||
77 | |||
78 | bool hasNumOfImages() const | ||
79 | { | ||
80 | if (!valid_) | ||
81 | { | ||
82 | throw std::domain_error("Bad access to uninitialized notion"); | ||
83 | } | ||
84 | |||
85 | return hasNumOfImages_; | ||
86 | } | ||
87 | |||
88 | int getNumOfImages() const | ||
89 | { | ||
90 | if (!valid_) | ||
91 | { | ||
92 | throw std::domain_error("Bad access to uninitialized notion"); | ||
93 | } | ||
94 | |||
95 | if (!hasNumOfImages_) | ||
96 | { | ||
97 | throw std::domain_error("Notion does not have a number of images"); | ||
98 | } | ||
99 | |||
100 | return numOfImages_; | ||
101 | } | ||
102 | |||
103 | // Convenience | ||
104 | |||
105 | std::string getImageNetUrl() const; | ||
106 | |||
107 | // Type info | ||
108 | |||
109 | static const object objectType; | ||
110 | |||
111 | static const std::list<std::string> select; | ||
112 | |||
113 | // Query fields | ||
114 | |||
115 | static const field id; | ||
116 | static const field partOfSpeech; | ||
117 | static const field wnid; | ||
118 | static const field numOfImages; | ||
119 | |||
120 | operator filter() const | ||
121 | { | ||
122 | return (id == id_); | ||
123 | } | ||
124 | |||
125 | // Relationships with other objects | ||
126 | |||
127 | static const field word; | ||
128 | |||
129 | // Relationships with self | ||
130 | |||
131 | static const field hypernyms; | ||
132 | static const field hyponyms; | ||
133 | |||
134 | static const field fullHypernyms; | ||
135 | static const field fullHyponyms; | ||
136 | |||
137 | static const field instances; | ||
138 | static const field classes; | ||
139 | |||
140 | static const field memberMeronyms; | ||
141 | static const field memberHolonyms; | ||
142 | |||
143 | static const field fullMemberMeronyms; | ||
144 | static const field fullMemberHolonyms; | ||
145 | |||
146 | static const field partMeronyms; | ||
147 | static const field partHolonyms; | ||
148 | |||
149 | static const field fullPartMeronyms; | ||
150 | static const field fullPartHolonyms; | ||
151 | |||
152 | static const field substanceMeronyms; | ||
153 | static const field substanceHolonyms; | ||
154 | |||
155 | static const field fullSubstanceMeronyms; | ||
156 | static const field fullSubstanceHolonyms; | ||
157 | |||
158 | static const field variants; | ||
159 | static const field attributes; | ||
160 | |||
161 | static const field similarAdjectives; | ||
162 | |||
163 | static const field entails; | ||
164 | static const field entailedBy; | ||
165 | |||
166 | static const field causes; | ||
167 | static const field effects; | ||
168 | |||
169 | // Preposition group relationship | ||
170 | |||
171 | class preposition_group_field { | ||
172 | public: | ||
173 | |||
174 | filter operator==(std::string groupName) const; | ||
175 | |||
176 | private: | ||
177 | |||
178 | static const field isA; | ||
179 | static const field groupNameField; | ||
180 | }; | ||
181 | |||
182 | static const preposition_group_field prepositionGroup; | ||
183 | |||
184 | private: | ||
185 | bool valid_ = false; | ||
186 | |||
187 | int id_; | ||
188 | part_of_speech partOfSpeech_; | ||
189 | bool hasWnid_ = false; | ||
190 | int wnid_; | ||
191 | bool hasNumOfImages_ = false; | ||
192 | int numOfImages_; | ||
193 | |||
194 | const database* db_; | ||
195 | |||
196 | }; | ||
197 | |||
198 | }; | ||
199 | |||
200 | #endif /* end of include guard: NOTION_H_FD1C7646 */ | ||
diff --git a/lib/noun.cpp b/lib/noun.cpp deleted file mode 100644 index d8b34c9..0000000 --- a/lib/noun.cpp +++ /dev/null | |||
@@ -1,221 +0,0 @@ | |||
1 | #include "verbly.h" | ||
2 | #include <set> | ||
3 | #include <iostream> | ||
4 | |||
5 | namespace verbly { | ||
6 | |||
7 | noun::noun() | ||
8 | { | ||
9 | |||
10 | } | ||
11 | |||
12 | noun::noun(const data& _data, int _id) : word(_data, _id) | ||
13 | { | ||
14 | |||
15 | } | ||
16 | |||
17 | std::string noun::base_form() const | ||
18 | { | ||
19 | assert(_valid == true); | ||
20 | |||
21 | return _singular; | ||
22 | } | ||
23 | |||
24 | std::string noun::singular_form() const | ||
25 | { | ||
26 | assert(_valid == true); | ||
27 | |||
28 | return _singular; | ||
29 | } | ||
30 | |||
31 | std::string noun::plural_form() const | ||
32 | { | ||
33 | assert(_valid == true); | ||
34 | |||
35 | return _plural; | ||
36 | } | ||
37 | |||
38 | int noun::wnid() const | ||
39 | { | ||
40 | assert(_valid == true); | ||
41 | |||
42 | return _wnid; | ||
43 | } | ||
44 | |||
45 | bool noun::has_plural_form() const | ||
46 | { | ||
47 | assert(_valid == true); | ||
48 | |||
49 | return !_plural.empty(); | ||
50 | } | ||
51 | |||
52 | noun_query noun::hypernyms() const | ||
53 | { | ||
54 | assert(_valid == true); | ||
55 | |||
56 | return _data->nouns().hypernym_of(*this); | ||
57 | } | ||
58 | |||
59 | noun_query noun::full_hypernyms() const | ||
60 | { | ||
61 | assert(_valid == true); | ||
62 | |||
63 | return _data->nouns().full_hypernym_of(*this); | ||
64 | } | ||
65 | |||
66 | noun_query noun::hyponyms() const | ||
67 | { | ||
68 | assert(_valid == true); | ||
69 | |||
70 | return _data->nouns().hyponym_of(*this); | ||
71 | } | ||
72 | |||
73 | noun_query noun::full_hyponyms() const | ||
74 | { | ||
75 | assert(_valid == true); | ||
76 | |||
77 | return _data->nouns().full_hyponym_of(*this); | ||
78 | } | ||
79 | |||
80 | noun_query noun::part_meronyms() const | ||
81 | { | ||
82 | assert(_valid == true); | ||
83 | |||
84 | return _data->nouns().part_meronym_of(*this); | ||
85 | } | ||
86 | |||
87 | noun_query noun::full_part_meronyms() const | ||
88 | { | ||
89 | assert(_valid == true); | ||
90 | |||
91 | return _data->nouns().full_part_meronym_of(*this); | ||
92 | } | ||
93 | |||
94 | noun_query noun::part_holonyms() const | ||
95 | { | ||
96 | assert(_valid == true); | ||
97 | |||
98 | return _data->nouns().part_holonym_of(*this); | ||
99 | } | ||
100 | |||
101 | noun_query noun::full_part_holonyms() const | ||
102 | { | ||
103 | assert(_valid == true); | ||
104 | |||
105 | return _data->nouns().full_part_holonym_of(*this); | ||
106 | } | ||
107 | |||
108 | noun_query noun::substance_meronyms() const | ||
109 | { | ||
110 | assert(_valid == true); | ||
111 | |||
112 | return _data->nouns().substance_meronym_of(*this); | ||
113 | } | ||
114 | |||
115 | noun_query noun::full_substance_meronyms() const | ||
116 | { | ||
117 | assert(_valid == true); | ||
118 | |||
119 | return _data->nouns().full_substance_meronym_of(*this); | ||
120 | } | ||
121 | |||
122 | noun_query noun::substance_holonyms() const | ||
123 | { | ||
124 | assert(_valid == true); | ||
125 | |||
126 | return _data->nouns().substance_holonym_of(*this); | ||
127 | } | ||
128 | |||
129 | noun_query noun::full_substance_holonyms() const | ||
130 | { | ||
131 | assert(_valid == true); | ||
132 | |||
133 | return _data->nouns().full_substance_holonym_of(*this); | ||
134 | } | ||
135 | |||
136 | noun_query noun::member_meronyms() const | ||
137 | { | ||
138 | assert(_valid == true); | ||
139 | |||
140 | return _data->nouns().member_meronym_of(*this); | ||
141 | } | ||
142 | |||
143 | noun_query noun::full_member_meronyms() const | ||
144 | { | ||
145 | assert(_valid == true); | ||
146 | |||
147 | return _data->nouns().full_member_meronym_of(*this); | ||
148 | } | ||
149 | |||
150 | noun_query noun::member_holonyms() const | ||
151 | { | ||
152 | assert(_valid == true); | ||
153 | |||
154 | return _data->nouns().member_holonym_of(*this); | ||
155 | } | ||
156 | |||
157 | noun_query noun::full_member_holonyms() const | ||
158 | { | ||
159 | assert(_valid == true); | ||
160 | |||
161 | return _data->nouns().full_member_holonym_of(*this); | ||
162 | } | ||
163 | |||
164 | noun_query noun::classes() const | ||
165 | { | ||
166 | assert(_valid == true); | ||
167 | |||
168 | return _data->nouns().class_of(*this); | ||
169 | } | ||
170 | |||
171 | noun_query noun::instances() const | ||
172 | { | ||
173 | assert(_valid == true); | ||
174 | |||
175 | return _data->nouns().instance_of(*this); | ||
176 | } | ||
177 | |||
178 | noun_query noun::synonyms() const | ||
179 | { | ||
180 | assert(_valid == true); | ||
181 | |||
182 | return _data->nouns().synonym_of(*this); | ||
183 | } | ||
184 | |||
185 | noun_query noun::antonyms() const | ||
186 | { | ||
187 | assert(_valid == true); | ||
188 | |||
189 | return _data->nouns().antonym_of(*this); | ||
190 | } | ||
191 | |||
192 | adjective_query noun::pertainyms() const | ||
193 | { | ||
194 | assert(_valid == true); | ||
195 | |||
196 | return _data->adjectives().pertainym_of(*this); | ||
197 | } | ||
198 | |||
199 | adjective_query noun::variations() const | ||
200 | { | ||
201 | assert(_valid == true); | ||
202 | |||
203 | return _data->adjectives().variant_of(*this); | ||
204 | } | ||
205 | |||
206 | std::string noun::imagenet_url() const | ||
207 | { | ||
208 | std::stringstream url; | ||
209 | url << "http://www.image-net.org/api/text/imagenet.synset.geturls?wnid=n"; | ||
210 | url.width(8); | ||
211 | url.fill('0'); | ||
212 | url << (_wnid % 100000000); | ||
213 | return url.str(); | ||
214 | } | ||
215 | |||
216 | bool noun::operator<(const noun& other) const | ||
217 | { | ||
218 | return _id < other._id; | ||
219 | } | ||
220 | |||
221 | }; | ||
diff --git a/lib/noun.h b/lib/noun.h deleted file mode 100644 index bd71e57..0000000 --- a/lib/noun.h +++ /dev/null | |||
@@ -1,55 +0,0 @@ | |||
1 | #ifndef NOUN_H_24A03C83 | ||
2 | #define NOUN_H_24A03C83 | ||
3 | |||
4 | namespace verbly { | ||
5 | |||
6 | class noun : public word { | ||
7 | private: | ||
8 | std::string _singular; | ||
9 | std::string _plural; | ||
10 | int _wnid; | ||
11 | |||
12 | friend class noun_query; | ||
13 | |||
14 | public: | ||
15 | noun(); | ||
16 | noun(const data& _data, int _id); | ||
17 | |||
18 | std::string base_form() const; | ||
19 | std::string singular_form() const; | ||
20 | std::string plural_form() const; | ||
21 | int wnid() const; | ||
22 | |||
23 | bool has_plural_form() const; | ||
24 | |||
25 | noun_query hypernyms() const; | ||
26 | noun_query full_hypernyms() const; | ||
27 | noun_query hyponyms() const; | ||
28 | noun_query full_hyponyms() const; | ||
29 | noun_query part_meronyms() const; | ||
30 | noun_query full_part_meronyms() const; | ||
31 | noun_query part_holonyms() const; | ||
32 | noun_query full_part_holonyms() const; | ||
33 | noun_query substance_meronyms() const; | ||
34 | noun_query full_substance_meronyms() const; | ||
35 | noun_query substance_holonyms() const; | ||
36 | noun_query full_substance_holonyms() const; | ||
37 | noun_query member_meronyms() const; | ||
38 | noun_query full_member_meronyms() const; | ||
39 | noun_query member_holonyms() const; | ||
40 | noun_query full_member_holonyms() const; | ||
41 | noun_query classes() const; | ||
42 | noun_query instances() const; | ||
43 | noun_query synonyms() const; | ||
44 | noun_query antonyms() const; | ||
45 | adjective_query pertainyms() const; | ||
46 | adjective_query variations() const; | ||
47 | |||
48 | std::string imagenet_url() const; | ||
49 | |||
50 | bool operator<(const noun& other) const; | ||
51 | }; | ||
52 | |||
53 | }; | ||
54 | |||
55 | #endif /* end of include guard: NOUN_H_24A03C83 */ | ||
diff --git a/lib/noun_query.cpp b/lib/noun_query.cpp deleted file mode 100644 index 8648227..0000000 --- a/lib/noun_query.cpp +++ /dev/null | |||
@@ -1,2013 +0,0 @@ | |||
1 | #include "verbly.h" | ||
2 | |||
3 | namespace verbly { | ||
4 | |||
5 | noun_query::noun_query(const data& _data) : _data(_data) | ||
6 | { | ||
7 | |||
8 | } | ||
9 | |||
10 | noun_query& noun_query::limit(int _limit) | ||
11 | { | ||
12 | if ((_limit > 0) || (_limit == unlimited)) | ||
13 | { | ||
14 | this->_limit = _limit; | ||
15 | } | ||
16 | |||
17 | return *this; | ||
18 | } | ||
19 | |||
20 | noun_query& noun_query::random() | ||
21 | { | ||
22 | this->_random = true; | ||
23 | |||
24 | return *this; | ||
25 | } | ||
26 | |||
27 | noun_query& noun_query::except(const noun& _word) | ||
28 | { | ||
29 | _except.push_back(_word); | ||
30 | |||
31 | return *this; | ||
32 | } | ||
33 | |||
34 | noun_query& noun_query::rhymes_with(const word& _word) | ||
35 | { | ||
36 | for (auto rhyme : _word.get_rhymes()) | ||
37 | { | ||
38 | _rhymes.push_back(rhyme); | ||
39 | } | ||
40 | |||
41 | if (dynamic_cast<const noun*>(&_word) != nullptr) | ||
42 | { | ||
43 | _except.push_back(dynamic_cast<const noun&>(_word)); | ||
44 | } | ||
45 | |||
46 | return *this; | ||
47 | } | ||
48 | |||
49 | noun_query& noun_query::rhymes_with(rhyme _r) | ||
50 | { | ||
51 | _rhymes.push_back(_r); | ||
52 | |||
53 | return *this; | ||
54 | } | ||
55 | |||
56 | noun_query& noun_query::has_pronunciation() | ||
57 | { | ||
58 | this->_has_prn = true; | ||
59 | |||
60 | return *this; | ||
61 | } | ||
62 | |||
63 | noun_query& noun_query::has_rhyming_noun() | ||
64 | { | ||
65 | _has_rhyming_noun = true; | ||
66 | |||
67 | return *this; | ||
68 | } | ||
69 | |||
70 | noun_query& noun_query::has_rhyming_adjective() | ||
71 | { | ||
72 | _has_rhyming_adjective = true; | ||
73 | |||
74 | return *this; | ||
75 | } | ||
76 | |||
77 | noun_query& noun_query::has_rhyming_adverb() | ||
78 | { | ||
79 | _has_rhyming_adverb = true; | ||
80 | |||
81 | return *this; | ||
82 | } | ||
83 | |||
84 | noun_query& noun_query::has_rhyming_verb() | ||
85 | { | ||
86 | _has_rhyming_verb = true; | ||
87 | |||
88 | return *this; | ||
89 | } | ||
90 | |||
91 | noun_query& noun_query::with_stress(filter<std::vector<bool>> _arg) | ||
92 | { | ||
93 | _stress = _arg; | ||
94 | |||
95 | return *this; | ||
96 | } | ||
97 | |||
98 | noun_query& noun_query::with_singular_form(std::string _arg) | ||
99 | { | ||
100 | _with_singular_form.push_back(_arg); | ||
101 | |||
102 | return *this; | ||
103 | } | ||
104 | |||
105 | noun_query& noun_query::with_prefix(filter<std::string> _f) | ||
106 | { | ||
107 | _f.clean(); | ||
108 | _with_prefix = _f; | ||
109 | |||
110 | return *this; | ||
111 | } | ||
112 | |||
113 | noun_query& noun_query::with_suffix(filter<std::string> _f) | ||
114 | { | ||
115 | _f.clean(); | ||
116 | _with_suffix = _f; | ||
117 | |||
118 | return *this; | ||
119 | } | ||
120 | |||
121 | noun_query& noun_query::requires_plural_form() | ||
122 | { | ||
123 | _requires_plural_form = true; | ||
124 | |||
125 | return *this; | ||
126 | } | ||
127 | |||
128 | noun_query& noun_query::with_complexity(int _arg) | ||
129 | { | ||
130 | _with_complexity = _arg; | ||
131 | |||
132 | return *this; | ||
133 | } | ||
134 | |||
135 | noun_query& noun_query::is_hypernym() | ||
136 | { | ||
137 | _is_hypernym = true; | ||
138 | |||
139 | return *this; | ||
140 | } | ||
141 | |||
142 | noun_query& noun_query::hypernym_of(filter<noun> _f) | ||
143 | { | ||
144 | _f.clean(); | ||
145 | _hypernym_of = _f; | ||
146 | |||
147 | return *this; | ||
148 | } | ||
149 | |||
150 | noun_query& noun_query::full_hypernym_of(filter<noun> _f) | ||
151 | { | ||
152 | _f.clean(); | ||
153 | _full_hypernym_of = _f; | ||
154 | |||
155 | return *this; | ||
156 | } | ||
157 | |||
158 | noun_query& noun_query::is_hyponym() | ||
159 | { | ||
160 | _is_hyponym = true; | ||
161 | |||
162 | return *this; | ||
163 | } | ||
164 | |||
165 | noun_query& noun_query::hyponym_of(filter<noun> _f) | ||
166 | { | ||
167 | _f.clean(); | ||
168 | _hyponym_of = _f; | ||
169 | |||
170 | return *this; | ||
171 | } | ||
172 | |||
173 | noun_query& noun_query::full_hyponym_of(filter<noun> _f) | ||
174 | { | ||
175 | _f.clean(); | ||
176 | _full_hyponym_of = _f; | ||
177 | |||
178 | return *this; | ||
179 | } | ||
180 | |||
181 | noun_query& noun_query::is_part_meronym() | ||
182 | { | ||
183 | _is_part_meronym = true; | ||
184 | |||
185 | return *this; | ||
186 | } | ||
187 | |||
188 | noun_query& noun_query::part_meronym_of(filter<noun> _f) | ||
189 | { | ||
190 | _f.clean(); | ||
191 | _part_meronym_of = _f; | ||
192 | |||
193 | return *this; | ||
194 | } | ||
195 | |||
196 | noun_query& noun_query::full_part_meronym_of(filter<noun> _f) | ||
197 | { | ||
198 | _f.clean(); | ||
199 | _full_part_meronym_of = _f; | ||
200 | |||
201 | return *this; | ||
202 | } | ||
203 | |||
204 | noun_query& noun_query::is_part_holonym() | ||
205 | { | ||
206 | _is_part_holonym = true; | ||
207 | |||
208 | return *this; | ||
209 | } | ||
210 | |||
211 | noun_query& noun_query::part_holonym_of(filter<noun> _f) | ||
212 | { | ||
213 | _f.clean(); | ||
214 | _part_holonym_of = _f; | ||
215 | |||
216 | return *this; | ||
217 | } | ||
218 | |||
219 | noun_query& noun_query::full_part_holonym_of(filter<noun> _f) | ||
220 | { | ||
221 | _f.clean(); | ||
222 | _full_part_holonym_of = _f; | ||
223 | |||
224 | return *this; | ||
225 | } | ||
226 | |||
227 | noun_query& noun_query::is_substance_meronym() | ||
228 | { | ||
229 | _is_substance_meronym = true; | ||
230 | |||
231 | return *this; | ||
232 | } | ||
233 | |||
234 | noun_query& noun_query::substance_meronym_of(filter<noun> _f) | ||
235 | { | ||
236 | _f.clean(); | ||
237 | _substance_meronym_of = _f; | ||
238 | |||
239 | return *this; | ||
240 | } | ||
241 | |||
242 | noun_query& noun_query::full_substance_meronym_of(filter<noun> _f) | ||
243 | { | ||
244 | _f.clean(); | ||
245 | _full_substance_meronym_of = _f; | ||
246 | |||
247 | return *this; | ||
248 | } | ||
249 | |||
250 | noun_query& noun_query::is_substance_holonym() | ||
251 | { | ||
252 | _is_substance_holonym = true; | ||
253 | |||
254 | return *this; | ||
255 | } | ||
256 | |||
257 | noun_query& noun_query::substance_holonym_of(filter<noun> _f) | ||
258 | { | ||
259 | _f.clean(); | ||
260 | _substance_holonym_of = _f; | ||
261 | |||
262 | return *this; | ||
263 | } | ||
264 | |||
265 | noun_query& noun_query::full_substance_holonym_of(filter<noun> _f) | ||
266 | { | ||
267 | _f.clean(); | ||
268 | _full_substance_holonym_of = _f; | ||
269 | |||
270 | return *this; | ||
271 | } | ||
272 | |||
273 | noun_query& noun_query::is_member_meronym() | ||
274 | { | ||
275 | _is_member_meronym = true; | ||
276 | |||
277 | return *this; | ||
278 | } | ||
279 | |||
280 | noun_query& noun_query::member_meronym_of(filter<noun> _f) | ||
281 | { | ||
282 | _f.clean(); | ||
283 | _member_meronym_of = _f; | ||
284 | |||
285 | return *this; | ||
286 | } | ||
287 | |||
288 | noun_query& noun_query::full_member_meronym_of(filter<noun> _f) | ||
289 | { | ||
290 | _f.clean(); | ||
291 | _full_member_meronym_of = _f; | ||
292 | |||
293 | return *this; | ||
294 | } | ||
295 | |||
296 | noun_query& noun_query::is_member_holonym() | ||
297 | { | ||
298 | _is_member_holonym = true; | ||
299 | |||
300 | return *this; | ||
301 | } | ||
302 | |||
303 | noun_query& noun_query::member_holonym_of(filter<noun> _f) | ||
304 | { | ||
305 | _f.clean(); | ||
306 | _member_holonym_of = _f; | ||
307 | |||
308 | return *this; | ||
309 | } | ||
310 | |||
311 | noun_query& noun_query::full_member_holonym_of(filter<noun> _f) | ||
312 | { | ||
313 | _f.clean(); | ||
314 | _full_member_holonym_of = _f; | ||
315 | |||
316 | return *this; | ||
317 | } | ||
318 | |||
319 | noun_query& noun_query::is_proper() | ||
320 | { | ||
321 | _is_proper = true; | ||
322 | |||
323 | return *this; | ||
324 | } | ||
325 | |||
326 | noun_query& noun_query::is_not_proper() | ||
327 | { | ||
328 | _is_not_proper = true; | ||
329 | |||
330 | return *this; | ||
331 | } | ||
332 | |||
333 | noun_query& noun_query::is_instance() | ||
334 | { | ||
335 | _is_instance = true; | ||
336 | |||
337 | return *this; | ||
338 | } | ||
339 | |||
340 | noun_query& noun_query::instance_of(filter<noun> _f) | ||
341 | { | ||
342 | _f.clean(); | ||
343 | _instance_of = _f; | ||
344 | |||
345 | return *this; | ||
346 | } | ||
347 | |||
348 | noun_query& noun_query::is_class() | ||
349 | { | ||
350 | _is_class = true; | ||
351 | |||
352 | return *this; | ||
353 | } | ||
354 | |||
355 | noun_query& noun_query::class_of(filter<noun> _f) | ||
356 | { | ||
357 | _f.clean(); | ||
358 | _class_of = _f; | ||
359 | |||
360 | return *this; | ||
361 | } | ||
362 | |||
363 | noun_query& noun_query::has_synonyms() | ||
364 | { | ||
365 | _has_synonyms = true; | ||
366 | |||
367 | return *this; | ||
368 | } | ||
369 | |||
370 | noun_query& noun_query::synonym_of(filter<noun> _f) | ||
371 | { | ||
372 | _f.clean(); | ||
373 | _synonym_of = _f; | ||
374 | |||
375 | return *this; | ||
376 | } | ||
377 | |||
378 | noun_query& noun_query::has_antonyms() | ||
379 | { | ||
380 | _has_antonyms = true; | ||
381 | |||
382 | return *this; | ||
383 | } | ||
384 | |||
385 | noun_query& noun_query::antonym_of(filter<noun> _f) | ||
386 | { | ||
387 | _f.clean(); | ||
388 | _antonym_of = _f; | ||
389 | |||
390 | return *this; | ||
391 | } | ||
392 | |||
393 | noun_query& noun_query::has_pertainym() | ||
394 | { | ||
395 | _has_pertainym = true; | ||
396 | |||
397 | return *this; | ||
398 | } | ||
399 | |||
400 | noun_query& noun_query::anti_pertainym_of(filter<adjective> _f) | ||
401 | { | ||
402 | _f.clean(); | ||
403 | _anti_pertainym_of = _f; | ||
404 | |||
405 | return *this; | ||
406 | } | ||
407 | |||
408 | noun_query& noun_query::is_attribute() | ||
409 | { | ||
410 | _is_attribute = true; | ||
411 | |||
412 | return *this; | ||
413 | } | ||
414 | |||
415 | noun_query& noun_query::attribute_of(filter<adjective> _f) | ||
416 | { | ||
417 | _f.clean(); | ||
418 | _attribute_of = _f; | ||
419 | |||
420 | return *this; | ||
421 | } | ||
422 | |||
423 | noun_query& noun_query::at_least_n_images(int _arg) | ||
424 | { | ||
425 | _at_least_n_images = _arg; | ||
426 | |||
427 | return *this; | ||
428 | } | ||
429 | |||
430 | noun_query& noun_query::with_wnid(int _arg) | ||
431 | { | ||
432 | _with_wnid.insert(_arg); | ||
433 | |||
434 | return *this; | ||
435 | } | ||
436 | |||
437 | /* | ||
438 | noun_query& noun_query::derived_from(const word& _w) | ||
439 | { | ||
440 | if (dynamic_cast<const adjective*>(&_w) != nullptr) | ||
441 | { | ||
442 | _derived_from_adjective.push_back(dynamic_cast<const adjective&>(_w)); | ||
443 | } else if (dynamic_cast<const adverb*>(&_w) != nullptr) | ||
444 | { | ||
445 | _derived_from_adverb.push_back(dynamic_cast<const adverb&>(_w)); | ||
446 | } else if (dynamic_cast<const noun*>(&_w) != nullptr) | ||
447 | { | ||
448 | _derived_from_noun.push_back(dynamic_cast<const noun&>(_w)); | ||
449 | } | ||
450 | |||
451 | return *this; | ||
452 | } | ||
453 | |||
454 | noun_query& noun_query::not_derived_from(const word& _w) | ||
455 | { | ||
456 | if (dynamic_cast<const adjective*>(&_w) != nullptr) | ||
457 | { | ||
458 | _not_derived_from_adjective.push_back(dynamic_cast<const adjective&>(_w)); | ||
459 | } else if (dynamic_cast<const adverb*>(&_w) != nullptr) | ||
460 | { | ||
461 | _not_derived_from_adverb.push_back(dynamic_cast<const adverb&>(_w)); | ||
462 | } else if (dynamic_cast<const noun*>(&_w) != nullptr) | ||
463 | { | ||
464 | _not_derived_from_noun.push_back(dynamic_cast<const noun&>(_w)); | ||
465 | } | ||
466 | |||
467 | return *this; | ||
468 | }*/ | ||
469 | |||
470 | std::list<noun> noun_query::run() const | ||
471 | { | ||
472 | std::stringstream construct; | ||
473 | |||
474 | if (!_full_hypernym_of.empty() || !_full_hyponym_of.empty() || !_full_part_meronym_of.empty() || !_full_part_holonym_of.empty() || !_full_substance_meronym_of.empty() || !_full_substance_holonym_of.empty() || !_full_member_meronym_of.empty() || !_full_member_holonym_of.empty()) | ||
475 | { | ||
476 | construct << "WITH RECURSIVE "; | ||
477 | |||
478 | std::list<std::string> ctes; | ||
479 | |||
480 | for (auto hyponym : _full_hypernym_of.uniq_flatten()) | ||
481 | { | ||
482 | ctes.push_back("hypernym_tree_" + std::to_string(hyponym._id) + " AS (SELECT hypernym_id FROM hypernymy WHERE hyponym_id = " + std::to_string(hyponym._id) + " UNION SELECT h.hypernym_id FROM hypernym_tree_" + std::to_string(hyponym._id) + " AS t INNER JOIN hypernymy AS h ON t.hypernym_id = h.hyponym_id)"); | ||
483 | } | ||
484 | |||
485 | for (auto hypernym : _full_hyponym_of.uniq_flatten()) | ||
486 | { | ||
487 | ctes.push_back("hyponym_tree_" + std::to_string(hypernym._id) + " AS (SELECT hyponym_id FROM hypernymy WHERE hypernym_id = " + std::to_string(hypernym._id) + " UNION SELECT h.hyponym_id FROM hyponym_tree_" + std::to_string(hypernym._id) + " AS t INNER JOIN hypernymy AS h ON t.hyponym_id = h.hypernym_id)"); | ||
488 | } | ||
489 | |||
490 | for (auto holonym : _full_part_meronym_of.uniq_flatten()) | ||
491 | { | ||
492 | ctes.push_back("part_meronym_tree_" + std::to_string(holonym._id) + " AS (SELECT meronym_id FROM part_meronymy WHERE holonym_id = " + std::to_string(holonym._id) + " UNION SELECT h.meronym_id FROM part_meronym_tree_" + std::to_string(holonym._id) + " AS t INNER JOIN part_meronymy AS h ON t.meronym_id = h.holonym_id)"); | ||
493 | } | ||
494 | |||
495 | for (auto meronym : _full_part_holonym_of.uniq_flatten()) | ||
496 | { | ||
497 | ctes.push_back("part_holonym_tree_" + std::to_string(meronym._id) + " AS (SELECT holonym_id FROM part_meronymy WHERE meronym_id = " + std::to_string(meronym._id) + " UNION SELECT h.holonym_id FROM part_holonym_tree_" + std::to_string(meronym._id) + " AS t INNER JOIN part_meronymy AS h ON t.holonym_id = h.meronym_id)"); | ||
498 | } | ||
499 | |||
500 | for (auto holonym : _full_substance_meronym_of.uniq_flatten()) | ||
501 | { | ||
502 | ctes.push_back("substance_meronym_tree_" + std::to_string(holonym._id) + " AS (SELECT meronym_id FROM substance_meronymy WHERE holonym_id = " + std::to_string(holonym._id) + " UNION SELECT h.meronym_id FROM substance_meronym_tree_" + std::to_string(holonym._id) + " AS t INNER JOIN substance_meronymy AS h ON t.meronym_id = h.holonym_id)"); | ||
503 | } | ||
504 | |||
505 | for (auto meronym : _full_substance_holonym_of.uniq_flatten()) | ||
506 | { | ||
507 | ctes.push_back("substance_holonym_tree_" + std::to_string(meronym._id) + " AS (SELECT holonym_id FROM substance_meronymy WHERE meronym_id = " + std::to_string(meronym._id) + " UNION SELECT h.holonym_id FROM substance_holonym_tree_" + std::to_string(meronym._id) + " AS t INNER JOIN substance_meronymy AS h ON t.holonym_id = h.meronym_id)"); | ||
508 | } | ||
509 | |||
510 | for (auto holonym : _full_member_meronym_of.uniq_flatten()) | ||
511 | { | ||
512 | ctes.push_back("member_meronym_tree_" + std::to_string(holonym._id) + " AS (SELECT meronym_id FROM member_meronymy WHERE holonym_id = " + std::to_string(holonym._id) + " UNION SELECT h.meronym_id FROM member_meronym_tree_" + std::to_string(holonym._id) + " AS t INNER JOIN member_meronymy AS h ON t.meronym_id = h.holonym_id)"); | ||
513 | } | ||
514 | |||
515 | for (auto meronym : _full_member_holonym_of.uniq_flatten()) | ||
516 | { | ||
517 | ctes.push_back("member_holonym_tree_" + std::to_string(meronym._id) + " AS (SELECT holonym_id FROM member_meronymy WHERE meronym_id = " + std::to_string(meronym._id) + " UNION SELECT h.holonym_id FROM member_holonym_tree_" + std::to_string(meronym._id) + " AS t INNER JOIN member_meronymy AS h ON t.holonym_id = h.meronym_id)"); | ||
518 | } | ||
519 | |||
520 | construct << verbly::implode(std::begin(ctes), std::end(ctes), ", "); | ||
521 | construct << " "; | ||
522 | } | ||
523 | |||
524 | construct << "SELECT noun_id, singular, plural, wnid FROM nouns"; | ||
525 | std::list<std::string> conditions; | ||
526 | std::list<binding> bindings; | ||
527 | |||
528 | if (_has_prn) | ||
529 | { | ||
530 | conditions.push_back("noun_id IN (SELECT noun_id FROM noun_pronunciations)"); | ||
531 | } | ||
532 | |||
533 | if (!_rhymes.empty()) | ||
534 | { | ||
535 | std::list<std::string> clauses(_rhymes.size(), "(prerhyme != ? AND rhyme = ?)"); | ||
536 | std::string cond = "noun_id IN (SELECT noun_id FROM noun_pronunciations WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
537 | conditions.push_back(cond); | ||
538 | |||
539 | for (auto rhy : _rhymes) | ||
540 | { | ||
541 | bindings.emplace_back(rhy.get_prerhyme()); | ||
542 | bindings.emplace_back(rhy.get_rhyme()); | ||
543 | } | ||
544 | } | ||
545 | |||
546 | if (_has_rhyming_noun) | ||
547 | { | ||
548 | conditions.push_back("noun_id IN (SELECT a.noun_id FROM nouns AS a INNER JOIN noun_pronunciations AS curp ON curp.noun_id = a.noun_id INNER JOIN noun_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme AND rhmp.noun_id != curp.noun_id)"); | ||
549 | } | ||
550 | |||
551 | if (_has_rhyming_adjective) | ||
552 | { | ||
553 | conditions.push_back("noun_id IN (SELECT a.noun_id FROM nouns AS a INNER JOIN noun_pronunciations AS curp ON curp.noun_id = a.noun_id INNER JOIN adjective_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)"); | ||
554 | } | ||
555 | |||
556 | if (_has_rhyming_adverb) | ||
557 | { | ||
558 | conditions.push_back("noun_id IN (SELECT a.noun_id FROM nouns AS a INNER JOIN noun_pronunciations AS curp ON curp.noun_id = a.noun_id INNER JOIN adverb_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)"); | ||
559 | } | ||
560 | |||
561 | if (_has_rhyming_verb) | ||
562 | { | ||
563 | conditions.push_back("noun_id IN (SELECT a.noun_id FROM nouns AS a INNER JOIN noun_pronunciations AS curp ON curp.noun_id = a.noun_id INNER JOIN verb_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)"); | ||
564 | } | ||
565 | |||
566 | if (!_stress.empty()) | ||
567 | { | ||
568 | std::stringstream cond; | ||
569 | if (_stress.get_notlogic()) | ||
570 | { | ||
571 | cond << "noun_id NOT IN"; | ||
572 | } else { | ||
573 | cond << "noun_id IN"; | ||
574 | } | ||
575 | |||
576 | cond << "(SELECT noun_id FROM noun_pronunciations WHERE "; | ||
577 | |||
578 | std::function<std::string (filter<std::vector<bool>>, bool)> recur = [&] (filter<std::vector<bool>> f, bool notlogic) -> std::string { | ||
579 | switch (f.get_type()) | ||
580 | { | ||
581 | case filter<std::vector<bool>>::type::singleton: | ||
582 | { | ||
583 | std::ostringstream _val; | ||
584 | for (auto syl : f.get_elem()) | ||
585 | { | ||
586 | if (syl) | ||
587 | { | ||
588 | _val << "1"; | ||
589 | } else { | ||
590 | _val << "0"; | ||
591 | } | ||
592 | } | ||
593 | |||
594 | bindings.emplace_back(_val.str()); | ||
595 | |||
596 | if (notlogic == f.get_notlogic()) | ||
597 | { | ||
598 | return "stress = ?"; | ||
599 | } else { | ||
600 | return "stress != ?"; | ||
601 | } | ||
602 | } | ||
603 | |||
604 | case filter<std::vector<bool>>::type::group: | ||
605 | { | ||
606 | bool truelogic = notlogic != f.get_notlogic(); | ||
607 | |||
608 | std::list<std::string> clauses; | ||
609 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<std::vector<bool>> f2) { | ||
610 | return recur(f2, truelogic); | ||
611 | }); | ||
612 | |||
613 | if (truelogic == f.get_orlogic()) | ||
614 | { | ||
615 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
616 | } else { | ||
617 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
618 | } | ||
619 | } | ||
620 | } | ||
621 | }; | ||
622 | |||
623 | cond << recur(_stress, _stress.get_notlogic()); | ||
624 | cond << ")"; | ||
625 | conditions.push_back(cond.str()); | ||
626 | } | ||
627 | |||
628 | for (auto except : _except) | ||
629 | { | ||
630 | conditions.push_back("noun_id != ?"); | ||
631 | bindings.emplace_back(except._id); | ||
632 | } | ||
633 | |||
634 | if (!_with_singular_form.empty()) | ||
635 | { | ||
636 | std::list<std::string> clauses(_with_singular_form.size(), "singular = ?"); | ||
637 | std::string cond = "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
638 | conditions.push_back(cond); | ||
639 | |||
640 | for (auto form : _with_singular_form) | ||
641 | { | ||
642 | bindings.emplace_back(form); | ||
643 | } | ||
644 | } | ||
645 | |||
646 | if (_requires_plural_form) | ||
647 | { | ||
648 | conditions.push_back("plural IS NOT NULL"); | ||
649 | } | ||
650 | |||
651 | if (!_with_prefix.empty()) | ||
652 | { | ||
653 | std::function<std::string (filter<std::string>, bool)> recur = [&] (filter<std::string> f, bool notlogic) -> std::string { | ||
654 | switch (f.get_type()) | ||
655 | { | ||
656 | case filter<std::string>::type::singleton: | ||
657 | { | ||
658 | bindings.emplace_back(f.get_elem() + "%"); | ||
659 | |||
660 | if (notlogic == f.get_notlogic()) | ||
661 | { | ||
662 | return "singular LIKE ?"; | ||
663 | } else { | ||
664 | return "singular NOT LIKE ?"; | ||
665 | } | ||
666 | } | ||
667 | |||
668 | case filter<std::string>::type::group: | ||
669 | { | ||
670 | bool truelogic = notlogic != f.get_notlogic(); | ||
671 | |||
672 | std::list<std::string> clauses; | ||
673 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<std::string> f2) { | ||
674 | return recur(f2, truelogic); | ||
675 | }); | ||
676 | |||
677 | if (truelogic == f.get_orlogic()) | ||
678 | { | ||
679 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
680 | } else { | ||
681 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
682 | } | ||
683 | } | ||
684 | } | ||
685 | }; | ||
686 | |||
687 | conditions.push_back(recur(_with_prefix, false)); | ||
688 | } | ||
689 | |||
690 | if (!_with_suffix.empty()) | ||
691 | { | ||
692 | std::function<std::string (filter<std::string>, bool)> recur = [&] (filter<std::string> f, bool notlogic) -> std::string { | ||
693 | switch (f.get_type()) | ||
694 | { | ||
695 | case filter<std::string>::type::singleton: | ||
696 | { | ||
697 | bindings.emplace_back("%" + f.get_elem()); | ||
698 | |||
699 | if (notlogic == f.get_notlogic()) | ||
700 | { | ||
701 | return "singular LIKE ?"; | ||
702 | } else { | ||
703 | return "singular NOT LIKE ?"; | ||
704 | } | ||
705 | } | ||
706 | |||
707 | case filter<std::string>::type::group: | ||
708 | { | ||
709 | bool truelogic = notlogic != f.get_notlogic(); | ||
710 | |||
711 | std::list<std::string> clauses; | ||
712 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<std::string> f2) { | ||
713 | return recur(f2, truelogic); | ||
714 | }); | ||
715 | |||
716 | if (truelogic == f.get_orlogic()) | ||
717 | { | ||
718 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
719 | } else { | ||
720 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
721 | } | ||
722 | } | ||
723 | } | ||
724 | }; | ||
725 | |||
726 | conditions.push_back(recur(_with_suffix, false)); | ||
727 | } | ||
728 | |||
729 | if (_with_complexity != unlimited) | ||
730 | { | ||
731 | conditions.push_back("complexity = ?"); | ||
732 | bindings.emplace_back(_with_complexity); | ||
733 | } | ||
734 | |||
735 | if (_is_hypernym) | ||
736 | { | ||
737 | conditions.push_back("noun_id IN (SELECT hypernym_id FROM hypernymy)"); | ||
738 | } | ||
739 | |||
740 | if (!_hypernym_of.empty()) | ||
741 | { | ||
742 | std::stringstream cond; | ||
743 | if (_hypernym_of.get_notlogic()) | ||
744 | { | ||
745 | cond << "noun_id NOT IN"; | ||
746 | } else { | ||
747 | cond << "noun_id IN"; | ||
748 | } | ||
749 | |||
750 | cond << "(SELECT hypernym_id FROM hypernymy WHERE "; | ||
751 | |||
752 | std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string { | ||
753 | switch (f.get_type()) | ||
754 | { | ||
755 | case filter<noun>::type::singleton: | ||
756 | { | ||
757 | bindings.emplace_back(f.get_elem()._id); | ||
758 | |||
759 | if (notlogic == f.get_notlogic()) | ||
760 | { | ||
761 | return "hyponym_id = ?"; | ||
762 | } else { | ||
763 | return "hyponym_id != ?"; | ||
764 | } | ||
765 | } | ||
766 | |||
767 | case filter<noun>::type::group: | ||
768 | { | ||
769 | bool truelogic = notlogic != f.get_notlogic(); | ||
770 | |||
771 | std::list<std::string> clauses; | ||
772 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) { | ||
773 | return recur(f2, truelogic); | ||
774 | }); | ||
775 | |||
776 | if (truelogic == f.get_orlogic()) | ||
777 | { | ||
778 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
779 | } else { | ||
780 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
781 | } | ||
782 | } | ||
783 | } | ||
784 | }; | ||
785 | |||
786 | cond << recur(_hypernym_of, _hypernym_of.get_notlogic()); | ||
787 | cond << ")"; | ||
788 | conditions.push_back(cond.str()); | ||
789 | } | ||
790 | |||
791 | if (!_full_hypernym_of.empty()) | ||
792 | { | ||
793 | std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string { | ||
794 | switch (f.get_type()) | ||
795 | { | ||
796 | case filter<noun>::type::singleton: | ||
797 | { | ||
798 | if (notlogic == f.get_notlogic()) | ||
799 | { | ||
800 | return "noun_id IN (SELECT hypernym_id FROM hypernym_tree_" + std::to_string(f.get_elem()._id) + ")"; | ||
801 | } else { | ||
802 | return "noun_id NOT IN (SELECT hypernym_id FROM hypernym_tree_" + std::to_string(f.get_elem()._id) + ")"; | ||
803 | } | ||
804 | } | ||
805 | |||
806 | case filter<noun>::type::group: | ||
807 | { | ||
808 | bool truelogic = notlogic != f.get_notlogic(); | ||
809 | |||
810 | std::list<std::string> clauses; | ||
811 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) { | ||
812 | return recur(f2, truelogic); | ||
813 | }); | ||
814 | |||
815 | if (truelogic == f.get_orlogic()) | ||
816 | { | ||
817 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
818 | } else { | ||
819 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
820 | } | ||
821 | } | ||
822 | } | ||
823 | }; | ||
824 | |||
825 | conditions.push_back(recur(_full_hypernym_of, false)); | ||
826 | } | ||
827 | |||
828 | if (!_full_hyponym_of.empty()) | ||
829 | { | ||
830 | std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string { | ||
831 | switch (f.get_type()) | ||
832 | { | ||
833 | case filter<noun>::type::singleton: | ||
834 | { | ||
835 | if (notlogic == f.get_notlogic()) | ||
836 | { | ||
837 | return "noun_id IN (SELECT hyponym_id FROM hyponym_tree_" + std::to_string(f.get_elem()._id) + ")"; | ||
838 | } else { | ||
839 | return "noun_id NOT IN (SELECT hyponym_id FROM hyponym_tree_" + std::to_string(f.get_elem()._id) + ")"; | ||
840 | } | ||
841 | } | ||
842 | |||
843 | case filter<noun>::type::group: | ||
844 | { | ||
845 | bool truelogic = notlogic != f.get_notlogic(); | ||
846 | |||
847 | std::list<std::string> clauses; | ||
848 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) { | ||
849 | return recur(f2, truelogic); | ||
850 | }); | ||
851 | |||
852 | if (truelogic == f.get_orlogic()) | ||
853 | { | ||
854 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
855 | } else { | ||
856 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
857 | } | ||
858 | } | ||
859 | } | ||
860 | }; | ||
861 | |||
862 | conditions.push_back(recur(_full_hyponym_of, false)); | ||
863 | } | ||
864 | |||
865 | if (_is_hyponym) | ||
866 | { | ||
867 | conditions.push_back("noun_id IN (SELECT hyponym_id FROM hypernymy)"); | ||
868 | } | ||
869 | |||
870 | if (!_hyponym_of.empty()) | ||
871 | { | ||
872 | std::stringstream cond; | ||
873 | if (_hyponym_of.get_notlogic()) | ||
874 | { | ||
875 | cond << "noun_id NOT IN"; | ||
876 | } else { | ||
877 | cond << "noun_id IN"; | ||
878 | } | ||
879 | |||
880 | cond << "(SELECT hyponym_id FROM hypernymy WHERE "; | ||
881 | |||
882 | std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string { | ||
883 | switch (f.get_type()) | ||
884 | { | ||
885 | case filter<noun>::type::singleton: | ||
886 | { | ||
887 | bindings.emplace_back(f.get_elem()._id); | ||
888 | |||
889 | if (notlogic == f.get_notlogic()) | ||
890 | { | ||
891 | return "hypernym_id = ?"; | ||
892 | } else { | ||
893 | return "hypernym_id != ?"; | ||
894 | } | ||
895 | } | ||
896 | |||
897 | case filter<noun>::type::group: | ||
898 | { | ||
899 | bool truelogic = notlogic != f.get_notlogic(); | ||
900 | |||
901 | std::list<std::string> clauses; | ||
902 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) { | ||
903 | return recur(f2, truelogic); | ||
904 | }); | ||
905 | |||
906 | if (truelogic == f.get_orlogic()) | ||
907 | { | ||
908 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
909 | } else { | ||
910 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
911 | } | ||
912 | } | ||
913 | } | ||
914 | }; | ||
915 | |||
916 | cond << recur(_hyponym_of, _hyponym_of.get_notlogic()); | ||
917 | cond << ")"; | ||
918 | conditions.push_back(cond.str()); | ||
919 | } | ||
920 | |||
921 | if (_is_part_meronym) | ||
922 | { | ||
923 | conditions.push_back("noun_id IN (SELECT meronym_id FROM part_meronymy)"); | ||
924 | } | ||
925 | |||
926 | if (!_part_meronym_of.empty()) | ||
927 | { | ||
928 | std::stringstream cond; | ||
929 | if (_part_meronym_of.get_notlogic()) | ||
930 | { | ||
931 | cond << "noun_id NOT IN"; | ||
932 | } else { | ||
933 | cond << "noun_id IN"; | ||
934 | } | ||
935 | |||
936 | cond << "(SELECT meronym_id FROM part_meronymy WHERE "; | ||
937 | |||
938 | std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string { | ||
939 | switch (f.get_type()) | ||
940 | { | ||
941 | case filter<noun>::type::singleton: | ||
942 | { | ||
943 | bindings.emplace_back(f.get_elem()._id); | ||
944 | |||
945 | if (notlogic == f.get_notlogic()) | ||
946 | { | ||
947 | return "holonym_id = ?"; | ||
948 | } else { | ||
949 | return "holonym_id != ?"; | ||
950 | } | ||
951 | } | ||
952 | |||
953 | case filter<noun>::type::group: | ||
954 | { | ||
955 | bool truelogic = notlogic != f.get_notlogic(); | ||
956 | |||
957 | std::list<std::string> clauses; | ||
958 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) { | ||
959 | return recur(f2, truelogic); | ||
960 | }); | ||
961 | |||
962 | if (truelogic == f.get_orlogic()) | ||
963 | { | ||
964 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
965 | } else { | ||
966 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
967 | } | ||
968 | } | ||
969 | } | ||
970 | }; | ||
971 | |||
972 | cond << recur(_part_meronym_of, _part_meronym_of.get_notlogic()); | ||
973 | cond << ")"; | ||
974 | conditions.push_back(cond.str()); | ||
975 | } | ||
976 | |||
977 | if (!_full_part_meronym_of.empty()) | ||
978 | { | ||
979 | std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string { | ||
980 | switch (f.get_type()) | ||
981 | { | ||
982 | case filter<noun>::type::singleton: | ||
983 | { | ||
984 | if (notlogic == f.get_notlogic()) | ||
985 | { | ||
986 | return "noun_id IN (SELECT meronym_id FROM part_meronym_tree_" + std::to_string(f.get_elem()._id) + ")"; | ||
987 | } else { | ||
988 | return "noun_id NOT IN (SELECT meronym_id FROM part_meronym_tree_" + std::to_string(f.get_elem()._id) + ")"; | ||
989 | } | ||
990 | } | ||
991 | |||
992 | case filter<noun>::type::group: | ||
993 | { | ||
994 | bool truelogic = notlogic != f.get_notlogic(); | ||
995 | |||
996 | std::list<std::string> clauses; | ||
997 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) { | ||
998 | return recur(f2, truelogic); | ||
999 | }); | ||
1000 | |||
1001 | if (truelogic == f.get_orlogic()) | ||
1002 | { | ||
1003 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
1004 | } else { | ||
1005 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
1006 | } | ||
1007 | } | ||
1008 | } | ||
1009 | }; | ||
1010 | |||
1011 | conditions.push_back(recur(_full_part_meronym_of, false)); | ||
1012 | } | ||
1013 | |||
1014 | if (_is_part_holonym) | ||
1015 | { | ||
1016 | conditions.push_back("noun_id IN (SELECT holonym_id FROM part_meronymy)"); | ||
1017 | } | ||
1018 | |||
1019 | if (!_part_holonym_of.empty()) | ||
1020 | { | ||
1021 | std::stringstream cond; | ||
1022 | if (_part_holonym_of.get_notlogic()) | ||
1023 | { | ||
1024 | cond << "noun_id NOT IN"; | ||
1025 | } else { | ||
1026 | cond << "noun_id IN"; | ||
1027 | } | ||
1028 | |||
1029 | cond << "(SELECT holonym_id FROM part_meronymy WHERE "; | ||
1030 | |||
1031 | std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string { | ||
1032 | switch (f.get_type()) | ||
1033 | { | ||
1034 | case filter<noun>::type::singleton: | ||
1035 | { | ||
1036 | bindings.emplace_back(f.get_elem()._id); | ||
1037 | |||
1038 | if (notlogic == f.get_notlogic()) | ||
1039 | { | ||
1040 | return "meronym_id = ?"; | ||
1041 | } else { | ||
1042 | return "meronym_id != ?"; | ||
1043 | } | ||
1044 | } | ||
1045 | |||
1046 | case filter<noun>::type::group: | ||
1047 | { | ||
1048 | bool truelogic = notlogic != f.get_notlogic(); | ||
1049 | |||
1050 | std::list<std::string> clauses; | ||
1051 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) { | ||
1052 | return recur(f2, truelogic); | ||
1053 | }); | ||
1054 | |||
1055 | if (truelogic == f.get_orlogic()) | ||
1056 | { | ||
1057 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
1058 | } else { | ||
1059 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
1060 | } | ||
1061 | } | ||
1062 | } | ||
1063 | }; | ||
1064 | |||
1065 | cond << recur(_part_holonym_of, _part_holonym_of.get_notlogic()); | ||
1066 | cond << ")"; | ||
1067 | conditions.push_back(cond.str()); | ||
1068 | } | ||
1069 | |||
1070 | if (!_full_part_holonym_of.empty()) | ||
1071 | { | ||
1072 | std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string { | ||
1073 | switch (f.get_type()) | ||
1074 | { | ||
1075 | case filter<noun>::type::singleton: | ||
1076 | { | ||
1077 | if (notlogic == f.get_notlogic()) | ||
1078 | { | ||
1079 | return "noun_id IN (SELECT holonym_id FROM part_holonym_tree_" + std::to_string(f.get_elem()._id) + ")"; | ||
1080 | } else { | ||
1081 | return "noun_id NOT IN (SELECT holonym_id FROM part_holonym_tree_" + std::to_string(f.get_elem()._id) + ")"; | ||
1082 | } | ||
1083 | } | ||
1084 | |||
1085 | case filter<noun>::type::group: | ||
1086 | { | ||
1087 | bool truelogic = notlogic != f.get_notlogic(); | ||
1088 | |||
1089 | std::list<std::string> clauses; | ||
1090 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) { | ||
1091 | return recur(f2, truelogic); | ||
1092 | }); | ||
1093 | |||
1094 | if (truelogic == f.get_orlogic()) | ||
1095 | { | ||
1096 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
1097 | } else { | ||
1098 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
1099 | } | ||
1100 | } | ||
1101 | } | ||
1102 | }; | ||
1103 | |||
1104 | conditions.push_back(recur(_full_part_holonym_of, false)); | ||
1105 | } | ||
1106 | |||
1107 | if (_is_substance_meronym) | ||
1108 | { | ||
1109 | conditions.push_back("noun_id IN (SELECT meronym_id FROM substance_meronymy)"); | ||
1110 | } | ||
1111 | |||
1112 | if (!_substance_meronym_of.empty()) | ||
1113 | { | ||
1114 | std::stringstream cond; | ||
1115 | if (_substance_meronym_of.get_notlogic()) | ||
1116 | { | ||
1117 | cond << "noun_id NOT IN"; | ||
1118 | } else { | ||
1119 | cond << "noun_id IN"; | ||
1120 | } | ||
1121 | |||
1122 | cond << "(SELECT meronym_id FROM substance_meronymy WHERE "; | ||
1123 | |||
1124 | std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string { | ||
1125 | switch (f.get_type()) | ||
1126 | { | ||
1127 | case filter<noun>::type::singleton: | ||
1128 | { | ||
1129 | bindings.emplace_back(f.get_elem()._id); | ||
1130 | |||
1131 | if (notlogic == f.get_notlogic()) | ||
1132 | { | ||
1133 | return "holonym_id = ?"; | ||
1134 | } else { | ||
1135 | return "holonym_id != ?"; | ||
1136 | } | ||
1137 | } | ||
1138 | |||
1139 | case filter<noun>::type::group: | ||
1140 | { | ||
1141 | bool truelogic = notlogic != f.get_notlogic(); | ||
1142 | |||
1143 | std::list<std::string> clauses; | ||
1144 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) { | ||
1145 | return recur(f2, truelogic); | ||
1146 | }); | ||
1147 | |||
1148 | if (truelogic == f.get_orlogic()) | ||
1149 | { | ||
1150 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
1151 | } else { | ||
1152 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
1153 | } | ||
1154 | } | ||
1155 | } | ||
1156 | }; | ||
1157 | |||
1158 | cond << recur(_substance_meronym_of, _substance_meronym_of.get_notlogic()); | ||
1159 | cond << ")"; | ||
1160 | conditions.push_back(cond.str()); | ||
1161 | } | ||
1162 | |||
1163 | if (!_full_substance_meronym_of.empty()) | ||
1164 | { | ||
1165 | std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string { | ||
1166 | switch (f.get_type()) | ||
1167 | { | ||
1168 | case filter<noun>::type::singleton: | ||
1169 | { | ||
1170 | if (notlogic == f.get_notlogic()) | ||
1171 | { | ||
1172 | return "noun_id IN (SELECT meronym_id FROM substance_meronym_tree_" + std::to_string(f.get_elem()._id) + ")"; | ||
1173 | } else { | ||
1174 | return "noun_id NOT IN (SELECT meronym_id FROM substance_meronym_tree_" + std::to_string(f.get_elem()._id) + ")"; | ||
1175 | } | ||
1176 | } | ||
1177 | |||
1178 | case filter<noun>::type::group: | ||
1179 | { | ||
1180 | bool truelogic = notlogic != f.get_notlogic(); | ||
1181 | |||
1182 | std::list<std::string> clauses; | ||
1183 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) { | ||
1184 | return recur(f2, truelogic); | ||
1185 | }); | ||
1186 | |||
1187 | if (truelogic == f.get_orlogic()) | ||
1188 | { | ||
1189 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
1190 | } else { | ||
1191 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
1192 | } | ||
1193 | } | ||
1194 | } | ||
1195 | }; | ||
1196 | |||
1197 | conditions.push_back(recur(_full_substance_meronym_of, false)); | ||
1198 | } | ||
1199 | |||
1200 | if (_is_substance_holonym) | ||
1201 | { | ||
1202 | conditions.push_back("noun_id IN (SELECT holonym_id FROM substance_meronymy)"); | ||
1203 | } | ||
1204 | |||
1205 | if (!_substance_holonym_of.empty()) | ||
1206 | { | ||
1207 | std::stringstream cond; | ||
1208 | if (_substance_holonym_of.get_notlogic()) | ||
1209 | { | ||
1210 | cond << "noun_id NOT IN"; | ||
1211 | } else { | ||
1212 | cond << "noun_id IN"; | ||
1213 | } | ||
1214 | |||
1215 | cond << "(SELECT holonym_id FROM substance_meronymy WHERE "; | ||
1216 | |||
1217 | std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string { | ||
1218 | switch (f.get_type()) | ||
1219 | { | ||
1220 | case filter<noun>::type::singleton: | ||
1221 | { | ||
1222 | bindings.emplace_back(f.get_elem()._id); | ||
1223 | |||
1224 | if (notlogic == f.get_notlogic()) | ||
1225 | { | ||
1226 | return "meronym_id = ?"; | ||
1227 | } else { | ||
1228 | return "meronym_id != ?"; | ||
1229 | } | ||
1230 | } | ||
1231 | |||
1232 | case filter<noun>::type::group: | ||
1233 | { | ||
1234 | bool truelogic = notlogic != f.get_notlogic(); | ||
1235 | |||
1236 | std::list<std::string> clauses; | ||
1237 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) { | ||
1238 | return recur(f2, truelogic); | ||
1239 | }); | ||
1240 | |||
1241 | if (truelogic == f.get_orlogic()) | ||
1242 | { | ||
1243 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
1244 | } else { | ||
1245 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
1246 | } | ||
1247 | } | ||
1248 | } | ||
1249 | }; | ||
1250 | |||
1251 | cond << recur(_substance_holonym_of, _substance_holonym_of.get_notlogic()); | ||
1252 | cond << ")"; | ||
1253 | conditions.push_back(cond.str()); | ||
1254 | } | ||
1255 | |||
1256 | if (!_full_substance_holonym_of.empty()) | ||
1257 | { | ||
1258 | std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string { | ||
1259 | switch (f.get_type()) | ||
1260 | { | ||
1261 | case filter<noun>::type::singleton: | ||
1262 | { | ||
1263 | if (notlogic == f.get_notlogic()) | ||
1264 | { | ||
1265 | return "noun_id IN (SELECT holonym_id FROM substance_holonym_tree_" + std::to_string(f.get_elem()._id) + ")"; | ||
1266 | } else { | ||
1267 | return "noun_id NOT IN (SELECT holonym_id FROM substance_holonym_tree_" + std::to_string(f.get_elem()._id) + ")"; | ||
1268 | } | ||
1269 | } | ||
1270 | |||
1271 | case filter<noun>::type::group: | ||
1272 | { | ||
1273 | bool truelogic = notlogic != f.get_notlogic(); | ||
1274 | |||
1275 | std::list<std::string> clauses; | ||
1276 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) { | ||
1277 | return recur(f2, truelogic); | ||
1278 | }); | ||
1279 | |||
1280 | if (truelogic == f.get_orlogic()) | ||
1281 | { | ||
1282 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
1283 | } else { | ||
1284 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
1285 | } | ||
1286 | } | ||
1287 | } | ||
1288 | }; | ||
1289 | |||
1290 | conditions.push_back(recur(_full_substance_holonym_of, false)); | ||
1291 | } | ||
1292 | |||
1293 | if (_is_member_meronym) | ||
1294 | { | ||
1295 | conditions.push_back("noun_id IN (SELECT meronym_id FROM member_meronymy)"); | ||
1296 | } | ||
1297 | |||
1298 | if (!_member_meronym_of.empty()) | ||
1299 | { | ||
1300 | std::stringstream cond; | ||
1301 | if (_member_meronym_of.get_notlogic()) | ||
1302 | { | ||
1303 | cond << "noun_id NOT IN"; | ||
1304 | } else { | ||
1305 | cond << "noun_id IN"; | ||
1306 | } | ||
1307 | |||
1308 | cond << "(SELECT meronym_id FROM member_meronymy WHERE "; | ||
1309 | |||
1310 | std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string { | ||
1311 | switch (f.get_type()) | ||
1312 | { | ||
1313 | case filter<noun>::type::singleton: | ||
1314 | { | ||
1315 | bindings.emplace_back(f.get_elem()._id); | ||
1316 | |||
1317 | if (notlogic == f.get_notlogic()) | ||
1318 | { | ||
1319 | return "holonym_id = ?"; | ||
1320 | } else { | ||
1321 | return "holonym_id != ?"; | ||
1322 | } | ||
1323 | } | ||
1324 | |||
1325 | case filter<noun>::type::group: | ||
1326 | { | ||
1327 | bool truelogic = notlogic != f.get_notlogic(); | ||
1328 | |||
1329 | std::list<std::string> clauses; | ||
1330 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) { | ||
1331 | return recur(f2, truelogic); | ||
1332 | }); | ||
1333 | |||
1334 | if (truelogic == f.get_orlogic()) | ||
1335 | { | ||
1336 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
1337 | } else { | ||
1338 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
1339 | } | ||
1340 | } | ||
1341 | } | ||
1342 | }; | ||
1343 | |||
1344 | cond << recur(_member_meronym_of, _member_meronym_of.get_notlogic()); | ||
1345 | cond << ")"; | ||
1346 | conditions.push_back(cond.str()); | ||
1347 | } | ||
1348 | |||
1349 | if (!_full_member_meronym_of.empty()) | ||
1350 | { | ||
1351 | std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string { | ||
1352 | switch (f.get_type()) | ||
1353 | { | ||
1354 | case filter<noun>::type::singleton: | ||
1355 | { | ||
1356 | if (notlogic == f.get_notlogic()) | ||
1357 | { | ||
1358 | return "noun_id IN (SELECT meronym_id FROM member_meronym_tree_" + std::to_string(f.get_elem()._id) + ")"; | ||
1359 | } else { | ||
1360 | return "noun_id NOT IN (SELECT meronym_id FROM member_meronym_tree_" + std::to_string(f.get_elem()._id) + ")"; | ||
1361 | } | ||
1362 | } | ||
1363 | |||
1364 | case filter<noun>::type::group: | ||
1365 | { | ||
1366 | bool truelogic = notlogic != f.get_notlogic(); | ||
1367 | |||
1368 | std::list<std::string> clauses; | ||
1369 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) { | ||
1370 | return recur(f2, truelogic); | ||
1371 | }); | ||
1372 | |||
1373 | if (truelogic == f.get_orlogic()) | ||
1374 | { | ||
1375 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
1376 | } else { | ||
1377 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
1378 | } | ||
1379 | } | ||
1380 | } | ||
1381 | }; | ||
1382 | |||
1383 | conditions.push_back(recur(_full_member_meronym_of, false)); | ||
1384 | } | ||
1385 | |||
1386 | if (_is_member_holonym) | ||
1387 | { | ||
1388 | conditions.push_back("noun_id IN (SELECT holonym_id FROM member_meronym)"); | ||
1389 | } | ||
1390 | |||
1391 | if (!_member_holonym_of.empty()) | ||
1392 | { | ||
1393 | std::stringstream cond; | ||
1394 | if (_member_holonym_of.get_notlogic()) | ||
1395 | { | ||
1396 | cond << "noun_id NOT IN"; | ||
1397 | } else { | ||
1398 | cond << "noun_id IN"; | ||
1399 | } | ||
1400 | |||
1401 | cond << "(SELECT holonym_id FROM member_meronymy WHERE "; | ||
1402 | |||
1403 | std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string { | ||
1404 | switch (f.get_type()) | ||
1405 | { | ||
1406 | case filter<noun>::type::singleton: | ||
1407 | { | ||
1408 | bindings.emplace_back(f.get_elem()._id); | ||
1409 | |||
1410 | if (notlogic == f.get_notlogic()) | ||
1411 | { | ||
1412 | return "meronym_id = ?"; | ||
1413 | } else { | ||
1414 | return "meronym_id != ?"; | ||
1415 | } | ||
1416 | } | ||
1417 | |||
1418 | case filter<noun>::type::group: | ||
1419 | { | ||
1420 | bool truelogic = notlogic != f.get_notlogic(); | ||
1421 | |||
1422 | std::list<std::string> clauses; | ||
1423 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) { | ||
1424 | return recur(f2, truelogic); | ||
1425 | }); | ||
1426 | |||
1427 | if (truelogic == f.get_orlogic()) | ||
1428 | { | ||
1429 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
1430 | } else { | ||
1431 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
1432 | } | ||
1433 | } | ||
1434 | } | ||
1435 | }; | ||
1436 | |||
1437 | cond << recur(_member_holonym_of, _member_holonym_of.get_notlogic()); | ||
1438 | cond << ")"; | ||
1439 | conditions.push_back(cond.str()); | ||
1440 | } | ||
1441 | |||
1442 | if (!_full_member_holonym_of.empty()) | ||
1443 | { | ||
1444 | std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string { | ||
1445 | switch (f.get_type()) | ||
1446 | { | ||
1447 | case filter<noun>::type::singleton: | ||
1448 | { | ||
1449 | if (notlogic == f.get_notlogic()) | ||
1450 | { | ||
1451 | return "noun_id IN (SELECT holonym_id FROM member_holonym_tree_" + std::to_string(f.get_elem()._id) + ")"; | ||
1452 | } else { | ||
1453 | return "noun_id NOT IN (SELECT holonym_id FROM member_holonym_tree_" + std::to_string(f.get_elem()._id) + ")"; | ||
1454 | } | ||
1455 | } | ||
1456 | |||
1457 | case filter<noun>::type::group: | ||
1458 | { | ||
1459 | bool truelogic = notlogic != f.get_notlogic(); | ||
1460 | |||
1461 | std::list<std::string> clauses; | ||
1462 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) { | ||
1463 | return recur(f2, truelogic); | ||
1464 | }); | ||
1465 | |||
1466 | if (truelogic == f.get_orlogic()) | ||
1467 | { | ||
1468 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
1469 | } else { | ||
1470 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
1471 | } | ||
1472 | } | ||
1473 | } | ||
1474 | }; | ||
1475 | |||
1476 | conditions.push_back(recur(_full_member_holonym_of, false)); | ||
1477 | } | ||
1478 | |||
1479 | if (_is_proper) | ||
1480 | { | ||
1481 | conditions.push_back("proper = 1"); | ||
1482 | } | ||
1483 | |||
1484 | if (_is_not_proper) | ||
1485 | { | ||
1486 | conditions.push_back("proper = 0"); | ||
1487 | } | ||
1488 | |||
1489 | if (_is_instance) | ||
1490 | { | ||
1491 | conditions.push_back("noun_id IN (SELECT instance_id FROM instantiation)"); | ||
1492 | } | ||
1493 | |||
1494 | if (!_instance_of.empty()) | ||
1495 | { | ||
1496 | std::stringstream cond; | ||
1497 | if (_instance_of.get_notlogic()) | ||
1498 | { | ||
1499 | cond << "noun_id NOT IN"; | ||
1500 | } else { | ||
1501 | cond << "noun_id IN"; | ||
1502 | } | ||
1503 | |||
1504 | cond << "(SELECT instance_id FROM instantiation WHERE "; | ||
1505 | |||
1506 | std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string { | ||
1507 | switch (f.get_type()) | ||
1508 | { | ||
1509 | case filter<noun>::type::singleton: | ||
1510 | { | ||
1511 | bindings.emplace_back(f.get_elem()._id); | ||
1512 | |||
1513 | if (notlogic == f.get_notlogic()) | ||
1514 | { | ||
1515 | return "class_id = ?"; | ||
1516 | } else { | ||
1517 | return "class_id != ?"; | ||
1518 | } | ||
1519 | } | ||
1520 | |||
1521 | case filter<noun>::type::group: | ||
1522 | { | ||
1523 | bool truelogic = notlogic != f.get_notlogic(); | ||
1524 | |||
1525 | std::list<std::string> clauses; | ||
1526 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) { | ||
1527 | return recur(f2, truelogic); | ||
1528 | }); | ||
1529 | |||
1530 | if (truelogic == f.get_orlogic()) | ||
1531 | { | ||
1532 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
1533 | } else { | ||
1534 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
1535 | } | ||
1536 | } | ||
1537 | } | ||
1538 | }; | ||
1539 | |||
1540 | cond << recur(_instance_of, _instance_of.get_notlogic()); | ||
1541 | cond << ")"; | ||
1542 | conditions.push_back(cond.str()); | ||
1543 | } | ||
1544 | |||
1545 | if (_is_class) | ||
1546 | { | ||
1547 | conditions.push_back("noun_id IN (SELECT class_id FROM instantiation)"); | ||
1548 | } | ||
1549 | |||
1550 | if (!_class_of.empty()) | ||
1551 | { | ||
1552 | std::stringstream cond; | ||
1553 | if (_class_of.get_notlogic()) | ||
1554 | { | ||
1555 | cond << "noun_id NOT IN"; | ||
1556 | } else { | ||
1557 | cond << "noun_id IN"; | ||
1558 | } | ||
1559 | |||
1560 | cond << "(SELECT class_id FROM instantiation WHERE "; | ||
1561 | |||
1562 | std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string { | ||
1563 | switch (f.get_type()) | ||
1564 | { | ||
1565 | case filter<noun>::type::singleton: | ||
1566 | { | ||
1567 | bindings.emplace_back(f.get_elem()._id); | ||
1568 | |||
1569 | if (notlogic == f.get_notlogic()) | ||
1570 | { | ||
1571 | return "instance_id = ?"; | ||
1572 | } else { | ||
1573 | return "instance_id != ?"; | ||
1574 | } | ||
1575 | } | ||
1576 | |||
1577 | case filter<noun>::type::group: | ||
1578 | { | ||
1579 | bool truelogic = notlogic != f.get_notlogic(); | ||
1580 | |||
1581 | std::list<std::string> clauses; | ||
1582 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) { | ||
1583 | return recur(f2, truelogic); | ||
1584 | }); | ||
1585 | |||
1586 | if (truelogic == f.get_orlogic()) | ||
1587 | { | ||
1588 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
1589 | } else { | ||
1590 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
1591 | } | ||
1592 | } | ||
1593 | } | ||
1594 | }; | ||
1595 | |||
1596 | cond << recur(_class_of, _class_of.get_notlogic()); | ||
1597 | cond << ")"; | ||
1598 | conditions.push_back(cond.str()); | ||
1599 | } | ||
1600 | |||
1601 | if (_has_synonyms) | ||
1602 | { | ||
1603 | conditions.push_back("noun_id IN (SELECT noun_2_id FROM noun_synonymy)"); | ||
1604 | } | ||
1605 | |||
1606 | if (!_synonym_of.empty()) | ||
1607 | { | ||
1608 | std::stringstream cond; | ||
1609 | if (_synonym_of.get_notlogic()) | ||
1610 | { | ||
1611 | cond << "noun_id NOT IN"; | ||
1612 | } else { | ||
1613 | cond << "noun_id IN"; | ||
1614 | } | ||
1615 | |||
1616 | cond << "(SELECT noun_2_id FROM noun_synonymy WHERE "; | ||
1617 | |||
1618 | std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string { | ||
1619 | switch (f.get_type()) | ||
1620 | { | ||
1621 | case filter<noun>::type::singleton: | ||
1622 | { | ||
1623 | bindings.emplace_back(f.get_elem()._id); | ||
1624 | |||
1625 | if (notlogic == f.get_notlogic()) | ||
1626 | { | ||
1627 | return "noun_1_id = ?"; | ||
1628 | } else { | ||
1629 | return "noun_1_id != ?"; | ||
1630 | } | ||
1631 | } | ||
1632 | |||
1633 | case filter<noun>::type::group: | ||
1634 | { | ||
1635 | bool truelogic = notlogic != f.get_notlogic(); | ||
1636 | |||
1637 | std::list<std::string> clauses; | ||
1638 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) { | ||
1639 | return recur(f2, truelogic); | ||
1640 | }); | ||
1641 | |||
1642 | if (truelogic == f.get_orlogic()) | ||
1643 | { | ||
1644 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
1645 | } else { | ||
1646 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
1647 | } | ||
1648 | } | ||
1649 | } | ||
1650 | }; | ||
1651 | |||
1652 | cond << recur(_synonym_of, _synonym_of.get_notlogic()); | ||
1653 | cond << ")"; | ||
1654 | conditions.push_back(cond.str()); | ||
1655 | } | ||
1656 | |||
1657 | if (_has_antonyms) | ||
1658 | { | ||
1659 | conditions.push_back("noun_id IN (SELECT noun_2_id FROM noun_antonymy)"); | ||
1660 | } | ||
1661 | |||
1662 | if (!_antonym_of.empty()) | ||
1663 | { | ||
1664 | std::stringstream cond; | ||
1665 | if (_antonym_of.get_notlogic()) | ||
1666 | { | ||
1667 | cond << "noun_id NOT IN"; | ||
1668 | } else { | ||
1669 | cond << "noun_id IN"; | ||
1670 | } | ||
1671 | |||
1672 | cond << "(SELECT noun_2_id FROM noun_antonymy WHERE "; | ||
1673 | |||
1674 | std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string { | ||
1675 | switch (f.get_type()) | ||
1676 | { | ||
1677 | case filter<noun>::type::singleton: | ||
1678 | { | ||
1679 | bindings.emplace_back(f.get_elem()._id); | ||
1680 | |||
1681 | if (notlogic == f.get_notlogic()) | ||
1682 | { | ||
1683 | return "noun_1_id = ?"; | ||
1684 | } else { | ||
1685 | return "noun_1_id != ?"; | ||
1686 | } | ||
1687 | } | ||
1688 | |||
1689 | case filter<noun>::type::group: | ||
1690 | { | ||
1691 | bool truelogic = notlogic != f.get_notlogic(); | ||
1692 | |||
1693 | std::list<std::string> clauses; | ||
1694 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) { | ||
1695 | return recur(f2, truelogic); | ||
1696 | }); | ||
1697 | |||
1698 | if (truelogic == f.get_orlogic()) | ||
1699 | { | ||
1700 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
1701 | } else { | ||
1702 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
1703 | } | ||
1704 | } | ||
1705 | } | ||
1706 | }; | ||
1707 | |||
1708 | cond << recur(_antonym_of, _antonym_of.get_notlogic()); | ||
1709 | cond << ")"; | ||
1710 | conditions.push_back(cond.str()); | ||
1711 | } | ||
1712 | |||
1713 | if (_has_pertainym) | ||
1714 | { | ||
1715 | conditions.push_back("noun_id IN (SELECT noun_id FROM pertainymy)"); | ||
1716 | } | ||
1717 | |||
1718 | if (!_anti_pertainym_of.empty()) | ||
1719 | { | ||
1720 | std::stringstream cond; | ||
1721 | if (_anti_pertainym_of.get_notlogic()) | ||
1722 | { | ||
1723 | cond << "noun_id NOT IN"; | ||
1724 | } else { | ||
1725 | cond << "noun_id IN"; | ||
1726 | } | ||
1727 | |||
1728 | cond << "(SELECT noun_id FROM pertainymy WHERE "; | ||
1729 | |||
1730 | std::function<std::string (filter<adjective>, bool)> recur = [&] (filter<adjective> f, bool notlogic) -> std::string { | ||
1731 | switch (f.get_type()) | ||
1732 | { | ||
1733 | case filter<adjective>::type::singleton: | ||
1734 | { | ||
1735 | bindings.emplace_back(f.get_elem()._id); | ||
1736 | |||
1737 | if (notlogic == f.get_notlogic()) | ||
1738 | { | ||
1739 | return "pertainym_id = ?"; | ||
1740 | } else { | ||
1741 | return "pertainym_id != ?"; | ||
1742 | } | ||
1743 | } | ||
1744 | |||
1745 | case filter<adjective>::type::group: | ||
1746 | { | ||
1747 | bool truelogic = notlogic != f.get_notlogic(); | ||
1748 | |||
1749 | std::list<std::string> clauses; | ||
1750 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<adjective> f2) { | ||
1751 | return recur(f2, truelogic); | ||
1752 | }); | ||
1753 | |||
1754 | if (truelogic == f.get_orlogic()) | ||
1755 | { | ||
1756 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
1757 | } else { | ||
1758 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
1759 | } | ||
1760 | } | ||
1761 | } | ||
1762 | }; | ||
1763 | |||
1764 | cond << recur(_anti_pertainym_of, _anti_pertainym_of.get_notlogic()); | ||
1765 | cond << ")"; | ||
1766 | conditions.push_back(cond.str()); | ||
1767 | } | ||
1768 | |||
1769 | if (_is_attribute) | ||
1770 | { | ||
1771 | conditions.push_back("noun_id IN (SELECT noun_id FROM variation)"); | ||
1772 | } | ||
1773 | |||
1774 | if (!_attribute_of.empty()) | ||
1775 | { | ||
1776 | std::stringstream cond; | ||
1777 | if (_attribute_of.get_notlogic()) | ||
1778 | { | ||
1779 | cond << "noun_id NOT IN"; | ||
1780 | } else { | ||
1781 | cond << "noun_id IN"; | ||
1782 | } | ||
1783 | |||
1784 | cond << "(SELECT noun_id FROM variation WHERE "; | ||
1785 | |||
1786 | std::function<std::string (filter<adjective>, bool)> recur = [&] (filter<adjective> f, bool notlogic) -> std::string { | ||
1787 | switch (f.get_type()) | ||
1788 | { | ||
1789 | case filter<adjective>::type::singleton: | ||
1790 | { | ||
1791 | bindings.emplace_back(f.get_elem()._id); | ||
1792 | |||
1793 | if (notlogic == f.get_notlogic()) | ||
1794 | { | ||
1795 | return "adjective_id = ?"; | ||
1796 | } else { | ||
1797 | return "adjective_id != ?"; | ||
1798 | } | ||
1799 | } | ||
1800 | |||
1801 | case filter<adjective>::type::group: | ||
1802 | { | ||
1803 | bool truelogic = notlogic != f.get_notlogic(); | ||
1804 | |||
1805 | std::list<std::string> clauses; | ||
1806 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<adjective> f2) { | ||
1807 | return recur(f2, truelogic); | ||
1808 | }); | ||
1809 | |||
1810 | if (truelogic == f.get_orlogic()) | ||
1811 | { | ||
1812 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
1813 | } else { | ||
1814 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
1815 | } | ||
1816 | } | ||
1817 | } | ||
1818 | }; | ||
1819 | |||
1820 | cond << recur(_attribute_of, _attribute_of.get_notlogic()); | ||
1821 | cond << ")"; | ||
1822 | conditions.push_back(cond.str()); | ||
1823 | } | ||
1824 | |||
1825 | if (_at_least_n_images != unlimited) | ||
1826 | { | ||
1827 | conditions.push_back("images >= ?"); | ||
1828 | bindings.emplace_back(_at_least_n_images); | ||
1829 | } | ||
1830 | |||
1831 | if (!_with_wnid.empty()) | ||
1832 | { | ||
1833 | std::vector<std::string> clauses(_with_wnid.size(), "wnid = ?"); | ||
1834 | std::string cond = verbly::implode(std::begin(clauses), std::end(clauses), " OR "); | ||
1835 | conditions.push_back("(" + cond + ")"); | ||
1836 | |||
1837 | for (auto wnid : _with_wnid) | ||
1838 | { | ||
1839 | bindings.emplace_back(wnid); | ||
1840 | } | ||
1841 | } | ||
1842 | |||
1843 | /* | ||
1844 | if (!_derived_from_adjective.empty()) | ||
1845 | { | ||
1846 | std::list<std::string> clauses(_derived_from_adjective.size(), "adjective_id = @DERADJ"); | ||
1847 | std::string cond = "noun_id IN (SELECT noun_id FROM noun_adjective_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
1848 | conditions.push_back(cond); | ||
1849 | } | ||
1850 | |||
1851 | if (!_not_derived_from_adjective.empty()) | ||
1852 | { | ||
1853 | std::list<std::string> clauses(_not_derived_from_adjective.size(), "adjective_id = @NDERADJ"); | ||
1854 | std::string cond = "noun_id NOT IN (SELECT noun_id FROM noun_adjective_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
1855 | conditions.push_back(cond); | ||
1856 | } | ||
1857 | |||
1858 | if (!_derived_from_adverb.empty()) | ||
1859 | { | ||
1860 | std::list<std::string> clauses(_derived_from_adverb.size(), "adverb_id = @DERADV"); | ||
1861 | std::string cond = "noun_id IN (SELECT noun_id FROM noun_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
1862 | conditions.push_back(cond); | ||
1863 | } | ||
1864 | |||
1865 | if (!_not_derived_from_adverb.empty()) | ||
1866 | { | ||
1867 | std::list<std::string> clauses(_not_derived_from_adverb.size(), "adverb_id = @NDERADV"); | ||
1868 | std::string cond = "noun_id NOT IN (SELECT noun_id FROM noun_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
1869 | conditions.push_back(cond); | ||
1870 | } | ||
1871 | |||
1872 | if (!_derived_from_noun.empty()) | ||
1873 | { | ||
1874 | std::list<std::string> clauses(_derived_from_noun.size(), "noun_2_id = @DERN"); | ||
1875 | std::string cond = "noun_id IN (SELECT noun_1_id FROM noun_noun_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
1876 | conditions.push_back(cond); | ||
1877 | } | ||
1878 | |||
1879 | if (!_not_derived_from_noun.empty()) | ||
1880 | { | ||
1881 | std::list<std::string> clauses(_not_derived_from_noun.size(), "noun_2_id = @NDERN"); | ||
1882 | std::string cond = "noun_id NOT IN (SELECT noun_1_id FROM noun_noun_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
1883 | conditions.push_back(cond); | ||
1884 | } | ||
1885 | */ | ||
1886 | if (!conditions.empty()) | ||
1887 | { | ||
1888 | construct << " WHERE "; | ||
1889 | construct << verbly::implode(std::begin(conditions), std::end(conditions), " AND "); | ||
1890 | } | ||
1891 | |||
1892 | if (_random) | ||
1893 | { | ||
1894 | construct << " ORDER BY RANDOM()"; | ||
1895 | } | ||
1896 | |||
1897 | if (_limit != unlimited) | ||
1898 | { | ||
1899 | construct << " LIMIT " << _limit; | ||
1900 | } | ||
1901 | |||
1902 | sqlite3_stmt* ppstmt; | ||
1903 | std::string query = construct.str(); | ||
1904 | if (sqlite3_prepare_v2(_data.ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
1905 | { | ||
1906 | throw std::runtime_error(sqlite3_errmsg(_data.ppdb)); | ||
1907 | } | ||
1908 | |||
1909 | int i = 1; | ||
1910 | for (auto& binding : bindings) | ||
1911 | { | ||
1912 | switch (binding.get_type()) | ||
1913 | { | ||
1914 | case binding::type::integer: | ||
1915 | { | ||
1916 | sqlite3_bind_int(ppstmt, i, binding.get_integer()); | ||
1917 | |||
1918 | break; | ||
1919 | } | ||
1920 | |||
1921 | case binding::type::string: | ||
1922 | { | ||
1923 | sqlite3_bind_text(ppstmt, i, binding.get_string().c_str(), binding.get_string().length(), SQLITE_TRANSIENT); | ||
1924 | |||
1925 | break; | ||
1926 | } | ||
1927 | } | ||
1928 | |||
1929 | i++; | ||
1930 | } | ||
1931 | |||
1932 | /* | ||
1933 | for (auto adj : _derived_from_adjective) | ||
1934 | { | ||
1935 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@DERADJ"), adj._id); | ||
1936 | } | ||
1937 | |||
1938 | for (auto adj : _not_derived_from_adjective) | ||
1939 | { | ||
1940 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@NDERADJ"), adj._id); | ||
1941 | } | ||
1942 | |||
1943 | for (auto adv : _derived_from_adverb) | ||
1944 | { | ||
1945 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@DERADV"), adv._id); | ||
1946 | } | ||
1947 | |||
1948 | for (auto adv : _not_derived_from_adverb) | ||
1949 | { | ||
1950 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@NDERADV"), adv._id); | ||
1951 | } | ||
1952 | |||
1953 | for (auto n : _derived_from_noun) | ||
1954 | { | ||
1955 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@DERN"), n._id); | ||
1956 | } | ||
1957 | |||
1958 | for (auto n : _not_derived_from_noun) | ||
1959 | { | ||
1960 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@NDERN"), n._id); | ||
1961 | } | ||
1962 | */ | ||
1963 | std::list<noun> output; | ||
1964 | while (sqlite3_step(ppstmt) == SQLITE_ROW) | ||
1965 | { | ||
1966 | noun tnc {_data, sqlite3_column_int(ppstmt, 0)}; | ||
1967 | tnc._singular = std::string(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 1))); | ||
1968 | |||
1969 | if (sqlite3_column_type(ppstmt, 2) != SQLITE_NULL) | ||
1970 | { | ||
1971 | tnc._plural = std::string(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 2))); | ||
1972 | } | ||
1973 | |||
1974 | tnc._wnid = sqlite3_column_int(ppstmt, 3); | ||
1975 | |||
1976 | output.push_back(tnc); | ||
1977 | } | ||
1978 | |||
1979 | sqlite3_finalize(ppstmt); | ||
1980 | |||
1981 | for (auto& noun : output) | ||
1982 | { | ||
1983 | query = "SELECT pronunciation, prerhyme, rhyme FROM noun_pronunciations WHERE noun_id = ?"; | ||
1984 | if (sqlite3_prepare_v2(_data.ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
1985 | { | ||
1986 | throw std::runtime_error(sqlite3_errmsg(_data.ppdb)); | ||
1987 | } | ||
1988 | |||
1989 | sqlite3_bind_int(ppstmt, 1, noun._id); | ||
1990 | |||
1991 | while (sqlite3_step(ppstmt) == SQLITE_ROW) | ||
1992 | { | ||
1993 | std::string pronunciation(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 0))); | ||
1994 | auto phonemes = verbly::split<std::list<std::string>>(pronunciation, " "); | ||
1995 | |||
1996 | noun.pronunciations.push_back(phonemes); | ||
1997 | |||
1998 | if ((sqlite3_column_type(ppstmt, 1) != SQLITE_NULL) && (sqlite3_column_type(ppstmt, 2) != SQLITE_NULL)) | ||
1999 | { | ||
2000 | std::string prerhyme(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 1))); | ||
2001 | std::string rhyming(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 2))); | ||
2002 | |||
2003 | noun.rhymes.emplace_back(prerhyme, rhyming); | ||
2004 | } | ||
2005 | } | ||
2006 | |||
2007 | sqlite3_finalize(ppstmt); | ||
2008 | } | ||
2009 | |||
2010 | return output; | ||
2011 | } | ||
2012 | |||
2013 | }; | ||
diff --git a/lib/noun_query.h b/lib/noun_query.h deleted file mode 100644 index 74df260..0000000 --- a/lib/noun_query.h +++ /dev/null | |||
@@ -1,180 +0,0 @@ | |||
1 | #ifndef NOUN_QUERY_H_5DE51DD7 | ||
2 | #define NOUN_QUERY_H_5DE51DD7 | ||
3 | |||
4 | namespace verbly { | ||
5 | |||
6 | class noun_query { | ||
7 | public: | ||
8 | noun_query(const data& _data); | ||
9 | |||
10 | noun_query& limit(int _limit); | ||
11 | noun_query& random(); | ||
12 | noun_query& except(const noun& _word); | ||
13 | noun_query& rhymes_with(const word& _word); | ||
14 | noun_query& rhymes_with(rhyme _r); | ||
15 | noun_query& has_pronunciation(); | ||
16 | noun_query& has_rhyming_noun(); | ||
17 | noun_query& has_rhyming_adjective(); | ||
18 | noun_query& has_rhyming_adverb(); | ||
19 | noun_query& has_rhyming_verb(); | ||
20 | noun_query& with_stress(filter<std::vector<bool>> _arg); | ||
21 | |||
22 | noun_query& with_singular_form(std::string _arg); | ||
23 | noun_query& with_prefix(filter<std::string> _f); | ||
24 | noun_query& with_suffix(filter<std::string> _f); | ||
25 | |||
26 | noun_query& requires_plural_form(); | ||
27 | |||
28 | noun_query& with_complexity(int _arg); | ||
29 | |||
30 | noun_query& is_hypernym(); | ||
31 | noun_query& hypernym_of(filter<noun> _f); | ||
32 | noun_query& full_hypernym_of(filter<noun> _f); | ||
33 | |||
34 | noun_query& is_hyponym(); | ||
35 | noun_query& hyponym_of(filter<noun> _f); | ||
36 | noun_query& full_hyponym_of(filter<noun> _f); | ||
37 | |||
38 | noun_query& is_part_meronym(); | ||
39 | noun_query& part_meronym_of(filter<noun> _f); | ||
40 | noun_query& full_part_meronym_of(filter<noun> _f); | ||
41 | |||
42 | noun_query& is_part_holonym(); | ||
43 | noun_query& part_holonym_of(filter<noun> _f); | ||
44 | noun_query& full_part_holonym_of(filter<noun> _f); | ||
45 | |||
46 | noun_query& is_substance_meronym(); | ||
47 | noun_query& substance_meronym_of(filter<noun> _f); | ||
48 | noun_query& full_substance_meronym_of(filter<noun> _f); | ||
49 | |||
50 | noun_query& is_substance_holonym(); | ||
51 | noun_query& substance_holonym_of(filter<noun> _f); | ||
52 | noun_query& full_substance_holonym_of(filter<noun> _f); | ||
53 | |||
54 | noun_query& is_member_meronym(); | ||
55 | noun_query& member_meronym_of(filter<noun> _f); | ||
56 | noun_query& full_member_meronym_of(filter<noun> _f); | ||
57 | |||
58 | noun_query& is_member_holonym(); | ||
59 | noun_query& member_holonym_of(filter<noun> _f); | ||
60 | noun_query& full_member_holonym_of(filter<noun> _f); | ||
61 | |||
62 | noun_query& is_proper(); | ||
63 | noun_query& is_not_proper(); | ||
64 | |||
65 | noun_query& is_instance(); | ||
66 | noun_query& instance_of(filter<noun> _f); | ||
67 | |||
68 | noun_query& is_class(); | ||
69 | noun_query& class_of(filter<noun> _f); | ||
70 | |||
71 | noun_query& has_synonyms(); | ||
72 | noun_query& synonym_of(filter<noun> _f); | ||
73 | |||
74 | noun_query& has_antonyms(); | ||
75 | noun_query& antonym_of(filter<noun> _f); | ||
76 | |||
77 | noun_query& has_pertainym(); | ||
78 | noun_query& anti_pertainym_of(filter<adjective> _f); | ||
79 | |||
80 | noun_query& is_attribute(); | ||
81 | noun_query& attribute_of(filter<adjective> _f); | ||
82 | |||
83 | noun_query& at_least_n_images(int _arg); | ||
84 | noun_query& with_wnid(int _arg); | ||
85 | |||
86 | /* noun_query& derived_from(const word& _w); | ||
87 | noun_query& not_derived_from(const word& _w);*/ | ||
88 | |||
89 | std::list<noun> run() const; | ||
90 | |||
91 | const static int unlimited = -1; | ||
92 | |||
93 | private: | ||
94 | const data& _data; | ||
95 | int _limit = unlimited; | ||
96 | bool _random = false; | ||
97 | std::list<rhyme> _rhymes; | ||
98 | std::list<noun> _except; | ||
99 | bool _has_prn = false; | ||
100 | bool _has_rhyming_noun = false; | ||
101 | bool _has_rhyming_adjective = false; | ||
102 | bool _has_rhyming_adverb = false; | ||
103 | bool _has_rhyming_verb = false; | ||
104 | filter<std::vector<bool>> _stress; | ||
105 | |||
106 | std::list<std::string> _with_singular_form; | ||
107 | filter<std::string> _with_prefix; | ||
108 | filter<std::string> _with_suffix; | ||
109 | |||
110 | int _with_complexity = unlimited; | ||
111 | |||
112 | bool _requires_plural_form = false; | ||
113 | |||
114 | bool _is_hypernym = false; | ||
115 | filter<noun> _hypernym_of; | ||
116 | filter<noun> _full_hypernym_of; | ||
117 | |||
118 | bool _is_hyponym = false; | ||
119 | filter<noun> _hyponym_of; | ||
120 | filter<noun> _full_hyponym_of; | ||
121 | |||
122 | bool _is_part_meronym = false; | ||
123 | filter<noun> _part_meronym_of; | ||
124 | filter<noun> _full_part_meronym_of; | ||
125 | |||
126 | bool _is_substance_meronym = false; | ||
127 | filter<noun> _substance_meronym_of; | ||
128 | filter<noun> _full_substance_meronym_of; | ||
129 | |||
130 | bool _is_member_meronym = false; | ||
131 | filter<noun> _member_meronym_of; | ||
132 | filter<noun> _full_member_meronym_of; | ||
133 | |||
134 | bool _is_part_holonym = false; | ||
135 | filter<noun> _part_holonym_of; | ||
136 | filter<noun> _full_part_holonym_of; | ||
137 | |||
138 | bool _is_substance_holonym = false; | ||
139 | filter<noun> _substance_holonym_of; | ||
140 | filter<noun> _full_substance_holonym_of; | ||
141 | |||
142 | bool _is_member_holonym = false; | ||
143 | filter<noun> _member_holonym_of; | ||
144 | filter<noun> _full_member_holonym_of; | ||
145 | |||
146 | bool _is_proper = false; | ||
147 | bool _is_not_proper = false; | ||
148 | |||
149 | bool _is_instance = false; | ||
150 | filter<noun> _instance_of; | ||
151 | |||
152 | bool _is_class = false; | ||
153 | filter<noun> _class_of; | ||
154 | |||
155 | bool _has_synonyms = false; | ||
156 | filter<noun> _synonym_of; | ||
157 | |||
158 | bool _has_antonyms = false; | ||
159 | filter<noun> _antonym_of; | ||
160 | |||
161 | bool _has_pertainym = false; | ||
162 | filter<adjective> _anti_pertainym_of; | ||
163 | |||
164 | bool _is_attribute = false; | ||
165 | filter<adjective> _attribute_of; | ||
166 | |||
167 | int _at_least_n_images = unlimited; | ||
168 | std::set<int> _with_wnid; | ||
169 | |||
170 | /* std::list<adjective> _derived_from_adjective; | ||
171 | std::list<adjective> _not_derived_from_adjective; | ||
172 | std::list<adverb> _derived_from_adverb; | ||
173 | std::list<adverb> _not_derived_from_adverb; | ||
174 | std::list<noun> _derived_from_noun; | ||
175 | std::list<noun> _not_derived_from_noun;*/ | ||
176 | }; | ||
177 | |||
178 | }; | ||
179 | |||
180 | #endif /* end of include guard: NOUN_QUERY_H_5DE51DD7 */ | ||
diff --git a/lib/preposition.cpp b/lib/preposition.cpp deleted file mode 100644 index cea9165..0000000 --- a/lib/preposition.cpp +++ /dev/null | |||
@@ -1,107 +0,0 @@ | |||
1 | #include "verbly.h" | ||
2 | |||
3 | namespace verbly { | ||
4 | |||
5 | std::string preposition::get_form() const | ||
6 | { | ||
7 | return form; | ||
8 | } | ||
9 | |||
10 | preposition_query::preposition_query(const data& _data) : _data(_data) | ||
11 | { | ||
12 | |||
13 | } | ||
14 | |||
15 | preposition_query& preposition_query::limit(int _limit) | ||
16 | { | ||
17 | this->_limit = _limit; | ||
18 | |||
19 | return *this; | ||
20 | } | ||
21 | |||
22 | preposition_query& preposition_query::random() | ||
23 | { | ||
24 | _random = true; | ||
25 | |||
26 | return *this; | ||
27 | } | ||
28 | |||
29 | preposition_query& preposition_query::in_group(std::string _arg) | ||
30 | { | ||
31 | _in_group.push_back(_arg); | ||
32 | |||
33 | return *this; | ||
34 | } | ||
35 | |||
36 | std::list<preposition> preposition_query::run() const | ||
37 | { | ||
38 | std::stringstream construct; | ||
39 | construct << "SELECT form FROM prepositions"; | ||
40 | std::list<binding> bindings; | ||
41 | |||
42 | if (!_in_group.empty()) | ||
43 | { | ||
44 | std::list<std::string> clauses(_in_group.size(), "groupname = ?"); | ||
45 | construct << " WHERE preposition_id IN (SELECT preposition_id FROM preposition_groups WHERE "; | ||
46 | construct << verbly::implode(std::begin(clauses), std::end(clauses), " OR "); | ||
47 | construct << ")"; | ||
48 | |||
49 | for (auto g : _in_group) | ||
50 | { | ||
51 | bindings.emplace_back(g); | ||
52 | } | ||
53 | } | ||
54 | |||
55 | if (_random) | ||
56 | { | ||
57 | construct << " ORDER BY RANDOM()"; | ||
58 | } | ||
59 | |||
60 | if (_limit != unlimited) | ||
61 | { | ||
62 | construct << " LIMIT " << _limit; | ||
63 | } | ||
64 | |||
65 | sqlite3_stmt* ppstmt; | ||
66 | std::string query = construct.str(); | ||
67 | if (sqlite3_prepare_v2(_data.ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
68 | { | ||
69 | throw std::runtime_error(sqlite3_errmsg(_data.ppdb)); | ||
70 | } | ||
71 | |||
72 | int i = 1; | ||
73 | for (auto& binding : bindings) | ||
74 | { | ||
75 | switch (binding.get_type()) | ||
76 | { | ||
77 | case binding::type::integer: | ||
78 | { | ||
79 | sqlite3_bind_int(ppstmt, i, binding.get_integer()); | ||
80 | |||
81 | break; | ||
82 | } | ||
83 | |||
84 | case binding::type::string: | ||
85 | { | ||
86 | sqlite3_bind_text(ppstmt, i, binding.get_string().c_str(), binding.get_string().length(), SQLITE_TRANSIENT); | ||
87 | |||
88 | break; | ||
89 | } | ||
90 | } | ||
91 | |||
92 | i++; | ||
93 | } | ||
94 | |||
95 | std::list<preposition> output; | ||
96 | while (sqlite3_step(ppstmt) == SQLITE_ROW) | ||
97 | { | ||
98 | preposition pp; | ||
99 | pp.form = std::string(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 0))); | ||
100 | |||
101 | output.push_back(pp); | ||
102 | } | ||
103 | |||
104 | return output; | ||
105 | } | ||
106 | |||
107 | }; | ||
diff --git a/lib/preposition.h b/lib/preposition.h deleted file mode 100644 index 89f24fa..0000000 --- a/lib/preposition.h +++ /dev/null | |||
@@ -1,38 +0,0 @@ | |||
1 | #ifndef PREPOSITION_H_FF908021 | ||
2 | #define PREPOSITION_H_FF908021 | ||
3 | |||
4 | namespace verbly { | ||
5 | |||
6 | class preposition_query; | ||
7 | |||
// One preposition result. Instances are filled in by preposition_query, which
// writes the private `form` member directly when it builds its result list.
class preposition {
  friend class preposition_query;

  public:
    // Accessor for the preposition's form (defined outside this header).
    std::string get_form() const;

  private:
    std::string form;
};
17 | |||
18 | class preposition_query { | ||
19 | public: | ||
20 | preposition_query(const data& _data); | ||
21 | |||
22 | preposition_query& limit(int _limit); | ||
23 | preposition_query& random(); | ||
24 | preposition_query& in_group(std::string _arg); | ||
25 | |||
26 | std::list<preposition> run() const; | ||
27 | |||
28 | const static int unlimited = -1; | ||
29 | private: | ||
30 | const data& _data; | ||
31 | int _limit = unlimited; | ||
32 | bool _random = false; | ||
33 | std::list<std::string> _in_group; | ||
34 | }; | ||
35 | |||
36 | }; | ||
37 | |||
38 | #endif /* end of include guard: PREPOSITION_H_FF908021 */ | ||
diff --git a/lib/pronunciation.cpp b/lib/pronunciation.cpp new file mode 100644 index 0000000..f5b742f --- /dev/null +++ b/lib/pronunciation.cpp | |||
@@ -0,0 +1,69 @@ | |||
1 | #include "pronunciation.h" | ||
2 | #include <sqlite3.h> | ||
3 | #include "form.h" | ||
4 | #include "lemma.h" | ||
5 | #include "word.h" | ||
6 | #include "util.h" | ||
7 | |||
8 | namespace verbly { | ||
9 | |||
10 | const object pronunciation::objectType = object::pronunciation; | ||
11 | |||
12 | const std::list<std::string> pronunciation::select = {"pronunciation_id", "phonemes", "syllables", "stress", "prerhyme", "rhyme"}; | ||
13 | |||
14 | const field pronunciation::id = field::integerField(object::pronunciation, "pronunciation_id"); | ||
15 | const field pronunciation::numOfSyllables = field::integerField(object::pronunciation, "syllables"); | ||
16 | const field pronunciation::stress = field::stringField(object::pronunciation, "stress"); | ||
17 | |||
18 | const field pronunciation::form = field::joinThrough(object::pronunciation, "pronunciation_id", object::form, "forms_pronunciations", "form_id"); | ||
19 | |||
20 | const field pronunciation::prerhyme = field::stringField(object::pronunciation, "prerhyme", true); | ||
21 | const field pronunciation::rhyme = field::stringField(object::pronunciation, "rhyme", true); | ||
22 | |||
23 | pronunciation::pronunciation(const database& db, sqlite3_stmt* row) : db_(&db), valid_(true) | ||
24 | { | ||
25 | id_ = sqlite3_column_int(row, 0); | ||
26 | |||
27 | std::string phonemesStr(reinterpret_cast<const char*>(sqlite3_column_text(row, 1))); | ||
28 | phonemes_ = split<std::vector<std::string>>(phonemesStr, " "); | ||
29 | |||
30 | syllables_ = sqlite3_column_int(row, 2); | ||
31 | stress_ = std::string(reinterpret_cast<const char*>(sqlite3_column_text(row, 3))); | ||
32 | |||
33 | if (sqlite3_column_type(row, 5) != SQLITE_NULL) | ||
34 | { | ||
35 | hasRhyme_ = true; | ||
36 | |||
37 | prerhyme_ = std::string(reinterpret_cast<const char*>(sqlite3_column_text(row, 4))); | ||
38 | rhyme_ = std::string(reinterpret_cast<const char*>(sqlite3_column_text(row, 5))); | ||
39 | } | ||
40 | } | ||
41 | |||
42 | filter pronunciation::rhymesWith(const pronunciation& arg) | ||
43 | { | ||
44 | return (prerhyme != arg.getPrerhyme()) && (rhyme == arg.getRhyme()); | ||
45 | } | ||
46 | |||
47 | /*filter pronunciation::rhymesWith(const class form& arg) | ||
48 | { | ||
49 | filter result; | ||
50 | |||
51 | for (const pronunciation& p : arg.getPronunciations()) | ||
52 | { | ||
53 | result |= rhymesWith(p); | ||
54 | } | ||
55 | |||
56 | return result; | ||
57 | } | ||
58 | |||
59 | filter pronunciation::rhymesWith(const lemma& arg) | ||
60 | { | ||
61 | return rhymesWith(arg.getBaseForm()); | ||
62 | } | ||
63 | |||
64 | filter pronunciation::rhymesWith(const word& arg) | ||
65 | { | ||
66 | return rhymesWith(arg.getLemma()); | ||
67 | }*/ | ||
68 | |||
69 | }; | ||
diff --git a/lib/pronunciation.h b/lib/pronunciation.h new file mode 100644 index 0000000..c7a1d4d --- /dev/null +++ b/lib/pronunciation.h | |||
@@ -0,0 +1,163 @@ | |||
1 | #ifndef PRONUNCIATION_H_C68F86B0 | ||
2 | #define PRONUNCIATION_H_C68F86B0 | ||
3 | |||
4 | #include <stdexcept> | ||
5 | #include <vector> | ||
6 | #include <string> | ||
7 | #include "field.h" | ||
8 | #include "filter.h" | ||
9 | |||
10 | struct sqlite3_stmt; | ||
11 | |||
12 | namespace verbly { | ||
13 | |||
14 | class form; | ||
15 | class lemma; | ||
16 | class word; | ||
17 | class database; | ||
18 | |||
19 | class pronunciation { | ||
20 | public: | ||
21 | |||
22 | // Default constructor | ||
23 | |||
24 | pronunciation() = default; | ||
25 | |||
26 | // Construct from database | ||
27 | |||
28 | pronunciation(const database& db, sqlite3_stmt* row); | ||
29 | |||
30 | // Accessors | ||
31 | |||
32 | operator bool() const | ||
33 | { | ||
34 | return valid_; | ||
35 | } | ||
36 | |||
37 | int getId() const | ||
38 | { | ||
39 | if (!valid_) | ||
40 | { | ||
41 | throw std::domain_error("Bad access to uninitialized pronunciation"); | ||
42 | } | ||
43 | |||
44 | return id_; | ||
45 | } | ||
46 | |||
47 | const std::vector<std::string>& getPhonemes() const | ||
48 | { | ||
49 | if (!valid_) | ||
50 | { | ||
51 | throw std::domain_error("Bad access to uninitialized pronunciation"); | ||
52 | } | ||
53 | |||
54 | return phonemes_; | ||
55 | } | ||
56 | |||
57 | int getSyllables() const | ||
58 | { | ||
59 | if (!valid_) | ||
60 | { | ||
61 | throw std::domain_error("Bad access to uninitialized pronunciation"); | ||
62 | } | ||
63 | |||
64 | return syllables_; | ||
65 | } | ||
66 | |||
67 | std::string getStress() const | ||
68 | { | ||
69 | if (!valid_) | ||
70 | { | ||
71 | throw std::domain_error("Bad access to uninitialized pronunciation"); | ||
72 | } | ||
73 | |||
74 | return stress_; | ||
75 | } | ||
76 | |||
77 | bool hasRhyme() const | ||
78 | { | ||
79 | if (!valid_) | ||
80 | { | ||
81 | throw std::domain_error("Bad access to uninitialized pronunciation"); | ||
82 | } | ||
83 | |||
84 | return hasRhyme_; | ||
85 | } | ||
86 | |||
87 | std::string getPrerhyme() const | ||
88 | { | ||
89 | if (!valid_) | ||
90 | { | ||
91 | throw std::domain_error("Bad access to uninitialized pronunciation"); | ||
92 | } | ||
93 | |||
94 | if (!hasRhyme_) | ||
95 | { | ||
96 | throw std::domain_error("This pronunciation has no rhyme"); | ||
97 | } | ||
98 | |||
99 | return prerhyme_; | ||
100 | } | ||
101 | |||
102 | std::string getRhyme() const | ||
103 | { | ||
104 | if (!valid_) | ||
105 | { | ||
106 | throw std::domain_error("Bad access to uninitialized pronunciation"); | ||
107 | } | ||
108 | |||
109 | if (!hasRhyme_) | ||
110 | { | ||
111 | throw std::domain_error("This pronunciation has no rhyme"); | ||
112 | } | ||
113 | |||
114 | return rhyme_; | ||
115 | } | ||
116 | |||
117 | // Type info | ||
118 | |||
119 | static const object objectType; | ||
120 | |||
121 | static const std::list<std::string> select; | ||
122 | |||
123 | // Query fields | ||
124 | |||
125 | static const field id; | ||
126 | static const field numOfSyllables; | ||
127 | static const field stress; | ||
128 | |||
129 | operator filter() const | ||
130 | { | ||
131 | return (id == id_); | ||
132 | } | ||
133 | |||
134 | static filter rhymesWith(const pronunciation& arg); | ||
135 | static filter rhymesWith(const class form& arg); | ||
136 | static filter rhymesWith(const lemma& arg); | ||
137 | static filter rhymesWith(const word& arg); | ||
138 | |||
139 | // Relationships to other objects | ||
140 | |||
141 | static const field form; | ||
142 | |||
143 | private: | ||
144 | bool valid_ = false; | ||
145 | |||
146 | int id_; | ||
147 | std::vector<std::string> phonemes_; | ||
148 | int syllables_; | ||
149 | std::string stress_; | ||
150 | bool hasRhyme_ = false; | ||
151 | std::string prerhyme_; | ||
152 | std::string rhyme_; | ||
153 | |||
154 | const database* db_; | ||
155 | |||
156 | static const field prerhyme; | ||
157 | static const field rhyme; | ||
158 | |||
159 | }; | ||
160 | |||
161 | }; | ||
162 | |||
163 | #endif /* end of include guard: PRONUNCIATION_H_C68F86B0 */ | ||
diff --git a/lib/query.h b/lib/query.h new file mode 100644 index 0000000..e31be3d --- /dev/null +++ b/lib/query.h | |||
@@ -0,0 +1,123 @@ | |||
1 | #ifndef QUERY_H_7CC5284C | ||
2 | #define QUERY_H_7CC5284C | ||
3 | |||
4 | #include <vector> | ||
5 | #include <stdexcept> | ||
6 | #include <string> | ||
7 | #include <list> | ||
8 | #include <sqlite3.h> | ||
9 | #include <iostream> | ||
10 | #include "statement.h" | ||
11 | #include "binding.h" | ||
12 | |||
13 | namespace verbly { | ||
14 | |||
// Error raised when SQLite rejects a query. what() reads "<msg> (<sqlMsg>)".
class database_error : public std::logic_error {
  public:

    database_error(std::string msg, std::string sqlMsg) : std::logic_error(describe(msg, sqlMsg))
    {
    }

  private:

    // Joins the caller's message and SQLite's own message into one string.
    static std::string describe(const std::string& msg, const std::string& sqlMsg)
    {
      std::string text(msg);
      text += " (";
      text += sqlMsg;
      text += ")";

      return text;
    }
};
22 | |||
23 | template <typename Object> | ||
24 | class query { | ||
25 | public: | ||
26 | |||
27 | query(const database& db, sqlite3* ppdb, filter queryFilter, bool random, int limit) : db_(&db) | ||
28 | { | ||
29 | statement stmt(Object::objectType, std::move(queryFilter)); | ||
30 | |||
31 | std::string queryString = stmt.getQueryString(Object::select, random, limit); | ||
32 | std::list<binding> bindings = stmt.getBindings(); | ||
33 | |||
34 | std::cout << queryString << std::endl; | ||
35 | |||
36 | if (sqlite3_prepare_v2(ppdb, queryString.c_str(), queryString.length(), &ppstmt_, NULL) != SQLITE_OK) | ||
37 | { | ||
38 | std::string errorMsg = sqlite3_errmsg(ppdb); | ||
39 | sqlite3_finalize(ppstmt_); | ||
40 | |||
41 | throw database_error("Error preparing query", errorMsg); | ||
42 | } | ||
43 | |||
44 | int i = 1; | ||
45 | for (const binding& value : bindings) | ||
46 | { | ||
47 | switch (value.getType()) | ||
48 | { | ||
49 | case binding::type::integer: | ||
50 | { | ||
51 | if (sqlite3_bind_int(ppstmt_, i, value.getInteger()) != SQLITE_OK) | ||
52 | { | ||
53 | std::string errorMsg = sqlite3_errmsg(ppdb); | ||
54 | sqlite3_finalize(ppstmt_); | ||
55 | |||
56 | throw database_error("Error binding value to query", errorMsg); | ||
57 | } | ||
58 | |||
59 | break; | ||
60 | } | ||
61 | |||
62 | case binding::type::string: | ||
63 | { | ||
64 | if (sqlite3_bind_text(ppstmt_, i, value.getString().c_str(), value.getString().length(), SQLITE_TRANSIENT) != SQLITE_OK) | ||
65 | { | ||
66 | std::string errorMsg = sqlite3_errmsg(ppdb); | ||
67 | sqlite3_finalize(ppstmt_); | ||
68 | |||
69 | throw database_error("Error binding value to query", errorMsg); | ||
70 | } | ||
71 | |||
72 | break; | ||
73 | } | ||
74 | |||
75 | case binding::type::invalid: | ||
76 | { | ||
77 | throw std::logic_error("Cannot use invalid bindings"); | ||
78 | } | ||
79 | } | ||
80 | |||
81 | i++; | ||
82 | } | ||
83 | } | ||
84 | |||
85 | ~query() | ||
86 | { | ||
87 | sqlite3_finalize(ppstmt_); | ||
88 | } | ||
89 | |||
90 | std::vector<Object> all() const | ||
91 | { | ||
92 | std::vector<Object> result; | ||
93 | |||
94 | while (sqlite3_step(ppstmt_) == SQLITE_ROW) | ||
95 | { | ||
96 | result.emplace_back(*db_, ppstmt_); | ||
97 | } | ||
98 | |||
99 | sqlite3_reset(ppstmt_); | ||
100 | |||
101 | return result; | ||
102 | } | ||
103 | |||
104 | Object first() const | ||
105 | { | ||
106 | std::vector<Object> results = all(); | ||
107 | if (!results.empty()) | ||
108 | { | ||
109 | return results.front(); | ||
110 | } else { | ||
111 | throw std::logic_error("query returned empty dataset"); | ||
112 | } | ||
113 | } | ||
114 | |||
115 | private: | ||
116 | const database* db_; | ||
117 | sqlite3_stmt* ppstmt_; | ||
118 | |||
119 | }; | ||
120 | |||
121 | }; | ||
122 | |||
123 | #endif /* end of include guard: QUERY_H_7CC5284C */ | ||
diff --git a/lib/statement.cpp b/lib/statement.cpp new file mode 100644 index 0000000..52fa00d --- /dev/null +++ b/lib/statement.cpp | |||
@@ -0,0 +1,806 @@ | |||
1 | #include "statement.h" | ||
2 | #include <sstream> | ||
3 | #include <utility> | ||
4 | #include "filter.h" | ||
5 | #include "util.h" | ||
6 | #include "notion.h" | ||
7 | #include "word.h" | ||
8 | #include "group.h" | ||
9 | #include "frame.h" | ||
10 | #include "lemma.h" | ||
11 | #include "form.h" | ||
12 | #include "pronunciation.h" | ||
13 | |||
14 | namespace verbly { | ||
15 | |||
16 | statement::statement( | ||
17 | object context, | ||
18 | filter queryFilter) : | ||
19 | statement(getTableForContext(context), queryFilter.normalize(context)) | ||
20 | { | ||
21 | } | ||
22 | |||
23 | std::string statement::getQueryString(std::list<std::string> select, bool random, int limit) const | ||
24 | { | ||
25 | std::stringstream queryStream; | ||
26 | |||
27 | if (!withs_.empty()) | ||
28 | { | ||
29 | queryStream << "WITH RECURSIVE "; | ||
30 | |||
31 | std::list<std::string> ctes; | ||
32 | for (const with& cte : withs_) | ||
33 | { | ||
34 | std::stringstream cteStream; | ||
35 | cteStream << cte.getIdentifier(); | ||
36 | cteStream << " AS (SELECT "; | ||
37 | cteStream << cte.getTopTable(); | ||
38 | cteStream << ".* FROM "; | ||
39 | cteStream << cte.getTableForId(cte.getTopTable()); | ||
40 | cteStream << " AS "; | ||
41 | cteStream << cte.getTopTable(); | ||
42 | |||
43 | for (const join& j : cte.getJoins()) | ||
44 | { | ||
45 | cteStream << " "; | ||
46 | cteStream << j; | ||
47 | } | ||
48 | |||
49 | if (cte.getCondition().getType() != condition::type::empty) | ||
50 | { | ||
51 | cteStream << " WHERE "; | ||
52 | cteStream << cte.getCondition().toSql(); | ||
53 | } | ||
54 | |||
55 | cteStream << " UNION SELECT l.* FROM "; | ||
56 | cteStream << cte.getIdentifier(); | ||
57 | cteStream << " AS t INNER JOIN "; | ||
58 | cteStream << cte.getField().getTable(); | ||
59 | cteStream << " AS j ON t."; | ||
60 | cteStream << cte.getField().getColumn(); | ||
61 | cteStream << " = j."; | ||
62 | cteStream << cte.getField().getForeignJoinColumn(); | ||
63 | cteStream << " INNER JOIN "; | ||
64 | cteStream << cte.getTableForId(cte.getTopTable()); | ||
65 | cteStream << " AS l ON j."; | ||
66 | cteStream << cte.getField().getJoinColumn(); | ||
67 | cteStream << " = l."; | ||
68 | cteStream << cte.getField().getColumn(); | ||
69 | cteStream << ")"; | ||
70 | |||
71 | ctes.push_back(cteStream.str()); | ||
72 | } | ||
73 | |||
74 | queryStream << implode(std::begin(ctes), std::end(ctes), ", "); | ||
75 | queryStream << " "; | ||
76 | } | ||
77 | |||
78 | std::list<std::string> realSelect; | ||
79 | for (std::string& s : select) | ||
80 | { | ||
81 | realSelect.push_back(topTable_ + "." + s); | ||
82 | } | ||
83 | |||
84 | queryStream << "SELECT "; | ||
85 | queryStream << implode(std::begin(realSelect), std::end(realSelect), ", "); | ||
86 | queryStream << " FROM "; | ||
87 | queryStream << tables_.at(topTable_); | ||
88 | queryStream << " AS "; | ||
89 | queryStream << topTable_; | ||
90 | |||
91 | for (const join& j : joins_) | ||
92 | { | ||
93 | queryStream << " "; | ||
94 | queryStream << j; | ||
95 | } | ||
96 | |||
97 | if (topCondition_.getType() != condition::type::empty) | ||
98 | { | ||
99 | queryStream << " WHERE "; | ||
100 | queryStream << topCondition_.toSql(); | ||
101 | } | ||
102 | |||
103 | if (random) | ||
104 | { | ||
105 | queryStream << " ORDER BY RANDOM()"; | ||
106 | } | ||
107 | |||
108 | if (limit > 0) | ||
109 | { | ||
110 | queryStream << " LIMIT "; | ||
111 | queryStream << limit; | ||
112 | } | ||
113 | |||
114 | return queryStream.str(); | ||
115 | } | ||
116 | |||
117 | std::list<binding> statement::getBindings() const | ||
118 | { | ||
119 | std::list<binding> result; | ||
120 | |||
121 | for (const with& w : withs_) | ||
122 | { | ||
123 | for (binding value : w.getCondition().flattenBindings()) | ||
124 | { | ||
125 | result.push_back(std::move(value)); | ||
126 | } | ||
127 | } | ||
128 | |||
129 | for (binding value : topCondition_.flattenBindings()) | ||
130 | { | ||
131 | result.push_back(std::move(value)); | ||
132 | } | ||
133 | |||
134 | return result; | ||
135 | } | ||
136 | |||
137 | statement::statement( | ||
138 | std::string tableName, | ||
139 | filter clause, | ||
140 | int nextTableId, | ||
141 | int nextWithId) : | ||
142 | nextTableId_(nextTableId), | ||
143 | nextWithId_(nextWithId), | ||
144 | topTable_(instantiateTable(std::move(tableName))), | ||
145 | topCondition_(parseFilter(std::move(clause))) | ||
146 | { | ||
147 | } | ||
148 | |||
149 | statement::condition statement::parseFilter(filter clause) | ||
150 | { | ||
151 | switch (clause.getType()) | ||
152 | { | ||
153 | case filter::type::empty: | ||
154 | { | ||
155 | return {}; | ||
156 | } | ||
157 | |||
158 | case filter::type::singleton: | ||
159 | { | ||
160 | switch (clause.getField().getType()) | ||
161 | { | ||
162 | case field::type::undefined: | ||
163 | { | ||
164 | return {}; | ||
165 | } | ||
166 | |||
167 | case field::type::string: | ||
168 | case field::type::integer: | ||
169 | case field::type::boolean: | ||
170 | { | ||
171 | switch (clause.getComparison()) | ||
172 | { | ||
173 | case filter::comparison::is_null: | ||
174 | { | ||
175 | return condition(topTable_, clause.getField().getColumn(), true); | ||
176 | } | ||
177 | |||
178 | case filter::comparison::is_not_null: | ||
179 | { | ||
180 | return condition(topTable_, clause.getField().getColumn(), false); | ||
181 | } | ||
182 | |||
183 | case filter::comparison::int_equals: | ||
184 | { | ||
185 | return condition(topTable_, clause.getField().getColumn(), condition::comparison::equals, clause.getIntegerArgument()); | ||
186 | } | ||
187 | |||
188 | case filter::comparison::int_does_not_equal: | ||
189 | { | ||
190 | return condition(topTable_, clause.getField().getColumn(), condition::comparison::does_not_equal, clause.getIntegerArgument()); | ||
191 | } | ||
192 | |||
193 | case filter::comparison::int_is_at_least: | ||
194 | { | ||
195 | return condition(topTable_, clause.getField().getColumn(), condition::comparison::is_at_least, clause.getIntegerArgument()); | ||
196 | } | ||
197 | |||
198 | case filter::comparison::int_is_greater_than: | ||
199 | { | ||
200 | return condition(topTable_, clause.getField().getColumn(), condition::comparison::is_greater_than, clause.getIntegerArgument()); | ||
201 | } | ||
202 | |||
203 | case filter::comparison::int_is_at_most: | ||
204 | { | ||
205 | return condition(topTable_, clause.getField().getColumn(), condition::comparison::is_at_most, clause.getIntegerArgument()); | ||
206 | } | ||
207 | |||
208 | case filter::comparison::int_is_less_than: | ||
209 | { | ||
210 | return condition(topTable_, clause.getField().getColumn(), condition::comparison::is_less_than, clause.getIntegerArgument()); | ||
211 | } | ||
212 | |||
213 | case filter::comparison::boolean_equals: | ||
214 | { | ||
215 | return condition(topTable_, clause.getField().getColumn(), condition::comparison::equals, clause.getBooleanArgument() ? 1 : 0); | ||
216 | } | ||
217 | |||
218 | case filter::comparison::string_equals: | ||
219 | { | ||
220 | return condition(topTable_, clause.getField().getColumn(), condition::comparison::equals, clause.getStringArgument()); | ||
221 | } | ||
222 | |||
223 | case filter::comparison::string_does_not_equal: | ||
224 | { | ||
225 | return condition(topTable_, clause.getField().getColumn(), condition::comparison::does_not_equal, clause.getStringArgument()); | ||
226 | } | ||
227 | |||
228 | case filter::comparison::string_is_like: | ||
229 | { | ||
230 | return condition(topTable_, clause.getField().getColumn(), condition::comparison::is_like, clause.getStringArgument()); | ||
231 | } | ||
232 | |||
233 | case filter::comparison::string_is_not_like: | ||
234 | { | ||
235 | return condition(topTable_, clause.getField().getColumn(), condition::comparison::is_not_like, clause.getStringArgument()); | ||
236 | } | ||
237 | |||
238 | case filter::comparison::matches: | ||
239 | case filter::comparison::does_not_match: | ||
240 | case filter::comparison::hierarchally_matches: | ||
241 | case filter::comparison::does_not_hierarchally_match: | ||
242 | { | ||
243 | throw std::logic_error("Invalid comparison type for field"); | ||
244 | } | ||
245 | } | ||
246 | } | ||
247 | |||
248 | case field::type::join: | ||
249 | { | ||
250 | std::string joinTableName; | ||
251 | if (clause.getField().hasTable()) | ||
252 | { | ||
253 | joinTableName = clause.getField().getTable(); | ||
254 | } else { | ||
255 | joinTableName = getTableForContext(clause.getField().getJoinObject()); | ||
256 | } | ||
257 | |||
258 | statement joinStmt( | ||
259 | joinTableName, | ||
260 | clause.getJoinCondition().normalize(clause.getField().getJoinObject()), | ||
261 | nextTableId_, | ||
262 | nextWithId_); | ||
263 | |||
264 | std::string joinTable = joinStmt.topTable_; | ||
265 | condition curCond = integrate(std::move(joinStmt)); | ||
266 | |||
267 | bool outer = false; | ||
268 | if (clause.getComparison() == filter::comparison::does_not_match) | ||
269 | { | ||
270 | outer = true; | ||
271 | |||
272 | curCond &= condition(joinTable, clause.getField().getColumn(), true); | ||
273 | } | ||
274 | |||
275 | joins_.emplace_back(outer, joinTableName, topTable_, clause.getField().getColumn(), joinTable, clause.getField().getColumn()); | ||
276 | |||
277 | return curCond; | ||
278 | } | ||
279 | |||
280 | case field::type::join_through: | ||
281 | { | ||
282 | statement joinStmt( | ||
283 | getTableForContext(clause.getField().getJoinObject()), | ||
284 | clause.getJoinCondition().normalize(clause.getField().getJoinObject()), | ||
285 | nextTableId_, | ||
286 | nextWithId_); | ||
287 | |||
288 | std::string joinTable = joinStmt.topTable_; | ||
289 | std::string throughTable = instantiateTable(clause.getField().getTable()); | ||
290 | condition curCond = integrate(std::move(joinStmt)); | ||
291 | |||
292 | bool outer = false; | ||
293 | if (clause.getComparison() == filter::comparison::does_not_match) | ||
294 | { | ||
295 | outer = true; | ||
296 | |||
297 | curCond &= condition(throughTable, clause.getField().getJoinColumn(), true); | ||
298 | } | ||
299 | |||
300 | joins_.emplace_back(outer, clause.getField().getTable(), topTable_, clause.getField().getColumn(), throughTable, clause.getField().getJoinColumn()); | ||
301 | joins_.emplace_back(false, getTableForContext(clause.getField().getJoinObject()), throughTable, clause.getField().getForeignJoinColumn(), joinTable, clause.getField().getForeignColumn()); | ||
302 | |||
303 | return curCond; | ||
304 | } | ||
305 | |||
306 | case field::type::hierarchal_join: | ||
307 | { | ||
308 | std::string withName = std::string(clause.getField().getTable()) + "_tree_" + std::to_string(nextWithId_++); | ||
309 | std::string withInstName = instantiateTable(withName); | ||
310 | |||
311 | bool outer = false; | ||
312 | if (clause.getComparison() == filter::comparison::does_not_hierarchally_match) | ||
313 | { | ||
314 | outer = true; | ||
315 | } | ||
316 | |||
317 | joins_.emplace_back(outer, withName, topTable_, clause.getField().getColumn(), withInstName, clause.getField().getColumn()); | ||
318 | |||
319 | statement withStmt( | ||
320 | getTableForContext(clause.getField().getObject()), | ||
321 | clause.getJoinCondition().normalize(clause.getField().getObject()), | ||
322 | nextTableId_, | ||
323 | nextWithId_); | ||
324 | |||
325 | for (auto& w : withStmt.withs_) | ||
326 | { | ||
327 | withs_.push_back(std::move(w)); | ||
328 | } | ||
329 | |||
330 | nextTableId_ = withStmt.nextTableId_; | ||
331 | nextWithId_ = withStmt.nextWithId_; | ||
332 | |||
333 | withs_.emplace_back( | ||
334 | withName, | ||
335 | clause.getField(), | ||
336 | std::move(withStmt.tables_), | ||
337 | std::move(withStmt.topTable_), | ||
338 | std::move(withStmt.topCondition_), | ||
339 | std::move(withStmt.joins_)); | ||
340 | |||
341 | if (clause.getComparison() == filter::comparison::does_not_hierarchally_match) | ||
342 | { | ||
343 | return condition(withInstName, clause.getField().getColumn(), true); | ||
344 | } else { | ||
345 | return {}; | ||
346 | } | ||
347 | } | ||
348 | } | ||
349 | } | ||
350 | |||
351 | case filter::type::group: | ||
352 | { | ||
353 | condition grp(clause.getOrlogic()); | ||
354 | |||
355 | for (const filter& child : clause) | ||
356 | { | ||
357 | condition newChild = parseFilter(child); | ||
358 | if (newChild.getType() != condition::type::empty) | ||
359 | { | ||
360 | grp += std::move(newChild); | ||
361 | } | ||
362 | } | ||
363 | |||
364 | if (grp.getChildren().empty()) | ||
365 | { | ||
366 | grp = {}; | ||
367 | } | ||
368 | |||
369 | return grp; | ||
370 | } | ||
371 | } | ||
372 | } | ||
373 | |||
374 | std::string statement::instantiateTable(std::string name) | ||
375 | { | ||
376 | std::string identifier = name + "_" + std::to_string(nextTableId_++); | ||
377 | tables_[identifier] = name; | ||
378 | |||
379 | return identifier; | ||
380 | } | ||
381 | |||
382 | statement::condition statement::integrate(statement subStmt) | ||
383 | { | ||
384 | for (auto& mapping : subStmt.tables_) | ||
385 | { | ||
386 | tables_[mapping.first] = mapping.second; | ||
387 | } | ||
388 | |||
389 | for (auto& j : subStmt.joins_) | ||
390 | { | ||
391 | joins_.push_back(j); | ||
392 | } | ||
393 | |||
394 | for (auto& w : subStmt.withs_) | ||
395 | { | ||
396 | withs_.push_back(w); | ||
397 | } | ||
398 | |||
399 | nextTableId_ = subStmt.nextTableId_; | ||
400 | nextWithId_ = subStmt.nextWithId_; | ||
401 | |||
402 | return subStmt.topCondition_; | ||
403 | } | ||
404 | |||
405 | std::ostream& operator<<(std::ostream& oss, const statement::join& j) | ||
406 | { | ||
407 | if (j.isOuterJoin()) | ||
408 | { | ||
409 | oss << "LEFT"; | ||
410 | } else { | ||
411 | oss << "INNER"; | ||
412 | } | ||
413 | |||
414 | return oss | ||
415 | << " JOIN " | ||
416 | << j.getForeignTableName() | ||
417 | << " AS " | ||
418 | << j.getForeignTable() | ||
419 | << " ON " | ||
420 | << j.getForeignTable() | ||
421 | << "." | ||
422 | << j.getForeignColumn() | ||
423 | << " = " | ||
424 | << j.getJoinTable() | ||
425 | << "." | ||
426 | << j.getJoinColumn(); | ||
427 | } | ||
428 | |||
429 | statement::condition::condition(const condition& other) | ||
430 | { | ||
431 | type_ = other.type_; | ||
432 | |||
433 | switch (type_) | ||
434 | { | ||
435 | case type::empty: | ||
436 | { | ||
437 | break; | ||
438 | } | ||
439 | |||
440 | case type::singleton: | ||
441 | { | ||
442 | new(&singleton_.table_) std::string(other.singleton_.table_); | ||
443 | new(&singleton_.column_) std::string(other.singleton_.column_); | ||
444 | singleton_.comparison_ = other.singleton_.comparison_; | ||
445 | new(&singleton_.value_) binding(other.singleton_.value_); | ||
446 | |||
447 | break; | ||
448 | } | ||
449 | |||
450 | case type::group: | ||
451 | { | ||
452 | new(&group_.children_) std::list<condition>(other.group_.children_); | ||
453 | group_.orlogic_ = other.group_.orlogic_; | ||
454 | |||
455 | break; | ||
456 | } | ||
457 | } | ||
458 | } | ||
459 | |||
460 | statement::condition::condition(condition&& other) : condition() | ||
461 | { | ||
462 | swap(*this, other); | ||
463 | } | ||
464 | |||
465 | statement::condition& statement::condition::operator=(condition other) | ||
466 | { | ||
467 | swap(*this, other); | ||
468 | |||
469 | return *this; | ||
470 | } | ||
471 | |||
472 | void swap(statement::condition& first, statement::condition& second) | ||
473 | { | ||
474 | using type = statement::condition::type; | ||
475 | using condition = statement::condition; | ||
476 | |||
477 | type tempType = first.type_; | ||
478 | std::string tempTable; | ||
479 | std::string tempColumn; | ||
480 | condition::comparison tempComparison; | ||
481 | binding tempBinding; | ||
482 | std::list<condition> tempChildren; | ||
483 | bool tempOrlogic; | ||
484 | |||
485 | switch (tempType) | ||
486 | { | ||
487 | case type::empty: | ||
488 | { | ||
489 | break; | ||
490 | } | ||
491 | |||
492 | case type::singleton: | ||
493 | { | ||
494 | tempTable = std::move(first.singleton_.table_); | ||
495 | tempColumn = std::move(first.singleton_.column_); | ||
496 | tempComparison = first.singleton_.comparison_; | ||
497 | tempBinding = std::move(first.singleton_.value_); | ||
498 | |||
499 | break; | ||
500 | } | ||
501 | |||
502 | case type::group: | ||
503 | { | ||
504 | tempChildren = std::move(first.group_.children_); | ||
505 | tempOrlogic = first.group_.orlogic_; | ||
506 | |||
507 | break; | ||
508 | } | ||
509 | } | ||
510 | |||
511 | first.~condition(); | ||
512 | |||
513 | first.type_ = second.type_; | ||
514 | |||
515 | switch (first.type_) | ||
516 | { | ||
517 | case type::empty: | ||
518 | { | ||
519 | break; | ||
520 | } | ||
521 | |||
522 | case type::singleton: | ||
523 | { | ||
524 | new(&first.singleton_.table_) std::string(std::move(second.singleton_.table_)); | ||
525 | new(&first.singleton_.column_) std::string(std::move(second.singleton_.column_)); | ||
526 | first.singleton_.comparison_ = second.singleton_.comparison_; | ||
527 | new(&first.singleton_.value_) binding(std::move(second.singleton_.value_)); | ||
528 | |||
529 | break; | ||
530 | } | ||
531 | |||
532 | case type::group: | ||
533 | { | ||
534 | new(&first.group_.children_) std::list<condition>(std::move(second.group_.children_)); | ||
535 | first.group_.orlogic_ = second.group_.orlogic_; | ||
536 | |||
537 | break; | ||
538 | } | ||
539 | } | ||
540 | |||
541 | second.~condition(); | ||
542 | |||
543 | second.type_ = tempType; | ||
544 | |||
545 | switch (second.type_) | ||
546 | { | ||
547 | case type::empty: | ||
548 | { | ||
549 | break; | ||
550 | } | ||
551 | |||
552 | case type::singleton: | ||
553 | { | ||
554 | new(&second.singleton_.table_) std::string(std::move(tempTable)); | ||
555 | new(&second.singleton_.column_) std::string(std::move(tempColumn)); | ||
556 | second.singleton_.comparison_ = tempComparison; | ||
557 | new(&second.singleton_.value_) binding(std::move(tempBinding)); | ||
558 | |||
559 | break; | ||
560 | } | ||
561 | |||
562 | case type::group: | ||
563 | { | ||
564 | new(&second.group_.children_) std::list<condition>(std::move(tempChildren)); | ||
565 | second.group_.orlogic_ = tempOrlogic; | ||
566 | |||
567 | break; | ||
568 | } | ||
569 | } | ||
570 | } | ||
571 | |||
572 | statement::condition::~condition() | ||
573 | { | ||
574 | switch (type_) | ||
575 | { | ||
576 | case type::empty: | ||
577 | { | ||
578 | break; | ||
579 | } | ||
580 | |||
581 | case type::singleton: | ||
582 | { | ||
583 | using string_type = std::string; | ||
584 | |||
585 | singleton_.table_.~string_type(); | ||
586 | singleton_.column_.~string_type(); | ||
587 | singleton_.value_.~binding(); | ||
588 | |||
589 | break; | ||
590 | } | ||
591 | |||
592 | case type::group: | ||
593 | { | ||
594 | using list_type = std::list<condition>; | ||
595 | |||
596 | group_.children_.~list_type(); | ||
597 | |||
598 | break; | ||
599 | } | ||
600 | } | ||
601 | } | ||
602 | |||
603 | statement::condition::condition() : type_(type::empty) | ||
604 | { | ||
605 | } | ||
606 | |||
607 | statement::condition::condition( | ||
608 | std::string table, | ||
609 | std::string column, | ||
610 | bool isNull) : | ||
611 | type_(type::singleton) | ||
612 | { | ||
613 | new(&singleton_.table_) std::string(std::move(table)); | ||
614 | new(&singleton_.column_) std::string(std::move(column)); | ||
615 | |||
616 | if (isNull) | ||
617 | { | ||
618 | singleton_.comparison_ = comparison::is_null; | ||
619 | } else { | ||
620 | singleton_.comparison_ = comparison::is_not_null; | ||
621 | } | ||
622 | } | ||
623 | |||
624 | statement::condition::condition( | ||
625 | std::string table, | ||
626 | std::string column, | ||
627 | comparison comp, | ||
628 | binding value) : | ||
629 | type_(type::singleton) | ||
630 | { | ||
631 | new(&singleton_.table_) std::string(std::move(table)); | ||
632 | new(&singleton_.column_) std::string(std::move(column)); | ||
633 | singleton_.comparison_ = comp; | ||
634 | new(&singleton_.value_) binding(std::move(value)); | ||
635 | } | ||
636 | |||
637 | std::string statement::condition::toSql() const | ||
638 | { | ||
639 | switch (type_) | ||
640 | { | ||
641 | case type::empty: | ||
642 | { | ||
643 | return ""; | ||
644 | } | ||
645 | |||
646 | case type::singleton: | ||
647 | { | ||
648 | switch (singleton_.comparison_) | ||
649 | { | ||
650 | case comparison::equals: | ||
651 | { | ||
652 | return singleton_.table_ + "." + singleton_.column_ + " = ?"; | ||
653 | } | ||
654 | |||
655 | case comparison::does_not_equal: | ||
656 | { | ||
657 | return singleton_.table_ + "." + singleton_.column_ + " != ?"; | ||
658 | } | ||
659 | |||
660 | case comparison::is_greater_than: | ||
661 | { | ||
662 | return singleton_.table_ + "." + singleton_.column_ + " > ?"; | ||
663 | } | ||
664 | |||
665 | case comparison::is_at_most: | ||
666 | { | ||
667 | return singleton_.table_ + "." + singleton_.column_ + " <= ?"; | ||
668 | } | ||
669 | |||
670 | case comparison::is_less_than: | ||
671 | { | ||
672 | return singleton_.table_ + "." + singleton_.column_ + " < ?"; | ||
673 | } | ||
674 | |||
675 | case comparison::is_at_least: | ||
676 | { | ||
677 | return singleton_.table_ + "." + singleton_.column_ + " >= ?"; | ||
678 | } | ||
679 | |||
680 | case comparison::is_like: | ||
681 | { | ||
682 | return singleton_.table_ + "." + singleton_.column_ + " LIKE ?"; | ||
683 | } | ||
684 | |||
685 | case comparison::is_not_like: | ||
686 | { | ||
687 | return singleton_.table_ + "." + singleton_.column_ + " NOT LIKE ?"; | ||
688 | } | ||
689 | |||
690 | case comparison::is_not_null: | ||
691 | { | ||
692 | return singleton_.table_ + "." + singleton_.column_ + " IS NOT NULL"; | ||
693 | } | ||
694 | |||
695 | case comparison::is_null: | ||
696 | { | ||
697 | return singleton_.table_ + "." + singleton_.column_ + " IS NULL"; | ||
698 | } | ||
699 | } | ||
700 | } | ||
701 | |||
702 | case type::group: | ||
703 | { | ||
704 | std::list<std::string> clauses; | ||
705 | for (const condition& cond : group_.children_) | ||
706 | { | ||
707 | clauses.push_back(cond.toSql()); | ||
708 | } | ||
709 | |||
710 | return implode(std::begin(clauses), std::end(clauses), group_.orlogic_ ? " OR " : " AND "); | ||
711 | } | ||
712 | } | ||
713 | } | ||
714 | |||
715 | std::list<binding> statement::condition::flattenBindings() const | ||
716 | { | ||
717 | switch (type_) | ||
718 | { | ||
719 | case type::empty: | ||
720 | { | ||
721 | return {}; | ||
722 | } | ||
723 | |||
724 | case type::singleton: | ||
725 | { | ||
726 | return {singleton_.value_}; | ||
727 | } | ||
728 | |||
729 | case type::group: | ||
730 | { | ||
731 | std::list<binding> bindings; | ||
732 | for (const condition& cond : group_.children_) | ||
733 | { | ||
734 | for (binding value : cond.flattenBindings()) | ||
735 | { | ||
736 | bindings.push_back(std::move(value)); | ||
737 | } | ||
738 | } | ||
739 | |||
740 | return bindings; | ||
741 | } | ||
742 | } | ||
743 | } | ||
744 | |||
745 | statement::condition::condition(bool orlogic) : type_(type::group) | ||
746 | { | ||
747 | new(&group_.children_) std::list<condition>(); | ||
748 | group_.orlogic_ = orlogic; | ||
749 | } | ||
750 | |||
751 | statement::condition& statement::condition::operator+=(condition n) | ||
752 | { | ||
753 | if (type_ == type::group) | ||
754 | { | ||
755 | group_.children_.push_back(std::move(n)); | ||
756 | |||
757 | return *this; | ||
758 | } else { | ||
759 | throw std::domain_error("Cannot add condition to non-group condition"); | ||
760 | } | ||
761 | } | ||
762 | |||
763 | statement::condition& statement::condition::operator&=(condition n) | ||
764 | { | ||
765 | switch (type_) | ||
766 | { | ||
767 | case type::empty: | ||
768 | { | ||
769 | *this = std::move(n); | ||
770 | |||
771 | break; | ||
772 | } | ||
773 | |||
774 | case type::singleton: | ||
775 | { | ||
776 | condition grp(false); | ||
777 | grp += *this; | ||
778 | grp += std::move(n); | ||
779 | |||
780 | *this = grp; | ||
781 | |||
782 | break; | ||
783 | } | ||
784 | |||
785 | case type::group: | ||
786 | { | ||
787 | *this += std::move(n); | ||
788 | |||
789 | break; | ||
790 | } | ||
791 | } | ||
792 | |||
793 | return *this; | ||
794 | } | ||
795 | |||
796 | const std::list<statement::condition>& statement::condition::getChildren() const | ||
797 | { | ||
798 | if (type_ == type::group) | ||
799 | { | ||
800 | return group_.children_; | ||
801 | } else { | ||
802 | throw std::domain_error("Cannot get children of non-group condition"); | ||
803 | } | ||
804 | } | ||
805 | |||
806 | }; | ||
diff --git a/lib/statement.h b/lib/statement.h new file mode 100644 index 0000000..a528d60 --- /dev/null +++ b/lib/statement.h | |||
@@ -0,0 +1,272 @@ | |||
1 | #ifndef STATEMENT_H_29F51659 | ||
2 | #define STATEMENT_H_29F51659 | ||
3 | |||
4 | #include <string> | ||
5 | #include <list> | ||
6 | #include <map> | ||
7 | #include <set> | ||
8 | #include "binding.h" | ||
9 | #include "enums.h" | ||
10 | #include "field.h" | ||
11 | #include "filter.h" | ||
12 | |||
13 | namespace verbly { | ||
14 | |||
15 | class filter; | ||
16 | |||
// Holds the pieces assembled for one query: a table map, a top table, joins,
// "with" entries and a top-level condition. It is constructed from an object
// context plus a filter, and exposes the query string and its bindings.
17 | class statement { | ||
18 | public: | ||
19 | |||
20 | statement(object context, filter queryFilter); | ||
21 | |||
22 | std::string getQueryString(std::list<std::string> select, bool random, int limit) const; | ||
23 | |||
24 | std::list<binding> getBindings() const; | ||
25 | |||
26 | private: | ||
27 | |||
// Immutable description of one join: an outer-join flag plus the table and
// column names involved. All members are set once by the constructor and only
// exposed through const accessors.
28 | class join { | ||
29 | public: | ||
30 | |||
31 | join( | ||
32 | bool outer, | ||
33 | std::string foreignTableName, | ||
34 | std::string joinTable, | ||
35 | std::string joinColumn, | ||
36 | std::string foreignTable, | ||
37 | std::string foreignColumn) : | ||
38 | outer_(outer), | ||
39 | foreignTableName_(std::move(foreignTableName)), | ||
40 | joinTable_(std::move(joinTable)), | ||
41 | joinColumn_(std::move(joinColumn)), | ||
42 | foreignTable_(std::move(foreignTable)), | ||
43 | foreignColumn_(std::move(foreignColumn)) | ||
44 | { | ||
45 | } | ||
46 | |||
47 | bool isOuterJoin() const | ||
48 | { | ||
49 | return outer_; | ||
50 | } | ||
51 | |||
52 | const std::string& getForeignTableName() const | ||
53 | { | ||
54 | return foreignTableName_; | ||
55 | } | ||
56 | |||
57 | const std::string& getJoinTable() const | ||
58 | { | ||
59 | return joinTable_; | ||
60 | } | ||
61 | |||
62 | const std::string& getJoinColumn() const | ||
63 | { | ||
64 | return joinColumn_; | ||
65 | } | ||
66 | |||
67 | const std::string& getForeignTable() const | ||
68 | { | ||
69 | return foreignTable_; | ||
70 | } | ||
71 | |||
72 | const std::string& getForeignColumn() const | ||
73 | { | ||
74 | return foreignColumn_; | ||
75 | } | ||
76 | |||
77 | private: | ||
78 | bool outer_ = false; | ||
79 | std::string foreignTableName_; | ||
80 | std::string joinTable_; | ||
81 | std::string joinColumn_; | ||
82 | std::string foreignTable_; | ||
83 | std::string foreignColumn_; | ||
84 | |||
85 | }; | ||
86 | |||
87 | friend std::ostream& operator<<(std::ostream& oss, const join& j); | ||
88 | |||
// A condition is one of three shapes, recorded in type_: empty, a singleton
// (table, column, comparison, bound value) or a group of child conditions
// combined with AND or OR. The singleton and group payloads share storage in
// an anonymous union, which is why the class declares its own copy/move
// constructors, assignment, swap and destructor.
89 | class condition { | ||
90 | public: | ||
91 | enum class type { | ||
92 | empty, | ||
93 | singleton, | ||
94 | group | ||
95 | }; | ||
96 | |||
// Comparison applied by a singleton condition.
97 | enum class comparison { | ||
98 | equals, | ||
99 | does_not_equal, | ||
100 | is_greater_than, | ||
101 | is_at_most, | ||
102 | is_less_than, | ||
103 | is_at_least, | ||
104 | is_like, | ||
105 | is_not_like, | ||
106 | is_not_null, | ||
107 | is_null | ||
108 | }; | ||
109 | |||
110 | // Copy and move constructors | ||
111 | |||
112 | condition(const condition& other); | ||
113 | condition(condition&& other); | ||
114 | |||
115 | // Assignment | ||
116 | |||
// Takes its argument by value, so one operator serves both copy and move
// assignment.
117 | condition& operator=(condition other); | ||
118 | |||
119 | // Swap | ||
120 | |||
121 | friend void swap(condition& first, condition& second); | ||
122 | |||
123 | // Destructor | ||
124 | |||
125 | ~condition(); | ||
126 | |||
127 | // Accessors | ||
128 | |||
129 | type getType() const | ||
130 | { | ||
131 | return type_; | ||
132 | } | ||
133 | |||
134 | // Empty | ||
135 | |||
136 | condition(); | ||
137 | |||
138 | // Singleton | ||
139 | |||
// NULL test on table.column: isNull selects is_null, otherwise is_not_null.
140 | condition(std::string table, std::string column, bool isNull); | ||
141 | |||
142 | condition(std::string table, std::string column, comparison comp, binding value); | ||
143 | |||
144 | // Group | ||
145 | |||
// explicit keeps a plain bool from converting silently into a group condition.
146 | explicit condition(bool orlogic); | ||
147 | |||
// Appends a child; throws std::domain_error unless this is a group.
148 | condition& operator+=(condition n); | ||
149 | |||
150 | condition& operator&=(condition n); | ||
151 | |||
// Throws std::domain_error unless this is a group.
152 | const std::list<condition>& getChildren() const; | ||
153 | |||
154 | // Utility | ||
155 | |||
156 | std::string toSql() const; | ||
157 | |||
158 | std::list<binding> flattenBindings() const; | ||
159 | |||
160 | private: | ||
// Only the member matching type_ may be accessed; members are constructed with
// placement new and destroyed explicitly in statement.cpp.
161 | union { | ||
162 | struct { | ||
163 | std::string table_; | ||
164 | std::string column_; | ||
165 | comparison comparison_; | ||
166 | binding value_; | ||
167 | } singleton_; | ||
168 | struct { | ||
169 | std::list<condition> children_; | ||
170 | bool orlogic_; | ||
171 | } group_; | ||
172 | }; | ||
173 | type type_; | ||
174 | }; | ||
175 | |||
176 | friend void swap(condition& first, condition& second); | ||
177 | |||
// Immutable bundle of an identifier, a field, a table map, a top table, a
// condition and a list of joins, exposed through const accessors.
178 | class with { | ||
179 | public: | ||
180 | |||
181 | with( | ||
182 | std::string identifier, | ||
183 | field f, | ||
184 | std::map<std::string, std::string> tables, | ||
185 | std::string topTable, | ||
186 | condition where, | ||
187 | std::list<join> joins) : | ||
188 | identifier_(std::move(identifier)), | ||
189 | field_(f), | ||
190 | tables_(std::move(tables)), | ||
191 | topTable_(std::move(topTable)), | ||
192 | topCondition_(std::move(where)), | ||
193 | joins_(std::move(joins)) | ||
194 | { | ||
195 | } | ||
196 | |||
197 | const std::string& getIdentifier() const | ||
198 | { | ||
199 | return identifier_; | ||
200 | } | ||
201 | |||
202 | field getField() const | ||
203 | { | ||
204 | return field_; | ||
205 | } | ||
206 | |||
// Uses map::at, so an identifier missing from the map raises std::out_of_range.
207 | std::string getTableForId(std::string identifier) const | ||
208 | { | ||
209 | return tables_.at(identifier); | ||
210 | } | ||
211 | |||
212 | const std::string& getTopTable() const | ||
213 | { | ||
214 | return topTable_; | ||
215 | } | ||
216 | |||
217 | const condition& getCondition() const | ||
218 | { | ||
219 | return topCondition_; | ||
220 | } | ||
221 | |||
222 | const std::list<join>& getJoins() const | ||
223 | { | ||
224 | return joins_; | ||
225 | } | ||
226 | |||
227 | private: | ||
228 | std::string identifier_; | ||
229 | field field_; | ||
230 | std::map<std::string, std::string> tables_; | ||
231 | std::string topTable_; | ||
232 | condition topCondition_; | ||
233 | std::list<join> joins_; | ||
234 | |||
235 | }; | ||
236 | |||
// Maps an object context to its table name. Written as a single conditional
// chain so the function can stay constexpr; any context not listed throws
// std::domain_error.
237 | static constexpr const char* getTableForContext(object context) | ||
238 | { | ||
239 | return (context == object::notion) ? "notions" | ||
240 | : (context == object::word) ? "words" | ||
241 | : (context == object::group) ? "groups" | ||
242 | : (context == object::frame) ? "frames" | ||
243 | : (context == object::lemma) ? "lemmas_forms" | ||
244 | : (context == object::form) ? "forms" | ||
245 | : (context == object::pronunciation) ? "pronunciations" | ||
246 | : throw std::domain_error("Provided context has no associated table"); | ||
247 | } | ||
248 | |||
249 | static const std::list<field> getSelectForContext(object context); | ||
250 | |||
251 | statement(std::string tableName, filter clause, int nextTableId = 0, int nextWithId = 0); | ||
252 | |||
253 | condition parseFilter(filter queryFilter); | ||
254 | |||
255 | std::string instantiateTable(std::string name); | ||
256 | |||
257 | condition integrate(statement subStmt); | ||
258 | |||
259 | int nextTableId_; | ||
260 | int nextWithId_; | ||
261 | |||
262 | std::map<std::string, std::string> tables_; | ||
263 | std::string topTable_; | ||
264 | std::list<join> joins_; | ||
265 | std::list<with> withs_; | ||
266 | condition topCondition_; | ||
267 | |||
268 | }; | ||
269 | |||
270 | }; | ||
271 | |||
272 | #endif /* end of include guard: STATEMENT_H_29F51659 */ | ||
diff --git a/lib/util.h b/lib/util.h index fb5fe67..b74b050 100644 --- a/lib/util.h +++ b/lib/util.h | |||
@@ -1,6 +1,10 @@ | |||
1 | #ifndef UTIL_H_15DDCA2D | 1 | #ifndef UTIL_H_15DDCA2D |
2 | #define UTIL_H_15DDCA2D | 2 | #define UTIL_H_15DDCA2D |
3 | 3 | ||
4 | #include <string> | ||
5 | #include <sstream> | ||
6 | #include <iterator> | ||
7 | |||
4 | namespace verbly { | 8 | namespace verbly { |
5 | 9 | ||
6 | template <class InputIterator> | 10 | template <class InputIterator> |
@@ -21,25 +25,33 @@ namespace verbly { | |||
21 | return result.str(); | 25 | return result.str(); |
22 | } | 26 | } |
23 | 27 | ||
// Splits input on every occurrence of delimiter and writes the pieces, in
// order, through out.
//
// - An empty input produces no pieces.
// - Adjacent or leading delimiters produce empty pieces ("a,,b" -> "a", "", "b").
// - A delimiter at the very end does not produce a trailing empty piece
//   ("a," -> "a").
// - An empty delimiter cannot divide anything, so the whole input is written as
//   a single piece. This case used to loop forever.
template <class OutputIterator>
void split(std::string input, std::string delimiter, OutputIterator out)
{
  if (delimiter.empty())
  {
    if (!input.empty())
    {
      *out = input;
      ++out;
    }

    return;
  }

  // Walk the string with an offset rather than repeatedly re-copying the
  // remainder. size_type also avoids truncating the result of find to int.
  std::string::size_type start = 0;
  while (start < input.length())
  {
    const std::string::size_type divider = input.find(delimiter, start);
    if (divider == std::string::npos)
    {
      *out = input.substr(start);
      ++out;

      break;
    }

    *out = input.substr(start, divider - start);
    ++out;

    start = divider + delimiter.length();
  }
}

// Convenience overload that collects the pieces into a Container supporting
// push_back and returns it. See the iterator overload for the splitting rules.
template <class Container>
Container split(std::string input, std::string delimiter)
{
  Container result;

  split(input, delimiter, std::back_inserter(result));

  return result;
}
diff --git a/lib/verb.cpp b/lib/verb.cpp deleted file mode 100644 index 1f45d53..0000000 --- a/lib/verb.cpp +++ /dev/null | |||
@@ -1,64 +0,0 @@ | |||
1 | #include "verbly.h" | ||
2 | |||
3 | namespace verbly { | ||
4 | |||
5 | verb::verb() | ||
6 | { | ||
7 | |||
8 | } | ||
9 | |||
10 | verb::verb(const data& _data, int _id) : word(_data, _id) | ||
11 | { | ||
12 | |||
13 | } | ||
14 | |||
15 | std::string verb::base_form() const | ||
16 | { | ||
17 | assert(_valid == true); | ||
18 | |||
19 | return _infinitive; | ||
20 | } | ||
21 | |||
22 | std::string verb::infinitive_form() const | ||
23 | { | ||
24 | assert(_valid == true); | ||
25 | |||
26 | return _infinitive; | ||
27 | } | ||
28 | |||
29 | std::string verb::past_tense_form() const | ||
30 | { | ||
31 | assert(_valid == true); | ||
32 | |||
33 | return _past_tense; | ||
34 | } | ||
35 | |||
36 | std::string verb::past_participle_form() const | ||
37 | { | ||
38 | assert(_valid == true); | ||
39 | |||
40 | return _past_participle; | ||
41 | } | ||
42 | |||
43 | std::string verb::ing_form() const | ||
44 | { | ||
45 | assert(_valid == true); | ||
46 | |||
47 | return _ing_form; | ||
48 | } | ||
49 | |||
50 | std::string verb::s_form() const | ||
51 | { | ||
52 | assert(_valid == true); | ||
53 | |||
54 | return _s_form; | ||
55 | } | ||
56 | |||
57 | frame_query verb::frames() const | ||
58 | { | ||
59 | assert(_valid == true); | ||
60 | |||
61 | return _data->frames().for_verb(*this); | ||
62 | } | ||
63 | |||
64 | }; | ||
diff --git a/lib/verb.h b/lib/verb.h deleted file mode 100644 index 7a2486e..0000000 --- a/lib/verb.h +++ /dev/null | |||
@@ -1,34 +0,0 @@ | |||
1 | #ifndef VERB_H_BCC929AD | ||
2 | #define VERB_H_BCC929AD | ||
3 | |||
4 | namespace verbly { | ||
5 | |||
6 | class frame_query; | ||
7 | |||
8 | class verb : public word { | ||
9 | private: | ||
10 | std::string _infinitive; | ||
11 | std::string _past_tense; | ||
12 | std::string _past_participle; | ||
13 | std::string _ing_form; | ||
14 | std::string _s_form; | ||
15 | |||
16 | friend class verb_query; | ||
17 | |||
18 | public: | ||
19 | verb(); | ||
20 | verb(const data& _data, int _id); | ||
21 | |||
22 | std::string base_form() const; | ||
23 | std::string infinitive_form() const; | ||
24 | std::string past_tense_form() const; | ||
25 | std::string past_participle_form() const; | ||
26 | std::string ing_form() const; | ||
27 | std::string s_form() const; | ||
28 | |||
29 | frame_query frames() const; | ||
30 | }; | ||
31 | |||
32 | }; | ||
33 | |||
34 | #endif /* end of include guard: VERB_H_BCC929AD */ | ||
diff --git a/lib/verb_query.cpp b/lib/verb_query.cpp deleted file mode 100644 index 4e6c253..0000000 --- a/lib/verb_query.cpp +++ /dev/null | |||
@@ -1,315 +0,0 @@ | |||
1 | #include "verbly.h" | ||
2 | |||
3 | namespace verbly { | ||
4 | |||
5 | verb_query::verb_query(const data& _data) : _data(_data) | ||
6 | { | ||
7 | |||
8 | } | ||
9 | |||
10 | verb_query& verb_query::limit(int _limit) | ||
11 | { | ||
12 | if ((_limit > 0) || (_limit == unlimited)) | ||
13 | { | ||
14 | this->_limit = _limit; | ||
15 | } | ||
16 | |||
17 | return *this; | ||
18 | } | ||
19 | |||
20 | verb_query& verb_query::random() | ||
21 | { | ||
22 | this->_random = true; | ||
23 | |||
24 | return *this; | ||
25 | } | ||
26 | |||
27 | verb_query& verb_query::except(const verb& _word) | ||
28 | { | ||
29 | _except.push_back(_word); | ||
30 | |||
31 | return *this; | ||
32 | } | ||
33 | |||
34 | verb_query& verb_query::rhymes_with(const word& _word) | ||
35 | { | ||
36 | for (auto rhyme : _word.get_rhymes()) | ||
37 | { | ||
38 | _rhymes.push_back(rhyme); | ||
39 | } | ||
40 | |||
41 | if (dynamic_cast<const verb*>(&_word) != nullptr) | ||
42 | { | ||
43 | _except.push_back(dynamic_cast<const verb&>(_word)); | ||
44 | } | ||
45 | |||
46 | return *this; | ||
47 | } | ||
48 | |||
49 | verb_query& verb_query::rhymes_with(rhyme _r) | ||
50 | { | ||
51 | _rhymes.push_back(_r); | ||
52 | |||
53 | return *this; | ||
54 | } | ||
55 | |||
56 | verb_query& verb_query::has_pronunciation() | ||
57 | { | ||
58 | this->_has_prn = true; | ||
59 | |||
60 | return *this; | ||
61 | } | ||
62 | |||
63 | verb_query& verb_query::has_rhyming_noun() | ||
64 | { | ||
65 | _has_rhyming_noun = true; | ||
66 | |||
67 | return *this; | ||
68 | } | ||
69 | |||
70 | verb_query& verb_query::has_rhyming_adjective() | ||
71 | { | ||
72 | _has_rhyming_adjective = true; | ||
73 | |||
74 | return *this; | ||
75 | } | ||
76 | |||
77 | verb_query& verb_query::has_rhyming_adverb() | ||
78 | { | ||
79 | _has_rhyming_adverb = true; | ||
80 | |||
81 | return *this; | ||
82 | } | ||
83 | |||
84 | verb_query& verb_query::has_rhyming_verb() | ||
85 | { | ||
86 | _has_rhyming_verb = true; | ||
87 | |||
88 | return *this; | ||
89 | } | ||
90 | |||
91 | verb_query& verb_query::with_stress(filter<std::vector<bool>> _arg) | ||
92 | { | ||
93 | _stress = _arg; | ||
94 | |||
95 | return *this; | ||
96 | } | ||
97 | |||
98 | verb_query& verb_query::has_frames() | ||
99 | { | ||
100 | this->_has_frames = true; | ||
101 | |||
102 | return *this; | ||
103 | } | ||
104 | |||
105 | std::list<verb> verb_query::run() const | ||
106 | { | ||
107 | std::stringstream construct; | ||
108 | construct << "SELECT verb_id, infinitive, past_tense, past_participle, ing_form, s_form FROM verbs"; | ||
109 | std::list<std::string> conditions; | ||
110 | std::list<binding> bindings; | ||
111 | |||
112 | if (_has_prn) | ||
113 | { | ||
114 | conditions.push_back("verb_id IN (SELECT verb_id FROM verb_pronunciations)"); | ||
115 | } | ||
116 | |||
117 | if (!_rhymes.empty()) | ||
118 | { | ||
119 | std::list<std::string> clauses(_rhymes.size(), "(prerhyme != ? AND rhyme = ?)"); | ||
120 | std::string cond = "verb_id IN (SELECT verb_id FROM verb_pronunciations WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
121 | conditions.push_back(cond); | ||
122 | |||
123 | for (auto rhy : _rhymes) | ||
124 | { | ||
125 | bindings.emplace_back(rhy.get_prerhyme()); | ||
126 | bindings.emplace_back(rhy.get_rhyme()); | ||
127 | } | ||
128 | } | ||
129 | |||
130 | if (_has_rhyming_noun) | ||
131 | { | ||
132 | conditions.push_back("verb_id IN (SELECT a.verb_id FROM verbs AS a INNER JOIN verb_pronunciations AS curp ON curp.noun_id = a.adverb_id INNER JOIN noun_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)"); | ||
133 | } | ||
134 | |||
135 | if (_has_rhyming_adjective) | ||
136 | { | ||
137 | conditions.push_back("verb_id IN (SELECT a.verb_id FROM verbs AS a INNER JOIN verb_pronunciations AS curp ON curp.noun_id = a.adverb_id INNER JOIN adjective_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)"); | ||
138 | } | ||
139 | |||
140 | if (_has_rhyming_adverb) | ||
141 | { | ||
142 | conditions.push_back("verb_id IN (SELECT a.verb_id FROM verbs AS a INNER JOIN verb_pronunciations AS curp ON curp.noun_id = a.adverb_id INNER JOIN adverb_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)"); | ||
143 | } | ||
144 | |||
145 | if (_has_rhyming_verb) | ||
146 | { | ||
147 | conditions.push_back("verb_id IN (SELECT a.verb_id FROM verbs AS a INNER JOIN verb_pronunciations AS curp ON curp.noun_id = a.adverb_id INNER JOIN verb_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme AND rhmp.verb_id != curp.verb_id)"); | ||
148 | } | ||
149 | |||
150 | if (!_stress.empty()) | ||
151 | { | ||
152 | std::stringstream cond; | ||
153 | if (_stress.get_notlogic()) | ||
154 | { | ||
155 | cond << "verb_id NOT IN"; | ||
156 | } else { | ||
157 | cond << "verb_id IN"; | ||
158 | } | ||
159 | |||
160 | cond << "(SELECT verb_id FROM verb_pronunciations WHERE "; | ||
161 | |||
162 | std::function<std::string (filter<std::vector<bool>>, bool)> recur = [&] (filter<std::vector<bool>> f, bool notlogic) -> std::string { | ||
163 | switch (f.get_type()) | ||
164 | { | ||
165 | case filter<std::vector<bool>>::type::singleton: | ||
166 | { | ||
167 | std::ostringstream _val; | ||
168 | for (auto syl : f.get_elem()) | ||
169 | { | ||
170 | if (syl) | ||
171 | { | ||
172 | _val << "1"; | ||
173 | } else { | ||
174 | _val << "0"; | ||
175 | } | ||
176 | } | ||
177 | |||
178 | bindings.emplace_back(_val.str()); | ||
179 | |||
180 | if (notlogic == f.get_notlogic()) | ||
181 | { | ||
182 | return "stress = ?"; | ||
183 | } else { | ||
184 | return "stress != ?"; | ||
185 | } | ||
186 | } | ||
187 | |||
188 | case filter<std::vector<bool>>::type::group: | ||
189 | { | ||
190 | bool truelogic = notlogic != f.get_notlogic(); | ||
191 | |||
192 | std::list<std::string> clauses; | ||
193 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<std::vector<bool>> f2) { | ||
194 | return recur(f2, truelogic); | ||
195 | }); | ||
196 | |||
197 | if (truelogic == f.get_orlogic()) | ||
198 | { | ||
199 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
200 | } else { | ||
201 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
202 | } | ||
203 | } | ||
204 | } | ||
205 | }; | ||
206 | |||
207 | cond << recur(_stress, _stress.get_notlogic()); | ||
208 | cond << ")"; | ||
209 | conditions.push_back(cond.str()); | ||
210 | } | ||
211 | |||
212 | for (auto except : _except) | ||
213 | { | ||
214 | conditions.push_back("verb_id != ?"); | ||
215 | bindings.emplace_back(except._id); | ||
216 | } | ||
217 | |||
218 | if (!_has_frames) | ||
219 | { | ||
220 | conditions.push_back("verb_id IN (SELECT verb_id FROM verb_groups)"); | ||
221 | } | ||
222 | |||
223 | if (!conditions.empty()) | ||
224 | { | ||
225 | construct << " WHERE "; | ||
226 | construct << verbly::implode(std::begin(conditions), std::end(conditions), " AND "); | ||
227 | } | ||
228 | |||
229 | if (_random) | ||
230 | { | ||
231 | construct << " ORDER BY RANDOM()"; | ||
232 | } | ||
233 | |||
234 | if (_limit != unlimited) | ||
235 | { | ||
236 | construct << " LIMIT " << _limit; | ||
237 | } | ||
238 | |||
239 | sqlite3_stmt* ppstmt; | ||
240 | std::string query = construct.str(); | ||
241 | if (sqlite3_prepare_v2(_data.ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
242 | { | ||
243 | throw std::runtime_error(sqlite3_errmsg(_data.ppdb)); | ||
244 | } | ||
245 | |||
246 | int i = 1; | ||
247 | for (auto& binding : bindings) | ||
248 | { | ||
249 | switch (binding.get_type()) | ||
250 | { | ||
251 | case binding::type::integer: | ||
252 | { | ||
253 | sqlite3_bind_int(ppstmt, i, binding.get_integer()); | ||
254 | |||
255 | break; | ||
256 | } | ||
257 | |||
258 | case binding::type::string: | ||
259 | { | ||
260 | sqlite3_bind_text(ppstmt, i, binding.get_string().c_str(), binding.get_string().length(), SQLITE_TRANSIENT); | ||
261 | |||
262 | break; | ||
263 | } | ||
264 | } | ||
265 | |||
266 | i++; | ||
267 | } | ||
268 | |||
269 | std::list<verb> output; | ||
270 | while (sqlite3_step(ppstmt) == SQLITE_ROW) | ||
271 | { | ||
272 | verb tnc {_data, sqlite3_column_int(ppstmt, 0)}; | ||
273 | tnc._infinitive = std::string(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 1))); | ||
274 | tnc._past_tense = std::string(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 2))); | ||
275 | tnc._past_participle = std::string(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 3))); | ||
276 | tnc._ing_form = std::string(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 4))); | ||
277 | tnc._s_form = std::string(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 5))); | ||
278 | |||
279 | output.push_back(tnc); | ||
280 | } | ||
281 | |||
282 | sqlite3_finalize(ppstmt); | ||
283 | |||
284 | for (auto& verb : output) | ||
285 | { | ||
286 | query = "SELECT pronunciation, prerhyme, rhyme FROM verb_pronunciations WHERE verb_id = ?"; | ||
287 | if (sqlite3_prepare_v2(_data.ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
288 | { | ||
289 | throw std::runtime_error(sqlite3_errmsg(_data.ppdb)); | ||
290 | } | ||
291 | |||
292 | sqlite3_bind_int(ppstmt, 1, verb._id); | ||
293 | |||
294 | while (sqlite3_step(ppstmt) == SQLITE_ROW) | ||
295 | { | ||
296 | std::string pronunciation(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 0))); | ||
297 | auto phonemes = verbly::split<std::list<std::string>>(pronunciation, " "); | ||
298 | |||
299 | verb.pronunciations.push_back(phonemes); | ||
300 | |||
301 | if ((sqlite3_column_type(ppstmt, 1) != SQLITE_NULL) && (sqlite3_column_type(ppstmt, 2) != SQLITE_NULL)) | ||
302 | { | ||
303 | std::string prerhyme(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 1))); | ||
304 | std::string rhyming(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 2))); | ||
305 | verb.rhymes.emplace_back(prerhyme, rhyming); | ||
306 | } | ||
307 | } | ||
308 | |||
309 | sqlite3_finalize(ppstmt); | ||
310 | } | ||
311 | |||
312 | return output; | ||
313 | } | ||
314 | |||
315 | }; | ||
diff --git a/lib/verb_query.h b/lib/verb_query.h deleted file mode 100644 index 566ae37..0000000 --- a/lib/verb_query.h +++ /dev/null | |||
@@ -1,45 +0,0 @@ | |||
1 | #ifndef VERB_QUERY_H_34E5A679 | ||
2 | #define VERB_QUERY_H_34E5A679 | ||
3 | |||
4 | namespace verbly { | ||
5 | |||
6 | class verb_query { | ||
7 | public: | ||
8 | verb_query(const data& _data); | ||
9 | |||
10 | verb_query& limit(int _limit); | ||
11 | verb_query& random(); | ||
12 | verb_query& except(const verb& _word); | ||
13 | verb_query& rhymes_with(const word& _word); | ||
14 | verb_query& rhymes_with(rhyme _r); | ||
15 | verb_query& has_pronunciation(); | ||
16 | verb_query& has_rhyming_noun(); | ||
17 | verb_query& has_rhyming_adjective(); | ||
18 | verb_query& has_rhyming_adverb(); | ||
19 | verb_query& has_rhyming_verb(); | ||
20 | verb_query& with_stress(filter<std::vector<bool>> _arg); | ||
21 | |||
22 | verb_query& has_frames(); | ||
23 | |||
24 | std::list<verb> run() const; | ||
25 | |||
26 | const static int unlimited = -1; | ||
27 | |||
28 | private: | ||
29 | const data& _data; | ||
30 | int _limit = unlimited; | ||
31 | bool _random = false; | ||
32 | std::list<rhyme> _rhymes; | ||
33 | std::list<verb> _except; | ||
34 | bool _has_prn = false; | ||
35 | bool _has_frames = false; | ||
36 | bool _has_rhyming_noun = false; | ||
37 | bool _has_rhyming_adjective = false; | ||
38 | bool _has_rhyming_adverb = false; | ||
39 | bool _has_rhyming_verb = false; | ||
40 | filter<std::vector<bool>> _stress; | ||
41 | }; | ||
42 | |||
43 | }; | ||
44 | |||
45 | #endif /* end of include guard: VERB_QUERY_H_34E5A679 */ | ||
diff --git a/lib/verbly.h b/lib/verbly.h index cfaf5bc..6dfc01a 100644 --- a/lib/verbly.h +++ b/lib/verbly.h | |||
@@ -1,35 +1,17 @@ | |||
1 | #ifndef VERBLY_H_5B39CE50 | 1 | #ifndef VERBLY_H_5B39CE50 |
2 | #define VERBLY_H_5B39CE50 | 2 | #define VERBLY_H_5B39CE50 |
3 | 3 | ||
4 | #include <string> | ||
5 | #include <list> | ||
6 | #include <sstream> | ||
7 | #include <algorithm> | ||
8 | #include <cassert> | ||
9 | #include <set> | ||
10 | #include <stdexcept> | ||
11 | #include <vector> | ||
12 | #include <map> | ||
13 | #include <iterator> | ||
14 | #include <sstream> | ||
15 | #include <functional> | ||
16 | #include <iostream> | ||
17 | #include <new> | ||
18 | |||
19 | #include "util.h" | 4 | #include "util.h" |
20 | #include "data.h" | 5 | #include "database.h" |
6 | #include "filter.h" | ||
7 | #include "field.h" | ||
8 | #include "query.h" | ||
9 | #include "notion.h" | ||
21 | #include "word.h" | 10 | #include "word.h" |
22 | #include "verb.h" | 11 | #include "group.h" |
23 | #include "adverb.h" | ||
24 | #include "adjective.h" | ||
25 | #include "noun.h" | ||
26 | #include "frame.h" | 12 | #include "frame.h" |
27 | #include "preposition.h" | 13 | #include "lemma.h" |
28 | #include "token.h" | 14 | #include "form.h" |
29 | #include "noun_query.h" | 15 | #include "pronunciation.h" |
30 | #include "adverb_query.h" | ||
31 | #include "adjective_query.h" | ||
32 | #include "verb_query.h" | ||
33 | #include "frame_query.h" | ||
34 | 16 | ||
35 | #endif /* end of include guard: VERBLY_H_5B39CE50 */ | 17 | #endif /* end of include guard: VERBLY_H_5B39CE50 */ |
diff --git a/lib/word.cpp b/lib/word.cpp index 49e34a1..3edf2d2 100644 --- a/lib/word.cpp +++ b/lib/word.cpp | |||
@@ -1,60 +1,112 @@ | |||
1 | #include "verbly.h" | 1 | #include "word.h" |
2 | #include <algorithm> | 2 | #include <sqlite3.h> |
3 | #include "form.h" | ||
4 | #include "util.h" | ||
5 | #include "database.h" | ||
6 | #include "query.h" | ||
3 | 7 | ||
4 | namespace verbly { | 8 | namespace verbly { |
5 | 9 | ||
6 | rhyme::rhyme(std::string prerhyme, std::string phonemes) : _prerhyme(prerhyme), _rhyme(phonemes) | 10 | const object word::objectType = object::word; |
7 | { | ||
8 | |||
9 | } | ||
10 | 11 | ||
11 | std::string rhyme::get_prerhyme() const | 12 | const std::list<std::string> word::select = {"word_id", "notion_id", "lemma_id", "tag_count", "position", "group_id"}; |
12 | { | ||
13 | return _prerhyme; | ||
14 | } | ||
15 | 13 | ||
16 | std::string rhyme::get_rhyme() const | 14 | const field word::id = field::integerField(object::word, "word_id"); |
17 | { | 15 | const field word::tagCount = field::integerField(object::word, "tag_count", true); |
18 | return _rhyme; | 16 | const field word::adjectivePosition = field::integerField(object::word, "position", true); |
19 | } | 17 | |
18 | const field word::notion = field::joinField(object::word, "notion_id", object::notion); | ||
19 | const field word::lemma = field::joinField(object::word, "lemma_id", object::lemma); | ||
20 | const field word::group = field::joinField(object::word, "group_id", object::group, true); | ||
21 | |||
22 | const field word::antonyms = field::selfJoin(object::word, "word_id", "antonymy", "antonym_2_id", "antonym_1_id"); | ||
23 | |||
24 | const field word::specifications = field::selfJoin(object::word, "word_id", "specification", "general_id", "specific_id"); | ||
25 | const field word::generalizations = field::selfJoin(object::word, "word_id", "specification", "specific_id", "general_id"); | ||
20 | 26 | ||
21 | bool rhyme::operator==(const rhyme& other) const | 27 | const field word::pertainyms = field::selfJoin(object::word, "word_id", "pertainymy", "noun_id", "pertainym_id"); |
28 | const field word::antiPertainyms = field::selfJoin(object::word, "word_id", "pertainymy", "pertainym_id", "noun_id"); | ||
29 | |||
30 | const field word::mannernyms = field::selfJoin(object::word, "word_id", "mannernymy", "adjective_id", "mannernym_id"); | ||
31 | const field word::antiMannernyms = field::selfJoin(object::word, "word_id", "mannernymy", "mannernym_id", "adjective_id"); | ||
32 | |||
33 | const field word::usageTerms = field::selfJoin(object::word, "word_id", "usage", "domain_id", "term_id"); | ||
34 | const field word::usageDomains = field::selfJoin(object::word, "word_id", "usage", "term_id", "domain_id"); | ||
35 | |||
36 | const field word::topicalTerms = field::selfJoin(object::word, "word_id", "topicality", "domain_id", "term_id"); | ||
37 | const field word::topicalDomains = field::selfJoin(object::word, "word_id", "topicality", "term_id", "domain_id"); | ||
38 | |||
39 | const field word::regionalTerms = field::selfJoin(object::word, "word_id", "regionality", "domain_id", "term_id"); | ||
40 | const field word::regionalDomains = field::selfJoin(object::word, "word_id", "regionality", "term_id", "domain_id"); | ||
41 | |||
42 | word::word(const database& db, sqlite3_stmt* row) : db_(&db), valid_(true) | ||
22 | { | 43 | { |
23 | return std::tie(_prerhyme, _rhyme) == std::tie(other._prerhyme, other._rhyme); | 44 | id_ = sqlite3_column_int(row, 0); |
45 | notionId_ = sqlite3_column_int(row, 1); | ||
46 | lemmaId_ = sqlite3_column_int(row, 2); | ||
47 | |||
48 | if (sqlite3_column_type(row, 3) != SQLITE_NULL) | ||
49 | { | ||
50 | hasTagCount_ = true; | ||
51 | tagCount_ = sqlite3_column_int(row, 3); | ||
52 | } | ||
53 | |||
54 | if (sqlite3_column_type(row, 4) != SQLITE_NULL) | ||
55 | { | ||
56 | adjectivePosition_ = static_cast<positioning>(sqlite3_column_int(row, 4)); | ||
57 | } | ||
58 | |||
59 | if (sqlite3_column_type(row, 5) != SQLITE_NULL) | ||
60 | { | ||
61 | hasGroup_ = true; | ||
62 | groupId_ = sqlite3_column_int(row, 5); | ||
63 | } | ||
24 | } | 64 | } |
25 | 65 | ||
26 | word::word() | 66 | const notion& word::getNotion() const |
27 | { | 67 | { |
68 | if (!valid_) | ||
69 | { | ||
70 | throw std::domain_error("Bad access to uninitialized word"); | ||
71 | } | ||
72 | |||
73 | if (!notion_) | ||
74 | { | ||
75 | notion_ = db_->notions(notion::id == notionId_).first(); | ||
76 | } | ||
28 | 77 | ||
78 | return notion_; | ||
29 | } | 79 | } |
30 | 80 | ||
31 | word::word(const data& _data, int _id) : _data(&_data), _id(_id), _valid(true) | 81 | const lemma& word::getLemma() const |
32 | { | 82 | { |
83 | if (!valid_) | ||
84 | { | ||
85 | throw std::domain_error("Bad access to uninitialized word"); | ||
86 | } | ||
33 | 87 | ||
88 | if (!lemma_) | ||
89 | { | ||
90 | lemma_ = db_->lemmas(lemma::id == lemmaId_).first(); | ||
91 | } | ||
92 | |||
93 | return lemma_; | ||
34 | } | 94 | } |
35 | 95 | ||
36 | std::list<rhyme> word::get_rhymes() const | 96 | std::string word::getBaseForm() const |
37 | { | 97 | { |
38 | assert(_valid == true); | 98 | return getLemma().getBaseForm().getText(); |
39 | |||
40 | return rhymes; | ||
41 | } | 99 | } |
42 | 100 | ||
43 | bool word::starts_with_vowel_sound() const | 101 | std::list<std::string> word::getInflections(inflection category) const |
44 | { | 102 | { |
45 | assert(_valid == true); | 103 | std::list<std::string> result; |
46 | 104 | for (const form& infl : getLemma().getInflections(category)) | |
47 | if (pronunciations.size() > 0) | ||
48 | { | 105 | { |
49 | return std::any_of(std::begin(pronunciations), std::end(pronunciations), [] (std::list<std::string> phonemes) { | 106 | result.push_back(infl.getText()); |
50 | return (phonemes.front().find_first_of("012") != std::string::npos); | ||
51 | }); | ||
52 | } else { | ||
53 | // If the word is not in CMUDICT, fall back to checking whether the first letter is a vowel | ||
54 | // Not perfect but will work in most cases | ||
55 | char ch = tolower(base_form().front()); | ||
56 | return (ch == 'a') || (ch == 'e') || (ch == 'i') || (ch == 'o') || (ch == 'u'); | ||
57 | } | 107 | } |
108 | |||
109 | return result; | ||
58 | } | 110 | } |
59 | 111 | ||
60 | }; | 112 | }; |
diff --git a/lib/word.h b/lib/word.h index 08797a3..f71dad9 100644 --- a/lib/word.h +++ b/lib/word.h | |||
@@ -1,48 +1,173 @@ | |||
1 | #ifndef WORD_H_8FC89498 | 1 | #ifndef WORD_H_DF91B1B4 |
2 | #define WORD_H_8FC89498 | 2 | #define WORD_H_DF91B1B4 |
3 | |||
4 | #include <stdexcept> | ||
5 | #include <map> | ||
6 | #include "field.h" | ||
7 | #include "filter.h" | ||
8 | #include "notion.h" | ||
9 | #include "lemma.h" | ||
10 | #include "group.h" | ||
11 | |||
12 | struct sqlite3_stmt; | ||
3 | 13 | ||
4 | namespace verbly { | 14 | namespace verbly { |
5 | 15 | ||
6 | class rhyme { | 16 | class database; |
7 | public: | 17 | |
8 | rhyme(std::string prerhyme, std::string phonemes); | 18 | class word { |
19 | public: | ||
20 | |||
21 | // Default constructor | ||
22 | |||
23 | word() = default; | ||
24 | |||
25 | // Construct from database | ||
26 | |||
27 | word(const database& db, sqlite3_stmt* row); | ||
28 | |||
29 | // Accessors | ||
30 | |||
31 | operator bool() const | ||
32 | { | ||
33 | return valid_; | ||
34 | } | ||
35 | |||
36 | int getId() const | ||
37 | { | ||
38 | if (!valid_) | ||
39 | { | ||
40 | throw std::domain_error("Bad access to uninitialized word"); | ||
41 | } | ||
9 | 42 | ||
10 | std::string get_prerhyme() const; | 43 | return id_; |
11 | std::string get_rhyme() const; | 44 | } |
45 | |||
46 | bool hasTagCount() const | ||
47 | { | ||
48 | if (!valid_) | ||
49 | { | ||
50 | throw std::domain_error("Bad access to uninitialized word"); | ||
51 | } | ||
12 | 52 | ||
13 | bool operator==(const rhyme& other) const; | 53 | return hasTagCount_; |
54 | } | ||
55 | |||
56 | int getTagCount() const | ||
57 | { | ||
58 | if (!valid_) | ||
59 | { | ||
60 | throw std::domain_error("Bad access to uninitialized word"); | ||
61 | } | ||
14 | 62 | ||
15 | private: | 63 | if (!hasTagCount_) |
16 | std::string _prerhyme; | 64 | { |
17 | std::string _rhyme; | 65 | throw std::domain_error("Word has no tag count"); |
18 | }; | 66 | } |
19 | |||
20 | class word { | ||
21 | protected: | ||
22 | const data* _data; | ||
23 | int _id; | ||
24 | bool _valid = false; | ||
25 | 67 | ||
26 | std::list<std::list<std::string>> pronunciations; | 68 | return tagCount_; |
27 | std::list<rhyme> rhymes; | 69 | } |
70 | |||
71 | bool hasAdjectivePositioning() const | ||
72 | { | ||
73 | if (!valid_) | ||
74 | { | ||
75 | throw std::domain_error("Bad access to uninitialized word"); | ||
76 | } | ||
28 | 77 | ||
29 | word(); | 78 | return (adjectivePosition_ != positioning::undefined); |
30 | word(const data& _data, int _id); | 79 | } |
80 | |||
81 | positioning getAdjectivePosition() const | ||
82 | { | ||
83 | if (!valid_) | ||
84 | { | ||
85 | throw std::domain_error("Bad access to uninitialized word"); | ||
86 | } | ||
31 | 87 | ||
32 | friend class adjective_query; | 88 | if (adjectivePosition_ == positioning::undefined) |
33 | friend class verb_query; | 89 | { |
34 | friend class noun_query; | 90 | throw std::domain_error("Word has no adjective position"); |
35 | friend class adverb_query; | 91 | } |
36 | friend class frame_query; | ||
37 | friend class preposition_query; | ||
38 | |||
39 | public: | ||
40 | virtual std::string base_form() const = 0; | ||
41 | 92 | ||
42 | std::list<rhyme> get_rhymes() const; | 93 | return adjectivePosition_; |
43 | bool starts_with_vowel_sound() const; | 94 | } |
95 | |||
96 | const notion& getNotion() const; | ||
97 | |||
98 | const lemma& getLemma() const; | ||
99 | |||
100 | // Convenience accessors | ||
101 | |||
102 | std::string getBaseForm() const; | ||
103 | |||
104 | std::list<std::string> getInflections(inflection infl) const; | ||
105 | |||
106 | // Type info | ||
107 | |||
108 | static const object objectType; | ||
109 | |||
110 | static const std::list<std::string> select; | ||
111 | |||
112 | // Query fields | ||
113 | |||
114 | static const field id; | ||
115 | static const field tagCount; | ||
116 | static const field adjectivePosition; | ||
117 | |||
118 | operator filter() const | ||
119 | { | ||
120 | return (id == id_); | ||
121 | } | ||
122 | |||
123 | // Relationships with other objects | ||
124 | |||
125 | static const field notion; | ||
126 | static const field lemma; | ||
127 | static const field group; | ||
128 | |||
129 | // Relationships with self | ||
130 | |||
131 | static const field antonyms; | ||
132 | |||
133 | static const field specifications; | ||
134 | static const field generalizations; | ||
135 | |||
136 | static const field pertainyms; | ||
137 | static const field antiPertainyms; | ||
138 | |||
139 | static const field mannernyms; | ||
140 | static const field antiMannernyms; | ||
141 | |||
142 | static const field usageTerms; | ||
143 | static const field usageDomains; | ||
144 | |||
145 | static const field topicalTerms; | ||
146 | static const field topicalDomains; | ||
147 | |||
148 | static const field regionalTerms; | ||
149 | static const field regionalDomains; | ||
150 | |||
151 | private: | ||
152 | bool valid_ = false; | ||
153 | |||
154 | int id_; | ||
155 | bool hasTagCount_ = false; | ||
156 | int tagCount_; | ||
157 | positioning adjectivePosition_ = positioning::undefined; | ||
158 | int notionId_; | ||
159 | int lemmaId_; | ||
160 | bool hasGroup_ = false; | ||
161 | int groupId_; | ||
162 | |||
163 | const database* db_; | ||
164 | |||
165 | mutable class notion notion_; | ||
166 | mutable class lemma lemma_; | ||
167 | mutable class group group_; | ||
168 | |||
44 | }; | 169 | }; |
45 | 170 | ||
46 | }; | 171 | }; |
47 | 172 | ||
48 | #endif /* end of include guard: WORD_H_8FC89498 */ | 173 | #endif /* end of include guard: WORD_H_DF91B1B4 */ |