diff options
78 files changed, 8971 insertions, 8696 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 9c39d99..61fcce2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt | |||
@@ -4,8 +4,10 @@ project (verbly) | |||
4 | find_package(PkgConfig) | 4 | find_package(PkgConfig) |
5 | pkg_check_modules(sqlite3 sqlite3>=3.8.3 REQUIRED) | 5 | pkg_check_modules(sqlite3 sqlite3>=3.8.3 REQUIRED) |
6 | 6 | ||
7 | set(CMAKE_BUILD_TYPE Debug) | ||
8 | |||
7 | include_directories(vendor/json) | 9 | include_directories(vendor/json) |
8 | add_library(verbly lib/data.cpp lib/adjective.cpp lib/noun.cpp lib/verb.cpp lib/adverb.cpp lib/token.cpp lib/word.cpp lib/frame.cpp lib/preposition.cpp lib/adjective_query.cpp lib/adverb_query.cpp lib/noun_query.cpp lib/verb_query.cpp lib/frame_query.cpp) | 10 | add_library(verbly lib/filter.cpp lib/field.cpp lib/notion.cpp lib/word.cpp lib/group.cpp lib/frame.cpp lib/lemma.cpp lib/form.cpp lib/pronunciation.cpp lib/statement.cpp lib/binding.cpp lib/database.cpp) |
9 | set_property(TARGET verbly PROPERTY CXX_STANDARD 11) | 11 | set_property(TARGET verbly PROPERTY CXX_STANDARD 11) |
10 | set_property(TARGET verbly PROPERTY CXX_STANDARD_REQUIRED ON) | 12 | set_property(TARGET verbly PROPERTY CXX_STANDARD_REQUIRED ON) |
11 | target_link_libraries(verbly ${sqlite3_LIBRARIES}) | 13 | target_link_libraries(verbly ${sqlite3_LIBRARIES}) |
diff --git a/generator/CMakeLists.txt b/generator/CMakeLists.txt index 552526d..4f78eb8 100644 --- a/generator/CMakeLists.txt +++ b/generator/CMakeLists.txt | |||
@@ -1,12 +1,12 @@ | |||
1 | cmake_minimum_required (VERSION 2.6) | 1 | cmake_minimum_required (VERSION 3.1) |
2 | project (generator) | 2 | project (generator) |
3 | 3 | ||
4 | find_package(PkgConfig) | 4 | find_package(PkgConfig) |
5 | pkg_check_modules(sqlite3 sqlite3 REQUIRED) | 5 | pkg_check_modules(sqlite3 sqlite3 REQUIRED) |
6 | find_package(libxml2 REQUIRED) | 6 | find_package(libxml2 REQUIRED) |
7 | 7 | ||
8 | include_directories(${sqlite3_INCLUDE_DIR} ${LIBXML2_INCLUDE_DIR} ../vendor/json/src) | 8 | include_directories(${sqlite3_INCLUDE_DIR} ${LIBXML2_INCLUDE_DIR} ../vendor/json) |
9 | add_executable(generator generator.cpp) | 9 | add_executable(generator notion.cpp word.cpp lemma.cpp form.cpp pronunciation.cpp group.cpp frame.cpp part.cpp selrestr.cpp database.cpp field.cpp generator.cpp main.cpp) |
10 | set_property(TARGET generator PROPERTY CXX_STANDARD 11) | 10 | set_property(TARGET generator PROPERTY CXX_STANDARD 11) |
11 | set_property(TARGET generator PROPERTY CXX_STANDARD_REQUIRED ON) | 11 | set_property(TARGET generator PROPERTY CXX_STANDARD_REQUIRED ON) |
12 | target_link_libraries(generator ${sqlite3_LIBRARIES} ${LIBXML2_LIBRARIES}) | 12 | target_link_libraries(generator ${sqlite3_LIBRARIES} ${LIBXML2_LIBRARIES}) |
diff --git a/generator/database.cpp b/generator/database.cpp new file mode 100644 index 0000000..c7e4cfa --- /dev/null +++ b/generator/database.cpp | |||
@@ -0,0 +1,173 @@ | |||
1 | #include "database.h" | ||
2 | #include <sqlite3.h> | ||
3 | #include <cassert> | ||
4 | #include <fstream> | ||
5 | #include <stdexcept> | ||
6 | #include <cstdio> | ||
7 | #include <sstream> | ||
8 | #include "field.h" | ||
9 | #include "../lib/util.h" | ||
10 | |||
11 | namespace verbly { | ||
12 | namespace generator { | ||
13 | |||
14 | sqlite3_error::sqlite3_error( | ||
15 | const std::string& what, | ||
16 | const std::string& db_err) : | ||
17 | what_(what + " (" + db_err + ")"), | ||
18 | db_err_(db_err) | ||
19 | { | ||
20 | } | ||
21 | |||
22 | const char* sqlite3_error::what() const noexcept | ||
23 | { | ||
24 | return what_.c_str(); | ||
25 | } | ||
26 | |||
27 | const char* sqlite3_error::db_err() const noexcept | ||
28 | { | ||
29 | return db_err_.c_str(); | ||
30 | } | ||
31 | |||
32 | database::database(std::string path) | ||
33 | { | ||
34 | // If there is already a file at this path, overwrite it. | ||
35 | if (std::ifstream(path)) | ||
36 | { | ||
37 | if (std::remove(path.c_str())) | ||
38 | { | ||
39 | throw std::logic_error("Could not overwrite file at path"); | ||
40 | } | ||
41 | } | ||
42 | |||
43 | if (sqlite3_open_v2(path.c_str(), &ppdb_, SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE, NULL) != SQLITE_OK) | ||
44 | { | ||
45 | // We still have to free the resources allocated. In the event that | ||
46 | // allocation failed, ppdb will be null and sqlite3_close_v2 will just | ||
47 | // ignore it. | ||
48 | std::string errmsg(sqlite3_errmsg(ppdb_)); | ||
49 | sqlite3_close_v2(ppdb_); | ||
50 | |||
51 | throw sqlite3_error("Could not create output datafile", errmsg); | ||
52 | } | ||
53 | } | ||
54 | |||
55 | database::database(database&& other) : database() | ||
56 | { | ||
57 | swap(*this, other); | ||
58 | } | ||
59 | |||
60 | database& database::operator=(database&& other) | ||
61 | { | ||
62 | swap(*this, other); | ||
63 | |||
64 | return *this; | ||
65 | } | ||
66 | |||
67 | void swap(database& first, database& second) | ||
68 | { | ||
69 | std::swap(first.ppdb_, second.ppdb_); | ||
70 | } | ||
71 | |||
72 | database::~database() | ||
73 | { | ||
74 | sqlite3_close_v2(ppdb_); | ||
75 | } | ||
76 | |||
77 | void database::runQuery(std::string query) | ||
78 | { | ||
79 | // This can only happen when doing bad things with move semantics. | ||
80 | assert(ppdb_ != nullptr); | ||
81 | |||
82 | sqlite3_stmt* ppstmt; | ||
83 | |||
84 | if (sqlite3_prepare_v2(ppdb_, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
85 | { | ||
86 | throw sqlite3_error("Error writing to database", sqlite3_errmsg(ppdb_)); | ||
87 | } | ||
88 | |||
89 | int result = sqlite3_step(ppstmt); | ||
90 | sqlite3_finalize(ppstmt); | ||
91 | |||
92 | if (result != SQLITE_DONE) | ||
93 | { | ||
94 | throw sqlite3_error("Error writing to database", sqlite3_errmsg(ppdb_)); | ||
95 | } | ||
96 | } | ||
97 | |||
98 | void database::insertIntoTable(std::string table, std::list<field> fields) | ||
99 | { | ||
100 | // This can only happen when doing bad things with move semantics. | ||
101 | assert(ppdb_ != nullptr); | ||
102 | |||
103 | // This shouldn't happen. | ||
104 | assert(!fields.empty()); | ||
105 | |||
106 | std::list<std::string> fieldNames; | ||
107 | std::list<std::string> qs; | ||
108 | for (field& f : fields) | ||
109 | { | ||
110 | fieldNames.push_back(f.getName()); | ||
111 | qs.push_back("?"); | ||
112 | } | ||
113 | |||
114 | std::ostringstream query; | ||
115 | query << "INSERT INTO "; | ||
116 | query << table; | ||
117 | query << " ("; | ||
118 | query << implode(std::begin(fieldNames), std::end(fieldNames), ", "); | ||
119 | query << ") VALUES ("; | ||
120 | query << implode(std::begin(qs), std::end(qs), ", "); | ||
121 | query << ")"; | ||
122 | |||
123 | std::string query_str = query.str(); | ||
124 | |||
125 | sqlite3_stmt* ppstmt; | ||
126 | |||
127 | if (sqlite3_prepare_v2(ppdb_, query_str.c_str(), query_str.length(), &ppstmt, NULL) != SQLITE_OK) | ||
128 | { | ||
129 | throw sqlite3_error("Error writing to database", sqlite3_errmsg(ppdb_)); | ||
130 | } | ||
131 | |||
132 | int i = 1; | ||
133 | for (field& f : fields) | ||
134 | { | ||
135 | switch (f.getType()) | ||
136 | { | ||
137 | case field::type::integer: | ||
138 | { | ||
139 | sqlite3_bind_int(ppstmt, i, f.getInteger()); | ||
140 | |||
141 | break; | ||
142 | } | ||
143 | |||
144 | case field::type::string: | ||
145 | { | ||
146 | sqlite3_bind_text(ppstmt, i, f.getString().c_str(), f.getString().length(), SQLITE_TRANSIENT); | ||
147 | |||
148 | break; | ||
149 | } | ||
150 | |||
151 | case field::type::invalid: | ||
152 | { | ||
153 | // Fields can only be invalid when doing bad things with move semantics. | ||
154 | assert(false); | ||
155 | |||
156 | break; | ||
157 | } | ||
158 | } | ||
159 | |||
160 | i++; | ||
161 | } | ||
162 | |||
163 | int result = sqlite3_step(ppstmt); | ||
164 | sqlite3_finalize(ppstmt); | ||
165 | |||
166 | if (result != SQLITE_DONE) | ||
167 | { | ||
168 | throw sqlite3_error("Error writing to database", sqlite3_errmsg(ppdb_)); | ||
169 | } | ||
170 | } | ||
171 | |||
172 | }; | ||
173 | }; | ||
diff --git a/generator/database.h b/generator/database.h new file mode 100644 index 0000000..15cdff5 --- /dev/null +++ b/generator/database.h | |||
@@ -0,0 +1,73 @@ | |||
1 | #ifndef DATABASE_H_0B0A47D2 | ||
2 | #define DATABASE_H_0B0A47D2 | ||
3 | |||
4 | #include <string> | ||
5 | #include <exception> | ||
6 | #include <list> | ||
7 | |||
8 | struct sqlite3; | ||
9 | |||
10 | namespace verbly { | ||
11 | namespace generator { | ||
12 | |||
13 | class field; | ||
14 | |||
// Exception thrown when an SQLite operation fails. Carries both a combined
// human-readable message and the raw SQLite error text.
class sqlite3_error : public std::exception {
public:

  // `what` describes the failed operation; `db_err` is the SQLite error text.
  sqlite3_error(const std::string& what, const std::string& db_err);

  // Combined message.
  const char* what() const noexcept override;

  // Raw SQLite error text only.
  const char* db_err() const noexcept;

private:

  std::string what_;
  std::string db_err_;

};
28 | |||
29 | class database { | ||
30 | public: | ||
31 | |||
32 | // Constructor | ||
33 | |||
34 | explicit database(std::string path); | ||
35 | |||
36 | // Disable copying | ||
37 | |||
38 | database(const database& other) = delete; | ||
39 | database& operator=(const database& other) = delete; | ||
40 | |||
41 | // Move constructor and move assignment | ||
42 | |||
43 | database(database&& other); | ||
44 | database& operator=(database&& other); | ||
45 | |||
46 | // Swap | ||
47 | |||
48 | friend void swap(database& first, database& second); | ||
49 | |||
50 | // Destructor | ||
51 | |||
52 | ~database(); | ||
53 | |||
54 | // Actions | ||
55 | |||
56 | void runQuery(std::string query); | ||
57 | |||
58 | void insertIntoTable(std::string table, std::list<field> fields); | ||
59 | |||
60 | private: | ||
61 | |||
62 | database() | ||
63 | { | ||
64 | } | ||
65 | |||
66 | sqlite3* ppdb_ = nullptr; | ||
67 | |||
68 | }; | ||
69 | |||
70 | }; | ||
71 | }; | ||
72 | |||
73 | #endif /* end of include guard: DATABASE_H_0B0A47D2 */ | ||
diff --git a/generator/field.cpp b/generator/field.cpp new file mode 100644 index 0000000..84b2f91 --- /dev/null +++ b/generator/field.cpp | |||
@@ -0,0 +1,193 @@ | |||
1 | #include "field.h" | ||
2 | #include <stdexcept> | ||
3 | #include <utility> | ||
4 | |||
5 | namespace verbly { | ||
6 | namespace generator { | ||
7 | |||
8 | field::field(const field& other) | ||
9 | { | ||
10 | type_ = other.type_; | ||
11 | name_ = other.name_; | ||
12 | |||
13 | switch (type_) | ||
14 | { | ||
15 | case type::integer: | ||
16 | { | ||
17 | integer_ = other.integer_; | ||
18 | |||
19 | break; | ||
20 | } | ||
21 | |||
22 | case type::string: | ||
23 | { | ||
24 | new(&string_) std::string(other.string_); | ||
25 | |||
26 | break; | ||
27 | } | ||
28 | |||
29 | case type::invalid: | ||
30 | { | ||
31 | break; | ||
32 | } | ||
33 | } | ||
34 | } | ||
35 | |||
36 | field::field(field&& other) : field() | ||
37 | { | ||
38 | swap(*this, other); | ||
39 | } | ||
40 | |||
41 | field& field::operator=(field other) | ||
42 | { | ||
43 | swap(*this, other); | ||
44 | |||
45 | return *this; | ||
46 | } | ||
47 | |||
48 | void swap(field& first, field& second) | ||
49 | { | ||
50 | using type = field::type; | ||
51 | |||
52 | type tempType = first.type_; | ||
53 | std::string tempName = std::move(first.name_); | ||
54 | int tempInteger; | ||
55 | std::string tempString; | ||
56 | |||
57 | switch (first.type_) | ||
58 | { | ||
59 | case type::integer: | ||
60 | { | ||
61 | tempInteger = first.integer_; | ||
62 | |||
63 | break; | ||
64 | } | ||
65 | |||
66 | case type::string: | ||
67 | { | ||
68 | tempString = std::move(tempString); | ||
69 | |||
70 | break; | ||
71 | } | ||
72 | |||
73 | case type::invalid: | ||
74 | { | ||
75 | break; | ||
76 | } | ||
77 | } | ||
78 | |||
79 | first.~field(); | ||
80 | |||
81 | first.type_ = second.type_; | ||
82 | first.name_ = std::move(second.name_); | ||
83 | |||
84 | switch (second.type_) | ||
85 | { | ||
86 | case type::integer: | ||
87 | { | ||
88 | first.integer_ = second.integer_; | ||
89 | |||
90 | break; | ||
91 | } | ||
92 | |||
93 | case type::string: | ||
94 | { | ||
95 | new(&first.string_) std::string(std::move(second.string_)); | ||
96 | |||
97 | break; | ||
98 | } | ||
99 | |||
100 | case type::invalid: | ||
101 | { | ||
102 | break; | ||
103 | } | ||
104 | } | ||
105 | |||
106 | second.~field(); | ||
107 | |||
108 | second.type_ = tempType; | ||
109 | second.name_ = std::move(tempName); | ||
110 | |||
111 | switch (tempType) | ||
112 | { | ||
113 | case type::integer: | ||
114 | { | ||
115 | second.integer_ = tempInteger; | ||
116 | |||
117 | break; | ||
118 | } | ||
119 | |||
120 | case type::string: | ||
121 | { | ||
122 | new(&second.string_) std::string(std::move(tempString)); | ||
123 | |||
124 | break; | ||
125 | } | ||
126 | |||
127 | case type::invalid: | ||
128 | { | ||
129 | break; | ||
130 | } | ||
131 | } | ||
132 | } | ||
133 | |||
134 | field::~field() | ||
135 | { | ||
136 | switch (type_) | ||
137 | { | ||
138 | case type::string: | ||
139 | { | ||
140 | using string_type = std::string; | ||
141 | string_.~string_type(); | ||
142 | |||
143 | break; | ||
144 | } | ||
145 | |||
146 | case type::integer: | ||
147 | case type::invalid: | ||
148 | { | ||
149 | break; | ||
150 | } | ||
151 | } | ||
152 | } | ||
153 | |||
154 | field::field( | ||
155 | std::string name, | ||
156 | int arg) : | ||
157 | type_(type::integer), | ||
158 | name_(name), | ||
159 | integer_(arg) | ||
160 | { | ||
161 | } | ||
162 | |||
163 | int field::getInteger() const | ||
164 | { | ||
165 | if (type_ != type::integer) | ||
166 | { | ||
167 | throw std::domain_error("field::getInteger called on non-integer field"); | ||
168 | } | ||
169 | |||
170 | return integer_; | ||
171 | } | ||
172 | |||
173 | field::field( | ||
174 | std::string name, | ||
175 | std::string arg) : | ||
176 | type_(type::string), | ||
177 | name_(name) | ||
178 | { | ||
179 | new(&string_) std::string(arg); | ||
180 | } | ||
181 | |||
182 | std::string field::getString() const | ||
183 | { | ||
184 | if (type_ != type::string) | ||
185 | { | ||
186 | throw std::domain_error("field::getString called on non-string field"); | ||
187 | } | ||
188 | |||
189 | return string_; | ||
190 | } | ||
191 | |||
192 | }; | ||
193 | }; | ||
diff --git a/generator/field.h b/generator/field.h new file mode 100644 index 0000000..1fbabfc --- /dev/null +++ b/generator/field.h | |||
@@ -0,0 +1,76 @@ | |||
1 | #ifndef BINDING_H_CAE0B18E | ||
2 | #define BINDING_H_CAE0B18E | ||
3 | |||
4 | #include <string> | ||
5 | |||
6 | namespace verbly { | ||
7 | namespace generator { | ||
8 | |||
// A named value that is either an integer or a string. The payload is kept in
// a tagged union; `type_` records which member, if any, is active.
class field {
public:

  // Tag for the active payload. `invalid` means no payload is held.
  enum class type {
    invalid,
    integer,
    string
  };

  // Value constructors

  field(std::string name, int arg);
  field(std::string name, std::string arg);

  // Copy and move constructors

  field(const field& other);
  field(field&& other);

  // Assignment (takes its argument by value and swaps with it)

  field& operator=(field other);

  // Swap

  friend void swap(field& first, field& second);

  // Destructor

  ~field();

  // Generic accessors

  type getType() const { return type_; }

  std::string getName() const { return name_; }

  // Typed accessors; each throws std::domain_error on a type mismatch.

  int getInteger() const;

  std::string getString() const;

private:

  // Payload-free field of type `invalid`; used as the starting state when
  // move-constructing.
  field()
  {
  }

  union {
    int integer_;
    std::string string_;
  };

  type type_ = type::invalid;
  std::string name_;
};
72 | |||
73 | }; | ||
74 | }; | ||
75 | |||
76 | #endif /* end of include guard: BINDING_H_CAE0B18E */ | ||
diff --git a/generator/form.cpp b/generator/form.cpp new file mode 100644 index 0000000..6be9d47 --- /dev/null +++ b/generator/form.cpp | |||
@@ -0,0 +1,53 @@ | |||
#include "form.h"
#include <algorithm>
#include <cctype>
#include <list>
#include "database.h"
#include "field.h"
#include "pronunciation.h"
7 | |||
8 | namespace verbly { | ||
9 | namespace generator { | ||
10 | |||
11 | int form::nextId_ = 0; | ||
12 | |||
13 | form::form(std::string text) : | ||
14 | id_(nextId_++), | ||
15 | text_(text), | ||
16 | complexity_(std::count(std::begin(text), std::end(text), ' ') + 1), | ||
17 | proper_(std::any_of(std::begin(text), std::end(text), std::isupper)) | ||
18 | { | ||
19 | } | ||
20 | |||
21 | void form::addPronunciation(const pronunciation& p) | ||
22 | { | ||
23 | pronunciations_.insert(&p); | ||
24 | } | ||
25 | |||
26 | database& operator<<(database& db, const form& arg) | ||
27 | { | ||
28 | // Serialize the form first. | ||
29 | { | ||
30 | std::list<field> fields; | ||
31 | fields.emplace_back("form_id", arg.getId()); | ||
32 | fields.emplace_back("form", arg.getText()); | ||
33 | fields.emplace_back("complexity", arg.getComplexity()); | ||
34 | fields.emplace_back("proper", arg.isProper()); | ||
35 | |||
36 | db.insertIntoTable("forms", std::move(fields)); | ||
37 | } | ||
38 | |||
39 | // Then, serialize the form/pronunciation relationship. | ||
40 | for (const pronunciation* p : arg.getPronunciations()) | ||
41 | { | ||
42 | std::list<field> fields; | ||
43 | fields.emplace_back("form_id", arg.getId()); | ||
44 | fields.emplace_back("pronunciation_id", p->getId()); | ||
45 | |||
46 | db.insertIntoTable("forms_pronunciations", std::move(fields)); | ||
47 | } | ||
48 | |||
49 | return db; | ||
50 | } | ||
51 | |||
52 | }; | ||
53 | }; | ||
diff --git a/generator/form.h b/generator/form.h new file mode 100644 index 0000000..5576035 --- /dev/null +++ b/generator/form.h | |||
@@ -0,0 +1,71 @@ | |||
1 | #ifndef FORM_H_7EFBC970 | ||
2 | #define FORM_H_7EFBC970 | ||
3 | |||
4 | #include <string> | ||
5 | #include <set> | ||
6 | |||
7 | namespace verbly { | ||
8 | namespace generator { | ||
9 | |||
10 | class pronunciation; | ||
11 | class database; | ||
12 | |||
13 | class form { | ||
14 | public: | ||
15 | |||
16 | // Constructor | ||
17 | |||
18 | explicit form(std::string text); | ||
19 | |||
20 | // Mutators | ||
21 | |||
22 | void addPronunciation(const pronunciation& p); | ||
23 | |||
24 | // Accessors | ||
25 | |||
26 | int getId() const | ||
27 | { | ||
28 | return id_; | ||
29 | } | ||
30 | |||
31 | std::string getText() const | ||
32 | { | ||
33 | return text_; | ||
34 | } | ||
35 | |||
36 | int getComplexity() const | ||
37 | { | ||
38 | return complexity_; | ||
39 | } | ||
40 | |||
41 | bool isProper() const | ||
42 | { | ||
43 | return proper_; | ||
44 | } | ||
45 | |||
46 | std::set<const pronunciation*> getPronunciations() const | ||
47 | { | ||
48 | return pronunciations_; | ||
49 | } | ||
50 | |||
51 | private: | ||
52 | |||
53 | static int nextId_; | ||
54 | |||
55 | const int id_; | ||
56 | const std::string text_; | ||
57 | const int complexity_; | ||
58 | const bool proper_; | ||
59 | |||
60 | std::set<const pronunciation*> pronunciations_; | ||
61 | |||
62 | }; | ||
63 | |||
64 | // Serializer | ||
65 | |||
66 | database& operator<<(database& db, const form& arg); | ||
67 | |||
68 | }; | ||
69 | }; | ||
70 | |||
71 | #endif /* end of include guard: FORM_H_7EFBC970 */ | ||
diff --git a/generator/frame.cpp b/generator/frame.cpp new file mode 100644 index 0000000..9f0653f --- /dev/null +++ b/generator/frame.cpp | |||
@@ -0,0 +1,83 @@ | |||
1 | #include "frame.h" | ||
2 | #include "database.h" | ||
3 | #include "field.h" | ||
4 | |||
5 | namespace verbly { | ||
6 | namespace generator { | ||
7 | |||
8 | int frame::nextId_ = 0; | ||
9 | |||
10 | frame::frame() : id_(nextId_++) | ||
11 | { | ||
12 | } | ||
13 | |||
14 | void frame::push_back(part fp) | ||
15 | { | ||
16 | parts_.push_back(std::move(fp)); | ||
17 | } | ||
18 | |||
19 | database& operator<<(database& db, const frame& arg) | ||
20 | { | ||
21 | std::list<field> fields; | ||
22 | fields.emplace_back("frame_id", arg.getId()); | ||
23 | |||
24 | nlohmann::json jsonParts; | ||
25 | for (const part& p : arg) | ||
26 | { | ||
27 | nlohmann::json jsonPart; | ||
28 | jsonPart["type"] = static_cast<int>(p.getType()); | ||
29 | |||
30 | switch (p.getType()) | ||
31 | { | ||
32 | case part::type::noun_phrase: | ||
33 | { | ||
34 | jsonPart["role"] = p.getNounRole(); | ||
35 | jsonPart["selrestrs"] = p.getNounSelrestrs().toJson(); | ||
36 | jsonPart["synrestrs"] = p.getNounSynrestrs(); | ||
37 | |||
38 | break; | ||
39 | } | ||
40 | |||
41 | case part::type::preposition: | ||
42 | { | ||
43 | jsonPart["choices"] = p.getPrepositionChoices(); | ||
44 | jsonPart["literal"] = p.isPrepositionLiteral(); | ||
45 | |||
46 | break; | ||
47 | } | ||
48 | |||
49 | case part::type::literal: | ||
50 | { | ||
51 | jsonPart["value"] = p.getLiteralValue(); | ||
52 | |||
53 | break; | ||
54 | } | ||
55 | |||
56 | case part::type::verb: | ||
57 | case part::type::adjective: | ||
58 | case part::type::adverb: | ||
59 | { | ||
60 | break; | ||
61 | } | ||
62 | |||
63 | case part::type::invalid: | ||
64 | { | ||
65 | // Invalid parts should not be serialized. | ||
66 | assert(false); | ||
67 | |||
68 | break; | ||
69 | } | ||
70 | } | ||
71 | |||
72 | jsonParts.emplace_back(std::move(jsonPart)); | ||
73 | } | ||
74 | |||
75 | fields.emplace_back("data", jsonParts.dump()); | ||
76 | |||
77 | db.insertIntoTable("frames", std::move(fields)); | ||
78 | |||
79 | return db; | ||
80 | } | ||
81 | |||
82 | }; | ||
83 | }; | ||
diff --git a/generator/frame.h b/generator/frame.h new file mode 100644 index 0000000..411ce6c --- /dev/null +++ b/generator/frame.h | |||
@@ -0,0 +1,59 @@ | |||
1 | #ifndef FRAME_H_26770FF1 | ||
2 | #define FRAME_H_26770FF1 | ||
3 | |||
4 | #include <list> | ||
5 | #include "part.h" | ||
6 | |||
7 | namespace verbly { | ||
8 | namespace generator { | ||
9 | |||
10 | class database; | ||
11 | |||
12 | class frame { | ||
13 | public: | ||
14 | |||
15 | // Aliases | ||
16 | |||
17 | using const_iterator = std::list<part>::const_iterator; | ||
18 | |||
19 | // Constructor | ||
20 | |||
21 | frame(); | ||
22 | |||
23 | // Mutators | ||
24 | |||
25 | void push_back(part fp); | ||
26 | |||
27 | // Accessors | ||
28 | |||
29 | int getId() const | ||
30 | { | ||
31 | return id_; | ||
32 | } | ||
33 | |||
34 | const_iterator begin() const | ||
35 | { | ||
36 | return std::begin(parts_); | ||
37 | } | ||
38 | |||
39 | const_iterator end() const | ||
40 | { | ||
41 | return std::end(parts_); | ||
42 | } | ||
43 | |||
44 | private: | ||
45 | |||
46 | static int nextId_; | ||
47 | |||
48 | const int id_; | ||
49 | |||
50 | std::list<part> parts_; | ||
51 | |||
52 | }; | ||
53 | |||
54 | database& operator<<(database& db, const frame& arg); | ||
55 | |||
56 | }; | ||
57 | }; | ||
58 | |||
59 | #endif /* end of include guard: FRAME_H_26770FF1 */ | ||
diff --git a/generator/generator.cpp b/generator/generator.cpp index 6a16467..d88cb31 100644 --- a/generator/generator.cpp +++ b/generator/generator.cpp | |||
@@ -1,2320 +1,1477 @@ | |||
1 | #include <libxml/parser.h> | 1 | #include "generator.h" |
2 | #include <cassert> | ||
3 | #include <stdexcept> | ||
2 | #include <iostream> | 4 | #include <iostream> |
5 | #include <regex> | ||
3 | #include <dirent.h> | 6 | #include <dirent.h> |
4 | #include <set> | ||
5 | #include <map> | ||
6 | #include <string> | ||
7 | #include <vector> | ||
8 | #include <fstream> | 7 | #include <fstream> |
9 | #include <sqlite3.h> | 8 | #include "enums.h" |
10 | #include <sstream> | ||
11 | #include <regex> | ||
12 | #include <list> | ||
13 | #include <algorithm> | ||
14 | #include <json.hpp> | ||
15 | #include "progress.h" | 9 | #include "progress.h" |
10 | #include "selrestr.h" | ||
11 | #include "role.h" | ||
12 | #include "part.h" | ||
13 | #include "field.h" | ||
16 | #include "../lib/util.h" | 14 | #include "../lib/util.h" |
17 | 15 | ||
18 | using json = nlohmann::json; | 16 | namespace verbly { |
19 | 17 | namespace generator { | |
20 | struct verb_t { | ||
21 | std::string infinitive; | ||
22 | std::string past_tense; | ||
23 | std::string past_participle; | ||
24 | std::string ing_form; | ||
25 | std::string s_form; | ||
26 | int id; | ||
27 | }; | ||
28 | |||
29 | struct adjective_t { | ||
30 | std::string base; | ||
31 | std::string comparative; | ||
32 | std::string superlative; | ||
33 | }; | ||
34 | |||
35 | struct noun_t { | ||
36 | std::string singular; | ||
37 | std::string plural; | ||
38 | }; | ||
39 | |||
40 | struct selrestr_t { | ||
41 | enum class type_t { | ||
42 | singleton, | ||
43 | andlogic, | ||
44 | orlogic, | ||
45 | empty | ||
46 | }; | ||
47 | type_t type; | ||
48 | std::string restriction; | ||
49 | bool pos; | ||
50 | std::list<selrestr_t> subordinates; | ||
51 | }; | ||
52 | |||
53 | struct framepart_t { | ||
54 | enum class type_t { | ||
55 | np, | ||
56 | v, | ||
57 | pp, | ||
58 | adj, | ||
59 | adv, | ||
60 | lex | ||
61 | }; | ||
62 | type_t type; | ||
63 | std::string role; | ||
64 | selrestr_t selrestrs; | ||
65 | std::set<std::string> preprestrs; | ||
66 | std::set<std::string> synrestrs; | ||
67 | std::list<std::string> choices; | ||
68 | std::string lexval; | ||
69 | }; | ||
70 | |||
71 | struct group_t { | ||
72 | std::string id; | ||
73 | std::string parent; | ||
74 | std::set<std::string> members; | ||
75 | std::map<std::string, selrestr_t> roles; | ||
76 | std::list<std::list<framepart_t>> frames; | ||
77 | }; | ||
78 | |||
79 | struct pronunciation_t { | ||
80 | std::string phonemes; | ||
81 | std::string prerhyme; | ||
82 | std::string rhyme; | ||
83 | int syllables = 0; | ||
84 | std::string stress; | ||
85 | |||
86 | bool operator<(const pronunciation_t& other) const | ||
87 | { | ||
88 | return phonemes < other.phonemes; | ||
89 | } | ||
90 | }; | ||
91 | |||
92 | std::map<std::string, group_t> groups; | ||
93 | std::map<std::string, verb_t> verbs; | ||
94 | std::map<std::string, adjective_t> adjectives; | ||
95 | std::map<std::string, noun_t> nouns; | ||
96 | std::map<int, std::map<int, int>> wn; | ||
97 | std::map<int, int> images; | ||
98 | std::map<std::string, std::set<pronunciation_t>> pronunciations; | ||
99 | |||
100 | void print_usage() | ||
101 | { | ||
102 | std::cout << "Verbly Datafile Generator" << std::endl; | ||
103 | std::cout << "-------------------------" << std::endl; | ||
104 | std::cout << "Requires exactly six arguments." << std::endl; | ||
105 | std::cout << "1. The path to a VerbNet data directory." << std::endl; | ||
106 | std::cout << "2. The path to an AGID infl.txt file." << std::endl; | ||
107 | std::cout << "3. The path to a WordNet prolog data directory." << std::endl; | ||
108 | std::cout << "4. The path to a CMUDICT pronunciation file." << std::endl; | ||
109 | std::cout << "5. The path to an ImageNet urls.txt file." << std::endl; | ||
110 | std::cout << "6. Datafile output path." << std::endl; | ||
111 | |||
112 | exit(1); | ||
113 | } | ||
114 | |||
115 | void db_error(sqlite3* ppdb, std::string query) | ||
116 | { | ||
117 | std::cout << "Error writing to output database: " << sqlite3_errmsg(ppdb) << std::endl; | ||
118 | std::cout << query << std::endl; | ||
119 | sqlite3_close_v2(ppdb); | ||
120 | print_usage(); | ||
121 | } | ||
122 | |||
123 | json export_selrestrs(selrestr_t r) | ||
124 | { | ||
125 | if (r.type == selrestr_t::type_t::empty) | ||
126 | { | ||
127 | return {}; | ||
128 | } else if (r.type == selrestr_t::type_t::singleton) | ||
129 | { | ||
130 | json result; | ||
131 | result["type"] = r.restriction; | ||
132 | result["pos"] = r.pos; | ||
133 | return result; | ||
134 | } else { | ||
135 | json result; | ||
136 | if (r.type == selrestr_t::type_t::andlogic) | ||
137 | { | ||
138 | result["logic"] = "and"; | ||
139 | } else { | ||
140 | result["logic"] = "or"; | ||
141 | } | ||
142 | |||
143 | std::list<json> outlist; | ||
144 | std::transform(std::begin(r.subordinates), std::end(r.subordinates), std::back_inserter(outlist), &export_selrestrs); | ||
145 | result["children"] = outlist; | ||
146 | 18 | ||
147 | return result; | 19 | generator::generator( |
148 | } | 20 | std::string verbNetPath, |
149 | } | 21 | std::string agidPath, |
150 | 22 | std::string wordNetPath, | |
151 | selrestr_t parse_selrestrs(xmlNodePtr top, std::string filename) | 23 | std::string cmudictPath, |
152 | { | 24 | std::string imageNetPath, |
153 | selrestr_t r; | 25 | std::string outputPath) : |
154 | xmlChar* key; | 26 | verbNetPath_(verbNetPath), |
155 | 27 | agidPath_(agidPath), | |
156 | if (!xmlStrcmp(top->name, (const xmlChar*) "SELRESTRS")) | 28 | wordNetPath_(wordNetPath), |
157 | { | 29 | cmudictPath_(cmudictPath), |
158 | if (xmlChildElementCount(top) == 0) | 30 | imageNetPath_(imageNetPath), |
31 | db_(outputPath) | ||
159 | { | 32 | { |
160 | r.type = selrestr_t::type_t::empty; | 33 | // Ensure VerbNet directory exists |
161 | } else if (xmlChildElementCount(top) == 1) | 34 | DIR* dir; |
162 | { | 35 | if ((dir = opendir(verbNetPath_.c_str())) == nullptr) |
163 | r = parse_selrestrs(xmlFirstElementChild(top), filename); | ||
164 | } else { | ||
165 | r.type = selrestr_t::type_t::andlogic; | ||
166 | |||
167 | if (xmlHasProp(top, (const xmlChar*) "logic")) | ||
168 | { | 36 | { |
169 | key = xmlGetProp(top, (const xmlChar*) "logic"); | 37 | throw std::invalid_argument("Invalid VerbNet data directory"); |
170 | if (!xmlStrcmp(key, (const xmlChar*) "or")) | ||
171 | { | ||
172 | r.type = selrestr_t::type_t::orlogic; | ||
173 | } | ||
174 | xmlFree(key); | ||
175 | } | 38 | } |
176 | 39 | ||
177 | for (xmlNodePtr selrestr = top->xmlChildrenNode; selrestr != nullptr; selrestr = selrestr->next) | 40 | closedir(dir); |
41 | |||
42 | // Ensure AGID infl.txt exists | ||
43 | if (!std::ifstream(agidPath_)) | ||
178 | { | 44 | { |
179 | if (!xmlStrcmp(selrestr->name, (const xmlChar*) "SELRESTRS") || !xmlStrcmp(selrestr->name, (const xmlChar*) "SELRESTR")) | 45 | throw std::invalid_argument("AGID infl.txt file not found"); |
180 | { | ||
181 | r.subordinates.push_back(parse_selrestrs(selrestr, filename)); | ||
182 | } | ||
183 | } | 46 | } |
184 | } | 47 | |
185 | } else if (!xmlStrcmp(top->name, (const xmlChar*) "SELRESTR")) | 48 | // Add directory separator to WordNet path |
186 | { | 49 | if ((wordNetPath_.back() != '/') && (wordNetPath_.back() != '\\')) |
187 | r.type = selrestr_t::type_t::singleton; | ||
188 | |||
189 | key = xmlGetProp(top, (xmlChar*) "Value"); | ||
190 | r.pos = (std::string((const char*)key) == "+"); | ||
191 | xmlFree(key); | ||
192 | |||
193 | key = xmlGetProp(top, (xmlChar*) "type"); | ||
194 | r.restriction = (const char*) key; | ||
195 | xmlFree(key); | ||
196 | } else { | ||
197 | // Invalid | ||
198 | std::cout << "Bad VerbNet file format: " << filename << std::endl; | ||
199 | print_usage(); | ||
200 | } | ||
201 | |||
202 | return r; | ||
203 | } | ||
204 | |||
205 | group_t& parse_group(xmlNodePtr top, std::string filename) | ||
206 | { | ||
207 | xmlChar* key = xmlGetProp(top, (xmlChar*) "ID"); | ||
208 | if (key == 0) | ||
209 | { | ||
210 | std::cout << "Bad VerbNet file format: " << filename << std::endl; | ||
211 | print_usage(); | ||
212 | } | ||
213 | std::string vnid = (const char*)key; | ||
214 | vnid = vnid.substr(vnid.find_first_of("-")+1); | ||
215 | xmlFree(key); | ||
216 | |||
217 | group_t g; | ||
218 | g.id = vnid; | ||
219 | |||
220 | for (xmlNodePtr node = top->xmlChildrenNode; node != nullptr; node = node->next) | ||
221 | { | ||
222 | if (!xmlStrcmp(node->name, (const xmlChar*) "SUBCLASSES")) | ||
223 | { | ||
224 | for (xmlNodePtr subclass = node->xmlChildrenNode; subclass != nullptr; subclass = subclass->next) | ||
225 | { | 50 | { |
226 | if (!xmlStrcmp(subclass->name, (const xmlChar*) "VNSUBCLASS")) | 51 | wordNetPath_ += '/'; |
227 | { | ||
228 | auto& sg = parse_group(subclass, filename); | ||
229 | sg.parent = vnid; | ||
230 | |||
231 | for (auto member : sg.members) | ||
232 | { | ||
233 | g.members.insert(member); | ||
234 | } | ||
235 | |||
236 | // The schema requires that subclasses appear after role definitions, so we can do this now | ||
237 | for (auto role : g.roles) | ||
238 | { | ||
239 | if (sg.roles.count(role.first) == 0) | ||
240 | { | ||
241 | sg.roles[role.first] = role.second; | ||
242 | } | ||
243 | } | ||
244 | } | ||
245 | } | 52 | } |
246 | } else if (!xmlStrcmp(node->name, (const xmlChar*) "MEMBERS")) | 53 | |
247 | { | 54 | // Ensure WordNet tables exist |
248 | for (xmlNodePtr member = node->xmlChildrenNode; member != nullptr; member = member->next) | 55 | for (std::string table : { |
56 | "s", "sk", "ant", "at", "cls", "hyp", "ins", "mm", "mp", "ms", "per", "sa", "sim", "syntax" | ||
57 | }) | ||
249 | { | 58 | { |
250 | if (!xmlStrcmp(member->name, (const xmlChar*) "MEMBER")) | 59 | if (!std::ifstream(wordNetPath_ + "wn_" + table + ".pl")) |
251 | { | 60 | { |
252 | key = xmlGetProp(member, (xmlChar*) "name"); | 61 | throw std::invalid_argument("WordNet " + table + " table not found"); |
253 | g.members.insert((const char*)key); | ||
254 | xmlFree(key); | ||
255 | } | 62 | } |
256 | } | 63 | } |
257 | } else if (!xmlStrcmp(node->name, (const xmlChar*) "THEMROLES")) | 64 | |
258 | { | 65 | // Ensure CMUDICT file exists |
259 | for (xmlNodePtr role = node->xmlChildrenNode; role != nullptr; role = role->next) | 66 | if (!std::ifstream(cmudictPath_)) |
260 | { | 67 | { |
261 | if (!xmlStrcmp(role->name, (const xmlChar*) "THEMROLE")) | 68 | throw std::invalid_argument("CMUDICT file not found"); |
262 | { | ||
263 | selrestr_t r; | ||
264 | r.type = selrestr_t::type_t::empty; | ||
265 | |||
266 | key = xmlGetProp(role, (const xmlChar*) "type"); | ||
267 | std::string type = (const char*)key; | ||
268 | xmlFree(key); | ||
269 | |||
270 | for (xmlNodePtr rolenode = role->xmlChildrenNode; rolenode != nullptr; rolenode = rolenode->next) | ||
271 | { | ||
272 | if (!xmlStrcmp(rolenode->name, (const xmlChar*) "SELRESTRS")) | ||
273 | { | ||
274 | r = parse_selrestrs(rolenode, filename); | ||
275 | } | ||
276 | } | ||
277 | |||
278 | g.roles[type] = r; | ||
279 | } | ||
280 | } | 69 | } |
281 | } else if (!xmlStrcmp(node->name, (const xmlChar*) "FRAMES")) | 70 | |
282 | { | 71 | // Ensure ImageNet urls.txt exists |
283 | for (xmlNodePtr frame = node->xmlChildrenNode; frame != nullptr; frame = frame->next) | 72 | if (!std::ifstream(imageNetPath_)) |
284 | { | 73 | { |
285 | if (!xmlStrcmp(frame->name, (const xmlChar*) "FRAME")) | 74 | throw std::invalid_argument("ImageNet urls.txt file not found"); |
286 | { | ||
287 | std::list<framepart_t> f; | ||
288 | |||
289 | for (xmlNodePtr framenode = frame->xmlChildrenNode; framenode != nullptr; framenode = framenode->next) | ||
290 | { | ||
291 | if (!xmlStrcmp(framenode->name, (const xmlChar*) "SYNTAX")) | ||
292 | { | ||
293 | for (xmlNodePtr syntaxnode = framenode->xmlChildrenNode; syntaxnode != nullptr; syntaxnode = syntaxnode->next) | ||
294 | { | ||
295 | framepart_t fp; | ||
296 | |||
297 | if (!xmlStrcmp(syntaxnode->name, (const xmlChar*) "NP")) | ||
298 | { | ||
299 | fp.type = framepart_t::type_t::np; | ||
300 | |||
301 | key = xmlGetProp(syntaxnode, (xmlChar*) "value"); | ||
302 | fp.role = (const char*)key; | ||
303 | xmlFree(key); | ||
304 | |||
305 | fp.selrestrs.type = selrestr_t::type_t::empty; | ||
306 | |||
307 | for (xmlNodePtr npnode = syntaxnode->xmlChildrenNode; npnode != nullptr; npnode = npnode->next) | ||
308 | { | ||
309 | if (!xmlStrcmp(npnode->name, (const xmlChar*) "SYNRESTRS")) | ||
310 | { | ||
311 | for (xmlNodePtr synrestr = npnode->xmlChildrenNode; synrestr != nullptr; synrestr = synrestr->next) | ||
312 | { | ||
313 | if (!xmlStrcmp(synrestr->name, (const xmlChar*) "SYNRESTR")) | ||
314 | { | ||
315 | key = xmlGetProp(synrestr, (xmlChar*) "type"); | ||
316 | fp.synrestrs.insert(std::string((const char*)key)); | ||
317 | xmlFree(key); | ||
318 | } | ||
319 | } | ||
320 | } | ||
321 | |||
322 | if (!xmlStrcmp(npnode->name, (const xmlChar*) "SELRESTRS")) | ||
323 | { | ||
324 | fp.selrestrs = parse_selrestrs(npnode, filename); | ||
325 | } | ||
326 | } | ||
327 | } else if (!xmlStrcmp(syntaxnode->name, (xmlChar*) "VERB")) | ||
328 | { | ||
329 | fp.type = framepart_t::type_t::v; | ||
330 | } else if (!xmlStrcmp(syntaxnode->name, (xmlChar*) "PREP")) | ||
331 | { | ||
332 | fp.type = framepart_t::type_t::pp; | ||
333 | |||
334 | if (xmlHasProp(syntaxnode, (xmlChar*) "value")) | ||
335 | { | ||
336 | key = xmlGetProp(syntaxnode, (xmlChar*) "value"); | ||
337 | std::string choices = (const char*)key; | ||
338 | xmlFree(key); | ||
339 | |||
340 | fp.choices = verbly::split<std::list<std::string>>(choices, " "); | ||
341 | } | ||
342 | |||
343 | for (xmlNodePtr npnode = syntaxnode->xmlChildrenNode; npnode != nullptr; npnode = npnode->next) | ||
344 | { | ||
345 | if (!xmlStrcmp(npnode->name, (const xmlChar*) "SELRESTRS")) | ||
346 | { | ||
347 | for (xmlNodePtr synrestr = npnode->xmlChildrenNode; synrestr != nullptr; synrestr = synrestr->next) | ||
348 | { | ||
349 | if (!xmlStrcmp(synrestr->name, (const xmlChar*) "SELRESTR")) | ||
350 | { | ||
351 | key = xmlGetProp(synrestr, (xmlChar*) "type"); | ||
352 | fp.preprestrs.insert(std::string((const char*)key)); | ||
353 | xmlFree(key); | ||
354 | } | ||
355 | } | ||
356 | } | ||
357 | } | ||
358 | } else if (!xmlStrcmp(syntaxnode->name, (xmlChar*) "ADJ")) | ||
359 | { | ||
360 | fp.type = framepart_t::type_t::adj; | ||
361 | } else if (!xmlStrcmp(syntaxnode->name, (xmlChar*) "ADV")) | ||
362 | { | ||
363 | fp.type = framepart_t::type_t::adv; | ||
364 | } else if (!xmlStrcmp(syntaxnode->name, (xmlChar*) "LEX")) | ||
365 | { | ||
366 | fp.type = framepart_t::type_t::lex; | ||
367 | |||
368 | key = xmlGetProp(syntaxnode, (xmlChar*) "value"); | ||
369 | fp.lexval = (const char*)key; | ||
370 | xmlFree(key); | ||
371 | } else { | ||
372 | continue; | ||
373 | } | ||
374 | |||
375 | f.push_back(fp); | ||
376 | } | ||
377 | |||
378 | g.frames.push_back(f); | ||
379 | } | ||
380 | } | ||
381 | } | ||
382 | } | 75 | } |
383 | } | 76 | } |
384 | } | ||
385 | |||
386 | groups[vnid] = g; | ||
387 | |||
388 | return groups[vnid]; | ||
389 | } | ||
390 | |||
391 | int main(int argc, char** argv) | ||
392 | { | ||
393 | if (argc != 7) | ||
394 | { | ||
395 | print_usage(); | ||
396 | } | ||
397 | |||
398 | // VerbNet data | ||
399 | std::cout << "Reading verb frames..." << std::endl; | ||
400 | |||
401 | DIR* dir; | ||
402 | if ((dir = opendir(argv[1])) == nullptr) | ||
403 | { | ||
404 | std::cout << "Invalid VerbNet data directory." << std::endl; | ||
405 | |||
406 | print_usage(); | ||
407 | } | ||
408 | |||
409 | struct dirent* ent; | ||
410 | while ((ent = readdir(dir)) != nullptr) | ||
411 | { | ||
412 | std::string filename(argv[1]); | ||
413 | if (filename.back() != '/') | ||
414 | { | ||
415 | filename += '/'; | ||
416 | } | ||
417 | 77 | ||
418 | filename += ent->d_name; | 78 | void generator::run() |
419 | //std::cout << ent->d_name << std::endl; | ||
420 | |||
421 | if (filename.rfind(".xml") != filename.size() - 4) | ||
422 | { | ||
423 | continue; | ||
424 | } | ||
425 | |||
426 | xmlDocPtr doc = xmlParseFile(filename.c_str()); | ||
427 | if (doc == nullptr) | ||
428 | { | ||
429 | std::cout << "Error opening " << filename << std::endl; | ||
430 | print_usage(); | ||
431 | } | ||
432 | |||
433 | xmlNodePtr top = xmlDocGetRootElement(doc); | ||
434 | if ((top == nullptr) || (xmlStrcmp(top->name, (xmlChar*) "VNCLASS"))) | ||
435 | { | ||
436 | std::cout << "Bad VerbNet file format: " << filename << std::endl; | ||
437 | print_usage(); | ||
438 | } | ||
439 | |||
440 | parse_group(top, filename); | ||
441 | } | ||
442 | |||
443 | closedir(dir); | ||
444 | |||
445 | // Get verbs from AGID | ||
446 | std::cout << "Reading inflections..." << std::endl; | ||
447 | |||
448 | std::ifstream agidfile(argv[2]); | ||
449 | if (!agidfile.is_open()) | ||
450 | { | ||
451 | std::cout << "Could not open AGID file: " << argv[2] << std::endl; | ||
452 | print_usage(); | ||
453 | } | ||
454 | |||
455 | for (;;) | ||
456 | { | ||
457 | std::string line; | ||
458 | if (!getline(agidfile, line)) | ||
459 | { | ||
460 | break; | ||
461 | } | ||
462 | |||
463 | if (line.back() == '\r') | ||
464 | { | 79 | { |
465 | line.pop_back(); | 80 | // Create notions, words, lemmas, and forms from WordNet synsets |
466 | } | 81 | readWordNetSynsets(); |
467 | 82 | ||
468 | int divider = line.find_first_of(" "); | 83 | // Reads adjective positioning WordNet data |
469 | std::string word = line.substr(0, divider); | 84 | readAdjectivePositioning(); |
470 | line = line.substr(divider+1); | 85 | |
471 | char type = line[0]; | 86 | // Counts the number of URLs ImageNet has per notion |
472 | 87 | readImageNetUrls(); | |
473 | if (line[1] == '?') | 88 | |
474 | { | 89 | // Creates a word by WordNet sense key lookup table |
475 | line.erase(0, 4); | 90 | readWordNetSenseKeys(); |
476 | } else { | 91 | |
477 | line.erase(0, 3); | 92 | // Creates groups and frames from VerbNet data |
478 | } | 93 | readVerbNet(); |
479 | 94 | ||
480 | std::vector<std::string> forms; | 95 | // Creates forms and inflections from AGID. To reduce the amount of forms |
481 | while (!line.empty()) | 96 | // created, we do this after most lemmas that need inflecting have been |
482 | { | 97 | // created through other means, and then only generate forms for |
483 | std::string inflection; | 98 | // inflections of already-existing lemmas. The exception to this regards |
484 | if ((divider = line.find(" | ")) != std::string::npos) | 99 | // verb lemmas. If a verb lemma in AGID either does not exist yet, or does |
485 | { | 100 | // exist but is not related to any words that are related to verb notions, |
486 | inflection = line.substr(0, divider); | 101 | // then a notion and a word is generated and the form generation proceeds |
487 | line = line.substr(divider + 3); | 102 | // as usual. |
488 | } else { | 103 | readAgidInflections(); |
489 | inflection = line; | 104 | |
490 | line = ""; | 105 | // Reads in prepositions and the is_a relationship |
491 | } | 106 | readPrepositions(); |
492 | 107 | ||
493 | if ((divider = inflection.find_first_of(",?")) != std::string::npos) | 108 | // Creates pronunciations from CMUDICT. To reduce the amount of |
494 | { | 109 | // pronunciations created, we do this after all forms have been created, |
495 | inflection = inflection.substr(0, divider); | 110 | // and then only generate pronunciations for already-exisiting forms. |
496 | } | 111 | readCmudictPronunciations(); |
497 | 112 | ||
498 | forms.push_back(inflection); | 113 | // Writes the database schema |
114 | writeSchema(); | ||
115 | |||
116 | // Dumps data to the database | ||
117 | dumpObjects(); | ||
118 | |||
119 | // Populates the antonymy relationship from WordNet | ||
120 | readWordNetAntonymy(); | ||
121 | |||
122 | // Populates the variation relationship from WordNet | ||
123 | readWordNetVariation(); | ||
124 | |||
125 | // Populates the usage, topicality, and regionality relationships from | ||
126 | // WordNet | ||
127 | readWordNetClasses(); | ||
128 | |||
129 | // Populates the causality relationship from WordNet | ||
130 | readWordNetCausality(); | ||
131 | |||
132 | // Populates the entailment relationship from WordNet | ||
133 | readWordNetEntailment(); | ||
134 | |||
135 | // Populates the hypernymy relationship from WordNet | ||
136 | readWordNetHypernymy(); | ||
137 | |||
138 | // Populates the instantiation relationship from WordNet | ||
139 | readWordNetInstantiation(); | ||
140 | |||
141 | // Populates the member meronymy relationship from WordNet | ||
142 | readWordNetMemberMeronymy(); | ||
143 | |||
144 | // Populates the part meronymy relationship from WordNet | ||
145 | readWordNetPartMeronymy(); | ||
146 | |||
147 | // Populates the substance meronymy relationship from WordNet | ||
148 | readWordNetSubstanceMeronymy(); | ||
149 | |||
150 | // Populates the pertainymy and mannernymy relationships from WordNet | ||
151 | readWordNetPertainymy(); | ||
152 | |||
153 | // Populates the specification relationship from WordNet | ||
154 | readWordNetSpecification(); | ||
155 | |||
156 | // Populates the adjective similarity relationship from WordNet | ||
157 | readWordNetSimilarity(); | ||
158 | |||
159 | |||
160 | |||
161 | |||
162 | |||
163 | |||
164 | |||
165 | |||
499 | } | 166 | } |
500 | 167 | ||
501 | switch (type) | 168 | void generator::readWordNetSynsets() |
502 | { | 169 | { |
503 | case 'V': | 170 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_s.pl")); |
171 | progress ppgs("Reading synsets from WordNet...", lines.size()); | ||
172 | |||
173 | for (std::string line : lines) | ||
504 | { | 174 | { |
505 | verb_t v; | 175 | ppgs.update(); |
506 | v.infinitive = word; | 176 | |
507 | if (forms.size() == 4) | 177 | std::regex relation("^s\\(([1234]\\d{8}),(\\d+),'(.+)',\\w,\\d+,(\\d+)\\)\\.$"); |
508 | { | 178 | std::smatch relation_data; |
509 | v.past_tense = forms[0]; | 179 | if (!std::regex_search(line, relation_data, relation)) |
510 | v.past_participle = forms[1]; | 180 | { |
511 | v.ing_form = forms[2]; | 181 | continue; |
512 | v.s_form = forms[3]; | ||
513 | } else if (forms.size() == 3) | ||
514 | { | ||
515 | v.past_tense = forms[0]; | ||
516 | v.past_participle = forms[0]; | ||
517 | v.ing_form = forms[1]; | ||
518 | v.s_form = forms[2]; | ||
519 | } else if (forms.size() == 8) | ||
520 | { | ||
521 | // As of AGID 2014.08.11, this is only "to be" | ||
522 | v.past_tense = forms[0]; | ||
523 | v.past_participle = forms[2]; | ||
524 | v.ing_form = forms[3]; | ||
525 | v.s_form = forms[4]; | ||
526 | } else { | ||
527 | // Words that don't fit the cases above as of AGID 2014.08.11: | ||
528 | // - may and shall do not conjugate the way we want them to | ||
529 | // - methinks only has a past tense and is an outlier | ||
530 | // - wit has five forms, and is archaic/obscure enough that we can ignore it for now | ||
531 | std::cout << "Ignoring verb \"" << word << "\" due to non-standard number of forms." << std::endl; | ||
532 | } | 182 | } |
533 | 183 | ||
534 | verbs[word] = v; | 184 | int synset_id = std::stoi(relation_data[1]); |
535 | 185 | int wnum = std::stoi(relation_data[2]); | |
536 | break; | 186 | std::string text = relation_data[3]; |
537 | } | 187 | int tag_count = std::stoi(relation_data[4]); |
538 | 188 | size_t word_it; | |
539 | case 'A': | 189 | while ((word_it = text.find("''")) != std::string::npos) |
540 | { | ||
541 | adjective_t adj; | ||
542 | adj.base = word; | ||
543 | if (forms.size() == 2) | ||
544 | { | 190 | { |
545 | adj.comparative = forms[0]; | 191 | text.erase(word_it, 1); |
546 | adj.superlative = forms[1]; | ||
547 | } else { | ||
548 | // As of AGID 2014.08.11, this is only "only", which has only the form "onliest" | ||
549 | std::cout << "Ignoring adjective/adverb \"" << word << "\" due to non-standard number of forms." << std::endl; | ||
550 | } | 192 | } |
551 | 193 | ||
552 | adjectives[word] = adj; | 194 | // The WordNet data does contain duplicates, so we need to check that we |
553 | 195 | // haven't already created this word. | |
554 | break; | 196 | std::pair<int, int> lookup(synset_id, wnum); |
555 | } | 197 | if (!wordByWnidAndWnum_.count(lookup)) |
556 | |||
557 | case 'N': | ||
558 | { | ||
559 | noun_t n; | ||
560 | n.singular = word; | ||
561 | if (forms.size() == 1) | ||
562 | { | 198 | { |
563 | n.plural = forms[0]; | 199 | notion& synset = lookupOrCreateNotion(synset_id); |
564 | } else { | 200 | lemma& lex = lookupOrCreateLemma(text); |
565 | // As of AGID 2014.08.11, this is non-existent. | 201 | word& entry = createWord(synset, lex, tag_count); |
566 | std::cout << "Ignoring noun \"" << word << "\" due to non-standard number of forms." << std::endl; | 202 | |
203 | wordByWnidAndWnum_[lookup] = &entry; | ||
567 | } | 204 | } |
568 | |||
569 | nouns[word] = n; | ||
570 | |||
571 | break; | ||
572 | } | 205 | } |
573 | } | 206 | } |
574 | } | ||
575 | |||
576 | // Pronounciations | ||
577 | std::cout << "Reading pronunciations..." << std::endl; | ||
578 | |||
579 | std::ifstream pronfile(argv[4]); | ||
580 | if (!pronfile.is_open()) | ||
581 | { | ||
582 | std::cout << "Could not open CMUDICT file: " << argv[4] << std::endl; | ||
583 | print_usage(); | ||
584 | } | ||
585 | |||
586 | for (;;) | ||
587 | { | ||
588 | std::string line; | ||
589 | if (!getline(pronfile, line)) | ||
590 | { | ||
591 | break; | ||
592 | } | ||
593 | |||
594 | if (line.back() == '\r') | ||
595 | { | ||
596 | line.pop_back(); | ||
597 | } | ||
598 | 207 | ||
599 | std::regex phoneme("([A-Z][^ \\(]*)(?:\\(\\d+\\))? ([A-Z 0-9]+)"); | 208 | void generator::readAdjectivePositioning() |
600 | std::smatch phoneme_data; | ||
601 | if (std::regex_search(line, phoneme_data, phoneme)) | ||
602 | { | 209 | { |
603 | std::string canonical(phoneme_data[1]); | 210 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_syntax.pl")); |
604 | std::transform(std::begin(canonical), std::end(canonical), std::begin(canonical), ::tolower); | 211 | progress ppgs("Reading adjective positionings from WordNet...", lines.size()); |
605 | |||
606 | std::string phonemes = phoneme_data[2]; | ||
607 | auto phoneme_set = verbly::split<std::list<std::string>>(phonemes, " "); | ||
608 | auto phemstrt = std::find_if(std::begin(phoneme_set), std::end(phoneme_set), [] (std::string phoneme) { | ||
609 | return phoneme.find("1") != std::string::npos; | ||
610 | }); | ||
611 | 212 | ||
612 | pronunciation_t p; | 213 | for (std::string line : lines) |
613 | p.phonemes = phonemes; | ||
614 | |||
615 | // Rhyme detection | ||
616 | if (phemstrt != std::end(phoneme_set)) | ||
617 | { | 214 | { |
618 | std::stringstream rhymer; | 215 | ppgs.update(); |
619 | for (auto it = phemstrt; it != std::end(phoneme_set); it++) | ||
620 | { | ||
621 | std::string naked; | ||
622 | std::remove_copy_if(std::begin(*it), std::end(*it), std::back_inserter(naked), [] (char ch) { | ||
623 | return isdigit(ch); | ||
624 | }); | ||
625 | |||
626 | if (it != phemstrt) | ||
627 | { | ||
628 | rhymer << " "; | ||
629 | } | ||
630 | |||
631 | rhymer << naked; | ||
632 | } | ||
633 | 216 | ||
634 | p.rhyme = rhymer.str(); | 217 | std::regex relation("^syntax\\((3\\d{8}),(\\d+),([ipa])p?\\)\\."); |
635 | 218 | std::smatch relation_data; | |
636 | if (phemstrt != std::begin(phoneme_set)) | 219 | if (!std::regex_search(line, relation_data, relation)) |
637 | { | 220 | { |
638 | phemstrt--; | 221 | continue; |
639 | p.prerhyme = *phemstrt; | ||
640 | } else { | ||
641 | p.prerhyme = ""; | ||
642 | } | 222 | } |
643 | } else { | ||
644 | p.prerhyme = ""; | ||
645 | p.rhyme = ""; | ||
646 | } | ||
647 | 223 | ||
648 | // Syllable/stress | 224 | int synset_id = stoi(relation_data[1]); |
649 | for (auto phm : phoneme_set) | 225 | int wnum = stoi(relation_data[2]); |
650 | { | 226 | std::string adjpos_str = relation_data[3]; |
651 | if (isdigit(phm.back())) | ||
652 | { | ||
653 | // It's a vowel! | ||
654 | p.syllables++; | ||
655 | 227 | ||
656 | if (phm.back() == '1') | 228 | std::pair<int, int> lookup(synset_id, wnum); |
229 | if (wordByWnidAndWnum_.count(lookup)) | ||
230 | { | ||
231 | word& adj = *wordByWnidAndWnum_.at(lookup); | ||
232 | |||
233 | if (adjpos_str == "p") | ||
234 | { | ||
235 | adj.setAdjectivePosition(positioning::predicate); | ||
236 | } else if (adjpos_str == "a") | ||
237 | { | ||
238 | adj.setAdjectivePosition(positioning::attributive); | ||
239 | } else if (adjpos_str == "i") | ||
657 | { | 240 | { |
658 | p.stress.push_back('1'); | 241 | adj.setAdjectivePosition(positioning::postnominal); |
659 | } else { | 242 | } else { |
660 | p.stress.push_back('0'); | 243 | // Can't happen because of how we specified the regex. |
244 | assert(false); | ||
661 | } | 245 | } |
662 | } | 246 | } |
663 | } | 247 | } |
664 | |||
665 | pronunciations[canonical].insert(p); | ||
666 | } | ||
667 | } | ||
668 | |||
669 | // Images | ||
670 | std::cout << "Reading images..." << std::endl; | ||
671 | |||
672 | std::ifstream imagefile(argv[5]); | ||
673 | if (!imagefile.is_open()) | ||
674 | { | ||
675 | std::cout << "Could not open ImageNet file: " << argv[5] << std::endl; | ||
676 | print_usage(); | ||
677 | } | ||
678 | |||
679 | for (;;) | ||
680 | { | ||
681 | std::string line; | ||
682 | if (!getline(imagefile, line)) | ||
683 | { | ||
684 | break; | ||
685 | } | ||
686 | |||
687 | if (line.back() == '\r') | ||
688 | { | ||
689 | line.pop_back(); | ||
690 | } | ||
691 | |||
692 | std::string wnid_s = line.substr(1, 8); | ||
693 | int wnid = stoi(wnid_s) + 100000000; | ||
694 | images[wnid]++; | ||
695 | } | ||
696 | |||
697 | imagefile.close(); | ||
698 | |||
699 | // Start writing output | ||
700 | std::cout << "Writing schema..." << std::endl; | ||
701 | |||
702 | sqlite3* ppdb; | ||
703 | if (sqlite3_open_v2(argv[6], &ppdb, SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE, NULL) != SQLITE_OK) | ||
704 | { | ||
705 | std::cout << "Error opening output datafile: " << sqlite3_errmsg(ppdb) << std::endl; | ||
706 | print_usage(); | ||
707 | } | ||
708 | |||
709 | std::ifstream schemafile("schema.sql"); | ||
710 | if (!schemafile.is_open()) | ||
711 | { | ||
712 | std::cout << "Could not find schema file" << std::endl; | ||
713 | print_usage(); | ||
714 | } | ||
715 | |||
716 | std::stringstream schemabuilder; | ||
717 | for (;;) | ||
718 | { | ||
719 | std::string line; | ||
720 | if (!getline(schemafile, line)) | ||
721 | { | ||
722 | break; | ||
723 | } | ||
724 | |||
725 | if (line.back() == '\r') | ||
726 | { | ||
727 | line.pop_back(); | ||
728 | } | ||
729 | |||
730 | schemabuilder << line << std::endl; | ||
731 | } | ||
732 | |||
733 | std::string schema = schemabuilder.str(); | ||
734 | while (!schema.empty()) | ||
735 | { | ||
736 | std::string query; | ||
737 | int divider = schema.find(";"); | ||
738 | if (divider != std::string::npos) | ||
739 | { | ||
740 | query = schema.substr(0, divider+1); | ||
741 | schema = schema.substr(divider+2); | ||
742 | } else { | ||
743 | break; | ||
744 | } | 248 | } |
745 | 249 | ||
746 | sqlite3_stmt* schmstmt; | 250 | void generator::readImageNetUrls() |
747 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &schmstmt, NULL) != SQLITE_OK) | ||
748 | { | 251 | { |
749 | db_error(ppdb, query); | 252 | // The ImageNet datafile is so large that it is unreasonable and |
750 | } | 253 | // unnecessary to read it into memory; instead, we will parse each line as |
751 | 254 | // we read it. This has the caveat that we cannot display a progress bar. | |
752 | if (sqlite3_step(schmstmt) != SQLITE_DONE) | 255 | std::cout << "Reading image counts from ImageNet..." << std::endl; |
753 | { | ||
754 | db_error(ppdb, query); | ||
755 | } | ||
756 | |||
757 | sqlite3_finalize(schmstmt); | ||
758 | } | ||
759 | |||
760 | std::cout << "Writing prepositions..." << std::endl; | ||
761 | std::ifstream prepfile("prepositions.txt"); | ||
762 | if (!prepfile.is_open()) | ||
763 | { | ||
764 | std::cout << "Could not find prepositions file" << std::endl; | ||
765 | print_usage(); | ||
766 | } | ||
767 | |||
768 | for (;;) | ||
769 | { | ||
770 | std::string line; | ||
771 | if (!getline(prepfile, line)) | ||
772 | { | ||
773 | break; | ||
774 | } | ||
775 | |||
776 | if (line.back() == '\r') | ||
777 | { | ||
778 | line.pop_back(); | ||
779 | } | ||
780 | |||
781 | std::regex relation("^([^:]+): (.+)"); | ||
782 | std::smatch relation_data; | ||
783 | std::regex_search(line, relation_data, relation); | ||
784 | std::string prep = relation_data[1]; | ||
785 | std::list<std::string> groups = verbly::split<std::list<std::string>>(relation_data[2], ", "); | ||
786 | |||
787 | std::string query("INSERT INTO prepositions (form) VALUES (?)"); | ||
788 | sqlite3_stmt* ppstmt; | ||
789 | |||
790 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
791 | { | ||
792 | db_error(ppdb, query); | ||
793 | } | ||
794 | |||
795 | sqlite3_bind_text(ppstmt, 1, prep.c_str(), prep.length(), SQLITE_TRANSIENT); | ||
796 | |||
797 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | ||
798 | { | ||
799 | db_error(ppdb, query); | ||
800 | } | ||
801 | |||
802 | sqlite3_finalize(ppstmt); | ||
803 | |||
804 | query = "SELECT last_insert_rowid()"; | ||
805 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
806 | { | ||
807 | db_error(ppdb, query); | ||
808 | } | ||
809 | |||
810 | if (sqlite3_step(ppstmt) != SQLITE_ROW) | ||
811 | { | ||
812 | db_error(ppdb, query); | ||
813 | } | ||
814 | |||
815 | int rowid = sqlite3_column_int(ppstmt, 0); | ||
816 | sqlite3_finalize(ppstmt); | ||
817 | |||
818 | for (auto group : groups) | ||
819 | { | ||
820 | query = "INSERT INTO preposition_groups (preposition_id, groupname) VALUES (?, ?)"; | ||
821 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
822 | { | ||
823 | db_error(ppdb, query); | ||
824 | } | ||
825 | 256 | ||
826 | sqlite3_bind_int(ppstmt, 1, rowid); | 257 | std::ifstream file(imageNetPath_); |
827 | sqlite3_bind_text(ppstmt, 2, group.c_str(), group.length(), SQLITE_TRANSIENT); | 258 | if (!file) |
828 | |||
829 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | ||
830 | { | 259 | { |
831 | db_error(ppdb, query); | 260 | throw std::invalid_argument("Could not find file " + imageNetPath_); |
832 | } | 261 | } |
833 | |||
834 | sqlite3_finalize(ppstmt); | ||
835 | } | ||
836 | } | ||
837 | |||
838 | 262 | ||
839 | { | 263 | std::string line; |
840 | progress ppgs("Writing verbs...", verbs.size()); | 264 | while (std::getline(file, line)) |
841 | for (auto& mapping : verbs) | ||
842 | { | ||
843 | sqlite3_stmt* ppstmt; | ||
844 | std::string query("INSERT INTO verbs (infinitive, past_tense, past_participle, ing_form, s_form) VALUES (?, ?, ?, ?, ?)"); | ||
845 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
846 | { | ||
847 | db_error(ppdb, query); | ||
848 | } | ||
849 | |||
850 | sqlite3_bind_text(ppstmt, 1, mapping.second.infinitive.c_str(), mapping.second.infinitive.length(), SQLITE_TRANSIENT); | ||
851 | sqlite3_bind_text(ppstmt, 2, mapping.second.past_tense.c_str(), mapping.second.past_tense.length(), SQLITE_TRANSIENT); | ||
852 | sqlite3_bind_text(ppstmt, 3, mapping.second.past_participle.c_str(), mapping.second.past_participle.length(), SQLITE_TRANSIENT); | ||
853 | sqlite3_bind_text(ppstmt, 4, mapping.second.ing_form.c_str(), mapping.second.ing_form.length(), SQLITE_TRANSIENT); | ||
854 | sqlite3_bind_text(ppstmt, 5, mapping.second.s_form.c_str(), mapping.second.s_form.length(), SQLITE_TRANSIENT); | ||
855 | |||
856 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | ||
857 | { | ||
858 | db_error(ppdb, query); | ||
859 | } | ||
860 | |||
861 | sqlite3_finalize(ppstmt); | ||
862 | |||
863 | std::string canonical(mapping.second.infinitive); | ||
864 | std::transform(std::begin(canonical), std::end(canonical), std::begin(canonical), ::tolower); | ||
865 | if (pronunciations.count(canonical) == 1) | ||
866 | { | 265 | { |
867 | query = "SELECT last_insert_rowid()"; | 266 | if (line.back() == '\r') |
868 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
869 | { | 267 | { |
870 | db_error(ppdb, query); | 268 | line.pop_back(); |
871 | } | 269 | } |
872 | 270 | ||
873 | if (sqlite3_step(ppstmt) != SQLITE_ROW) | 271 | std::string wnid_s = line.substr(1, 8); |
272 | int wnid = stoi(wnid_s) + 100000000; | ||
273 | if (notionByWnid_.count(wnid)) | ||
874 | { | 274 | { |
875 | db_error(ppdb, query); | 275 | // We know that this notion has a wnid and is a noun. |
876 | } | 276 | notionByWnid_.at(wnid)->incrementNumOfImages(); |
877 | |||
878 | int rowid = sqlite3_column_int(ppstmt, 0); | ||
879 | |||
880 | sqlite3_finalize(ppstmt); | ||
881 | |||
882 | mapping.second.id = rowid; | ||
883 | |||
884 | for (auto pronunciation : pronunciations[canonical]) | ||
885 | { | ||
886 | if (!pronunciation.rhyme.empty()) | ||
887 | { | ||
888 | query = "INSERT INTO verb_pronunciations (verb_id, pronunciation, syllables, stress, prerhyme, rhyme) VALUES (?, ?, ?, ?, ?, ?)"; | ||
889 | } else { | ||
890 | query = "INSERT INTO verb_pronunciations (verb_id, pronunciation, syllables, stress) VALUES (?, ?, ?, ?)"; | ||
891 | } | ||
892 | |||
893 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
894 | { | ||
895 | db_error(ppdb, query); | ||
896 | } | ||
897 | |||
898 | sqlite3_bind_int(ppstmt, 1, rowid); | ||
899 | sqlite3_bind_text(ppstmt, 2, pronunciation.phonemes.c_str(), pronunciation.phonemes.length(), SQLITE_TRANSIENT); | ||
900 | sqlite3_bind_int(ppstmt, 3, pronunciation.syllables); | ||
901 | sqlite3_bind_text(ppstmt, 4, pronunciation.stress.c_str(), pronunciation.stress.length(), SQLITE_TRANSIENT); | ||
902 | |||
903 | if (!pronunciation.rhyme.empty()) | ||
904 | { | ||
905 | sqlite3_bind_text(ppstmt, 5, pronunciation.prerhyme.c_str(), pronunciation.prerhyme.length(), SQLITE_TRANSIENT); | ||
906 | sqlite3_bind_text(ppstmt, 6, pronunciation.rhyme.c_str(), pronunciation.rhyme.length(), SQLITE_TRANSIENT); | ||
907 | } | ||
908 | |||
909 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | ||
910 | { | ||
911 | db_error(ppdb, query); | ||
912 | } | ||
913 | |||
914 | sqlite3_finalize(ppstmt); | ||
915 | } | 277 | } |
916 | } | 278 | } |
917 | |||
918 | ppgs.update(); | ||
919 | } | 279 | } |
920 | } | 280 | |
921 | 281 | void generator::readWordNetSenseKeys() | |
922 | { | ||
923 | progress ppgs("Writing verb frames...", groups.size()); | ||
924 | for (auto& mapping : groups) | ||
925 | { | 282 | { |
926 | std::list<json> roledatal; | 283 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_sk.pl")); |
927 | std::transform(std::begin(mapping.second.roles), std::end(mapping.second.roles), std::back_inserter(roledatal), [] (std::pair<std::string, selrestr_t> r) { | 284 | progress ppgs("Reading sense keys from WordNet...", lines.size()); |
928 | json role; | ||
929 | role["type"] = r.first; | ||
930 | role["selrestrs"] = export_selrestrs(r.second); | ||
931 | |||
932 | return role; | ||
933 | }); | ||
934 | |||
935 | json roledata(roledatal); | ||
936 | std::string rdm = roledata.dump(); | ||
937 | |||
938 | sqlite3_stmt* ppstmt; | ||
939 | std::string query("INSERT INTO groups (data) VALUES (?)"); | ||
940 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
941 | { | ||
942 | db_error(ppdb, query); | ||
943 | } | ||
944 | |||
945 | sqlite3_bind_blob(ppstmt, 1, rdm.c_str(), rdm.size(), SQLITE_TRANSIENT); | ||
946 | |||
947 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | ||
948 | { | ||
949 | db_error(ppdb, query); | ||
950 | } | ||
951 | 285 | ||
952 | sqlite3_finalize(ppstmt); | 286 | for (std::string line : lines) |
953 | |||
954 | query = "SELECT last_insert_rowid()"; | ||
955 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
956 | { | ||
957 | db_error(ppdb, query); | ||
958 | } | ||
959 | |||
960 | if (sqlite3_step(ppstmt) != SQLITE_ROW) | ||
961 | { | ||
962 | db_error(ppdb, query); | ||
963 | } | ||
964 | |||
965 | int gid = sqlite3_column_int(ppstmt, 0); | ||
966 | sqlite3_finalize(ppstmt); | ||
967 | |||
968 | for (auto frame : mapping.second.frames) | ||
969 | { | 287 | { |
970 | std::list<json> fdatap; | 288 | ppgs.update(); |
971 | std::transform(std::begin(frame), std::end(frame), std::back_inserter(fdatap), [] (framepart_t& fp) { | ||
972 | json part; | ||
973 | |||
974 | switch (fp.type) | ||
975 | { | ||
976 | case framepart_t::type_t::np: | ||
977 | { | ||
978 | part["type"] = "np"; | ||
979 | part["role"] = fp.role; | ||
980 | part["selrestrs"] = export_selrestrs(fp.selrestrs); | ||
981 | part["synrestrs"] = fp.synrestrs; | ||
982 | |||
983 | break; | ||
984 | } | ||
985 | |||
986 | case framepart_t::type_t::pp: | ||
987 | { | ||
988 | part["type"] = "pp"; | ||
989 | part["values"] = fp.choices; | ||
990 | part["preprestrs"] = fp.preprestrs; | ||
991 | |||
992 | break; | ||
993 | } | ||
994 | |||
995 | case framepart_t::type_t::v: | ||
996 | { | ||
997 | part["type"] = "v"; | ||
998 | |||
999 | break; | ||
1000 | } | ||
1001 | |||
1002 | case framepart_t::type_t::adj: | ||
1003 | { | ||
1004 | part["type"] = "adj"; | ||
1005 | |||
1006 | break; | ||
1007 | } | ||
1008 | |||
1009 | case framepart_t::type_t::adv: | ||
1010 | { | ||
1011 | part["type"] = "adv"; | ||
1012 | |||
1013 | break; | ||
1014 | } | ||
1015 | |||
1016 | case framepart_t::type_t::lex: | ||
1017 | { | ||
1018 | part["type"] = "lex"; | ||
1019 | part["value"] = fp.lexval; | ||
1020 | |||
1021 | break; | ||
1022 | } | ||
1023 | } | ||
1024 | |||
1025 | return part; | ||
1026 | }); | ||
1027 | |||
1028 | json fdata(fdatap); | ||
1029 | std::string marshall = fdata.dump(); | ||
1030 | |||
1031 | query = "INSERT INTO frames (group_id, data) VALUES (?, ?)"; | ||
1032 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
1033 | { | ||
1034 | db_error(ppdb, query); | ||
1035 | } | ||
1036 | |||
1037 | sqlite3_bind_int(ppstmt, 1, gid); | ||
1038 | sqlite3_bind_blob(ppstmt, 2, marshall.c_str(), marshall.length(), SQLITE_TRANSIENT); | ||
1039 | 289 | ||
1040 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | 290 | // We only actually need to lookup verbs by sense key so we'll just |
291 | // ignore everything that isn't a verb. | ||
292 | std::regex relation("^sk\\((2\\d{8}),(\\d+),'(.+)'\\)\\.$"); | ||
293 | std::smatch relation_data; | ||
294 | if (!std::regex_search(line, relation_data, relation)) | ||
1041 | { | 295 | { |
1042 | db_error(ppdb, query); | 296 | continue; |
1043 | } | 297 | } |
298 | |||
299 | int synset_id = stoi(relation_data[1]); | ||
300 | int wnum = stoi(relation_data[2]); | ||
301 | std::string sense_key = relation_data[3]; | ||
1044 | 302 | ||
1045 | sqlite3_finalize(ppstmt); | 303 | // We are treating this mapping as injective, which is not entirely |
1046 | } | 304 | // accurate. First, the WordNet table contains duplicate rows, so those |
1047 | 305 | // need to be ignored. More importantly, a small number of sense keys | |
1048 | for (auto member : mapping.second.members) | 306 | // (one for each letter of the Latin alphabet, plus 9 other words) each |
1049 | { | 307 | // map to two different words in the same synset which differ only by |
1050 | if (verbs.count(member) == 1) | 308 | // capitalization. Luckily, none of these exceptions are verbs, so we |
309 | // can pretend that the mapping is injective. | ||
310 | if (!wnSenseKeys_.count(sense_key)) | ||
1051 | { | 311 | { |
1052 | auto& v = verbs[member]; | 312 | std::pair<int, int> lookup(synset_id, wnum); |
1053 | 313 | if (wordByWnidAndWnum_.count(lookup)) | |
1054 | query = "INSERT INTO verb_groups (verb_id, group_id) VALUES (?, ?)"; | ||
1055 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
1056 | { | ||
1057 | db_error(ppdb, query); | ||
1058 | } | ||
1059 | |||
1060 | sqlite3_bind_int(ppstmt, 1, v.id); | ||
1061 | sqlite3_bind_int(ppstmt, 2, gid); | ||
1062 | |||
1063 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | ||
1064 | { | 314 | { |
1065 | db_error(ppdb, query); | 315 | wnSenseKeys_[sense_key] = wordByWnidAndWnum_.at(lookup); |
1066 | } | 316 | } |
1067 | |||
1068 | sqlite3_finalize(ppstmt); | ||
1069 | } | 317 | } |
1070 | } | 318 | } |
1071 | |||
1072 | ppgs.update(); | ||
1073 | } | 319 | } |
1074 | } | 320 | |
1075 | 321 | void generator::readVerbNet() | |
1076 | // Get nouns/adjectives/adverbs from WordNet | ||
1077 | // Useful relations: | ||
1078 | // - s: master list | ||
1079 | // - ant: antonymy (e.g. happy/sad, sad/happy, happiness/sadness) | ||
1080 | // - at: variation (e.g. a measurement can be standard or nonstandard) | ||
1081 | // - der: derivation (e.g. happy/happily, happily/happy) | ||
1082 | // - hyp: hypernymy/hyponymy (e.g. color/red, color/blue) | ||
1083 | // - ins: instantiation (do we need this? let's see) | ||
1084 | // - mm: member meronymy/holonymy (e.g. family/mother, family/child) | ||
1085 | // - mp: part meronymy/holonymy (e.g. wheel/spoke, wheel/tire) | ||
1086 | // - ms: substance meronymy/holonymy (e.g. tire/rubber, doorstop/rubber) | ||
1087 | // - per: pertainymy (e.g. something that is Alaskan pertains to Alaska) | ||
1088 | // mannernymy (e.g. something done quickly is done in a manner that is quick) | ||
1089 | // - sa: specification (e.g. inaccurate (general) can mean imprecise or incorrect (specific)) | ||
1090 | // - sim: synonymy (e.g. cheerful/happy, happy/cheerful) | ||
1091 | // - syntax: positioning flags for some adjectives | ||
1092 | std::string wnpref {argv[3]}; | ||
1093 | if (wnpref.back() != '/') | ||
1094 | { | ||
1095 | wnpref += '/'; | ||
1096 | } | ||
1097 | |||
1098 | // s table | ||
1099 | { | ||
1100 | std::ifstream wnsfile(wnpref + "wn_s.pl"); | ||
1101 | if (!wnsfile.is_open()) | ||
1102 | { | 322 | { |
1103 | std::cout << "Invalid WordNet data directory." << std::endl; | 323 | std::cout << "Reading frames from VerbNet..." << std::endl; |
1104 | print_usage(); | ||
1105 | } | ||
1106 | 324 | ||
1107 | std::list<std::string> lines; | 325 | DIR* dir; |
1108 | for (;;) | 326 | if ((dir = opendir(verbNetPath_.c_str())) == nullptr) |
1109 | { | ||
1110 | std::string line; | ||
1111 | if (!getline(wnsfile, line)) | ||
1112 | { | 327 | { |
1113 | break; | 328 | throw std::invalid_argument("Invalid VerbNet data directory"); |
1114 | } | 329 | } |
1115 | 330 | ||
1116 | if (line.back() == '\r') | 331 | struct dirent* ent; |
1117 | { | 332 | while ((ent = readdir(dir)) != nullptr) |
1118 | line.pop_back(); | ||
1119 | } | ||
1120 | |||
1121 | lines.push_back(line); | ||
1122 | } | ||
1123 | |||
1124 | progress ppgs("Writing nouns, adjectives, and adverbs...", lines.size()); | ||
1125 | for (auto line : lines) | ||
1126 | { | ||
1127 | ppgs.update(); | ||
1128 | |||
1129 | std::regex relation("^s\\(([134]\\d{8}),(\\d+),'(.+)',\\w,\\d+,\\d+\\)\\.$"); | ||
1130 | std::smatch relation_data; | ||
1131 | if (!std::regex_search(line, relation_data, relation)) | ||
1132 | { | 333 | { |
1133 | continue; | 334 | std::string filename(verbNetPath_); |
1134 | } | 335 | |
336 | if (filename.back() != '/') | ||
337 | { | ||
338 | filename += '/'; | ||
339 | } | ||
1135 | 340 | ||
1136 | int synset_id = stoi(relation_data[1]); | 341 | filename += ent->d_name; |
1137 | int wnum = stoi(relation_data[2]); | ||
1138 | std::string word = relation_data[3]; | ||
1139 | size_t word_it; | ||
1140 | while ((word_it = word.find("''")) != std::string::npos) | ||
1141 | { | ||
1142 | word.erase(word_it, 1); | ||
1143 | } | ||
1144 | 342 | ||
1145 | std::string query; | 343 | if (filename.rfind(".xml") != filename.size() - 4) |
1146 | switch (synset_id / 100000000) | ||
1147 | { | ||
1148 | case 1: // Noun | ||
1149 | { | 344 | { |
1150 | if (nouns.count(word) == 1) | 345 | continue; |
1151 | { | ||
1152 | query = "INSERT INTO nouns (singular, proper, complexity, images, wnid, plural) VALUES (?, ?, ?, ?, ?, ?)"; | ||
1153 | } else { | ||
1154 | query = "INSERT INTO nouns (singular, proper, complexity, images, wnid) VALUES (?, ?, ?, ?, ?)"; | ||
1155 | } | ||
1156 | |||
1157 | break; | ||
1158 | } | 346 | } |
1159 | 347 | ||
1160 | case 2: // Verb | 348 | xmlDocPtr doc = xmlParseFile(filename.c_str()); |
349 | if (doc == nullptr) | ||
1161 | { | 350 | { |
1162 | // Ignore | 351 | throw std::logic_error("Error opening " + filename); |
1163 | |||
1164 | break; | ||
1165 | } | 352 | } |
1166 | 353 | ||
1167 | case 3: // Adjective | 354 | xmlNodePtr top = xmlDocGetRootElement(doc); |
355 | if ((top == nullptr) || (xmlStrcmp(top->name, reinterpret_cast<const xmlChar*>("VNCLASS")))) | ||
1168 | { | 356 | { |
1169 | if (adjectives.count(word) == 1) | 357 | throw std::logic_error("Bad VerbNet file format: " + filename); |
1170 | { | ||
1171 | query = "INSERT INTO adjectives (base_form, complexity, comparative, superlative) VALUES (?, ?, ?, ?)"; | ||
1172 | } else { | ||
1173 | query = "INSERT INTO adjectives (base_form, complexity) VALUES (?, ?)"; | ||
1174 | } | ||
1175 | |||
1176 | break; | ||
1177 | } | 358 | } |
1178 | 359 | ||
1179 | case 4: // Adverb | 360 | try |
1180 | { | 361 | { |
1181 | if (adjectives.count(word) == 1) | 362 | createGroup(top); |
1182 | { | 363 | } catch (const std::exception& e) |
1183 | query = "INSERT INTO adverbs (base_form, complexity, comparative, superlative) VALUES (?, ?, ?, ?)"; | 364 | { |
1184 | } else { | 365 | std::throw_with_nested(std::logic_error("Error parsing VerbNet file: " + filename)); |
1185 | query = "INSERT INTO adverbs (base_form, complexity) VALUES (?, ?)"; | ||
1186 | } | ||
1187 | |||
1188 | break; | ||
1189 | } | 366 | } |
1190 | } | 367 | } |
368 | |||
369 | closedir(dir); | ||
370 | } | ||
1191 | 371 | ||
1192 | sqlite3_stmt* ppstmt; | 372 | void generator::readAgidInflections() |
1193 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | 373 | { |
374 | std::list<std::string> lines(readFile(agidPath_)); | ||
375 | progress ppgs("Reading inflections from AGID...", lines.size()); | ||
376 | |||
377 | for (std::string line : lines) | ||
1194 | { | 378 | { |
1195 | db_error(ppdb, query); | 379 | ppgs.update(); |
1196 | } | 380 | |
381 | int divider = line.find_first_of(" "); | ||
382 | std::string infinitive = line.substr(0, divider); | ||
383 | line = line.substr(divider+1); | ||
384 | char type = line[0]; | ||
1197 | 385 | ||
1198 | sqlite3_bind_text(ppstmt, 1, word.c_str(), word.length(), SQLITE_TRANSIENT); | 386 | if (line[1] == '?') |
1199 | switch (synset_id / 100000000) | ||
1200 | { | ||
1201 | case 1: // Noun | ||
1202 | { | 387 | { |
1203 | sqlite3_bind_int(ppstmt, 2, (std::any_of(std::begin(word), std::end(word), [] (char ch) { | 388 | line.erase(0, 4); |
1204 | return isupper(ch); | 389 | } else { |
1205 | }) ? 1 : 0)); | 390 | line.erase(0, 3); |
1206 | |||
1207 | sqlite3_bind_int(ppstmt, 3, verbly::split<std::list<std::string>>(word, " ").size()); | ||
1208 | sqlite3_bind_int(ppstmt, 4, images[synset_id]); | ||
1209 | sqlite3_bind_int(ppstmt, 5, synset_id); | ||
1210 | |||
1211 | if (nouns.count(word) == 1) | ||
1212 | { | ||
1213 | sqlite3_bind_text(ppstmt, 6, nouns[word].plural.c_str(), nouns[word].plural.length(), SQLITE_TRANSIENT); | ||
1214 | } | ||
1215 | |||
1216 | break; | ||
1217 | } | 391 | } |
1218 | 392 | ||
1219 | case 3: // Adjective | 393 | if (!lemmaByBaseForm_.count(infinitive) && (type != 'V')) |
1220 | case 4: // Adverb | ||
1221 | { | 394 | { |
1222 | sqlite3_bind_int(ppstmt, 2, verbly::split<std::list<std::string>>(word, " ").size()); | 395 | continue; |
1223 | 396 | } | |
1224 | if (adjectives.count(word) == 1) | 397 | |
398 | lemma& curLemma = lookupOrCreateLemma(infinitive); | ||
399 | |||
400 | auto forms = split<std::vector<std::string>>(line, " | "); | ||
401 | for (std::string& inflForm : forms) | ||
402 | { | ||
403 | int sympos = inflForm.find_first_of(",?"); | ||
404 | if (sympos != std::string::npos) | ||
1225 | { | 405 | { |
1226 | sqlite3_bind_text(ppstmt, 3, adjectives[word].comparative.c_str(), adjectives[word].comparative.length(), SQLITE_TRANSIENT); | 406 | inflForm = inflForm.substr(0, sympos); |
1227 | sqlite3_bind_text(ppstmt, 4, adjectives[word].superlative.c_str(), adjectives[word].superlative.length(), SQLITE_TRANSIENT); | ||
1228 | } | 407 | } |
1229 | |||
1230 | break; | ||
1231 | } | 408 | } |
1232 | } | ||
1233 | 409 | ||
1234 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | 410 | switch (type) |
1235 | { | ||
1236 | db_error(ppdb, query); | ||
1237 | } | ||
1238 | |||
1239 | sqlite3_finalize(ppstmt); | ||
1240 | |||
1241 | query = "SELECT last_insert_rowid()"; | ||
1242 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
1243 | { | ||
1244 | db_error(ppdb, query); | ||
1245 | } | ||
1246 | |||
1247 | if (sqlite3_step(ppstmt) != SQLITE_ROW) | ||
1248 | { | ||
1249 | db_error(ppdb, query); | ||
1250 | } | ||
1251 | |||
1252 | int rowid = sqlite3_column_int(ppstmt, 0); | ||
1253 | wn[synset_id][wnum] = rowid; | ||
1254 | |||
1255 | sqlite3_finalize(ppstmt); | ||
1256 | |||
1257 | std::string canonical(word); | ||
1258 | std::transform(std::begin(canonical), std::end(canonical), std::begin(canonical), ::tolower); | ||
1259 | if (pronunciations.count(canonical) == 1) | ||
1260 | { | ||
1261 | for (auto pronunciation : pronunciations[canonical]) | ||
1262 | { | 411 | { |
1263 | switch (synset_id / 100000000) | 412 | case 'V': |
1264 | { | 413 | { |
1265 | case 1: // Noun | 414 | if (forms.size() == 4) |
1266 | { | 415 | { |
1267 | if (!pronunciation.rhyme.empty()) | 416 | curLemma.addInflection(inflection::past_tense, lookupOrCreateForm(forms[0])); |
1268 | { | 417 | curLemma.addInflection(inflection::past_participle, lookupOrCreateForm(forms[1])); |
1269 | query = "INSERT INTO noun_pronunciations (noun_id, pronunciation, syllables, stress, prerhyme, rhyme) VALUES (?, ?, ?, ?, ?, ?)"; | 418 | curLemma.addInflection(inflection::ing_form, lookupOrCreateForm(forms[2])); |
1270 | } else { | 419 | curLemma.addInflection(inflection::s_form, lookupOrCreateForm(forms[3])); |
1271 | query = "INSERT INTO noun_pronunciations (noun_id, pronunciation, syllables, stress) VALUES (?, ?, ?, ?)"; | 420 | } else if (forms.size() == 3) |
1272 | } | ||
1273 | |||
1274 | break; | ||
1275 | } | ||
1276 | |||
1277 | case 3: // Adjective | ||
1278 | { | 421 | { |
1279 | if (!pronunciation.rhyme.empty()) | 422 | curLemma.addInflection(inflection::past_tense, lookupOrCreateForm(forms[0])); |
1280 | { | 423 | curLemma.addInflection(inflection::past_participle, lookupOrCreateForm(forms[0])); |
1281 | query = "INSERT INTO adjective_pronunciations (adjective_id, pronunciation, syllables, stress, prerhyme, rhyme) VALUES (?, ?, ?, ?, ?, ?)"; | 424 | curLemma.addInflection(inflection::ing_form, lookupOrCreateForm(forms[1])); |
1282 | } else { | 425 | curLemma.addInflection(inflection::s_form, lookupOrCreateForm(forms[2])); |
1283 | query = "INSERT INTO adjective_pronunciations (adjective_id, pronunciation, syllables, stress) VALUES (?, ?, ?, ?)"; | 426 | } else if (forms.size() == 8) |
1284 | } | 427 | { |
1285 | 428 | // As of AGID 2014.08.11, this is only "to be" | |
1286 | break; | 429 | curLemma.addInflection(inflection::past_tense, lookupOrCreateForm(forms[0])); |
430 | curLemma.addInflection(inflection::past_participle, lookupOrCreateForm(forms[2])); | ||
431 | curLemma.addInflection(inflection::ing_form, lookupOrCreateForm(forms[3])); | ||
432 | curLemma.addInflection(inflection::s_form, lookupOrCreateForm(forms[4])); | ||
433 | } else { | ||
434 | // Words that don't fit the cases above as of AGID 2014.08.11: | ||
435 | // - may and shall do not conjugate the way we want them to | ||
436 | // - methinks only has a past tense and is an outlier | ||
437 | // - wit has five forms, and is archaic/obscure enough that we can ignore it for now | ||
438 | std::cout << " Ignoring verb \"" << infinitive << "\" due to non-standard number of forms." << std::endl; | ||
1287 | } | 439 | } |
1288 | 440 | ||
1289 | case 4: // Adverb | 441 | // For verbs in particular, we sometimes create a notion and a word |
442 | // from inflection data. Specifically, if there are not yet any | ||
443 | // verbs existing that have the same infinitive form. "Yet" means | ||
444 | // that this verb appears in the AGID data but not in either WordNet | ||
445 | // or VerbNet. | ||
446 | if (!wordsByBaseForm_.count(infinitive) | ||
447 | || !std::any_of(std::begin(wordsByBaseForm_.at(infinitive)), std::end(wordsByBaseForm_.at(infinitive)), [] (word* w) { | ||
448 | return w->getNotion().getPartOfSpeech() == part_of_speech::verb; | ||
449 | })) | ||
1290 | { | 450 | { |
1291 | if (!pronunciation.rhyme.empty()) | 451 | notion& n = createNotion(part_of_speech::verb); |
1292 | { | 452 | createWord(n, curLemma); |
1293 | query = "INSERT INTO adverb_pronunciations (adverb_id, pronunciation, syllables, stress, prerhyme, rhyme) VALUES (?, ?, ?, ?, ?, ?)"; | ||
1294 | } else { | ||
1295 | query = "INSERT INTO adverb_pronunciations (adverb_id, pronunciation, syllables, stress) VALUES (?, ?, ?, ?)"; | ||
1296 | } | ||
1297 | |||
1298 | break; | ||
1299 | } | 453 | } |
1300 | } | ||
1301 | |||
1302 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
1303 | { | ||
1304 | db_error(ppdb, query); | ||
1305 | } | ||
1306 | |||
1307 | sqlite3_bind_int(ppstmt, 1, rowid); | ||
1308 | sqlite3_bind_text(ppstmt, 2, pronunciation.phonemes.c_str(), pronunciation.phonemes.length(), SQLITE_TRANSIENT); | ||
1309 | sqlite3_bind_int(ppstmt, 3, pronunciation.syllables); | ||
1310 | sqlite3_bind_text(ppstmt, 4, pronunciation.stress.c_str(), pronunciation.stress.length(), SQLITE_TRANSIENT); | ||
1311 | |||
1312 | if (!pronunciation.rhyme.empty()) | ||
1313 | { | ||
1314 | sqlite3_bind_text(ppstmt, 5, pronunciation.prerhyme.c_str(), pronunciation.prerhyme.length(), SQLITE_TRANSIENT); | ||
1315 | sqlite3_bind_text(ppstmt, 6, pronunciation.rhyme.c_str(), pronunciation.rhyme.length(), SQLITE_TRANSIENT); | ||
1316 | } | ||
1317 | 454 | ||
1318 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | 455 | break; |
1319 | { | ||
1320 | db_error(ppdb, query); | ||
1321 | } | 456 | } |
1322 | |||
1323 | sqlite3_finalize(ppstmt); | ||
1324 | } | ||
1325 | } | ||
1326 | } | ||
1327 | } | ||
1328 | |||
1329 | // While we're working on s | ||
1330 | { | ||
1331 | progress ppgs("Writing word synonyms...", wn.size()); | ||
1332 | for (auto sense : wn) | ||
1333 | { | ||
1334 | ppgs.update(); | ||
1335 | 457 | ||
1336 | for (auto word1 : sense.second) | 458 | case 'A': |
1337 | { | ||
1338 | for (auto word2 : sense.second) | ||
1339 | { | ||
1340 | if (word1 != word2) | ||
1341 | { | 459 | { |
1342 | std::string query; | 460 | if (forms.size() == 2) |
1343 | switch (sense.first / 100000000) | ||
1344 | { | 461 | { |
1345 | case 1: // Noun | 462 | curLemma.addInflection(inflection::comparative, lookupOrCreateForm(forms[0])); |
1346 | { | 463 | curLemma.addInflection(inflection::superlative, lookupOrCreateForm(forms[1])); |
1347 | query = "INSERT INTO noun_synonymy (noun_1_id, noun_2_id) VALUES (?, ?)"; | 464 | } else { |
1348 | 465 | // As of AGID 2014.08.11, this is only "only", which has only the form "onliest" | |
1349 | break; | 466 | std::cout << " Ignoring adjective/adverb \"" << infinitive << "\" due to non-standard number of forms." << std::endl; |
1350 | } | 467 | } |
1351 | |||
1352 | case 2: // Verb | ||
1353 | { | ||
1354 | // Ignore | ||
1355 | |||
1356 | break; | ||
1357 | } | ||
1358 | |||
1359 | case 3: // Adjective | ||
1360 | { | ||
1361 | query = "INSERT INTO adjective_synonymy (adjective_1_id, adjective_2_id) VALUES (?, ?)"; | ||
1362 | 468 | ||
1363 | break; | 469 | break; |
1364 | } | 470 | } |
1365 | 471 | ||
1366 | case 4: // Adverb | 472 | case 'N': |
1367 | { | 473 | { |
1368 | query = "INSERT INTO adverb_synonymy (adverb_1_id, adverb_2_id) VALUES (?, ?)"; | 474 | if (forms.size() == 1) |
1369 | |||
1370 | break; | ||
1371 | } | ||
1372 | } | ||
1373 | |||
1374 | sqlite3_stmt* ppstmt; | ||
1375 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
1376 | { | ||
1377 | db_error(ppdb, query); | ||
1378 | } | ||
1379 | |||
1380 | sqlite3_bind_int(ppstmt, 1, word1.second); | ||
1381 | sqlite3_bind_int(ppstmt, 2, word2.second); | ||
1382 | |||
1383 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | ||
1384 | { | 475 | { |
1385 | db_error(ppdb, query); | 476 | curLemma.addInflection(inflection::plural, lookupOrCreateForm(forms[0])); |
477 | } else { | ||
478 | // As of AGID 2014.08.11, this is non-existent. | ||
479 | std::cout << " Ignoring noun \"" << infinitive << "\" due to non-standard number of forms." << std::endl; | ||
1386 | } | 480 | } |
1387 | 481 | ||
1388 | sqlite3_finalize(ppstmt); | 482 | break; |
1389 | } | 483 | } |
1390 | } | 484 | } |
1391 | } | 485 | } |
1392 | } | 486 | } |
1393 | } | ||
1394 | |||
1395 | // ant table | ||
1396 | { | ||
1397 | std::ifstream wnantfile(wnpref + "wn_ant.pl"); | ||
1398 | if (!wnantfile.is_open()) | ||
1399 | { | ||
1400 | std::cout << "Invalid WordNet data directory." << std::endl; | ||
1401 | print_usage(); | ||
1402 | } | ||
1403 | |||
1404 | std::list<std::string> lines; | ||
1405 | for (;;) | ||
1406 | { | ||
1407 | std::string line; | ||
1408 | if (!getline(wnantfile, line)) | ||
1409 | { | ||
1410 | break; | ||
1411 | } | ||
1412 | 487 | ||
1413 | if (line.back() == '\r') | 488 | void generator::readPrepositions() |
1414 | { | ||
1415 | line.pop_back(); | ||
1416 | } | ||
1417 | |||
1418 | lines.push_back(line); | ||
1419 | } | ||
1420 | |||
1421 | progress ppgs("Writing antonyms...", lines.size()); | ||
1422 | for (auto line : lines) | ||
1423 | { | 489 | { |
1424 | ppgs.update(); | 490 | std::list<std::string> lines(readFile("prepositions.txt")); |
491 | progress ppgs("Reading prepositions...", lines.size()); | ||
1425 | 492 | ||
1426 | std::regex relation("^ant\\(([134]\\d{8}),(\\d+),([134]\\d{8}),(\\d+)\\)\\."); | 493 | for (std::string line : lines) |
1427 | std::smatch relation_data; | ||
1428 | if (!std::regex_search(line, relation_data, relation)) | ||
1429 | { | ||
1430 | continue; | ||
1431 | } | ||
1432 | |||
1433 | int synset_id_1 = stoi(relation_data[1]); | ||
1434 | int wnum_1 = stoi(relation_data[2]); | ||
1435 | int synset_id_2 = stoi(relation_data[3]); | ||
1436 | int wnum_2 = stoi(relation_data[4]); | ||
1437 | |||
1438 | std::string query; | ||
1439 | switch (synset_id_1 / 100000000) | ||
1440 | { | 494 | { |
1441 | case 1: // Noun | 495 | ppgs.update(); |
1442 | { | ||
1443 | query = "INSERT INTO noun_antonymy (noun_1_id, noun_2_id) VALUES (?, ?)"; | ||
1444 | 496 | ||
1445 | break; | 497 | std::regex relation("^([^:]+): (.+)"); |
1446 | } | 498 | std::smatch relation_data; |
1447 | 499 | std::regex_search(line, relation_data, relation); | |
1448 | case 2: // Verb | 500 | std::string prep = relation_data[1]; |
1449 | { | 501 | auto groups = split<std::list<std::string>>(relation_data[2], ", "); |
1450 | // Ignore | ||
1451 | 502 | ||
1452 | break; | 503 | notion& n = createNotion(part_of_speech::preposition); |
1453 | } | 504 | lemma& l = lookupOrCreateLemma(prep); |
1454 | 505 | word& w = createWord(n, l); | |
1455 | case 3: // Adjective | ||
1456 | { | ||
1457 | query = "INSERT INTO adjective_antonymy (adjective_1_id, adjective_2_id) VALUES (?, ?)"; | ||
1458 | 506 | ||
1459 | break; | 507 | n.setPrepositionGroups(groups); |
1460 | } | ||
1461 | |||
1462 | case 4: // Adverb | ||
1463 | { | ||
1464 | query = "INSERT INTO adverb_antonymy (adverb_1_id, adverb_2_id) VALUES (?, ?)"; | ||
1465 | |||
1466 | break; | ||
1467 | } | ||
1468 | } | ||
1469 | |||
1470 | sqlite3_stmt* ppstmt; | ||
1471 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
1472 | { | ||
1473 | db_error(ppdb, query); | ||
1474 | } | ||
1475 | |||
1476 | sqlite3_bind_int(ppstmt, 1, wn[synset_id_1][wnum_1]); | ||
1477 | sqlite3_bind_int(ppstmt, 2, wn[synset_id_2][wnum_2]); | ||
1478 | |||
1479 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | ||
1480 | { | ||
1481 | db_error(ppdb, query); | ||
1482 | } | ||
1483 | |||
1484 | sqlite3_finalize(ppstmt); | ||
1485 | } | ||
1486 | } | ||
1487 | |||
1488 | // at table | ||
1489 | { | ||
1490 | std::ifstream wnatfile(wnpref + "wn_at.pl"); | ||
1491 | if (!wnatfile.is_open()) | ||
1492 | { | ||
1493 | std::cout << "Invalid WordNet data directory." << std::endl; | ||
1494 | print_usage(); | ||
1495 | } | ||
1496 | |||
1497 | std::list<std::string> lines; | ||
1498 | for (;;) | ||
1499 | { | ||
1500 | std::string line; | ||
1501 | if (!getline(wnatfile, line)) | ||
1502 | { | ||
1503 | break; | ||
1504 | } | 508 | } |
1505 | |||
1506 | if (line.back() == '\r') | ||
1507 | { | ||
1508 | line.pop_back(); | ||
1509 | } | ||
1510 | |||
1511 | lines.push_back(line); | ||
1512 | } | 509 | } |
1513 | 510 | ||
1514 | progress ppgs("Writing variations...", lines.size()); | 511 | void generator::readCmudictPronunciations() |
1515 | for (auto line : lines) | ||
1516 | { | 512 | { |
1517 | ppgs.update(); | 513 | std::list<std::string> lines(readFile(cmudictPath_)); |
514 | progress ppgs("Reading pronunciations from CMUDICT...", lines.size()); | ||
1518 | 515 | ||
1519 | std::regex relation("^at\\((1\\d{8}),(3\\d{8})\\)\\."); | 516 | for (std::string line : lines) |
1520 | std::smatch relation_data; | ||
1521 | if (!std::regex_search(line, relation_data, relation)) | ||
1522 | { | 517 | { |
1523 | continue; | 518 | ppgs.update(); |
1524 | } | 519 | |
1525 | 520 | std::regex phoneme("([A-Z][^ \\(]*)(?:\\(\\d+\\))? ([A-Z 0-9]+)"); | |
1526 | int synset_id_1 = stoi(relation_data[1]); | 521 | std::smatch phoneme_data; |
1527 | int synset_id_2 = stoi(relation_data[2]); | 522 | if (std::regex_search(line, phoneme_data, phoneme)) |
1528 | std::string query("INSERT INTO variation (noun_id, adjective_id) VALUES (?, ?)"); | ||
1529 | |||
1530 | for (auto mapping1 : wn[synset_id_1]) | ||
1531 | { | ||
1532 | for (auto mapping2 : wn[synset_id_2]) | ||
1533 | { | 523 | { |
1534 | sqlite3_stmt* ppstmt; | 524 | std::string canonical(phoneme_data[1]); |
1535 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK) | 525 | std::transform(std::begin(canonical), std::end(canonical), std::begin(canonical), ::tolower); |
1536 | { | ||
1537 | db_error(ppdb, query); | ||
1538 | } | ||
1539 | |||
1540 | sqlite3_bind_int(ppstmt, 1, mapping1.second); | ||
1541 | sqlite3_bind_int(ppstmt, 2, mapping2.second); | ||
1542 | 526 | ||
1543 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | 527 | if (!formByText_.count(canonical)) |
1544 | { | 528 | { |
1545 | db_error(ppdb, query); | 529 | continue; |
1546 | } | 530 | } |
1547 | 531 | ||
1548 | sqlite3_finalize(ppstmt); | 532 | std::string phonemes = phoneme_data[2]; |
533 | pronunciations_.emplace_back(phonemes); | ||
534 | pronunciation& p = pronunciations_.back(); | ||
535 | formByText_.at(canonical)->addPronunciation(p); | ||
1549 | } | 536 | } |
1550 | } | 537 | } |
1551 | } | 538 | } |
1552 | } | ||
1553 | |||
1554 | // der table | ||
1555 | { | ||
1556 | std::ifstream wnderfile(wnpref + "wn_der.pl"); | ||
1557 | if (!wnderfile.is_open()) | ||
1558 | { | ||
1559 | std::cout << "Invalid WordNet data directory." << std::endl; | ||
1560 | print_usage(); | ||
1561 | } | ||
1562 | 539 | ||
1563 | std::list<std::string> lines; | 540 | void generator::writeSchema() |
1564 | for (;;) | ||
1565 | { | 541 | { |
1566 | std::string line; | 542 | std::ifstream file("schema.sql"); |
1567 | if (!getline(wnderfile, line)) | 543 | if (!file) |
1568 | { | 544 | { |
1569 | break; | 545 | throw std::invalid_argument("Could not find database schema"); |
1570 | } | 546 | } |
1571 | 547 | ||
1572 | if (line.back() == '\r') | 548 | std::ostringstream schemaBuilder; |
549 | std::string line; | ||
550 | while (std::getline(file, line)) | ||
1573 | { | 551 | { |
1574 | line.pop_back(); | 552 | if (line.back() == '\r') |
553 | { | ||
554 | line.pop_back(); | ||
555 | } | ||
556 | |||
557 | schemaBuilder << line; | ||
1575 | } | 558 | } |
1576 | 559 | ||
1577 | lines.push_back(line); | 560 | std::string schema = schemaBuilder.str(); |
561 | auto queries = split<std::list<std::string>>(schema, ";"); | ||
562 | progress ppgs("Writing database schema...", queries.size()); | ||
563 | for (std::string query : queries) | ||
564 | { | ||
565 | if (!queries.empty()) | ||
566 | { | ||
567 | db_.runQuery(query); | ||
568 | } | ||
569 | |||
570 | ppgs.update(); | ||
571 | } | ||
1578 | } | 572 | } |
1579 | 573 | ||
1580 | progress ppgs("Writing morphological derivation...", lines.size()); | 574 | void generator::dumpObjects() |
1581 | for (auto line : lines) | ||
1582 | { | 575 | { |
1583 | ppgs.update(); | ||
1584 | |||
1585 | std::regex relation("^der\\(([134]\\d{8}),(\\d+),([134]\\d{8}),(\\d+)\\)\\."); | ||
1586 | std::smatch relation_data; | ||
1587 | if (!std::regex_search(line, relation_data, relation)) | ||
1588 | { | 576 | { |
1589 | continue; | 577 | progress ppgs("Writing notions...", notions_.size()); |
578 | |||
579 | for (notion& n : notions_) | ||
580 | { | ||
581 | db_ << n; | ||
582 | |||
583 | ppgs.update(); | ||
584 | } | ||
1590 | } | 585 | } |
1591 | 586 | ||
1592 | int synset_id_1 = stoi(relation_data[1]); | ||
1593 | int wnum_1 = stoi(relation_data[2]); | ||
1594 | int synset_id_2 = stoi(relation_data[3]); | ||
1595 | int wnum_2 = stoi(relation_data[4]); | ||
1596 | std::string query; | ||
1597 | switch (synset_id_1 / 100000000) | ||
1598 | { | 587 | { |
1599 | case 1: // Noun | 588 | progress ppgs("Writing words...", words_.size()); |
589 | |||
590 | for (word& w : words_) | ||
1600 | { | 591 | { |
1601 | switch (synset_id_2 / 100000000) | 592 | db_ << w; |
1602 | { | ||
1603 | case 1: // Noun | ||
1604 | { | ||
1605 | query = "INSERT INTO noun_noun_derivation (noun_1_id, noun_2_id) VALUES (?, ?)"; | ||
1606 | break; | ||
1607 | } | ||
1608 | |||
1609 | case 3: // Adjective | ||
1610 | { | ||
1611 | query = "INSERT INTO noun_adjective_derivation (noun_id, adjective_id) VALUES (?, ?)"; | ||
1612 | break; | ||
1613 | } | ||
1614 | |||
1615 | case 4: // Adverb | ||
1616 | { | ||
1617 | query = "INSERT INTO noun_adverb_derivation (noun_id, adverb_id) VALUES (?, ?)"; | ||
1618 | break; | ||
1619 | } | ||
1620 | } | ||
1621 | 593 | ||
1622 | break; | 594 | ppgs.update(); |
1623 | } | 595 | } |
596 | } | ||
597 | |||
598 | { | ||
599 | progress ppgs("Writing lemmas...", lemmas_.size()); | ||
1624 | 600 | ||
1625 | case 3: // Adjective | 601 | for (lemma& l : lemmas_) |
1626 | { | 602 | { |
1627 | switch (synset_id_2 / 100000000) | 603 | db_ << l; |
1628 | { | ||
1629 | case 1: // Noun | ||
1630 | { | ||
1631 | query = "INSERT INTO noun_adjective_derivation (adjective_id, noun_id) VALUES (?, ?)"; | ||
1632 | break; | ||
1633 | } | ||
1634 | |||
1635 | case 3: // Adjective | ||
1636 | { | ||
1637 | query = "INSERT INTO adjective_adjective_derivation (adjective_id, adjective_id) VALUES (?, ?)"; | ||
1638 | break; | ||
1639 | } | ||
1640 | |||
1641 | case 4: // Adverb | ||
1642 | { | ||
1643 | query = "INSERT INTO adjective_adverb_derivation (adjective_id, adverb_id) VALUES (?, ?)"; | ||
1644 | break; | ||
1645 | } | ||
1646 | } | ||
1647 | 604 | ||
1648 | break; | 605 | ppgs.update(); |
1649 | } | 606 | } |
607 | } | ||
608 | |||
609 | { | ||
610 | progress ppgs("Writing forms...", forms_.size()); | ||
1650 | 611 | ||
1651 | case 4: // Adverb | 612 | for (form& f : forms_) |
1652 | { | 613 | { |
1653 | switch (synset_id_2 / 100000000) | 614 | db_ << f; |
1654 | { | ||
1655 | case 1: // Noun | ||
1656 | { | ||
1657 | query = "INSERT INTO noun_adverb_derivation (adverb_id, noun_id) VALUES (?, ?)"; | ||
1658 | break; | ||
1659 | } | ||
1660 | |||
1661 | case 3: // Adjective | ||
1662 | { | ||
1663 | query = "INSERT INTO adjective_adverb_derivation (adverb_id, adjective_id) VALUES (?, ?)"; | ||
1664 | break; | ||
1665 | } | ||
1666 | |||
1667 | case 4: // Adverb | ||
1668 | { | ||
1669 | query = "INSERT INTO adverb_adverb_derivation (adverb_1_id, adverb_2_id) VALUES (?, ?)"; | ||
1670 | break; | ||
1671 | } | ||
1672 | } | ||
1673 | 615 | ||
1674 | break; | 616 | ppgs.update(); |
1675 | } | 617 | } |
1676 | } | 618 | } |
1677 | 619 | ||
1678 | sqlite3_stmt* ppstmt; | ||
1679 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK) | ||
1680 | { | 620 | { |
1681 | db_error(ppdb, query); | 621 | progress ppgs("Writing pronunciations...", pronunciations_.size()); |
622 | |||
623 | for (pronunciation& p : pronunciations_) | ||
624 | { | ||
625 | db_ << p; | ||
626 | |||
627 | ppgs.update(); | ||
628 | } | ||
1682 | } | 629 | } |
1683 | 630 | ||
1684 | sqlite3_bind_int(ppstmt, 1, wn[synset_id_1][wnum_1]); | ||
1685 | sqlite3_bind_int(ppstmt, 2, wn[synset_id_2][wnum_2]); | ||
1686 | |||
1687 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | ||
1688 | { | 631 | { |
1689 | db_error(ppdb, query); | 632 | progress ppgs("Writing verb groups...", groups_.size()); |
633 | |||
634 | for (group& g : groups_) | ||
635 | { | ||
636 | db_ << g; | ||
637 | |||
638 | ppgs.update(); | ||
639 | } | ||
1690 | } | 640 | } |
1691 | 641 | ||
1692 | sqlite3_finalize(ppstmt); | ||
1693 | } | ||
1694 | } | ||
1695 | |||
1696 | // hyp table | ||
1697 | { | ||
1698 | std::ifstream wnhypfile(wnpref + "wn_hyp.pl"); | ||
1699 | if (!wnhypfile.is_open()) | ||
1700 | { | ||
1701 | std::cout << "Invalid WordNet data directory." << std::endl; | ||
1702 | print_usage(); | ||
1703 | } | ||
1704 | |||
1705 | std::list<std::string> lines; | ||
1706 | for (;;) | ||
1707 | { | ||
1708 | std::string line; | ||
1709 | if (!getline(wnhypfile, line)) | ||
1710 | { | ||
1711 | break; | ||
1712 | } | ||
1713 | |||
1714 | if (line.back() == '\r') | ||
1715 | { | 642 | { |
1716 | line.pop_back(); | 643 | progress ppgs("Writing verb frames...", frames_.size()); |
644 | |||
645 | for (frame& f : frames_) | ||
646 | { | ||
647 | db_ << f; | ||
648 | |||
649 | ppgs.update(); | ||
650 | } | ||
1717 | } | 651 | } |
1718 | |||
1719 | lines.push_back(line); | ||
1720 | } | 652 | } |
1721 | 653 | ||
1722 | progress ppgs("Writing hypernyms...", lines.size()); | 654 | void generator::readWordNetAntonymy() |
1723 | for (auto line : lines) | ||
1724 | { | 655 | { |
1725 | ppgs.update(); | 656 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_ant.pl")); |
1726 | 657 | progress ppgs("Writing antonyms...", lines.size()); | |
1727 | std::regex relation("^hyp\\((1\\d{8}),(1\\d{8})\\)\\."); | 658 | for (auto line : lines) |
1728 | std::smatch relation_data; | ||
1729 | if (!std::regex_search(line, relation_data, relation)) | ||
1730 | { | 659 | { |
1731 | continue; | 660 | ppgs.update(); |
1732 | } | ||
1733 | |||
1734 | int synset_id_1 = stoi(relation_data[1]); | ||
1735 | int synset_id_2 = stoi(relation_data[2]); | ||
1736 | std::string query("INSERT INTO hypernymy (hyponym_id, hypernym_id) VALUES (?, ?)"); | ||
1737 | 661 | ||
1738 | for (auto mapping1 : wn[synset_id_1]) | 662 | std::regex relation("^ant\\(([134]\\d{8}),(\\d+),([134]\\d{8}),(\\d+)\\)\\."); |
1739 | { | 663 | std::smatch relation_data; |
1740 | for (auto mapping2 : wn[synset_id_2]) | 664 | if (!std::regex_search(line, relation_data, relation)) |
1741 | { | 665 | { |
1742 | sqlite3_stmt* ppstmt; | 666 | continue; |
1743 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK) | 667 | } |
1744 | { | 668 | |
1745 | db_error(ppdb, query); | 669 | std::pair<int, int> lookup1(std::stoi(relation_data[1]), std::stoi(relation_data[2])); |
1746 | } | 670 | std::pair<int, int> lookup2(std::stoi(relation_data[3]), std::stoi(relation_data[4])); |
1747 | 671 | ||
1748 | sqlite3_bind_int(ppstmt, 1, mapping1.second); | 672 | if (wordByWnidAndWnum_.count(lookup1) && wordByWnidAndWnum_.count(lookup2)) |
1749 | sqlite3_bind_int(ppstmt, 2, mapping2.second); | 673 | { |
674 | word& word1 = *wordByWnidAndWnum_.at(lookup1); | ||
675 | word& word2 = *wordByWnidAndWnum_.at(lookup2); | ||
1750 | 676 | ||
1751 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | 677 | std::list<field> fields; |
1752 | { | 678 | fields.emplace_back("antonym_1_id", word1.getId()); |
1753 | db_error(ppdb, query); | 679 | fields.emplace_back("antonym_2_id", word2.getId()); |
1754 | } | ||
1755 | 680 | ||
1756 | sqlite3_finalize(ppstmt); | 681 | db_.insertIntoTable("antonymy", std::move(fields)); |
1757 | } | 682 | } |
1758 | } | 683 | } |
1759 | } | 684 | } |
1760 | } | ||
1761 | |||
1762 | // ins table | ||
1763 | { | ||
1764 | std::ifstream wninsfile(wnpref + "wn_ins.pl"); | ||
1765 | if (!wninsfile.is_open()) | ||
1766 | { | ||
1767 | std::cout << "Invalid WordNet data directory." << std::endl; | ||
1768 | print_usage(); | ||
1769 | } | ||
1770 | |||
1771 | std::list<std::string> lines; | ||
1772 | for (;;) | ||
1773 | { | ||
1774 | std::string line; | ||
1775 | if (!getline(wninsfile, line)) | ||
1776 | { | ||
1777 | break; | ||
1778 | } | ||
1779 | 685 | ||
1780 | if (line.back() == '\r') | 686 | void generator::readWordNetVariation() |
687 | { | ||
688 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_at.pl")); | ||
689 | progress ppgs("Writing variation...", lines.size()); | ||
690 | for (auto line : lines) | ||
1781 | { | 691 | { |
1782 | line.pop_back(); | 692 | ppgs.update(); |
1783 | } | ||
1784 | 693 | ||
1785 | lines.push_back(line); | 694 | std::regex relation("^at\\((1\\d{8}),(3\\d{8})\\)\\."); |
695 | std::smatch relation_data; | ||
696 | if (!std::regex_search(line, relation_data, relation)) | ||
697 | { | ||
698 | continue; | ||
699 | } | ||
700 | |||
701 | int lookup1 = std::stoi(relation_data[1]); | ||
702 | int lookup2 = std::stoi(relation_data[2]); | ||
703 | |||
704 | if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2)) | ||
705 | { | ||
706 | notion& notion1 = *notionByWnid_.at(lookup1); | ||
707 | notion& notion2 = *notionByWnid_.at(lookup2); | ||
708 | |||
709 | std::list<field> fields; | ||
710 | fields.emplace_back("noun_id", notion1.getId()); | ||
711 | fields.emplace_back("adjective_id", notion2.getId()); | ||
712 | |||
713 | db_.insertIntoTable("variation", std::move(fields)); | ||
714 | } | ||
715 | } | ||
1786 | } | 716 | } |
1787 | 717 | ||
1788 | progress ppgs("Writing instantiations...", lines.size()); | 718 | void generator::readWordNetClasses() |
1789 | for (auto line : lines) | ||
1790 | { | 719 | { |
1791 | ppgs.update(); | 720 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_cls.pl")); |
1792 | 721 | progress ppgs("Writing usage, topicality, and regionality...", lines.size()); | |
1793 | std::regex relation("^ins\\((1\\d{8}),(1\\d{8})\\)\\."); | 722 | for (auto line : lines) |
1794 | std::smatch relation_data; | ||
1795 | if (!std::regex_search(line, relation_data, relation)) | ||
1796 | { | 723 | { |
1797 | continue; | 724 | ppgs.update(); |
1798 | } | ||
1799 | |||
1800 | int synset_id_1 = stoi(relation_data[1]); | ||
1801 | int synset_id_2 = stoi(relation_data[2]); | ||
1802 | std::string query("INSERT INTO instantiation (instance_id, class_id) VALUES (?, ?)"); | ||
1803 | 725 | ||
1804 | for (auto mapping1 : wn[synset_id_1]) | 726 | std::regex relation("^cls\\(([134]\\d{8}),(\\d+),(1\\d{8}),(\\d+),([tur])\\)\\."); |
1805 | { | 727 | std::smatch relation_data; |
1806 | for (auto mapping2 : wn[synset_id_2]) | 728 | if (!std::regex_search(line, relation_data, relation)) |
729 | { | ||
730 | continue; | ||
731 | } | ||
732 | |||
733 | std::pair<int, int> lookup1(std::stoi(relation_data[1]), std::stoi(relation_data[2])); | ||
734 | std::pair<int, int> lookup2(std::stoi(relation_data[3]), std::stoi(relation_data[4])); | ||
735 | std::string class_type = relation_data[5]; | ||
736 | |||
737 | std::string table_name; | ||
738 | if (class_type == "t") | ||
739 | { | ||
740 | table_name += "topicality"; | ||
741 | } else if (class_type == "u") | ||
742 | { | ||
743 | table_name += "usage"; | ||
744 | } else if (class_type == "r") | ||
745 | { | ||
746 | table_name += "regionality"; | ||
747 | } | ||
748 | |||
749 | std::list<int> leftJoin; | ||
750 | std::list<int> rightJoin; | ||
751 | |||
752 | if ((lookup1.second == 0) && (wordsByWnid_.count(lookup1.first))) | ||
1807 | { | 753 | { |
1808 | sqlite3_stmt* ppstmt; | 754 | std::transform(std::begin(wordsByWnid_.at(lookup1.first)), std::end(wordsByWnid_.at(lookup1.first)), std::back_inserter(leftJoin), [] (word* w) { |
1809 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK) | 755 | return w->getId(); |
756 | }); | ||
757 | } else if (wordByWnidAndWnum_.count(lookup1)) { | ||
758 | leftJoin.push_back(wordByWnidAndWnum_.at(lookup1)->getId()); | ||
759 | } | ||
760 | |||
761 | if ((lookup2.second == 0) && (wordsByWnid_.count(lookup2.first))) | ||
762 | { | ||
763 | std::transform(std::begin(wordsByWnid_.at(lookup2.first)), std::end(wordsByWnid_.at(lookup2.first)), std::back_inserter(rightJoin), [] (word* w) { | ||
764 | return w->getId(); | ||
765 | }); | ||
766 | } else if (wordByWnidAndWnum_.count(lookup2)) { | ||
767 | rightJoin.push_back(wordByWnidAndWnum_.at(lookup2)->getId()); | ||
768 | } | ||
769 | |||
770 | for (int word1 : leftJoin) | ||
771 | { | ||
772 | for (int word2 : rightJoin) | ||
1810 | { | 773 | { |
1811 | db_error(ppdb, query); | 774 | std::list<field> fields; |
1812 | } | 775 | fields.emplace_back("term_id", word1); |
776 | fields.emplace_back("domain_id", word2); | ||
1813 | 777 | ||
1814 | sqlite3_bind_int(ppstmt, 1, mapping1.second); | 778 | db_.insertIntoTable(table_name, std::move(fields)); |
1815 | sqlite3_bind_int(ppstmt, 2, mapping2.second); | ||
1816 | |||
1817 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | ||
1818 | { | ||
1819 | db_error(ppdb, query); | ||
1820 | } | 779 | } |
1821 | |||
1822 | sqlite3_finalize(ppstmt); | ||
1823 | } | 780 | } |
1824 | } | 781 | } |
1825 | } | 782 | } |
1826 | } | ||
1827 | |||
1828 | // mm table | ||
1829 | { | ||
1830 | std::ifstream wnmmfile(wnpref + "wn_mm.pl"); | ||
1831 | if (!wnmmfile.is_open()) | ||
1832 | { | ||
1833 | std::cout << "Invalid WordNet data directory." << std::endl; | ||
1834 | print_usage(); | ||
1835 | } | ||
1836 | |||
1837 | std::list<std::string> lines; | ||
1838 | for (;;) | ||
1839 | { | ||
1840 | std::string line; | ||
1841 | if (!getline(wnmmfile, line)) | ||
1842 | { | ||
1843 | break; | ||
1844 | } | ||
1845 | 783 | ||
1846 | if (line.back() == '\r') | 784 | void generator::readWordNetCausality() |
785 | { | ||
786 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_cs.pl")); | ||
787 | progress ppgs("Writing causality...", lines.size()); | ||
788 | for (auto line : lines) | ||
1847 | { | 789 | { |
1848 | line.pop_back(); | 790 | ppgs.update(); |
1849 | } | ||
1850 | 791 | ||
1851 | lines.push_back(line); | 792 | std::regex relation("^cs\\((2\\d{8}),(2\\d{8})\\)\\."); |
793 | std::smatch relation_data; | ||
794 | if (!std::regex_search(line, relation_data, relation)) | ||
795 | { | ||
796 | continue; | ||
797 | } | ||
798 | |||
799 | int lookup1 = std::stoi(relation_data[1]); | ||
800 | int lookup2 = std::stoi(relation_data[2]); | ||
801 | |||
802 | if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2)) | ||
803 | { | ||
804 | notion& notion1 = *notionByWnid_.at(lookup1); | ||
805 | notion& notion2 = *notionByWnid_.at(lookup2); | ||
806 | |||
807 | std::list<field> fields; | ||
808 | fields.emplace_back("effect_id", notion1.getId()); | ||
809 | fields.emplace_back("cause_id", notion2.getId()); | ||
810 | |||
811 | db_.insertIntoTable("causality", std::move(fields)); | ||
812 | } | ||
813 | } | ||
1852 | } | 814 | } |
1853 | 815 | ||
1854 | progress ppgs("Writing member meronyms...", lines.size()); | 816 | void generator::readWordNetEntailment() |
1855 | for (auto line : lines) | ||
1856 | { | 817 | { |
1857 | ppgs.update(); | 818 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_ent.pl")); |
1858 | 819 | progress ppgs("Writing entailment...", lines.size()); | |
1859 | std::regex relation("^mm\\((1\\d{8}),(1\\d{8})\\)\\."); | 820 | for (auto line : lines) |
1860 | std::smatch relation_data; | ||
1861 | if (!std::regex_search(line, relation_data, relation)) | ||
1862 | { | 821 | { |
1863 | continue; | 822 | ppgs.update(); |
1864 | } | ||
1865 | 823 | ||
1866 | int synset_id_1 = stoi(relation_data[1]); | 824 | std::regex relation("^ent\\((2\\d{8}),(2\\d{8})\\)\\."); |
1867 | int synset_id_2 = stoi(relation_data[2]); | 825 | std::smatch relation_data; |
1868 | std::string query("INSERT INTO member_meronymy (holonym_id, meronym_id) VALUES (?, ?)"); | 826 | if (!std::regex_search(line, relation_data, relation)) |
1869 | |||
1870 | for (auto mapping1 : wn[synset_id_1]) | ||
1871 | { | ||
1872 | for (auto mapping2 : wn[synset_id_2]) | ||
1873 | { | 827 | { |
1874 | sqlite3_stmt* ppstmt; | 828 | continue; |
1875 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK) | 829 | } |
1876 | { | 830 | |
1877 | db_error(ppdb, query); | 831 | int lookup1 = std::stoi(relation_data[1]); |
1878 | } | 832 | int lookup2 = std::stoi(relation_data[2]); |
1879 | 833 | ||
1880 | sqlite3_bind_int(ppstmt, 1, mapping1.second); | 834 | if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2)) |
1881 | sqlite3_bind_int(ppstmt, 2, mapping2.second); | 835 | { |
836 | notion& notion1 = *notionByWnid_.at(lookup1); | ||
837 | notion& notion2 = *notionByWnid_.at(lookup2); | ||
1882 | 838 | ||
1883 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | 839 | std::list<field> fields; |
1884 | { | 840 | fields.emplace_back("given_id", notion1.getId()); |
1885 | db_error(ppdb, query); | 841 | fields.emplace_back("entailment_id", notion2.getId()); |
1886 | } | ||
1887 | 842 | ||
1888 | sqlite3_finalize(ppstmt); | 843 | db_.insertIntoTable("entailment", std::move(fields)); |
1889 | } | 844 | } |
1890 | } | 845 | } |
1891 | } | 846 | } |
1892 | } | 847 | |
1893 | 848 | void generator::readWordNetHypernymy() | |
1894 | // ms table | ||
1895 | { | ||
1896 | std::ifstream wnmsfile(wnpref + "wn_ms.pl"); | ||
1897 | if (!wnmsfile.is_open()) | ||
1898 | { | ||
1899 | std::cout << "Invalid WordNet data directory." << std::endl; | ||
1900 | print_usage(); | ||
1901 | } | ||
1902 | |||
1903 | std::list<std::string> lines; | ||
1904 | for (;;) | ||
1905 | { | 849 | { |
1906 | std::string line; | 850 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_hyp.pl")); |
1907 | if (!getline(wnmsfile, line)) | 851 | progress ppgs("Writing hypernymy...", lines.size()); |
852 | for (auto line : lines) | ||
1908 | { | 853 | { |
1909 | break; | 854 | ppgs.update(); |
855 | |||
856 | std::regex relation("^hyp\\(([12]\\d{8}),([12]\\d{8})\\)\\."); | ||
857 | std::smatch relation_data; | ||
858 | if (!std::regex_search(line, relation_data, relation)) | ||
859 | { | ||
860 | continue; | ||
861 | } | ||
862 | |||
863 | int lookup1 = std::stoi(relation_data[1]); | ||
864 | int lookup2 = std::stoi(relation_data[2]); | ||
865 | |||
866 | if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2)) | ||
867 | { | ||
868 | notion& notion1 = *notionByWnid_.at(lookup1); | ||
869 | notion& notion2 = *notionByWnid_.at(lookup2); | ||
870 | |||
871 | std::list<field> fields; | ||
872 | fields.emplace_back("hyponym_id", notion1.getId()); | ||
873 | fields.emplace_back("hypernym_id", notion2.getId()); | ||
874 | |||
875 | db_.insertIntoTable("hypernymy", std::move(fields)); | ||
876 | } | ||
1910 | } | 877 | } |
878 | } | ||
1911 | 879 | ||
1912 | if (line.back() == '\r') | 880 | void generator::readWordNetInstantiation() |
881 | { | ||
882 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_ins.pl")); | ||
883 | progress ppgs("Writing instantiation...", lines.size()); | ||
884 | for (auto line : lines) | ||
1913 | { | 885 | { |
1914 | line.pop_back(); | 886 | ppgs.update(); |
1915 | } | ||
1916 | 887 | ||
1917 | lines.push_back(line); | 888 | std::regex relation("^ins\\((1\\d{8}),(1\\d{8})\\)\\."); |
889 | std::smatch relation_data; | ||
890 | if (!std::regex_search(line, relation_data, relation)) | ||
891 | { | ||
892 | continue; | ||
893 | } | ||
894 | |||
895 | int lookup1 = std::stoi(relation_data[1]); | ||
896 | int lookup2 = std::stoi(relation_data[2]); | ||
897 | |||
898 | if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2)) | ||
899 | { | ||
900 | notion& notion1 = *notionByWnid_.at(lookup1); | ||
901 | notion& notion2 = *notionByWnid_.at(lookup2); | ||
902 | |||
903 | std::list<field> fields; | ||
904 | fields.emplace_back("instance_id", notion1.getId()); | ||
905 | fields.emplace_back("class_id", notion2.getId()); | ||
906 | |||
907 | db_.insertIntoTable("instantiation", std::move(fields)); | ||
908 | } | ||
909 | } | ||
1918 | } | 910 | } |
1919 | 911 | ||
1920 | progress ppgs("Writing substance meronyms...", lines.size()); | 912 | void generator::readWordNetMemberMeronymy() |
1921 | for (auto line : lines) | ||
1922 | { | 913 | { |
1923 | ppgs.update(); | 914 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_mm.pl")); |
1924 | 915 | progress ppgs("Writing member meronymy...", lines.size()); | |
1925 | std::regex relation("^ms\\((1\\d{8}),(1\\d{8})\\)\\."); | 916 | for (auto line : lines) |
1926 | std::smatch relation_data; | ||
1927 | if (!std::regex_search(line, relation_data, relation)) | ||
1928 | { | 917 | { |
1929 | continue; | 918 | ppgs.update(); |
1930 | } | ||
1931 | |||
1932 | int synset_id_1 = stoi(relation_data[1]); | ||
1933 | int synset_id_2 = stoi(relation_data[2]); | ||
1934 | std::string query("INSERT INTO substance_meronymy (holonym_id, meronym_id) VALUES (?, ?)"); | ||
1935 | 919 | ||
1936 | for (auto mapping1 : wn[synset_id_1]) | 920 | std::regex relation("^mm\\((1\\d{8}),(1\\d{8})\\)\\."); |
1937 | { | 921 | std::smatch relation_data; |
1938 | for (auto mapping2 : wn[synset_id_2]) | 922 | if (!std::regex_search(line, relation_data, relation)) |
1939 | { | 923 | { |
1940 | sqlite3_stmt* ppstmt; | 924 | continue; |
1941 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK) | 925 | } |
1942 | { | 926 | |
1943 | db_error(ppdb, query); | 927 | int lookup1 = std::stoi(relation_data[1]); |
1944 | } | 928 | int lookup2 = std::stoi(relation_data[2]); |
929 | |||
930 | if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2)) | ||
931 | { | ||
932 | notion& notion1 = *notionByWnid_.at(lookup1); | ||
933 | notion& notion2 = *notionByWnid_.at(lookup2); | ||
1945 | 934 | ||
1946 | sqlite3_bind_int(ppstmt, 1, mapping1.second); | 935 | std::list<field> fields; |
1947 | sqlite3_bind_int(ppstmt, 2, mapping2.second); | 936 | fields.emplace_back("holonym_id", notion1.getId()); |
937 | fields.emplace_back("meronym_id", notion2.getId()); | ||
1948 | 938 | ||
1949 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | 939 | db_.insertIntoTable("member_meronymy", std::move(fields)); |
1950 | { | ||
1951 | db_error(ppdb, query); | ||
1952 | } | ||
1953 | |||
1954 | sqlite3_finalize(ppstmt); | ||
1955 | } | 940 | } |
1956 | } | 941 | } |
1957 | } | 942 | } |
1958 | } | 943 | |
1959 | 944 | void generator::readWordNetPartMeronymy() | |
1960 | // mm table | ||
1961 | { | ||
1962 | std::ifstream wnmpfile(wnpref + "wn_mp.pl"); | ||
1963 | if (!wnmpfile.is_open()) | ||
1964 | { | ||
1965 | std::cout << "Invalid WordNet data directory." << std::endl; | ||
1966 | print_usage(); | ||
1967 | } | ||
1968 | |||
1969 | std::list<std::string> lines; | ||
1970 | for (;;) | ||
1971 | { | 945 | { |
1972 | std::string line; | 946 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_mp.pl")); |
1973 | if (!getline(wnmpfile, line)) | 947 | progress ppgs("Writing part meronymy...", lines.size()); |
948 | for (auto line : lines) | ||
1974 | { | 949 | { |
1975 | break; | 950 | ppgs.update(); |
951 | |||
952 | std::regex relation("^mp\\((1\\d{8}),(1\\d{8})\\)\\."); | ||
953 | std::smatch relation_data; | ||
954 | if (!std::regex_search(line, relation_data, relation)) | ||
955 | { | ||
956 | continue; | ||
957 | } | ||
958 | |||
959 | int lookup1 = std::stoi(relation_data[1]); | ||
960 | int lookup2 = std::stoi(relation_data[2]); | ||
961 | |||
962 | if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2)) | ||
963 | { | ||
964 | notion& notion1 = *notionByWnid_.at(lookup1); | ||
965 | notion& notion2 = *notionByWnid_.at(lookup2); | ||
966 | |||
967 | std::list<field> fields; | ||
968 | fields.emplace_back("holonym_id", notion1.getId()); | ||
969 | fields.emplace_back("meronym_id", notion2.getId()); | ||
970 | |||
971 | db_.insertIntoTable("part_meronymy", std::move(fields)); | ||
972 | } | ||
1976 | } | 973 | } |
974 | } | ||
1977 | 975 | ||
1978 | if (line.back() == '\r') | 976 | void generator::readWordNetSubstanceMeronymy() |
977 | { | ||
978 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_ms.pl")); | ||
979 | progress ppgs("Writing substance meronymy...", lines.size()); | ||
980 | for (auto line : lines) | ||
1979 | { | 981 | { |
1980 | line.pop_back(); | 982 | ppgs.update(); |
1981 | } | ||
1982 | 983 | ||
1983 | lines.push_back(line); | 984 | std::regex relation("^ms\\((1\\d{8}),(1\\d{8})\\)\\."); |
985 | std::smatch relation_data; | ||
986 | if (!std::regex_search(line, relation_data, relation)) | ||
987 | { | ||
988 | continue; | ||
989 | } | ||
990 | |||
991 | int lookup1 = std::stoi(relation_data[1]); | ||
992 | int lookup2 = std::stoi(relation_data[2]); | ||
993 | |||
994 | if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2)) | ||
995 | { | ||
996 | notion& notion1 = *notionByWnid_.at(lookup1); | ||
997 | notion& notion2 = *notionByWnid_.at(lookup2); | ||
998 | |||
999 | std::list<field> fields; | ||
1000 | fields.emplace_back("holonym_id", notion1.getId()); | ||
1001 | fields.emplace_back("meronym_id", notion2.getId()); | ||
1002 | |||
1003 | db_.insertIntoTable("substance_meronymy", std::move(fields)); | ||
1004 | } | ||
1005 | } | ||
1984 | } | 1006 | } |
1985 | 1007 | ||
1986 | progress ppgs("Writing part meronyms...", lines.size()); | 1008 | void generator::readWordNetPertainymy() |
1987 | for (auto line : lines) | ||
1988 | { | 1009 | { |
1989 | ppgs.update(); | 1010 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_per.pl")); |
1990 | 1011 | progress ppgs("Writing pertainymy and mannernymy...", lines.size()); | |
1991 | std::regex relation("^mp\\((1\\d{8}),(1\\d{8})\\)\\."); | 1012 | for (auto line : lines) |
1992 | std::smatch relation_data; | ||
1993 | if (!std::regex_search(line, relation_data, relation)) | ||
1994 | { | 1013 | { |
1995 | continue; | 1014 | ppgs.update(); |
1996 | } | ||
1997 | |||
1998 | int synset_id_1 = stoi(relation_data[1]); | ||
1999 | int synset_id_2 = stoi(relation_data[2]); | ||
2000 | std::string query("INSERT INTO part_meronymy (holonym_id, meronym_id) VALUES (?, ?)"); | ||
2001 | 1015 | ||
2002 | for (auto mapping1 : wn[synset_id_1]) | 1016 | std::regex relation("^per\\(([34]\\d{8}),(\\d+),([13]\\d{8}),(\\d+)\\)\\."); |
2003 | { | 1017 | std::smatch relation_data; |
2004 | for (auto mapping2 : wn[synset_id_2]) | 1018 | if (!std::regex_search(line, relation_data, relation)) |
2005 | { | 1019 | { |
2006 | sqlite3_stmt* ppstmt; | 1020 | continue; |
2007 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK) | 1021 | } |
2008 | { | 1022 | |
2009 | db_error(ppdb, query); | 1023 | std::pair<int, int> lookup1(std::stoi(relation_data[1]), std::stoi(relation_data[2])); |
2010 | } | 1024 | std::pair<int, int> lookup2(std::stoi(relation_data[3]), std::stoi(relation_data[4])); |
1025 | |||
1026 | if (wordByWnidAndWnum_.count(lookup1) && wordByWnidAndWnum_.count(lookup2)) | ||
1027 | { | ||
1028 | word& word1 = *wordByWnidAndWnum_.at(lookup1); | ||
1029 | word& word2 = *wordByWnidAndWnum_.at(lookup2); | ||
2011 | 1030 | ||
2012 | sqlite3_bind_int(ppstmt, 1, mapping1.second); | 1031 | if (word1.getNotion().getPartOfSpeech() == part_of_speech::adjective) |
2013 | sqlite3_bind_int(ppstmt, 2, mapping2.second); | 1032 | { |
1033 | std::list<field> fields; | ||
1034 | fields.emplace_back("pertainym_id", word1.getId()); | ||
1035 | fields.emplace_back("noun_id", word2.getId()); | ||
2014 | 1036 | ||
2015 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | 1037 | db_.insertIntoTable("pertainymy", std::move(fields)); |
1038 | } else if (word1.getNotion().getPartOfSpeech() == part_of_speech::adverb) | ||
2016 | { | 1039 | { |
2017 | db_error(ppdb, query); | 1040 | std::list<field> fields; |
2018 | } | 1041 | fields.emplace_back("mannernym_id", word1.getId()); |
1042 | fields.emplace_back("adjective_id", word2.getId()); | ||
2019 | 1043 | ||
2020 | sqlite3_finalize(ppstmt); | 1044 | db_.insertIntoTable("mannernymy", std::move(fields)); |
1045 | } | ||
2021 | } | 1046 | } |
2022 | } | 1047 | } |
2023 | } | 1048 | } |
2024 | } | ||
2025 | |||
2026 | // per table | ||
2027 | { | ||
2028 | std::ifstream wnperfile(wnpref + "wn_per.pl"); | ||
2029 | if (!wnperfile.is_open()) | ||
2030 | { | ||
2031 | std::cout << "Invalid WordNet data directory." << std::endl; | ||
2032 | print_usage(); | ||
2033 | } | ||
2034 | |||
2035 | std::list<std::string> lines; | ||
2036 | for (;;) | ||
2037 | { | ||
2038 | std::string line; | ||
2039 | if (!getline(wnperfile, line)) | ||
2040 | { | ||
2041 | break; | ||
2042 | } | ||
2043 | 1049 | ||
2044 | if (line.back() == '\r') | 1050 | void generator::readWordNetSpecification() |
1051 | { | ||
1052 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_sa.pl")); | ||
1053 | progress ppgs("Writing specifications...", lines.size()); | ||
1054 | for (auto line : lines) | ||
2045 | { | 1055 | { |
2046 | line.pop_back(); | 1056 | ppgs.update(); |
1057 | |||
1058 | std::regex relation("^sa\\((23\\d{8}),(\\d+),(23\\d{8}),(\\d+)\\)\\."); | ||
1059 | std::smatch relation_data; | ||
1060 | if (!std::regex_search(line, relation_data, relation)) | ||
1061 | { | ||
1062 | continue; | ||
1063 | } | ||
1064 | |||
1065 | std::pair<int, int> lookup1(std::stoi(relation_data[1]), std::stoi(relation_data[2])); | ||
1066 | std::pair<int, int> lookup2(std::stoi(relation_data[3]), std::stoi(relation_data[4])); | ||
1067 | |||
1068 | if (wordByWnidAndWnum_.count(lookup1) && wordByWnidAndWnum_.count(lookup2)) | ||
1069 | { | ||
1070 | word& word1 = *wordByWnidAndWnum_.at(lookup1); | ||
1071 | word& word2 = *wordByWnidAndWnum_.at(lookup2); | ||
1072 | |||
1073 | std::list<field> fields; | ||
1074 | fields.emplace_back("general_id", word1.getId()); | ||
1075 | fields.emplace_back("specific_id", word2.getId()); | ||
1076 | |||
1077 | db_.insertIntoTable("specification", std::move(fields)); | ||
1078 | } | ||
2047 | } | 1079 | } |
2048 | |||
2049 | lines.push_back(line); | ||
2050 | } | 1080 | } |
2051 | 1081 | ||
2052 | progress ppgs("Writing pertainyms and mannernyms...", lines.size()); | 1082 | void generator::readWordNetSimilarity() |
2053 | for (auto line : lines) | ||
2054 | { | 1083 | { |
2055 | ppgs.update(); | 1084 | std::list<std::string> lines(readFile(wordNetPath_ + "wn_sim.pl")); |
2056 | 1085 | progress ppgs("Writing adjective similarity...", lines.size()); | |
2057 | std::regex relation("^per\\(([34]\\d{8}),(\\d+),([13]\\d{8}),(\\d+)\\)\\."); | 1086 | for (auto line : lines) |
2058 | std::smatch relation_data; | ||
2059 | if (!std::regex_search(line, relation_data, relation)) | ||
2060 | { | 1087 | { |
2061 | continue; | 1088 | ppgs.update(); |
2062 | } | ||
2063 | 1089 | ||
2064 | int synset_id_1 = stoi(relation_data[1]); | 1090 | std::regex relation("^sim\\((3\\d{8}),(3\\d{8})\\)\\."); |
2065 | int wnum_1 = stoi(relation_data[2]); | 1091 | std::smatch relation_data; |
2066 | int synset_id_2 = stoi(relation_data[3]); | 1092 | if (!std::regex_search(line, relation_data, relation)) |
2067 | int wnum_2 = stoi(relation_data[4]); | ||
2068 | std::string query; | ||
2069 | switch (synset_id_1 / 100000000) | ||
2070 | { | ||
2071 | case 3: // Adjective | ||
2072 | { | 1093 | { |
2073 | // This is a pertainym, the second word should be a noun | 1094 | continue; |
2074 | // Technically it can be an adjective but we're ignoring that | ||
2075 | if (synset_id_2 / 100000000 != 1) | ||
2076 | { | ||
2077 | continue; | ||
2078 | } | ||
2079 | |||
2080 | query = "INSERT INTO pertainymy (pertainym_id, noun_id) VALUES (?, ?)"; | ||
2081 | |||
2082 | break; | ||
2083 | } | 1095 | } |
1096 | |||
1097 | int lookup1 = std::stoi(relation_data[1]); | ||
1098 | int lookup2 = std::stoi(relation_data[2]); | ||
2084 | 1099 | ||
2085 | case 4: // Adverb | 1100 | if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2)) |
2086 | { | 1101 | { |
2087 | // This is a mannernym, the second word should be an adjective | 1102 | notion& notion1 = *notionByWnid_.at(lookup1); |
2088 | if (synset_id_2 / 100000000 != 3) | 1103 | notion& notion2 = *notionByWnid_.at(lookup2); |
2089 | { | ||
2090 | continue; | ||
2091 | } | ||
2092 | 1104 | ||
2093 | query = "INSERT INTO mannernymy (mannernym_id, adjective_id) VALUES (?, ?)"; | 1105 | std::list<field> fields; |
1106 | fields.emplace_back("adjective_1_id", notion1.getId()); | ||
1107 | fields.emplace_back("adjective_2_id", notion2.getId()); | ||
2094 | 1108 | ||
2095 | break; | 1109 | db_.insertIntoTable("similarity", std::move(fields)); |
2096 | } | 1110 | } |
2097 | } | 1111 | } |
2098 | 1112 | } | |
2099 | sqlite3_stmt* ppstmt; | ||
2100 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
2101 | { | ||
2102 | db_error(ppdb, query); | ||
2103 | } | ||
2104 | |||
2105 | sqlite3_bind_int(ppstmt, 1, wn[synset_id_1][wnum_1]); | ||
2106 | sqlite3_bind_int(ppstmt, 2, wn[synset_id_2][wnum_2]); | ||
2107 | 1113 | ||
2108 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | 1114 | std::list<std::string> generator::readFile(std::string path) |
1115 | { | ||
1116 | std::ifstream file(path); | ||
1117 | if (!file) | ||
2109 | { | 1118 | { |
2110 | db_error(ppdb, query); | 1119 | throw std::invalid_argument("Could not find file " + path); |
2111 | } | 1120 | } |
2112 | |||
2113 | sqlite3_finalize(ppstmt); | ||
2114 | } | ||
2115 | } | ||
2116 | 1121 | ||
2117 | // sa table | 1122 | std::list<std::string> lines; |
2118 | { | ||
2119 | std::ifstream wnsafile(wnpref + "wn_sa.pl"); | ||
2120 | if (!wnsafile.is_open()) | ||
2121 | { | ||
2122 | std::cout << "Invalid WordNet data directory." << std::endl; | ||
2123 | print_usage(); | ||
2124 | } | ||
2125 | |||
2126 | std::list<std::string> lines; | ||
2127 | for (;;) | ||
2128 | { | ||
2129 | std::string line; | 1123 | std::string line; |
2130 | if (!getline(wnsafile, line)) | 1124 | while (std::getline(file, line)) |
2131 | { | ||
2132 | break; | ||
2133 | } | ||
2134 | |||
2135 | if (line.back() == '\r') | ||
2136 | { | 1125 | { |
2137 | line.pop_back(); | 1126 | if (line.back() == '\r') |
1127 | { | ||
1128 | line.pop_back(); | ||
1129 | } | ||
1130 | |||
1131 | lines.push_back(line); | ||
2138 | } | 1132 | } |
2139 | 1133 | ||
2140 | lines.push_back(line); | 1134 | return lines; |
2141 | } | 1135 | } |
2142 | 1136 | ||
2143 | progress ppgs("Writing specifications...", lines.size()); | 1137 | part_of_speech generator::partOfSpeechByWnid(int wnid) |
2144 | for (auto line : lines) | ||
2145 | { | 1138 | { |
2146 | ppgs.update(); | 1139 | switch (wnid / 100000000) |
2147 | |||
2148 | std::regex relation("^per\\((3\\d{8}),(\\d+),(3\\d{8}),(\\d+)\\)\\."); | ||
2149 | std::smatch relation_data; | ||
2150 | if (!std::regex_search(line, relation_data, relation)) | ||
2151 | { | ||
2152 | continue; | ||
2153 | } | ||
2154 | |||
2155 | int synset_id_1 = stoi(relation_data[1]); | ||
2156 | int wnum_1 = stoi(relation_data[2]); | ||
2157 | int synset_id_2 = stoi(relation_data[3]); | ||
2158 | int wnum_2 = stoi(relation_data[4]); | ||
2159 | std::string query("INSERT INTO specification (general_id, specific_id) VALUES (?, ?)"); | ||
2160 | |||
2161 | sqlite3_stmt* ppstmt; | ||
2162 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
2163 | { | 1140 | { |
2164 | db_error(ppdb, query); | 1141 | case 1: return part_of_speech::noun; |
1142 | case 2: return part_of_speech::verb; | ||
1143 | case 3: return part_of_speech::adjective; | ||
1144 | case 4: return part_of_speech::adverb; | ||
1145 | default: throw std::domain_error("Invalid WordNet synset ID: " + std::to_string(wnid)); | ||
2165 | } | 1146 | } |
1147 | } | ||
2166 | 1148 | ||
2167 | sqlite3_bind_int(ppstmt, 1, wn[synset_id_1][wnum_1]); | 1149 | notion& generator::createNotion(part_of_speech partOfSpeech) |
2168 | sqlite3_bind_int(ppstmt, 2, wn[synset_id_2][wnum_2]); | 1150 | { |
1151 | notions_.emplace_back(partOfSpeech); | ||
1152 | |||
1153 | return notions_.back(); | ||
1154 | } | ||
2169 | 1155 | ||
2170 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | 1156 | notion& generator::lookupOrCreateNotion(int wnid) |
1157 | { | ||
1158 | if (!notionByWnid_.count(wnid)) | ||
2171 | { | 1159 | { |
2172 | db_error(ppdb, query); | 1160 | notions_.emplace_back(partOfSpeechByWnid(wnid), wnid); |
1161 | notionByWnid_[wnid] = ¬ions_.back(); | ||
2173 | } | 1162 | } |
2174 | 1163 | ||
2175 | sqlite3_finalize(ppstmt); | 1164 | return *notionByWnid_.at(wnid); |
2176 | } | ||
2177 | } | ||
2178 | |||
2179 | // sim table | ||
2180 | { | ||
2181 | std::ifstream wnsimfile(wnpref + "wn_sim.pl"); | ||
2182 | if (!wnsimfile.is_open()) | ||
2183 | { | ||
2184 | std::cout << "Invalid WordNet data directory." << std::endl; | ||
2185 | print_usage(); | ||
2186 | } | 1165 | } |
2187 | 1166 | ||
2188 | std::list<std::string> lines; | 1167 | lemma& generator::lookupOrCreateLemma(std::string base_form) |
2189 | for (;;) | ||
2190 | { | 1168 | { |
2191 | std::string line; | 1169 | if (!lemmaByBaseForm_.count(base_form)) |
2192 | if (!getline(wnsimfile, line)) | ||
2193 | { | 1170 | { |
2194 | break; | 1171 | lemmas_.emplace_back(lookupOrCreateForm(base_form)); |
1172 | lemmaByBaseForm_[base_form] = &lemmas_.back(); | ||
2195 | } | 1173 | } |
1174 | |||
1175 | return *lemmaByBaseForm_.at(base_form); | ||
1176 | } | ||
2196 | 1177 | ||
2197 | if (line.back() == '\r') | 1178 | form& generator::lookupOrCreateForm(std::string text) |
1179 | { | ||
1180 | if (!formByText_.count(text)) | ||
2198 | { | 1181 | { |
2199 | line.pop_back(); | 1182 | forms_.emplace_back(text); |
1183 | formByText_[text] = &forms_.back(); | ||
2200 | } | 1184 | } |
2201 | 1185 | ||
2202 | lines.push_back(line); | 1186 | return *formByText_[text]; |
2203 | } | 1187 | } |
2204 | 1188 | ||
2205 | progress ppgs("Writing sense synonyms...", lines.size()); | 1189 | template <typename... Args> word& generator::createWord(Args&&... args) |
2206 | for (auto line : lines) | ||
2207 | { | 1190 | { |
2208 | ppgs.update(); | 1191 | words_.emplace_back(std::forward<Args>(args)...); |
1192 | word& w = words_.back(); | ||
2209 | 1193 | ||
2210 | std::regex relation("^sim\\((3\\d{8}),(3\\d{8})\\)\\."); | 1194 | wordsByBaseForm_[w.getLemma().getBaseForm().getText()].insert(&w); |
2211 | std::smatch relation_data; | 1195 | |
2212 | if (!std::regex_search(line, relation_data, relation)) | 1196 | if (w.getNotion().hasWnid()) |
2213 | { | 1197 | { |
2214 | continue; | 1198 | wordsByWnid_[w.getNotion().getWnid()].insert(&w); |
2215 | } | 1199 | } |
2216 | 1200 | ||
2217 | int synset_id_1 = stoi(relation_data[1]); | 1201 | return w; |
2218 | int synset_id_2 = stoi(relation_data[2]); | 1202 | } |
2219 | std::string query("INSERT INTO adjective_synonymy (adjective_1_id, adjective_2_id) VALUES (?, ?)"); | 1203 | |
1204 | group& generator::createGroup(xmlNodePtr top) | ||
1205 | { | ||
1206 | groups_.emplace_back(); | ||
1207 | group& grp = groups_.back(); | ||
2220 | 1208 | ||
2221 | for (auto mapping1 : wn[synset_id_1]) | 1209 | xmlChar* key; |
1210 | |||
1211 | for (xmlNodePtr node = top->xmlChildrenNode; node != nullptr; node = node->next) | ||
2222 | { | 1212 | { |
2223 | for (auto mapping2 : wn[synset_id_2]) | 1213 | if (!xmlStrcmp(node->name, reinterpret_cast<const xmlChar*>("SUBCLASSES"))) |
2224 | { | 1214 | { |
2225 | sqlite3_stmt* ppstmt; | 1215 | for (xmlNodePtr subclass = node->xmlChildrenNode; subclass != nullptr; subclass = subclass->next) |
2226 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK) | ||
2227 | { | 1216 | { |
2228 | db_error(ppdb, query); | 1217 | if (!xmlStrcmp(subclass->name, reinterpret_cast<const xmlChar*>("VNSUBCLASS"))) |
1218 | { | ||
1219 | try | ||
1220 | { | ||
1221 | group& subgrp = createGroup(subclass); | ||
1222 | subgrp.setParent(grp); | ||
1223 | } catch (const std::exception& e) | ||
1224 | { | ||
1225 | key = xmlGetProp(subclass, reinterpret_cast<const xmlChar*>("ID")); | ||
1226 | |||
1227 | if (key == nullptr) | ||
1228 | { | ||
1229 | std::throw_with_nested(std::logic_error("Error parsing IDless subgroup")); | ||
1230 | } else { | ||
1231 | std::string subgroupId(reinterpret_cast<const char*>(key)); | ||
1232 | xmlFree(key); | ||
1233 | |||
1234 | std::throw_with_nested(std::logic_error("Error parsing subgroup " + subgroupId)); | ||
1235 | } | ||
1236 | } | ||
1237 | } | ||
2229 | } | 1238 | } |
2230 | 1239 | } else if (!xmlStrcmp(node->name, reinterpret_cast<const xmlChar*>("MEMBERS"))) | |
2231 | sqlite3_bind_int(ppstmt, 1, mapping1.second); | 1240 | { |
2232 | sqlite3_bind_int(ppstmt, 2, mapping2.second); | 1241 | for (xmlNodePtr member = node->xmlChildrenNode; member != nullptr; member = member->next) |
2233 | |||
2234 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | ||
2235 | { | 1242 | { |
2236 | db_error(ppdb, query); | 1243 | if (!xmlStrcmp(member->name, reinterpret_cast<const xmlChar*>("MEMBER"))) |
1244 | { | ||
1245 | key = xmlGetProp(member, reinterpret_cast<const xmlChar*>("wn")); | ||
1246 | std::string wnSenses(reinterpret_cast<const char*>(key)); | ||
1247 | xmlFree(key); | ||
1248 | |||
1249 | auto wnSenseKeys = split<std::list<std::string>>(wnSenses, " "); | ||
1250 | if (!wnSenseKeys.empty()) | ||
1251 | { | ||
1252 | std::list<std::string> tempKeys; | ||
1253 | |||
1254 | std::transform(std::begin(wnSenseKeys), std::end(wnSenseKeys), std::back_inserter(tempKeys), [] (std::string sense) { | ||
1255 | return sense + "::"; | ||
1256 | }); | ||
1257 | |||
1258 | std::list<std::string> filteredKeys; | ||
1259 | |||
1260 | std::remove_copy_if(std::begin(tempKeys), std::end(tempKeys), std::back_inserter(filteredKeys), [&] (std::string sense) { | ||
1261 | return !wnSenseKeys_.count(sense); | ||
1262 | }); | ||
1263 | |||
1264 | wnSenseKeys = std::move(filteredKeys); | ||
1265 | } | ||
1266 | |||
1267 | if (!wnSenseKeys.empty()) | ||
1268 | { | ||
1269 | for (std::string sense : wnSenseKeys) | ||
1270 | { | ||
1271 | word& wordSense = *wnSenseKeys_[sense]; | ||
1272 | wordSense.setVerbGroup(grp); | ||
1273 | } | ||
1274 | } else { | ||
1275 | key = xmlGetProp(member, reinterpret_cast<const xmlChar*>("name")); | ||
1276 | std::string memberName(reinterpret_cast<const char*>(key)); | ||
1277 | xmlFree(key); | ||
1278 | |||
1279 | notion& n = createNotion(part_of_speech::verb); | ||
1280 | lemma& l = lookupOrCreateLemma(memberName); | ||
1281 | word& w = createWord(n, l); | ||
1282 | |||
1283 | w.setVerbGroup(grp); | ||
1284 | } | ||
1285 | } | ||
2237 | } | 1286 | } |
2238 | 1287 | } else if (!xmlStrcmp(node->name, reinterpret_cast<const xmlChar*>("THEMROLES"))) | |
2239 | sqlite3_reset(ppstmt); | 1288 | { |
2240 | sqlite3_clear_bindings(ppstmt); | 1289 | for (xmlNodePtr roletopnode = node->xmlChildrenNode; roletopnode != nullptr; roletopnode = roletopnode->next) |
2241 | |||
2242 | sqlite3_bind_int(ppstmt, 1, mapping2.second); | ||
2243 | sqlite3_bind_int(ppstmt, 2, mapping1.second); | ||
2244 | |||
2245 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | ||
2246 | { | 1290 | { |
2247 | db_error(ppdb, query); | 1291 | if (!xmlStrcmp(roletopnode->name, reinterpret_cast<const xmlChar*>("THEMROLE"))) |
1292 | { | ||
1293 | role r; | ||
1294 | |||
1295 | key = xmlGetProp(roletopnode, reinterpret_cast<const xmlChar*>("type")); | ||
1296 | std::string roleName = reinterpret_cast<const char*>(key); | ||
1297 | xmlFree(key); | ||
1298 | |||
1299 | for (xmlNodePtr rolenode = roletopnode->xmlChildrenNode; rolenode != nullptr; rolenode = rolenode->next) | ||
1300 | { | ||
1301 | if (!xmlStrcmp(rolenode->name, reinterpret_cast<const xmlChar*>("SELRESTRS"))) | ||
1302 | { | ||
1303 | r.setSelrestrs(parseSelrestr(rolenode)); | ||
1304 | } | ||
1305 | } | ||
1306 | |||
1307 | grp.addRole(roleName, std::move(r)); | ||
1308 | } | ||
2248 | } | 1309 | } |
1310 | } else if (!xmlStrcmp(node->name, reinterpret_cast<const xmlChar*>("FRAMES"))) | ||
1311 | { | ||
1312 | for (xmlNodePtr frametopnode = node->xmlChildrenNode; frametopnode != nullptr; frametopnode = frametopnode->next) | ||
1313 | { | ||
1314 | if (!xmlStrcmp(frametopnode->name, reinterpret_cast<const xmlChar*>("FRAME"))) | ||
1315 | { | ||
1316 | frames_.emplace_back(); | ||
1317 | frame& fr = frames_.back(); | ||
2249 | 1318 | ||
2250 | sqlite3_finalize(ppstmt); | 1319 | for (xmlNodePtr framenode = frametopnode->xmlChildrenNode; framenode != nullptr; framenode = framenode->next) |
1320 | { | ||
1321 | if (!xmlStrcmp(framenode->name, reinterpret_cast<const xmlChar*>("SYNTAX"))) | ||
1322 | { | ||
1323 | for (xmlNodePtr syntaxnode = framenode->xmlChildrenNode; syntaxnode != nullptr; syntaxnode = syntaxnode->next) | ||
1324 | { | ||
1325 | if (!xmlStrcmp(syntaxnode->name, reinterpret_cast<const xmlChar*>("NP"))) | ||
1326 | { | ||
1327 | key = xmlGetProp(syntaxnode, reinterpret_cast<const xmlChar*>("value")); | ||
1328 | std::string partRole = reinterpret_cast<const char*>(key); | ||
1329 | xmlFree(key); | ||
1330 | |||
1331 | selrestr partSelrestrs; | ||
1332 | std::set<std::string> partSynrestrs; | ||
1333 | |||
1334 | for (xmlNodePtr npnode = syntaxnode->xmlChildrenNode; npnode != nullptr; npnode = npnode->next) | ||
1335 | { | ||
1336 | if (!xmlStrcmp(npnode->name, reinterpret_cast<const xmlChar*>("SYNRESTRS"))) | ||
1337 | { | ||
1338 | for (xmlNodePtr synrestr = npnode->xmlChildrenNode; synrestr != nullptr; synrestr = synrestr->next) | ||
1339 | { | ||
1340 | if (!xmlStrcmp(synrestr->name, reinterpret_cast<const xmlChar*>("SYNRESTR"))) | ||
1341 | { | ||
1342 | key = xmlGetProp(synrestr, reinterpret_cast<const xmlChar*>("type")); | ||
1343 | partSynrestrs.insert(reinterpret_cast<const char*>(key)); | ||
1344 | xmlFree(key); | ||
1345 | } | ||
1346 | } | ||
1347 | } | ||
1348 | |||
1349 | if (!xmlStrcmp(npnode->name, reinterpret_cast<const xmlChar*>("SELRESTRS"))) | ||
1350 | { | ||
1351 | partSelrestrs = parseSelrestr(npnode); | ||
1352 | } | ||
1353 | } | ||
1354 | |||
1355 | fr.push_back(part::createNounPhrase(std::move(partRole), std::move(partSelrestrs), std::move(partSynrestrs))); | ||
1356 | } else if (!xmlStrcmp(syntaxnode->name, reinterpret_cast<const xmlChar*>("VERB"))) | ||
1357 | { | ||
1358 | fr.push_back(part::createVerb()); | ||
1359 | } else if (!xmlStrcmp(syntaxnode->name, reinterpret_cast<const xmlChar*>("PREP"))) | ||
1360 | { | ||
1361 | std::set<std::string> partChoices; | ||
1362 | bool partLiteral; | ||
1363 | |||
1364 | if (xmlHasProp(syntaxnode, reinterpret_cast<const xmlChar*>("value"))) | ||
1365 | { | ||
1366 | partLiteral = true; | ||
1367 | |||
1368 | key = xmlGetProp(syntaxnode, reinterpret_cast<const xmlChar*>("value")); | ||
1369 | std::string choicesStr = reinterpret_cast<const char*>(key); | ||
1370 | xmlFree(key); | ||
1371 | |||
1372 | split(choicesStr, " ", std::inserter(partChoices, std::end(partChoices))); | ||
1373 | } else { | ||
1374 | partLiteral = false; | ||
1375 | |||
1376 | for (xmlNodePtr npnode = syntaxnode->xmlChildrenNode; npnode != nullptr; npnode = npnode->next) | ||
1377 | { | ||
1378 | if (!xmlStrcmp(npnode->name, reinterpret_cast<const xmlChar*>("SELRESTRS"))) | ||
1379 | { | ||
1380 | for (xmlNodePtr synrestr = npnode->xmlChildrenNode; synrestr != nullptr; synrestr = synrestr->next) | ||
1381 | { | ||
1382 | if (!xmlStrcmp(synrestr->name, reinterpret_cast<const xmlChar*>("SELRESTR"))) | ||
1383 | { | ||
1384 | key = xmlGetProp(synrestr, reinterpret_cast<const xmlChar*>("type")); | ||
1385 | partChoices.insert(reinterpret_cast<const char*>(key)); | ||
1386 | xmlFree(key); | ||
1387 | } | ||
1388 | } | ||
1389 | } | ||
1390 | } | ||
1391 | } | ||
1392 | |||
1393 | fr.push_back(part::createPreposition(std::move(partChoices), partLiteral)); | ||
1394 | } else if (!xmlStrcmp(syntaxnode->name, reinterpret_cast<const xmlChar*>("ADJ"))) | ||
1395 | { | ||
1396 | fr.push_back(part::createAdjective()); | ||
1397 | } else if (!xmlStrcmp(syntaxnode->name, reinterpret_cast<const xmlChar*>("ADV"))) | ||
1398 | { | ||
1399 | fr.push_back(part::createAdverb()); | ||
1400 | } else if (!xmlStrcmp(syntaxnode->name, reinterpret_cast<const xmlChar*>("LEX"))) | ||
1401 | { | ||
1402 | key = xmlGetProp(syntaxnode, reinterpret_cast<const xmlChar*>("value")); | ||
1403 | std::string literalValue = reinterpret_cast<const char*>(key); | ||
1404 | xmlFree(key); | ||
1405 | |||
1406 | fr.push_back(part::createLiteral(literalValue)); | ||
1407 | } else { | ||
1408 | continue; | ||
1409 | } | ||
1410 | } | ||
1411 | |||
1412 | grp.addFrame(fr); | ||
1413 | } | ||
1414 | } | ||
1415 | } | ||
1416 | } | ||
2251 | } | 1417 | } |
2252 | } | 1418 | } |
2253 | } | ||
2254 | } | ||
2255 | |||
2256 | // syntax table | ||
2257 | { | ||
2258 | std::ifstream wnsyntaxfile(wnpref + "wn_syntax.pl"); | ||
2259 | if (!wnsyntaxfile.is_open()) | ||
2260 | { | ||
2261 | std::cout << "Invalid WordNet data directory." << std::endl; | ||
2262 | print_usage(); | ||
2263 | } | ||
2264 | 1419 | ||
2265 | std::list<std::string> lines; | 1420 | return grp; |
2266 | for (;;) | ||
2267 | { | ||
2268 | std::string line; | ||
2269 | if (!getline(wnsyntaxfile, line)) | ||
2270 | { | ||
2271 | break; | ||
2272 | } | ||
2273 | |||
2274 | if (line.back() == '\r') | ||
2275 | { | ||
2276 | line.pop_back(); | ||
2277 | } | ||
2278 | |||
2279 | lines.push_back(line); | ||
2280 | } | 1421 | } |
2281 | 1422 | ||
2282 | progress ppgs("Writing adjective syntax markers...", lines.size()); | 1423 | selrestr generator::parseSelrestr(xmlNodePtr top) |
2283 | for (auto line : lines) | ||
2284 | { | 1424 | { |
2285 | ppgs.update(); | 1425 | xmlChar* key; |
2286 | 1426 | ||
2287 | std::regex relation("^syntax\\((3\\d{8}),(\\d+),([ipa])p?\\)\\."); | 1427 | if (!xmlStrcmp(top->name, reinterpret_cast<const xmlChar*>("SELRESTRS"))) |
2288 | std::smatch relation_data; | ||
2289 | if (!std::regex_search(line, relation_data, relation)) | ||
2290 | { | ||
2291 | continue; | ||
2292 | } | ||
2293 | |||
2294 | int synset_id = stoi(relation_data[1]); | ||
2295 | int wnum = stoi(relation_data[2]); | ||
2296 | std::string syn = relation_data[3]; | ||
2297 | std::string query("UPDATE adjectives SET position = ? WHERE adjective_id = ?"); | ||
2298 | |||
2299 | sqlite3_stmt* ppstmt; | ||
2300 | if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK) | ||
2301 | { | 1428 | { |
2302 | db_error(ppdb, query); | 1429 | if (xmlChildElementCount(top) == 0) |
2303 | } | 1430 | { |
2304 | 1431 | return {}; | |
2305 | sqlite3_bind_text(ppstmt, 1, syn.c_str(), 1, SQLITE_TRANSIENT); | 1432 | } else if (xmlChildElementCount(top) == 1) |
2306 | sqlite3_bind_int(ppstmt, 2, wn[synset_id][wnum]); | 1433 | { |
2307 | 1434 | return parseSelrestr(xmlFirstElementChild(top)); | |
2308 | if (sqlite3_step(ppstmt) != SQLITE_DONE) | 1435 | } else { |
1436 | bool orlogic = false; | ||
1437 | if (xmlHasProp(top, reinterpret_cast<const xmlChar*>("logic"))) | ||
1438 | { | ||
1439 | key = xmlGetProp(top, reinterpret_cast<const xmlChar*>("logic")); | ||
1440 | if (!xmlStrcmp(key, reinterpret_cast<const xmlChar*>("or"))) | ||
1441 | { | ||
1442 | orlogic = true; | ||
1443 | } | ||
1444 | |||
1445 | xmlFree(key); | ||
1446 | } | ||
1447 | |||
1448 | std::list<selrestr> children; | ||
1449 | for (xmlNodePtr selrestr = top->xmlChildrenNode; selrestr != nullptr; selrestr = selrestr->next) | ||
1450 | { | ||
1451 | if (!xmlStrcmp(selrestr->name, reinterpret_cast<const xmlChar*>("SELRESTRS")) | ||
1452 | || !xmlStrcmp(selrestr->name, reinterpret_cast<const xmlChar*>("SELRESTR"))) | ||
1453 | { | ||
1454 | children.push_back(parseSelrestr(selrestr)); | ||
1455 | } | ||
1456 | } | ||
1457 | |||
1458 | return selrestr(children, orlogic); | ||
1459 | } | ||
1460 | } else if (!xmlStrcmp(top->name, reinterpret_cast<const xmlChar*>("SELRESTR"))) | ||
2309 | { | 1461 | { |
2310 | db_error(ppdb, query); | 1462 | key = xmlGetProp(top, reinterpret_cast<const xmlChar*>("Value")); |
1463 | bool selPos = (std::string(reinterpret_cast<const char*>(key)) == "+"); | ||
1464 | xmlFree(key); | ||
1465 | |||
1466 | key = xmlGetProp(top, reinterpret_cast<const xmlChar*>("type")); | ||
1467 | std::string selRestriction = reinterpret_cast<const char*>(key); | ||
1468 | xmlFree(key); | ||
1469 | |||
1470 | return selrestr(selRestriction, selPos); | ||
1471 | } else { | ||
1472 | throw std::logic_error("Badly formatted selrestr"); | ||
2311 | } | 1473 | } |
2312 | |||
2313 | sqlite3_finalize(ppstmt); | ||
2314 | } | 1474 | } |
2315 | } | 1475 | |
2316 | 1476 | }; | |
2317 | sqlite3_close_v2(ppdb); | 1477 | }; |
2318 | |||
2319 | std::cout << "Done." << std::endl; | ||
2320 | } | ||
diff --git a/generator/generator.h b/generator/generator.h new file mode 100644 index 0000000..e2a7404 --- /dev/null +++ b/generator/generator.h | |||
@@ -0,0 +1,151 @@ | |||
1 | #ifndef GENERATOR_H_5B61CBC5 | ||
2 | #define GENERATOR_H_5B61CBC5 | ||
3 | |||
4 | #include <string> | ||
5 | #include <map> | ||
6 | #include <list> | ||
7 | #include <set> | ||
8 | #include <libxml/parser.h> | ||
9 | #include "database.h" | ||
10 | #include "notion.h" | ||
11 | #include "word.h" | ||
12 | #include "lemma.h" | ||
13 | #include "form.h" | ||
14 | #include "pronunciation.h" | ||
15 | #include "group.h" | ||
16 | #include "frame.h" | ||
17 | |||
18 | namespace verbly { | ||
19 | namespace generator { | ||
20 | |||
21 | enum class part_of_speech; | ||
22 | class selrestr; | ||
23 | |||
// Holds the input paths, the output database, the objects created from the
// inputs, and the lookup indexes over those objects. main() constructs one
// instance from its six command-line arguments and then calls run().
class generator {
public:

  // Constructor

  // The arguments are the six paths taken from the command line, in the
  // order printed by the usage text: VerbNet data directory, AGID infl.txt
  // file, WordNet prolog data directory, CMUDICT pronunciation file,
  // ImageNet urls.txt file, and the output datafile path.
  generator(
    std::string verbNetPath,
    std::string agidPath,
    std::string wordNetPath,
    std::string cmudictPath,
    std::string imageNetPath,
    std::string outputPath);

  // Action

  // Entry point called by main() once construction has succeeded.
  void run();

private:

  // Subroutines
  // NOTE(review): presumably the individual steps invoked by run(); their
  // definitions and call order are not visible here - confirm in generator.cpp.

  void readWordNetSynsets();

  void readAdjectivePositioning();

  void readImageNetUrls();

  void readWordNetSenseKeys();

  void readVerbNet();

  void readAgidInflections();

  void readPrepositions();

  void readCmudictPronunciations();

  void writeSchema();

  void dumpObjects();

  void readWordNetAntonymy();

  void readWordNetVariation();

  void readWordNetClasses();

  void readWordNetCausality();

  void readWordNetEntailment();

  void readWordNetHypernymy();

  void readWordNetInstantiation();

  void readWordNetMemberMeronymy();

  void readWordNetPartMeronymy();

  void readWordNetSubstanceMeronymy();

  void readWordNetPertainymy();

  void readWordNetSpecification();

  void readWordNetSimilarity();

  // Helpers

  // Returns the lines of the file at `path`, with a trailing '\r' removed
  // from each line.
  std::list<std::string> readFile(std::string path);

  // Maps a WordNet synset ID to its part of speech using wnid / 100000000
  // (1 noun, 2 verb, 3 adjective, 4 adverb); throws std::domain_error
  // otherwise.
  // NOTE(review): declared inline but defined out of line in generator.cpp,
  // so it is only usable from that translation unit.
  inline part_of_speech partOfSpeechByWnid(int wnid);

  // Appends a notion without a WordNet ID to notions_ and returns it.
  notion& createNotion(part_of_speech partOfSpeech);

  // Returns the notion indexed under `wnid`, creating it on first use.
  notion& lookupOrCreateNotion(int wnid);

  // Returns the lemma indexed under `base_form`, creating it on first use.
  lemma& lookupOrCreateLemma(std::string base_form);

  // Returns the form indexed under `text`, creating it on first use.
  form& lookupOrCreateForm(std::string text);

  // Constructs a word from the forwarded arguments, stores it in words_ and
  // registers it in wordsByBaseForm_ (and wordsByWnid_ when its notion has a
  // WordNet ID). Defined in generator.cpp, so it can only be instantiated
  // there.
  template <typename... Args> word& createWord(Args&&... args);

  // Builds a group (recursing into its subclasses) from an XML node.
  group& createGroup(xmlNodePtr top);

  // Converts a SELRESTRS/SELRESTR XML node into a selrestr.
  selrestr parseSelrestr(xmlNodePtr top);

  // Input

  std::string verbNetPath_;
  std::string agidPath_;
  std::string wordNetPath_;
  std::string cmudictPath_;
  std::string imageNetPath_;

  // Output

  database db_;

  // Data
  // Stored in std::list so that the raw pointers kept in the indexes below
  // remain valid as more objects are appended.

  std::list<notion> notions_;
  std::list<word> words_;
  std::list<lemma> lemmas_;
  std::list<form> forms_;
  std::list<pronunciation> pronunciations_;
  std::list<frame> frames_;
  std::list<group> groups_;

  // Indexes
  // Non-owning pointers into the containers above.

  std::map<int, notion*> notionByWnid_;
  std::map<int, std::set<word*>> wordsByWnid_;
  std::map<std::pair<int, int>, word*> wordByWnidAndWnum_;
  std::map<std::string, std::set<word*>> wordsByBaseForm_;
  std::map<std::string, lemma*> lemmaByBaseForm_;
  std::map<std::string, form*> formByText_;

  // Caches

  // Words keyed by sense key; createGroup() looks entries up using the
  // sense followed by "::".
  std::map<std::string, word*> wnSenseKeys_;

};
147 | |||
148 | }; | ||
149 | }; | ||
150 | |||
151 | #endif /* end of include guard: GENERATOR_H_5B61CBC5 */ | ||
diff --git a/generator/group.cpp b/generator/group.cpp new file mode 100644 index 0000000..7cbd4c8 --- /dev/null +++ b/generator/group.cpp | |||
@@ -0,0 +1,119 @@ | |||
1 | #include "group.h" | ||
2 | #include <stdexcept> | ||
3 | #include <list> | ||
4 | #include <json.hpp> | ||
5 | #include "database.h" | ||
6 | #include "field.h" | ||
7 | #include "frame.h" | ||
8 | |||
9 | namespace verbly { | ||
10 | namespace generator { | ||
11 | |||
12 | int group::nextId_ = 0; | ||
13 | |||
14 | group::group() : id_(nextId_++) | ||
15 | { | ||
16 | } | ||
17 | |||
18 | void group::setParent(const group& parent) | ||
19 | { | ||
20 | // Adding a group to itself is nonsensical. | ||
21 | assert(&parent != this); | ||
22 | |||
23 | parent_ = &parent; | ||
24 | } | ||
25 | |||
26 | void group::addRole(std::string name, role r) | ||
27 | { | ||
28 | roleNames_.insert(name); | ||
29 | roles_[name] = std::move(r); | ||
30 | } | ||
31 | |||
32 | void group::addFrame(const frame& f) | ||
33 | { | ||
34 | frames_.insert(&f); | ||
35 | } | ||
36 | |||
37 | std::set<std::string> group::getRoles() const | ||
38 | { | ||
39 | std::set<std::string> fullRoles = roleNames_; | ||
40 | |||
41 | if (hasParent()) | ||
42 | { | ||
43 | for (std::string name : getParent().getRoles()) | ||
44 | { | ||
45 | fullRoles.insert(name); | ||
46 | } | ||
47 | } | ||
48 | |||
49 | return fullRoles; | ||
50 | } | ||
51 | |||
52 | const role& group::getRole(std::string name) const | ||
53 | { | ||
54 | if (roles_.count(name)) | ||
55 | { | ||
56 | return roles_.at(name); | ||
57 | } else if (hasParent()) | ||
58 | { | ||
59 | return getParent().getRole(name); | ||
60 | } else { | ||
61 | throw std::invalid_argument("Specified role not found in verb group"); | ||
62 | } | ||
63 | } | ||
64 | |||
65 | std::set<const frame*> group::getFrames() const | ||
66 | { | ||
67 | std::set<const frame*> fullFrames = frames_; | ||
68 | |||
69 | if (hasParent()) | ||
70 | { | ||
71 | for (const frame* f : getParent().getFrames()) | ||
72 | { | ||
73 | fullFrames.insert(f); | ||
74 | } | ||
75 | } | ||
76 | |||
77 | return fullFrames; | ||
78 | } | ||
79 | |||
80 | database& operator<<(database& db, const group& arg) | ||
81 | { | ||
82 | // Serialize the group first | ||
83 | { | ||
84 | std::list<field> fields; | ||
85 | fields.emplace_back("group_id", arg.getId()); | ||
86 | |||
87 | nlohmann::json jsonRoles; | ||
88 | for (std::string name : arg.getRoles()) | ||
89 | { | ||
90 | const role& r = arg.getRole(name); | ||
91 | |||
92 | nlohmann::json jsonRole; | ||
93 | jsonRole["type"] = name; | ||
94 | jsonRole["selrestrs"] = r.getSelrestrs().toJson(); | ||
95 | |||
96 | jsonRoles.emplace_back(std::move(jsonRole)); | ||
97 | } | ||
98 | |||
99 | fields.emplace_back("data", jsonRoles.dump()); | ||
100 | |||
101 | db.insertIntoTable("groups", std::move(fields)); | ||
102 | } | ||
103 | |||
104 | // Then, serialize the group/frame relationship | ||
105 | for (const frame* f : arg.getFrames()) | ||
106 | { | ||
107 | std::list<field> fields; | ||
108 | |||
109 | fields.emplace_back("group_id", arg.getId()); | ||
110 | fields.emplace_back("frame_id", f->getId()); | ||
111 | |||
112 | db.insertIntoTable("groups_frames", std::move(fields)); | ||
113 | } | ||
114 | |||
115 | return db; | ||
116 | } | ||
117 | |||
118 | }; | ||
119 | }; | ||
diff --git a/generator/group.h b/generator/group.h new file mode 100644 index 0000000..efb8c5d --- /dev/null +++ b/generator/group.h | |||
@@ -0,0 +1,80 @@ | |||
1 | #ifndef GROUP_H_EDAFB5DC | ||
2 | #define GROUP_H_EDAFB5DC | ||
3 | |||
4 | #include <map> | ||
5 | #include <set> | ||
6 | #include <string> | ||
7 | #include <cassert> | ||
8 | #include "role.h" | ||
9 | |||
10 | namespace verbly { | ||
11 | namespace generator { | ||
12 | |||
13 | class frame; | ||
14 | class database; | ||
15 | |||
16 | class group { | ||
17 | public: | ||
18 | |||
19 | // Constructor | ||
20 | |||
21 | group(); | ||
22 | |||
23 | // Mutators | ||
24 | |||
25 | void setParent(const group& parent); | ||
26 | |||
27 | void addRole(std::string name, role r); | ||
28 | |||
29 | void addFrame(const frame& f); | ||
30 | |||
31 | // Accessors | ||
32 | |||
33 | int getId() const | ||
34 | { | ||
35 | return id_; | ||
36 | } | ||
37 | |||
38 | bool hasParent() const | ||
39 | { | ||
40 | return (parent_ != nullptr); | ||
41 | } | ||
42 | |||
43 | const group& getParent() const | ||
44 | { | ||
45 | // Calling code should always call hasParent first | ||
46 | assert(parent_ != nullptr); | ||
47 | |||
48 | return *parent_; | ||
49 | } | ||
50 | |||
51 | std::set<std::string> getRoles() const; | ||
52 | |||
53 | const role& getRole(std::string name) const; | ||
54 | |||
55 | std::set<const frame*> getFrames() const; | ||
56 | |||
57 | private: | ||
58 | |||
59 | static int nextId_; | ||
60 | |||
61 | const int id_; | ||
62 | |||
63 | const group* parent_ = nullptr; | ||
64 | std::map<std::string, role> roles_; | ||
65 | std::set<const frame*> frames_; | ||
66 | |||
67 | // Caches | ||
68 | |||
69 | std::set<std::string> roleNames_; | ||
70 | |||
71 | }; | ||
72 | |||
73 | // Serializer | ||
74 | |||
75 | database& operator<<(database& db, const group& arg); | ||
76 | |||
77 | }; | ||
78 | }; | ||
79 | |||
80 | #endif /* end of include guard: GROUP_H_EDAFB5DC */ | ||
diff --git a/generator/lemma.cpp b/generator/lemma.cpp new file mode 100644 index 0000000..e66b153 --- /dev/null +++ b/generator/lemma.cpp | |||
@@ -0,0 +1,65 @@ | |||
1 | #include "lemma.h" | ||
2 | #include <list> | ||
3 | #include <cassert> | ||
4 | #include "field.h" | ||
5 | #include "database.h" | ||
6 | #include "form.h" | ||
7 | |||
8 | namespace verbly { | ||
9 | namespace generator { | ||
10 | |||
11 | int lemma::nextId_ = 0; | ||
12 | |||
13 | lemma::lemma(const form& baseForm) : | ||
14 | id_(nextId_++), | ||
15 | baseForm_(baseForm) | ||
16 | { | ||
17 | inflections_[inflection::base] = {&baseForm}; | ||
18 | } | ||
19 | |||
20 | void lemma::addInflection(inflection type, const form& f) | ||
21 | { | ||
22 | // There can only be one base form. | ||
23 | assert(type != inflection::base); | ||
24 | |||
25 | inflections_[type].insert(&f); | ||
26 | } | ||
27 | |||
28 | std::set<const form*> lemma::getInflections(inflection type) const | ||
29 | { | ||
30 | if (inflections_.count(type)) | ||
31 | { | ||
32 | return inflections_.at(type); | ||
33 | } else { | ||
34 | return {}; | ||
35 | } | ||
36 | } | ||
37 | |||
38 | database& operator<<(database& db, const lemma& arg) | ||
39 | { | ||
40 | for (inflection type : { | ||
41 | inflection::base, | ||
42 | inflection::plural, | ||
43 | inflection::comparative, | ||
44 | inflection::superlative, | ||
45 | inflection::past_tense, | ||
46 | inflection::past_participle, | ||
47 | inflection::ing_form, | ||
48 | inflection::s_form}) | ||
49 | { | ||
50 | for (const form* f : arg.getInflections(type)) | ||
51 | { | ||
52 | std::list<field> fields; | ||
53 | fields.emplace_back("lemma_id", arg.getId()); | ||
54 | fields.emplace_back("form_id", f->getId()); | ||
55 | fields.emplace_back("category", static_cast<int>(type)); | ||
56 | |||
57 | db.insertIntoTable("lemmas_forms", std::move(fields)); | ||
58 | } | ||
59 | } | ||
60 | |||
61 | return db; | ||
62 | } | ||
63 | |||
64 | }; | ||
65 | }; | ||
diff --git a/generator/lemma.h b/generator/lemma.h new file mode 100644 index 0000000..6452e08 --- /dev/null +++ b/generator/lemma.h | |||
@@ -0,0 +1,58 @@ | |||
1 | #ifndef LEMMA_H_D73105A7 | ||
2 | #define LEMMA_H_D73105A7 | ||
3 | |||
4 | #include <string> | ||
5 | #include <map> | ||
6 | #include <set> | ||
7 | #include "enums.h" | ||
8 | |||
9 | namespace verbly { | ||
10 | namespace generator { | ||
11 | |||
12 | class database; | ||
13 | class form; | ||
14 | |||
15 | class lemma { | ||
16 | public: | ||
17 | |||
18 | // Constructors | ||
19 | |||
20 | explicit lemma(const form& baseForm); | ||
21 | |||
22 | // Mutators | ||
23 | |||
24 | void addInflection(inflection type, const form& f); | ||
25 | |||
26 | // Accessors | ||
27 | |||
28 | int getId() const | ||
29 | { | ||
30 | return id_; | ||
31 | } | ||
32 | |||
33 | const form& getBaseForm() const | ||
34 | { | ||
35 | return baseForm_; | ||
36 | } | ||
37 | |||
38 | std::set<const form*> getInflections(inflection type) const; | ||
39 | |||
40 | private: | ||
41 | |||
42 | static int nextId_; | ||
43 | |||
44 | const int id_; | ||
45 | const form& baseForm_; | ||
46 | |||
47 | std::map<inflection, std::set<const form*>> inflections_; | ||
48 | |||
49 | }; | ||
50 | |||
51 | // Serializer | ||
52 | |||
53 | database& operator<<(database& db, const lemma& arg); | ||
54 | |||
55 | }; | ||
56 | }; | ||
57 | |||
58 | #endif /* end of include guard: LEMMA_H_D73105A7 */ | ||
diff --git a/generator/main.cpp b/generator/main.cpp new file mode 100644 index 0000000..827c963 --- /dev/null +++ b/generator/main.cpp | |||
@@ -0,0 +1,40 @@ | |||
1 | #include <iostream> | ||
2 | #include <exception> | ||
3 | #include "generator.h" | ||
4 | |||
// Prints the command-line synopsis and a one-line description of each
// argument to standard output, flushing after every line.
void printUsage()
{
  static const char* const usageLines[] = {
    "usage: generator verbnet agid wordnet cmudict imagenet output",
    "verbnet :: path to a VerbNet data directory",
    "agid :: path to an AGID infl.txt file",
    "wordnet :: path to a WordNet prolog data directory",
    "cmudict :: path to a CMUDICT pronunciation file",
    "imagenet :: path to an ImageNet urls.txt file",
    "output :: datafile output path"};

  for (const char* usageLine : usageLines)
  {
    std::cout << usageLine << std::endl;
  }
}
15 | |||
16 | int main(int argc, char** argv) | ||
17 | { | ||
18 | if (argc == 7) | ||
19 | { | ||
20 | try | ||
21 | { | ||
22 | verbly::generator::generator app(argv[1], argv[2], argv[3], argv[4], argv[5], argv[6]); | ||
23 | |||
24 | try | ||
25 | { | ||
26 | app.run(); | ||
27 | } catch (const std::exception& e) | ||
28 | { | ||
29 | std::cout << e.what() << std::endl; | ||
30 | } | ||
31 | } catch (const std::exception& e) | ||
32 | { | ||
33 | std::cout << e.what() << std::endl; | ||
34 | printUsage(); | ||
35 | } | ||
36 | } else { | ||
37 | std::cout << "verbly datafile generator" << std::endl; | ||
38 | printUsage(); | ||
39 | } | ||
40 | } | ||
diff --git a/generator/notion.cpp b/generator/notion.cpp new file mode 100644 index 0000000..290d982 --- /dev/null +++ b/generator/notion.cpp | |||
@@ -0,0 +1,85 @@ | |||
1 | #include "notion.h" | ||
2 | #include <string> | ||
3 | #include <list> | ||
4 | #include "database.h" | ||
5 | #include "field.h" | ||
6 | |||
7 | namespace verbly { | ||
8 | namespace generator { | ||
9 | |||
10 | int notion::nextId_ = 0; | ||
11 | |||
12 | notion::notion( | ||
13 | part_of_speech partOfSpeech) : | ||
14 | id_(nextId_++), | ||
15 | partOfSpeech_(partOfSpeech) | ||
16 | { | ||
17 | } | ||
18 | |||
19 | notion::notion( | ||
20 | part_of_speech partOfSpeech, | ||
21 | int wnid) : | ||
22 | id_(nextId_++), | ||
23 | partOfSpeech_(partOfSpeech), | ||
24 | wnid_(wnid), | ||
25 | hasWnid_(true) | ||
26 | { | ||
27 | } | ||
28 | |||
29 | void notion::incrementNumOfImages() | ||
30 | { | ||
31 | // Calling code should always call hasWnid and check that the notion is a noun first. | ||
32 | assert(hasWnid_ && (partOfSpeech_ == part_of_speech::noun)); | ||
33 | |||
34 | numOfImages_++; | ||
35 | } | ||
36 | |||
37 | void notion::setPrepositionGroups(std::list<std::string> groups) | ||
38 | { | ||
39 | // Calling code should always check that the notion is a preposition first. | ||
40 | assert(partOfSpeech_ == part_of_speech::preposition); | ||
41 | |||
42 | prepositionGroups_ = groups; | ||
43 | } | ||
44 | |||
45 | database& operator<<(database& db, const notion& arg) | ||
46 | { | ||
47 | // First, serialize the notion | ||
48 | { | ||
49 | std::list<field> fields; | ||
50 | |||
51 | fields.emplace_back("notion_id", arg.getId()); | ||
52 | fields.emplace_back("part_of_speech", static_cast<int>(arg.getPartOfSpeech())); | ||
53 | |||
54 | if (arg.hasWnid()) | ||
55 | { | ||
56 | fields.emplace_back("wnid", arg.getWnid()); | ||
57 | |||
58 | if (arg.getPartOfSpeech() == part_of_speech::noun) | ||
59 | { | ||
60 | fields.emplace_back("images", arg.getNumOfImages()); | ||
61 | } | ||
62 | } | ||
63 | |||
64 | db.insertIntoTable("notions", std::move(fields)); | ||
65 | } | ||
66 | |||
67 | // Next, serialize the is_a relationship if this is a preposition | ||
68 | if (arg.getPartOfSpeech() == part_of_speech::preposition) | ||
69 | { | ||
70 | for (std::string group : arg.getPrepositionGroups()) | ||
71 | { | ||
72 | std::list<field> fields; | ||
73 | |||
74 | fields.emplace_back("notion_id", arg.getId()); | ||
75 | fields.emplace_back("groupname", group); | ||
76 | |||
77 | db.insertIntoTable("is_a", std::move(fields)); | ||
78 | } | ||
79 | } | ||
80 | |||
81 | return db; | ||
82 | } | ||
83 | |||
84 | }; | ||
85 | }; | ||
diff --git a/generator/notion.h b/generator/notion.h new file mode 100644 index 0000000..76210de --- /dev/null +++ b/generator/notion.h | |||
@@ -0,0 +1,91 @@ | |||
1 | #ifndef NOTION_H_221DE2BC | ||
2 | #define NOTION_H_221DE2BC | ||
3 | |||
4 | #include <cassert> | ||
5 | #include <list> | ||
6 | #include <string> | ||
7 | #include "enums.h" | ||
8 | |||
9 | namespace verbly { | ||
10 | namespace generator { | ||
11 | |||
12 | class database; | ||
13 | |||
14 | class notion { | ||
15 | public: | ||
16 | |||
17 | // Constructors | ||
18 | |||
19 | explicit notion(part_of_speech partOfSpeech); | ||
20 | |||
21 | notion(part_of_speech partOfSpeech, int wnid); | ||
22 | |||
23 | // Mutators | ||
24 | |||
25 | void incrementNumOfImages(); | ||
26 | |||
27 | void setPrepositionGroups(std::list<std::string> groups); | ||
28 | |||
29 | // Accessors | ||
30 | |||
31 | int getId() const | ||
32 | { | ||
33 | return id_; | ||
34 | } | ||
35 | |||
36 | part_of_speech getPartOfSpeech() const | ||
37 | { | ||
38 | return partOfSpeech_; | ||
39 | } | ||
40 | |||
41 | bool hasWnid() const | ||
42 | { | ||
43 | return hasWnid_; | ||
44 | } | ||
45 | |||
46 | int getWnid() const | ||
47 | { | ||
48 | // Calling code should always call hasWnid first. | ||
49 | assert(hasWnid_); | ||
50 | |||
51 | return wnid_; | ||
52 | } | ||
53 | |||
54 | int getNumOfImages() const | ||
55 | { | ||
56 | // Calling code should always call hasWnid and check that the notion is a noun first. | ||
57 | assert(hasWnid_ && (partOfSpeech_ == part_of_speech::noun)); | ||
58 | |||
59 | return numOfImages_; | ||
60 | } | ||
61 | |||
62 | std::list<std::string> getPrepositionGroups() const | ||
63 | { | ||
64 | // Calling code should always check that the notion is a preposition first. | ||
65 | assert(partOfSpeech_ == part_of_speech::preposition); | ||
66 | |||
67 | return prepositionGroups_; | ||
68 | } | ||
69 | |||
70 | private: | ||
71 | |||
72 | static int nextId_; | ||
73 | |||
74 | const int id_; | ||
75 | const part_of_speech partOfSpeech_; | ||
76 | const int wnid_ = 0; | ||
77 | const bool hasWnid_ = false; | ||
78 | |||
79 | int numOfImages_ = 0; | ||
80 | std::list<std::string> prepositionGroups_; | ||
81 | |||
82 | }; | ||
83 | |||
84 | // Serializer | ||
85 | |||
86 | database& operator<<(database& db, const notion& arg); | ||
87 | |||
88 | }; | ||
89 | }; | ||
90 | |||
91 | #endif /* end of include guard: NOTION_H_221DE2BC */ | ||
diff --git a/generator/part.cpp b/generator/part.cpp new file mode 100644 index 0000000..dbd4e11 --- /dev/null +++ b/generator/part.cpp | |||
@@ -0,0 +1,336 @@ | |||
1 | #include "part.h" | ||
2 | #include <stdexcept> | ||
3 | #include "selrestr.h" | ||
4 | |||
5 | namespace verbly { | ||
6 | namespace generator { | ||
7 | |||
8 | part part::createNounPhrase(std::string role, selrestr selrestrs, std::set<std::string> synrestrs) | ||
9 | { | ||
10 | part p(type::noun_phrase); | ||
11 | |||
12 | new(&p.noun_phrase_.role) std::string(std::move(role)); | ||
13 | new(&p.noun_phrase_.selrestrs) selrestr(std::move(selrestrs)); | ||
14 | new(&p.noun_phrase_.synrestrs) std::set<std::string>(std::move(synrestrs)); | ||
15 | |||
16 | return p; | ||
17 | } | ||
18 | |||
19 | part part::createVerb() | ||
20 | { | ||
21 | return part(type::verb); | ||
22 | } | ||
23 | |||
24 | part part::createPreposition(std::set<std::string> choices, bool literal) | ||
25 | { | ||
26 | part p(type::preposition); | ||
27 | |||
28 | new(&p.preposition_.choices) std::set<std::string>(std::move(choices)); | ||
29 | p.preposition_.literal = literal; | ||
30 | |||
31 | return p; | ||
32 | } | ||
33 | |||
34 | part part::createAdjective() | ||
35 | { | ||
36 | return part(type::adjective); | ||
37 | } | ||
38 | |||
39 | part part::createAdverb() | ||
40 | { | ||
41 | return part(type::adverb); | ||
42 | } | ||
43 | |||
44 | part part::createLiteral(std::string value) | ||
45 | { | ||
46 | part p(type::literal); | ||
47 | |||
48 | new(&p.literal_) std::string(std::move(value)); | ||
49 | |||
50 | return p; | ||
51 | } | ||
52 | |||
53 | part::part(const part& other) | ||
54 | { | ||
55 | type_ = other.type_; | ||
56 | |||
57 | switch (type_) | ||
58 | { | ||
59 | case type::noun_phrase: | ||
60 | { | ||
61 | new(&noun_phrase_.role) std::string(other.noun_phrase_.role); | ||
62 | new(&noun_phrase_.selrestrs) selrestr(other.noun_phrase_.selrestrs); | ||
63 | new(&noun_phrase_.synrestrs) std::set<std::string>(other.noun_phrase_.synrestrs); | ||
64 | |||
65 | break; | ||
66 | } | ||
67 | |||
68 | case type::preposition: | ||
69 | { | ||
70 | new(&preposition_.choices) std::set<std::string>(other.preposition_.choices); | ||
71 | preposition_.literal = other.preposition_.literal; | ||
72 | |||
73 | break; | ||
74 | } | ||
75 | |||
76 | case type::literal: | ||
77 | { | ||
78 | new(&literal_) std::string(other.literal_); | ||
79 | |||
80 | break; | ||
81 | } | ||
82 | |||
83 | case type::verb: | ||
84 | case type::adjective: | ||
85 | case type::adverb: | ||
86 | case type::invalid: | ||
87 | { | ||
88 | break; | ||
89 | } | ||
90 | } | ||
91 | } | ||
92 | |||
93 | part::part(part&& other) : part() | ||
94 | { | ||
95 | swap(*this, other); | ||
96 | } | ||
97 | |||
98 | part& part::operator=(part other) | ||
99 | { | ||
100 | swap(*this, other); | ||
101 | |||
102 | return *this; | ||
103 | } | ||
104 | |||
// Exchanges the contents of two parts.
//
// Because the payload lives in a union, the exchange is done by hand in three
// steps: (1) move first's active members into temporaries, (2) destroy first's
// members and rebuild first from second, (3) destroy second's members and
// rebuild second from the temporaries. The explicit destructor calls rely on
// ~part destroying exactly the members selected by type_, so the order of the
// statements below matters.
void swap(part& first, part& second)
{
  using type = part::type;

  // Temporaries for every possible alternative; only the ones matching
  // first's type are actually filled in and later read.
  type tempType = first.type_;
  std::string tempRole;
  selrestr tempSelrestrs;
  std::set<std::string> tempSynrestrs;
  std::set<std::string> tempChoices;
  bool tempPrepLiteral;
  std::string tempLiteralValue;

  // Step 1: stash first's payload.
  switch (tempType)
  {
    case type::noun_phrase:
    {
      tempRole = std::move(first.noun_phrase_.role);
      tempSelrestrs = std::move(first.noun_phrase_.selrestrs);
      tempSynrestrs = std::move(first.noun_phrase_.synrestrs);

      break;
    }

    case type::preposition:
    {
      tempChoices = std::move(first.preposition_.choices);
      tempPrepLiteral = first.preposition_.literal;

      break;
    }

    case type::literal:
    {
      tempLiteralValue = std::move(first.literal_);

      break;
    }

    case type::verb:
    case type::adjective:
    case type::adverb:
    case type::invalid:
    {
      break;
    }
  }

  // Step 2: tear down first's (moved-from) members while type_ still names
  // them, then rebuild first in place from second.
  first.~part();

  first.type_ = second.type_;

  switch (first.type_)
  {
    case type::noun_phrase:
    {
      new(&first.noun_phrase_.role) std::string(std::move(second.noun_phrase_.role));
      new(&first.noun_phrase_.selrestrs) selrestr(std::move(second.noun_phrase_.selrestrs));
      new(&first.noun_phrase_.synrestrs) std::set<std::string>(std::move(second.noun_phrase_.synrestrs));

      break;
    }

    case type::preposition:
    {
      new(&first.preposition_.choices) std::set<std::string>(std::move(second.preposition_.choices));
      first.preposition_.literal = second.preposition_.literal;

      break;
    }

    case type::literal:
    {
      new(&first.literal_) std::string(std::move(second.literal_));

      break;
    }

    case type::verb:
    case type::adjective:
    case type::adverb:
    case type::invalid:
    {
      break;
    }
  }

  // Step 3: tear down second's (moved-from) members, then rebuild second in
  // place from the temporaries saved in step 1.
  second.~part();

  second.type_ = tempType;

  switch (second.type_)
  {
    case type::noun_phrase:
    {
      new(&second.noun_phrase_.role) std::string(std::move(tempRole));
      new(&second.noun_phrase_.selrestrs) selrestr(std::move(tempSelrestrs));
      new(&second.noun_phrase_.synrestrs) std::set<std::string>(std::move(tempSynrestrs));

      break;
    }

    case type::preposition:
    {
      new(&second.preposition_.choices) std::set<std::string>(std::move(tempChoices));
      second.preposition_.literal = tempPrepLiteral;

      break;
    }

    case type::literal:
    {
      new(&second.literal_) std::string(std::move(tempLiteralValue));

      break;
    }

    case type::verb:
    case type::adjective:
    case type::adverb:
    case type::invalid:
    {
      break;
    }
  }
}
230 | |||
231 | part::~part() | ||
232 | { | ||
233 | switch (type_) | ||
234 | { | ||
235 | case type::noun_phrase: | ||
236 | { | ||
237 | using string_type = std::string; | ||
238 | using set_type = std::set<std::string>; | ||
239 | |||
240 | noun_phrase_.role.~string_type(); | ||
241 | noun_phrase_.selrestrs.~selrestr(); | ||
242 | noun_phrase_.synrestrs.~set_type(); | ||
243 | |||
244 | break; | ||
245 | } | ||
246 | |||
247 | case type::preposition: | ||
248 | { | ||
249 | using set_type = std::set<std::string>; | ||
250 | |||
251 | preposition_.choices.~set_type(); | ||
252 | |||
253 | break; | ||
254 | } | ||
255 | |||
256 | case type::literal: | ||
257 | { | ||
258 | using string_type = std::string; | ||
259 | |||
260 | literal_.~string_type(); | ||
261 | |||
262 | break; | ||
263 | } | ||
264 | |||
265 | case type::verb: | ||
266 | case type::adjective: | ||
267 | case type::adverb: | ||
268 | case type::invalid: | ||
269 | { | ||
270 | break; | ||
271 | } | ||
272 | } | ||
273 | } | ||
274 | |||
275 | std::string part::getNounRole() const | ||
276 | { | ||
277 | if (type_ == type::noun_phrase) | ||
278 | { | ||
279 | return noun_phrase_.role; | ||
280 | } else { | ||
281 | throw std::domain_error("part::getNounRole is only valid for noun phrase parts"); | ||
282 | } | ||
283 | } | ||
284 | |||
285 | selrestr part::getNounSelrestrs() const | ||
286 | { | ||
287 | if (type_ == type::noun_phrase) | ||
288 | { | ||
289 | return noun_phrase_.selrestrs; | ||
290 | } else { | ||
291 | throw std::domain_error("part::getNounSelrestrs is only valid for noun phrase parts"); | ||
292 | } | ||
293 | } | ||
294 | |||
295 | std::set<std::string> part::getNounSynrestrs() const | ||
296 | { | ||
297 | if (type_ == type::noun_phrase) | ||
298 | { | ||
299 | return noun_phrase_.synrestrs; | ||
300 | } else { | ||
301 | throw std::domain_error("part::getNounSynrestrs is only valid for noun phrase parts"); | ||
302 | } | ||
303 | } | ||
304 | |||
305 | std::set<std::string> part::getPrepositionChoices() const | ||
306 | { | ||
307 | if (type_ == type::preposition) | ||
308 | { | ||
309 | return preposition_.choices; | ||
310 | } else { | ||
311 | throw std::domain_error("part::getPrepositionChoices is only valid for preposition parts"); | ||
312 | } | ||
313 | } | ||
314 | |||
315 | bool part::isPrepositionLiteral() const | ||
316 | { | ||
317 | if (type_ == type::preposition) | ||
318 | { | ||
319 | return preposition_.literal; | ||
320 | } else { | ||
321 | throw std::domain_error("part::isPrepositionLiteral is only valid for preposition parts"); | ||
322 | } | ||
323 | } | ||
324 | |||
325 | std::string part::getLiteralValue() const | ||
326 | { | ||
327 | if (type_ == type::literal) | ||
328 | { | ||
329 | return literal_; | ||
330 | } else { | ||
331 | throw std::domain_error("part::getLiteralValue is only valid for literal parts"); | ||
332 | } | ||
333 | } | ||
334 | |||
335 | }; | ||
336 | }; | ||
diff --git a/generator/part.h b/generator/part.h new file mode 100644 index 0000000..d044630 --- /dev/null +++ b/generator/part.h | |||
@@ -0,0 +1,114 @@ | |||
1 | #ifndef PART_H_FB54F361 | ||
2 | #define PART_H_FB54F361 | ||
3 | |||
4 | #include <string> | ||
5 | #include <set> | ||
6 | #include "selrestr.h" | ||
7 | |||
8 | namespace verbly { | ||
9 | namespace generator { | ||
10 | |||
11 | class part { | ||
12 | public: | ||
13 | enum class type { | ||
14 | invalid = -1, | ||
15 | noun_phrase = 0, | ||
16 | verb = 1, | ||
17 | preposition = 2, | ||
18 | adjective = 3, | ||
19 | adverb = 4, | ||
20 | literal = 5 | ||
21 | }; | ||
22 | |||
23 | // Static factories | ||
24 | |||
25 | static part createNounPhrase(std::string role, selrestr selrestrs, std::set<std::string> synrestrs); | ||
26 | |||
27 | static part createVerb(); | ||
28 | |||
29 | static part createPreposition(std::set<std::string> choices, bool literal); | ||
30 | |||
31 | static part createAdjective(); | ||
32 | |||
33 | static part createAdverb(); | ||
34 | |||
35 | static part createLiteral(std::string value); | ||
36 | |||
37 | // Copy and move constructors | ||
38 | |||
39 | part(const part& other); | ||
40 | |||
41 | part(part&& other); | ||
42 | |||
43 | // Assignment | ||
44 | |||
45 | part& operator=(part other); | ||
46 | |||
47 | // Swap | ||
48 | |||
49 | friend void swap(part& first, part& second); | ||
50 | |||
51 | // Destructor | ||
52 | |||
53 | ~part(); | ||
54 | |||
55 | // General accessors | ||
56 | |||
57 | type getType() const | ||
58 | { | ||
59 | return type_; | ||
60 | } | ||
61 | |||
62 | // Noun phrase accessors | ||
63 | |||
64 | std::string getNounRole() const; | ||
65 | |||
66 | selrestr getNounSelrestrs() const; | ||
67 | |||
68 | std::set<std::string> getNounSynrestrs() const; | ||
69 | |||
70 | // Preposition accessors | ||
71 | |||
72 | std::set<std::string> getPrepositionChoices() const; | ||
73 | |||
74 | bool isPrepositionLiteral() const; | ||
75 | |||
76 | // Literal accessors | ||
77 | |||
78 | std::string getLiteralValue() const; | ||
79 | |||
80 | private: | ||
81 | |||
82 | // Private constructors | ||
83 | |||
84 | part() | ||
85 | { | ||
86 | } | ||
87 | |||
88 | part(type t) : type_(t) | ||
89 | { | ||
90 | } | ||
91 | |||
92 | // Data | ||
93 | |||
94 | union { | ||
95 | struct { | ||
96 | std::string role; | ||
97 | selrestr selrestrs; | ||
98 | std::set<std::string> synrestrs; | ||
99 | } noun_phrase_; | ||
100 | struct { | ||
101 | std::set<std::string> choices; | ||
102 | bool literal; | ||
103 | } preposition_; | ||
104 | std::string literal_; | ||
105 | }; | ||
106 | |||
107 | type type_ = type::invalid; | ||
108 | |||
109 | }; | ||
110 | |||
111 | }; | ||
112 | }; | ||
113 | |||
114 | #endif /* end of include guard: PART_H_FB54F361 */ | ||
diff --git a/generator/progress.h b/generator/progress.h index 81f07a3..fcb680d 100644 --- a/generator/progress.h +++ b/generator/progress.h | |||
@@ -3,48 +3,54 @@ | |||
3 | 3 | ||
4 | #include <string> | 4 | #include <string> |
5 | 5 | ||
namespace verbly {
  namespace generator {

    // Console progress indicator. Prints a message followed by a percentage
    // on construction, rewrites the percentage in place (using backspaces) as
    // update() is called, and prints a final 100% plus newline on destruction.
    //
    // NOTE(review): this class streams to std::cout; confirm that <iostream>
    // is included wherever this header is used.
    class progress {
      private:
        std::string message;
        int total;
        int cur = 0;
        int lprint = 0;

      public:
        // message: label printed before the percentage.
        // total: number of steps that corresponds to 100%.
        progress(std::string message, int total) : message(message), total(total)
        {
          std::cout << message << " 0%" << std::flush;
        }

        // Sets the current step to val (capped at total) and reprints the
        // percentage if its integer value changed since the last print.
        void update(int val)
        {
          if (val <= total)
          {
            cur = val;
          } else {
            cur = total;
          }

          // A task with no steps is treated as complete; this also avoids
          // dividing by zero. The product is computed in long long so that
          // large step counts cannot overflow int.
          int pp = 100;
          if (total > 0)
          {
            pp = static_cast<int>(static_cast<long long>(cur) * 100 / total);
          }

          if (pp != lprint)
          {
            lprint = pp;

            std::cout << "\b\b\b\b" << std::right;
            std::cout.width(3);
            std::cout << pp << "%" << std::flush;
          }
        }

        // Advances the current step by one.
        void update()
        {
          update(cur+1);
        }

        ~progress()
        {
          std::cout << "\b\b\b\b100%" << std::endl;
        }
    };

  };
};
49 | 55 | ||
50 | #endif /* end of include guard: PROGRESS_H_A34EF856 */ | 56 | #endif /* end of include guard: PROGRESS_H_A34EF856 */ |
diff --git a/generator/pronunciation.cpp b/generator/pronunciation.cpp new file mode 100644 index 0000000..eb07607 --- /dev/null +++ b/generator/pronunciation.cpp | |||
@@ -0,0 +1,87 @@ | |||
1 | #include "pronunciation.h" | ||
2 | #include <list> | ||
3 | #include <algorithm> | ||
4 | #include <cctype> | ||
5 | #include <iterator> | ||
6 | #include "database.h" | ||
7 | #include "field.h" | ||
8 | #include "../lib/util.h" | ||
9 | |||
10 | namespace verbly { | ||
11 | namespace generator { | ||
12 | |||
13 | int pronunciation::nextId_ = 0; | ||
14 | |||
15 | pronunciation::pronunciation(std::string phonemes) : | ||
16 | id_(nextId_++), | ||
17 | phonemes_(phonemes) | ||
18 | { | ||
19 | auto phonemeList = split<std::list<std::string>>(phonemes, " "); | ||
20 | |||
21 | auto rhymeStart = std::find_if(std::begin(phonemeList), std::end(phonemeList), [] (std::string phoneme) { | ||
22 | return phoneme.find("1") != std::string::npos; | ||
23 | }); | ||
24 | |||
25 | // Rhyme detection | ||
26 | if (rhymeStart != std::end(phonemeList)) | ||
27 | { | ||
28 | std::list<std::string> rhymePhonemes; | ||
29 | |||
30 | std::transform(rhymeStart, std::end(phonemeList), std::back_inserter(rhymePhonemes), [] (std::string phoneme) { | ||
31 | std::string naked; | ||
32 | |||
33 | std::remove_copy_if(std::begin(phoneme), std::end(phoneme), std::back_inserter(naked), [] (char ch) { | ||
34 | return std::isdigit(ch); | ||
35 | }); | ||
36 | |||
37 | return naked; | ||
38 | }); | ||
39 | |||
40 | rhyme_ = implode(std::begin(rhymePhonemes), std::end(rhymePhonemes), " "); | ||
41 | |||
42 | if (rhymeStart != std::begin(phonemeList)) | ||
43 | { | ||
44 | prerhyme_ = *std::prev(rhymeStart); | ||
45 | } | ||
46 | } | ||
47 | |||
48 | // Syllable/stress | ||
49 | for (std::string phoneme : phonemeList) | ||
50 | { | ||
51 | if (std::isdigit(phoneme.back())) | ||
52 | { | ||
53 | // It's a vowel! | ||
54 | syllables_++; | ||
55 | |||
56 | if (phoneme.back() == '1') | ||
57 | { | ||
58 | stress_.push_back('1'); | ||
59 | } else { | ||
60 | stress_.push_back('0'); | ||
61 | } | ||
62 | } | ||
63 | } | ||
64 | } | ||
65 | |||
66 | database& operator<<(database& db, const pronunciation& arg) | ||
67 | { | ||
68 | std::list<field> fields; | ||
69 | |||
70 | fields.emplace_back("pronunciation_id", arg.getId()); | ||
71 | fields.emplace_back("phonemes", arg.getPhonemes()); | ||
72 | fields.emplace_back("syllables", arg.getSyllables()); | ||
73 | fields.emplace_back("stress", arg.getStress()); | ||
74 | |||
75 | if (arg.hasRhyme()) | ||
76 | { | ||
77 | fields.emplace_back("rhyme", arg.getRhymePhonemes()); | ||
78 | fields.emplace_back("prerhyme", arg.getPrerhyme()); | ||
79 | } | ||
80 | |||
81 | db.insertIntoTable("pronunciations", std::move(fields)); | ||
82 | |||
83 | return db; | ||
84 | } | ||
85 | |||
86 | }; | ||
87 | }; | ||
diff --git a/generator/pronunciation.h b/generator/pronunciation.h new file mode 100644 index 0000000..81be6c4 --- /dev/null +++ b/generator/pronunciation.h | |||
@@ -0,0 +1,82 @@ | |||
1 | #ifndef PRONUNCIATION_H_584A08DD | ||
2 | #define PRONUNCIATION_H_584A08DD | ||
3 | |||
4 | #include <string> | ||
5 | #include <cassert> | ||
6 | |||
7 | namespace verbly { | ||
8 | namespace generator { | ||
9 | |||
10 | class database; | ||
11 | |||
// A pronunciation as written out by the generator: a sequentially assigned ID,
// the phoneme string it was built from, and the rhyme, prerhyme, syllable
// count and stress pattern that the constructor derives from it.
class pronunciation {
public:

  // Constructor

  explicit pronunciation(std::string phonemes);

  // Accessors

  int getId() const { return id_; }

  std::string getPhonemes() const { return phonemes_; }

  // True when a rhyme was derived, i.e. rhyme_ is not empty.
  bool hasRhyme() const { return !rhyme_.empty(); }

  std::string getRhymePhonemes() const
  {
    // Only meaningful when hasRhyme() is true; callers must check first.
    assert(!rhyme_.empty());

    return rhyme_;
  }

  std::string getPrerhyme() const
  {
    // Only meaningful when hasRhyme() is true; callers must check first.
    assert(!rhyme_.empty());

    return prerhyme_;
  }

  int getSyllables() const { return syllables_; }

  std::string getStress() const { return stress_; }

private:

  // Next ID to hand out; shared by all pronunciations.
  static int nextId_;

  // Fixed at construction.
  const int id_;
  const std::string phonemes_;

  // Derived by the constructor.
  std::string rhyme_;
  std::string prerhyme_;
  int syllables_ = 0;
  std::string stress_;

};
74 | |||
75 | // Serializer | ||
76 | |||
77 | database& operator<<(database& db, const pronunciation& arg); | ||
78 | |||
79 | }; | ||
80 | }; | ||
81 | |||
82 | #endif /* end of include guard: PRONUNCIATION_H_584A08DD */ | ||
diff --git a/generator/role.h b/generator/role.h new file mode 100644 index 0000000..5fa68b8 --- /dev/null +++ b/generator/role.h | |||
@@ -0,0 +1,35 @@ | |||
1 | #ifndef ROLE_H_249F9A9C | ||
2 | #define ROLE_H_249F9A9C | ||
3 | |||
4 | #include "selrestr.h" | ||
5 | |||
6 | namespace verbly { | ||
7 | namespace generator { | ||
8 | |||
9 | class role { | ||
10 | public: | ||
11 | |||
12 | // Mutators | ||
13 | |||
14 | void setSelrestrs(selrestr selrestrs) | ||
15 | { | ||
16 | selrestrs_ = selrestrs; | ||
17 | } | ||
18 | |||
19 | // Accessors | ||
20 | |||
21 | const selrestr& getSelrestrs() const | ||
22 | { | ||
23 | return selrestrs_; | ||
24 | } | ||
25 | |||
26 | private: | ||
27 | |||
28 | selrestr selrestrs_; | ||
29 | |||
30 | }; | ||
31 | |||
32 | }; | ||
33 | }; | ||
34 | |||
35 | #endif /* end of include guard: ROLE_H_249F9A9C */ | ||
diff --git a/generator/schema.sql b/generator/schema.sql index 410b536..c3e54d8 100644 --- a/generator/schema.sql +++ b/generator/schema.sql | |||
@@ -1,286 +1,204 @@ | |||
1 | DROP TABLE IF EXISTS `verbs`; | 1 | CREATE TABLE `notions` ( |
2 | CREATE TABLE `verbs` ( | 2 | `notion_id` INTEGER PRIMARY KEY, |
3 | `verb_id` INTEGER PRIMARY KEY, | 3 | `part_of_speech` SMALLINT NOT NULL, |
4 | `infinitive` VARCHAR(32) NOT NULL, | 4 | `wnid` INTEGER, |
5 | `past_tense` VARCHAR(32) NOT NULL, | 5 | `images` INTEGER |
6 | `past_participle` VARCHAR(32) NOT NULL, | ||
7 | `ing_form` VARCHAR(32) NOT NULL, | ||
8 | `s_form` VARCHAR(32) NOT NULL | ||
9 | ); | 6 | ); |
10 | 7 | ||
11 | DROP TABLE IF EXISTS `groups`; | 8 | CREATE UNIQUE INDEX `notion_by_wnid` ON `notions`(`wnid`); |
12 | CREATE TABLE `groups` ( | ||
13 | `group_id` INTEGER PRIMARY KEY, | ||
14 | `data` BLOB NOT NULL | ||
15 | ); | ||
16 | |||
17 | DROP TABLE IF EXISTS `frames`; | ||
18 | CREATE TABLE `frames` ( | ||
19 | `frame_id` INTEGER PRIMARY KEY, | ||
20 | `group_id` INTEGER NOT NULL, | ||
21 | `data` BLOB NOT NULL, | ||
22 | FOREIGN KEY (`group_id`) REFERENCES `groups`(`group_id`) | ||
23 | ); | ||
24 | 9 | ||
25 | DROP TABLE IF EXISTS `verb_groups`; | ||
26 | CREATE TABLE `verb_groups` ( | ||
27 | `verb_id` INTEGER NOT NULL, | ||
28 | `group_id` INTEGER NOT NULL, | ||
29 | FOREIGN KEY (`verb_id`) REFERENCES `verbs`(`verb_id`), | ||
30 | FOREIGN KEY (`group_id`) REFERENCES `groups`(`group_id`) | ||
31 | ); | ||
32 | |||
33 | DROP TABLE IF EXISTS `adjectives`; | ||
34 | CREATE TABLE `adjectives` ( | ||
35 | `adjective_id` INTEGER PRIMARY KEY, | ||
36 | `base_form` VARCHAR(32) NOT NULL, | ||
37 | `comparative` VARCHAR(32), | ||
38 | `superlative` VARCHAR(32), | ||
39 | `position` CHAR(1), | ||
40 | `complexity` INTEGER NOT NULL | ||
41 | ); | ||
42 | |||
43 | DROP TABLE IF EXISTS `adverbs`; | ||
44 | CREATE TABLE `adverbs` ( | ||
45 | `adverb_id` INTEGER PRIMARY KEY, | ||
46 | `base_form` VARCHAR(32) NOT NULL, | ||
47 | `comparative` VARCHAR(32), | ||
48 | `superlative` VARCHAR(32), | ||
49 | `complexity` INTEGER NOT NULL | ||
50 | ); | ||
51 | |||
52 | DROP TABLE IF EXISTS `nouns`; | ||
53 | CREATE TABLE `nouns` ( | ||
54 | `noun_id` INTEGER PRIMARY KEY, | ||
55 | `singular` VARCHAR(32) NOT NULL, | ||
56 | `plural` VARCHAR(32), | ||
57 | `proper` INTEGER(1) NOT NULL, | ||
58 | `complexity` INTEGER NOT NULL, | ||
59 | `images` INTEGER NOT NULL, | ||
60 | `wnid` INTEGER NOT NULL | ||
61 | ); | ||
62 | |||
63 | DROP TABLE IF EXISTS `hypernymy`; | ||
64 | CREATE TABLE `hypernymy` ( | 10 | CREATE TABLE `hypernymy` ( |
65 | `hypernym_id` INTEGER NOT NULL, | 11 | `hypernym_id` INTEGER NOT NULL, |
66 | `hyponym_id` INTEGER NOT NULL, | 12 | `hyponym_id` INTEGER NOT NULL |
67 | FOREIGN KEY (`hypernym_id`) REFERENCES `nouns`(`noun_id`), | ||
68 | FOREIGN KEY (`hyponym_id`) REFERENCES `nouns`(`noun_id`) | ||
69 | ); | 13 | ); |
70 | 14 | ||
71 | DROP TABLE IF EXISTS `instantiation`; | 15 | CREATE INDEX `hyponym_of` ON `hypernymy`(`hypernym_id`); |
16 | CREATE INDEX `hypernym_of` ON `hypernymy`(`hyponym_id`); | ||
17 | |||
72 | CREATE TABLE `instantiation` ( | 18 | CREATE TABLE `instantiation` ( |
73 | `class_id` INTEGER NOT NULL, | 19 | `class_id` INTEGER NOT NULL, |
74 | `instance_id` INTEGER NOT NULL, | 20 | `instance_id` INTEGER NOT NULL |
75 | FOREIGN KEY (`class_id`) REFERENCES `nouns`(`noun_id`), | ||
76 | FOREIGN KEY (`instance_id`) REFERENCES `nouns`(`noun_id`) | ||
77 | ); | 21 | ); |
78 | 22 | ||
79 | DROP TABLE IF EXISTS `member_meronymy`; | 23 | CREATE INDEX `instance_of` ON `instantiation`(`class_id`); |
24 | CREATE INDEX `class_of` ON `instantiation`(`instance_id`); | ||
25 | |||
80 | CREATE TABLE `member_meronymy` ( | 26 | CREATE TABLE `member_meronymy` ( |
81 | `meronym_id` INTEGER NOT NULL, | 27 | `meronym_id` INTEGER NOT NULL, |
82 | `holonym_id` INTEGER NOT NULL, | 28 | `holonym_id` INTEGER NOT NULL |
83 | FOREIGN KEY (`meronym_id`) REFERENCES `nouns`(`noun_id`), | ||
84 | FOREIGN KEY (`holonym_id`) REFERENCES `nouns`(`noun_id`) | ||
85 | ); | 29 | ); |
86 | 30 | ||
87 | DROP TABLE IF EXISTS `part_meronymy`; | 31 | CREATE INDEX `member_holonym_of` ON `member_meronymy`(`meronym_id`); |
32 | CREATE INDEX `member_meronym_of` ON `member_meronymy`(`holonym_id`); | ||
33 | |||
88 | CREATE TABLE `part_meronymy` ( | 34 | CREATE TABLE `part_meronymy` ( |
89 | `meronym_id` INTEGER NOT NULL, | 35 | `meronym_id` INTEGER NOT NULL, |
90 | `holonym_id` INTEGER NOT NULL, | 36 | `holonym_id` INTEGER NOT NULL |
91 | FOREIGN KEY (`meronym_id`) REFERENCES `nouns`(`noun_id`), | ||
92 | FOREIGN KEY (`holonym_id`) REFERENCES `nouns`(`noun_id`) | ||
93 | ); | 37 | ); |
94 | 38 | ||
95 | DROP TABLE IF EXISTS `substance_meronymy`; | 39 | CREATE INDEX `part_holonym_of` ON `part_meronymy`(`meronym_id`); |
40 | CREATE INDEX `part_meronym_of` ON `part_meronymy`(`holonym_id`); | ||
41 | |||
96 | CREATE TABLE `substance_meronymy` ( | 42 | CREATE TABLE `substance_meronymy` ( |
97 | `meronym_id` INTEGER NOT NULL, | 43 | `meronym_id` INTEGER NOT NULL, |
98 | `holonym_id` INTEGER NOT NULL, | 44 | `holonym_id` INTEGER NOT NULL |
99 | FOREIGN KEY (`meronym_id`) REFERENCES `nouns`(`noun_id`), | ||
100 | FOREIGN KEY (`holonym_id`) REFERENCES `nouns`(`noun_id`) | ||
101 | ); | 45 | ); |
102 | 46 | ||
103 | DROP TABLE IF EXISTS `variation`; | 47 | CREATE INDEX `substance_holonym_of` ON `substance_meronymy`(`meronym_id`); |
48 | CREATE INDEX `substance_meronym_of` ON `substance_meronymy`(`holonym_id`); | ||
49 | |||
104 | CREATE TABLE `variation` ( | 50 | CREATE TABLE `variation` ( |
105 | `noun_id` INTEGER NOT NULL, | 51 | `noun_id` INTEGER NOT NULL, |
106 | `adjective_id` INTEGER NOT NULL, | 52 | `adjective_id` INTEGER NOT NULL |
107 | FOREIGN KEY (`noun_id`) REFERENCES `nouns`(`noun_id`), | ||
108 | FOREIGN KEY (`adjective_id`) REFERENCES `adjectives`(`adjective_id`) | ||
109 | ); | 53 | ); |
110 | 54 | ||
111 | DROP TABLE IF EXISTS `noun_antonymy`; | 55 | CREATE INDEX `variant_of` ON `variation`(`noun_id`); |
112 | CREATE TABLE `noun_antonymy` ( | 56 | CREATE INDEX `attribute_of` ON `variation`(`adjective_id`); |
113 | `noun_1_id` INTEGER NOT NULL, | ||
114 | `noun_2_id` INTEGER NOT NULL, | ||
115 | FOREIGN KEY (`noun_1_id`) REFERENCES `nouns`(`noun_id`), | ||
116 | FOREIGN KEY (`noun_2_id`) REFERENCES `nouns`(`noun_id`) | ||
117 | ); | ||
118 | 57 | ||
119 | DROP TABLE IF EXISTS `adjective_antonymy`; | 58 | CREATE TABLE `similarity` ( |
120 | CREATE TABLE `adjective_antonymy` ( | ||
121 | `adjective_1_id` INTEGER NOT NULL, | 59 | `adjective_1_id` INTEGER NOT NULL, |
122 | `adjective_2_id` INTEGER NOT NULL, | 60 | `adjective_2_id` INTEGER NOT NULL |
123 | FOREIGN KEY (`adjective_1_id`) REFERENCES `adjectives`(`adjective_id`), | 61 | ); |
124 | FOREIGN KEY (`adjective_2_id`) REFERENCES `adjectives`(`adjective_id`) | 62 | |
63 | CREATE INDEX `similar_to` ON `similarity`(`adjective_1_id`); | ||
64 | |||
65 | CREATE TABLE `is_a` ( | ||
66 | `notion_id` INTEGER NOT NULL, | ||
67 | `groupname` VARCHAR(32) NOT NULL | ||
125 | ); | 68 | ); |
126 | 69 | ||
127 | DROP TABLE IF EXISTS `adverb_antonymy`; | 70 | CREATE TABLE `entailment` ( |
128 | CREATE TABLE `adverb_antonymy` ( | 71 | `given_id` INTEGER NOT NULL, |
129 | `adverb_1_id` INTEGER NOT NULL, | 72 | `entailment_id` INTEGER NOT NULL |
130 | `adverb_2_id` INTEGER NOT NULL, | 73 | ); |
131 | FOREIGN KEY (`adverb_1_id`) REFERENCES `adverbs`(`adverb_id`), | 74 | |
132 | FOREIGN KEY (`adverb_2_id`) REFERENCES `adverbs`(`adverb_id`) | 75 | CREATE INDEX `entailment_of` ON `entailment`(`given_id`); |
76 | CREATE INDEX `entailed_by` ON `entailment`(`entailment_id`); | ||
77 | |||
78 | CREATE TABLE `causality` ( | ||
79 | `cause_id` INTEGER NOT NULL, | ||
80 | `effect_id` INTEGER NOT NULL | ||
81 | ); | ||
82 | |||
83 | CREATE INDEX `effect_of` ON `causality`(`cause_id`); | ||
84 | CREATE INDEX `cause_of` ON `causality`(`effect_id`); | ||
85 | |||
86 | CREATE TABLE `words` ( | ||
87 | `word_id` INTEGER PRIMARY KEY, | ||
88 | `notion_id` INTEGER NOT NULL, | ||
89 | `lemma_id` INTEGER NOT NULL, | ||
90 | `tag_count` INTEGER, | ||
91 | `position` SMALLINT, | ||
92 | `group_id` INTEGER | ||
93 | ); | ||
94 | |||
95 | CREATE INDEX `notion_words` ON `words`(`notion_id`); | ||
96 | CREATE INDEX `lemma_words` ON `words`(`lemma_id`); | ||
97 | CREATE INDEX `group_words` ON `words`(`group_id`); | ||
98 | |||
99 | CREATE TABLE `antonymy` ( | ||
100 | `antonym_1_id` INTEGER NOT NULL, | ||
101 | `antonym_2_id` INTEGER NOT NULL | ||
133 | ); | 102 | ); |
134 | 103 | ||
135 | DROP TABLE IF EXISTS `specification`; | 104 | CREATE INDEX `antonym_of` ON `antonymy`(`antonym_1_id`); |
105 | |||
136 | CREATE TABLE `specification` ( | 106 | CREATE TABLE `specification` ( |
137 | `general_id` INTEGER NOT NULL, | 107 | `general_id` INTEGER NOT NULL, |
138 | `specific_id` INTEGER NOT NULL, | 108 | `specific_id` INTEGER NOT NULL |
139 | FOREIGN KEY (`general_id`) REFERENCES `adjectives`(`adjective_id`), | ||
140 | FOREIGN KEY (`specific_id`) REFERENCES `adjectives`(`adjective_id`) | ||
141 | ); | 109 | ); |
142 | 110 | ||
143 | DROP TABLE IF EXISTS `pertainymy`; | 111 | CREATE INDEX `specification_of` ON `specification`(`general_id`); |
112 | CREATE INDEX `generalization_of` ON `specification`(`specific_id`); | ||
113 | |||
144 | CREATE TABLE `pertainymy` ( | 114 | CREATE TABLE `pertainymy` ( |
145 | `noun_id` INTEGER NOT NULL, | 115 | `noun_id` INTEGER NOT NULL, |
146 | `pertainym_id` INTEGER NOT NULL, | 116 | `pertainym_id` INTEGER NOT NULL |
147 | FOREIGN KEY (`noun_id`) REFERENCES `nouns`(`noun_id`), | ||
148 | FOREIGN KEY (`pertainym_id`) REFERENCES `adjectives`(`adjective_id`) | ||
149 | ); | 117 | ); |
150 | 118 | ||
151 | DROP TABLE IF EXISTS `mannernymy`; | 119 | CREATE INDEX `pertainym_of` ON `pertainymy`(`noun_id`); |
120 | CREATE INDEX `anti_pertainym_of` ON `pertainymy`(`pertainym_id`); | ||
121 | |||
152 | CREATE TABLE `mannernymy` ( | 122 | CREATE TABLE `mannernymy` ( |
153 | `adjective_id` INTEGER NOT NULL, | 123 | `adjective_id` INTEGER NOT NULL, |
154 | `mannernym_id` INTEGER NOT NULL, | 124 | `mannernym_id` INTEGER NOT NULL |
155 | FOREIGN KEY (`adjective_id`) REFERENCES `adjectives`(`adjective_id`), | ||
156 | FOREIGN KEY (`mannernym_id`) REFERENCES `adverbs`(`adverb_id`) | ||
157 | ); | 125 | ); |
158 | 126 | ||
159 | DROP TABLE IF EXISTS `noun_synonymy`; | 127 | CREATE INDEX `mannernym_of` ON `mannernymy`(`adjective_id`); |
160 | CREATE TABLE `noun_synonymy` ( | 128 | CREATE INDEX `anti_mannernym_of` ON `mannernymy`(`mannernym_id`); |
161 | `noun_1_id` INTEGER NOT NULL, | ||
162 | `noun_2_id` INTEGER NOT NULL, | ||
163 | FOREIGN KEY (`noun_1_id`) REFERENCES `nouns`(`nouns_id`), | ||
164 | FOREIGN KEY (`noun_2_id`) REFERENCES `nouns`(`nouns_id`) | ||
165 | ); | ||
166 | 129 | ||
167 | DROP TABLE IF EXISTS `adjective_synonymy`; | 130 | CREATE TABLE `usage` ( |
168 | CREATE TABLE `adjective_synonymy` ( | 131 | `domain_id` INTEGER NOT NULL, |
169 | `adjective_1_id` INTEGER NOT NULL, | 132 | `term_id` INTEGER NOT NULL |
170 | `adjective_2_id` INTEGER NOT NULL, | ||
171 | FOREIGN KEY (`adjective_1_id`) REFERENCES `adjectives`(`adjective_id`), | ||
172 | FOREIGN KEY (`adjective_2_id`) REFERENCES `adjectives`(`adjective_id`) | ||
173 | ); | 133 | ); |
174 | 134 | ||
175 | DROP TABLE IF EXISTS `adverb_synonymy`; | 135 | CREATE INDEX `usage_term_of` ON `usage`(`domain_id`); |
176 | CREATE TABLE `adverb_synonymy` ( | 136 | CREATE INDEX `usage_domain_of` ON `usage`(`term_id`); |
177 | `adverb_1_id` INTEGER NOT NULL, | ||
178 | `adverb_2_id` INTEGER NOT NULL, | ||
179 | FOREIGN KEY (`adverb_1_id`) REFERENCES `adverbs`(`adverb_id`), | ||
180 | FOREIGN KEY (`adverb_2_id`) REFERENCES `adverbs`(`adverb_id`) | ||
181 | ); | ||
182 | 137 | ||
183 | DROP TABLE IF EXISTS `noun_pronunciations`; | 138 | CREATE TABLE `topicality` ( |
184 | CREATE TABLE `noun_pronunciations` ( | 139 | `domain_id` INTEGER NOT NULL, |
185 | `noun_id` INTEGER NOT NULL, | 140 | `term_id` INTEGER NOT NULL |
186 | `pronunciation` VARCHAR(64) NOT NULL, | ||
187 | `prerhyme` VARCHAR(8), | ||
188 | `rhyme` VARCHAR(64), | ||
189 | `syllables` INT NOT NULL, | ||
190 | `stress` VARCHAR(64) NOT NULL, | ||
191 | FOREIGN KEY (`noun_id`) REFERENCES `nouns`(`noun_id`) | ||
192 | ); | 141 | ); |
193 | 142 | ||
194 | DROP TABLE IF EXISTS `verb_pronunciations`; | 143 | CREATE INDEX `topical_term_of` ON `topicality`(`domain_id`); |
195 | CREATE TABLE `verb_pronunciations` ( | 144 | CREATE INDEX `topical_domain_of` ON `topicality`(`term_id`); |
196 | `verb_id` INTEGER NOT NULL, | ||
197 | `pronunciation` VARCHAR(64) NOT NULL, | ||
198 | `prerhyme` VARCHAR(8), | ||
199 | `rhyme` VARCHAR(64), | ||
200 | `syllables` INT NOT NULL, | ||
201 | `stress` VARCHAR(64) NOT NULL, | ||
202 | FOREIGN KEY (`verb_id`) REFERENCES `verbs`(`verb_id`) | ||
203 | ); | ||
204 | 145 | ||
205 | DROP TABLE IF EXISTS `adjective_pronunciations`; | 146 | CREATE TABLE `regionality` ( |
206 | CREATE TABLE `adjective_pronunciations` ( | 147 | `domain_id` INTEGER NOT NULL, |
207 | `adjective_id` INTEGER NOT NULL, | 148 | `term_id` INTEGER NOT NULL |
208 | `pronunciation` VARCHAR(64) NOT NULL, | ||
209 | `prerhyme` VARCHAR(8), | ||
210 | `rhyme` VARCHAR(64), | ||
211 | `syllables` INT NOT NULL, | ||
212 | `stress` VARCHAR(64) NOT NULL, | ||
213 | FOREIGN KEY (`adjective_id`) REFERENCES `adjectives`(`adjective_id`) | ||
214 | ); | 149 | ); |
215 | 150 | ||
216 | DROP TABLE IF EXISTS `adverb_pronunciations`; | 151 | CREATE INDEX `regional_term_of` ON `regionality`(`domain_id`); |
217 | CREATE TABLE `adverb_pronunciations` ( | 152 | CREATE INDEX `regional_domain_of` ON `regionality`(`term_id`); |
218 | `adverb_id` INTEGER NOT NULL, | ||
219 | `pronunciation` VARCHAR(64) NOT NULL, | ||
220 | `prerhyme` VARCHAR(8), | ||
221 | `rhyme` VARCHAR(64), | ||
222 | `syllables` INT NOT NULL, | ||
223 | `stress` VARCHAR(64) NOT NULL, | ||
224 | FOREIGN KEY (`adverb_id`) REFERENCES `adverbs`(`adverb_id`) | ||
225 | ); | ||
226 | 153 | ||
227 | DROP TABLE IF EXISTS `noun_noun_derivation`; | 154 | CREATE TABLE `forms` ( |
228 | CREATE TABLE `noun_noun_derivation` ( | 155 | `form_id` INTEGER PRIMARY KEY, |
229 | `noun_1_id` INTEGER NOT NULL, | 156 | `form` VARCHAR(32) NOT NULL, |
230 | `noun_2_id` INTEGER NOT NULL, | 157 | `complexity` SMALLINT NOT NULL, |
231 | FOREIGN KEY (`noun_1_id`) REFERENCES `nouns`(`noun_id`), | 158 | `proper` SMALLINT NOT NULL |
232 | FOREIGN KEY (`noun_2_id`) REFERENCES `nouns`(`noun_id`) | ||
233 | ); | 159 | ); |
234 | 160 | ||
235 | DROP TABLE IF EXISTS `noun_adjective_derivation`; | 161 | CREATE UNIQUE INDEX `form_by_string` ON `forms`(`form`); |
236 | CREATE TABLE `noun_adjective_derivation` ( | ||
237 | `noun_id` INTEGER NOT NULL, | ||
238 | `adjective_id` INTEGER NOT NULL, | ||
239 | FOREIGN KEY (`noun_id`) REFERENCES `nouns`(`noun_id`), | ||
240 | FOREIGN KEY (`adjective_id`) REFERENCES `adjectives`(`adjective_id`) | ||
241 | ); | ||
242 | 162 | ||
243 | DROP TABLE IF EXISTS `noun_adverb_derivation`; | 163 | CREATE TABLE `lemmas_forms` ( |
244 | CREATE TABLE `noun_adverb_derivation` ( | 164 | `lemma_id` INTEGER NOT NULL, |
245 | `noun_id` INTEGER NOT NULL, | 165 | `form_id` INTEGER NOT NULL, |
246 | `adverb_id` INTEGER NOT NULL, | 166 | `category` SMALLINT NOT NULL |
247 | FOREIGN KEY (`noun_id`) REFERENCES `nouns`(`noun_id`), | ||
248 | FOREIGN KEY (`adverb_id`) REFERENCES `adverbs`(`adverb_id`) | ||
249 | ); | 167 | ); |
250 | 168 | ||
251 | DROP TABLE IF EXISTS `adjective_adjective_derivation`; | 169 | CREATE INDEX `form_of` ON `lemmas_forms`(`lemma_id`); |
252 | CREATE TABLE `adjective_adjective_derivation` ( | 170 | CREATE INDEX `lemma_of` ON `lemmas_forms`(`form_id`); |
253 | `adjective_1_id` INTEGER NOT NULL, | 171 | |
254 | `adjective_2_id` INTEGER NOT NULL, | 172 | CREATE TABLE `pronunciations` ( |
255 | FOREIGN KEY (`adjective_1_id`) REFERENCES `adjectives`(`adjective_id`), | 173 | `pronunciation_id` INTEGER PRIMARY KEY, |
256 | FOREIGN KEY (`adjective_2_id`) REFERENCES `adjectives`(`adjective_id`) | 174 | `phonemes` VARCHAR(64) NOT NULL, |
175 | `prerhyme` VARCHAR(8), | ||
176 | `rhyme` VARCHAR(64), | ||
177 | `syllables` INTEGER NOT NULL, | ||
178 | `stress` VARCHAR(64) NOT NULL | ||
257 | ); | 179 | ); |
258 | 180 | ||
259 | DROP TABLE IF EXISTS `adjective_adverb_derivation`; | 181 | CREATE TABLE `forms_pronunciations` ( |
260 | CREATE TABLE `adjective_adverb_derivation` ( | 182 | `form_id` INTEGER NOT NULL, |
261 | `adjective_id` INTEGER NOT NULL, | 183 | `pronunciation_id` INTEGER NOT NULL |
262 | `adverb_id` INTEGER NOT NULL, | ||
263 | FOREIGN KEY (`adjective_id`) REFERENCES `adjectives`(`adjective_id`), | ||
264 | FOREIGN KEY (`adverb_id`) REFERENCES `adverbs`(`adjective_id`) | ||
265 | ); | 184 | ); |
266 | 185 | ||
267 | DROP TABLE IF EXISTS `adverb_adverb_derivation`; | 186 | CREATE INDEX `pronunciation_of` ON `forms_pronunciations`(`form_id`); |
268 | CREATE TABLE `adverb_adverb_derivation` ( | 187 | CREATE INDEX `spelling_of` ON `forms_pronunciations`(`pronunciation_id`); |
269 | `adverb_1_id` INTEGER NOT NULL, | 188 | |
270 | `adverb_2_id` INTEGER NOT NULL, | 189 | CREATE TABLE `groups` ( |
271 | FOREIGN KEY (`adverb_1_id`) REFERENCES `adverbs`(`adverb_id`), | 190 | `group_id` INTEGER PRIMARY KEY, |
272 | FOREIGN KEY (`adverb_2_id`) REFERENCES `adverbs`(`adverb_id`) | 191 | `data` BLOB NOT NULL |
273 | ); | 192 | ); |
274 | 193 | ||
275 | DROP TABLE IF EXISTS `prepositions`; | 194 | CREATE TABLE `frames` ( |
276 | CREATE TABLE `prepositions` ( | 195 | `frame_id` INTEGER PRIMARY KEY, |
277 | `preposition_id` INTEGER PRIMARY KEY, | 196 | `data` BLOB NOT NULL |
278 | `form` VARCHAR(32) NOT NULL | ||
279 | ); | 197 | ); |
280 | 198 | ||
281 | DROP TABLE IF EXISTS `preposition_groups`; | 199 | CREATE TABLE `groups_frames` ( |
282 | CREATE TABLE `preposition_groups` ( | 200 | `group_id` INTEGER NOT NULL, |
283 | `preposition_id` INTEGER NOT NULL, | 201 | `frame_id` INTEGER NOT NULL |
284 | `groupname` VARCHAR(32) NOT NULL, | ||
285 | FOREIGN KEY (`preposition_id`) REFERENCES `prepositions`(`preposition_id`) | ||
286 | ); | 202 | ); |
203 | |||
204 | CREATE INDEX `frames_in` ON `groups_frames`(`group_id`); | ||
diff --git a/generator/selrestr.cpp b/generator/selrestr.cpp new file mode 100644 index 0000000..8bdd3f6 --- /dev/null +++ b/generator/selrestr.cpp | |||
@@ -0,0 +1,288 @@ | |||
1 | #include "selrestr.h" | ||
2 | |||
3 | namespace verbly { | ||
4 | namespace generator { | ||
5 | |||
6 | selrestr::selrestr(const selrestr& other) | ||
7 | { | ||
8 | type_ = other.type_; | ||
9 | |||
10 | switch (type_) | ||
11 | { | ||
12 | case type::singleton: | ||
13 | { | ||
14 | singleton_.pos = other.singleton_.pos; | ||
15 | new(&singleton_.restriction) std::string(other.singleton_.restriction); | ||
16 | |||
17 | break; | ||
18 | } | ||
19 | |||
20 | case type::group: | ||
21 | { | ||
22 | new(&group_.children) std::list<selrestr>(other.group_.children); | ||
23 | group_.orlogic = other.group_.orlogic; | ||
24 | |||
25 | break; | ||
26 | } | ||
27 | |||
28 | case type::empty: | ||
29 | { | ||
30 | break; | ||
31 | } | ||
32 | } | ||
33 | } | ||
34 | |||
35 | selrestr::selrestr(selrestr&& other) : selrestr() | ||
36 | { | ||
37 | swap(*this, other); | ||
38 | } | ||
39 | |||
40 | selrestr& selrestr::operator=(selrestr other) | ||
41 | { | ||
42 | swap(*this, other); | ||
43 | |||
44 | return *this; | ||
45 | } | ||
46 | |||
47 | void swap(selrestr& first, selrestr& second) | ||
48 | { | ||
49 | using type = selrestr::type; | ||
50 | |||
51 | type tempType = first.type_; | ||
52 | int tempPos; | ||
53 | std::string tempRestriction; | ||
54 | std::list<selrestr> tempChildren; | ||
55 | bool tempOrlogic; | ||
56 | |||
57 | switch (tempType) | ||
58 | { | ||
59 | case type::singleton: | ||
60 | { | ||
61 | tempPos = first.singleton_.pos; | ||
62 | tempRestriction = std::move(first.singleton_.restriction); | ||
63 | |||
64 | break; | ||
65 | } | ||
66 | |||
67 | case type::group: | ||
68 | { | ||
69 | tempChildren = std::move(first.group_.children); | ||
70 | tempOrlogic = first.group_.orlogic; | ||
71 | |||
72 | break; | ||
73 | } | ||
74 | |||
75 | case type::empty: | ||
76 | { | ||
77 | break; | ||
78 | } | ||
79 | } | ||
80 | |||
81 | first.~selrestr(); | ||
82 | |||
83 | first.type_ = second.type_; | ||
84 | |||
85 | switch (first.type_) | ||
86 | { | ||
87 | case type::singleton: | ||
88 | { | ||
89 | first.singleton_.pos = second.singleton_.pos; | ||
90 | new(&first.singleton_.restriction) std::string(std::move(second.singleton_.restriction)); | ||
91 | |||
92 | break; | ||
93 | } | ||
94 | |||
95 | case type::group: | ||
96 | { | ||
97 | new(&first.group_.children) std::list<selrestr>(std::move(second.group_.children)); | ||
98 | first.group_.orlogic = second.group_.orlogic; | ||
99 | |||
100 | break; | ||
101 | } | ||
102 | |||
103 | case type::empty: | ||
104 | { | ||
105 | break; | ||
106 | } | ||
107 | } | ||
108 | |||
109 | second.~selrestr(); | ||
110 | |||
111 | second.type_ = tempType; | ||
112 | |||
113 | switch (second.type_) | ||
114 | { | ||
115 | case type::singleton: | ||
116 | { | ||
117 | second.singleton_.pos = tempPos; | ||
118 | new(&second.singleton_.restriction) std::string(std::move(tempRestriction)); | ||
119 | |||
120 | break; | ||
121 | } | ||
122 | |||
123 | case type::group: | ||
124 | { | ||
125 | new(&second.group_.children) std::list<selrestr>(std::move(tempChildren)); | ||
126 | second.group_.orlogic = tempOrlogic; | ||
127 | |||
128 | break; | ||
129 | } | ||
130 | |||
131 | case type::empty: | ||
132 | { | ||
133 | break; | ||
134 | } | ||
135 | } | ||
136 | } | ||
137 | |||
138 | selrestr::~selrestr() | ||
139 | { | ||
140 | switch (type_) | ||
141 | { | ||
142 | case type::singleton: | ||
143 | { | ||
144 | using string_type = std::string; | ||
145 | singleton_.restriction.~string_type(); | ||
146 | |||
147 | break; | ||
148 | } | ||
149 | |||
150 | case type::group: | ||
151 | { | ||
152 | using list_type = std::list<selrestr>; | ||
153 | group_.children.~list_type(); | ||
154 | |||
155 | break; | ||
156 | } | ||
157 | |||
158 | case type::empty: | ||
159 | { | ||
160 | break; | ||
161 | } | ||
162 | } | ||
163 | } | ||
164 | |||
165 | selrestr::selrestr() : type_(type::empty) | ||
166 | { | ||
167 | } | ||
168 | |||
169 | selrestr::selrestr( | ||
170 | std::string restriction, | ||
171 | bool pos) : | ||
172 | type_(type::singleton) | ||
173 | { | ||
174 | new(&singleton_.restriction) std::string(std::move(restriction)); | ||
175 | singleton_.pos = pos; | ||
176 | } | ||
177 | |||
178 | std::string selrestr::getRestriction() const | ||
179 | { | ||
180 | if (type_ == type::singleton) | ||
181 | { | ||
182 | return singleton_.restriction; | ||
183 | } else { | ||
184 | throw std::domain_error("Only singleton selrestrs have restrictions"); | ||
185 | } | ||
186 | } | ||
187 | |||
188 | bool selrestr::getPos() const | ||
189 | { | ||
190 | if (type_ == type::singleton) | ||
191 | { | ||
192 | return singleton_.pos; | ||
193 | } else { | ||
194 | throw std::domain_error("Only singleton selrestrs have positivity flags"); | ||
195 | } | ||
196 | } | ||
197 | |||
198 | selrestr::selrestr( | ||
199 | std::list<selrestr> children, | ||
200 | bool orlogic) : | ||
201 | type_(type::group) | ||
202 | { | ||
203 | new(&group_.children) std::list<selrestr>(std::move(children)); | ||
204 | group_.orlogic = orlogic; | ||
205 | } | ||
206 | |||
207 | std::list<selrestr> selrestr::getChildren() const | ||
208 | { | ||
209 | if (type_ == type::group) | ||
210 | { | ||
211 | return group_.children; | ||
212 | } else { | ||
213 | throw std::domain_error("Only group selrestrs have children"); | ||
214 | } | ||
215 | } | ||
216 | |||
217 | std::list<selrestr>::const_iterator selrestr::begin() const | ||
218 | { | ||
219 | if (type_ == type::group) | ||
220 | { | ||
221 | return std::begin(group_.children); | ||
222 | } else { | ||
223 | throw std::domain_error("Only group selrestrs have children"); | ||
224 | } | ||
225 | } | ||
226 | |||
227 | std::list<selrestr>::const_iterator selrestr::end() const | ||
228 | { | ||
229 | if (type_ == type::group) | ||
230 | { | ||
231 | return std::end(group_.children); | ||
232 | } else { | ||
233 | throw std::domain_error("Only group selrestrs have children"); | ||
234 | } | ||
235 | } | ||
236 | |||
237 | bool selrestr::getOrlogic() const | ||
238 | { | ||
239 | if (type_ == type::group) | ||
240 | { | ||
241 | return group_.orlogic; | ||
242 | } else { | ||
243 | throw std::domain_error("Only group selrestrs have logic"); | ||
244 | } | ||
245 | } | ||
246 | |||
247 | nlohmann::json selrestr::toJson() const | ||
248 | { | ||
249 | switch (type_) | ||
250 | { | ||
251 | case type::empty: | ||
252 | { | ||
253 | return {}; | ||
254 | } | ||
255 | |||
256 | case type::singleton: | ||
257 | { | ||
258 | return { | ||
259 | {"type", singleton_.restriction}, | ||
260 | {"pos", singleton_.pos} | ||
261 | }; | ||
262 | } | ||
263 | |||
264 | case type::group: | ||
265 | { | ||
266 | std::string logic; | ||
267 | if (group_.orlogic) | ||
268 | { | ||
269 | logic = "or"; | ||
270 | } else { | ||
271 | logic = "and"; | ||
272 | } | ||
273 | |||
274 | std::list<nlohmann::json> children; | ||
275 | std::transform(std::begin(group_.children), std::end(group_.children), std::back_inserter(children), [] (const selrestr& child) { | ||
276 | return child.toJson(); | ||
277 | }); | ||
278 | |||
279 | return { | ||
280 | {"logic", logic}, | ||
281 | {"children", children} | ||
282 | }; | ||
283 | } | ||
284 | } | ||
285 | } | ||
286 | |||
287 | }; | ||
288 | }; | ||
diff --git a/generator/selrestr.h b/generator/selrestr.h new file mode 100644 index 0000000..5000970 --- /dev/null +++ b/generator/selrestr.h | |||
@@ -0,0 +1,88 @@ | |||
1 | #ifndef SELRESTR_H_50652FB7 | ||
2 | #define SELRESTR_H_50652FB7 | ||
3 | |||
4 | #include <list> | ||
5 | #include <string> | ||
6 | #include <json.hpp> | ||
7 | |||
8 | namespace verbly { | ||
9 | namespace generator { | ||
10 | |||
11 | class selrestr { | ||
12 | public: | ||
13 | enum class type { | ||
14 | empty, | ||
15 | singleton, | ||
16 | group | ||
17 | }; | ||
18 | |||
19 | // Copy and move constructors | ||
20 | |||
21 | selrestr(const selrestr& other); | ||
22 | selrestr(selrestr&& other); | ||
23 | |||
24 | // Assignment | ||
25 | |||
26 | selrestr& operator=(selrestr other); | ||
27 | |||
28 | // Swap | ||
29 | |||
30 | friend void swap(selrestr& first, selrestr& second); | ||
31 | |||
32 | // Destructor | ||
33 | |||
34 | ~selrestr(); | ||
35 | |||
36 | // Generic accessors | ||
37 | |||
38 | type getType() const | ||
39 | { | ||
40 | return type_; | ||
41 | } | ||
42 | |||
43 | // Empty | ||
44 | |||
45 | selrestr(); | ||
46 | |||
47 | // Singleton | ||
48 | |||
49 | selrestr(std::string restriction, bool pos); | ||
50 | |||
51 | std::string getRestriction() const; | ||
52 | |||
53 | bool getPos() const; | ||
54 | |||
55 | // Group | ||
56 | |||
57 | selrestr(std::list<selrestr> children, bool orlogic); | ||
58 | |||
59 | std::list<selrestr> getChildren() const; | ||
60 | |||
61 | std::list<selrestr>::const_iterator begin() const; | ||
62 | |||
63 | std::list<selrestr>::const_iterator end() const; | ||
64 | |||
65 | bool getOrlogic() const; | ||
66 | |||
67 | // Helpers | ||
68 | |||
69 | nlohmann::json toJson() const; | ||
70 | |||
71 | private: | ||
72 | union { | ||
73 | struct { | ||
74 | bool pos; | ||
75 | std::string restriction; | ||
76 | } singleton_; | ||
77 | struct { | ||
78 | std::list<selrestr> children; | ||
79 | bool orlogic; | ||
80 | } group_; | ||
81 | }; | ||
82 | type type_; | ||
83 | }; | ||
84 | |||
85 | }; | ||
86 | }; | ||
87 | |||
88 | #endif /* end of include guard: SELRESTR_H_50652FB7 */ | ||
diff --git a/generator/word.cpp b/generator/word.cpp new file mode 100644 index 0000000..8ba3ce2 --- /dev/null +++ b/generator/word.cpp | |||
@@ -0,0 +1,77 @@ | |||
1 | #include "word.h" | ||
2 | #include <list> | ||
3 | #include <string> | ||
4 | #include "database.h" | ||
5 | #include "notion.h" | ||
6 | #include "lemma.h" | ||
7 | #include "field.h" | ||
8 | #include "group.h" | ||
9 | |||
10 | namespace verbly { | ||
11 | namespace generator { | ||
12 | |||
13 | int word::nextId_ = 0; | ||
14 | |||
15 | word::word( | ||
16 | notion& n, | ||
17 | lemma& l) : | ||
18 | id_(nextId_++), | ||
19 | notion_(n), | ||
20 | lemma_(l) | ||
21 | { | ||
22 | } | ||
23 | |||
24 | word::word( | ||
25 | notion& n, | ||
26 | lemma& l, | ||
27 | int tagCount) : | ||
28 | id_(nextId_++), | ||
29 | notion_(n), | ||
30 | lemma_(l), | ||
31 | tagCount_(tagCount), | ||
32 | hasTagCount_(true) | ||
33 | { | ||
34 | } | ||
35 | |||
36 | void word::setAdjectivePosition(positioning adjectivePosition) | ||
37 | { | ||
38 | adjectivePosition_ = adjectivePosition; | ||
39 | } | ||
40 | |||
41 | void word::setVerbGroup(const group& verbGroup) | ||
42 | { | ||
43 | verbGroup_ = &verbGroup; | ||
44 | } | ||
45 | |||
46 | database& operator<<(database& db, const word& arg) | ||
47 | { | ||
48 | std::list<field> fields; | ||
49 | |||
50 | fields.emplace_back("word_id", arg.getId()); | ||
51 | fields.emplace_back("notion_id", arg.getNotion().getId()); | ||
52 | fields.emplace_back("lemma_id", arg.getLemma().getId()); | ||
53 | |||
54 | if (arg.hasTagCount()) | ||
55 | { | ||
56 | fields.emplace_back("tag_count", arg.getTagCount()); | ||
57 | } | ||
58 | |||
59 | if ((arg.getNotion().getPartOfSpeech() == part_of_speech::adjective) | ||
60 | && (arg.getAdjectivePosition() != positioning::undefined)) | ||
61 | { | ||
62 | fields.emplace_back("position", static_cast<int>(arg.getAdjectivePosition())); | ||
63 | } | ||
64 | |||
65 | if ((arg.getNotion().getPartOfSpeech() == part_of_speech::verb) | ||
66 | && (arg.hasVerbGroup())) | ||
67 | { | ||
68 | fields.emplace_back("group_id", arg.getVerbGroup().getId()); | ||
69 | } | ||
70 | |||
71 | db.insertIntoTable("words", std::move(fields)); | ||
72 | |||
73 | return db; | ||
74 | } | ||
75 | |||
76 | }; | ||
77 | }; | ||
diff --git a/generator/word.h b/generator/word.h new file mode 100644 index 0000000..bfed586 --- /dev/null +++ b/generator/word.h | |||
@@ -0,0 +1,110 @@ | |||
1 | #ifndef WORD_H_91F99D46 | ||
2 | #define WORD_H_91F99D46 | ||
3 | |||
4 | #include <cassert> | ||
5 | #include "enums.h" | ||
6 | |||
7 | namespace verbly { | ||
8 | namespace generator { | ||
9 | |||
10 | class notion; | ||
11 | class lemma; | ||
12 | class database; | ||
13 | class group; | ||
14 | |||
15 | class word { | ||
16 | public: | ||
17 | |||
18 | // Constructors | ||
19 | |||
20 | word(notion& n, lemma& l); | ||
21 | |||
22 | word(notion& n, lemma& l, int tagCount); | ||
23 | |||
24 | // Mutators | ||
25 | |||
26 | void setAdjectivePosition(positioning adjectivePosition); | ||
27 | |||
28 | void setVerbGroup(const group& verbGroup); | ||
29 | |||
30 | // Accessors | ||
31 | |||
32 | int getId() const | ||
33 | { | ||
34 | return id_; | ||
35 | } | ||
36 | |||
37 | notion& getNotion() | ||
38 | { | ||
39 | return notion_; | ||
40 | } | ||
41 | |||
42 | const notion& getNotion() const | ||
43 | { | ||
44 | return notion_; | ||
45 | } | ||
46 | |||
47 | lemma& getLemma() | ||
48 | { | ||
49 | return lemma_; | ||
50 | } | ||
51 | |||
52 | const lemma& getLemma() const | ||
53 | { | ||
54 | return lemma_; | ||
55 | } | ||
56 | |||
57 | bool hasTagCount() const | ||
58 | { | ||
59 | return hasTagCount_; | ||
60 | } | ||
61 | |||
62 | int getTagCount() const | ||
63 | { | ||
64 | // Calling code should always call hasTagCount first. | ||
65 | assert(hasTagCount_); | ||
66 | |||
67 | return tagCount_; | ||
68 | } | ||
69 | |||
70 | positioning getAdjectivePosition() const | ||
71 | { | ||
72 | return adjectivePosition_; | ||
73 | } | ||
74 | |||
75 | bool hasVerbGroup() const | ||
76 | { | ||
77 | return (verbGroup_ != nullptr); | ||
78 | } | ||
79 | |||
80 | const group& getVerbGroup() const | ||
81 | { | ||
82 | // Calling code should always call hasVerbGroup first. | ||
83 | assert(verbGroup_ != nullptr); | ||
84 | |||
85 | return *verbGroup_; | ||
86 | } | ||
87 | |||
88 | private: | ||
89 | |||
90 | static int nextId_; | ||
91 | |||
92 | const int id_; | ||
93 | notion& notion_; | ||
94 | lemma& lemma_; | ||
95 | const int tagCount_ = 0; | ||
96 | const bool hasTagCount_ = false; | ||
97 | |||
98 | positioning adjectivePosition_ = positioning::undefined; | ||
99 | const group* verbGroup_ = nullptr; | ||
100 | |||
101 | }; | ||
102 | |||
103 | // Serializer | ||
104 | |||
105 | database& operator<<(database& db, const word& arg); | ||
106 | |||
107 | }; | ||
108 | }; | ||
109 | |||
110 | #endif /* end of include guard: WORD_H_91F99D46 */ | ||
diff --git a/lib/adjective.cpp b/lib/adjective.cpp deleted file mode 100644 index ba8254a..0000000 --- a/lib/adjective.cpp +++ /dev/null | |||
@@ -1,113 +0,0 @@ | |||
1 | #include "verbly.h" | ||
2 | |||
3 | namespace verbly { | ||
4 | |||
5 | adjective::adjective() | ||
6 | { | ||
7 | |||
8 | } | ||
9 | |||
10 | adjective::adjective(const data& _data, int _id) : word(_data, _id) | ||
11 | { | ||
12 | |||
13 | } | ||
14 | |||
15 | std::string adjective::base_form() const | ||
16 | { | ||
17 | assert(_valid == true); | ||
18 | |||
19 | return _base_form; | ||
20 | } | ||
21 | |||
22 | std::string adjective::comparative_form() const | ||
23 | { | ||
24 | assert(_valid == true); | ||
25 | |||
26 | return _comparative_form; | ||
27 | } | ||
28 | |||
29 | std::string adjective::superlative_form() const | ||
30 | { | ||
31 | assert(_valid == true); | ||
32 | |||
33 | return _superlative_form; | ||
34 | } | ||
35 | |||
36 | adjective::positioning adjective::position() const | ||
37 | { | ||
38 | assert(_valid == true); | ||
39 | |||
40 | return _position; | ||
41 | } | ||
42 | |||
43 | bool adjective::has_comparative_form() const | ||
44 | { | ||
45 | assert(_valid == true); | ||
46 | |||
47 | return !_comparative_form.empty(); | ||
48 | } | ||
49 | |||
50 | bool adjective::has_superlative_form() const | ||
51 | { | ||
52 | assert(_valid == true); | ||
53 | |||
54 | return !_superlative_form.empty(); | ||
55 | } | ||
56 | |||
57 | bool adjective::has_position() const | ||
58 | { | ||
59 | assert(_valid == true); | ||
60 | |||
61 | return _position != adjective::positioning::undefined; | ||
62 | } | ||
63 | |||
64 | adjective_query adjective::antonyms() const | ||
65 | { | ||
66 | assert(_valid == true); | ||
67 | |||
68 | return _data->adjectives().antonym_of(*this); | ||
69 | } | ||
70 | |||
71 | adjective_query adjective::synonyms() const | ||
72 | { | ||
73 | assert(_valid == true); | ||
74 | |||
75 | return _data->adjectives().synonym_of(*this); | ||
76 | } | ||
77 | |||
78 | adjective_query adjective::generalizations() const | ||
79 | { | ||
80 | assert(_valid == true); | ||
81 | |||
82 | return _data->adjectives().generalization_of(*this); | ||
83 | } | ||
84 | |||
85 | adjective_query adjective::specifications() const | ||
86 | { | ||
87 | assert(_valid == true); | ||
88 | |||
89 | return _data->adjectives().specification_of(*this); | ||
90 | } | ||
91 | |||
92 | noun_query adjective::anti_pertainyms() const | ||
93 | { | ||
94 | assert(_valid == true); | ||
95 | |||
96 | return _data->nouns().anti_pertainym_of(*this); | ||
97 | } | ||
98 | |||
99 | adverb_query adjective::mannernyms() const | ||
100 | { | ||
101 | assert(_valid == true); | ||
102 | |||
103 | return _data->adverbs().mannernym_of(*this); | ||
104 | } | ||
105 | |||
106 | noun_query adjective::attributes() const | ||
107 | { | ||
108 | assert(_valid == true); | ||
109 | |||
110 | return _data->nouns().attribute_of(*this); | ||
111 | } | ||
112 | |||
113 | }; | ||
diff --git a/lib/adjective.h b/lib/adjective.h deleted file mode 100644 index a6eb293..0000000 --- a/lib/adjective.h +++ /dev/null | |||
@@ -1,51 +0,0 @@ | |||
1 | #ifndef ADJECTIVE_H_87B3FB75 | ||
2 | #define ADJECTIVE_H_87B3FB75 | ||
3 | |||
4 | namespace verbly { | ||
5 | |||
6 | class adjective_query; | ||
7 | class adverb_query; | ||
8 | class noun_query; | ||
9 | |||
10 | class adjective : public word { | ||
11 | public: | ||
12 | enum class positioning { | ||
13 | undefined, | ||
14 | predicate, | ||
15 | attributive, | ||
16 | postnominal | ||
17 | }; | ||
18 | |||
19 | private: | ||
20 | std::string _base_form; | ||
21 | std::string _comparative_form; | ||
22 | std::string _superlative_form; | ||
23 | positioning _position = positioning::undefined; | ||
24 | |||
25 | friend class adjective_query; | ||
26 | |||
27 | public: | ||
28 | adjective(); | ||
29 | adjective(const data& _data, int _id); | ||
30 | |||
31 | std::string base_form() const; | ||
32 | std::string comparative_form() const; | ||
33 | std::string superlative_form() const; | ||
34 | positioning position() const; | ||
35 | |||
36 | bool has_comparative_form() const; | ||
37 | bool has_superlative_form() const; | ||
38 | bool has_position() const; | ||
39 | |||
40 | adjective_query antonyms() const; | ||
41 | adjective_query synonyms() const; | ||
42 | adjective_query generalizations() const; | ||
43 | adjective_query specifications() const; | ||
44 | noun_query anti_pertainyms() const; | ||
45 | adverb_query mannernyms() const; | ||
46 | noun_query attributes() const; | ||
47 | }; | ||
48 | |||
49 | }; | ||
50 | |||
51 | #endif /* end of include guard: ADJECTIVE_H_87B3FB75 */ | ||
diff --git a/lib/adjective_query.cpp b/lib/adjective_query.cpp deleted file mode 100644 index 90ccef4..0000000 --- a/lib/adjective_query.cpp +++ /dev/null | |||
@@ -1,1072 +0,0 @@ | |||
1 | #include "verbly.h" | ||
2 | |||
3 | namespace verbly { | ||
4 | |||
5 | adjective_query::adjective_query(const data& _data) : _data(_data) | ||
6 | { | ||
7 | |||
8 | } | ||
9 | |||
10 | adjective_query& adjective_query::limit(int _limit) | ||
11 | { | ||
12 | if ((_limit > 0) || (_limit == unlimited)) | ||
13 | { | ||
14 | this->_limit = _limit; | ||
15 | } | ||
16 | |||
17 | return *this; | ||
18 | } | ||
19 | |||
20 | adjective_query& adjective_query::random() | ||
21 | { | ||
22 | this->_random = true; | ||
23 | |||
24 | return *this; | ||
25 | } | ||
26 | |||
27 | adjective_query& adjective_query::except(const adjective& _word) | ||
28 | { | ||
29 | _except.push_back(_word); | ||
30 | |||
31 | return *this; | ||
32 | } | ||
33 | |||
34 | adjective_query& adjective_query::rhymes_with(const word& _word) | ||
35 | { | ||
36 | for (auto rhyme : _word.get_rhymes()) | ||
37 | { | ||
38 | _rhymes.push_back(rhyme); | ||
39 | } | ||
40 | |||
41 | if (dynamic_cast<const adjective*>(&_word) != nullptr) | ||
42 | { | ||
43 | _except.push_back(dynamic_cast<const adjective&>(_word)); | ||
44 | } | ||
45 | |||
46 | return *this; | ||
47 | } | ||
48 | |||
49 | adjective_query& adjective_query::rhymes_with(rhyme _r) | ||
50 | { | ||
51 | _rhymes.push_back(_r); | ||
52 | |||
53 | return *this; | ||
54 | } | ||
55 | |||
56 | adjective_query& adjective_query::has_pronunciation() | ||
57 | { | ||
58 | this->_has_prn = true; | ||
59 | |||
60 | return *this; | ||
61 | } | ||
62 | |||
63 | adjective_query& adjective_query::has_rhyming_noun() | ||
64 | { | ||
65 | _has_rhyming_noun = true; | ||
66 | |||
67 | return *this; | ||
68 | } | ||
69 | |||
70 | adjective_query& adjective_query::has_rhyming_adjective() | ||
71 | { | ||
72 | _has_rhyming_adjective = true; | ||
73 | |||
74 | return *this; | ||
75 | } | ||
76 | |||
77 | adjective_query& adjective_query::has_rhyming_adverb() | ||
78 | { | ||
79 | _has_rhyming_adverb = true; | ||
80 | |||
81 | return *this; | ||
82 | } | ||
83 | |||
84 | adjective_query& adjective_query::has_rhyming_verb() | ||
85 | { | ||
86 | _has_rhyming_verb = true; | ||
87 | |||
88 | return *this; | ||
89 | } | ||
90 | |||
91 | adjective_query& adjective_query::with_stress(filter<std::vector<bool>> _arg) | ||
92 | { | ||
93 | _stress = _arg; | ||
94 | |||
95 | return *this; | ||
96 | } | ||
97 | |||
98 | adjective_query& adjective_query::with_prefix(filter<std::string> _f) | ||
99 | { | ||
100 | _f.clean(); | ||
101 | _with_prefix = _f; | ||
102 | |||
103 | return *this; | ||
104 | } | ||
105 | |||
106 | adjective_query& adjective_query::with_suffix(filter<std::string> _f) | ||
107 | { | ||
108 | _f.clean(); | ||
109 | _with_suffix = _f; | ||
110 | |||
111 | return *this; | ||
112 | } | ||
113 | |||
114 | adjective_query& adjective_query::with_complexity(int _arg) | ||
115 | { | ||
116 | _with_complexity = _arg; | ||
117 | |||
118 | return *this; | ||
119 | } | ||
120 | |||
121 | adjective_query& adjective_query::requires_comparative_form() | ||
122 | { | ||
123 | _requires_comparative_form = true; | ||
124 | |||
125 | return *this; | ||
126 | } | ||
127 | |||
128 | adjective_query& adjective_query::requires_superlative_form() | ||
129 | { | ||
130 | _requires_superlative_form = true; | ||
131 | |||
132 | return *this; | ||
133 | } | ||
134 | |||
135 | adjective_query& adjective_query::position(adjective::positioning pos) | ||
136 | { | ||
137 | _position = pos; | ||
138 | |||
139 | return *this; | ||
140 | } | ||
141 | |||
142 | adjective_query& adjective_query::is_variant() | ||
143 | { | ||
144 | this->_is_variant = true; | ||
145 | |||
146 | return *this; | ||
147 | } | ||
148 | |||
149 | adjective_query& adjective_query::variant_of(filter<noun> _f) | ||
150 | { | ||
151 | _f.clean(); | ||
152 | _variant_of = _f; | ||
153 | |||
154 | return *this; | ||
155 | } | ||
156 | |||
157 | adjective_query& adjective_query::has_antonyms() | ||
158 | { | ||
159 | this->_is_antonymic = true; | ||
160 | |||
161 | return *this; | ||
162 | } | ||
163 | |||
164 | adjective_query& adjective_query::antonym_of(filter<adjective> _f) | ||
165 | { | ||
166 | _f.clean(); | ||
167 | _antonym_of = _f; | ||
168 | |||
169 | return *this; | ||
170 | } | ||
171 | |||
172 | adjective_query& adjective_query::has_synonyms() | ||
173 | { | ||
174 | this->_is_synonymic = true; | ||
175 | |||
176 | return *this; | ||
177 | } | ||
178 | |||
179 | adjective_query& adjective_query::synonym_of(filter<adjective> _f) | ||
180 | { | ||
181 | _f.clean(); | ||
182 | _synonym_of = _f; | ||
183 | |||
184 | return *this; | ||
185 | } | ||
186 | |||
187 | adjective_query& adjective_query::is_generalization() | ||
188 | { | ||
189 | this->_is_generalization = true; | ||
190 | |||
191 | return *this; | ||
192 | } | ||
193 | |||
194 | adjective_query& adjective_query::generalization_of(filter<adjective> _f) | ||
195 | { | ||
196 | _f.clean(); | ||
197 | _generalization_of = _f; | ||
198 | |||
199 | return *this; | ||
200 | } | ||
201 | |||
202 | adjective_query& adjective_query::is_specification() | ||
203 | { | ||
204 | this->_is_specification = true; | ||
205 | |||
206 | return *this; | ||
207 | } | ||
208 | |||
209 | adjective_query& adjective_query::specification_of(filter<adjective> _f) | ||
210 | { | ||
211 | _f.clean(); | ||
212 | _specification_of = _f; | ||
213 | |||
214 | return *this; | ||
215 | } | ||
216 | |||
217 | adjective_query& adjective_query::is_pertainymic() | ||
218 | { | ||
219 | this->_is_pertainymic = true; | ||
220 | |||
221 | return *this; | ||
222 | } | ||
223 | |||
224 | adjective_query& adjective_query::pertainym_of(filter<noun> _f) | ||
225 | { | ||
226 | _f.clean(); | ||
227 | _pertainym_of = _f; | ||
228 | |||
229 | return *this; | ||
230 | } | ||
231 | |||
232 | adjective_query& adjective_query::is_mannernymic() | ||
233 | { | ||
234 | this->_is_mannernymic = true; | ||
235 | |||
236 | return *this; | ||
237 | } | ||
238 | |||
239 | adjective_query& adjective_query::anti_mannernym_of(filter<adverb> _f) | ||
240 | { | ||
241 | _f.clean(); | ||
242 | _anti_mannernym_of = _f; | ||
243 | |||
244 | return *this; | ||
245 | } | ||
246 | /* | ||
247 | adjective_query& adjective_query::derived_from(const word& _w) | ||
248 | { | ||
249 | if (dynamic_cast<const adjective*>(&_w) != nullptr) | ||
250 | { | ||
251 | _derived_from_adjective.push_back(dynamic_cast<const adjective&>(_w)); | ||
252 | } else if (dynamic_cast<const adverb*>(&_w) != nullptr) | ||
253 | { | ||
254 | _derived_from_adverb.push_back(dynamic_cast<const adverb&>(_w)); | ||
255 | } else if (dynamic_cast<const noun*>(&_w) != nullptr) | ||
256 | { | ||
257 | _derived_from_noun.push_back(dynamic_cast<const noun&>(_w)); | ||
258 | } | ||
259 | |||
260 | return *this; | ||
261 | } | ||
262 | |||
263 | adjective_query& adjective_query::not_derived_from(const word& _w) | ||
264 | { | ||
265 | if (dynamic_cast<const adjective*>(&_w) != nullptr) | ||
266 | { | ||
267 | _not_derived_from_adjective.push_back(dynamic_cast<const adjective&>(_w)); | ||
268 | } else if (dynamic_cast<const adverb*>(&_w) != nullptr) | ||
269 | { | ||
270 | _not_derived_from_adverb.push_back(dynamic_cast<const adverb&>(_w)); | ||
271 | } else if (dynamic_cast<const noun*>(&_w) != nullptr) | ||
272 | { | ||
273 | _not_derived_from_noun.push_back(dynamic_cast<const noun&>(_w)); | ||
274 | } | ||
275 | |||
276 | return *this; | ||
277 | } | ||
278 | */ | ||
279 | std::list<adjective> adjective_query::run() const | ||
280 | { | ||
281 | std::stringstream construct; | ||
282 | construct << "SELECT adjective_id, base_form, comparative, superlative, position FROM adjectives"; | ||
283 | std::list<std::string> conditions; | ||
284 | std::list<binding> bindings; | ||
285 | |||
286 | if (_has_prn) | ||
287 | { | ||
288 | conditions.push_back("adjective_id IN (SELECT adjective_id FROM adjective_pronunciations)"); | ||
289 | } | ||
290 | |||
291 | if (!_rhymes.empty()) | ||
292 | { | ||
293 | std::list<std::string> clauses(_rhymes.size(), "(prerhyme != ? AND rhyme = ?)"); | ||
294 | std::string cond = "adjective_id IN (SELECT adjective_id FROM adjective_pronunciations WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
295 | conditions.push_back(cond); | ||
296 | |||
297 | for (auto rhy : _rhymes) | ||
298 | { | ||
299 | bindings.emplace_back(rhy.get_prerhyme()); | ||
300 | bindings.emplace_back(rhy.get_rhyme()); | ||
301 | } | ||
302 | } | ||
303 | |||
304 | if (_has_rhyming_noun) | ||
305 | { | ||
306 | conditions.push_back("adjective_id IN (SELECT a.adjective_id FROM adjectives AS a INNER JOIN adjective_pronunciations AS curp ON curp.adjective_id = a.adjective_id INNER JOIN noun_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)"); | ||
307 | } | ||
308 | |||
309 | if (_has_rhyming_adjective) | ||
310 | { | ||
311 | conditions.push_back("adjective_id IN (SELECT a.adjective_id FROM adjectives AS a INNER JOIN adjective_pronunciations AS curp ON curp.adjective_id = a.adjective_id INNER JOIN adjective_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme AND rhmp.adjective_id != curp.adjective_id)"); | ||
312 | } | ||
313 | |||
314 | if (_has_rhyming_adverb) | ||
315 | { | ||
316 | conditions.push_back("adjective_id IN (SELECT a.adjective_id FROM adjectives AS a INNER JOIN adjective_pronunciations AS curp ON curp.adjective_id = a.adjective_id INNER JOIN adverb_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)"); | ||
317 | } | ||
318 | |||
319 | if (_has_rhyming_verb) | ||
320 | { | ||
321 | conditions.push_back("adjective_id IN (SELECT a.adjective_id FROM adjectives AS a INNER JOIN adjective_pronunciations AS curp ON curp.adjective_id = a.adjective_id INNER JOIN verb_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)"); | ||
322 | } | ||
323 | |||
324 | for (auto except : _except) | ||
325 | { | ||
326 | conditions.push_back("adjective_id != ?"); | ||
327 | bindings.emplace_back(except._id); | ||
328 | } | ||
329 | |||
330 | if (_requires_comparative_form) | ||
331 | { | ||
332 | conditions.push_back("comparative IS NOT NULL"); | ||
333 | } | ||
334 | |||
335 | if (_requires_superlative_form) | ||
336 | { | ||
337 | conditions.push_back("superlative IS NOT NULL"); | ||
338 | } | ||
339 | |||
340 | switch (_position) | ||
341 | { | ||
342 | case adjective::positioning::predicate: conditions.push_back("position = 'p'"); break; | ||
343 | case adjective::positioning::attributive: conditions.push_back("position = 'a'"); break; | ||
344 | case adjective::positioning::postnominal: conditions.push_back("position = 'i'"); break; | ||
345 | case adjective::positioning::undefined: break; | ||
346 | } | ||
347 | |||
348 | if (!_stress.empty()) | ||
349 | { | ||
350 | std::stringstream cond; | ||
351 | if (_stress.get_notlogic()) | ||
352 | { | ||
353 | cond << "adjective_id NOT IN"; | ||
354 | } else { | ||
355 | cond << "adjective_id IN"; | ||
356 | } | ||
357 | |||
358 | cond << "(SELECT adjective_id FROM adjective_pronunciations WHERE "; | ||
359 | |||
360 | std::function<std::string (filter<std::vector<bool>>, bool)> recur = [&] (filter<std::vector<bool>> f, bool notlogic) -> std::string { | ||
361 | switch (f.get_type()) | ||
362 | { | ||
363 | case filter<std::vector<bool>>::type::singleton: | ||
364 | { | ||
365 | std::ostringstream _val; | ||
366 | for (auto syl : f.get_elem()) | ||
367 | { | ||
368 | if (syl) | ||
369 | { | ||
370 | _val << "1"; | ||
371 | } else { | ||
372 | _val << "0"; | ||
373 | } | ||
374 | } | ||
375 | |||
376 | bindings.emplace_back(_val.str()); | ||
377 | |||
378 | if (notlogic == f.get_notlogic()) | ||
379 | { | ||
380 | return "stress = ?"; | ||
381 | } else { | ||
382 | return "stress != ?"; | ||
383 | } | ||
384 | } | ||
385 | |||
386 | case filter<std::vector<bool>>::type::group: | ||
387 | { | ||
388 | bool truelogic = notlogic != f.get_notlogic(); | ||
389 | |||
390 | std::list<std::string> clauses; | ||
391 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<std::vector<bool>> f2) { | ||
392 | return recur(f2, truelogic); | ||
393 | }); | ||
394 | |||
395 | if (truelogic == f.get_orlogic()) | ||
396 | { | ||
397 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
398 | } else { | ||
399 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
400 | } | ||
401 | } | ||
402 | } | ||
403 | }; | ||
404 | |||
405 | cond << recur(_stress, _stress.get_notlogic()); | ||
406 | cond << ")"; | ||
407 | conditions.push_back(cond.str()); | ||
408 | } | ||
409 | |||
410 | if (!_with_prefix.empty()) | ||
411 | { | ||
412 | std::function<std::string (filter<std::string>, bool)> recur = [&] (filter<std::string> f, bool notlogic) -> std::string { | ||
413 | switch (f.get_type()) | ||
414 | { | ||
415 | case filter<std::string>::type::singleton: | ||
416 | { | ||
417 | bindings.emplace_back(f.get_elem() + "%"); | ||
418 | |||
419 | if (notlogic == f.get_notlogic()) | ||
420 | { | ||
421 | return "base_form LIKE ?"; | ||
422 | } else { | ||
423 | return "base_form NOT LIKE ?"; | ||
424 | } | ||
425 | } | ||
426 | |||
427 | case filter<std::string>::type::group: | ||
428 | { | ||
429 | bool truelogic = notlogic != f.get_notlogic(); | ||
430 | |||
431 | std::list<std::string> clauses; | ||
432 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<std::string> f2) { | ||
433 | return recur(f2, truelogic); | ||
434 | }); | ||
435 | |||
436 | if (truelogic == f.get_orlogic()) | ||
437 | { | ||
438 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
439 | } else { | ||
440 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
441 | } | ||
442 | } | ||
443 | } | ||
444 | }; | ||
445 | |||
446 | conditions.push_back(recur(_with_prefix, false)); | ||
447 | } | ||
448 | |||
449 | if (!_with_suffix.empty()) | ||
450 | { | ||
451 | std::function<std::string (filter<std::string>, bool)> recur = [&] (filter<std::string> f, bool notlogic) -> std::string { | ||
452 | switch (f.get_type()) | ||
453 | { | ||
454 | case filter<std::string>::type::singleton: | ||
455 | { | ||
456 | bindings.emplace_back("%" + f.get_elem()); | ||
457 | |||
458 | if (notlogic == f.get_notlogic()) | ||
459 | { | ||
460 | return "base_form LIKE ?"; | ||
461 | } else { | ||
462 | return "base_form NOT LIKE ?"; | ||
463 | } | ||
464 | } | ||
465 | |||
466 | case filter<std::string>::type::group: | ||
467 | { | ||
468 | bool truelogic = notlogic != f.get_notlogic(); | ||
469 | |||
470 | std::list<std::string> clauses; | ||
471 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<std::string> f2) { | ||
472 | return recur(f2, truelogic); | ||
473 | }); | ||
474 | |||
475 | if (truelogic == f.get_orlogic()) | ||
476 | { | ||
477 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
478 | } else { | ||
479 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
480 | } | ||
481 | } | ||
482 | } | ||
483 | }; | ||
484 | |||
485 | conditions.push_back(recur(_with_suffix, false)); | ||
486 | } | ||
487 | |||
488 | if (_with_complexity != unlimited) | ||
489 | { | ||
490 | conditions.push_back("complexity = ?"); | ||
491 | bindings.emplace_back(_with_complexity); | ||
492 | } | ||
493 | |||
494 | if (_is_variant) | ||
495 | { | ||
496 | conditions.push_back("adjective_id IN (SELECT adjective_id FROM variation)"); | ||
497 | } | ||
498 | |||
499 | if (!_variant_of.empty()) | ||
500 | { | ||
501 | std::stringstream cond; | ||
502 | if (_variant_of.get_notlogic()) | ||
503 | { | ||
504 | cond << "adjective_id NOT IN"; | ||
505 | } else { | ||
506 | cond << "adjective_id IN"; | ||
507 | } | ||
508 | |||
509 | cond << "(SELECT adjective_id FROM variation WHERE "; | ||
510 | |||
511 | std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string { | ||
512 | switch (f.get_type()) | ||
513 | { | ||
514 | case filter<noun>::type::singleton: | ||
515 | { | ||
516 | bindings.emplace_back(f.get_elem()._id); | ||
517 | |||
518 | if (notlogic == f.get_notlogic()) | ||
519 | { | ||
520 | return "noun_id = ?"; | ||
521 | } else { | ||
522 | return "noun_id != ?"; | ||
523 | } | ||
524 | } | ||
525 | |||
526 | case filter<noun>::type::group: | ||
527 | { | ||
528 | bool truelogic = notlogic != f.get_notlogic(); | ||
529 | |||
530 | std::list<std::string> clauses; | ||
531 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) { | ||
532 | return recur(f2, truelogic); | ||
533 | }); | ||
534 | |||
535 | if (truelogic == f.get_orlogic()) | ||
536 | { | ||
537 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
538 | } else { | ||
539 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
540 | } | ||
541 | } | ||
542 | } | ||
543 | }; | ||
544 | |||
545 | cond << recur(_variant_of, _variant_of.get_notlogic()); | ||
546 | cond << ")"; | ||
547 | conditions.push_back(cond.str()); | ||
548 | } | ||
549 | |||
550 | if (_is_antonymic) | ||
551 | { | ||
552 | conditions.push_back("adjective_id IN (SELECT adjective_2_id FROM adjective_antonymy)"); | ||
553 | } | ||
554 | |||
555 | if (!_antonym_of.empty()) | ||
556 | { | ||
557 | std::stringstream cond; | ||
558 | if (_antonym_of.get_notlogic()) | ||
559 | { | ||
560 | cond << "adjective_id NOT IN"; | ||
561 | } else { | ||
562 | cond << "adjective_id IN"; | ||
563 | } | ||
564 | |||
565 | cond << "(SELECT adjective_2_id FROM adjective_antonymy WHERE "; | ||
566 | |||
567 | std::function<std::string (filter<adjective>, bool)> recur = [&] (filter<adjective> f, bool notlogic) -> std::string { | ||
568 | switch (f.get_type()) | ||
569 | { | ||
570 | case filter<adjective>::type::singleton: | ||
571 | { | ||
572 | bindings.emplace_back(f.get_elem()._id); | ||
573 | |||
574 | if (notlogic == f.get_notlogic()) | ||
575 | { | ||
576 | return "adjective_1_id = ?"; | ||
577 | } else { | ||
578 | return "adjective_1_id != ?"; | ||
579 | } | ||
580 | } | ||
581 | |||
582 | case filter<adjective>::type::group: | ||
583 | { | ||
584 | bool truelogic = notlogic != f.get_notlogic(); | ||
585 | |||
586 | std::list<std::string> clauses; | ||
587 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<adjective> f2) { | ||
588 | return recur(f2, truelogic); | ||
589 | }); | ||
590 | |||
591 | if (truelogic == f.get_orlogic()) | ||
592 | { | ||
593 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
594 | } else { | ||
595 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
596 | } | ||
597 | } | ||
598 | } | ||
599 | }; | ||
600 | |||
601 | cond << recur(_antonym_of, _antonym_of.get_notlogic()); | ||
602 | cond << ")"; | ||
603 | conditions.push_back(cond.str()); | ||
604 | } | ||
605 | |||
606 | if (_is_synonymic) | ||
607 | { | ||
608 | conditions.push_back("adjective_id IN (SELECT adjective_2_id FROM adjective_synonymy)"); | ||
609 | } | ||
610 | |||
611 | if (!_synonym_of.empty()) | ||
612 | { | ||
613 | std::stringstream cond; | ||
614 | if (_synonym_of.get_notlogic()) | ||
615 | { | ||
616 | cond << "adjective_id NOT IN"; | ||
617 | } else { | ||
618 | cond << "adjective_id IN"; | ||
619 | } | ||
620 | |||
621 | cond << "(SELECT adjective_2_id FROM adjective_synonymy WHERE "; | ||
622 | |||
623 | std::function<std::string (filter<adjective>, bool)> recur = [&] (filter<adjective> f, bool notlogic) -> std::string { | ||
624 | switch (f.get_type()) | ||
625 | { | ||
626 | case filter<adjective>::type::singleton: | ||
627 | { | ||
628 | bindings.emplace_back(f.get_elem()._id); | ||
629 | |||
630 | if (notlogic == f.get_notlogic()) | ||
631 | { | ||
632 | return "adjective_1_id = ?"; | ||
633 | } else { | ||
634 | return "adjective_1_id != ?"; | ||
635 | } | ||
636 | } | ||
637 | |||
638 | case filter<adjective>::type::group: | ||
639 | { | ||
640 | bool truelogic = notlogic != f.get_notlogic(); | ||
641 | |||
642 | std::list<std::string> clauses; | ||
643 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<adjective> f2) { | ||
644 | return recur(f2, truelogic); | ||
645 | }); | ||
646 | |||
647 | if (truelogic == f.get_orlogic()) | ||
648 | { | ||
649 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
650 | } else { | ||
651 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
652 | } | ||
653 | } | ||
654 | } | ||
655 | }; | ||
656 | |||
657 | cond << recur(_synonym_of, _synonym_of.get_notlogic()); | ||
658 | cond << ")"; | ||
659 | conditions.push_back(cond.str()); | ||
660 | } | ||
661 | |||
662 | if (_is_generalization) | ||
663 | { | ||
664 | conditions.push_back("adjective_id IN (SELECT general_id FROM specification)"); | ||
665 | } | ||
666 | |||
667 | if (!_generalization_of.empty()) | ||
668 | { | ||
669 | std::stringstream cond; | ||
670 | if (_generalization_of.get_notlogic()) | ||
671 | { | ||
672 | cond << "adjective_id NOT IN"; | ||
673 | } else { | ||
674 | cond << "adjective_id IN"; | ||
675 | } | ||
676 | |||
677 | cond << "(SELECT general_id FROM specification WHERE "; | ||
678 | |||
679 | std::function<std::string (filter<adjective>, bool)> recur = [&] (filter<adjective> f, bool notlogic) -> std::string { | ||
680 | switch (f.get_type()) | ||
681 | { | ||
682 | case filter<adjective>::type::singleton: | ||
683 | { | ||
684 | bindings.emplace_back(f.get_elem()._id); | ||
685 | |||
686 | if (notlogic == f.get_notlogic()) | ||
687 | { | ||
688 | return "specific_id = ?"; | ||
689 | } else { | ||
690 | return "specific_id != ?"; | ||
691 | } | ||
692 | } | ||
693 | |||
694 | case filter<adjective>::type::group: | ||
695 | { | ||
696 | bool truelogic = notlogic != f.get_notlogic(); | ||
697 | |||
698 | std::list<std::string> clauses; | ||
699 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<adjective> f2) { | ||
700 | return recur(f2, truelogic); | ||
701 | }); | ||
702 | |||
703 | if (truelogic == f.get_orlogic()) | ||
704 | { | ||
705 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
706 | } else { | ||
707 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
708 | } | ||
709 | } | ||
710 | } | ||
711 | }; | ||
712 | |||
713 | cond << recur(_generalization_of, _generalization_of.get_notlogic()); | ||
714 | cond << ")"; | ||
715 | conditions.push_back(cond.str()); | ||
716 | } | ||
717 | |||
718 | if (_is_specification) | ||
719 | { | ||
720 | conditions.push_back("adjective_id IN (SELECT specific_id FROM specification)"); | ||
721 | } | ||
722 | |||
723 | if (!_specification_of.empty()) | ||
724 | { | ||
725 | std::stringstream cond; | ||
726 | if (_specification_of.get_notlogic()) | ||
727 | { | ||
728 | cond << "adjective_id NOT IN"; | ||
729 | } else { | ||
730 | cond << "adjective_id IN"; | ||
731 | } | ||
732 | |||
733 | cond << "(SELECT specific_id FROM specification WHERE "; | ||
734 | |||
735 | std::function<std::string (filter<adjective>, bool)> recur = [&] (filter<adjective> f, bool notlogic) -> std::string { | ||
736 | switch (f.get_type()) | ||
737 | { | ||
738 | case filter<adjective>::type::singleton: | ||
739 | { | ||
740 | bindings.emplace_back(f.get_elem()._id); | ||
741 | |||
742 | if (notlogic == f.get_notlogic()) | ||
743 | { | ||
744 | return "general_id = ?"; | ||
745 | } else { | ||
746 | return "general_id != ?"; | ||
747 | } | ||
748 | } | ||
749 | |||
750 | case filter<adjective>::type::group: | ||
751 | { | ||
752 | bool truelogic = notlogic != f.get_notlogic(); | ||
753 | |||
754 | std::list<std::string> clauses; | ||
755 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<adjective> f2) { | ||
756 | return recur(f2, truelogic); | ||
757 | }); | ||
758 | |||
759 | if (truelogic == f.get_orlogic()) | ||
760 | { | ||
761 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
762 | } else { | ||
763 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
764 | } | ||
765 | } | ||
766 | } | ||
767 | }; | ||
768 | |||
769 | cond << recur(_specification_of, _specification_of.get_notlogic()); | ||
770 | cond << ")"; | ||
771 | conditions.push_back(cond.str()); | ||
772 | } | ||
773 | |||
774 | if (_is_pertainymic) | ||
775 | { | ||
776 | conditions.push_back("adjective_id IN (SELECT pertainym_id FROM pertainymy)"); | ||
777 | } | ||
778 | |||
779 | if (!_pertainym_of.empty()) | ||
780 | { | ||
781 | std::stringstream cond; | ||
782 | if (_pertainym_of.get_notlogic()) | ||
783 | { | ||
784 | cond << "adjective_id NOT IN"; | ||
785 | } else { | ||
786 | cond << "adjective_id IN"; | ||
787 | } | ||
788 | |||
789 | cond << "(SELECT pertainym_id FROM pertainymy WHERE "; | ||
790 | |||
791 | std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string { | ||
792 | switch (f.get_type()) | ||
793 | { | ||
794 | case filter<noun>::type::singleton: | ||
795 | { | ||
796 | bindings.emplace_back(f.get_elem()._id); | ||
797 | |||
798 | if (notlogic == f.get_notlogic()) | ||
799 | { | ||
800 | return "noun_id = ?"; | ||
801 | } else { | ||
802 | return "noun_id != ?"; | ||
803 | } | ||
804 | } | ||
805 | |||
806 | case filter<noun>::type::group: | ||
807 | { | ||
808 | bool truelogic = notlogic != f.get_notlogic(); | ||
809 | |||
810 | std::list<std::string> clauses; | ||
811 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) { | ||
812 | return recur(f2, truelogic); | ||
813 | }); | ||
814 | |||
815 | if (truelogic == f.get_orlogic()) | ||
816 | { | ||
817 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
818 | } else { | ||
819 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
820 | } | ||
821 | } | ||
822 | } | ||
823 | }; | ||
824 | |||
825 | cond << recur(_pertainym_of, _pertainym_of.get_notlogic()); | ||
826 | cond << ")"; | ||
827 | conditions.push_back(cond.str()); | ||
828 | } | ||
829 | |||
830 | if (_is_mannernymic) | ||
831 | { | ||
832 | conditions.push_back("adjective_id IN (SELECT adjective_id FROM mannernymy)"); | ||
833 | } | ||
834 | |||
835 | if (!_anti_mannernym_of.empty()) | ||
836 | { | ||
837 | std::stringstream cond; | ||
838 | if (_anti_mannernym_of.get_notlogic()) | ||
839 | { | ||
840 | cond << "adjective_id NOT IN"; | ||
841 | } else { | ||
842 | cond << "adjective_id IN"; | ||
843 | } | ||
844 | |||
845 | cond << "(SELECT adjective_id FROM mannernymy WHERE "; | ||
846 | |||
847 | std::function<std::string (filter<adverb>, bool)> recur = [&] (filter<adverb> f, bool notlogic) -> std::string { | ||
848 | switch (f.get_type()) | ||
849 | { | ||
850 | case filter<adverb>::type::singleton: | ||
851 | { | ||
852 | bindings.emplace_back(f.get_elem()._id); | ||
853 | |||
854 | if (notlogic == f.get_notlogic()) | ||
855 | { | ||
856 | return "mannernym_id = ?"; | ||
857 | } else { | ||
858 | return "mannernym_id != ?"; | ||
859 | } | ||
860 | } | ||
861 | |||
862 | case filter<adverb>::type::group: | ||
863 | { | ||
864 | bool truelogic = notlogic != f.get_notlogic(); | ||
865 | |||
866 | std::list<std::string> clauses; | ||
867 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<adverb> f2) { | ||
868 | return recur(f2, truelogic); | ||
869 | }); | ||
870 | |||
871 | if (truelogic == f.get_orlogic()) | ||
872 | { | ||
873 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
874 | } else { | ||
875 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
876 | } | ||
877 | } | ||
878 | } | ||
879 | }; | ||
880 | |||
881 | cond << recur(_anti_mannernym_of, _anti_mannernym_of.get_notlogic()); | ||
882 | cond << ")"; | ||
883 | conditions.push_back(cond.str()); | ||
884 | } | ||
885 | /* | ||
886 | if (!_derived_from_adjective.empty()) | ||
887 | { | ||
888 | std::list<std::string> clauses(_derived_from_adjective.size(), "adjective_2_id = @DERADJ"); | ||
889 | std::string cond = "adjective_id IN (SELECT adjective_1_id FROM adjective_adjective_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
890 | conditions.push_back(cond); | ||
891 | } | ||
892 | |||
893 | if (!_not_derived_from_adjective.empty()) | ||
894 | { | ||
895 | std::list<std::string> clauses(_not_derived_from_adjective.size(), "adjective_2_id = @NDERADJ"); | ||
896 | std::string cond = "adjective_id NOT IN (SELECT adjective_1_id FROM adjective_adjective_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
897 | conditions.push_back(cond); | ||
898 | } | ||
899 | |||
900 | if (!_derived_from_adverb.empty()) | ||
901 | { | ||
902 | std::list<std::string> clauses(_derived_from_adverb.size(), "adverb_id = @DERADV"); | ||
903 | std::string cond = "adjective_id IN (SELECT adjective_id FROM adjective_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
904 | conditions.push_back(cond); | ||
905 | } | ||
906 | |||
907 | if (!_not_derived_from_adverb.empty()) | ||
908 | { | ||
909 | std::list<std::string> clauses(_not_derived_from_adverb.size(), "adverb_id = @NDERADV"); | ||
910 | std::string cond = "adjective_id NOT IN (SELECT adjective_id FROM adjective_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
911 | conditions.push_back(cond); | ||
912 | } | ||
913 | |||
914 | if (!_derived_from_noun.empty()) | ||
915 | { | ||
916 | std::list<std::string> clauses(_derived_from_noun.size(), "noun_id = @DERN"); | ||
917 | std::string cond = "adjective_id IN (SELECT adjective_id FROM noun_adjective_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
918 | conditions.push_back(cond); | ||
919 | } | ||
920 | |||
921 | if (!_not_derived_from_noun.empty()) | ||
922 | { | ||
923 | std::list<std::string> clauses(_not_derived_from_noun.size(), "noun_id = @NDERN"); | ||
924 | std::string cond = "adjective_id NOT IN (SELECT adjective_id FROM noun_adjective_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
925 | conditions.push_back(cond); | ||
926 | }*/ | ||
927 | |||
928 | if (!conditions.empty()) | ||
929 | { | ||
930 | construct << " WHERE "; | ||
931 | construct << verbly::implode(std::begin(conditions), std::end(conditions), " AND "); | ||
932 | } | ||
933 | |||
934 | if (_random) | ||
935 | { | ||
936 | construct << " ORDER BY RANDOM()"; | ||
937 | } | ||
938 | |||
939 | if (_limit != unlimited) | ||
940 | { | ||
941 | construct << " LIMIT " << _limit; | ||
942 | } | ||
943 | |||
944 | sqlite3_stmt* ppstmt; | ||
945 | std::string query = construct.str(); | ||
946 | if (sqlite3_prepare_v2(_data.ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
947 | { | ||
948 | throw std::runtime_error(sqlite3_errmsg(_data.ppdb)); | ||
949 | } | ||
950 | |||
951 | int i = 1; | ||
952 | for (auto& binding : bindings) | ||
953 | { | ||
954 | switch (binding.get_type()) | ||
955 | { | ||
956 | case binding::type::integer: | ||
957 | { | ||
958 | sqlite3_bind_int(ppstmt, i, binding.get_integer()); | ||
959 | |||
960 | break; | ||
961 | } | ||
962 | |||
963 | case binding::type::string: | ||
964 | { | ||
965 | sqlite3_bind_text(ppstmt, i, binding.get_string().c_str(), binding.get_string().length(), SQLITE_TRANSIENT); | ||
966 | |||
967 | break; | ||
968 | } | ||
969 | } | ||
970 | |||
971 | i++; | ||
972 | } | ||
973 | |||
974 | /* | ||
975 | for (auto adj : _derived_from_adjective) | ||
976 | { | ||
977 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@DERADJ"), adj._id); | ||
978 | } | ||
979 | |||
980 | for (auto adj : _not_derived_from_adjective) | ||
981 | { | ||
982 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@NDERADJ"), adj._id); | ||
983 | } | ||
984 | |||
985 | for (auto adv : _derived_from_adverb) | ||
986 | { | ||
987 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@DERADV"), adv._id); | ||
988 | } | ||
989 | |||
990 | for (auto adv : _not_derived_from_adverb) | ||
991 | { | ||
992 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@NDERADV"), adv._id); | ||
993 | } | ||
994 | |||
995 | for (auto n : _derived_from_noun) | ||
996 | { | ||
997 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@DERN"), n._id); | ||
998 | } | ||
999 | |||
1000 | for (auto n : _not_derived_from_noun) | ||
1001 | { | ||
1002 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@NDERN"), n._id); | ||
1003 | } | ||
1004 | */ | ||
1005 | std::list<adjective> output; | ||
1006 | while (sqlite3_step(ppstmt) == SQLITE_ROW) | ||
1007 | { | ||
1008 | adjective tnc {_data, sqlite3_column_int(ppstmt, 0)}; | ||
1009 | tnc._base_form = std::string(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 1))); | ||
1010 | |||
1011 | if (sqlite3_column_type(ppstmt, 2) != SQLITE_NULL) | ||
1012 | { | ||
1013 | tnc._comparative_form = std::string(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 2))); | ||
1014 | } | ||
1015 | |||
1016 | if (sqlite3_column_type(ppstmt, 3) != SQLITE_NULL) | ||
1017 | { | ||
1018 | tnc._superlative_form = std::string(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 3))); | ||
1019 | } | ||
1020 | |||
1021 | if (sqlite3_column_type(ppstmt, 4) != SQLITE_NULL) | ||
1022 | { | ||
1023 | std::string adjpos(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 4))); | ||
1024 | if (adjpos == "p") | ||
1025 | { | ||
1026 | tnc._position = adjective::positioning::predicate; | ||
1027 | } else if (adjpos == "a") | ||
1028 | { | ||
1029 | tnc._position = adjective::positioning::attributive; | ||
1030 | } else if (adjpos == "i") | ||
1031 | { | ||
1032 | tnc._position = adjective::positioning::postnominal; | ||
1033 | } | ||
1034 | } | ||
1035 | |||
1036 | output.push_back(tnc); | ||
1037 | } | ||
1038 | |||
1039 | sqlite3_finalize(ppstmt); | ||
1040 | |||
1041 | for (auto& adjective : output) | ||
1042 | { | ||
1043 | query = "SELECT pronunciation, prerhyme, rhyme FROM adjective_pronunciations WHERE adjective_id = ?"; | ||
1044 | if (sqlite3_prepare_v2(_data.ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
1045 | { | ||
1046 | throw std::runtime_error(sqlite3_errmsg(_data.ppdb)); | ||
1047 | } | ||
1048 | |||
1049 | sqlite3_bind_int(ppstmt, 1, adjective._id); | ||
1050 | |||
1051 | while (sqlite3_step(ppstmt) == SQLITE_ROW) | ||
1052 | { | ||
1053 | std::string pronunciation(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 0))); | ||
1054 | auto phonemes = verbly::split<std::list<std::string>>(pronunciation, " "); | ||
1055 | |||
1056 | adjective.pronunciations.push_back(phonemes); | ||
1057 | |||
1058 | if ((sqlite3_column_type(ppstmt, 1) != SQLITE_NULL) && (sqlite3_column_type(ppstmt, 2) != SQLITE_NULL)) | ||
1059 | { | ||
1060 | std::string prerhyme(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 1))); | ||
1061 | std::string rhyming(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 2))); | ||
1062 | adjective.rhymes.emplace_back(prerhyme, rhyming); | ||
1063 | } | ||
1064 | } | ||
1065 | |||
1066 | sqlite3_finalize(ppstmt); | ||
1067 | } | ||
1068 | |||
1069 | return output; | ||
1070 | } | ||
1071 | |||
1072 | }; | ||
diff --git a/lib/adjective_query.h b/lib/adjective_query.h deleted file mode 100644 index e6a6609..0000000 --- a/lib/adjective_query.h +++ /dev/null | |||
@@ -1,112 +0,0 @@ | |||
1 | #ifndef ADJECTIVE_QUERY_H_05E590FD | ||
2 | #define ADJECTIVE_QUERY_H_05E590FD | ||
3 | |||
4 | namespace verbly { | ||
5 | |||
6 | class adjective_query { | ||
7 | public: | ||
8 | adjective_query(const data& _data); | ||
9 | |||
10 | adjective_query& limit(int _limit); | ||
11 | adjective_query& random(); | ||
12 | adjective_query& except(const adjective& _word); | ||
13 | adjective_query& rhymes_with(const word& _word); | ||
14 | adjective_query& rhymes_with(rhyme _r); | ||
15 | adjective_query& has_pronunciation(); | ||
16 | adjective_query& has_rhyming_noun(); | ||
17 | adjective_query& has_rhyming_adjective(); | ||
18 | adjective_query& has_rhyming_adverb(); | ||
19 | adjective_query& has_rhyming_verb(); | ||
20 | adjective_query& with_stress(filter<std::vector<bool>> _arg); | ||
21 | |||
22 | adjective_query& requires_comparative_form(); | ||
23 | adjective_query& requires_superlative_form(); | ||
24 | adjective_query& position(adjective::positioning pos); | ||
25 | |||
26 | adjective_query& with_prefix(filter<std::string> _f); | ||
27 | adjective_query& with_suffix(filter<std::string> _f); | ||
28 | |||
29 | adjective_query& with_complexity(int _arg); | ||
30 | |||
31 | adjective_query& is_variant(); | ||
32 | adjective_query& variant_of(filter<noun> _f); | ||
33 | |||
34 | adjective_query& has_antonyms(); | ||
35 | adjective_query& antonym_of(filter<adjective> _f); | ||
36 | |||
37 | adjective_query& has_synonyms(); | ||
38 | adjective_query& synonym_of(filter<adjective> _f); | ||
39 | |||
40 | adjective_query& is_generalization(); | ||
41 | adjective_query& generalization_of(filter<adjective> _f); | ||
42 | |||
43 | adjective_query& is_specification(); | ||
44 | adjective_query& specification_of(filter<adjective> _f); | ||
45 | |||
46 | adjective_query& is_pertainymic(); | ||
47 | adjective_query& pertainym_of(filter<noun> _f); | ||
48 | |||
49 | adjective_query& is_mannernymic(); | ||
50 | adjective_query& anti_mannernym_of(filter<adverb> _f); | ||
51 | |||
52 | /* adjective_query& derived_from(const word& _w); | ||
53 | adjective_query& not_derived_from(const word& _w);*/ | ||
54 | |||
55 | std::list<adjective> run() const; | ||
56 | |||
57 | const static int unlimited = -1; | ||
58 | |||
59 | protected: | ||
60 | const data& _data; | ||
61 | int _limit = unlimited; | ||
62 | bool _random = false; | ||
63 | std::list<rhyme> _rhymes; | ||
64 | std::list<adjective> _except; | ||
65 | bool _has_prn = false; | ||
66 | bool _has_rhyming_noun = false; | ||
67 | bool _has_rhyming_adjective = false; | ||
68 | bool _has_rhyming_adverb = false; | ||
69 | bool _has_rhyming_verb = false; | ||
70 | filter<std::vector<bool>> _stress; | ||
71 | |||
72 | bool _requires_comparative_form = false; | ||
73 | bool _requires_superlative_form = false; | ||
74 | adjective::positioning _position = adjective::positioning::undefined; | ||
75 | |||
76 | filter<std::string> _with_prefix; | ||
77 | filter<std::string> _with_suffix; | ||
78 | |||
79 | int _with_complexity = unlimited; | ||
80 | |||
81 | bool _is_variant = false; | ||
82 | filter<noun> _variant_of; | ||
83 | |||
84 | bool _is_antonymic = false; | ||
85 | filter<adjective> _antonym_of; | ||
86 | |||
87 | bool _is_synonymic = false; | ||
88 | filter<adjective> _synonym_of; | ||
89 | |||
90 | bool _is_generalization = false; | ||
91 | filter<adjective> _generalization_of; | ||
92 | |||
93 | bool _is_specification = false; | ||
94 | filter<adjective> _specification_of; | ||
95 | |||
96 | bool _is_pertainymic = false; | ||
97 | filter<noun> _pertainym_of; | ||
98 | |||
99 | bool _is_mannernymic = false; | ||
100 | filter<adverb> _anti_mannernym_of; | ||
101 | |||
102 | /* std::list<adjective> _derived_from_adjective; | ||
103 | std::list<adjective> _not_derived_from_adjective; | ||
104 | std::list<adverb> _derived_from_adverb; | ||
105 | std::list<adverb> _not_derived_from_adverb; | ||
106 | std::list<noun> _derived_from_noun; | ||
107 | std::list<noun> _not_derived_from_noun;*/ | ||
108 | }; | ||
109 | |||
110 | }; | ||
111 | |||
112 | #endif /* end of include guard: ADJECTIVE_QUERY_H_05E590FD */ | ||
diff --git a/lib/adverb.cpp b/lib/adverb.cpp deleted file mode 100644 index 442574e..0000000 --- a/lib/adverb.cpp +++ /dev/null | |||
@@ -1,71 +0,0 @@ | |||
1 | #include "verbly.h" | ||
2 | |||
3 | namespace verbly { | ||
4 | |||
5 | adverb::adverb() | ||
6 | { | ||
7 | |||
8 | } | ||
9 | |||
10 | adverb::adverb(const data& _data, int _id) : word(_data, _id) | ||
11 | { | ||
12 | |||
13 | } | ||
14 | |||
15 | std::string adverb::base_form() const | ||
16 | { | ||
17 | assert(_valid == true); | ||
18 | |||
19 | return _base_form; | ||
20 | } | ||
21 | |||
22 | std::string adverb::comparative_form() const | ||
23 | { | ||
24 | assert(_valid == true); | ||
25 | |||
26 | return _comparative_form; | ||
27 | } | ||
28 | |||
29 | std::string adverb::superlative_form() const | ||
30 | { | ||
31 | assert(_valid == true); | ||
32 | |||
33 | return _superlative_form; | ||
34 | } | ||
35 | |||
36 | bool adverb::has_comparative_form() const | ||
37 | { | ||
38 | assert(_valid == true); | ||
39 | |||
40 | return !_comparative_form.empty(); | ||
41 | } | ||
42 | |||
43 | bool adverb::has_superlative_form() const | ||
44 | { | ||
45 | assert(_valid == true); | ||
46 | |||
47 | return !_superlative_form.empty(); | ||
48 | } | ||
49 | |||
50 | adverb_query adverb::antonyms() const | ||
51 | { | ||
52 | assert(_valid == true); | ||
53 | |||
54 | return _data->adverbs().antonym_of(*this); | ||
55 | } | ||
56 | |||
57 | adverb_query adverb::synonyms() const | ||
58 | { | ||
59 | assert(_valid == true); | ||
60 | |||
61 | return _data->adverbs().synonym_of(*this); | ||
62 | } | ||
63 | |||
64 | adjective_query adverb::anti_mannernyms() const | ||
65 | { | ||
66 | assert(_valid == true); | ||
67 | |||
68 | return _data->adjectives().anti_mannernym_of(*this); | ||
69 | } | ||
70 | |||
71 | }; | ||
diff --git a/lib/adverb.h b/lib/adverb.h deleted file mode 100644 index 56d4e28..0000000 --- a/lib/adverb.h +++ /dev/null | |||
@@ -1,35 +0,0 @@ | |||
1 | #ifndef ADVERB_H_86F8302F | ||
2 | #define ADVERB_H_86F8302F | ||
3 | |||
4 | namespace verbly { | ||
5 | |||
6 | class adverb : public word { | ||
7 | private: | ||
8 | std::string _base_form; | ||
9 | std::string _comparative_form; | ||
10 | std::string _superlative_form; | ||
11 | |||
12 | friend class adverb_query; | ||
13 | |||
14 | public: | ||
15 | adverb(); | ||
16 | adverb(const data& _data, int _id); | ||
17 | |||
18 | std::string base_form() const; | ||
19 | std::string comparative_form() const; | ||
20 | std::string superlative_form() const; | ||
21 | |||
22 | bool has_comparative_form() const; | ||
23 | bool has_superlative_form() const; | ||
24 | |||
25 | adverb_query antonyms() const; | ||
26 | adverb_query synonyms() const; | ||
27 | adjective_query anti_mannernyms() const; | ||
28 | |||
29 | adverb_query& derived_from(const word& _w); | ||
30 | adverb_query& not_derived_from(const word& _w); | ||
31 | }; | ||
32 | |||
33 | }; | ||
34 | |||
35 | #endif /* end of include guard: ADVERB_H_86F8302F */ | ||
diff --git a/lib/adverb_query.cpp b/lib/adverb_query.cpp deleted file mode 100644 index 3e62bb7..0000000 --- a/lib/adverb_query.cpp +++ /dev/null | |||
@@ -1,758 +0,0 @@ | |||
1 | #include "verbly.h" | ||
2 | |||
3 | namespace verbly { | ||
4 | |||
5 | adverb_query::adverb_query(const data& _data) : _data(_data) | ||
6 | { | ||
7 | |||
8 | } | ||
9 | |||
10 | adverb_query& adverb_query::limit(int _limit) | ||
11 | { | ||
12 | if ((_limit > 0) || (_limit == unlimited)) | ||
13 | { | ||
14 | this->_limit = _limit; | ||
15 | } | ||
16 | |||
17 | return *this; | ||
18 | } | ||
19 | |||
20 | adverb_query& adverb_query::random() | ||
21 | { | ||
22 | this->_random = true; | ||
23 | |||
24 | return *this; | ||
25 | } | ||
26 | |||
27 | adverb_query& adverb_query::except(const adverb& _word) | ||
28 | { | ||
29 | _except.push_back(_word); | ||
30 | |||
31 | return *this; | ||
32 | } | ||
33 | |||
34 | adverb_query& adverb_query::rhymes_with(const word& _word) | ||
35 | { | ||
36 | for (auto rhyme : _word.get_rhymes()) | ||
37 | { | ||
38 | _rhymes.push_back(rhyme); | ||
39 | } | ||
40 | |||
41 | if (dynamic_cast<const adverb*>(&_word) != nullptr) | ||
42 | { | ||
43 | _except.push_back(dynamic_cast<const adverb&>(_word)); | ||
44 | } | ||
45 | |||
46 | return *this; | ||
47 | } | ||
48 | |||
49 | adverb_query& adverb_query::rhymes_with(rhyme _r) | ||
50 | { | ||
51 | _rhymes.push_back(_r); | ||
52 | |||
53 | return *this; | ||
54 | } | ||
55 | |||
56 | adverb_query& adverb_query::has_pronunciation() | ||
57 | { | ||
58 | this->_has_prn = true; | ||
59 | |||
60 | return *this; | ||
61 | } | ||
62 | |||
63 | adverb_query& adverb_query::has_rhyming_noun() | ||
64 | { | ||
65 | _has_rhyming_noun = true; | ||
66 | |||
67 | return *this; | ||
68 | } | ||
69 | |||
70 | adverb_query& adverb_query::has_rhyming_adjective() | ||
71 | { | ||
72 | _has_rhyming_adjective = true; | ||
73 | |||
74 | return *this; | ||
75 | } | ||
76 | |||
77 | adverb_query& adverb_query::has_rhyming_adverb() | ||
78 | { | ||
79 | _has_rhyming_adverb = true; | ||
80 | |||
81 | return *this; | ||
82 | } | ||
83 | |||
84 | adverb_query& adverb_query::has_rhyming_verb() | ||
85 | { | ||
86 | _has_rhyming_verb = true; | ||
87 | |||
88 | return *this; | ||
89 | } | ||
90 | |||
91 | adverb_query& adverb_query::requires_comparative_form() | ||
92 | { | ||
93 | _requires_comparative_form = true; | ||
94 | |||
95 | return *this; | ||
96 | } | ||
97 | |||
98 | adverb_query& adverb_query::requires_superlative_form() | ||
99 | { | ||
100 | _requires_superlative_form = true; | ||
101 | |||
102 | return *this; | ||
103 | } | ||
104 | |||
105 | adverb_query& adverb_query::with_stress(filter<std::vector<bool>> _arg) | ||
106 | { | ||
107 | _stress = _arg; | ||
108 | |||
109 | return *this; | ||
110 | } | ||
111 | |||
112 | adverb_query& adverb_query::with_prefix(filter<std::string> _f) | ||
113 | { | ||
114 | _f.clean(); | ||
115 | _with_prefix = _f; | ||
116 | |||
117 | return *this; | ||
118 | } | ||
119 | |||
120 | adverb_query& adverb_query::with_suffix(filter<std::string> _f) | ||
121 | { | ||
122 | _f.clean(); | ||
123 | _with_suffix = _f; | ||
124 | |||
125 | return *this; | ||
126 | } | ||
127 | |||
128 | adverb_query& adverb_query::with_complexity(int _arg) | ||
129 | { | ||
130 | _with_complexity = _arg; | ||
131 | |||
132 | return *this; | ||
133 | } | ||
134 | |||
135 | adverb_query& adverb_query::has_antonyms() | ||
136 | { | ||
137 | _has_antonyms = true; | ||
138 | |||
139 | return *this; | ||
140 | } | ||
141 | |||
142 | adverb_query& adverb_query::antonym_of(filter<adverb> _f) | ||
143 | { | ||
144 | _f.clean(); | ||
145 | _antonym_of = _f; | ||
146 | |||
147 | return *this; | ||
148 | } | ||
149 | |||
150 | adverb_query& adverb_query::has_synonyms() | ||
151 | { | ||
152 | _has_synonyms = true; | ||
153 | |||
154 | return *this; | ||
155 | } | ||
156 | |||
157 | adverb_query& adverb_query::synonym_of(filter<adverb> _f) | ||
158 | { | ||
159 | _f.clean(); | ||
160 | _synonym_of = _f; | ||
161 | |||
162 | return *this; | ||
163 | } | ||
164 | |||
165 | adverb_query& adverb_query::is_mannernymic() | ||
166 | { | ||
167 | _is_mannernymic = true; | ||
168 | |||
169 | return *this; | ||
170 | } | ||
171 | |||
172 | adverb_query& adverb_query::mannernym_of(filter<adjective> _f) | ||
173 | { | ||
174 | _f.clean(); | ||
175 | _mannernym_of = _f; | ||
176 | |||
177 | return *this; | ||
178 | } | ||
179 | /* | ||
180 | adverb_query& adverb_query::derived_from(const word& _w) | ||
181 | { | ||
182 | if (dynamic_cast<const adjective*>(&_w) != nullptr) | ||
183 | { | ||
184 | _derived_from_adjective.push_back(dynamic_cast<const adjective&>(_w)); | ||
185 | } else if (dynamic_cast<const adverb*>(&_w) != nullptr) | ||
186 | { | ||
187 | _derived_from_adverb.push_back(dynamic_cast<const adverb&>(_w)); | ||
188 | } else if (dynamic_cast<const noun*>(&_w) != nullptr) | ||
189 | { | ||
190 | _derived_from_noun.push_back(dynamic_cast<const noun&>(_w)); | ||
191 | } | ||
192 | |||
193 | return *this; | ||
194 | } | ||
195 | |||
196 | adverb_query& adverb_query::not_derived_from(const word& _w) | ||
197 | { | ||
198 | if (dynamic_cast<const adjective*>(&_w) != nullptr) | ||
199 | { | ||
200 | _not_derived_from_adjective.push_back(dynamic_cast<const adjective&>(_w)); | ||
201 | } else if (dynamic_cast<const adverb*>(&_w) != nullptr) | ||
202 | { | ||
203 | _not_derived_from_adverb.push_back(dynamic_cast<const adverb&>(_w)); | ||
204 | } else if (dynamic_cast<const noun*>(&_w) != nullptr) | ||
205 | { | ||
206 | _not_derived_from_noun.push_back(dynamic_cast<const noun&>(_w)); | ||
207 | } | ||
208 | |||
209 | return *this; | ||
210 | } | ||
211 | */ | ||
212 | std::list<adverb> adverb_query::run() const | ||
213 | { | ||
214 | std::stringstream construct; | ||
215 | construct << "SELECT adverb_id, base_form, comparative, superlative FROM adverbs"; | ||
216 | std::list<std::string> conditions; | ||
217 | std::list<binding> bindings; | ||
218 | |||
219 | if (_has_prn) | ||
220 | { | ||
221 | conditions.push_back("adverb_id IN (SELECT adverb_id FROM adverb_pronunciations)"); | ||
222 | } | ||
223 | |||
224 | if (!_rhymes.empty()) | ||
225 | { | ||
226 | std::list<std::string> clauses(_rhymes.size(), "(prerhyme != ? AND rhyme = ?)"); | ||
227 | std::string cond = "adverb_id IN (SELECT adverb_id FROM adverb_pronunciations WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
228 | conditions.push_back(cond); | ||
229 | |||
230 | for (auto rhy : _rhymes) | ||
231 | { | ||
232 | bindings.emplace_back(rhy.get_prerhyme()); | ||
233 | bindings.emplace_back(rhy.get_rhyme()); | ||
234 | } | ||
235 | } | ||
236 | |||
237 | if (_has_rhyming_noun) | ||
238 | { | ||
239 | conditions.push_back("adverb_id IN (SELECT a.adverb_id FROM adverbs AS a INNER JOIN adverb_pronunciations AS curp ON curp.noun_id = a.adverb_id INNER JOIN noun_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)"); | ||
240 | } | ||
241 | |||
242 | if (_has_rhyming_adjective) | ||
243 | { | ||
244 | conditions.push_back("adverb_id IN (SELECT a.adverb_id FROM adverbs AS a INNER JOIN adverb_pronunciations AS curp ON curp.noun_id = a.adverb_id INNER JOIN adjective_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)"); | ||
245 | } | ||
246 | |||
247 | if (_has_rhyming_adverb) | ||
248 | { | ||
249 | conditions.push_back("adverb_id IN (SELECT a.adverb_id FROM adverbs AS a INNER JOIN adverb_pronunciations AS curp ON curp.noun_id = a.adverb_id INNER JOIN adverb_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme AND rhmp.adverb_id != curp.adverb_id)"); | ||
250 | } | ||
251 | |||
252 | if (_has_rhyming_verb) | ||
253 | { | ||
254 | conditions.push_back("adverb_id IN (SELECT a.adverb_id FROM adverbs AS a INNER JOIN adverb_pronunciations AS curp ON curp.noun_id = a.adverb_id INNER JOIN verb_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)"); | ||
255 | } | ||
256 | |||
257 | for (auto except : _except) | ||
258 | { | ||
259 | conditions.push_back("adverb_id != ?"); | ||
260 | bindings.emplace_back(except._id); | ||
261 | } | ||
262 | |||
263 | if (_requires_comparative_form) | ||
264 | { | ||
265 | conditions.push_back("comparative IS NOT NULL"); | ||
266 | } | ||
267 | |||
268 | if (_requires_superlative_form) | ||
269 | { | ||
270 | conditions.push_back("superlative IS NOT NULL"); | ||
271 | } | ||
272 | |||
273 | if (!_stress.empty()) | ||
274 | { | ||
275 | std::stringstream cond; | ||
276 | if (_stress.get_notlogic()) | ||
277 | { | ||
278 | cond << "adverb_id NOT IN"; | ||
279 | } else { | ||
280 | cond << "adverb_id IN"; | ||
281 | } | ||
282 | |||
283 | cond << "(SELECT adverb_id FROM adverb_pronunciations WHERE "; | ||
284 | |||
285 | std::function<std::string (filter<std::vector<bool>>, bool)> recur = [&] (filter<std::vector<bool>> f, bool notlogic) -> std::string { | ||
286 | switch (f.get_type()) | ||
287 | { | ||
288 | case filter<std::vector<bool>>::type::singleton: | ||
289 | { | ||
290 | std::ostringstream _val; | ||
291 | for (auto syl : f.get_elem()) | ||
292 | { | ||
293 | if (syl) | ||
294 | { | ||
295 | _val << "1"; | ||
296 | } else { | ||
297 | _val << "0"; | ||
298 | } | ||
299 | } | ||
300 | |||
301 | bindings.emplace_back(_val.str()); | ||
302 | |||
303 | if (notlogic == f.get_notlogic()) | ||
304 | { | ||
305 | return "stress = ?"; | ||
306 | } else { | ||
307 | return "stress != ?"; | ||
308 | } | ||
309 | } | ||
310 | |||
311 | case filter<std::vector<bool>>::type::group: | ||
312 | { | ||
313 | bool truelogic = notlogic != f.get_notlogic(); | ||
314 | |||
315 | std::list<std::string> clauses; | ||
316 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<std::vector<bool>> f2) { | ||
317 | return recur(f2, truelogic); | ||
318 | }); | ||
319 | |||
320 | if (truelogic == f.get_orlogic()) | ||
321 | { | ||
322 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
323 | } else { | ||
324 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
325 | } | ||
326 | } | ||
327 | } | ||
328 | }; | ||
329 | |||
330 | cond << recur(_stress, _stress.get_notlogic()); | ||
331 | cond << ")"; | ||
332 | conditions.push_back(cond.str()); | ||
333 | } | ||
334 | |||
335 | if (!_with_prefix.empty()) | ||
336 | { | ||
337 | std::function<std::string (filter<std::string>, bool)> recur = [&] (filter<std::string> f, bool notlogic) -> std::string { | ||
338 | switch (f.get_type()) | ||
339 | { | ||
340 | case filter<std::string>::type::singleton: | ||
341 | { | ||
342 | bindings.emplace_back(f.get_elem() + "%"); | ||
343 | |||
344 | if (notlogic == f.get_notlogic()) | ||
345 | { | ||
346 | return "base_form LIKE ?"; | ||
347 | } else { | ||
348 | return "base_form NOT LIKE ?"; | ||
349 | } | ||
350 | } | ||
351 | |||
352 | case filter<std::string>::type::group: | ||
353 | { | ||
354 | bool truelogic = notlogic != f.get_notlogic(); | ||
355 | |||
356 | std::list<std::string> clauses; | ||
357 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<std::string> f2) { | ||
358 | return recur(f2, truelogic); | ||
359 | }); | ||
360 | |||
361 | if (truelogic == f.get_orlogic()) | ||
362 | { | ||
363 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
364 | } else { | ||
365 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
366 | } | ||
367 | } | ||
368 | } | ||
369 | }; | ||
370 | |||
371 | conditions.push_back(recur(_with_prefix, false)); | ||
372 | } | ||
373 | |||
374 | if (!_with_suffix.empty()) | ||
375 | { | ||
376 | std::function<std::string (filter<std::string>, bool)> recur = [&] (filter<std::string> f, bool notlogic) -> std::string { | ||
377 | switch (f.get_type()) | ||
378 | { | ||
379 | case filter<std::string>::type::singleton: | ||
380 | { | ||
381 | bindings.emplace_back("%" + f.get_elem()); | ||
382 | |||
383 | if (notlogic == f.get_notlogic()) | ||
384 | { | ||
385 | return "base_form LIKE ?"; | ||
386 | } else { | ||
387 | return "base_form NOT LIKE ?"; | ||
388 | } | ||
389 | } | ||
390 | |||
391 | case filter<std::string>::type::group: | ||
392 | { | ||
393 | bool truelogic = notlogic != f.get_notlogic(); | ||
394 | |||
395 | std::list<std::string> clauses; | ||
396 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<std::string> f2) { | ||
397 | return recur(f2, truelogic); | ||
398 | }); | ||
399 | |||
400 | if (truelogic == f.get_orlogic()) | ||
401 | { | ||
402 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
403 | } else { | ||
404 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
405 | } | ||
406 | } | ||
407 | } | ||
408 | }; | ||
409 | |||
410 | conditions.push_back(recur(_with_suffix, false)); | ||
411 | } | ||
412 | |||
413 | if (_with_complexity != unlimited) | ||
414 | { | ||
415 | conditions.push_back("complexity = ?"); | ||
416 | bindings.emplace_back(_with_complexity); | ||
417 | } | ||
418 | |||
419 | if (_has_antonyms) | ||
420 | { | ||
421 | conditions.push_back("adverb_id IN (SELECT adverb_2_id FROM adverb_antonymy)"); | ||
422 | } | ||
423 | |||
424 | if (!_antonym_of.empty()) | ||
425 | { | ||
426 | std::stringstream cond; | ||
427 | if (_antonym_of.get_notlogic()) | ||
428 | { | ||
429 | cond << "adverb_id NOT IN"; | ||
430 | } else { | ||
431 | cond << "adverb_id IN"; | ||
432 | } | ||
433 | |||
434 | cond << "(SELECT adverb_2_id FROM adverb_antonymy WHERE "; | ||
435 | |||
436 | std::function<std::string (filter<adverb>, bool)> recur = [&] (filter<adverb> f, bool notlogic) -> std::string { | ||
437 | switch (f.get_type()) | ||
438 | { | ||
439 | case filter<adverb>::type::singleton: | ||
440 | { | ||
441 | bindings.emplace_back(f.get_elem()._id); | ||
442 | |||
443 | if (notlogic == f.get_notlogic()) | ||
444 | { | ||
445 | return "adverb_1_id = ?"; | ||
446 | } else { | ||
447 | return "adverb_1_id != ?"; | ||
448 | } | ||
449 | } | ||
450 | |||
451 | case filter<adverb>::type::group: | ||
452 | { | ||
453 | bool truelogic = notlogic != f.get_notlogic(); | ||
454 | |||
455 | std::list<std::string> clauses; | ||
456 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<adverb> f2) { | ||
457 | return recur(f2, truelogic); | ||
458 | }); | ||
459 | |||
460 | if (truelogic == f.get_orlogic()) | ||
461 | { | ||
462 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
463 | } else { | ||
464 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
465 | } | ||
466 | } | ||
467 | } | ||
468 | }; | ||
469 | |||
470 | cond << recur(_antonym_of, _antonym_of.get_notlogic()); | ||
471 | cond << ")"; | ||
472 | conditions.push_back(cond.str()); | ||
473 | } | ||
474 | |||
475 | if (_has_synonyms) | ||
476 | { | ||
477 | conditions.push_back("adverb_id IN (SELECT adverb_2_id FROM adverb_synonymy)"); | ||
478 | } | ||
479 | |||
480 | if (!_synonym_of.empty()) | ||
481 | { | ||
482 | std::stringstream cond; | ||
483 | if (_antonym_of.get_notlogic()) | ||
484 | { | ||
485 | cond << "adverb_id NOT IN"; | ||
486 | } else { | ||
487 | cond << "adverb_id IN"; | ||
488 | } | ||
489 | |||
490 | cond << "(SELECT adverb_2_id FROM adverb_synonymy WHERE "; | ||
491 | |||
492 | std::function<std::string (filter<adverb>, bool)> recur = [&] (filter<adverb> f, bool notlogic) -> std::string { | ||
493 | switch (f.get_type()) | ||
494 | { | ||
495 | case filter<adverb>::type::singleton: | ||
496 | { | ||
497 | bindings.emplace_back(f.get_elem()._id); | ||
498 | |||
499 | if (notlogic == f.get_notlogic()) | ||
500 | { | ||
501 | return "adverb_1_id = ?"; | ||
502 | } else { | ||
503 | return "adverb_1_id != ?"; | ||
504 | } | ||
505 | } | ||
506 | |||
507 | case filter<adverb>::type::group: | ||
508 | { | ||
509 | bool truelogic = notlogic != f.get_notlogic(); | ||
510 | |||
511 | std::list<std::string> clauses; | ||
512 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<adverb> f2) { | ||
513 | return recur(f2, truelogic); | ||
514 | }); | ||
515 | |||
516 | if (truelogic == f.get_orlogic()) | ||
517 | { | ||
518 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
519 | } else { | ||
520 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
521 | } | ||
522 | } | ||
523 | } | ||
524 | }; | ||
525 | |||
526 | cond << recur(_synonym_of, _synonym_of.get_notlogic()); | ||
527 | cond << ")"; | ||
528 | conditions.push_back(cond.str()); | ||
529 | } | ||
530 | |||
531 | if (_is_mannernymic) | ||
532 | { | ||
533 | conditions.push_back("adverb_id IN (SELECT mannernym_id FROM mannernymy)"); | ||
534 | } | ||
535 | |||
536 | if (!_mannernym_of.empty()) | ||
537 | { | ||
538 | std::stringstream cond; | ||
539 | if (_antonym_of.get_notlogic()) | ||
540 | { | ||
541 | cond << "adverb_id NOT IN"; | ||
542 | } else { | ||
543 | cond << "adverb_id IN"; | ||
544 | } | ||
545 | |||
546 | cond << "(SELECT mannernym_id FROM mannernymy WHERE "; | ||
547 | |||
548 | std::function<std::string (filter<adjective>, bool)> recur = [&] (filter<adjective> f, bool notlogic) -> std::string { | ||
549 | switch (f.get_type()) | ||
550 | { | ||
551 | case filter<adjective>::type::singleton: | ||
552 | { | ||
553 | bindings.emplace_back(f.get_elem()._id); | ||
554 | |||
555 | if (notlogic == f.get_notlogic()) | ||
556 | { | ||
557 | return "adjective_id = ?"; | ||
558 | } else { | ||
559 | return "adjective_id != ?"; | ||
560 | } | ||
561 | } | ||
562 | |||
563 | case filter<adjective>::type::group: | ||
564 | { | ||
565 | bool truelogic = notlogic != f.get_notlogic(); | ||
566 | |||
567 | std::list<std::string> clauses; | ||
568 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<adjective> f2) { | ||
569 | return recur(f2, truelogic); | ||
570 | }); | ||
571 | |||
572 | if (truelogic == f.get_orlogic()) | ||
573 | { | ||
574 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
575 | } else { | ||
576 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
577 | } | ||
578 | } | ||
579 | } | ||
580 | }; | ||
581 | |||
582 | cond << recur(_mannernym_of, _mannernym_of.get_notlogic()); | ||
583 | cond << ")"; | ||
584 | conditions.push_back(cond.str()); | ||
585 | } | ||
586 | |||
587 | /* if (!_derived_from_adjective.empty()) | ||
588 | { | ||
589 | std::list<std::string> clauses(_derived_from_adjective.size(), "adjective_id = @DERADJ"); | ||
590 | std::string cond = "adverb_id IN (SELECT adverb_id FROM adjective_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
591 | conditions.push_back(cond); | ||
592 | } | ||
593 | |||
594 | if (!_not_derived_from_adjective.empty()) | ||
595 | { | ||
596 | std::list<std::string> clauses(_not_derived_from_adjective.size(), "adjective_id = @NDERADJ"); | ||
597 | std::string cond = "adverb_id NOT IN (SELECT adverb_id FROM adjective_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
598 | conditions.push_back(cond); | ||
599 | } | ||
600 | |||
601 | if (!_derived_from_adverb.empty()) | ||
602 | { | ||
603 | std::list<std::string> clauses(_derived_from_adverb.size(), "adverb_2_id = @DERADV"); | ||
604 | std::string cond = "adverb_id IN (SELECT adverb_1_id FROM adverb_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
605 | conditions.push_back(cond); | ||
606 | } | ||
607 | |||
608 | if (!_not_derived_from_adverb.empty()) | ||
609 | { | ||
610 | std::list<std::string> clauses(_not_derived_from_adverb.size(), "adverb_2_id = @NDERADV"); | ||
611 | std::string cond = "adverb_id NOT IN (SELECT adverb_1_id FROM adverb_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
612 | conditions.push_back(cond); | ||
613 | } | ||
614 | |||
615 | if (!_derived_from_noun.empty()) | ||
616 | { | ||
617 | std::list<std::string> clauses(_derived_from_noun.size(), "noun_id = @DERN"); | ||
618 | std::string cond = "adverb_id IN (SELECT adverb_id FROM noun_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
619 | conditions.push_back(cond); | ||
620 | } | ||
621 | |||
622 | if (!_not_derived_from_noun.empty()) | ||
623 | { | ||
624 | std::list<std::string> clauses(_not_derived_from_noun.size(), "noun_id = @NDERN"); | ||
625 | std::string cond = "adverb_id NOT IN (SELECT adverb_id FROM noun_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
626 | conditions.push_back(cond); | ||
627 | }*/ | ||
628 | |||
629 | if (!conditions.empty()) | ||
630 | { | ||
631 | construct << " WHERE "; | ||
632 | construct << verbly::implode(std::begin(conditions), std::end(conditions), " AND "); | ||
633 | } | ||
634 | |||
635 | if (_random) | ||
636 | { | ||
637 | construct << " ORDER BY RANDOM()"; | ||
638 | } | ||
639 | |||
640 | if (_limit != unlimited) | ||
641 | { | ||
642 | construct << " LIMIT " << _limit; | ||
643 | } | ||
644 | |||
645 | sqlite3_stmt* ppstmt; | ||
646 | std::string query = construct.str(); | ||
647 | if (sqlite3_prepare_v2(_data.ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
648 | { | ||
649 | throw std::runtime_error(sqlite3_errmsg(_data.ppdb)); | ||
650 | } | ||
651 | |||
652 | int i = 1; | ||
653 | for (auto& binding : bindings) | ||
654 | { | ||
655 | switch (binding.get_type()) | ||
656 | { | ||
657 | case binding::type::integer: | ||
658 | { | ||
659 | sqlite3_bind_int(ppstmt, i, binding.get_integer()); | ||
660 | |||
661 | break; | ||
662 | } | ||
663 | |||
664 | case binding::type::string: | ||
665 | { | ||
666 | sqlite3_bind_text(ppstmt, i, binding.get_string().c_str(), binding.get_string().length(), SQLITE_TRANSIENT); | ||
667 | |||
668 | break; | ||
669 | } | ||
670 | } | ||
671 | |||
672 | i++; | ||
673 | } | ||
674 | |||
675 | /* | ||
676 | for (auto adj : _derived_from_adjective) | ||
677 | { | ||
678 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@DERADJ"), adj._id); | ||
679 | } | ||
680 | |||
681 | for (auto adj : _not_derived_from_adjective) | ||
682 | { | ||
683 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@NDERADJ"), adj._id); | ||
684 | } | ||
685 | |||
686 | for (auto adv : _derived_from_adverb) | ||
687 | { | ||
688 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@DERADV"), adv._id); | ||
689 | } | ||
690 | |||
691 | for (auto adv : _not_derived_from_adverb) | ||
692 | { | ||
693 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@NDERADV"), adv._id); | ||
694 | } | ||
695 | |||
696 | for (auto n : _derived_from_noun) | ||
697 | { | ||
698 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@DERN"), n._id); | ||
699 | } | ||
700 | |||
701 | for (auto n : _not_derived_from_noun) | ||
702 | { | ||
703 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@NDERN"), n._id); | ||
704 | }*/ | ||
705 | |||
706 | std::list<adverb> output; | ||
707 | while (sqlite3_step(ppstmt) == SQLITE_ROW) | ||
708 | { | ||
709 | adverb tnc {_data, sqlite3_column_int(ppstmt, 0)}; | ||
710 | tnc._base_form = std::string(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 1))); | ||
711 | |||
712 | if (sqlite3_column_type(ppstmt, 2) != SQLITE_NULL) | ||
713 | { | ||
714 | tnc._comparative_form = std::string(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 2))); | ||
715 | } | ||
716 | |||
717 | if (sqlite3_column_type(ppstmt, 3) != SQLITE_NULL) | ||
718 | { | ||
719 | tnc._superlative_form = std::string(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 3))); | ||
720 | } | ||
721 | |||
722 | output.push_back(tnc); | ||
723 | } | ||
724 | |||
725 | sqlite3_finalize(ppstmt); | ||
726 | |||
727 | for (auto& adverb : output) | ||
728 | { | ||
729 | query = "SELECT pronunciation, prerhyme, rhyme FROM adverb_pronunciations WHERE adverb_id = ?"; | ||
730 | if (sqlite3_prepare_v2(_data.ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
731 | { | ||
732 | throw std::runtime_error(sqlite3_errmsg(_data.ppdb)); | ||
733 | } | ||
734 | |||
735 | sqlite3_bind_int(ppstmt, 1, adverb._id); | ||
736 | |||
737 | while (sqlite3_step(ppstmt) == SQLITE_ROW) | ||
738 | { | ||
739 | std::string pronunciation(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 0))); | ||
740 | auto phonemes = verbly::split<std::list<std::string>>(pronunciation, " "); | ||
741 | |||
742 | adverb.pronunciations.push_back(phonemes); | ||
743 | |||
744 | if ((sqlite3_column_type(ppstmt, 1) != SQLITE_NULL) && (sqlite3_column_type(ppstmt, 2) != SQLITE_NULL)) | ||
745 | { | ||
746 | std::string prerhyme(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 1))); | ||
747 | std::string rhyming(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 2))); | ||
748 | adverb.rhymes.emplace_back(prerhyme, rhyming); | ||
749 | } | ||
750 | } | ||
751 | |||
752 | sqlite3_finalize(ppstmt); | ||
753 | } | ||
754 | |||
755 | return output; | ||
756 | } | ||
757 | |||
758 | }; | ||
diff --git a/lib/adverb_query.h b/lib/adverb_query.h deleted file mode 100644 index 30e7400..0000000 --- a/lib/adverb_query.h +++ /dev/null | |||
@@ -1,86 +0,0 @@ | |||
1 | #ifndef ADVERB_QUERY_H_CA13CCDD | ||
2 | #define ADVERB_QUERY_H_CA13CCDD | ||
3 | |||
4 | namespace verbly { | ||
5 | |||
6 | class adverb_query { | ||
7 | public: | ||
8 | adverb_query(const data& _data); | ||
9 | |||
10 | adverb_query& limit(int _limit); | ||
11 | adverb_query& random(); | ||
12 | adverb_query& except(const adverb& _word); | ||
13 | adverb_query& rhymes_with(const word& _word); | ||
14 | adverb_query& rhymes_with(rhyme _r); | ||
15 | adverb_query& has_pronunciation(); | ||
16 | adverb_query& has_rhyming_noun(); | ||
17 | adverb_query& has_rhyming_adjective(); | ||
18 | adverb_query& has_rhyming_adverb(); | ||
19 | adverb_query& has_rhyming_verb(); | ||
20 | adverb_query& with_stress(filter<std::vector<bool>> _arg); | ||
21 | |||
22 | adverb_query& requires_comparative_form(); | ||
23 | adverb_query& requires_superlative_form(); | ||
24 | |||
25 | adverb_query& with_prefix(filter<std::string> _f); | ||
26 | adverb_query& with_suffix(filter<std::string> _f); | ||
27 | |||
28 | adverb_query& with_complexity(int _arg); | ||
29 | |||
30 | adverb_query& has_antonyms(); | ||
31 | adverb_query& antonym_of(filter<adverb> _f); | ||
32 | |||
33 | adverb_query& has_synonyms(); | ||
34 | adverb_query& synonym_of(filter<adverb> _f); | ||
35 | |||
36 | adverb_query& is_mannernymic(); | ||
37 | adverb_query& mannernym_of(filter<adjective> _f); | ||
38 | |||
39 | /* adverb_query& derived_from(const word& _w); | ||
40 | adverb_query& not_derived_from(const word& _w);*/ | ||
41 | |||
42 | std::list<adverb> run() const; | ||
43 | |||
44 | const static int unlimited = -1; | ||
45 | |||
46 | private: | ||
47 | const data& _data; | ||
48 | int _limit = unlimited; | ||
49 | bool _random = false; | ||
50 | std::list<rhyme> _rhymes; | ||
51 | std::list<adverb> _except; | ||
52 | bool _has_prn = false; | ||
53 | bool _has_rhyming_noun = false; | ||
54 | bool _has_rhyming_adjective = false; | ||
55 | bool _has_rhyming_adverb = false; | ||
56 | bool _has_rhyming_verb = false; | ||
57 | filter<std::vector<bool>> _stress; | ||
58 | |||
59 | bool _requires_comparative_form = false; | ||
60 | bool _requires_superlative_form = false; | ||
61 | |||
62 | filter<std::string> _with_prefix; | ||
63 | filter<std::string> _with_suffix; | ||
64 | |||
65 | int _with_complexity = unlimited; | ||
66 | |||
67 | bool _has_antonyms = false; | ||
68 | filter<adverb> _antonym_of; | ||
69 | |||
70 | bool _has_synonyms = false; | ||
71 | filter<adverb> _synonym_of; | ||
72 | |||
73 | bool _is_mannernymic = false; | ||
74 | filter<adjective> _mannernym_of; | ||
75 | |||
76 | /* std::list<adjective> _derived_from_adjective; | ||
77 | std::list<adjective> _not_derived_from_adjective; | ||
78 | std::list<adverb> _derived_from_adverb; | ||
79 | std::list<adverb> _not_derived_from_adverb; | ||
80 | std::list<noun> _derived_from_noun; | ||
81 | std::list<noun> _not_derived_from_noun;*/ | ||
82 | }; | ||
83 | |||
84 | }; | ||
85 | |||
86 | #endif /* end of include guard: ADVERB_QUERY_H_CA13CCDD */ | ||
diff --git a/lib/binding.cpp b/lib/binding.cpp new file mode 100644 index 0000000..349cd6f --- /dev/null +++ b/lib/binding.cpp | |||
@@ -0,0 +1,180 @@ | |||
1 | #include "binding.h" | ||
2 | #include <stdexcept> | ||
3 | #include <utility> | ||
4 | |||
5 | namespace verbly { | ||
6 | |||
7 | binding::binding(const binding& other) | ||
8 | { | ||
9 | type_ = other.type_; | ||
10 | |||
11 | switch (type_) | ||
12 | { | ||
13 | case type::integer: | ||
14 | { | ||
15 | integer_ = other.integer_; | ||
16 | |||
17 | break; | ||
18 | } | ||
19 | |||
20 | case type::string: | ||
21 | { | ||
22 | new(&string_) std::string(other.string_); | ||
23 | |||
24 | break; | ||
25 | } | ||
26 | |||
27 | case type::invalid: | ||
28 | { | ||
29 | break; | ||
30 | } | ||
31 | } | ||
32 | } | ||
33 | |||
34 | binding::binding(binding&& other) : binding() | ||
35 | { | ||
36 | swap(*this, other); | ||
37 | } | ||
38 | |||
39 | binding& binding::operator=(binding other) | ||
40 | { | ||
41 | swap(*this, other); | ||
42 | |||
43 | return *this; | ||
44 | } | ||
45 | |||
46 | void swap(binding& first, binding& second) | ||
47 | { | ||
48 | using type = binding::type; | ||
49 | |||
50 | type tempType = first.type_; | ||
51 | int tempInteger; | ||
52 | std::string tempString; | ||
53 | |||
54 | switch (first.type_) | ||
55 | { | ||
56 | case type::integer: | ||
57 | { | ||
58 | tempInteger = first.integer_; | ||
59 | |||
60 | break; | ||
61 | } | ||
62 | |||
63 | case type::string: | ||
64 | { | ||
65 | tempString = std::move(tempString); | ||
66 | |||
67 | break; | ||
68 | } | ||
69 | |||
70 | case type::invalid: | ||
71 | { | ||
72 | break; | ||
73 | } | ||
74 | } | ||
75 | |||
76 | first.~binding(); | ||
77 | |||
78 | first.type_ = second.type_; | ||
79 | |||
80 | switch (second.type_) | ||
81 | { | ||
82 | case type::integer: | ||
83 | { | ||
84 | first.integer_ = second.integer_; | ||
85 | |||
86 | break; | ||
87 | } | ||
88 | |||
89 | case type::string: | ||
90 | { | ||
91 | new(&first.string_) std::string(std::move(second.string_)); | ||
92 | |||
93 | break; | ||
94 | } | ||
95 | |||
96 | case type::invalid: | ||
97 | { | ||
98 | break; | ||
99 | } | ||
100 | } | ||
101 | |||
102 | second.~binding(); | ||
103 | |||
104 | second.type_ = tempType; | ||
105 | |||
106 | switch (tempType) | ||
107 | { | ||
108 | case type::integer: | ||
109 | { | ||
110 | second.integer_ = tempInteger; | ||
111 | |||
112 | break; | ||
113 | } | ||
114 | |||
115 | case type::string: | ||
116 | { | ||
117 | new(&second.string_) std::string(std::move(tempString)); | ||
118 | |||
119 | break; | ||
120 | } | ||
121 | |||
122 | case type::invalid: | ||
123 | { | ||
124 | break; | ||
125 | } | ||
126 | } | ||
127 | } | ||
128 | |||
129 | binding::~binding() | ||
130 | { | ||
131 | switch (type_) | ||
132 | { | ||
133 | case type::string: | ||
134 | { | ||
135 | using string_type = std::string; | ||
136 | string_.~string_type(); | ||
137 | |||
138 | break; | ||
139 | } | ||
140 | |||
141 | case type::integer: | ||
142 | case type::invalid: | ||
143 | { | ||
144 | break; | ||
145 | } | ||
146 | } | ||
147 | } | ||
148 | |||
149 | binding::binding(int arg) : | ||
150 | type_(type::integer), | ||
151 | integer_(arg) | ||
152 | { | ||
153 | } | ||
154 | |||
155 | int binding::getInteger() const | ||
156 | { | ||
157 | if (type_ != type::integer) | ||
158 | { | ||
159 | throw std::domain_error("binding::getInteger called on non-integer binding"); | ||
160 | } | ||
161 | |||
162 | return integer_; | ||
163 | } | ||
164 | |||
165 | binding::binding(std::string arg) : type_(type::string) | ||
166 | { | ||
167 | new(&string_) std::string(arg); | ||
168 | } | ||
169 | |||
170 | std::string binding::getString() const | ||
171 | { | ||
172 | if (type_ != type::string) | ||
173 | { | ||
174 | throw std::domain_error("binding::getString called on non-string binding"); | ||
175 | } | ||
176 | |||
177 | return string_; | ||
178 | } | ||
179 | |||
180 | }; | ||
diff --git a/lib/binding.h b/lib/binding.h new file mode 100644 index 0000000..7fbe20e --- /dev/null +++ b/lib/binding.h | |||
@@ -0,0 +1,70 @@ | |||
1 | #ifndef BINDING_H_CAE0B18E | ||
2 | #define BINDING_H_CAE0B18E | ||
3 | |||
4 | #include <string> | ||
5 | |||
6 | namespace verbly { | ||
7 | |||
/**
 * A small tagged union that holds either an int, a std::string, or nothing.
 *
 * type_ records which union member is currently alive; the special member
 * functions construct and destroy the string member by hand accordingly.
 */
class binding {
  public:
    enum class type {
      invalid,
      integer,
      string
    };

    // Creates an empty binding; type_ keeps its default of type::invalid.
    // This cannot be "= default": the union contains a std::string, which
    // makes the implicitly generated default constructor deleted.
    binding()
    {
    }

    // Value semantics: copy, move, assignment and swap.
    binding(const binding& other);
    binding(binding&& other);
    binding& operator=(binding other);
    friend void swap(binding& first, binding& second);

    ~binding();

    // Integer alternative.
    binding(int arg);
    int getInteger() const;

    // String alternative.
    binding(std::string arg);
    std::string getString() const;

    // Reports which alternative is currently stored.
    type getType() const { return type_; }

  private:
    union {
      int integer_;
      std::string string_;
    };

    type type_ = type::invalid;
};
67 | |||
68 | }; | ||
69 | |||
70 | #endif /* end of include guard: BINDING_H_CAE0B18E */ | ||
diff --git a/lib/data.cpp b/lib/data.cpp deleted file mode 100644 index db42487..0000000 --- a/lib/data.cpp +++ /dev/null | |||
@@ -1,177 +0,0 @@ | |||
1 | #include "verbly.h" | ||
2 | |||
3 | namespace verbly { | ||
4 | |||
5 | data::data(std::string datafile) | ||
6 | { | ||
7 | if (sqlite3_open_v2(datafile.c_str(), &ppdb, SQLITE_OPEN_READONLY, NULL) != SQLITE_OK) | ||
8 | { | ||
9 | throw std::invalid_argument(sqlite3_errmsg(ppdb)); | ||
10 | } | ||
11 | } | ||
12 | |||
13 | data::data(data&& other) | ||
14 | { | ||
15 | ppdb = other.ppdb; | ||
16 | } | ||
17 | |||
18 | data& data::operator=(data&& other) | ||
19 | { | ||
20 | ppdb = other.ppdb; | ||
21 | |||
22 | return *this; | ||
23 | } | ||
24 | |||
25 | data::~data() | ||
26 | { | ||
27 | sqlite3_close_v2(ppdb); | ||
28 | } | ||
29 | |||
30 | verb_query data::verbs() const | ||
31 | { | ||
32 | return verb_query(*this); | ||
33 | } | ||
34 | |||
35 | adjective_query data::adjectives() const | ||
36 | { | ||
37 | return adjective_query(*this); | ||
38 | } | ||
39 | |||
40 | adverb_query data::adverbs() const | ||
41 | { | ||
42 | return adverb_query(*this); | ||
43 | } | ||
44 | |||
45 | noun_query data::nouns() const | ||
46 | { | ||
47 | return noun_query(*this); | ||
48 | } | ||
49 | |||
50 | frame_query data::frames() const | ||
51 | { | ||
52 | return frame_query(*this); | ||
53 | } | ||
54 | |||
55 | preposition_query data::prepositions() const | ||
56 | { | ||
57 | return preposition_query(*this); | ||
58 | } | ||
59 | |||
60 | binding::type binding::get_type() const | ||
61 | { | ||
62 | return _type; | ||
63 | } | ||
64 | |||
65 | binding::binding(const binding& other) | ||
66 | { | ||
67 | _type = other._type; | ||
68 | |||
69 | switch (_type) | ||
70 | { | ||
71 | case type::integer: | ||
72 | { | ||
73 | _integer = other._integer; | ||
74 | |||
75 | break; | ||
76 | } | ||
77 | |||
78 | case type::string: | ||
79 | { | ||
80 | new(&_string) std::string(other._string); | ||
81 | |||
82 | break; | ||
83 | } | ||
84 | } | ||
85 | } | ||
86 | |||
87 | binding::~binding() | ||
88 | { | ||
89 | switch (_type) | ||
90 | { | ||
91 | case type::string: | ||
92 | { | ||
93 | using string_type = std::string; | ||
94 | _string.~string_type(); | ||
95 | |||
96 | break; | ||
97 | } | ||
98 | } | ||
99 | } | ||
100 | |||
101 | binding& binding::operator=(const binding& other) | ||
102 | { | ||
103 | this->~binding(); | ||
104 | |||
105 | _type = other._type; | ||
106 | |||
107 | switch (_type) | ||
108 | { | ||
109 | case type::integer: | ||
110 | { | ||
111 | _integer = other._integer; | ||
112 | |||
113 | break; | ||
114 | } | ||
115 | |||
116 | case type::string: | ||
117 | { | ||
118 | new(&_string) std::string(other._string); | ||
119 | |||
120 | break; | ||
121 | } | ||
122 | } | ||
123 | |||
124 | return *this; | ||
125 | } | ||
126 | |||
127 | binding::binding(int _arg) | ||
128 | { | ||
129 | _type = type::integer; | ||
130 | _integer = _arg; | ||
131 | } | ||
132 | |||
133 | int binding::get_integer() const | ||
134 | { | ||
135 | assert(_type == type::integer); | ||
136 | |||
137 | return _integer; | ||
138 | } | ||
139 | |||
140 | void binding::set_integer(int _arg) | ||
141 | { | ||
142 | *this = binding(_arg); | ||
143 | } | ||
144 | |||
145 | binding& binding::operator=(int _arg) | ||
146 | { | ||
147 | *this = binding(_arg); | ||
148 | |||
149 | return *this; | ||
150 | } | ||
151 | |||
152 | binding::binding(std::string _arg) | ||
153 | { | ||
154 | _type = type::string; | ||
155 | new(&_string) std::string(_arg); | ||
156 | } | ||
157 | |||
158 | std::string binding::get_string() const | ||
159 | { | ||
160 | assert(_type == type::string); | ||
161 | |||
162 | return _string; | ||
163 | } | ||
164 | |||
165 | void binding::set_string(std::string _arg) | ||
166 | { | ||
167 | *this = binding(_arg); | ||
168 | } | ||
169 | |||
170 | binding& binding::operator=(std::string _arg) | ||
171 | { | ||
172 | *this = binding(_arg); | ||
173 | |||
174 | return *this; | ||
175 | } | ||
176 | |||
177 | }; | ||
diff --git a/lib/data.h b/lib/data.h deleted file mode 100644 index b8b12b9..0000000 --- a/lib/data.h +++ /dev/null | |||
@@ -1,380 +0,0 @@ | |||
1 | #ifndef DATA_H_C4AEC3DD | ||
2 | #define DATA_H_C4AEC3DD | ||
3 | |||
4 | #include <sqlite3.h> | ||
5 | |||
6 | namespace verbly { | ||
7 | |||
8 | class data; | ||
9 | class word; | ||
10 | class adjective; | ||
11 | class noun; | ||
12 | class verb; | ||
13 | class adverb; | ||
14 | class frame; | ||
15 | class adjective_query; | ||
16 | class adverb_query; | ||
17 | class noun_query; | ||
18 | class verb_query; | ||
19 | class frame_query; | ||
20 | class preposition_query; | ||
21 | |||
22 | class data { | ||
23 | private: | ||
24 | sqlite3* ppdb; | ||
25 | |||
26 | friend class adjective_query; | ||
27 | friend class noun_query; | ||
28 | friend class verb_query; | ||
29 | friend class adverb_query; | ||
30 | friend class frame_query; | ||
31 | friend class preposition_query; | ||
32 | |||
33 | public: | ||
34 | data(std::string datafile); | ||
35 | |||
36 | data(const data& other) = delete; | ||
37 | data& operator=(const data& other) = delete; | ||
38 | |||
39 | data(data&& other); | ||
40 | data& operator=(data&& other); | ||
41 | |||
42 | ~data(); | ||
43 | |||
44 | verb_query verbs() const; | ||
45 | adjective_query adjectives() const; | ||
46 | adverb_query adverbs() const; | ||
47 | noun_query nouns() const; | ||
48 | frame_query frames() const; | ||
49 | preposition_query prepositions() const; | ||
50 | |||
51 | }; | ||
52 | |||
/**
 * A tree-shaped filter over values of type T.
 *
 * A filter is either a singleton (exactly one T) or a group (a list of
 * nested filters). Every filter carries a "not" flag and every group also
 * carries an "or" flag; how the flags are interpreted is up to the code that
 * consumes the filter.
 *
 * The payload is stored in a union, so the active member is constructed and
 * destroyed by hand according to _type.
 */
template <class T>
class filter {
  public:
    enum class type {
      singleton,
      group
    };

    typedef filter<T> value_type;
    typedef typename std::list<filter<T>>::iterator iterator;

    type get_type() const
    {
      return _type;
    }

    // ---- Copying and destruction ------------------------------------------

    filter(const filter<T>& other)
    {
      _type = other._type;
      _notlogic = other._notlogic;

      copy_payload(other);
    }

    // Copy assignment. The source is copied before anything is destroyed, so
    // both self-assignment and assigning from one of our own nested elements
    // (which destroying our payload would invalidate) are safe.
    filter<T>& operator=(const filter<T>& other)
    {
      if (this != &other)
      {
        filter<T> snapshot(other);

        destroy_payload();

        _type = snapshot._type;
        _notlogic = snapshot._notlogic;

        steal_payload(snapshot);
      }

      return *this;
    }

    ~filter()
    {
      destroy_payload();
    }

    // ---- Flags common to both kinds ---------------------------------------

    bool get_notlogic() const
    {
      return _notlogic;
    }

    void set_notlogic(bool _nl)
    {
      _notlogic = _nl;
    }

    // ---- Flattening -------------------------------------------------------

    // Returns every singleton value in the tree, in traversal order,
    // duplicates included.
    std::list<T> inorder_flatten() const
    {
      std::list<T> result;

      if (_type == type::singleton)
      {
        result.push_back(_singleton.elem);
      } else if (_type == type::group)
      {
        for (const auto& elem : _group.elems)
        {
          auto l = elem.inorder_flatten();
          result.insert(std::end(result), std::begin(l), std::end(l));
        }
      }

      return result;
    }

    // Returns the set of distinct singleton values in the tree.
    std::set<T> uniq_flatten() const
    {
      std::set<T> result;

      if (_type == type::singleton)
      {
        result.insert(_singleton.elem);
      } else if (_type == type::group)
      {
        for (const auto& elem : _group.elems)
        {
          auto l = elem.uniq_flatten();
          result.insert(std::begin(l), std::end(l));
        }
      }

      return result;
    }

    // Simplifies the tree in place: nested groups that end up empty are
    // removed, and a group with exactly one element is replaced by that
    // element, with the two "not" flags combined.
    void clean()
    {
      if (_type != type::group)
      {
        return;
      }

      for (auto it = _group.elems.begin(); it != _group.elems.end(); )
      {
        it->clean();

        if (it->get_type() == type::group)
        {
          if (it->_group.elems.size() == 0)
          {
            it = _group.elems.erase(it);

            continue;
          } else if (it->_group.elems.size() == 1)
          {
            bool truelogic = it->_notlogic != it->_group.elems.front()._notlogic;
            filter<T> e = it->_group.elems.front();
            *it = e;
            it->_notlogic = truelogic;
          }
        }

        ++it;
      }

      if (_group.elems.size() == 1)
      {
        bool truelogic = _notlogic != _group.elems.front()._notlogic;
        filter<T> e = _group.elems.front();
        *this = e;
        _notlogic = truelogic;
      }
    }

    // ---- Singleton --------------------------------------------------------

    filter(T _elem, bool _notlogic = false) : _type(type::singleton)
    {
      new(&_singleton.elem) T(_elem);
      this->_notlogic = _notlogic;
    }

    // Turns this filter into a singleton holding _elem.
    filter<T>& operator=(T _elem)
    {
      // Parentheses on purpose: filter<T>{_elem} would select the
      // initializer_list constructor and build a one-element group instead
      // of a singleton.
      *this = filter<T>(_elem);

      return *this;
    }

    T get_elem() const
    {
      assert(_type == type::singleton);

      return _singleton.elem;
    }

    void set_elem(T _elem)
    {
      assert(_type == type::singleton);

      _singleton.elem = _elem;
    }

    // ---- Group ------------------------------------------------------------

    // Creates an empty group with orlogic = false.
    filter() : _type(type::group)
    {
      new(&_group.elems) std::list<filter<T>>();
      _group.orlogic = false;
    }

    // Creates a group from the listed filters, with orlogic = false.
    filter(std::initializer_list<filter<T>> _init) : _type(type::group)
    {
      new(&_group.elems) std::list<filter<T>>(_init);
      _group.orlogic = false;
    }

    iterator begin()
    {
      assert(_type == type::group);

      return _group.elems.begin();
    }

    iterator end()
    {
      assert(_type == type::group);

      return _group.elems.end();
    }

    filter<T>& operator<<(filter<T> _elem)
    {
      assert(_type == type::group);

      _group.elems.push_back(_elem);

      return *this;
    }

    void push_back(filter<T> _elem)
    {
      assert(_type == type::group);

      _group.elems.push_back(_elem);
    }

    bool get_orlogic() const
    {
      assert(_type == type::group);

      return _group.orlogic;
    }

    void set_orlogic(bool _ol)
    {
      assert(_type == type::group);

      _group.orlogic = _ol;
    }

    // A group is empty when it has no elements; a singleton is never empty.
    bool empty() const
    {
      if (_type == type::group)
      {
        return _group.elems.empty();
      } else {
        return false;
      }
    }

    int size() const
    {
      assert(_type == type::group);

      return static_cast<int>(_group.elems.size());
    }

  private:
    // Constructs the union member selected by _type as a copy of other's.
    // _type must already be set and no union member may be alive.
    void copy_payload(const filter<T>& other)
    {
      switch (_type)
      {
        case type::singleton:
        {
          new(&_singleton.elem) T(other._singleton.elem);

          break;
        }

        case type::group:
        {
          new(&_group.elems) std::list<filter<T>>(other._group.elems);
          _group.orlogic = other._group.orlogic;

          break;
        }
      }
    }

    // Like copy_payload, but moves out of src. The casts to an rvalue
    // reference are what std::move does, spelled out so that this header
    // needs nothing from <utility>.
    void steal_payload(filter<T>& src)
    {
      switch (_type)
      {
        case type::singleton:
        {
          new(&_singleton.elem) T(static_cast<T&&>(src._singleton.elem));

          break;
        }

        case type::group:
        {
          using list_type = std::list<filter<T>>;
          new(&_group.elems) list_type(static_cast<list_type&&>(src._group.elems));
          _group.orlogic = src._group.orlogic;

          break;
        }
      }
    }

    // Runs the destructor of whichever union member is alive.
    void destroy_payload()
    {
      switch (_type)
      {
        case type::singleton:
        {
          _singleton.elem.~T();

          break;
        }

        case type::group:
        {
          using list_type = std::list<filter<T>>;
          _group.elems.~list_type();

          break;
        }
      }
    }

    type _type;
    bool _notlogic = false;
    union {
      struct {
        T elem;
      } _singleton;
      struct {
        std::list<filter<T>> elems;
        bool orlogic;
      } _group;
    };
};
345 | |||
/**
 * A small tagged union that holds either an int or a std::string.
 *
 * _type records which union member is currently alive. There is no default
 * constructor: a binding is always created from a value or from another
 * binding.
 */
class binding {
  public:
    enum class type {
      integer,
      string
    };

    // Copying and destruction.
    binding(const binding& other);
    binding& operator=(const binding& other);
    ~binding();

    // Reports which alternative is stored.
    type get_type() const;

    // Integer alternative.
    binding(int _arg);
    binding& operator=(int _arg);
    int get_integer() const;
    void set_integer(int _arg);

    // String alternative.
    binding(std::string _arg);
    binding& operator=(std::string _arg);
    std::string get_string() const;
    void set_string(std::string _arg);

  private:
    union {
      int _integer;
      std::string _string;
    };
    type _type;
};
377 | |||
378 | }; | ||
379 | |||
380 | #endif /* end of include guard: DATA_H_C4AEC3DD */ | ||
diff --git a/lib/database.cpp b/lib/database.cpp new file mode 100644 index 0000000..351b93d --- /dev/null +++ b/lib/database.cpp | |||
@@ -0,0 +1,79 @@ | |||
1 | #include "database.h" | ||
2 | #include <sqlite3.h> | ||
3 | #include <stdexcept> | ||
4 | #include "query.h" | ||
5 | |||
6 | namespace verbly { | ||
7 | |||
8 | database::database(std::string path) | ||
9 | { | ||
10 | if (sqlite3_open_v2(path.c_str(), &ppdb_, SQLITE_OPEN_READONLY, NULL) != SQLITE_OK) | ||
11 | { | ||
12 | // We still have to free the resources allocated. In the event that | ||
13 | // allocation failed, ppdb will be null and sqlite3_close_v2 will just | ||
14 | // ignore it. | ||
15 | std::string errmsg(sqlite3_errmsg(ppdb_)); | ||
16 | sqlite3_close_v2(ppdb_); | ||
17 | |||
18 | throw database_error("Could not open verbly datafile", errmsg); | ||
19 | } | ||
20 | } | ||
21 | |||
22 | database::database(database&& other) : database() | ||
23 | { | ||
24 | swap(*this, other); | ||
25 | } | ||
26 | |||
27 | database& database::operator=(database&& other) | ||
28 | { | ||
29 | swap(*this, other); | ||
30 | |||
31 | return *this; | ||
32 | } | ||
33 | |||
34 | void swap(database& first, database& second) | ||
35 | { | ||
36 | std::swap(first.ppdb_, second.ppdb_); | ||
37 | } | ||
38 | |||
39 | database::~database() | ||
40 | { | ||
41 | sqlite3_close_v2(ppdb_); | ||
42 | } | ||
43 | |||
44 | query<notion> database::notions(filter where, bool random, int limit) const | ||
45 | { | ||
46 | return query<notion>(*this, ppdb_, std::move(where), random, limit); | ||
47 | } | ||
48 | |||
49 | query<word> database::words(filter where, bool random, int limit) const | ||
50 | { | ||
51 | return query<word>(*this, ppdb_, std::move(where), random, limit); | ||
52 | } | ||
53 | |||
54 | query<group> database::groups(filter where, bool random, int limit) const | ||
55 | { | ||
56 | return query<group>(*this, ppdb_, std::move(where), random, limit); | ||
57 | } | ||
58 | |||
59 | query<frame> database::frames(filter where, bool random, int limit) const | ||
60 | { | ||
61 | return query<frame>(*this, ppdb_, std::move(where), random, limit); | ||
62 | } | ||
63 | |||
64 | query<lemma> database::lemmas(filter where, bool random, int limit) const | ||
65 | { | ||
66 | return query<lemma>(*this, ppdb_, std::move(where), random, limit); | ||
67 | } | ||
68 | |||
69 | query<form> database::forms(filter where, bool random, int limit) const | ||
70 | { | ||
71 | return query<form>(*this, ppdb_, std::move(where), random, limit); | ||
72 | } | ||
73 | |||
74 | query<pronunciation> database::pronunciations(filter where, bool random, int limit) const | ||
75 | { | ||
76 | return query<pronunciation>(*this, ppdb_, std::move(where), random, limit); | ||
77 | } | ||
78 | |||
79 | }; | ||
diff --git a/lib/database.h b/lib/database.h new file mode 100644 index 0000000..d68c40b --- /dev/null +++ b/lib/database.h | |||
@@ -0,0 +1,73 @@ | |||
1 | #ifndef DATABASE_H_0B0A47D2 | ||
2 | #define DATABASE_H_0B0A47D2 | ||
3 | |||
4 | #include <string> | ||
5 | #include <exception> | ||
6 | #include <list> | ||
7 | #include "notion.h" | ||
8 | #include "word.h" | ||
9 | #include "group.h" | ||
10 | #include "frame.h" | ||
11 | #include "lemma.h" | ||
12 | #include "form.h" | ||
13 | #include "pronunciation.h" | ||
14 | |||
15 | struct sqlite3; | ||
16 | |||
17 | namespace verbly { | ||
18 | |||
19 | template <typename Object> | ||
20 | class query; | ||
21 | |||
22 | class database { | ||
23 | public: | ||
24 | |||
25 | // Constructor | ||
26 | |||
27 | explicit database(std::string path); | ||
28 | |||
29 | // Disable copying | ||
30 | |||
31 | database(const database& other) = delete; | ||
32 | database& operator=(const database& other) = delete; | ||
33 | |||
34 | // Move constructor and move assignment | ||
35 | |||
36 | database(database&& other); | ||
37 | database& operator=(database&& other); | ||
38 | |||
39 | // Swap | ||
40 | |||
41 | friend void swap(database& first, database& second); | ||
42 | |||
43 | // Destructor | ||
44 | |||
45 | ~database(); | ||
46 | |||
47 | // Queries | ||
48 | |||
49 | query<notion> notions(filter where, bool random = true, int limit = 1) const; | ||
50 | |||
51 | query<word> words(filter where, bool random = true, int limit = 1) const; | ||
52 | |||
53 | query<group> groups(filter where, bool random = true, int limit = 1) const; | ||
54 | |||
55 | query<frame> frames(filter where, bool random = true, int limit = 1) const; | ||
56 | |||
57 | query<lemma> lemmas(filter where, bool random = true, int limit = 1) const; | ||
58 | |||
59 | query<form> forms(filter where, bool random = true, int limit = 1) const; | ||
60 | |||
61 | query<pronunciation> pronunciations(filter where, bool random = true, int limit = 1) const; | ||
62 | |||
63 | private: | ||
64 | |||
65 | database() = default; | ||
66 | |||
67 | sqlite3* ppdb_ = nullptr; | ||
68 | |||
69 | }; | ||
70 | |||
71 | }; | ||
72 | |||
73 | #endif /* end of include guard: DATABASE_H_0B0A47D2 */ | ||
diff --git a/lib/enums.h b/lib/enums.h new file mode 100644 index 0000000..b37be7b --- /dev/null +++ b/lib/enums.h | |||
@@ -0,0 +1,45 @@ | |||
1 | #ifndef ENUMS_H_260BA847 | ||
2 | #define ENUMS_H_260BA847 | ||
3 | |||
namespace verbly {

  // All enumerator values below are fixed explicitly; do not renumber.

  // Part of speech.
  enum class part_of_speech : int {
    noun = 0,
    adjective = 1,
    adverb = 2,
    verb = 3,
    preposition = 4
  };

  // Adjective positioning; `undefined` marks the absence of a value.
  enum class positioning : int {
    undefined = -1,
    predicate = 0,
    attributive = 1,
    postnominal = 2
  };

  // Inflection category.
  enum class inflection : int {
    base = 0,
    plural = 1,
    comparative = 2,
    superlative = 3,
    past_tense = 4,
    past_participle = 5,
    ing_form = 6,
    s_form = 7
  };

  // Kind of object; `undefined` marks the absence of a value.
  enum class object : int {
    undefined = -1,
    notion = 0,
    word = 1,
    group = 2,
    frame = 3,
    lemma = 4,
    form = 5,
    pronunciation = 6
  };

}
44 | |||
45 | #endif /* end of include guard: ENUMS_H_260BA847 */ | ||
diff --git a/lib/field.cpp b/lib/field.cpp new file mode 100644 index 0000000..d7adbb3 --- /dev/null +++ b/lib/field.cpp | |||
@@ -0,0 +1,91 @@ | |||
1 | #include "field.h" | ||
2 | #include "filter.h" | ||
3 | |||
4 | namespace verbly { | ||
5 | |||
6 | filter field::operator==(int value) const | ||
7 | { | ||
8 | return filter(*this, filter::comparison::int_equals, value); | ||
9 | } | ||
10 | |||
11 | filter field::operator!=(int value) const | ||
12 | { | ||
13 | return filter(*this, filter::comparison::int_does_not_equal, value); | ||
14 | } | ||
15 | |||
16 | filter field::operator<(int value) const | ||
17 | { | ||
18 | return filter(*this, filter::comparison::int_is_less_than, value); | ||
19 | } | ||
20 | |||
21 | filter field::operator<=(int value) const | ||
22 | { | ||
23 | return filter(*this, filter::comparison::int_is_at_most, value); | ||
24 | } | ||
25 | |||
26 | filter field::operator>(int value) const | ||
27 | { | ||
28 | return filter(*this, filter::comparison::int_is_greater_than, value); | ||
29 | } | ||
30 | |||
31 | filter field::operator>=(int value) const | ||
32 | { | ||
33 | return filter(*this, filter::comparison::int_is_at_least, value); | ||
34 | } | ||
35 | |||
36 | filter field::operator==(part_of_speech value) const | ||
37 | { | ||
38 | return filter(*this, filter::comparison::int_equals, static_cast<int>(value)); | ||
39 | } | ||
40 | |||
41 | filter field::operator==(positioning value) const | ||
42 | { | ||
43 | return filter(*this, filter::comparison::int_equals, static_cast<int>(value)); | ||
44 | } | ||
45 | |||
46 | filter field::operator==(inflection value) const | ||
47 | { | ||
48 | return filter(*this, filter::comparison::int_equals, static_cast<int>(value)); | ||
49 | } | ||
50 | |||
51 | filter field::operator==(bool value) const | ||
52 | { | ||
53 | return filter(*this, filter::comparison::boolean_equals, value); | ||
54 | } | ||
55 | |||
56 | filter field::operator==(std::string value) const | ||
57 | { | ||
58 | return filter(*this, filter::comparison::string_equals, std::move(value)); | ||
59 | } | ||
60 | |||
61 | filter field::operator!=(std::string value) const | ||
62 | { | ||
63 | return filter(*this, filter::comparison::string_does_not_equal, std::move(value)); | ||
64 | } | ||
65 | |||
66 | filter field::operator%=(std::string value) const | ||
67 | { | ||
68 | return filter(*this, filter::comparison::string_is_like, std::move(value)); | ||
69 | } | ||
70 | |||
71 | field::operator filter() const | ||
72 | { | ||
73 | return filter(*this, filter::comparison::is_not_null); | ||
74 | } | ||
75 | |||
76 | filter field::operator!() const | ||
77 | { | ||
78 | return filter(*this, filter::comparison::is_null); | ||
79 | } | ||
80 | |||
81 | filter field::operator%=(filter joinCondition) const | ||
82 | { | ||
83 | if (type_ == type::hierarchal_join) | ||
84 | { | ||
85 | return filter(*this, filter::comparison::hierarchally_matches, std::move(joinCondition)); | ||
86 | } else { | ||
87 | return filter(*this, filter::comparison::matches, std::move(joinCondition)); | ||
88 | } | ||
89 | } | ||
90 | |||
91 | }; | ||
diff --git a/lib/field.h b/lib/field.h new file mode 100644 index 0000000..30c62be --- /dev/null +++ b/lib/field.h | |||
@@ -0,0 +1,306 @@ | |||
1 | #ifndef FIELD_H_43258321 | ||
2 | #define FIELD_H_43258321 | ||
3 | |||
#include "enums.h"
#include <cstring>
#include <stdexcept>
#include <string>
#include <tuple>
7 | |||
8 | namespace verbly { | ||
9 | |||
10 | class filter; | ||
11 | |||
12 | class field { | ||
13 | public: | ||
14 | enum class type { | ||
15 | undefined, | ||
16 | string, | ||
17 | integer, | ||
18 | boolean, | ||
19 | join, | ||
20 | join_through, | ||
21 | hierarchal_join | ||
22 | }; | ||
23 | |||
24 | // Default constructor | ||
25 | |||
26 | field() | ||
27 | { | ||
28 | } | ||
29 | |||
30 | // Static factories | ||
31 | |||
32 | static field stringField( | ||
33 | object obj, | ||
34 | const char* name, | ||
35 | bool nullable = false) | ||
36 | { | ||
37 | return field(obj, type::string, name, nullable); | ||
38 | } | ||
39 | |||
40 | static field stringField( | ||
41 | const char* table, | ||
42 | const char* name, | ||
43 | bool nullable = false) | ||
44 | { | ||
45 | return field(object::undefined, type::string, name, nullable, table); | ||
46 | } | ||
47 | |||
48 | static field integerField( | ||
49 | object obj, | ||
50 | const char* name, | ||
51 | bool nullable = false) | ||
52 | { | ||
53 | return field(obj, type::integer, name, nullable); | ||
54 | } | ||
55 | |||
56 | static field integerField( | ||
57 | const char* table, | ||
58 | const char* name, | ||
59 | bool nullable = false) | ||
60 | { | ||
61 | return field(object::undefined, type::integer, name, nullable, table); | ||
62 | } | ||
63 | |||
64 | static field booleanField( | ||
65 | object obj, | ||
66 | const char* name, | ||
67 | bool nullable = false) | ||
68 | { | ||
69 | return field(obj, type::boolean, name, nullable); | ||
70 | } | ||
71 | |||
72 | static field booleanField( | ||
73 | const char* table, | ||
74 | const char* name, | ||
75 | bool nullable = false) | ||
76 | { | ||
77 | return field(object::undefined, type::boolean, name, nullable, table); | ||
78 | } | ||
79 | |||
80 | static field joinField( | ||
81 | object obj, | ||
82 | const char* name, | ||
83 | object joinWith, | ||
84 | bool nullable = false) | ||
85 | { | ||
86 | return field(obj, type::join, name, nullable, 0, joinWith); | ||
87 | } | ||
88 | |||
89 | static field joinField( | ||
90 | object obj, | ||
91 | const char* name, | ||
92 | const char* table, | ||
93 | bool nullable = false) | ||
94 | { | ||
95 | return field(obj, type::join, name, nullable, table); | ||
96 | } | ||
97 | |||
98 | static field joinThrough( | ||
99 | object obj, | ||
100 | const char* name, | ||
101 | object joinWith, | ||
102 | const char* joinTable, | ||
103 | const char* foreignColumn) | ||
104 | { | ||
105 | return field(obj, type::join_through, name, true, joinTable, joinWith, foreignColumn, name, foreignColumn); | ||
106 | } | ||
107 | |||
108 | static field joinThrough( | ||
109 | object obj, | ||
110 | const char* name, | ||
111 | object joinWith, | ||
112 | const char* joinTable, | ||
113 | const char* foreignColumn, | ||
114 | const char* joinColumn, | ||
115 | const char* foreignJoinColumn) | ||
116 | { | ||
117 | return field(obj, type::join_through, name, true, joinTable, joinWith, foreignColumn, joinColumn, foreignJoinColumn); | ||
118 | } | ||
119 | |||
120 | static field selfJoin( | ||
121 | object obj, | ||
122 | const char* name, | ||
123 | const char* joinTable, | ||
124 | const char* joinColumn, | ||
125 | const char* foreignJoinColumn) | ||
126 | { | ||
127 | return field(obj, type::join_through, name, true, joinTable, obj, name, joinColumn, foreignJoinColumn); | ||
128 | } | ||
129 | |||
130 | static field hierarchalSelfJoin( | ||
131 | object obj, | ||
132 | const char* name, | ||
133 | const char* joinTable, | ||
134 | const char* joinColumn, | ||
135 | const char* foreignJoinColumn) | ||
136 | { | ||
137 | return field(obj, type::hierarchal_join, name, true, joinTable, obj, name, joinColumn, foreignJoinColumn); | ||
138 | } | ||
139 | |||
140 | // Accessors | ||
141 | |||
142 | object getObject() const | ||
143 | { | ||
144 | return object_; | ||
145 | } | ||
146 | |||
147 | type getType() const | ||
148 | { | ||
149 | return type_; | ||
150 | } | ||
151 | |||
152 | bool isJoin() const | ||
153 | { | ||
154 | return ((type_ == type::join) || (type_ == type::join_through) || (type_ == type::hierarchal_join)); | ||
155 | } | ||
156 | |||
157 | const char* getColumn() const | ||
158 | { | ||
159 | return column_; | ||
160 | } | ||
161 | |||
162 | bool isNullable() const | ||
163 | { | ||
164 | return nullable_; | ||
165 | } | ||
166 | |||
167 | bool hasTable() const | ||
168 | { | ||
169 | return (table_ != 0); | ||
170 | } | ||
171 | |||
172 | const char* getTable() const | ||
173 | { | ||
174 | return table_; | ||
175 | } | ||
176 | |||
177 | // Joins | ||
178 | |||
179 | object getJoinObject() const | ||
180 | { | ||
181 | // We ignore hierarchal joins because they are always self joins. | ||
182 | return ((type_ == type::join) || (type_ == type::join_through)) | ||
183 | ? joinObject_ | ||
184 | : throw std::domain_error("Non-join fields don't have join objects"); | ||
185 | } | ||
186 | |||
187 | // Many-to-many joins | ||
188 | |||
189 | const char* getForeignColumn() const | ||
190 | { | ||
191 | // We ignore hierarchal joins because they are always self joins. | ||
192 | return (type_ == type::join_through) | ||
193 | ? foreignColumn_ | ||
194 | : throw std::domain_error("Only many-to-many join fields have a foreign column"); | ||
195 | } | ||
196 | |||
197 | const char* getJoinColumn() const | ||
198 | { | ||
199 | return ((type_ == type::join_through) || (type_ == type::hierarchal_join)) | ||
200 | ? joinColumn_ | ||
201 | : throw std::domain_error("Only many-to-many join fields have a join column"); | ||
202 | } | ||
203 | |||
204 | const char* getForeignJoinColumn() const | ||
205 | { | ||
206 | return ((type_ == type::join_through) || (type_ == type::hierarchal_join)) | ||
207 | ? foreignJoinColumn_ | ||
208 | : throw std::domain_error("Only many-to-many join fields have a foreign join column"); | ||
209 | } | ||
210 | |||
211 | // Ordering | ||
212 | |||
213 | bool operator<(const field& other) const | ||
214 | { | ||
215 | // For the most part, (object, column) uniquely identifies fields. | ||
216 | // However, there do exist a number of relationships from an object to | ||
217 | // itself, such as notion hypernymy/hyponymy. Hypernymy and hyponymy have | ||
218 | // the same object (notion), the same column (notion_id), and the same | ||
219 | // table (hypernymy); however, they have different join columns. | ||
220 | return std::tie(object_, column_, table_, joinColumn_) < std::tie(other.object_, other.column_, other.table_, other.joinColumn_); | ||
221 | } | ||
222 | |||
223 | // Equality | ||
224 | |||
225 | bool operator==(const field& other) const | ||
226 | { | ||
227 | // For the most part, (object, column) uniquely identifies fields. | ||
228 | // However, there do exist a number of relationships from an object to | ||
229 | // itself, such as notion hypernymy/hyponymy. Hypernymy and hyponymy have | ||
230 | // the same object (notion), the same column (notion_id), and the same | ||
231 | // table (hypernymy); however, they have different join columns. | ||
232 | return std::tie(object_, column_, table_, joinColumn_) == std::tie(other.object_, other.column_, other.table_, other.joinColumn_); | ||
233 | } | ||
234 | |||
235 | // Filter construction | ||
236 | |||
237 | filter operator==(int value) const; // Integer equality | ||
238 | filter operator!=(int value) const; // Integer inequality | ||
239 | filter operator<(int value) const; // Integer is less than | ||
240 | filter operator<=(int value) const; // Integer is at most | ||
241 | filter operator>(int value) const; // Integer is greater than | ||
242 | filter operator>=(int value) const; // Integer is at least | ||
243 | |||
244 | filter operator==(part_of_speech value) const; // Part of speech equality | ||
245 | filter operator==(positioning value) const; // Adjective positioning equality | ||
246 | filter operator==(inflection value) const; // Inflection category equality | ||
247 | |||
248 | filter operator==(bool value) const; // Boolean equality | ||
249 | |||
250 | filter operator==(std::string value) const; // String equality | ||
251 | filter operator!=(std::string value) const; // String inequality | ||
252 | filter operator%=(std::string value) const; // String matching | ||
253 | |||
254 | operator filter() const; // Non-nullity | ||
255 | filter operator!() const; // Nullity | ||
256 | |||
257 | filter operator%=(filter joinCondition) const; // Join | ||
258 | |||
259 | private: | ||
260 | |||
261 | // Constructor | ||
262 | |||
263 | field( | ||
264 | object obj, | ||
265 | type datatype, | ||
266 | const char* column, | ||
267 | bool nullable = false, | ||
268 | const char* table = 0, | ||
269 | object joinObject = object::undefined, | ||
270 | const char* foreignColumn = 0, | ||
271 | const char* joinColumn = 0, | ||
272 | const char* foreignJoinColumn = 0) : | ||
273 | object_(obj), | ||
274 | type_(datatype), | ||
275 | column_(column), | ||
276 | nullable_(nullable), | ||
277 | table_(table), | ||
278 | joinObject_(joinObject), | ||
279 | foreignColumn_(foreignColumn), | ||
280 | joinColumn_(joinColumn), | ||
281 | foreignJoinColumn_(foreignJoinColumn) | ||
282 | { | ||
283 | } | ||
284 | |||
285 | // General | ||
286 | object object_ = object::undefined; | ||
287 | type type_ = type::undefined; | ||
288 | const char* column_ = 0; | ||
289 | const char* table_ = 0; | ||
290 | |||
291 | // Non-joins and belongs-to joins | ||
292 | bool nullable_ = false; | ||
293 | |||
294 | // Joins | ||
295 | object joinObject_ = object::undefined; | ||
296 | |||
297 | // Many-to-many joins | ||
298 | const char* foreignColumn_ = 0; | ||
299 | const char* joinColumn_ = 0; | ||
300 | const char* foreignJoinColumn_ = 0; | ||
301 | |||
302 | }; | ||
303 | |||
304 | }; | ||
305 | |||
306 | #endif /* end of include guard: FIELD_H_43258321 */ | ||
diff --git a/lib/filter.cpp b/lib/filter.cpp new file mode 100644 index 0000000..959fa05 --- /dev/null +++ b/lib/filter.cpp | |||
@@ -0,0 +1,1365 @@ | |||
1 | #include "filter.h" | ||
2 | #include <stdexcept> | ||
3 | #include <map> | ||
4 | #include "notion.h" | ||
5 | #include "word.h" | ||
6 | #include "group.h" | ||
7 | #include "frame.h" | ||
8 | #include "lemma.h" | ||
9 | #include "form.h" | ||
10 | #include "pronunciation.h" | ||
11 | |||
12 | namespace verbly { | ||
13 | |||
14 | filter::filter(const filter& other) | ||
15 | { | ||
16 | type_ = other.type_; | ||
17 | |||
18 | switch (type_) | ||
19 | { | ||
20 | case type::empty: | ||
21 | { | ||
22 | break; | ||
23 | } | ||
24 | |||
25 | case type::singleton: | ||
26 | { | ||
27 | new(&singleton_.filterField) field(other.singleton_.filterField); | ||
28 | singleton_.filterType = other.singleton_.filterType; | ||
29 | |||
30 | switch (singleton_.filterType) | ||
31 | { | ||
32 | case comparison::int_equals: | ||
33 | case comparison::int_does_not_equal: | ||
34 | case comparison::int_is_at_least: | ||
35 | case comparison::int_is_greater_than: | ||
36 | case comparison::int_is_at_most: | ||
37 | case comparison::int_is_less_than: | ||
38 | { | ||
39 | singleton_.intValue = other.singleton_.intValue; | ||
40 | |||
41 | break; | ||
42 | } | ||
43 | |||
44 | case comparison::boolean_equals: | ||
45 | { | ||
46 | singleton_.boolValue = other.singleton_.boolValue; | ||
47 | |||
48 | break; | ||
49 | } | ||
50 | |||
51 | case comparison::string_equals: | ||
52 | case comparison::string_does_not_equal: | ||
53 | case comparison::string_is_like: | ||
54 | case comparison::string_is_not_like: | ||
55 | { | ||
56 | new(&singleton_.stringValue) std::string(other.singleton_.stringValue); | ||
57 | |||
58 | break; | ||
59 | } | ||
60 | |||
61 | case comparison::is_null: | ||
62 | case comparison::is_not_null: | ||
63 | { | ||
64 | break; | ||
65 | } | ||
66 | |||
67 | case comparison::matches: | ||
68 | case comparison::does_not_match: | ||
69 | case comparison::hierarchally_matches: | ||
70 | case comparison::does_not_hierarchally_match: | ||
71 | { | ||
72 | new(&singleton_.join) std::unique_ptr<filter>(new filter(*other.singleton_.join)); | ||
73 | |||
74 | break; | ||
75 | } | ||
76 | } | ||
77 | |||
78 | break; | ||
79 | } | ||
80 | |||
81 | case type::group: | ||
82 | { | ||
83 | new(&group_.children) std::list<filter>(other.group_.children); | ||
84 | group_.orlogic = other.group_.orlogic; | ||
85 | |||
86 | break; | ||
87 | } | ||
88 | } | ||
89 | } | ||
90 | |||
91 | filter::filter(filter&& other) : filter() | ||
92 | { | ||
93 | swap(*this, other); | ||
94 | } | ||
95 | |||
96 | filter& filter::operator=(filter other) | ||
97 | { | ||
98 | swap(*this, other); | ||
99 | |||
100 | return *this; | ||
101 | } | ||
102 | |||
103 | void swap(filter& first, filter& second) | ||
104 | { | ||
105 | using type = filter::type; | ||
106 | using comparison = filter::comparison; | ||
107 | |||
108 | type tempType = first.type_; | ||
109 | field tempField; | ||
110 | comparison tempComparison; | ||
111 | std::unique_ptr<filter> tempJoin; | ||
112 | std::string tempStringValue; | ||
113 | int tempIntValue; | ||
114 | bool tempBoolValue; | ||
115 | std::list<filter> tempChildren; | ||
116 | bool tempOrlogic; | ||
117 | |||
118 | switch (tempType) | ||
119 | { | ||
120 | case type::empty: | ||
121 | { | ||
122 | break; | ||
123 | } | ||
124 | |||
125 | case type::singleton: | ||
126 | { | ||
127 | tempField = std::move(first.singleton_.filterField); | ||
128 | tempComparison = first.singleton_.filterType; | ||
129 | |||
130 | switch (tempComparison) | ||
131 | { | ||
132 | case comparison::int_equals: | ||
133 | case comparison::int_does_not_equal: | ||
134 | case comparison::int_is_at_least: | ||
135 | case comparison::int_is_greater_than: | ||
136 | case comparison::int_is_at_most: | ||
137 | case comparison::int_is_less_than: | ||
138 | { | ||
139 | tempIntValue = first.singleton_.intValue; | ||
140 | |||
141 | break; | ||
142 | } | ||
143 | |||
144 | case comparison::boolean_equals: | ||
145 | { | ||
146 | tempBoolValue = first.singleton_.boolValue; | ||
147 | |||
148 | break; | ||
149 | } | ||
150 | |||
151 | case comparison::string_equals: | ||
152 | case comparison::string_does_not_equal: | ||
153 | case comparison::string_is_like: | ||
154 | case comparison::string_is_not_like: | ||
155 | { | ||
156 | tempStringValue = std::move(first.singleton_.stringValue); | ||
157 | |||
158 | break; | ||
159 | } | ||
160 | |||
161 | case comparison::is_null: | ||
162 | case comparison::is_not_null: | ||
163 | { | ||
164 | break; | ||
165 | } | ||
166 | |||
167 | case comparison::matches: | ||
168 | case comparison::does_not_match: | ||
169 | case comparison::hierarchally_matches: | ||
170 | case comparison::does_not_hierarchally_match: | ||
171 | { | ||
172 | tempJoin = std::move(first.singleton_.join); | ||
173 | |||
174 | break; | ||
175 | } | ||
176 | } | ||
177 | |||
178 | break; | ||
179 | } | ||
180 | |||
181 | case type::group: | ||
182 | { | ||
183 | tempChildren = std::move(first.group_.children); | ||
184 | tempOrlogic = first.group_.orlogic; | ||
185 | |||
186 | break; | ||
187 | } | ||
188 | } | ||
189 | |||
190 | first.~filter(); | ||
191 | |||
192 | first.type_ = second.type_; | ||
193 | |||
194 | switch (first.type_) | ||
195 | { | ||
196 | case type::empty: | ||
197 | { | ||
198 | break; | ||
199 | } | ||
200 | |||
201 | case type::singleton: | ||
202 | { | ||
203 | new(&first.singleton_.filterField) field(std::move(second.singleton_.filterField)); | ||
204 | first.singleton_.filterType = second.singleton_.filterType; | ||
205 | |||
206 | switch (first.singleton_.filterType) | ||
207 | { | ||
208 | case comparison::int_equals: | ||
209 | case comparison::int_does_not_equal: | ||
210 | case comparison::int_is_at_least: | ||
211 | case comparison::int_is_greater_than: | ||
212 | case comparison::int_is_at_most: | ||
213 | case comparison::int_is_less_than: | ||
214 | { | ||
215 | first.singleton_.intValue = second.singleton_.intValue; | ||
216 | |||
217 | break; | ||
218 | } | ||
219 | |||
220 | case comparison::boolean_equals: | ||
221 | { | ||
222 | first.singleton_.boolValue = second.singleton_.boolValue; | ||
223 | |||
224 | break; | ||
225 | } | ||
226 | |||
227 | case comparison::string_equals: | ||
228 | case comparison::string_does_not_equal: | ||
229 | case comparison::string_is_like: | ||
230 | case comparison::string_is_not_like: | ||
231 | { | ||
232 | new(&first.singleton_.stringValue) std::string(std::move(second.singleton_.stringValue)); | ||
233 | |||
234 | break; | ||
235 | } | ||
236 | |||
237 | case comparison::is_null: | ||
238 | case comparison::is_not_null: | ||
239 | { | ||
240 | break; | ||
241 | } | ||
242 | |||
243 | case comparison::matches: | ||
244 | case comparison::does_not_match: | ||
245 | case comparison::hierarchally_matches: | ||
246 | case comparison::does_not_hierarchally_match: | ||
247 | { | ||
248 | new(&first.singleton_.join) std::unique_ptr<filter>(std::move(second.singleton_.join)); | ||
249 | |||
250 | break; | ||
251 | } | ||
252 | } | ||
253 | |||
254 | break; | ||
255 | } | ||
256 | |||
257 | case type::group: | ||
258 | { | ||
259 | new(&first.group_.children) std::list<filter>(std::move(second.group_.children)); | ||
260 | first.group_.orlogic = second.group_.orlogic; | ||
261 | |||
262 | break; | ||
263 | } | ||
264 | } | ||
265 | |||
266 | second.~filter(); | ||
267 | |||
268 | second.type_ = tempType; | ||
269 | |||
270 | switch (second.type_) | ||
271 | { | ||
272 | case type::empty: | ||
273 | { | ||
274 | break; | ||
275 | } | ||
276 | |||
277 | case type::singleton: | ||
278 | { | ||
279 | new(&second.singleton_.filterField) field(std::move(tempField)); | ||
280 | second.singleton_.filterType = tempComparison; | ||
281 | |||
282 | switch (second.singleton_.filterType) | ||
283 | { | ||
284 | case comparison::int_equals: | ||
285 | case comparison::int_does_not_equal: | ||
286 | case comparison::int_is_at_least: | ||
287 | case comparison::int_is_greater_than: | ||
288 | case comparison::int_is_at_most: | ||
289 | case comparison::int_is_less_than: | ||
290 | { | ||
291 | second.singleton_.intValue = tempIntValue; | ||
292 | |||
293 | break; | ||
294 | } | ||
295 | |||
296 | case comparison::boolean_equals: | ||
297 | { | ||
298 | second.singleton_.boolValue = tempBoolValue; | ||
299 | |||
300 | break; | ||
301 | } | ||
302 | |||
303 | case comparison::string_equals: | ||
304 | case comparison::string_does_not_equal: | ||
305 | case comparison::string_is_like: | ||
306 | case comparison::string_is_not_like: | ||
307 | { | ||
308 | new(&second.singleton_.stringValue) std::string(std::move(tempStringValue)); | ||
309 | |||
310 | break; | ||
311 | } | ||
312 | |||
313 | case comparison::is_null: | ||
314 | case comparison::is_not_null: | ||
315 | { | ||
316 | break; | ||
317 | } | ||
318 | |||
319 | case comparison::matches: | ||
320 | case comparison::does_not_match: | ||
321 | case comparison::hierarchally_matches: | ||
322 | case comparison::does_not_hierarchally_match: | ||
323 | { | ||
324 | new(&second.singleton_.join) std::unique_ptr<filter>(std::move(tempJoin)); | ||
325 | |||
326 | break; | ||
327 | } | ||
328 | } | ||
329 | |||
330 | break; | ||
331 | } | ||
332 | |||
333 | case type::group: | ||
334 | { | ||
335 | new(&second.group_.children) std::list<filter>(std::move(tempChildren)); | ||
336 | second.group_.orlogic = tempOrlogic; | ||
337 | |||
338 | break; | ||
339 | } | ||
340 | } | ||
341 | } | ||
342 | |||
343 | filter::~filter() | ||
344 | { | ||
345 | switch (type_) | ||
346 | { | ||
347 | case type::empty: | ||
348 | { | ||
349 | break; | ||
350 | } | ||
351 | |||
352 | case type::singleton: | ||
353 | { | ||
354 | singleton_.filterField.~field(); | ||
355 | |||
356 | switch (singleton_.filterType) | ||
357 | { | ||
358 | case comparison::int_equals: | ||
359 | case comparison::int_does_not_equal: | ||
360 | case comparison::int_is_at_least: | ||
361 | case comparison::int_is_greater_than: | ||
362 | case comparison::int_is_at_most: | ||
363 | case comparison::int_is_less_than: | ||
364 | case comparison::boolean_equals: | ||
365 | case comparison::is_null: | ||
366 | case comparison::is_not_null: | ||
367 | { | ||
368 | break; | ||
369 | } | ||
370 | |||
371 | case comparison::string_equals: | ||
372 | case comparison::string_does_not_equal: | ||
373 | case comparison::string_is_like: | ||
374 | case comparison::string_is_not_like: | ||
375 | { | ||
376 | using string_type = std::string; | ||
377 | |||
378 | singleton_.stringValue.~string_type(); | ||
379 | |||
380 | break; | ||
381 | } | ||
382 | |||
383 | case comparison::matches: | ||
384 | case comparison::does_not_match: | ||
385 | case comparison::hierarchally_matches: | ||
386 | case comparison::does_not_hierarchally_match: | ||
387 | { | ||
388 | using ptr_type = std::unique_ptr<filter>; | ||
389 | |||
390 | singleton_.join.~ptr_type(); | ||
391 | |||
392 | break; | ||
393 | } | ||
394 | } | ||
395 | |||
396 | break; | ||
397 | } | ||
398 | |||
399 | case type::group: | ||
400 | { | ||
401 | using list_type = std::list<filter>; | ||
402 | |||
403 | group_.children.~list_type(); | ||
404 | |||
405 | break; | ||
406 | } | ||
407 | } | ||
408 | } | ||
409 | |||
410 | filter::filter() | ||
411 | { | ||
412 | } | ||
413 | |||
414 | filter::filter( | ||
415 | field filterField, | ||
416 | comparison filterType, | ||
417 | int filterValue) : | ||
418 | type_(type::singleton) | ||
419 | { | ||
420 | if (filterField.getType() == field::type::integer) | ||
421 | { | ||
422 | switch (filterType) | ||
423 | { | ||
424 | case comparison::int_equals: | ||
425 | case comparison::int_does_not_equal: | ||
426 | case comparison::int_is_at_least: | ||
427 | case comparison::int_is_greater_than: | ||
428 | case comparison::int_is_at_most: | ||
429 | case comparison::int_is_less_than: | ||
430 | { | ||
431 | new(&singleton_.filterField) field(std::move(filterField)); | ||
432 | singleton_.filterType = filterType; | ||
433 | singleton_.intValue = filterValue; | ||
434 | |||
435 | break; | ||
436 | } | ||
437 | |||
438 | case comparison::boolean_equals: | ||
439 | case comparison::string_equals: | ||
440 | case comparison::string_does_not_equal: | ||
441 | case comparison::string_is_like: | ||
442 | case comparison::string_is_not_like: | ||
443 | case comparison::is_null: | ||
444 | case comparison::is_not_null: | ||
445 | case comparison::matches: | ||
446 | case comparison::does_not_match: | ||
447 | case comparison::hierarchally_matches: | ||
448 | case comparison::does_not_hierarchally_match: | ||
449 | { | ||
450 | throw std::invalid_argument("Invalid comparison for integer field"); | ||
451 | } | ||
452 | } | ||
453 | } else { | ||
454 | throw std::domain_error("Cannot match a non-integer field against an integer value"); | ||
455 | } | ||
456 | } | ||
457 | |||
458 | filter::filter( | ||
459 | field filterField, | ||
460 | comparison filterType, | ||
461 | std::string filterValue) : | ||
462 | type_(type::singleton) | ||
463 | { | ||
464 | if (filterField.getType() == field::type::string) | ||
465 | { | ||
466 | switch (filterType) | ||
467 | { | ||
468 | case comparison::string_equals: | ||
469 | case comparison::string_does_not_equal: | ||
470 | case comparison::string_is_like: | ||
471 | case comparison::string_is_not_like: | ||
472 | { | ||
473 | new(&singleton_.filterField) field(std::move(filterField)); | ||
474 | singleton_.filterType = filterType; | ||
475 | new(&singleton_.stringValue) std::string(std::move(filterValue)); | ||
476 | |||
477 | break; | ||
478 | } | ||
479 | |||
480 | case comparison::int_equals: | ||
481 | case comparison::int_does_not_equal: | ||
482 | case comparison::int_is_at_least: | ||
483 | case comparison::int_is_greater_than: | ||
484 | case comparison::int_is_at_most: | ||
485 | case comparison::int_is_less_than: | ||
486 | case comparison::boolean_equals: | ||
487 | case comparison::is_null: | ||
488 | case comparison::is_not_null: | ||
489 | case comparison::matches: | ||
490 | case comparison::does_not_match: | ||
491 | case comparison::hierarchally_matches: | ||
492 | case comparison::does_not_hierarchally_match: | ||
493 | { | ||
494 | throw std::invalid_argument("Invalid comparison for string field"); | ||
495 | } | ||
496 | } | ||
497 | } else { | ||
498 | throw std::domain_error("Cannot match a non-string field against an string value"); | ||
499 | } | ||
500 | } | ||
501 | |||
502 | filter::filter( | ||
503 | field filterField, | ||
504 | comparison filterType, | ||
505 | bool filterValue) : | ||
506 | type_(type::singleton) | ||
507 | { | ||
508 | if (filterField.getType() == field::type::boolean) | ||
509 | { | ||
510 | switch (filterType) | ||
511 | { | ||
512 | case comparison::boolean_equals: | ||
513 | { | ||
514 | new(&singleton_.filterField) field(std::move(filterField)); | ||
515 | singleton_.filterType = filterType; | ||
516 | singleton_.boolValue = filterValue; | ||
517 | |||
518 | break; | ||
519 | } | ||
520 | |||
521 | case comparison::string_equals: | ||
522 | case comparison::string_does_not_equal: | ||
523 | case comparison::string_is_like: | ||
524 | case comparison::string_is_not_like: | ||
525 | case comparison::int_equals: | ||
526 | case comparison::int_does_not_equal: | ||
527 | case comparison::int_is_at_least: | ||
528 | case comparison::int_is_greater_than: | ||
529 | case comparison::int_is_at_most: | ||
530 | case comparison::int_is_less_than: | ||
531 | case comparison::is_null: | ||
532 | case comparison::is_not_null: | ||
533 | case comparison::matches: | ||
534 | case comparison::does_not_match: | ||
535 | case comparison::hierarchally_matches: | ||
536 | case comparison::does_not_hierarchally_match: | ||
537 | { | ||
538 | throw std::invalid_argument("Invalid comparison for boolean field"); | ||
539 | } | ||
540 | } | ||
541 | } else { | ||
542 | throw std::domain_error("Cannot match a non-boolean field against a boolean value"); | ||
543 | } | ||
544 | } | ||
545 | |||
546 | filter::filter( | ||
547 | field filterField, | ||
548 | comparison filterType) : | ||
549 | type_(type::singleton) | ||
550 | { | ||
551 | if (filterField.isNullable()) | ||
552 | { | ||
553 | switch (filterType) | ||
554 | { | ||
555 | case comparison::is_null: | ||
556 | case comparison::is_not_null: | ||
557 | { | ||
558 | new(&singleton_.filterField) field(std::move(filterField)); | ||
559 | singleton_.filterType = filterType; | ||
560 | |||
561 | break; | ||
562 | } | ||
563 | |||
564 | case comparison::string_equals: | ||
565 | case comparison::string_does_not_equal: | ||
566 | case comparison::string_is_like: | ||
567 | case comparison::string_is_not_like: | ||
568 | case comparison::int_equals: | ||
569 | case comparison::int_does_not_equal: | ||
570 | case comparison::int_is_at_least: | ||
571 | case comparison::int_is_greater_than: | ||
572 | case comparison::int_is_at_most: | ||
573 | case comparison::int_is_less_than: | ||
574 | case comparison::boolean_equals: | ||
575 | case comparison::matches: | ||
576 | case comparison::does_not_match: | ||
577 | case comparison::hierarchally_matches: | ||
578 | case comparison::does_not_hierarchally_match: | ||
579 | { | ||
580 | throw std::invalid_argument("Incorrect constructor for given comparison"); | ||
581 | } | ||
582 | } | ||
583 | } else { | ||
584 | throw std::domain_error("Cannot check nullity/non-nullity of non-nullable field"); | ||
585 | } | ||
586 | } | ||
587 | |||
588 | filter::filter( | ||
589 | field joinOn, | ||
590 | comparison filterType, | ||
591 | filter joinCondition) : | ||
592 | type_(type::singleton) | ||
593 | { | ||
594 | switch (joinOn.getType()) | ||
595 | { | ||
596 | case field::type::join: | ||
597 | case field::type::join_through: | ||
598 | { | ||
599 | switch (filterType) | ||
600 | { | ||
601 | case comparison::matches: | ||
602 | case comparison::does_not_match: | ||
603 | { | ||
604 | new(&singleton_.filterField) field(std::move(joinOn)); | ||
605 | singleton_.filterType = filterType; | ||
606 | new(&singleton_.join) std::unique_ptr<filter>(new filter(joinCondition.normalize(singleton_.filterField.getJoinObject()))); | ||
607 | |||
608 | break; | ||
609 | } | ||
610 | |||
611 | case comparison::int_equals: | ||
612 | case comparison::int_does_not_equal: | ||
613 | case comparison::int_is_at_least: | ||
614 | case comparison::int_is_greater_than: | ||
615 | case comparison::int_is_at_most: | ||
616 | case comparison::int_is_less_than: | ||
617 | case comparison::boolean_equals: | ||
618 | case comparison::string_equals: | ||
619 | case comparison::string_does_not_equal: | ||
620 | case comparison::string_is_like: | ||
621 | case comparison::string_is_not_like: | ||
622 | case comparison::is_null: | ||
623 | case comparison::is_not_null: | ||
624 | case comparison::hierarchally_matches: | ||
625 | case comparison::does_not_hierarchally_match: | ||
626 | { | ||
627 | throw std::invalid_argument("Incorrect constructor for given comparison"); | ||
628 | } | ||
629 | } | ||
630 | |||
631 | break; | ||
632 | } | ||
633 | |||
634 | case field::type::hierarchal_join: | ||
635 | { | ||
636 | switch (filterType) | ||
637 | { | ||
638 | case comparison::hierarchally_matches: | ||
639 | case comparison::does_not_hierarchally_match: | ||
640 | { | ||
641 | new(&singleton_.filterField) field(std::move(joinOn)); | ||
642 | singleton_.filterType = filterType; | ||
643 | new(&singleton_.join) std::unique_ptr<filter>(new filter(joinCondition.normalize(singleton_.filterField.getObject()))); | ||
644 | |||
645 | break; | ||
646 | } | ||
647 | |||
648 | case comparison::int_equals: | ||
649 | case comparison::int_does_not_equal: | ||
650 | case comparison::int_is_at_least: | ||
651 | case comparison::int_is_greater_than: | ||
652 | case comparison::int_is_at_most: | ||
653 | case comparison::int_is_less_than: | ||
654 | case comparison::boolean_equals: | ||
655 | case comparison::string_equals: | ||
656 | case comparison::string_does_not_equal: | ||
657 | case comparison::string_is_like: | ||
658 | case comparison::string_is_not_like: | ||
659 | case comparison::is_null: | ||
660 | case comparison::is_not_null: | ||
661 | case comparison::matches: | ||
662 | case comparison::does_not_match: | ||
663 | { | ||
664 | throw std::invalid_argument("Incorrect constructor for given comparison"); | ||
665 | } | ||
666 | } | ||
667 | |||
668 | break; | ||
669 | } | ||
670 | |||
671 | case field::type::undefined: | ||
672 | case field::type::string: | ||
673 | case field::type::integer: | ||
674 | case field::type::boolean: | ||
675 | { | ||
676 | throw std::domain_error("Matching field must be a join field"); | ||
677 | } | ||
678 | } | ||
679 | } | ||
680 | |||
681 | field filter::getField() const | ||
682 | { | ||
683 | if (type_ == type::singleton) | ||
684 | { | ||
685 | return singleton_.filterField; | ||
686 | } else { | ||
687 | throw std::domain_error("This filter does not have a field"); | ||
688 | } | ||
689 | } | ||
690 | |||
691 | filter::comparison filter::getComparison() const | ||
692 | { | ||
693 | if (type_ == type::singleton) | ||
694 | { | ||
695 | return singleton_.filterType; | ||
696 | } else { | ||
697 | throw std::domain_error("This filter does not have a comparison"); | ||
698 | } | ||
699 | } | ||
700 | |||
701 | filter filter::getJoinCondition() const | ||
702 | { | ||
703 | if (type_ == type::singleton) | ||
704 | { | ||
705 | switch (singleton_.filterType) | ||
706 | { | ||
707 | case comparison::matches: | ||
708 | case comparison::does_not_match: | ||
709 | case comparison::hierarchally_matches: | ||
710 | case comparison::does_not_hierarchally_match: | ||
711 | { | ||
712 | return *singleton_.join; | ||
713 | } | ||
714 | |||
715 | case comparison::string_equals: | ||
716 | case comparison::string_does_not_equal: | ||
717 | case comparison::string_is_like: | ||
718 | case comparison::string_is_not_like: | ||
719 | case comparison::int_equals: | ||
720 | case comparison::int_does_not_equal: | ||
721 | case comparison::int_is_at_least: | ||
722 | case comparison::int_is_greater_than: | ||
723 | case comparison::int_is_at_most: | ||
724 | case comparison::int_is_less_than: | ||
725 | case comparison::boolean_equals: | ||
726 | case comparison::is_null: | ||
727 | case comparison::is_not_null: | ||
728 | { | ||
729 | throw std::domain_error("This filter does not have a join condition"); | ||
730 | } | ||
731 | } | ||
732 | } else { | ||
733 | throw std::domain_error("This filter does not have a join condition"); | ||
734 | } | ||
735 | } | ||
736 | |||
737 | std::string filter::getStringArgument() const | ||
738 | { | ||
739 | if (type_ == type::singleton) | ||
740 | { | ||
741 | switch (singleton_.filterType) | ||
742 | { | ||
743 | case comparison::string_equals: | ||
744 | case comparison::string_does_not_equal: | ||
745 | case comparison::string_is_like: | ||
746 | case comparison::string_is_not_like: | ||
747 | { | ||
748 | return singleton_.stringValue; | ||
749 | } | ||
750 | |||
751 | case comparison::int_equals: | ||
752 | case comparison::int_does_not_equal: | ||
753 | case comparison::int_is_at_least: | ||
754 | case comparison::int_is_greater_than: | ||
755 | case comparison::int_is_at_most: | ||
756 | case comparison::int_is_less_than: | ||
757 | case comparison::boolean_equals: | ||
758 | case comparison::is_null: | ||
759 | case comparison::is_not_null: | ||
760 | case comparison::matches: | ||
761 | case comparison::does_not_match: | ||
762 | case comparison::hierarchally_matches: | ||
763 | case comparison::does_not_hierarchally_match: | ||
764 | { | ||
765 | throw std::domain_error("This filter does not have a string argument"); | ||
766 | } | ||
767 | } | ||
768 | } else { | ||
769 | throw std::domain_error("This filter does not have a string argument"); | ||
770 | } | ||
771 | } | ||
772 | |||
773 | int filter::getIntegerArgument() const | ||
774 | { | ||
775 | if (type_ == type::singleton) | ||
776 | { | ||
777 | switch (singleton_.filterType) | ||
778 | { | ||
779 | case comparison::int_equals: | ||
780 | case comparison::int_does_not_equal: | ||
781 | case comparison::int_is_at_least: | ||
782 | case comparison::int_is_greater_than: | ||
783 | case comparison::int_is_at_most: | ||
784 | case comparison::int_is_less_than: | ||
785 | { | ||
786 | return singleton_.intValue; | ||
787 | } | ||
788 | |||
789 | case comparison::string_equals: | ||
790 | case comparison::string_does_not_equal: | ||
791 | case comparison::string_is_like: | ||
792 | case comparison::string_is_not_like: | ||
793 | case comparison::boolean_equals: | ||
794 | case comparison::is_null: | ||
795 | case comparison::is_not_null: | ||
796 | case comparison::matches: | ||
797 | case comparison::does_not_match: | ||
798 | case comparison::hierarchally_matches: | ||
799 | case comparison::does_not_hierarchally_match: | ||
800 | { | ||
801 | throw std::domain_error("This filter does not have an integer argument"); | ||
802 | } | ||
803 | } | ||
804 | } else { | ||
805 | throw std::domain_error("This filter does not have an integer argument"); | ||
806 | } | ||
807 | } | ||
808 | |||
809 | bool filter::getBooleanArgument() const | ||
810 | { | ||
811 | if ((type_ == type::singleton) && (singleton_.filterType == comparison::boolean_equals)) | ||
812 | { | ||
813 | return singleton_.boolValue; | ||
814 | } else { | ||
815 | throw std::domain_error("This filter does not have a boolean argument"); | ||
816 | } | ||
817 | } | ||
818 | |||
819 | filter::filter(bool orlogic) : type_(type::group) | ||
820 | { | ||
821 | new(&group_.children) std::list<filter>(); | ||
822 | group_.orlogic = orlogic; | ||
823 | } | ||
824 | |||
825 | bool filter::getOrlogic() const | ||
826 | { | ||
827 | if (type_ == type::group) | ||
828 | { | ||
829 | return group_.orlogic; | ||
830 | } else { | ||
831 | throw std::domain_error("This filter is not a group filter"); | ||
832 | } | ||
833 | } | ||
834 | |||
835 | filter filter::operator+(filter condition) const | ||
836 | { | ||
837 | filter result(*this); | ||
838 | result += std::move(condition); | ||
839 | |||
840 | return result; | ||
841 | } | ||
842 | |||
843 | filter& filter::operator+=(filter condition) | ||
844 | { | ||
845 | if (type_ == type::group) | ||
846 | { | ||
847 | group_.children.push_back(std::move(condition)); | ||
848 | |||
849 | return *this; | ||
850 | } else { | ||
851 | throw std::domain_error("Children can only be added to group filters"); | ||
852 | } | ||
853 | } | ||
854 | |||
855 | filter::const_iterator filter::begin() const | ||
856 | { | ||
857 | if (type_ == type::group) | ||
858 | { | ||
859 | return std::begin(group_.children); | ||
860 | } else { | ||
861 | throw std::domain_error("This filter has no children"); | ||
862 | } | ||
863 | } | ||
864 | |||
865 | filter::const_iterator filter::end() const | ||
866 | { | ||
867 | if (type_ == type::group) | ||
868 | { | ||
869 | return std::end(group_.children); | ||
870 | } else { | ||
871 | throw std::domain_error("This filter has no children"); | ||
872 | } | ||
873 | } | ||
874 | |||
875 | filter filter::operator!() const | ||
876 | { | ||
877 | switch (type_) | ||
878 | { | ||
879 | case type::empty: | ||
880 | { | ||
881 | return {}; | ||
882 | } | ||
883 | |||
884 | case type::singleton: | ||
885 | { | ||
886 | switch (singleton_.filterType) | ||
887 | { | ||
888 | case comparison::int_equals: | ||
889 | { | ||
890 | return filter(singleton_.filterField, comparison::int_does_not_equal, singleton_.intValue); | ||
891 | } | ||
892 | |||
893 | case comparison::int_does_not_equal: | ||
894 | { | ||
895 | return filter(singleton_.filterField, comparison::int_equals, singleton_.intValue); | ||
896 | } | ||
897 | |||
898 | case comparison::int_is_at_least: | ||
899 | { | ||
900 | return filter(singleton_.filterField, comparison::int_is_less_than, singleton_.intValue); | ||
901 | } | ||
902 | |||
903 | case comparison::int_is_greater_than: | ||
904 | { | ||
905 | return filter(singleton_.filterField, comparison::int_is_at_most, singleton_.intValue); | ||
906 | } | ||
907 | |||
908 | case comparison::int_is_at_most: | ||
909 | { | ||
910 | return filter(singleton_.filterField, comparison::int_is_greater_than, singleton_.intValue); | ||
911 | } | ||
912 | |||
913 | case comparison::int_is_less_than: | ||
914 | { | ||
915 | return filter(singleton_.filterField, comparison::int_is_at_least, singleton_.intValue); | ||
916 | } | ||
917 | |||
918 | case comparison::boolean_equals: | ||
919 | { | ||
920 | return filter(singleton_.filterField, comparison::boolean_equals, !singleton_.boolValue); | ||
921 | } | ||
922 | |||
923 | case comparison::string_equals: | ||
924 | { | ||
925 | return filter(singleton_.filterField, comparison::string_does_not_equal, singleton_.stringValue); | ||
926 | } | ||
927 | |||
928 | case comparison::string_does_not_equal: | ||
929 | { | ||
930 | return filter(singleton_.filterField, comparison::string_equals, singleton_.stringValue); | ||
931 | } | ||
932 | |||
933 | case comparison::string_is_like: | ||
934 | { | ||
935 | return filter(singleton_.filterField, comparison::string_is_not_like, singleton_.stringValue); | ||
936 | } | ||
937 | |||
938 | case comparison::string_is_not_like: | ||
939 | { | ||
940 | return filter(singleton_.filterField, comparison::string_is_like, singleton_.stringValue); | ||
941 | } | ||
942 | |||
943 | case comparison::is_null: | ||
944 | { | ||
945 | return filter(singleton_.filterField, comparison::is_not_null); | ||
946 | } | ||
947 | |||
948 | case comparison::is_not_null: | ||
949 | { | ||
950 | return filter(singleton_.filterField, comparison::is_null); | ||
951 | } | ||
952 | |||
953 | case comparison::matches: | ||
954 | { | ||
955 | return filter(singleton_.filterField, comparison::does_not_match, *singleton_.join); | ||
956 | } | ||
957 | |||
958 | case comparison::does_not_match: | ||
959 | { | ||
960 | return filter(singleton_.filterField, comparison::matches, *singleton_.join); | ||
961 | } | ||
962 | |||
963 | case comparison::hierarchally_matches: | ||
964 | { | ||
965 | return filter(singleton_.filterField, comparison::does_not_hierarchally_match, *singleton_.join); | ||
966 | } | ||
967 | |||
968 | case comparison::does_not_hierarchally_match: | ||
969 | { | ||
970 | return filter(singleton_.filterField, comparison::hierarchally_matches, *singleton_.join); | ||
971 | } | ||
972 | } | ||
973 | } | ||
974 | |||
975 | case type::group: | ||
976 | { | ||
977 | filter result(!group_.orlogic); | ||
978 | |||
979 | for (const filter& child : group_.children) | ||
980 | { | ||
981 | result += !child; | ||
982 | } | ||
983 | |||
984 | return result; | ||
985 | } | ||
986 | } | ||
987 | } | ||
988 | |||
989 | filter& filter::operator&=(filter condition) | ||
990 | { | ||
991 | return (*this = (*this && std::move(condition))); | ||
992 | } | ||
993 | |||
994 | filter& filter::operator|=(filter condition) | ||
995 | { | ||
996 | return (*this = (*this || std::move(condition))); | ||
997 | } | ||
998 | |||
999 | filter filter::operator&&(filter condition) const | ||
1000 | { | ||
1001 | switch (type_) | ||
1002 | { | ||
1003 | case type::empty: | ||
1004 | { | ||
1005 | return condition; | ||
1006 | } | ||
1007 | |||
1008 | case type::singleton: | ||
1009 | { | ||
1010 | filter result(false); | ||
1011 | result.group_.children.push_back(*this); | ||
1012 | result.group_.children.push_back(std::move(condition)); | ||
1013 | |||
1014 | return result; | ||
1015 | } | ||
1016 | |||
1017 | case type::group: | ||
1018 | { | ||
1019 | if (group_.orlogic) | ||
1020 | { | ||
1021 | filter result(false); | ||
1022 | result.group_.children.push_back(*this); | ||
1023 | result.group_.children.push_back(std::move(condition)); | ||
1024 | |||
1025 | return result; | ||
1026 | } else { | ||
1027 | filter result(*this); | ||
1028 | result.group_.children.push_back(std::move(condition)); | ||
1029 | |||
1030 | return result; | ||
1031 | } | ||
1032 | } | ||
1033 | } | ||
1034 | } | ||
1035 | |||
1036 | filter filter::operator||(filter condition) const | ||
1037 | { | ||
1038 | switch (type_) | ||
1039 | { | ||
1040 | case type::empty: | ||
1041 | { | ||
1042 | return condition; | ||
1043 | } | ||
1044 | |||
1045 | case type::singleton: | ||
1046 | { | ||
1047 | filter result(true); | ||
1048 | result.group_.children.push_back(*this); | ||
1049 | result.group_.children.push_back(std::move(condition)); | ||
1050 | |||
1051 | return result; | ||
1052 | } | ||
1053 | |||
1054 | case type::group: | ||
1055 | { | ||
1056 | if (!group_.orlogic) | ||
1057 | { | ||
1058 | filter result(true); | ||
1059 | result.group_.children.push_back(*this); | ||
1060 | result.group_.children.push_back(std::move(condition)); | ||
1061 | |||
1062 | return result; | ||
1063 | } else { | ||
1064 | filter result(*this); | ||
1065 | result.group_.children.push_back(std::move(condition)); | ||
1066 | |||
1067 | return result; | ||
1068 | } | ||
1069 | } | ||
1070 | } | ||
1071 | } | ||
1072 | |||
1073 | filter filter::normalize(object context) const | ||
1074 | { | ||
1075 | { | ||
1076 | switch (type_) | ||
1077 | { | ||
1078 | case type::empty: | ||
1079 | { | ||
1080 | return *this; | ||
1081 | } | ||
1082 | |||
1083 | case type::singleton: | ||
1084 | { | ||
1085 | // First, switch on the normalized context, and then switch on the | ||
1086 | // current context. We recursively recontextualize by using the | ||
1087 | // current filter as a subquery for a join such that the context of | ||
1088 | // the subquery is one step closer to the context of the current | ||
1089 | // filter, and then letting the filter constructor normalize the | ||
1090 | // subquery. | ||
1091 | switch (context) | ||
1092 | { | ||
1093 | case object::undefined: | ||
1094 | { | ||
1095 | // An undefined object indicates no participation in | ||
1096 | // recontexualization. | ||
1097 | return *this; | ||
1098 | } | ||
1099 | |||
1100 | case object::notion: | ||
1101 | { | ||
1102 | switch (singleton_.filterField.getObject()) | ||
1103 | { | ||
1104 | case object::undefined: | ||
1105 | case object::notion: | ||
1106 | { | ||
1107 | return *this; | ||
1108 | } | ||
1109 | |||
1110 | case object::word: | ||
1111 | case object::group: | ||
1112 | case object::frame: | ||
1113 | case object::lemma: | ||
1114 | case object::form: | ||
1115 | case object::pronunciation: | ||
1116 | { | ||
1117 | return (verbly::notion::word %= *this); | ||
1118 | } | ||
1119 | } | ||
1120 | } | ||
1121 | |||
1122 | case object::word: | ||
1123 | { | ||
1124 | switch (singleton_.filterField.getObject()) | ||
1125 | { | ||
1126 | case object::notion: | ||
1127 | { | ||
1128 | return (verbly::word::notion %= *this); | ||
1129 | } | ||
1130 | |||
1131 | case object::undefined: | ||
1132 | case object::word: | ||
1133 | { | ||
1134 | return *this; | ||
1135 | } | ||
1136 | |||
1137 | case object::group: | ||
1138 | case object::frame: | ||
1139 | { | ||
1140 | return (verbly::word::group %= *this); | ||
1141 | } | ||
1142 | |||
1143 | case object::lemma: | ||
1144 | case object::form: | ||
1145 | case object::pronunciation: | ||
1146 | { | ||
1147 | return (verbly::word::lemma %= *this); | ||
1148 | } | ||
1149 | } | ||
1150 | |||
1151 | case object::group: | ||
1152 | { | ||
1153 | switch (singleton_.filterField.getObject()) | ||
1154 | { | ||
1155 | case object::undefined: | ||
1156 | case object::group: | ||
1157 | { | ||
1158 | return *this; | ||
1159 | } | ||
1160 | |||
1161 | case object::notion: | ||
1162 | case object::word: | ||
1163 | case object::lemma: | ||
1164 | case object::form: | ||
1165 | case object::pronunciation: | ||
1166 | { | ||
1167 | return (verbly::group::word %= *this); | ||
1168 | } | ||
1169 | |||
1170 | case object::frame: | ||
1171 | { | ||
1172 | return (verbly::group::frame %= *this); | ||
1173 | } | ||
1174 | } | ||
1175 | } | ||
1176 | |||
1177 | case object::frame: | ||
1178 | { | ||
1179 | switch (singleton_.filterField.getObject()) | ||
1180 | { | ||
1181 | case object::undefined: | ||
1182 | case object::frame: | ||
1183 | { | ||
1184 | return *this; | ||
1185 | } | ||
1186 | |||
1187 | case object::notion: | ||
1188 | case object::word: | ||
1189 | case object::group: | ||
1190 | case object::lemma: | ||
1191 | case object::form: | ||
1192 | case object::pronunciation: | ||
1193 | { | ||
1194 | return (verbly::frame::group %= *this); | ||
1195 | } | ||
1196 | } | ||
1197 | } | ||
1198 | |||
1199 | case object::lemma: | ||
1200 | { | ||
1201 | switch (singleton_.filterField.getObject()) | ||
1202 | { | ||
1203 | case object::notion: | ||
1204 | case object::word: | ||
1205 | case object::group: | ||
1206 | case object::frame: | ||
1207 | { | ||
1208 | return verbly::lemma::word %= *this; | ||
1209 | } | ||
1210 | |||
1211 | case object::undefined: | ||
1212 | case object::lemma: | ||
1213 | { | ||
1214 | return *this; | ||
1215 | } | ||
1216 | |||
1217 | case object::form: | ||
1218 | case object::pronunciation: | ||
1219 | { | ||
1220 | return (verbly::lemma::form(inflection::base) %= *this); | ||
1221 | } | ||
1222 | } | ||
1223 | } | ||
1224 | |||
1225 | case object::form: | ||
1226 | { | ||
1227 | switch (singleton_.filterField.getObject()) | ||
1228 | { | ||
1229 | case object::notion: | ||
1230 | case object::word: | ||
1231 | case object::group: | ||
1232 | case object::frame: | ||
1233 | case object::lemma: | ||
1234 | { | ||
1235 | return verbly::form::lemma(inflection::base) %= *this; | ||
1236 | } | ||
1237 | |||
1238 | case object::undefined: | ||
1239 | case object::form: | ||
1240 | { | ||
1241 | return *this; | ||
1242 | } | ||
1243 | |||
1244 | case object::pronunciation: | ||
1245 | { | ||
1246 | return (verbly::form::pronunciation %= *this); | ||
1247 | } | ||
1248 | } | ||
1249 | } | ||
1250 | |||
1251 | case object::pronunciation: | ||
1252 | { | ||
1253 | switch (singleton_.filterField.getObject()) | ||
1254 | { | ||
1255 | case object::notion: | ||
1256 | case object::word: | ||
1257 | case object::group: | ||
1258 | case object::frame: | ||
1259 | case object::lemma: | ||
1260 | case object::form: | ||
1261 | { | ||
1262 | return verbly::pronunciation::form %= *this; | ||
1263 | } | ||
1264 | |||
1265 | case object::undefined: | ||
1266 | case object::pronunciation: | ||
1267 | { | ||
1268 | return *this; | ||
1269 | } | ||
1270 | } | ||
1271 | } | ||
1272 | } | ||
1273 | } | ||
1274 | } | ||
1275 | |||
1276 | case type::group: | ||
1277 | { | ||
1278 | filter result(group_.orlogic); | ||
1279 | std::map<field, filter> joins; | ||
1280 | |||
1281 | for (const filter& child : group_.children) | ||
1282 | { | ||
1283 | filter normalized = child.normalize(context); | ||
1284 | |||
1285 | // Notably, this does not attempt to merge hierarchal matches. | ||
1286 | switch (normalized.getType()) | ||
1287 | { | ||
1288 | case type::singleton: | ||
1289 | { | ||
1290 | switch (normalized.getComparison()) | ||
1291 | { | ||
1292 | case comparison::matches: | ||
1293 | { | ||
1294 | if (!joins.count(normalized.singleton_.filterField)) | ||
1295 | { | ||
1296 | joins[normalized.getField()] = filter(group_.orlogic); | ||
1297 | } | ||
1298 | |||
1299 | joins.at(normalized.getField()) += std::move(*normalized.singleton_.join); | ||
1300 | |||
1301 | break; | ||
1302 | } | ||
1303 | |||
1304 | case comparison::does_not_match: | ||
1305 | { | ||
1306 | if (!joins.count(normalized.singleton_.filterField)) | ||
1307 | { | ||
1308 | joins[normalized.getField()] = filter(group_.orlogic); | ||
1309 | } | ||
1310 | |||
1311 | joins.at(normalized.getField()) += !*normalized.singleton_.join; | ||
1312 | |||
1313 | break; | ||
1314 | } | ||
1315 | |||
1316 | case comparison::int_equals: | ||
1317 | case comparison::int_does_not_equal: | ||
1318 | case comparison::int_is_at_least: | ||
1319 | case comparison::int_is_greater_than: | ||
1320 | case comparison::int_is_at_most: | ||
1321 | case comparison::int_is_less_than: | ||
1322 | case comparison::boolean_equals: | ||
1323 | case comparison::string_equals: | ||
1324 | case comparison::string_does_not_equal: | ||
1325 | case comparison::string_is_like: | ||
1326 | case comparison::string_is_not_like: | ||
1327 | case comparison::is_null: | ||
1328 | case comparison::is_not_null: | ||
1329 | case comparison::hierarchally_matches: | ||
1330 | case comparison::does_not_hierarchally_match: | ||
1331 | { | ||
1332 | result += std::move(normalized); | ||
1333 | |||
1334 | break; | ||
1335 | } | ||
1336 | } | ||
1337 | |||
1338 | break; | ||
1339 | } | ||
1340 | |||
1341 | case type::group: | ||
1342 | case type::empty: | ||
1343 | { | ||
1344 | result += std::move(normalized); | ||
1345 | |||
1346 | break; | ||
1347 | } | ||
1348 | } | ||
1349 | } | ||
1350 | |||
1351 | for (auto& mapping : joins) | ||
1352 | { | ||
1353 | const field& joinOn = mapping.first; | ||
1354 | filter& joinCondition = mapping.second; | ||
1355 | |||
1356 | result += (joinOn %= joinCondition.normalize(joinOn.getJoinObject())); | ||
1357 | } | ||
1358 | |||
1359 | return result; | ||
1360 | } | ||
1361 | } | ||
1362 | } | ||
1363 | } | ||
1364 | |||
1365 | }; | ||
diff --git a/lib/filter.h b/lib/filter.h new file mode 100644 index 0000000..d213d7a --- /dev/null +++ b/lib/filter.h | |||
@@ -0,0 +1,143 @@ | |||
1 | #ifndef FILTER_H_932BA9C6 | ||
2 | #define FILTER_H_932BA9C6 | ||
3 | |||
4 | #include <list> | ||
5 | #include <string> | ||
6 | #include <memory> | ||
7 | #include "field.h" | ||
8 | #include "enums.h" | ||
9 | |||
10 | namespace verbly { | ||
11 | |||
// A query condition. A filter is a tagged union with three shapes (see
// `type`): empty, a singleton comparing one field, or a group of child
// filters. `type_` records which part of the private union is alive.
12 | class filter { | ||
13 | public: | ||
// Shape of the filter; selects the active part of the private union.
14 | enum class type { | ||
15 | empty, | ||
16 | singleton, | ||
17 | group | ||
18 | }; | ||
19 | |||
// Comparison performed by a singleton filter. It also determines which
// argument (integer, boolean, string, sub-filter or none) is stored.
20 | enum class comparison { | ||
21 | int_equals, | ||
22 | int_does_not_equal, | ||
23 | int_is_at_least, | ||
24 | int_is_greater_than, | ||
25 | int_is_at_most, | ||
26 | int_is_less_than, | ||
27 | boolean_equals, | ||
28 | string_equals, | ||
29 | string_does_not_equal, | ||
30 | string_is_like, | ||
31 | string_is_not_like, | ||
32 | is_null, | ||
33 | is_not_null, | ||
34 | matches, | ||
35 | does_not_match, | ||
36 | hierarchally_matches, | ||
37 | does_not_hierarchally_match | ||
38 | }; | ||
39 | |||
40 | // Copy and move constructors | ||
41 | |||
42 | filter(const filter& other); | ||
43 | filter(filter&& other); | ||
44 | |||
45 | // Assignment | ||
46 | |||
// Takes its argument by value, so one operator serves both copy and move
// assignment.
47 | filter& operator=(filter other); | ||
48 | |||
49 | // Swap | ||
50 | |||
51 | friend void swap(filter& first, filter& second); | ||
52 | |||
53 | // Destructor | ||
54 | |||
// User-provided because the union members are not destroyed automatically.
55 | ~filter(); | ||
56 | |||
57 | // Accessors | ||
58 | |||
59 | type getType() const | ||
60 | { | ||
61 | return type_; | ||
62 | } | ||
63 | |||
64 | // Empty | ||
65 | |||
66 | filter(); | ||
67 | |||
68 | // Singleton | ||
69 | |||
// Each constructor checks that the field kind and the comparison fit the
// argument type: std::domain_error for a field of the wrong kind,
// std::invalid_argument for a comparison that does not fit.
// NOTE(review): a string literal argument converts to bool by a standard
// conversion, which outranks the user-defined conversion to std::string, so
// it would select the bool overload - confirm callers always pass a
// std::string.
70 | filter(field filterField, comparison filterType, int filterValue); | ||
71 | filter(field filterField, comparison filterType, std::string filterValue); | ||
72 | filter(field filterField, comparison filterType, bool filterValue); | ||
// Nullity checks (is_null / is_not_null); the field must be nullable.
73 | filter(field filterField, comparison filterType); | ||
// Join comparisons; joinCondition is normalized before it is stored.
74 | filter(field joinOn, comparison filterType, filter joinCondition); | ||
75 | |||
// The accessors below return copies and throw std::domain_error when the
// filter is not a singleton or does not hold the requested kind of argument.
76 | field getField() const; | ||
77 | |||
78 | comparison getComparison() const; | ||
79 | |||
80 | filter getJoinCondition() const; | ||
81 | |||
82 | std::string getStringArgument() const; | ||
83 | |||
84 | int getIntegerArgument() const; | ||
85 | |||
86 | bool getBooleanArgument() const; | ||
87 | |||
88 | // Group | ||
89 | |||
// Creates a group with no children. operator|| builds groups with
// orlogic == true and operator&& builds groups with orlogic == false.
90 | explicit filter(bool orlogic); | ||
91 | |||
92 | bool getOrlogic() const; | ||
93 | |||
// operator+ and operator+= append a child; they throw std::domain_error
// unless the filter is a group.
94 | filter operator+(filter condition) const; | ||
95 | |||
96 | filter& operator+=(filter condition); | ||
97 | |||
98 | using const_iterator = std::list<filter>::const_iterator; | ||
99 | |||
// Iteration over the children; throws std::domain_error unless the filter
// is a group.
100 | const_iterator begin() const; | ||
101 | |||
102 | const_iterator end() const; | ||
103 | |||
104 | // Negation | ||
105 | |||
106 | filter operator!() const; | ||
107 | |||
108 | // Groupifying | ||
109 | |||
// These are ordinary member functions: unlike the built-in && and ||, both
// operands are always evaluated.
110 | filter operator&&(filter condition) const; | ||
111 | filter operator||(filter condition) const; | ||
112 | |||
113 | filter& operator&=(filter condition); | ||
114 | filter& operator|=(filter condition); | ||
115 | |||
116 | // Utility | ||
117 | |||
// Returns an equivalent filter expressed relative to the given object,
// wrapping fields of other objects in joins where needed.
118 | filter normalize(object context) const; | ||
119 | |||
120 | private: | ||
// Storage for the singleton and group shapes. Members with non-trivial
// types are constructed with placement new in the constructors.
121 | union { | ||
122 | struct { | ||
123 | field filterField; | ||
124 | comparison filterType; | ||
// Argument of the comparison; filterType determines the live member.
125 | union { | ||
126 | std::unique_ptr<filter> join; | ||
127 | std::string stringValue; | ||
128 | int intValue; | ||
129 | bool boolValue; | ||
130 | }; | ||
131 | } singleton_; | ||
132 | struct { | ||
133 | std::list<filter> children; | ||
134 | bool orlogic; | ||
135 | } group_; | ||
136 | }; | ||
// Discriminator for the union above; a default-constructed filter is empty.
137 | type type_ = type::empty; | ||
138 | |||
139 | }; | ||
140 | |||
141 | }; | ||
142 | |||
143 | #endif /* end of include guard: FILTER_H_932BA9C6 */ | ||
diff --git a/lib/form.cpp b/lib/form.cpp new file mode 100644 index 0000000..8ba3bd7 --- /dev/null +++ b/lib/form.cpp | |||
@@ -0,0 +1,53 @@ | |||
1 | #include "form.h" | ||
2 | #include <sqlite3.h> | ||
3 | #include "filter.h" | ||
4 | #include "pronunciation.h" | ||
5 | #include "database.h" | ||
6 | #include "query.h" | ||
7 | |||
8 | namespace verbly { | ||
9 | |||
10 | const object form::objectType = object::form; | ||
11 | |||
12 | const std::list<std::string> form::select = {"form_id", "form", "complexity", "proper"}; | ||
13 | |||
14 | const field form::id = field::integerField(object::form, "form_id"); | ||
15 | const field form::text = field::stringField(object::form, "form"); | ||
16 | const field form::complexity = field::integerField(object::form, "complexity"); | ||
17 | const field form::proper = field::booleanField(object::form, "proper"); | ||
18 | |||
19 | const field form::pronunciation = field::joinThrough(object::form, "form_id", object::pronunciation, "forms_pronunciations", "pronunciation_id"); | ||
20 | |||
21 | const field form::lemmaJoin = field::joinField(object::form, "form_id", object::lemma); | ||
22 | const field form::inflectionCategory = field::integerField("lemmas_forms", "category"); | ||
23 | |||
24 | form::form(const database& db, sqlite3_stmt* row) : db_(&db), valid_(true) | ||
25 | { | ||
26 | id_ = sqlite3_column_int(row, 0); | ||
27 | text_ = std::string(reinterpret_cast<const char*>(sqlite3_column_text(row, 1))); | ||
28 | complexity_ = sqlite3_column_int(row, 2); | ||
29 | proper_ = (sqlite3_column_int(row, 3) == 1); | ||
30 | } | ||
31 | |||
32 | filter operator%=(form::inflection_field check, filter joinCondition) | ||
33 | { | ||
34 | return (form::lemmaJoin %= (joinCondition && (form::inflectionCategory == check.getCategory()))); | ||
35 | } | ||
36 | |||
37 | const std::vector<pronunciation>& form::getPronunciations() const | ||
38 | { | ||
39 | if (!valid_) | ||
40 | { | ||
41 | throw std::domain_error("Bad access to uninitialized form"); | ||
42 | } | ||
43 | |||
44 | if (!initializedPronunciations_) | ||
45 | { | ||
46 | pronunciations_ = db_->pronunciations(pronunciation::form %= *this, false, -1).all(); | ||
47 | initializedPronunciations_ = true; | ||
48 | } | ||
49 | |||
50 | return pronunciations_; | ||
51 | } | ||
52 | |||
53 | }; | ||
diff --git a/lib/form.h b/lib/form.h new file mode 100644 index 0000000..c6a1353 --- /dev/null +++ b/lib/form.h | |||
@@ -0,0 +1,149 @@ | |||
1 | #ifndef FORM_H_3A6C962C | ||
2 | #define FORM_H_3A6C962C | ||
3 | |||
4 | #include <list> | ||
5 | #include <vector> | ||
6 | #include <string> | ||
7 | #include <stdexcept> | ||
8 | #include "field.h" | ||
9 | #include "filter.h" | ||
10 | |||
11 | struct sqlite3_stmt; | ||
12 | |||
13 | namespace verbly { | ||
14 | |||
15 | class pronunciation; | ||
16 | class database; | ||
17 | |||
18 | class form { | ||
19 | public: | ||
20 | |||
21 | // Default constructor | ||
22 | |||
23 | form() = default; | ||
24 | |||
25 | // Construct from database | ||
26 | |||
27 | form(const database& db, sqlite3_stmt* row); | ||
28 | |||
29 | // Accessors | ||
30 | |||
31 | operator bool() const | ||
32 | { | ||
33 | return valid_; | ||
34 | } | ||
35 | |||
36 | int getId() const | ||
37 | { | ||
38 | if (!valid_) | ||
39 | { | ||
40 | throw std::domain_error("Bad access to uninitialized form"); | ||
41 | } | ||
42 | |||
43 | return id_; | ||
44 | } | ||
45 | |||
46 | std::string getText() const | ||
47 | { | ||
48 | if (!valid_) | ||
49 | { | ||
50 | throw std::domain_error("Bad access to uninitialized form"); | ||
51 | } | ||
52 | |||
53 | return text_; | ||
54 | } | ||
55 | |||
56 | int getComplexity() const | ||
57 | { | ||
58 | if (!valid_) | ||
59 | { | ||
60 | throw std::domain_error("Bad access to uninitialized form"); | ||
61 | } | ||
62 | |||
63 | return complexity_; | ||
64 | } | ||
65 | |||
66 | bool isProper() const | ||
67 | { | ||
68 | if (!valid_) | ||
69 | { | ||
70 | throw std::domain_error("Bad access to uninitialized form"); | ||
71 | } | ||
72 | |||
73 | return proper_; | ||
74 | } | ||
75 | |||
76 | const std::vector<pronunciation>& getPronunciations() const; | ||
77 | |||
78 | // Type info | ||
79 | |||
80 | static const object objectType; | ||
81 | |||
82 | static const std::list<std::string> select; | ||
83 | |||
84 | // Query fields | ||
85 | |||
86 | static const field id; | ||
87 | static const field text; | ||
88 | static const field complexity; | ||
89 | static const field proper; | ||
90 | |||
91 | operator filter() const | ||
92 | { | ||
93 | if (!valid_) | ||
94 | { | ||
95 | throw std::domain_error("Bad access to uninitialized form"); | ||
96 | } | ||
97 | |||
98 | return (id == id_); | ||
99 | } | ||
100 | |||
101 | // Relationships to other objects | ||
102 | |||
103 | static const field pronunciation; | ||
104 | |||
105 | class inflection_field { | ||
106 | public: | ||
107 | |||
108 | inflection_field(inflection category) : category_(category) | ||
109 | { | ||
110 | } | ||
111 | |||
112 | const inflection getCategory() const | ||
113 | { | ||
114 | return category_; | ||
115 | } | ||
116 | |||
117 | private: | ||
118 | |||
119 | const inflection category_; | ||
120 | }; | ||
121 | |||
122 | static const inflection_field lemma(inflection category) | ||
123 | { | ||
124 | return inflection_field(category); | ||
125 | } | ||
126 | |||
127 | friend filter operator%=(form::inflection_field check, filter joinCondition); | ||
128 | |||
129 | private: | ||
130 | bool valid_ = false; | ||
131 | |||
132 | int id_; | ||
133 | std::string text_; | ||
134 | int complexity_ ; | ||
135 | bool proper_; | ||
136 | |||
137 | const database* db_; | ||
138 | |||
139 | mutable bool initializedPronunciations_ = false; | ||
140 | mutable std::vector<class pronunciation> pronunciations_; | ||
141 | |||
142 | static const field lemmaJoin; | ||
143 | static const field inflectionCategory; | ||
144 | |||
145 | }; | ||
146 | |||
147 | }; | ||
148 | |||
149 | #endif /* end of include guard: FORM_H_3A6C962C */ | ||
diff --git a/lib/frame.cpp b/lib/frame.cpp index ccec81b..bc3f842 100644 --- a/lib/frame.cpp +++ b/lib/frame.cpp | |||
@@ -1,320 +1,21 @@ | |||
1 | #include "verbly.h" | 1 | #include "frame.h" |
2 | #include <sqlite3.h> | ||
2 | 3 | ||
3 | namespace verbly { | 4 | namespace verbly { |
4 | 5 | ||
5 | frame::selrestr::type frame::selrestr::get_type() const | 6 | const object frame::objectType = object::frame; |
6 | { | ||
7 | return _type; | ||
8 | } | ||
9 | |||
10 | frame::selrestr::selrestr(const selrestr& other) | ||
11 | { | ||
12 | _type = other._type; | ||
13 | |||
14 | switch (_type) | ||
15 | { | ||
16 | case frame::selrestr::type::singleton: | ||
17 | { | ||
18 | _singleton.pos = other._singleton.pos; | ||
19 | new(&_singleton.restriction) std::string(other._singleton.restriction); | ||
20 | |||
21 | break; | ||
22 | } | ||
23 | |||
24 | case frame::selrestr::type::group: | ||
25 | { | ||
26 | new(&_group.children) std::list<selrestr>(other._group.children); | ||
27 | _group.orlogic = other._group.orlogic; | ||
28 | |||
29 | break; | ||
30 | } | ||
31 | |||
32 | case frame::selrestr::type::empty: | ||
33 | { | ||
34 | // Nothing! | ||
35 | |||
36 | break; | ||
37 | } | ||
38 | } | ||
39 | } | ||
40 | |||
41 | frame::selrestr::~selrestr() | ||
42 | { | ||
43 | switch (_type) | ||
44 | { | ||
45 | case frame::selrestr::type::singleton: | ||
46 | { | ||
47 | using string_type = std::string; | ||
48 | _singleton.restriction.~string_type(); | ||
49 | |||
50 | break; | ||
51 | } | ||
52 | |||
53 | case frame::selrestr::type::group: | ||
54 | { | ||
55 | using list_type = std::list<selrestr>; | ||
56 | _group.children.~list_type(); | ||
57 | |||
58 | break; | ||
59 | } | ||
60 | |||
61 | case frame::selrestr::type::empty: | ||
62 | { | ||
63 | // Nothing! | ||
64 | |||
65 | break; | ||
66 | } | ||
67 | } | ||
68 | } | ||
69 | |||
70 | frame::selrestr& frame::selrestr::operator=(const selrestr& other) | ||
71 | { | ||
72 | this->~selrestr(); | ||
73 | |||
74 | _type = other._type; | ||
75 | |||
76 | switch (_type) | ||
77 | { | ||
78 | case frame::selrestr::type::singleton: | ||
79 | { | ||
80 | _singleton.pos = other._singleton.pos; | ||
81 | new(&_singleton.restriction) std::string(other._singleton.restriction); | ||
82 | |||
83 | break; | ||
84 | } | ||
85 | |||
86 | case frame::selrestr::type::group: | ||
87 | { | ||
88 | new(&_group.children) std::list<selrestr>(other._group.children); | ||
89 | _group.orlogic = other._group.orlogic; | ||
90 | |||
91 | break; | ||
92 | } | ||
93 | |||
94 | case frame::selrestr::type::empty: | ||
95 | { | ||
96 | // Nothing! | ||
97 | |||
98 | break; | ||
99 | } | ||
100 | } | ||
101 | |||
102 | return *this; | ||
103 | } | ||
104 | |||
105 | frame::selrestr::selrestr() : _type(frame::selrestr::type::empty) | ||
106 | { | ||
107 | |||
108 | } | ||
109 | |||
110 | frame::selrestr::selrestr(std::string restriction, bool pos) : _type(frame::selrestr::type::singleton) | ||
111 | { | ||
112 | new(&_singleton.restriction) std::string(restriction); | ||
113 | _singleton.pos = pos; | ||
114 | } | ||
115 | |||
116 | std::string frame::selrestr::get_restriction() const | ||
117 | { | ||
118 | assert(_type == frame::selrestr::type::singleton); | ||
119 | |||
120 | return _singleton.restriction; | ||
121 | } | ||
122 | |||
123 | bool frame::selrestr::get_pos() const | ||
124 | { | ||
125 | assert(_type == frame::selrestr::type::singleton); | ||
126 | |||
127 | return _singleton.pos; | ||
128 | } | ||
129 | |||
130 | frame::selrestr::selrestr(std::list<selrestr> children, bool orlogic) : _type(frame::selrestr::type::group) | ||
131 | { | ||
132 | new(&_group.children) std::list<selrestr>(children); | ||
133 | _group.orlogic = orlogic; | ||
134 | } | ||
135 | |||
136 | std::list<frame::selrestr> frame::selrestr::get_children() const | ||
137 | { | ||
138 | assert(_type == frame::selrestr::type::group); | ||
139 | |||
140 | return _group.children; | ||
141 | } | ||
142 | |||
143 | std::list<frame::selrestr>::const_iterator frame::selrestr::begin() const | ||
144 | { | ||
145 | assert(_type == frame::selrestr::type::group); | ||
146 | |||
147 | return _group.children.begin(); | ||
148 | } | ||
149 | |||
150 | std::list<frame::selrestr>::const_iterator frame::selrestr::end() const | ||
151 | { | ||
152 | assert(_type == frame::selrestr::type::group); | ||
153 | |||
154 | return _group.children.end(); | ||
155 | } | ||
156 | |||
157 | bool frame::selrestr::get_orlogic() const | ||
158 | { | ||
159 | assert(_type == frame::selrestr::type::group); | ||
160 | |||
161 | return _group.orlogic; | ||
162 | } | ||
163 | |||
164 | frame::part::type frame::part::get_type() const | ||
165 | { | ||
166 | return _type; | ||
167 | } | ||
168 | |||
169 | frame::part::part() | ||
170 | { | ||
171 | |||
172 | } | ||
173 | 7 | ||
174 | frame::part::part(const part& other) | 8 | const std::list<std::string> frame::select = {"frame_id", "data"}; |
175 | { | ||
176 | _type = other._type; | ||
177 | |||
178 | switch (_type) | ||
179 | { | ||
180 | case frame::part::type::noun_phrase: | ||
181 | { | ||
182 | new(&_noun_phrase.role) std::string(other._noun_phrase.role); | ||
183 | new(&_noun_phrase.selrestrs) selrestr(other._noun_phrase.selrestrs); | ||
184 | new(&_noun_phrase.synrestrs) std::set<std::string>(other._noun_phrase.synrestrs); | ||
185 | |||
186 | break; | ||
187 | } | ||
188 | |||
189 | case frame::part::type::literal_preposition: | ||
190 | { | ||
191 | new(&_literal_preposition.choices) std::vector<std::string>(other._literal_preposition.choices); | ||
192 | |||
193 | break; | ||
194 | } | ||
195 | |||
196 | case frame::part::type::selection_preposition: | ||
197 | { | ||
198 | new(&_selection_preposition.preprestrs) std::vector<std::string>(other._selection_preposition.preprestrs); | ||
199 | |||
200 | break; | ||
201 | } | ||
202 | |||
203 | case frame::part::type::literal: | ||
204 | { | ||
205 | new(&_literal.lexval) std::string(other._literal.lexval); | ||
206 | |||
207 | break; | ||
208 | } | ||
209 | |||
210 | default: | ||
211 | { | ||
212 | // Nothing! | ||
213 | |||
214 | break; | ||
215 | } | ||
216 | } | ||
217 | } | ||
218 | 9 | ||
219 | frame::part::~part() | 10 | const field frame::id = field::integerField(object::frame, "frame_id"); |
220 | { | ||
221 | switch (_type) | ||
222 | { | ||
223 | case frame::part::type::noun_phrase: | ||
224 | { | ||
225 | using string_type = std::string; | ||
226 | using set_type = std::set<std::string>; | ||
227 | |||
228 | _noun_phrase.role.~string_type(); | ||
229 | _noun_phrase.selrestrs.~selrestr(); | ||
230 | _noun_phrase.synrestrs.~set_type(); | ||
231 | |||
232 | break; | ||
233 | } | ||
234 | |||
235 | case frame::part::type::literal_preposition: | ||
236 | { | ||
237 | using vector_type = std::vector<std::string>; | ||
238 | _literal_preposition.choices.~vector_type(); | ||
239 | |||
240 | break; | ||
241 | } | ||
242 | |||
243 | case frame::part::type::selection_preposition: | ||
244 | { | ||
245 | using vector_type = std::vector<std::string>; | ||
246 | _selection_preposition.preprestrs.~vector_type(); | ||
247 | |||
248 | break; | ||
249 | } | ||
250 | |||
251 | case frame::part::type::literal: | ||
252 | { | ||
253 | using string_type = std::string; | ||
254 | _literal.lexval.~string_type(); | ||
255 | |||
256 | break; | ||
257 | } | ||
258 | |||
259 | default: | ||
260 | { | ||
261 | // Nothing! | ||
262 | |||
263 | break; | ||
264 | } | ||
265 | } | ||
266 | } | ||
267 | 11 | ||
268 | std::string frame::part::get_role() const | 12 | const field frame::group = field::joinThrough(object::frame, "frame_id", object::group, "groups_frames", "group_id"); |
269 | { | ||
270 | assert(_type == frame::part::type::noun_phrase); | ||
271 | |||
272 | return _noun_phrase.role; | ||
273 | } | ||
274 | 13 | ||
275 | frame::selrestr frame::part::get_selrestrs() const | 14 | frame::frame(const database& db, sqlite3_stmt* row) : db_(&db), valid_(true) |
276 | { | 15 | { |
277 | assert(_type == frame::part::type::noun_phrase); | 16 | id_ = sqlite3_column_int(row, 0); |
278 | 17 | ||
279 | return _noun_phrase.selrestrs; | 18 | // TODO: Initialize frame data from row. |
280 | } | ||
281 | |||
282 | std::set<std::string> frame::part::get_synrestrs() const | ||
283 | { | ||
284 | assert(_type == frame::part::type::noun_phrase); | ||
285 | |||
286 | return _noun_phrase.synrestrs; | ||
287 | } | ||
288 | |||
289 | std::vector<std::string> frame::part::get_choices() const | ||
290 | { | ||
291 | assert(_type == frame::part::type::literal_preposition); | ||
292 | |||
293 | return _literal_preposition.choices; | ||
294 | } | ||
295 | |||
296 | std::vector<std::string> frame::part::get_preprestrs() const | ||
297 | { | ||
298 | assert(_type == frame::part::type::selection_preposition); | ||
299 | |||
300 | return _selection_preposition.preprestrs; | ||
301 | } | ||
302 | |||
303 | std::string frame::part::get_literal() const | ||
304 | { | ||
305 | assert(_type == frame::part::type::literal); | ||
306 | |||
307 | return _literal.lexval; | ||
308 | } | ||
309 | |||
310 | std::vector<frame::part> frame::parts() const | ||
311 | { | ||
312 | return _parts; | ||
313 | } | ||
314 | |||
315 | std::map<std::string, frame::selrestr> frame::roles() const | ||
316 | { | ||
317 | return _roles; | ||
318 | } | 19 | } |
319 | 20 | ||
320 | }; | 21 | }; |
diff --git a/lib/frame.h b/lib/frame.h index fa57e1b..68a4346 100644 --- a/lib/frame.h +++ b/lib/frame.h | |||
@@ -1,118 +1,78 @@ | |||
1 | #ifndef FRAME_H_9A5D90FE | 1 | #ifndef FRAME_H_EA29065A |
2 | #define FRAME_H_9A5D90FE | 2 | #define FRAME_H_EA29065A |
3 | |||
4 | #include <stdexcept> | ||
5 | #include <list> | ||
6 | #include "field.h" | ||
7 | #include "filter.h" | ||
8 | |||
9 | struct sqlite3_stmt; | ||
3 | 10 | ||
4 | namespace verbly { | 11 | namespace verbly { |
5 | 12 | ||
6 | class frame_query; | 13 | class database; |
7 | 14 | ||
8 | class frame { | 15 | class frame { |
9 | public: | 16 | public: |
10 | class selrestr { | 17 | |
11 | public: | 18 | // Default constructor |
12 | enum class type { | 19 | |
13 | empty, | 20 | frame() = default; |
14 | singleton, | 21 | |
15 | group | 22 | // Construct from database |
16 | }; | 23 | |
17 | 24 | frame(const database& db, sqlite3_stmt* row); | |
18 | type get_type() const; | 25 | |
19 | selrestr(const selrestr& other); | 26 | // Accessors |
20 | ~selrestr(); | 27 | |
21 | selrestr& operator=(const selrestr& other); | 28 | operator bool() const |
22 | 29 | { | |
23 | // Empty | 30 | return valid_; |
24 | selrestr(); | 31 | } |
25 | 32 | ||
26 | // Singleton | 33 | int getId() const |
27 | selrestr(std::string restriction, bool pos); | 34 | { |
28 | std::string get_restriction() const; | 35 | if (!valid_) |
29 | bool get_pos() const; | 36 | { |
30 | 37 | throw std::domain_error("Bad access to uninitialized frame"); | |
31 | // Group | 38 | } |
32 | selrestr(std::list<selrestr> children, bool orlogic); | ||
33 | std::list<selrestr> get_children() const; | ||
34 | std::list<selrestr>::const_iterator begin() const; | ||
35 | std::list<selrestr>::const_iterator end() const; | ||
36 | bool get_orlogic() const; | ||
37 | |||
38 | private: | ||
39 | union { | ||
40 | struct { | ||
41 | bool pos; | ||
42 | std::string restriction; | ||
43 | } _singleton; | ||
44 | struct { | ||
45 | std::list<selrestr> children; | ||
46 | bool orlogic; | ||
47 | } _group; | ||
48 | }; | ||
49 | type _type; | ||
50 | }; | ||
51 | 39 | ||
52 | class part { | 40 | return id_; |
53 | public: | 41 | } |
54 | enum class type { | 42 | |
55 | noun_phrase, | 43 | // Type info |
56 | verb, | 44 | |
57 | literal_preposition, | 45 | static const object objectType; |
58 | selection_preposition, | 46 | |
59 | adjective, | 47 | static const std::list<std::string> select; |
60 | adverb, | 48 | |
61 | literal | 49 | // Query fields |
62 | }; | 50 | |
63 | 51 | static const field id; | |
64 | type get_type() const; | 52 | |
65 | part(const part& other); | 53 | operator filter() const |
66 | ~part(); | 54 | { |
67 | 55 | if (!valid_) | |
68 | // Noun phrase | 56 | { |
69 | std::string get_role() const; | 57 | throw std::domain_error("Bad access to uninitialized frame"); |
70 | selrestr get_selrestrs() const; | 58 | } |
71 | std::set<std::string> get_synrestrs() const; | ||
72 | |||
73 | // Literal preposition | ||
74 | std::vector<std::string> get_choices() const; | ||
75 | |||
76 | // Selection preposition | ||
77 | std::vector<std::string> get_preprestrs() const; | ||
78 | |||
79 | // Literal | ||
80 | std::string get_literal() const; | ||
81 | |||
82 | private: | ||
83 | friend class frame_query; | ||
84 | |||
85 | part(); | ||
86 | |||
87 | union { | ||
88 | struct { | ||
89 | std::string role; | ||
90 | selrestr selrestrs; | ||
91 | std::set<std::string> synrestrs; | ||
92 | } _noun_phrase; | ||
93 | struct { | ||
94 | std::vector<std::string> choices; | ||
95 | } _literal_preposition; | ||
96 | struct { | ||
97 | std::vector<std::string> preprestrs; | ||
98 | } _selection_preposition; | ||
99 | struct { | ||
100 | std::string lexval; | ||
101 | } _literal; | ||
102 | }; | ||
103 | type _type; | ||
104 | }; | ||
105 | 59 | ||
106 | std::vector<part> parts() const; | 60 | return (id == id_); |
107 | std::map<std::string, selrestr> roles() const; | 61 | } |
108 | 62 | ||
109 | private: | 63 | // Relationships to other objects |
110 | friend class frame_query; | 64 | |
111 | 65 | static const field group; | |
112 | std::vector<part> _parts; | 66 | |
113 | std::map<std::string, selrestr> _roles; | 67 | private: |
68 | bool valid_ = false; | ||
69 | |||
70 | int id_; | ||
71 | |||
72 | const database* db_; | ||
73 | |||
114 | }; | 74 | }; |
115 | 75 | ||
116 | }; | 76 | }; |
117 | 77 | ||
118 | #endif /* end of include guard: FRAME_H_9A5D90FE */ | 78 | #endif /* end of include guard: FRAME_H_EA29065A */ |
diff --git a/lib/group.cpp b/lib/group.cpp new file mode 100644 index 0000000..8b6d985 --- /dev/null +++ b/lib/group.cpp | |||
@@ -0,0 +1,43 @@ | |||
1 | #include "group.h" | ||
2 | #include <sqlite3.h> | ||
3 | #include "frame.h" | ||
4 | #include "database.h" | ||
5 | #include "query.h" | ||
6 | |||
7 | namespace verbly { | ||
8 | |||
9 | const object group::objectType = object::group; | ||
10 | |||
11 | const std::list<std::string> group::select = {"group_id", "data"}; | ||
12 | |||
13 | const field group::id = field::integerField(object::group, "group_id"); | ||
14 | |||
15 | const field group::frame = field::joinThrough(object::group, "group_id", object::frame, "groups_frames", "frame_id"); | ||
16 | const field group::word = field::joinField(object::group, "group_id", object::word); | ||
17 | |||
18 | group::group(const database& db, sqlite3_stmt* row) : db_(&db), valid_(true) | ||
19 | { | ||
20 | id_ = sqlite3_column_int(row, 0); | ||
21 | |||
22 | // TODO: Initialize role data from row. | ||
23 | } | ||
24 | |||
25 | const std::vector<frame>& group::getFrames() const | ||
26 | { | ||
27 | if (!valid_) | ||
28 | { | ||
29 | throw std::domain_error("Bad access to uninitialized group"); | ||
30 | } | ||
31 | |||
32 | if (!initializedFrames_) | ||
33 | { | ||
34 | frames_ = db_->frames(frame::group %= *this, false, -1).all(); | ||
35 | |||
36 | initializedFrames_ = true; | ||
37 | } | ||
38 | |||
39 | return frames_; | ||
40 | } | ||
41 | |||
42 | }; | ||
43 | |||
diff --git a/lib/group.h b/lib/group.h new file mode 100644 index 0000000..dd53503 --- /dev/null +++ b/lib/group.h | |||
@@ -0,0 +1,87 @@ | |||
1 | #ifndef GROUP_H_BD6933C0 | ||
2 | #define GROUP_H_BD6933C0 | ||
3 | |||
4 | #include <stdexcept> | ||
5 | #include <list> | ||
6 | #include <vector> | ||
7 | #include "field.h" | ||
8 | #include "filter.h" | ||
9 | |||
10 | struct sqlite3_stmt; | ||
11 | |||
12 | namespace verbly { | ||
13 | |||
14 | class database; | ||
15 | class frame; | ||
16 | |||
17 | class group { | ||
18 | public: | ||
19 | |||
20 | // Default constructor | ||
21 | |||
22 | group() = default; | ||
23 | |||
24 | // Construct from database | ||
25 | |||
26 | group(const database& db, sqlite3_stmt* row); | ||
27 | |||
28 | // Accessors | ||
29 | |||
30 | operator bool() const | ||
31 | { | ||
32 | return valid_; | ||
33 | } | ||
34 | |||
35 | int getId() const | ||
36 | { | ||
37 | if (!valid_) | ||
38 | { | ||
39 | throw std::domain_error("Bad access to uninitialized group"); | ||
40 | } | ||
41 | |||
42 | return id_; | ||
43 | } | ||
44 | |||
45 | const std::vector<frame>& getFrames() const; | ||
46 | |||
47 | // Type info | ||
48 | |||
49 | static const object objectType; | ||
50 | |||
51 | static const std::list<std::string> select; | ||
52 | |||
53 | // Query fields | ||
54 | |||
55 | static const field id; | ||
56 | |||
57 | operator filter() const | ||
58 | { | ||
59 | if (!valid_) | ||
60 | { | ||
61 | throw std::domain_error("Bad access to uninitialized group"); | ||
62 | } | ||
63 | |||
64 | return (id == id_); | ||
65 | } | ||
66 | |||
67 | // Relationships to other objects | ||
68 | |||
69 | static const field frame; | ||
70 | |||
71 | static const field word; | ||
72 | |||
73 | private: | ||
74 | bool valid_ = false; | ||
75 | |||
76 | int id_; | ||
77 | |||
78 | const database* db_; | ||
79 | |||
80 | mutable bool initializedFrames_ = false; | ||
81 | mutable std::vector<class frame> frames_; | ||
82 | |||
83 | }; | ||
84 | |||
85 | }; | ||
86 | |||
87 | #endif /* end of include guard: GROUP_H_BD6933C0 */ | ||
diff --git a/lib/lemma.cpp b/lib/lemma.cpp new file mode 100644 index 0000000..f9e9fcc --- /dev/null +++ b/lib/lemma.cpp | |||
@@ -0,0 +1,69 @@ | |||
1 | #include "lemma.h" | ||
2 | #include <sqlite3.h> | ||
3 | #include "database.h" | ||
4 | #include "query.h" | ||
5 | |||
6 | namespace verbly { | ||
7 | |||
8 | const object lemma::objectType = object::lemma; | ||
9 | |||
10 | const std::list<std::string> lemma::select = {"lemma_id"}; | ||
11 | |||
12 | const field lemma::id = field::integerField(object::lemma, "lemma_id"); | ||
13 | |||
14 | const field lemma::word = field::joinField(object::lemma, "lemma_id", object::word); | ||
15 | |||
16 | const field lemma::formJoin = field::joinField(object::lemma, "form_id", object::form); | ||
17 | const field lemma::inflectionCategory = field::integerField(object::lemma, "category"); | ||
18 | |||
19 | filter operator%=(lemma::inflection_field check, filter joinCondition) | ||
20 | { | ||
21 | return (lemma::formJoin %= joinCondition) && (lemma::inflectionCategory == check.getCategory()); | ||
22 | } | ||
23 | |||
24 | lemma::lemma(const database& db, sqlite3_stmt* row) : db_(&db), valid_(true) | ||
25 | { | ||
26 | id_ = sqlite3_column_int(row, 0); | ||
27 | } | ||
28 | |||
29 | const form& lemma::getBaseForm() const | ||
30 | { | ||
31 | if (!valid_) | ||
32 | { | ||
33 | throw std::domain_error("Bad access to uninitialized lemma"); | ||
34 | } | ||
35 | |||
36 | if (!forms_.count(inflection::base)) | ||
37 | { | ||
38 | initializeForm(inflection::base); | ||
39 | } | ||
40 | |||
41 | return forms_.at(inflection::base).front(); | ||
42 | } | ||
43 | |||
44 | bool lemma::hasInflection(inflection category) const | ||
45 | { | ||
46 | return !getInflections(category).empty(); | ||
47 | } | ||
48 | |||
49 | const std::vector<form>& lemma::getInflections(inflection category) const | ||
50 | { | ||
51 | if (!valid_) | ||
52 | { | ||
53 | throw std::domain_error("Bad access to uninitialized lemma"); | ||
54 | } | ||
55 | |||
56 | if (!forms_.count(category)) | ||
57 | { | ||
58 | initializeForm(category); | ||
59 | } | ||
60 | |||
61 | return forms_.at(category); | ||
62 | } | ||
63 | |||
64 | void lemma::initializeForm(inflection infl) const | ||
65 | { | ||
66 | forms_[infl] = db_->forms(form::lemma(infl) %= *this, false, -1).all(); | ||
67 | } | ||
68 | |||
69 | }; | ||
diff --git a/lib/lemma.h b/lib/lemma.h new file mode 100644 index 0000000..9a07f16 --- /dev/null +++ b/lib/lemma.h | |||
@@ -0,0 +1,120 @@ | |||
1 | #ifndef LEMMA_H_0A180D30 | ||
2 | #define LEMMA_H_0A180D30 | ||
3 | |||
4 | #include <stdexcept> | ||
5 | #include <vector> | ||
6 | #include <list> | ||
7 | #include <map> | ||
8 | #include "field.h" | ||
9 | #include "enums.h" | ||
10 | #include "filter.h" | ||
11 | |||
12 | struct sqlite3_stmt; | ||
13 | |||
14 | namespace verbly { | ||
15 | |||
16 | class form; | ||
17 | class database; | ||
18 | |||
19 | class lemma { | ||
20 | public: | ||
21 | |||
22 | // Default constructor | ||
23 | |||
24 | lemma() = default; | ||
25 | |||
26 | // Construct from database | ||
27 | |||
28 | lemma(const database& db, sqlite3_stmt* row); | ||
29 | |||
30 | // Accessors | ||
31 | |||
32 | operator bool() const | ||
33 | { | ||
34 | return valid_; | ||
35 | } | ||
36 | |||
37 | int getId() const | ||
38 | { | ||
39 | if (!valid_) | ||
40 | { | ||
41 | throw std::domain_error("Bad access to uninitialized lemma"); | ||
42 | } | ||
43 | |||
44 | return id_; | ||
45 | } | ||
46 | |||
47 | const form& getBaseForm() const; | ||
48 | |||
49 | bool hasInflection(inflection category) const; | ||
50 | |||
51 | const std::vector<form>& getInflections(inflection category) const; | ||
52 | |||
53 | // Type info | ||
54 | |||
55 | static const object objectType; | ||
56 | |||
57 | static const std::list<std::string> select; | ||
58 | |||
59 | // Query fields | ||
60 | |||
61 | static const field id; | ||
62 | |||
63 | operator filter() const | ||
64 | { | ||
65 | if (!valid_) | ||
66 | { | ||
67 | throw std::domain_error("Bad access to uninitialized lemma"); | ||
68 | } | ||
69 | |||
70 | return (id == id_); | ||
71 | } | ||
72 | |||
73 | // Relationships to other objects | ||
74 | |||
75 | static const field word; | ||
76 | |||
77 | class inflection_field { | ||
78 | public: | ||
79 | |||
80 | inflection_field(inflection category) : category_(category) | ||
81 | { | ||
82 | } | ||
83 | |||
84 | const inflection getCategory() const | ||
85 | { | ||
86 | return category_; | ||
87 | } | ||
88 | |||
89 | private: | ||
90 | |||
91 | const inflection category_; | ||
92 | }; | ||
93 | |||
94 | static const inflection_field form(inflection category) | ||
95 | { | ||
96 | return inflection_field(category); | ||
97 | } | ||
98 | |||
99 | friend filter operator%=(lemma::inflection_field check, filter joinCondition); | ||
100 | |||
101 | private: | ||
102 | |||
103 | void initializeForm(inflection category) const; | ||
104 | |||
105 | bool valid_ = false; | ||
106 | |||
107 | int id_; | ||
108 | |||
109 | mutable std::map<inflection, std::vector<class form>> forms_; | ||
110 | |||
111 | const database* db_; | ||
112 | |||
113 | static const field formJoin; | ||
114 | static const field inflectionCategory; | ||
115 | |||
116 | }; | ||
117 | |||
118 | }; | ||
119 | |||
120 | #endif /* end of include guard: LEMMA_H_0A180D30 */ | ||
diff --git a/lib/notion.cpp b/lib/notion.cpp new file mode 100644 index 0000000..16794d3 --- /dev/null +++ b/lib/notion.cpp | |||
@@ -0,0 +1,94 @@ | |||
1 | #include "notion.h" | ||
2 | #include <sqlite3.h> | ||
3 | #include <sstream> | ||
4 | |||
5 | namespace verbly { | ||
6 | |||
7 | const object notion::objectType = object::notion; | ||
8 | |||
9 | const std::list<std::string> notion::select = {"notion_id", "part_of_speech", "wnid", "images"}; | ||
10 | |||
11 | const field notion::id = field::integerField(object::notion, "notion_id"); | ||
12 | const field notion::partOfSpeech = field::integerField(object::notion, "part_of_speech"); | ||
13 | const field notion::wnid = field::integerField(object::notion, "wnid", true); | ||
14 | const field notion::numOfImages = field::integerField(object::notion, "images", true); | ||
15 | |||
16 | const field notion::word = field::joinField(object::notion, "word_id", object::word); | ||
17 | |||
18 | const field notion::hypernyms = field::selfJoin(object::notion, "notion_id", "hypernymy", "hyponym_id", "hypernym_id"); | ||
19 | const field notion::hyponyms = field::selfJoin(object::notion, "notion_id", "hypernymy", "hypernym_id", "hyponym_id"); | ||
20 | |||
21 | const field notion::fullHypernyms = field::hierarchalSelfJoin(object::notion, "notion_id", "hypernymy", "hyponym_id", "hypernym_id"); | ||
22 | const field notion::fullHyponyms = field::hierarchalSelfJoin(object::notion, "notion_id", "hypernymy", "hypernym_id", "hyponym_id"); | ||
23 | |||
24 | const field notion::instances = field::selfJoin(object::notion, "notion_id", "instantiation", "class_id", "instance_id"); | ||
25 | const field notion::classes = field::selfJoin(object::notion, "notion_id", "instantiation", "instance_id", "class_id"); | ||
26 | |||
27 | const field notion::memberMeronyms = field::selfJoin(object::notion, "notion_id", "member_meronymy", "holonym_id", "meronym_id"); | ||
28 | const field notion::memberHolonyms = field::selfJoin(object::notion, "notion_id", "member_meronymy", "meronym_id", "holonym_id"); | ||
29 | |||
30 | const field notion::fullMemberMeronyms = field::hierarchalSelfJoin(object::notion, "notion_id", "member_meronymy", "holonym_id", "meronym_id"); | ||
31 | const field notion::fullMemberHolonyms = field::hierarchalSelfJoin(object::notion, "notion_id", "member_meronymy", "meronym_id", "holonym_id"); | ||
32 | |||
33 | const field notion::partMeronyms = field::selfJoin(object::notion, "notion_id", "part_meronymy", "holonym_id", "meronym_id"); | ||
34 | const field notion::partHolonyms = field::selfJoin(object::notion, "notion_id", "part_meronymy", "meronym_id", "holonym_id"); | ||
35 | |||
36 | const field notion::fullPartMeronyms = field::hierarchalSelfJoin(object::notion, "notion_id", "part_meronymy", "holonym_id", "meronym_id"); | ||
37 | const field notion::fullPartHolonyms = field::hierarchalSelfJoin(object::notion, "notion_id", "part_meronymy", "meronym_id", "holonym_id"); | ||
38 | |||
39 | const field notion::substanceMeronyms = field::selfJoin(object::notion, "notion_id", "substance_meronymy", "holonym_id", "meronym_id"); | ||
40 | const field notion::substanceHolonyms = field::selfJoin(object::notion, "notion_id", "substance_meronymy", "meronym_id", "holonym_id"); | ||
41 | |||
42 | const field notion::fullSubstanceMeronyms = field::hierarchalSelfJoin(object::notion, "notion_id", "substance_meronymy", "holonym_id", "meronym_id"); | ||
43 | const field notion::fullSubstanceHolonyms = field::hierarchalSelfJoin(object::notion, "notion_id", "substance_meronymy", "meronym_id", "holonym_id"); | ||
44 | |||
45 | const field notion::variants = field::selfJoin(object::notion, "notion_id", "variation", "noun_id", "adjective_id"); | ||
46 | const field notion::attributes = field::selfJoin(object::notion, "notion_id", "variation", "adjective_id", "noun_id"); | ||
47 | |||
48 | const field notion::similarAdjectives = field::selfJoin(object::notion, "notion_id", "similarity", "adjective_2_id", "adjective_1_id"); | ||
49 | |||
50 | const field notion::entails = field::selfJoin(object::notion, "notion_id", "entailment", "given_id", "entailment_id"); | ||
51 | const field notion::entailedBy = field::selfJoin(object::notion, "notion_id", "entailment", "entailment_id", "given_id"); | ||
52 | |||
53 | const field notion::causes = field::selfJoin(object::notion, "notion_id", "causality", "effect_id", "cause_id"); | ||
54 | const field notion::effects = field::selfJoin(object::notion, "notion_id", "causality", "cause_id", "effect_id"); | ||
55 | |||
56 | const notion::preposition_group_field prepositionGroup = {}; | ||
57 | |||
58 | const field notion::preposition_group_field::isA = field::joinField(object::notion, "notion_id", "is_a"); | ||
59 | const field notion::preposition_group_field::groupNameField = field::stringField("is_a", "groupname"); | ||
60 | |||
61 | notion::notion(const database& db, sqlite3_stmt* row) : db_(&db), valid_(true) | ||
62 | { | ||
63 | id_ = sqlite3_column_int(row, 0); | ||
64 | partOfSpeech_ = static_cast<part_of_speech>(sqlite3_column_int(row, 1)); | ||
65 | |||
66 | if (sqlite3_column_type(row, 2) != SQLITE_NULL) | ||
67 | { | ||
68 | hasWnid_ = true; | ||
69 | wnid_ = sqlite3_column_int(row, 2); | ||
70 | } | ||
71 | |||
72 | if (sqlite3_column_type(row, 3) != SQLITE_NULL) | ||
73 | { | ||
74 | hasNumOfImages_ = true; | ||
75 | numOfImages_ = sqlite3_column_int(row, 3); | ||
76 | } | ||
77 | } | ||
78 | |||
79 | std::string notion::getImageNetUrl() const | ||
80 | { | ||
81 | std::stringstream url; | ||
82 | url << "http://www.image-net.org/api/text/imagenet.synset.geturls?wnid=n"; | ||
83 | url.width(8); | ||
84 | url.fill('0'); | ||
85 | url << (getWnid() % 100000000); | ||
86 | return url.str(); | ||
87 | } | ||
88 | |||
89 | filter notion::preposition_group_field::operator==(std::string groupName) const | ||
90 | { | ||
91 | return (isA %= (groupNameField == groupName)); | ||
92 | } | ||
93 | |||
94 | }; | ||
diff --git a/lib/notion.h b/lib/notion.h new file mode 100644 index 0000000..a180d73 --- /dev/null +++ b/lib/notion.h | |||
@@ -0,0 +1,200 @@ | |||
1 | #ifndef NOTION_H_FD1C7646 | ||
2 | #define NOTION_H_FD1C7646 | ||
3 | |||
4 | #include <stdexcept> | ||
5 | #include <string> | ||
6 | #include "field.h" | ||
7 | #include "filter.h" | ||
8 | |||
9 | struct sqlite3_stmt; | ||
10 | |||
11 | namespace verbly { | ||
12 | |||
13 | class database; | ||
14 | |||
15 | class notion { | ||
16 | public: | ||
17 | |||
18 | // Default constructor | ||
19 | |||
20 | notion() = default; | ||
21 | |||
22 | // Construct from database | ||
23 | |||
24 | notion(const database& db, sqlite3_stmt* row); | ||
25 | |||
26 | // Accessors | ||
27 | |||
28 | operator bool() const | ||
29 | { | ||
30 | return valid_; | ||
31 | } | ||
32 | |||
33 | int getId() const | ||
34 | { | ||
35 | if (!valid_) | ||
36 | { | ||
37 | throw std::domain_error("Bad access to uninitialized notion"); | ||
38 | } | ||
39 | |||
40 | return id_; | ||
41 | } | ||
42 | |||
43 | part_of_speech getPartOfSpeech() const | ||
44 | { | ||
45 | if (!valid_) | ||
46 | { | ||
47 | throw std::domain_error("Bad access to uninitialized notion"); | ||
48 | } | ||
49 | |||
50 | return partOfSpeech_; | ||
51 | } | ||
52 | |||
53 | bool hasWnid() const | ||
54 | { | ||
55 | if (!valid_) | ||
56 | { | ||
57 | throw std::domain_error("Bad access to uninitialized notion"); | ||
58 | } | ||
59 | |||
60 | return hasWnid_; | ||
61 | } | ||
62 | |||
63 | int getWnid() const | ||
64 | { | ||
65 | if (!valid_) | ||
66 | { | ||
67 | throw std::domain_error("Bad access to uninitialized notion"); | ||
68 | } | ||
69 | |||
70 | if (!hasWnid_) | ||
71 | { | ||
72 | throw std::domain_error("Notion has no wnid"); | ||
73 | } | ||
74 | |||
75 | return wnid_; | ||
76 | } | ||
77 | |||
78 | bool hasNumOfImages() const | ||
79 | { | ||
80 | if (!valid_) | ||
81 | { | ||
82 | throw std::domain_error("Bad access to uninitialized notion"); | ||
83 | } | ||
84 | |||
85 | return hasNumOfImages_; | ||
86 | } | ||
87 | |||
88 | int getNumOfImages() const | ||
89 | { | ||
90 | if (!valid_) | ||
91 | { | ||
92 | throw std::domain_error("Bad access to uninitialized notion"); | ||
93 | } | ||
94 | |||
95 | if (!hasNumOfImages_) | ||
96 | { | ||
97 | throw std::domain_error("Notion does not have a number of images"); | ||
98 | } | ||
99 | |||
100 | return numOfImages_; | ||
101 | } | ||
102 | |||
103 | // Convenience | ||
104 | |||
105 | std::string getImageNetUrl() const; | ||
106 | |||
107 | // Type info | ||
108 | |||
109 | static const object objectType; | ||
110 | |||
111 | static const std::list<std::string> select; | ||
112 | |||
113 | // Query fields | ||
114 | |||
115 | static const field id; | ||
116 | static const field partOfSpeech; | ||
117 | static const field wnid; | ||
118 | static const field numOfImages; | ||
119 | |||
120 | operator filter() const | ||
121 | { | ||
122 | return (id == id_); | ||
123 | } | ||
124 | |||
125 | // Relationships with other objects | ||
126 | |||
127 | static const field word; | ||
128 | |||
129 | // Relationships with self | ||
130 | |||
131 | static const field hypernyms; | ||
132 | static const field hyponyms; | ||
133 | |||
134 | static const field fullHypernyms; | ||
135 | static const field fullHyponyms; | ||
136 | |||
137 | static const field instances; | ||
138 | static const field classes; | ||
139 | |||
140 | static const field memberMeronyms; | ||
141 | static const field memberHolonyms; | ||
142 | |||
143 | static const field fullMemberMeronyms; | ||
144 | static const field fullMemberHolonyms; | ||
145 | |||
146 | static const field partMeronyms; | ||
147 | static const field partHolonyms; | ||
148 | |||
149 | static const field fullPartMeronyms; | ||
150 | static const field fullPartHolonyms; | ||
151 | |||
152 | static const field substanceMeronyms; | ||
153 | static const field substanceHolonyms; | ||
154 | |||
155 | static const field fullSubstanceMeronyms; | ||
156 | static const field fullSubstanceHolonyms; | ||
157 | |||
158 | static const field variants; | ||
159 | static const field attributes; | ||
160 | |||
161 | static const field similarAdjectives; | ||
162 | |||
163 | static const field entails; | ||
164 | static const field entailedBy; | ||
165 | |||
166 | static const field causes; | ||
167 | static const field effects; | ||
168 | |||
169 | // Preposition group relationship | ||
170 | |||
171 | class preposition_group_field { | ||
172 | public: | ||
173 | |||
174 | filter operator==(std::string groupName) const; | ||
175 | |||
176 | private: | ||
177 | |||
178 | static const field isA; | ||
179 | static const field groupNameField; | ||
180 | }; | ||
181 | |||
182 | static const preposition_group_field prepositionGroup; | ||
183 | |||
184 | private: | ||
185 | bool valid_ = false; | ||
186 | |||
187 | int id_; | ||
188 | part_of_speech partOfSpeech_; | ||
189 | bool hasWnid_ = false; | ||
190 | int wnid_; | ||
191 | bool hasNumOfImages_ = false; | ||
192 | int numOfImages_; | ||
193 | |||
194 | const database* db_; | ||
195 | |||
196 | }; | ||
197 | |||
198 | }; | ||
199 | |||
200 | #endif /* end of include guard: NOTION_H_FD1C7646 */ | ||
diff --git a/lib/noun.cpp b/lib/noun.cpp deleted file mode 100644 index d8b34c9..0000000 --- a/lib/noun.cpp +++ /dev/null | |||
@@ -1,221 +0,0 @@ | |||
1 | #include "verbly.h" | ||
2 | #include <set> | ||
3 | #include <iostream> | ||
4 | |||
5 | namespace verbly { | ||
6 | |||
7 | noun::noun() | ||
8 | { | ||
9 | |||
10 | } | ||
11 | |||
12 | noun::noun(const data& _data, int _id) : word(_data, _id) | ||
13 | { | ||
14 | |||
15 | } | ||
16 | |||
17 | std::string noun::base_form() const | ||
18 | { | ||
19 | assert(_valid == true); | ||
20 | |||
21 | return _singular; | ||
22 | } | ||
23 | |||
24 | std::string noun::singular_form() const | ||
25 | { | ||
26 | assert(_valid == true); | ||
27 | |||
28 | return _singular; | ||
29 | } | ||
30 | |||
31 | std::string noun::plural_form() const | ||
32 | { | ||
33 | assert(_valid == true); | ||
34 | |||
35 | return _plural; | ||
36 | } | ||
37 | |||
38 | int noun::wnid() const | ||
39 | { | ||
40 | assert(_valid == true); | ||
41 | |||
42 | return _wnid; | ||
43 | } | ||
44 | |||
45 | bool noun::has_plural_form() const | ||
46 | { | ||
47 | assert(_valid == true); | ||
48 | |||
49 | return !_plural.empty(); | ||
50 | } | ||
51 | |||
52 | noun_query noun::hypernyms() const | ||
53 | { | ||
54 | assert(_valid == true); | ||
55 | |||
56 | return _data->nouns().hypernym_of(*this); | ||
57 | } | ||
58 | |||
59 | noun_query noun::full_hypernyms() const | ||
60 | { | ||
61 | assert(_valid == true); | ||
62 | |||
63 | return _data->nouns().full_hypernym_of(*this); | ||
64 | } | ||
65 | |||
66 | noun_query noun::hyponyms() const | ||
67 | { | ||
68 | assert(_valid == true); | ||
69 | |||
70 | return _data->nouns().hyponym_of(*this); | ||
71 | } | ||
72 | |||
73 | noun_query noun::full_hyponyms() const | ||
74 | { | ||
75 | assert(_valid == true); | ||
76 | |||
77 | return _data->nouns().full_hyponym_of(*this); | ||
78 | } | ||
79 | |||
80 | noun_query noun::part_meronyms() const | ||
81 | { | ||
82 | assert(_valid == true); | ||
83 | |||
84 | return _data->nouns().part_meronym_of(*this); | ||
85 | } | ||
86 | |||
87 | noun_query noun::full_part_meronyms() const | ||
88 | { | ||
89 | assert(_valid == true); | ||
90 | |||
91 | return _data->nouns().full_part_meronym_of(*this); | ||
92 | } | ||
93 | |||
94 | noun_query noun::part_holonyms() const | ||
95 | { | ||
96 | assert(_valid == true); | ||
97 | |||
98 | return _data->nouns().part_holonym_of(*this); | ||
99 | } | ||
100 | |||
101 | noun_query noun::full_part_holonyms() const | ||
102 | { | ||
103 | assert(_valid == true); | ||
104 | |||
105 | return _data->nouns().full_part_holonym_of(*this); | ||
106 | } | ||
107 | |||
108 | noun_query noun::substance_meronyms() const | ||
109 | { | ||
110 | assert(_valid == true); | ||
111 | |||
112 | return _data->nouns().substance_meronym_of(*this); | ||
113 | } | ||
114 | |||
115 | noun_query noun::full_substance_meronyms() const | ||
116 | { | ||
117 | assert(_valid == true); | ||
118 | |||
119 | return _data->nouns().full_substance_meronym_of(*this); | ||
120 | } | ||
121 | |||
122 | noun_query noun::substance_holonyms() const | ||
123 | { | ||
124 | assert(_valid == true); | ||
125 | |||
126 | return _data->nouns().substance_holonym_of(*this); | ||
127 | } | ||
128 | |||
129 | noun_query noun::full_substance_holonyms() const | ||
130 | { | ||
131 | assert(_valid == true); | ||
132 | |||
133 | return _data->nouns().full_substance_holonym_of(*this); | ||
134 | } | ||
135 | |||
136 | noun_query noun::member_meronyms() const | ||
137 | { | ||
138 | assert(_valid == true); | ||
139 | |||
140 | return _data->nouns().member_meronym_of(*this); | ||
141 | } | ||
142 | |||
143 | noun_query noun::full_member_meronyms() const | ||
144 | { | ||
145 | assert(_valid == true); | ||
146 | |||
147 | return _data->nouns().full_member_meronym_of(*this); | ||
148 | } | ||
149 | |||
150 | noun_query noun::member_holonyms() const | ||
151 | { | ||
152 | assert(_valid == true); | ||
153 | |||
154 | return _data->nouns().member_holonym_of(*this); | ||
155 | } | ||
156 | |||
157 | noun_query noun::full_member_holonyms() const | ||
158 | { | ||
159 | assert(_valid == true); | ||
160 | |||
161 | return _data->nouns().full_member_holonym_of(*this); | ||
162 | } | ||
163 | |||
164 | noun_query noun::classes() const | ||
165 | { | ||
166 | assert(_valid == true); | ||
167 | |||
168 | return _data->nouns().class_of(*this); | ||
169 | } | ||
170 | |||
171 | noun_query noun::instances() const | ||
172 | { | ||
173 | assert(_valid == true); | ||
174 | |||
175 | return _data->nouns().instance_of(*this); | ||
176 | } | ||
177 | |||
178 | noun_query noun::synonyms() const | ||
179 | { | ||
180 | assert(_valid == true); | ||
181 | |||
182 | return _data->nouns().synonym_of(*this); | ||
183 | } | ||
184 | |||
185 | noun_query noun::antonyms() const | ||
186 | { | ||
187 | assert(_valid == true); | ||
188 | |||
189 | return _data->nouns().antonym_of(*this); | ||
190 | } | ||
191 | |||
192 | adjective_query noun::pertainyms() const | ||
193 | { | ||
194 | assert(_valid == true); | ||
195 | |||
196 | return _data->adjectives().pertainym_of(*this); | ||
197 | } | ||
198 | |||
199 | adjective_query noun::variations() const | ||
200 | { | ||
201 | assert(_valid == true); | ||
202 | |||
203 | return _data->adjectives().variant_of(*this); | ||
204 | } | ||
205 | |||
206 | std::string noun::imagenet_url() const | ||
207 | { | ||
208 | std::stringstream url; | ||
209 | url << "http://www.image-net.org/api/text/imagenet.synset.geturls?wnid=n"; | ||
210 | url.width(8); | ||
211 | url.fill('0'); | ||
212 | url << (_wnid % 100000000); | ||
213 | return url.str(); | ||
214 | } | ||
215 | |||
216 | bool noun::operator<(const noun& other) const | ||
217 | { | ||
218 | return _id < other._id; | ||
219 | } | ||
220 | |||
221 | }; | ||
diff --git a/lib/noun.h b/lib/noun.h deleted file mode 100644 index bd71e57..0000000 --- a/lib/noun.h +++ /dev/null | |||
@@ -1,55 +0,0 @@ | |||
1 | #ifndef NOUN_H_24A03C83 | ||
2 | #define NOUN_H_24A03C83 | ||
3 | |||
4 | namespace verbly { | ||
5 | |||
6 | class noun : public word { | ||
7 | private: | ||
8 | std::string _singular; | ||
9 | std::string _plural; | ||
10 | int _wnid; | ||
11 | |||
12 | friend class noun_query; | ||
13 | |||
14 | public: | ||
15 | noun(); | ||
16 | noun(const data& _data, int _id); | ||
17 | |||
18 | std::string base_form() const; | ||
19 | std::string singular_form() const; | ||
20 | std::string plural_form() const; | ||
21 | int wnid() const; | ||
22 | |||
23 | bool has_plural_form() const; | ||
24 | |||
25 | noun_query hypernyms() const; | ||
26 | noun_query full_hypernyms() const; | ||
27 | noun_query hyponyms() const; | ||
28 | noun_query full_hyponyms() const; | ||
29 | noun_query part_meronyms() const; | ||
30 | noun_query full_part_meronyms() const; | ||
31 | noun_query part_holonyms() const; | ||
32 | noun_query full_part_holonyms() const; | ||
33 | noun_query substance_meronyms() const; | ||
34 | noun_query full_substance_meronyms() const; | ||
35 | noun_query substance_holonyms() const; | ||
36 | noun_query full_substance_holonyms() const; | ||
37 | noun_query member_meronyms() const; | ||
38 | noun_query full_member_meronyms() const; | ||
39 | noun_query member_holonyms() const; | ||
40 | noun_query full_member_holonyms() const; | ||
41 | noun_query classes() const; | ||
42 | noun_query instances() const; | ||
43 | noun_query synonyms() const; | ||
44 | noun_query antonyms() const; | ||
45 | adjective_query pertainyms() const; | ||
46 | adjective_query variations() const; | ||
47 | |||
48 | std::string imagenet_url() const; | ||
49 | |||
50 | bool operator<(const noun& other) const; | ||
51 | }; | ||
52 | |||
53 | }; | ||
54 | |||
55 | #endif /* end of include guard: NOUN_H_24A03C83 */ | ||
diff --git a/lib/noun_query.cpp b/lib/noun_query.cpp deleted file mode 100644 index 8648227..0000000 --- a/lib/noun_query.cpp +++ /dev/null | |||
@@ -1,2013 +0,0 @@ | |||
1 | #include "verbly.h" | ||
2 | |||
3 | namespace verbly { | ||
4 | |||
5 | noun_query::noun_query(const data& _data) : _data(_data) | ||
6 | { | ||
7 | |||
8 | } | ||
9 | |||
10 | noun_query& noun_query::limit(int _limit) | ||
11 | { | ||
12 | if ((_limit > 0) || (_limit == unlimited)) | ||
13 | { | ||
14 | this->_limit = _limit; | ||
15 | } | ||
16 | |||
17 | return *this; | ||
18 | } | ||
19 | |||
20 | noun_query& noun_query::random() | ||
21 | { | ||
22 | this->_random = true; | ||
23 | |||
24 | return *this; | ||
25 | } | ||
26 | |||
27 | noun_query& noun_query::except(const noun& _word) | ||
28 | { | ||
29 | _except.push_back(_word); | ||
30 | |||
31 | return *this; | ||
32 | } | ||
33 | |||
34 | noun_query& noun_query::rhymes_with(const word& _word) | ||
35 | { | ||
36 | for (auto rhyme : _word.get_rhymes()) | ||
37 | { | ||
38 | _rhymes.push_back(rhyme); | ||
39 | } | ||
40 | |||
41 | if (dynamic_cast<const noun*>(&_word) != nullptr) | ||
42 | { | ||
43 | _except.push_back(dynamic_cast<const noun&>(_word)); | ||
44 | } | ||
45 | |||
46 | return *this; | ||
47 | } | ||
48 | |||
49 | noun_query& noun_query::rhymes_with(rhyme _r) | ||
50 | { | ||
51 | _rhymes.push_back(_r); | ||
52 | |||
53 | return *this; | ||
54 | } | ||
55 | |||
56 | noun_query& noun_query::has_pronunciation() | ||
57 | { | ||
58 | this->_has_prn = true; | ||
59 | |||
60 | return *this; | ||
61 | } | ||
62 | |||
63 | noun_query& noun_query::has_rhyming_noun() | ||
64 | { | ||
65 | _has_rhyming_noun = true; | ||
66 | |||
67 | return *this; | ||
68 | } | ||
69 | |||
70 | noun_query& noun_query::has_rhyming_adjective() | ||
71 | { | ||
72 | _has_rhyming_adjective = true; | ||
73 | |||
74 | return *this; | ||
75 | } | ||
76 | |||
77 | noun_query& noun_query::has_rhyming_adverb() | ||
78 | { | ||
79 | _has_rhyming_adverb = true; | ||
80 | |||
81 | return *this; | ||
82 | } | ||
83 | |||
84 | noun_query& noun_query::has_rhyming_verb() | ||
85 | { | ||
86 | _has_rhyming_verb = true; | ||
87 | |||
88 | return *this; | ||
89 | } | ||
90 | |||
91 | noun_query& noun_query::with_stress(filter<std::vector<bool>> _arg) | ||
92 | { | ||
93 | _stress = _arg; | ||
94 | |||
95 | return *this; | ||
96 | } | ||
97 | |||
98 | noun_query& noun_query::with_singular_form(std::string _arg) | ||
99 | { | ||
100 | _with_singular_form.push_back(_arg); | ||
101 | |||
102 | return *this; | ||
103 | } | ||
104 | |||
105 | noun_query& noun_query::with_prefix(filter<std::string> _f) | ||
106 | { | ||
107 | _f.clean(); | ||
108 | _with_prefix = _f; | ||
109 | |||
110 | return *this; | ||
111 | } | ||
112 | |||
113 | noun_query& noun_query::with_suffix(filter<std::string> _f) | ||
114 | { | ||
115 | _f.clean(); | ||
116 | _with_suffix = _f; | ||
117 | |||
118 | return *this; | ||
119 | } | ||
120 | |||
121 | noun_query& noun_query::requires_plural_form() | ||
122 | { | ||
123 | _requires_plural_form = true; | ||
124 | |||
125 | return *this; | ||
126 | } | ||
127 | |||
128 | noun_query& noun_query::with_complexity(int _arg) | ||
129 | { | ||
130 | _with_complexity = _arg; | ||
131 | |||
132 | return *this; | ||
133 | } | ||
134 | |||
135 | noun_query& noun_query::is_hypernym() | ||
136 | { | ||
137 | _is_hypernym = true; | ||
138 | |||
139 | return *this; | ||
140 | } | ||
141 | |||
142 | noun_query& noun_query::hypernym_of(filter<noun> _f) | ||
143 | { | ||
144 | _f.clean(); | ||
145 | _hypernym_of = _f; | ||
146 | |||
147 | return *this; | ||
148 | } | ||
149 | |||
150 | noun_query& noun_query::full_hypernym_of(filter<noun> _f) | ||
151 | { | ||
152 | _f.clean(); | ||
153 | _full_hypernym_of = _f; | ||
154 | |||
155 | return *this; | ||
156 | } | ||
157 | |||
158 | noun_query& noun_query::is_hyponym() | ||
159 | { | ||
160 | _is_hyponym = true; | ||
161 | |||
162 | return *this; | ||
163 | } | ||
164 | |||
165 | noun_query& noun_query::hyponym_of(filter<noun> _f) | ||
166 | { | ||
167 | _f.clean(); | ||
168 | _hyponym_of = _f; | ||
169 | |||
170 | return *this; | ||
171 | } | ||
172 | |||
173 | noun_query& noun_query::full_hyponym_of(filter<noun> _f) | ||
174 | { | ||
175 | _f.clean(); | ||
176 | _full_hyponym_of = _f; | ||
177 | |||
178 | return *this; | ||
179 | } | ||
180 | |||
181 | noun_query& noun_query::is_part_meronym() | ||
182 | { | ||
183 | _is_part_meronym = true; | ||
184 | |||
185 | return *this; | ||
186 | } | ||
187 | |||
188 | noun_query& noun_query::part_meronym_of(filter<noun> _f) | ||
189 | { | ||
190 | _f.clean(); | ||
191 | _part_meronym_of = _f; | ||
192 | |||
193 | return *this; | ||
194 | } | ||
195 | |||
196 | noun_query& noun_query::full_part_meronym_of(filter<noun> _f) | ||
197 | { | ||
198 | _f.clean(); | ||
199 | _full_part_meronym_of = _f; | ||
200 | |||
201 | return *this; | ||
202 | } | ||
203 | |||
204 | noun_query& noun_query::is_part_holonym() | ||
205 | { | ||
206 | _is_part_holonym = true; | ||
207 | |||
208 | return *this; | ||
209 | } | ||
210 | |||
211 | noun_query& noun_query::part_holonym_of(filter<noun> _f) | ||
212 | { | ||
213 | _f.clean(); | ||
214 | _part_holonym_of = _f; | ||
215 | |||
216 | return *this; | ||
217 | } | ||
218 | |||
219 | noun_query& noun_query::full_part_holonym_of(filter<noun> _f) | ||
220 | { | ||
221 | _f.clean(); | ||
222 | _full_part_holonym_of = _f; | ||
223 | |||
224 | return *this; | ||
225 | } | ||
226 | |||
227 | noun_query& noun_query::is_substance_meronym() | ||
228 | { | ||
229 | _is_substance_meronym = true; | ||
230 | |||
231 | return *this; | ||
232 | } | ||
233 | |||
234 | noun_query& noun_query::substance_meronym_of(filter<noun> _f) | ||
235 | { | ||
236 | _f.clean(); | ||
237 | _substance_meronym_of = _f; | ||
238 | |||
239 | return *this; | ||
240 | } | ||
241 | |||
242 | noun_query& noun_query::full_substance_meronym_of(filter<noun> _f) | ||
243 | { | ||
244 | _f.clean(); | ||
245 | _full_substance_meronym_of = _f; | ||
246 | |||
247 | return *this; | ||
248 | } | ||
249 | |||
250 | noun_query& noun_query::is_substance_holonym() | ||
251 | { | ||
252 | _is_substance_holonym = true; | ||
253 | |||
254 | return *this; | ||
255 | } | ||
256 | |||
257 | noun_query& noun_query::substance_holonym_of(filter<noun> _f) | ||
258 | { | ||
259 | _f.clean(); | ||
260 | _substance_holonym_of = _f; | ||
261 | |||
262 | return *this; | ||
263 | } | ||
264 | |||
265 | noun_query& noun_query::full_substance_holonym_of(filter<noun> _f) | ||
266 | { | ||
267 | _f.clean(); | ||
268 | _full_substance_holonym_of = _f; | ||
269 | |||
270 | return *this; | ||
271 | } | ||
272 | |||
273 | noun_query& noun_query::is_member_meronym() | ||
274 | { | ||
275 | _is_member_meronym = true; | ||
276 | |||
277 | return *this; | ||
278 | } | ||
279 | |||
280 | noun_query& noun_query::member_meronym_of(filter<noun> _f) | ||
281 | { | ||
282 | _f.clean(); | ||
283 | _member_meronym_of = _f; | ||
284 | |||
285 | return *this; | ||
286 | } | ||
287 | |||
288 | noun_query& noun_query::full_member_meronym_of(filter<noun> _f) | ||
289 | { | ||
290 | _f.clean(); | ||
291 | _full_member_meronym_of = _f; | ||
292 | |||
293 | return *this; | ||
294 | } | ||
295 | |||
296 | noun_query& noun_query::is_member_holonym() | ||
297 | { | ||
298 | _is_member_holonym = true; | ||
299 | |||
300 | return *this; | ||
301 | } | ||
302 | |||
303 | noun_query& noun_query::member_holonym_of(filter<noun> _f) | ||
304 | { | ||
305 | _f.clean(); | ||
306 | _member_holonym_of = _f; | ||
307 | |||
308 | return *this; | ||
309 | } | ||
310 | |||
311 | noun_query& noun_query::full_member_holonym_of(filter<noun> _f) | ||
312 | { | ||
313 | _f.clean(); | ||
314 | _full_member_holonym_of = _f; | ||
315 | |||
316 | return *this; | ||
317 | } | ||
318 | |||
319 | noun_query& noun_query::is_proper() | ||
320 | { | ||
321 | _is_proper = true; | ||
322 | |||
323 | return *this; | ||
324 | } | ||
325 | |||
326 | noun_query& noun_query::is_not_proper() | ||
327 | { | ||
328 | _is_not_proper = true; | ||
329 | |||
330 | return *this; | ||
331 | } | ||
332 | |||
333 | noun_query& noun_query::is_instance() | ||
334 | { | ||
335 | _is_instance = true; | ||
336 | |||
337 | return *this; | ||
338 | } | ||
339 | |||
340 | noun_query& noun_query::instance_of(filter<noun> _f) | ||
341 | { | ||
342 | _f.clean(); | ||
343 | _instance_of = _f; | ||
344 | |||
345 | return *this; | ||
346 | } | ||
347 | |||
348 | noun_query& noun_query::is_class() | ||
349 | { | ||
350 | _is_class = true; | ||
351 | |||
352 | return *this; | ||
353 | } | ||
354 | |||
355 | noun_query& noun_query::class_of(filter<noun> _f) | ||
356 | { | ||
357 | _f.clean(); | ||
358 | _class_of = _f; | ||
359 | |||
360 | return *this; | ||
361 | } | ||
362 | |||
363 | noun_query& noun_query::has_synonyms() | ||
364 | { | ||
365 | _has_synonyms = true; | ||
366 | |||
367 | return *this; | ||
368 | } | ||
369 | |||
370 | noun_query& noun_query::synonym_of(filter<noun> _f) | ||
371 | { | ||
372 | _f.clean(); | ||
373 | _synonym_of = _f; | ||
374 | |||
375 | return *this; | ||
376 | } | ||
377 | |||
378 | noun_query& noun_query::has_antonyms() | ||
379 | { | ||
380 | _has_antonyms = true; | ||
381 | |||
382 | return *this; | ||
383 | } | ||
384 | |||
385 | noun_query& noun_query::antonym_of(filter<noun> _f) | ||
386 | { | ||
387 | _f.clean(); | ||
388 | _antonym_of = _f; | ||
389 | |||
390 | return *this; | ||
391 | } | ||
392 | |||
393 | noun_query& noun_query::has_pertainym() | ||
394 | { | ||
395 | _has_pertainym = true; | ||
396 | |||
397 | return *this; | ||
398 | } | ||
399 | |||
400 | noun_query& noun_query::anti_pertainym_of(filter<adjective> _f) | ||
401 | { | ||
402 | _f.clean(); | ||
403 | _anti_pertainym_of = _f; | ||
404 | |||
405 | return *this; | ||
406 | } | ||
407 | |||
408 | noun_query& noun_query::is_attribute() | ||
409 | { | ||
410 | _is_attribute = true; | ||
411 | |||
412 | return *this; | ||
413 | } | ||
414 | |||
415 | noun_query& noun_query::attribute_of(filter<adjective> _f) | ||
416 | { | ||
417 | _f.clean(); | ||
418 | _attribute_of = _f; | ||
419 | |||
420 | return *this; | ||
421 | } | ||
422 | |||
423 | noun_query& noun_query::at_least_n_images(int _arg) | ||
424 | { | ||
425 | _at_least_n_images = _arg; | ||
426 | |||
427 | return *this; | ||
428 | } | ||
429 | |||
430 | noun_query& noun_query::with_wnid(int _arg) | ||
431 | { | ||
432 | _with_wnid.insert(_arg); | ||
433 | |||
434 | return *this; | ||
435 | } | ||
436 | |||
437 | /* | ||
438 | noun_query& noun_query::derived_from(const word& _w) | ||
439 | { | ||
440 | if (dynamic_cast<const adjective*>(&_w) != nullptr) | ||
441 | { | ||
442 | _derived_from_adjective.push_back(dynamic_cast<const adjective&>(_w)); | ||
443 | } else if (dynamic_cast<const adverb*>(&_w) != nullptr) | ||
444 | { | ||
445 | _derived_from_adverb.push_back(dynamic_cast<const adverb&>(_w)); | ||
446 | } else if (dynamic_cast<const noun*>(&_w) != nullptr) | ||
447 | { | ||
448 | _derived_from_noun.push_back(dynamic_cast<const noun&>(_w)); | ||
449 | } | ||
450 | |||
451 | return *this; | ||
452 | } | ||
453 | |||
454 | noun_query& noun_query::not_derived_from(const word& _w) | ||
455 | { | ||
456 | if (dynamic_cast<const adjective*>(&_w) != nullptr) | ||
457 | { | ||
458 | _not_derived_from_adjective.push_back(dynamic_cast<const adjective&>(_w)); | ||
459 | } else if (dynamic_cast<const adverb*>(&_w) != nullptr) | ||
460 | { | ||
461 | _not_derived_from_adverb.push_back(dynamic_cast<const adverb&>(_w)); | ||
462 | } else if (dynamic_cast<const noun*>(&_w) != nullptr) | ||
463 | { | ||
464 | _not_derived_from_noun.push_back(dynamic_cast<const noun&>(_w)); | ||
465 | } | ||
466 | |||
467 | return *this; | ||
468 | }*/ | ||
469 | |||
470 | std::list<noun> noun_query::run() const | ||
471 | { | ||
472 | std::stringstream construct; | ||
473 | |||
474 | if (!_full_hypernym_of.empty() || !_full_hyponym_of.empty() || !_full_part_meronym_of.empty() || !_full_part_holonym_of.empty() || !_full_substance_meronym_of.empty() || !_full_substance_holonym_of.empty() || !_full_member_meronym_of.empty() || !_full_member_holonym_of.empty()) | ||
475 | { | ||
476 | construct << "WITH RECURSIVE "; | ||
477 | |||
478 | std::list<std::string> ctes; | ||
479 | |||
480 | for (auto hyponym : _full_hypernym_of.uniq_flatten()) | ||
481 | { | ||
482 | ctes.push_back("hypernym_tree_" + std::to_string(hyponym._id) + " AS (SELECT hypernym_id FROM hypernymy WHERE hyponym_id = " + std::to_string(hyponym._id) + " UNION SELECT h.hypernym_id FROM hypernym_tree_" + std::to_string(hyponym._id) + " AS t INNER JOIN hypernymy AS h ON t.hypernym_id = h.hyponym_id)"); | ||
483 | } | ||
484 | |||
485 | for (auto hypernym : _full_hyponym_of.uniq_flatten()) | ||
486 | { | ||
487 | ctes.push_back("hyponym_tree_" + std::to_string(hypernym._id) + " AS (SELECT hyponym_id FROM hypernymy WHERE hypernym_id = " + std::to_string(hypernym._id) + " UNION SELECT h.hyponym_id FROM hyponym_tree_" + std::to_string(hypernym._id) + " AS t INNER JOIN hypernymy AS h ON t.hyponym_id = h.hypernym_id)"); | ||
488 | } | ||
489 | |||
490 | for (auto holonym : _full_part_meronym_of.uniq_flatten()) | ||
491 | { | ||
492 | ctes.push_back("part_meronym_tree_" + std::to_string(holonym._id) + " AS (SELECT meronym_id FROM part_meronymy WHERE holonym_id = " + std::to_string(holonym._id) + " UNION SELECT h.meronym_id FROM part_meronym_tree_" + std::to_string(holonym._id) + " AS t INNER JOIN part_meronymy AS h ON t.meronym_id = h.holonym_id)"); | ||
493 | } | ||
494 | |||
495 | for (auto meronym : _full_part_holonym_of.uniq_flatten()) | ||
496 | { | ||
497 | ctes.push_back("part_holonym_tree_" + std::to_string(meronym._id) + " AS (SELECT holonym_id FROM part_meronymy WHERE meronym_id = " + std::to_string(meronym._id) + " UNION SELECT h.holonym_id FROM part_holonym_tree_" + std::to_string(meronym._id) + " AS t INNER JOIN part_meronymy AS h ON t.holonym_id = h.meronym_id)"); | ||
498 | } | ||
499 | |||
500 | for (auto holonym : _full_substance_meronym_of.uniq_flatten()) | ||
501 | { | ||
502 | ctes.push_back("substance_meronym_tree_" + std::to_string(holonym._id) + " AS (SELECT meronym_id FROM substance_meronymy WHERE holonym_id = " + std::to_string(holonym._id) + " UNION SELECT h.meronym_id FROM substance_meronym_tree_" + std::to_string(holonym._id) + " AS t INNER JOIN substance_meronymy AS h ON t.meronym_id = h.holonym_id)"); | ||
503 | } | ||
504 | |||
505 | for (auto meronym : _full_substance_holonym_of.uniq_flatten()) | ||
506 | { | ||
507 | ctes.push_back("substance_holonym_tree_" + std::to_string(meronym._id) + " AS (SELECT holonym_id FROM substance_meronymy WHERE meronym_id = " + std::to_string(meronym._id) + " UNION SELECT h.holonym_id FROM substance_holonym_tree_" + std::to_string(meronym._id) + " AS t INNER JOIN substance_meronymy AS h ON t.holonym_id = h.meronym_id)"); | ||
508 | } | ||
509 | |||
510 | for (auto holonym : _full_member_meronym_of.uniq_flatten()) | ||
511 | { | ||
512 | ctes.push_back("member_meronym_tree_" + std::to_string(holonym._id) + " AS (SELECT meronym_id FROM member_meronymy WHERE holonym_id = " + std::to_string(holonym._id) + " UNION SELECT h.meronym_id FROM member_meronym_tree_" + std::to_string(holonym._id) + " AS t INNER JOIN member_meronymy AS h ON t.meronym_id = h.holonym_id)"); | ||
513 | } | ||
514 | |||
515 | for (auto meronym : _full_member_holonym_of.uniq_flatten()) | ||
516 | { | ||
517 | ctes.push_back("member_holonym_tree_" + std::to_string(meronym._id) + " AS (SELECT holonym_id FROM member_meronymy WHERE meronym_id = " + std::to_string(meronym._id) + " UNION SELECT h.holonym_id FROM member_holonym_tree_" + std::to_string(meronym._id) + " AS t INNER JOIN member_meronymy AS h ON t.holonym_id = h.meronym_id)"); | ||
518 | } | ||
519 | |||
520 | construct << verbly::implode(std::begin(ctes), std::end(ctes), ", "); | ||
521 | construct << " "; | ||
522 | } | ||
523 | |||
524 | construct << "SELECT noun_id, singular, plural, wnid FROM nouns"; | ||
525 | std::list<std::string> conditions; | ||
526 | std::list<binding> bindings; | ||
527 | |||
528 | if (_has_prn) | ||
529 | { | ||
530 | conditions.push_back("noun_id IN (SELECT noun_id FROM noun_pronunciations)"); | ||
531 | } | ||
532 | |||
533 | if (!_rhymes.empty()) | ||
534 | { | ||
535 | std::list<std::string> clauses(_rhymes.size(), "(prerhyme != ? AND rhyme = ?)"); | ||
536 | std::string cond = "noun_id IN (SELECT noun_id FROM noun_pronunciations WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
537 | conditions.push_back(cond); | ||
538 | |||
539 | for (auto rhy : _rhymes) | ||
540 | { | ||
541 | bindings.emplace_back(rhy.get_prerhyme()); | ||
542 | bindings.emplace_back(rhy.get_rhyme()); | ||
543 | } | ||
544 | } | ||
545 | |||
546 | if (_has_rhyming_noun) | ||
547 | { | ||
548 | conditions.push_back("noun_id IN (SELECT a.noun_id FROM nouns AS a INNER JOIN noun_pronunciations AS curp ON curp.noun_id = a.noun_id INNER JOIN noun_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme AND rhmp.noun_id != curp.noun_id)"); | ||
549 | } | ||
550 | |||
551 | if (_has_rhyming_adjective) | ||
552 | { | ||
553 | conditions.push_back("noun_id IN (SELECT a.noun_id FROM nouns AS a INNER JOIN noun_pronunciations AS curp ON curp.noun_id = a.noun_id INNER JOIN adjective_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)"); | ||
554 | } | ||
555 | |||
556 | if (_has_rhyming_adverb) | ||
557 | { | ||
558 | conditions.push_back("noun_id IN (SELECT a.noun_id FROM nouns AS a INNER JOIN noun_pronunciations AS curp ON curp.noun_id = a.noun_id INNER JOIN adverb_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)"); | ||
559 | } | ||
560 | |||
561 | if (_has_rhyming_verb) | ||
562 | { | ||
563 | conditions.push_back("noun_id IN (SELECT a.noun_id FROM nouns AS a INNER JOIN noun_pronunciations AS curp ON curp.noun_id = a.noun_id INNER JOIN verb_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)"); | ||
564 | } | ||
565 | |||
566 | if (!_stress.empty()) | ||
567 | { | ||
568 | std::stringstream cond; | ||
569 | if (_stress.get_notlogic()) | ||
570 | { | ||
571 | cond << "noun_id NOT IN"; | ||
572 | } else { | ||
573 | cond << "noun_id IN"; | ||
574 | } | ||
575 | |||
576 | cond << "(SELECT noun_id FROM noun_pronunciations WHERE "; | ||
577 | |||
578 | std::function<std::string (filter<std::vector<bool>>, bool)> recur = [&] (filter<std::vector<bool>> f, bool notlogic) -> std::string { | ||
579 | switch (f.get_type()) | ||
580 | { | ||
581 | case filter<std::vector<bool>>::type::singleton: | ||
582 | { | ||
583 | std::ostringstream _val; | ||
584 | for (auto syl : f.get_elem()) | ||
585 | { | ||
586 | if (syl) | ||
587 | { | ||
588 | _val << "1"; | ||
589 | } else { | ||
590 | _val << "0"; | ||
591 | } | ||
592 | } | ||
593 | |||
594 | bindings.emplace_back(_val.str()); | ||
595 | |||
596 | if (notlogic == f.get_notlogic()) | ||
597 | { | ||
598 | return "stress = ?"; | ||
599 | } else { | ||
600 | return "stress != ?"; | ||
601 | } | ||
602 | } | ||
603 | |||
604 | case filter<std::vector<bool>>::type::group: | ||
605 | { | ||
606 | bool truelogic = notlogic != f.get_notlogic(); | ||
607 | |||
608 | std::list<std::string> clauses; | ||
609 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<std::vector<bool>> f2) { | ||
610 | return recur(f2, truelogic); | ||
611 | }); | ||
612 | |||
613 | if (truelogic == f.get_orlogic()) | ||
614 | { | ||
615 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
616 | } else { | ||
617 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
618 | } | ||
619 | } | ||
620 | } | ||
621 | }; | ||
622 | |||
623 | cond << recur(_stress, _stress.get_notlogic()); | ||
624 | cond << ")"; | ||
625 | conditions.push_back(cond.str()); | ||
626 | } | ||
627 | |||
628 | for (auto except : _except) | ||
629 | { | ||
630 | conditions.push_back("noun_id != ?"); | ||
631 | bindings.emplace_back(except._id); | ||
632 | } | ||
633 | |||
634 | if (!_with_singular_form.empty()) | ||
635 | { | ||
636 | std::list<std::string> clauses(_with_singular_form.size(), "singular = ?"); | ||
637 | std::string cond = "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
638 | conditions.push_back(cond); | ||
639 | |||
640 | for (auto form : _with_singular_form) | ||
641 | { | ||
642 | bindings.emplace_back(form); | ||
643 | } | ||
644 | } | ||
645 | |||
646 | if (_requires_plural_form) | ||
647 | { | ||
648 | conditions.push_back("plural IS NOT NULL"); | ||
649 | } | ||
650 | |||
651 | if (!_with_prefix.empty()) | ||
652 | { | ||
653 | std::function<std::string (filter<std::string>, bool)> recur = [&] (filter<std::string> f, bool notlogic) -> std::string { | ||
654 | switch (f.get_type()) | ||
655 | { | ||
656 | case filter<std::string>::type::singleton: | ||
657 | { | ||
658 | bindings.emplace_back(f.get_elem() + "%"); | ||
659 | |||
660 | if (notlogic == f.get_notlogic()) | ||
661 | { | ||
662 | return "singular LIKE ?"; | ||
663 | } else { | ||
664 | return "singular NOT LIKE ?"; | ||
665 | } | ||
666 | } | ||
667 | |||
668 | case filter<std::string>::type::group: | ||
669 | { | ||
670 | bool truelogic = notlogic != f.get_notlogic(); | ||
671 | |||
672 | std::list<std::string> clauses; | ||
673 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<std::string> f2) { | ||
674 | return recur(f2, truelogic); | ||
675 | }); | ||
676 | |||
677 | if (truelogic == f.get_orlogic()) | ||
678 | { | ||
679 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
680 | } else { | ||
681 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
682 | } | ||
683 | } | ||
684 | } | ||
685 | }; | ||
686 | |||
687 | conditions.push_back(recur(_with_prefix, false)); | ||
688 | } | ||
689 | |||
690 | if (!_with_suffix.empty()) | ||
691 | { | ||
692 | std::function<std::string (filter<std::string>, bool)> recur = [&] (filter<std::string> f, bool notlogic) -> std::string { | ||
693 | switch (f.get_type()) | ||
694 | { | ||
695 | case filter<std::string>::type::singleton: | ||
696 | { | ||
697 | bindings.emplace_back("%" + f.get_elem()); | ||
698 | |||
699 | if (notlogic == f.get_notlogic()) | ||
700 | { | ||
701 | return "singular LIKE ?"; | ||
702 | } else { | ||
703 | return "singular NOT LIKE ?"; | ||
704 | } | ||
705 | } | ||
706 | |||
707 | case filter<std::string>::type::group: | ||
708 | { | ||
709 | bool truelogic = notlogic != f.get_notlogic(); | ||
710 | |||
711 | std::list<std::string> clauses; | ||
712 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<std::string> f2) { | ||
713 | return recur(f2, truelogic); | ||
714 | }); | ||
715 | |||
716 | if (truelogic == f.get_orlogic()) | ||
717 | { | ||
718 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
719 | } else { | ||
720 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
721 | } | ||
722 | } | ||
723 | } | ||
724 | }; | ||
725 | |||
726 | conditions.push_back(recur(_with_suffix, false)); | ||
727 | } | ||
728 | |||
729 | if (_with_complexity != unlimited) | ||
730 | { | ||
731 | conditions.push_back("complexity = ?"); | ||
732 | bindings.emplace_back(_with_complexity); | ||
733 | } | ||
734 | |||
735 | if (_is_hypernym) | ||
736 | { | ||
737 | conditions.push_back("noun_id IN (SELECT hypernym_id FROM hypernymy)"); | ||
738 | } | ||
739 | |||
740 | if (!_hypernym_of.empty()) | ||
741 | { | ||
742 | std::stringstream cond; | ||
743 | if (_hypernym_of.get_notlogic()) | ||
744 | { | ||
745 | cond << "noun_id NOT IN"; | ||
746 | } else { | ||
747 | cond << "noun_id IN"; | ||
748 | } | ||
749 | |||
750 | cond << "(SELECT hypernym_id FROM hypernymy WHERE "; | ||
751 | |||
752 | std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string { | ||
753 | switch (f.get_type()) | ||
754 | { | ||
755 | case filter<noun>::type::singleton: | ||
756 | { | ||
757 | bindings.emplace_back(f.get_elem()._id); | ||
758 | |||
759 | if (notlogic == f.get_notlogic()) | ||
760 | { | ||
761 | return "hyponym_id = ?"; | ||
762 | } else { | ||
763 | return "hyponym_id != ?"; | ||
764 | } | ||
765 | } | ||
766 | |||
767 | case filter<noun>::type::group: | ||
768 | { | ||
769 | bool truelogic = notlogic != f.get_notlogic(); | ||
770 | |||
771 | std::list<std::string> clauses; | ||
772 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) { | ||
773 | return recur(f2, truelogic); | ||
774 | }); | ||
775 | |||
776 | if (truelogic == f.get_orlogic()) | ||
777 | { | ||
778 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
779 | } else { | ||
780 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
781 | } | ||
782 | } | ||
783 | } | ||
784 | }; | ||
785 | |||
786 | cond << recur(_hypernym_of, _hypernym_of.get_notlogic()); | ||
787 | cond << ")"; | ||
788 | conditions.push_back(cond.str()); | ||
789 | } | ||
790 | |||
791 | if (!_full_hypernym_of.empty()) | ||
792 | { | ||
793 | std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string { | ||
794 | switch (f.get_type()) | ||
795 | { | ||
796 | case filter<noun>::type::singleton: | ||
797 | { | ||
798 | if (notlogic == f.get_notlogic()) | ||
799 | { | ||
800 | return "noun_id IN (SELECT hypernym_id FROM hypernym_tree_" + std::to_string(f.get_elem()._id) + ")"; | ||
801 | } else { | ||
802 | return "noun_id NOT IN (SELECT hypernym_id FROM hypernym_tree_" + std::to_string(f.get_elem()._id) + ")"; | ||
803 | } | ||
804 | } | ||
805 | |||
806 | case filter<noun>::type::group: | ||
807 | { | ||
808 | bool truelogic = notlogic != f.get_notlogic(); | ||
809 | |||
810 | std::list<std::string> clauses; | ||
811 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) { | ||
812 | return recur(f2, truelogic); | ||
813 | }); | ||
814 | |||
815 | if (truelogic == f.get_orlogic()) | ||
816 | { | ||
817 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
818 | } else { | ||
819 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
820 | } | ||
821 | } | ||
822 | } | ||
823 | }; | ||
824 | |||
825 | conditions.push_back(recur(_full_hypernym_of, false)); | ||
826 | } | ||
827 | |||
828 | if (!_full_hyponym_of.empty()) | ||
829 | { | ||
830 | std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string { | ||
831 | switch (f.get_type()) | ||
832 | { | ||
833 | case filter<noun>::type::singleton: | ||
834 | { | ||
835 | if (notlogic == f.get_notlogic()) | ||
836 | { | ||
837 | return "noun_id IN (SELECT hyponym_id FROM hyponym_tree_" + std::to_string(f.get_elem()._id) + ")"; | ||
838 | } else { | ||
839 | return "noun_id NOT IN (SELECT hyponym_id FROM hyponym_tree_" + std::to_string(f.get_elem()._id) + ")"; | ||
840 | } | ||
841 | } | ||
842 | |||
843 | case filter<noun>::type::group: | ||
844 | { | ||
845 | bool truelogic = notlogic != f.get_notlogic(); | ||
846 | |||
847 | std::list<std::string> clauses; | ||
848 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) { | ||
849 | return recur(f2, truelogic); | ||
850 | }); | ||
851 | |||
852 | if (truelogic == f.get_orlogic()) | ||
853 | { | ||
854 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
855 | } else { | ||
856 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
857 | } | ||
858 | } | ||
859 | } | ||
860 | }; | ||
861 | |||
862 | conditions.push_back(recur(_full_hyponym_of, false)); | ||
863 | } | ||
864 | |||
865 | if (_is_hyponym) | ||
866 | { | ||
867 | conditions.push_back("noun_id IN (SELECT hyponym_id FROM hypernymy)"); | ||
868 | } | ||
869 | |||
870 | if (!_hyponym_of.empty()) | ||
871 | { | ||
872 | std::stringstream cond; | ||
873 | if (_hyponym_of.get_notlogic()) | ||
874 | { | ||
875 | cond << "noun_id NOT IN"; | ||
876 | } else { | ||
877 | cond << "noun_id IN"; | ||
878 | } | ||
879 | |||
880 | cond << "(SELECT hyponym_id FROM hypernymy WHERE "; | ||
881 | |||
882 | std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string { | ||
883 | switch (f.get_type()) | ||
884 | { | ||
885 | case filter<noun>::type::singleton: | ||
886 | { | ||
887 | bindings.emplace_back(f.get_elem()._id); | ||
888 | |||
889 | if (notlogic == f.get_notlogic()) | ||
890 | { | ||
891 | return "hypernym_id = ?"; | ||
892 | } else { | ||
893 | return "hypernym_id != ?"; | ||
894 | } | ||
895 | } | ||
896 | |||
897 | case filter<noun>::type::group: | ||
898 | { | ||
899 | bool truelogic = notlogic != f.get_notlogic(); | ||
900 | |||
901 | std::list<std::string> clauses; | ||
902 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) { | ||
903 | return recur(f2, truelogic); | ||
904 | }); | ||
905 | |||
906 | if (truelogic == f.get_orlogic()) | ||
907 | { | ||
908 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
909 | } else { | ||
910 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
911 | } | ||
912 | } | ||
913 | } | ||
914 | }; | ||
915 | |||
916 | cond << recur(_hyponym_of, _hyponym_of.get_notlogic()); | ||
917 | cond << ")"; | ||
918 | conditions.push_back(cond.str()); | ||
919 | } | ||
920 | |||
921 | if (_is_part_meronym) | ||
922 | { | ||
923 | conditions.push_back("noun_id IN (SELECT meronym_id FROM part_meronymy)"); | ||
924 | } | ||
925 | |||
926 | if (!_part_meronym_of.empty()) | ||
927 | { | ||
928 | std::stringstream cond; | ||
929 | if (_part_meronym_of.get_notlogic()) | ||
930 | { | ||
931 | cond << "noun_id NOT IN"; | ||
932 | } else { | ||
933 | cond << "noun_id IN"; | ||
934 | } | ||
935 | |||
936 | cond << "(SELECT meronym_id FROM part_meronymy WHERE "; | ||
937 | |||
938 | std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string { | ||
939 | switch (f.get_type()) | ||
940 | { | ||
941 | case filter<noun>::type::singleton: | ||
942 | { | ||
943 | bindings.emplace_back(f.get_elem()._id); | ||
944 | |||
945 | if (notlogic == f.get_notlogic()) | ||
946 | { | ||
947 | return "holonym_id = ?"; | ||
948 | } else { | ||
949 | return "holonym_id != ?"; | ||
950 | } | ||
951 | } | ||
952 | |||
953 | case filter<noun>::type::group: | ||
954 | { | ||
955 | bool truelogic = notlogic != f.get_notlogic(); | ||
956 | |||
957 | std::list<std::string> clauses; | ||
958 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) { | ||
959 | return recur(f2, truelogic); | ||
960 | }); | ||
961 | |||
962 | if (truelogic == f.get_orlogic()) | ||
963 | { | ||
964 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
965 | } else { | ||
966 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
967 | } | ||
968 | } | ||
969 | } | ||
970 | }; | ||
971 | |||
972 | cond << recur(_part_meronym_of, _part_meronym_of.get_notlogic()); | ||
973 | cond << ")"; | ||
974 | conditions.push_back(cond.str()); | ||
975 | } | ||
976 | |||
977 | if (!_full_part_meronym_of.empty()) | ||
978 | { | ||
979 | std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string { | ||
980 | switch (f.get_type()) | ||
981 | { | ||
982 | case filter<noun>::type::singleton: | ||
983 | { | ||
984 | if (notlogic == f.get_notlogic()) | ||
985 | { | ||
986 | return "noun_id IN (SELECT meronym_id FROM part_meronym_tree_" + std::to_string(f.get_elem()._id) + ")"; | ||
987 | } else { | ||
988 | return "noun_id NOT IN (SELECT meronym_id FROM part_meronym_tree_" + std::to_string(f.get_elem()._id) + ")"; | ||
989 | } | ||
990 | } | ||
991 | |||
992 | case filter<noun>::type::group: | ||
993 | { | ||
994 | bool truelogic = notlogic != f.get_notlogic(); | ||
995 | |||
996 | std::list<std::string> clauses; | ||
997 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) { | ||
998 | return recur(f2, truelogic); | ||
999 | }); | ||
1000 | |||
1001 | if (truelogic == f.get_orlogic()) | ||
1002 | { | ||
1003 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
1004 | } else { | ||
1005 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
1006 | } | ||
1007 | } | ||
1008 | } | ||
1009 | }; | ||
1010 | |||
1011 | conditions.push_back(recur(_full_part_meronym_of, false)); | ||
1012 | } | ||
1013 | |||
1014 | if (_is_part_holonym) | ||
1015 | { | ||
1016 | conditions.push_back("noun_id IN (SELECT holonym_id FROM part_meronymy)"); | ||
1017 | } | ||
1018 | |||
1019 | if (!_part_holonym_of.empty()) | ||
1020 | { | ||
1021 | std::stringstream cond; | ||
1022 | if (_part_holonym_of.get_notlogic()) | ||
1023 | { | ||
1024 | cond << "noun_id NOT IN"; | ||
1025 | } else { | ||
1026 | cond << "noun_id IN"; | ||
1027 | } | ||
1028 | |||
1029 | cond << "(SELECT holonym_id FROM part_meronymy WHERE "; | ||
1030 | |||
1031 | std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string { | ||
1032 | switch (f.get_type()) | ||
1033 | { | ||
1034 | case filter<noun>::type::singleton: | ||
1035 | { | ||
1036 | bindings.emplace_back(f.get_elem()._id); | ||
1037 | |||
1038 | if (notlogic == f.get_notlogic()) | ||
1039 | { | ||
1040 | return "meronym_id = ?"; | ||
1041 | } else { | ||
1042 | return "meronym_id != ?"; | ||
1043 | } | ||
1044 | } | ||
1045 | |||
1046 | case filter<noun>::type::group: | ||
1047 | { | ||
1048 | bool truelogic = notlogic != f.get_notlogic(); | ||
1049 | |||
1050 | std::list<std::string> clauses; | ||
1051 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) { | ||
1052 | return recur(f2, truelogic); | ||
1053 | }); | ||
1054 | |||
1055 | if (truelogic == f.get_orlogic()) | ||
1056 | { | ||
1057 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
1058 | } else { | ||
1059 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
1060 | } | ||
1061 | } | ||
1062 | } | ||
1063 | }; | ||
1064 | |||
1065 | cond << recur(_part_holonym_of, _part_holonym_of.get_notlogic()); | ||
1066 | cond << ")"; | ||
1067 | conditions.push_back(cond.str()); | ||
1068 | } | ||
1069 | |||
1070 | if (!_full_part_holonym_of.empty()) | ||
1071 | { | ||
1072 | std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string { | ||
1073 | switch (f.get_type()) | ||
1074 | { | ||
1075 | case filter<noun>::type::singleton: | ||
1076 | { | ||
1077 | if (notlogic == f.get_notlogic()) | ||
1078 | { | ||
1079 | return "noun_id IN (SELECT holonym_id FROM part_holonym_tree_" + std::to_string(f.get_elem()._id) + ")"; | ||
1080 | } else { | ||
1081 | return "noun_id NOT IN (SELECT holonym_id FROM part_holonym_tree_" + std::to_string(f.get_elem()._id) + ")"; | ||
1082 | } | ||
1083 | } | ||
1084 | |||
1085 | case filter<noun>::type::group: | ||
1086 | { | ||
1087 | bool truelogic = notlogic != f.get_notlogic(); | ||
1088 | |||
1089 | std::list<std::string> clauses; | ||
1090 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) { | ||
1091 | return recur(f2, truelogic); | ||
1092 | }); | ||
1093 | |||
1094 | if (truelogic == f.get_orlogic()) | ||
1095 | { | ||
1096 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
1097 | } else { | ||
1098 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
1099 | } | ||
1100 | } | ||
1101 | } | ||
1102 | }; | ||
1103 | |||
1104 | conditions.push_back(recur(_full_part_holonym_of, false)); | ||
1105 | } | ||
1106 | |||
1107 | if (_is_substance_meronym) | ||
1108 | { | ||
1109 | conditions.push_back("noun_id IN (SELECT meronym_id FROM substance_meronymy)"); | ||
1110 | } | ||
1111 | |||
1112 | if (!_substance_meronym_of.empty()) | ||
1113 | { | ||
1114 | std::stringstream cond; | ||
1115 | if (_substance_meronym_of.get_notlogic()) | ||
1116 | { | ||
1117 | cond << "noun_id NOT IN"; | ||
1118 | } else { | ||
1119 | cond << "noun_id IN"; | ||
1120 | } | ||
1121 | |||
1122 | cond << "(SELECT meronym_id FROM substance_meronymy WHERE "; | ||
1123 | |||
1124 | std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string { | ||
1125 | switch (f.get_type()) | ||
1126 | { | ||
1127 | case filter<noun>::type::singleton: | ||
1128 | { | ||
1129 | bindings.emplace_back(f.get_elem()._id); | ||
1130 | |||
1131 | if (notlogic == f.get_notlogic()) | ||
1132 | { | ||
1133 | return "holonym_id = ?"; | ||
1134 | } else { | ||
1135 | return "holonym_id != ?"; | ||
1136 | } | ||
1137 | } | ||
1138 | |||
1139 | case filter<noun>::type::group: | ||
1140 | { | ||
1141 | bool truelogic = notlogic != f.get_notlogic(); | ||
1142 | |||
1143 | std::list<std::string> clauses; | ||
1144 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) { | ||
1145 | return recur(f2, truelogic); | ||
1146 | }); | ||
1147 | |||
1148 | if (truelogic == f.get_orlogic()) | ||
1149 | { | ||
1150 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
1151 | } else { | ||
1152 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
1153 | } | ||
1154 | } | ||
1155 | } | ||
1156 | }; | ||
1157 | |||
1158 | cond << recur(_substance_meronym_of, _substance_meronym_of.get_notlogic()); | ||
1159 | cond << ")"; | ||
1160 | conditions.push_back(cond.str()); | ||
1161 | } | ||
1162 | |||
1163 | if (!_full_substance_meronym_of.empty()) | ||
1164 | { | ||
1165 | std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string { | ||
1166 | switch (f.get_type()) | ||
1167 | { | ||
1168 | case filter<noun>::type::singleton: | ||
1169 | { | ||
1170 | if (notlogic == f.get_notlogic()) | ||
1171 | { | ||
1172 | return "noun_id IN (SELECT meronym_id FROM substance_meronym_tree_" + std::to_string(f.get_elem()._id) + ")"; | ||
1173 | } else { | ||
1174 | return "noun_id NOT IN (SELECT meronym_id FROM substance_meronym_tree_" + std::to_string(f.get_elem()._id) + ")"; | ||
1175 | } | ||
1176 | } | ||
1177 | |||
1178 | case filter<noun>::type::group: | ||
1179 | { | ||
1180 | bool truelogic = notlogic != f.get_notlogic(); | ||
1181 | |||
1182 | std::list<std::string> clauses; | ||
1183 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) { | ||
1184 | return recur(f2, truelogic); | ||
1185 | }); | ||
1186 | |||
1187 | if (truelogic == f.get_orlogic()) | ||
1188 | { | ||
1189 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
1190 | } else { | ||
1191 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
1192 | } | ||
1193 | } | ||
1194 | } | ||
1195 | }; | ||
1196 | |||
1197 | conditions.push_back(recur(_full_substance_meronym_of, false)); | ||
1198 | } | ||
1199 | |||
1200 | if (_is_substance_holonym) | ||
1201 | { | ||
1202 | conditions.push_back("noun_id IN (SELECT holonym_id FROM substance_meronymy)"); | ||
1203 | } | ||
1204 | |||
1205 | if (!_substance_holonym_of.empty()) | ||
1206 | { | ||
1207 | std::stringstream cond; | ||
1208 | if (_substance_holonym_of.get_notlogic()) | ||
1209 | { | ||
1210 | cond << "noun_id NOT IN"; | ||
1211 | } else { | ||
1212 | cond << "noun_id IN"; | ||
1213 | } | ||
1214 | |||
1215 | cond << "(SELECT holonym_id FROM substance_meronymy WHERE "; | ||
1216 | |||
1217 | std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string { | ||
1218 | switch (f.get_type()) | ||
1219 | { | ||
1220 | case filter<noun>::type::singleton: | ||
1221 | { | ||
1222 | bindings.emplace_back(f.get_elem()._id); | ||
1223 | |||
1224 | if (notlogic == f.get_notlogic()) | ||
1225 | { | ||
1226 | return "meronym_id = ?"; | ||
1227 | } else { | ||
1228 | return "meronym_id != ?"; | ||
1229 | } | ||
1230 | } | ||
1231 | |||
1232 | case filter<noun>::type::group: | ||
1233 | { | ||
1234 | bool truelogic = notlogic != f.get_notlogic(); | ||
1235 | |||
1236 | std::list<std::string> clauses; | ||
1237 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) { | ||
1238 | return recur(f2, truelogic); | ||
1239 | }); | ||
1240 | |||
1241 | if (truelogic == f.get_orlogic()) | ||
1242 | { | ||
1243 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
1244 | } else { | ||
1245 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
1246 | } | ||
1247 | } | ||
1248 | } | ||
1249 | }; | ||
1250 | |||
1251 | cond << recur(_substance_holonym_of, _substance_holonym_of.get_notlogic()); | ||
1252 | cond << ")"; | ||
1253 | conditions.push_back(cond.str()); | ||
1254 | } | ||
1255 | |||
1256 | if (!_full_substance_holonym_of.empty()) | ||
1257 | { | ||
1258 | std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string { | ||
1259 | switch (f.get_type()) | ||
1260 | { | ||
1261 | case filter<noun>::type::singleton: | ||
1262 | { | ||
1263 | if (notlogic == f.get_notlogic()) | ||
1264 | { | ||
1265 | return "noun_id IN (SELECT holonym_id FROM substance_holonym_tree_" + std::to_string(f.get_elem()._id) + ")"; | ||
1266 | } else { | ||
1267 | return "noun_id NOT IN (SELECT holonym_id FROM substance_holonym_tree_" + std::to_string(f.get_elem()._id) + ")"; | ||
1268 | } | ||
1269 | } | ||
1270 | |||
1271 | case filter<noun>::type::group: | ||
1272 | { | ||
1273 | bool truelogic = notlogic != f.get_notlogic(); | ||
1274 | |||
1275 | std::list<std::string> clauses; | ||
1276 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) { | ||
1277 | return recur(f2, truelogic); | ||
1278 | }); | ||
1279 | |||
1280 | if (truelogic == f.get_orlogic()) | ||
1281 | { | ||
1282 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
1283 | } else { | ||
1284 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
1285 | } | ||
1286 | } | ||
1287 | } | ||
1288 | }; | ||
1289 | |||
1290 | conditions.push_back(recur(_full_substance_holonym_of, false)); | ||
1291 | } | ||
1292 | |||
1293 | if (_is_member_meronym) | ||
1294 | { | ||
1295 | conditions.push_back("noun_id IN (SELECT meronym_id FROM member_meronymy)"); | ||
1296 | } | ||
1297 | |||
1298 | if (!_member_meronym_of.empty()) | ||
1299 | { | ||
1300 | std::stringstream cond; | ||
1301 | if (_member_meronym_of.get_notlogic()) | ||
1302 | { | ||
1303 | cond << "noun_id NOT IN"; | ||
1304 | } else { | ||
1305 | cond << "noun_id IN"; | ||
1306 | } | ||
1307 | |||
1308 | cond << "(SELECT meronym_id FROM member_meronymy WHERE "; | ||
1309 | |||
1310 | std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string { | ||
1311 | switch (f.get_type()) | ||
1312 | { | ||
1313 | case filter<noun>::type::singleton: | ||
1314 | { | ||
1315 | bindings.emplace_back(f.get_elem()._id); | ||
1316 | |||
1317 | if (notlogic == f.get_notlogic()) | ||
1318 | { | ||
1319 | return "holonym_id = ?"; | ||
1320 | } else { | ||
1321 | return "holonym_id != ?"; | ||
1322 | } | ||
1323 | } | ||
1324 | |||
1325 | case filter<noun>::type::group: | ||
1326 | { | ||
1327 | bool truelogic = notlogic != f.get_notlogic(); | ||
1328 | |||
1329 | std::list<std::string> clauses; | ||
1330 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) { | ||
1331 | return recur(f2, truelogic); | ||
1332 | }); | ||
1333 | |||
1334 | if (truelogic == f.get_orlogic()) | ||
1335 | { | ||
1336 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
1337 | } else { | ||
1338 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
1339 | } | ||
1340 | } | ||
1341 | } | ||
1342 | }; | ||
1343 | |||
1344 | cond << recur(_member_meronym_of, _member_meronym_of.get_notlogic()); | ||
1345 | cond << ")"; | ||
1346 | conditions.push_back(cond.str()); | ||
1347 | } | ||
1348 | |||
1349 | if (!_full_member_meronym_of.empty()) | ||
1350 | { | ||
1351 | std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string { | ||
1352 | switch (f.get_type()) | ||
1353 | { | ||
1354 | case filter<noun>::type::singleton: | ||
1355 | { | ||
1356 | if (notlogic == f.get_notlogic()) | ||
1357 | { | ||
1358 | return "noun_id IN (SELECT meronym_id FROM member_meronym_tree_" + std::to_string(f.get_elem()._id) + ")"; | ||
1359 | } else { | ||
1360 | return "noun_id NOT IN (SELECT meronym_id FROM member_meronym_tree_" + std::to_string(f.get_elem()._id) + ")"; | ||
1361 | } | ||
1362 | } | ||
1363 | |||
1364 | case filter<noun>::type::group: | ||
1365 | { | ||
1366 | bool truelogic = notlogic != f.get_notlogic(); | ||
1367 | |||
1368 | std::list<std::string> clauses; | ||
1369 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) { | ||
1370 | return recur(f2, truelogic); | ||
1371 | }); | ||
1372 | |||
1373 | if (truelogic == f.get_orlogic()) | ||
1374 | { | ||
1375 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
1376 | } else { | ||
1377 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
1378 | } | ||
1379 | } | ||
1380 | } | ||
1381 | }; | ||
1382 | |||
1383 | conditions.push_back(recur(_full_member_meronym_of, false)); | ||
1384 | } | ||
1385 | |||
1386 | if (_is_member_holonym) | ||
1387 | { | ||
1388 | conditions.push_back("noun_id IN (SELECT holonym_id FROM member_meronym)"); | ||
1389 | } | ||
1390 | |||
1391 | if (!_member_holonym_of.empty()) | ||
1392 | { | ||
1393 | std::stringstream cond; | ||
1394 | if (_member_holonym_of.get_notlogic()) | ||
1395 | { | ||
1396 | cond << "noun_id NOT IN"; | ||
1397 | } else { | ||
1398 | cond << "noun_id IN"; | ||
1399 | } | ||
1400 | |||
1401 | cond << "(SELECT holonym_id FROM member_meronymy WHERE "; | ||
1402 | |||
1403 | std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string { | ||
1404 | switch (f.get_type()) | ||
1405 | { | ||
1406 | case filter<noun>::type::singleton: | ||
1407 | { | ||
1408 | bindings.emplace_back(f.get_elem()._id); | ||
1409 | |||
1410 | if (notlogic == f.get_notlogic()) | ||
1411 | { | ||
1412 | return "meronym_id = ?"; | ||
1413 | } else { | ||
1414 | return "meronym_id != ?"; | ||
1415 | } | ||
1416 | } | ||
1417 | |||
1418 | case filter<noun>::type::group: | ||
1419 | { | ||
1420 | bool truelogic = notlogic != f.get_notlogic(); | ||
1421 | |||
1422 | std::list<std::string> clauses; | ||
1423 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) { | ||
1424 | return recur(f2, truelogic); | ||
1425 | }); | ||
1426 | |||
1427 | if (truelogic == f.get_orlogic()) | ||
1428 | { | ||
1429 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
1430 | } else { | ||
1431 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
1432 | } | ||
1433 | } | ||
1434 | } | ||
1435 | }; | ||
1436 | |||
1437 | cond << recur(_member_holonym_of, _member_holonym_of.get_notlogic()); | ||
1438 | cond << ")"; | ||
1439 | conditions.push_back(cond.str()); | ||
1440 | } | ||
1441 | |||
1442 | if (!_full_member_holonym_of.empty()) | ||
1443 | { | ||
1444 | std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string { | ||
1445 | switch (f.get_type()) | ||
1446 | { | ||
1447 | case filter<noun>::type::singleton: | ||
1448 | { | ||
1449 | if (notlogic == f.get_notlogic()) | ||
1450 | { | ||
1451 | return "noun_id IN (SELECT holonym_id FROM member_holonym_tree_" + std::to_string(f.get_elem()._id) + ")"; | ||
1452 | } else { | ||
1453 | return "noun_id NOT IN (SELECT holonym_id FROM member_holonym_tree_" + std::to_string(f.get_elem()._id) + ")"; | ||
1454 | } | ||
1455 | } | ||
1456 | |||
1457 | case filter<noun>::type::group: | ||
1458 | { | ||
1459 | bool truelogic = notlogic != f.get_notlogic(); | ||
1460 | |||
1461 | std::list<std::string> clauses; | ||
1462 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) { | ||
1463 | return recur(f2, truelogic); | ||
1464 | }); | ||
1465 | |||
1466 | if (truelogic == f.get_orlogic()) | ||
1467 | { | ||
1468 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
1469 | } else { | ||
1470 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
1471 | } | ||
1472 | } | ||
1473 | } | ||
1474 | }; | ||
1475 | |||
1476 | conditions.push_back(recur(_full_member_holonym_of, false)); | ||
1477 | } | ||
1478 | |||
1479 | if (_is_proper) | ||
1480 | { | ||
1481 | conditions.push_back("proper = 1"); | ||
1482 | } | ||
1483 | |||
1484 | if (_is_not_proper) | ||
1485 | { | ||
1486 | conditions.push_back("proper = 0"); | ||
1487 | } | ||
1488 | |||
1489 | if (_is_instance) | ||
1490 | { | ||
1491 | conditions.push_back("noun_id IN (SELECT instance_id FROM instantiation)"); | ||
1492 | } | ||
1493 | |||
1494 | if (!_instance_of.empty()) | ||
1495 | { | ||
1496 | std::stringstream cond; | ||
1497 | if (_instance_of.get_notlogic()) | ||
1498 | { | ||
1499 | cond << "noun_id NOT IN"; | ||
1500 | } else { | ||
1501 | cond << "noun_id IN"; | ||
1502 | } | ||
1503 | |||
1504 | cond << "(SELECT instance_id FROM instantiation WHERE "; | ||
1505 | |||
1506 | std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string { | ||
1507 | switch (f.get_type()) | ||
1508 | { | ||
1509 | case filter<noun>::type::singleton: | ||
1510 | { | ||
1511 | bindings.emplace_back(f.get_elem()._id); | ||
1512 | |||
1513 | if (notlogic == f.get_notlogic()) | ||
1514 | { | ||
1515 | return "class_id = ?"; | ||
1516 | } else { | ||
1517 | return "class_id != ?"; | ||
1518 | } | ||
1519 | } | ||
1520 | |||
1521 | case filter<noun>::type::group: | ||
1522 | { | ||
1523 | bool truelogic = notlogic != f.get_notlogic(); | ||
1524 | |||
1525 | std::list<std::string> clauses; | ||
1526 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) { | ||
1527 | return recur(f2, truelogic); | ||
1528 | }); | ||
1529 | |||
1530 | if (truelogic == f.get_orlogic()) | ||
1531 | { | ||
1532 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
1533 | } else { | ||
1534 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
1535 | } | ||
1536 | } | ||
1537 | } | ||
1538 | }; | ||
1539 | |||
1540 | cond << recur(_instance_of, _instance_of.get_notlogic()); | ||
1541 | cond << ")"; | ||
1542 | conditions.push_back(cond.str()); | ||
1543 | } | ||
1544 | |||
1545 | if (_is_class) | ||
1546 | { | ||
1547 | conditions.push_back("noun_id IN (SELECT class_id FROM instantiation)"); | ||
1548 | } | ||
1549 | |||
1550 | if (!_class_of.empty()) | ||
1551 | { | ||
1552 | std::stringstream cond; | ||
1553 | if (_class_of.get_notlogic()) | ||
1554 | { | ||
1555 | cond << "noun_id NOT IN"; | ||
1556 | } else { | ||
1557 | cond << "noun_id IN"; | ||
1558 | } | ||
1559 | |||
1560 | cond << "(SELECT class_id FROM instantiation WHERE "; | ||
1561 | |||
1562 | std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string { | ||
1563 | switch (f.get_type()) | ||
1564 | { | ||
1565 | case filter<noun>::type::singleton: | ||
1566 | { | ||
1567 | bindings.emplace_back(f.get_elem()._id); | ||
1568 | |||
1569 | if (notlogic == f.get_notlogic()) | ||
1570 | { | ||
1571 | return "instance_id = ?"; | ||
1572 | } else { | ||
1573 | return "instance_id != ?"; | ||
1574 | } | ||
1575 | } | ||
1576 | |||
1577 | case filter<noun>::type::group: | ||
1578 | { | ||
1579 | bool truelogic = notlogic != f.get_notlogic(); | ||
1580 | |||
1581 | std::list<std::string> clauses; | ||
1582 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) { | ||
1583 | return recur(f2, truelogic); | ||
1584 | }); | ||
1585 | |||
1586 | if (truelogic == f.get_orlogic()) | ||
1587 | { | ||
1588 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
1589 | } else { | ||
1590 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
1591 | } | ||
1592 | } | ||
1593 | } | ||
1594 | }; | ||
1595 | |||
1596 | cond << recur(_class_of, _class_of.get_notlogic()); | ||
1597 | cond << ")"; | ||
1598 | conditions.push_back(cond.str()); | ||
1599 | } | ||
1600 | |||
1601 | if (_has_synonyms) | ||
1602 | { | ||
1603 | conditions.push_back("noun_id IN (SELECT noun_2_id FROM noun_synonymy)"); | ||
1604 | } | ||
1605 | |||
1606 | if (!_synonym_of.empty()) | ||
1607 | { | ||
1608 | std::stringstream cond; | ||
1609 | if (_synonym_of.get_notlogic()) | ||
1610 | { | ||
1611 | cond << "noun_id NOT IN"; | ||
1612 | } else { | ||
1613 | cond << "noun_id IN"; | ||
1614 | } | ||
1615 | |||
1616 | cond << "(SELECT noun_2_id FROM noun_synonymy WHERE "; | ||
1617 | |||
1618 | std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string { | ||
1619 | switch (f.get_type()) | ||
1620 | { | ||
1621 | case filter<noun>::type::singleton: | ||
1622 | { | ||
1623 | bindings.emplace_back(f.get_elem()._id); | ||
1624 | |||
1625 | if (notlogic == f.get_notlogic()) | ||
1626 | { | ||
1627 | return "noun_1_id = ?"; | ||
1628 | } else { | ||
1629 | return "noun_1_id != ?"; | ||
1630 | } | ||
1631 | } | ||
1632 | |||
1633 | case filter<noun>::type::group: | ||
1634 | { | ||
1635 | bool truelogic = notlogic != f.get_notlogic(); | ||
1636 | |||
1637 | std::list<std::string> clauses; | ||
1638 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) { | ||
1639 | return recur(f2, truelogic); | ||
1640 | }); | ||
1641 | |||
1642 | if (truelogic == f.get_orlogic()) | ||
1643 | { | ||
1644 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
1645 | } else { | ||
1646 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
1647 | } | ||
1648 | } | ||
1649 | } | ||
1650 | }; | ||
1651 | |||
1652 | cond << recur(_synonym_of, _synonym_of.get_notlogic()); | ||
1653 | cond << ")"; | ||
1654 | conditions.push_back(cond.str()); | ||
1655 | } | ||
1656 | |||
1657 | if (_has_antonyms) | ||
1658 | { | ||
1659 | conditions.push_back("noun_id IN (SELECT noun_2_id FROM noun_antonymy)"); | ||
1660 | } | ||
1661 | |||
1662 | if (!_antonym_of.empty()) | ||
1663 | { | ||
1664 | std::stringstream cond; | ||
1665 | if (_antonym_of.get_notlogic()) | ||
1666 | { | ||
1667 | cond << "noun_id NOT IN"; | ||
1668 | } else { | ||
1669 | cond << "noun_id IN"; | ||
1670 | } | ||
1671 | |||
1672 | cond << "(SELECT noun_2_id FROM noun_antonymy WHERE "; | ||
1673 | |||
1674 | std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string { | ||
1675 | switch (f.get_type()) | ||
1676 | { | ||
1677 | case filter<noun>::type::singleton: | ||
1678 | { | ||
1679 | bindings.emplace_back(f.get_elem()._id); | ||
1680 | |||
1681 | if (notlogic == f.get_notlogic()) | ||
1682 | { | ||
1683 | return "noun_1_id = ?"; | ||
1684 | } else { | ||
1685 | return "noun_1_id != ?"; | ||
1686 | } | ||
1687 | } | ||
1688 | |||
1689 | case filter<noun>::type::group: | ||
1690 | { | ||
1691 | bool truelogic = notlogic != f.get_notlogic(); | ||
1692 | |||
1693 | std::list<std::string> clauses; | ||
1694 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) { | ||
1695 | return recur(f2, truelogic); | ||
1696 | }); | ||
1697 | |||
1698 | if (truelogic == f.get_orlogic()) | ||
1699 | { | ||
1700 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
1701 | } else { | ||
1702 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
1703 | } | ||
1704 | } | ||
1705 | } | ||
1706 | }; | ||
1707 | |||
1708 | cond << recur(_antonym_of, _antonym_of.get_notlogic()); | ||
1709 | cond << ")"; | ||
1710 | conditions.push_back(cond.str()); | ||
1711 | } | ||
1712 | |||
1713 | if (_has_pertainym) | ||
1714 | { | ||
1715 | conditions.push_back("noun_id IN (SELECT noun_id FROM pertainymy)"); | ||
1716 | } | ||
1717 | |||
1718 | if (!_anti_pertainym_of.empty()) | ||
1719 | { | ||
1720 | std::stringstream cond; | ||
1721 | if (_anti_pertainym_of.get_notlogic()) | ||
1722 | { | ||
1723 | cond << "noun_id NOT IN"; | ||
1724 | } else { | ||
1725 | cond << "noun_id IN"; | ||
1726 | } | ||
1727 | |||
1728 | cond << "(SELECT noun_id FROM pertainymy WHERE "; | ||
1729 | |||
1730 | std::function<std::string (filter<adjective>, bool)> recur = [&] (filter<adjective> f, bool notlogic) -> std::string { | ||
1731 | switch (f.get_type()) | ||
1732 | { | ||
1733 | case filter<adjective>::type::singleton: | ||
1734 | { | ||
1735 | bindings.emplace_back(f.get_elem()._id); | ||
1736 | |||
1737 | if (notlogic == f.get_notlogic()) | ||
1738 | { | ||
1739 | return "pertainym_id = ?"; | ||
1740 | } else { | ||
1741 | return "pertainym_id != ?"; | ||
1742 | } | ||
1743 | } | ||
1744 | |||
1745 | case filter<adjective>::type::group: | ||
1746 | { | ||
1747 | bool truelogic = notlogic != f.get_notlogic(); | ||
1748 | |||
1749 | std::list<std::string> clauses; | ||
1750 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<adjective> f2) { | ||
1751 | return recur(f2, truelogic); | ||
1752 | }); | ||
1753 | |||
1754 | if (truelogic == f.get_orlogic()) | ||
1755 | { | ||
1756 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
1757 | } else { | ||
1758 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
1759 | } | ||
1760 | } | ||
1761 | } | ||
1762 | }; | ||
1763 | |||
1764 | cond << recur(_anti_pertainym_of, _anti_pertainym_of.get_notlogic()); | ||
1765 | cond << ")"; | ||
1766 | conditions.push_back(cond.str()); | ||
1767 | } | ||
1768 | |||
1769 | if (_is_attribute) | ||
1770 | { | ||
1771 | conditions.push_back("noun_id IN (SELECT noun_id FROM variation)"); | ||
1772 | } | ||
1773 | |||
1774 | if (!_attribute_of.empty()) | ||
1775 | { | ||
1776 | std::stringstream cond; | ||
1777 | if (_attribute_of.get_notlogic()) | ||
1778 | { | ||
1779 | cond << "noun_id NOT IN"; | ||
1780 | } else { | ||
1781 | cond << "noun_id IN"; | ||
1782 | } | ||
1783 | |||
1784 | cond << "(SELECT noun_id FROM variation WHERE "; | ||
1785 | |||
1786 | std::function<std::string (filter<adjective>, bool)> recur = [&] (filter<adjective> f, bool notlogic) -> std::string { | ||
1787 | switch (f.get_type()) | ||
1788 | { | ||
1789 | case filter<adjective>::type::singleton: | ||
1790 | { | ||
1791 | bindings.emplace_back(f.get_elem()._id); | ||
1792 | |||
1793 | if (notlogic == f.get_notlogic()) | ||
1794 | { | ||
1795 | return "adjective_id = ?"; | ||
1796 | } else { | ||
1797 | return "adjective_id != ?"; | ||
1798 | } | ||
1799 | } | ||
1800 | |||
1801 | case filter<adjective>::type::group: | ||
1802 | { | ||
1803 | bool truelogic = notlogic != f.get_notlogic(); | ||
1804 | |||
1805 | std::list<std::string> clauses; | ||
1806 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<adjective> f2) { | ||
1807 | return recur(f2, truelogic); | ||
1808 | }); | ||
1809 | |||
1810 | if (truelogic == f.get_orlogic()) | ||
1811 | { | ||
1812 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
1813 | } else { | ||
1814 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
1815 | } | ||
1816 | } | ||
1817 | } | ||
1818 | }; | ||
1819 | |||
1820 | cond << recur(_attribute_of, _attribute_of.get_notlogic()); | ||
1821 | cond << ")"; | ||
1822 | conditions.push_back(cond.str()); | ||
1823 | } | ||
1824 | |||
1825 | if (_at_least_n_images != unlimited) | ||
1826 | { | ||
1827 | conditions.push_back("images >= ?"); | ||
1828 | bindings.emplace_back(_at_least_n_images); | ||
1829 | } | ||
1830 | |||
1831 | if (!_with_wnid.empty()) | ||
1832 | { | ||
1833 | std::vector<std::string> clauses(_with_wnid.size(), "wnid = ?"); | ||
1834 | std::string cond = verbly::implode(std::begin(clauses), std::end(clauses), " OR "); | ||
1835 | conditions.push_back("(" + cond + ")"); | ||
1836 | |||
1837 | for (auto wnid : _with_wnid) | ||
1838 | { | ||
1839 | bindings.emplace_back(wnid); | ||
1840 | } | ||
1841 | } | ||
1842 | |||
1843 | /* | ||
1844 | if (!_derived_from_adjective.empty()) | ||
1845 | { | ||
1846 | std::list<std::string> clauses(_derived_from_adjective.size(), "adjective_id = @DERADJ"); | ||
1847 | std::string cond = "noun_id IN (SELECT noun_id FROM noun_adjective_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
1848 | conditions.push_back(cond); | ||
1849 | } | ||
1850 | |||
1851 | if (!_not_derived_from_adjective.empty()) | ||
1852 | { | ||
1853 | std::list<std::string> clauses(_not_derived_from_adjective.size(), "adjective_id = @NDERADJ"); | ||
1854 | std::string cond = "noun_id NOT IN (SELECT noun_id FROM noun_adjective_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
1855 | conditions.push_back(cond); | ||
1856 | } | ||
1857 | |||
1858 | if (!_derived_from_adverb.empty()) | ||
1859 | { | ||
1860 | std::list<std::string> clauses(_derived_from_adverb.size(), "adverb_id = @DERADV"); | ||
1861 | std::string cond = "noun_id IN (SELECT noun_id FROM noun_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
1862 | conditions.push_back(cond); | ||
1863 | } | ||
1864 | |||
1865 | if (!_not_derived_from_adverb.empty()) | ||
1866 | { | ||
1867 | std::list<std::string> clauses(_not_derived_from_adverb.size(), "adverb_id = @NDERADV"); | ||
1868 | std::string cond = "noun_id NOT IN (SELECT noun_id FROM noun_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
1869 | conditions.push_back(cond); | ||
1870 | } | ||
1871 | |||
1872 | if (!_derived_from_noun.empty()) | ||
1873 | { | ||
1874 | std::list<std::string> clauses(_derived_from_noun.size(), "noun_2_id = @DERN"); | ||
1875 | std::string cond = "noun_id IN (SELECT noun_1_id FROM noun_noun_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
1876 | conditions.push_back(cond); | ||
1877 | } | ||
1878 | |||
1879 | if (!_not_derived_from_noun.empty()) | ||
1880 | { | ||
1881 | std::list<std::string> clauses(_not_derived_from_noun.size(), "noun_2_id = @NDERN"); | ||
1882 | std::string cond = "noun_id NOT IN (SELECT noun_1_id FROM noun_noun_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
1883 | conditions.push_back(cond); | ||
1884 | } | ||
1885 | */ | ||
1886 | if (!conditions.empty()) | ||
1887 | { | ||
1888 | construct << " WHERE "; | ||
1889 | construct << verbly::implode(std::begin(conditions), std::end(conditions), " AND "); | ||
1890 | } | ||
1891 | |||
1892 | if (_random) | ||
1893 | { | ||
1894 | construct << " ORDER BY RANDOM()"; | ||
1895 | } | ||
1896 | |||
1897 | if (_limit != unlimited) | ||
1898 | { | ||
1899 | construct << " LIMIT " << _limit; | ||
1900 | } | ||
1901 | |||
1902 | sqlite3_stmt* ppstmt; | ||
1903 | std::string query = construct.str(); | ||
1904 | if (sqlite3_prepare_v2(_data.ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
1905 | { | ||
1906 | throw std::runtime_error(sqlite3_errmsg(_data.ppdb)); | ||
1907 | } | ||
1908 | |||
1909 | int i = 1; | ||
1910 | for (auto& binding : bindings) | ||
1911 | { | ||
1912 | switch (binding.get_type()) | ||
1913 | { | ||
1914 | case binding::type::integer: | ||
1915 | { | ||
1916 | sqlite3_bind_int(ppstmt, i, binding.get_integer()); | ||
1917 | |||
1918 | break; | ||
1919 | } | ||
1920 | |||
1921 | case binding::type::string: | ||
1922 | { | ||
1923 | sqlite3_bind_text(ppstmt, i, binding.get_string().c_str(), binding.get_string().length(), SQLITE_TRANSIENT); | ||
1924 | |||
1925 | break; | ||
1926 | } | ||
1927 | } | ||
1928 | |||
1929 | i++; | ||
1930 | } | ||
1931 | |||
1932 | /* | ||
1933 | for (auto adj : _derived_from_adjective) | ||
1934 | { | ||
1935 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@DERADJ"), adj._id); | ||
1936 | } | ||
1937 | |||
1938 | for (auto adj : _not_derived_from_adjective) | ||
1939 | { | ||
1940 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@NDERADJ"), adj._id); | ||
1941 | } | ||
1942 | |||
1943 | for (auto adv : _derived_from_adverb) | ||
1944 | { | ||
1945 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@DERADV"), adv._id); | ||
1946 | } | ||
1947 | |||
1948 | for (auto adv : _not_derived_from_adverb) | ||
1949 | { | ||
1950 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@NDERADV"), adv._id); | ||
1951 | } | ||
1952 | |||
1953 | for (auto n : _derived_from_noun) | ||
1954 | { | ||
1955 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@DERN"), n._id); | ||
1956 | } | ||
1957 | |||
1958 | for (auto n : _not_derived_from_noun) | ||
1959 | { | ||
1960 | sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@NDERN"), n._id); | ||
1961 | } | ||
1962 | */ | ||
1963 | std::list<noun> output; | ||
1964 | while (sqlite3_step(ppstmt) == SQLITE_ROW) | ||
1965 | { | ||
1966 | noun tnc {_data, sqlite3_column_int(ppstmt, 0)}; | ||
1967 | tnc._singular = std::string(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 1))); | ||
1968 | |||
1969 | if (sqlite3_column_type(ppstmt, 2) != SQLITE_NULL) | ||
1970 | { | ||
1971 | tnc._plural = std::string(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 2))); | ||
1972 | } | ||
1973 | |||
1974 | tnc._wnid = sqlite3_column_int(ppstmt, 3); | ||
1975 | |||
1976 | output.push_back(tnc); | ||
1977 | } | ||
1978 | |||
1979 | sqlite3_finalize(ppstmt); | ||
1980 | |||
1981 | for (auto& noun : output) | ||
1982 | { | ||
1983 | query = "SELECT pronunciation, prerhyme, rhyme FROM noun_pronunciations WHERE noun_id = ?"; | ||
1984 | if (sqlite3_prepare_v2(_data.ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
1985 | { | ||
1986 | throw std::runtime_error(sqlite3_errmsg(_data.ppdb)); | ||
1987 | } | ||
1988 | |||
1989 | sqlite3_bind_int(ppstmt, 1, noun._id); | ||
1990 | |||
1991 | while (sqlite3_step(ppstmt) == SQLITE_ROW) | ||
1992 | { | ||
1993 | std::string pronunciation(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 0))); | ||
1994 | auto phonemes = verbly::split<std::list<std::string>>(pronunciation, " "); | ||
1995 | |||
1996 | noun.pronunciations.push_back(phonemes); | ||
1997 | |||
1998 | if ((sqlite3_column_type(ppstmt, 1) != SQLITE_NULL) && (sqlite3_column_type(ppstmt, 2) != SQLITE_NULL)) | ||
1999 | { | ||
2000 | std::string prerhyme(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 1))); | ||
2001 | std::string rhyming(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 2))); | ||
2002 | |||
2003 | noun.rhymes.emplace_back(prerhyme, rhyming); | ||
2004 | } | ||
2005 | } | ||
2006 | |||
2007 | sqlite3_finalize(ppstmt); | ||
2008 | } | ||
2009 | |||
2010 | return output; | ||
2011 | } | ||
2012 | |||
2013 | }; | ||
diff --git a/lib/noun_query.h b/lib/noun_query.h deleted file mode 100644 index 74df260..0000000 --- a/lib/noun_query.h +++ /dev/null | |||
@@ -1,180 +0,0 @@ | |||
1 | #ifndef NOUN_QUERY_H_5DE51DD7 | ||
2 | #define NOUN_QUERY_H_5DE51DD7 | ||
3 | |||
4 | namespace verbly { | ||
5 | |||
6 | class noun_query { | ||
7 | public: | ||
8 | noun_query(const data& _data); | ||
9 | |||
10 | noun_query& limit(int _limit); | ||
11 | noun_query& random(); | ||
12 | noun_query& except(const noun& _word); | ||
13 | noun_query& rhymes_with(const word& _word); | ||
14 | noun_query& rhymes_with(rhyme _r); | ||
15 | noun_query& has_pronunciation(); | ||
16 | noun_query& has_rhyming_noun(); | ||
17 | noun_query& has_rhyming_adjective(); | ||
18 | noun_query& has_rhyming_adverb(); | ||
19 | noun_query& has_rhyming_verb(); | ||
20 | noun_query& with_stress(filter<std::vector<bool>> _arg); | ||
21 | |||
22 | noun_query& with_singular_form(std::string _arg); | ||
23 | noun_query& with_prefix(filter<std::string> _f); | ||
24 | noun_query& with_suffix(filter<std::string> _f); | ||
25 | |||
26 | noun_query& requires_plural_form(); | ||
27 | |||
28 | noun_query& with_complexity(int _arg); | ||
29 | |||
30 | noun_query& is_hypernym(); | ||
31 | noun_query& hypernym_of(filter<noun> _f); | ||
32 | noun_query& full_hypernym_of(filter<noun> _f); | ||
33 | |||
34 | noun_query& is_hyponym(); | ||
35 | noun_query& hyponym_of(filter<noun> _f); | ||
36 | noun_query& full_hyponym_of(filter<noun> _f); | ||
37 | |||
38 | noun_query& is_part_meronym(); | ||
39 | noun_query& part_meronym_of(filter<noun> _f); | ||
40 | noun_query& full_part_meronym_of(filter<noun> _f); | ||
41 | |||
42 | noun_query& is_part_holonym(); | ||
43 | noun_query& part_holonym_of(filter<noun> _f); | ||
44 | noun_query& full_part_holonym_of(filter<noun> _f); | ||
45 | |||
46 | noun_query& is_substance_meronym(); | ||
47 | noun_query& substance_meronym_of(filter<noun> _f); | ||
48 | noun_query& full_substance_meronym_of(filter<noun> _f); | ||
49 | |||
50 | noun_query& is_substance_holonym(); | ||
51 | noun_query& substance_holonym_of(filter<noun> _f); | ||
52 | noun_query& full_substance_holonym_of(filter<noun> _f); | ||
53 | |||
54 | noun_query& is_member_meronym(); | ||
55 | noun_query& member_meronym_of(filter<noun> _f); | ||
56 | noun_query& full_member_meronym_of(filter<noun> _f); | ||
57 | |||
58 | noun_query& is_member_holonym(); | ||
59 | noun_query& member_holonym_of(filter<noun> _f); | ||
60 | noun_query& full_member_holonym_of(filter<noun> _f); | ||
61 | |||
62 | noun_query& is_proper(); | ||
63 | noun_query& is_not_proper(); | ||
64 | |||
65 | noun_query& is_instance(); | ||
66 | noun_query& instance_of(filter<noun> _f); | ||
67 | |||
68 | noun_query& is_class(); | ||
69 | noun_query& class_of(filter<noun> _f); | ||
70 | |||
71 | noun_query& has_synonyms(); | ||
72 | noun_query& synonym_of(filter<noun> _f); | ||
73 | |||
74 | noun_query& has_antonyms(); | ||
75 | noun_query& antonym_of(filter<noun> _f); | ||
76 | |||
77 | noun_query& has_pertainym(); | ||
78 | noun_query& anti_pertainym_of(filter<adjective> _f); | ||
79 | |||
80 | noun_query& is_attribute(); | ||
81 | noun_query& attribute_of(filter<adjective> _f); | ||
82 | |||
83 | noun_query& at_least_n_images(int _arg); | ||
84 | noun_query& with_wnid(int _arg); | ||
85 | |||
86 | /* noun_query& derived_from(const word& _w); | ||
87 | noun_query& not_derived_from(const word& _w);*/ | ||
88 | |||
89 | std::list<noun> run() const; | ||
90 | |||
91 | const static int unlimited = -1; | ||
92 | |||
93 | private: | ||
94 | const data& _data; | ||
95 | int _limit = unlimited; | ||
96 | bool _random = false; | ||
97 | std::list<rhyme> _rhymes; | ||
98 | std::list<noun> _except; | ||
99 | bool _has_prn = false; | ||
100 | bool _has_rhyming_noun = false; | ||
101 | bool _has_rhyming_adjective = false; | ||
102 | bool _has_rhyming_adverb = false; | ||
103 | bool _has_rhyming_verb = false; | ||
104 | filter<std::vector<bool>> _stress; | ||
105 | |||
106 | std::list<std::string> _with_singular_form; | ||
107 | filter<std::string> _with_prefix; | ||
108 | filter<std::string> _with_suffix; | ||
109 | |||
110 | int _with_complexity = unlimited; | ||
111 | |||
112 | bool _requires_plural_form = false; | ||
113 | |||
114 | bool _is_hypernym = false; | ||
115 | filter<noun> _hypernym_of; | ||
116 | filter<noun> _full_hypernym_of; | ||
117 | |||
118 | bool _is_hyponym = false; | ||
119 | filter<noun> _hyponym_of; | ||
120 | filter<noun> _full_hyponym_of; | ||
121 | |||
122 | bool _is_part_meronym = false; | ||
123 | filter<noun> _part_meronym_of; | ||
124 | filter<noun> _full_part_meronym_of; | ||
125 | |||
126 | bool _is_substance_meronym = false; | ||
127 | filter<noun> _substance_meronym_of; | ||
128 | filter<noun> _full_substance_meronym_of; | ||
129 | |||
130 | bool _is_member_meronym = false; | ||
131 | filter<noun> _member_meronym_of; | ||
132 | filter<noun> _full_member_meronym_of; | ||
133 | |||
134 | bool _is_part_holonym = false; | ||
135 | filter<noun> _part_holonym_of; | ||
136 | filter<noun> _full_part_holonym_of; | ||
137 | |||
138 | bool _is_substance_holonym = false; | ||
139 | filter<noun> _substance_holonym_of; | ||
140 | filter<noun> _full_substance_holonym_of; | ||
141 | |||
142 | bool _is_member_holonym = false; | ||
143 | filter<noun> _member_holonym_of; | ||
144 | filter<noun> _full_member_holonym_of; | ||
145 | |||
146 | bool _is_proper = false; | ||
147 | bool _is_not_proper = false; | ||
148 | |||
149 | bool _is_instance = false; | ||
150 | filter<noun> _instance_of; | ||
151 | |||
152 | bool _is_class = false; | ||
153 | filter<noun> _class_of; | ||
154 | |||
155 | bool _has_synonyms = false; | ||
156 | filter<noun> _synonym_of; | ||
157 | |||
158 | bool _has_antonyms = false; | ||
159 | filter<noun> _antonym_of; | ||
160 | |||
161 | bool _has_pertainym = false; | ||
162 | filter<adjective> _anti_pertainym_of; | ||
163 | |||
164 | bool _is_attribute = false; | ||
165 | filter<adjective> _attribute_of; | ||
166 | |||
167 | int _at_least_n_images = unlimited; | ||
168 | std::set<int> _with_wnid; | ||
169 | |||
170 | /* std::list<adjective> _derived_from_adjective; | ||
171 | std::list<adjective> _not_derived_from_adjective; | ||
172 | std::list<adverb> _derived_from_adverb; | ||
173 | std::list<adverb> _not_derived_from_adverb; | ||
174 | std::list<noun> _derived_from_noun; | ||
175 | std::list<noun> _not_derived_from_noun;*/ | ||
176 | }; | ||
177 | |||
178 | }; | ||
179 | |||
180 | #endif /* end of include guard: NOUN_QUERY_H_5DE51DD7 */ | ||
diff --git a/lib/preposition.cpp b/lib/preposition.cpp deleted file mode 100644 index cea9165..0000000 --- a/lib/preposition.cpp +++ /dev/null | |||
@@ -1,107 +0,0 @@ | |||
1 | #include "verbly.h" | ||
2 | |||
3 | namespace verbly { | ||
4 | |||
5 | std::string preposition::get_form() const | ||
6 | { | ||
7 | return form; | ||
8 | } | ||
9 | |||
10 | preposition_query::preposition_query(const data& _data) : _data(_data) | ||
11 | { | ||
12 | |||
13 | } | ||
14 | |||
15 | preposition_query& preposition_query::limit(int _limit) | ||
16 | { | ||
17 | this->_limit = _limit; | ||
18 | |||
19 | return *this; | ||
20 | } | ||
21 | |||
22 | preposition_query& preposition_query::random() | ||
23 | { | ||
24 | _random = true; | ||
25 | |||
26 | return *this; | ||
27 | } | ||
28 | |||
29 | preposition_query& preposition_query::in_group(std::string _arg) | ||
30 | { | ||
31 | _in_group.push_back(_arg); | ||
32 | |||
33 | return *this; | ||
34 | } | ||
35 | |||
36 | std::list<preposition> preposition_query::run() const | ||
37 | { | ||
38 | std::stringstream construct; | ||
39 | construct << "SELECT form FROM prepositions"; | ||
40 | std::list<binding> bindings; | ||
41 | |||
42 | if (!_in_group.empty()) | ||
43 | { | ||
44 | std::list<std::string> clauses(_in_group.size(), "groupname = ?"); | ||
45 | construct << " WHERE preposition_id IN (SELECT preposition_id FROM preposition_groups WHERE "; | ||
46 | construct << verbly::implode(std::begin(clauses), std::end(clauses), " OR "); | ||
47 | construct << ")"; | ||
48 | |||
49 | for (auto g : _in_group) | ||
50 | { | ||
51 | bindings.emplace_back(g); | ||
52 | } | ||
53 | } | ||
54 | |||
55 | if (_random) | ||
56 | { | ||
57 | construct << " ORDER BY RANDOM()"; | ||
58 | } | ||
59 | |||
60 | if (_limit != unlimited) | ||
61 | { | ||
62 | construct << " LIMIT " << _limit; | ||
63 | } | ||
64 | |||
65 | sqlite3_stmt* ppstmt; | ||
66 | std::string query = construct.str(); | ||
67 | if (sqlite3_prepare_v2(_data.ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
68 | { | ||
69 | throw std::runtime_error(sqlite3_errmsg(_data.ppdb)); | ||
70 | } | ||
71 | |||
72 | int i = 1; | ||
73 | for (auto& binding : bindings) | ||
74 | { | ||
75 | switch (binding.get_type()) | ||
76 | { | ||
77 | case binding::type::integer: | ||
78 | { | ||
79 | sqlite3_bind_int(ppstmt, i, binding.get_integer()); | ||
80 | |||
81 | break; | ||
82 | } | ||
83 | |||
84 | case binding::type::string: | ||
85 | { | ||
86 | sqlite3_bind_text(ppstmt, i, binding.get_string().c_str(), binding.get_string().length(), SQLITE_TRANSIENT); | ||
87 | |||
88 | break; | ||
89 | } | ||
90 | } | ||
91 | |||
92 | i++; | ||
93 | } | ||
94 | |||
95 | std::list<preposition> output; | ||
96 | while (sqlite3_step(ppstmt) == SQLITE_ROW) | ||
97 | { | ||
98 | preposition pp; | ||
99 | pp.form = std::string(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 0))); | ||
100 | |||
101 | output.push_back(pp); | ||
102 | } | ||
103 | |||
104 | return output; | ||
105 | } | ||
106 | |||
107 | }; | ||
diff --git a/lib/preposition.h b/lib/preposition.h deleted file mode 100644 index 89f24fa..0000000 --- a/lib/preposition.h +++ /dev/null | |||
@@ -1,38 +0,0 @@ | |||
1 | #ifndef PREPOSITION_H_FF908021 | ||
2 | #define PREPOSITION_H_FF908021 | ||
3 | |||
4 | namespace verbly { | ||
5 | |||
6 | class preposition_query; | ||
7 | |||
// A single preposition as returned by a preposition_query.
// The only state is the surface form; preposition_query is a friend so that
// it can populate the private `form` member directly while building results.
class preposition {
public:
  // Returns the textual form of this preposition.
  std::string get_form() const;

private:
  std::string form;

  friend class preposition_query;
};
17 | |||
18 | class preposition_query { | ||
19 | public: | ||
20 | preposition_query(const data& _data); | ||
21 | |||
22 | preposition_query& limit(int _limit); | ||
23 | preposition_query& random(); | ||
24 | preposition_query& in_group(std::string _arg); | ||
25 | |||
26 | std::list<preposition> run() const; | ||
27 | |||
28 | const static int unlimited = -1; | ||
29 | private: | ||
30 | const data& _data; | ||
31 | int _limit = unlimited; | ||
32 | bool _random = false; | ||
33 | std::list<std::string> _in_group; | ||
34 | }; | ||
35 | |||
36 | }; | ||
37 | |||
38 | #endif /* end of include guard: PREPOSITION_H_FF908021 */ | ||
diff --git a/lib/pronunciation.cpp b/lib/pronunciation.cpp new file mode 100644 index 0000000..f5b742f --- /dev/null +++ b/lib/pronunciation.cpp | |||
@@ -0,0 +1,69 @@ | |||
1 | #include "pronunciation.h" | ||
2 | #include <sqlite3.h> | ||
3 | #include "form.h" | ||
4 | #include "lemma.h" | ||
5 | #include "word.h" | ||
6 | #include "util.h" | ||
7 | |||
8 | namespace verbly { | ||
9 | |||
// Object tag for this class; query<Object> hands Object::objectType to statement as the query context.
10 | const object pronunciation::objectType = object::pronunciation; | ||
11 | | ||
// Columns selected for every pronunciation row. The row constructor reads them by
// position (0 = pronunciation_id ... 5 = rhyme), so this order must stay in sync with it.
12 | const std::list<std::string> pronunciation::select = {"pronunciation_id", "phonemes", "syllables", "stress", "prerhyme", "rhyme"}; | ||
13 | | ||
// Plain column fields that can be used to build filters.
14 | const field pronunciation::id = field::integerField(object::pronunciation, "pronunciation_id"); | ||
15 | const field pronunciation::numOfSyllables = field::integerField(object::pronunciation, "syllables"); | ||
16 | const field pronunciation::stress = field::stringField(object::pronunciation, "stress"); | ||
17 | | ||
// Relationship to forms, joined through the forms_pronunciations table.
18 | const field pronunciation::form = field::joinThrough(object::pronunciation, "pronunciation_id", object::form, "forms_pronunciations", "form_id"); | ||
19 | | ||
// Private fields used by rhymesWith(). NOTE(review): the trailing `true` presumably marks
// the column as nullable -- confirm against field::stringField.
20 | const field pronunciation::prerhyme = field::stringField(object::pronunciation, "prerhyme", true); | ||
21 | const field pronunciation::rhyme = field::stringField(object::pronunciation, "rhyme", true); | ||
22 | |||
23 | pronunciation::pronunciation(const database& db, sqlite3_stmt* row) : db_(&db), valid_(true) | ||
24 | { | ||
25 | id_ = sqlite3_column_int(row, 0); | ||
26 | |||
27 | std::string phonemesStr(reinterpret_cast<const char*>(sqlite3_column_text(row, 1))); | ||
28 | phonemes_ = split<std::vector<std::string>>(phonemesStr, " "); | ||
29 | |||
30 | syllables_ = sqlite3_column_int(row, 2); | ||
31 | stress_ = std::string(reinterpret_cast<const char*>(sqlite3_column_text(row, 3))); | ||
32 | |||
33 | if (sqlite3_column_type(row, 5) != SQLITE_NULL) | ||
34 | { | ||
35 | hasRhyme_ = true; | ||
36 | |||
37 | prerhyme_ = std::string(reinterpret_cast<const char*>(sqlite3_column_text(row, 4))); | ||
38 | rhyme_ = std::string(reinterpret_cast<const char*>(sqlite3_column_text(row, 5))); | ||
39 | } | ||
40 | } | ||
41 | |||
42 | filter pronunciation::rhymesWith(const pronunciation& arg) | ||
43 | { | ||
44 | return (prerhyme != arg.getPrerhyme()) && (rhyme == arg.getRhyme()); | ||
45 | } | ||
46 | |||
47 | /*filter pronunciation::rhymesWith(const class form& arg) | ||
48 | { | ||
49 | filter result; | ||
50 | |||
51 | for (const pronunciation& p : arg.getPronunciations()) | ||
52 | { | ||
53 | result |= rhymesWith(p); | ||
54 | } | ||
55 | |||
56 | return result; | ||
57 | } | ||
58 | |||
59 | filter pronunciation::rhymesWith(const lemma& arg) | ||
60 | { | ||
61 | return rhymesWith(arg.getBaseForm()); | ||
62 | } | ||
63 | |||
64 | filter pronunciation::rhymesWith(const word& arg) | ||
65 | { | ||
66 | return rhymesWith(arg.getLemma()); | ||
67 | }*/ | ||
68 | |||
69 | }; | ||
diff --git a/lib/pronunciation.h b/lib/pronunciation.h new file mode 100644 index 0000000..c7a1d4d --- /dev/null +++ b/lib/pronunciation.h | |||
@@ -0,0 +1,163 @@ | |||
1 | #ifndef PRONUNCIATION_H_C68F86B0 | ||
2 | #define PRONUNCIATION_H_C68F86B0 | ||
3 | |||
4 | #include <stdexcept> | ||
5 | #include <vector> | ||
6 | #include <string> | ||
7 | #include "field.h" | ||
8 | #include "filter.h" | ||
9 | |||
10 | struct sqlite3_stmt; | ||
11 | |||
12 | namespace verbly { | ||
13 | |||
14 | class form; | ||
15 | class lemma; | ||
16 | class word; | ||
17 | class database; | ||
18 | |||
19 | class pronunciation { | ||
20 | public: | ||
21 | |||
22 | // Default constructor | ||
23 | |||
24 | pronunciation() = default; | ||
25 | |||
26 | // Construct from database | ||
27 | |||
28 | pronunciation(const database& db, sqlite3_stmt* row); | ||
29 | |||
30 | // Accessors | ||
31 | |||
32 | operator bool() const | ||
33 | { | ||
34 | return valid_; | ||
35 | } | ||
36 | |||
37 | int getId() const | ||
38 | { | ||
39 | if (!valid_) | ||
40 | { | ||
41 | throw std::domain_error("Bad access to uninitialized pronunciation"); | ||
42 | } | ||
43 | |||
44 | return id_; | ||
45 | } | ||
46 | |||
47 | const std::vector<std::string>& getPhonemes() const | ||
48 | { | ||
49 | if (!valid_) | ||
50 | { | ||
51 | throw std::domain_error("Bad access to uninitialized pronunciation"); | ||
52 | } | ||
53 | |||
54 | return phonemes_; | ||
55 | } | ||
56 | |||
57 | int getSyllables() const | ||
58 | { | ||
59 | if (!valid_) | ||
60 | { | ||
61 | throw std::domain_error("Bad access to uninitialized pronunciation"); | ||
62 | } | ||
63 | |||
64 | return syllables_; | ||
65 | } | ||
66 | |||
67 | std::string getStress() const | ||
68 | { | ||
69 | if (!valid_) | ||
70 | { | ||
71 | throw std::domain_error("Bad access to uninitialized pronunciation"); | ||
72 | } | ||
73 | |||
74 | return stress_; | ||
75 | } | ||
76 | |||
77 | bool hasRhyme() const | ||
78 | { | ||
79 | if (!valid_) | ||
80 | { | ||
81 | throw std::domain_error("Bad access to uninitialized pronunciation"); | ||
82 | } | ||
83 | |||
84 | return hasRhyme_; | ||
85 | } | ||
86 | |||
87 | std::string getPrerhyme() const | ||
88 | { | ||
89 | if (!valid_) | ||
90 | { | ||
91 | throw std::domain_error("Bad access to uninitialized pronunciation"); | ||
92 | } | ||
93 | |||
94 | if (!hasRhyme_) | ||
95 | { | ||
96 | throw std::domain_error("This pronunciation has no rhyme"); | ||
97 | } | ||
98 | |||
99 | return prerhyme_; | ||
100 | } | ||
101 | |||
102 | std::string getRhyme() const | ||
103 | { | ||
104 | if (!valid_) | ||
105 | { | ||
106 | throw std::domain_error("Bad access to uninitialized pronunciation"); | ||
107 | } | ||
108 | |||
109 | if (!hasRhyme_) | ||
110 | { | ||
111 | throw std::domain_error("This pronunciation has no rhyme"); | ||
112 | } | ||
113 | |||
114 | return rhyme_; | ||
115 | } | ||
116 | |||
117 | // Type info | ||
118 | |||
119 | static const object objectType; | ||
120 | |||
121 | static const std::list<std::string> select; | ||
122 | |||
123 | // Query fields | ||
124 | |||
125 | static const field id; | ||
126 | static const field numOfSyllables; | ||
127 | static const field stress; | ||
128 | |||
129 | operator filter() const | ||
130 | { | ||
131 | return (id == id_); | ||
132 | } | ||
133 | |||
134 | static filter rhymesWith(const pronunciation& arg); | ||
135 | static filter rhymesWith(const class form& arg); | ||
136 | static filter rhymesWith(const lemma& arg); | ||
137 | static filter rhymesWith(const word& arg); | ||
138 | |||
139 | // Relationships to other objects | ||
140 | |||
141 | static const field form; | ||
142 | |||
143 | private: | ||
144 | bool valid_ = false; | ||
145 | |||
146 | int id_; | ||
147 | std::vector<std::string> phonemes_; | ||
148 | int syllables_; | ||
149 | std::string stress_; | ||
150 | bool hasRhyme_ = false; | ||
151 | std::string prerhyme_; | ||
152 | std::string rhyme_; | ||
153 | |||
154 | const database* db_; | ||
155 | |||
156 | static const field prerhyme; | ||
157 | static const field rhyme; | ||
158 | |||
159 | }; | ||
160 | |||
161 | }; | ||
162 | |||
163 | #endif /* end of include guard: PRONUNCIATION_H_C68F86B0 */ | ||
diff --git a/lib/query.h b/lib/query.h new file mode 100644 index 0000000..e31be3d --- /dev/null +++ b/lib/query.h | |||
@@ -0,0 +1,123 @@ | |||
1 | #ifndef QUERY_H_7CC5284C | ||
2 | #define QUERY_H_7CC5284C | ||
3 | |||
4 | #include <vector> | ||
5 | #include <stdexcept> | ||
6 | #include <string> | ||
7 | #include <list> | ||
8 | #include <sqlite3.h> | ||
9 | #include <iostream> | ||
10 | #include "statement.h" | ||
11 | #include "binding.h" | ||
12 | |||
13 | namespace verbly { | ||
14 | |||
// Error raised when SQLite rejects a query. The message has the form
// "<msg> (<sqlMsg>)", where sqlMsg is the text reported by SQLite.
class database_error : public std::logic_error {
public:

  database_error(std::string msg, std::string sqlMsg) :
    std::logic_error(msg.append(" (").append(sqlMsg).append(")"))
  {
  }
};
22 | |||
23 | template <typename Object> | ||
24 | class query { | ||
25 | public: | ||
26 | |||
27 | query(const database& db, sqlite3* ppdb, filter queryFilter, bool random, int limit) : db_(&db) | ||
28 | { | ||
29 | statement stmt(Object::objectType, std::move(queryFilter)); | ||
30 | |||
31 | std::string queryString = stmt.getQueryString(Object::select, random, limit); | ||
32 | std::list<binding> bindings = stmt.getBindings(); | ||
33 | |||
34 | std::cout << queryString << std::endl; | ||
35 | |||
36 | if (sqlite3_prepare_v2(ppdb, queryString.c_str(), queryString.length(), &ppstmt_, NULL) != SQLITE_OK) | ||
37 | { | ||
38 | std::string errorMsg = sqlite3_errmsg(ppdb); | ||
39 | sqlite3_finalize(ppstmt_); | ||
40 | |||
41 | throw database_error("Error preparing query", errorMsg); | ||
42 | } | ||
43 | |||
44 | int i = 1; | ||
45 | for (const binding& value : bindings) | ||
46 | { | ||
47 | switch (value.getType()) | ||
48 | { | ||
49 | case binding::type::integer: | ||
50 | { | ||
51 | if (sqlite3_bind_int(ppstmt_, i, value.getInteger()) != SQLITE_OK) | ||
52 | { | ||
53 | std::string errorMsg = sqlite3_errmsg(ppdb); | ||
54 | sqlite3_finalize(ppstmt_); | ||
55 | |||
56 | throw database_error("Error binding value to query", errorMsg); | ||
57 | } | ||
58 | |||
59 | break; | ||
60 | } | ||
61 | |||
62 | case binding::type::string: | ||
63 | { | ||
64 | if (sqlite3_bind_text(ppstmt_, i, value.getString().c_str(), value.getString().length(), SQLITE_TRANSIENT) != SQLITE_OK) | ||
65 | { | ||
66 | std::string errorMsg = sqlite3_errmsg(ppdb); | ||
67 | sqlite3_finalize(ppstmt_); | ||
68 | |||
69 | throw database_error("Error binding value to query", errorMsg); | ||
70 | } | ||
71 | |||
72 | break; | ||
73 | } | ||
74 | |||
75 | case binding::type::invalid: | ||
76 | { | ||
77 | throw std::logic_error("Cannot use invalid bindings"); | ||
78 | } | ||
79 | } | ||
80 | |||
81 | i++; | ||
82 | } | ||
83 | } | ||
84 | |||
85 | ~query() | ||
86 | { | ||
87 | sqlite3_finalize(ppstmt_); | ||
88 | } | ||
89 | |||
90 | std::vector<Object> all() const | ||
91 | { | ||
92 | std::vector<Object> result; | ||
93 | |||
94 | while (sqlite3_step(ppstmt_) == SQLITE_ROW) | ||
95 | { | ||
96 | result.emplace_back(*db_, ppstmt_); | ||
97 | } | ||
98 | |||
99 | sqlite3_reset(ppstmt_); | ||
100 | |||
101 | return result; | ||
102 | } | ||
103 | |||
104 | Object first() const | ||
105 | { | ||
106 | std::vector<Object> results = all(); | ||
107 | if (!results.empty()) | ||
108 | { | ||
109 | return results.front(); | ||
110 | } else { | ||
111 | throw std::logic_error("query returned empty dataset"); | ||
112 | } | ||
113 | } | ||
114 | |||
115 | private: | ||
116 | const database* db_; | ||
117 | sqlite3_stmt* ppstmt_; | ||
118 | |||
119 | }; | ||
120 | |||
121 | }; | ||
122 | |||
123 | #endif /* end of include guard: QUERY_H_7CC5284C */ | ||
diff --git a/lib/statement.cpp b/lib/statement.cpp new file mode 100644 index 0000000..52fa00d --- /dev/null +++ b/lib/statement.cpp | |||
@@ -0,0 +1,806 @@ | |||
1 | #include "statement.h" | ||
2 | #include <sstream> | ||
3 | #include <utility> | ||
4 | #include "filter.h" | ||
5 | #include "util.h" | ||
6 | #include "notion.h" | ||
7 | #include "word.h" | ||
8 | #include "group.h" | ||
9 | #include "frame.h" | ||
10 | #include "lemma.h" | ||
11 | #include "form.h" | ||
12 | #include "pronunciation.h" | ||
13 | |||
14 | namespace verbly { | ||
15 | |||
16 | statement::statement( | ||
17 | object context, | ||
18 | filter queryFilter) : | ||
19 | statement(getTableForContext(context), queryFilter.normalize(context)) | ||
20 | { | ||
21 | } | ||
22 | |||
23 | std::string statement::getQueryString(std::list<std::string> select, bool random, int limit) const | ||
24 | { | ||
25 | std::stringstream queryStream; | ||
26 | |||
27 | if (!withs_.empty()) | ||
28 | { | ||
29 | queryStream << "WITH RECURSIVE "; | ||
30 | |||
31 | std::list<std::string> ctes; | ||
32 | for (const with& cte : withs_) | ||
33 | { | ||
34 | std::stringstream cteStream; | ||
35 | cteStream << cte.getIdentifier(); | ||
36 | cteStream << " AS (SELECT "; | ||
37 | cteStream << cte.getTopTable(); | ||
38 | cteStream << ".* FROM "; | ||
39 | cteStream << cte.getTableForId(cte.getTopTable()); | ||
40 | cteStream << " AS "; | ||
41 | cteStream << cte.getTopTable(); | ||
42 | |||
43 | for (const join& j : cte.getJoins()) | ||
44 | { | ||
45 | cteStream << " "; | ||
46 | cteStream << j; | ||
47 | } | ||
48 | |||
49 | if (cte.getCondition().getType() != condition::type::empty) | ||
50 | { | ||
51 | cteStream << " WHERE "; | ||
52 | cteStream << cte.getCondition().toSql(); | ||
53 | } | ||
54 | |||
55 | cteStream << " UNION SELECT l.* FROM "; | ||
56 | cteStream << cte.getIdentifier(); | ||
57 | cteStream << " AS t INNER JOIN "; | ||
58 | cteStream << cte.getField().getTable(); | ||
59 | cteStream << " AS j ON t."; | ||
60 | cteStream << cte.getField().getColumn(); | ||
61 | cteStream << " = j."; | ||
62 | cteStream << cte.getField().getForeignJoinColumn(); | ||
63 | cteStream << " INNER JOIN "; | ||
64 | cteStream << cte.getTableForId(cte.getTopTable()); | ||
65 | cteStream << " AS l ON j."; | ||
66 | cteStream << cte.getField().getJoinColumn(); | ||
67 | cteStream << " = l."; | ||
68 | cteStream << cte.getField().getColumn(); | ||
69 | cteStream << ")"; | ||
70 | |||
71 | ctes.push_back(cteStream.str()); | ||
72 | } | ||
73 | |||
74 | queryStream << implode(std::begin(ctes), std::end(ctes), ", "); | ||
75 | queryStream << " "; | ||
76 | } | ||
77 | |||
78 | std::list<std::string> realSelect; | ||
79 | for (std::string& s : select) | ||
80 | { | ||
81 | realSelect.push_back(topTable_ + "." + s); | ||
82 | } | ||
83 | |||
84 | queryStream << "SELECT "; | ||
85 | queryStream << implode(std::begin(realSelect), std::end(realSelect), ", "); | ||
86 | queryStream << " FROM "; | ||
87 | queryStream << tables_.at(topTable_); | ||
88 | queryStream << " AS "; | ||
89 | queryStream << topTable_; | ||
90 | |||
91 | for (const join& j : joins_) | ||
92 | { | ||
93 | queryStream << " "; | ||
94 | queryStream << j; | ||
95 | } | ||
96 | |||
97 | if (topCondition_.getType() != condition::type::empty) | ||
98 | { | ||
99 | queryStream << " WHERE "; | ||
100 | queryStream << topCondition_.toSql(); | ||
101 | } | ||
102 | |||
103 | if (random) | ||
104 | { | ||
105 | queryStream << " ORDER BY RANDOM()"; | ||
106 | } | ||
107 | |||
108 | if (limit > 0) | ||
109 | { | ||
110 | queryStream << " LIMIT "; | ||
111 | queryStream << limit; | ||
112 | } | ||
113 | |||
114 | return queryStream.str(); | ||
115 | } | ||
116 | |||
117 | std::list<binding> statement::getBindings() const | ||
118 | { | ||
119 | std::list<binding> result; | ||
120 | |||
121 | for (const with& w : withs_) | ||
122 | { | ||
123 | for (binding value : w.getCondition().flattenBindings()) | ||
124 | { | ||
125 | result.push_back(std::move(value)); | ||
126 | } | ||
127 | } | ||
128 | |||
129 | for (binding value : topCondition_.flattenBindings()) | ||
130 | { | ||
131 | result.push_back(std::move(value)); | ||
132 | } | ||
133 | |||
134 | return result; | ||
135 | } | ||
136 | |||
137 | statement::statement( | ||
138 | std::string tableName, | ||
139 | filter clause, | ||
140 | int nextTableId, | ||
141 | int nextWithId) : | ||
142 | nextTableId_(nextTableId), | ||
143 | nextWithId_(nextWithId), | ||
144 | topTable_(instantiateTable(std::move(tableName))), | ||
145 | topCondition_(parseFilter(std::move(clause))) | ||
146 | { | ||
147 | } | ||
148 | |||
149 | statement::condition statement::parseFilter(filter clause) | ||
150 | { | ||
151 | switch (clause.getType()) | ||
152 | { | ||
153 | case filter::type::empty: | ||
154 | { | ||
155 | return {}; | ||
156 | } | ||
157 | |||
158 | case filter::type::singleton: | ||
159 | { | ||
160 | switch (clause.getField().getType()) | ||
161 | { | ||
162 | case field::type::undefined: | ||
163 | { | ||
164 | return {}; | ||
165 | } | ||
166 | |||
167 | case field::type::string: | ||
168 | case field::type::integer: | ||
169 | case field::type::boolean: | ||
170 | { | ||
171 | switch (clause.getComparison()) | ||
172 | { | ||
173 | case filter::comparison::is_null: | ||
174 | { | ||
175 | return condition(topTable_, clause.getField().getColumn(), true); | ||
176 | } | ||
177 | |||
178 | case filter::comparison::is_not_null: | ||
179 | { | ||
180 | return condition(topTable_, clause.getField().getColumn(), false); | ||
181 | } | ||
182 | |||
183 | case filter::comparison::int_equals: | ||
184 | { | ||
185 | return condition(topTable_, clause.getField().getColumn(), condition::comparison::equals, clause.getIntegerArgument()); | ||
186 | } | ||
187 | |||
188 | case filter::comparison::int_does_not_equal: | ||
189 | { | ||
190 | return condition(topTable_, clause.getField().getColumn(), condition::comparison::does_not_equal, clause.getIntegerArgument()); | ||
191 | } | ||
192 | |||
193 | case filter::comparison::int_is_at_least: | ||
194 | { | ||
195 | return condition(topTable_, clause.getField().getColumn(), condition::comparison::is_at_least, clause.getIntegerArgument()); | ||
196 | } | ||
197 | |||
198 | case filter::comparison::int_is_greater_than: | ||
199 | { | ||
200 | return condition(topTable_, clause.getField().getColumn(), condition::comparison::is_greater_than, clause.getIntegerArgument()); | ||
201 | } | ||
202 | |||
203 | case filter::comparison::int_is_at_most: | ||
204 | { | ||
205 | return condition(topTable_, clause.getField().getColumn(), condition::comparison::is_at_most, clause.getIntegerArgument()); | ||
206 | } | ||
207 | |||
208 | case filter::comparison::int_is_less_than: | ||
209 | { | ||
210 | return condition(topTable_, clause.getField().getColumn(), condition::comparison::is_less_than, clause.getIntegerArgument()); | ||
211 | } | ||
212 | |||
213 | case filter::comparison::boolean_equals: | ||
214 | { | ||
215 | return condition(topTable_, clause.getField().getColumn(), condition::comparison::equals, clause.getBooleanArgument() ? 1 : 0); | ||
216 | } | ||
217 | |||
218 | case filter::comparison::string_equals: | ||
219 | { | ||
220 | return condition(topTable_, clause.getField().getColumn(), condition::comparison::equals, clause.getStringArgument()); | ||
221 | } | ||
222 | |||
223 | case filter::comparison::string_does_not_equal: | ||
224 | { | ||
225 | return condition(topTable_, clause.getField().getColumn(), condition::comparison::does_not_equal, clause.getStringArgument()); | ||
226 | } | ||
227 | |||
228 | case filter::comparison::string_is_like: | ||
229 | { | ||
230 | return condition(topTable_, clause.getField().getColumn(), condition::comparison::is_like, clause.getStringArgument()); | ||
231 | } | ||
232 | |||
233 | case filter::comparison::string_is_not_like: | ||
234 | { | ||
235 | return condition(topTable_, clause.getField().getColumn(), condition::comparison::is_not_like, clause.getStringArgument()); | ||
236 | } | ||
237 | |||
238 | case filter::comparison::matches: | ||
239 | case filter::comparison::does_not_match: | ||
240 | case filter::comparison::hierarchally_matches: | ||
241 | case filter::comparison::does_not_hierarchally_match: | ||
242 | { | ||
243 | throw std::logic_error("Invalid comparison type for field"); | ||
244 | } | ||
245 | } | ||
246 | } | ||
247 | |||
248 | case field::type::join: | ||
249 | { | ||
250 | std::string joinTableName; | ||
251 | if (clause.getField().hasTable()) | ||
252 | { | ||
253 | joinTableName = clause.getField().getTable(); | ||
254 | } else { | ||
255 | joinTableName = getTableForContext(clause.getField().getJoinObject()); | ||
256 | } | ||
257 | |||
258 | statement joinStmt( | ||
259 | joinTableName, | ||
260 | clause.getJoinCondition().normalize(clause.getField().getJoinObject()), | ||
261 | nextTableId_, | ||
262 | nextWithId_); | ||
263 | |||
264 | std::string joinTable = joinStmt.topTable_; | ||
265 | condition curCond = integrate(std::move(joinStmt)); | ||
266 | |||
267 | bool outer = false; | ||
268 | if (clause.getComparison() == filter::comparison::does_not_match) | ||
269 | { | ||
270 | outer = true; | ||
271 | |||
272 | curCond &= condition(joinTable, clause.getField().getColumn(), true); | ||
273 | } | ||
274 | |||
275 | joins_.emplace_back(outer, joinTableName, topTable_, clause.getField().getColumn(), joinTable, clause.getField().getColumn()); | ||
276 | |||
277 | return curCond; | ||
278 | } | ||
279 | |||
280 | case field::type::join_through: | ||
281 | { | ||
282 | statement joinStmt( | ||
283 | getTableForContext(clause.getField().getJoinObject()), | ||
284 | clause.getJoinCondition().normalize(clause.getField().getJoinObject()), | ||
285 | nextTableId_, | ||
286 | nextWithId_); | ||
287 | |||
288 | std::string joinTable = joinStmt.topTable_; | ||
289 | std::string throughTable = instantiateTable(clause.getField().getTable()); | ||
290 | condition curCond = integrate(std::move(joinStmt)); | ||
291 | |||
292 | bool outer = false; | ||
293 | if (clause.getComparison() == filter::comparison::does_not_match) | ||
294 | { | ||
295 | outer = true; | ||
296 | |||
297 | curCond &= condition(throughTable, clause.getField().getJoinColumn(), true); | ||
298 | } | ||
299 | |||
300 | joins_.emplace_back(outer, clause.getField().getTable(), topTable_, clause.getField().getColumn(), throughTable, clause.getField().getJoinColumn()); | ||
301 | joins_.emplace_back(false, getTableForContext(clause.getField().getJoinObject()), throughTable, clause.getField().getForeignJoinColumn(), joinTable, clause.getField().getForeignColumn()); | ||
302 | |||
303 | return curCond; | ||
304 | } | ||
305 | |||
306 | case field::type::hierarchal_join: | ||
307 | { | ||
308 | std::string withName = std::string(clause.getField().getTable()) + "_tree_" + std::to_string(nextWithId_++); | ||
309 | std::string withInstName = instantiateTable(withName); | ||
310 | |||
311 | bool outer = false; | ||
312 | if (clause.getComparison() == filter::comparison::does_not_hierarchally_match) | ||
313 | { | ||
314 | outer = true; | ||
315 | } | ||
316 | |||
317 | joins_.emplace_back(outer, withName, topTable_, clause.getField().getColumn(), withInstName, clause.getField().getColumn()); | ||
318 | |||
319 | statement withStmt( | ||
320 | getTableForContext(clause.getField().getObject()), | ||
321 | clause.getJoinCondition().normalize(clause.getField().getObject()), | ||
322 | nextTableId_, | ||
323 | nextWithId_); | ||
324 | |||
325 | for (auto& w : withStmt.withs_) | ||
326 | { | ||
327 | withs_.push_back(std::move(w)); | ||
328 | } | ||
329 | |||
330 | nextTableId_ = withStmt.nextTableId_; | ||
331 | nextWithId_ = withStmt.nextWithId_; | ||
332 | |||
333 | withs_.emplace_back( | ||
334 | withName, | ||
335 | clause.getField(), | ||
336 | std::move(withStmt.tables_), | ||
337 | std::move(withStmt.topTable_), | ||
338 | std::move(withStmt.topCondition_), | ||
339 | std::move(withStmt.joins_)); | ||
340 | |||
341 | if (clause.getComparison() == filter::comparison::does_not_hierarchally_match) | ||
342 | { | ||
343 | return condition(withInstName, clause.getField().getColumn(), true); | ||
344 | } else { | ||
345 | return {}; | ||
346 | } | ||
347 | } | ||
348 | } | ||
349 | } | ||
350 | |||
351 | case filter::type::group: | ||
352 | { | ||
353 | condition grp(clause.getOrlogic()); | ||
354 | |||
355 | for (const filter& child : clause) | ||
356 | { | ||
357 | condition newChild = parseFilter(child); | ||
358 | if (newChild.getType() != condition::type::empty) | ||
359 | { | ||
360 | grp += std::move(newChild); | ||
361 | } | ||
362 | } | ||
363 | |||
364 | if (grp.getChildren().empty()) | ||
365 | { | ||
366 | grp = {}; | ||
367 | } | ||
368 | |||
369 | return grp; | ||
370 | } | ||
371 | } | ||
372 | } | ||
373 | |||
374 | std::string statement::instantiateTable(std::string name) | ||
375 | { | ||
376 | std::string identifier = name + "_" + std::to_string(nextTableId_++); | ||
377 | tables_[identifier] = name; | ||
378 | |||
379 | return identifier; | ||
380 | } | ||
381 | |||
382 | statement::condition statement::integrate(statement subStmt) | ||
383 | { | ||
384 | for (auto& mapping : subStmt.tables_) | ||
385 | { | ||
386 | tables_[mapping.first] = mapping.second; | ||
387 | } | ||
388 | |||
389 | for (auto& j : subStmt.joins_) | ||
390 | { | ||
391 | joins_.push_back(j); | ||
392 | } | ||
393 | |||
394 | for (auto& w : subStmt.withs_) | ||
395 | { | ||
396 | withs_.push_back(w); | ||
397 | } | ||
398 | |||
399 | nextTableId_ = subStmt.nextTableId_; | ||
400 | nextWithId_ = subStmt.nextWithId_; | ||
401 | |||
402 | return subStmt.topCondition_; | ||
403 | } | ||
404 | |||
405 | std::ostream& operator<<(std::ostream& oss, const statement::join& j) | ||
406 | { | ||
407 | if (j.isOuterJoin()) | ||
408 | { | ||
409 | oss << "LEFT"; | ||
410 | } else { | ||
411 | oss << "INNER"; | ||
412 | } | ||
413 | |||
414 | return oss | ||
415 | << " JOIN " | ||
416 | << j.getForeignTableName() | ||
417 | << " AS " | ||
418 | << j.getForeignTable() | ||
419 | << " ON " | ||
420 | << j.getForeignTable() | ||
421 | << "." | ||
422 | << j.getForeignColumn() | ||
423 | << " = " | ||
424 | << j.getJoinTable() | ||
425 | << "." | ||
426 | << j.getJoinColumn(); | ||
427 | } | ||
428 | |||
429 | statement::condition::condition(const condition& other) | ||
430 | { | ||
431 | type_ = other.type_; | ||
432 | |||
433 | switch (type_) | ||
434 | { | ||
435 | case type::empty: | ||
436 | { | ||
437 | break; | ||
438 | } | ||
439 | |||
440 | case type::singleton: | ||
441 | { | ||
442 | new(&singleton_.table_) std::string(other.singleton_.table_); | ||
443 | new(&singleton_.column_) std::string(other.singleton_.column_); | ||
444 | singleton_.comparison_ = other.singleton_.comparison_; | ||
445 | new(&singleton_.value_) binding(other.singleton_.value_); | ||
446 | |||
447 | break; | ||
448 | } | ||
449 | |||
450 | case type::group: | ||
451 | { | ||
452 | new(&group_.children_) std::list<condition>(other.group_.children_); | ||
453 | group_.orlogic_ = other.group_.orlogic_; | ||
454 | |||
455 | break; | ||
456 | } | ||
457 | } | ||
458 | } | ||
459 | |||
460 | statement::condition::condition(condition&& other) : condition() | ||
461 | { | ||
462 | swap(*this, other); | ||
463 | } | ||
464 | |||
465 | statement::condition& statement::condition::operator=(condition other) | ||
466 | { | ||
467 | swap(*this, other); | ||
468 | |||
469 | return *this; | ||
470 | } | ||
471 | |||
472 | void swap(statement::condition& first, statement::condition& second) | ||
473 | { | ||
474 | using type = statement::condition::type; | ||
475 | using condition = statement::condition; | ||
476 | |||
477 | type tempType = first.type_; | ||
478 | std::string tempTable; | ||
479 | std::string tempColumn; | ||
480 | condition::comparison tempComparison; | ||
481 | binding tempBinding; | ||
482 | std::list<condition> tempChildren; | ||
483 | bool tempOrlogic; | ||
484 | |||
485 | switch (tempType) | ||
486 | { | ||
487 | case type::empty: | ||
488 | { | ||
489 | break; | ||
490 | } | ||
491 | |||
492 | case type::singleton: | ||
493 | { | ||
494 | tempTable = std::move(first.singleton_.table_); | ||
495 | tempColumn = std::move(first.singleton_.column_); | ||
496 | tempComparison = first.singleton_.comparison_; | ||
497 | tempBinding = std::move(first.singleton_.value_); | ||
498 | |||
499 | break; | ||
500 | } | ||
501 | |||
502 | case type::group: | ||
503 | { | ||
504 | tempChildren = std::move(first.group_.children_); | ||
505 | tempOrlogic = first.group_.orlogic_; | ||
506 | |||
507 | break; | ||
508 | } | ||
509 | } | ||
510 | |||
511 | first.~condition(); | ||
512 | |||
513 | first.type_ = second.type_; | ||
514 | |||
515 | switch (first.type_) | ||
516 | { | ||
517 | case type::empty: | ||
518 | { | ||
519 | break; | ||
520 | } | ||
521 | |||
522 | case type::singleton: | ||
523 | { | ||
524 | new(&first.singleton_.table_) std::string(std::move(second.singleton_.table_)); | ||
525 | new(&first.singleton_.column_) std::string(std::move(second.singleton_.column_)); | ||
526 | first.singleton_.comparison_ = second.singleton_.comparison_; | ||
527 | new(&first.singleton_.value_) binding(std::move(second.singleton_.value_)); | ||
528 | |||
529 | break; | ||
530 | } | ||
531 | |||
532 | case type::group: | ||
533 | { | ||
534 | new(&first.group_.children_) std::list<condition>(std::move(second.group_.children_)); | ||
535 | first.group_.orlogic_ = second.group_.orlogic_; | ||
536 | |||
537 | break; | ||
538 | } | ||
539 | } | ||
540 | |||
541 | second.~condition(); | ||
542 | |||
543 | second.type_ = tempType; | ||
544 | |||
545 | switch (second.type_) | ||
546 | { | ||
547 | case type::empty: | ||
548 | { | ||
549 | break; | ||
550 | } | ||
551 | |||
552 | case type::singleton: | ||
553 | { | ||
554 | new(&second.singleton_.table_) std::string(std::move(tempTable)); | ||
555 | new(&second.singleton_.column_) std::string(std::move(tempColumn)); | ||
556 | second.singleton_.comparison_ = tempComparison; | ||
557 | new(&second.singleton_.value_) binding(std::move(tempBinding)); | ||
558 | |||
559 | break; | ||
560 | } | ||
561 | |||
562 | case type::group: | ||
563 | { | ||
564 | new(&second.group_.children_) std::list<condition>(std::move(tempChildren)); | ||
565 | second.group_.orlogic_ = tempOrlogic; | ||
566 | |||
567 | break; | ||
568 | } | ||
569 | } | ||
570 | } | ||
571 | |||
572 | statement::condition::~condition() | ||
573 | { | ||
574 | switch (type_) | ||
575 | { | ||
576 | case type::empty: | ||
577 | { | ||
578 | break; | ||
579 | } | ||
580 | |||
581 | case type::singleton: | ||
582 | { | ||
583 | using string_type = std::string; | ||
584 | |||
585 | singleton_.table_.~string_type(); | ||
586 | singleton_.column_.~string_type(); | ||
587 | singleton_.value_.~binding(); | ||
588 | |||
589 | break; | ||
590 | } | ||
591 | |||
592 | case type::group: | ||
593 | { | ||
594 | using list_type = std::list<condition>; | ||
595 | |||
596 | group_.children_.~list_type(); | ||
597 | |||
598 | break; | ||
599 | } | ||
600 | } | ||
601 | } | ||
602 | |||
603 | statement::condition::condition() : type_(type::empty) | ||
604 | { | ||
605 | } | ||
606 | |||
607 | statement::condition::condition( | ||
608 | std::string table, | ||
609 | std::string column, | ||
610 | bool isNull) : | ||
611 | type_(type::singleton) | ||
612 | { | ||
613 | new(&singleton_.table_) std::string(std::move(table)); | ||
614 | new(&singleton_.column_) std::string(std::move(column)); | ||
615 | |||
616 | if (isNull) | ||
617 | { | ||
618 | singleton_.comparison_ = comparison::is_null; | ||
619 | } else { | ||
620 | singleton_.comparison_ = comparison::is_not_null; | ||
621 | } | ||
622 | } | ||
623 | |||
624 | statement::condition::condition( | ||
625 | std::string table, | ||
626 | std::string column, | ||
627 | comparison comp, | ||
628 | binding value) : | ||
629 | type_(type::singleton) | ||
630 | { | ||
631 | new(&singleton_.table_) std::string(std::move(table)); | ||
632 | new(&singleton_.column_) std::string(std::move(column)); | ||
633 | singleton_.comparison_ = comp; | ||
634 | new(&singleton_.value_) binding(std::move(value)); | ||
635 | } | ||
636 | |||
637 | std::string statement::condition::toSql() const | ||
638 | { | ||
639 | switch (type_) | ||
640 | { | ||
641 | case type::empty: | ||
642 | { | ||
643 | return ""; | ||
644 | } | ||
645 | |||
646 | case type::singleton: | ||
647 | { | ||
648 | switch (singleton_.comparison_) | ||
649 | { | ||
650 | case comparison::equals: | ||
651 | { | ||
652 | return singleton_.table_ + "." + singleton_.column_ + " = ?"; | ||
653 | } | ||
654 | |||
655 | case comparison::does_not_equal: | ||
656 | { | ||
657 | return singleton_.table_ + "." + singleton_.column_ + " != ?"; | ||
658 | } | ||
659 | |||
660 | case comparison::is_greater_than: | ||
661 | { | ||
662 | return singleton_.table_ + "." + singleton_.column_ + " > ?"; | ||
663 | } | ||
664 | |||
665 | case comparison::is_at_most: | ||
666 | { | ||
667 | return singleton_.table_ + "." + singleton_.column_ + " <= ?"; | ||
668 | } | ||
669 | |||
670 | case comparison::is_less_than: | ||
671 | { | ||
672 | return singleton_.table_ + "." + singleton_.column_ + " < ?"; | ||
673 | } | ||
674 | |||
675 | case comparison::is_at_least: | ||
676 | { | ||
677 | return singleton_.table_ + "." + singleton_.column_ + " >= ?"; | ||
678 | } | ||
679 | |||
680 | case comparison::is_like: | ||
681 | { | ||
682 | return singleton_.table_ + "." + singleton_.column_ + " LIKE ?"; | ||
683 | } | ||
684 | |||
685 | case comparison::is_not_like: | ||
686 | { | ||
687 | return singleton_.table_ + "." + singleton_.column_ + " NOT LIKE ?"; | ||
688 | } | ||
689 | |||
690 | case comparison::is_not_null: | ||
691 | { | ||
692 | return singleton_.table_ + "." + singleton_.column_ + " IS NOT NULL"; | ||
693 | } | ||
694 | |||
695 | case comparison::is_null: | ||
696 | { | ||
697 | return singleton_.table_ + "." + singleton_.column_ + " IS NULL"; | ||
698 | } | ||
699 | } | ||
700 | } | ||
701 | |||
702 | case type::group: | ||
703 | { | ||
704 | std::list<std::string> clauses; | ||
705 | for (const condition& cond : group_.children_) | ||
706 | { | ||
707 | clauses.push_back(cond.toSql()); | ||
708 | } | ||
709 | |||
710 | return implode(std::begin(clauses), std::end(clauses), group_.orlogic_ ? " OR " : " AND "); | ||
711 | } | ||
712 | } | ||
713 | } | ||
714 | |||
715 | std::list<binding> statement::condition::flattenBindings() const | ||
716 | { | ||
717 | switch (type_) | ||
718 | { | ||
719 | case type::empty: | ||
720 | { | ||
721 | return {}; | ||
722 | } | ||
723 | |||
724 | case type::singleton: | ||
725 | { | ||
726 | return {singleton_.value_}; | ||
727 | } | ||
728 | |||
729 | case type::group: | ||
730 | { | ||
731 | std::list<binding> bindings; | ||
732 | for (const condition& cond : group_.children_) | ||
733 | { | ||
734 | for (binding value : cond.flattenBindings()) | ||
735 | { | ||
736 | bindings.push_back(std::move(value)); | ||
737 | } | ||
738 | } | ||
739 | |||
740 | return bindings; | ||
741 | } | ||
742 | } | ||
743 | } | ||
744 | |||
745 | statement::condition::condition(bool orlogic) : type_(type::group) | ||
746 | { | ||
747 | new(&group_.children_) std::list<condition>(); | ||
748 | group_.orlogic_ = orlogic; | ||
749 | } | ||
750 | |||
751 | statement::condition& statement::condition::operator+=(condition n) | ||
752 | { | ||
753 | if (type_ == type::group) | ||
754 | { | ||
755 | group_.children_.push_back(std::move(n)); | ||
756 | |||
757 | return *this; | ||
758 | } else { | ||
759 | throw std::domain_error("Cannot add condition to non-group condition"); | ||
760 | } | ||
761 | } | ||
762 | |||
763 | statement::condition& statement::condition::operator&=(condition n) | ||
764 | { | ||
765 | switch (type_) | ||
766 | { | ||
767 | case type::empty: | ||
768 | { | ||
769 | *this = std::move(n); | ||
770 | |||
771 | break; | ||
772 | } | ||
773 | |||
774 | case type::singleton: | ||
775 | { | ||
776 | condition grp(false); | ||
777 | grp += *this; | ||
778 | grp += std::move(n); | ||
779 | |||
780 | *this = grp; | ||
781 | |||
782 | break; | ||
783 | } | ||
784 | |||
785 | case type::group: | ||
786 | { | ||
787 | *this += std::move(n); | ||
788 | |||
789 | break; | ||
790 | } | ||
791 | } | ||
792 | |||
793 | return *this; | ||
794 | } | ||
795 | |||
796 | const std::list<statement::condition>& statement::condition::getChildren() const | ||
797 | { | ||
798 | if (type_ == type::group) | ||
799 | { | ||
800 | return group_.children_; | ||
801 | } else { | ||
802 | throw std::domain_error("Cannot get children of non-group condition"); | ||
803 | } | ||
804 | } | ||
805 | |||
806 | }; | ||
diff --git a/lib/statement.h b/lib/statement.h new file mode 100644 index 0000000..a528d60 --- /dev/null +++ b/lib/statement.h | |||
@@ -0,0 +1,272 @@ | |||
1 | #ifndef STATEMENT_H_29F51659 | ||
2 | #define STATEMENT_H_29F51659 | ||
3 | |||
#include <string>
#include <list>
#include <map>
#include <set>
#include <iosfwd>
#include <stdexcept>
#include <utility>
#include "binding.h"
#include "enums.h"
#include "field.h"
#include "filter.h"
12 | |||
13 | namespace verbly { | ||
14 | |||
15 | class filter; | ||
16 | |||
17 | class statement { | ||
18 | public: | ||
19 | |||
20 | statement(object context, filter queryFilter); | ||
21 | |||
22 | std::string getQueryString(std::list<std::string> select, bool random, int limit) const; | ||
23 | |||
24 | std::list<binding> getBindings() const; | ||
25 | |||
26 | private: | ||
27 | |||
28 | class join { | ||
29 | public: | ||
30 | |||
31 | join( | ||
32 | bool outer, | ||
33 | std::string foreignTableName, | ||
34 | std::string joinTable, | ||
35 | std::string joinColumn, | ||
36 | std::string foreignTable, | ||
37 | std::string foreignColumn) : | ||
38 | outer_(outer), | ||
39 | foreignTableName_(std::move(foreignTableName)), | ||
40 | joinTable_(std::move(joinTable)), | ||
41 | joinColumn_(std::move(joinColumn)), | ||
42 | foreignTable_(std::move(foreignTable)), | ||
43 | foreignColumn_(std::move(foreignColumn)) | ||
44 | { | ||
45 | } | ||
46 | |||
47 | bool isOuterJoin() const | ||
48 | { | ||
49 | return outer_; | ||
50 | } | ||
51 | |||
52 | const std::string& getForeignTableName() const | ||
53 | { | ||
54 | return foreignTableName_; | ||
55 | } | ||
56 | |||
57 | const std::string& getJoinTable() const | ||
58 | { | ||
59 | return joinTable_; | ||
60 | } | ||
61 | |||
62 | const std::string& getJoinColumn() const | ||
63 | { | ||
64 | return joinColumn_; | ||
65 | } | ||
66 | |||
67 | const std::string& getForeignTable() const | ||
68 | { | ||
69 | return foreignTable_; | ||
70 | } | ||
71 | |||
72 | const std::string& getForeignColumn() const | ||
73 | { | ||
74 | return foreignColumn_; | ||
75 | } | ||
76 | |||
77 | private: | ||
78 | bool outer_ = false; | ||
79 | std::string foreignTableName_; | ||
80 | std::string joinTable_; | ||
81 | std::string joinColumn_; | ||
82 | std::string foreignTable_; | ||
83 | std::string foreignColumn_; | ||
84 | |||
85 | }; | ||
86 | |||
87 | friend std::ostream& operator<<(std::ostream& oss, const join& j); | ||
88 | |||
89 | class condition { | ||
90 | public: | ||
91 | enum class type { | ||
92 | empty, | ||
93 | singleton, | ||
94 | group | ||
95 | }; | ||
96 | |||
97 | enum class comparison { | ||
98 | equals, | ||
99 | does_not_equal, | ||
100 | is_greater_than, | ||
101 | is_at_most, | ||
102 | is_less_than, | ||
103 | is_at_least, | ||
104 | is_like, | ||
105 | is_not_like, | ||
106 | is_not_null, | ||
107 | is_null | ||
108 | }; | ||
109 | |||
110 | // Copy and move constructors | ||
111 | |||
112 | condition(const condition& other); | ||
113 | condition(condition&& other); | ||
114 | |||
115 | // Assignment | ||
116 | |||
117 | condition& operator=(condition other); | ||
118 | |||
119 | // Swap | ||
120 | |||
121 | friend void swap(condition& first, condition& second); | ||
122 | |||
123 | // Destructor | ||
124 | |||
125 | ~condition(); | ||
126 | |||
127 | // Accessors | ||
128 | |||
129 | type getType() const | ||
130 | { | ||
131 | return type_; | ||
132 | } | ||
133 | |||
134 | // Empty | ||
135 | |||
136 | condition(); | ||
137 | |||
138 | // Singleton | ||
139 | |||
140 | condition(std::string table, std::string column, bool isNull); | ||
141 | |||
142 | condition(std::string table, std::string column, comparison comp, binding value); | ||
143 | |||
144 | // Group | ||
145 | |||
146 | explicit condition(bool orlogic); | ||
147 | |||
148 | condition& operator+=(condition n); | ||
149 | |||
150 | condition& operator&=(condition n); | ||
151 | |||
152 | const std::list<condition>& getChildren() const; | ||
153 | |||
154 | // Utility | ||
155 | |||
156 | std::string toSql() const; | ||
157 | |||
158 | std::list<binding> flattenBindings() const; | ||
159 | |||
160 | private: | ||
161 | union { | ||
162 | struct { | ||
163 | std::string table_; | ||
164 | std::string column_; | ||
165 | comparison comparison_; | ||
166 | binding value_; | ||
167 | } singleton_; | ||
168 | struct { | ||
169 | std::list<condition> children_; | ||
170 | bool orlogic_; | ||
171 | } group_; | ||
172 | }; | ||
173 | type type_; | ||
174 | }; | ||
175 | |||
176 | friend void swap(condition& first, condition& second); | ||
177 | |||
178 | class with { | ||
179 | public: | ||
180 | |||
181 | with( | ||
182 | std::string identifier, | ||
183 | field f, | ||
184 | std::map<std::string, std::string> tables, | ||
185 | std::string topTable, | ||
186 | condition where, | ||
187 | std::list<join> joins) : | ||
188 | identifier_(std::move(identifier)), | ||
189 | field_(f), | ||
190 | tables_(std::move(tables)), | ||
191 | topTable_(std::move(topTable)), | ||
192 | topCondition_(std::move(where)), | ||
193 | joins_(std::move(joins)) | ||
194 | { | ||
195 | } | ||
196 | |||
197 | const std::string& getIdentifier() const | ||
198 | { | ||
199 | return identifier_; | ||
200 | } | ||
201 | |||
202 | field getField() const | ||
203 | { | ||
204 | return field_; | ||
205 | } | ||
206 | |||
207 | std::string getTableForId(std::string identifier) const | ||
208 | { | ||
209 | return tables_.at(identifier); | ||
210 | } | ||
211 | |||
212 | const std::string& getTopTable() const | ||
213 | { | ||
214 | return topTable_; | ||
215 | } | ||
216 | |||
217 | const condition& getCondition() const | ||
218 | { | ||
219 | return topCondition_; | ||
220 | } | ||
221 | |||
222 | const std::list<join>& getJoins() const | ||
223 | { | ||
224 | return joins_; | ||
225 | } | ||
226 | |||
227 | private: | ||
228 | std::string identifier_; | ||
229 | field field_; | ||
230 | std::map<std::string, std::string> tables_; | ||
231 | std::string topTable_; | ||
232 | condition topCondition_; | ||
233 | std::list<join> joins_; | ||
234 | |||
235 | }; | ||
236 | |||
237 | static constexpr const char* getTableForContext(object context) | ||
238 | { | ||
239 | return (context == object::notion) ? "notions" | ||
240 | : (context == object::word) ? "words" | ||
241 | : (context == object::group) ? "groups" | ||
242 | : (context == object::frame) ? "frames" | ||
243 | : (context == object::lemma) ? "lemmas_forms" | ||
244 | : (context == object::form) ? "forms" | ||
245 | : (context == object::pronunciation) ? "pronunciations" | ||
246 | : throw std::domain_error("Provided context has no associated table"); | ||
247 | } | ||
248 | |||
249 | static const std::list<field> getSelectForContext(object context); | ||
250 | |||
251 | statement(std::string tableName, filter clause, int nextTableId = 0, int nextWithId = 0); | ||
252 | |||
253 | condition parseFilter(filter queryFilter); | ||
254 | |||
255 | std::string instantiateTable(std::string name); | ||
256 | |||
257 | condition integrate(statement subStmt); | ||
258 | |||
259 | int nextTableId_; | ||
260 | int nextWithId_; | ||
261 | |||
262 | std::map<std::string, std::string> tables_; | ||
263 | std::string topTable_; | ||
264 | std::list<join> joins_; | ||
265 | std::list<with> withs_; | ||
266 | condition topCondition_; | ||
267 | |||
268 | }; | ||
269 | |||
270 | }; | ||
271 | |||
272 | #endif /* end of include guard: STATEMENT_H_29F51659 */ | ||
diff --git a/lib/util.h b/lib/util.h index fb5fe67..b74b050 100644 --- a/lib/util.h +++ b/lib/util.h | |||
@@ -1,6 +1,10 @@ | |||
1 | #ifndef UTIL_H_15DDCA2D | 1 | #ifndef UTIL_H_15DDCA2D |
2 | #define UTIL_H_15DDCA2D | 2 | #define UTIL_H_15DDCA2D |
3 | 3 | ||
4 | #include <string> | ||
5 | #include <sstream> | ||
6 | #include <iterator> | ||
7 | |||
4 | namespace verbly { | 8 | namespace verbly { |
5 | 9 | ||
6 | template <class InputIterator> | 10 | template <class InputIterator> |
@@ -21,25 +25,33 @@ namespace verbly { | |||
21 | return result.str(); | 25 | return result.str(); |
22 | } | 26 | } |
23 | 27 | ||
// Splits input on every occurrence of delimiter and writes the pieces, in
// order, through the output iterator out.
//
// - Adjacent delimiters and a leading delimiter yield empty pieces.
// - A trailing delimiter does not yield a final empty piece, and an empty
//   input yields nothing.
// - An empty delimiter cannot divide anything, so a non-empty input is
//   written as a single piece. (Searching for "" always matches at the
//   current position without consuming any text, which would never finish.)
template <class OutputIterator>
void split(std::string input, std::string delimiter, OutputIterator out)
{
  if (delimiter.empty())
  {
    if (!input.empty())
    {
      *out = input;
      ++out;
    }

    return;
  }

  // Walk the string with an offset instead of re-copying the remainder after
  // every piece. size_type (not int) keeps the npos comparison exact.
  std::string::size_type start = 0;
  while (start < input.length())
  {
    const std::string::size_type divider = input.find(delimiter, start);
    if (divider == std::string::npos)
    {
      *out = input.substr(start);
      ++out;

      break;
    }

    *out = input.substr(start, divider - start);
    ++out;

    start = divider + delimiter.length();
  }
}

// Convenience overload: splits input on delimiter and returns the pieces in a
// freshly built Container, which must support push_back.
template <class Container>
Container split(std::string input, std::string delimiter)
{
  Container result;

  split(input, delimiter, std::back_inserter(result));

  return result;
}
diff --git a/lib/verb.cpp b/lib/verb.cpp deleted file mode 100644 index 1f45d53..0000000 --- a/lib/verb.cpp +++ /dev/null | |||
@@ -1,64 +0,0 @@ | |||
1 | #include "verbly.h" | ||
2 | |||
3 | namespace verbly { | ||
4 | |||
5 | verb::verb() | ||
6 | { | ||
7 | |||
8 | } | ||
9 | |||
10 | verb::verb(const data& _data, int _id) : word(_data, _id) | ||
11 | { | ||
12 | |||
13 | } | ||
14 | |||
15 | std::string verb::base_form() const | ||
16 | { | ||
17 | assert(_valid == true); | ||
18 | |||
19 | return _infinitive; | ||
20 | } | ||
21 | |||
22 | std::string verb::infinitive_form() const | ||
23 | { | ||
24 | assert(_valid == true); | ||
25 | |||
26 | return _infinitive; | ||
27 | } | ||
28 | |||
29 | std::string verb::past_tense_form() const | ||
30 | { | ||
31 | assert(_valid == true); | ||
32 | |||
33 | return _past_tense; | ||
34 | } | ||
35 | |||
36 | std::string verb::past_participle_form() const | ||
37 | { | ||
38 | assert(_valid == true); | ||
39 | |||
40 | return _past_participle; | ||
41 | } | ||
42 | |||
43 | std::string verb::ing_form() const | ||
44 | { | ||
45 | assert(_valid == true); | ||
46 | |||
47 | return _ing_form; | ||
48 | } | ||
49 | |||
50 | std::string verb::s_form() const | ||
51 | { | ||
52 | assert(_valid == true); | ||
53 | |||
54 | return _s_form; | ||
55 | } | ||
56 | |||
57 | frame_query verb::frames() const | ||
58 | { | ||
59 | assert(_valid == true); | ||
60 | |||
61 | return _data->frames().for_verb(*this); | ||
62 | } | ||
63 | |||
64 | }; | ||
diff --git a/lib/verb.h b/lib/verb.h deleted file mode 100644 index 7a2486e..0000000 --- a/lib/verb.h +++ /dev/null | |||
@@ -1,34 +0,0 @@ | |||
1 | #ifndef VERB_H_BCC929AD | ||
2 | #define VERB_H_BCC929AD | ||
3 | |||
4 | namespace verbly { | ||
5 | |||
6 | class frame_query; | ||
7 | |||
8 | class verb : public word { | ||
9 | private: | ||
10 | std::string _infinitive; | ||
11 | std::string _past_tense; | ||
12 | std::string _past_participle; | ||
13 | std::string _ing_form; | ||
14 | std::string _s_form; | ||
15 | |||
16 | friend class verb_query; | ||
17 | |||
18 | public: | ||
19 | verb(); | ||
20 | verb(const data& _data, int _id); | ||
21 | |||
22 | std::string base_form() const; | ||
23 | std::string infinitive_form() const; | ||
24 | std::string past_tense_form() const; | ||
25 | std::string past_participle_form() const; | ||
26 | std::string ing_form() const; | ||
27 | std::string s_form() const; | ||
28 | |||
29 | frame_query frames() const; | ||
30 | }; | ||
31 | |||
32 | }; | ||
33 | |||
34 | #endif /* end of include guard: VERB_H_BCC929AD */ | ||
diff --git a/lib/verb_query.cpp b/lib/verb_query.cpp deleted file mode 100644 index 4e6c253..0000000 --- a/lib/verb_query.cpp +++ /dev/null | |||
@@ -1,315 +0,0 @@ | |||
1 | #include "verbly.h" | ||
2 | |||
3 | namespace verbly { | ||
4 | |||
5 | verb_query::verb_query(const data& _data) : _data(_data) | ||
6 | { | ||
7 | |||
8 | } | ||
9 | |||
10 | verb_query& verb_query::limit(int _limit) | ||
11 | { | ||
12 | if ((_limit > 0) || (_limit == unlimited)) | ||
13 | { | ||
14 | this->_limit = _limit; | ||
15 | } | ||
16 | |||
17 | return *this; | ||
18 | } | ||
19 | |||
20 | verb_query& verb_query::random() | ||
21 | { | ||
22 | this->_random = true; | ||
23 | |||
24 | return *this; | ||
25 | } | ||
26 | |||
27 | verb_query& verb_query::except(const verb& _word) | ||
28 | { | ||
29 | _except.push_back(_word); | ||
30 | |||
31 | return *this; | ||
32 | } | ||
33 | |||
34 | verb_query& verb_query::rhymes_with(const word& _word) | ||
35 | { | ||
36 | for (auto rhyme : _word.get_rhymes()) | ||
37 | { | ||
38 | _rhymes.push_back(rhyme); | ||
39 | } | ||
40 | |||
41 | if (dynamic_cast<const verb*>(&_word) != nullptr) | ||
42 | { | ||
43 | _except.push_back(dynamic_cast<const verb&>(_word)); | ||
44 | } | ||
45 | |||
46 | return *this; | ||
47 | } | ||
48 | |||
49 | verb_query& verb_query::rhymes_with(rhyme _r) | ||
50 | { | ||
51 | _rhymes.push_back(_r); | ||
52 | |||
53 | return *this; | ||
54 | } | ||
55 | |||
56 | verb_query& verb_query::has_pronunciation() | ||
57 | { | ||
58 | this->_has_prn = true; | ||
59 | |||
60 | return *this; | ||
61 | } | ||
62 | |||
63 | verb_query& verb_query::has_rhyming_noun() | ||
64 | { | ||
65 | _has_rhyming_noun = true; | ||
66 | |||
67 | return *this; | ||
68 | } | ||
69 | |||
70 | verb_query& verb_query::has_rhyming_adjective() | ||
71 | { | ||
72 | _has_rhyming_adjective = true; | ||
73 | |||
74 | return *this; | ||
75 | } | ||
76 | |||
77 | verb_query& verb_query::has_rhyming_adverb() | ||
78 | { | ||
79 | _has_rhyming_adverb = true; | ||
80 | |||
81 | return *this; | ||
82 | } | ||
83 | |||
84 | verb_query& verb_query::has_rhyming_verb() | ||
85 | { | ||
86 | _has_rhyming_verb = true; | ||
87 | |||
88 | return *this; | ||
89 | } | ||
90 | |||
91 | verb_query& verb_query::with_stress(filter<std::vector<bool>> _arg) | ||
92 | { | ||
93 | _stress = _arg; | ||
94 | |||
95 | return *this; | ||
96 | } | ||
97 | |||
98 | verb_query& verb_query::has_frames() | ||
99 | { | ||
100 | this->_has_frames = true; | ||
101 | |||
102 | return *this; | ||
103 | } | ||
104 | |||
105 | std::list<verb> verb_query::run() const | ||
106 | { | ||
107 | std::stringstream construct; | ||
108 | construct << "SELECT verb_id, infinitive, past_tense, past_participle, ing_form, s_form FROM verbs"; | ||
109 | std::list<std::string> conditions; | ||
110 | std::list<binding> bindings; | ||
111 | |||
112 | if (_has_prn) | ||
113 | { | ||
114 | conditions.push_back("verb_id IN (SELECT verb_id FROM verb_pronunciations)"); | ||
115 | } | ||
116 | |||
117 | if (!_rhymes.empty()) | ||
118 | { | ||
119 | std::list<std::string> clauses(_rhymes.size(), "(prerhyme != ? AND rhyme = ?)"); | ||
120 | std::string cond = "verb_id IN (SELECT verb_id FROM verb_pronunciations WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
121 | conditions.push_back(cond); | ||
122 | |||
123 | for (auto rhy : _rhymes) | ||
124 | { | ||
125 | bindings.emplace_back(rhy.get_prerhyme()); | ||
126 | bindings.emplace_back(rhy.get_rhyme()); | ||
127 | } | ||
128 | } | ||
129 | |||
130 | if (_has_rhyming_noun) | ||
131 | { | ||
132 | conditions.push_back("verb_id IN (SELECT a.verb_id FROM verbs AS a INNER JOIN verb_pronunciations AS curp ON curp.noun_id = a.adverb_id INNER JOIN noun_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)"); | ||
133 | } | ||
134 | |||
135 | if (_has_rhyming_adjective) | ||
136 | { | ||
137 | conditions.push_back("verb_id IN (SELECT a.verb_id FROM verbs AS a INNER JOIN verb_pronunciations AS curp ON curp.noun_id = a.adverb_id INNER JOIN adjective_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)"); | ||
138 | } | ||
139 | |||
140 | if (_has_rhyming_adverb) | ||
141 | { | ||
142 | conditions.push_back("verb_id IN (SELECT a.verb_id FROM verbs AS a INNER JOIN verb_pronunciations AS curp ON curp.noun_id = a.adverb_id INNER JOIN adverb_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)"); | ||
143 | } | ||
144 | |||
145 | if (_has_rhyming_verb) | ||
146 | { | ||
147 | conditions.push_back("verb_id IN (SELECT a.verb_id FROM verbs AS a INNER JOIN verb_pronunciations AS curp ON curp.noun_id = a.adverb_id INNER JOIN verb_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme AND rhmp.verb_id != curp.verb_id)"); | ||
148 | } | ||
149 | |||
150 | if (!_stress.empty()) | ||
151 | { | ||
152 | std::stringstream cond; | ||
153 | if (_stress.get_notlogic()) | ||
154 | { | ||
155 | cond << "verb_id NOT IN"; | ||
156 | } else { | ||
157 | cond << "verb_id IN"; | ||
158 | } | ||
159 | |||
160 | cond << "(SELECT verb_id FROM verb_pronunciations WHERE "; | ||
161 | |||
162 | std::function<std::string (filter<std::vector<bool>>, bool)> recur = [&] (filter<std::vector<bool>> f, bool notlogic) -> std::string { | ||
163 | switch (f.get_type()) | ||
164 | { | ||
165 | case filter<std::vector<bool>>::type::singleton: | ||
166 | { | ||
167 | std::ostringstream _val; | ||
168 | for (auto syl : f.get_elem()) | ||
169 | { | ||
170 | if (syl) | ||
171 | { | ||
172 | _val << "1"; | ||
173 | } else { | ||
174 | _val << "0"; | ||
175 | } | ||
176 | } | ||
177 | |||
178 | bindings.emplace_back(_val.str()); | ||
179 | |||
180 | if (notlogic == f.get_notlogic()) | ||
181 | { | ||
182 | return "stress = ?"; | ||
183 | } else { | ||
184 | return "stress != ?"; | ||
185 | } | ||
186 | } | ||
187 | |||
188 | case filter<std::vector<bool>>::type::group: | ||
189 | { | ||
190 | bool truelogic = notlogic != f.get_notlogic(); | ||
191 | |||
192 | std::list<std::string> clauses; | ||
193 | std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<std::vector<bool>> f2) { | ||
194 | return recur(f2, truelogic); | ||
195 | }); | ||
196 | |||
197 | if (truelogic == f.get_orlogic()) | ||
198 | { | ||
199 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")"; | ||
200 | } else { | ||
201 | return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")"; | ||
202 | } | ||
203 | } | ||
204 | } | ||
205 | }; | ||
206 | |||
207 | cond << recur(_stress, _stress.get_notlogic()); | ||
208 | cond << ")"; | ||
209 | conditions.push_back(cond.str()); | ||
210 | } | ||
211 | |||
212 | for (auto except : _except) | ||
213 | { | ||
214 | conditions.push_back("verb_id != ?"); | ||
215 | bindings.emplace_back(except._id); | ||
216 | } | ||
217 | |||
218 | if (!_has_frames) | ||
219 | { | ||
220 | conditions.push_back("verb_id IN (SELECT verb_id FROM verb_groups)"); | ||
221 | } | ||
222 | |||
223 | if (!conditions.empty()) | ||
224 | { | ||
225 | construct << " WHERE "; | ||
226 | construct << verbly::implode(std::begin(conditions), std::end(conditions), " AND "); | ||
227 | } | ||
228 | |||
229 | if (_random) | ||
230 | { | ||
231 | construct << " ORDER BY RANDOM()"; | ||
232 | } | ||
233 | |||
234 | if (_limit != unlimited) | ||
235 | { | ||
236 | construct << " LIMIT " << _limit; | ||
237 | } | ||
238 | |||
239 | sqlite3_stmt* ppstmt; | ||
240 | std::string query = construct.str(); | ||
241 | if (sqlite3_prepare_v2(_data.ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
242 | { | ||
243 | throw std::runtime_error(sqlite3_errmsg(_data.ppdb)); | ||
244 | } | ||
245 | |||
246 | int i = 1; | ||
247 | for (auto& binding : bindings) | ||
248 | { | ||
249 | switch (binding.get_type()) | ||
250 | { | ||
251 | case binding::type::integer: | ||
252 | { | ||
253 | sqlite3_bind_int(ppstmt, i, binding.get_integer()); | ||
254 | |||
255 | break; | ||
256 | } | ||
257 | |||
258 | case binding::type::string: | ||
259 | { | ||
260 | sqlite3_bind_text(ppstmt, i, binding.get_string().c_str(), binding.get_string().length(), SQLITE_TRANSIENT); | ||
261 | |||
262 | break; | ||
263 | } | ||
264 | } | ||
265 | |||
266 | i++; | ||
267 | } | ||
268 | |||
269 | std::list<verb> output; | ||
270 | while (sqlite3_step(ppstmt) == SQLITE_ROW) | ||
271 | { | ||
272 | verb tnc {_data, sqlite3_column_int(ppstmt, 0)}; | ||
273 | tnc._infinitive = std::string(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 1))); | ||
274 | tnc._past_tense = std::string(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 2))); | ||
275 | tnc._past_participle = std::string(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 3))); | ||
276 | tnc._ing_form = std::string(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 4))); | ||
277 | tnc._s_form = std::string(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 5))); | ||
278 | |||
279 | output.push_back(tnc); | ||
280 | } | ||
281 | |||
282 | sqlite3_finalize(ppstmt); | ||
283 | |||
284 | for (auto& verb : output) | ||
285 | { | ||
286 | query = "SELECT pronunciation, prerhyme, rhyme FROM verb_pronunciations WHERE verb_id = ?"; | ||
287 | if (sqlite3_prepare_v2(_data.ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) | ||
288 | { | ||
289 | throw std::runtime_error(sqlite3_errmsg(_data.ppdb)); | ||
290 | } | ||
291 | |||
292 | sqlite3_bind_int(ppstmt, 1, verb._id); | ||
293 | |||
294 | while (sqlite3_step(ppstmt) == SQLITE_ROW) | ||
295 | { | ||
296 | std::string pronunciation(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 0))); | ||
297 | auto phonemes = verbly::split<std::list<std::string>>(pronunciation, " "); | ||
298 | |||
299 | verb.pronunciations.push_back(phonemes); | ||
300 | |||
301 | if ((sqlite3_column_type(ppstmt, 1) != SQLITE_NULL) && (sqlite3_column_type(ppstmt, 2) != SQLITE_NULL)) | ||
302 | { | ||
303 | std::string prerhyme(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 1))); | ||
304 | std::string rhyming(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 2))); | ||
305 | verb.rhymes.emplace_back(prerhyme, rhyming); | ||
306 | } | ||
307 | } | ||
308 | |||
309 | sqlite3_finalize(ppstmt); | ||
310 | } | ||
311 | |||
312 | return output; | ||
313 | } | ||
314 | |||
315 | }; | ||
diff --git a/lib/verb_query.h b/lib/verb_query.h deleted file mode 100644 index 566ae37..0000000 --- a/lib/verb_query.h +++ /dev/null | |||
@@ -1,45 +0,0 @@ | |||
1 | #ifndef VERB_QUERY_H_34E5A679 | ||
2 | #define VERB_QUERY_H_34E5A679 | ||
3 | |||
4 | namespace verbly { | ||
5 | |||
6 | class verb_query { | ||
7 | public: | ||
8 | verb_query(const data& _data); | ||
9 | |||
10 | verb_query& limit(int _limit); | ||
11 | verb_query& random(); | ||
12 | verb_query& except(const verb& _word); | ||
13 | verb_query& rhymes_with(const word& _word); | ||
14 | verb_query& rhymes_with(rhyme _r); | ||
15 | verb_query& has_pronunciation(); | ||
16 | verb_query& has_rhyming_noun(); | ||
17 | verb_query& has_rhyming_adjective(); | ||
18 | verb_query& has_rhyming_adverb(); | ||
19 | verb_query& has_rhyming_verb(); | ||
20 | verb_query& with_stress(filter<std::vector<bool>> _arg); | ||
21 | |||
22 | verb_query& has_frames(); | ||
23 | |||
24 | std::list<verb> run() const; | ||
25 | |||
26 | const static int unlimited = -1; | ||
27 | |||
28 | private: | ||
29 | const data& _data; | ||
30 | int _limit = unlimited; | ||
31 | bool _random = false; | ||
32 | std::list<rhyme> _rhymes; | ||
33 | std::list<verb> _except; | ||
34 | bool _has_prn = false; | ||
35 | bool _has_frames = false; | ||
36 | bool _has_rhyming_noun = false; | ||
37 | bool _has_rhyming_adjective = false; | ||
38 | bool _has_rhyming_adverb = false; | ||
39 | bool _has_rhyming_verb = false; | ||
40 | filter<std::vector<bool>> _stress; | ||
41 | }; | ||
42 | |||
43 | }; | ||
44 | |||
45 | #endif /* end of include guard: VERB_QUERY_H_34E5A679 */ | ||
diff --git a/lib/verbly.h b/lib/verbly.h index cfaf5bc..6dfc01a 100644 --- a/lib/verbly.h +++ b/lib/verbly.h | |||
@@ -1,35 +1,17 @@ | |||
1 | #ifndef VERBLY_H_5B39CE50 | 1 | #ifndef VERBLY_H_5B39CE50 |
2 | #define VERBLY_H_5B39CE50 | 2 | #define VERBLY_H_5B39CE50 |
3 | 3 | ||
4 | #include <string> | ||
5 | #include <list> | ||
6 | #include <sstream> | ||
7 | #include <algorithm> | ||
8 | #include <cassert> | ||
9 | #include <set> | ||
10 | #include <stdexcept> | ||
11 | #include <vector> | ||
12 | #include <map> | ||
13 | #include <iterator> | ||
14 | #include <sstream> | ||
15 | #include <functional> | ||
16 | #include <iostream> | ||
17 | #include <new> | ||
18 | |||
19 | #include "util.h" | 4 | #include "util.h" |
20 | #include "data.h" | 5 | #include "database.h" |
6 | #include "filter.h" | ||
7 | #include "field.h" | ||
8 | #include "query.h" | ||
9 | #include "notion.h" | ||
21 | #include "word.h" | 10 | #include "word.h" |
22 | #include "verb.h" | 11 | #include "group.h" |
23 | #include "adverb.h" | ||
24 | #include "adjective.h" | ||
25 | #include "noun.h" | ||
26 | #include "frame.h" | 12 | #include "frame.h" |
27 | #include "preposition.h" | 13 | #include "lemma.h" |
28 | #include "token.h" | 14 | #include "form.h" |
29 | #include "noun_query.h" | 15 | #include "pronunciation.h" |
30 | #include "adverb_query.h" | ||
31 | #include "adjective_query.h" | ||
32 | #include "verb_query.h" | ||
33 | #include "frame_query.h" | ||
34 | 16 | ||
35 | #endif /* end of include guard: VERBLY_H_5B39CE50 */ | 17 | #endif /* end of include guard: VERBLY_H_5B39CE50 */ |
diff --git a/lib/word.cpp b/lib/word.cpp index 49e34a1..3edf2d2 100644 --- a/lib/word.cpp +++ b/lib/word.cpp | |||
@@ -1,60 +1,112 @@ | |||
1 | #include "verbly.h" | 1 | #include "word.h" |
2 | #include <algorithm> | 2 | #include <sqlite3.h> |
3 | #include "form.h" | ||
4 | #include "util.h" | ||
5 | #include "database.h" | ||
6 | #include "query.h" | ||
3 | 7 | ||
4 | namespace verbly { | 8 | namespace verbly { |
5 | 9 | ||
6 | rhyme::rhyme(std::string prerhyme, std::string phonemes) : _prerhyme(prerhyme), _rhyme(phonemes) | 10 | const object word::objectType = object::word; |
7 | { | ||
8 | |||
9 | } | ||
10 | 11 | ||
11 | std::string rhyme::get_prerhyme() const | 12 | const std::list<std::string> word::select = {"word_id", "notion_id", "lemma_id", "tag_count", "position", "group_id"}; |
12 | { | ||
13 | return _prerhyme; | ||
14 | } | ||
15 | 13 | ||
16 | std::string rhyme::get_rhyme() const | 14 | const field word::id = field::integerField(object::word, "word_id"); |
17 | { | 15 | const field word::tagCount = field::integerField(object::word, "tag_count", true); |
18 | return _rhyme; | 16 | const field word::adjectivePosition = field::integerField(object::word, "position", true); |
19 | } | 17 | |
18 | const field word::notion = field::joinField(object::word, "notion_id", object::notion); | ||
19 | const field word::lemma = field::joinField(object::word, "lemma_id", object::lemma); | ||
20 | const field word::group = field::joinField(object::word, "group_id", object::group, true); | ||
21 | |||
22 | const field word::antonyms = field::selfJoin(object::word, "word_id", "antonymy", "antonym_2_id", "antonym_1_id"); | ||
23 | |||
24 | const field word::specifications = field::selfJoin(object::word, "word_id", "specification", "general_id", "specific_id"); | ||
25 | const field word::generalizations = field::selfJoin(object::word, "word_id", "specification", "specific_id", "general_id"); | ||
20 | 26 | ||
21 | bool rhyme::operator==(const rhyme& other) const | 27 | const field word::pertainyms = field::selfJoin(object::word, "word_id", "pertainymy", "noun_id", "pertainym_id"); |
28 | const field word::antiPertainyms = field::selfJoin(object::word, "word_id", "pertainymy", "pertainym_id", "noun_id"); | ||
29 | |||
30 | const field word::mannernyms = field::selfJoin(object::word, "word_id", "mannernymy", "adjective_id", "mannernym_id"); | ||
31 | const field word::antiMannernyms = field::selfJoin(object::word, "word_id", "mannernymy", "mannernym_id", "adjective_id"); | ||
32 | |||
33 | const field word::usageTerms = field::selfJoin(object::word, "word_id", "usage", "domain_id", "term_id"); | ||
34 | const field word::usageDomains = field::selfJoin(object::word, "word_id", "usage", "term_id", "domain_id"); | ||
35 | |||
36 | const field word::topicalTerms = field::selfJoin(object::word, "word_id", "topicality", "domain_id", "term_id"); | ||
37 | const field word::topicalDomains = field::selfJoin(object::word, "word_id", "topicality", "term_id", "domain_id"); | ||
38 | |||
39 | const field word::regionalTerms = field::selfJoin(object::word, "word_id", "regionality", "domain_id", "term_id"); | ||
40 | const field word::regionalDomains = field::selfJoin(object::word, "word_id", "regionality", "term_id", "domain_id"); | ||
41 | |||
42 | word::word(const database& db, sqlite3_stmt* row) : db_(&db), valid_(true) | ||
22 | { | 43 | { |
23 | return std::tie(_prerhyme, _rhyme) == std::tie(other._prerhyme, other._rhyme); | 44 | id_ = sqlite3_column_int(row, 0); |
45 | notionId_ = sqlite3_column_int(row, 1); | ||
46 | lemmaId_ = sqlite3_column_int(row, 2); | ||
47 | |||
48 | if (sqlite3_column_type(row, 3) != SQLITE_NULL) | ||
49 | { | ||
50 | hasTagCount_ = true; | ||
51 | tagCount_ = sqlite3_column_int(row, 3); | ||
52 | } | ||
53 | |||
54 | if (sqlite3_column_type(row, 4) != SQLITE_NULL) | ||
55 | { | ||
56 | adjectivePosition_ = static_cast<positioning>(sqlite3_column_int(row, 4)); | ||
57 | } | ||
58 | |||
59 | if (sqlite3_column_type(row, 5) != SQLITE_NULL) | ||
60 | { | ||
61 | hasGroup_ = true; | ||
62 | groupId_ = sqlite3_column_int(row, 5); | ||
63 | } | ||
24 | } | 64 | } |
25 | 65 | ||
26 | word::word() | 66 | const notion& word::getNotion() const |
27 | { | 67 | { |
68 | if (!valid_) | ||
69 | { | ||
70 | throw std::domain_error("Bad access to uninitialized word"); | ||
71 | } | ||
72 | |||
73 | if (!notion_) | ||
74 | { | ||
75 | notion_ = db_->notions(notion::id == notionId_).first(); | ||
76 | } | ||
28 | 77 | ||
78 | return notion_; | ||
29 | } | 79 | } |
30 | 80 | ||
31 | word::word(const data& _data, int _id) : _data(&_data), _id(_id), _valid(true) | 81 | const lemma& word::getLemma() const |
32 | { | 82 | { |
83 | if (!valid_) | ||
84 | { | ||
85 | throw std::domain_error("Bad access to uninitialized word"); | ||
86 | } | ||
33 | 87 | ||
88 | if (!lemma_) | ||
89 | { | ||
90 | lemma_ = db_->lemmas(lemma::id == lemmaId_).first(); | ||
91 | } | ||
92 | |||
93 | return lemma_; | ||
34 | } | 94 | } |
35 | 95 | ||
36 | std::list<rhyme> word::get_rhymes() const | 96 | std::string word::getBaseForm() const |
37 | { | 97 | { |
38 | assert(_valid == true); | 98 | return getLemma().getBaseForm().getText(); |
39 | |||
40 | return rhymes; | ||
41 | } | 99 | } |
42 | 100 | ||
43 | bool word::starts_with_vowel_sound() const | 101 | std::list<std::string> word::getInflections(inflection category) const |
44 | { | 102 | { |
45 | assert(_valid == true); | 103 | std::list<std::string> result; |
46 | 104 | for (const form& infl : getLemma().getInflections(category)) | |
47 | if (pronunciations.size() > 0) | ||
48 | { | 105 | { |
49 | return std::any_of(std::begin(pronunciations), std::end(pronunciations), [] (std::list<std::string> phonemes) { | 106 | result.push_back(infl.getText()); |
50 | return (phonemes.front().find_first_of("012") != std::string::npos); | ||
51 | }); | ||
52 | } else { | ||
53 | // If the word is not in CMUDICT, fall back to checking whether the first letter is a vowel | ||
54 | // Not perfect but will work in most cases | ||
55 | char ch = tolower(base_form().front()); | ||
56 | return (ch == 'a') || (ch == 'e') || (ch == 'i') || (ch == 'o') || (ch == 'u'); | ||
57 | } | 107 | } |
108 | |||
109 | return result; | ||
58 | } | 110 | } |
59 | 111 | ||
60 | }; | 112 | }; |
diff --git a/lib/word.h b/lib/word.h index 08797a3..f71dad9 100644 --- a/lib/word.h +++ b/lib/word.h | |||
@@ -1,48 +1,173 @@ | |||
1 | #ifndef WORD_H_8FC89498 | 1 | #ifndef WORD_H_DF91B1B4 |
2 | #define WORD_H_8FC89498 | 2 | #define WORD_H_DF91B1B4 |
3 | |||
4 | #include <stdexcept> | ||
5 | #include <map> | ||
6 | #include "field.h" | ||
7 | #include "filter.h" | ||
8 | #include "notion.h" | ||
9 | #include "lemma.h" | ||
10 | #include "group.h" | ||
11 | |||
12 | struct sqlite3_stmt; | ||
3 | 13 | ||
4 | namespace verbly { | 14 | namespace verbly { |
5 | 15 | ||
6 | class rhyme { | 16 | class database; |
7 | public: | 17 | |
8 | rhyme(std::string prerhyme, std::string phonemes); | 18 | class word { |
19 | public: | ||
20 | |||
21 | // Default constructor | ||
22 | |||
23 | word() = default; | ||
24 | |||
25 | // Construct from database | ||
26 | |||
27 | word(const database& db, sqlite3_stmt* row); | ||
28 | |||
29 | // Accessors | ||
30 | |||
31 | operator bool() const | ||
32 | { | ||
33 | return valid_; | ||
34 | } | ||
35 | |||
36 | int getId() const | ||
37 | { | ||
38 | if (!valid_) | ||
39 | { | ||
40 | throw std::domain_error("Bad access to uninitialized word"); | ||
41 | } | ||
9 | 42 | ||
10 | std::string get_prerhyme() const; | 43 | return id_; |
11 | std::string get_rhyme() const; | 44 | } |
45 | |||
46 | bool hasTagCount() const | ||
47 | { | ||
48 | if (!valid_) | ||
49 | { | ||
50 | throw std::domain_error("Bad access to uninitialized word"); | ||
51 | } | ||
12 | 52 | ||
13 | bool operator==(const rhyme& other) const; | 53 | return hasTagCount_; |
54 | } | ||
55 | |||
56 | int getTagCount() const | ||
57 | { | ||
58 | if (!valid_) | ||
59 | { | ||
60 | throw std::domain_error("Bad access to uninitialized word"); | ||
61 | } | ||
14 | 62 | ||
15 | private: | 63 | if (!hasTagCount_) |
16 | std::string _prerhyme; | 64 | { |
17 | std::string _rhyme; | 65 | throw std::domain_error("Word has no tag count"); |
18 | }; | 66 | } |
19 | |||
20 | class word { | ||
21 | protected: | ||
22 | const data* _data; | ||
23 | int _id; | ||
24 | bool _valid = false; | ||
25 | 67 | ||
26 | std::list<std::list<std::string>> pronunciations; | 68 | return tagCount_; |
27 | std::list<rhyme> rhymes; | 69 | } |
70 | |||
71 | bool hasAdjectivePositioning() const | ||
72 | { | ||
73 | if (!valid_) | ||
74 | { | ||
75 | throw std::domain_error("Bad access to uninitialized word"); | ||
76 | } | ||
28 | 77 | ||
29 | word(); | 78 | return (adjectivePosition_ != positioning::undefined); |
30 | word(const data& _data, int _id); | 79 | } |
80 | |||
81 | positioning getAdjectivePosition() const | ||
82 | { | ||
83 | if (!valid_) | ||
84 | { | ||
85 | throw std::domain_error("Bad access to uninitialized word"); | ||
86 | } | ||
31 | 87 | ||
32 | friend class adjective_query; | 88 | if (adjectivePosition_ == positioning::undefined) |
33 | friend class verb_query; | 89 | { |
34 | friend class noun_query; | 90 | throw std::domain_error("Word has no adjective position"); |
35 | friend class adverb_query; | 91 | } |
36 | friend class frame_query; | ||
37 | friend class preposition_query; | ||
38 | |||
39 | public: | ||
40 | virtual std::string base_form() const = 0; | ||
41 | 92 | ||
42 | std::list<rhyme> get_rhymes() const; | 93 | return adjectivePosition_; |
43 | bool starts_with_vowel_sound() const; | 94 | } |
95 | |||
96 | const notion& getNotion() const; | ||
97 | |||
98 | const lemma& getLemma() const; | ||
99 | |||
100 | // Convenience accessors | ||
101 | |||
102 | std::string getBaseForm() const; | ||
103 | |||
104 | std::list<std::string> getInflections(inflection infl) const; | ||
105 | |||
106 | // Type info | ||
107 | |||
108 | static const object objectType; | ||
109 | |||
110 | static const std::list<std::string> select; | ||
111 | |||
112 | // Query fields | ||
113 | |||
114 | static const field id; | ||
115 | static const field tagCount; | ||
116 | static const field adjectivePosition; | ||
117 | |||
118 | operator filter() const | ||
119 | { | ||
120 | return (id == id_); | ||
121 | } | ||
122 | |||
123 | // Relationships with other objects | ||
124 | |||
125 | static const field notion; | ||
126 | static const field lemma; | ||
127 | static const field group; | ||
128 | |||
129 | // Relationships with self | ||
130 | |||
131 | static const field antonyms; | ||
132 | |||
133 | static const field specifications; | ||
134 | static const field generalizations; | ||
135 | |||
136 | static const field pertainyms; | ||
137 | static const field antiPertainyms; | ||
138 | |||
139 | static const field mannernyms; | ||
140 | static const field antiMannernyms; | ||
141 | |||
142 | static const field usageTerms; | ||
143 | static const field usageDomains; | ||
144 | |||
145 | static const field topicalTerms; | ||
146 | static const field topicalDomains; | ||
147 | |||
148 | static const field regionalTerms; | ||
149 | static const field regionalDomains; | ||
150 | |||
151 | private: | ||
152 | bool valid_ = false; | ||
153 | |||
154 | int id_; | ||
155 | bool hasTagCount_ = false; | ||
156 | int tagCount_; | ||
157 | positioning adjectivePosition_ = positioning::undefined; | ||
158 | int notionId_; | ||
159 | int lemmaId_; | ||
160 | bool hasGroup_ = false; | ||
161 | int groupId_; | ||
162 | |||
163 | const database* db_; | ||
164 | |||
165 | mutable class notion notion_; | ||
166 | mutable class lemma lemma_; | ||
167 | mutable class group group_; | ||
168 | |||
44 | }; | 169 | }; |
45 | 170 | ||
46 | }; | 171 | }; |
47 | 172 | ||
48 | #endif /* end of include guard: WORD_H_8FC89498 */ | 173 | #endif /* end of include guard: WORD_H_DF91B1B4 */ |