summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--CMakeLists.txt4
-rw-r--r--generator/CMakeLists.txt6
-rw-r--r--generator/database.cpp173
-rw-r--r--generator/database.h73
-rw-r--r--generator/field.cpp193
-rw-r--r--generator/field.h76
-rw-r--r--generator/form.cpp53
-rw-r--r--generator/form.h71
-rw-r--r--generator/frame.cpp83
-rw-r--r--generator/frame.h59
-rw-r--r--generator/generator.cpp3145
-rw-r--r--generator/generator.h151
-rw-r--r--generator/group.cpp119
-rw-r--r--generator/group.h80
-rw-r--r--generator/lemma.cpp65
-rw-r--r--generator/lemma.h58
-rw-r--r--generator/main.cpp40
-rw-r--r--generator/notion.cpp85
-rw-r--r--generator/notion.h91
-rw-r--r--generator/part.cpp336
-rw-r--r--generator/part.h114
-rw-r--r--generator/progress.h78
-rw-r--r--generator/pronunciation.cpp87
-rw-r--r--generator/pronunciation.h82
-rw-r--r--generator/role.h35
-rw-r--r--generator/schema.sql352
-rw-r--r--generator/selrestr.cpp288
-rw-r--r--generator/selrestr.h88
-rw-r--r--generator/word.cpp77
-rw-r--r--generator/word.h110
-rw-r--r--lib/adjective.cpp113
-rw-r--r--lib/adjective.h51
-rw-r--r--lib/adjective_query.cpp1072
-rw-r--r--lib/adjective_query.h112
-rw-r--r--lib/adverb.cpp71
-rw-r--r--lib/adverb.h35
-rw-r--r--lib/adverb_query.cpp758
-rw-r--r--lib/adverb_query.h86
-rw-r--r--lib/binding.cpp180
-rw-r--r--lib/binding.h70
-rw-r--r--lib/data.cpp177
-rw-r--r--lib/data.h380
-rw-r--r--lib/database.cpp79
-rw-r--r--lib/database.h73
-rw-r--r--lib/enums.h45
-rw-r--r--lib/field.cpp91
-rw-r--r--lib/field.h306
-rw-r--r--lib/filter.cpp1365
-rw-r--r--lib/filter.h143
-rw-r--r--lib/form.cpp53
-rw-r--r--lib/form.h149
-rw-r--r--lib/frame.cpp317
-rw-r--r--lib/frame.h178
-rw-r--r--lib/group.cpp43
-rw-r--r--lib/group.h87
-rw-r--r--lib/lemma.cpp69
-rw-r--r--lib/lemma.h120
-rw-r--r--lib/notion.cpp94
-rw-r--r--lib/notion.h200
-rw-r--r--lib/noun.cpp221
-rw-r--r--lib/noun.h55
-rw-r--r--lib/noun_query.cpp2013
-rw-r--r--lib/noun_query.h180
-rw-r--r--lib/preposition.cpp107
-rw-r--r--lib/preposition.h38
-rw-r--r--lib/pronunciation.cpp69
-rw-r--r--lib/pronunciation.h163
-rw-r--r--lib/query.h123
-rw-r--r--lib/statement.cpp806
-rw-r--r--lib/statement.h272
-rw-r--r--lib/util.h24
-rw-r--r--lib/verb.cpp64
-rw-r--r--lib/verb.h34
-rw-r--r--lib/verb_query.cpp315
-rw-r--r--lib/verb_query.h45
-rw-r--r--lib/verbly.h36
-rw-r--r--lib/word.cpp120
-rw-r--r--lib/word.h193
78 files changed, 8971 insertions, 8696 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 9c39d99..61fcce2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt
@@ -4,8 +4,10 @@ project (verbly)
4find_package(PkgConfig) 4find_package(PkgConfig)
5pkg_check_modules(sqlite3 sqlite3>=3.8.3 REQUIRED) 5pkg_check_modules(sqlite3 sqlite3>=3.8.3 REQUIRED)
6 6
7set(CMAKE_BUILD_TYPE Debug)
8
7include_directories(vendor/json) 9include_directories(vendor/json)
8add_library(verbly lib/data.cpp lib/adjective.cpp lib/noun.cpp lib/verb.cpp lib/adverb.cpp lib/token.cpp lib/word.cpp lib/frame.cpp lib/preposition.cpp lib/adjective_query.cpp lib/adverb_query.cpp lib/noun_query.cpp lib/verb_query.cpp lib/frame_query.cpp) 10add_library(verbly lib/filter.cpp lib/field.cpp lib/notion.cpp lib/word.cpp lib/group.cpp lib/frame.cpp lib/lemma.cpp lib/form.cpp lib/pronunciation.cpp lib/statement.cpp lib/binding.cpp lib/database.cpp)
9set_property(TARGET verbly PROPERTY CXX_STANDARD 11) 11set_property(TARGET verbly PROPERTY CXX_STANDARD 11)
10set_property(TARGET verbly PROPERTY CXX_STANDARD_REQUIRED ON) 12set_property(TARGET verbly PROPERTY CXX_STANDARD_REQUIRED ON)
11target_link_libraries(verbly ${sqlite3_LIBRARIES}) 13target_link_libraries(verbly ${sqlite3_LIBRARIES})
diff --git a/generator/CMakeLists.txt b/generator/CMakeLists.txt index 552526d..4f78eb8 100644 --- a/generator/CMakeLists.txt +++ b/generator/CMakeLists.txt
@@ -1,12 +1,12 @@
1cmake_minimum_required (VERSION 2.6) 1cmake_minimum_required (VERSION 3.1)
2project (generator) 2project (generator)
3 3
4find_package(PkgConfig) 4find_package(PkgConfig)
5pkg_check_modules(sqlite3 sqlite3 REQUIRED) 5pkg_check_modules(sqlite3 sqlite3 REQUIRED)
6find_package(libxml2 REQUIRED) 6find_package(libxml2 REQUIRED)
7 7
8include_directories(${sqlite3_INCLUDE_DIR} ${LIBXML2_INCLUDE_DIR} ../vendor/json/src) 8include_directories(${sqlite3_INCLUDE_DIR} ${LIBXML2_INCLUDE_DIR} ../vendor/json)
9add_executable(generator generator.cpp) 9add_executable(generator notion.cpp word.cpp lemma.cpp form.cpp pronunciation.cpp group.cpp frame.cpp part.cpp selrestr.cpp database.cpp field.cpp generator.cpp main.cpp)
10set_property(TARGET generator PROPERTY CXX_STANDARD 11) 10set_property(TARGET generator PROPERTY CXX_STANDARD 11)
11set_property(TARGET generator PROPERTY CXX_STANDARD_REQUIRED ON) 11set_property(TARGET generator PROPERTY CXX_STANDARD_REQUIRED ON)
12target_link_libraries(generator ${sqlite3_LIBRARIES} ${LIBXML2_LIBRARIES}) 12target_link_libraries(generator ${sqlite3_LIBRARIES} ${LIBXML2_LIBRARIES})
diff --git a/generator/database.cpp b/generator/database.cpp new file mode 100644 index 0000000..c7e4cfa --- /dev/null +++ b/generator/database.cpp
@@ -0,0 +1,173 @@
1#include "database.h"
2#include <sqlite3.h>
3#include <cassert>
4#include <fstream>
5#include <stdexcept>
6#include <cstdio>
7#include <sstream>
8#include "field.h"
9#include "../lib/util.h"
10
11namespace verbly {
12 namespace generator {
13
14 sqlite3_error::sqlite3_error(
15 const std::string& what,
16 const std::string& db_err) :
17 what_(what + " (" + db_err + ")"),
18 db_err_(db_err)
19 {
20 }
21
22 const char* sqlite3_error::what() const noexcept
23 {
24 return what_.c_str();
25 }
26
27 const char* sqlite3_error::db_err() const noexcept
28 {
29 return db_err_.c_str();
30 }
31
32 database::database(std::string path)
33 {
34 // If there is already a file at this path, overwrite it.
35 if (std::ifstream(path))
36 {
37 if (std::remove(path.c_str()))
38 {
39 throw std::logic_error("Could not overwrite file at path");
40 }
41 }
42
43 if (sqlite3_open_v2(path.c_str(), &ppdb_, SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE, NULL) != SQLITE_OK)
44 {
45 // We still have to free the resources allocated. In the event that
46 // allocation failed, ppdb will be null and sqlite3_close_v2 will just
47 // ignore it.
48 std::string errmsg(sqlite3_errmsg(ppdb_));
49 sqlite3_close_v2(ppdb_);
50
51 throw sqlite3_error("Could not create output datafile", errmsg);
52 }
53 }
54
55 database::database(database&& other) : database()
56 {
57 swap(*this, other);
58 }
59
60 database& database::operator=(database&& other)
61 {
62 swap(*this, other);
63
64 return *this;
65 }
66
67 void swap(database& first, database& second)
68 {
69 std::swap(first.ppdb_, second.ppdb_);
70 }
71
72 database::~database()
73 {
74 sqlite3_close_v2(ppdb_);
75 }
76
77 void database::runQuery(std::string query)
78 {
79 // This can only happen when doing bad things with move semantics.
80 assert(ppdb_ != nullptr);
81
82 sqlite3_stmt* ppstmt;
83
84 if (sqlite3_prepare_v2(ppdb_, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
85 {
86 throw sqlite3_error("Error writing to database", sqlite3_errmsg(ppdb_));
87 }
88
89 int result = sqlite3_step(ppstmt);
90 sqlite3_finalize(ppstmt);
91
92 if (result != SQLITE_DONE)
93 {
94 throw sqlite3_error("Error writing to database", sqlite3_errmsg(ppdb_));
95 }
96 }
97
98 void database::insertIntoTable(std::string table, std::list<field> fields)
99 {
100 // This can only happen when doing bad things with move semantics.
101 assert(ppdb_ != nullptr);
102
103 // This shouldn't happen.
104 assert(!fields.empty());
105
106 std::list<std::string> fieldNames;
107 std::list<std::string> qs;
108 for (field& f : fields)
109 {
110 fieldNames.push_back(f.getName());
111 qs.push_back("?");
112 }
113
114 std::ostringstream query;
115 query << "INSERT INTO ";
116 query << table;
117 query << " (";
118 query << implode(std::begin(fieldNames), std::end(fieldNames), ", ");
119 query << ") VALUES (";
120 query << implode(std::begin(qs), std::end(qs), ", ");
121 query << ")";
122
123 std::string query_str = query.str();
124
125 sqlite3_stmt* ppstmt;
126
127 if (sqlite3_prepare_v2(ppdb_, query_str.c_str(), query_str.length(), &ppstmt, NULL) != SQLITE_OK)
128 {
129 throw sqlite3_error("Error writing to database", sqlite3_errmsg(ppdb_));
130 }
131
132 int i = 1;
133 for (field& f : fields)
134 {
135 switch (f.getType())
136 {
137 case field::type::integer:
138 {
139 sqlite3_bind_int(ppstmt, i, f.getInteger());
140
141 break;
142 }
143
144 case field::type::string:
145 {
146 sqlite3_bind_text(ppstmt, i, f.getString().c_str(), f.getString().length(), SQLITE_TRANSIENT);
147
148 break;
149 }
150
151 case field::type::invalid:
152 {
153 // Fields can only be invalid when doing bad things with move semantics.
154 assert(false);
155
156 break;
157 }
158 }
159
160 i++;
161 }
162
163 int result = sqlite3_step(ppstmt);
164 sqlite3_finalize(ppstmt);
165
166 if (result != SQLITE_DONE)
167 {
168 throw sqlite3_error("Error writing to database", sqlite3_errmsg(ppdb_));
169 }
170 }
171
172 };
173};
diff --git a/generator/database.h b/generator/database.h new file mode 100644 index 0000000..15cdff5 --- /dev/null +++ b/generator/database.h
@@ -0,0 +1,73 @@
1#ifndef DATABASE_H_0B0A47D2
2#define DATABASE_H_0B0A47D2
3
4#include <string>
5#include <exception>
6#include <list>
7
8struct sqlite3;
9
10namespace verbly {
11 namespace generator {
12
13 class field;
14
// Exception type carrying two pieces of text: a description supplied by the
// code that detected the failure, and the error text reported by the
// database.
class sqlite3_error : public std::exception {
public:

  // what: description of the failed operation.
  // db_err: error text obtained from the database.
  sqlite3_error(const std::string& what, const std::string& db_err);

  // Full message for the exception.
  const char* what() const noexcept override;

  // The database error text on its own.
  const char* db_err() const noexcept;

private:

  std::string what_;
  std::string db_err_;

};
28
29 class database {
30 public:
31
32 // Constructor
33
34 explicit database(std::string path);
35
36 // Disable copying
37
38 database(const database& other) = delete;
39 database& operator=(const database& other) = delete;
40
41 // Move constructor and move assignment
42
43 database(database&& other);
44 database& operator=(database&& other);
45
46 // Swap
47
48 friend void swap(database& first, database& second);
49
50 // Destructor
51
52 ~database();
53
54 // Actions
55
56 void runQuery(std::string query);
57
58 void insertIntoTable(std::string table, std::list<field> fields);
59
60 private:
61
62 database()
63 {
64 }
65
66 sqlite3* ppdb_ = nullptr;
67
68 };
69
70 };
71};
72
73#endif /* end of include guard: DATABASE_H_0B0A47D2 */
diff --git a/generator/field.cpp b/generator/field.cpp new file mode 100644 index 0000000..84b2f91 --- /dev/null +++ b/generator/field.cpp
@@ -0,0 +1,193 @@
1#include "field.h"
2#include <stdexcept>
3#include <utility>
4
5namespace verbly {
6 namespace generator {
7
8 field::field(const field& other)
9 {
10 type_ = other.type_;
11 name_ = other.name_;
12
13 switch (type_)
14 {
15 case type::integer:
16 {
17 integer_ = other.integer_;
18
19 break;
20 }
21
22 case type::string:
23 {
24 new(&string_) std::string(other.string_);
25
26 break;
27 }
28
29 case type::invalid:
30 {
31 break;
32 }
33 }
34 }
35
36 field::field(field&& other) : field()
37 {
38 swap(*this, other);
39 }
40
41 field& field::operator=(field other)
42 {
43 swap(*this, other);
44
45 return *this;
46 }
47
48 void swap(field& first, field& second)
49 {
50 using type = field::type;
51
52 type tempType = first.type_;
53 std::string tempName = std::move(first.name_);
54 int tempInteger;
55 std::string tempString;
56
57 switch (first.type_)
58 {
59 case type::integer:
60 {
61 tempInteger = first.integer_;
62
63 break;
64 }
65
66 case type::string:
67 {
68 tempString = std::move(tempString);
69
70 break;
71 }
72
73 case type::invalid:
74 {
75 break;
76 }
77 }
78
79 first.~field();
80
81 first.type_ = second.type_;
82 first.name_ = std::move(second.name_);
83
84 switch (second.type_)
85 {
86 case type::integer:
87 {
88 first.integer_ = second.integer_;
89
90 break;
91 }
92
93 case type::string:
94 {
95 new(&first.string_) std::string(std::move(second.string_));
96
97 break;
98 }
99
100 case type::invalid:
101 {
102 break;
103 }
104 }
105
106 second.~field();
107
108 second.type_ = tempType;
109 second.name_ = std::move(tempName);
110
111 switch (tempType)
112 {
113 case type::integer:
114 {
115 second.integer_ = tempInteger;
116
117 break;
118 }
119
120 case type::string:
121 {
122 new(&second.string_) std::string(std::move(tempString));
123
124 break;
125 }
126
127 case type::invalid:
128 {
129 break;
130 }
131 }
132 }
133
134 field::~field()
135 {
136 switch (type_)
137 {
138 case type::string:
139 {
140 using string_type = std::string;
141 string_.~string_type();
142
143 break;
144 }
145
146 case type::integer:
147 case type::invalid:
148 {
149 break;
150 }
151 }
152 }
153
154 field::field(
155 std::string name,
156 int arg) :
157 type_(type::integer),
158 name_(name),
159 integer_(arg)
160 {
161 }
162
163 int field::getInteger() const
164 {
165 if (type_ != type::integer)
166 {
167 throw std::domain_error("field::getInteger called on non-integer field");
168 }
169
170 return integer_;
171 }
172
173 field::field(
174 std::string name,
175 std::string arg) :
176 type_(type::string),
177 name_(name)
178 {
179 new(&string_) std::string(arg);
180 }
181
182 std::string field::getString() const
183 {
184 if (type_ != type::string)
185 {
186 throw std::domain_error("field::getString called on non-string field");
187 }
188
189 return string_;
190 }
191
192 };
193};
diff --git a/generator/field.h b/generator/field.h new file mode 100644 index 0000000..1fbabfc --- /dev/null +++ b/generator/field.h
@@ -0,0 +1,76 @@
1#ifndef BINDING_H_CAE0B18E
2#define BINDING_H_CAE0B18E
3
4#include <string>
5
6namespace verbly {
7 namespace generator {
8
// A named value that is either an integer or a string. The type tag records
// which member of the internal union is currently active.
class field {
public:

  // Which payload, if any, the field holds.
  enum class type {
    invalid,
    integer,
    string
  };

  // Copying and moving

  field(const field& other);
  field(field&& other);

  // Takes its argument by value, so it serves both copy and move assignment.
  field& operator=(field other);

  // Exchanges the contents of two fields.
  friend void swap(field& first, field& second);

  ~field();

  // Accessors common to every field

  type getType() const
  {
    return type_;
  }

  std::string getName() const
  {
    return name_;
  }

  // Integer fields

  field(std::string name, int arg);

  int getInteger() const;

  // String fields

  field(std::string name, std::string arg);

  std::string getString() const;

private:

  // Produces an invalid field with no payload. This has to stay a
  // user-provided constructor: the union below has a std::string member, so
  // a defaulted default constructor would be deleted.
  field()
  {
  }

  // Payload storage; type_ says which member (if any) is active.
  union {
    int integer_;
    std::string string_;
  };

  type type_ = type::invalid;
  std::string name_;
};
72
73 };
74};
75
76#endif /* end of include guard: BINDING_H_CAE0B18E */
diff --git a/generator/form.cpp b/generator/form.cpp new file mode 100644 index 0000000..6be9d47 --- /dev/null +++ b/generator/form.cpp
@@ -0,0 +1,53 @@
1#include "form.h"
2#include <algorithm>
3#include <list>
4#include "database.h"
5#include "field.h"
6#include "pronunciation.h"
7
8namespace verbly {
9 namespace generator {
10
11 int form::nextId_ = 0;
12
13 form::form(std::string text) :
14 id_(nextId_++),
15 text_(text),
16 complexity_(std::count(std::begin(text), std::end(text), ' ') + 1),
17 proper_(std::any_of(std::begin(text), std::end(text), std::isupper))
18 {
19 }
20
21 void form::addPronunciation(const pronunciation& p)
22 {
23 pronunciations_.insert(&p);
24 }
25
26 database& operator<<(database& db, const form& arg)
27 {
28 // Serialize the form first.
29 {
30 std::list<field> fields;
31 fields.emplace_back("form_id", arg.getId());
32 fields.emplace_back("form", arg.getText());
33 fields.emplace_back("complexity", arg.getComplexity());
34 fields.emplace_back("proper", arg.isProper());
35
36 db.insertIntoTable("forms", std::move(fields));
37 }
38
39 // Then, serialize the form/pronunciation relationship.
40 for (const pronunciation* p : arg.getPronunciations())
41 {
42 std::list<field> fields;
43 fields.emplace_back("form_id", arg.getId());
44 fields.emplace_back("pronunciation_id", p->getId());
45
46 db.insertIntoTable("forms_pronunciations", std::move(fields));
47 }
48
49 return db;
50 }
51
52 };
53};
diff --git a/generator/form.h b/generator/form.h new file mode 100644 index 0000000..5576035 --- /dev/null +++ b/generator/form.h
@@ -0,0 +1,71 @@
1#ifndef FORM_H_7EFBC970
2#define FORM_H_7EFBC970
3
4#include <string>
5#include <set>
6
7namespace verbly {
8 namespace generator {
9
10 class pronunciation;
11 class database;
12
13 class form {
14 public:
15
16 // Constructor
17
18 explicit form(std::string text);
19
20 // Mutators
21
22 void addPronunciation(const pronunciation& p);
23
24 // Accessors
25
26 int getId() const
27 {
28 return id_;
29 }
30
31 std::string getText() const
32 {
33 return text_;
34 }
35
36 int getComplexity() const
37 {
38 return complexity_;
39 }
40
41 bool isProper() const
42 {
43 return proper_;
44 }
45
46 std::set<const pronunciation*> getPronunciations() const
47 {
48 return pronunciations_;
49 }
50
51 private:
52
53 static int nextId_;
54
55 const int id_;
56 const std::string text_;
57 const int complexity_;
58 const bool proper_;
59
60 std::set<const pronunciation*> pronunciations_;
61
62 };
63
64 // Serializer
65
66 database& operator<<(database& db, const form& arg);
67
68 };
69};
70
71#endif /* end of include guard: FORM_H_7EFBC970 */
diff --git a/generator/frame.cpp b/generator/frame.cpp new file mode 100644 index 0000000..9f0653f --- /dev/null +++ b/generator/frame.cpp
@@ -0,0 +1,83 @@
1#include "frame.h"
2#include "database.h"
3#include "field.h"
4
5namespace verbly {
6 namespace generator {
7
8 int frame::nextId_ = 0;
9
10 frame::frame() : id_(nextId_++)
11 {
12 }
13
14 void frame::push_back(part fp)
15 {
16 parts_.push_back(std::move(fp));
17 }
18
19 database& operator<<(database& db, const frame& arg)
20 {
21 std::list<field> fields;
22 fields.emplace_back("frame_id", arg.getId());
23
24 nlohmann::json jsonParts;
25 for (const part& p : arg)
26 {
27 nlohmann::json jsonPart;
28 jsonPart["type"] = static_cast<int>(p.getType());
29
30 switch (p.getType())
31 {
32 case part::type::noun_phrase:
33 {
34 jsonPart["role"] = p.getNounRole();
35 jsonPart["selrestrs"] = p.getNounSelrestrs().toJson();
36 jsonPart["synrestrs"] = p.getNounSynrestrs();
37
38 break;
39 }
40
41 case part::type::preposition:
42 {
43 jsonPart["choices"] = p.getPrepositionChoices();
44 jsonPart["literal"] = p.isPrepositionLiteral();
45
46 break;
47 }
48
49 case part::type::literal:
50 {
51 jsonPart["value"] = p.getLiteralValue();
52
53 break;
54 }
55
56 case part::type::verb:
57 case part::type::adjective:
58 case part::type::adverb:
59 {
60 break;
61 }
62
63 case part::type::invalid:
64 {
65 // Invalid parts should not be serialized.
66 assert(false);
67
68 break;
69 }
70 }
71
72 jsonParts.emplace_back(std::move(jsonPart));
73 }
74
75 fields.emplace_back("data", jsonParts.dump());
76
77 db.insertIntoTable("frames", std::move(fields));
78
79 return db;
80 }
81
82 };
83};
diff --git a/generator/frame.h b/generator/frame.h new file mode 100644 index 0000000..411ce6c --- /dev/null +++ b/generator/frame.h
@@ -0,0 +1,59 @@
1#ifndef FRAME_H_26770FF1
2#define FRAME_H_26770FF1
3
4#include <list>
5#include "part.h"
6
7namespace verbly {
8 namespace generator {
9
10 class database;
11
12 class frame {
13 public:
14
15 // Aliases
16
17 using const_iterator = std::list<part>::const_iterator;
18
19 // Constructor
20
21 frame();
22
23 // Mutators
24
25 void push_back(part fp);
26
27 // Accessors
28
29 int getId() const
30 {
31 return id_;
32 }
33
34 const_iterator begin() const
35 {
36 return std::begin(parts_);
37 }
38
39 const_iterator end() const
40 {
41 return std::end(parts_);
42 }
43
44 private:
45
46 static int nextId_;
47
48 const int id_;
49
50 std::list<part> parts_;
51
52 };
53
54 database& operator<<(database& db, const frame& arg);
55
56 };
57};
58
59#endif /* end of include guard: FRAME_H_26770FF1 */
diff --git a/generator/generator.cpp b/generator/generator.cpp index 6a16467..d88cb31 100644 --- a/generator/generator.cpp +++ b/generator/generator.cpp
@@ -1,2320 +1,1477 @@
1#include <libxml/parser.h> 1#include "generator.h"
2#include <cassert>
3#include <stdexcept>
2#include <iostream> 4#include <iostream>
5#include <regex>
3#include <dirent.h> 6#include <dirent.h>
4#include <set>
5#include <map>
6#include <string>
7#include <vector>
8#include <fstream> 7#include <fstream>
9#include <sqlite3.h> 8#include "enums.h"
10#include <sstream>
11#include <regex>
12#include <list>
13#include <algorithm>
14#include <json.hpp>
15#include "progress.h" 9#include "progress.h"
10#include "selrestr.h"
11#include "role.h"
12#include "part.h"
13#include "field.h"
16#include "../lib/util.h" 14#include "../lib/util.h"
17 15
18using json = nlohmann::json; 16namespace verbly {
19 17 namespace generator {
20struct verb_t {
21 std::string infinitive;
22 std::string past_tense;
23 std::string past_participle;
24 std::string ing_form;
25 std::string s_form;
26 int id;
27};
28
29struct adjective_t {
30 std::string base;
31 std::string comparative;
32 std::string superlative;
33};
34
35struct noun_t {
36 std::string singular;
37 std::string plural;
38};
39
40struct selrestr_t {
41 enum class type_t {
42 singleton,
43 andlogic,
44 orlogic,
45 empty
46 };
47 type_t type;
48 std::string restriction;
49 bool pos;
50 std::list<selrestr_t> subordinates;
51};
52
53struct framepart_t {
54 enum class type_t {
55 np,
56 v,
57 pp,
58 adj,
59 adv,
60 lex
61 };
62 type_t type;
63 std::string role;
64 selrestr_t selrestrs;
65 std::set<std::string> preprestrs;
66 std::set<std::string> synrestrs;
67 std::list<std::string> choices;
68 std::string lexval;
69};
70
71struct group_t {
72 std::string id;
73 std::string parent;
74 std::set<std::string> members;
75 std::map<std::string, selrestr_t> roles;
76 std::list<std::list<framepart_t>> frames;
77};
78
79struct pronunciation_t {
80 std::string phonemes;
81 std::string prerhyme;
82 std::string rhyme;
83 int syllables = 0;
84 std::string stress;
85
86 bool operator<(const pronunciation_t& other) const
87 {
88 return phonemes < other.phonemes;
89 }
90};
91
92std::map<std::string, group_t> groups;
93std::map<std::string, verb_t> verbs;
94std::map<std::string, adjective_t> adjectives;
95std::map<std::string, noun_t> nouns;
96std::map<int, std::map<int, int>> wn;
97std::map<int, int> images;
98std::map<std::string, std::set<pronunciation_t>> pronunciations;
99
100void print_usage()
101{
102 std::cout << "Verbly Datafile Generator" << std::endl;
103 std::cout << "-------------------------" << std::endl;
104 std::cout << "Requires exactly six arguments." << std::endl;
105 std::cout << "1. The path to a VerbNet data directory." << std::endl;
106 std::cout << "2. The path to an AGID infl.txt file." << std::endl;
107 std::cout << "3. The path to a WordNet prolog data directory." << std::endl;
108 std::cout << "4. The path to a CMUDICT pronunciation file." << std::endl;
109 std::cout << "5. The path to an ImageNet urls.txt file." << std::endl;
110 std::cout << "6. Datafile output path." << std::endl;
111
112 exit(1);
113}
114
115void db_error(sqlite3* ppdb, std::string query)
116{
117 std::cout << "Error writing to output database: " << sqlite3_errmsg(ppdb) << std::endl;
118 std::cout << query << std::endl;
119 sqlite3_close_v2(ppdb);
120 print_usage();
121}
122
123json export_selrestrs(selrestr_t r)
124{
125 if (r.type == selrestr_t::type_t::empty)
126 {
127 return {};
128 } else if (r.type == selrestr_t::type_t::singleton)
129 {
130 json result;
131 result["type"] = r.restriction;
132 result["pos"] = r.pos;
133 return result;
134 } else {
135 json result;
136 if (r.type == selrestr_t::type_t::andlogic)
137 {
138 result["logic"] = "and";
139 } else {
140 result["logic"] = "or";
141 }
142
143 std::list<json> outlist;
144 std::transform(std::begin(r.subordinates), std::end(r.subordinates), std::back_inserter(outlist), &export_selrestrs);
145 result["children"] = outlist;
146 18
147 return result; 19 generator::generator(
148 } 20 std::string verbNetPath,
149} 21 std::string agidPath,
150 22 std::string wordNetPath,
151selrestr_t parse_selrestrs(xmlNodePtr top, std::string filename) 23 std::string cmudictPath,
152{ 24 std::string imageNetPath,
153 selrestr_t r; 25 std::string outputPath) :
154 xmlChar* key; 26 verbNetPath_(verbNetPath),
155 27 agidPath_(agidPath),
156 if (!xmlStrcmp(top->name, (const xmlChar*) "SELRESTRS")) 28 wordNetPath_(wordNetPath),
157 { 29 cmudictPath_(cmudictPath),
158 if (xmlChildElementCount(top) == 0) 30 imageNetPath_(imageNetPath),
31 db_(outputPath)
159 { 32 {
160 r.type = selrestr_t::type_t::empty; 33 // Ensure VerbNet directory exists
161 } else if (xmlChildElementCount(top) == 1) 34 DIR* dir;
162 { 35 if ((dir = opendir(verbNetPath_.c_str())) == nullptr)
163 r = parse_selrestrs(xmlFirstElementChild(top), filename);
164 } else {
165 r.type = selrestr_t::type_t::andlogic;
166
167 if (xmlHasProp(top, (const xmlChar*) "logic"))
168 { 36 {
169 key = xmlGetProp(top, (const xmlChar*) "logic"); 37 throw std::invalid_argument("Invalid VerbNet data directory");
170 if (!xmlStrcmp(key, (const xmlChar*) "or"))
171 {
172 r.type = selrestr_t::type_t::orlogic;
173 }
174 xmlFree(key);
175 } 38 }
176 39
177 for (xmlNodePtr selrestr = top->xmlChildrenNode; selrestr != nullptr; selrestr = selrestr->next) 40 closedir(dir);
41
42 // Ensure AGID infl.txt exists
43 if (!std::ifstream(agidPath_))
178 { 44 {
179 if (!xmlStrcmp(selrestr->name, (const xmlChar*) "SELRESTRS") || !xmlStrcmp(selrestr->name, (const xmlChar*) "SELRESTR")) 45 throw std::invalid_argument("AGID infl.txt file not found");
180 {
181 r.subordinates.push_back(parse_selrestrs(selrestr, filename));
182 }
183 } 46 }
184 } 47
185 } else if (!xmlStrcmp(top->name, (const xmlChar*) "SELRESTR")) 48 // Add directory separator to WordNet path
186 { 49 if ((wordNetPath_.back() != '/') && (wordNetPath_.back() != '\\'))
187 r.type = selrestr_t::type_t::singleton;
188
189 key = xmlGetProp(top, (xmlChar*) "Value");
190 r.pos = (std::string((const char*)key) == "+");
191 xmlFree(key);
192
193 key = xmlGetProp(top, (xmlChar*) "type");
194 r.restriction = (const char*) key;
195 xmlFree(key);
196 } else {
197 // Invalid
198 std::cout << "Bad VerbNet file format: " << filename << std::endl;
199 print_usage();
200 }
201
202 return r;
203}
204
205group_t& parse_group(xmlNodePtr top, std::string filename)
206{
207 xmlChar* key = xmlGetProp(top, (xmlChar*) "ID");
208 if (key == 0)
209 {
210 std::cout << "Bad VerbNet file format: " << filename << std::endl;
211 print_usage();
212 }
213 std::string vnid = (const char*)key;
214 vnid = vnid.substr(vnid.find_first_of("-")+1);
215 xmlFree(key);
216
217 group_t g;
218 g.id = vnid;
219
220 for (xmlNodePtr node = top->xmlChildrenNode; node != nullptr; node = node->next)
221 {
222 if (!xmlStrcmp(node->name, (const xmlChar*) "SUBCLASSES"))
223 {
224 for (xmlNodePtr subclass = node->xmlChildrenNode; subclass != nullptr; subclass = subclass->next)
225 { 50 {
226 if (!xmlStrcmp(subclass->name, (const xmlChar*) "VNSUBCLASS")) 51 wordNetPath_ += '/';
227 {
228 auto& sg = parse_group(subclass, filename);
229 sg.parent = vnid;
230
231 for (auto member : sg.members)
232 {
233 g.members.insert(member);
234 }
235
236 // The schema requires that subclasses appear after role definitions, so we can do this now
237 for (auto role : g.roles)
238 {
239 if (sg.roles.count(role.first) == 0)
240 {
241 sg.roles[role.first] = role.second;
242 }
243 }
244 }
245 } 52 }
246 } else if (!xmlStrcmp(node->name, (const xmlChar*) "MEMBERS")) 53
247 { 54 // Ensure WordNet tables exist
248 for (xmlNodePtr member = node->xmlChildrenNode; member != nullptr; member = member->next) 55 for (std::string table : {
56 "s", "sk", "ant", "at", "cls", "hyp", "ins", "mm", "mp", "ms", "per", "sa", "sim", "syntax"
57 })
249 { 58 {
250 if (!xmlStrcmp(member->name, (const xmlChar*) "MEMBER")) 59 if (!std::ifstream(wordNetPath_ + "wn_" + table + ".pl"))
251 { 60 {
252 key = xmlGetProp(member, (xmlChar*) "name"); 61 throw std::invalid_argument("WordNet " + table + " table not found");
253 g.members.insert((const char*)key);
254 xmlFree(key);
255 } 62 }
256 } 63 }
257 } else if (!xmlStrcmp(node->name, (const xmlChar*) "THEMROLES")) 64
258 { 65 // Ensure CMUDICT file exists
259 for (xmlNodePtr role = node->xmlChildrenNode; role != nullptr; role = role->next) 66 if (!std::ifstream(cmudictPath_))
260 { 67 {
261 if (!xmlStrcmp(role->name, (const xmlChar*) "THEMROLE")) 68 throw std::invalid_argument("CMUDICT file not found");
262 {
263 selrestr_t r;
264 r.type = selrestr_t::type_t::empty;
265
266 key = xmlGetProp(role, (const xmlChar*) "type");
267 std::string type = (const char*)key;
268 xmlFree(key);
269
270 for (xmlNodePtr rolenode = role->xmlChildrenNode; rolenode != nullptr; rolenode = rolenode->next)
271 {
272 if (!xmlStrcmp(rolenode->name, (const xmlChar*) "SELRESTRS"))
273 {
274 r = parse_selrestrs(rolenode, filename);
275 }
276 }
277
278 g.roles[type] = r;
279 }
280 } 69 }
281 } else if (!xmlStrcmp(node->name, (const xmlChar*) "FRAMES")) 70
282 { 71 // Ensure ImageNet urls.txt exists
283 for (xmlNodePtr frame = node->xmlChildrenNode; frame != nullptr; frame = frame->next) 72 if (!std::ifstream(imageNetPath_))
284 { 73 {
285 if (!xmlStrcmp(frame->name, (const xmlChar*) "FRAME")) 74 throw std::invalid_argument("ImageNet urls.txt file not found");
286 {
287 std::list<framepart_t> f;
288
289 for (xmlNodePtr framenode = frame->xmlChildrenNode; framenode != nullptr; framenode = framenode->next)
290 {
291 if (!xmlStrcmp(framenode->name, (const xmlChar*) "SYNTAX"))
292 {
293 for (xmlNodePtr syntaxnode = framenode->xmlChildrenNode; syntaxnode != nullptr; syntaxnode = syntaxnode->next)
294 {
295 framepart_t fp;
296
297 if (!xmlStrcmp(syntaxnode->name, (const xmlChar*) "NP"))
298 {
299 fp.type = framepart_t::type_t::np;
300
301 key = xmlGetProp(syntaxnode, (xmlChar*) "value");
302 fp.role = (const char*)key;
303 xmlFree(key);
304
305 fp.selrestrs.type = selrestr_t::type_t::empty;
306
307 for (xmlNodePtr npnode = syntaxnode->xmlChildrenNode; npnode != nullptr; npnode = npnode->next)
308 {
309 if (!xmlStrcmp(npnode->name, (const xmlChar*) "SYNRESTRS"))
310 {
311 for (xmlNodePtr synrestr = npnode->xmlChildrenNode; synrestr != nullptr; synrestr = synrestr->next)
312 {
313 if (!xmlStrcmp(synrestr->name, (const xmlChar*) "SYNRESTR"))
314 {
315 key = xmlGetProp(synrestr, (xmlChar*) "type");
316 fp.synrestrs.insert(std::string((const char*)key));
317 xmlFree(key);
318 }
319 }
320 }
321
322 if (!xmlStrcmp(npnode->name, (const xmlChar*) "SELRESTRS"))
323 {
324 fp.selrestrs = parse_selrestrs(npnode, filename);
325 }
326 }
327 } else if (!xmlStrcmp(syntaxnode->name, (xmlChar*) "VERB"))
328 {
329 fp.type = framepart_t::type_t::v;
330 } else if (!xmlStrcmp(syntaxnode->name, (xmlChar*) "PREP"))
331 {
332 fp.type = framepart_t::type_t::pp;
333
334 if (xmlHasProp(syntaxnode, (xmlChar*) "value"))
335 {
336 key = xmlGetProp(syntaxnode, (xmlChar*) "value");
337 std::string choices = (const char*)key;
338 xmlFree(key);
339
340 fp.choices = verbly::split<std::list<std::string>>(choices, " ");
341 }
342
343 for (xmlNodePtr npnode = syntaxnode->xmlChildrenNode; npnode != nullptr; npnode = npnode->next)
344 {
345 if (!xmlStrcmp(npnode->name, (const xmlChar*) "SELRESTRS"))
346 {
347 for (xmlNodePtr synrestr = npnode->xmlChildrenNode; synrestr != nullptr; synrestr = synrestr->next)
348 {
349 if (!xmlStrcmp(synrestr->name, (const xmlChar*) "SELRESTR"))
350 {
351 key = xmlGetProp(synrestr, (xmlChar*) "type");
352 fp.preprestrs.insert(std::string((const char*)key));
353 xmlFree(key);
354 }
355 }
356 }
357 }
358 } else if (!xmlStrcmp(syntaxnode->name, (xmlChar*) "ADJ"))
359 {
360 fp.type = framepart_t::type_t::adj;
361 } else if (!xmlStrcmp(syntaxnode->name, (xmlChar*) "ADV"))
362 {
363 fp.type = framepart_t::type_t::adv;
364 } else if (!xmlStrcmp(syntaxnode->name, (xmlChar*) "LEX"))
365 {
366 fp.type = framepart_t::type_t::lex;
367
368 key = xmlGetProp(syntaxnode, (xmlChar*) "value");
369 fp.lexval = (const char*)key;
370 xmlFree(key);
371 } else {
372 continue;
373 }
374
375 f.push_back(fp);
376 }
377
378 g.frames.push_back(f);
379 }
380 }
381 }
382 } 75 }
383 } 76 }
384 }
385
386 groups[vnid] = g;
387
388 return groups[vnid];
389}
390
391int main(int argc, char** argv)
392{
393 if (argc != 7)
394 {
395 print_usage();
396 }
397
398 // VerbNet data
399 std::cout << "Reading verb frames..." << std::endl;
400
401 DIR* dir;
402 if ((dir = opendir(argv[1])) == nullptr)
403 {
404 std::cout << "Invalid VerbNet data directory." << std::endl;
405
406 print_usage();
407 }
408
409 struct dirent* ent;
410 while ((ent = readdir(dir)) != nullptr)
411 {
412 std::string filename(argv[1]);
413 if (filename.back() != '/')
414 {
415 filename += '/';
416 }
417 77
418 filename += ent->d_name; 78 void generator::run()
419 //std::cout << ent->d_name << std::endl;
420
421 if (filename.rfind(".xml") != filename.size() - 4)
422 {
423 continue;
424 }
425
426 xmlDocPtr doc = xmlParseFile(filename.c_str());
427 if (doc == nullptr)
428 {
429 std::cout << "Error opening " << filename << std::endl;
430 print_usage();
431 }
432
433 xmlNodePtr top = xmlDocGetRootElement(doc);
434 if ((top == nullptr) || (xmlStrcmp(top->name, (xmlChar*) "VNCLASS")))
435 {
436 std::cout << "Bad VerbNet file format: " << filename << std::endl;
437 print_usage();
438 }
439
440 parse_group(top, filename);
441 }
442
443 closedir(dir);
444
445 // Get verbs from AGID
446 std::cout << "Reading inflections..." << std::endl;
447
448 std::ifstream agidfile(argv[2]);
449 if (!agidfile.is_open())
450 {
451 std::cout << "Could not open AGID file: " << argv[2] << std::endl;
452 print_usage();
453 }
454
455 for (;;)
456 {
457 std::string line;
458 if (!getline(agidfile, line))
459 {
460 break;
461 }
462
463 if (line.back() == '\r')
464 { 79 {
465 line.pop_back(); 80 // Create notions, words, lemmas, and forms from WordNet synsets
466 } 81 readWordNetSynsets();
467 82
468 int divider = line.find_first_of(" "); 83 // Reads adjective positioning WordNet data
469 std::string word = line.substr(0, divider); 84 readAdjectivePositioning();
470 line = line.substr(divider+1); 85
471 char type = line[0]; 86 // Counts the number of URLs ImageNet has per notion
472 87 readImageNetUrls();
473 if (line[1] == '?') 88
474 { 89 // Creates a word by WordNet sense key lookup table
475 line.erase(0, 4); 90 readWordNetSenseKeys();
476 } else { 91
477 line.erase(0, 3); 92 // Creates groups and frames from VerbNet data
478 } 93 readVerbNet();
479 94
480 std::vector<std::string> forms; 95 // Creates forms and inflections from AGID. To reduce the amount of forms
481 while (!line.empty()) 96 // created, we do this after most lemmas that need inflecting have been
482 { 97 // created through other means, and then only generate forms for
483 std::string inflection; 98 // inflections of already-existing lemmas. The exception to this regards
484 if ((divider = line.find(" | ")) != std::string::npos) 99 // verb lemmas. If a verb lemma in AGID either does not exist yet, or does
485 { 100 // exist but is not related to any words that are related to verb notions,
486 inflection = line.substr(0, divider); 101 // then a notion and a word is generated and the form generation proceeds
487 line = line.substr(divider + 3); 102 // as usual.
488 } else { 103 readAgidInflections();
489 inflection = line; 104
490 line = ""; 105 // Reads in prepositions and the is_a relationship
491 } 106 readPrepositions();
492 107
493 if ((divider = inflection.find_first_of(",?")) != std::string::npos) 108 // Creates pronunciations from CMUDICT. To reduce the amount of
494 { 109 // pronunciations created, we do this after all forms have been created,
495 inflection = inflection.substr(0, divider); 110 // and then only generate pronunciations for already-existing forms.
496 } 111 readCmudictPronunciations();
497 112
498 forms.push_back(inflection); 113 // Writes the database schema
114 writeSchema();
115
116 // Dumps data to the database
117 dumpObjects();
118
119 // Populates the antonymy relationship from WordNet
120 readWordNetAntonymy();
121
122 // Populates the variation relationship from WordNet
123 readWordNetVariation();
124
125 // Populates the usage, topicality, and regionality relationships from
126 // WordNet
127 readWordNetClasses();
128
129 // Populates the causality relationship from WordNet
130 readWordNetCausality();
131
132 // Populates the entailment relationship from WordNet
133 readWordNetEntailment();
134
135 // Populates the hypernymy relationship from WordNet
136 readWordNetHypernymy();
137
138 // Populates the instantiation relationship from WordNet
139 readWordNetInstantiation();
140
141 // Populates the member meronymy relationship from WordNet
142 readWordNetMemberMeronymy();
143
144 // Populates the part meronymy relationship from WordNet
145 readWordNetPartMeronymy();
146
147 // Populates the substance meronymy relationship from WordNet
148 readWordNetSubstanceMeronymy();
149
150 // Populates the pertainymy and mannernymy relationships from WordNet
151 readWordNetPertainymy();
152
153 // Populates the specification relationship from WordNet
154 readWordNetSpecification();
155
156 // Populates the adjective similarity relationship from WordNet
157 readWordNetSimilarity();
158
159
160
161
162
163
164
165
499 } 166 }
500 167
501 switch (type) 168 void generator::readWordNetSynsets()
502 { 169 {
503 case 'V': 170 std::list<std::string> lines(readFile(wordNetPath_ + "wn_s.pl"));
171 progress ppgs("Reading synsets from WordNet...", lines.size());
172
173 for (std::string line : lines)
504 { 174 {
505 verb_t v; 175 ppgs.update();
506 v.infinitive = word; 176
507 if (forms.size() == 4) 177 std::regex relation("^s\\(([1234]\\d{8}),(\\d+),'(.+)',\\w,\\d+,(\\d+)\\)\\.$");
508 { 178 std::smatch relation_data;
509 v.past_tense = forms[0]; 179 if (!std::regex_search(line, relation_data, relation))
510 v.past_participle = forms[1]; 180 {
511 v.ing_form = forms[2]; 181 continue;
512 v.s_form = forms[3];
513 } else if (forms.size() == 3)
514 {
515 v.past_tense = forms[0];
516 v.past_participle = forms[0];
517 v.ing_form = forms[1];
518 v.s_form = forms[2];
519 } else if (forms.size() == 8)
520 {
521 // As of AGID 2014.08.11, this is only "to be"
522 v.past_tense = forms[0];
523 v.past_participle = forms[2];
524 v.ing_form = forms[3];
525 v.s_form = forms[4];
526 } else {
527 // Words that don't fit the cases above as of AGID 2014.08.11:
528 // - may and shall do not conjugate the way we want them to
529 // - methinks only has a past tense and is an outlier
530 // - wit has five forms, and is archaic/obscure enough that we can ignore it for now
531 std::cout << "Ignoring verb \"" << word << "\" due to non-standard number of forms." << std::endl;
532 } 182 }
533 183
534 verbs[word] = v; 184 int synset_id = std::stoi(relation_data[1]);
535 185 int wnum = std::stoi(relation_data[2]);
536 break; 186 std::string text = relation_data[3];
537 } 187 int tag_count = std::stoi(relation_data[4]);
538 188 size_t word_it;
539 case 'A': 189 while ((word_it = text.find("''")) != std::string::npos)
540 {
541 adjective_t adj;
542 adj.base = word;
543 if (forms.size() == 2)
544 { 190 {
545 adj.comparative = forms[0]; 191 text.erase(word_it, 1);
546 adj.superlative = forms[1];
547 } else {
548 // As of AGID 2014.08.11, this is only "only", which has only the form "onliest"
549 std::cout << "Ignoring adjective/adverb \"" << word << "\" due to non-standard number of forms." << std::endl;
550 } 192 }
551 193
552 adjectives[word] = adj; 194 // The WordNet data does contain duplicates, so we need to check that we
553 195 // haven't already created this word.
554 break; 196 std::pair<int, int> lookup(synset_id, wnum);
555 } 197 if (!wordByWnidAndWnum_.count(lookup))
556
557 case 'N':
558 {
559 noun_t n;
560 n.singular = word;
561 if (forms.size() == 1)
562 { 198 {
563 n.plural = forms[0]; 199 notion& synset = lookupOrCreateNotion(synset_id);
564 } else { 200 lemma& lex = lookupOrCreateLemma(text);
565 // As of AGID 2014.08.11, this is non-existent. 201 word& entry = createWord(synset, lex, tag_count);
566 std::cout << "Ignoring noun \"" << word << "\" due to non-standard number of forms." << std::endl; 202
203 wordByWnidAndWnum_[lookup] = &entry;
567 } 204 }
568
569 nouns[word] = n;
570
571 break;
572 } 205 }
573 } 206 }
574 }
575
576 // Pronunciations
577 std::cout << "Reading pronunciations..." << std::endl;
578
579 std::ifstream pronfile(argv[4]);
580 if (!pronfile.is_open())
581 {
582 std::cout << "Could not open CMUDICT file: " << argv[4] << std::endl;
583 print_usage();
584 }
585
586 for (;;)
587 {
588 std::string line;
589 if (!getline(pronfile, line))
590 {
591 break;
592 }
593
594 if (line.back() == '\r')
595 {
596 line.pop_back();
597 }
598 207
599 std::regex phoneme("([A-Z][^ \\(]*)(?:\\(\\d+\\))? ([A-Z 0-9]+)"); 208 void generator::readAdjectivePositioning()
600 std::smatch phoneme_data;
601 if (std::regex_search(line, phoneme_data, phoneme))
602 { 209 {
603 std::string canonical(phoneme_data[1]); 210 std::list<std::string> lines(readFile(wordNetPath_ + "wn_syntax.pl"));
604 std::transform(std::begin(canonical), std::end(canonical), std::begin(canonical), ::tolower); 211 progress ppgs("Reading adjective positionings from WordNet...", lines.size());
605
606 std::string phonemes = phoneme_data[2];
607 auto phoneme_set = verbly::split<std::list<std::string>>(phonemes, " ");
608 auto phemstrt = std::find_if(std::begin(phoneme_set), std::end(phoneme_set), [] (std::string phoneme) {
609 return phoneme.find("1") != std::string::npos;
610 });
611 212
612 pronunciation_t p; 213 for (std::string line : lines)
613 p.phonemes = phonemes;
614
615 // Rhyme detection
616 if (phemstrt != std::end(phoneme_set))
617 { 214 {
618 std::stringstream rhymer; 215 ppgs.update();
619 for (auto it = phemstrt; it != std::end(phoneme_set); it++)
620 {
621 std::string naked;
622 std::remove_copy_if(std::begin(*it), std::end(*it), std::back_inserter(naked), [] (char ch) {
623 return isdigit(ch);
624 });
625
626 if (it != phemstrt)
627 {
628 rhymer << " ";
629 }
630
631 rhymer << naked;
632 }
633 216
634 p.rhyme = rhymer.str(); 217 std::regex relation("^syntax\\((3\\d{8}),(\\d+),([ipa])p?\\)\\.");
635 218 std::smatch relation_data;
636 if (phemstrt != std::begin(phoneme_set)) 219 if (!std::regex_search(line, relation_data, relation))
637 { 220 {
638 phemstrt--; 221 continue;
639 p.prerhyme = *phemstrt;
640 } else {
641 p.prerhyme = "";
642 } 222 }
643 } else {
644 p.prerhyme = "";
645 p.rhyme = "";
646 }
647 223
648 // Syllable/stress 224 int synset_id = stoi(relation_data[1]);
649 for (auto phm : phoneme_set) 225 int wnum = stoi(relation_data[2]);
650 { 226 std::string adjpos_str = relation_data[3];
651 if (isdigit(phm.back()))
652 {
653 // It's a vowel!
654 p.syllables++;
655 227
656 if (phm.back() == '1') 228 std::pair<int, int> lookup(synset_id, wnum);
229 if (wordByWnidAndWnum_.count(lookup))
230 {
231 word& adj = *wordByWnidAndWnum_.at(lookup);
232
233 if (adjpos_str == "p")
234 {
235 adj.setAdjectivePosition(positioning::predicate);
236 } else if (adjpos_str == "a")
237 {
238 adj.setAdjectivePosition(positioning::attributive);
239 } else if (adjpos_str == "i")
657 { 240 {
658 p.stress.push_back('1'); 241 adj.setAdjectivePosition(positioning::postnominal);
659 } else { 242 } else {
660 p.stress.push_back('0'); 243 // Can't happen because of how we specified the regex.
244 assert(false);
661 } 245 }
662 } 246 }
663 } 247 }
664
665 pronunciations[canonical].insert(p);
666 }
667 }
668
669 // Images
670 std::cout << "Reading images..." << std::endl;
671
672 std::ifstream imagefile(argv[5]);
673 if (!imagefile.is_open())
674 {
675 std::cout << "Could not open ImageNet file: " << argv[5] << std::endl;
676 print_usage();
677 }
678
679 for (;;)
680 {
681 std::string line;
682 if (!getline(imagefile, line))
683 {
684 break;
685 }
686
687 if (line.back() == '\r')
688 {
689 line.pop_back();
690 }
691
692 std::string wnid_s = line.substr(1, 8);
693 int wnid = stoi(wnid_s) + 100000000;
694 images[wnid]++;
695 }
696
697 imagefile.close();
698
699 // Start writing output
700 std::cout << "Writing schema..." << std::endl;
701
702 sqlite3* ppdb;
703 if (sqlite3_open_v2(argv[6], &ppdb, SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE, NULL) != SQLITE_OK)
704 {
705 std::cout << "Error opening output datafile: " << sqlite3_errmsg(ppdb) << std::endl;
706 print_usage();
707 }
708
709 std::ifstream schemafile("schema.sql");
710 if (!schemafile.is_open())
711 {
712 std::cout << "Could not find schema file" << std::endl;
713 print_usage();
714 }
715
716 std::stringstream schemabuilder;
717 for (;;)
718 {
719 std::string line;
720 if (!getline(schemafile, line))
721 {
722 break;
723 }
724
725 if (line.back() == '\r')
726 {
727 line.pop_back();
728 }
729
730 schemabuilder << line << std::endl;
731 }
732
733 std::string schema = schemabuilder.str();
734 while (!schema.empty())
735 {
736 std::string query;
737 int divider = schema.find(";");
738 if (divider != std::string::npos)
739 {
740 query = schema.substr(0, divider+1);
741 schema = schema.substr(divider+2);
742 } else {
743 break;
744 } 248 }
745 249
746 sqlite3_stmt* schmstmt; 250 void generator::readImageNetUrls()
747 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &schmstmt, NULL) != SQLITE_OK)
748 { 251 {
749 db_error(ppdb, query); 252 // The ImageNet datafile is so large that it is unreasonable and
750 } 253 // unnecessary to read it into memory; instead, we will parse each line as
751 254 // we read it. This has the caveat that we cannot display a progress bar.
752 if (sqlite3_step(schmstmt) != SQLITE_DONE) 255 std::cout << "Reading image counts from ImageNet..." << std::endl;
753 {
754 db_error(ppdb, query);
755 }
756
757 sqlite3_finalize(schmstmt);
758 }
759
760 std::cout << "Writing prepositions..." << std::endl;
761 std::ifstream prepfile("prepositions.txt");
762 if (!prepfile.is_open())
763 {
764 std::cout << "Could not find prepositions file" << std::endl;
765 print_usage();
766 }
767
768 for (;;)
769 {
770 std::string line;
771 if (!getline(prepfile, line))
772 {
773 break;
774 }
775
776 if (line.back() == '\r')
777 {
778 line.pop_back();
779 }
780
781 std::regex relation("^([^:]+): (.+)");
782 std::smatch relation_data;
783 std::regex_search(line, relation_data, relation);
784 std::string prep = relation_data[1];
785 std::list<std::string> groups = verbly::split<std::list<std::string>>(relation_data[2], ", ");
786
787 std::string query("INSERT INTO prepositions (form) VALUES (?)");
788 sqlite3_stmt* ppstmt;
789
790 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
791 {
792 db_error(ppdb, query);
793 }
794
795 sqlite3_bind_text(ppstmt, 1, prep.c_str(), prep.length(), SQLITE_TRANSIENT);
796
797 if (sqlite3_step(ppstmt) != SQLITE_DONE)
798 {
799 db_error(ppdb, query);
800 }
801
802 sqlite3_finalize(ppstmt);
803
804 query = "SELECT last_insert_rowid()";
805 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
806 {
807 db_error(ppdb, query);
808 }
809
810 if (sqlite3_step(ppstmt) != SQLITE_ROW)
811 {
812 db_error(ppdb, query);
813 }
814
815 int rowid = sqlite3_column_int(ppstmt, 0);
816 sqlite3_finalize(ppstmt);
817
818 for (auto group : groups)
819 {
820 query = "INSERT INTO preposition_groups (preposition_id, groupname) VALUES (?, ?)";
821 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
822 {
823 db_error(ppdb, query);
824 }
825 256
826 sqlite3_bind_int(ppstmt, 1, rowid); 257 std::ifstream file(imageNetPath_);
827 sqlite3_bind_text(ppstmt, 2, group.c_str(), group.length(), SQLITE_TRANSIENT); 258 if (!file)
828
829 if (sqlite3_step(ppstmt) != SQLITE_DONE)
830 { 259 {
831 db_error(ppdb, query); 260 throw std::invalid_argument("Could not find file " + imageNetPath_);
832 } 261 }
833
834 sqlite3_finalize(ppstmt);
835 }
836 }
837
838 262
839 { 263 std::string line;
840 progress ppgs("Writing verbs...", verbs.size()); 264 while (std::getline(file, line))
841 for (auto& mapping : verbs)
842 {
843 sqlite3_stmt* ppstmt;
844 std::string query("INSERT INTO verbs (infinitive, past_tense, past_participle, ing_form, s_form) VALUES (?, ?, ?, ?, ?)");
845 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
846 {
847 db_error(ppdb, query);
848 }
849
850 sqlite3_bind_text(ppstmt, 1, mapping.second.infinitive.c_str(), mapping.second.infinitive.length(), SQLITE_TRANSIENT);
851 sqlite3_bind_text(ppstmt, 2, mapping.second.past_tense.c_str(), mapping.second.past_tense.length(), SQLITE_TRANSIENT);
852 sqlite3_bind_text(ppstmt, 3, mapping.second.past_participle.c_str(), mapping.second.past_participle.length(), SQLITE_TRANSIENT);
853 sqlite3_bind_text(ppstmt, 4, mapping.second.ing_form.c_str(), mapping.second.ing_form.length(), SQLITE_TRANSIENT);
854 sqlite3_bind_text(ppstmt, 5, mapping.second.s_form.c_str(), mapping.second.s_form.length(), SQLITE_TRANSIENT);
855
856 if (sqlite3_step(ppstmt) != SQLITE_DONE)
857 {
858 db_error(ppdb, query);
859 }
860
861 sqlite3_finalize(ppstmt);
862
863 std::string canonical(mapping.second.infinitive);
864 std::transform(std::begin(canonical), std::end(canonical), std::begin(canonical), ::tolower);
865 if (pronunciations.count(canonical) == 1)
866 { 265 {
867 query = "SELECT last_insert_rowid()"; 266 if (line.back() == '\r')
868 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
869 { 267 {
870 db_error(ppdb, query); 268 line.pop_back();
871 } 269 }
872 270
873 if (sqlite3_step(ppstmt) != SQLITE_ROW) 271 std::string wnid_s = line.substr(1, 8);
272 int wnid = stoi(wnid_s) + 100000000;
273 if (notionByWnid_.count(wnid))
874 { 274 {
875 db_error(ppdb, query); 275 // We know that this notion has a wnid and is a noun.
876 } 276 notionByWnid_.at(wnid)->incrementNumOfImages();
877
878 int rowid = sqlite3_column_int(ppstmt, 0);
879
880 sqlite3_finalize(ppstmt);
881
882 mapping.second.id = rowid;
883
884 for (auto pronunciation : pronunciations[canonical])
885 {
886 if (!pronunciation.rhyme.empty())
887 {
888 query = "INSERT INTO verb_pronunciations (verb_id, pronunciation, syllables, stress, prerhyme, rhyme) VALUES (?, ?, ?, ?, ?, ?)";
889 } else {
890 query = "INSERT INTO verb_pronunciations (verb_id, pronunciation, syllables, stress) VALUES (?, ?, ?, ?)";
891 }
892
893 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
894 {
895 db_error(ppdb, query);
896 }
897
898 sqlite3_bind_int(ppstmt, 1, rowid);
899 sqlite3_bind_text(ppstmt, 2, pronunciation.phonemes.c_str(), pronunciation.phonemes.length(), SQLITE_TRANSIENT);
900 sqlite3_bind_int(ppstmt, 3, pronunciation.syllables);
901 sqlite3_bind_text(ppstmt, 4, pronunciation.stress.c_str(), pronunciation.stress.length(), SQLITE_TRANSIENT);
902
903 if (!pronunciation.rhyme.empty())
904 {
905 sqlite3_bind_text(ppstmt, 5, pronunciation.prerhyme.c_str(), pronunciation.prerhyme.length(), SQLITE_TRANSIENT);
906 sqlite3_bind_text(ppstmt, 6, pronunciation.rhyme.c_str(), pronunciation.rhyme.length(), SQLITE_TRANSIENT);
907 }
908
909 if (sqlite3_step(ppstmt) != SQLITE_DONE)
910 {
911 db_error(ppdb, query);
912 }
913
914 sqlite3_finalize(ppstmt);
915 } 277 }
916 } 278 }
917
918 ppgs.update();
919 } 279 }
920 } 280
921 281 void generator::readWordNetSenseKeys()
922 {
923 progress ppgs("Writing verb frames...", groups.size());
924 for (auto& mapping : groups)
925 { 282 {
926 std::list<json> roledatal; 283 std::list<std::string> lines(readFile(wordNetPath_ + "wn_sk.pl"));
927 std::transform(std::begin(mapping.second.roles), std::end(mapping.second.roles), std::back_inserter(roledatal), [] (std::pair<std::string, selrestr_t> r) { 284 progress ppgs("Reading sense keys from WordNet...", lines.size());
928 json role;
929 role["type"] = r.first;
930 role["selrestrs"] = export_selrestrs(r.second);
931
932 return role;
933 });
934
935 json roledata(roledatal);
936 std::string rdm = roledata.dump();
937
938 sqlite3_stmt* ppstmt;
939 std::string query("INSERT INTO groups (data) VALUES (?)");
940 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
941 {
942 db_error(ppdb, query);
943 }
944
945 sqlite3_bind_blob(ppstmt, 1, rdm.c_str(), rdm.size(), SQLITE_TRANSIENT);
946
947 if (sqlite3_step(ppstmt) != SQLITE_DONE)
948 {
949 db_error(ppdb, query);
950 }
951 285
952 sqlite3_finalize(ppstmt); 286 for (std::string line : lines)
953
954 query = "SELECT last_insert_rowid()";
955 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
956 {
957 db_error(ppdb, query);
958 }
959
960 if (sqlite3_step(ppstmt) != SQLITE_ROW)
961 {
962 db_error(ppdb, query);
963 }
964
965 int gid = sqlite3_column_int(ppstmt, 0);
966 sqlite3_finalize(ppstmt);
967
968 for (auto frame : mapping.second.frames)
969 { 287 {
970 std::list<json> fdatap; 288 ppgs.update();
971 std::transform(std::begin(frame), std::end(frame), std::back_inserter(fdatap), [] (framepart_t& fp) {
972 json part;
973
974 switch (fp.type)
975 {
976 case framepart_t::type_t::np:
977 {
978 part["type"] = "np";
979 part["role"] = fp.role;
980 part["selrestrs"] = export_selrestrs(fp.selrestrs);
981 part["synrestrs"] = fp.synrestrs;
982
983 break;
984 }
985
986 case framepart_t::type_t::pp:
987 {
988 part["type"] = "pp";
989 part["values"] = fp.choices;
990 part["preprestrs"] = fp.preprestrs;
991
992 break;
993 }
994
995 case framepart_t::type_t::v:
996 {
997 part["type"] = "v";
998
999 break;
1000 }
1001
1002 case framepart_t::type_t::adj:
1003 {
1004 part["type"] = "adj";
1005
1006 break;
1007 }
1008
1009 case framepart_t::type_t::adv:
1010 {
1011 part["type"] = "adv";
1012
1013 break;
1014 }
1015
1016 case framepart_t::type_t::lex:
1017 {
1018 part["type"] = "lex";
1019 part["value"] = fp.lexval;
1020
1021 break;
1022 }
1023 }
1024
1025 return part;
1026 });
1027
1028 json fdata(fdatap);
1029 std::string marshall = fdata.dump();
1030
1031 query = "INSERT INTO frames (group_id, data) VALUES (?, ?)";
1032 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
1033 {
1034 db_error(ppdb, query);
1035 }
1036
1037 sqlite3_bind_int(ppstmt, 1, gid);
1038 sqlite3_bind_blob(ppstmt, 2, marshall.c_str(), marshall.length(), SQLITE_TRANSIENT);
1039 289
1040 if (sqlite3_step(ppstmt) != SQLITE_DONE) 290 // We only actually need to lookup verbs by sense key so we'll just
291 // ignore everything that isn't a verb.
292 std::regex relation("^sk\\((2\\d{8}),(\\d+),'(.+)'\\)\\.$");
293 std::smatch relation_data;
294 if (!std::regex_search(line, relation_data, relation))
1041 { 295 {
1042 db_error(ppdb, query); 296 continue;
1043 } 297 }
298
299 int synset_id = stoi(relation_data[1]);
300 int wnum = stoi(relation_data[2]);
301 std::string sense_key = relation_data[3];
1044 302
1045 sqlite3_finalize(ppstmt); 303 // We are treating this mapping as injective, which is not entirely
1046 } 304 // accurate. First, the WordNet table contains duplicate rows, so those
1047 305 // need to be ignored. More importantly, a small number of sense keys
1048 for (auto member : mapping.second.members) 306 // (one for each letter of the Latin alphabet, plus 9 other words) each
1049 { 307 // map to two different words in the same synset which differ only by
1050 if (verbs.count(member) == 1) 308 // capitalization. Luckily, none of these exceptions are verbs, so we
309 // can pretend that the mapping is injective.
310 if (!wnSenseKeys_.count(sense_key))
1051 { 311 {
1052 auto& v = verbs[member]; 312 std::pair<int, int> lookup(synset_id, wnum);
1053 313 if (wordByWnidAndWnum_.count(lookup))
1054 query = "INSERT INTO verb_groups (verb_id, group_id) VALUES (?, ?)";
1055 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
1056 {
1057 db_error(ppdb, query);
1058 }
1059
1060 sqlite3_bind_int(ppstmt, 1, v.id);
1061 sqlite3_bind_int(ppstmt, 2, gid);
1062
1063 if (sqlite3_step(ppstmt) != SQLITE_DONE)
1064 { 314 {
1065 db_error(ppdb, query); 315 wnSenseKeys_[sense_key] = wordByWnidAndWnum_.at(lookup);
1066 } 316 }
1067
1068 sqlite3_finalize(ppstmt);
1069 } 317 }
1070 } 318 }
1071
1072 ppgs.update();
1073 } 319 }
1074 } 320
1075 321 void generator::readVerbNet()
1076 // Get nouns/adjectives/adverbs from WordNet
1077 // Useful relations:
1078 // - s: master list
1079 // - ant: antonymy (e.g. happy/sad, sad/happy, happiness/sadness)
1080 // - at: variation (e.g. a measurement can be standard or nonstandard)
1081 // - der: derivation (e.g. happy/happily, happily/happy)
1082 // - hyp: hypernymy/hyponymy (e.g. color/red, color/blue)
1083 // - ins: instantiation (do we need this? let's see)
1084 // - mm: member meronymy/holonymy (e.g. family/mother, family/child)
1085 // - mp: part meronymy/holonymy (e.g. wheel/spoke, wheel/tire)
1086 // - ms: substance meronymy/holonymy (e.g. tire/rubber, doorstop/rubber)
1087 // - per: pertainymy (e.g. something that is Alaskan pertains to Alaska)
1088 // mannernymy (e.g. something done quickly is done in a manner that is quick)
1089 // - sa: specification (e.g. inaccurate (general) can mean imprecise or incorrect (specific))
1090 // - sim: synonymy (e.g. cheerful/happy, happy/cheerful)
1091 // - syntax: positioning flags for some adjectives
1092 std::string wnpref {argv[3]};
1093 if (wnpref.back() != '/')
1094 {
1095 wnpref += '/';
1096 }
1097
1098 // s table
1099 {
1100 std::ifstream wnsfile(wnpref + "wn_s.pl");
1101 if (!wnsfile.is_open())
1102 { 322 {
1103 std::cout << "Invalid WordNet data directory." << std::endl; 323 std::cout << "Reading frames from VerbNet..." << std::endl;
1104 print_usage();
1105 }
1106 324
1107 std::list<std::string> lines; 325 DIR* dir;
1108 for (;;) 326 if ((dir = opendir(verbNetPath_.c_str())) == nullptr)
1109 {
1110 std::string line;
1111 if (!getline(wnsfile, line))
1112 { 327 {
1113 break; 328 throw std::invalid_argument("Invalid VerbNet data directory");
1114 } 329 }
1115 330
1116 if (line.back() == '\r') 331 struct dirent* ent;
1117 { 332 while ((ent = readdir(dir)) != nullptr)
1118 line.pop_back();
1119 }
1120
1121 lines.push_back(line);
1122 }
1123
1124 progress ppgs("Writing nouns, adjectives, and adverbs...", lines.size());
1125 for (auto line : lines)
1126 {
1127 ppgs.update();
1128
1129 std::regex relation("^s\\(([134]\\d{8}),(\\d+),'(.+)',\\w,\\d+,\\d+\\)\\.$");
1130 std::smatch relation_data;
1131 if (!std::regex_search(line, relation_data, relation))
1132 { 333 {
1133 continue; 334 std::string filename(verbNetPath_);
1134 } 335
336 if (filename.back() != '/')
337 {
338 filename += '/';
339 }
1135 340
1136 int synset_id = stoi(relation_data[1]); 341 filename += ent->d_name;
1137 int wnum = stoi(relation_data[2]);
1138 std::string word = relation_data[3];
1139 size_t word_it;
1140 while ((word_it = word.find("''")) != std::string::npos)
1141 {
1142 word.erase(word_it, 1);
1143 }
1144 342
1145 std::string query; 343 if (filename.rfind(".xml") != filename.size() - 4)
1146 switch (synset_id / 100000000)
1147 {
1148 case 1: // Noun
1149 { 344 {
1150 if (nouns.count(word) == 1) 345 continue;
1151 {
1152 query = "INSERT INTO nouns (singular, proper, complexity, images, wnid, plural) VALUES (?, ?, ?, ?, ?, ?)";
1153 } else {
1154 query = "INSERT INTO nouns (singular, proper, complexity, images, wnid) VALUES (?, ?, ?, ?, ?)";
1155 }
1156
1157 break;
1158 } 346 }
1159 347
1160 case 2: // Verb 348 xmlDocPtr doc = xmlParseFile(filename.c_str());
349 if (doc == nullptr)
1161 { 350 {
1162 // Ignore 351 throw std::logic_error("Error opening " + filename);
1163
1164 break;
1165 } 352 }
1166 353
1167 case 3: // Adjective 354 xmlNodePtr top = xmlDocGetRootElement(doc);
355 if ((top == nullptr) || (xmlStrcmp(top->name, reinterpret_cast<const xmlChar*>("VNCLASS"))))
1168 { 356 {
1169 if (adjectives.count(word) == 1) 357 throw std::logic_error("Bad VerbNet file format: " + filename);
1170 {
1171 query = "INSERT INTO adjectives (base_form, complexity, comparative, superlative) VALUES (?, ?, ?, ?)";
1172 } else {
1173 query = "INSERT INTO adjectives (base_form, complexity) VALUES (?, ?)";
1174 }
1175
1176 break;
1177 } 358 }
1178 359
1179 case 4: // Adverb 360 try
1180 { 361 {
1181 if (adjectives.count(word) == 1) 362 createGroup(top);
1182 { 363 } catch (const std::exception& e)
1183 query = "INSERT INTO adverbs (base_form, complexity, comparative, superlative) VALUES (?, ?, ?, ?)"; 364 {
1184 } else { 365 std::throw_with_nested(std::logic_error("Error parsing VerbNet file: " + filename));
1185 query = "INSERT INTO adverbs (base_form, complexity) VALUES (?, ?)";
1186 }
1187
1188 break;
1189 } 366 }
1190 } 367 }
368
369 closedir(dir);
370 }
1191 371
1192 sqlite3_stmt* ppstmt; 372 void generator::readAgidInflections()
1193 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK) 373 {
374 std::list<std::string> lines(readFile(agidPath_));
375 progress ppgs("Reading inflections from AGID...", lines.size());
376
377 for (std::string line : lines)
1194 { 378 {
1195 db_error(ppdb, query); 379 ppgs.update();
1196 } 380
381 int divider = line.find_first_of(" ");
382 std::string infinitive = line.substr(0, divider);
383 line = line.substr(divider+1);
384 char type = line[0];
1197 385
1198 sqlite3_bind_text(ppstmt, 1, word.c_str(), word.length(), SQLITE_TRANSIENT); 386 if (line[1] == '?')
1199 switch (synset_id / 100000000)
1200 {
1201 case 1: // Noun
1202 { 387 {
1203 sqlite3_bind_int(ppstmt, 2, (std::any_of(std::begin(word), std::end(word), [] (char ch) { 388 line.erase(0, 4);
1204 return isupper(ch); 389 } else {
1205 }) ? 1 : 0)); 390 line.erase(0, 3);
1206
1207 sqlite3_bind_int(ppstmt, 3, verbly::split<std::list<std::string>>(word, " ").size());
1208 sqlite3_bind_int(ppstmt, 4, images[synset_id]);
1209 sqlite3_bind_int(ppstmt, 5, synset_id);
1210
1211 if (nouns.count(word) == 1)
1212 {
1213 sqlite3_bind_text(ppstmt, 6, nouns[word].plural.c_str(), nouns[word].plural.length(), SQLITE_TRANSIENT);
1214 }
1215
1216 break;
1217 } 391 }
1218 392
1219 case 3: // Adjective 393 if (!lemmaByBaseForm_.count(infinitive) && (type != 'V'))
1220 case 4: // Adverb
1221 { 394 {
1222 sqlite3_bind_int(ppstmt, 2, verbly::split<std::list<std::string>>(word, " ").size()); 395 continue;
1223 396 }
1224 if (adjectives.count(word) == 1) 397
398 lemma& curLemma = lookupOrCreateLemma(infinitive);
399
400 auto forms = split<std::vector<std::string>>(line, " | ");
401 for (std::string& inflForm : forms)
402 {
403 int sympos = inflForm.find_first_of(",?");
404 if (sympos != std::string::npos)
1225 { 405 {
1226 sqlite3_bind_text(ppstmt, 3, adjectives[word].comparative.c_str(), adjectives[word].comparative.length(), SQLITE_TRANSIENT); 406 inflForm = inflForm.substr(0, sympos);
1227 sqlite3_bind_text(ppstmt, 4, adjectives[word].superlative.c_str(), adjectives[word].superlative.length(), SQLITE_TRANSIENT);
1228 } 407 }
1229
1230 break;
1231 } 408 }
1232 }
1233 409
1234 if (sqlite3_step(ppstmt) != SQLITE_DONE) 410 switch (type)
1235 {
1236 db_error(ppdb, query);
1237 }
1238
1239 sqlite3_finalize(ppstmt);
1240
1241 query = "SELECT last_insert_rowid()";
1242 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
1243 {
1244 db_error(ppdb, query);
1245 }
1246
1247 if (sqlite3_step(ppstmt) != SQLITE_ROW)
1248 {
1249 db_error(ppdb, query);
1250 }
1251
1252 int rowid = sqlite3_column_int(ppstmt, 0);
1253 wn[synset_id][wnum] = rowid;
1254
1255 sqlite3_finalize(ppstmt);
1256
1257 std::string canonical(word);
1258 std::transform(std::begin(canonical), std::end(canonical), std::begin(canonical), ::tolower);
1259 if (pronunciations.count(canonical) == 1)
1260 {
1261 for (auto pronunciation : pronunciations[canonical])
1262 { 411 {
1263 switch (synset_id / 100000000) 412 case 'V':
1264 { 413 {
1265 case 1: // Noun 414 if (forms.size() == 4)
1266 { 415 {
1267 if (!pronunciation.rhyme.empty()) 416 curLemma.addInflection(inflection::past_tense, lookupOrCreateForm(forms[0]));
1268 { 417 curLemma.addInflection(inflection::past_participle, lookupOrCreateForm(forms[1]));
1269 query = "INSERT INTO noun_pronunciations (noun_id, pronunciation, syllables, stress, prerhyme, rhyme) VALUES (?, ?, ?, ?, ?, ?)"; 418 curLemma.addInflection(inflection::ing_form, lookupOrCreateForm(forms[2]));
1270 } else { 419 curLemma.addInflection(inflection::s_form, lookupOrCreateForm(forms[3]));
1271 query = "INSERT INTO noun_pronunciations (noun_id, pronunciation, syllables, stress) VALUES (?, ?, ?, ?)"; 420 } else if (forms.size() == 3)
1272 }
1273
1274 break;
1275 }
1276
1277 case 3: // Adjective
1278 { 421 {
1279 if (!pronunciation.rhyme.empty()) 422 curLemma.addInflection(inflection::past_tense, lookupOrCreateForm(forms[0]));
1280 { 423 curLemma.addInflection(inflection::past_participle, lookupOrCreateForm(forms[0]));
1281 query = "INSERT INTO adjective_pronunciations (adjective_id, pronunciation, syllables, stress, prerhyme, rhyme) VALUES (?, ?, ?, ?, ?, ?)"; 424 curLemma.addInflection(inflection::ing_form, lookupOrCreateForm(forms[1]));
1282 } else { 425 curLemma.addInflection(inflection::s_form, lookupOrCreateForm(forms[2]));
1283 query = "INSERT INTO adjective_pronunciations (adjective_id, pronunciation, syllables, stress) VALUES (?, ?, ?, ?)"; 426 } else if (forms.size() == 8)
1284 } 427 {
1285 428 // As of AGID 2014.08.11, this is only "to be"
1286 break; 429 curLemma.addInflection(inflection::past_tense, lookupOrCreateForm(forms[0]));
430 curLemma.addInflection(inflection::past_participle, lookupOrCreateForm(forms[2]));
431 curLemma.addInflection(inflection::ing_form, lookupOrCreateForm(forms[3]));
432 curLemma.addInflection(inflection::s_form, lookupOrCreateForm(forms[4]));
433 } else {
434 // Words that don't fit the cases above as of AGID 2014.08.11:
435 // - may and shall do not conjugate the way we want them to
436 // - methinks only has a past tense and is an outlier
437 // - wit has five forms, and is archaic/obscure enough that we can ignore it for now
438 std::cout << " Ignoring verb \"" << infinitive << "\" due to non-standard number of forms." << std::endl;
1287 } 439 }
1288 440
1289 case 4: // Adverb 441 // For verbs in particular, we sometimes create a notion and a word
442 // from inflection data. Specifically, if there are not yet any
443 // verbs existing that have the same infinitive form. "Yet" means
444 // that this verb appears in the AGID data but not in either WordNet
445 // or VerbNet.
446 if (!wordsByBaseForm_.count(infinitive)
447 || !std::any_of(std::begin(wordsByBaseForm_.at(infinitive)), std::end(wordsByBaseForm_.at(infinitive)), [] (word* w) {
448 return w->getNotion().getPartOfSpeech() == part_of_speech::verb;
449 }))
1290 { 450 {
1291 if (!pronunciation.rhyme.empty()) 451 notion& n = createNotion(part_of_speech::verb);
1292 { 452 createWord(n, curLemma);
1293 query = "INSERT INTO adverb_pronunciations (adverb_id, pronunciation, syllables, stress, prerhyme, rhyme) VALUES (?, ?, ?, ?, ?, ?)";
1294 } else {
1295 query = "INSERT INTO adverb_pronunciations (adverb_id, pronunciation, syllables, stress) VALUES (?, ?, ?, ?)";
1296 }
1297
1298 break;
1299 } 453 }
1300 }
1301
1302 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
1303 {
1304 db_error(ppdb, query);
1305 }
1306
1307 sqlite3_bind_int(ppstmt, 1, rowid);
1308 sqlite3_bind_text(ppstmt, 2, pronunciation.phonemes.c_str(), pronunciation.phonemes.length(), SQLITE_TRANSIENT);
1309 sqlite3_bind_int(ppstmt, 3, pronunciation.syllables);
1310 sqlite3_bind_text(ppstmt, 4, pronunciation.stress.c_str(), pronunciation.stress.length(), SQLITE_TRANSIENT);
1311
1312 if (!pronunciation.rhyme.empty())
1313 {
1314 sqlite3_bind_text(ppstmt, 5, pronunciation.prerhyme.c_str(), pronunciation.prerhyme.length(), SQLITE_TRANSIENT);
1315 sqlite3_bind_text(ppstmt, 6, pronunciation.rhyme.c_str(), pronunciation.rhyme.length(), SQLITE_TRANSIENT);
1316 }
1317 454
1318 if (sqlite3_step(ppstmt) != SQLITE_DONE) 455 break;
1319 {
1320 db_error(ppdb, query);
1321 } 456 }
1322
1323 sqlite3_finalize(ppstmt);
1324 }
1325 }
1326 }
1327 }
1328
1329 // While we're working on s
1330 {
1331 progress ppgs("Writing word synonyms...", wn.size());
1332 for (auto sense : wn)
1333 {
1334 ppgs.update();
1335 457
1336 for (auto word1 : sense.second) 458 case 'A':
1337 {
1338 for (auto word2 : sense.second)
1339 {
1340 if (word1 != word2)
1341 { 459 {
1342 std::string query; 460 if (forms.size() == 2)
1343 switch (sense.first / 100000000)
1344 { 461 {
1345 case 1: // Noun 462 curLemma.addInflection(inflection::comparative, lookupOrCreateForm(forms[0]));
1346 { 463 curLemma.addInflection(inflection::superlative, lookupOrCreateForm(forms[1]));
1347 query = "INSERT INTO noun_synonymy (noun_1_id, noun_2_id) VALUES (?, ?)"; 464 } else {
1348 465 // As of AGID 2014.08.11, this is only "only", which has only the form "onliest"
1349 break; 466 std::cout << " Ignoring adjective/adverb \"" << infinitive << "\" due to non-standard number of forms." << std::endl;
1350 } 467 }
1351
1352 case 2: // Verb
1353 {
1354 // Ignore
1355
1356 break;
1357 }
1358
1359 case 3: // Adjective
1360 {
1361 query = "INSERT INTO adjective_synonymy (adjective_1_id, adjective_2_id) VALUES (?, ?)";
1362 468
1363 break; 469 break;
1364 } 470 }
1365 471
1366 case 4: // Adverb 472 case 'N':
1367 { 473 {
1368 query = "INSERT INTO adverb_synonymy (adverb_1_id, adverb_2_id) VALUES (?, ?)"; 474 if (forms.size() == 1)
1369
1370 break;
1371 }
1372 }
1373
1374 sqlite3_stmt* ppstmt;
1375 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
1376 {
1377 db_error(ppdb, query);
1378 }
1379
1380 sqlite3_bind_int(ppstmt, 1, word1.second);
1381 sqlite3_bind_int(ppstmt, 2, word2.second);
1382
1383 if (sqlite3_step(ppstmt) != SQLITE_DONE)
1384 { 475 {
1385 db_error(ppdb, query); 476 curLemma.addInflection(inflection::plural, lookupOrCreateForm(forms[0]));
477 } else {
478 // As of AGID 2014.08.11, this is non-existent.
479 std::cout << " Ignoring noun \"" << infinitive << "\" due to non-standard number of forms." << std::endl;
1386 } 480 }
1387 481
1388 sqlite3_finalize(ppstmt); 482 break;
1389 } 483 }
1390 } 484 }
1391 } 485 }
1392 } 486 }
1393 }
1394
1395 // ant table
1396 {
1397 std::ifstream wnantfile(wnpref + "wn_ant.pl");
1398 if (!wnantfile.is_open())
1399 {
1400 std::cout << "Invalid WordNet data directory." << std::endl;
1401 print_usage();
1402 }
1403
1404 std::list<std::string> lines;
1405 for (;;)
1406 {
1407 std::string line;
1408 if (!getline(wnantfile, line))
1409 {
1410 break;
1411 }
1412 487
1413 if (line.back() == '\r') 488 void generator::readPrepositions()
1414 {
1415 line.pop_back();
1416 }
1417
1418 lines.push_back(line);
1419 }
1420
1421 progress ppgs("Writing antonyms...", lines.size());
1422 for (auto line : lines)
1423 { 489 {
1424 ppgs.update(); 490 std::list<std::string> lines(readFile("prepositions.txt"));
491 progress ppgs("Reading prepositions...", lines.size());
1425 492
1426 std::regex relation("^ant\\(([134]\\d{8}),(\\d+),([134]\\d{8}),(\\d+)\\)\\."); 493 for (std::string line : lines)
1427 std::smatch relation_data;
1428 if (!std::regex_search(line, relation_data, relation))
1429 {
1430 continue;
1431 }
1432
1433 int synset_id_1 = stoi(relation_data[1]);
1434 int wnum_1 = stoi(relation_data[2]);
1435 int synset_id_2 = stoi(relation_data[3]);
1436 int wnum_2 = stoi(relation_data[4]);
1437
1438 std::string query;
1439 switch (synset_id_1 / 100000000)
1440 { 494 {
1441 case 1: // Noun 495 ppgs.update();
1442 {
1443 query = "INSERT INTO noun_antonymy (noun_1_id, noun_2_id) VALUES (?, ?)";
1444 496
1445 break; 497 std::regex relation("^([^:]+): (.+)");
1446 } 498 std::smatch relation_data;
1447 499 std::regex_search(line, relation_data, relation);
1448 case 2: // Verb 500 std::string prep = relation_data[1];
1449 { 501 auto groups = split<std::list<std::string>>(relation_data[2], ", ");
1450 // Ignore
1451 502
1452 break; 503 notion& n = createNotion(part_of_speech::preposition);
1453 } 504 lemma& l = lookupOrCreateLemma(prep);
1454 505 word& w = createWord(n, l);
1455 case 3: // Adjective
1456 {
1457 query = "INSERT INTO adjective_antonymy (adjective_1_id, adjective_2_id) VALUES (?, ?)";
1458 506
1459 break; 507 n.setPrepositionGroups(groups);
1460 }
1461
1462 case 4: // Adverb
1463 {
1464 query = "INSERT INTO adverb_antonymy (adverb_1_id, adverb_2_id) VALUES (?, ?)";
1465
1466 break;
1467 }
1468 }
1469
1470 sqlite3_stmt* ppstmt;
1471 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
1472 {
1473 db_error(ppdb, query);
1474 }
1475
1476 sqlite3_bind_int(ppstmt, 1, wn[synset_id_1][wnum_1]);
1477 sqlite3_bind_int(ppstmt, 2, wn[synset_id_2][wnum_2]);
1478
1479 if (sqlite3_step(ppstmt) != SQLITE_DONE)
1480 {
1481 db_error(ppdb, query);
1482 }
1483
1484 sqlite3_finalize(ppstmt);
1485 }
1486 }
1487
1488 // at table
1489 {
1490 std::ifstream wnatfile(wnpref + "wn_at.pl");
1491 if (!wnatfile.is_open())
1492 {
1493 std::cout << "Invalid WordNet data directory." << std::endl;
1494 print_usage();
1495 }
1496
1497 std::list<std::string> lines;
1498 for (;;)
1499 {
1500 std::string line;
1501 if (!getline(wnatfile, line))
1502 {
1503 break;
1504 } 508 }
1505
1506 if (line.back() == '\r')
1507 {
1508 line.pop_back();
1509 }
1510
1511 lines.push_back(line);
1512 } 509 }
1513 510
1514 progress ppgs("Writing variations...", lines.size()); 511 void generator::readCmudictPronunciations()
1515 for (auto line : lines)
1516 { 512 {
1517 ppgs.update(); 513 std::list<std::string> lines(readFile(cmudictPath_));
514 progress ppgs("Reading pronunciations from CMUDICT...", lines.size());
1518 515
1519 std::regex relation("^at\\((1\\d{8}),(3\\d{8})\\)\\."); 516 for (std::string line : lines)
1520 std::smatch relation_data;
1521 if (!std::regex_search(line, relation_data, relation))
1522 { 517 {
1523 continue; 518 ppgs.update();
1524 } 519
1525 520 std::regex phoneme("([A-Z][^ \\(]*)(?:\\(\\d+\\))? ([A-Z 0-9]+)");
1526 int synset_id_1 = stoi(relation_data[1]); 521 std::smatch phoneme_data;
1527 int synset_id_2 = stoi(relation_data[2]); 522 if (std::regex_search(line, phoneme_data, phoneme))
1528 std::string query("INSERT INTO variation (noun_id, adjective_id) VALUES (?, ?)");
1529
1530 for (auto mapping1 : wn[synset_id_1])
1531 {
1532 for (auto mapping2 : wn[synset_id_2])
1533 { 523 {
1534 sqlite3_stmt* ppstmt; 524 std::string canonical(phoneme_data[1]);
1535 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK) 525 std::transform(std::begin(canonical), std::end(canonical), std::begin(canonical), ::tolower);
1536 {
1537 db_error(ppdb, query);
1538 }
1539
1540 sqlite3_bind_int(ppstmt, 1, mapping1.second);
1541 sqlite3_bind_int(ppstmt, 2, mapping2.second);
1542 526
1543 if (sqlite3_step(ppstmt) != SQLITE_DONE) 527 if (!formByText_.count(canonical))
1544 { 528 {
1545 db_error(ppdb, query); 529 continue;
1546 } 530 }
1547 531
1548 sqlite3_finalize(ppstmt); 532 std::string phonemes = phoneme_data[2];
533 pronunciations_.emplace_back(phonemes);
534 pronunciation& p = pronunciations_.back();
535 formByText_.at(canonical)->addPronunciation(p);
1549 } 536 }
1550 } 537 }
1551 } 538 }
1552 }
1553
1554 // der table
1555 {
1556 std::ifstream wnderfile(wnpref + "wn_der.pl");
1557 if (!wnderfile.is_open())
1558 {
1559 std::cout << "Invalid WordNet data directory." << std::endl;
1560 print_usage();
1561 }
1562 539
1563 std::list<std::string> lines; 540 void generator::writeSchema()
1564 for (;;)
1565 { 541 {
1566 std::string line; 542 std::ifstream file("schema.sql");
1567 if (!getline(wnderfile, line)) 543 if (!file)
1568 { 544 {
1569 break; 545 throw std::invalid_argument("Could not find database schema");
1570 } 546 }
1571 547
1572 if (line.back() == '\r') 548 std::ostringstream schemaBuilder;
549 std::string line;
550 while (std::getline(file, line))
1573 { 551 {
1574 line.pop_back(); 552 if (line.back() == '\r')
553 {
554 line.pop_back();
555 }
556
557 schemaBuilder << line;
1575 } 558 }
1576 559
1577 lines.push_back(line); 560 std::string schema = schemaBuilder.str();
561 auto queries = split<std::list<std::string>>(schema, ";");
562 progress ppgs("Writing database schema...", queries.size());
563 for (std::string query : queries)
564 {
565 if (!queries.empty())
566 {
567 db_.runQuery(query);
568 }
569
570 ppgs.update();
571 }
1578 } 572 }
1579 573
1580 progress ppgs("Writing morphological derivation...", lines.size()); 574 void generator::dumpObjects()
1581 for (auto line : lines)
1582 { 575 {
1583 ppgs.update();
1584
1585 std::regex relation("^der\\(([134]\\d{8}),(\\d+),([134]\\d{8}),(\\d+)\\)\\.");
1586 std::smatch relation_data;
1587 if (!std::regex_search(line, relation_data, relation))
1588 { 576 {
1589 continue; 577 progress ppgs("Writing notions...", notions_.size());
578
579 for (notion& n : notions_)
580 {
581 db_ << n;
582
583 ppgs.update();
584 }
1590 } 585 }
1591 586
1592 int synset_id_1 = stoi(relation_data[1]);
1593 int wnum_1 = stoi(relation_data[2]);
1594 int synset_id_2 = stoi(relation_data[3]);
1595 int wnum_2 = stoi(relation_data[4]);
1596 std::string query;
1597 switch (synset_id_1 / 100000000)
1598 { 587 {
1599 case 1: // Noun 588 progress ppgs("Writing words...", words_.size());
589
590 for (word& w : words_)
1600 { 591 {
1601 switch (synset_id_2 / 100000000) 592 db_ << w;
1602 {
1603 case 1: // Noun
1604 {
1605 query = "INSERT INTO noun_noun_derivation (noun_1_id, noun_2_id) VALUES (?, ?)";
1606 break;
1607 }
1608
1609 case 3: // Adjective
1610 {
1611 query = "INSERT INTO noun_adjective_derivation (noun_id, adjective_id) VALUES (?, ?)";
1612 break;
1613 }
1614
1615 case 4: // Adverb
1616 {
1617 query = "INSERT INTO noun_adverb_derivation (noun_id, adverb_id) VALUES (?, ?)";
1618 break;
1619 }
1620 }
1621 593
1622 break; 594 ppgs.update();
1623 } 595 }
596 }
597
598 {
599 progress ppgs("Writing lemmas...", lemmas_.size());
1624 600
1625 case 3: // Adjective 601 for (lemma& l : lemmas_)
1626 { 602 {
1627 switch (synset_id_2 / 100000000) 603 db_ << l;
1628 {
1629 case 1: // Noun
1630 {
1631 query = "INSERT INTO noun_adjective_derivation (adjective_id, noun_id) VALUES (?, ?)";
1632 break;
1633 }
1634
1635 case 3: // Adjective
1636 {
1637 query = "INSERT INTO adjective_adjective_derivation (adjective_id, adjective_id) VALUES (?, ?)";
1638 break;
1639 }
1640
1641 case 4: // Adverb
1642 {
1643 query = "INSERT INTO adjective_adverb_derivation (adjective_id, adverb_id) VALUES (?, ?)";
1644 break;
1645 }
1646 }
1647 604
1648 break; 605 ppgs.update();
1649 } 606 }
607 }
608
609 {
610 progress ppgs("Writing forms...", forms_.size());
1650 611
1651 case 4: // Adverb 612 for (form& f : forms_)
1652 { 613 {
1653 switch (synset_id_2 / 100000000) 614 db_ << f;
1654 {
1655 case 1: // Noun
1656 {
1657 query = "INSERT INTO noun_adverb_derivation (adverb_id, noun_id) VALUES (?, ?)";
1658 break;
1659 }
1660
1661 case 3: // Adjective
1662 {
1663 query = "INSERT INTO adjective_adverb_derivation (adverb_id, adjective_id) VALUES (?, ?)";
1664 break;
1665 }
1666
1667 case 4: // Adverb
1668 {
1669 query = "INSERT INTO adverb_adverb_derivation (adverb_1_id, adverb_2_id) VALUES (?, ?)";
1670 break;
1671 }
1672 }
1673 615
1674 break; 616 ppgs.update();
1675 } 617 }
1676 } 618 }
1677 619
1678 sqlite3_stmt* ppstmt;
1679 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK)
1680 { 620 {
1681 db_error(ppdb, query); 621 progress ppgs("Writing pronunciations...", pronunciations_.size());
622
623 for (pronunciation& p : pronunciations_)
624 {
625 db_ << p;
626
627 ppgs.update();
628 }
1682 } 629 }
1683 630
1684 sqlite3_bind_int(ppstmt, 1, wn[synset_id_1][wnum_1]);
1685 sqlite3_bind_int(ppstmt, 2, wn[synset_id_2][wnum_2]);
1686
1687 if (sqlite3_step(ppstmt) != SQLITE_DONE)
1688 { 631 {
1689 db_error(ppdb, query); 632 progress ppgs("Writing verb groups...", groups_.size());
633
634 for (group& g : groups_)
635 {
636 db_ << g;
637
638 ppgs.update();
639 }
1690 } 640 }
1691 641
1692 sqlite3_finalize(ppstmt);
1693 }
1694 }
1695
1696 // hyp table
1697 {
1698 std::ifstream wnhypfile(wnpref + "wn_hyp.pl");
1699 if (!wnhypfile.is_open())
1700 {
1701 std::cout << "Invalid WordNet data directory." << std::endl;
1702 print_usage();
1703 }
1704
1705 std::list<std::string> lines;
1706 for (;;)
1707 {
1708 std::string line;
1709 if (!getline(wnhypfile, line))
1710 {
1711 break;
1712 }
1713
1714 if (line.back() == '\r')
1715 { 642 {
1716 line.pop_back(); 643 progress ppgs("Writing verb frames...", frames_.size());
644
645 for (frame& f : frames_)
646 {
647 db_ << f;
648
649 ppgs.update();
650 }
1717 } 651 }
1718
1719 lines.push_back(line);
1720 } 652 }
1721 653
1722 progress ppgs("Writing hypernyms...", lines.size()); 654 void generator::readWordNetAntonymy()
1723 for (auto line : lines)
1724 { 655 {
1725 ppgs.update(); 656 std::list<std::string> lines(readFile(wordNetPath_ + "wn_ant.pl"));
1726 657 progress ppgs("Writing antonyms...", lines.size());
1727 std::regex relation("^hyp\\((1\\d{8}),(1\\d{8})\\)\\."); 658 for (auto line : lines)
1728 std::smatch relation_data;
1729 if (!std::regex_search(line, relation_data, relation))
1730 { 659 {
1731 continue; 660 ppgs.update();
1732 }
1733
1734 int synset_id_1 = stoi(relation_data[1]);
1735 int synset_id_2 = stoi(relation_data[2]);
1736 std::string query("INSERT INTO hypernymy (hyponym_id, hypernym_id) VALUES (?, ?)");
1737 661
1738 for (auto mapping1 : wn[synset_id_1]) 662 std::regex relation("^ant\\(([134]\\d{8}),(\\d+),([134]\\d{8}),(\\d+)\\)\\.");
1739 { 663 std::smatch relation_data;
1740 for (auto mapping2 : wn[synset_id_2]) 664 if (!std::regex_search(line, relation_data, relation))
1741 { 665 {
1742 sqlite3_stmt* ppstmt; 666 continue;
1743 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK) 667 }
1744 { 668
1745 db_error(ppdb, query); 669 std::pair<int, int> lookup1(std::stoi(relation_data[1]), std::stoi(relation_data[2]));
1746 } 670 std::pair<int, int> lookup2(std::stoi(relation_data[3]), std::stoi(relation_data[4]));
1747 671
1748 sqlite3_bind_int(ppstmt, 1, mapping1.second); 672 if (wordByWnidAndWnum_.count(lookup1) && wordByWnidAndWnum_.count(lookup2))
1749 sqlite3_bind_int(ppstmt, 2, mapping2.second); 673 {
674 word& word1 = *wordByWnidAndWnum_.at(lookup1);
675 word& word2 = *wordByWnidAndWnum_.at(lookup2);
1750 676
1751 if (sqlite3_step(ppstmt) != SQLITE_DONE) 677 std::list<field> fields;
1752 { 678 fields.emplace_back("antonym_1_id", word1.getId());
1753 db_error(ppdb, query); 679 fields.emplace_back("antonym_2_id", word2.getId());
1754 }
1755 680
1756 sqlite3_finalize(ppstmt); 681 db_.insertIntoTable("antonymy", std::move(fields));
1757 } 682 }
1758 } 683 }
1759 } 684 }
1760 }
1761
1762 // ins table
1763 {
1764 std::ifstream wninsfile(wnpref + "wn_ins.pl");
1765 if (!wninsfile.is_open())
1766 {
1767 std::cout << "Invalid WordNet data directory." << std::endl;
1768 print_usage();
1769 }
1770
1771 std::list<std::string> lines;
1772 for (;;)
1773 {
1774 std::string line;
1775 if (!getline(wninsfile, line))
1776 {
1777 break;
1778 }
1779 685
1780 if (line.back() == '\r') 686 void generator::readWordNetVariation()
687 {
688 std::list<std::string> lines(readFile(wordNetPath_ + "wn_at.pl"));
689 progress ppgs("Writing variation...", lines.size());
690 for (auto line : lines)
1781 { 691 {
1782 line.pop_back(); 692 ppgs.update();
1783 }
1784 693
1785 lines.push_back(line); 694 std::regex relation("^at\\((1\\d{8}),(3\\d{8})\\)\\.");
695 std::smatch relation_data;
696 if (!std::regex_search(line, relation_data, relation))
697 {
698 continue;
699 }
700
701 int lookup1 = std::stoi(relation_data[1]);
702 int lookup2 = std::stoi(relation_data[2]);
703
704 if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2))
705 {
706 notion& notion1 = *notionByWnid_.at(lookup1);
707 notion& notion2 = *notionByWnid_.at(lookup2);
708
709 std::list<field> fields;
710 fields.emplace_back("noun_id", notion1.getId());
711 fields.emplace_back("adjective_id", notion2.getId());
712
713 db_.insertIntoTable("variation", std::move(fields));
714 }
715 }
1786 } 716 }
1787 717
1788 progress ppgs("Writing instantiations...", lines.size()); 718 void generator::readWordNetClasses()
1789 for (auto line : lines)
1790 { 719 {
1791 ppgs.update(); 720 std::list<std::string> lines(readFile(wordNetPath_ + "wn_cls.pl"));
1792 721 progress ppgs("Writing usage, topicality, and regionality...", lines.size());
1793 std::regex relation("^ins\\((1\\d{8}),(1\\d{8})\\)\\."); 722 for (auto line : lines)
1794 std::smatch relation_data;
1795 if (!std::regex_search(line, relation_data, relation))
1796 { 723 {
1797 continue; 724 ppgs.update();
1798 }
1799
1800 int synset_id_1 = stoi(relation_data[1]);
1801 int synset_id_2 = stoi(relation_data[2]);
1802 std::string query("INSERT INTO instantiation (instance_id, class_id) VALUES (?, ?)");
1803 725
1804 for (auto mapping1 : wn[synset_id_1]) 726 std::regex relation("^cls\\(([134]\\d{8}),(\\d+),(1\\d{8}),(\\d+),([tur])\\)\\.");
1805 { 727 std::smatch relation_data;
1806 for (auto mapping2 : wn[synset_id_2]) 728 if (!std::regex_search(line, relation_data, relation))
729 {
730 continue;
731 }
732
733 std::pair<int, int> lookup1(std::stoi(relation_data[1]), std::stoi(relation_data[2]));
734 std::pair<int, int> lookup2(std::stoi(relation_data[3]), std::stoi(relation_data[4]));
735 std::string class_type = relation_data[5];
736
737 std::string table_name;
738 if (class_type == "t")
739 {
740 table_name += "topicality";
741 } else if (class_type == "u")
742 {
743 table_name += "usage";
744 } else if (class_type == "r")
745 {
746 table_name += "regionality";
747 }
748
749 std::list<int> leftJoin;
750 std::list<int> rightJoin;
751
752 if ((lookup1.second == 0) && (wordsByWnid_.count(lookup1.first)))
1807 { 753 {
1808 sqlite3_stmt* ppstmt; 754 std::transform(std::begin(wordsByWnid_.at(lookup1.first)), std::end(wordsByWnid_.at(lookup1.first)), std::back_inserter(leftJoin), [] (word* w) {
1809 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK) 755 return w->getId();
756 });
757 } else if (wordByWnidAndWnum_.count(lookup1)) {
758 leftJoin.push_back(wordByWnidAndWnum_.at(lookup1)->getId());
759 }
760
761 if ((lookup2.second == 0) && (wordsByWnid_.count(lookup2.first)))
762 {
763 std::transform(std::begin(wordsByWnid_.at(lookup2.first)), std::end(wordsByWnid_.at(lookup2.first)), std::back_inserter(rightJoin), [] (word* w) {
764 return w->getId();
765 });
766 } else if (wordByWnidAndWnum_.count(lookup2)) {
767 rightJoin.push_back(wordByWnidAndWnum_.at(lookup2)->getId());
768 }
769
770 for (int word1 : leftJoin)
771 {
772 for (int word2 : rightJoin)
1810 { 773 {
1811 db_error(ppdb, query); 774 std::list<field> fields;
1812 } 775 fields.emplace_back("term_id", word1);
776 fields.emplace_back("domain_id", word2);
1813 777
1814 sqlite3_bind_int(ppstmt, 1, mapping1.second); 778 db_.insertIntoTable(table_name, std::move(fields));
1815 sqlite3_bind_int(ppstmt, 2, mapping2.second);
1816
1817 if (sqlite3_step(ppstmt) != SQLITE_DONE)
1818 {
1819 db_error(ppdb, query);
1820 } 779 }
1821
1822 sqlite3_finalize(ppstmt);
1823 } 780 }
1824 } 781 }
1825 } 782 }
1826 }
1827
1828 // mm table
1829 {
1830 std::ifstream wnmmfile(wnpref + "wn_mm.pl");
1831 if (!wnmmfile.is_open())
1832 {
1833 std::cout << "Invalid WordNet data directory." << std::endl;
1834 print_usage();
1835 }
1836
1837 std::list<std::string> lines;
1838 for (;;)
1839 {
1840 std::string line;
1841 if (!getline(wnmmfile, line))
1842 {
1843 break;
1844 }
1845 783
1846 if (line.back() == '\r') 784 void generator::readWordNetCausality()
785 {
786 std::list<std::string> lines(readFile(wordNetPath_ + "wn_cs.pl"));
787 progress ppgs("Writing causality...", lines.size());
788 for (auto line : lines)
1847 { 789 {
1848 line.pop_back(); 790 ppgs.update();
1849 }
1850 791
1851 lines.push_back(line); 792 std::regex relation("^cs\\((2\\d{8}),(2\\d{8})\\)\\.");
793 std::smatch relation_data;
794 if (!std::regex_search(line, relation_data, relation))
795 {
796 continue;
797 }
798
799 int lookup1 = std::stoi(relation_data[1]);
800 int lookup2 = std::stoi(relation_data[2]);
801
802 if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2))
803 {
804 notion& notion1 = *notionByWnid_.at(lookup1);
805 notion& notion2 = *notionByWnid_.at(lookup2);
806
807 std::list<field> fields;
808 fields.emplace_back("effect_id", notion1.getId());
809 fields.emplace_back("cause_id", notion2.getId());
810
811 db_.insertIntoTable("causality", std::move(fields));
812 }
813 }
1852 } 814 }
1853 815
1854 progress ppgs("Writing member meronyms...", lines.size()); 816 void generator::readWordNetEntailment()
1855 for (auto line : lines)
1856 { 817 {
1857 ppgs.update(); 818 std::list<std::string> lines(readFile(wordNetPath_ + "wn_ent.pl"));
1858 819 progress ppgs("Writing entailment...", lines.size());
1859 std::regex relation("^mm\\((1\\d{8}),(1\\d{8})\\)\\."); 820 for (auto line : lines)
1860 std::smatch relation_data;
1861 if (!std::regex_search(line, relation_data, relation))
1862 { 821 {
1863 continue; 822 ppgs.update();
1864 }
1865 823
1866 int synset_id_1 = stoi(relation_data[1]); 824 std::regex relation("^ent\\((2\\d{8}),(2\\d{8})\\)\\.");
1867 int synset_id_2 = stoi(relation_data[2]); 825 std::smatch relation_data;
1868 std::string query("INSERT INTO member_meronymy (holonym_id, meronym_id) VALUES (?, ?)"); 826 if (!std::regex_search(line, relation_data, relation))
1869
1870 for (auto mapping1 : wn[synset_id_1])
1871 {
1872 for (auto mapping2 : wn[synset_id_2])
1873 { 827 {
1874 sqlite3_stmt* ppstmt; 828 continue;
1875 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK) 829 }
1876 { 830
1877 db_error(ppdb, query); 831 int lookup1 = std::stoi(relation_data[1]);
1878 } 832 int lookup2 = std::stoi(relation_data[2]);
1879 833
1880 sqlite3_bind_int(ppstmt, 1, mapping1.second); 834 if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2))
1881 sqlite3_bind_int(ppstmt, 2, mapping2.second); 835 {
836 notion& notion1 = *notionByWnid_.at(lookup1);
837 notion& notion2 = *notionByWnid_.at(lookup2);
1882 838
1883 if (sqlite3_step(ppstmt) != SQLITE_DONE) 839 std::list<field> fields;
1884 { 840 fields.emplace_back("given_id", notion1.getId());
1885 db_error(ppdb, query); 841 fields.emplace_back("entailment_id", notion2.getId());
1886 }
1887 842
1888 sqlite3_finalize(ppstmt); 843 db_.insertIntoTable("entailment", std::move(fields));
1889 } 844 }
1890 } 845 }
1891 } 846 }
1892 } 847
1893 848 void generator::readWordNetHypernymy()
1894 // ms table
1895 {
1896 std::ifstream wnmsfile(wnpref + "wn_ms.pl");
1897 if (!wnmsfile.is_open())
1898 {
1899 std::cout << "Invalid WordNet data directory." << std::endl;
1900 print_usage();
1901 }
1902
1903 std::list<std::string> lines;
1904 for (;;)
1905 { 849 {
1906 std::string line; 850 std::list<std::string> lines(readFile(wordNetPath_ + "wn_hyp.pl"));
1907 if (!getline(wnmsfile, line)) 851 progress ppgs("Writing hypernymy...", lines.size());
852 for (auto line : lines)
1908 { 853 {
1909 break; 854 ppgs.update();
855
856 std::regex relation("^hyp\\(([12]\\d{8}),([12]\\d{8})\\)\\.");
857 std::smatch relation_data;
858 if (!std::regex_search(line, relation_data, relation))
859 {
860 continue;
861 }
862
863 int lookup1 = std::stoi(relation_data[1]);
864 int lookup2 = std::stoi(relation_data[2]);
865
866 if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2))
867 {
868 notion& notion1 = *notionByWnid_.at(lookup1);
869 notion& notion2 = *notionByWnid_.at(lookup2);
870
871 std::list<field> fields;
872 fields.emplace_back("hyponym_id", notion1.getId());
873 fields.emplace_back("hypernym_id", notion2.getId());
874
875 db_.insertIntoTable("hypernymy", std::move(fields));
876 }
1910 } 877 }
878 }
1911 879
1912 if (line.back() == '\r') 880 void generator::readWordNetInstantiation()
881 {
882 std::list<std::string> lines(readFile(wordNetPath_ + "wn_ins.pl"));
883 progress ppgs("Writing instantiation...", lines.size());
884 for (auto line : lines)
1913 { 885 {
1914 line.pop_back(); 886 ppgs.update();
1915 }
1916 887
1917 lines.push_back(line); 888 std::regex relation("^ins\\((1\\d{8}),(1\\d{8})\\)\\.");
889 std::smatch relation_data;
890 if (!std::regex_search(line, relation_data, relation))
891 {
892 continue;
893 }
894
895 int lookup1 = std::stoi(relation_data[1]);
896 int lookup2 = std::stoi(relation_data[2]);
897
898 if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2))
899 {
900 notion& notion1 = *notionByWnid_.at(lookup1);
901 notion& notion2 = *notionByWnid_.at(lookup2);
902
903 std::list<field> fields;
904 fields.emplace_back("instance_id", notion1.getId());
905 fields.emplace_back("class_id", notion2.getId());
906
907 db_.insertIntoTable("instantiation", std::move(fields));
908 }
909 }
1918 } 910 }
1919 911
1920 progress ppgs("Writing substance meronyms...", lines.size()); 912 void generator::readWordNetMemberMeronymy()
1921 for (auto line : lines)
1922 { 913 {
1923 ppgs.update(); 914 std::list<std::string> lines(readFile(wordNetPath_ + "wn_mm.pl"));
1924 915 progress ppgs("Writing member meronymy...", lines.size());
1925 std::regex relation("^ms\\((1\\d{8}),(1\\d{8})\\)\\."); 916 for (auto line : lines)
1926 std::smatch relation_data;
1927 if (!std::regex_search(line, relation_data, relation))
1928 { 917 {
1929 continue; 918 ppgs.update();
1930 }
1931
1932 int synset_id_1 = stoi(relation_data[1]);
1933 int synset_id_2 = stoi(relation_data[2]);
1934 std::string query("INSERT INTO substance_meronymy (holonym_id, meronym_id) VALUES (?, ?)");
1935 919
1936 for (auto mapping1 : wn[synset_id_1]) 920 std::regex relation("^mm\\((1\\d{8}),(1\\d{8})\\)\\.");
1937 { 921 std::smatch relation_data;
1938 for (auto mapping2 : wn[synset_id_2]) 922 if (!std::regex_search(line, relation_data, relation))
1939 { 923 {
1940 sqlite3_stmt* ppstmt; 924 continue;
1941 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK) 925 }
1942 { 926
1943 db_error(ppdb, query); 927 int lookup1 = std::stoi(relation_data[1]);
1944 } 928 int lookup2 = std::stoi(relation_data[2]);
929
930 if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2))
931 {
932 notion& notion1 = *notionByWnid_.at(lookup1);
933 notion& notion2 = *notionByWnid_.at(lookup2);
1945 934
1946 sqlite3_bind_int(ppstmt, 1, mapping1.second); 935 std::list<field> fields;
1947 sqlite3_bind_int(ppstmt, 2, mapping2.second); 936 fields.emplace_back("holonym_id", notion1.getId());
937 fields.emplace_back("meronym_id", notion2.getId());
1948 938
1949 if (sqlite3_step(ppstmt) != SQLITE_DONE) 939 db_.insertIntoTable("member_meronymy", std::move(fields));
1950 {
1951 db_error(ppdb, query);
1952 }
1953
1954 sqlite3_finalize(ppstmt);
1955 } 940 }
1956 } 941 }
1957 } 942 }
1958 } 943
1959 944 void generator::readWordNetPartMeronymy()
1960 // mm table
1961 {
1962 std::ifstream wnmpfile(wnpref + "wn_mp.pl");
1963 if (!wnmpfile.is_open())
1964 {
1965 std::cout << "Invalid WordNet data directory." << std::endl;
1966 print_usage();
1967 }
1968
1969 std::list<std::string> lines;
1970 for (;;)
1971 { 945 {
1972 std::string line; 946 std::list<std::string> lines(readFile(wordNetPath_ + "wn_mp.pl"));
1973 if (!getline(wnmpfile, line)) 947 progress ppgs("Writing part meronymy...", lines.size());
948 for (auto line : lines)
1974 { 949 {
1975 break; 950 ppgs.update();
951
952 std::regex relation("^mp\\((1\\d{8}),(1\\d{8})\\)\\.");
953 std::smatch relation_data;
954 if (!std::regex_search(line, relation_data, relation))
955 {
956 continue;
957 }
958
959 int lookup1 = std::stoi(relation_data[1]);
960 int lookup2 = std::stoi(relation_data[2]);
961
962 if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2))
963 {
964 notion& notion1 = *notionByWnid_.at(lookup1);
965 notion& notion2 = *notionByWnid_.at(lookup2);
966
967 std::list<field> fields;
968 fields.emplace_back("holonym_id", notion1.getId());
969 fields.emplace_back("meronym_id", notion2.getId());
970
971 db_.insertIntoTable("part_meronymy", std::move(fields));
972 }
1976 } 973 }
974 }
1977 975
1978 if (line.back() == '\r') 976 void generator::readWordNetSubstanceMeronymy()
977 {
978 std::list<std::string> lines(readFile(wordNetPath_ + "wn_ms.pl"));
979 progress ppgs("Writing substance meronymy...", lines.size());
980 for (auto line : lines)
1979 { 981 {
1980 line.pop_back(); 982 ppgs.update();
1981 }
1982 983
1983 lines.push_back(line); 984 std::regex relation("^ms\\((1\\d{8}),(1\\d{8})\\)\\.");
985 std::smatch relation_data;
986 if (!std::regex_search(line, relation_data, relation))
987 {
988 continue;
989 }
990
991 int lookup1 = std::stoi(relation_data[1]);
992 int lookup2 = std::stoi(relation_data[2]);
993
994 if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2))
995 {
996 notion& notion1 = *notionByWnid_.at(lookup1);
997 notion& notion2 = *notionByWnid_.at(lookup2);
998
999 std::list<field> fields;
1000 fields.emplace_back("holonym_id", notion1.getId());
1001 fields.emplace_back("meronym_id", notion2.getId());
1002
1003 db_.insertIntoTable("substance_meronymy", std::move(fields));
1004 }
1005 }
1984 } 1006 }
1985 1007
1986 progress ppgs("Writing part meronyms...", lines.size()); 1008 void generator::readWordNetPertainymy()
1987 for (auto line : lines)
1988 { 1009 {
1989 ppgs.update(); 1010 std::list<std::string> lines(readFile(wordNetPath_ + "wn_per.pl"));
1990 1011 progress ppgs("Writing pertainymy and mannernymy...", lines.size());
1991 std::regex relation("^mp\\((1\\d{8}),(1\\d{8})\\)\\."); 1012 for (auto line : lines)
1992 std::smatch relation_data;
1993 if (!std::regex_search(line, relation_data, relation))
1994 { 1013 {
1995 continue; 1014 ppgs.update();
1996 }
1997
1998 int synset_id_1 = stoi(relation_data[1]);
1999 int synset_id_2 = stoi(relation_data[2]);
2000 std::string query("INSERT INTO part_meronymy (holonym_id, meronym_id) VALUES (?, ?)");
2001 1015
2002 for (auto mapping1 : wn[synset_id_1]) 1016 std::regex relation("^per\\(([34]\\d{8}),(\\d+),([13]\\d{8}),(\\d+)\\)\\.");
2003 { 1017 std::smatch relation_data;
2004 for (auto mapping2 : wn[synset_id_2]) 1018 if (!std::regex_search(line, relation_data, relation))
2005 { 1019 {
2006 sqlite3_stmt* ppstmt; 1020 continue;
2007 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK) 1021 }
2008 { 1022
2009 db_error(ppdb, query); 1023 std::pair<int, int> lookup1(std::stoi(relation_data[1]), std::stoi(relation_data[2]));
2010 } 1024 std::pair<int, int> lookup2(std::stoi(relation_data[3]), std::stoi(relation_data[4]));
1025
1026 if (wordByWnidAndWnum_.count(lookup1) && wordByWnidAndWnum_.count(lookup2))
1027 {
1028 word& word1 = *wordByWnidAndWnum_.at(lookup1);
1029 word& word2 = *wordByWnidAndWnum_.at(lookup2);
2011 1030
2012 sqlite3_bind_int(ppstmt, 1, mapping1.second); 1031 if (word1.getNotion().getPartOfSpeech() == part_of_speech::adjective)
2013 sqlite3_bind_int(ppstmt, 2, mapping2.second); 1032 {
1033 std::list<field> fields;
1034 fields.emplace_back("pertainym_id", word1.getId());
1035 fields.emplace_back("noun_id", word2.getId());
2014 1036
2015 if (sqlite3_step(ppstmt) != SQLITE_DONE) 1037 db_.insertIntoTable("pertainymy", std::move(fields));
1038 } else if (word1.getNotion().getPartOfSpeech() == part_of_speech::adverb)
2016 { 1039 {
2017 db_error(ppdb, query); 1040 std::list<field> fields;
2018 } 1041 fields.emplace_back("mannernym_id", word1.getId());
1042 fields.emplace_back("adjective_id", word2.getId());
2019 1043
2020 sqlite3_finalize(ppstmt); 1044 db_.insertIntoTable("mannernymy", std::move(fields));
1045 }
2021 } 1046 }
2022 } 1047 }
2023 } 1048 }
2024 }
2025
2026 // per table
2027 {
2028 std::ifstream wnperfile(wnpref + "wn_per.pl");
2029 if (!wnperfile.is_open())
2030 {
2031 std::cout << "Invalid WordNet data directory." << std::endl;
2032 print_usage();
2033 }
2034
2035 std::list<std::string> lines;
2036 for (;;)
2037 {
2038 std::string line;
2039 if (!getline(wnperfile, line))
2040 {
2041 break;
2042 }
2043 1049
2044 if (line.back() == '\r') 1050 void generator::readWordNetSpecification()
1051 {
1052 std::list<std::string> lines(readFile(wordNetPath_ + "wn_sa.pl"));
1053 progress ppgs("Writing specifications...", lines.size());
1054 for (auto line : lines)
2045 { 1055 {
2046 line.pop_back(); 1056 ppgs.update();
1057
1058 std::regex relation("^sa\\((23\\d{8}),(\\d+),(23\\d{8}),(\\d+)\\)\\.");
1059 std::smatch relation_data;
1060 if (!std::regex_search(line, relation_data, relation))
1061 {
1062 continue;
1063 }
1064
1065 std::pair<int, int> lookup1(std::stoi(relation_data[1]), std::stoi(relation_data[2]));
1066 std::pair<int, int> lookup2(std::stoi(relation_data[3]), std::stoi(relation_data[4]));
1067
1068 if (wordByWnidAndWnum_.count(lookup1) && wordByWnidAndWnum_.count(lookup2))
1069 {
1070 word& word1 = *wordByWnidAndWnum_.at(lookup1);
1071 word& word2 = *wordByWnidAndWnum_.at(lookup2);
1072
1073 std::list<field> fields;
1074 fields.emplace_back("general_id", word1.getId());
1075 fields.emplace_back("specific_id", word2.getId());
1076
1077 db_.insertIntoTable("specification", std::move(fields));
1078 }
2047 } 1079 }
2048
2049 lines.push_back(line);
2050 } 1080 }
2051 1081
2052 progress ppgs("Writing pertainyms and mannernyms...", lines.size()); 1082 void generator::readWordNetSimilarity()
2053 for (auto line : lines)
2054 { 1083 {
2055 ppgs.update(); 1084 std::list<std::string> lines(readFile(wordNetPath_ + "wn_sim.pl"));
2056 1085 progress ppgs("Writing adjective similarity...", lines.size());
2057 std::regex relation("^per\\(([34]\\d{8}),(\\d+),([13]\\d{8}),(\\d+)\\)\\."); 1086 for (auto line : lines)
2058 std::smatch relation_data;
2059 if (!std::regex_search(line, relation_data, relation))
2060 { 1087 {
2061 continue; 1088 ppgs.update();
2062 }
2063 1089
2064 int synset_id_1 = stoi(relation_data[1]); 1090 std::regex relation("^sim\\((3\\d{8}),(3\\d{8})\\)\\.");
2065 int wnum_1 = stoi(relation_data[2]); 1091 std::smatch relation_data;
2066 int synset_id_2 = stoi(relation_data[3]); 1092 if (!std::regex_search(line, relation_data, relation))
2067 int wnum_2 = stoi(relation_data[4]);
2068 std::string query;
2069 switch (synset_id_1 / 100000000)
2070 {
2071 case 3: // Adjective
2072 { 1093 {
2073 // This is a pertainym, the second word should be a noun 1094 continue;
2074 // Technically it can be an adjective but we're ignoring that
2075 if (synset_id_2 / 100000000 != 1)
2076 {
2077 continue;
2078 }
2079
2080 query = "INSERT INTO pertainymy (pertainym_id, noun_id) VALUES (?, ?)";
2081
2082 break;
2083 } 1095 }
1096
1097 int lookup1 = std::stoi(relation_data[1]);
1098 int lookup2 = std::stoi(relation_data[2]);
2084 1099
2085 case 4: // Adverb 1100 if (notionByWnid_.count(lookup1) && notionByWnid_.count(lookup2))
2086 { 1101 {
2087 // This is a mannernym, the second word should be an adjective 1102 notion& notion1 = *notionByWnid_.at(lookup1);
2088 if (synset_id_2 / 100000000 != 3) 1103 notion& notion2 = *notionByWnid_.at(lookup2);
2089 {
2090 continue;
2091 }
2092 1104
2093 query = "INSERT INTO mannernymy (mannernym_id, adjective_id) VALUES (?, ?)"; 1105 std::list<field> fields;
1106 fields.emplace_back("adjective_1_id", notion1.getId());
1107 fields.emplace_back("adjective_2_id", notion2.getId());
2094 1108
2095 break; 1109 db_.insertIntoTable("similarity", std::move(fields));
2096 } 1110 }
2097 } 1111 }
2098 1112 }
2099 sqlite3_stmt* ppstmt;
2100 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
2101 {
2102 db_error(ppdb, query);
2103 }
2104
2105 sqlite3_bind_int(ppstmt, 1, wn[synset_id_1][wnum_1]);
2106 sqlite3_bind_int(ppstmt, 2, wn[synset_id_2][wnum_2]);
2107 1113
2108 if (sqlite3_step(ppstmt) != SQLITE_DONE) 1114 std::list<std::string> generator::readFile(std::string path)
1115 {
1116 std::ifstream file(path);
1117 if (!file)
2109 { 1118 {
2110 db_error(ppdb, query); 1119 throw std::invalid_argument("Could not find file " + path);
2111 } 1120 }
2112
2113 sqlite3_finalize(ppstmt);
2114 }
2115 }
2116 1121
2117 // sa table 1122 std::list<std::string> lines;
2118 {
2119 std::ifstream wnsafile(wnpref + "wn_sa.pl");
2120 if (!wnsafile.is_open())
2121 {
2122 std::cout << "Invalid WordNet data directory." << std::endl;
2123 print_usage();
2124 }
2125
2126 std::list<std::string> lines;
2127 for (;;)
2128 {
2129 std::string line; 1123 std::string line;
2130 if (!getline(wnsafile, line)) 1124 while (std::getline(file, line))
2131 {
2132 break;
2133 }
2134
2135 if (line.back() == '\r')
2136 { 1125 {
2137 line.pop_back(); 1126 if (line.back() == '\r')
1127 {
1128 line.pop_back();
1129 }
1130
1131 lines.push_back(line);
2138 } 1132 }
2139 1133
2140 lines.push_back(line); 1134 return lines;
2141 } 1135 }
2142 1136
2143 progress ppgs("Writing specifications...", lines.size()); 1137 part_of_speech generator::partOfSpeechByWnid(int wnid)
2144 for (auto line : lines)
2145 { 1138 {
2146 ppgs.update(); 1139 switch (wnid / 100000000)
2147
2148 std::regex relation("^per\\((3\\d{8}),(\\d+),(3\\d{8}),(\\d+)\\)\\.");
2149 std::smatch relation_data;
2150 if (!std::regex_search(line, relation_data, relation))
2151 {
2152 continue;
2153 }
2154
2155 int synset_id_1 = stoi(relation_data[1]);
2156 int wnum_1 = stoi(relation_data[2]);
2157 int synset_id_2 = stoi(relation_data[3]);
2158 int wnum_2 = stoi(relation_data[4]);
2159 std::string query("INSERT INTO specification (general_id, specific_id) VALUES (?, ?)");
2160
2161 sqlite3_stmt* ppstmt;
2162 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
2163 { 1140 {
2164 db_error(ppdb, query); 1141 case 1: return part_of_speech::noun;
1142 case 2: return part_of_speech::verb;
1143 case 3: return part_of_speech::adjective;
1144 case 4: return part_of_speech::adverb;
1145 default: throw std::domain_error("Invalid WordNet synset ID: " + std::to_string(wnid));
2165 } 1146 }
1147 }
2166 1148
2167 sqlite3_bind_int(ppstmt, 1, wn[synset_id_1][wnum_1]); 1149 notion& generator::createNotion(part_of_speech partOfSpeech)
2168 sqlite3_bind_int(ppstmt, 2, wn[synset_id_2][wnum_2]); 1150 {
1151 notions_.emplace_back(partOfSpeech);
1152
1153 return notions_.back();
1154 }
2169 1155
2170 if (sqlite3_step(ppstmt) != SQLITE_DONE) 1156 notion& generator::lookupOrCreateNotion(int wnid)
1157 {
1158 if (!notionByWnid_.count(wnid))
2171 { 1159 {
2172 db_error(ppdb, query); 1160 notions_.emplace_back(partOfSpeechByWnid(wnid), wnid);
1161 notionByWnid_[wnid] = &notions_.back();
2173 } 1162 }
2174 1163
2175 sqlite3_finalize(ppstmt); 1164 return *notionByWnid_.at(wnid);
2176 }
2177 }
2178
2179 // sim table
2180 {
2181 std::ifstream wnsimfile(wnpref + "wn_sim.pl");
2182 if (!wnsimfile.is_open())
2183 {
2184 std::cout << "Invalid WordNet data directory." << std::endl;
2185 print_usage();
2186 } 1165 }
2187 1166
2188 std::list<std::string> lines; 1167 lemma& generator::lookupOrCreateLemma(std::string base_form)
2189 for (;;)
2190 { 1168 {
2191 std::string line; 1169 if (!lemmaByBaseForm_.count(base_form))
2192 if (!getline(wnsimfile, line))
2193 { 1170 {
2194 break; 1171 lemmas_.emplace_back(lookupOrCreateForm(base_form));
1172 lemmaByBaseForm_[base_form] = &lemmas_.back();
2195 } 1173 }
1174
1175 return *lemmaByBaseForm_.at(base_form);
1176 }
2196 1177
2197 if (line.back() == '\r') 1178 form& generator::lookupOrCreateForm(std::string text)
1179 {
1180 if (!formByText_.count(text))
2198 { 1181 {
2199 line.pop_back(); 1182 forms_.emplace_back(text);
1183 formByText_[text] = &forms_.back();
2200 } 1184 }
2201 1185
2202 lines.push_back(line); 1186 return *formByText_[text];
2203 } 1187 }
2204 1188
2205 progress ppgs("Writing sense synonyms...", lines.size()); 1189 template <typename... Args> word& generator::createWord(Args&&... args)
2206 for (auto line : lines)
2207 { 1190 {
2208 ppgs.update(); 1191 words_.emplace_back(std::forward<Args>(args)...);
1192 word& w = words_.back();
2209 1193
2210 std::regex relation("^sim\\((3\\d{8}),(3\\d{8})\\)\\."); 1194 wordsByBaseForm_[w.getLemma().getBaseForm().getText()].insert(&w);
2211 std::smatch relation_data; 1195
2212 if (!std::regex_search(line, relation_data, relation)) 1196 if (w.getNotion().hasWnid())
2213 { 1197 {
2214 continue; 1198 wordsByWnid_[w.getNotion().getWnid()].insert(&w);
2215 } 1199 }
2216 1200
2217 int synset_id_1 = stoi(relation_data[1]); 1201 return w;
2218 int synset_id_2 = stoi(relation_data[2]); 1202 }
2219 std::string query("INSERT INTO adjective_synonymy (adjective_1_id, adjective_2_id) VALUES (?, ?)"); 1203
1204 group& generator::createGroup(xmlNodePtr top)
1205 {
1206 groups_.emplace_back();
1207 group& grp = groups_.back();
2220 1208
2221 for (auto mapping1 : wn[synset_id_1]) 1209 xmlChar* key;
1210
1211 for (xmlNodePtr node = top->xmlChildrenNode; node != nullptr; node = node->next)
2222 { 1212 {
2223 for (auto mapping2 : wn[synset_id_2]) 1213 if (!xmlStrcmp(node->name, reinterpret_cast<const xmlChar*>("SUBCLASSES")))
2224 { 1214 {
2225 sqlite3_stmt* ppstmt; 1215 for (xmlNodePtr subclass = node->xmlChildrenNode; subclass != nullptr; subclass = subclass->next)
2226 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK)
2227 { 1216 {
2228 db_error(ppdb, query); 1217 if (!xmlStrcmp(subclass->name, reinterpret_cast<const xmlChar*>("VNSUBCLASS")))
1218 {
1219 try
1220 {
1221 group& subgrp = createGroup(subclass);
1222 subgrp.setParent(grp);
1223 } catch (const std::exception& e)
1224 {
1225 key = xmlGetProp(subclass, reinterpret_cast<const xmlChar*>("ID"));
1226
1227 if (key == nullptr)
1228 {
1229 std::throw_with_nested(std::logic_error("Error parsing IDless subgroup"));
1230 } else {
1231 std::string subgroupId(reinterpret_cast<const char*>(key));
1232 xmlFree(key);
1233
1234 std::throw_with_nested(std::logic_error("Error parsing subgroup " + subgroupId));
1235 }
1236 }
1237 }
2229 } 1238 }
2230 1239 } else if (!xmlStrcmp(node->name, reinterpret_cast<const xmlChar*>("MEMBERS")))
2231 sqlite3_bind_int(ppstmt, 1, mapping1.second); 1240 {
2232 sqlite3_bind_int(ppstmt, 2, mapping2.second); 1241 for (xmlNodePtr member = node->xmlChildrenNode; member != nullptr; member = member->next)
2233
2234 if (sqlite3_step(ppstmt) != SQLITE_DONE)
2235 { 1242 {
2236 db_error(ppdb, query); 1243 if (!xmlStrcmp(member->name, reinterpret_cast<const xmlChar*>("MEMBER")))
1244 {
1245 key = xmlGetProp(member, reinterpret_cast<const xmlChar*>("wn"));
1246 std::string wnSenses(reinterpret_cast<const char*>(key));
1247 xmlFree(key);
1248
1249 auto wnSenseKeys = split<std::list<std::string>>(wnSenses, " ");
1250 if (!wnSenseKeys.empty())
1251 {
1252 std::list<std::string> tempKeys;
1253
1254 std::transform(std::begin(wnSenseKeys), std::end(wnSenseKeys), std::back_inserter(tempKeys), [] (std::string sense) {
1255 return sense + "::";
1256 });
1257
1258 std::list<std::string> filteredKeys;
1259
1260 std::remove_copy_if(std::begin(tempKeys), std::end(tempKeys), std::back_inserter(filteredKeys), [&] (std::string sense) {
1261 return !wnSenseKeys_.count(sense);
1262 });
1263
1264 wnSenseKeys = std::move(filteredKeys);
1265 }
1266
1267 if (!wnSenseKeys.empty())
1268 {
1269 for (std::string sense : wnSenseKeys)
1270 {
1271 word& wordSense = *wnSenseKeys_[sense];
1272 wordSense.setVerbGroup(grp);
1273 }
1274 } else {
1275 key = xmlGetProp(member, reinterpret_cast<const xmlChar*>("name"));
1276 std::string memberName(reinterpret_cast<const char*>(key));
1277 xmlFree(key);
1278
1279 notion& n = createNotion(part_of_speech::verb);
1280 lemma& l = lookupOrCreateLemma(memberName);
1281 word& w = createWord(n, l);
1282
1283 w.setVerbGroup(grp);
1284 }
1285 }
2237 } 1286 }
2238 1287 } else if (!xmlStrcmp(node->name, reinterpret_cast<const xmlChar*>("THEMROLES")))
2239 sqlite3_reset(ppstmt); 1288 {
2240 sqlite3_clear_bindings(ppstmt); 1289 for (xmlNodePtr roletopnode = node->xmlChildrenNode; roletopnode != nullptr; roletopnode = roletopnode->next)
2241
2242 sqlite3_bind_int(ppstmt, 1, mapping2.second);
2243 sqlite3_bind_int(ppstmt, 2, mapping1.second);
2244
2245 if (sqlite3_step(ppstmt) != SQLITE_DONE)
2246 { 1290 {
2247 db_error(ppdb, query); 1291 if (!xmlStrcmp(roletopnode->name, reinterpret_cast<const xmlChar*>("THEMROLE")))
1292 {
1293 role r;
1294
1295 key = xmlGetProp(roletopnode, reinterpret_cast<const xmlChar*>("type"));
1296 std::string roleName = reinterpret_cast<const char*>(key);
1297 xmlFree(key);
1298
1299 for (xmlNodePtr rolenode = roletopnode->xmlChildrenNode; rolenode != nullptr; rolenode = rolenode->next)
1300 {
1301 if (!xmlStrcmp(rolenode->name, reinterpret_cast<const xmlChar*>("SELRESTRS")))
1302 {
1303 r.setSelrestrs(parseSelrestr(rolenode));
1304 }
1305 }
1306
1307 grp.addRole(roleName, std::move(r));
1308 }
2248 } 1309 }
1310 } else if (!xmlStrcmp(node->name, reinterpret_cast<const xmlChar*>("FRAMES")))
1311 {
1312 for (xmlNodePtr frametopnode = node->xmlChildrenNode; frametopnode != nullptr; frametopnode = frametopnode->next)
1313 {
1314 if (!xmlStrcmp(frametopnode->name, reinterpret_cast<const xmlChar*>("FRAME")))
1315 {
1316 frames_.emplace_back();
1317 frame& fr = frames_.back();
2249 1318
2250 sqlite3_finalize(ppstmt); 1319 for (xmlNodePtr framenode = frametopnode->xmlChildrenNode; framenode != nullptr; framenode = framenode->next)
1320 {
1321 if (!xmlStrcmp(framenode->name, reinterpret_cast<const xmlChar*>("SYNTAX")))
1322 {
1323 for (xmlNodePtr syntaxnode = framenode->xmlChildrenNode; syntaxnode != nullptr; syntaxnode = syntaxnode->next)
1324 {
1325 if (!xmlStrcmp(syntaxnode->name, reinterpret_cast<const xmlChar*>("NP")))
1326 {
1327 key = xmlGetProp(syntaxnode, reinterpret_cast<const xmlChar*>("value"));
1328 std::string partRole = reinterpret_cast<const char*>(key);
1329 xmlFree(key);
1330
1331 selrestr partSelrestrs;
1332 std::set<std::string> partSynrestrs;
1333
1334 for (xmlNodePtr npnode = syntaxnode->xmlChildrenNode; npnode != nullptr; npnode = npnode->next)
1335 {
1336 if (!xmlStrcmp(npnode->name, reinterpret_cast<const xmlChar*>("SYNRESTRS")))
1337 {
1338 for (xmlNodePtr synrestr = npnode->xmlChildrenNode; synrestr != nullptr; synrestr = synrestr->next)
1339 {
1340 if (!xmlStrcmp(synrestr->name, reinterpret_cast<const xmlChar*>("SYNRESTR")))
1341 {
1342 key = xmlGetProp(synrestr, reinterpret_cast<const xmlChar*>("type"));
1343 partSynrestrs.insert(reinterpret_cast<const char*>(key));
1344 xmlFree(key);
1345 }
1346 }
1347 }
1348
1349 if (!xmlStrcmp(npnode->name, reinterpret_cast<const xmlChar*>("SELRESTRS")))
1350 {
1351 partSelrestrs = parseSelrestr(npnode);
1352 }
1353 }
1354
1355 fr.push_back(part::createNounPhrase(std::move(partRole), std::move(partSelrestrs), std::move(partSynrestrs)));
1356 } else if (!xmlStrcmp(syntaxnode->name, reinterpret_cast<const xmlChar*>("VERB")))
1357 {
1358 fr.push_back(part::createVerb());
1359 } else if (!xmlStrcmp(syntaxnode->name, reinterpret_cast<const xmlChar*>("PREP")))
1360 {
1361 std::set<std::string> partChoices;
1362 bool partLiteral;
1363
1364 if (xmlHasProp(syntaxnode, reinterpret_cast<const xmlChar*>("value")))
1365 {
1366 partLiteral = true;
1367
1368 key = xmlGetProp(syntaxnode, reinterpret_cast<const xmlChar*>("value"));
1369 std::string choicesStr = reinterpret_cast<const char*>(key);
1370 xmlFree(key);
1371
1372 split(choicesStr, " ", std::inserter(partChoices, std::end(partChoices)));
1373 } else {
1374 partLiteral = false;
1375
1376 for (xmlNodePtr npnode = syntaxnode->xmlChildrenNode; npnode != nullptr; npnode = npnode->next)
1377 {
1378 if (!xmlStrcmp(npnode->name, reinterpret_cast<const xmlChar*>("SELRESTRS")))
1379 {
1380 for (xmlNodePtr synrestr = npnode->xmlChildrenNode; synrestr != nullptr; synrestr = synrestr->next)
1381 {
1382 if (!xmlStrcmp(synrestr->name, reinterpret_cast<const xmlChar*>("SELRESTR")))
1383 {
1384 key = xmlGetProp(synrestr, reinterpret_cast<const xmlChar*>("type"));
1385 partChoices.insert(reinterpret_cast<const char*>(key));
1386 xmlFree(key);
1387 }
1388 }
1389 }
1390 }
1391 }
1392
1393 fr.push_back(part::createPreposition(std::move(partChoices), partLiteral));
1394 } else if (!xmlStrcmp(syntaxnode->name, reinterpret_cast<const xmlChar*>("ADJ")))
1395 {
1396 fr.push_back(part::createAdjective());
1397 } else if (!xmlStrcmp(syntaxnode->name, reinterpret_cast<const xmlChar*>("ADV")))
1398 {
1399 fr.push_back(part::createAdverb());
1400 } else if (!xmlStrcmp(syntaxnode->name, reinterpret_cast<const xmlChar*>("LEX")))
1401 {
1402 key = xmlGetProp(syntaxnode, reinterpret_cast<const xmlChar*>("value"));
1403 std::string literalValue = reinterpret_cast<const char*>(key);
1404 xmlFree(key);
1405
1406 fr.push_back(part::createLiteral(literalValue));
1407 } else {
1408 continue;
1409 }
1410 }
1411
1412 grp.addFrame(fr);
1413 }
1414 }
1415 }
1416 }
2251 } 1417 }
2252 } 1418 }
2253 }
2254 }
2255
2256 // syntax table
2257 {
2258 std::ifstream wnsyntaxfile(wnpref + "wn_syntax.pl");
2259 if (!wnsyntaxfile.is_open())
2260 {
2261 std::cout << "Invalid WordNet data directory." << std::endl;
2262 print_usage();
2263 }
2264 1419
2265 std::list<std::string> lines; 1420 return grp;
2266 for (;;)
2267 {
2268 std::string line;
2269 if (!getline(wnsyntaxfile, line))
2270 {
2271 break;
2272 }
2273
2274 if (line.back() == '\r')
2275 {
2276 line.pop_back();
2277 }
2278
2279 lines.push_back(line);
2280 } 1421 }
2281 1422
2282 progress ppgs("Writing adjective syntax markers...", lines.size()); 1423 selrestr generator::parseSelrestr(xmlNodePtr top)
2283 for (auto line : lines)
2284 { 1424 {
2285 ppgs.update(); 1425 xmlChar* key;
2286 1426
2287 std::regex relation("^syntax\\((3\\d{8}),(\\d+),([ipa])p?\\)\\."); 1427 if (!xmlStrcmp(top->name, reinterpret_cast<const xmlChar*>("SELRESTRS")))
2288 std::smatch relation_data;
2289 if (!std::regex_search(line, relation_data, relation))
2290 {
2291 continue;
2292 }
2293
2294 int synset_id = stoi(relation_data[1]);
2295 int wnum = stoi(relation_data[2]);
2296 std::string syn = relation_data[3];
2297 std::string query("UPDATE adjectives SET position = ? WHERE adjective_id = ?");
2298
2299 sqlite3_stmt* ppstmt;
2300 if (sqlite3_prepare_v2(ppdb, query.c_str(), query.size(), &ppstmt, NULL) != SQLITE_OK)
2301 { 1428 {
2302 db_error(ppdb, query); 1429 if (xmlChildElementCount(top) == 0)
2303 } 1430 {
2304 1431 return {};
2305 sqlite3_bind_text(ppstmt, 1, syn.c_str(), 1, SQLITE_TRANSIENT); 1432 } else if (xmlChildElementCount(top) == 1)
2306 sqlite3_bind_int(ppstmt, 2, wn[synset_id][wnum]); 1433 {
2307 1434 return parseSelrestr(xmlFirstElementChild(top));
2308 if (sqlite3_step(ppstmt) != SQLITE_DONE) 1435 } else {
1436 bool orlogic = false;
1437 if (xmlHasProp(top, reinterpret_cast<const xmlChar*>("logic")))
1438 {
1439 key = xmlGetProp(top, reinterpret_cast<const xmlChar*>("logic"));
1440 if (!xmlStrcmp(key, reinterpret_cast<const xmlChar*>("or")))
1441 {
1442 orlogic = true;
1443 }
1444
1445 xmlFree(key);
1446 }
1447
1448 std::list<selrestr> children;
1449 for (xmlNodePtr selrestr = top->xmlChildrenNode; selrestr != nullptr; selrestr = selrestr->next)
1450 {
1451 if (!xmlStrcmp(selrestr->name, reinterpret_cast<const xmlChar*>("SELRESTRS"))
1452 || !xmlStrcmp(selrestr->name, reinterpret_cast<const xmlChar*>("SELRESTR")))
1453 {
1454 children.push_back(parseSelrestr(selrestr));
1455 }
1456 }
1457
1458 return selrestr(children, orlogic);
1459 }
1460 } else if (!xmlStrcmp(top->name, reinterpret_cast<const xmlChar*>("SELRESTR")))
2309 { 1461 {
2310 db_error(ppdb, query); 1462 key = xmlGetProp(top, reinterpret_cast<const xmlChar*>("Value"));
1463 bool selPos = (std::string(reinterpret_cast<const char*>(key)) == "+");
1464 xmlFree(key);
1465
1466 key = xmlGetProp(top, reinterpret_cast<const xmlChar*>("type"));
1467 std::string selRestriction = reinterpret_cast<const char*>(key);
1468 xmlFree(key);
1469
1470 return selrestr(selRestriction, selPos);
1471 } else {
1472 throw std::logic_error("Badly formatted selrestr");
2311 } 1473 }
2312
2313 sqlite3_finalize(ppstmt);
2314 } 1474 }
2315 } 1475
2316 1476 };
2317 sqlite3_close_v2(ppdb); 1477};
2318
2319 std::cout << "Done." << std::endl;
2320}
diff --git a/generator/generator.h b/generator/generator.h new file mode 100644 index 0000000..e2a7404 --- /dev/null +++ b/generator/generator.h
@@ -0,0 +1,151 @@
1#ifndef GENERATOR_H_5B61CBC5
2#define GENERATOR_H_5B61CBC5
3
4#include <string>
5#include <map>
6#include <list>
7#include <set>
8#include <libxml/parser.h>
9#include "database.h"
10#include "notion.h"
11#include "word.h"
12#include "lemma.h"
13#include "form.h"
14#include "pronunciation.h"
15#include "group.h"
16#include "frame.h"
17
18namespace verbly {
19 namespace generator {
20
21 enum class part_of_speech;
22 class selrestr;
23
24 class generator {
25 public:
26
27 // Constructor
28
29 generator(
30 std::string verbNetPath,
31 std::string agidPath,
32 std::string wordNetPath,
33 std::string cmudictPath,
34 std::string imageNetPath,
35 std::string outputPath);
36
37 // Action
38
39 void run();
40
41 private:
42
43 // Subroutines
44
45 void readWordNetSynsets();
46
47 void readAdjectivePositioning();
48
49 void readImageNetUrls();
50
51 void readWordNetSenseKeys();
52
53 void readVerbNet();
54
55 void readAgidInflections();
56
57 void readPrepositions();
58
59 void readCmudictPronunciations();
60
61 void writeSchema();
62
63 void dumpObjects();
64
65 void readWordNetAntonymy();
66
67 void readWordNetVariation();
68
69 void readWordNetClasses();
70
71 void readWordNetCausality();
72
73 void readWordNetEntailment();
74
75 void readWordNetHypernymy();
76
77 void readWordNetInstantiation();
78
79 void readWordNetMemberMeronymy();
80
81 void readWordNetPartMeronymy();
82
83 void readWordNetSubstanceMeronymy();
84
85 void readWordNetPertainymy();
86
87 void readWordNetSpecification();
88
89 void readWordNetSimilarity();
90
91 // Helpers
92
93 std::list<std::string> readFile(std::string path);
94
95 inline part_of_speech partOfSpeechByWnid(int wnid);
96
97 notion& createNotion(part_of_speech partOfSpeech);
98
99 notion& lookupOrCreateNotion(int wnid);
100
101 lemma& lookupOrCreateLemma(std::string base_form);
102
103 form& lookupOrCreateForm(std::string text);
104
105 template <typename... Args> word& createWord(Args&&... args);
106
107 group& createGroup(xmlNodePtr top);
108
109 selrestr parseSelrestr(xmlNodePtr top);
110
111 // Input
112
113 std::string verbNetPath_;
114 std::string agidPath_;
115 std::string wordNetPath_;
116 std::string cmudictPath_;
117 std::string imageNetPath_;
118
119 // Output
120
121 database db_;
122
123 // Data
124
125 std::list<notion> notions_;
126 std::list<word> words_;
127 std::list<lemma> lemmas_;
128 std::list<form> forms_;
129 std::list<pronunciation> pronunciations_;
130 std::list<frame> frames_;
131 std::list<group> groups_;
132
133 // Indexes
134
135 std::map<int, notion*> notionByWnid_;
136 std::map<int, std::set<word*>> wordsByWnid_;
137 std::map<std::pair<int, int>, word*> wordByWnidAndWnum_;
138 std::map<std::string, std::set<word*>> wordsByBaseForm_;
139 std::map<std::string, lemma*> lemmaByBaseForm_;
140 std::map<std::string, form*> formByText_;
141
142 // Caches
143
144 std::map<std::string, word*> wnSenseKeys_;
145
146 };
147
148 };
149};
150
151#endif /* end of include guard: GENERATOR_H_5B61CBC5 */
diff --git a/generator/group.cpp b/generator/group.cpp new file mode 100644 index 0000000..7cbd4c8 --- /dev/null +++ b/generator/group.cpp
@@ -0,0 +1,119 @@
1#include "group.h"
2#include <stdexcept>
3#include <list>
4#include <json.hpp>
5#include "database.h"
6#include "field.h"
7#include "frame.h"
8
9namespace verbly {
10 namespace generator {
11
12 int group::nextId_ = 0;
13
14 group::group() : id_(nextId_++)
15 {
16 }
17
18 void group::setParent(const group& parent)
19 {
20 // Adding a group to itself is nonsensical.
21 assert(&parent != this);
22
23 parent_ = &parent;
24 }
25
26 void group::addRole(std::string name, role r)
27 {
28 roleNames_.insert(name);
29 roles_[name] = std::move(r);
30 }
31
32 void group::addFrame(const frame& f)
33 {
34 frames_.insert(&f);
35 }
36
37 std::set<std::string> group::getRoles() const
38 {
39 std::set<std::string> fullRoles = roleNames_;
40
41 if (hasParent())
42 {
43 for (std::string name : getParent().getRoles())
44 {
45 fullRoles.insert(name);
46 }
47 }
48
49 return fullRoles;
50 }
51
52 const role& group::getRole(std::string name) const
53 {
54 if (roles_.count(name))
55 {
56 return roles_.at(name);
57 } else if (hasParent())
58 {
59 return getParent().getRole(name);
60 } else {
61 throw std::invalid_argument("Specified role not found in verb group");
62 }
63 }
64
65 std::set<const frame*> group::getFrames() const
66 {
67 std::set<const frame*> fullFrames = frames_;
68
69 if (hasParent())
70 {
71 for (const frame* f : getParent().getFrames())
72 {
73 fullFrames.insert(f);
74 }
75 }
76
77 return fullFrames;
78 }
79
80 database& operator<<(database& db, const group& arg)
81 {
82 // Serialize the group first
83 {
84 std::list<field> fields;
85 fields.emplace_back("group_id", arg.getId());
86
87 nlohmann::json jsonRoles;
88 for (std::string name : arg.getRoles())
89 {
90 const role& r = arg.getRole(name);
91
92 nlohmann::json jsonRole;
93 jsonRole["type"] = name;
94 jsonRole["selrestrs"] = r.getSelrestrs().toJson();
95
96 jsonRoles.emplace_back(std::move(jsonRole));
97 }
98
99 fields.emplace_back("data", jsonRoles.dump());
100
101 db.insertIntoTable("groups", std::move(fields));
102 }
103
104 // Then, serialize the group/frame relationship
105 for (const frame* f : arg.getFrames())
106 {
107 std::list<field> fields;
108
109 fields.emplace_back("group_id", arg.getId());
110 fields.emplace_back("frame_id", f->getId());
111
112 db.insertIntoTable("groups_frames", std::move(fields));
113 }
114
115 return db;
116 }
117
118 };
119};
diff --git a/generator/group.h b/generator/group.h new file mode 100644 index 0000000..efb8c5d --- /dev/null +++ b/generator/group.h
@@ -0,0 +1,80 @@
1#ifndef GROUP_H_EDAFB5DC
2#define GROUP_H_EDAFB5DC
3
4#include <map>
5#include <set>
6#include <string>
7#include <cassert>
8#include "role.h"
9
10namespace verbly {
11 namespace generator {
12
13 class frame;
14 class database;
15
16 class group {
17 public:
18
19 // Constructor
20
21 group();
22
23 // Mutators
24
25 void setParent(const group& parent);
26
27 void addRole(std::string name, role r);
28
29 void addFrame(const frame& f);
30
31 // Accessors
32
33 int getId() const
34 {
35 return id_;
36 }
37
38 bool hasParent() const
39 {
40 return (parent_ != nullptr);
41 }
42
43 const group& getParent() const
44 {
45 // Calling code should always call hasParent first
46 assert(parent_ != nullptr);
47
48 return *parent_;
49 }
50
51 std::set<std::string> getRoles() const;
52
53 const role& getRole(std::string name) const;
54
55 std::set<const frame*> getFrames() const;
56
57 private:
58
59 static int nextId_;
60
61 const int id_;
62
63 const group* parent_ = nullptr;
64 std::map<std::string, role> roles_;
65 std::set<const frame*> frames_;
66
67 // Caches
68
69 std::set<std::string> roleNames_;
70
71 };
72
73 // Serializer
74
75 database& operator<<(database& db, const group& arg);
76
77 };
78};
79
80#endif /* end of include guard: GROUP_H_EDAFB5DC */
diff --git a/generator/lemma.cpp b/generator/lemma.cpp new file mode 100644 index 0000000..e66b153 --- /dev/null +++ b/generator/lemma.cpp
@@ -0,0 +1,65 @@
1#include "lemma.h"
2#include <list>
3#include <cassert>
4#include "field.h"
5#include "database.h"
6#include "form.h"
7
8namespace verbly {
9 namespace generator {
10
11 int lemma::nextId_ = 0;
12
13 lemma::lemma(const form& baseForm) :
14 id_(nextId_++),
15 baseForm_(baseForm)
16 {
17 inflections_[inflection::base] = {&baseForm};
18 }
19
20 void lemma::addInflection(inflection type, const form& f)
21 {
22 // There can only be one base form.
23 assert(type != inflection::base);
24
25 inflections_[type].insert(&f);
26 }
27
28 std::set<const form*> lemma::getInflections(inflection type) const
29 {
30 if (inflections_.count(type))
31 {
32 return inflections_.at(type);
33 } else {
34 return {};
35 }
36 }
37
38 database& operator<<(database& db, const lemma& arg)
39 {
40 for (inflection type : {
41 inflection::base,
42 inflection::plural,
43 inflection::comparative,
44 inflection::superlative,
45 inflection::past_tense,
46 inflection::past_participle,
47 inflection::ing_form,
48 inflection::s_form})
49 {
50 for (const form* f : arg.getInflections(type))
51 {
52 std::list<field> fields;
53 fields.emplace_back("lemma_id", arg.getId());
54 fields.emplace_back("form_id", f->getId());
55 fields.emplace_back("category", static_cast<int>(type));
56
57 db.insertIntoTable("lemmas_forms", std::move(fields));
58 }
59 }
60
61 return db;
62 }
63
64 };
65};
diff --git a/generator/lemma.h b/generator/lemma.h new file mode 100644 index 0000000..6452e08 --- /dev/null +++ b/generator/lemma.h
@@ -0,0 +1,58 @@
1#ifndef LEMMA_H_D73105A7
2#define LEMMA_H_D73105A7
3
4#include <string>
5#include <map>
6#include <set>
7#include "enums.h"
8
9namespace verbly {
10 namespace generator {
11
12 class database;
13 class form;
14
15 class lemma {
16 public:
17
18 // Constructors
19
20 explicit lemma(const form& baseForm);
21
22 // Mutators
23
24 void addInflection(inflection type, const form& f);
25
26 // Accessors
27
28 int getId() const
29 {
30 return id_;
31 }
32
33 const form& getBaseForm() const
34 {
35 return baseForm_;
36 }
37
38 std::set<const form*> getInflections(inflection type) const;
39
40 private:
41
42 static int nextId_;
43
44 const int id_;
45 const form& baseForm_;
46
47 std::map<inflection, std::set<const form*>> inflections_;
48
49 };
50
51 // Serializer
52
53 database& operator<<(database& db, const lemma& arg);
54
55 };
56};
57
58#endif /* end of include guard: LEMMA_H_D73105A7 */
diff --git a/generator/main.cpp b/generator/main.cpp new file mode 100644 index 0000000..827c963 --- /dev/null +++ b/generator/main.cpp
@@ -0,0 +1,40 @@
1#include <iostream>
2#include <exception>
3#include "generator.h"
4
// Writes the command-line synopsis and a one-line description of every
// positional argument to standard output, flushing after each line.
void printUsage()
{
  static const char* const usageLines[] = {
    "usage: generator verbnet agid wordnet cmudict imagenet output",
    "verbnet :: path to a VerbNet data directory",
    "agid :: path to an AGID infl.txt file",
    "wordnet :: path to a WordNet prolog data directory",
    "cmudict :: path to a CMUDICT pronunciation file",
    "imagenet :: path to an ImageNet urls.txt file",
    "output :: datafile output path"
  };

  for (const char* line : usageLines)
  {
    std::cout << line << std::endl;
  }
}
15
16int main(int argc, char** argv)
17{
18 if (argc == 7)
19 {
20 try
21 {
22 verbly::generator::generator app(argv[1], argv[2], argv[3], argv[4], argv[5], argv[6]);
23
24 try
25 {
26 app.run();
27 } catch (const std::exception& e)
28 {
29 std::cout << e.what() << std::endl;
30 }
31 } catch (const std::exception& e)
32 {
33 std::cout << e.what() << std::endl;
34 printUsage();
35 }
36 } else {
37 std::cout << "verbly datafile generator" << std::endl;
38 printUsage();
39 }
40}
diff --git a/generator/notion.cpp b/generator/notion.cpp new file mode 100644 index 0000000..290d982 --- /dev/null +++ b/generator/notion.cpp
@@ -0,0 +1,85 @@
1#include "notion.h"
2#include <string>
3#include <list>
4#include "database.h"
5#include "field.h"
6
7namespace verbly {
8 namespace generator {
9
10 int notion::nextId_ = 0;
11
12 notion::notion(
13 part_of_speech partOfSpeech) :
14 id_(nextId_++),
15 partOfSpeech_(partOfSpeech)
16 {
17 }
18
19 notion::notion(
20 part_of_speech partOfSpeech,
21 int wnid) :
22 id_(nextId_++),
23 partOfSpeech_(partOfSpeech),
24 wnid_(wnid),
25 hasWnid_(true)
26 {
27 }
28
29 void notion::incrementNumOfImages()
30 {
31 // Calling code should always call hasWnid and check that the notion is a noun first.
32 assert(hasWnid_ && (partOfSpeech_ == part_of_speech::noun));
33
34 numOfImages_++;
35 }
36
37 void notion::setPrepositionGroups(std::list<std::string> groups)
38 {
39 // Calling code should always check that the notion is a preposition first.
40 assert(partOfSpeech_ == part_of_speech::preposition);
41
42 prepositionGroups_ = groups;
43 }
44
45 database& operator<<(database& db, const notion& arg)
46 {
47 // First, serialize the notion
48 {
49 std::list<field> fields;
50
51 fields.emplace_back("notion_id", arg.getId());
52 fields.emplace_back("part_of_speech", static_cast<int>(arg.getPartOfSpeech()));
53
54 if (arg.hasWnid())
55 {
56 fields.emplace_back("wnid", arg.getWnid());
57
58 if (arg.getPartOfSpeech() == part_of_speech::noun)
59 {
60 fields.emplace_back("images", arg.getNumOfImages());
61 }
62 }
63
64 db.insertIntoTable("notions", std::move(fields));
65 }
66
67 // Next, serialize the is_a relationship if this is a preposition
68 if (arg.getPartOfSpeech() == part_of_speech::preposition)
69 {
70 for (std::string group : arg.getPrepositionGroups())
71 {
72 std::list<field> fields;
73
74 fields.emplace_back("notion_id", arg.getId());
75 fields.emplace_back("groupname", group);
76
77 db.insertIntoTable("is_a", std::move(fields));
78 }
79 }
80
81 return db;
82 }
83
84 };
85};
diff --git a/generator/notion.h b/generator/notion.h new file mode 100644 index 0000000..76210de --- /dev/null +++ b/generator/notion.h
@@ -0,0 +1,91 @@
1#ifndef NOTION_H_221DE2BC
2#define NOTION_H_221DE2BC
3
4#include <cassert>
5#include <list>
6#include <string>
7#include "enums.h"
8
9namespace verbly {
10 namespace generator {
11
12 class database;
13
14 class notion {
15 public:
16
17 // Constructors
18
19 explicit notion(part_of_speech partOfSpeech);
20
21 notion(part_of_speech partOfSpeech, int wnid);
22
23 // Mutators
24
25 void incrementNumOfImages();
26
27 void setPrepositionGroups(std::list<std::string> groups);
28
29 // Accessors
30
31 int getId() const
32 {
33 return id_;
34 }
35
36 part_of_speech getPartOfSpeech() const
37 {
38 return partOfSpeech_;
39 }
40
41 bool hasWnid() const
42 {
43 return hasWnid_;
44 }
45
46 int getWnid() const
47 {
48 // Calling code should always call hasWnid first.
49 assert(hasWnid_);
50
51 return wnid_;
52 }
53
54 int getNumOfImages() const
55 {
56 // Calling code should always call hasWnid and check that the notion is a noun first.
57 assert(hasWnid_ && (partOfSpeech_ == part_of_speech::noun));
58
59 return numOfImages_;
60 }
61
62 std::list<std::string> getPrepositionGroups() const
63 {
64 // Calling code should always check that the notion is a preposition first.
65 assert(partOfSpeech_ == part_of_speech::preposition);
66
67 return prepositionGroups_;
68 }
69
70 private:
71
72 static int nextId_;
73
74 const int id_;
75 const part_of_speech partOfSpeech_;
76 const int wnid_ = 0;
77 const bool hasWnid_ = false;
78
79 int numOfImages_ = 0;
80 std::list<std::string> prepositionGroups_;
81
82 };
83
84 // Serializer
85
86 database& operator<<(database& db, const notion& arg);
87
88 };
89};
90
91#endif /* end of include guard: NOTION_H_221DE2BC */
diff --git a/generator/part.cpp b/generator/part.cpp new file mode 100644 index 0000000..dbd4e11 --- /dev/null +++ b/generator/part.cpp
@@ -0,0 +1,336 @@
1#include "part.h"
2#include <stdexcept>
3#include "selrestr.h"
4
5namespace verbly {
6 namespace generator {
7
8 part part::createNounPhrase(std::string role, selrestr selrestrs, std::set<std::string> synrestrs)
9 {
10 part p(type::noun_phrase);
11
12 new(&p.noun_phrase_.role) std::string(std::move(role));
13 new(&p.noun_phrase_.selrestrs) selrestr(std::move(selrestrs));
14 new(&p.noun_phrase_.synrestrs) std::set<std::string>(std::move(synrestrs));
15
16 return p;
17 }
18
19 part part::createVerb()
20 {
21 return part(type::verb);
22 }
23
24 part part::createPreposition(std::set<std::string> choices, bool literal)
25 {
26 part p(type::preposition);
27
28 new(&p.preposition_.choices) std::set<std::string>(std::move(choices));
29 p.preposition_.literal = literal;
30
31 return p;
32 }
33
34 part part::createAdjective()
35 {
36 return part(type::adjective);
37 }
38
39 part part::createAdverb()
40 {
41 return part(type::adverb);
42 }
43
44 part part::createLiteral(std::string value)
45 {
46 part p(type::literal);
47
48 new(&p.literal_) std::string(std::move(value));
49
50 return p;
51 }
52
53 part::part(const part& other)
54 {
55 type_ = other.type_;
56
57 switch (type_)
58 {
59 case type::noun_phrase:
60 {
61 new(&noun_phrase_.role) std::string(other.noun_phrase_.role);
62 new(&noun_phrase_.selrestrs) selrestr(other.noun_phrase_.selrestrs);
63 new(&noun_phrase_.synrestrs) std::set<std::string>(other.noun_phrase_.synrestrs);
64
65 break;
66 }
67
68 case type::preposition:
69 {
70 new(&preposition_.choices) std::set<std::string>(other.preposition_.choices);
71 preposition_.literal = other.preposition_.literal;
72
73 break;
74 }
75
76 case type::literal:
77 {
78 new(&literal_) std::string(other.literal_);
79
80 break;
81 }
82
83 case type::verb:
84 case type::adjective:
85 case type::adverb:
86 case type::invalid:
87 {
88 break;
89 }
90 }
91 }
92
93 part::part(part&& other) : part()
94 {
95 swap(*this, other);
96 }
97
98 part& part::operator=(part other)
99 {
100 swap(*this, other);
101
102 return *this;
103 }
104
105 void swap(part& first, part& second)
106 {
107 using type = part::type;
108
109 type tempType = first.type_;
110 std::string tempRole;
111 selrestr tempSelrestrs;
112 std::set<std::string> tempSynrestrs;
113 std::set<std::string> tempChoices;
114 bool tempPrepLiteral;
115 std::string tempLiteralValue;
116
117 switch (tempType)
118 {
119 case type::noun_phrase:
120 {
121 tempRole = std::move(first.noun_phrase_.role);
122 tempSelrestrs = std::move(first.noun_phrase_.selrestrs);
123 tempSynrestrs = std::move(first.noun_phrase_.synrestrs);
124
125 break;
126 }
127
128 case type::preposition:
129 {
130 tempChoices = std::move(first.preposition_.choices);
131 tempPrepLiteral = first.preposition_.literal;
132
133 break;
134 }
135
136 case type::literal:
137 {
138 tempLiteralValue = std::move(first.literal_);
139
140 break;
141 }
142
143 case type::verb:
144 case type::adjective:
145 case type::adverb:
146 case type::invalid:
147 {
148 break;
149 }
150 }
151
152 first.~part();
153
154 first.type_ = second.type_;
155
156 switch (first.type_)
157 {
158 case type::noun_phrase:
159 {
160 new(&first.noun_phrase_.role) std::string(std::move(second.noun_phrase_.role));
161 new(&first.noun_phrase_.selrestrs) selrestr(std::move(second.noun_phrase_.selrestrs));
162 new(&first.noun_phrase_.synrestrs) std::set<std::string>(std::move(second.noun_phrase_.synrestrs));
163
164 break;
165 }
166
167 case type::preposition:
168 {
169 new(&first.preposition_.choices) std::set<std::string>(std::move(second.preposition_.choices));
170 first.preposition_.literal = second.preposition_.literal;
171
172 break;
173 }
174
175 case type::literal:
176 {
177 new(&first.literal_) std::string(std::move(second.literal_));
178
179 break;
180 }
181
182 case type::verb:
183 case type::adjective:
184 case type::adverb:
185 case type::invalid:
186 {
187 break;
188 }
189 }
190
191 second.~part();
192
193 second.type_ = tempType;
194
195 switch (second.type_)
196 {
197 case type::noun_phrase:
198 {
199 new(&second.noun_phrase_.role) std::string(std::move(tempRole));
200 new(&second.noun_phrase_.selrestrs) selrestr(std::move(tempSelrestrs));
201 new(&second.noun_phrase_.synrestrs) std::set<std::string>(std::move(tempSynrestrs));
202
203 break;
204 }
205
206 case type::preposition:
207 {
208 new(&second.preposition_.choices) std::set<std::string>(std::move(tempChoices));
209 second.preposition_.literal = tempPrepLiteral;
210
211 break;
212 }
213
214 case type::literal:
215 {
216 new(&second.literal_) std::string(std::move(tempLiteralValue));
217
218 break;
219 }
220
221 case type::verb:
222 case type::adjective:
223 case type::adverb:
224 case type::invalid:
225 {
226 break;
227 }
228 }
229 }
230
231 part::~part()
232 {
233 switch (type_)
234 {
235 case type::noun_phrase:
236 {
237 using string_type = std::string;
238 using set_type = std::set<std::string>;
239
240 noun_phrase_.role.~string_type();
241 noun_phrase_.selrestrs.~selrestr();
242 noun_phrase_.synrestrs.~set_type();
243
244 break;
245 }
246
247 case type::preposition:
248 {
249 using set_type = std::set<std::string>;
250
251 preposition_.choices.~set_type();
252
253 break;
254 }
255
256 case type::literal:
257 {
258 using string_type = std::string;
259
260 literal_.~string_type();
261
262 break;
263 }
264
265 case type::verb:
266 case type::adjective:
267 case type::adverb:
268 case type::invalid:
269 {
270 break;
271 }
272 }
273 }
274
275 std::string part::getNounRole() const
276 {
277 if (type_ == type::noun_phrase)
278 {
279 return noun_phrase_.role;
280 } else {
281 throw std::domain_error("part::getNounRole is only valid for noun phrase parts");
282 }
283 }
284
285 selrestr part::getNounSelrestrs() const
286 {
287 if (type_ == type::noun_phrase)
288 {
289 return noun_phrase_.selrestrs;
290 } else {
291 throw std::domain_error("part::getNounSelrestrs is only valid for noun phrase parts");
292 }
293 }
294
295 std::set<std::string> part::getNounSynrestrs() const
296 {
297 if (type_ == type::noun_phrase)
298 {
299 return noun_phrase_.synrestrs;
300 } else {
301 throw std::domain_error("part::getNounSynrestrs is only valid for noun phrase parts");
302 }
303 }
304
305 std::set<std::string> part::getPrepositionChoices() const
306 {
307 if (type_ == type::preposition)
308 {
309 return preposition_.choices;
310 } else {
311 throw std::domain_error("part::getPrepositionChoices is only valid for preposition parts");
312 }
313 }
314
315 bool part::isPrepositionLiteral() const
316 {
317 if (type_ == type::preposition)
318 {
319 return preposition_.literal;
320 } else {
321 throw std::domain_error("part::isPrepositionLiteral is only valid for preposition parts");
322 }
323 }
324
325 std::string part::getLiteralValue() const
326 {
327 if (type_ == type::literal)
328 {
329 return literal_;
330 } else {
331 throw std::domain_error("part::getLiteralValue is only valid for literal parts");
332 }
333 }
334
335 };
336};
diff --git a/generator/part.h b/generator/part.h new file mode 100644 index 0000000..d044630 --- /dev/null +++ b/generator/part.h
@@ -0,0 +1,114 @@
1#ifndef PART_H_FB54F361
2#define PART_H_FB54F361
3
4#include <string>
5#include <set>
6#include "selrestr.h"
7
8namespace verbly {
9 namespace generator {
10
11 class part {
12 public:
13 enum class type {
14 invalid = -1,
15 noun_phrase = 0,
16 verb = 1,
17 preposition = 2,
18 adjective = 3,
19 adverb = 4,
20 literal = 5
21 };
22
23 // Static factories
24
25 static part createNounPhrase(std::string role, selrestr selrestrs, std::set<std::string> synrestrs);
26
27 static part createVerb();
28
29 static part createPreposition(std::set<std::string> choices, bool literal);
30
31 static part createAdjective();
32
33 static part createAdverb();
34
35 static part createLiteral(std::string value);
36
37 // Copy and move constructors
38
39 part(const part& other);
40
41 part(part&& other);
42
43 // Assignment
44
45 part& operator=(part other);
46
47 // Swap
48
49 friend void swap(part& first, part& second);
50
51 // Destructor
52
53 ~part();
54
55 // General accessors
56
57 type getType() const
58 {
59 return type_;
60 }
61
62 // Noun phrase accessors
63
64 std::string getNounRole() const;
65
66 selrestr getNounSelrestrs() const;
67
68 std::set<std::string> getNounSynrestrs() const;
69
70 // Preposition accessors
71
72 std::set<std::string> getPrepositionChoices() const;
73
74 bool isPrepositionLiteral() const;
75
76 // Literal accessors
77
78 std::string getLiteralValue() const;
79
80 private:
81
82 // Private constructors
83
84 part()
85 {
86 }
87
88 part(type t) : type_(t)
89 {
90 }
91
92 // Data
93
94 union {
95 struct {
96 std::string role;
97 selrestr selrestrs;
98 std::set<std::string> synrestrs;
99 } noun_phrase_;
100 struct {
101 std::set<std::string> choices;
102 bool literal;
103 } preposition_;
104 std::string literal_;
105 };
106
107 type type_ = type::invalid;
108
109 };
110
111 };
112};
113
114#endif /* end of include guard: PART_H_FB54F361 */
diff --git a/generator/progress.h b/generator/progress.h index 81f07a3..fcb680d 100644 --- a/generator/progress.h +++ b/generator/progress.h
@@ -3,48 +3,54 @@
3 3
4#include <string> 4#include <string>
5 5
6class progress { 6namespace verbly {
7 private: 7 namespace generator {
8 std::string message;
9 int total;
10 int cur = 0;
11 int lprint = 0;
12 8
13 public: 9 class progress {
14 progress(std::string message, int total) : message(message), total(total) 10 private:
15 { 11 std::string message;
16 std::cout << message << " 0%" << std::flush; 12 int total;
17 } 13 int cur = 0;
14 int lprint = 0;
18 15
19 void update(int val) 16 public:
20 { 17 progress(std::string message, int total) : message(message), total(total)
21 if (val <= total) 18 {
22 { 19 std::cout << message << " 0%" << std::flush;
23 cur = val; 20 }
24 } else { 21
25 cur = total; 22 void update(int val)
26 } 23 {
24 if (val <= total)
25 {
26 cur = val;
27 } else {
28 cur = total;
29 }
27 30
28 int pp = cur * 100 / total; 31 int pp = cur * 100 / total;
29 if (pp != lprint) 32 if (pp != lprint)
30 { 33 {
31 lprint = pp; 34 lprint = pp;
32 35
33 std::cout << "\b\b\b\b" << std::right; 36 std::cout << "\b\b\b\b" << std::right;
34 std::cout.width(3); 37 std::cout.width(3);
35 std::cout << pp << "%" << std::flush; 38 std::cout << pp << "%" << std::flush;
36 } 39 }
37 } 40 }
41
42 void update()
43 {
44 update(cur+1);
45 }
38 46
39 void update() 47 ~progress()
40 { 48 {
41 update(cur+1); 49 std::cout << "\b\b\b\b100%" << std::endl;
42 } 50 }
51 };
43 52
44 ~progress() 53 };
45 {
46 std::cout << "\b\b\b\b100%" << std::endl;
47 }
48}; 54};
49 55
50#endif /* end of include guard: PROGRESS_H_A34EF856 */ 56#endif /* end of include guard: PROGRESS_H_A34EF856 */
diff --git a/generator/pronunciation.cpp b/generator/pronunciation.cpp new file mode 100644 index 0000000..eb07607 --- /dev/null +++ b/generator/pronunciation.cpp
@@ -0,0 +1,87 @@
1#include "pronunciation.h"
2#include <list>
3#include <algorithm>
4#include <cctype>
5#include <iterator>
6#include "database.h"
7#include "field.h"
8#include "../lib/util.h"
9
10namespace verbly {
11 namespace generator {
12
13 int pronunciation::nextId_ = 0;
14
15 pronunciation::pronunciation(std::string phonemes) :
16 id_(nextId_++),
17 phonemes_(phonemes)
18 {
19 auto phonemeList = split<std::list<std::string>>(phonemes, " ");
20
21 auto rhymeStart = std::find_if(std::begin(phonemeList), std::end(phonemeList), [] (std::string phoneme) {
22 return phoneme.find("1") != std::string::npos;
23 });
24
25 // Rhyme detection
26 if (rhymeStart != std::end(phonemeList))
27 {
28 std::list<std::string> rhymePhonemes;
29
30 std::transform(rhymeStart, std::end(phonemeList), std::back_inserter(rhymePhonemes), [] (std::string phoneme) {
31 std::string naked;
32
33 std::remove_copy_if(std::begin(phoneme), std::end(phoneme), std::back_inserter(naked), [] (char ch) {
34 return std::isdigit(ch);
35 });
36
37 return naked;
38 });
39
40 rhyme_ = implode(std::begin(rhymePhonemes), std::end(rhymePhonemes), " ");
41
42 if (rhymeStart != std::begin(phonemeList))
43 {
44 prerhyme_ = *std::prev(rhymeStart);
45 }
46 }
47
48 // Syllable/stress
49 for (std::string phoneme : phonemeList)
50 {
51 if (std::isdigit(phoneme.back()))
52 {
53 // It's a vowel!
54 syllables_++;
55
56 if (phoneme.back() == '1')
57 {
58 stress_.push_back('1');
59 } else {
60 stress_.push_back('0');
61 }
62 }
63 }
64 }
65
66 database& operator<<(database& db, const pronunciation& arg)
67 {
68 std::list<field> fields;
69
70 fields.emplace_back("pronunciation_id", arg.getId());
71 fields.emplace_back("phonemes", arg.getPhonemes());
72 fields.emplace_back("syllables", arg.getSyllables());
73 fields.emplace_back("stress", arg.getStress());
74
75 if (arg.hasRhyme())
76 {
77 fields.emplace_back("rhyme", arg.getRhymePhonemes());
78 fields.emplace_back("prerhyme", arg.getPrerhyme());
79 }
80
81 db.insertIntoTable("pronunciations", std::move(fields));
82
83 return db;
84 }
85
86 };
87};
diff --git a/generator/pronunciation.h b/generator/pronunciation.h new file mode 100644 index 0000000..81be6c4 --- /dev/null +++ b/generator/pronunciation.h
@@ -0,0 +1,82 @@
1#ifndef PRONUNCIATION_H_584A08DD
2#define PRONUNCIATION_H_584A08DD
3
4#include <string>
5#include <cassert>
6
7namespace verbly {
8 namespace generator {
9
10 class database;
11
// A pronunciation: a phoneme string plus data derived from it by the
// constructor (rhyme, prerhyme, syllable count, stress pattern).
class pronunciation {
public:

  // --- Construction ---

  explicit pronunciation(std::string phonemes);

  // --- Queries ---

  int getId() const { return id_; }

  std::string getPhonemes() const { return phonemes_; }

  // True when a non-empty rhyme is stored.
  bool hasRhyme() const { return !rhyme_.empty(); }

  // Precondition (asserted): hasRhyme() is true.
  std::string getRhymePhonemes() const
  {
    assert(!rhyme_.empty());

    return rhyme_;
  }

  // Precondition (asserted): hasRhyme() is true.
  std::string getPrerhyme() const
  {
    assert(!rhyme_.empty());

    return prerhyme_;
  }

  int getSyllables() const { return syllables_; }

  std::string getStress() const { return stress_; }

private:

  // Next ID to hand out; shared by all pronunciations.
  static int nextId_;

  const int id_;
  const std::string phonemes_;
  std::string rhyme_;
  std::string prerhyme_;
  int syllables_ = 0;
  std::string stress_;

};
74
75 // Serializer
76
77 database& operator<<(database& db, const pronunciation& arg);
78
79 };
80};
81
82#endif /* end of include guard: PRONUNCIATION_H_584A08DD */
diff --git a/generator/role.h b/generator/role.h new file mode 100644 index 0000000..5fa68b8 --- /dev/null +++ b/generator/role.h
@@ -0,0 +1,35 @@
1#ifndef ROLE_H_249F9A9C
2#define ROLE_H_249F9A9C
3
4#include "selrestr.h"
5
6namespace verbly {
7 namespace generator {
8
9 class role {
10 public:
11
12 // Mutators
13
14 void setSelrestrs(selrestr selrestrs)
15 {
16 selrestrs_ = selrestrs;
17 }
18
19 // Accessors
20
21 const selrestr& getSelrestrs() const
22 {
23 return selrestrs_;
24 }
25
26 private:
27
28 selrestr selrestrs_;
29
30 };
31
32 };
33};
34
35#endif /* end of include guard: ROLE_H_249F9A9C */
diff --git a/generator/schema.sql b/generator/schema.sql index 410b536..c3e54d8 100644 --- a/generator/schema.sql +++ b/generator/schema.sql
@@ -1,286 +1,204 @@
1DROP TABLE IF EXISTS `verbs`; 1CREATE TABLE `notions` (
2CREATE TABLE `verbs` ( 2 `notion_id` INTEGER PRIMARY KEY,
3 `verb_id` INTEGER PRIMARY KEY, 3 `part_of_speech` SMALLINT NOT NULL,
4 `infinitive` VARCHAR(32) NOT NULL, 4 `wnid` INTEGER,
5 `past_tense` VARCHAR(32) NOT NULL, 5 `images` INTEGER
6 `past_participle` VARCHAR(32) NOT NULL,
7 `ing_form` VARCHAR(32) NOT NULL,
8 `s_form` VARCHAR(32) NOT NULL
9); 6);
10 7
11DROP TABLE IF EXISTS `groups`; 8CREATE UNIQUE INDEX `notion_by_wnid` ON `notions`(`wnid`);
12CREATE TABLE `groups` (
13 `group_id` INTEGER PRIMARY KEY,
14 `data` BLOB NOT NULL
15);
16
17DROP TABLE IF EXISTS `frames`;
18CREATE TABLE `frames` (
19 `frame_id` INTEGER PRIMARY KEY,
20 `group_id` INTEGER NOT NULL,
21 `data` BLOB NOT NULL,
22 FOREIGN KEY (`group_id`) REFERENCES `groups`(`group_id`)
23);
24 9
25DROP TABLE IF EXISTS `verb_groups`;
26CREATE TABLE `verb_groups` (
27 `verb_id` INTEGER NOT NULL,
28 `group_id` INTEGER NOT NULL,
29 FOREIGN KEY (`verb_id`) REFERENCES `verbs`(`verb_id`),
30 FOREIGN KEY (`group_id`) REFERENCES `groups`(`group_id`)
31);
32
33DROP TABLE IF EXISTS `adjectives`;
34CREATE TABLE `adjectives` (
35 `adjective_id` INTEGER PRIMARY KEY,
36 `base_form` VARCHAR(32) NOT NULL,
37 `comparative` VARCHAR(32),
38 `superlative` VARCHAR(32),
39 `position` CHAR(1),
40 `complexity` INTEGER NOT NULL
41);
42
43DROP TABLE IF EXISTS `adverbs`;
44CREATE TABLE `adverbs` (
45 `adverb_id` INTEGER PRIMARY KEY,
46 `base_form` VARCHAR(32) NOT NULL,
47 `comparative` VARCHAR(32),
48 `superlative` VARCHAR(32),
49 `complexity` INTEGER NOT NULL
50);
51
52DROP TABLE IF EXISTS `nouns`;
53CREATE TABLE `nouns` (
54 `noun_id` INTEGER PRIMARY KEY,
55 `singular` VARCHAR(32) NOT NULL,
56 `plural` VARCHAR(32),
57 `proper` INTEGER(1) NOT NULL,
58 `complexity` INTEGER NOT NULL,
59 `images` INTEGER NOT NULL,
60 `wnid` INTEGER NOT NULL
61);
62
63DROP TABLE IF EXISTS `hypernymy`;
64CREATE TABLE `hypernymy` ( 10CREATE TABLE `hypernymy` (
65 `hypernym_id` INTEGER NOT NULL, 11 `hypernym_id` INTEGER NOT NULL,
66 `hyponym_id` INTEGER NOT NULL, 12 `hyponym_id` INTEGER NOT NULL
67 FOREIGN KEY (`hypernym_id`) REFERENCES `nouns`(`noun_id`),
68 FOREIGN KEY (`hyponym_id`) REFERENCES `nouns`(`noun_id`)
69); 13);
70 14
71DROP TABLE IF EXISTS `instantiation`; 15CREATE INDEX `hyponym_of` ON `hypernymy`(`hypernym_id`);
16CREATE INDEX `hypernym_of` ON `hypernymy`(`hyponym_id`);
17
72CREATE TABLE `instantiation` ( 18CREATE TABLE `instantiation` (
73 `class_id` INTEGER NOT NULL, 19 `class_id` INTEGER NOT NULL,
74 `instance_id` INTEGER NOT NULL, 20 `instance_id` INTEGER NOT NULL
75 FOREIGN KEY (`class_id`) REFERENCES `nouns`(`noun_id`),
76 FOREIGN KEY (`instance_id`) REFERENCES `nouns`(`noun_id`)
77); 21);
78 22
79DROP TABLE IF EXISTS `member_meronymy`; 23CREATE INDEX `instance_of` ON `instantiation`(`class_id`);
24CREATE INDEX `class_of` ON `instantiation`(`instance_id`);
25
80CREATE TABLE `member_meronymy` ( 26CREATE TABLE `member_meronymy` (
81 `meronym_id` INTEGER NOT NULL, 27 `meronym_id` INTEGER NOT NULL,
82 `holonym_id` INTEGER NOT NULL, 28 `holonym_id` INTEGER NOT NULL
83 FOREIGN KEY (`meronym_id`) REFERENCES `nouns`(`noun_id`),
84 FOREIGN KEY (`holonym_id`) REFERENCES `nouns`(`noun_id`)
85); 29);
86 30
87DROP TABLE IF EXISTS `part_meronymy`; 31CREATE INDEX `member_holonym_of` ON `member_meronymy`(`meronym_id`);
32CREATE INDEX `member_meronym_of` ON `member_meronymy`(`holonym_id`);
33
88CREATE TABLE `part_meronymy` ( 34CREATE TABLE `part_meronymy` (
89 `meronym_id` INTEGER NOT NULL, 35 `meronym_id` INTEGER NOT NULL,
90 `holonym_id` INTEGER NOT NULL, 36 `holonym_id` INTEGER NOT NULL
91 FOREIGN KEY (`meronym_id`) REFERENCES `nouns`(`noun_id`),
92 FOREIGN KEY (`holonym_id`) REFERENCES `nouns`(`noun_id`)
93); 37);
94 38
95DROP TABLE IF EXISTS `substance_meronymy`; 39CREATE INDEX `part_holonym_of` ON `part_meronymy`(`meronym_id`);
40CREATE INDEX `part_meronym_of` ON `part_meronymy`(`holonym_id`);
41
96CREATE TABLE `substance_meronymy` ( 42CREATE TABLE `substance_meronymy` (
97 `meronym_id` INTEGER NOT NULL, 43 `meronym_id` INTEGER NOT NULL,
98 `holonym_id` INTEGER NOT NULL, 44 `holonym_id` INTEGER NOT NULL
99 FOREIGN KEY (`meronym_id`) REFERENCES `nouns`(`noun_id`),
100 FOREIGN KEY (`holonym_id`) REFERENCES `nouns`(`noun_id`)
101); 45);
102 46
103DROP TABLE IF EXISTS `variation`; 47CREATE INDEX `substance_holonym_of` ON `substance_meronymy`(`meronym_id`);
48CREATE INDEX `substance_meronym_of` ON `substance_meronymy`(`holonym_id`);
49
104CREATE TABLE `variation` ( 50CREATE TABLE `variation` (
105 `noun_id` INTEGER NOT NULL, 51 `noun_id` INTEGER NOT NULL,
106 `adjective_id` INTEGER NOT NULL, 52 `adjective_id` INTEGER NOT NULL
107 FOREIGN KEY (`noun_id`) REFERENCES `nouns`(`noun_id`),
108 FOREIGN KEY (`adjective_id`) REFERENCES `adjectives`(`adjective_id`)
109); 53);
110 54
111DROP TABLE IF EXISTS `noun_antonymy`; 55CREATE INDEX `variant_of` ON `variation`(`noun_id`);
112CREATE TABLE `noun_antonymy` ( 56CREATE INDEX `attribute_of` ON `variation`(`adjective_id`);
113 `noun_1_id` INTEGER NOT NULL,
114 `noun_2_id` INTEGER NOT NULL,
115 FOREIGN KEY (`noun_1_id`) REFERENCES `nouns`(`noun_id`),
116 FOREIGN KEY (`noun_2_id`) REFERENCES `nouns`(`noun_id`)
117);
118 57
119DROP TABLE IF EXISTS `adjective_antonymy`; 58CREATE TABLE `similarity` (
120CREATE TABLE `adjective_antonymy` (
121 `adjective_1_id` INTEGER NOT NULL, 59 `adjective_1_id` INTEGER NOT NULL,
122 `adjective_2_id` INTEGER NOT NULL, 60 `adjective_2_id` INTEGER NOT NULL
123 FOREIGN KEY (`adjective_1_id`) REFERENCES `adjectives`(`adjective_id`), 61);
124 FOREIGN KEY (`adjective_2_id`) REFERENCES `adjectives`(`adjective_id`) 62
63CREATE INDEX `similar_to` ON `similarity`(`adjective_1_id`);
64
65CREATE TABLE `is_a` (
66 `notion_id` INTEGER NOT NULL,
67 `groupname` VARCHAR(32) NOT NULL
125); 68);
126 69
127DROP TABLE IF EXISTS `adverb_antonymy`; 70CREATE TABLE `entailment` (
128CREATE TABLE `adverb_antonymy` ( 71 `given_id` INTEGER NOT NULL,
129 `adverb_1_id` INTEGER NOT NULL, 72 `entailment_id` INTEGER NOT NULL
130 `adverb_2_id` INTEGER NOT NULL, 73);
131 FOREIGN KEY (`adverb_1_id`) REFERENCES `adverbs`(`adverb_id`), 74
132 FOREIGN KEY (`adverb_2_id`) REFERENCES `adverbs`(`adverb_id`) 75CREATE INDEX `entailment_of` ON `entailment`(`given_id`);
76CREATE INDEX `entailed_by` ON `entailment`(`entailment_id`);
77
78CREATE TABLE `causality` (
79 `cause_id` INTEGER NOT NULL,
80 `effect_id` INTEGER NOT NULL
81);
82
83CREATE INDEX `effect_of` ON `causality`(`cause_id`);
84CREATE INDEX `cause_of` ON `causality`(`effect_id`);
85
86CREATE TABLE `words` (
87 `word_id` INTEGER PRIMARY KEY,
88 `notion_id` INTEGER NOT NULL,
89 `lemma_id` INTEGER NOT NULL,
90 `tag_count` INTEGER,
91 `position` SMALLINT,
92 `group_id` INTEGER
93);
94
95CREATE INDEX `notion_words` ON `words`(`notion_id`);
96CREATE INDEX `lemma_words` ON `words`(`lemma_id`);
97CREATE INDEX `group_words` ON `words`(`group_id`);
98
99CREATE TABLE `antonymy` (
100 `antonym_1_id` INTEGER NOT NULL,
101 `antonym_2_id` INTEGER NOT NULL
133); 102);
134 103
135DROP TABLE IF EXISTS `specification`; 104CREATE INDEX `antonym_of` ON `antonymy`(`antonym_1_id`);
105
136CREATE TABLE `specification` ( 106CREATE TABLE `specification` (
137 `general_id` INTEGER NOT NULL, 107 `general_id` INTEGER NOT NULL,
138 `specific_id` INTEGER NOT NULL, 108 `specific_id` INTEGER NOT NULL
139 FOREIGN KEY (`general_id`) REFERENCES `adjectives`(`adjective_id`),
140 FOREIGN KEY (`specific_id`) REFERENCES `adjectives`(`adjective_id`)
141); 109);
142 110
143DROP TABLE IF EXISTS `pertainymy`; 111CREATE INDEX `specification_of` ON `specification`(`general_id`);
112CREATE INDEX `generalization_of` ON `specification`(`specific_id`);
113
144CREATE TABLE `pertainymy` ( 114CREATE TABLE `pertainymy` (
145 `noun_id` INTEGER NOT NULL, 115 `noun_id` INTEGER NOT NULL,
146 `pertainym_id` INTEGER NOT NULL, 116 `pertainym_id` INTEGER NOT NULL
147 FOREIGN KEY (`noun_id`) REFERENCES `nouns`(`noun_id`),
148 FOREIGN KEY (`pertainym_id`) REFERENCES `adjectives`(`adjective_id`)
149); 117);
150 118
151DROP TABLE IF EXISTS `mannernymy`; 119CREATE INDEX `pertainym_of` ON `pertainymy`(`noun_id`);
120CREATE INDEX `anti_pertainym_of` ON `pertainymy`(`pertainym_id`);
121
152CREATE TABLE `mannernymy` ( 122CREATE TABLE `mannernymy` (
153 `adjective_id` INTEGER NOT NULL, 123 `adjective_id` INTEGER NOT NULL,
154 `mannernym_id` INTEGER NOT NULL, 124 `mannernym_id` INTEGER NOT NULL
155 FOREIGN KEY (`adjective_id`) REFERENCES `adjectives`(`adjective_id`),
156 FOREIGN KEY (`mannernym_id`) REFERENCES `adverbs`(`adverb_id`)
157); 125);
158 126
159DROP TABLE IF EXISTS `noun_synonymy`; 127CREATE INDEX `mannernym_of` ON `mannernymy`(`adjective_id`);
160CREATE TABLE `noun_synonymy` ( 128CREATE INDEX `anti_mannernym_of` ON `mannernymy`(`mannernym_id`);
161 `noun_1_id` INTEGER NOT NULL,
162 `noun_2_id` INTEGER NOT NULL,
163 FOREIGN KEY (`noun_1_id`) REFERENCES `nouns`(`nouns_id`),
164 FOREIGN KEY (`noun_2_id`) REFERENCES `nouns`(`nouns_id`)
165);
166 129
167DROP TABLE IF EXISTS `adjective_synonymy`; 130CREATE TABLE `usage` (
168CREATE TABLE `adjective_synonymy` ( 131 `domain_id` INTEGER NOT NULL,
169 `adjective_1_id` INTEGER NOT NULL, 132 `term_id` INTEGER NOT NULL
170 `adjective_2_id` INTEGER NOT NULL,
171 FOREIGN KEY (`adjective_1_id`) REFERENCES `adjectives`(`adjective_id`),
172 FOREIGN KEY (`adjective_2_id`) REFERENCES `adjectives`(`adjective_id`)
173); 133);
174 134
175DROP TABLE IF EXISTS `adverb_synonymy`; 135CREATE INDEX `usage_term_of` ON `usage`(`domain_id`);
176CREATE TABLE `adverb_synonymy` ( 136CREATE INDEX `usage_domain_of` ON `usage`(`term_id`);
177 `adverb_1_id` INTEGER NOT NULL,
178 `adverb_2_id` INTEGER NOT NULL,
179 FOREIGN KEY (`adverb_1_id`) REFERENCES `adverbs`(`adverb_id`),
180 FOREIGN KEY (`adverb_2_id`) REFERENCES `adverbs`(`adverb_id`)
181);
182 137
183DROP TABLE IF EXISTS `noun_pronunciations`; 138CREATE TABLE `topicality` (
184CREATE TABLE `noun_pronunciations` ( 139 `domain_id` INTEGER NOT NULL,
185 `noun_id` INTEGER NOT NULL, 140 `term_id` INTEGER NOT NULL
186 `pronunciation` VARCHAR(64) NOT NULL,
187 `prerhyme` VARCHAR(8),
188 `rhyme` VARCHAR(64),
189 `syllables` INT NOT NULL,
190 `stress` VARCHAR(64) NOT NULL,
191 FOREIGN KEY (`noun_id`) REFERENCES `nouns`(`noun_id`)
192); 141);
193 142
194DROP TABLE IF EXISTS `verb_pronunciations`; 143CREATE INDEX `topical_term_of` ON `topicality`(`domain_id`);
195CREATE TABLE `verb_pronunciations` ( 144CREATE INDEX `topical_domain_of` ON `topicality`(`term_id`);
196 `verb_id` INTEGER NOT NULL,
197 `pronunciation` VARCHAR(64) NOT NULL,
198 `prerhyme` VARCHAR(8),
199 `rhyme` VARCHAR(64),
200 `syllables` INT NOT NULL,
201 `stress` VARCHAR(64) NOT NULL,
202 FOREIGN KEY (`verb_id`) REFERENCES `verbs`(`verb_id`)
203);
204 145
205DROP TABLE IF EXISTS `adjective_pronunciations`; 146CREATE TABLE `regionality` (
206CREATE TABLE `adjective_pronunciations` ( 147 `domain_id` INTEGER NOT NULL,
207 `adjective_id` INTEGER NOT NULL, 148 `term_id` INTEGER NOT NULL
208 `pronunciation` VARCHAR(64) NOT NULL,
209 `prerhyme` VARCHAR(8),
210 `rhyme` VARCHAR(64),
211 `syllables` INT NOT NULL,
212 `stress` VARCHAR(64) NOT NULL,
213 FOREIGN KEY (`adjective_id`) REFERENCES `adjectives`(`adjective_id`)
214); 149);
215 150
216DROP TABLE IF EXISTS `adverb_pronunciations`; 151CREATE INDEX `regional_term_of` ON `regionality`(`domain_id`);
217CREATE TABLE `adverb_pronunciations` ( 152CREATE INDEX `regional_domain_of` ON `regionality`(`term_id`);
218 `adverb_id` INTEGER NOT NULL,
219 `pronunciation` VARCHAR(64) NOT NULL,
220 `prerhyme` VARCHAR(8),
221 `rhyme` VARCHAR(64),
222 `syllables` INT NOT NULL,
223 `stress` VARCHAR(64) NOT NULL,
224 FOREIGN KEY (`adverb_id`) REFERENCES `adverbs`(`adverb_id`)
225);
226 153
227DROP TABLE IF EXISTS `noun_noun_derivation`; 154CREATE TABLE `forms` (
228CREATE TABLE `noun_noun_derivation` ( 155 `form_id` INTEGER PRIMARY KEY,
229 `noun_1_id` INTEGER NOT NULL, 156 `form` VARCHAR(32) NOT NULL,
230 `noun_2_id` INTEGER NOT NULL, 157 `complexity` SMALLINT NOT NULL,
231 FOREIGN KEY (`noun_1_id`) REFERENCES `nouns`(`noun_id`), 158 `proper` SMALLINT NOT NULL
232 FOREIGN KEY (`noun_2_id`) REFERENCES `nouns`(`noun_id`)
233); 159);
234 160
235DROP TABLE IF EXISTS `noun_adjective_derivation`; 161CREATE UNIQUE INDEX `form_by_string` ON `forms`(`form`);
236CREATE TABLE `noun_adjective_derivation` (
237 `noun_id` INTEGER NOT NULL,
238 `adjective_id` INTEGER NOT NULL,
239 FOREIGN KEY (`noun_id`) REFERENCES `nouns`(`noun_id`),
240 FOREIGN KEY (`adjective_id`) REFERENCES `adjectives`(`adjective_id`)
241);
242 162
243DROP TABLE IF EXISTS `noun_adverb_derivation`; 163CREATE TABLE `lemmas_forms` (
244CREATE TABLE `noun_adverb_derivation` ( 164 `lemma_id` INTEGER NOT NULL,
245 `noun_id` INTEGER NOT NULL, 165 `form_id` INTEGER NOT NULL,
246 `adverb_id` INTEGER NOT NULL, 166 `category` SMALLINT NOT NULL
247 FOREIGN KEY (`noun_id`) REFERENCES `nouns`(`noun_id`),
248 FOREIGN KEY (`adverb_id`) REFERENCES `adverbs`(`adverb_id`)
249); 167);
250 168
251DROP TABLE IF EXISTS `adjective_adjective_derivation`; 169CREATE INDEX `form_of` ON `lemmas_forms`(`lemma_id`);
252CREATE TABLE `adjective_adjective_derivation` ( 170CREATE INDEX `lemma_of` ON `lemmas_forms`(`form_id`);
253 `adjective_1_id` INTEGER NOT NULL, 171
254 `adjective_2_id` INTEGER NOT NULL, 172CREATE TABLE `pronunciations` (
255 FOREIGN KEY (`adjective_1_id`) REFERENCES `adjectives`(`adjective_id`), 173 `pronunciation_id` INTEGER PRIMARY KEY,
256 FOREIGN KEY (`adjective_2_id`) REFERENCES `adjectives`(`adjective_id`) 174 `phonemes` VARCHAR(64) NOT NULL,
175 `prerhyme` VARCHAR(8),
176 `rhyme` VARCHAR(64),
177 `syllables` INTEGER NOT NULL,
178 `stress` VARCHAR(64) NOT NULL
257); 179);
258 180
259DROP TABLE IF EXISTS `adjective_adverb_derivation`; 181CREATE TABLE `forms_pronunciations` (
260CREATE TABLE `adjective_adverb_derivation` ( 182 `form_id` INTEGER NOT NULL,
261 `adjective_id` INTEGER NOT NULL, 183 `pronunciation_id` INTEGER NOT NULL
262 `adverb_id` INTEGER NOT NULL,
263 FOREIGN KEY (`adjective_id`) REFERENCES `adjectives`(`adjective_id`),
264 FOREIGN KEY (`adverb_id`) REFERENCES `adverbs`(`adjective_id`)
265); 184);
266 185
267DROP TABLE IF EXISTS `adverb_adverb_derivation`; 186CREATE INDEX `pronunciation_of` ON `forms_pronunciations`(`form_id`);
268CREATE TABLE `adverb_adverb_derivation` ( 187CREATE INDEX `spelling_of` ON `forms_pronunciations`(`pronunciation_id`);
269 `adverb_1_id` INTEGER NOT NULL, 188
270 `adverb_2_id` INTEGER NOT NULL, 189CREATE TABLE `groups` (
271 FOREIGN KEY (`adverb_1_id`) REFERENCES `adverbs`(`adverb_id`), 190 `group_id` INTEGER PRIMARY KEY,
272 FOREIGN KEY (`adverb_2_id`) REFERENCES `adverbs`(`adverb_id`) 191 `data` BLOB NOT NULL
273); 192);
274 193
275DROP TABLE IF EXISTS `prepositions`; 194CREATE TABLE `frames` (
276CREATE TABLE `prepositions` ( 195 `frame_id` INTEGER PRIMARY KEY,
277 `preposition_id` INTEGER PRIMARY KEY, 196 `data` BLOB NOT NULL
278 `form` VARCHAR(32) NOT NULL
279); 197);
280 198
281DROP TABLE IF EXISTS `preposition_groups`; 199CREATE TABLE `groups_frames` (
282CREATE TABLE `preposition_groups` ( 200 `group_id` INTEGER NOT NULL,
283 `preposition_id` INTEGER NOT NULL, 201 `frame_id` INTEGER NOT NULL
284 `groupname` VARCHAR(32) NOT NULL,
285 FOREIGN KEY (`preposition_id`) REFERENCES `prepositions`(`preposition_id`)
286); 202);
203
204CREATE INDEX `frames_in` ON `groups_frames`(`group_id`);
diff --git a/generator/selrestr.cpp b/generator/selrestr.cpp new file mode 100644 index 0000000..8bdd3f6 --- /dev/null +++ b/generator/selrestr.cpp
@@ -0,0 +1,288 @@
1#include "selrestr.h"
2
3namespace verbly {
4 namespace generator {
5
6 selrestr::selrestr(const selrestr& other)
7 {
8 type_ = other.type_;
9
10 switch (type_)
11 {
12 case type::singleton:
13 {
14 singleton_.pos = other.singleton_.pos;
15 new(&singleton_.restriction) std::string(other.singleton_.restriction);
16
17 break;
18 }
19
20 case type::group:
21 {
22 new(&group_.children) std::list<selrestr>(other.group_.children);
23 group_.orlogic = other.group_.orlogic;
24
25 break;
26 }
27
28 case type::empty:
29 {
30 break;
31 }
32 }
33 }
34
35 selrestr::selrestr(selrestr&& other) : selrestr()
36 {
37 swap(*this, other);
38 }
39
40 selrestr& selrestr::operator=(selrestr other)
41 {
42 swap(*this, other);
43
44 return *this;
45 }
46
47 void swap(selrestr& first, selrestr& second)
48 {
49 using type = selrestr::type;
50
51 type tempType = first.type_;
52 int tempPos;
53 std::string tempRestriction;
54 std::list<selrestr> tempChildren;
55 bool tempOrlogic;
56
57 switch (tempType)
58 {
59 case type::singleton:
60 {
61 tempPos = first.singleton_.pos;
62 tempRestriction = std::move(first.singleton_.restriction);
63
64 break;
65 }
66
67 case type::group:
68 {
69 tempChildren = std::move(first.group_.children);
70 tempOrlogic = first.group_.orlogic;
71
72 break;
73 }
74
75 case type::empty:
76 {
77 break;
78 }
79 }
80
81 first.~selrestr();
82
83 first.type_ = second.type_;
84
85 switch (first.type_)
86 {
87 case type::singleton:
88 {
89 first.singleton_.pos = second.singleton_.pos;
90 new(&first.singleton_.restriction) std::string(std::move(second.singleton_.restriction));
91
92 break;
93 }
94
95 case type::group:
96 {
97 new(&first.group_.children) std::list<selrestr>(std::move(second.group_.children));
98 first.group_.orlogic = second.group_.orlogic;
99
100 break;
101 }
102
103 case type::empty:
104 {
105 break;
106 }
107 }
108
109 second.~selrestr();
110
111 second.type_ = tempType;
112
113 switch (second.type_)
114 {
115 case type::singleton:
116 {
117 second.singleton_.pos = tempPos;
118 new(&second.singleton_.restriction) std::string(std::move(tempRestriction));
119
120 break;
121 }
122
123 case type::group:
124 {
125 new(&second.group_.children) std::list<selrestr>(std::move(tempChildren));
126 second.group_.orlogic = tempOrlogic;
127
128 break;
129 }
130
131 case type::empty:
132 {
133 break;
134 }
135 }
136 }
137
138 selrestr::~selrestr()
139 {
140 switch (type_)
141 {
142 case type::singleton:
143 {
144 using string_type = std::string;
145 singleton_.restriction.~string_type();
146
147 break;
148 }
149
150 case type::group:
151 {
152 using list_type = std::list<selrestr>;
153 group_.children.~list_type();
154
155 break;
156 }
157
158 case type::empty:
159 {
160 break;
161 }
162 }
163 }
164
165 selrestr::selrestr() : type_(type::empty)
166 {
167 }
168
169 selrestr::selrestr(
170 std::string restriction,
171 bool pos) :
172 type_(type::singleton)
173 {
174 new(&singleton_.restriction) std::string(std::move(restriction));
175 singleton_.pos = pos;
176 }
177
178 std::string selrestr::getRestriction() const
179 {
180 if (type_ == type::singleton)
181 {
182 return singleton_.restriction;
183 } else {
184 throw std::domain_error("Only singleton selrestrs have restrictions");
185 }
186 }
187
188 bool selrestr::getPos() const
189 {
190 if (type_ == type::singleton)
191 {
192 return singleton_.pos;
193 } else {
194 throw std::domain_error("Only singleton selrestrs have positivity flags");
195 }
196 }
197
198 selrestr::selrestr(
199 std::list<selrestr> children,
200 bool orlogic) :
201 type_(type::group)
202 {
203 new(&group_.children) std::list<selrestr>(std::move(children));
204 group_.orlogic = orlogic;
205 }
206
207 std::list<selrestr> selrestr::getChildren() const
208 {
209 if (type_ == type::group)
210 {
211 return group_.children;
212 } else {
213 throw std::domain_error("Only group selrestrs have children");
214 }
215 }
216
217 std::list<selrestr>::const_iterator selrestr::begin() const
218 {
219 if (type_ == type::group)
220 {
221 return std::begin(group_.children);
222 } else {
223 throw std::domain_error("Only group selrestrs have children");
224 }
225 }
226
227 std::list<selrestr>::const_iterator selrestr::end() const
228 {
229 if (type_ == type::group)
230 {
231 return std::end(group_.children);
232 } else {
233 throw std::domain_error("Only group selrestrs have children");
234 }
235 }
236
237 bool selrestr::getOrlogic() const
238 {
239 if (type_ == type::group)
240 {
241 return group_.orlogic;
242 } else {
243 throw std::domain_error("Only group selrestrs have logic");
244 }
245 }
246
247 nlohmann::json selrestr::toJson() const
248 {
249 switch (type_)
250 {
251 case type::empty:
252 {
253 return {};
254 }
255
256 case type::singleton:
257 {
258 return {
259 {"type", singleton_.restriction},
260 {"pos", singleton_.pos}
261 };
262 }
263
264 case type::group:
265 {
266 std::string logic;
267 if (group_.orlogic)
268 {
269 logic = "or";
270 } else {
271 logic = "and";
272 }
273
274 std::list<nlohmann::json> children;
275 std::transform(std::begin(group_.children), std::end(group_.children), std::back_inserter(children), [] (const selrestr& child) {
276 return child.toJson();
277 });
278
279 return {
280 {"logic", logic},
281 {"children", children}
282 };
283 }
284 }
285 }
286
287 };
288};
diff --git a/generator/selrestr.h b/generator/selrestr.h new file mode 100644 index 0000000..5000970 --- /dev/null +++ b/generator/selrestr.h
@@ -0,0 +1,88 @@
1#ifndef SELRESTR_H_50652FB7
2#define SELRESTR_H_50652FB7
3
4#include <list>
5#include <string>
6#include <json.hpp>
7
8namespace verbly {
9 namespace generator {
10
11 class selrestr {
12 public:
13 enum class type {
14 empty,
15 singleton,
16 group
17 };
18
19 // Copy and move constructors
20
21 selrestr(const selrestr& other);
22 selrestr(selrestr&& other);
23
24 // Assignment
25
26 selrestr& operator=(selrestr other);
27
28 // Swap
29
30 friend void swap(selrestr& first, selrestr& second);
31
32 // Destructor
33
34 ~selrestr();
35
36 // Generic accessors
37
38 type getType() const
39 {
40 return type_;
41 }
42
43 // Empty
44
45 selrestr();
46
47 // Singleton
48
49 selrestr(std::string restriction, bool pos);
50
51 std::string getRestriction() const;
52
53 bool getPos() const;
54
55 // Group
56
57 selrestr(std::list<selrestr> children, bool orlogic);
58
59 std::list<selrestr> getChildren() const;
60
61 std::list<selrestr>::const_iterator begin() const;
62
63 std::list<selrestr>::const_iterator end() const;
64
65 bool getOrlogic() const;
66
67 // Helpers
68
69 nlohmann::json toJson() const;
70
71 private:
72 union {
73 struct {
74 bool pos;
75 std::string restriction;
76 } singleton_;
77 struct {
78 std::list<selrestr> children;
79 bool orlogic;
80 } group_;
81 };
82 type type_;
83 };
84
85 };
86};
87
88#endif /* end of include guard: SELRESTR_H_50652FB7 */
diff --git a/generator/word.cpp b/generator/word.cpp new file mode 100644 index 0000000..8ba3ce2 --- /dev/null +++ b/generator/word.cpp
@@ -0,0 +1,77 @@
1#include "word.h"
2#include <list>
3#include <string>
4#include "database.h"
5#include "notion.h"
6#include "lemma.h"
7#include "field.h"
8#include "group.h"
9
10namespace verbly {
11 namespace generator {
12
13 int word::nextId_ = 0;
14
15 word::word(
16 notion& n,
17 lemma& l) :
18 id_(nextId_++),
19 notion_(n),
20 lemma_(l)
21 {
22 }
23
24 word::word(
25 notion& n,
26 lemma& l,
27 int tagCount) :
28 id_(nextId_++),
29 notion_(n),
30 lemma_(l),
31 tagCount_(tagCount),
32 hasTagCount_(true)
33 {
34 }
35
36 void word::setAdjectivePosition(positioning adjectivePosition)
37 {
38 adjectivePosition_ = adjectivePosition;
39 }
40
41 void word::setVerbGroup(const group& verbGroup)
42 {
43 verbGroup_ = &verbGroup;
44 }
45
46 database& operator<<(database& db, const word& arg)
47 {
48 std::list<field> fields;
49
50 fields.emplace_back("word_id", arg.getId());
51 fields.emplace_back("notion_id", arg.getNotion().getId());
52 fields.emplace_back("lemma_id", arg.getLemma().getId());
53
54 if (arg.hasTagCount())
55 {
56 fields.emplace_back("tag_count", arg.getTagCount());
57 }
58
59 if ((arg.getNotion().getPartOfSpeech() == part_of_speech::adjective)
60 && (arg.getAdjectivePosition() != positioning::undefined))
61 {
62 fields.emplace_back("position", static_cast<int>(arg.getAdjectivePosition()));
63 }
64
65 if ((arg.getNotion().getPartOfSpeech() == part_of_speech::verb)
66 && (arg.hasVerbGroup()))
67 {
68 fields.emplace_back("group_id", arg.getVerbGroup().getId());
69 }
70
71 db.insertIntoTable("words", std::move(fields));
72
73 return db;
74 }
75
76 };
77};
diff --git a/generator/word.h b/generator/word.h new file mode 100644 index 0000000..bfed586 --- /dev/null +++ b/generator/word.h
@@ -0,0 +1,110 @@
1#ifndef WORD_H_91F99D46
2#define WORD_H_91F99D46
3
4#include <cassert>
5#include "enums.h"
6
7namespace verbly {
8 namespace generator {
9
10 class notion;
11 class lemma;
12 class database;
13 class group;
14
15 class word {
16 public:
17
18 // Constructors
19
20 word(notion& n, lemma& l);
21
22 word(notion& n, lemma& l, int tagCount);
23
24 // Mutators
25
26 void setAdjectivePosition(positioning adjectivePosition);
27
28 void setVerbGroup(const group& verbGroup);
29
30 // Accessors
31
32 int getId() const
33 {
34 return id_;
35 }
36
37 notion& getNotion()
38 {
39 return notion_;
40 }
41
42 const notion& getNotion() const
43 {
44 return notion_;
45 }
46
47 lemma& getLemma()
48 {
49 return lemma_;
50 }
51
52 const lemma& getLemma() const
53 {
54 return lemma_;
55 }
56
57 bool hasTagCount() const
58 {
59 return hasTagCount_;
60 }
61
62 int getTagCount() const
63 {
64 // Calling code should always call hasTagCount first.
65 assert(hasTagCount_);
66
67 return tagCount_;
68 }
69
70 positioning getAdjectivePosition() const
71 {
72 return adjectivePosition_;
73 }
74
75 bool hasVerbGroup() const
76 {
77 return (verbGroup_ != nullptr);
78 }
79
80 const group& getVerbGroup() const
81 {
82 // Calling code should always call hasVerbGroup first.
83 assert(verbGroup_ != nullptr);
84
85 return *verbGroup_;
86 }
87
88 private:
89
90 static int nextId_;
91
92 const int id_;
93 notion& notion_;
94 lemma& lemma_;
95 const int tagCount_ = 0;
96 const bool hasTagCount_ = false;
97
98 positioning adjectivePosition_ = positioning::undefined;
99 const group* verbGroup_ = nullptr;
100
101 };
102
103 // Serializer
104
105 database& operator<<(database& db, const word& arg);
106
107 };
108};
109
110#endif /* end of include guard: WORD_H_91F99D46 */
diff --git a/lib/adjective.cpp b/lib/adjective.cpp deleted file mode 100644 index ba8254a..0000000 --- a/lib/adjective.cpp +++ /dev/null
@@ -1,113 +0,0 @@
1#include "verbly.h"
2
3namespace verbly {
4
5 adjective::adjective()
6 {
7
8 }
9
10 adjective::adjective(const data& _data, int _id) : word(_data, _id)
11 {
12
13 }
14
15 std::string adjective::base_form() const
16 {
17 assert(_valid == true);
18
19 return _base_form;
20 }
21
22 std::string adjective::comparative_form() const
23 {
24 assert(_valid == true);
25
26 return _comparative_form;
27 }
28
29 std::string adjective::superlative_form() const
30 {
31 assert(_valid == true);
32
33 return _superlative_form;
34 }
35
36 adjective::positioning adjective::position() const
37 {
38 assert(_valid == true);
39
40 return _position;
41 }
42
43 bool adjective::has_comparative_form() const
44 {
45 assert(_valid == true);
46
47 return !_comparative_form.empty();
48 }
49
50 bool adjective::has_superlative_form() const
51 {
52 assert(_valid == true);
53
54 return !_superlative_form.empty();
55 }
56
57 bool adjective::has_position() const
58 {
59 assert(_valid == true);
60
61 return _position != adjective::positioning::undefined;
62 }
63
64 adjective_query adjective::antonyms() const
65 {
66 assert(_valid == true);
67
68 return _data->adjectives().antonym_of(*this);
69 }
70
71 adjective_query adjective::synonyms() const
72 {
73 assert(_valid == true);
74
75 return _data->adjectives().synonym_of(*this);
76 }
77
78 adjective_query adjective::generalizations() const
79 {
80 assert(_valid == true);
81
82 return _data->adjectives().generalization_of(*this);
83 }
84
85 adjective_query adjective::specifications() const
86 {
87 assert(_valid == true);
88
89 return _data->adjectives().specification_of(*this);
90 }
91
92 noun_query adjective::anti_pertainyms() const
93 {
94 assert(_valid == true);
95
96 return _data->nouns().anti_pertainym_of(*this);
97 }
98
99 adverb_query adjective::mannernyms() const
100 {
101 assert(_valid == true);
102
103 return _data->adverbs().mannernym_of(*this);
104 }
105
106 noun_query adjective::attributes() const
107 {
108 assert(_valid == true);
109
110 return _data->nouns().attribute_of(*this);
111 }
112
113};
diff --git a/lib/adjective.h b/lib/adjective.h deleted file mode 100644 index a6eb293..0000000 --- a/lib/adjective.h +++ /dev/null
@@ -1,51 +0,0 @@
1#ifndef ADJECTIVE_H_87B3FB75
2#define ADJECTIVE_H_87B3FB75
3
4namespace verbly {
5
6 class adjective_query;
7 class adverb_query;
8 class noun_query;
9
10 class adjective : public word {
11 public:
12 enum class positioning {
13 undefined,
14 predicate,
15 attributive,
16 postnominal
17 };
18
19 private:
20 std::string _base_form;
21 std::string _comparative_form;
22 std::string _superlative_form;
23 positioning _position = positioning::undefined;
24
25 friend class adjective_query;
26
27 public:
28 adjective();
29 adjective(const data& _data, int _id);
30
31 std::string base_form() const;
32 std::string comparative_form() const;
33 std::string superlative_form() const;
34 positioning position() const;
35
36 bool has_comparative_form() const;
37 bool has_superlative_form() const;
38 bool has_position() const;
39
40 adjective_query antonyms() const;
41 adjective_query synonyms() const;
42 adjective_query generalizations() const;
43 adjective_query specifications() const;
44 noun_query anti_pertainyms() const;
45 adverb_query mannernyms() const;
46 noun_query attributes() const;
47 };
48
49};
50
51#endif /* end of include guard: ADJECTIVE_H_87B3FB75 */
diff --git a/lib/adjective_query.cpp b/lib/adjective_query.cpp deleted file mode 100644 index 90ccef4..0000000 --- a/lib/adjective_query.cpp +++ /dev/null
@@ -1,1072 +0,0 @@
1#include "verbly.h"
2
3namespace verbly {
4
5 adjective_query::adjective_query(const data& _data) : _data(_data)
6 {
7
8 }
9
10 adjective_query& adjective_query::limit(int _limit)
11 {
12 if ((_limit > 0) || (_limit == unlimited))
13 {
14 this->_limit = _limit;
15 }
16
17 return *this;
18 }
19
20 adjective_query& adjective_query::random()
21 {
22 this->_random = true;
23
24 return *this;
25 }
26
27 adjective_query& adjective_query::except(const adjective& _word)
28 {
29 _except.push_back(_word);
30
31 return *this;
32 }
33
34 adjective_query& adjective_query::rhymes_with(const word& _word)
35 {
36 for (auto rhyme : _word.get_rhymes())
37 {
38 _rhymes.push_back(rhyme);
39 }
40
41 if (dynamic_cast<const adjective*>(&_word) != nullptr)
42 {
43 _except.push_back(dynamic_cast<const adjective&>(_word));
44 }
45
46 return *this;
47 }
48
49 adjective_query& adjective_query::rhymes_with(rhyme _r)
50 {
51 _rhymes.push_back(_r);
52
53 return *this;
54 }
55
56 adjective_query& adjective_query::has_pronunciation()
57 {
58 this->_has_prn = true;
59
60 return *this;
61 }
62
63 adjective_query& adjective_query::has_rhyming_noun()
64 {
65 _has_rhyming_noun = true;
66
67 return *this;
68 }
69
70 adjective_query& adjective_query::has_rhyming_adjective()
71 {
72 _has_rhyming_adjective = true;
73
74 return *this;
75 }
76
77 adjective_query& adjective_query::has_rhyming_adverb()
78 {
79 _has_rhyming_adverb = true;
80
81 return *this;
82 }
83
84 adjective_query& adjective_query::has_rhyming_verb()
85 {
86 _has_rhyming_verb = true;
87
88 return *this;
89 }
90
91 adjective_query& adjective_query::with_stress(filter<std::vector<bool>> _arg)
92 {
93 _stress = _arg;
94
95 return *this;
96 }
97
98 adjective_query& adjective_query::with_prefix(filter<std::string> _f)
99 {
100 _f.clean();
101 _with_prefix = _f;
102
103 return *this;
104 }
105
106 adjective_query& adjective_query::with_suffix(filter<std::string> _f)
107 {
108 _f.clean();
109 _with_suffix = _f;
110
111 return *this;
112 }
113
114 adjective_query& adjective_query::with_complexity(int _arg)
115 {
116 _with_complexity = _arg;
117
118 return *this;
119 }
120
121 adjective_query& adjective_query::requires_comparative_form()
122 {
123 _requires_comparative_form = true;
124
125 return *this;
126 }
127
128 adjective_query& adjective_query::requires_superlative_form()
129 {
130 _requires_superlative_form = true;
131
132 return *this;
133 }
134
135 adjective_query& adjective_query::position(adjective::positioning pos)
136 {
137 _position = pos;
138
139 return *this;
140 }
141
142 adjective_query& adjective_query::is_variant()
143 {
144 this->_is_variant = true;
145
146 return *this;
147 }
148
149 adjective_query& adjective_query::variant_of(filter<noun> _f)
150 {
151 _f.clean();
152 _variant_of = _f;
153
154 return *this;
155 }
156
157 adjective_query& adjective_query::has_antonyms()
158 {
159 this->_is_antonymic = true;
160
161 return *this;
162 }
163
164 adjective_query& adjective_query::antonym_of(filter<adjective> _f)
165 {
166 _f.clean();
167 _antonym_of = _f;
168
169 return *this;
170 }
171
172 adjective_query& adjective_query::has_synonyms()
173 {
174 this->_is_synonymic = true;
175
176 return *this;
177 }
178
179 adjective_query& adjective_query::synonym_of(filter<adjective> _f)
180 {
181 _f.clean();
182 _synonym_of = _f;
183
184 return *this;
185 }
186
187 adjective_query& adjective_query::is_generalization()
188 {
189 this->_is_generalization = true;
190
191 return *this;
192 }
193
194 adjective_query& adjective_query::generalization_of(filter<adjective> _f)
195 {
196 _f.clean();
197 _generalization_of = _f;
198
199 return *this;
200 }
201
202 adjective_query& adjective_query::is_specification()
203 {
204 this->_is_specification = true;
205
206 return *this;
207 }
208
209 adjective_query& adjective_query::specification_of(filter<adjective> _f)
210 {
211 _f.clean();
212 _specification_of = _f;
213
214 return *this;
215 }
216
217 adjective_query& adjective_query::is_pertainymic()
218 {
219 this->_is_pertainymic = true;
220
221 return *this;
222 }
223
224 adjective_query& adjective_query::pertainym_of(filter<noun> _f)
225 {
226 _f.clean();
227 _pertainym_of = _f;
228
229 return *this;
230 }
231
232 adjective_query& adjective_query::is_mannernymic()
233 {
234 this->_is_mannernymic = true;
235
236 return *this;
237 }
238
239 adjective_query& adjective_query::anti_mannernym_of(filter<adverb> _f)
240 {
241 _f.clean();
242 _anti_mannernym_of = _f;
243
244 return *this;
245 }
246 /*
247 adjective_query& adjective_query::derived_from(const word& _w)
248 {
249 if (dynamic_cast<const adjective*>(&_w) != nullptr)
250 {
251 _derived_from_adjective.push_back(dynamic_cast<const adjective&>(_w));
252 } else if (dynamic_cast<const adverb*>(&_w) != nullptr)
253 {
254 _derived_from_adverb.push_back(dynamic_cast<const adverb&>(_w));
255 } else if (dynamic_cast<const noun*>(&_w) != nullptr)
256 {
257 _derived_from_noun.push_back(dynamic_cast<const noun&>(_w));
258 }
259
260 return *this;
261 }
262
263 adjective_query& adjective_query::not_derived_from(const word& _w)
264 {
265 if (dynamic_cast<const adjective*>(&_w) != nullptr)
266 {
267 _not_derived_from_adjective.push_back(dynamic_cast<const adjective&>(_w));
268 } else if (dynamic_cast<const adverb*>(&_w) != nullptr)
269 {
270 _not_derived_from_adverb.push_back(dynamic_cast<const adverb&>(_w));
271 } else if (dynamic_cast<const noun*>(&_w) != nullptr)
272 {
273 _not_derived_from_noun.push_back(dynamic_cast<const noun&>(_w));
274 }
275
276 return *this;
277 }
278 */
279 std::list<adjective> adjective_query::run() const
280 {
281 std::stringstream construct;
282 construct << "SELECT adjective_id, base_form, comparative, superlative, position FROM adjectives";
283 std::list<std::string> conditions;
284 std::list<binding> bindings;
285
286 if (_has_prn)
287 {
288 conditions.push_back("adjective_id IN (SELECT adjective_id FROM adjective_pronunciations)");
289 }
290
291 if (!_rhymes.empty())
292 {
293 std::list<std::string> clauses(_rhymes.size(), "(prerhyme != ? AND rhyme = ?)");
294 std::string cond = "adjective_id IN (SELECT adjective_id FROM adjective_pronunciations WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
295 conditions.push_back(cond);
296
297 for (auto rhy : _rhymes)
298 {
299 bindings.emplace_back(rhy.get_prerhyme());
300 bindings.emplace_back(rhy.get_rhyme());
301 }
302 }
303
304 if (_has_rhyming_noun)
305 {
306 conditions.push_back("adjective_id IN (SELECT a.adjective_id FROM adjectives AS a INNER JOIN adjective_pronunciations AS curp ON curp.adjective_id = a.adjective_id INNER JOIN noun_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)");
307 }
308
309 if (_has_rhyming_adjective)
310 {
311 conditions.push_back("adjective_id IN (SELECT a.adjective_id FROM adjectives AS a INNER JOIN adjective_pronunciations AS curp ON curp.adjective_id = a.adjective_id INNER JOIN adjective_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme AND rhmp.adjective_id != curp.adjective_id)");
312 }
313
314 if (_has_rhyming_adverb)
315 {
316 conditions.push_back("adjective_id IN (SELECT a.adjective_id FROM adjectives AS a INNER JOIN adjective_pronunciations AS curp ON curp.adjective_id = a.adjective_id INNER JOIN adverb_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)");
317 }
318
319 if (_has_rhyming_verb)
320 {
321 conditions.push_back("adjective_id IN (SELECT a.adjective_id FROM adjectives AS a INNER JOIN adjective_pronunciations AS curp ON curp.adjective_id = a.adjective_id INNER JOIN verb_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)");
322 }
323
324 for (auto except : _except)
325 {
326 conditions.push_back("adjective_id != ?");
327 bindings.emplace_back(except._id);
328 }
329
330 if (_requires_comparative_form)
331 {
332 conditions.push_back("comparative IS NOT NULL");
333 }
334
335 if (_requires_superlative_form)
336 {
337 conditions.push_back("superlative IS NOT NULL");
338 }
339
340 switch (_position)
341 {
342 case adjective::positioning::predicate: conditions.push_back("position = 'p'"); break;
343 case adjective::positioning::attributive: conditions.push_back("position = 'a'"); break;
344 case adjective::positioning::postnominal: conditions.push_back("position = 'i'"); break;
345 case adjective::positioning::undefined: break;
346 }
347
348 if (!_stress.empty())
349 {
350 std::stringstream cond;
351 if (_stress.get_notlogic())
352 {
353 cond << "adjective_id NOT IN";
354 } else {
355 cond << "adjective_id IN";
356 }
357
358 cond << "(SELECT adjective_id FROM adjective_pronunciations WHERE ";
359
360 std::function<std::string (filter<std::vector<bool>>, bool)> recur = [&] (filter<std::vector<bool>> f, bool notlogic) -> std::string {
361 switch (f.get_type())
362 {
363 case filter<std::vector<bool>>::type::singleton:
364 {
365 std::ostringstream _val;
366 for (auto syl : f.get_elem())
367 {
368 if (syl)
369 {
370 _val << "1";
371 } else {
372 _val << "0";
373 }
374 }
375
376 bindings.emplace_back(_val.str());
377
378 if (notlogic == f.get_notlogic())
379 {
380 return "stress = ?";
381 } else {
382 return "stress != ?";
383 }
384 }
385
386 case filter<std::vector<bool>>::type::group:
387 {
388 bool truelogic = notlogic != f.get_notlogic();
389
390 std::list<std::string> clauses;
391 std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<std::vector<bool>> f2) {
392 return recur(f2, truelogic);
393 });
394
395 if (truelogic == f.get_orlogic())
396 {
397 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")";
398 } else {
399 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
400 }
401 }
402 }
403 };
404
405 cond << recur(_stress, _stress.get_notlogic());
406 cond << ")";
407 conditions.push_back(cond.str());
408 }
409
410 if (!_with_prefix.empty())
411 {
412 std::function<std::string (filter<std::string>, bool)> recur = [&] (filter<std::string> f, bool notlogic) -> std::string {
413 switch (f.get_type())
414 {
415 case filter<std::string>::type::singleton:
416 {
417 bindings.emplace_back(f.get_elem() + "%");
418
419 if (notlogic == f.get_notlogic())
420 {
421 return "base_form LIKE ?";
422 } else {
423 return "base_form NOT LIKE ?";
424 }
425 }
426
427 case filter<std::string>::type::group:
428 {
429 bool truelogic = notlogic != f.get_notlogic();
430
431 std::list<std::string> clauses;
432 std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<std::string> f2) {
433 return recur(f2, truelogic);
434 });
435
436 if (truelogic == f.get_orlogic())
437 {
438 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")";
439 } else {
440 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
441 }
442 }
443 }
444 };
445
446 conditions.push_back(recur(_with_prefix, false));
447 }
448
449 if (!_with_suffix.empty())
450 {
451 std::function<std::string (filter<std::string>, bool)> recur = [&] (filter<std::string> f, bool notlogic) -> std::string {
452 switch (f.get_type())
453 {
454 case filter<std::string>::type::singleton:
455 {
456 bindings.emplace_back("%" + f.get_elem());
457
458 if (notlogic == f.get_notlogic())
459 {
460 return "base_form LIKE ?";
461 } else {
462 return "base_form NOT LIKE ?";
463 }
464 }
465
466 case filter<std::string>::type::group:
467 {
468 bool truelogic = notlogic != f.get_notlogic();
469
470 std::list<std::string> clauses;
471 std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<std::string> f2) {
472 return recur(f2, truelogic);
473 });
474
475 if (truelogic == f.get_orlogic())
476 {
477 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")";
478 } else {
479 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
480 }
481 }
482 }
483 };
484
485 conditions.push_back(recur(_with_suffix, false));
486 }
487
488 if (_with_complexity != unlimited)
489 {
490 conditions.push_back("complexity = ?");
491 bindings.emplace_back(_with_complexity);
492 }
493
494 if (_is_variant)
495 {
496 conditions.push_back("adjective_id IN (SELECT adjective_id FROM variation)");
497 }
498
499 if (!_variant_of.empty())
500 {
501 std::stringstream cond;
502 if (_variant_of.get_notlogic())
503 {
504 cond << "adjective_id NOT IN";
505 } else {
506 cond << "adjective_id IN";
507 }
508
509 cond << "(SELECT adjective_id FROM variation WHERE ";
510
511 std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string {
512 switch (f.get_type())
513 {
514 case filter<noun>::type::singleton:
515 {
516 bindings.emplace_back(f.get_elem()._id);
517
518 if (notlogic == f.get_notlogic())
519 {
520 return "noun_id = ?";
521 } else {
522 return "noun_id != ?";
523 }
524 }
525
526 case filter<noun>::type::group:
527 {
528 bool truelogic = notlogic != f.get_notlogic();
529
530 std::list<std::string> clauses;
531 std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) {
532 return recur(f2, truelogic);
533 });
534
535 if (truelogic == f.get_orlogic())
536 {
537 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")";
538 } else {
539 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
540 }
541 }
542 }
543 };
544
545 cond << recur(_variant_of, _variant_of.get_notlogic());
546 cond << ")";
547 conditions.push_back(cond.str());
548 }
549
550 if (_is_antonymic)
551 {
552 conditions.push_back("adjective_id IN (SELECT adjective_2_id FROM adjective_antonymy)");
553 }
554
555 if (!_antonym_of.empty())
556 {
557 std::stringstream cond;
558 if (_antonym_of.get_notlogic())
559 {
560 cond << "adjective_id NOT IN";
561 } else {
562 cond << "adjective_id IN";
563 }
564
565 cond << "(SELECT adjective_2_id FROM adjective_antonymy WHERE ";
566
567 std::function<std::string (filter<adjective>, bool)> recur = [&] (filter<adjective> f, bool notlogic) -> std::string {
568 switch (f.get_type())
569 {
570 case filter<adjective>::type::singleton:
571 {
572 bindings.emplace_back(f.get_elem()._id);
573
574 if (notlogic == f.get_notlogic())
575 {
576 return "adjective_1_id = ?";
577 } else {
578 return "adjective_1_id != ?";
579 }
580 }
581
582 case filter<adjective>::type::group:
583 {
584 bool truelogic = notlogic != f.get_notlogic();
585
586 std::list<std::string> clauses;
587 std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<adjective> f2) {
588 return recur(f2, truelogic);
589 });
590
591 if (truelogic == f.get_orlogic())
592 {
593 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")";
594 } else {
595 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
596 }
597 }
598 }
599 };
600
601 cond << recur(_antonym_of, _antonym_of.get_notlogic());
602 cond << ")";
603 conditions.push_back(cond.str());
604 }
605
606 if (_is_synonymic)
607 {
608 conditions.push_back("adjective_id IN (SELECT adjective_2_id FROM adjective_synonymy)");
609 }
610
611 if (!_synonym_of.empty())
612 {
613 std::stringstream cond;
614 if (_synonym_of.get_notlogic())
615 {
616 cond << "adjective_id NOT IN";
617 } else {
618 cond << "adjective_id IN";
619 }
620
621 cond << "(SELECT adjective_2_id FROM adjective_synonymy WHERE ";
622
623 std::function<std::string (filter<adjective>, bool)> recur = [&] (filter<adjective> f, bool notlogic) -> std::string {
624 switch (f.get_type())
625 {
626 case filter<adjective>::type::singleton:
627 {
628 bindings.emplace_back(f.get_elem()._id);
629
630 if (notlogic == f.get_notlogic())
631 {
632 return "adjective_1_id = ?";
633 } else {
634 return "adjective_1_id != ?";
635 }
636 }
637
638 case filter<adjective>::type::group:
639 {
640 bool truelogic = notlogic != f.get_notlogic();
641
642 std::list<std::string> clauses;
643 std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<adjective> f2) {
644 return recur(f2, truelogic);
645 });
646
647 if (truelogic == f.get_orlogic())
648 {
649 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")";
650 } else {
651 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
652 }
653 }
654 }
655 };
656
657 cond << recur(_synonym_of, _synonym_of.get_notlogic());
658 cond << ")";
659 conditions.push_back(cond.str());
660 }
661
662 if (_is_generalization)
663 {
664 conditions.push_back("adjective_id IN (SELECT general_id FROM specification)");
665 }
666
667 if (!_generalization_of.empty())
668 {
669 std::stringstream cond;
670 if (_generalization_of.get_notlogic())
671 {
672 cond << "adjective_id NOT IN";
673 } else {
674 cond << "adjective_id IN";
675 }
676
677 cond << "(SELECT general_id FROM specification WHERE ";
678
679 std::function<std::string (filter<adjective>, bool)> recur = [&] (filter<adjective> f, bool notlogic) -> std::string {
680 switch (f.get_type())
681 {
682 case filter<adjective>::type::singleton:
683 {
684 bindings.emplace_back(f.get_elem()._id);
685
686 if (notlogic == f.get_notlogic())
687 {
688 return "specific_id = ?";
689 } else {
690 return "specific_id != ?";
691 }
692 }
693
694 case filter<adjective>::type::group:
695 {
696 bool truelogic = notlogic != f.get_notlogic();
697
698 std::list<std::string> clauses;
699 std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<adjective> f2) {
700 return recur(f2, truelogic);
701 });
702
703 if (truelogic == f.get_orlogic())
704 {
705 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")";
706 } else {
707 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
708 }
709 }
710 }
711 };
712
713 cond << recur(_generalization_of, _generalization_of.get_notlogic());
714 cond << ")";
715 conditions.push_back(cond.str());
716 }
717
718 if (_is_specification)
719 {
720 conditions.push_back("adjective_id IN (SELECT specific_id FROM specification)");
721 }
722
723 if (!_specification_of.empty())
724 {
725 std::stringstream cond;
726 if (_specification_of.get_notlogic())
727 {
728 cond << "adjective_id NOT IN";
729 } else {
730 cond << "adjective_id IN";
731 }
732
733 cond << "(SELECT specific_id FROM specification WHERE ";
734
735 std::function<std::string (filter<adjective>, bool)> recur = [&] (filter<adjective> f, bool notlogic) -> std::string {
736 switch (f.get_type())
737 {
738 case filter<adjective>::type::singleton:
739 {
740 bindings.emplace_back(f.get_elem()._id);
741
742 if (notlogic == f.get_notlogic())
743 {
744 return "general_id = ?";
745 } else {
746 return "general_id != ?";
747 }
748 }
749
750 case filter<adjective>::type::group:
751 {
752 bool truelogic = notlogic != f.get_notlogic();
753
754 std::list<std::string> clauses;
755 std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<adjective> f2) {
756 return recur(f2, truelogic);
757 });
758
759 if (truelogic == f.get_orlogic())
760 {
761 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")";
762 } else {
763 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
764 }
765 }
766 }
767 };
768
769 cond << recur(_specification_of, _specification_of.get_notlogic());
770 cond << ")";
771 conditions.push_back(cond.str());
772 }
773
774 if (_is_pertainymic)
775 {
776 conditions.push_back("adjective_id IN (SELECT pertainym_id FROM pertainymy)");
777 }
778
779 if (!_pertainym_of.empty())
780 {
781 std::stringstream cond;
782 if (_pertainym_of.get_notlogic())
783 {
784 cond << "adjective_id NOT IN";
785 } else {
786 cond << "adjective_id IN";
787 }
788
789 cond << "(SELECT pertainym_id FROM pertainymy WHERE ";
790
791 std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string {
792 switch (f.get_type())
793 {
794 case filter<noun>::type::singleton:
795 {
796 bindings.emplace_back(f.get_elem()._id);
797
798 if (notlogic == f.get_notlogic())
799 {
800 return "noun_id = ?";
801 } else {
802 return "noun_id != ?";
803 }
804 }
805
806 case filter<noun>::type::group:
807 {
808 bool truelogic = notlogic != f.get_notlogic();
809
810 std::list<std::string> clauses;
811 std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) {
812 return recur(f2, truelogic);
813 });
814
815 if (truelogic == f.get_orlogic())
816 {
817 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")";
818 } else {
819 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
820 }
821 }
822 }
823 };
824
825 cond << recur(_pertainym_of, _pertainym_of.get_notlogic());
826 cond << ")";
827 conditions.push_back(cond.str());
828 }
829
830 if (_is_mannernymic)
831 {
832 conditions.push_back("adjective_id IN (SELECT adjective_id FROM mannernymy)");
833 }
834
835 if (!_anti_mannernym_of.empty())
836 {
837 std::stringstream cond;
838 if (_anti_mannernym_of.get_notlogic())
839 {
840 cond << "adjective_id NOT IN";
841 } else {
842 cond << "adjective_id IN";
843 }
844
845 cond << "(SELECT adjective_id FROM mannernymy WHERE ";
846
847 std::function<std::string (filter<adverb>, bool)> recur = [&] (filter<adverb> f, bool notlogic) -> std::string {
848 switch (f.get_type())
849 {
850 case filter<adverb>::type::singleton:
851 {
852 bindings.emplace_back(f.get_elem()._id);
853
854 if (notlogic == f.get_notlogic())
855 {
856 return "mannernym_id = ?";
857 } else {
858 return "mannernym_id != ?";
859 }
860 }
861
862 case filter<adverb>::type::group:
863 {
864 bool truelogic = notlogic != f.get_notlogic();
865
866 std::list<std::string> clauses;
867 std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<adverb> f2) {
868 return recur(f2, truelogic);
869 });
870
871 if (truelogic == f.get_orlogic())
872 {
873 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")";
874 } else {
875 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
876 }
877 }
878 }
879 };
880
881 cond << recur(_anti_mannernym_of, _anti_mannernym_of.get_notlogic());
882 cond << ")";
883 conditions.push_back(cond.str());
884 }
885/*
886 if (!_derived_from_adjective.empty())
887 {
888 std::list<std::string> clauses(_derived_from_adjective.size(), "adjective_2_id = @DERADJ");
889 std::string cond = "adjective_id IN (SELECT adjective_1_id FROM adjective_adjective_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
890 conditions.push_back(cond);
891 }
892
893 if (!_not_derived_from_adjective.empty())
894 {
895 std::list<std::string> clauses(_not_derived_from_adjective.size(), "adjective_2_id = @NDERADJ");
896 std::string cond = "adjective_id NOT IN (SELECT adjective_1_id FROM adjective_adjective_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
897 conditions.push_back(cond);
898 }
899
900 if (!_derived_from_adverb.empty())
901 {
902 std::list<std::string> clauses(_derived_from_adverb.size(), "adverb_id = @DERADV");
903 std::string cond = "adjective_id IN (SELECT adjective_id FROM adjective_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
904 conditions.push_back(cond);
905 }
906
907 if (!_not_derived_from_adverb.empty())
908 {
909 std::list<std::string> clauses(_not_derived_from_adverb.size(), "adverb_id = @NDERADV");
910 std::string cond = "adjective_id NOT IN (SELECT adjective_id FROM adjective_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
911 conditions.push_back(cond);
912 }
913
914 if (!_derived_from_noun.empty())
915 {
916 std::list<std::string> clauses(_derived_from_noun.size(), "noun_id = @DERN");
917 std::string cond = "adjective_id IN (SELECT adjective_id FROM noun_adjective_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
918 conditions.push_back(cond);
919 }
920
921 if (!_not_derived_from_noun.empty())
922 {
923 std::list<std::string> clauses(_not_derived_from_noun.size(), "noun_id = @NDERN");
924 std::string cond = "adjective_id NOT IN (SELECT adjective_id FROM noun_adjective_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
925 conditions.push_back(cond);
926 }*/
927
928 if (!conditions.empty())
929 {
930 construct << " WHERE ";
931 construct << verbly::implode(std::begin(conditions), std::end(conditions), " AND ");
932 }
933
934 if (_random)
935 {
936 construct << " ORDER BY RANDOM()";
937 }
938
939 if (_limit != unlimited)
940 {
941 construct << " LIMIT " << _limit;
942 }
943
944 sqlite3_stmt* ppstmt;
945 std::string query = construct.str();
946 if (sqlite3_prepare_v2(_data.ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
947 {
948 throw std::runtime_error(sqlite3_errmsg(_data.ppdb));
949 }
950
951 int i = 1;
952 for (auto& binding : bindings)
953 {
954 switch (binding.get_type())
955 {
956 case binding::type::integer:
957 {
958 sqlite3_bind_int(ppstmt, i, binding.get_integer());
959
960 break;
961 }
962
963 case binding::type::string:
964 {
965 sqlite3_bind_text(ppstmt, i, binding.get_string().c_str(), binding.get_string().length(), SQLITE_TRANSIENT);
966
967 break;
968 }
969 }
970
971 i++;
972 }
973
974 /*
975 for (auto adj : _derived_from_adjective)
976 {
977 sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@DERADJ"), adj._id);
978 }
979
980 for (auto adj : _not_derived_from_adjective)
981 {
982 sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@NDERADJ"), adj._id);
983 }
984
985 for (auto adv : _derived_from_adverb)
986 {
987 sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@DERADV"), adv._id);
988 }
989
990 for (auto adv : _not_derived_from_adverb)
991 {
992 sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@NDERADV"), adv._id);
993 }
994
995 for (auto n : _derived_from_noun)
996 {
997 sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@DERN"), n._id);
998 }
999
1000 for (auto n : _not_derived_from_noun)
1001 {
1002 sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@NDERN"), n._id);
1003 }
1004*/
1005 std::list<adjective> output;
1006 while (sqlite3_step(ppstmt) == SQLITE_ROW)
1007 {
1008 adjective tnc {_data, sqlite3_column_int(ppstmt, 0)};
1009 tnc._base_form = std::string(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 1)));
1010
1011 if (sqlite3_column_type(ppstmt, 2) != SQLITE_NULL)
1012 {
1013 tnc._comparative_form = std::string(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 2)));
1014 }
1015
1016 if (sqlite3_column_type(ppstmt, 3) != SQLITE_NULL)
1017 {
1018 tnc._superlative_form = std::string(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 3)));
1019 }
1020
1021 if (sqlite3_column_type(ppstmt, 4) != SQLITE_NULL)
1022 {
1023 std::string adjpos(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 4)));
1024 if (adjpos == "p")
1025 {
1026 tnc._position = adjective::positioning::predicate;
1027 } else if (adjpos == "a")
1028 {
1029 tnc._position = adjective::positioning::attributive;
1030 } else if (adjpos == "i")
1031 {
1032 tnc._position = adjective::positioning::postnominal;
1033 }
1034 }
1035
1036 output.push_back(tnc);
1037 }
1038
1039 sqlite3_finalize(ppstmt);
1040
1041 for (auto& adjective : output)
1042 {
1043 query = "SELECT pronunciation, prerhyme, rhyme FROM adjective_pronunciations WHERE adjective_id = ?";
1044 if (sqlite3_prepare_v2(_data.ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
1045 {
1046 throw std::runtime_error(sqlite3_errmsg(_data.ppdb));
1047 }
1048
1049 sqlite3_bind_int(ppstmt, 1, adjective._id);
1050
1051 while (sqlite3_step(ppstmt) == SQLITE_ROW)
1052 {
1053 std::string pronunciation(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 0)));
1054 auto phonemes = verbly::split<std::list<std::string>>(pronunciation, " ");
1055
1056 adjective.pronunciations.push_back(phonemes);
1057
1058 if ((sqlite3_column_type(ppstmt, 1) != SQLITE_NULL) && (sqlite3_column_type(ppstmt, 2) != SQLITE_NULL))
1059 {
1060 std::string prerhyme(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 1)));
1061 std::string rhyming(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 2)));
1062 adjective.rhymes.emplace_back(prerhyme, rhyming);
1063 }
1064 }
1065
1066 sqlite3_finalize(ppstmt);
1067 }
1068
1069 return output;
1070 }
1071
1072};
diff --git a/lib/adjective_query.h b/lib/adjective_query.h deleted file mode 100644 index e6a6609..0000000 --- a/lib/adjective_query.h +++ /dev/null
@@ -1,112 +0,0 @@
1#ifndef ADJECTIVE_QUERY_H_05E590FD
2#define ADJECTIVE_QUERY_H_05E590FD
3
4namespace verbly {
5
// Collects search criteria for adjectives. run() (implemented in
// adjective_query.cpp) joins the accumulated conditions into the WHERE clause
// of a SQLite statement, executes it, and returns the rows as adjective objects.
6 class adjective_query {
7 public:
// NOTE(review): presumably stores the handle in the _data member; run() reads
// the SQLite connection from _data.ppdb. Confirm against the definition.
8 adjective_query(const data& _data);
9
// Every criterion method below returns adjective_query&.
// NOTE(review): presumably *this, so that calls can be chained - confirm
// against the definitions.
10 adjective_query& limit(int _limit);
11 adjective_query& random();
12 adjective_query& except(const adjective& _word);
13 adjective_query& rhymes_with(const word& _word);
14 adjective_query& rhymes_with(rhyme _r);
15 adjective_query& has_pronunciation();
16 adjective_query& has_rhyming_noun();
17 adjective_query& has_rhyming_adjective();
18 adjective_query& has_rhyming_adverb();
19 adjective_query& has_rhyming_verb();
20 adjective_query& with_stress(filter<std::vector<bool>> _arg);
21
22 adjective_query& requires_comparative_form();
23 adjective_query& requires_superlative_form();
24 adjective_query& position(adjective::positioning pos);
25
26 adjective_query& with_prefix(filter<std::string> _f);
27 adjective_query& with_suffix(filter<std::string> _f);
28
29 adjective_query& with_complexity(int _arg);
30
31 adjective_query& is_variant();
32 adjective_query& variant_of(filter<noun> _f);
33
34 adjective_query& has_antonyms();
35 adjective_query& antonym_of(filter<adjective> _f);
36
37 adjective_query& has_synonyms();
38 adjective_query& synonym_of(filter<adjective> _f);
39
40 adjective_query& is_generalization();
41 adjective_query& generalization_of(filter<adjective> _f);
42
43 adjective_query& is_specification();
44 adjective_query& specification_of(filter<adjective> _f);
45
46 adjective_query& is_pertainymic();
47 adjective_query& pertainym_of(filter<noun> _f);
48
49 adjective_query& is_mannernymic();
50 adjective_query& anti_mannernym_of(filter<adverb> _f);
51
52/* adjective_query& derived_from(const word& _w);
53 adjective_query& not_derived_from(const word& _w);*/
54
// Builds and executes the SQL statement; throws std::runtime_error carrying
// sqlite3_errmsg() when a statement cannot be prepared.
55 std::list<adjective> run() const;
56
// Sentinel meaning "no value set": run() skips the LIMIT clause and the
// complexity condition while _limit / _with_complexity still equal this.
57 const static int unlimited = -1;
58
59 protected:
// Data handle; run() prepares its statements on _data.ppdb.
60 const data& _data;
// Emitted as " LIMIT <n>" by run() unless it equals unlimited.
61 int _limit = unlimited;
// When true, run() appends " ORDER BY RANDOM()".
62 bool _random = false;
63 std::list<rhyme> _rhymes;
64 std::list<adjective> _except;
65 bool _has_prn = false;
66 bool _has_rhyming_noun = false;
67 bool _has_rhyming_adjective = false;
68 bool _has_rhyming_adverb = false;
69 bool _has_rhyming_verb = false;
70 filter<std::vector<bool>> _stress;
71
72 bool _requires_comparative_form = false;
73 bool _requires_superlative_form = false;
74 adjective::positioning _position = adjective::positioning::undefined;
75
// Prefix/suffix filters over base_form; for the suffix filter run() binds
// "%" + element and compares with LIKE / NOT LIKE.
76 filter<std::string> _with_prefix;
77 filter<std::string> _with_suffix;
78
// Matched with "complexity = ?" unless it equals unlimited.
79 int _with_complexity = unlimited;
80
// Flag: adjective_id must appear in the variation table.
// Filter: compared against noun_id inside that table.
81 bool _is_variant = false;
82 filter<noun> _variant_of;
83
// Flag: adjective must appear as adjective_2_id in adjective_antonymy.
// Filter: compared against adjective_1_id of that table.
84 bool _is_antonymic = false;
85 filter<adjective> _antonym_of;
86
// Same shape as the antonym pair, using the adjective_synonymy table.
87 bool _is_synonymic = false;
88 filter<adjective> _synonym_of;
89
// Flag: adjective must appear as general_id in specification.
// Filter: compared against specific_id of that table.
90 bool _is_generalization = false;
91 filter<adjective> _generalization_of;
92
// Flag: adjective must appear as specific_id in specification.
// Filter: compared against general_id of that table.
93 bool _is_specification = false;
94 filter<adjective> _specification_of;
95
// Flag: adjective must appear as pertainym_id in pertainymy.
// Filter: compared against noun_id of that table.
96 bool _is_pertainymic = false;
97 filter<noun> _pertainym_of;
98
// Flag: adjective_id must appear in mannernymy.
// Filter: compared against mannernym_id of that table.
99 bool _is_mannernymic = false;
100 filter<adverb> _anti_mannernym_of;
101
102/* std::list<adjective> _derived_from_adjective;
103 std::list<adjective> _not_derived_from_adjective;
104 std::list<adverb> _derived_from_adverb;
105 std::list<adverb> _not_derived_from_adverb;
106 std::list<noun> _derived_from_noun;
107 std::list<noun> _not_derived_from_noun;*/
108 };
109
110};
111
112#endif /* end of include guard: ADJECTIVE_QUERY_H_05E590FD */
diff --git a/lib/adverb.cpp b/lib/adverb.cpp deleted file mode 100644 index 442574e..0000000 --- a/lib/adverb.cpp +++ /dev/null
@@ -1,71 +0,0 @@
1#include "verbly.h"
2
3namespace verbly {
4
5 adverb::adverb()
6 {
7
8 }
9
10 adverb::adverb(const data& _data, int _id) : word(_data, _id)
11 {
12
13 }
14
15 std::string adverb::base_form() const
16 {
17 assert(_valid == true);
18
19 return _base_form;
20 }
21
22 std::string adverb::comparative_form() const
23 {
24 assert(_valid == true);
25
26 return _comparative_form;
27 }
28
29 std::string adverb::superlative_form() const
30 {
31 assert(_valid == true);
32
33 return _superlative_form;
34 }
35
36 bool adverb::has_comparative_form() const
37 {
38 assert(_valid == true);
39
40 return !_comparative_form.empty();
41 }
42
43 bool adverb::has_superlative_form() const
44 {
45 assert(_valid == true);
46
47 return !_superlative_form.empty();
48 }
49
50 adverb_query adverb::antonyms() const
51 {
52 assert(_valid == true);
53
54 return _data->adverbs().antonym_of(*this);
55 }
56
57 adverb_query adverb::synonyms() const
58 {
59 assert(_valid == true);
60
61 return _data->adverbs().synonym_of(*this);
62 }
63
64 adjective_query adverb::anti_mannernyms() const
65 {
66 assert(_valid == true);
67
68 return _data->adjectives().anti_mannernym_of(*this);
69 }
70
71};
diff --git a/lib/adverb.h b/lib/adverb.h deleted file mode 100644 index 56d4e28..0000000 --- a/lib/adverb.h +++ /dev/null
@@ -1,35 +0,0 @@
1#ifndef ADVERB_H_86F8302F
2#define ADVERB_H_86F8302F
3
4namespace verbly {
5
6 class adverb : public word {
7 private:
8 std::string _base_form;
9 std::string _comparative_form;
10 std::string _superlative_form;
11
12 friend class adverb_query;
13
14 public:
15 adverb();
16 adverb(const data& _data, int _id);
17
18 std::string base_form() const;
19 std::string comparative_form() const;
20 std::string superlative_form() const;
21
22 bool has_comparative_form() const;
23 bool has_superlative_form() const;
24
25 adverb_query antonyms() const;
26 adverb_query synonyms() const;
27 adjective_query anti_mannernyms() const;
28
29 adverb_query& derived_from(const word& _w);
30 adverb_query& not_derived_from(const word& _w);
31 };
32
33};
34
35#endif /* end of include guard: ADVERB_H_86F8302F */
diff --git a/lib/adverb_query.cpp b/lib/adverb_query.cpp deleted file mode 100644 index 3e62bb7..0000000 --- a/lib/adverb_query.cpp +++ /dev/null
@@ -1,758 +0,0 @@
1#include "verbly.h"
2
3namespace verbly {
4
5 adverb_query::adverb_query(const data& _data) : _data(_data)
6 {
7
8 }
9
10 adverb_query& adverb_query::limit(int _limit)
11 {
12 if ((_limit > 0) || (_limit == unlimited))
13 {
14 this->_limit = _limit;
15 }
16
17 return *this;
18 }
19
20 adverb_query& adverb_query::random()
21 {
22 this->_random = true;
23
24 return *this;
25 }
26
27 adverb_query& adverb_query::except(const adverb& _word)
28 {
29 _except.push_back(_word);
30
31 return *this;
32 }
33
34 adverb_query& adverb_query::rhymes_with(const word& _word)
35 {
36 for (auto rhyme : _word.get_rhymes())
37 {
38 _rhymes.push_back(rhyme);
39 }
40
41 if (dynamic_cast<const adverb*>(&_word) != nullptr)
42 {
43 _except.push_back(dynamic_cast<const adverb&>(_word));
44 }
45
46 return *this;
47 }
48
49 adverb_query& adverb_query::rhymes_with(rhyme _r)
50 {
51 _rhymes.push_back(_r);
52
53 return *this;
54 }
55
56 adverb_query& adverb_query::has_pronunciation()
57 {
58 this->_has_prn = true;
59
60 return *this;
61 }
62
63 adverb_query& adverb_query::has_rhyming_noun()
64 {
65 _has_rhyming_noun = true;
66
67 return *this;
68 }
69
70 adverb_query& adverb_query::has_rhyming_adjective()
71 {
72 _has_rhyming_adjective = true;
73
74 return *this;
75 }
76
77 adverb_query& adverb_query::has_rhyming_adverb()
78 {
79 _has_rhyming_adverb = true;
80
81 return *this;
82 }
83
84 adverb_query& adverb_query::has_rhyming_verb()
85 {
86 _has_rhyming_verb = true;
87
88 return *this;
89 }
90
91 adverb_query& adverb_query::requires_comparative_form()
92 {
93 _requires_comparative_form = true;
94
95 return *this;
96 }
97
98 adverb_query& adverb_query::requires_superlative_form()
99 {
100 _requires_superlative_form = true;
101
102 return *this;
103 }
104
105 adverb_query& adverb_query::with_stress(filter<std::vector<bool>> _arg)
106 {
107 _stress = _arg;
108
109 return *this;
110 }
111
112 adverb_query& adverb_query::with_prefix(filter<std::string> _f)
113 {
114 _f.clean();
115 _with_prefix = _f;
116
117 return *this;
118 }
119
120 adverb_query& adverb_query::with_suffix(filter<std::string> _f)
121 {
122 _f.clean();
123 _with_suffix = _f;
124
125 return *this;
126 }
127
128 adverb_query& adverb_query::with_complexity(int _arg)
129 {
130 _with_complexity = _arg;
131
132 return *this;
133 }
134
135 adverb_query& adverb_query::has_antonyms()
136 {
137 _has_antonyms = true;
138
139 return *this;
140 }
141
142 adverb_query& adverb_query::antonym_of(filter<adverb> _f)
143 {
144 _f.clean();
145 _antonym_of = _f;
146
147 return *this;
148 }
149
150 adverb_query& adverb_query::has_synonyms()
151 {
152 _has_synonyms = true;
153
154 return *this;
155 }
156
157 adverb_query& adverb_query::synonym_of(filter<adverb> _f)
158 {
159 _f.clean();
160 _synonym_of = _f;
161
162 return *this;
163 }
164
165 adverb_query& adverb_query::is_mannernymic()
166 {
167 _is_mannernymic = true;
168
169 return *this;
170 }
171
172 adverb_query& adverb_query::mannernym_of(filter<adjective> _f)
173 {
174 _f.clean();
175 _mannernym_of = _f;
176
177 return *this;
178 }
179 /*
180 adverb_query& adverb_query::derived_from(const word& _w)
181 {
182 if (dynamic_cast<const adjective*>(&_w) != nullptr)
183 {
184 _derived_from_adjective.push_back(dynamic_cast<const adjective&>(_w));
185 } else if (dynamic_cast<const adverb*>(&_w) != nullptr)
186 {
187 _derived_from_adverb.push_back(dynamic_cast<const adverb&>(_w));
188 } else if (dynamic_cast<const noun*>(&_w) != nullptr)
189 {
190 _derived_from_noun.push_back(dynamic_cast<const noun&>(_w));
191 }
192
193 return *this;
194 }
195
196 adverb_query& adverb_query::not_derived_from(const word& _w)
197 {
198 if (dynamic_cast<const adjective*>(&_w) != nullptr)
199 {
200 _not_derived_from_adjective.push_back(dynamic_cast<const adjective&>(_w));
201 } else if (dynamic_cast<const adverb*>(&_w) != nullptr)
202 {
203 _not_derived_from_adverb.push_back(dynamic_cast<const adverb&>(_w));
204 } else if (dynamic_cast<const noun*>(&_w) != nullptr)
205 {
206 _not_derived_from_noun.push_back(dynamic_cast<const noun&>(_w));
207 }
208
209 return *this;
210 }
211 */
212 std::list<adverb> adverb_query::run() const
213 {
214 std::stringstream construct;
215 construct << "SELECT adverb_id, base_form, comparative, superlative FROM adverbs";
216 std::list<std::string> conditions;
217 std::list<binding> bindings;
218
219 if (_has_prn)
220 {
221 conditions.push_back("adverb_id IN (SELECT adverb_id FROM adverb_pronunciations)");
222 }
223
224 if (!_rhymes.empty())
225 {
226 std::list<std::string> clauses(_rhymes.size(), "(prerhyme != ? AND rhyme = ?)");
227 std::string cond = "adverb_id IN (SELECT adverb_id FROM adverb_pronunciations WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
228 conditions.push_back(cond);
229
230 for (auto rhy : _rhymes)
231 {
232 bindings.emplace_back(rhy.get_prerhyme());
233 bindings.emplace_back(rhy.get_rhyme());
234 }
235 }
236
237 if (_has_rhyming_noun)
238 {
239 conditions.push_back("adverb_id IN (SELECT a.adverb_id FROM adverbs AS a INNER JOIN adverb_pronunciations AS curp ON curp.noun_id = a.adverb_id INNER JOIN noun_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)");
240 }
241
242 if (_has_rhyming_adjective)
243 {
244 conditions.push_back("adverb_id IN (SELECT a.adverb_id FROM adverbs AS a INNER JOIN adverb_pronunciations AS curp ON curp.noun_id = a.adverb_id INNER JOIN adjective_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)");
245 }
246
247 if (_has_rhyming_adverb)
248 {
249 conditions.push_back("adverb_id IN (SELECT a.adverb_id FROM adverbs AS a INNER JOIN adverb_pronunciations AS curp ON curp.noun_id = a.adverb_id INNER JOIN adverb_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme AND rhmp.adverb_id != curp.adverb_id)");
250 }
251
252 if (_has_rhyming_verb)
253 {
254 conditions.push_back("adverb_id IN (SELECT a.adverb_id FROM adverbs AS a INNER JOIN adverb_pronunciations AS curp ON curp.noun_id = a.adverb_id INNER JOIN verb_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)");
255 }
256
257 for (auto except : _except)
258 {
259 conditions.push_back("adverb_id != ?");
260 bindings.emplace_back(except._id);
261 }
262
263 if (_requires_comparative_form)
264 {
265 conditions.push_back("comparative IS NOT NULL");
266 }
267
268 if (_requires_superlative_form)
269 {
270 conditions.push_back("superlative IS NOT NULL");
271 }
272
273 if (!_stress.empty())
274 {
275 std::stringstream cond;
276 if (_stress.get_notlogic())
277 {
278 cond << "adverb_id NOT IN";
279 } else {
280 cond << "adverb_id IN";
281 }
282
283 cond << "(SELECT adverb_id FROM adverb_pronunciations WHERE ";
284
285 std::function<std::string (filter<std::vector<bool>>, bool)> recur = [&] (filter<std::vector<bool>> f, bool notlogic) -> std::string {
286 switch (f.get_type())
287 {
288 case filter<std::vector<bool>>::type::singleton:
289 {
290 std::ostringstream _val;
291 for (auto syl : f.get_elem())
292 {
293 if (syl)
294 {
295 _val << "1";
296 } else {
297 _val << "0";
298 }
299 }
300
301 bindings.emplace_back(_val.str());
302
303 if (notlogic == f.get_notlogic())
304 {
305 return "stress = ?";
306 } else {
307 return "stress != ?";
308 }
309 }
310
311 case filter<std::vector<bool>>::type::group:
312 {
313 bool truelogic = notlogic != f.get_notlogic();
314
315 std::list<std::string> clauses;
316 std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<std::vector<bool>> f2) {
317 return recur(f2, truelogic);
318 });
319
320 if (truelogic == f.get_orlogic())
321 {
322 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")";
323 } else {
324 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
325 }
326 }
327 }
328 };
329
330 cond << recur(_stress, _stress.get_notlogic());
331 cond << ")";
332 conditions.push_back(cond.str());
333 }
334
335 if (!_with_prefix.empty())
336 {
337 std::function<std::string (filter<std::string>, bool)> recur = [&] (filter<std::string> f, bool notlogic) -> std::string {
338 switch (f.get_type())
339 {
340 case filter<std::string>::type::singleton:
341 {
342 bindings.emplace_back(f.get_elem() + "%");
343
344 if (notlogic == f.get_notlogic())
345 {
346 return "base_form LIKE ?";
347 } else {
348 return "base_form NOT LIKE ?";
349 }
350 }
351
352 case filter<std::string>::type::group:
353 {
354 bool truelogic = notlogic != f.get_notlogic();
355
356 std::list<std::string> clauses;
357 std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<std::string> f2) {
358 return recur(f2, truelogic);
359 });
360
361 if (truelogic == f.get_orlogic())
362 {
363 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")";
364 } else {
365 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
366 }
367 }
368 }
369 };
370
371 conditions.push_back(recur(_with_prefix, false));
372 }
373
374 if (!_with_suffix.empty())
375 {
376 std::function<std::string (filter<std::string>, bool)> recur = [&] (filter<std::string> f, bool notlogic) -> std::string {
377 switch (f.get_type())
378 {
379 case filter<std::string>::type::singleton:
380 {
381 bindings.emplace_back("%" + f.get_elem());
382
383 if (notlogic == f.get_notlogic())
384 {
385 return "base_form LIKE ?";
386 } else {
387 return "base_form NOT LIKE ?";
388 }
389 }
390
391 case filter<std::string>::type::group:
392 {
393 bool truelogic = notlogic != f.get_notlogic();
394
395 std::list<std::string> clauses;
396 std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<std::string> f2) {
397 return recur(f2, truelogic);
398 });
399
400 if (truelogic == f.get_orlogic())
401 {
402 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")";
403 } else {
404 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
405 }
406 }
407 }
408 };
409
410 conditions.push_back(recur(_with_suffix, false));
411 }
412
413 if (_with_complexity != unlimited)
414 {
415 conditions.push_back("complexity = ?");
416 bindings.emplace_back(_with_complexity);
417 }
418
419 if (_has_antonyms)
420 {
421 conditions.push_back("adverb_id IN (SELECT adverb_2_id FROM adverb_antonymy)");
422 }
423
424 if (!_antonym_of.empty())
425 {
426 std::stringstream cond;
427 if (_antonym_of.get_notlogic())
428 {
429 cond << "adverb_id NOT IN";
430 } else {
431 cond << "adverb_id IN";
432 }
433
434 cond << "(SELECT adverb_2_id FROM adverb_antonymy WHERE ";
435
436 std::function<std::string (filter<adverb>, bool)> recur = [&] (filter<adverb> f, bool notlogic) -> std::string {
437 switch (f.get_type())
438 {
439 case filter<adverb>::type::singleton:
440 {
441 bindings.emplace_back(f.get_elem()._id);
442
443 if (notlogic == f.get_notlogic())
444 {
445 return "adverb_1_id = ?";
446 } else {
447 return "adverb_1_id != ?";
448 }
449 }
450
451 case filter<adverb>::type::group:
452 {
453 bool truelogic = notlogic != f.get_notlogic();
454
455 std::list<std::string> clauses;
456 std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<adverb> f2) {
457 return recur(f2, truelogic);
458 });
459
460 if (truelogic == f.get_orlogic())
461 {
462 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")";
463 } else {
464 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
465 }
466 }
467 }
468 };
469
470 cond << recur(_antonym_of, _antonym_of.get_notlogic());
471 cond << ")";
472 conditions.push_back(cond.str());
473 }
474
475 if (_has_synonyms)
476 {
477 conditions.push_back("adverb_id IN (SELECT adverb_2_id FROM adverb_synonymy)");
478 }
479
480 if (!_synonym_of.empty())
481 {
482 std::stringstream cond;
483 if (_antonym_of.get_notlogic())
484 {
485 cond << "adverb_id NOT IN";
486 } else {
487 cond << "adverb_id IN";
488 }
489
490 cond << "(SELECT adverb_2_id FROM adverb_synonymy WHERE ";
491
492 std::function<std::string (filter<adverb>, bool)> recur = [&] (filter<adverb> f, bool notlogic) -> std::string {
493 switch (f.get_type())
494 {
495 case filter<adverb>::type::singleton:
496 {
497 bindings.emplace_back(f.get_elem()._id);
498
499 if (notlogic == f.get_notlogic())
500 {
501 return "adverb_1_id = ?";
502 } else {
503 return "adverb_1_id != ?";
504 }
505 }
506
507 case filter<adverb>::type::group:
508 {
509 bool truelogic = notlogic != f.get_notlogic();
510
511 std::list<std::string> clauses;
512 std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<adverb> f2) {
513 return recur(f2, truelogic);
514 });
515
516 if (truelogic == f.get_orlogic())
517 {
518 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")";
519 } else {
520 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
521 }
522 }
523 }
524 };
525
526 cond << recur(_synonym_of, _synonym_of.get_notlogic());
527 cond << ")";
528 conditions.push_back(cond.str());
529 }
530
531 if (_is_mannernymic)
532 {
533 conditions.push_back("adverb_id IN (SELECT mannernym_id FROM mannernymy)");
534 }
535
536 if (!_mannernym_of.empty())
537 {
538 std::stringstream cond;
539 if (_antonym_of.get_notlogic())
540 {
541 cond << "adverb_id NOT IN";
542 } else {
543 cond << "adverb_id IN";
544 }
545
546 cond << "(SELECT mannernym_id FROM mannernymy WHERE ";
547
548 std::function<std::string (filter<adjective>, bool)> recur = [&] (filter<adjective> f, bool notlogic) -> std::string {
549 switch (f.get_type())
550 {
551 case filter<adjective>::type::singleton:
552 {
553 bindings.emplace_back(f.get_elem()._id);
554
555 if (notlogic == f.get_notlogic())
556 {
557 return "adjective_id = ?";
558 } else {
559 return "adjective_id != ?";
560 }
561 }
562
563 case filter<adjective>::type::group:
564 {
565 bool truelogic = notlogic != f.get_notlogic();
566
567 std::list<std::string> clauses;
568 std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<adjective> f2) {
569 return recur(f2, truelogic);
570 });
571
572 if (truelogic == f.get_orlogic())
573 {
574 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")";
575 } else {
576 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
577 }
578 }
579 }
580 };
581
582 cond << recur(_mannernym_of, _mannernym_of.get_notlogic());
583 cond << ")";
584 conditions.push_back(cond.str());
585 }
586
587/* if (!_derived_from_adjective.empty())
588 {
589 std::list<std::string> clauses(_derived_from_adjective.size(), "adjective_id = @DERADJ");
590 std::string cond = "adverb_id IN (SELECT adverb_id FROM adjective_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
591 conditions.push_back(cond);
592 }
593
594 if (!_not_derived_from_adjective.empty())
595 {
596 std::list<std::string> clauses(_not_derived_from_adjective.size(), "adjective_id = @NDERADJ");
597 std::string cond = "adverb_id NOT IN (SELECT adverb_id FROM adjective_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
598 conditions.push_back(cond);
599 }
600
601 if (!_derived_from_adverb.empty())
602 {
603 std::list<std::string> clauses(_derived_from_adverb.size(), "adverb_2_id = @DERADV");
604 std::string cond = "adverb_id IN (SELECT adverb_1_id FROM adverb_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
605 conditions.push_back(cond);
606 }
607
608 if (!_not_derived_from_adverb.empty())
609 {
610 std::list<std::string> clauses(_not_derived_from_adverb.size(), "adverb_2_id = @NDERADV");
611 std::string cond = "adverb_id NOT IN (SELECT adverb_1_id FROM adverb_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
612 conditions.push_back(cond);
613 }
614
615 if (!_derived_from_noun.empty())
616 {
617 std::list<std::string> clauses(_derived_from_noun.size(), "noun_id = @DERN");
618 std::string cond = "adverb_id IN (SELECT adverb_id FROM noun_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
619 conditions.push_back(cond);
620 }
621
622 if (!_not_derived_from_noun.empty())
623 {
624 std::list<std::string> clauses(_not_derived_from_noun.size(), "noun_id = @NDERN");
625 std::string cond = "adverb_id NOT IN (SELECT adverb_id FROM noun_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
626 conditions.push_back(cond);
627 }*/
628
629 if (!conditions.empty())
630 {
631 construct << " WHERE ";
632 construct << verbly::implode(std::begin(conditions), std::end(conditions), " AND ");
633 }
634
635 if (_random)
636 {
637 construct << " ORDER BY RANDOM()";
638 }
639
640 if (_limit != unlimited)
641 {
642 construct << " LIMIT " << _limit;
643 }
644
645 sqlite3_stmt* ppstmt;
646 std::string query = construct.str();
647 if (sqlite3_prepare_v2(_data.ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
648 {
649 throw std::runtime_error(sqlite3_errmsg(_data.ppdb));
650 }
651
652 int i = 1;
653 for (auto& binding : bindings)
654 {
655 switch (binding.get_type())
656 {
657 case binding::type::integer:
658 {
659 sqlite3_bind_int(ppstmt, i, binding.get_integer());
660
661 break;
662 }
663
664 case binding::type::string:
665 {
666 sqlite3_bind_text(ppstmt, i, binding.get_string().c_str(), binding.get_string().length(), SQLITE_TRANSIENT);
667
668 break;
669 }
670 }
671
672 i++;
673 }
674
675 /*
676 for (auto adj : _derived_from_adjective)
677 {
678 sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@DERADJ"), adj._id);
679 }
680
681 for (auto adj : _not_derived_from_adjective)
682 {
683 sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@NDERADJ"), adj._id);
684 }
685
686 for (auto adv : _derived_from_adverb)
687 {
688 sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@DERADV"), adv._id);
689 }
690
691 for (auto adv : _not_derived_from_adverb)
692 {
693 sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@NDERADV"), adv._id);
694 }
695
696 for (auto n : _derived_from_noun)
697 {
698 sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@DERN"), n._id);
699 }
700
701 for (auto n : _not_derived_from_noun)
702 {
703 sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@NDERN"), n._id);
704 }*/
705
706 std::list<adverb> output;
707 while (sqlite3_step(ppstmt) == SQLITE_ROW)
708 {
709 adverb tnc {_data, sqlite3_column_int(ppstmt, 0)};
710 tnc._base_form = std::string(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 1)));
711
712 if (sqlite3_column_type(ppstmt, 2) != SQLITE_NULL)
713 {
714 tnc._comparative_form = std::string(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 2)));
715 }
716
717 if (sqlite3_column_type(ppstmt, 3) != SQLITE_NULL)
718 {
719 tnc._superlative_form = std::string(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 3)));
720 }
721
722 output.push_back(tnc);
723 }
724
725 sqlite3_finalize(ppstmt);
726
727 for (auto& adverb : output)
728 {
729 query = "SELECT pronunciation, prerhyme, rhyme FROM adverb_pronunciations WHERE adverb_id = ?";
730 if (sqlite3_prepare_v2(_data.ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
731 {
732 throw std::runtime_error(sqlite3_errmsg(_data.ppdb));
733 }
734
735 sqlite3_bind_int(ppstmt, 1, adverb._id);
736
737 while (sqlite3_step(ppstmt) == SQLITE_ROW)
738 {
739 std::string pronunciation(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 0)));
740 auto phonemes = verbly::split<std::list<std::string>>(pronunciation, " ");
741
742 adverb.pronunciations.push_back(phonemes);
743
744 if ((sqlite3_column_type(ppstmt, 1) != SQLITE_NULL) && (sqlite3_column_type(ppstmt, 2) != SQLITE_NULL))
745 {
746 std::string prerhyme(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 1)));
747 std::string rhyming(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 2)));
748 adverb.rhymes.emplace_back(prerhyme, rhyming);
749 }
750 }
751
752 sqlite3_finalize(ppstmt);
753 }
754
755 return output;
756 }
757
758};
diff --git a/lib/adverb_query.h b/lib/adverb_query.h deleted file mode 100644 index 30e7400..0000000 --- a/lib/adverb_query.h +++ /dev/null
@@ -1,86 +0,0 @@
1#ifndef ADVERB_QUERY_H_CA13CCDD
2#define ADVERB_QUERY_H_CA13CCDD
3
4namespace verbly {
5
6 class adverb_query {
7 public:
8 adverb_query(const data& _data);
9
10 adverb_query& limit(int _limit);
11 adverb_query& random();
12 adverb_query& except(const adverb& _word);
13 adverb_query& rhymes_with(const word& _word);
14 adverb_query& rhymes_with(rhyme _r);
15 adverb_query& has_pronunciation();
16 adverb_query& has_rhyming_noun();
17 adverb_query& has_rhyming_adjective();
18 adverb_query& has_rhyming_adverb();
19 adverb_query& has_rhyming_verb();
20 adverb_query& with_stress(filter<std::vector<bool>> _arg);
21
22 adverb_query& requires_comparative_form();
23 adverb_query& requires_superlative_form();
24
25 adverb_query& with_prefix(filter<std::string> _f);
26 adverb_query& with_suffix(filter<std::string> _f);
27
28 adverb_query& with_complexity(int _arg);
29
30 adverb_query& has_antonyms();
31 adverb_query& antonym_of(filter<adverb> _f);
32
33 adverb_query& has_synonyms();
34 adverb_query& synonym_of(filter<adverb> _f);
35
36 adverb_query& is_mannernymic();
37 adverb_query& mannernym_of(filter<adjective> _f);
38
39/* adverb_query& derived_from(const word& _w);
40 adverb_query& not_derived_from(const word& _w);*/
41
42 std::list<adverb> run() const;
43
44 const static int unlimited = -1;
45
46 private:
47 const data& _data;
48 int _limit = unlimited;
49 bool _random = false;
50 std::list<rhyme> _rhymes;
51 std::list<adverb> _except;
52 bool _has_prn = false;
53 bool _has_rhyming_noun = false;
54 bool _has_rhyming_adjective = false;
55 bool _has_rhyming_adverb = false;
56 bool _has_rhyming_verb = false;
57 filter<std::vector<bool>> _stress;
58
59 bool _requires_comparative_form = false;
60 bool _requires_superlative_form = false;
61
62 filter<std::string> _with_prefix;
63 filter<std::string> _with_suffix;
64
65 int _with_complexity = unlimited;
66
67 bool _has_antonyms = false;
68 filter<adverb> _antonym_of;
69
70 bool _has_synonyms = false;
71 filter<adverb> _synonym_of;
72
73 bool _is_mannernymic = false;
74 filter<adjective> _mannernym_of;
75
76/* std::list<adjective> _derived_from_adjective;
77 std::list<adjective> _not_derived_from_adjective;
78 std::list<adverb> _derived_from_adverb;
79 std::list<adverb> _not_derived_from_adverb;
80 std::list<noun> _derived_from_noun;
81 std::list<noun> _not_derived_from_noun;*/
82 };
83
84};
85
86#endif /* end of include guard: ADVERB_QUERY_H_CA13CCDD */
diff --git a/lib/binding.cpp b/lib/binding.cpp new file mode 100644 index 0000000..349cd6f --- /dev/null +++ b/lib/binding.cpp
@@ -0,0 +1,180 @@
1#include "binding.h"
2#include <stdexcept>
3#include <utility>
4
5namespace verbly {
6
7 binding::binding(const binding& other)
8 {
9 type_ = other.type_;
10
11 switch (type_)
12 {
13 case type::integer:
14 {
15 integer_ = other.integer_;
16
17 break;
18 }
19
20 case type::string:
21 {
22 new(&string_) std::string(other.string_);
23
24 break;
25 }
26
27 case type::invalid:
28 {
29 break;
30 }
31 }
32 }
33
34 binding::binding(binding&& other) : binding()
35 {
36 swap(*this, other);
37 }
38
39 binding& binding::operator=(binding other)
40 {
41 swap(*this, other);
42
43 return *this;
44 }
45
46 void swap(binding& first, binding& second)
47 {
48 using type = binding::type;
49
50 type tempType = first.type_;
51 int tempInteger;
52 std::string tempString;
53
54 switch (first.type_)
55 {
56 case type::integer:
57 {
58 tempInteger = first.integer_;
59
60 break;
61 }
62
63 case type::string:
64 {
65 tempString = std::move(tempString);
66
67 break;
68 }
69
70 case type::invalid:
71 {
72 break;
73 }
74 }
75
76 first.~binding();
77
78 first.type_ = second.type_;
79
80 switch (second.type_)
81 {
82 case type::integer:
83 {
84 first.integer_ = second.integer_;
85
86 break;
87 }
88
89 case type::string:
90 {
91 new(&first.string_) std::string(std::move(second.string_));
92
93 break;
94 }
95
96 case type::invalid:
97 {
98 break;
99 }
100 }
101
102 second.~binding();
103
104 second.type_ = tempType;
105
106 switch (tempType)
107 {
108 case type::integer:
109 {
110 second.integer_ = tempInteger;
111
112 break;
113 }
114
115 case type::string:
116 {
117 new(&second.string_) std::string(std::move(tempString));
118
119 break;
120 }
121
122 case type::invalid:
123 {
124 break;
125 }
126 }
127 }
128
129 binding::~binding()
130 {
131 switch (type_)
132 {
133 case type::string:
134 {
135 using string_type = std::string;
136 string_.~string_type();
137
138 break;
139 }
140
141 case type::integer:
142 case type::invalid:
143 {
144 break;
145 }
146 }
147 }
148
149 binding::binding(int arg) :
150 type_(type::integer),
151 integer_(arg)
152 {
153 }
154
155 int binding::getInteger() const
156 {
157 if (type_ != type::integer)
158 {
159 throw std::domain_error("binding::getInteger called on non-integer binding");
160 }
161
162 return integer_;
163 }
164
165 binding::binding(std::string arg) : type_(type::string)
166 {
167 new(&string_) std::string(arg);
168 }
169
170 std::string binding::getString() const
171 {
172 if (type_ != type::string)
173 {
174 throw std::domain_error("binding::getString called on non-string binding");
175 }
176
177 return string_;
178 }
179
180};
diff --git a/lib/binding.h b/lib/binding.h new file mode 100644 index 0000000..7fbe20e --- /dev/null +++ b/lib/binding.h
@@ -0,0 +1,70 @@
1#ifndef BINDING_H_CAE0B18E
2#define BINDING_H_CAE0B18E
3
4#include <string>
5
6namespace verbly {
7
// A small tagged union holding either nothing, an int, or a std::string.
// The tag (type_) records which union member, if any, is currently live.
class binding {
public:

  // Discriminator for the union below.
  enum class type { invalid, integer, string };

  // --- Construction and destruction ---

  // Creates an empty binding: the tag defaults to type::invalid and no union
  // member is constructed.
  binding()
  {
  }

  binding(const binding& other);
  binding(binding&& other);

  // Converting constructors from the two payload types. They are deliberately
  // not explicit, so an int or std::string converts to a binding implicitly.
  binding(int arg);
  binding(std::string arg);

  ~binding();

  // --- Assignment and swap ---

  // Takes its argument by value and swaps with it.
  binding& operator=(binding other);

  friend void swap(binding& first, binding& second);

  // --- Accessors ---

  type getType() const
  {
    return type_;
  }

  // Each accessor throws std::domain_error when the binding does not hold the
  // requested kind of value.
  int getInteger() const;
  std::string getString() const;

private:

  // Payload storage. std::string is non-trivial, so it is constructed and
  // destroyed manually by the out-of-line member functions.
  union {
    int integer_;
    std::string string_;
  };

  type type_ = type::invalid;
};
67
68};
69
70#endif /* end of include guard: BINDING_H_CAE0B18E */
diff --git a/lib/data.cpp b/lib/data.cpp deleted file mode 100644 index db42487..0000000 --- a/lib/data.cpp +++ /dev/null
@@ -1,177 +0,0 @@
1#include "verbly.h"
2
3namespace verbly {
4
5 data::data(std::string datafile)
6 {
7 if (sqlite3_open_v2(datafile.c_str(), &ppdb, SQLITE_OPEN_READONLY, NULL) != SQLITE_OK)
8 {
9 throw std::invalid_argument(sqlite3_errmsg(ppdb));
10 }
11 }
12
13 data::data(data&& other)
14 {
15 ppdb = other.ppdb;
16 }
17
18 data& data::operator=(data&& other)
19 {
20 ppdb = other.ppdb;
21
22 return *this;
23 }
24
25 data::~data()
26 {
27 sqlite3_close_v2(ppdb);
28 }
29
30 verb_query data::verbs() const
31 {
32 return verb_query(*this);
33 }
34
35 adjective_query data::adjectives() const
36 {
37 return adjective_query(*this);
38 }
39
40 adverb_query data::adverbs() const
41 {
42 return adverb_query(*this);
43 }
44
45 noun_query data::nouns() const
46 {
47 return noun_query(*this);
48 }
49
50 frame_query data::frames() const
51 {
52 return frame_query(*this);
53 }
54
55 preposition_query data::prepositions() const
56 {
57 return preposition_query(*this);
58 }
59
60 binding::type binding::get_type() const
61 {
62 return _type;
63 }
64
65 binding::binding(const binding& other)
66 {
67 _type = other._type;
68
69 switch (_type)
70 {
71 case type::integer:
72 {
73 _integer = other._integer;
74
75 break;
76 }
77
78 case type::string:
79 {
80 new(&_string) std::string(other._string);
81
82 break;
83 }
84 }
85 }
86
87 binding::~binding()
88 {
89 switch (_type)
90 {
91 case type::string:
92 {
93 using string_type = std::string;
94 _string.~string_type();
95
96 break;
97 }
98 }
99 }
100
101 binding& binding::operator=(const binding& other)
102 {
103 this->~binding();
104
105 _type = other._type;
106
107 switch (_type)
108 {
109 case type::integer:
110 {
111 _integer = other._integer;
112
113 break;
114 }
115
116 case type::string:
117 {
118 new(&_string) std::string(other._string);
119
120 break;
121 }
122 }
123
124 return *this;
125 }
126
127 binding::binding(int _arg)
128 {
129 _type = type::integer;
130 _integer = _arg;
131 }
132
133 int binding::get_integer() const
134 {
135 assert(_type == type::integer);
136
137 return _integer;
138 }
139
140 void binding::set_integer(int _arg)
141 {
142 *this = binding(_arg);
143 }
144
145 binding& binding::operator=(int _arg)
146 {
147 *this = binding(_arg);
148
149 return *this;
150 }
151
152 binding::binding(std::string _arg)
153 {
154 _type = type::string;
155 new(&_string) std::string(_arg);
156 }
157
158 std::string binding::get_string() const
159 {
160 assert(_type == type::string);
161
162 return _string;
163 }
164
165 void binding::set_string(std::string _arg)
166 {
167 *this = binding(_arg);
168 }
169
170 binding& binding::operator=(std::string _arg)
171 {
172 *this = binding(_arg);
173
174 return *this;
175 }
176
177};
diff --git a/lib/data.h b/lib/data.h deleted file mode 100644 index b8b12b9..0000000 --- a/lib/data.h +++ /dev/null
@@ -1,380 +0,0 @@
1#ifndef DATA_H_C4AEC3DD
2#define DATA_H_C4AEC3DD
3
4#include <sqlite3.h>
5
6namespace verbly {
7
8 class data;
9 class word;
10 class adjective;
11 class noun;
12 class verb;
13 class adverb;
14 class frame;
15 class adjective_query;
16 class adverb_query;
17 class noun_query;
18 class verb_query;
19 class frame_query;
20 class preposition_query;
21
22 class data {
23 private:
24 sqlite3* ppdb;
25
26 friend class adjective_query;
27 friend class noun_query;
28 friend class verb_query;
29 friend class adverb_query;
30 friend class frame_query;
31 friend class preposition_query;
32
33 public:
34 data(std::string datafile);
35
36 data(const data& other) = delete;
37 data& operator=(const data& other) = delete;
38
39 data(data&& other);
40 data& operator=(data&& other);
41
42 ~data();
43
44 verb_query verbs() const;
45 adjective_query adjectives() const;
46 adverb_query adverbs() const;
47 noun_query nouns() const;
48 frame_query frames() const;
49 preposition_query prepositions() const;
50
51 };
52
53 template <class T>
54 class filter {
55 public:
56 enum class type {
57 singleton,
58 group
59 };
60
61 typedef filter<T> value_type;
62
63 type get_type() const
64 {
65 return _type;
66 }
67
68 filter(const filter<T>& other)
69 {
70 _type = other._type;
71 _notlogic = other._notlogic;
72
73 switch (_type)
74 {
75 case type::singleton:
76 {
77 new(&_singleton.elem) T(other._singleton.elem);
78
79 break;
80 }
81
82 case type::group:
83 {
84 new(&_group.elems) std::list<filter<T>>(other._group.elems);
85 _group.orlogic = other._group.orlogic;
86
87 break;
88 }
89 }
90 }
91
92 filter<T>& operator=(const filter<T>& other)
93 {
94 this->~filter();
95
96 _type = other._type;
97 _notlogic = other._notlogic;
98
99 switch (_type)
100 {
101 case type::singleton:
102 {
103 new(&_singleton.elem) T(other._singleton.elem);
104
105 break;
106 }
107
108 case type::group:
109 {
110 new(&_group.elems) std::list<filter<T>>(other._group.elems);
111 _group.orlogic = other._group.orlogic;
112
113 break;
114 }
115 }
116
117 return *this;
118 }
119
120 ~filter()
121 {
122 switch (_type)
123 {
124 case type::singleton:
125 {
126 _singleton.elem.~T();
127
128 break;
129 }
130
131 case type::group:
132 {
133 using list_type = std::list<filter<T>>;
134 _group.elems.~list_type();
135
136 break;
137 }
138 }
139 }
140
141 bool get_notlogic() const
142 {
143 return _notlogic;
144 }
145
146 void set_notlogic(bool _nl)
147 {
148 _notlogic = _nl;
149 }
150
151 std::list<T> inorder_flatten() const
152 {
153 std::list<T> result;
154
155 if (_type == type::singleton)
156 {
157 result.push_back(_singleton.elem);
158 } else if (_type == type::group)
159 {
160 for (auto elem : _group.elems)
161 {
162 auto l = elem.inorder_flatten();
163 result.insert(std::end(result), std::begin(l), std::end(l));
164 }
165 }
166
167 return result;
168 }
169
170 std::set<T> uniq_flatten() const
171 {
172 std::set<T> result;
173
174 if (_type == type::singleton)
175 {
176 result.insert(_singleton.elem);
177 } else if (_type == type::group)
178 {
179 for (auto elem : _group.elems)
180 {
181 auto l = elem.uniq_flatten();
182 result.insert(std::begin(l), std::end(l));
183 }
184 }
185
186 return result;
187 }
188
189 void clean()
190 {
191 if (_type == type::group)
192 {
193 std::list<typename std::list<filter<T>>::iterator> toremove;
194 for (auto it = _group.elems.begin(); it != _group.elems.end(); it++)
195 {
196 it->clean();
197
198 if (it->get_type() == type::group)
199 {
200 if (it->_group.elems.size() == 0)
201 {
202 toremove.push_back(it);
203 } else if (it->_group.elems.size() == 1)
204 {
205 bool truelogic = it->_notlogic != it->_group.elems.front()._notlogic;
206 filter<T> e = it->_group.elems.front();
207 *it = e;
208 it->_notlogic = truelogic;
209 }
210 }
211 }
212
213 for (auto rem : toremove)
214 {
215 _group.elems.erase(rem);
216 }
217
218 if (_group.elems.size() == 1)
219 {
220 bool truelogic = _notlogic != _group.elems.front()._notlogic;
221 filter<T> e = _group.elems.front();
222 *this = e;
223 _notlogic = truelogic;
224 }
225 }
226 }
227
228 // Singleton
229 filter(T _elem, bool _notlogic = false) : _type(type::singleton)
230 {
231 new(&_singleton.elem) T(_elem);
232 this->_notlogic = _notlogic;
233 }
234
235 filter<T>& operator=(T _elem)
236 {
237 *this = filter<T>{_elem};
238
239 return *this;
240 }
241
242 T get_elem() const
243 {
244 assert(_type == type::singleton);
245
246 return _singleton.elem;
247 }
248
249 void set_elem(T _elem)
250 {
251 assert(_type == type::singleton);
252
253 _singleton.elem = _elem;
254 }
255
256 // Group
257 typedef typename std::list<filter<T>>::iterator iterator;
258
259 filter() : _type(type::group)
260 {
261 new(&_group.elems) std::list<filter<T>>();
262 _group.orlogic = false;
263 }
264
265 filter(std::initializer_list<filter<T>> _init) : _type(type::group)
266 {
267 new(&_group.elems) std::list<filter<T>>(_init);
268 _group.orlogic = false;
269 }
270
271 iterator begin()
272 {
273 assert(_type == type::group);
274
275 return _group.elems.begin();
276 }
277
278 iterator end()
279 {
280 assert(_type == type::group);
281
282 return _group.elems.end();
283 }
284
285 filter<T>& operator<<(filter<T> _elem)
286 {
287 assert(_type == type::group);
288
289 _group.elems.push_back(_elem);
290
291 return *this;
292 }
293
294 void push_back(filter<T> _elem)
295 {
296 assert(_type == type::group);
297
298 _group.elems.push_back(_elem);
299 }
300
301 bool get_orlogic() const
302 {
303 assert(_type == type::group);
304
305 return _group.orlogic;
306 }
307
308 void set_orlogic(bool _ol)
309 {
310 assert(_type == type::group);
311
312 _group.orlogic = _ol;
313 }
314
315 bool empty() const
316 {
317 if (_type == type::group)
318 {
319 return _group.elems.empty();
320 } else {
321 return false;
322 }
323 }
324
325 int size() const
326 {
327 assert(_type == type::group);
328
329 return _group.elems.size();
330 }
331
332 private:
333 type _type;
334 bool _notlogic = false;
335 union {
336 struct {
337 T elem;
338 } _singleton;
339 struct {
340 std::list<filter<T>> elems;
341 bool orlogic;
342 } _group;
343 };
344 };
345
346 class binding {
347 public:
348 enum class type {
349 integer,
350 string
351 };
352
353 type get_type() const;
354 binding(const binding& other);
355 ~binding();
356 binding& operator=(const binding& other);
357
358 // Integer
359 binding(int _arg);
360 int get_integer() const;
361 void set_integer(int _arg);
362 binding& operator=(int _arg);
363
364 // String
365 binding(std::string _arg);
366 std::string get_string() const;
367 void set_string(std::string _arg);
368 binding& operator=(std::string _arg);
369
370 private:
371 union {
372 int _integer;
373 std::string _string;
374 };
375 type _type;
376 };
377
378};
379
380#endif /* end of include guard: DATA_H_C4AEC3DD */
diff --git a/lib/database.cpp b/lib/database.cpp new file mode 100644 index 0000000..351b93d --- /dev/null +++ b/lib/database.cpp
@@ -0,0 +1,79 @@
1#include "database.h"
2#include <sqlite3.h>
3#include <stdexcept>
4#include "query.h"
5
6namespace verbly {
7
8 database::database(std::string path)
9 {
10 if (sqlite3_open_v2(path.c_str(), &ppdb_, SQLITE_OPEN_READONLY, NULL) != SQLITE_OK)
11 {
12 // We still have to free the resources allocated. In the event that
13 // allocation failed, ppdb will be null and sqlite3_close_v2 will just
14 // ignore it.
15 std::string errmsg(sqlite3_errmsg(ppdb_));
16 sqlite3_close_v2(ppdb_);
17
18 throw database_error("Could not open verbly datafile", errmsg);
19 }
20 }
21
22 database::database(database&& other) : database()
23 {
24 swap(*this, other);
25 }
26
27 database& database::operator=(database&& other)
28 {
29 swap(*this, other);
30
31 return *this;
32 }
33
34 void swap(database& first, database& second)
35 {
36 std::swap(first.ppdb_, second.ppdb_);
37 }
38
39 database::~database()
40 {
41 sqlite3_close_v2(ppdb_);
42 }
43
44 query<notion> database::notions(filter where, bool random, int limit) const
45 {
46 return query<notion>(*this, ppdb_, std::move(where), random, limit);
47 }
48
49 query<word> database::words(filter where, bool random, int limit) const
50 {
51 return query<word>(*this, ppdb_, std::move(where), random, limit);
52 }
53
54 query<group> database::groups(filter where, bool random, int limit) const
55 {
56 return query<group>(*this, ppdb_, std::move(where), random, limit);
57 }
58
59 query<frame> database::frames(filter where, bool random, int limit) const
60 {
61 return query<frame>(*this, ppdb_, std::move(where), random, limit);
62 }
63
64 query<lemma> database::lemmas(filter where, bool random, int limit) const
65 {
66 return query<lemma>(*this, ppdb_, std::move(where), random, limit);
67 }
68
69 query<form> database::forms(filter where, bool random, int limit) const
70 {
71 return query<form>(*this, ppdb_, std::move(where), random, limit);
72 }
73
74 query<pronunciation> database::pronunciations(filter where, bool random, int limit) const
75 {
76 return query<pronunciation>(*this, ppdb_, std::move(where), random, limit);
77 }
78
79};
diff --git a/lib/database.h b/lib/database.h new file mode 100644 index 0000000..d68c40b --- /dev/null +++ b/lib/database.h
@@ -0,0 +1,73 @@
1#ifndef DATABASE_H_0B0A47D2
2#define DATABASE_H_0B0A47D2
3
4#include <string>
5#include <exception>
6#include <list>
7#include "notion.h"
8#include "word.h"
9#include "group.h"
10#include "frame.h"
11#include "lemma.h"
12#include "form.h"
13#include "pronunciation.h"
14
15struct sqlite3;
16
17namespace verbly {
18
19 template <typename Object>
20 class query;
21
22 class database {
23 public:
24
25 // Constructor
26
27 explicit database(std::string path);
28
29 // Disable copying
30
31 database(const database& other) = delete;
32 database& operator=(const database& other) = delete;
33
34 // Move constructor and move assignment
35
36 database(database&& other);
37 database& operator=(database&& other);
38
39 // Swap
40
41 friend void swap(database& first, database& second);
42
43 // Destructor
44
45 ~database();
46
47 // Queries
48
49 query<notion> notions(filter where, bool random = true, int limit = 1) const;
50
51 query<word> words(filter where, bool random = true, int limit = 1) const;
52
53 query<group> groups(filter where, bool random = true, int limit = 1) const;
54
55 query<frame> frames(filter where, bool random = true, int limit = 1) const;
56
57 query<lemma> lemmas(filter where, bool random = true, int limit = 1) const;
58
59 query<form> forms(filter where, bool random = true, int limit = 1) const;
60
61 query<pronunciation> pronunciations(filter where, bool random = true, int limit = 1) const;
62
63 private:
64
65 database() = default;
66
67 sqlite3* ppdb_ = nullptr;
68
69 };
70
71};
72
73#endif /* end of include guard: DATABASE_H_0B0A47D2 */
diff --git a/lib/enums.h b/lib/enums.h new file mode 100644 index 0000000..b37be7b --- /dev/null +++ b/lib/enums.h
@@ -0,0 +1,45 @@
1#ifndef ENUMS_H_260BA847
2#define ENUMS_H_260BA847
3
4namespace verbly {
5
// Word classes. The numeric values are pinned explicitly because field's
// comparison operators cast this enum to int when building a filter.
// NOTE(review): presumably these integers mirror what is stored in the
// datafile; confirm against the generator before renumbering.
enum class part_of_speech : int {
  noun        = 0,
  adjective   = 1,
  adverb      = 2,
  verb        = 3,
  preposition = 4
};
13
// Positioning values, with -1 reserved for "undefined". The numeric values
// are pinned explicitly because field's comparison operators cast this enum
// to int when building a filter.
enum class positioning : int {
  undefined   = -1,
  predicate   = 0,
  attributive = 1,
  postnominal = 2
};
20
// Inflected-form categories. The numeric values are pinned explicitly because
// field's comparison operators cast this enum to int when building a filter.
enum class inflection : int {
  base            = 0,
  plural          = 1,
  comparative     = 2,
  superlative     = 3,
  past_tense      = 4,
  past_participle = 5,
  ing_form        = 6,
  s_form          = 7
};
31
// Identifies one of the library's object kinds, with -1 reserved for
// "undefined". Values are pinned explicitly so they stay stable if
// enumerators are ever reordered.
enum class object : int {
  undefined     = -1,
  notion        = 0,
  word          = 1,
  group         = 2,
  frame         = 3,
  lemma         = 4,
  form          = 5,
  pronunciation = 6
};
42
43};
44
45#endif /* end of include guard: ENUMS_H_260BA847 */
diff --git a/lib/field.cpp b/lib/field.cpp new file mode 100644 index 0000000..d7adbb3 --- /dev/null +++ b/lib/field.cpp
@@ -0,0 +1,91 @@
1#include "field.h"
2#include "filter.h"
3
4namespace verbly {
5
6 filter field::operator==(int value) const
7 {
8 return filter(*this, filter::comparison::int_equals, value);
9 }
10
11 filter field::operator!=(int value) const
12 {
13 return filter(*this, filter::comparison::int_does_not_equal, value);
14 }
15
16 filter field::operator<(int value) const
17 {
18 return filter(*this, filter::comparison::int_is_less_than, value);
19 }
20
21 filter field::operator<=(int value) const
22 {
23 return filter(*this, filter::comparison::int_is_at_most, value);
24 }
25
26 filter field::operator>(int value) const
27 {
28 return filter(*this, filter::comparison::int_is_greater_than, value);
29 }
30
31 filter field::operator>=(int value) const
32 {
33 return filter(*this, filter::comparison::int_is_at_least, value);
34 }
35
36 filter field::operator==(part_of_speech value) const
37 {
38 return filter(*this, filter::comparison::int_equals, static_cast<int>(value));
39 }
40
41 filter field::operator==(positioning value) const
42 {
43 return filter(*this, filter::comparison::int_equals, static_cast<int>(value));
44 }
45
46 filter field::operator==(inflection value) const
47 {
48 return filter(*this, filter::comparison::int_equals, static_cast<int>(value));
49 }
50
51 filter field::operator==(bool value) const
52 {
53 return filter(*this, filter::comparison::boolean_equals, value);
54 }
55
56 filter field::operator==(std::string value) const
57 {
58 return filter(*this, filter::comparison::string_equals, std::move(value));
59 }
60
61 filter field::operator!=(std::string value) const
62 {
63 return filter(*this, filter::comparison::string_does_not_equal, std::move(value));
64 }
65
66 filter field::operator%=(std::string value) const
67 {
68 return filter(*this, filter::comparison::string_is_like, std::move(value));
69 }
70
71 field::operator filter() const
72 {
73 return filter(*this, filter::comparison::is_not_null);
74 }
75
76 filter field::operator!() const
77 {
78 return filter(*this, filter::comparison::is_null);
79 }
80
81 filter field::operator%=(filter joinCondition) const
82 {
83 if (type_ == type::hierarchal_join)
84 {
85 return filter(*this, filter::comparison::hierarchally_matches, std::move(joinCondition));
86 } else {
87 return filter(*this, filter::comparison::matches, std::move(joinCondition));
88 }
89 }
90
91};
diff --git a/lib/field.h b/lib/field.h new file mode 100644 index 0000000..30c62be --- /dev/null +++ b/lib/field.h
@@ -0,0 +1,306 @@
1#ifndef FIELD_H_43258321
2#define FIELD_H_43258321
3
#include "enums.h"
#include <cstring>
#include <stdexcept>
#include <string>
#include <tuple>
7
8namespace verbly {
9
10 class filter;
11
12 class field {
13 public:
14 enum class type {
15 undefined,
16 string,
17 integer,
18 boolean,
19 join,
20 join_through,
21 hierarchal_join
22 };
23
24 // Default constructor
25
26 field()
27 {
28 }
29
30 // Static factories
31
32 static field stringField(
33 object obj,
34 const char* name,
35 bool nullable = false)
36 {
37 return field(obj, type::string, name, nullable);
38 }
39
40 static field stringField(
41 const char* table,
42 const char* name,
43 bool nullable = false)
44 {
45 return field(object::undefined, type::string, name, nullable, table);
46 }
47
48 static field integerField(
49 object obj,
50 const char* name,
51 bool nullable = false)
52 {
53 return field(obj, type::integer, name, nullable);
54 }
55
56 static field integerField(
57 const char* table,
58 const char* name,
59 bool nullable = false)
60 {
61 return field(object::undefined, type::integer, name, nullable, table);
62 }
63
64 static field booleanField(
65 object obj,
66 const char* name,
67 bool nullable = false)
68 {
69 return field(obj, type::boolean, name, nullable);
70 }
71
72 static field booleanField(
73 const char* table,
74 const char* name,
75 bool nullable = false)
76 {
77 return field(object::undefined, type::boolean, name, nullable, table);
78 }
79
80 static field joinField(
81 object obj,
82 const char* name,
83 object joinWith,
84 bool nullable = false)
85 {
86 return field(obj, type::join, name, nullable, 0, joinWith);
87 }
88
89 static field joinField(
90 object obj,
91 const char* name,
92 const char* table,
93 bool nullable = false)
94 {
95 return field(obj, type::join, name, nullable, table);
96 }
97
98 static field joinThrough(
99 object obj,
100 const char* name,
101 object joinWith,
102 const char* joinTable,
103 const char* foreignColumn)
104 {
105 return field(obj, type::join_through, name, true, joinTable, joinWith, foreignColumn, name, foreignColumn);
106 }
107
108 static field joinThrough(
109 object obj,
110 const char* name,
111 object joinWith,
112 const char* joinTable,
113 const char* foreignColumn,
114 const char* joinColumn,
115 const char* foreignJoinColumn)
116 {
117 return field(obj, type::join_through, name, true, joinTable, joinWith, foreignColumn, joinColumn, foreignJoinColumn);
118 }
119
120 static field selfJoin(
121 object obj,
122 const char* name,
123 const char* joinTable,
124 const char* joinColumn,
125 const char* foreignJoinColumn)
126 {
127 return field(obj, type::join_through, name, true, joinTable, obj, name, joinColumn, foreignJoinColumn);
128 }
129
130 static field hierarchalSelfJoin(
131 object obj,
132 const char* name,
133 const char* joinTable,
134 const char* joinColumn,
135 const char* foreignJoinColumn)
136 {
137 return field(obj, type::hierarchal_join, name, true, joinTable, obj, name, joinColumn, foreignJoinColumn);
138 }
139
140 // Accessors
141
142 object getObject() const
143 {
144 return object_;
145 }
146
147 type getType() const
148 {
149 return type_;
150 }
151
152 bool isJoin() const
153 {
154 return ((type_ == type::join) || (type_ == type::join_through) || (type_ == type::hierarchal_join));
155 }
156
157 const char* getColumn() const
158 {
159 return column_;
160 }
161
162 bool isNullable() const
163 {
164 return nullable_;
165 }
166
167 bool hasTable() const
168 {
169 return (table_ != 0);
170 }
171
172 const char* getTable() const
173 {
174 return table_;
175 }
176
177 // Joins
178
179 object getJoinObject() const
180 {
181 // We ignore hierarchal joins because they are always self joins.
182 return ((type_ == type::join) || (type_ == type::join_through))
183 ? joinObject_
184 : throw std::domain_error("Non-join fields don't have join objects");
185 }
186
187 // Many-to-many joins
188
189 const char* getForeignColumn() const
190 {
191 // We ignore hierarchal joins because they are always self joins.
192 return (type_ == type::join_through)
193 ? foreignColumn_
194 : throw std::domain_error("Only many-to-many join fields have a foreign column");
195 }
196
197 const char* getJoinColumn() const
198 {
199 return ((type_ == type::join_through) || (type_ == type::hierarchal_join))
200 ? joinColumn_
201 : throw std::domain_error("Only many-to-many join fields have a join column");
202 }
203
204 const char* getForeignJoinColumn() const
205 {
206 return ((type_ == type::join_through) || (type_ == type::hierarchal_join))
207 ? foreignJoinColumn_
208 : throw std::domain_error("Only many-to-many join fields have a foreign join column");
209 }
210
211 // Ordering
212
213 bool operator<(const field& other) const
214 {
215 // For the most part, (object, column) uniquely identifies fields.
216 // However, there do exist a number of relationships from an object to
217 // itself, such as notion hypernymy/hyponymy. Hypernymy and hyponymy have
218 // the same object (notion), the same column (notion_id), and the same
219 // table (hypernymy); however, they have different join columns.
220 return std::tie(object_, column_, table_, joinColumn_) < std::tie(other.object_, other.column_, other.table_, other.joinColumn_);
221 }
222
223 // Equality
224
225 bool operator==(const field& other) const
226 {
227 // For the most part, (object, column) uniquely identifies fields.
228 // However, there do exist a number of relationships from an object to
229 // itself, such as notion hypernymy/hyponymy. Hypernymy and hyponymy have
230 // the same object (notion), the same column (notion_id), and the same
231 // table (hypernymy); however, they have different join columns.
232 return std::tie(object_, column_, table_, joinColumn_) == std::tie(other.object_, other.column_, other.table_, other.joinColumn_);
233 }
234
235 // Filter construction
236
237 filter operator==(int value) const; // Integer equality
238 filter operator!=(int value) const; // Integer inequality
239 filter operator<(int value) const; // Integer is less than
240 filter operator<=(int value) const; // Integer is at most
241 filter operator>(int value) const; // Integer is greater than
242 filter operator>=(int value) const; // Integer is at least
243
244 filter operator==(part_of_speech value) const; // Part of speech equality
245 filter operator==(positioning value) const; // Adjective positioning equality
246 filter operator==(inflection value) const; // Inflection category equality
247
248 filter operator==(bool value) const; // Boolean equality
249
250 filter operator==(std::string value) const; // String equality
251 filter operator!=(std::string value) const; // String inequality
252 filter operator%=(std::string value) const; // String matching
253
254 operator filter() const; // Non-nullity
255 filter operator!() const; // Nullity
256
257 filter operator%=(filter joinCondition) const; // Join
258
259 private:
260
261 // Constructor
262
263 field(
264 object obj,
265 type datatype,
266 const char* column,
267 bool nullable = false,
268 const char* table = 0,
269 object joinObject = object::undefined,
270 const char* foreignColumn = 0,
271 const char* joinColumn = 0,
272 const char* foreignJoinColumn = 0) :
273 object_(obj),
274 type_(datatype),
275 column_(column),
276 nullable_(nullable),
277 table_(table),
278 joinObject_(joinObject),
279 foreignColumn_(foreignColumn),
280 joinColumn_(joinColumn),
281 foreignJoinColumn_(foreignJoinColumn)
282 {
283 }
284
285 // General
286 object object_ = object::undefined;
287 type type_ = type::undefined;
288 const char* column_ = 0;
289 const char* table_ = 0;
290
291 // Non-joins and belongs-to joins
292 bool nullable_ = false;
293
294 // Joins
295 object joinObject_ = object::undefined;
296
297 // Many-to-many joins
298 const char* foreignColumn_ = 0;
299 const char* joinColumn_ = 0;
300 const char* foreignJoinColumn_ = 0;
301
302 };
303
304};
305
306#endif /* end of include guard: FIELD_H_43258321 */
diff --git a/lib/filter.cpp b/lib/filter.cpp new file mode 100644 index 0000000..959fa05 --- /dev/null +++ b/lib/filter.cpp
@@ -0,0 +1,1365 @@
1#include "filter.h"
2#include <stdexcept>
3#include <map>
4#include "notion.h"
5#include "word.h"
6#include "group.h"
7#include "frame.h"
8#include "lemma.h"
9#include "form.h"
10#include "pronunciation.h"
11
12namespace verbly {
13
14 filter::filter(const filter& other)
15 {
16 type_ = other.type_;
17
18 switch (type_)
19 {
20 case type::empty:
21 {
22 break;
23 }
24
25 case type::singleton:
26 {
27 new(&singleton_.filterField) field(other.singleton_.filterField);
28 singleton_.filterType = other.singleton_.filterType;
29
30 switch (singleton_.filterType)
31 {
32 case comparison::int_equals:
33 case comparison::int_does_not_equal:
34 case comparison::int_is_at_least:
35 case comparison::int_is_greater_than:
36 case comparison::int_is_at_most:
37 case comparison::int_is_less_than:
38 {
39 singleton_.intValue = other.singleton_.intValue;
40
41 break;
42 }
43
44 case comparison::boolean_equals:
45 {
46 singleton_.boolValue = other.singleton_.boolValue;
47
48 break;
49 }
50
51 case comparison::string_equals:
52 case comparison::string_does_not_equal:
53 case comparison::string_is_like:
54 case comparison::string_is_not_like:
55 {
56 new(&singleton_.stringValue) std::string(other.singleton_.stringValue);
57
58 break;
59 }
60
61 case comparison::is_null:
62 case comparison::is_not_null:
63 {
64 break;
65 }
66
67 case comparison::matches:
68 case comparison::does_not_match:
69 case comparison::hierarchally_matches:
70 case comparison::does_not_hierarchally_match:
71 {
72 new(&singleton_.join) std::unique_ptr<filter>(new filter(*other.singleton_.join));
73
74 break;
75 }
76 }
77
78 break;
79 }
80
81 case type::group:
82 {
83 new(&group_.children) std::list<filter>(other.group_.children);
84 group_.orlogic = other.group_.orlogic;
85
86 break;
87 }
88 }
89 }
90
// Move constructor: delegates to the default constructor and then swaps with
// `other`, so `other` ends up holding whatever the default-constructed filter
// held.
91 filter::filter(filter&& other) : filter()
92 {
93 swap(*this, other);
94 }
95
// Assignment. The argument is taken by value, so it is already a copy (or a
// moved-in temporary) of the right-hand side; swapping with it installs the
// new state here and leaves the previous state in `other`, which is destroyed
// on return.
96 filter& filter::operator=(filter other)
97 {
98 swap(*this, other);
99
100 return *this;
101 }
102
103 void swap(filter& first, filter& second)
104 {
105 using type = filter::type;
106 using comparison = filter::comparison;
107
108 type tempType = first.type_;
109 field tempField;
110 comparison tempComparison;
111 std::unique_ptr<filter> tempJoin;
112 std::string tempStringValue;
113 int tempIntValue;
114 bool tempBoolValue;
115 std::list<filter> tempChildren;
116 bool tempOrlogic;
117
118 switch (tempType)
119 {
120 case type::empty:
121 {
122 break;
123 }
124
125 case type::singleton:
126 {
127 tempField = std::move(first.singleton_.filterField);
128 tempComparison = first.singleton_.filterType;
129
130 switch (tempComparison)
131 {
132 case comparison::int_equals:
133 case comparison::int_does_not_equal:
134 case comparison::int_is_at_least:
135 case comparison::int_is_greater_than:
136 case comparison::int_is_at_most:
137 case comparison::int_is_less_than:
138 {
139 tempIntValue = first.singleton_.intValue;
140
141 break;
142 }
143
144 case comparison::boolean_equals:
145 {
146 tempBoolValue = first.singleton_.boolValue;
147
148 break;
149 }
150
151 case comparison::string_equals:
152 case comparison::string_does_not_equal:
153 case comparison::string_is_like:
154 case comparison::string_is_not_like:
155 {
156 tempStringValue = std::move(first.singleton_.stringValue);
157
158 break;
159 }
160
161 case comparison::is_null:
162 case comparison::is_not_null:
163 {
164 break;
165 }
166
167 case comparison::matches:
168 case comparison::does_not_match:
169 case comparison::hierarchally_matches:
170 case comparison::does_not_hierarchally_match:
171 {
172 tempJoin = std::move(first.singleton_.join);
173
174 break;
175 }
176 }
177
178 break;
179 }
180
181 case type::group:
182 {
183 tempChildren = std::move(first.group_.children);
184 tempOrlogic = first.group_.orlogic;
185
186 break;
187 }
188 }
189
190 first.~filter();
191
192 first.type_ = second.type_;
193
194 switch (first.type_)
195 {
196 case type::empty:
197 {
198 break;
199 }
200
201 case type::singleton:
202 {
203 new(&first.singleton_.filterField) field(std::move(second.singleton_.filterField));
204 first.singleton_.filterType = second.singleton_.filterType;
205
206 switch (first.singleton_.filterType)
207 {
208 case comparison::int_equals:
209 case comparison::int_does_not_equal:
210 case comparison::int_is_at_least:
211 case comparison::int_is_greater_than:
212 case comparison::int_is_at_most:
213 case comparison::int_is_less_than:
214 {
215 first.singleton_.intValue = second.singleton_.intValue;
216
217 break;
218 }
219
220 case comparison::boolean_equals:
221 {
222 first.singleton_.boolValue = second.singleton_.boolValue;
223
224 break;
225 }
226
227 case comparison::string_equals:
228 case comparison::string_does_not_equal:
229 case comparison::string_is_like:
230 case comparison::string_is_not_like:
231 {
232 new(&first.singleton_.stringValue) std::string(std::move(second.singleton_.stringValue));
233
234 break;
235 }
236
237 case comparison::is_null:
238 case comparison::is_not_null:
239 {
240 break;
241 }
242
243 case comparison::matches:
244 case comparison::does_not_match:
245 case comparison::hierarchally_matches:
246 case comparison::does_not_hierarchally_match:
247 {
248 new(&first.singleton_.join) std::unique_ptr<filter>(std::move(second.singleton_.join));
249
250 break;
251 }
252 }
253
254 break;
255 }
256
257 case type::group:
258 {
259 new(&first.group_.children) std::list<filter>(std::move(second.group_.children));
260 first.group_.orlogic = second.group_.orlogic;
261
262 break;
263 }
264 }
265
266 second.~filter();
267
268 second.type_ = tempType;
269
270 switch (second.type_)
271 {
272 case type::empty:
273 {
274 break;
275 }
276
277 case type::singleton:
278 {
279 new(&second.singleton_.filterField) field(std::move(tempField));
280 second.singleton_.filterType = tempComparison;
281
282 switch (second.singleton_.filterType)
283 {
284 case comparison::int_equals:
285 case comparison::int_does_not_equal:
286 case comparison::int_is_at_least:
287 case comparison::int_is_greater_than:
288 case comparison::int_is_at_most:
289 case comparison::int_is_less_than:
290 {
291 second.singleton_.intValue = tempIntValue;
292
293 break;
294 }
295
296 case comparison::boolean_equals:
297 {
298 second.singleton_.boolValue = tempBoolValue;
299
300 break;
301 }
302
303 case comparison::string_equals:
304 case comparison::string_does_not_equal:
305 case comparison::string_is_like:
306 case comparison::string_is_not_like:
307 {
308 new(&second.singleton_.stringValue) std::string(std::move(tempStringValue));
309
310 break;
311 }
312
313 case comparison::is_null:
314 case comparison::is_not_null:
315 {
316 break;
317 }
318
319 case comparison::matches:
320 case comparison::does_not_match:
321 case comparison::hierarchally_matches:
322 case comparison::does_not_hierarchally_match:
323 {
324 new(&second.singleton_.join) std::unique_ptr<filter>(std::move(tempJoin));
325
326 break;
327 }
328 }
329
330 break;
331 }
332
333 case type::group:
334 {
335 new(&second.group_.children) std::list<filter>(std::move(tempChildren));
336 second.group_.orlogic = tempOrlogic;
337
338 break;
339 }
340 }
341 }
342
343 filter::~filter()
344 {
345 switch (type_)
346 {
347 case type::empty:
348 {
349 break;
350 }
351
352 case type::singleton:
353 {
354 singleton_.filterField.~field();
355
356 switch (singleton_.filterType)
357 {
358 case comparison::int_equals:
359 case comparison::int_does_not_equal:
360 case comparison::int_is_at_least:
361 case comparison::int_is_greater_than:
362 case comparison::int_is_at_most:
363 case comparison::int_is_less_than:
364 case comparison::boolean_equals:
365 case comparison::is_null:
366 case comparison::is_not_null:
367 {
368 break;
369 }
370
371 case comparison::string_equals:
372 case comparison::string_does_not_equal:
373 case comparison::string_is_like:
374 case comparison::string_is_not_like:
375 {
376 using string_type = std::string;
377
378 singleton_.stringValue.~string_type();
379
380 break;
381 }
382
383 case comparison::matches:
384 case comparison::does_not_match:
385 case comparison::hierarchally_matches:
386 case comparison::does_not_hierarchally_match:
387 {
388 using ptr_type = std::unique_ptr<filter>;
389
390 singleton_.join.~ptr_type();
391
392 break;
393 }
394 }
395
396 break;
397 }
398
399 case type::group:
400 {
401 using list_type = std::list<filter>;
402
403 group_.children.~list_type();
404
405 break;
406 }
407 }
408 }
409
// Default constructor. The body initialises nothing itself.
// NOTE(review): type_ presumably defaults to type::empty through an in-class
// initialiser in filter.h; the move constructor and swap depend on that —
// confirm.
410 filter::filter()
411 {
412 }
413
414 filter::filter(
415 field filterField,
416 comparison filterType,
417 int filterValue) :
418 type_(type::singleton)
419 {
420 if (filterField.getType() == field::type::integer)
421 {
422 switch (filterType)
423 {
424 case comparison::int_equals:
425 case comparison::int_does_not_equal:
426 case comparison::int_is_at_least:
427 case comparison::int_is_greater_than:
428 case comparison::int_is_at_most:
429 case comparison::int_is_less_than:
430 {
431 new(&singleton_.filterField) field(std::move(filterField));
432 singleton_.filterType = filterType;
433 singleton_.intValue = filterValue;
434
435 break;
436 }
437
438 case comparison::boolean_equals:
439 case comparison::string_equals:
440 case comparison::string_does_not_equal:
441 case comparison::string_is_like:
442 case comparison::string_is_not_like:
443 case comparison::is_null:
444 case comparison::is_not_null:
445 case comparison::matches:
446 case comparison::does_not_match:
447 case comparison::hierarchally_matches:
448 case comparison::does_not_hierarchally_match:
449 {
450 throw std::invalid_argument("Invalid comparison for integer field");
451 }
452 }
453 } else {
454 throw std::domain_error("Cannot match a non-integer field against an integer value");
455 }
456 }
457
458 filter::filter(
459 field filterField,
460 comparison filterType,
461 std::string filterValue) :
462 type_(type::singleton)
463 {
464 if (filterField.getType() == field::type::string)
465 {
466 switch (filterType)
467 {
468 case comparison::string_equals:
469 case comparison::string_does_not_equal:
470 case comparison::string_is_like:
471 case comparison::string_is_not_like:
472 {
473 new(&singleton_.filterField) field(std::move(filterField));
474 singleton_.filterType = filterType;
475 new(&singleton_.stringValue) std::string(std::move(filterValue));
476
477 break;
478 }
479
480 case comparison::int_equals:
481 case comparison::int_does_not_equal:
482 case comparison::int_is_at_least:
483 case comparison::int_is_greater_than:
484 case comparison::int_is_at_most:
485 case comparison::int_is_less_than:
486 case comparison::boolean_equals:
487 case comparison::is_null:
488 case comparison::is_not_null:
489 case comparison::matches:
490 case comparison::does_not_match:
491 case comparison::hierarchally_matches:
492 case comparison::does_not_hierarchally_match:
493 {
494 throw std::invalid_argument("Invalid comparison for string field");
495 }
496 }
497 } else {
498 throw std::domain_error("Cannot match a non-string field against an string value");
499 }
500 }
501
502 filter::filter(
503 field filterField,
504 comparison filterType,
505 bool filterValue) :
506 type_(type::singleton)
507 {
508 if (filterField.getType() == field::type::boolean)
509 {
510 switch (filterType)
511 {
512 case comparison::boolean_equals:
513 {
514 new(&singleton_.filterField) field(std::move(filterField));
515 singleton_.filterType = filterType;
516 singleton_.boolValue = filterValue;
517
518 break;
519 }
520
521 case comparison::string_equals:
522 case comparison::string_does_not_equal:
523 case comparison::string_is_like:
524 case comparison::string_is_not_like:
525 case comparison::int_equals:
526 case comparison::int_does_not_equal:
527 case comparison::int_is_at_least:
528 case comparison::int_is_greater_than:
529 case comparison::int_is_at_most:
530 case comparison::int_is_less_than:
531 case comparison::is_null:
532 case comparison::is_not_null:
533 case comparison::matches:
534 case comparison::does_not_match:
535 case comparison::hierarchally_matches:
536 case comparison::does_not_hierarchally_match:
537 {
538 throw std::invalid_argument("Invalid comparison for boolean field");
539 }
540 }
541 } else {
542 throw std::domain_error("Cannot match a non-boolean field against a boolean value");
543 }
544 }
545
546 filter::filter(
547 field filterField,
548 comparison filterType) :
549 type_(type::singleton)
550 {
551 if (filterField.isNullable())
552 {
553 switch (filterType)
554 {
555 case comparison::is_null:
556 case comparison::is_not_null:
557 {
558 new(&singleton_.filterField) field(std::move(filterField));
559 singleton_.filterType = filterType;
560
561 break;
562 }
563
564 case comparison::string_equals:
565 case comparison::string_does_not_equal:
566 case comparison::string_is_like:
567 case comparison::string_is_not_like:
568 case comparison::int_equals:
569 case comparison::int_does_not_equal:
570 case comparison::int_is_at_least:
571 case comparison::int_is_greater_than:
572 case comparison::int_is_at_most:
573 case comparison::int_is_less_than:
574 case comparison::boolean_equals:
575 case comparison::matches:
576 case comparison::does_not_match:
577 case comparison::hierarchally_matches:
578 case comparison::does_not_hierarchally_match:
579 {
580 throw std::invalid_argument("Incorrect constructor for given comparison");
581 }
582 }
583 } else {
584 throw std::domain_error("Cannot check nullity/non-nullity of non-nullable field");
585 }
586 }
587
588 filter::filter(
589 field joinOn,
590 comparison filterType,
591 filter joinCondition) :
592 type_(type::singleton)
593 {
594 switch (joinOn.getType())
595 {
596 case field::type::join:
597 case field::type::join_through:
598 {
599 switch (filterType)
600 {
601 case comparison::matches:
602 case comparison::does_not_match:
603 {
604 new(&singleton_.filterField) field(std::move(joinOn));
605 singleton_.filterType = filterType;
606 new(&singleton_.join) std::unique_ptr<filter>(new filter(joinCondition.normalize(singleton_.filterField.getJoinObject())));
607
608 break;
609 }
610
611 case comparison::int_equals:
612 case comparison::int_does_not_equal:
613 case comparison::int_is_at_least:
614 case comparison::int_is_greater_than:
615 case comparison::int_is_at_most:
616 case comparison::int_is_less_than:
617 case comparison::boolean_equals:
618 case comparison::string_equals:
619 case comparison::string_does_not_equal:
620 case comparison::string_is_like:
621 case comparison::string_is_not_like:
622 case comparison::is_null:
623 case comparison::is_not_null:
624 case comparison::hierarchally_matches:
625 case comparison::does_not_hierarchally_match:
626 {
627 throw std::invalid_argument("Incorrect constructor for given comparison");
628 }
629 }
630
631 break;
632 }
633
634 case field::type::hierarchal_join:
635 {
636 switch (filterType)
637 {
638 case comparison::hierarchally_matches:
639 case comparison::does_not_hierarchally_match:
640 {
641 new(&singleton_.filterField) field(std::move(joinOn));
642 singleton_.filterType = filterType;
643 new(&singleton_.join) std::unique_ptr<filter>(new filter(joinCondition.normalize(singleton_.filterField.getObject())));
644
645 break;
646 }
647
648 case comparison::int_equals:
649 case comparison::int_does_not_equal:
650 case comparison::int_is_at_least:
651 case comparison::int_is_greater_than:
652 case comparison::int_is_at_most:
653 case comparison::int_is_less_than:
654 case comparison::boolean_equals:
655 case comparison::string_equals:
656 case comparison::string_does_not_equal:
657 case comparison::string_is_like:
658 case comparison::string_is_not_like:
659 case comparison::is_null:
660 case comparison::is_not_null:
661 case comparison::matches:
662 case comparison::does_not_match:
663 {
664 throw std::invalid_argument("Incorrect constructor for given comparison");
665 }
666 }
667
668 break;
669 }
670
671 case field::type::undefined:
672 case field::type::string:
673 case field::type::integer:
674 case field::type::boolean:
675 {
676 throw std::domain_error("Matching field must be a join field");
677 }
678 }
679 }
680
681 field filter::getField() const
682 {
683 if (type_ == type::singleton)
684 {
685 return singleton_.filterField;
686 } else {
687 throw std::domain_error("This filter does not have a field");
688 }
689 }
690
691 filter::comparison filter::getComparison() const
692 {
693 if (type_ == type::singleton)
694 {
695 return singleton_.filterType;
696 } else {
697 throw std::domain_error("This filter does not have a comparison");
698 }
699 }
700
701 filter filter::getJoinCondition() const
702 {
703 if (type_ == type::singleton)
704 {
705 switch (singleton_.filterType)
706 {
707 case comparison::matches:
708 case comparison::does_not_match:
709 case comparison::hierarchally_matches:
710 case comparison::does_not_hierarchally_match:
711 {
712 return *singleton_.join;
713 }
714
715 case comparison::string_equals:
716 case comparison::string_does_not_equal:
717 case comparison::string_is_like:
718 case comparison::string_is_not_like:
719 case comparison::int_equals:
720 case comparison::int_does_not_equal:
721 case comparison::int_is_at_least:
722 case comparison::int_is_greater_than:
723 case comparison::int_is_at_most:
724 case comparison::int_is_less_than:
725 case comparison::boolean_equals:
726 case comparison::is_null:
727 case comparison::is_not_null:
728 {
729 throw std::domain_error("This filter does not have a join condition");
730 }
731 }
732 } else {
733 throw std::domain_error("This filter does not have a join condition");
734 }
735 }
736
737 std::string filter::getStringArgument() const
738 {
739 if (type_ == type::singleton)
740 {
741 switch (singleton_.filterType)
742 {
743 case comparison::string_equals:
744 case comparison::string_does_not_equal:
745 case comparison::string_is_like:
746 case comparison::string_is_not_like:
747 {
748 return singleton_.stringValue;
749 }
750
751 case comparison::int_equals:
752 case comparison::int_does_not_equal:
753 case comparison::int_is_at_least:
754 case comparison::int_is_greater_than:
755 case comparison::int_is_at_most:
756 case comparison::int_is_less_than:
757 case comparison::boolean_equals:
758 case comparison::is_null:
759 case comparison::is_not_null:
760 case comparison::matches:
761 case comparison::does_not_match:
762 case comparison::hierarchally_matches:
763 case comparison::does_not_hierarchally_match:
764 {
765 throw std::domain_error("This filter does not have a string argument");
766 }
767 }
768 } else {
769 throw std::domain_error("This filter does not have a string argument");
770 }
771 }
772
773 int filter::getIntegerArgument() const
774 {
775 if (type_ == type::singleton)
776 {
777 switch (singleton_.filterType)
778 {
779 case comparison::int_equals:
780 case comparison::int_does_not_equal:
781 case comparison::int_is_at_least:
782 case comparison::int_is_greater_than:
783 case comparison::int_is_at_most:
784 case comparison::int_is_less_than:
785 {
786 return singleton_.intValue;
787 }
788
789 case comparison::string_equals:
790 case comparison::string_does_not_equal:
791 case comparison::string_is_like:
792 case comparison::string_is_not_like:
793 case comparison::boolean_equals:
794 case comparison::is_null:
795 case comparison::is_not_null:
796 case comparison::matches:
797 case comparison::does_not_match:
798 case comparison::hierarchally_matches:
799 case comparison::does_not_hierarchally_match:
800 {
801 throw std::domain_error("This filter does not have an integer argument");
802 }
803 }
804 } else {
805 throw std::domain_error("This filter does not have an integer argument");
806 }
807 }
808
809 bool filter::getBooleanArgument() const
810 {
811 if ((type_ == type::singleton) && (singleton_.filterType == comparison::boolean_equals))
812 {
813 return singleton_.boolValue;
814 } else {
815 throw std::domain_error("This filter does not have a boolean argument");
816 }
817 }
818
// Builds a group filter with no children. `orlogic` is stored as given.
// NOTE(review): presumably true combines the children with OR and false with
// AND — confirm against the code that consumes getOrlogic().
819 filter::filter(bool orlogic) : type_(type::group)
820 {
// The child list lives in a union, so it is constructed in place.
821 new(&group_.children) std::list<filter>();
822 group_.orlogic = orlogic;
823 }
824
825 bool filter::getOrlogic() const
826 {
827 if (type_ == type::group)
828 {
829 return group_.orlogic;
830 } else {
831 throw std::domain_error("This filter is not a group filter");
832 }
833 }
834
// Returns a copy of this filter with `condition` appended as a child.
// The work is done by operator+= on the copy, so this throws
// std::domain_error when this filter is not a group filter.
835 filter filter::operator+(filter condition) const
836 {
837 filter result(*this);
838 result += std::move(condition);
839
840 return result;
841 }
842
843 filter& filter::operator+=(filter condition)
844 {
845 if (type_ == type::group)
846 {
847 group_.children.push_back(std::move(condition));
848
849 return *this;
850 } else {
851 throw std::domain_error("Children can only be added to group filters");
852 }
853 }
854
855 filter::const_iterator filter::begin() const
856 {
857 if (type_ == type::group)
858 {
859 return std::begin(group_.children);
860 } else {
861 throw std::domain_error("This filter has no children");
862 }
863 }
864
865 filter::const_iterator filter::end() const
866 {
867 if (type_ == type::group)
868 {
869 return std::end(group_.children);
870 } else {
871 throw std::domain_error("This filter has no children");
872 }
873 }
874
875 filter filter::operator!() const
876 {
877 switch (type_)
878 {
879 case type::empty:
880 {
881 return {};
882 }
883
884 case type::singleton:
885 {
886 switch (singleton_.filterType)
887 {
888 case comparison::int_equals:
889 {
890 return filter(singleton_.filterField, comparison::int_does_not_equal, singleton_.intValue);
891 }
892
893 case comparison::int_does_not_equal:
894 {
895 return filter(singleton_.filterField, comparison::int_equals, singleton_.intValue);
896 }
897
898 case comparison::int_is_at_least:
899 {
900 return filter(singleton_.filterField, comparison::int_is_less_than, singleton_.intValue);
901 }
902
903 case comparison::int_is_greater_than:
904 {
905 return filter(singleton_.filterField, comparison::int_is_at_most, singleton_.intValue);
906 }
907
908 case comparison::int_is_at_most:
909 {
910 return filter(singleton_.filterField, comparison::int_is_greater_than, singleton_.intValue);
911 }
912
913 case comparison::int_is_less_than:
914 {
915 return filter(singleton_.filterField, comparison::int_is_at_least, singleton_.intValue);
916 }
917
918 case comparison::boolean_equals:
919 {
920 return filter(singleton_.filterField, comparison::boolean_equals, !singleton_.boolValue);
921 }
922
923 case comparison::string_equals:
924 {
925 return filter(singleton_.filterField, comparison::string_does_not_equal, singleton_.stringValue);
926 }
927
928 case comparison::string_does_not_equal:
929 {
930 return filter(singleton_.filterField, comparison::string_equals, singleton_.stringValue);
931 }
932
933 case comparison::string_is_like:
934 {
935 return filter(singleton_.filterField, comparison::string_is_not_like, singleton_.stringValue);
936 }
937
938 case comparison::string_is_not_like:
939 {
940 return filter(singleton_.filterField, comparison::string_is_like, singleton_.stringValue);
941 }
942
943 case comparison::is_null:
944 {
945 return filter(singleton_.filterField, comparison::is_not_null);
946 }
947
948 case comparison::is_not_null:
949 {
950 return filter(singleton_.filterField, comparison::is_null);
951 }
952
953 case comparison::matches:
954 {
955 return filter(singleton_.filterField, comparison::does_not_match, *singleton_.join);
956 }
957
958 case comparison::does_not_match:
959 {
960 return filter(singleton_.filterField, comparison::matches, *singleton_.join);
961 }
962
963 case comparison::hierarchally_matches:
964 {
965 return filter(singleton_.filterField, comparison::does_not_hierarchally_match, *singleton_.join);
966 }
967
968 case comparison::does_not_hierarchally_match:
969 {
970 return filter(singleton_.filterField, comparison::hierarchally_matches, *singleton_.join);
971 }
972 }
973 }
974
975 case type::group:
976 {
977 filter result(!group_.orlogic);
978
979 for (const filter& child : group_.children)
980 {
981 result += !child;
982 }
983
984 return result;
985 }
986 }
987 }
988
989 filter& filter::operator&=(filter condition)
990 {
991 return (*this = (*this && std::move(condition)));
992 }
993
994 filter& filter::operator|=(filter condition)
995 {
996 return (*this = (*this || std::move(condition)));
997 }
998
999 filter filter::operator&&(filter condition) const
1000 {
1001 switch (type_)
1002 {
1003 case type::empty:
1004 {
1005 return condition;
1006 }
1007
1008 case type::singleton:
1009 {
1010 filter result(false);
1011 result.group_.children.push_back(*this);
1012 result.group_.children.push_back(std::move(condition));
1013
1014 return result;
1015 }
1016
1017 case type::group:
1018 {
1019 if (group_.orlogic)
1020 {
1021 filter result(false);
1022 result.group_.children.push_back(*this);
1023 result.group_.children.push_back(std::move(condition));
1024
1025 return result;
1026 } else {
1027 filter result(*this);
1028 result.group_.children.push_back(std::move(condition));
1029
1030 return result;
1031 }
1032 }
1033 }
1034 }
1035
1036 filter filter::operator||(filter condition) const
1037 {
1038 switch (type_)
1039 {
1040 case type::empty:
1041 {
1042 return condition;
1043 }
1044
1045 case type::singleton:
1046 {
1047 filter result(true);
1048 result.group_.children.push_back(*this);
1049 result.group_.children.push_back(std::move(condition));
1050
1051 return result;
1052 }
1053
1054 case type::group:
1055 {
1056 if (!group_.orlogic)
1057 {
1058 filter result(true);
1059 result.group_.children.push_back(*this);
1060 result.group_.children.push_back(std::move(condition));
1061
1062 return result;
1063 } else {
1064 filter result(*this);
1065 result.group_.children.push_back(std::move(condition));
1066
1067 return result;
1068 }
1069 }
1070 }
1071 }
1072
1073 filter filter::normalize(object context) const
1074 {
1075 {
1076 switch (type_)
1077 {
1078 case type::empty:
1079 {
1080 return *this;
1081 }
1082
1083 case type::singleton:
1084 {
1085 // First, switch on the normalized context, and then switch on the
1086 // current context. We recursively recontextualize by using the
1087 // current filter as a subquery for a join such that the context of
1088 // the subquery is one step closer to the context of the current
1089 // filter, and then letting the filter constructor normalize the
1090 // subquery.
1091 switch (context)
1092 {
1093 case object::undefined:
1094 {
1095 // An undefined object indicates no participation in
1096 // recontexualization.
1097 return *this;
1098 }
1099
1100 case object::notion:
1101 {
1102 switch (singleton_.filterField.getObject())
1103 {
1104 case object::undefined:
1105 case object::notion:
1106 {
1107 return *this;
1108 }
1109
1110 case object::word:
1111 case object::group:
1112 case object::frame:
1113 case object::lemma:
1114 case object::form:
1115 case object::pronunciation:
1116 {
1117 return (verbly::notion::word %= *this);
1118 }
1119 }
1120 }
1121
1122 case object::word:
1123 {
1124 switch (singleton_.filterField.getObject())
1125 {
1126 case object::notion:
1127 {
1128 return (verbly::word::notion %= *this);
1129 }
1130
1131 case object::undefined:
1132 case object::word:
1133 {
1134 return *this;
1135 }
1136
1137 case object::group:
1138 case object::frame:
1139 {
1140 return (verbly::word::group %= *this);
1141 }
1142
1143 case object::lemma:
1144 case object::form:
1145 case object::pronunciation:
1146 {
1147 return (verbly::word::lemma %= *this);
1148 }
1149 }
1150
1151 case object::group:
1152 {
1153 switch (singleton_.filterField.getObject())
1154 {
1155 case object::undefined:
1156 case object::group:
1157 {
1158 return *this;
1159 }
1160
1161 case object::notion:
1162 case object::word:
1163 case object::lemma:
1164 case object::form:
1165 case object::pronunciation:
1166 {
1167 return (verbly::group::word %= *this);
1168 }
1169
1170 case object::frame:
1171 {
1172 return (verbly::group::frame %= *this);
1173 }
1174 }
1175 }
1176
1177 case object::frame:
1178 {
1179 switch (singleton_.filterField.getObject())
1180 {
1181 case object::undefined:
1182 case object::frame:
1183 {
1184 return *this;
1185 }
1186
1187 case object::notion:
1188 case object::word:
1189 case object::group:
1190 case object::lemma:
1191 case object::form:
1192 case object::pronunciation:
1193 {
1194 return (verbly::frame::group %= *this);
1195 }
1196 }
1197 }
1198
1199 case object::lemma:
1200 {
1201 switch (singleton_.filterField.getObject())
1202 {
1203 case object::notion:
1204 case object::word:
1205 case object::group:
1206 case object::frame:
1207 {
1208 return verbly::lemma::word %= *this;
1209 }
1210
1211 case object::undefined:
1212 case object::lemma:
1213 {
1214 return *this;
1215 }
1216
1217 case object::form:
1218 case object::pronunciation:
1219 {
1220 return (verbly::lemma::form(inflection::base) %= *this);
1221 }
1222 }
1223 }
1224
1225 case object::form:
1226 {
1227 switch (singleton_.filterField.getObject())
1228 {
1229 case object::notion:
1230 case object::word:
1231 case object::group:
1232 case object::frame:
1233 case object::lemma:
1234 {
1235 return verbly::form::lemma(inflection::base) %= *this;
1236 }
1237
1238 case object::undefined:
1239 case object::form:
1240 {
1241 return *this;
1242 }
1243
1244 case object::pronunciation:
1245 {
1246 return (verbly::form::pronunciation %= *this);
1247 }
1248 }
1249 }
1250
1251 case object::pronunciation:
1252 {
1253 switch (singleton_.filterField.getObject())
1254 {
1255 case object::notion:
1256 case object::word:
1257 case object::group:
1258 case object::frame:
1259 case object::lemma:
1260 case object::form:
1261 {
1262 return verbly::pronunciation::form %= *this;
1263 }
1264
1265 case object::undefined:
1266 case object::pronunciation:
1267 {
1268 return *this;
1269 }
1270 }
1271 }
1272 }
1273 }
1274 }
1275
1276 case type::group:
1277 {
1278 filter result(group_.orlogic);
1279 std::map<field, filter> joins;
1280
1281 for (const filter& child : group_.children)
1282 {
1283 filter normalized = child.normalize(context);
1284
1285 // Notably, this does not attempt to merge hierarchal matches.
1286 switch (normalized.getType())
1287 {
1288 case type::singleton:
1289 {
1290 switch (normalized.getComparison())
1291 {
1292 case comparison::matches:
1293 {
1294 if (!joins.count(normalized.singleton_.filterField))
1295 {
1296 joins[normalized.getField()] = filter(group_.orlogic);
1297 }
1298
1299 joins.at(normalized.getField()) += std::move(*normalized.singleton_.join);
1300
1301 break;
1302 }
1303
1304 case comparison::does_not_match:
1305 {
1306 if (!joins.count(normalized.singleton_.filterField))
1307 {
1308 joins[normalized.getField()] = filter(group_.orlogic);
1309 }
1310
1311 joins.at(normalized.getField()) += !*normalized.singleton_.join;
1312
1313 break;
1314 }
1315
1316 case comparison::int_equals:
1317 case comparison::int_does_not_equal:
1318 case comparison::int_is_at_least:
1319 case comparison::int_is_greater_than:
1320 case comparison::int_is_at_most:
1321 case comparison::int_is_less_than:
1322 case comparison::boolean_equals:
1323 case comparison::string_equals:
1324 case comparison::string_does_not_equal:
1325 case comparison::string_is_like:
1326 case comparison::string_is_not_like:
1327 case comparison::is_null:
1328 case comparison::is_not_null:
1329 case comparison::hierarchally_matches:
1330 case comparison::does_not_hierarchally_match:
1331 {
1332 result += std::move(normalized);
1333
1334 break;
1335 }
1336 }
1337
1338 break;
1339 }
1340
1341 case type::group:
1342 case type::empty:
1343 {
1344 result += std::move(normalized);
1345
1346 break;
1347 }
1348 }
1349 }
1350
1351 for (auto& mapping : joins)
1352 {
1353 const field& joinOn = mapping.first;
1354 filter& joinCondition = mapping.second;
1355
1356 result += (joinOn %= joinCondition.normalize(joinOn.getJoinObject()));
1357 }
1358
1359 return result;
1360 }
1361 }
1362 }
1363 }
1364
1365};
diff --git a/lib/filter.h b/lib/filter.h new file mode 100644 index 0000000..d213d7a --- /dev/null +++ b/lib/filter.h
@@ -0,0 +1,143 @@
1#ifndef FILTER_H_932BA9C6
2#define FILTER_H_932BA9C6
3
4#include <list>
5#include <string>
6#include <memory>
7#include "field.h"
8#include "enums.h"
9
10namespace verbly {
11
// A query condition. A filter is a tagged union that is either empty, a
// singleton (one comparison on one field, possibly a join with a nested
// filter), or a group (a list of child filters combined with AND or OR).
//
// The active union member is tracked by type_; the special member functions
// below are user-declared because the union holds non-trivial members.
class filter {
public:
  // Which union member is active.
  enum class type {
    empty,
    singleton,
    group
  };

  // Comparison performed by a singleton filter. The prefix of each
  // enumerator indicates the kind of operand it is used with in
  // filter::operator!: int_*, boolean_*, string_*, none for the null
  // checks, and a nested filter for the *match* comparisons.
  enum class comparison {
    int_equals,
    int_does_not_equal,
    int_is_at_least,
    int_is_greater_than,
    int_is_at_most,
    int_is_less_than,
    boolean_equals,
    string_equals,
    string_does_not_equal,
    string_is_like,
    string_is_not_like,
    is_null,
    is_not_null,
    matches,
    does_not_match,
    hierarchally_matches,
    does_not_hierarchally_match
  };

  // Copy and move constructors

  filter(const filter& other);
  filter(filter&& other);

  // Assignment

  // Takes its argument by value, so it serves as both copy and move
  // assignment.
  filter& operator=(filter other);

  // Swap

  friend void swap(filter& first, filter& second);

  // Destructor

  ~filter();

  // Accessors

  // Returns which kind of filter this is; never throws.
  type getType() const
  {
    return type_;
  }

  // Empty

  filter();

  // Singleton

  // One constructor per operand kind: integer, string, boolean, no operand,
  // and join (nested filter).
  filter(field filterField, comparison filterType, int filterValue);
  filter(field filterField, comparison filterType, std::string filterValue);
  filter(field filterField, comparison filterType, bool filterValue);
  filter(field filterField, comparison filterType);
  filter(field joinOn, comparison filterType, filter joinCondition);

  field getField() const;

  comparison getComparison() const;

  filter getJoinCondition() const;

  std::string getStringArgument() const;

  // Throws std::domain_error if the filter has no integer argument.
  int getIntegerArgument() const;

  // Throws std::domain_error unless this is a boolean_equals singleton.
  bool getBooleanArgument() const;

  // Group

  // Creates a group with no children; orlogic selects OR (true) or AND
  // (false). Explicit so that a bool never silently converts to a filter.
  explicit filter(bool orlogic);

  // Throws std::domain_error if this is not a group.
  bool getOrlogic() const;

  // Append a child to a group (operator+ works on a copy). Both throw
  // std::domain_error if this is not a group.
  filter operator+(filter condition) const;

  filter& operator+=(filter condition);

  using const_iterator = std::list<filter>::const_iterator;

  // Iterate over the children of a group; both throw std::domain_error if
  // this is not a group.
  const_iterator begin() const;

  const_iterator end() const;

  // Negation

  // Returns the logical negation: singletons get the opposite comparison,
  // groups are negated child by child with the and/or flag inverted.
  filter operator!() const;

  // Groupifying

  // Combine two filters. An empty left operand yields the right operand; a
  // group that already uses the requested logic is extended; anything else
  // is wrapped together with the right operand in a new group. Note that,
  // being overloaded operators, these do not short-circuit.
  filter operator&&(filter condition) const;
  filter operator||(filter condition) const;

  filter& operator&=(filter condition);
  filter& operator|=(filter condition);

  // Utility

  // Rewrites the filter so it applies to objects of type `context`,
  // wrapping foreign-object conditions in joins and merging joins on the
  // same field within a group.
  filter normalize(object context) const;

private:
  // Storage for the active alternative; which one is live is given by
  // type_. Neither is used when type_ is type::empty.
  union {
    struct {
      field filterField;
      comparison filterType;
      // Operand of the comparison; the live member depends on filterType.
      union {
        std::unique_ptr<filter> join;
        std::string stringValue;
        int intValue;
        bool boolValue;
      };
    } singleton_;
    struct {
      std::list<filter> children;
      bool orlogic;
    } group_;
  };
  type type_ = type::empty;

};
140
141};
142
143#endif /* end of include guard: FILTER_H_932BA9C6 */
diff --git a/lib/form.cpp b/lib/form.cpp new file mode 100644 index 0000000..8ba3bd7 --- /dev/null +++ b/lib/form.cpp
@@ -0,0 +1,53 @@
1#include "form.h"
2#include <sqlite3.h>
3#include "filter.h"
4#include "pronunciation.h"
5#include "database.h"
6#include "query.h"
7
8namespace verbly {
9
10 const object form::objectType = object::form;
11
12 const std::list<std::string> form::select = {"form_id", "form", "complexity", "proper"};
13
14 const field form::id = field::integerField(object::form, "form_id");
15 const field form::text = field::stringField(object::form, "form");
16 const field form::complexity = field::integerField(object::form, "complexity");
17 const field form::proper = field::booleanField(object::form, "proper");
18
19 const field form::pronunciation = field::joinThrough(object::form, "form_id", object::pronunciation, "forms_pronunciations", "pronunciation_id");
20
21 const field form::lemmaJoin = field::joinField(object::form, "form_id", object::lemma);
22 const field form::inflectionCategory = field::integerField("lemmas_forms", "category");
23
24 form::form(const database& db, sqlite3_stmt* row) : db_(&db), valid_(true)
25 {
26 id_ = sqlite3_column_int(row, 0);
27 text_ = std::string(reinterpret_cast<const char*>(sqlite3_column_text(row, 1)));
28 complexity_ = sqlite3_column_int(row, 2);
29 proper_ = (sqlite3_column_int(row, 3) == 1);
30 }
31
32 filter operator%=(form::inflection_field check, filter joinCondition)
33 {
34 return (form::lemmaJoin %= (joinCondition && (form::inflectionCategory == check.getCategory())));
35 }
36
37 const std::vector<pronunciation>& form::getPronunciations() const
38 {
39 if (!valid_)
40 {
41 throw std::domain_error("Bad access to uninitialized form");
42 }
43
44 if (!initializedPronunciations_)
45 {
46 pronunciations_ = db_->pronunciations(pronunciation::form %= *this, false, -1).all();
47 initializedPronunciations_ = true;
48 }
49
50 return pronunciations_;
51 }
52
53};
diff --git a/lib/form.h b/lib/form.h new file mode 100644 index 0000000..c6a1353 --- /dev/null +++ b/lib/form.h
@@ -0,0 +1,149 @@
1#ifndef FORM_H_3A6C962C
2#define FORM_H_3A6C962C
3
4#include <list>
5#include <vector>
6#include <string>
7#include <stdexcept>
8#include "field.h"
9#include "filter.h"
10
11struct sqlite3_stmt;
12
13namespace verbly {
14
15 class pronunciation;
16 class database;
17
18 class form {
19 public:
20
21 // Default constructor
22
23 form() = default;
24
25 // Construct from database
26
27 form(const database& db, sqlite3_stmt* row);
28
29 // Accessors
30
31 operator bool() const
32 {
33 return valid_;
34 }
35
36 int getId() const
37 {
38 if (!valid_)
39 {
40 throw std::domain_error("Bad access to uninitialized form");
41 }
42
43 return id_;
44 }
45
46 std::string getText() const
47 {
48 if (!valid_)
49 {
50 throw std::domain_error("Bad access to uninitialized form");
51 }
52
53 return text_;
54 }
55
56 int getComplexity() const
57 {
58 if (!valid_)
59 {
60 throw std::domain_error("Bad access to uninitialized form");
61 }
62
63 return complexity_;
64 }
65
66 bool isProper() const
67 {
68 if (!valid_)
69 {
70 throw std::domain_error("Bad access to uninitialized form");
71 }
72
73 return proper_;
74 }
75
76 const std::vector<pronunciation>& getPronunciations() const;
77
78 // Type info
79
80 static const object objectType;
81
82 static const std::list<std::string> select;
83
84 // Query fields
85
86 static const field id;
87 static const field text;
88 static const field complexity;
89 static const field proper;
90
91 operator filter() const
92 {
93 if (!valid_)
94 {
95 throw std::domain_error("Bad access to uninitialized form");
96 }
97
98 return (id == id_);
99 }
100
101 // Relationships to other objects
102
103 static const field pronunciation;
104
105 class inflection_field {
106 public:
107
108 inflection_field(inflection category) : category_(category)
109 {
110 }
111
112 const inflection getCategory() const
113 {
114 return category_;
115 }
116
117 private:
118
119 const inflection category_;
120 };
121
122 static const inflection_field lemma(inflection category)
123 {
124 return inflection_field(category);
125 }
126
127 friend filter operator%=(form::inflection_field check, filter joinCondition);
128
129 private:
130 bool valid_ = false;
131
132 int id_;
133 std::string text_;
134 int complexity_ ;
135 bool proper_;
136
137 const database* db_;
138
139 mutable bool initializedPronunciations_ = false;
140 mutable std::vector<class pronunciation> pronunciations_;
141
142 static const field lemmaJoin;
143 static const field inflectionCategory;
144
145 };
146
147};
148
149#endif /* end of include guard: FORM_H_3A6C962C */
diff --git a/lib/frame.cpp b/lib/frame.cpp index ccec81b..bc3f842 100644 --- a/lib/frame.cpp +++ b/lib/frame.cpp
@@ -1,320 +1,21 @@
1#include "verbly.h" 1#include "frame.h"
2#include <sqlite3.h>
2 3
3namespace verbly { 4namespace verbly {
4 5
5 frame::selrestr::type frame::selrestr::get_type() const 6 const object frame::objectType = object::frame;
6 {
7 return _type;
8 }
9
10 frame::selrestr::selrestr(const selrestr& other)
11 {
12 _type = other._type;
13
14 switch (_type)
15 {
16 case frame::selrestr::type::singleton:
17 {
18 _singleton.pos = other._singleton.pos;
19 new(&_singleton.restriction) std::string(other._singleton.restriction);
20
21 break;
22 }
23
24 case frame::selrestr::type::group:
25 {
26 new(&_group.children) std::list<selrestr>(other._group.children);
27 _group.orlogic = other._group.orlogic;
28
29 break;
30 }
31
32 case frame::selrestr::type::empty:
33 {
34 // Nothing!
35
36 break;
37 }
38 }
39 }
40
41 frame::selrestr::~selrestr()
42 {
43 switch (_type)
44 {
45 case frame::selrestr::type::singleton:
46 {
47 using string_type = std::string;
48 _singleton.restriction.~string_type();
49
50 break;
51 }
52
53 case frame::selrestr::type::group:
54 {
55 using list_type = std::list<selrestr>;
56 _group.children.~list_type();
57
58 break;
59 }
60
61 case frame::selrestr::type::empty:
62 {
63 // Nothing!
64
65 break;
66 }
67 }
68 }
69
70 frame::selrestr& frame::selrestr::operator=(const selrestr& other)
71 {
72 this->~selrestr();
73
74 _type = other._type;
75
76 switch (_type)
77 {
78 case frame::selrestr::type::singleton:
79 {
80 _singleton.pos = other._singleton.pos;
81 new(&_singleton.restriction) std::string(other._singleton.restriction);
82
83 break;
84 }
85
86 case frame::selrestr::type::group:
87 {
88 new(&_group.children) std::list<selrestr>(other._group.children);
89 _group.orlogic = other._group.orlogic;
90
91 break;
92 }
93
94 case frame::selrestr::type::empty:
95 {
96 // Nothing!
97
98 break;
99 }
100 }
101
102 return *this;
103 }
104
105 frame::selrestr::selrestr() : _type(frame::selrestr::type::empty)
106 {
107
108 }
109
110 frame::selrestr::selrestr(std::string restriction, bool pos) : _type(frame::selrestr::type::singleton)
111 {
112 new(&_singleton.restriction) std::string(restriction);
113 _singleton.pos = pos;
114 }
115
116 std::string frame::selrestr::get_restriction() const
117 {
118 assert(_type == frame::selrestr::type::singleton);
119
120 return _singleton.restriction;
121 }
122
123 bool frame::selrestr::get_pos() const
124 {
125 assert(_type == frame::selrestr::type::singleton);
126
127 return _singleton.pos;
128 }
129
130 frame::selrestr::selrestr(std::list<selrestr> children, bool orlogic) : _type(frame::selrestr::type::group)
131 {
132 new(&_group.children) std::list<selrestr>(children);
133 _group.orlogic = orlogic;
134 }
135
136 std::list<frame::selrestr> frame::selrestr::get_children() const
137 {
138 assert(_type == frame::selrestr::type::group);
139
140 return _group.children;
141 }
142
143 std::list<frame::selrestr>::const_iterator frame::selrestr::begin() const
144 {
145 assert(_type == frame::selrestr::type::group);
146
147 return _group.children.begin();
148 }
149
150 std::list<frame::selrestr>::const_iterator frame::selrestr::end() const
151 {
152 assert(_type == frame::selrestr::type::group);
153
154 return _group.children.end();
155 }
156
157 bool frame::selrestr::get_orlogic() const
158 {
159 assert(_type == frame::selrestr::type::group);
160
161 return _group.orlogic;
162 }
163
164 frame::part::type frame::part::get_type() const
165 {
166 return _type;
167 }
168
169 frame::part::part()
170 {
171
172 }
173 7
174 frame::part::part(const part& other) 8 const std::list<std::string> frame::select = {"frame_id", "data"};
175 {
176 _type = other._type;
177
178 switch (_type)
179 {
180 case frame::part::type::noun_phrase:
181 {
182 new(&_noun_phrase.role) std::string(other._noun_phrase.role);
183 new(&_noun_phrase.selrestrs) selrestr(other._noun_phrase.selrestrs);
184 new(&_noun_phrase.synrestrs) std::set<std::string>(other._noun_phrase.synrestrs);
185
186 break;
187 }
188
189 case frame::part::type::literal_preposition:
190 {
191 new(&_literal_preposition.choices) std::vector<std::string>(other._literal_preposition.choices);
192
193 break;
194 }
195
196 case frame::part::type::selection_preposition:
197 {
198 new(&_selection_preposition.preprestrs) std::vector<std::string>(other._selection_preposition.preprestrs);
199
200 break;
201 }
202
203 case frame::part::type::literal:
204 {
205 new(&_literal.lexval) std::string(other._literal.lexval);
206
207 break;
208 }
209
210 default:
211 {
212 // Nothing!
213
214 break;
215 }
216 }
217 }
218 9
219 frame::part::~part() 10 const field frame::id = field::integerField(object::frame, "frame_id");
220 {
221 switch (_type)
222 {
223 case frame::part::type::noun_phrase:
224 {
225 using string_type = std::string;
226 using set_type = std::set<std::string>;
227
228 _noun_phrase.role.~string_type();
229 _noun_phrase.selrestrs.~selrestr();
230 _noun_phrase.synrestrs.~set_type();
231
232 break;
233 }
234
235 case frame::part::type::literal_preposition:
236 {
237 using vector_type = std::vector<std::string>;
238 _literal_preposition.choices.~vector_type();
239
240 break;
241 }
242
243 case frame::part::type::selection_preposition:
244 {
245 using vector_type = std::vector<std::string>;
246 _selection_preposition.preprestrs.~vector_type();
247
248 break;
249 }
250
251 case frame::part::type::literal:
252 {
253 using string_type = std::string;
254 _literal.lexval.~string_type();
255
256 break;
257 }
258
259 default:
260 {
261 // Nothing!
262
263 break;
264 }
265 }
266 }
267 11
268 std::string frame::part::get_role() const 12 const field frame::group = field::joinThrough(object::frame, "frame_id", object::group, "groups_frames", "group_id");
269 {
270 assert(_type == frame::part::type::noun_phrase);
271
272 return _noun_phrase.role;
273 }
274 13
275 frame::selrestr frame::part::get_selrestrs() const 14 frame::frame(const database& db, sqlite3_stmt* row) : db_(&db), valid_(true)
276 { 15 {
277 assert(_type == frame::part::type::noun_phrase); 16 id_ = sqlite3_column_int(row, 0);
278 17
279 return _noun_phrase.selrestrs; 18 // TODO: Initialize frame data from row.
280 }
281
282 std::set<std::string> frame::part::get_synrestrs() const
283 {
284 assert(_type == frame::part::type::noun_phrase);
285
286 return _noun_phrase.synrestrs;
287 }
288
289 std::vector<std::string> frame::part::get_choices() const
290 {
291 assert(_type == frame::part::type::literal_preposition);
292
293 return _literal_preposition.choices;
294 }
295
296 std::vector<std::string> frame::part::get_preprestrs() const
297 {
298 assert(_type == frame::part::type::selection_preposition);
299
300 return _selection_preposition.preprestrs;
301 }
302
303 std::string frame::part::get_literal() const
304 {
305 assert(_type == frame::part::type::literal);
306
307 return _literal.lexval;
308 }
309
310 std::vector<frame::part> frame::parts() const
311 {
312 return _parts;
313 }
314
315 std::map<std::string, frame::selrestr> frame::roles() const
316 {
317 return _roles;
318 } 19 }
319 20
320}; 21};
diff --git a/lib/frame.h b/lib/frame.h index fa57e1b..68a4346 100644 --- a/lib/frame.h +++ b/lib/frame.h
@@ -1,118 +1,78 @@
1#ifndef FRAME_H_9A5D90FE 1#ifndef FRAME_H_EA29065A
2#define FRAME_H_9A5D90FE 2#define FRAME_H_EA29065A
3
4#include <stdexcept>
5#include <list>
6#include "field.h"
7#include "filter.h"
8
9struct sqlite3_stmt;
3 10
4namespace verbly { 11namespace verbly {
5 12
6 class frame_query; 13 class database;
7 14
8 class frame { 15 class frame {
9 public: 16 public:
10 class selrestr { 17
11 public: 18 // Default constructor
12 enum class type { 19
13 empty, 20 frame() = default;
14 singleton, 21
15 group 22 // Construct from database
16 }; 23
17 24 frame(const database& db, sqlite3_stmt* row);
18 type get_type() const; 25
19 selrestr(const selrestr& other); 26 // Accessors
20 ~selrestr(); 27
21 selrestr& operator=(const selrestr& other); 28 operator bool() const
22 29 {
23 // Empty 30 return valid_;
24 selrestr(); 31 }
25 32
26 // Singleton 33 int getId() const
27 selrestr(std::string restriction, bool pos); 34 {
28 std::string get_restriction() const; 35 if (!valid_)
29 bool get_pos() const; 36 {
30 37 throw std::domain_error("Bad access to uninitialized frame");
31 // Group 38 }
32 selrestr(std::list<selrestr> children, bool orlogic);
33 std::list<selrestr> get_children() const;
34 std::list<selrestr>::const_iterator begin() const;
35 std::list<selrestr>::const_iterator end() const;
36 bool get_orlogic() const;
37
38 private:
39 union {
40 struct {
41 bool pos;
42 std::string restriction;
43 } _singleton;
44 struct {
45 std::list<selrestr> children;
46 bool orlogic;
47 } _group;
48 };
49 type _type;
50 };
51 39
52 class part { 40 return id_;
53 public: 41 }
54 enum class type { 42
55 noun_phrase, 43 // Type info
56 verb, 44
57 literal_preposition, 45 static const object objectType;
58 selection_preposition, 46
59 adjective, 47 static const std::list<std::string> select;
60 adverb, 48
61 literal 49 // Query fields
62 }; 50
63 51 static const field id;
64 type get_type() const; 52
65 part(const part& other); 53 operator filter() const
66 ~part(); 54 {
67 55 if (!valid_)
68 // Noun phrase 56 {
69 std::string get_role() const; 57 throw std::domain_error("Bad access to uninitialized frame");
70 selrestr get_selrestrs() const; 58 }
71 std::set<std::string> get_synrestrs() const;
72
73 // Literal preposition
74 std::vector<std::string> get_choices() const;
75
76 // Selection preposition
77 std::vector<std::string> get_preprestrs() const;
78
79 // Literal
80 std::string get_literal() const;
81
82 private:
83 friend class frame_query;
84
85 part();
86
87 union {
88 struct {
89 std::string role;
90 selrestr selrestrs;
91 std::set<std::string> synrestrs;
92 } _noun_phrase;
93 struct {
94 std::vector<std::string> choices;
95 } _literal_preposition;
96 struct {
97 std::vector<std::string> preprestrs;
98 } _selection_preposition;
99 struct {
100 std::string lexval;
101 } _literal;
102 };
103 type _type;
104 };
105 59
106 std::vector<part> parts() const; 60 return (id == id_);
107 std::map<std::string, selrestr> roles() const; 61 }
108 62
109 private: 63 // Relationships to other objects
110 friend class frame_query; 64
111 65 static const field group;
112 std::vector<part> _parts; 66
113 std::map<std::string, selrestr> _roles; 67 private:
68 bool valid_ = false;
69
70 int id_;
71
72 const database* db_;
73
114 }; 74 };
115 75
116}; 76};
117 77
118#endif /* end of include guard: FRAME_H_9A5D90FE */ 78#endif /* end of include guard: FRAME_H_EA29065A */
diff --git a/lib/group.cpp b/lib/group.cpp new file mode 100644 index 0000000..8b6d985 --- /dev/null +++ b/lib/group.cpp
@@ -0,0 +1,43 @@
1#include "group.h"
2#include <sqlite3.h>
3#include "frame.h"
4#include "database.h"
5#include "query.h"
6
7namespace verbly {
8
9 const object group::objectType = object::group;
10
11 const std::list<std::string> group::select = {"group_id", "data"};
12
13 const field group::id = field::integerField(object::group, "group_id");
14
15 const field group::frame = field::joinThrough(object::group, "group_id", object::frame, "groups_frames", "frame_id");
16 const field group::word = field::joinField(object::group, "group_id", object::word);
17
18 group::group(const database& db, sqlite3_stmt* row) : db_(&db), valid_(true)
19 {
20 id_ = sqlite3_column_int(row, 0);
21
22 // TODO: Initialize role data from row.
23 }
24
25 const std::vector<frame>& group::getFrames() const
26 {
27 if (!valid_)
28 {
29 throw std::domain_error("Bad access to uninitialized group");
30 }
31
32 if (!initializedFrames_)
33 {
34 frames_ = db_->frames(frame::group %= *this, false, -1).all();
35
36 initializedFrames_ = true;
37 }
38
39 return frames_;
40 }
41
42};
43
diff --git a/lib/group.h b/lib/group.h new file mode 100644 index 0000000..dd53503 --- /dev/null +++ b/lib/group.h
@@ -0,0 +1,87 @@
1#ifndef GROUP_H_BD6933C0
2#define GROUP_H_BD6933C0
3
4#include <stdexcept>
5#include <list>
6#include <vector>
7#include "field.h"
8#include "filter.h"
9
10struct sqlite3_stmt;
11
12namespace verbly {
13
14 class database;
15 class frame;
16
17 class group {
18 public:
19
20 // Default constructor
21
22 group() = default;
23
24 // Construct from database
25
26 group(const database& db, sqlite3_stmt* row);
27
28 // Accessors
29
30 operator bool() const
31 {
32 return valid_;
33 }
34
35 int getId() const
36 {
37 if (!valid_)
38 {
39 throw std::domain_error("Bad access to uninitialized group");
40 }
41
42 return id_;
43 }
44
45 const std::vector<frame>& getFrames() const;
46
47 // Type info
48
49 static const object objectType;
50
51 static const std::list<std::string> select;
52
53 // Query fields
54
55 static const field id;
56
57 operator filter() const
58 {
59 if (!valid_)
60 {
61 throw std::domain_error("Bad access to uninitialized group");
62 }
63
64 return (id == id_);
65 }
66
67 // Relationships to other objects
68
69 static const field frame;
70
71 static const field word;
72
73 private:
74 bool valid_ = false;
75
76 int id_;
77
78 const database* db_;
79
80 mutable bool initializedFrames_ = false;
81 mutable std::vector<class frame> frames_;
82
83 };
84
85};
86
87#endif /* end of include guard: GROUP_H_BD6933C0 */
diff --git a/lib/lemma.cpp b/lib/lemma.cpp new file mode 100644 index 0000000..f9e9fcc --- /dev/null +++ b/lib/lemma.cpp
@@ -0,0 +1,69 @@
1#include "lemma.h"
2#include <sqlite3.h>
3#include "database.h"
4#include "query.h"
5
6namespace verbly {
7
// Definitions of lemma's static type metadata and query fields.
8 const object lemma::objectType = object::lemma;
9
// The lemma constructor reads column 0 into id_; presumably this select list
// is what puts lemma_id in that column -- TODO confirm against the query code.
10 const std::list<std::string> lemma::select = {"lemma_id"};
11
12 const field lemma::id = field::integerField(object::lemma, "lemma_id");
13
14 const field lemma::word = field::joinField(object::lemma, "lemma_id", object::word);
15
// formJoin and inflectionCategory are the two halves of the condition built
// by operator%=(lemma::inflection_field, filter) in this file.
16 const field lemma::formJoin = field::joinField(object::lemma, "form_id", object::form);
17 const field lemma::inflectionCategory = field::integerField(object::lemma, "category");
18
19 filter operator%=(lemma::inflection_field check, filter joinCondition)
20 {
21 return (lemma::formJoin %= joinCondition) && (lemma::inflectionCategory == check.getCategory());
22 }
23
24 lemma::lemma(const database& db, sqlite3_stmt* row) : db_(&db), valid_(true)
25 {
26 id_ = sqlite3_column_int(row, 0);
27 }
28
29 const form& lemma::getBaseForm() const
30 {
31 if (!valid_)
32 {
33 throw std::domain_error("Bad access to uninitialized lemma");
34 }
35
36 if (!forms_.count(inflection::base))
37 {
38 initializeForm(inflection::base);
39 }
40
41 return forms_.at(inflection::base).front();
42 }
43
44 bool lemma::hasInflection(inflection category) const
45 {
46 return !getInflections(category).empty();
47 }
48
49 const std::vector<form>& lemma::getInflections(inflection category) const
50 {
51 if (!valid_)
52 {
53 throw std::domain_error("Bad access to uninitialized lemma");
54 }
55
56 if (!forms_.count(category))
57 {
58 initializeForm(category);
59 }
60
61 return forms_.at(category);
62 }
63
64 void lemma::initializeForm(inflection infl) const
65 {
66 forms_[infl] = db_->forms(form::lemma(infl) %= *this, false, -1).all();
67 }
68
69};
diff --git a/lib/lemma.h b/lib/lemma.h new file mode 100644 index 0000000..9a07f16 --- /dev/null +++ b/lib/lemma.h
@@ -0,0 +1,120 @@
1#ifndef LEMMA_H_0A180D30
2#define LEMMA_H_0A180D30
3
4#include <stdexcept>
5#include <vector>
6#include <list>
7#include <map>
8#include "field.h"
9#include "enums.h"
10#include "filter.h"
11
12struct sqlite3_stmt;
13
14namespace verbly {
15
16 class form;
17 class database;
18
19 class lemma {
20 public:
21
22 // Default constructor
23
24 lemma() = default;
25
26 // Construct from database
27
28 lemma(const database& db, sqlite3_stmt* row);
29
30 // Accessors
31
32 operator bool() const
33 {
34 return valid_;
35 }
36
37 int getId() const
38 {
39 if (!valid_)
40 {
41 throw std::domain_error("Bad access to uninitialized lemma");
42 }
43
44 return id_;
45 }
46
47 const form& getBaseForm() const;
48
49 bool hasInflection(inflection category) const;
50
51 const std::vector<form>& getInflections(inflection category) const;
52
53 // Type info
54
55 static const object objectType;
56
57 static const std::list<std::string> select;
58
59 // Query fields
60
61 static const field id;
62
63 operator filter() const
64 {
65 if (!valid_)
66 {
67 throw std::domain_error("Bad access to uninitialized lemma");
68 }
69
70 return (id == id_);
71 }
72
73 // Relationships to other objects
74
75 static const field word;
76
77 class inflection_field {
78 public:
79
80 inflection_field(inflection category) : category_(category)
81 {
82 }
83
84 const inflection getCategory() const
85 {
86 return category_;
87 }
88
89 private:
90
91 const inflection category_;
92 };
93
94 static const inflection_field form(inflection category)
95 {
96 return inflection_field(category);
97 }
98
99 friend filter operator%=(lemma::inflection_field check, filter joinCondition);
100
101 private:
102
103 void initializeForm(inflection category) const;
104
105 bool valid_ = false;
106
107 int id_;
108
109 mutable std::map<inflection, std::vector<class form>> forms_;
110
111 const database* db_;
112
113 static const field formJoin;
114 static const field inflectionCategory;
115
116 };
117
118};
119
120#endif /* end of include guard: LEMMA_H_0A180D30 */
diff --git a/lib/notion.cpp b/lib/notion.cpp new file mode 100644 index 0000000..16794d3 --- /dev/null +++ b/lib/notion.cpp
@@ -0,0 +1,94 @@
1#include "notion.h"
2#include <sqlite3.h>
3#include <sstream>
4
5namespace verbly {
6
// Definitions of notion's static type metadata and query fields.
7 const object notion::objectType = object::notion;
8
// The notion constructor reads columns 0-3 as id, part of speech, wnid and
// image count; presumably this select list fixes that column order -- TODO
// confirm against the query code.
9 const std::list<std::string> notion::select = {"notion_id", "part_of_speech", "wnid", "images"};
10
// NOTE(review): the extra `true` on wnid and images presumably marks the
// column as nullable (the constructor checks both for SQLITE_NULL) -- confirm
// against field::integerField.
11 const field notion::id = field::integerField(object::notion, "notion_id");
12 const field notion::partOfSpeech = field::integerField(object::notion, "part_of_speech");
13 const field notion::wnid = field::integerField(object::notion, "wnid", true);
14 const field notion::numOfImages = field::integerField(object::notion, "images", true);
15
16 const field notion::word = field::joinField(object::notion, "word_id", object::word);
17
// Every selfJoin / hierarchalSelfJoin below names a join table followed by two
// of its columns. Paired definitions use the same table with the two columns
// swapped, and each full* field repeats its plain counterpart's arguments
// through hierarchalSelfJoin.
18 const field notion::hypernyms = field::selfJoin(object::notion, "notion_id", "hypernymy", "hyponym_id", "hypernym_id");
19 const field notion::hyponyms = field::selfJoin(object::notion, "notion_id", "hypernymy", "hypernym_id", "hyponym_id");
20
21 const field notion::fullHypernyms = field::hierarchalSelfJoin(object::notion, "notion_id", "hypernymy", "hyponym_id", "hypernym_id");
22 const field notion::fullHyponyms = field::hierarchalSelfJoin(object::notion, "notion_id", "hypernymy", "hypernym_id", "hyponym_id");
23
24 const field notion::instances = field::selfJoin(object::notion, "notion_id", "instantiation", "class_id", "instance_id");
25 const field notion::classes = field::selfJoin(object::notion, "notion_id", "instantiation", "instance_id", "class_id");
26
27 const field notion::memberMeronyms = field::selfJoin(object::notion, "notion_id", "member_meronymy", "holonym_id", "meronym_id");
28 const field notion::memberHolonyms = field::selfJoin(object::notion, "notion_id", "member_meronymy", "meronym_id", "holonym_id");
29
30 const field notion::fullMemberMeronyms = field::hierarchalSelfJoin(object::notion, "notion_id", "member_meronymy", "holonym_id", "meronym_id");
31 const field notion::fullMemberHolonyms = field::hierarchalSelfJoin(object::notion, "notion_id", "member_meronymy", "meronym_id", "holonym_id");
32
33 const field notion::partMeronyms = field::selfJoin(object::notion, "notion_id", "part_meronymy", "holonym_id", "meronym_id");
34 const field notion::partHolonyms = field::selfJoin(object::notion, "notion_id", "part_meronymy", "meronym_id", "holonym_id");
35
36 const field notion::fullPartMeronyms = field::hierarchalSelfJoin(object::notion, "notion_id", "part_meronymy", "holonym_id", "meronym_id");
37 const field notion::fullPartHolonyms = field::hierarchalSelfJoin(object::notion, "notion_id", "part_meronymy", "meronym_id", "holonym_id");
38
39 const field notion::substanceMeronyms = field::selfJoin(object::notion, "notion_id", "substance_meronymy", "holonym_id", "meronym_id");
40 const field notion::substanceHolonyms = field::selfJoin(object::notion, "notion_id", "substance_meronymy", "meronym_id", "holonym_id");
41
42 const field notion::fullSubstanceMeronyms = field::hierarchalSelfJoin(object::notion, "notion_id", "substance_meronymy", "holonym_id", "meronym_id");
43 const field notion::fullSubstanceHolonyms = field::hierarchalSelfJoin(object::notion, "notion_id", "substance_meronymy", "meronym_id", "holonym_id");
44
45 const field notion::variants = field::selfJoin(object::notion, "notion_id", "variation", "noun_id", "adjective_id");
46 const field notion::attributes = field::selfJoin(object::notion, "notion_id", "variation", "adjective_id", "noun_id");
47
// NOTE(review): similarAdjectives is the only self join here without a
// counterpart that swaps the two columns -- confirm whether the similarity
// table stores both directions.
48 const field notion::similarAdjectives = field::selfJoin(object::notion, "notion_id", "similarity", "adjective_2_id", "adjective_1_id");
49
50 const field notion::entails = field::selfJoin(object::notion, "notion_id", "entailment", "given_id", "entailment_id");
51 const field notion::entailedBy = field::selfJoin(object::notion, "notion_id", "entailment", "entailment_id", "given_id");
52
53 const field notion::causes = field::selfJoin(object::notion, "notion_id", "causality", "effect_id", "cause_id");
54 const field notion::effects = field::selfJoin(object::notion, "notion_id", "causality", "cause_id", "effect_id");
55
56 const notion::preposition_group_field prepositionGroup = {};
57
58 const field notion::preposition_group_field::isA = field::joinField(object::notion, "notion_id", "is_a");
59 const field notion::preposition_group_field::groupNameField = field::stringField("is_a", "groupname");
60
61 notion::notion(const database& db, sqlite3_stmt* row) : db_(&db), valid_(true)
62 {
63 id_ = sqlite3_column_int(row, 0);
64 partOfSpeech_ = static_cast<part_of_speech>(sqlite3_column_int(row, 1));
65
66 if (sqlite3_column_type(row, 2) != SQLITE_NULL)
67 {
68 hasWnid_ = true;
69 wnid_ = sqlite3_column_int(row, 2);
70 }
71
72 if (sqlite3_column_type(row, 3) != SQLITE_NULL)
73 {
74 hasNumOfImages_ = true;
75 numOfImages_ = sqlite3_column_int(row, 3);
76 }
77 }
78
79 std::string notion::getImageNetUrl() const
80 {
81 std::stringstream url;
82 url << "http://www.image-net.org/api/text/imagenet.synset.geturls?wnid=n";
83 url.width(8);
84 url.fill('0');
85 url << (getWnid() % 100000000);
86 return url.str();
87 }
88
89 filter notion::preposition_group_field::operator==(std::string groupName) const
90 {
91 return (isA %= (groupNameField == groupName));
92 }
93
94};
diff --git a/lib/notion.h b/lib/notion.h new file mode 100644 index 0000000..a180d73 --- /dev/null +++ b/lib/notion.h
@@ -0,0 +1,200 @@
1#ifndef NOTION_H_FD1C7646
2#define NOTION_H_FD1C7646
3
4#include <stdexcept>
5#include <string>
6#include "field.h"
7#include "filter.h"
8
9struct sqlite3_stmt;
10
11namespace verbly {
12
13 class database;
14
15 class notion {
16 public:
17
18 // Default constructor
19
20 notion() = default;
21
22 // Construct from database
23
24 notion(const database& db, sqlite3_stmt* row);
25
26 // Accessors
27
28 operator bool() const
29 {
30 return valid_;
31 }
32
33 int getId() const
34 {
35 if (!valid_)
36 {
37 throw std::domain_error("Bad access to uninitialized notion");
38 }
39
40 return id_;
41 }
42
43 part_of_speech getPartOfSpeech() const
44 {
45 if (!valid_)
46 {
47 throw std::domain_error("Bad access to uninitialized notion");
48 }
49
50 return partOfSpeech_;
51 }
52
53 bool hasWnid() const
54 {
55 if (!valid_)
56 {
57 throw std::domain_error("Bad access to uninitialized notion");
58 }
59
60 return hasWnid_;
61 }
62
63 int getWnid() const
64 {
65 if (!valid_)
66 {
67 throw std::domain_error("Bad access to uninitialized notion");
68 }
69
70 if (!hasWnid_)
71 {
72 throw std::domain_error("Notion has no wnid");
73 }
74
75 return wnid_;
76 }
77
78 bool hasNumOfImages() const
79 {
80 if (!valid_)
81 {
82 throw std::domain_error("Bad access to uninitialized notion");
83 }
84
85 return hasNumOfImages_;
86 }
87
88 int getNumOfImages() const
89 {
90 if (!valid_)
91 {
92 throw std::domain_error("Bad access to uninitialized notion");
93 }
94
95 if (!hasNumOfImages_)
96 {
97 throw std::domain_error("Notion does not have a number of images");
98 }
99
100 return numOfImages_;
101 }
102
103 // Convenience
104
105 std::string getImageNetUrl() const;
106
107 // Type info
108
109 static const object objectType;
110
111 static const std::list<std::string> select;
112
113 // Query fields
114
115 static const field id;
116 static const field partOfSpeech;
117 static const field wnid;
118 static const field numOfImages;
119
120 operator filter() const
121 {
122 return (id == id_);
123 }
124
125 // Relationships with other objects
126
127 static const field word;
128
129 // Relationships with self
130
131 static const field hypernyms;
132 static const field hyponyms;
133
134 static const field fullHypernyms;
135 static const field fullHyponyms;
136
137 static const field instances;
138 static const field classes;
139
140 static const field memberMeronyms;
141 static const field memberHolonyms;
142
143 static const field fullMemberMeronyms;
144 static const field fullMemberHolonyms;
145
146 static const field partMeronyms;
147 static const field partHolonyms;
148
149 static const field fullPartMeronyms;
150 static const field fullPartHolonyms;
151
152 static const field substanceMeronyms;
153 static const field substanceHolonyms;
154
155 static const field fullSubstanceMeronyms;
156 static const field fullSubstanceHolonyms;
157
158 static const field variants;
159 static const field attributes;
160
161 static const field similarAdjectives;
162
163 static const field entails;
164 static const field entailedBy;
165
166 static const field causes;
167 static const field effects;
168
169 // Preposition group relationship
170
171 class preposition_group_field {
172 public:
173
174 filter operator==(std::string groupName) const;
175
176 private:
177
178 static const field isA;
179 static const field groupNameField;
180 };
181
182 static const preposition_group_field prepositionGroup;
183
184 private:
185 bool valid_ = false;
186
187 int id_;
188 part_of_speech partOfSpeech_;
189 bool hasWnid_ = false;
190 int wnid_;
191 bool hasNumOfImages_ = false;
192 int numOfImages_;
193
194 const database* db_;
195
196 };
197
198};
199
200#endif /* end of include guard: NOTION_H_FD1C7646 */
diff --git a/lib/noun.cpp b/lib/noun.cpp deleted file mode 100644 index d8b34c9..0000000 --- a/lib/noun.cpp +++ /dev/null
@@ -1,221 +0,0 @@
1#include "verbly.h"
2#include <set>
3#include <iostream>
4
5namespace verbly {
6
7 noun::noun()
8 {
9
10 }
11
12 noun::noun(const data& _data, int _id) : word(_data, _id)
13 {
14
15 }
16
17 std::string noun::base_form() const
18 {
19 assert(_valid == true);
20
21 return _singular;
22 }
23
24 std::string noun::singular_form() const
25 {
26 assert(_valid == true);
27
28 return _singular;
29 }
30
31 std::string noun::plural_form() const
32 {
33 assert(_valid == true);
34
35 return _plural;
36 }
37
38 int noun::wnid() const
39 {
40 assert(_valid == true);
41
42 return _wnid;
43 }
44
45 bool noun::has_plural_form() const
46 {
47 assert(_valid == true);
48
49 return !_plural.empty();
50 }
51
52 noun_query noun::hypernyms() const
53 {
54 assert(_valid == true);
55
56 return _data->nouns().hypernym_of(*this);
57 }
58
59 noun_query noun::full_hypernyms() const
60 {
61 assert(_valid == true);
62
63 return _data->nouns().full_hypernym_of(*this);
64 }
65
66 noun_query noun::hyponyms() const
67 {
68 assert(_valid == true);
69
70 return _data->nouns().hyponym_of(*this);
71 }
72
73 noun_query noun::full_hyponyms() const
74 {
75 assert(_valid == true);
76
77 return _data->nouns().full_hyponym_of(*this);
78 }
79
80 noun_query noun::part_meronyms() const
81 {
82 assert(_valid == true);
83
84 return _data->nouns().part_meronym_of(*this);
85 }
86
87 noun_query noun::full_part_meronyms() const
88 {
89 assert(_valid == true);
90
91 return _data->nouns().full_part_meronym_of(*this);
92 }
93
94 noun_query noun::part_holonyms() const
95 {
96 assert(_valid == true);
97
98 return _data->nouns().part_holonym_of(*this);
99 }
100
101 noun_query noun::full_part_holonyms() const
102 {
103 assert(_valid == true);
104
105 return _data->nouns().full_part_holonym_of(*this);
106 }
107
108 noun_query noun::substance_meronyms() const
109 {
110 assert(_valid == true);
111
112 return _data->nouns().substance_meronym_of(*this);
113 }
114
115 noun_query noun::full_substance_meronyms() const
116 {
117 assert(_valid == true);
118
119 return _data->nouns().full_substance_meronym_of(*this);
120 }
121
122 noun_query noun::substance_holonyms() const
123 {
124 assert(_valid == true);
125
126 return _data->nouns().substance_holonym_of(*this);
127 }
128
129 noun_query noun::full_substance_holonyms() const
130 {
131 assert(_valid == true);
132
133 return _data->nouns().full_substance_holonym_of(*this);
134 }
135
136 noun_query noun::member_meronyms() const
137 {
138 assert(_valid == true);
139
140 return _data->nouns().member_meronym_of(*this);
141 }
142
143 noun_query noun::full_member_meronyms() const
144 {
145 assert(_valid == true);
146
147 return _data->nouns().full_member_meronym_of(*this);
148 }
149
150 noun_query noun::member_holonyms() const
151 {
152 assert(_valid == true);
153
154 return _data->nouns().member_holonym_of(*this);
155 }
156
157 noun_query noun::full_member_holonyms() const
158 {
159 assert(_valid == true);
160
161 return _data->nouns().full_member_holonym_of(*this);
162 }
163
164 noun_query noun::classes() const
165 {
166 assert(_valid == true);
167
168 return _data->nouns().class_of(*this);
169 }
170
171 noun_query noun::instances() const
172 {
173 assert(_valid == true);
174
175 return _data->nouns().instance_of(*this);
176 }
177
178 noun_query noun::synonyms() const
179 {
180 assert(_valid == true);
181
182 return _data->nouns().synonym_of(*this);
183 }
184
185 noun_query noun::antonyms() const
186 {
187 assert(_valid == true);
188
189 return _data->nouns().antonym_of(*this);
190 }
191
192 adjective_query noun::pertainyms() const
193 {
194 assert(_valid == true);
195
196 return _data->adjectives().pertainym_of(*this);
197 }
198
199 adjective_query noun::variations() const
200 {
201 assert(_valid == true);
202
203 return _data->adjectives().variant_of(*this);
204 }
205
206 std::string noun::imagenet_url() const
207 {
208 std::stringstream url;
209 url << "http://www.image-net.org/api/text/imagenet.synset.geturls?wnid=n";
210 url.width(8);
211 url.fill('0');
212 url << (_wnid % 100000000);
213 return url.str();
214 }
215
216 bool noun::operator<(const noun& other) const
217 {
218 return _id < other._id;
219 }
220
221};
diff --git a/lib/noun.h b/lib/noun.h deleted file mode 100644 index bd71e57..0000000 --- a/lib/noun.h +++ /dev/null
@@ -1,55 +0,0 @@
1#ifndef NOUN_H_24A03C83
2#define NOUN_H_24A03C83
3
4namespace verbly {
5
6 class noun : public word {
7 private:
8 std::string _singular;
9 std::string _plural;
10 int _wnid;
11
12 friend class noun_query;
13
14 public:
15 noun();
16 noun(const data& _data, int _id);
17
18 std::string base_form() const;
19 std::string singular_form() const;
20 std::string plural_form() const;
21 int wnid() const;
22
23 bool has_plural_form() const;
24
25 noun_query hypernyms() const;
26 noun_query full_hypernyms() const;
27 noun_query hyponyms() const;
28 noun_query full_hyponyms() const;
29 noun_query part_meronyms() const;
30 noun_query full_part_meronyms() const;
31 noun_query part_holonyms() const;
32 noun_query full_part_holonyms() const;
33 noun_query substance_meronyms() const;
34 noun_query full_substance_meronyms() const;
35 noun_query substance_holonyms() const;
36 noun_query full_substance_holonyms() const;
37 noun_query member_meronyms() const;
38 noun_query full_member_meronyms() const;
39 noun_query member_holonyms() const;
40 noun_query full_member_holonyms() const;
41 noun_query classes() const;
42 noun_query instances() const;
43 noun_query synonyms() const;
44 noun_query antonyms() const;
45 adjective_query pertainyms() const;
46 adjective_query variations() const;
47
48 std::string imagenet_url() const;
49
50 bool operator<(const noun& other) const;
51 };
52
53};
54
55#endif /* end of include guard: NOUN_H_24A03C83 */
diff --git a/lib/noun_query.cpp b/lib/noun_query.cpp deleted file mode 100644 index 8648227..0000000 --- a/lib/noun_query.cpp +++ /dev/null
@@ -1,2013 +0,0 @@
1#include "verbly.h"
2
3namespace verbly {
4
5 noun_query::noun_query(const data& _data) : _data(_data)
6 {
7
8 }
9
10 noun_query& noun_query::limit(int _limit)
11 {
12 if ((_limit > 0) || (_limit == unlimited))
13 {
14 this->_limit = _limit;
15 }
16
17 return *this;
18 }
19
20 noun_query& noun_query::random()
21 {
22 this->_random = true;
23
24 return *this;
25 }
26
27 noun_query& noun_query::except(const noun& _word)
28 {
29 _except.push_back(_word);
30
31 return *this;
32 }
33
34 noun_query& noun_query::rhymes_with(const word& _word)
35 {
36 for (auto rhyme : _word.get_rhymes())
37 {
38 _rhymes.push_back(rhyme);
39 }
40
41 if (dynamic_cast<const noun*>(&_word) != nullptr)
42 {
43 _except.push_back(dynamic_cast<const noun&>(_word));
44 }
45
46 return *this;
47 }
48
49 noun_query& noun_query::rhymes_with(rhyme _r)
50 {
51 _rhymes.push_back(_r);
52
53 return *this;
54 }
55
56 noun_query& noun_query::has_pronunciation()
57 {
58 this->_has_prn = true;
59
60 return *this;
61 }
62
63 noun_query& noun_query::has_rhyming_noun()
64 {
65 _has_rhyming_noun = true;
66
67 return *this;
68 }
69
70 noun_query& noun_query::has_rhyming_adjective()
71 {
72 _has_rhyming_adjective = true;
73
74 return *this;
75 }
76
77 noun_query& noun_query::has_rhyming_adverb()
78 {
79 _has_rhyming_adverb = true;
80
81 return *this;
82 }
83
84 noun_query& noun_query::has_rhyming_verb()
85 {
86 _has_rhyming_verb = true;
87
88 return *this;
89 }
90
91 noun_query& noun_query::with_stress(filter<std::vector<bool>> _arg)
92 {
93 _stress = _arg;
94
95 return *this;
96 }
97
98 noun_query& noun_query::with_singular_form(std::string _arg)
99 {
100 _with_singular_form.push_back(_arg);
101
102 return *this;
103 }
104
105 noun_query& noun_query::with_prefix(filter<std::string> _f)
106 {
107 _f.clean();
108 _with_prefix = _f;
109
110 return *this;
111 }
112
113 noun_query& noun_query::with_suffix(filter<std::string> _f)
114 {
115 _f.clean();
116 _with_suffix = _f;
117
118 return *this;
119 }
120
121 noun_query& noun_query::requires_plural_form()
122 {
123 _requires_plural_form = true;
124
125 return *this;
126 }
127
128 noun_query& noun_query::with_complexity(int _arg)
129 {
130 _with_complexity = _arg;
131
132 return *this;
133 }
134
135 noun_query& noun_query::is_hypernym()
136 {
137 _is_hypernym = true;
138
139 return *this;
140 }
141
142 noun_query& noun_query::hypernym_of(filter<noun> _f)
143 {
144 _f.clean();
145 _hypernym_of = _f;
146
147 return *this;
148 }
149
150 noun_query& noun_query::full_hypernym_of(filter<noun> _f)
151 {
152 _f.clean();
153 _full_hypernym_of = _f;
154
155 return *this;
156 }
157
158 noun_query& noun_query::is_hyponym()
159 {
160 _is_hyponym = true;
161
162 return *this;
163 }
164
165 noun_query& noun_query::hyponym_of(filter<noun> _f)
166 {
167 _f.clean();
168 _hyponym_of = _f;
169
170 return *this;
171 }
172
173 noun_query& noun_query::full_hyponym_of(filter<noun> _f)
174 {
175 _f.clean();
176 _full_hyponym_of = _f;
177
178 return *this;
179 }
180
181 noun_query& noun_query::is_part_meronym()
182 {
183 _is_part_meronym = true;
184
185 return *this;
186 }
187
188 noun_query& noun_query::part_meronym_of(filter<noun> _f)
189 {
190 _f.clean();
191 _part_meronym_of = _f;
192
193 return *this;
194 }
195
196 noun_query& noun_query::full_part_meronym_of(filter<noun> _f)
197 {
198 _f.clean();
199 _full_part_meronym_of = _f;
200
201 return *this;
202 }
203
204 noun_query& noun_query::is_part_holonym()
205 {
206 _is_part_holonym = true;
207
208 return *this;
209 }
210
211 noun_query& noun_query::part_holonym_of(filter<noun> _f)
212 {
213 _f.clean();
214 _part_holonym_of = _f;
215
216 return *this;
217 }
218
219 noun_query& noun_query::full_part_holonym_of(filter<noun> _f)
220 {
221 _f.clean();
222 _full_part_holonym_of = _f;
223
224 return *this;
225 }
226
227 noun_query& noun_query::is_substance_meronym()
228 {
229 _is_substance_meronym = true;
230
231 return *this;
232 }
233
234 noun_query& noun_query::substance_meronym_of(filter<noun> _f)
235 {
236 _f.clean();
237 _substance_meronym_of = _f;
238
239 return *this;
240 }
241
242 noun_query& noun_query::full_substance_meronym_of(filter<noun> _f)
243 {
244 _f.clean();
245 _full_substance_meronym_of = _f;
246
247 return *this;
248 }
249
250 noun_query& noun_query::is_substance_holonym()
251 {
252 _is_substance_holonym = true;
253
254 return *this;
255 }
256
257 noun_query& noun_query::substance_holonym_of(filter<noun> _f)
258 {
259 _f.clean();
260 _substance_holonym_of = _f;
261
262 return *this;
263 }
264
265 noun_query& noun_query::full_substance_holonym_of(filter<noun> _f)
266 {
267 _f.clean();
268 _full_substance_holonym_of = _f;
269
270 return *this;
271 }
272
273 noun_query& noun_query::is_member_meronym()
274 {
275 _is_member_meronym = true;
276
277 return *this;
278 }
279
280 noun_query& noun_query::member_meronym_of(filter<noun> _f)
281 {
282 _f.clean();
283 _member_meronym_of = _f;
284
285 return *this;
286 }
287
288 noun_query& noun_query::full_member_meronym_of(filter<noun> _f)
289 {
290 _f.clean();
291 _full_member_meronym_of = _f;
292
293 return *this;
294 }
295
296 noun_query& noun_query::is_member_holonym()
297 {
298 _is_member_holonym = true;
299
300 return *this;
301 }
302
303 noun_query& noun_query::member_holonym_of(filter<noun> _f)
304 {
305 _f.clean();
306 _member_holonym_of = _f;
307
308 return *this;
309 }
310
311 noun_query& noun_query::full_member_holonym_of(filter<noun> _f)
312 {
313 _f.clean();
314 _full_member_holonym_of = _f;
315
316 return *this;
317 }
318
319 noun_query& noun_query::is_proper()
320 {
321 _is_proper = true;
322
323 return *this;
324 }
325
326 noun_query& noun_query::is_not_proper()
327 {
328 _is_not_proper = true;
329
330 return *this;
331 }
332
333 noun_query& noun_query::is_instance()
334 {
335 _is_instance = true;
336
337 return *this;
338 }
339
340 noun_query& noun_query::instance_of(filter<noun> _f)
341 {
342 _f.clean();
343 _instance_of = _f;
344
345 return *this;
346 }
347
348 noun_query& noun_query::is_class()
349 {
350 _is_class = true;
351
352 return *this;
353 }
354
355 noun_query& noun_query::class_of(filter<noun> _f)
356 {
357 _f.clean();
358 _class_of = _f;
359
360 return *this;
361 }
362
363 noun_query& noun_query::has_synonyms()
364 {
365 _has_synonyms = true;
366
367 return *this;
368 }
369
370 noun_query& noun_query::synonym_of(filter<noun> _f)
371 {
372 _f.clean();
373 _synonym_of = _f;
374
375 return *this;
376 }
377
378 noun_query& noun_query::has_antonyms()
379 {
380 _has_antonyms = true;
381
382 return *this;
383 }
384
385 noun_query& noun_query::antonym_of(filter<noun> _f)
386 {
387 _f.clean();
388 _antonym_of = _f;
389
390 return *this;
391 }
392
393 noun_query& noun_query::has_pertainym()
394 {
395 _has_pertainym = true;
396
397 return *this;
398 }
399
400 noun_query& noun_query::anti_pertainym_of(filter<adjective> _f)
401 {
402 _f.clean();
403 _anti_pertainym_of = _f;
404
405 return *this;
406 }
407
408 noun_query& noun_query::is_attribute()
409 {
410 _is_attribute = true;
411
412 return *this;
413 }
414
415 noun_query& noun_query::attribute_of(filter<adjective> _f)
416 {
417 _f.clean();
418 _attribute_of = _f;
419
420 return *this;
421 }
422
423 noun_query& noun_query::at_least_n_images(int _arg)
424 {
425 _at_least_n_images = _arg;
426
427 return *this;
428 }
429
430 noun_query& noun_query::with_wnid(int _arg)
431 {
432 _with_wnid.insert(_arg);
433
434 return *this;
435 }
436
437 /*
438 noun_query& noun_query::derived_from(const word& _w)
439 {
440 if (dynamic_cast<const adjective*>(&_w) != nullptr)
441 {
442 _derived_from_adjective.push_back(dynamic_cast<const adjective&>(_w));
443 } else if (dynamic_cast<const adverb*>(&_w) != nullptr)
444 {
445 _derived_from_adverb.push_back(dynamic_cast<const adverb&>(_w));
446 } else if (dynamic_cast<const noun*>(&_w) != nullptr)
447 {
448 _derived_from_noun.push_back(dynamic_cast<const noun&>(_w));
449 }
450
451 return *this;
452 }
453
454 noun_query& noun_query::not_derived_from(const word& _w)
455 {
456 if (dynamic_cast<const adjective*>(&_w) != nullptr)
457 {
458 _not_derived_from_adjective.push_back(dynamic_cast<const adjective&>(_w));
459 } else if (dynamic_cast<const adverb*>(&_w) != nullptr)
460 {
461 _not_derived_from_adverb.push_back(dynamic_cast<const adverb&>(_w));
462 } else if (dynamic_cast<const noun*>(&_w) != nullptr)
463 {
464 _not_derived_from_noun.push_back(dynamic_cast<const noun&>(_w));
465 }
466
467 return *this;
468 }*/
469
470 std::list<noun> noun_query::run() const
471 {
472 std::stringstream construct;
473
474 if (!_full_hypernym_of.empty() || !_full_hyponym_of.empty() || !_full_part_meronym_of.empty() || !_full_part_holonym_of.empty() || !_full_substance_meronym_of.empty() || !_full_substance_holonym_of.empty() || !_full_member_meronym_of.empty() || !_full_member_holonym_of.empty())
475 {
476 construct << "WITH RECURSIVE ";
477
478 std::list<std::string> ctes;
479
480 for (auto hyponym : _full_hypernym_of.uniq_flatten())
481 {
482 ctes.push_back("hypernym_tree_" + std::to_string(hyponym._id) + " AS (SELECT hypernym_id FROM hypernymy WHERE hyponym_id = " + std::to_string(hyponym._id) + " UNION SELECT h.hypernym_id FROM hypernym_tree_" + std::to_string(hyponym._id) + " AS t INNER JOIN hypernymy AS h ON t.hypernym_id = h.hyponym_id)");
483 }
484
485 for (auto hypernym : _full_hyponym_of.uniq_flatten())
486 {
487 ctes.push_back("hyponym_tree_" + std::to_string(hypernym._id) + " AS (SELECT hyponym_id FROM hypernymy WHERE hypernym_id = " + std::to_string(hypernym._id) + " UNION SELECT h.hyponym_id FROM hyponym_tree_" + std::to_string(hypernym._id) + " AS t INNER JOIN hypernymy AS h ON t.hyponym_id = h.hypernym_id)");
488 }
489
490 for (auto holonym : _full_part_meronym_of.uniq_flatten())
491 {
492 ctes.push_back("part_meronym_tree_" + std::to_string(holonym._id) + " AS (SELECT meronym_id FROM part_meronymy WHERE holonym_id = " + std::to_string(holonym._id) + " UNION SELECT h.meronym_id FROM part_meronym_tree_" + std::to_string(holonym._id) + " AS t INNER JOIN part_meronymy AS h ON t.meronym_id = h.holonym_id)");
493 }
494
495 for (auto meronym : _full_part_holonym_of.uniq_flatten())
496 {
497 ctes.push_back("part_holonym_tree_" + std::to_string(meronym._id) + " AS (SELECT holonym_id FROM part_meronymy WHERE meronym_id = " + std::to_string(meronym._id) + " UNION SELECT h.holonym_id FROM part_holonym_tree_" + std::to_string(meronym._id) + " AS t INNER JOIN part_meronymy AS h ON t.holonym_id = h.meronym_id)");
498 }
499
500 for (auto holonym : _full_substance_meronym_of.uniq_flatten())
501 {
502 ctes.push_back("substance_meronym_tree_" + std::to_string(holonym._id) + " AS (SELECT meronym_id FROM substance_meronymy WHERE holonym_id = " + std::to_string(holonym._id) + " UNION SELECT h.meronym_id FROM substance_meronym_tree_" + std::to_string(holonym._id) + " AS t INNER JOIN substance_meronymy AS h ON t.meronym_id = h.holonym_id)");
503 }
504
505 for (auto meronym : _full_substance_holonym_of.uniq_flatten())
506 {
507 ctes.push_back("substance_holonym_tree_" + std::to_string(meronym._id) + " AS (SELECT holonym_id FROM substance_meronymy WHERE meronym_id = " + std::to_string(meronym._id) + " UNION SELECT h.holonym_id FROM substance_holonym_tree_" + std::to_string(meronym._id) + " AS t INNER JOIN substance_meronymy AS h ON t.holonym_id = h.meronym_id)");
508 }
509
510 for (auto holonym : _full_member_meronym_of.uniq_flatten())
511 {
512 ctes.push_back("member_meronym_tree_" + std::to_string(holonym._id) + " AS (SELECT meronym_id FROM member_meronymy WHERE holonym_id = " + std::to_string(holonym._id) + " UNION SELECT h.meronym_id FROM member_meronym_tree_" + std::to_string(holonym._id) + " AS t INNER JOIN member_meronymy AS h ON t.meronym_id = h.holonym_id)");
513 }
514
515 for (auto meronym : _full_member_holonym_of.uniq_flatten())
516 {
517 ctes.push_back("member_holonym_tree_" + std::to_string(meronym._id) + " AS (SELECT holonym_id FROM member_meronymy WHERE meronym_id = " + std::to_string(meronym._id) + " UNION SELECT h.holonym_id FROM member_holonym_tree_" + std::to_string(meronym._id) + " AS t INNER JOIN member_meronymy AS h ON t.holonym_id = h.meronym_id)");
518 }
519
520 construct << verbly::implode(std::begin(ctes), std::end(ctes), ", ");
521 construct << " ";
522 }
523
524 construct << "SELECT noun_id, singular, plural, wnid FROM nouns";
525 std::list<std::string> conditions;
526 std::list<binding> bindings;
527
528 if (_has_prn)
529 {
530 conditions.push_back("noun_id IN (SELECT noun_id FROM noun_pronunciations)");
531 }
532
533 if (!_rhymes.empty())
534 {
535 std::list<std::string> clauses(_rhymes.size(), "(prerhyme != ? AND rhyme = ?)");
536 std::string cond = "noun_id IN (SELECT noun_id FROM noun_pronunciations WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
537 conditions.push_back(cond);
538
539 for (auto rhy : _rhymes)
540 {
541 bindings.emplace_back(rhy.get_prerhyme());
542 bindings.emplace_back(rhy.get_rhyme());
543 }
544 }
545
546 if (_has_rhyming_noun)
547 {
548 conditions.push_back("noun_id IN (SELECT a.noun_id FROM nouns AS a INNER JOIN noun_pronunciations AS curp ON curp.noun_id = a.noun_id INNER JOIN noun_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme AND rhmp.noun_id != curp.noun_id)");
549 }
550
551 if (_has_rhyming_adjective)
552 {
553 conditions.push_back("noun_id IN (SELECT a.noun_id FROM nouns AS a INNER JOIN noun_pronunciations AS curp ON curp.noun_id = a.noun_id INNER JOIN adjective_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)");
554 }
555
556 if (_has_rhyming_adverb)
557 {
558 conditions.push_back("noun_id IN (SELECT a.noun_id FROM nouns AS a INNER JOIN noun_pronunciations AS curp ON curp.noun_id = a.noun_id INNER JOIN adverb_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)");
559 }
560
561 if (_has_rhyming_verb)
562 {
563 conditions.push_back("noun_id IN (SELECT a.noun_id FROM nouns AS a INNER JOIN noun_pronunciations AS curp ON curp.noun_id = a.noun_id INNER JOIN verb_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)");
564 }
565
566 if (!_stress.empty())
567 {
568 std::stringstream cond;
569 if (_stress.get_notlogic())
570 {
571 cond << "noun_id NOT IN";
572 } else {
573 cond << "noun_id IN";
574 }
575
576 cond << "(SELECT noun_id FROM noun_pronunciations WHERE ";
577
578 std::function<std::string (filter<std::vector<bool>>, bool)> recur = [&] (filter<std::vector<bool>> f, bool notlogic) -> std::string {
579 switch (f.get_type())
580 {
581 case filter<std::vector<bool>>::type::singleton:
582 {
583 std::ostringstream _val;
584 for (auto syl : f.get_elem())
585 {
586 if (syl)
587 {
588 _val << "1";
589 } else {
590 _val << "0";
591 }
592 }
593
594 bindings.emplace_back(_val.str());
595
596 if (notlogic == f.get_notlogic())
597 {
598 return "stress = ?";
599 } else {
600 return "stress != ?";
601 }
602 }
603
604 case filter<std::vector<bool>>::type::group:
605 {
606 bool truelogic = notlogic != f.get_notlogic();
607
608 std::list<std::string> clauses;
609 std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<std::vector<bool>> f2) {
610 return recur(f2, truelogic);
611 });
612
613 if (truelogic == f.get_orlogic())
614 {
615 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")";
616 } else {
617 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
618 }
619 }
620 }
621 };
622
623 cond << recur(_stress, _stress.get_notlogic());
624 cond << ")";
625 conditions.push_back(cond.str());
626 }
627
628 for (auto except : _except)
629 {
630 conditions.push_back("noun_id != ?");
631 bindings.emplace_back(except._id);
632 }
633
634 if (!_with_singular_form.empty())
635 {
636 std::list<std::string> clauses(_with_singular_form.size(), "singular = ?");
637 std::string cond = "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
638 conditions.push_back(cond);
639
640 for (auto form : _with_singular_form)
641 {
642 bindings.emplace_back(form);
643 }
644 }
645
646 if (_requires_plural_form)
647 {
648 conditions.push_back("plural IS NOT NULL");
649 }
650
651 if (!_with_prefix.empty())
652 {
653 std::function<std::string (filter<std::string>, bool)> recur = [&] (filter<std::string> f, bool notlogic) -> std::string {
654 switch (f.get_type())
655 {
656 case filter<std::string>::type::singleton:
657 {
658 bindings.emplace_back(f.get_elem() + "%");
659
660 if (notlogic == f.get_notlogic())
661 {
662 return "singular LIKE ?";
663 } else {
664 return "singular NOT LIKE ?";
665 }
666 }
667
668 case filter<std::string>::type::group:
669 {
670 bool truelogic = notlogic != f.get_notlogic();
671
672 std::list<std::string> clauses;
673 std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<std::string> f2) {
674 return recur(f2, truelogic);
675 });
676
677 if (truelogic == f.get_orlogic())
678 {
679 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")";
680 } else {
681 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
682 }
683 }
684 }
685 };
686
687 conditions.push_back(recur(_with_prefix, false));
688 }
689
690 if (!_with_suffix.empty())
691 {
692 std::function<std::string (filter<std::string>, bool)> recur = [&] (filter<std::string> f, bool notlogic) -> std::string {
693 switch (f.get_type())
694 {
695 case filter<std::string>::type::singleton:
696 {
697 bindings.emplace_back("%" + f.get_elem());
698
699 if (notlogic == f.get_notlogic())
700 {
701 return "singular LIKE ?";
702 } else {
703 return "singular NOT LIKE ?";
704 }
705 }
706
707 case filter<std::string>::type::group:
708 {
709 bool truelogic = notlogic != f.get_notlogic();
710
711 std::list<std::string> clauses;
712 std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<std::string> f2) {
713 return recur(f2, truelogic);
714 });
715
716 if (truelogic == f.get_orlogic())
717 {
718 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")";
719 } else {
720 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
721 }
722 }
723 }
724 };
725
726 conditions.push_back(recur(_with_suffix, false));
727 }
728
729 if (_with_complexity != unlimited)
730 {
731 conditions.push_back("complexity = ?");
732 bindings.emplace_back(_with_complexity);
733 }
734
735 if (_is_hypernym)
736 {
737 conditions.push_back("noun_id IN (SELECT hypernym_id FROM hypernymy)");
738 }
739
740 if (!_hypernym_of.empty())
741 {
742 std::stringstream cond;
743 if (_hypernym_of.get_notlogic())
744 {
745 cond << "noun_id NOT IN";
746 } else {
747 cond << "noun_id IN";
748 }
749
750 cond << "(SELECT hypernym_id FROM hypernymy WHERE ";
751
752 std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string {
753 switch (f.get_type())
754 {
755 case filter<noun>::type::singleton:
756 {
757 bindings.emplace_back(f.get_elem()._id);
758
759 if (notlogic == f.get_notlogic())
760 {
761 return "hyponym_id = ?";
762 } else {
763 return "hyponym_id != ?";
764 }
765 }
766
767 case filter<noun>::type::group:
768 {
769 bool truelogic = notlogic != f.get_notlogic();
770
771 std::list<std::string> clauses;
772 std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) {
773 return recur(f2, truelogic);
774 });
775
776 if (truelogic == f.get_orlogic())
777 {
778 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")";
779 } else {
780 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
781 }
782 }
783 }
784 };
785
786 cond << recur(_hypernym_of, _hypernym_of.get_notlogic());
787 cond << ")";
788 conditions.push_back(cond.str());
789 }
790
791 if (!_full_hypernym_of.empty())
792 {
793 std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string {
794 switch (f.get_type())
795 {
796 case filter<noun>::type::singleton:
797 {
798 if (notlogic == f.get_notlogic())
799 {
800 return "noun_id IN (SELECT hypernym_id FROM hypernym_tree_" + std::to_string(f.get_elem()._id) + ")";
801 } else {
802 return "noun_id NOT IN (SELECT hypernym_id FROM hypernym_tree_" + std::to_string(f.get_elem()._id) + ")";
803 }
804 }
805
806 case filter<noun>::type::group:
807 {
808 bool truelogic = notlogic != f.get_notlogic();
809
810 std::list<std::string> clauses;
811 std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) {
812 return recur(f2, truelogic);
813 });
814
815 if (truelogic == f.get_orlogic())
816 {
817 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")";
818 } else {
819 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
820 }
821 }
822 }
823 };
824
825 conditions.push_back(recur(_full_hypernym_of, false));
826 }
827
828 if (!_full_hyponym_of.empty())
829 {
830 std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string {
831 switch (f.get_type())
832 {
833 case filter<noun>::type::singleton:
834 {
835 if (notlogic == f.get_notlogic())
836 {
837 return "noun_id IN (SELECT hyponym_id FROM hyponym_tree_" + std::to_string(f.get_elem()._id) + ")";
838 } else {
839 return "noun_id NOT IN (SELECT hyponym_id FROM hyponym_tree_" + std::to_string(f.get_elem()._id) + ")";
840 }
841 }
842
843 case filter<noun>::type::group:
844 {
845 bool truelogic = notlogic != f.get_notlogic();
846
847 std::list<std::string> clauses;
848 std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) {
849 return recur(f2, truelogic);
850 });
851
852 if (truelogic == f.get_orlogic())
853 {
854 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")";
855 } else {
856 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
857 }
858 }
859 }
860 };
861
862 conditions.push_back(recur(_full_hyponym_of, false));
863 }
864
865 if (_is_hyponym)
866 {
867 conditions.push_back("noun_id IN (SELECT hyponym_id FROM hypernymy)");
868 }
869
870 if (!_hyponym_of.empty())
871 {
872 std::stringstream cond;
873 if (_hyponym_of.get_notlogic())
874 {
875 cond << "noun_id NOT IN";
876 } else {
877 cond << "noun_id IN";
878 }
879
880 cond << "(SELECT hyponym_id FROM hypernymy WHERE ";
881
882 std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string {
883 switch (f.get_type())
884 {
885 case filter<noun>::type::singleton:
886 {
887 bindings.emplace_back(f.get_elem()._id);
888
889 if (notlogic == f.get_notlogic())
890 {
891 return "hypernym_id = ?";
892 } else {
893 return "hypernym_id != ?";
894 }
895 }
896
897 case filter<noun>::type::group:
898 {
899 bool truelogic = notlogic != f.get_notlogic();
900
901 std::list<std::string> clauses;
902 std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) {
903 return recur(f2, truelogic);
904 });
905
906 if (truelogic == f.get_orlogic())
907 {
908 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")";
909 } else {
910 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
911 }
912 }
913 }
914 };
915
916 cond << recur(_hyponym_of, _hyponym_of.get_notlogic());
917 cond << ")";
918 conditions.push_back(cond.str());
919 }
920
921 if (_is_part_meronym)
922 {
923 conditions.push_back("noun_id IN (SELECT meronym_id FROM part_meronymy)");
924 }
925
926 if (!_part_meronym_of.empty())
927 {
928 std::stringstream cond;
929 if (_part_meronym_of.get_notlogic())
930 {
931 cond << "noun_id NOT IN";
932 } else {
933 cond << "noun_id IN";
934 }
935
936 cond << "(SELECT meronym_id FROM part_meronymy WHERE ";
937
938 std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string {
939 switch (f.get_type())
940 {
941 case filter<noun>::type::singleton:
942 {
943 bindings.emplace_back(f.get_elem()._id);
944
945 if (notlogic == f.get_notlogic())
946 {
947 return "holonym_id = ?";
948 } else {
949 return "holonym_id != ?";
950 }
951 }
952
953 case filter<noun>::type::group:
954 {
955 bool truelogic = notlogic != f.get_notlogic();
956
957 std::list<std::string> clauses;
958 std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) {
959 return recur(f2, truelogic);
960 });
961
962 if (truelogic == f.get_orlogic())
963 {
964 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")";
965 } else {
966 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
967 }
968 }
969 }
970 };
971
972 cond << recur(_part_meronym_of, _part_meronym_of.get_notlogic());
973 cond << ")";
974 conditions.push_back(cond.str());
975 }
976
977 if (!_full_part_meronym_of.empty())
978 {
979 std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string {
980 switch (f.get_type())
981 {
982 case filter<noun>::type::singleton:
983 {
984 if (notlogic == f.get_notlogic())
985 {
986 return "noun_id IN (SELECT meronym_id FROM part_meronym_tree_" + std::to_string(f.get_elem()._id) + ")";
987 } else {
988 return "noun_id NOT IN (SELECT meronym_id FROM part_meronym_tree_" + std::to_string(f.get_elem()._id) + ")";
989 }
990 }
991
992 case filter<noun>::type::group:
993 {
994 bool truelogic = notlogic != f.get_notlogic();
995
996 std::list<std::string> clauses;
997 std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) {
998 return recur(f2, truelogic);
999 });
1000
1001 if (truelogic == f.get_orlogic())
1002 {
1003 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")";
1004 } else {
1005 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
1006 }
1007 }
1008 }
1009 };
1010
1011 conditions.push_back(recur(_full_part_meronym_of, false));
1012 }
1013
1014 if (_is_part_holonym)
1015 {
1016 conditions.push_back("noun_id IN (SELECT holonym_id FROM part_meronymy)");
1017 }
1018
1019 if (!_part_holonym_of.empty())
1020 {
1021 std::stringstream cond;
1022 if (_part_holonym_of.get_notlogic())
1023 {
1024 cond << "noun_id NOT IN";
1025 } else {
1026 cond << "noun_id IN";
1027 }
1028
1029 cond << "(SELECT holonym_id FROM part_meronymy WHERE ";
1030
1031 std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string {
1032 switch (f.get_type())
1033 {
1034 case filter<noun>::type::singleton:
1035 {
1036 bindings.emplace_back(f.get_elem()._id);
1037
1038 if (notlogic == f.get_notlogic())
1039 {
1040 return "meronym_id = ?";
1041 } else {
1042 return "meronym_id != ?";
1043 }
1044 }
1045
1046 case filter<noun>::type::group:
1047 {
1048 bool truelogic = notlogic != f.get_notlogic();
1049
1050 std::list<std::string> clauses;
1051 std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) {
1052 return recur(f2, truelogic);
1053 });
1054
1055 if (truelogic == f.get_orlogic())
1056 {
1057 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")";
1058 } else {
1059 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
1060 }
1061 }
1062 }
1063 };
1064
1065 cond << recur(_part_holonym_of, _part_holonym_of.get_notlogic());
1066 cond << ")";
1067 conditions.push_back(cond.str());
1068 }
1069
1070 if (!_full_part_holonym_of.empty())
1071 {
1072 std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string {
1073 switch (f.get_type())
1074 {
1075 case filter<noun>::type::singleton:
1076 {
1077 if (notlogic == f.get_notlogic())
1078 {
1079 return "noun_id IN (SELECT holonym_id FROM part_holonym_tree_" + std::to_string(f.get_elem()._id) + ")";
1080 } else {
1081 return "noun_id NOT IN (SELECT holonym_id FROM part_holonym_tree_" + std::to_string(f.get_elem()._id) + ")";
1082 }
1083 }
1084
1085 case filter<noun>::type::group:
1086 {
1087 bool truelogic = notlogic != f.get_notlogic();
1088
1089 std::list<std::string> clauses;
1090 std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) {
1091 return recur(f2, truelogic);
1092 });
1093
1094 if (truelogic == f.get_orlogic())
1095 {
1096 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")";
1097 } else {
1098 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
1099 }
1100 }
1101 }
1102 };
1103
1104 conditions.push_back(recur(_full_part_holonym_of, false));
1105 }
1106
1107 if (_is_substance_meronym)
1108 {
1109 conditions.push_back("noun_id IN (SELECT meronym_id FROM substance_meronymy)");
1110 }
1111
1112 if (!_substance_meronym_of.empty())
1113 {
1114 std::stringstream cond;
1115 if (_substance_meronym_of.get_notlogic())
1116 {
1117 cond << "noun_id NOT IN";
1118 } else {
1119 cond << "noun_id IN";
1120 }
1121
1122 cond << "(SELECT meronym_id FROM substance_meronymy WHERE ";
1123
1124 std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string {
1125 switch (f.get_type())
1126 {
1127 case filter<noun>::type::singleton:
1128 {
1129 bindings.emplace_back(f.get_elem()._id);
1130
1131 if (notlogic == f.get_notlogic())
1132 {
1133 return "holonym_id = ?";
1134 } else {
1135 return "holonym_id != ?";
1136 }
1137 }
1138
1139 case filter<noun>::type::group:
1140 {
1141 bool truelogic = notlogic != f.get_notlogic();
1142
1143 std::list<std::string> clauses;
1144 std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) {
1145 return recur(f2, truelogic);
1146 });
1147
1148 if (truelogic == f.get_orlogic())
1149 {
1150 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")";
1151 } else {
1152 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
1153 }
1154 }
1155 }
1156 };
1157
1158 cond << recur(_substance_meronym_of, _substance_meronym_of.get_notlogic());
1159 cond << ")";
1160 conditions.push_back(cond.str());
1161 }
1162
1163 if (!_full_substance_meronym_of.empty())
1164 {
1165 std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string {
1166 switch (f.get_type())
1167 {
1168 case filter<noun>::type::singleton:
1169 {
1170 if (notlogic == f.get_notlogic())
1171 {
1172 return "noun_id IN (SELECT meronym_id FROM substance_meronym_tree_" + std::to_string(f.get_elem()._id) + ")";
1173 } else {
1174 return "noun_id NOT IN (SELECT meronym_id FROM substance_meronym_tree_" + std::to_string(f.get_elem()._id) + ")";
1175 }
1176 }
1177
1178 case filter<noun>::type::group:
1179 {
1180 bool truelogic = notlogic != f.get_notlogic();
1181
1182 std::list<std::string> clauses;
1183 std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) {
1184 return recur(f2, truelogic);
1185 });
1186
1187 if (truelogic == f.get_orlogic())
1188 {
1189 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")";
1190 } else {
1191 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
1192 }
1193 }
1194 }
1195 };
1196
1197 conditions.push_back(recur(_full_substance_meronym_of, false));
1198 }
1199
1200 if (_is_substance_holonym)
1201 {
1202 conditions.push_back("noun_id IN (SELECT holonym_id FROM substance_meronymy)");
1203 }
1204
1205 if (!_substance_holonym_of.empty())
1206 {
1207 std::stringstream cond;
1208 if (_substance_holonym_of.get_notlogic())
1209 {
1210 cond << "noun_id NOT IN";
1211 } else {
1212 cond << "noun_id IN";
1213 }
1214
1215 cond << "(SELECT holonym_id FROM substance_meronymy WHERE ";
1216
1217 std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string {
1218 switch (f.get_type())
1219 {
1220 case filter<noun>::type::singleton:
1221 {
1222 bindings.emplace_back(f.get_elem()._id);
1223
1224 if (notlogic == f.get_notlogic())
1225 {
1226 return "meronym_id = ?";
1227 } else {
1228 return "meronym_id != ?";
1229 }
1230 }
1231
1232 case filter<noun>::type::group:
1233 {
1234 bool truelogic = notlogic != f.get_notlogic();
1235
1236 std::list<std::string> clauses;
1237 std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) {
1238 return recur(f2, truelogic);
1239 });
1240
1241 if (truelogic == f.get_orlogic())
1242 {
1243 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")";
1244 } else {
1245 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
1246 }
1247 }
1248 }
1249 };
1250
1251 cond << recur(_substance_holonym_of, _substance_holonym_of.get_notlogic());
1252 cond << ")";
1253 conditions.push_back(cond.str());
1254 }
1255
1256 if (!_full_substance_holonym_of.empty())
1257 {
1258 std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string {
1259 switch (f.get_type())
1260 {
1261 case filter<noun>::type::singleton:
1262 {
1263 if (notlogic == f.get_notlogic())
1264 {
1265 return "noun_id IN (SELECT holonym_id FROM substance_holonym_tree_" + std::to_string(f.get_elem()._id) + ")";
1266 } else {
1267 return "noun_id NOT IN (SELECT holonym_id FROM substance_holonym_tree_" + std::to_string(f.get_elem()._id) + ")";
1268 }
1269 }
1270
1271 case filter<noun>::type::group:
1272 {
1273 bool truelogic = notlogic != f.get_notlogic();
1274
1275 std::list<std::string> clauses;
1276 std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) {
1277 return recur(f2, truelogic);
1278 });
1279
1280 if (truelogic == f.get_orlogic())
1281 {
1282 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")";
1283 } else {
1284 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
1285 }
1286 }
1287 }
1288 };
1289
1290 conditions.push_back(recur(_full_substance_holonym_of, false));
1291 }
1292
1293 if (_is_member_meronym)
1294 {
1295 conditions.push_back("noun_id IN (SELECT meronym_id FROM member_meronymy)");
1296 }
1297
1298 if (!_member_meronym_of.empty())
1299 {
1300 std::stringstream cond;
1301 if (_member_meronym_of.get_notlogic())
1302 {
1303 cond << "noun_id NOT IN";
1304 } else {
1305 cond << "noun_id IN";
1306 }
1307
1308 cond << "(SELECT meronym_id FROM member_meronymy WHERE ";
1309
1310 std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string {
1311 switch (f.get_type())
1312 {
1313 case filter<noun>::type::singleton:
1314 {
1315 bindings.emplace_back(f.get_elem()._id);
1316
1317 if (notlogic == f.get_notlogic())
1318 {
1319 return "holonym_id = ?";
1320 } else {
1321 return "holonym_id != ?";
1322 }
1323 }
1324
1325 case filter<noun>::type::group:
1326 {
1327 bool truelogic = notlogic != f.get_notlogic();
1328
1329 std::list<std::string> clauses;
1330 std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) {
1331 return recur(f2, truelogic);
1332 });
1333
1334 if (truelogic == f.get_orlogic())
1335 {
1336 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")";
1337 } else {
1338 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
1339 }
1340 }
1341 }
1342 };
1343
1344 cond << recur(_member_meronym_of, _member_meronym_of.get_notlogic());
1345 cond << ")";
1346 conditions.push_back(cond.str());
1347 }
1348
1349 if (!_full_member_meronym_of.empty())
1350 {
1351 std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string {
1352 switch (f.get_type())
1353 {
1354 case filter<noun>::type::singleton:
1355 {
1356 if (notlogic == f.get_notlogic())
1357 {
1358 return "noun_id IN (SELECT meronym_id FROM member_meronym_tree_" + std::to_string(f.get_elem()._id) + ")";
1359 } else {
1360 return "noun_id NOT IN (SELECT meronym_id FROM member_meronym_tree_" + std::to_string(f.get_elem()._id) + ")";
1361 }
1362 }
1363
1364 case filter<noun>::type::group:
1365 {
1366 bool truelogic = notlogic != f.get_notlogic();
1367
1368 std::list<std::string> clauses;
1369 std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) {
1370 return recur(f2, truelogic);
1371 });
1372
1373 if (truelogic == f.get_orlogic())
1374 {
1375 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")";
1376 } else {
1377 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
1378 }
1379 }
1380 }
1381 };
1382
1383 conditions.push_back(recur(_full_member_meronym_of, false));
1384 }
1385
1386 if (_is_member_holonym) // Keep only nouns that occur as the holonym side of at least one member-meronymy row.
1387 {
1388 conditions.push_back("noun_id IN (SELECT holonym_id FROM member_meronymy)"); // Table name is member_meronymy (was misspelled member_meronym), matching the other member holonym/meronym subqueries in this function.
1389 }
1390
1391 if (!_member_holonym_of.empty())
1392 {
1393 std::stringstream cond;
1394 if (_member_holonym_of.get_notlogic())
1395 {
1396 cond << "noun_id NOT IN";
1397 } else {
1398 cond << "noun_id IN";
1399 }
1400
1401 cond << "(SELECT holonym_id FROM member_meronymy WHERE ";
1402
1403 std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string {
1404 switch (f.get_type())
1405 {
1406 case filter<noun>::type::singleton:
1407 {
1408 bindings.emplace_back(f.get_elem()._id);
1409
1410 if (notlogic == f.get_notlogic())
1411 {
1412 return "meronym_id = ?";
1413 } else {
1414 return "meronym_id != ?";
1415 }
1416 }
1417
1418 case filter<noun>::type::group:
1419 {
1420 bool truelogic = notlogic != f.get_notlogic();
1421
1422 std::list<std::string> clauses;
1423 std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) {
1424 return recur(f2, truelogic);
1425 });
1426
1427 if (truelogic == f.get_orlogic())
1428 {
1429 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")";
1430 } else {
1431 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
1432 }
1433 }
1434 }
1435 };
1436
1437 cond << recur(_member_holonym_of, _member_holonym_of.get_notlogic());
1438 cond << ")";
1439 conditions.push_back(cond.str());
1440 }
1441
1442 if (!_full_member_holonym_of.empty())
1443 {
1444 std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string {
1445 switch (f.get_type())
1446 {
1447 case filter<noun>::type::singleton:
1448 {
1449 if (notlogic == f.get_notlogic())
1450 {
1451 return "noun_id IN (SELECT holonym_id FROM member_holonym_tree_" + std::to_string(f.get_elem()._id) + ")";
1452 } else {
1453 return "noun_id NOT IN (SELECT holonym_id FROM member_holonym_tree_" + std::to_string(f.get_elem()._id) + ")";
1454 }
1455 }
1456
1457 case filter<noun>::type::group:
1458 {
1459 bool truelogic = notlogic != f.get_notlogic();
1460
1461 std::list<std::string> clauses;
1462 std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) {
1463 return recur(f2, truelogic);
1464 });
1465
1466 if (truelogic == f.get_orlogic())
1467 {
1468 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")";
1469 } else {
1470 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
1471 }
1472 }
1473 }
1474 };
1475
1476 conditions.push_back(recur(_full_member_holonym_of, false));
1477 }
1478
1479 if (_is_proper)
1480 {
1481 conditions.push_back("proper = 1");
1482 }
1483
1484 if (_is_not_proper)
1485 {
1486 conditions.push_back("proper = 0");
1487 }
1488
1489 if (_is_instance)
1490 {
1491 conditions.push_back("noun_id IN (SELECT instance_id FROM instantiation)");
1492 }
1493
1494 if (!_instance_of.empty())
1495 {
1496 std::stringstream cond;
1497 if (_instance_of.get_notlogic())
1498 {
1499 cond << "noun_id NOT IN";
1500 } else {
1501 cond << "noun_id IN";
1502 }
1503
1504 cond << "(SELECT instance_id FROM instantiation WHERE ";
1505
1506 std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string {
1507 switch (f.get_type())
1508 {
1509 case filter<noun>::type::singleton:
1510 {
1511 bindings.emplace_back(f.get_elem()._id);
1512
1513 if (notlogic == f.get_notlogic())
1514 {
1515 return "class_id = ?";
1516 } else {
1517 return "class_id != ?";
1518 }
1519 }
1520
1521 case filter<noun>::type::group:
1522 {
1523 bool truelogic = notlogic != f.get_notlogic();
1524
1525 std::list<std::string> clauses;
1526 std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) {
1527 return recur(f2, truelogic);
1528 });
1529
1530 if (truelogic == f.get_orlogic())
1531 {
1532 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")";
1533 } else {
1534 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
1535 }
1536 }
1537 }
1538 };
1539
1540 cond << recur(_instance_of, _instance_of.get_notlogic());
1541 cond << ")";
1542 conditions.push_back(cond.str());
1543 }
1544
1545 if (_is_class)
1546 {
1547 conditions.push_back("noun_id IN (SELECT class_id FROM instantiation)");
1548 }
1549
1550 if (!_class_of.empty())
1551 {
1552 std::stringstream cond;
1553 if (_class_of.get_notlogic())
1554 {
1555 cond << "noun_id NOT IN";
1556 } else {
1557 cond << "noun_id IN";
1558 }
1559
1560 cond << "(SELECT class_id FROM instantiation WHERE ";
1561
1562 std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string {
1563 switch (f.get_type())
1564 {
1565 case filter<noun>::type::singleton:
1566 {
1567 bindings.emplace_back(f.get_elem()._id);
1568
1569 if (notlogic == f.get_notlogic())
1570 {
1571 return "instance_id = ?";
1572 } else {
1573 return "instance_id != ?";
1574 }
1575 }
1576
1577 case filter<noun>::type::group:
1578 {
1579 bool truelogic = notlogic != f.get_notlogic();
1580
1581 std::list<std::string> clauses;
1582 std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) {
1583 return recur(f2, truelogic);
1584 });
1585
1586 if (truelogic == f.get_orlogic())
1587 {
1588 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")";
1589 } else {
1590 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
1591 }
1592 }
1593 }
1594 };
1595
1596 cond << recur(_class_of, _class_of.get_notlogic());
1597 cond << ")";
1598 conditions.push_back(cond.str());
1599 }
1600
1601 if (_has_synonyms)
1602 {
1603 conditions.push_back("noun_id IN (SELECT noun_2_id FROM noun_synonymy)");
1604 }
1605
1606 if (!_synonym_of.empty())
1607 {
1608 std::stringstream cond;
1609 if (_synonym_of.get_notlogic())
1610 {
1611 cond << "noun_id NOT IN";
1612 } else {
1613 cond << "noun_id IN";
1614 }
1615
1616 cond << "(SELECT noun_2_id FROM noun_synonymy WHERE ";
1617
1618 std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string {
1619 switch (f.get_type())
1620 {
1621 case filter<noun>::type::singleton:
1622 {
1623 bindings.emplace_back(f.get_elem()._id);
1624
1625 if (notlogic == f.get_notlogic())
1626 {
1627 return "noun_1_id = ?";
1628 } else {
1629 return "noun_1_id != ?";
1630 }
1631 }
1632
1633 case filter<noun>::type::group:
1634 {
1635 bool truelogic = notlogic != f.get_notlogic();
1636
1637 std::list<std::string> clauses;
1638 std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) {
1639 return recur(f2, truelogic);
1640 });
1641
1642 if (truelogic == f.get_orlogic())
1643 {
1644 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")";
1645 } else {
1646 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
1647 }
1648 }
1649 }
1650 };
1651
1652 cond << recur(_synonym_of, _synonym_of.get_notlogic());
1653 cond << ")";
1654 conditions.push_back(cond.str());
1655 }
1656
1657 if (_has_antonyms)
1658 {
1659 conditions.push_back("noun_id IN (SELECT noun_2_id FROM noun_antonymy)");
1660 }
1661
1662 if (!_antonym_of.empty())
1663 {
1664 std::stringstream cond;
1665 if (_antonym_of.get_notlogic())
1666 {
1667 cond << "noun_id NOT IN";
1668 } else {
1669 cond << "noun_id IN";
1670 }
1671
1672 cond << "(SELECT noun_2_id FROM noun_antonymy WHERE ";
1673
1674 std::function<std::string (filter<noun>, bool)> recur = [&] (filter<noun> f, bool notlogic) -> std::string {
1675 switch (f.get_type())
1676 {
1677 case filter<noun>::type::singleton:
1678 {
1679 bindings.emplace_back(f.get_elem()._id);
1680
1681 if (notlogic == f.get_notlogic())
1682 {
1683 return "noun_1_id = ?";
1684 } else {
1685 return "noun_1_id != ?";
1686 }
1687 }
1688
1689 case filter<noun>::type::group:
1690 {
1691 bool truelogic = notlogic != f.get_notlogic();
1692
1693 std::list<std::string> clauses;
1694 std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<noun> f2) {
1695 return recur(f2, truelogic);
1696 });
1697
1698 if (truelogic == f.get_orlogic())
1699 {
1700 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")";
1701 } else {
1702 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
1703 }
1704 }
1705 }
1706 };
1707
1708 cond << recur(_antonym_of, _antonym_of.get_notlogic());
1709 cond << ")";
1710 conditions.push_back(cond.str());
1711 }
1712
1713 if (_has_pertainym)
1714 {
1715 conditions.push_back("noun_id IN (SELECT noun_id FROM pertainymy)");
1716 }
1717
1718 if (!_anti_pertainym_of.empty())
1719 {
1720 std::stringstream cond;
1721 if (_anti_pertainym_of.get_notlogic())
1722 {
1723 cond << "noun_id NOT IN";
1724 } else {
1725 cond << "noun_id IN";
1726 }
1727
1728 cond << "(SELECT noun_id FROM pertainymy WHERE ";
1729
1730 std::function<std::string (filter<adjective>, bool)> recur = [&] (filter<adjective> f, bool notlogic) -> std::string {
1731 switch (f.get_type())
1732 {
1733 case filter<adjective>::type::singleton:
1734 {
1735 bindings.emplace_back(f.get_elem()._id);
1736
1737 if (notlogic == f.get_notlogic())
1738 {
1739 return "pertainym_id = ?";
1740 } else {
1741 return "pertainym_id != ?";
1742 }
1743 }
1744
1745 case filter<adjective>::type::group:
1746 {
1747 bool truelogic = notlogic != f.get_notlogic();
1748
1749 std::list<std::string> clauses;
1750 std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<adjective> f2) {
1751 return recur(f2, truelogic);
1752 });
1753
1754 if (truelogic == f.get_orlogic())
1755 {
1756 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")";
1757 } else {
1758 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
1759 }
1760 }
1761 }
1762 };
1763
1764 cond << recur(_anti_pertainym_of, _anti_pertainym_of.get_notlogic());
1765 cond << ")";
1766 conditions.push_back(cond.str());
1767 }
1768
1769 if (_is_attribute)
1770 {
1771 conditions.push_back("noun_id IN (SELECT noun_id FROM variation)");
1772 }
1773
1774 if (!_attribute_of.empty())
1775 {
1776 std::stringstream cond;
1777 if (_attribute_of.get_notlogic())
1778 {
1779 cond << "noun_id NOT IN";
1780 } else {
1781 cond << "noun_id IN";
1782 }
1783
1784 cond << "(SELECT noun_id FROM variation WHERE ";
1785
1786 std::function<std::string (filter<adjective>, bool)> recur = [&] (filter<adjective> f, bool notlogic) -> std::string {
1787 switch (f.get_type())
1788 {
1789 case filter<adjective>::type::singleton:
1790 {
1791 bindings.emplace_back(f.get_elem()._id);
1792
1793 if (notlogic == f.get_notlogic())
1794 {
1795 return "adjective_id = ?";
1796 } else {
1797 return "adjective_id != ?";
1798 }
1799 }
1800
1801 case filter<adjective>::type::group:
1802 {
1803 bool truelogic = notlogic != f.get_notlogic();
1804
1805 std::list<std::string> clauses;
1806 std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<adjective> f2) {
1807 return recur(f2, truelogic);
1808 });
1809
1810 if (truelogic == f.get_orlogic())
1811 {
1812 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")";
1813 } else {
1814 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
1815 }
1816 }
1817 }
1818 };
1819
1820 cond << recur(_attribute_of, _attribute_of.get_notlogic());
1821 cond << ")";
1822 conditions.push_back(cond.str());
1823 }
1824
1825 if (_at_least_n_images != unlimited)
1826 {
1827 conditions.push_back("images >= ?");
1828 bindings.emplace_back(_at_least_n_images);
1829 }
1830
1831 if (!_with_wnid.empty())
1832 {
1833 std::vector<std::string> clauses(_with_wnid.size(), "wnid = ?");
1834 std::string cond = verbly::implode(std::begin(clauses), std::end(clauses), " OR ");
1835 conditions.push_back("(" + cond + ")");
1836
1837 for (auto wnid : _with_wnid)
1838 {
1839 bindings.emplace_back(wnid);
1840 }
1841 }
1842
1843 /*
1844 if (!_derived_from_adjective.empty())
1845 {
1846 std::list<std::string> clauses(_derived_from_adjective.size(), "adjective_id = @DERADJ");
1847 std::string cond = "noun_id IN (SELECT noun_id FROM noun_adjective_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
1848 conditions.push_back(cond);
1849 }
1850
1851 if (!_not_derived_from_adjective.empty())
1852 {
1853 std::list<std::string> clauses(_not_derived_from_adjective.size(), "adjective_id = @NDERADJ");
1854 std::string cond = "noun_id NOT IN (SELECT noun_id FROM noun_adjective_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
1855 conditions.push_back(cond);
1856 }
1857
1858 if (!_derived_from_adverb.empty())
1859 {
1860 std::list<std::string> clauses(_derived_from_adverb.size(), "adverb_id = @DERADV");
1861 std::string cond = "noun_id IN (SELECT noun_id FROM noun_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
1862 conditions.push_back(cond);
1863 }
1864
1865 if (!_not_derived_from_adverb.empty())
1866 {
1867 std::list<std::string> clauses(_not_derived_from_adverb.size(), "adverb_id = @NDERADV");
1868 std::string cond = "noun_id NOT IN (SELECT noun_id FROM noun_adverb_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
1869 conditions.push_back(cond);
1870 }
1871
1872 if (!_derived_from_noun.empty())
1873 {
1874 std::list<std::string> clauses(_derived_from_noun.size(), "noun_2_id = @DERN");
1875 std::string cond = "noun_id IN (SELECT noun_1_id FROM noun_noun_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
1876 conditions.push_back(cond);
1877 }
1878
1879 if (!_not_derived_from_noun.empty())
1880 {
1881 std::list<std::string> clauses(_not_derived_from_noun.size(), "noun_2_id = @NDERN");
1882 std::string cond = "noun_id NOT IN (SELECT noun_1_id FROM noun_noun_derivation WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
1883 conditions.push_back(cond);
1884 }
1885 */
1886 if (!conditions.empty())
1887 {
1888 construct << " WHERE ";
1889 construct << verbly::implode(std::begin(conditions), std::end(conditions), " AND ");
1890 }
1891
1892 if (_random)
1893 {
1894 construct << " ORDER BY RANDOM()";
1895 }
1896
1897 if (_limit != unlimited)
1898 {
1899 construct << " LIMIT " << _limit;
1900 }
1901
1902 sqlite3_stmt* ppstmt;
1903 std::string query = construct.str();
1904 if (sqlite3_prepare_v2(_data.ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
1905 {
1906 throw std::runtime_error(sqlite3_errmsg(_data.ppdb));
1907 }
1908
1909 int i = 1;
1910 for (auto& binding : bindings)
1911 {
1912 switch (binding.get_type())
1913 {
1914 case binding::type::integer:
1915 {
1916 sqlite3_bind_int(ppstmt, i, binding.get_integer());
1917
1918 break;
1919 }
1920
1921 case binding::type::string:
1922 {
1923 sqlite3_bind_text(ppstmt, i, binding.get_string().c_str(), binding.get_string().length(), SQLITE_TRANSIENT);
1924
1925 break;
1926 }
1927 }
1928
1929 i++;
1930 }
1931
1932 /*
1933 for (auto adj : _derived_from_adjective)
1934 {
1935 sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@DERADJ"), adj._id);
1936 }
1937
1938 for (auto adj : _not_derived_from_adjective)
1939 {
1940 sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@NDERADJ"), adj._id);
1941 }
1942
1943 for (auto adv : _derived_from_adverb)
1944 {
1945 sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@DERADV"), adv._id);
1946 }
1947
1948 for (auto adv : _not_derived_from_adverb)
1949 {
1950 sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@NDERADV"), adv._id);
1951 }
1952
1953 for (auto n : _derived_from_noun)
1954 {
1955 sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@DERN"), n._id);
1956 }
1957
1958 for (auto n : _not_derived_from_noun)
1959 {
1960 sqlite3_bind_int(ppstmt, sqlite3_bind_parameter_index(ppstmt, "@NDERN"), n._id);
1961 }
1962*/
1963 std::list<noun> output;
1964 while (sqlite3_step(ppstmt) == SQLITE_ROW)
1965 {
1966 noun tnc {_data, sqlite3_column_int(ppstmt, 0)};
1967 tnc._singular = std::string(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 1)));
1968
1969 if (sqlite3_column_type(ppstmt, 2) != SQLITE_NULL)
1970 {
1971 tnc._plural = std::string(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 2)));
1972 }
1973
1974 tnc._wnid = sqlite3_column_int(ppstmt, 3);
1975
1976 output.push_back(tnc);
1977 }
1978
1979 sqlite3_finalize(ppstmt);
1980
1981 for (auto& noun : output)
1982 {
1983 query = "SELECT pronunciation, prerhyme, rhyme FROM noun_pronunciations WHERE noun_id = ?";
1984 if (sqlite3_prepare_v2(_data.ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
1985 {
1986 throw std::runtime_error(sqlite3_errmsg(_data.ppdb));
1987 }
1988
1989 sqlite3_bind_int(ppstmt, 1, noun._id);
1990
1991 while (sqlite3_step(ppstmt) == SQLITE_ROW)
1992 {
1993 std::string pronunciation(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 0)));
1994 auto phonemes = verbly::split<std::list<std::string>>(pronunciation, " ");
1995
1996 noun.pronunciations.push_back(phonemes);
1997
1998 if ((sqlite3_column_type(ppstmt, 1) != SQLITE_NULL) && (sqlite3_column_type(ppstmt, 2) != SQLITE_NULL))
1999 {
2000 std::string prerhyme(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 1)));
2001 std::string rhyming(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 2)));
2002
2003 noun.rhymes.emplace_back(prerhyme, rhyming);
2004 }
2005 }
2006
2007 sqlite3_finalize(ppstmt);
2008 }
2009
2010 return output;
2011 }
2012
2013};
diff --git a/lib/noun_query.h b/lib/noun_query.h deleted file mode 100644 index 74df260..0000000 --- a/lib/noun_query.h +++ /dev/null
@@ -1,180 +0,0 @@
1#ifndef NOUN_QUERY_H_5DE51DD7
2#define NOUN_QUERY_H_5DE51DD7
3
4namespace verbly {
5
6 class noun_query {
7 public:
8 noun_query(const data& _data);
9
10 noun_query& limit(int _limit);
11 noun_query& random();
12 noun_query& except(const noun& _word);
13 noun_query& rhymes_with(const word& _word);
14 noun_query& rhymes_with(rhyme _r);
15 noun_query& has_pronunciation();
16 noun_query& has_rhyming_noun();
17 noun_query& has_rhyming_adjective();
18 noun_query& has_rhyming_adverb();
19 noun_query& has_rhyming_verb();
20 noun_query& with_stress(filter<std::vector<bool>> _arg);
21
22 noun_query& with_singular_form(std::string _arg);
23 noun_query& with_prefix(filter<std::string> _f);
24 noun_query& with_suffix(filter<std::string> _f);
25
26 noun_query& requires_plural_form();
27
28 noun_query& with_complexity(int _arg);
29
30 noun_query& is_hypernym();
31 noun_query& hypernym_of(filter<noun> _f);
32 noun_query& full_hypernym_of(filter<noun> _f);
33
34 noun_query& is_hyponym();
35 noun_query& hyponym_of(filter<noun> _f);
36 noun_query& full_hyponym_of(filter<noun> _f);
37
38 noun_query& is_part_meronym();
39 noun_query& part_meronym_of(filter<noun> _f);
40 noun_query& full_part_meronym_of(filter<noun> _f);
41
42 noun_query& is_part_holonym();
43 noun_query& part_holonym_of(filter<noun> _f);
44 noun_query& full_part_holonym_of(filter<noun> _f);
45
46 noun_query& is_substance_meronym();
47 noun_query& substance_meronym_of(filter<noun> _f);
48 noun_query& full_substance_meronym_of(filter<noun> _f);
49
50 noun_query& is_substance_holonym();
51 noun_query& substance_holonym_of(filter<noun> _f);
52 noun_query& full_substance_holonym_of(filter<noun> _f);
53
54 noun_query& is_member_meronym();
55 noun_query& member_meronym_of(filter<noun> _f);
56 noun_query& full_member_meronym_of(filter<noun> _f);
57
58 noun_query& is_member_holonym();
59 noun_query& member_holonym_of(filter<noun> _f);
60 noun_query& full_member_holonym_of(filter<noun> _f);
61
62 noun_query& is_proper();
63 noun_query& is_not_proper();
64
65 noun_query& is_instance();
66 noun_query& instance_of(filter<noun> _f);
67
68 noun_query& is_class();
69 noun_query& class_of(filter<noun> _f);
70
71 noun_query& has_synonyms();
72 noun_query& synonym_of(filter<noun> _f);
73
74 noun_query& has_antonyms();
75 noun_query& antonym_of(filter<noun> _f);
76
77 noun_query& has_pertainym();
78 noun_query& anti_pertainym_of(filter<adjective> _f);
79
80 noun_query& is_attribute();
81 noun_query& attribute_of(filter<adjective> _f);
82
83 noun_query& at_least_n_images(int _arg);
84 noun_query& with_wnid(int _arg);
85
86/* noun_query& derived_from(const word& _w);
87 noun_query& not_derived_from(const word& _w);*/
88
89 std::list<noun> run() const;
90
91 const static int unlimited = -1;
92
93 private:
94 const data& _data;
95 int _limit = unlimited;
96 bool _random = false;
97 std::list<rhyme> _rhymes;
98 std::list<noun> _except;
99 bool _has_prn = false;
100 bool _has_rhyming_noun = false;
101 bool _has_rhyming_adjective = false;
102 bool _has_rhyming_adverb = false;
103 bool _has_rhyming_verb = false;
104 filter<std::vector<bool>> _stress;
105
106 std::list<std::string> _with_singular_form;
107 filter<std::string> _with_prefix;
108 filter<std::string> _with_suffix;
109
110 int _with_complexity = unlimited;
111
112 bool _requires_plural_form = false;
113
114 bool _is_hypernym = false;
115 filter<noun> _hypernym_of;
116 filter<noun> _full_hypernym_of;
117
118 bool _is_hyponym = false;
119 filter<noun> _hyponym_of;
120 filter<noun> _full_hyponym_of;
121
122 bool _is_part_meronym = false;
123 filter<noun> _part_meronym_of;
124 filter<noun> _full_part_meronym_of;
125
126 bool _is_substance_meronym = false;
127 filter<noun> _substance_meronym_of;
128 filter<noun> _full_substance_meronym_of;
129
130 bool _is_member_meronym = false;
131 filter<noun> _member_meronym_of;
132 filter<noun> _full_member_meronym_of;
133
134 bool _is_part_holonym = false;
135 filter<noun> _part_holonym_of;
136 filter<noun> _full_part_holonym_of;
137
138 bool _is_substance_holonym = false;
139 filter<noun> _substance_holonym_of;
140 filter<noun> _full_substance_holonym_of;
141
142 bool _is_member_holonym = false;
143 filter<noun> _member_holonym_of;
144 filter<noun> _full_member_holonym_of;
145
146 bool _is_proper = false;
147 bool _is_not_proper = false;
148
149 bool _is_instance = false;
150 filter<noun> _instance_of;
151
152 bool _is_class = false;
153 filter<noun> _class_of;
154
155 bool _has_synonyms = false;
156 filter<noun> _synonym_of;
157
158 bool _has_antonyms = false;
159 filter<noun> _antonym_of;
160
161 bool _has_pertainym = false;
162 filter<adjective> _anti_pertainym_of;
163
164 bool _is_attribute = false;
165 filter<adjective> _attribute_of;
166
167 int _at_least_n_images = unlimited;
168 std::set<int> _with_wnid;
169
170/* std::list<adjective> _derived_from_adjective;
171 std::list<adjective> _not_derived_from_adjective;
172 std::list<adverb> _derived_from_adverb;
173 std::list<adverb> _not_derived_from_adverb;
174 std::list<noun> _derived_from_noun;
175 std::list<noun> _not_derived_from_noun;*/
176 };
177
178};
179
180#endif /* end of include guard: NOUN_QUERY_H_5DE51DD7 */
diff --git a/lib/preposition.cpp b/lib/preposition.cpp deleted file mode 100644 index cea9165..0000000 --- a/lib/preposition.cpp +++ /dev/null
@@ -1,107 +0,0 @@
1#include "verbly.h"
2
3namespace verbly {
4
5 std::string preposition::get_form() const
6 {
7 return form;
8 }
9
10 preposition_query::preposition_query(const data& _data) : _data(_data)
11 {
12
13 }
14
15 preposition_query& preposition_query::limit(int _limit)
16 {
17 this->_limit = _limit;
18
19 return *this;
20 }
21
22 preposition_query& preposition_query::random()
23 {
24 _random = true;
25
26 return *this;
27 }
28
29 preposition_query& preposition_query::in_group(std::string _arg)
30 {
31 _in_group.push_back(_arg);
32
33 return *this;
34 }
35
36 std::list<preposition> preposition_query::run() const
37 {
38 std::stringstream construct;
39 construct << "SELECT form FROM prepositions";
40 std::list<binding> bindings;
41
42 if (!_in_group.empty())
43 {
44 std::list<std::string> clauses(_in_group.size(), "groupname = ?");
45 construct << " WHERE preposition_id IN (SELECT preposition_id FROM preposition_groups WHERE ";
46 construct << verbly::implode(std::begin(clauses), std::end(clauses), " OR ");
47 construct << ")";
48
49 for (auto g : _in_group)
50 {
51 bindings.emplace_back(g);
52 }
53 }
54
55 if (_random)
56 {
57 construct << " ORDER BY RANDOM()";
58 }
59
60 if (_limit != unlimited)
61 {
62 construct << " LIMIT " << _limit;
63 }
64
65 sqlite3_stmt* ppstmt;
66 std::string query = construct.str();
67 if (sqlite3_prepare_v2(_data.ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
68 {
69 throw std::runtime_error(sqlite3_errmsg(_data.ppdb));
70 }
71
72 int i = 1;
73 for (auto& binding : bindings)
74 {
75 switch (binding.get_type())
76 {
77 case binding::type::integer:
78 {
79 sqlite3_bind_int(ppstmt, i, binding.get_integer());
80
81 break;
82 }
83
84 case binding::type::string:
85 {
86 sqlite3_bind_text(ppstmt, i, binding.get_string().c_str(), binding.get_string().length(), SQLITE_TRANSIENT);
87
88 break;
89 }
90 }
91
92 i++;
93 }
94
95 std::list<preposition> output;
96 while (sqlite3_step(ppstmt) == SQLITE_ROW)
97 {
98 preposition pp;
99 pp.form = std::string(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 0)));
100
101 output.push_back(pp);
102 }
103
104 return output;
105 }
106
107};
diff --git a/lib/preposition.h b/lib/preposition.h deleted file mode 100644 index 89f24fa..0000000 --- a/lib/preposition.h +++ /dev/null
@@ -1,38 +0,0 @@
1#ifndef PREPOSITION_H_FF908021
2#define PREPOSITION_H_FF908021
3
4namespace verbly {
5
6 class preposition_query;
7
// A single preposition as returned by preposition_query. The form is private
// and only preposition_query (a friend) can fill it in.
class preposition {
  friend class preposition_query;

public:
  // Returns the preposition's form string.
  std::string get_form() const;

private:
  std::string form;
};
17
18 class preposition_query {
19 public:
20 preposition_query(const data& _data);
21
22 preposition_query& limit(int _limit);
23 preposition_query& random();
24 preposition_query& in_group(std::string _arg);
25
26 std::list<preposition> run() const;
27
28 const static int unlimited = -1;
29 private:
30 const data& _data;
31 int _limit = unlimited;
32 bool _random = false;
33 std::list<std::string> _in_group;
34 };
35
36};
37
38#endif /* end of include guard: PREPOSITION_H_FF908021 */
diff --git a/lib/pronunciation.cpp b/lib/pronunciation.cpp new file mode 100644 index 0000000..f5b742f --- /dev/null +++ b/lib/pronunciation.cpp
@@ -0,0 +1,69 @@
1#include "pronunciation.h"
2#include <sqlite3.h>
3#include "form.h"
4#include "lemma.h"
5#include "word.h"
6#include "util.h"
7
8namespace verbly {
9
10 const object pronunciation::objectType = object::pronunciation;
11
12 const std::list<std::string> pronunciation::select = {"pronunciation_id", "phonemes", "syllables", "stress", "prerhyme", "rhyme"};
13
14 const field pronunciation::id = field::integerField(object::pronunciation, "pronunciation_id");
15 const field pronunciation::numOfSyllables = field::integerField(object::pronunciation, "syllables");
16 const field pronunciation::stress = field::stringField(object::pronunciation, "stress");
17
18 const field pronunciation::form = field::joinThrough(object::pronunciation, "pronunciation_id", object::form, "forms_pronunciations", "form_id");
19
20 const field pronunciation::prerhyme = field::stringField(object::pronunciation, "prerhyme", true);
21 const field pronunciation::rhyme = field::stringField(object::pronunciation, "rhyme", true);
22
23 pronunciation::pronunciation(const database& db, sqlite3_stmt* row) : db_(&db), valid_(true)
24 {
25 id_ = sqlite3_column_int(row, 0);
26
27 std::string phonemesStr(reinterpret_cast<const char*>(sqlite3_column_text(row, 1)));
28 phonemes_ = split<std::vector<std::string>>(phonemesStr, " ");
29
30 syllables_ = sqlite3_column_int(row, 2);
31 stress_ = std::string(reinterpret_cast<const char*>(sqlite3_column_text(row, 3)));
32
33 if (sqlite3_column_type(row, 5) != SQLITE_NULL)
34 {
35 hasRhyme_ = true;
36
37 prerhyme_ = std::string(reinterpret_cast<const char*>(sqlite3_column_text(row, 4)));
38 rhyme_ = std::string(reinterpret_cast<const char*>(sqlite3_column_text(row, 5)));
39 }
40 }
41
42 filter pronunciation::rhymesWith(const pronunciation& arg)
43 {
44 return (prerhyme != arg.getPrerhyme()) && (rhyme == arg.getRhyme());
45 }
46
47 /*filter pronunciation::rhymesWith(const class form& arg)
48 {
49 filter result;
50
51 for (const pronunciation& p : arg.getPronunciations())
52 {
53 result |= rhymesWith(p);
54 }
55
56 return result;
57 }
58
59 filter pronunciation::rhymesWith(const lemma& arg)
60 {
61 return rhymesWith(arg.getBaseForm());
62 }
63
64 filter pronunciation::rhymesWith(const word& arg)
65 {
66 return rhymesWith(arg.getLemma());
67 }*/
68
69};
diff --git a/lib/pronunciation.h b/lib/pronunciation.h new file mode 100644 index 0000000..c7a1d4d --- /dev/null +++ b/lib/pronunciation.h
@@ -0,0 +1,163 @@
1#ifndef PRONUNCIATION_H_C68F86B0
2#define PRONUNCIATION_H_C68F86B0
3
#include <list>
#include <stdexcept>
#include <string>
#include <vector>
#include "field.h"
#include "filter.h"
9
10struct sqlite3_stmt;
11
12namespace verbly {
13
14 class form;
15 class lemma;
16 class word;
17 class database;
18
19 class pronunciation {
20 public:
21
22 // Default constructor
23
24 pronunciation() = default;
25
26 // Construct from database
27
28 pronunciation(const database& db, sqlite3_stmt* row);
29
30 // Accessors
31
32 operator bool() const
33 {
34 return valid_;
35 }
36
37 int getId() const
38 {
39 if (!valid_)
40 {
41 throw std::domain_error("Bad access to uninitialized pronunciation");
42 }
43
44 return id_;
45 }
46
47 const std::vector<std::string>& getPhonemes() const
48 {
49 if (!valid_)
50 {
51 throw std::domain_error("Bad access to uninitialized pronunciation");
52 }
53
54 return phonemes_;
55 }
56
57 int getSyllables() const
58 {
59 if (!valid_)
60 {
61 throw std::domain_error("Bad access to uninitialized pronunciation");
62 }
63
64 return syllables_;
65 }
66
67 std::string getStress() const
68 {
69 if (!valid_)
70 {
71 throw std::domain_error("Bad access to uninitialized pronunciation");
72 }
73
74 return stress_;
75 }
76
77 bool hasRhyme() const
78 {
79 if (!valid_)
80 {
81 throw std::domain_error("Bad access to uninitialized pronunciation");
82 }
83
84 return hasRhyme_;
85 }
86
87 std::string getPrerhyme() const
88 {
89 if (!valid_)
90 {
91 throw std::domain_error("Bad access to uninitialized pronunciation");
92 }
93
94 if (!hasRhyme_)
95 {
96 throw std::domain_error("This pronunciation has no rhyme");
97 }
98
99 return prerhyme_;
100 }
101
102 std::string getRhyme() const
103 {
104 if (!valid_)
105 {
106 throw std::domain_error("Bad access to uninitialized pronunciation");
107 }
108
109 if (!hasRhyme_)
110 {
111 throw std::domain_error("This pronunciation has no rhyme");
112 }
113
114 return rhyme_;
115 }
116
117 // Type info
118
119 static const object objectType;
120
121 static const std::list<std::string> select;
122
123 // Query fields
124
125 static const field id;
126 static const field numOfSyllables;
127 static const field stress;
128
129 operator filter() const
130 {
131 return (id == id_);
132 }
133
134 static filter rhymesWith(const pronunciation& arg);
135 static filter rhymesWith(const class form& arg);
136 static filter rhymesWith(const lemma& arg);
137 static filter rhymesWith(const word& arg);
138
139 // Relationships to other objects
140
141 static const field form;
142
143 private:
144 bool valid_ = false;
145
146 int id_;
147 std::vector<std::string> phonemes_;
148 int syllables_;
149 std::string stress_;
150 bool hasRhyme_ = false;
151 std::string prerhyme_;
152 std::string rhyme_;
153
154 const database* db_;
155
156 static const field prerhyme;
157 static const field rhyme;
158
159 };
160
161};
162
163#endif /* end of include guard: PRONUNCIATION_H_C68F86B0 */
diff --git a/lib/query.h b/lib/query.h new file mode 100644 index 0000000..e31be3d --- /dev/null +++ b/lib/query.h
@@ -0,0 +1,123 @@
1#ifndef QUERY_H_7CC5284C
2#define QUERY_H_7CC5284C
3
4#include <vector>
5#include <stdexcept>
6#include <string>
7#include <list>
8#include <sqlite3.h>
9#include <iostream>
10#include "statement.h"
11#include "binding.h"
12
13namespace verbly {
14
// Exception raised for database failures. what() is the caller's message
// followed by the SQL-level message in parentheses: "<msg> (<sqlMsg>)".
class database_error : public std::logic_error {
public:

  database_error(std::string msg, std::string sqlMsg)
    : std::logic_error(msg.append(" (").append(sqlMsg).append(")"))
  {
  }
};
22
23 template <typename Object>
24 class query {
25 public:
26
27 query(const database& db, sqlite3* ppdb, filter queryFilter, bool random, int limit) : db_(&db)
28 {
29 statement stmt(Object::objectType, std::move(queryFilter));
30
31 std::string queryString = stmt.getQueryString(Object::select, random, limit);
32 std::list<binding> bindings = stmt.getBindings();
33
34 std::cout << queryString << std::endl;
35
36 if (sqlite3_prepare_v2(ppdb, queryString.c_str(), queryString.length(), &ppstmt_, NULL) != SQLITE_OK)
37 {
38 std::string errorMsg = sqlite3_errmsg(ppdb);
39 sqlite3_finalize(ppstmt_);
40
41 throw database_error("Error preparing query", errorMsg);
42 }
43
44 int i = 1;
45 for (const binding& value : bindings)
46 {
47 switch (value.getType())
48 {
49 case binding::type::integer:
50 {
51 if (sqlite3_bind_int(ppstmt_, i, value.getInteger()) != SQLITE_OK)
52 {
53 std::string errorMsg = sqlite3_errmsg(ppdb);
54 sqlite3_finalize(ppstmt_);
55
56 throw database_error("Error binding value to query", errorMsg);
57 }
58
59 break;
60 }
61
62 case binding::type::string:
63 {
64 if (sqlite3_bind_text(ppstmt_, i, value.getString().c_str(), value.getString().length(), SQLITE_TRANSIENT) != SQLITE_OK)
65 {
66 std::string errorMsg = sqlite3_errmsg(ppdb);
67 sqlite3_finalize(ppstmt_);
68
69 throw database_error("Error binding value to query", errorMsg);
70 }
71
72 break;
73 }
74
75 case binding::type::invalid:
76 {
77 throw std::logic_error("Cannot use invalid bindings");
78 }
79 }
80
81 i++;
82 }
83 }
84
85 ~query()
86 {
87 sqlite3_finalize(ppstmt_);
88 }
89
90 std::vector<Object> all() const
91 {
92 std::vector<Object> result;
93
94 while (sqlite3_step(ppstmt_) == SQLITE_ROW)
95 {
96 result.emplace_back(*db_, ppstmt_);
97 }
98
99 sqlite3_reset(ppstmt_);
100
101 return result;
102 }
103
104 Object first() const
105 {
106 std::vector<Object> results = all();
107 if (!results.empty())
108 {
109 return results.front();
110 } else {
111 throw std::logic_error("query returned empty dataset");
112 }
113 }
114
115 private:
116 const database* db_;
117 sqlite3_stmt* ppstmt_;
118
119 };
120
121};
122
123#endif /* end of include guard: QUERY_H_7CC5284C */
diff --git a/lib/statement.cpp b/lib/statement.cpp new file mode 100644 index 0000000..52fa00d --- /dev/null +++ b/lib/statement.cpp
@@ -0,0 +1,806 @@
1#include "statement.h"
2#include <sstream>
3#include <utility>
4#include "filter.h"
5#include "util.h"
6#include "notion.h"
7#include "word.h"
8#include "group.h"
9#include "frame.h"
10#include "lemma.h"
11#include "form.h"
12#include "pronunciation.h"
13
14namespace verbly {
15
16 statement::statement(
17 object context,
18 filter queryFilter) :
19 statement(getTableForContext(context), queryFilter.normalize(context))
20 {
21 }
22
23 std::string statement::getQueryString(std::list<std::string> select, bool random, int limit) const
24 {
25 std::stringstream queryStream;
26
27 if (!withs_.empty())
28 {
29 queryStream << "WITH RECURSIVE ";
30
31 std::list<std::string> ctes;
32 for (const with& cte : withs_)
33 {
34 std::stringstream cteStream;
35 cteStream << cte.getIdentifier();
36 cteStream << " AS (SELECT ";
37 cteStream << cte.getTopTable();
38 cteStream << ".* FROM ";
39 cteStream << cte.getTableForId(cte.getTopTable());
40 cteStream << " AS ";
41 cteStream << cte.getTopTable();
42
43 for (const join& j : cte.getJoins())
44 {
45 cteStream << " ";
46 cteStream << j;
47 }
48
49 if (cte.getCondition().getType() != condition::type::empty)
50 {
51 cteStream << " WHERE ";
52 cteStream << cte.getCondition().toSql();
53 }
54
55 cteStream << " UNION SELECT l.* FROM ";
56 cteStream << cte.getIdentifier();
57 cteStream << " AS t INNER JOIN ";
58 cteStream << cte.getField().getTable();
59 cteStream << " AS j ON t.";
60 cteStream << cte.getField().getColumn();
61 cteStream << " = j.";
62 cteStream << cte.getField().getForeignJoinColumn();
63 cteStream << " INNER JOIN ";
64 cteStream << cte.getTableForId(cte.getTopTable());
65 cteStream << " AS l ON j.";
66 cteStream << cte.getField().getJoinColumn();
67 cteStream << " = l.";
68 cteStream << cte.getField().getColumn();
69 cteStream << ")";
70
71 ctes.push_back(cteStream.str());
72 }
73
74 queryStream << implode(std::begin(ctes), std::end(ctes), ", ");
75 queryStream << " ";
76 }
77
78 std::list<std::string> realSelect;
79 for (std::string& s : select)
80 {
81 realSelect.push_back(topTable_ + "." + s);
82 }
83
84 queryStream << "SELECT ";
85 queryStream << implode(std::begin(realSelect), std::end(realSelect), ", ");
86 queryStream << " FROM ";
87 queryStream << tables_.at(topTable_);
88 queryStream << " AS ";
89 queryStream << topTable_;
90
91 for (const join& j : joins_)
92 {
93 queryStream << " ";
94 queryStream << j;
95 }
96
97 if (topCondition_.getType() != condition::type::empty)
98 {
99 queryStream << " WHERE ";
100 queryStream << topCondition_.toSql();
101 }
102
103 if (random)
104 {
105 queryStream << " ORDER BY RANDOM()";
106 }
107
108 if (limit > 0)
109 {
110 queryStream << " LIMIT ";
111 queryStream << limit;
112 }
113
114 return queryStream.str();
115 }
116
117 std::list<binding> statement::getBindings() const
118 {
119 std::list<binding> result;
120
121 for (const with& w : withs_)
122 {
123 for (binding value : w.getCondition().flattenBindings())
124 {
125 result.push_back(std::move(value));
126 }
127 }
128
129 for (binding value : topCondition_.flattenBindings())
130 {
131 result.push_back(std::move(value));
132 }
133
134 return result;
135 }
136
137 statement::statement(
138 std::string tableName,
139 filter clause,
140 int nextTableId,
141 int nextWithId) :
142 nextTableId_(nextTableId),
143 nextWithId_(nextWithId),
144 topTable_(instantiateTable(std::move(tableName))),
145 topCondition_(parseFilter(std::move(clause)))
146 {
147 }
148
149 statement::condition statement::parseFilter(filter clause)
150 {
151 switch (clause.getType())
152 {
153 case filter::type::empty:
154 {
155 return {};
156 }
157
158 case filter::type::singleton:
159 {
160 switch (clause.getField().getType())
161 {
162 case field::type::undefined:
163 {
164 return {};
165 }
166
167 case field::type::string:
168 case field::type::integer:
169 case field::type::boolean:
170 {
171 switch (clause.getComparison())
172 {
173 case filter::comparison::is_null:
174 {
175 return condition(topTable_, clause.getField().getColumn(), true);
176 }
177
178 case filter::comparison::is_not_null:
179 {
180 return condition(topTable_, clause.getField().getColumn(), false);
181 }
182
183 case filter::comparison::int_equals:
184 {
185 return condition(topTable_, clause.getField().getColumn(), condition::comparison::equals, clause.getIntegerArgument());
186 }
187
188 case filter::comparison::int_does_not_equal:
189 {
190 return condition(topTable_, clause.getField().getColumn(), condition::comparison::does_not_equal, clause.getIntegerArgument());
191 }
192
193 case filter::comparison::int_is_at_least:
194 {
195 return condition(topTable_, clause.getField().getColumn(), condition::comparison::is_at_least, clause.getIntegerArgument());
196 }
197
198 case filter::comparison::int_is_greater_than:
199 {
200 return condition(topTable_, clause.getField().getColumn(), condition::comparison::is_greater_than, clause.getIntegerArgument());
201 }
202
203 case filter::comparison::int_is_at_most:
204 {
205 return condition(topTable_, clause.getField().getColumn(), condition::comparison::is_at_most, clause.getIntegerArgument());
206 }
207
208 case filter::comparison::int_is_less_than:
209 {
210 return condition(topTable_, clause.getField().getColumn(), condition::comparison::is_less_than, clause.getIntegerArgument());
211 }
212
213 case filter::comparison::boolean_equals:
214 {
215 return condition(topTable_, clause.getField().getColumn(), condition::comparison::equals, clause.getBooleanArgument() ? 1 : 0);
216 }
217
218 case filter::comparison::string_equals:
219 {
220 return condition(topTable_, clause.getField().getColumn(), condition::comparison::equals, clause.getStringArgument());
221 }
222
223 case filter::comparison::string_does_not_equal:
224 {
225 return condition(topTable_, clause.getField().getColumn(), condition::comparison::does_not_equal, clause.getStringArgument());
226 }
227
228 case filter::comparison::string_is_like:
229 {
230 return condition(topTable_, clause.getField().getColumn(), condition::comparison::is_like, clause.getStringArgument());
231 }
232
233 case filter::comparison::string_is_not_like:
234 {
235 return condition(topTable_, clause.getField().getColumn(), condition::comparison::is_not_like, clause.getStringArgument());
236 }
237
238 case filter::comparison::matches:
239 case filter::comparison::does_not_match:
240 case filter::comparison::hierarchally_matches:
241 case filter::comparison::does_not_hierarchally_match:
242 {
243 throw std::logic_error("Invalid comparison type for field");
244 }
245 }
246 }
247
248 case field::type::join:
249 {
250 std::string joinTableName;
251 if (clause.getField().hasTable())
252 {
253 joinTableName = clause.getField().getTable();
254 } else {
255 joinTableName = getTableForContext(clause.getField().getJoinObject());
256 }
257
258 statement joinStmt(
259 joinTableName,
260 clause.getJoinCondition().normalize(clause.getField().getJoinObject()),
261 nextTableId_,
262 nextWithId_);
263
264 std::string joinTable = joinStmt.topTable_;
265 condition curCond = integrate(std::move(joinStmt));
266
267 bool outer = false;
268 if (clause.getComparison() == filter::comparison::does_not_match)
269 {
270 outer = true;
271
272 curCond &= condition(joinTable, clause.getField().getColumn(), true);
273 }
274
275 joins_.emplace_back(outer, joinTableName, topTable_, clause.getField().getColumn(), joinTable, clause.getField().getColumn());
276
277 return curCond;
278 }
279
280 case field::type::join_through:
281 {
282 statement joinStmt(
283 getTableForContext(clause.getField().getJoinObject()),
284 clause.getJoinCondition().normalize(clause.getField().getJoinObject()),
285 nextTableId_,
286 nextWithId_);
287
288 std::string joinTable = joinStmt.topTable_;
289 std::string throughTable = instantiateTable(clause.getField().getTable());
290 condition curCond = integrate(std::move(joinStmt));
291
292 bool outer = false;
293 if (clause.getComparison() == filter::comparison::does_not_match)
294 {
295 outer = true;
296
297 curCond &= condition(throughTable, clause.getField().getJoinColumn(), true);
298 }
299
300 joins_.emplace_back(outer, clause.getField().getTable(), topTable_, clause.getField().getColumn(), throughTable, clause.getField().getJoinColumn());
301 joins_.emplace_back(false, getTableForContext(clause.getField().getJoinObject()), throughTable, clause.getField().getForeignJoinColumn(), joinTable, clause.getField().getForeignColumn());
302
303 return curCond;
304 }
305
306 case field::type::hierarchal_join:
307 {
308 std::string withName = std::string(clause.getField().getTable()) + "_tree_" + std::to_string(nextWithId_++);
309 std::string withInstName = instantiateTable(withName);
310
311 bool outer = false;
312 if (clause.getComparison() == filter::comparison::does_not_hierarchally_match)
313 {
314 outer = true;
315 }
316
317 joins_.emplace_back(outer, withName, topTable_, clause.getField().getColumn(), withInstName, clause.getField().getColumn());
318
319 statement withStmt(
320 getTableForContext(clause.getField().getObject()),
321 clause.getJoinCondition().normalize(clause.getField().getObject()),
322 nextTableId_,
323 nextWithId_);
324
325 for (auto& w : withStmt.withs_)
326 {
327 withs_.push_back(std::move(w));
328 }
329
330 nextTableId_ = withStmt.nextTableId_;
331 nextWithId_ = withStmt.nextWithId_;
332
333 withs_.emplace_back(
334 withName,
335 clause.getField(),
336 std::move(withStmt.tables_),
337 std::move(withStmt.topTable_),
338 std::move(withStmt.topCondition_),
339 std::move(withStmt.joins_));
340
341 if (clause.getComparison() == filter::comparison::does_not_hierarchally_match)
342 {
343 return condition(withInstName, clause.getField().getColumn(), true);
344 } else {
345 return {};
346 }
347 }
348 }
349 }
350
351 case filter::type::group:
352 {
353 condition grp(clause.getOrlogic());
354
355 for (const filter& child : clause)
356 {
357 condition newChild = parseFilter(child);
358 if (newChild.getType() != condition::type::empty)
359 {
360 grp += std::move(newChild);
361 }
362 }
363
364 if (grp.getChildren().empty())
365 {
366 grp = {};
367 }
368
369 return grp;
370 }
371 }
372 }
373
374 std::string statement::instantiateTable(std::string name)
375 {
376 std::string identifier = name + "_" + std::to_string(nextTableId_++);
377 tables_[identifier] = name;
378
379 return identifier;
380 }
381
382 statement::condition statement::integrate(statement subStmt)
383 {
384 for (auto& mapping : subStmt.tables_)
385 {
386 tables_[mapping.first] = mapping.second;
387 }
388
389 for (auto& j : subStmt.joins_)
390 {
391 joins_.push_back(j);
392 }
393
394 for (auto& w : subStmt.withs_)
395 {
396 withs_.push_back(w);
397 }
398
399 nextTableId_ = subStmt.nextTableId_;
400 nextWithId_ = subStmt.nextWithId_;
401
402 return subStmt.topCondition_;
403 }
404
405 std::ostream& operator<<(std::ostream& oss, const statement::join& j)
406 {
407 if (j.isOuterJoin())
408 {
409 oss << "LEFT";
410 } else {
411 oss << "INNER";
412 }
413
414 return oss
415 << " JOIN "
416 << j.getForeignTableName()
417 << " AS "
418 << j.getForeignTable()
419 << " ON "
420 << j.getForeignTable()
421 << "."
422 << j.getForeignColumn()
423 << " = "
424 << j.getJoinTable()
425 << "."
426 << j.getJoinColumn();
427 }
428
429 statement::condition::condition(const condition& other)
430 {
431 type_ = other.type_;
432
433 switch (type_)
434 {
435 case type::empty:
436 {
437 break;
438 }
439
440 case type::singleton:
441 {
442 new(&singleton_.table_) std::string(other.singleton_.table_);
443 new(&singleton_.column_) std::string(other.singleton_.column_);
444 singleton_.comparison_ = other.singleton_.comparison_;
445 new(&singleton_.value_) binding(other.singleton_.value_);
446
447 break;
448 }
449
450 case type::group:
451 {
452 new(&group_.children_) std::list<condition>(other.group_.children_);
453 group_.orlogic_ = other.group_.orlogic_;
454
455 break;
456 }
457 }
458 }
459
460 statement::condition::condition(condition&& other) : condition()
461 {
462 swap(*this, other);
463 }
464
465 statement::condition& statement::condition::operator=(condition other)
466 {
467 swap(*this, other);
468
469 return *this;
470 }
471
472 void swap(statement::condition& first, statement::condition& second)
473 {
474 using type = statement::condition::type;
475 using condition = statement::condition;
476
477 type tempType = first.type_;
478 std::string tempTable;
479 std::string tempColumn;
480 condition::comparison tempComparison;
481 binding tempBinding;
482 std::list<condition> tempChildren;
483 bool tempOrlogic;
484
485 switch (tempType)
486 {
487 case type::empty:
488 {
489 break;
490 }
491
492 case type::singleton:
493 {
494 tempTable = std::move(first.singleton_.table_);
495 tempColumn = std::move(first.singleton_.column_);
496 tempComparison = first.singleton_.comparison_;
497 tempBinding = std::move(first.singleton_.value_);
498
499 break;
500 }
501
502 case type::group:
503 {
504 tempChildren = std::move(first.group_.children_);
505 tempOrlogic = first.group_.orlogic_;
506
507 break;
508 }
509 }
510
511 first.~condition();
512
513 first.type_ = second.type_;
514
515 switch (first.type_)
516 {
517 case type::empty:
518 {
519 break;
520 }
521
522 case type::singleton:
523 {
524 new(&first.singleton_.table_) std::string(std::move(second.singleton_.table_));
525 new(&first.singleton_.column_) std::string(std::move(second.singleton_.column_));
526 first.singleton_.comparison_ = second.singleton_.comparison_;
527 new(&first.singleton_.value_) binding(std::move(second.singleton_.value_));
528
529 break;
530 }
531
532 case type::group:
533 {
534 new(&first.group_.children_) std::list<condition>(std::move(second.group_.children_));
535 first.group_.orlogic_ = second.group_.orlogic_;
536
537 break;
538 }
539 }
540
541 second.~condition();
542
543 second.type_ = tempType;
544
545 switch (second.type_)
546 {
547 case type::empty:
548 {
549 break;
550 }
551
552 case type::singleton:
553 {
554 new(&second.singleton_.table_) std::string(std::move(tempTable));
555 new(&second.singleton_.column_) std::string(std::move(tempColumn));
556 second.singleton_.comparison_ = tempComparison;
557 new(&second.singleton_.value_) binding(std::move(tempBinding));
558
559 break;
560 }
561
562 case type::group:
563 {
564 new(&second.group_.children_) std::list<condition>(std::move(tempChildren));
565 second.group_.orlogic_ = tempOrlogic;
566
567 break;
568 }
569 }
570 }
571
572 statement::condition::~condition()
573 {
574 switch (type_)
575 {
576 case type::empty:
577 {
578 break;
579 }
580
581 case type::singleton:
582 {
583 using string_type = std::string;
584
585 singleton_.table_.~string_type();
586 singleton_.column_.~string_type();
587 singleton_.value_.~binding();
588
589 break;
590 }
591
592 case type::group:
593 {
594 using list_type = std::list<condition>;
595
596 group_.children_.~list_type();
597
598 break;
599 }
600 }
601 }
602
603 statement::condition::condition() : type_(type::empty)
604 {
605 }
606
607 statement::condition::condition(
608 std::string table,
609 std::string column,
610 bool isNull) :
611 type_(type::singleton)
612 {
613 new(&singleton_.table_) std::string(std::move(table));
614 new(&singleton_.column_) std::string(std::move(column));
615
616 if (isNull)
617 {
618 singleton_.comparison_ = comparison::is_null;
619 } else {
620 singleton_.comparison_ = comparison::is_not_null;
621 }
622 }
623
624 statement::condition::condition(
625 std::string table,
626 std::string column,
627 comparison comp,
628 binding value) :
629 type_(type::singleton)
630 {
631 new(&singleton_.table_) std::string(std::move(table));
632 new(&singleton_.column_) std::string(std::move(column));
633 singleton_.comparison_ = comp;
634 new(&singleton_.value_) binding(std::move(value));
635 }
636
637 std::string statement::condition::toSql() const
638 {
639 switch (type_)
640 {
641 case type::empty:
642 {
643 return "";
644 }
645
646 case type::singleton:
647 {
648 switch (singleton_.comparison_)
649 {
650 case comparison::equals:
651 {
652 return singleton_.table_ + "." + singleton_.column_ + " = ?";
653 }
654
655 case comparison::does_not_equal:
656 {
657 return singleton_.table_ + "." + singleton_.column_ + " != ?";
658 }
659
660 case comparison::is_greater_than:
661 {
662 return singleton_.table_ + "." + singleton_.column_ + " > ?";
663 }
664
665 case comparison::is_at_most:
666 {
667 return singleton_.table_ + "." + singleton_.column_ + " <= ?";
668 }
669
670 case comparison::is_less_than:
671 {
672 return singleton_.table_ + "." + singleton_.column_ + " < ?";
673 }
674
675 case comparison::is_at_least:
676 {
677 return singleton_.table_ + "." + singleton_.column_ + " >= ?";
678 }
679
680 case comparison::is_like:
681 {
682 return singleton_.table_ + "." + singleton_.column_ + " LIKE ?";
683 }
684
685 case comparison::is_not_like:
686 {
687 return singleton_.table_ + "." + singleton_.column_ + " NOT LIKE ?";
688 }
689
690 case comparison::is_not_null:
691 {
692 return singleton_.table_ + "." + singleton_.column_ + " IS NOT NULL";
693 }
694
695 case comparison::is_null:
696 {
697 return singleton_.table_ + "." + singleton_.column_ + " IS NULL";
698 }
699 }
700 }
701
702 case type::group:
703 {
704 std::list<std::string> clauses;
705 for (const condition& cond : group_.children_)
706 {
707 clauses.push_back(cond.toSql());
708 }
709
710 return implode(std::begin(clauses), std::end(clauses), group_.orlogic_ ? " OR " : " AND ");
711 }
712 }
713 }
714
715 std::list<binding> statement::condition::flattenBindings() const
716 {
717 switch (type_)
718 {
719 case type::empty:
720 {
721 return {};
722 }
723
724 case type::singleton:
725 {
726 return {singleton_.value_};
727 }
728
729 case type::group:
730 {
731 std::list<binding> bindings;
732 for (const condition& cond : group_.children_)
733 {
734 for (binding value : cond.flattenBindings())
735 {
736 bindings.push_back(std::move(value));
737 }
738 }
739
740 return bindings;
741 }
742 }
743 }
744
745 statement::condition::condition(bool orlogic) : type_(type::group)
746 {
747 new(&group_.children_) std::list<condition>();
748 group_.orlogic_ = orlogic;
749 }
750
751 statement::condition& statement::condition::operator+=(condition n)
752 {
753 if (type_ == type::group)
754 {
755 group_.children_.push_back(std::move(n));
756
757 return *this;
758 } else {
759 throw std::domain_error("Cannot add condition to non-group condition");
760 }
761 }
762
763 statement::condition& statement::condition::operator&=(condition n)
764 {
765 switch (type_)
766 {
767 case type::empty:
768 {
769 *this = std::move(n);
770
771 break;
772 }
773
774 case type::singleton:
775 {
776 condition grp(false);
777 grp += *this;
778 grp += std::move(n);
779
780 *this = grp;
781
782 break;
783 }
784
785 case type::group:
786 {
787 *this += std::move(n);
788
789 break;
790 }
791 }
792
793 return *this;
794 }
795
796 const std::list<statement::condition>& statement::condition::getChildren() const
797 {
798 if (type_ == type::group)
799 {
800 return group_.children_;
801 } else {
802 throw std::domain_error("Cannot get children of non-group condition");
803 }
804 }
805
806};
diff --git a/lib/statement.h b/lib/statement.h new file mode 100644 index 0000000..a528d60 --- /dev/null +++ b/lib/statement.h
@@ -0,0 +1,272 @@
1#ifndef STATEMENT_H_29F51659
2#define STATEMENT_H_29F51659
3
4#include <string>
5#include <list>
6#include <map>
7#include <set>
8#include "binding.h"
9#include "enums.h"
10#include "field.h"
11#include "filter.h"
12
13namespace verbly {
14
15 class filter;
16
17 class statement {
18 public:
19
20 statement(object context, filter queryFilter);
21
22 std::string getQueryString(std::list<std::string> select, bool random, int limit) const;
23
24 std::list<binding> getBindings() const;
25
26 private:
27
28 class join {
29 public:
30
31 join(
32 bool outer,
33 std::string foreignTableName,
34 std::string joinTable,
35 std::string joinColumn,
36 std::string foreignTable,
37 std::string foreignColumn) :
38 outer_(outer),
39 foreignTableName_(std::move(foreignTableName)),
40 joinTable_(std::move(joinTable)),
41 joinColumn_(std::move(joinColumn)),
42 foreignTable_(std::move(foreignTable)),
43 foreignColumn_(std::move(foreignColumn))
44 {
45 }
46
47 bool isOuterJoin() const
48 {
49 return outer_;
50 }
51
52 const std::string& getForeignTableName() const
53 {
54 return foreignTableName_;
55 }
56
57 const std::string& getJoinTable() const
58 {
59 return joinTable_;
60 }
61
62 const std::string& getJoinColumn() const
63 {
64 return joinColumn_;
65 }
66
67 const std::string& getForeignTable() const
68 {
69 return foreignTable_;
70 }
71
72 const std::string& getForeignColumn() const
73 {
74 return foreignColumn_;
75 }
76
77 private:
78 bool outer_ = false;
79 std::string foreignTableName_;
80 std::string joinTable_;
81 std::string joinColumn_;
82 std::string foreignTable_;
83 std::string foreignColumn_;
84
85 };
86
87 friend std::ostream& operator<<(std::ostream& oss, const join& j);
88
89 class condition {
90 public:
91 enum class type {
92 empty,
93 singleton,
94 group
95 };
96
97 enum class comparison {
98 equals,
99 does_not_equal,
100 is_greater_than,
101 is_at_most,
102 is_less_than,
103 is_at_least,
104 is_like,
105 is_not_like,
106 is_not_null,
107 is_null
108 };
109
110 // Copy and move constructors
111
112 condition(const condition& other);
113 condition(condition&& other);
114
115 // Assignment
116
117 condition& operator=(condition other);
118
119 // Swap
120
121 friend void swap(condition& first, condition& second);
122
123 // Destructor
124
125 ~condition();
126
127 // Accessors
128
129 type getType() const
130 {
131 return type_;
132 }
133
134 // Empty
135
136 condition();
137
138 // Singleton
139
140 condition(std::string table, std::string column, bool isNull);
141
142 condition(std::string table, std::string column, comparison comp, binding value);
143
144 // Group
145
146 explicit condition(bool orlogic);
147
148 condition& operator+=(condition n);
149
150 condition& operator&=(condition n);
151
152 const std::list<condition>& getChildren() const;
153
154 // Utility
155
156 std::string toSql() const;
157
158 std::list<binding> flattenBindings() const;
159
160 private:
161 union {
162 struct {
163 std::string table_;
164 std::string column_;
165 comparison comparison_;
166 binding value_;
167 } singleton_;
168 struct {
169 std::list<condition> children_;
170 bool orlogic_;
171 } group_;
172 };
173 type type_;
174 };
175
176 friend void swap(condition& first, condition& second);
177
178 class with {
179 public:
180
181 with(
182 std::string identifier,
183 field f,
184 std::map<std::string, std::string> tables,
185 std::string topTable,
186 condition where,
187 std::list<join> joins) :
188 identifier_(std::move(identifier)),
189 field_(f),
190 tables_(std::move(tables)),
191 topTable_(std::move(topTable)),
192 topCondition_(std::move(where)),
193 joins_(std::move(joins))
194 {
195 }
196
197 const std::string& getIdentifier() const
198 {
199 return identifier_;
200 }
201
202 field getField() const
203 {
204 return field_;
205 }
206
207 std::string getTableForId(std::string identifier) const
208 {
209 return tables_.at(identifier);
210 }
211
212 const std::string& getTopTable() const
213 {
214 return topTable_;
215 }
216
217 const condition& getCondition() const
218 {
219 return topCondition_;
220 }
221
222 const std::list<join>& getJoins() const
223 {
224 return joins_;
225 }
226
227 private:
228 std::string identifier_;
229 field field_;
230 std::map<std::string, std::string> tables_;
231 std::string topTable_;
232 condition topCondition_;
233 std::list<join> joins_;
234
235 };
236
237 static constexpr const char* getTableForContext(object context)
238 {
239 return (context == object::notion) ? "notions"
240 : (context == object::word) ? "words"
241 : (context == object::group) ? "groups"
242 : (context == object::frame) ? "frames"
243 : (context == object::lemma) ? "lemmas_forms"
244 : (context == object::form) ? "forms"
245 : (context == object::pronunciation) ? "pronunciations"
246 : throw std::domain_error("Provided context has no associated table");
247 }
248
249 static const std::list<field> getSelectForContext(object context);
250
251 statement(std::string tableName, filter clause, int nextTableId = 0, int nextWithId = 0);
252
253 condition parseFilter(filter queryFilter);
254
255 std::string instantiateTable(std::string name);
256
257 condition integrate(statement subStmt);
258
259 int nextTableId_;
260 int nextWithId_;
261
262 std::map<std::string, std::string> tables_;
263 std::string topTable_;
264 std::list<join> joins_;
265 std::list<with> withs_;
266 condition topCondition_;
267
268 };
269
270};
271
272#endif /* end of include guard: STATEMENT_H_29F51659 */
diff --git a/lib/util.h b/lib/util.h index fb5fe67..b74b050 100644 --- a/lib/util.h +++ b/lib/util.h
@@ -1,6 +1,10 @@
1#ifndef UTIL_H_15DDCA2D 1#ifndef UTIL_H_15DDCA2D
2#define UTIL_H_15DDCA2D 2#define UTIL_H_15DDCA2D
3 3
4#include <string>
5#include <sstream>
6#include <iterator>
7
4namespace verbly { 8namespace verbly {
5 9
6 template <class InputIterator> 10 template <class InputIterator>
@@ -21,25 +25,33 @@ namespace verbly {
21 return result.str(); 25 return result.str();
22 } 26 }
23 27
24 template <class Container> 28 template <class OutputIterator>
25 Container split(std::string input, std::string delimiter) 29 void split(std::string input, std::string delimiter, OutputIterator out)
26 { 30 {
27 Container result;
28
29 while (!input.empty()) 31 while (!input.empty())
30 { 32 {
31 int divider = input.find(delimiter); 33 int divider = input.find(delimiter);
32 if (divider == std::string::npos) 34 if (divider == std::string::npos)
33 { 35 {
34 result.push_back(input); 36 *out = input;
37 out++;
35 38
36 input = ""; 39 input = "";
37 } else { 40 } else {
38 result.push_back(input.substr(0, divider)); 41 *out = input.substr(0, divider);
42 out++;
39 43
40 input = input.substr(divider+delimiter.length()); 44 input = input.substr(divider+delimiter.length());
41 } 45 }
42 } 46 }
47 }
48
49 template <class Container>
50 Container split(std::string input, std::string delimiter)
51 {
52 Container result;
53
54 split(input, delimiter, std::back_inserter(result));
43 55
44 return result; 56 return result;
45 } 57 }
diff --git a/lib/verb.cpp b/lib/verb.cpp deleted file mode 100644 index 1f45d53..0000000 --- a/lib/verb.cpp +++ /dev/null
@@ -1,64 +0,0 @@
1#include "verbly.h"
2
3namespace verbly {
4
5 verb::verb()
6 {
7
8 }
9
10 verb::verb(const data& _data, int _id) : word(_data, _id)
11 {
12
13 }
14
15 std::string verb::base_form() const
16 {
17 assert(_valid == true);
18
19 return _infinitive;
20 }
21
22 std::string verb::infinitive_form() const
23 {
24 assert(_valid == true);
25
26 return _infinitive;
27 }
28
29 std::string verb::past_tense_form() const
30 {
31 assert(_valid == true);
32
33 return _past_tense;
34 }
35
36 std::string verb::past_participle_form() const
37 {
38 assert(_valid == true);
39
40 return _past_participle;
41 }
42
43 std::string verb::ing_form() const
44 {
45 assert(_valid == true);
46
47 return _ing_form;
48 }
49
50 std::string verb::s_form() const
51 {
52 assert(_valid == true);
53
54 return _s_form;
55 }
56
57 frame_query verb::frames() const
58 {
59 assert(_valid == true);
60
61 return _data->frames().for_verb(*this);
62 }
63
64};
diff --git a/lib/verb.h b/lib/verb.h deleted file mode 100644 index 7a2486e..0000000 --- a/lib/verb.h +++ /dev/null
@@ -1,34 +0,0 @@
1#ifndef VERB_H_BCC929AD
2#define VERB_H_BCC929AD
3
4namespace verbly {
5
6 class frame_query;
7
8 class verb : public word {
9 private:
10 std::string _infinitive;
11 std::string _past_tense;
12 std::string _past_participle;
13 std::string _ing_form;
14 std::string _s_form;
15
16 friend class verb_query;
17
18 public:
19 verb();
20 verb(const data& _data, int _id);
21
22 std::string base_form() const;
23 std::string infinitive_form() const;
24 std::string past_tense_form() const;
25 std::string past_participle_form() const;
26 std::string ing_form() const;
27 std::string s_form() const;
28
29 frame_query frames() const;
30 };
31
32};
33
34#endif /* end of include guard: VERB_H_BCC929AD */
diff --git a/lib/verb_query.cpp b/lib/verb_query.cpp deleted file mode 100644 index 4e6c253..0000000 --- a/lib/verb_query.cpp +++ /dev/null
@@ -1,315 +0,0 @@
1#include "verbly.h"
2
3namespace verbly {
4
5 verb_query::verb_query(const data& _data) : _data(_data)
6 {
7
8 }
9
10 verb_query& verb_query::limit(int _limit)
11 {
12 if ((_limit > 0) || (_limit == unlimited))
13 {
14 this->_limit = _limit;
15 }
16
17 return *this;
18 }
19
20 verb_query& verb_query::random()
21 {
22 this->_random = true;
23
24 return *this;
25 }
26
27 verb_query& verb_query::except(const verb& _word)
28 {
29 _except.push_back(_word);
30
31 return *this;
32 }
33
34 verb_query& verb_query::rhymes_with(const word& _word)
35 {
36 for (auto rhyme : _word.get_rhymes())
37 {
38 _rhymes.push_back(rhyme);
39 }
40
41 if (dynamic_cast<const verb*>(&_word) != nullptr)
42 {
43 _except.push_back(dynamic_cast<const verb&>(_word));
44 }
45
46 return *this;
47 }
48
49 verb_query& verb_query::rhymes_with(rhyme _r)
50 {
51 _rhymes.push_back(_r);
52
53 return *this;
54 }
55
56 verb_query& verb_query::has_pronunciation()
57 {
58 this->_has_prn = true;
59
60 return *this;
61 }
62
63 verb_query& verb_query::has_rhyming_noun()
64 {
65 _has_rhyming_noun = true;
66
67 return *this;
68 }
69
70 verb_query& verb_query::has_rhyming_adjective()
71 {
72 _has_rhyming_adjective = true;
73
74 return *this;
75 }
76
77 verb_query& verb_query::has_rhyming_adverb()
78 {
79 _has_rhyming_adverb = true;
80
81 return *this;
82 }
83
84 verb_query& verb_query::has_rhyming_verb()
85 {
86 _has_rhyming_verb = true;
87
88 return *this;
89 }
90
91 verb_query& verb_query::with_stress(filter<std::vector<bool>> _arg)
92 {
93 _stress = _arg;
94
95 return *this;
96 }
97
98 verb_query& verb_query::has_frames()
99 {
100 this->_has_frames = true;
101
102 return *this;
103 }
104
105 std::list<verb> verb_query::run() const
106 {
107 std::stringstream construct;
108 construct << "SELECT verb_id, infinitive, past_tense, past_participle, ing_form, s_form FROM verbs";
109 std::list<std::string> conditions;
110 std::list<binding> bindings;
111
112 if (_has_prn)
113 {
114 conditions.push_back("verb_id IN (SELECT verb_id FROM verb_pronunciations)");
115 }
116
117 if (!_rhymes.empty())
118 {
119 std::list<std::string> clauses(_rhymes.size(), "(prerhyme != ? AND rhyme = ?)");
120 std::string cond = "verb_id IN (SELECT verb_id FROM verb_pronunciations WHERE " + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
121 conditions.push_back(cond);
122
123 for (auto rhy : _rhymes)
124 {
125 bindings.emplace_back(rhy.get_prerhyme());
126 bindings.emplace_back(rhy.get_rhyme());
127 }
128 }
129
130 if (_has_rhyming_noun)
131 {
132 conditions.push_back("verb_id IN (SELECT a.verb_id FROM verbs AS a INNER JOIN verb_pronunciations AS curp ON curp.noun_id = a.adverb_id INNER JOIN noun_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)");
133 }
134
135 if (_has_rhyming_adjective)
136 {
137 conditions.push_back("verb_id IN (SELECT a.verb_id FROM verbs AS a INNER JOIN verb_pronunciations AS curp ON curp.noun_id = a.adverb_id INNER JOIN adjective_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)");
138 }
139
140 if (_has_rhyming_adverb)
141 {
142 conditions.push_back("verb_id IN (SELECT a.verb_id FROM verbs AS a INNER JOIN verb_pronunciations AS curp ON curp.noun_id = a.adverb_id INNER JOIN adverb_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme)");
143 }
144
145 if (_has_rhyming_verb)
146 {
147 conditions.push_back("verb_id IN (SELECT a.verb_id FROM verbs AS a INNER JOIN verb_pronunciations AS curp ON curp.noun_id = a.adverb_id INNER JOIN verb_pronunciations AS rhmp ON rhmp.prerhyme != curp.prerhyme AND rhmp.rhyme = curp.rhyme AND rhmp.verb_id != curp.verb_id)");
148 }
149
150 if (!_stress.empty())
151 {
152 std::stringstream cond;
153 if (_stress.get_notlogic())
154 {
155 cond << "verb_id NOT IN";
156 } else {
157 cond << "verb_id IN";
158 }
159
160 cond << "(SELECT verb_id FROM verb_pronunciations WHERE ";
161
162 std::function<std::string (filter<std::vector<bool>>, bool)> recur = [&] (filter<std::vector<bool>> f, bool notlogic) -> std::string {
163 switch (f.get_type())
164 {
165 case filter<std::vector<bool>>::type::singleton:
166 {
167 std::ostringstream _val;
168 for (auto syl : f.get_elem())
169 {
170 if (syl)
171 {
172 _val << "1";
173 } else {
174 _val << "0";
175 }
176 }
177
178 bindings.emplace_back(_val.str());
179
180 if (notlogic == f.get_notlogic())
181 {
182 return "stress = ?";
183 } else {
184 return "stress != ?";
185 }
186 }
187
188 case filter<std::vector<bool>>::type::group:
189 {
190 bool truelogic = notlogic != f.get_notlogic();
191
192 std::list<std::string> clauses;
193 std::transform(std::begin(f), std::end(f), std::back_inserter(clauses), [&] (filter<std::vector<bool>> f2) {
194 return recur(f2, truelogic);
195 });
196
197 if (truelogic == f.get_orlogic())
198 {
199 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " AND ") + ")";
200 } else {
201 return "(" + verbly::implode(std::begin(clauses), std::end(clauses), " OR ") + ")";
202 }
203 }
204 }
205 };
206
207 cond << recur(_stress, _stress.get_notlogic());
208 cond << ")";
209 conditions.push_back(cond.str());
210 }
211
212 for (auto except : _except)
213 {
214 conditions.push_back("verb_id != ?");
215 bindings.emplace_back(except._id);
216 }
217
218 if (!_has_frames)
219 {
220 conditions.push_back("verb_id IN (SELECT verb_id FROM verb_groups)");
221 }
222
223 if (!conditions.empty())
224 {
225 construct << " WHERE ";
226 construct << verbly::implode(std::begin(conditions), std::end(conditions), " AND ");
227 }
228
229 if (_random)
230 {
231 construct << " ORDER BY RANDOM()";
232 }
233
234 if (_limit != unlimited)
235 {
236 construct << " LIMIT " << _limit;
237 }
238
239 sqlite3_stmt* ppstmt;
240 std::string query = construct.str();
241 if (sqlite3_prepare_v2(_data.ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
242 {
243 throw std::runtime_error(sqlite3_errmsg(_data.ppdb));
244 }
245
246 int i = 1;
247 for (auto& binding : bindings)
248 {
249 switch (binding.get_type())
250 {
251 case binding::type::integer:
252 {
253 sqlite3_bind_int(ppstmt, i, binding.get_integer());
254
255 break;
256 }
257
258 case binding::type::string:
259 {
260 sqlite3_bind_text(ppstmt, i, binding.get_string().c_str(), binding.get_string().length(), SQLITE_TRANSIENT);
261
262 break;
263 }
264 }
265
266 i++;
267 }
268
269 std::list<verb> output;
270 while (sqlite3_step(ppstmt) == SQLITE_ROW)
271 {
272 verb tnc {_data, sqlite3_column_int(ppstmt, 0)};
273 tnc._infinitive = std::string(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 1)));
274 tnc._past_tense = std::string(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 2)));
275 tnc._past_participle = std::string(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 3)));
276 tnc._ing_form = std::string(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 4)));
277 tnc._s_form = std::string(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 5)));
278
279 output.push_back(tnc);
280 }
281
282 sqlite3_finalize(ppstmt);
283
284 for (auto& verb : output)
285 {
286 query = "SELECT pronunciation, prerhyme, rhyme FROM verb_pronunciations WHERE verb_id = ?";
287 if (sqlite3_prepare_v2(_data.ppdb, query.c_str(), query.length(), &ppstmt, NULL) != SQLITE_OK)
288 {
289 throw std::runtime_error(sqlite3_errmsg(_data.ppdb));
290 }
291
292 sqlite3_bind_int(ppstmt, 1, verb._id);
293
294 while (sqlite3_step(ppstmt) == SQLITE_ROW)
295 {
296 std::string pronunciation(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 0)));
297 auto phonemes = verbly::split<std::list<std::string>>(pronunciation, " ");
298
299 verb.pronunciations.push_back(phonemes);
300
301 if ((sqlite3_column_type(ppstmt, 1) != SQLITE_NULL) && (sqlite3_column_type(ppstmt, 2) != SQLITE_NULL))
302 {
303 std::string prerhyme(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 1)));
304 std::string rhyming(reinterpret_cast<const char*>(sqlite3_column_text(ppstmt, 2)));
305 verb.rhymes.emplace_back(prerhyme, rhyming);
306 }
307 }
308
309 sqlite3_finalize(ppstmt);
310 }
311
312 return output;
313 }
314
315};
diff --git a/lib/verb_query.h b/lib/verb_query.h deleted file mode 100644 index 566ae37..0000000 --- a/lib/verb_query.h +++ /dev/null
@@ -1,45 +0,0 @@
1#ifndef VERB_QUERY_H_34E5A679
2#define VERB_QUERY_H_34E5A679
3
4namespace verbly {
5
6 class verb_query {
7 public:
8 verb_query(const data& _data);
9
10 verb_query& limit(int _limit);
11 verb_query& random();
12 verb_query& except(const verb& _word);
13 verb_query& rhymes_with(const word& _word);
14 verb_query& rhymes_with(rhyme _r);
15 verb_query& has_pronunciation();
16 verb_query& has_rhyming_noun();
17 verb_query& has_rhyming_adjective();
18 verb_query& has_rhyming_adverb();
19 verb_query& has_rhyming_verb();
20 verb_query& with_stress(filter<std::vector<bool>> _arg);
21
22 verb_query& has_frames();
23
24 std::list<verb> run() const;
25
26 const static int unlimited = -1;
27
28 private:
29 const data& _data;
30 int _limit = unlimited;
31 bool _random = false;
32 std::list<rhyme> _rhymes;
33 std::list<verb> _except;
34 bool _has_prn = false;
35 bool _has_frames = false;
36 bool _has_rhyming_noun = false;
37 bool _has_rhyming_adjective = false;
38 bool _has_rhyming_adverb = false;
39 bool _has_rhyming_verb = false;
40 filter<std::vector<bool>> _stress;
41 };
42
43};
44
45#endif /* end of include guard: VERB_QUERY_H_34E5A679 */
diff --git a/lib/verbly.h b/lib/verbly.h index cfaf5bc..6dfc01a 100644 --- a/lib/verbly.h +++ b/lib/verbly.h
@@ -1,35 +1,17 @@
1#ifndef VERBLY_H_5B39CE50 1#ifndef VERBLY_H_5B39CE50
2#define VERBLY_H_5B39CE50 2#define VERBLY_H_5B39CE50
3 3
4#include <string>
5#include <list>
6#include <sstream>
7#include <algorithm>
8#include <cassert>
9#include <set>
10#include <stdexcept>
11#include <vector>
12#include <map>
13#include <iterator>
14#include <sstream>
15#include <functional>
16#include <iostream>
17#include <new>
18
19#include "util.h" 4#include "util.h"
20#include "data.h" 5#include "database.h"
6#include "filter.h"
7#include "field.h"
8#include "query.h"
9#include "notion.h"
21#include "word.h" 10#include "word.h"
22#include "verb.h" 11#include "group.h"
23#include "adverb.h"
24#include "adjective.h"
25#include "noun.h"
26#include "frame.h" 12#include "frame.h"
27#include "preposition.h" 13#include "lemma.h"
28#include "token.h" 14#include "form.h"
29#include "noun_query.h" 15#include "pronunciation.h"
30#include "adverb_query.h"
31#include "adjective_query.h"
32#include "verb_query.h"
33#include "frame_query.h"
34 16
35#endif /* end of include guard: VERBLY_H_5B39CE50 */ 17#endif /* end of include guard: VERBLY_H_5B39CE50 */
diff --git a/lib/word.cpp b/lib/word.cpp index 49e34a1..3edf2d2 100644 --- a/lib/word.cpp +++ b/lib/word.cpp
@@ -1,60 +1,112 @@
1#include "verbly.h" 1#include "word.h"
2#include <algorithm> 2#include <sqlite3.h>
3#include "form.h"
4#include "util.h"
5#include "database.h"
6#include "query.h"
3 7
4namespace verbly { 8namespace verbly {
5 9
6 rhyme::rhyme(std::string prerhyme, std::string phonemes) : _prerhyme(prerhyme), _rhyme(phonemes) 10 const object word::objectType = object::word;
7 {
8
9 }
10 11
11 std::string rhyme::get_prerhyme() const 12 const std::list<std::string> word::select = {"word_id", "notion_id", "lemma_id", "tag_count", "position", "group_id"};
12 {
13 return _prerhyme;
14 }
15 13
16 std::string rhyme::get_rhyme() const 14 const field word::id = field::integerField(object::word, "word_id");
17 { 15 const field word::tagCount = field::integerField(object::word, "tag_count", true);
18 return _rhyme; 16 const field word::adjectivePosition = field::integerField(object::word, "position", true);
19 } 17
18 const field word::notion = field::joinField(object::word, "notion_id", object::notion);
19 const field word::lemma = field::joinField(object::word, "lemma_id", object::lemma);
20 const field word::group = field::joinField(object::word, "group_id", object::group, true);
21
22 const field word::antonyms = field::selfJoin(object::word, "word_id", "antonymy", "antonym_2_id", "antonym_1_id");
23
24 const field word::specifications = field::selfJoin(object::word, "word_id", "specification", "general_id", "specific_id");
25 const field word::generalizations = field::selfJoin(object::word, "word_id", "specification", "specific_id", "general_id");
20 26
21 bool rhyme::operator==(const rhyme& other) const 27 const field word::pertainyms = field::selfJoin(object::word, "word_id", "pertainymy", "noun_id", "pertainym_id");
28 const field word::antiPertainyms = field::selfJoin(object::word, "word_id", "pertainymy", "pertainym_id", "noun_id");
29
30 const field word::mannernyms = field::selfJoin(object::word, "word_id", "mannernymy", "adjective_id", "mannernym_id");
31 const field word::antiMannernyms = field::selfJoin(object::word, "word_id", "mannernymy", "mannernym_id", "adjective_id");
32
33 const field word::usageTerms = field::selfJoin(object::word, "word_id", "usage", "domain_id", "term_id");
34 const field word::usageDomains = field::selfJoin(object::word, "word_id", "usage", "term_id", "domain_id");
35
36 const field word::topicalTerms = field::selfJoin(object::word, "word_id", "topicality", "domain_id", "term_id");
37 const field word::topicalDomains = field::selfJoin(object::word, "word_id", "topicality", "term_id", "domain_id");
38
39 const field word::regionalTerms = field::selfJoin(object::word, "word_id", "regionality", "domain_id", "term_id");
40 const field word::regionalDomains = field::selfJoin(object::word, "word_id", "regionality", "term_id", "domain_id");
41
42 word::word(const database& db, sqlite3_stmt* row) : db_(&db), valid_(true)
22 { 43 {
23 return std::tie(_prerhyme, _rhyme) == std::tie(other._prerhyme, other._rhyme); 44 id_ = sqlite3_column_int(row, 0);
45 notionId_ = sqlite3_column_int(row, 1);
46 lemmaId_ = sqlite3_column_int(row, 2);
47
48 if (sqlite3_column_type(row, 3) != SQLITE_NULL)
49 {
50 hasTagCount_ = true;
51 tagCount_ = sqlite3_column_int(row, 3);
52 }
53
54 if (sqlite3_column_type(row, 4) != SQLITE_NULL)
55 {
56 adjectivePosition_ = static_cast<positioning>(sqlite3_column_int(row, 4));
57 }
58
59 if (sqlite3_column_type(row, 5) != SQLITE_NULL)
60 {
61 hasGroup_ = true;
62 groupId_ = sqlite3_column_int(row, 5);
63 }
24 } 64 }
25 65
26 word::word() 66 const notion& word::getNotion() const
27 { 67 {
68 if (!valid_)
69 {
70 throw std::domain_error("Bad access to uninitialized word");
71 }
72
73 if (!notion_)
74 {
75 notion_ = db_->notions(notion::id == notionId_).first();
76 }
28 77
78 return notion_;
29 } 79 }
30 80
31 word::word(const data& _data, int _id) : _data(&_data), _id(_id), _valid(true) 81 const lemma& word::getLemma() const
32 { 82 {
83 if (!valid_)
84 {
85 throw std::domain_error("Bad access to uninitialized word");
86 }
33 87
88 if (!lemma_)
89 {
90 lemma_ = db_->lemmas(lemma::id == lemmaId_).first();
91 }
92
93 return lemma_;
34 } 94 }
35 95
36 std::list<rhyme> word::get_rhymes() const 96 std::string word::getBaseForm() const
37 { 97 {
38 assert(_valid == true); 98 return getLemma().getBaseForm().getText();
39
40 return rhymes;
41 } 99 }
42 100
43 bool word::starts_with_vowel_sound() const 101 std::list<std::string> word::getInflections(inflection category) const
44 { 102 {
45 assert(_valid == true); 103 std::list<std::string> result;
46 104 for (const form& infl : getLemma().getInflections(category))
47 if (pronunciations.size() > 0)
48 { 105 {
49 return std::any_of(std::begin(pronunciations), std::end(pronunciations), [] (std::list<std::string> phonemes) { 106 result.push_back(infl.getText());
50 return (phonemes.front().find_first_of("012") != std::string::npos);
51 });
52 } else {
53 // If the word is not in CMUDICT, fall back to checking whether the first letter is a vowel
54 // Not perfect but will work in most cases
55 char ch = tolower(base_form().front());
56 return (ch == 'a') || (ch == 'e') || (ch == 'i') || (ch == 'o') || (ch == 'u');
57 } 107 }
108
109 return result;
58 } 110 }
59 111
60}; 112};
diff --git a/lib/word.h b/lib/word.h index 08797a3..f71dad9 100644 --- a/lib/word.h +++ b/lib/word.h
@@ -1,48 +1,173 @@
1#ifndef WORD_H_8FC89498 1#ifndef WORD_H_DF91B1B4
2#define WORD_H_8FC89498 2#define WORD_H_DF91B1B4
3
4#include <stdexcept>
5#include <map>
6#include "field.h"
7#include "filter.h"
8#include "notion.h"
9#include "lemma.h"
10#include "group.h"
11
12struct sqlite3_stmt;
3 13
4namespace verbly { 14namespace verbly {
5 15
6 class rhyme { 16 class database;
7 public: 17
8 rhyme(std::string prerhyme, std::string phonemes); 18 class word {
19 public:
20
21 // Default constructor
22
23 word() = default;
24
25 // Construct from database
26
27 word(const database& db, sqlite3_stmt* row);
28
29 // Accessors
30
31 operator bool() const
32 {
33 return valid_;
34 }
35
36 int getId() const
37 {
38 if (!valid_)
39 {
40 throw std::domain_error("Bad access to uninitialized word");
41 }
9 42
10 std::string get_prerhyme() const; 43 return id_;
11 std::string get_rhyme() const; 44 }
45
46 bool hasTagCount() const
47 {
48 if (!valid_)
49 {
50 throw std::domain_error("Bad access to uninitialized word");
51 }
12 52
13 bool operator==(const rhyme& other) const; 53 return hasTagCount_;
54 }
55
56 int getTagCount() const
57 {
58 if (!valid_)
59 {
60 throw std::domain_error("Bad access to uninitialized word");
61 }
14 62
15 private: 63 if (!hasTagCount_)
16 std::string _prerhyme; 64 {
17 std::string _rhyme; 65 throw std::domain_error("Word has no tag count");
18 }; 66 }
19
20 class word {
21 protected:
22 const data* _data;
23 int _id;
24 bool _valid = false;
25 67
26 std::list<std::list<std::string>> pronunciations; 68 return tagCount_;
27 std::list<rhyme> rhymes; 69 }
70
71 bool hasAdjectivePositioning() const
72 {
73 if (!valid_)
74 {
75 throw std::domain_error("Bad access to uninitialized word");
76 }
28 77
29 word(); 78 return (adjectivePosition_ != positioning::undefined);
30 word(const data& _data, int _id); 79 }
80
81 positioning getAdjectivePosition() const
82 {
83 if (!valid_)
84 {
85 throw std::domain_error("Bad access to uninitialized word");
86 }
31 87
32 friend class adjective_query; 88 if (adjectivePosition_ == positioning::undefined)
33 friend class verb_query; 89 {
34 friend class noun_query; 90 throw std::domain_error("Word has no adjective position");
35 friend class adverb_query; 91 }
36 friend class frame_query;
37 friend class preposition_query;
38
39 public:
40 virtual std::string base_form() const = 0;
41 92
42 std::list<rhyme> get_rhymes() const; 93 return adjectivePosition_;
43 bool starts_with_vowel_sound() const; 94 }
95
96 const notion& getNotion() const;
97
98 const lemma& getLemma() const;
99
100 // Convenience accessors
101
102 std::string getBaseForm() const;
103
104 std::list<std::string> getInflections(inflection infl) const;
105
106 // Type info
107
108 static const object objectType;
109
110 static const std::list<std::string> select;
111
112 // Query fields
113
114 static const field id;
115 static const field tagCount;
116 static const field adjectivePosition;
117
118 operator filter() const
119 {
120 return (id == id_);
121 }
122
123 // Relationships with other objects
124
125 static const field notion;
126 static const field lemma;
127 static const field group;
128
129 // Relationships with self
130
131 static const field antonyms;
132
133 static const field specifications;
134 static const field generalizations;
135
136 static const field pertainyms;
137 static const field antiPertainyms;
138
139 static const field mannernyms;
140 static const field antiMannernyms;
141
142 static const field usageTerms;
143 static const field usageDomains;
144
145 static const field topicalTerms;
146 static const field topicalDomains;
147
148 static const field regionalTerms;
149 static const field regionalDomains;
150
151 private:
152 bool valid_ = false;
153
154 int id_;
155 bool hasTagCount_ = false;
156 int tagCount_;
157 positioning adjectivePosition_ = positioning::undefined;
158 int notionId_;
159 int lemmaId_;
160 bool hasGroup_ = false;
161 int groupId_;
162
163 const database* db_;
164
165 mutable class notion notion_;
166 mutable class lemma lemma_;
167 mutable class group group_;
168
44 }; 169 };
45 170
46}; 171};
47 172
48#endif /* end of include guard: WORD_H_8FC89498 */ 173#endif /* end of include guard: WORD_H_DF91B1B4 */