summary refs log tree commit diff stats
path: root/lib
diff options
context:
space:
mode:
author: Kelly Rauchenberger <fefferburbia@gmail.com> 2017-02-05 08:56:39 -0500
committer: Kelly Rauchenberger <fefferburbia@gmail.com> 2017-02-05 08:56:39 -0500
commit: e4fa0cb86d97c23c24cd7bdd62c23f03eed312da (patch)
tree: 70a20fdf684b1724659196a7de8d21a4a6ca194f /lib
parent: bea3673ae1b3d19585dec56e96dbcd8a56b96e6d (diff)
download: verbly-e4fa0cb86d97c23c24cd7bdd62c23f03eed312da.tar.gz
verbly-e4fa0cb86d97c23c24cd7bdd62c23f03eed312da.tar.bz2
verbly-e4fa0cb86d97c23c24cd7bdd62c23f03eed312da.zip
Flattened selrestrs
Now, instead of logically being a tree of positive/negative
restrictions that are ANDed/ORed together, selrestrs are a flat set of
positive restrictions that are ORed together. They are
stored as strings in a table called selrestrs, just like synrestrs,
which makes them a lot more queryable now as well. This change required
some changes to the VerbNet data, because we needed to consolidate any
ANDed clauses into single selrestrs, as well as convert any negative
selrestrs into positive ones. The changes made are detailed on the wiki.

Preposition choices are now encoded as comma-separated lists instead of
using JSON. This change, along with the selrestrs one, allows us to
remove verbly's dependency on nlohmann::json.
Diffstat (limited to 'lib')
-rw-r--r-- lib/database.cpp 32
-rw-r--r-- lib/database.h 2
-rw-r--r-- lib/part.cpp 41
-rw-r--r-- lib/part.h 22
-rw-r--r-- lib/selrestr.cpp 309
-rw-r--r-- lib/selrestr.h 90
-rw-r--r-- lib/verbly.h 1
7 files changed, 71 insertions, 426 deletions
diff --git a/lib/database.cpp b/lib/database.cpp index 563ec31..c7b37ec 100644 --- a/lib/database.cpp +++ b/lib/database.cpp
@@ -76,6 +76,38 @@ namespace verbly {
76 return query<pronunciation>(*this, ppdb_, std::move(where), std::move(sortOrder), limit); 76 return query<pronunciation>(*this, ppdb_, std::move(where), std::move(sortOrder), limit);
77 } 77 }
78 78
79 std::set<std::string> database::selrestrs(int partId) const
80 {
81 std::string queryString = "SELECT selrestr FROM selrestrs WHERE part_id = ?";
82
83 sqlite3_stmt* ppstmt;
84 if (sqlite3_prepare_v2(ppdb_, queryString.c_str(), queryString.length(), &ppstmt, NULL) != SQLITE_OK)
85 {
86 std::string errorMsg = sqlite3_errmsg(ppdb_);
87 sqlite3_finalize(ppstmt);
88
89 throw database_error("Error preparing query", errorMsg);
90 }
91
92 if (sqlite3_bind_int(ppstmt, 1, partId) != SQLITE_OK)
93 {
94 std::string errorMsg = sqlite3_errmsg(ppdb_);
95 sqlite3_finalize(ppstmt);
96
97 throw database_error("Error binding value to query", errorMsg);
98 }
99
100 std::set<std::string> result;
101 while (sqlite3_step(ppstmt) == SQLITE_ROW)
102 {
103 result.insert(reinterpret_cast<const char*>(sqlite3_column_blob(ppstmt, 0)));
104 }
105
106 sqlite3_finalize(ppstmt);
107
108 return result;
109 }
110
79 std::set<std::string> database::synrestrs(int partId) const 111 std::set<std::string> database::synrestrs(int partId) const
80 { 112 {
81 std::string queryString = "SELECT synrestr FROM synrestrs WHERE part_id = ?"; 113 std::string queryString = "SELECT synrestr FROM synrestrs WHERE part_id = ?";
diff --git a/lib/database.h b/lib/database.h index 0b10eba..5567061 100644 --- a/lib/database.h +++ b/lib/database.h
@@ -62,6 +62,8 @@ namespace verbly {
62 62
63 query<pronunciation> pronunciations(filter where, order sortOrder = {}, int limit = 1) const; 63 query<pronunciation> pronunciations(filter where, order sortOrder = {}, int limit = 1) const;
64 64
65 std::set<std::string> selrestrs(int partId) const;
66
65 std::set<std::string> synrestrs(int partId) const; 67 std::set<std::string> synrestrs(int partId) const;
66 68
67 private: 69 private:
diff --git a/lib/part.cpp b/lib/part.cpp index cbd951b..341d4bb 100644 --- a/lib/part.cpp +++ b/lib/part.cpp
@@ -1,14 +1,14 @@
1#include "part.h" 1#include "part.h"
2#include <stdexcept> 2#include <stdexcept>
3#include <sqlite3.h> 3#include <sqlite3.h>
4#include "selrestr.h"
5#include "database.h" 4#include "database.h"
5#include "util.h"
6 6
7namespace verbly { 7namespace verbly {
8 8
9 const object part::objectType = object::part; 9 const object part::objectType = object::part;
10 10
11 const std::list<std::string> part::select = {"part_id", "frame_id", "part_index", "type", "role", "selrestrs", "prepositions", "preposition_literality", "literal_value"}; 11 const std::list<std::string> part::select = {"part_id", "frame_id", "part_index", "type", "role", "prepositions", "preposition_literality", "literal_value"};
12 12
13 const field part::index = field::integerField(object::part, "part_index"); 13 const field part::index = field::integerField(object::part, "part_index");
14 const field part::type = field::integerField(object::part, "type"); 14 const field part::type = field::integerField(object::part, "type");
@@ -17,17 +17,21 @@ namespace verbly {
17 17
18 const field part::frames = field::joinField(object::part, "frame_id", object::frame); 18 const field part::frames = field::joinField(object::part, "frame_id", object::frame);
19 19
20 const field part::selrestr_field::selrestrJoin = field::joinField(object::part, "part_id", "selrestrs");
21 const field part::selrestr_field::selrestrField = field::stringField("selrestrs", "selrestr");
22
20 const field part::synrestr_field::synrestrJoin = field::joinField(object::part, "part_id", "synrestrs"); 23 const field part::synrestr_field::synrestrJoin = field::joinField(object::part, "part_id", "synrestrs");
21 const field part::synrestr_field::synrestrField = field::stringField("synrestrs", "synrestr"); 24 const field part::synrestr_field::synrestrField = field::stringField("synrestrs", "synrestr");
22 25
26 const part::selrestr_field part::selrestrs = {};
23 const part::synrestr_field part::synrestrs = {}; 27 const part::synrestr_field part::synrestrs = {};
24 28
25 part part::createNounPhrase(std::string role, selrestr selrestrs, std::set<std::string> synrestrs) 29 part part::createNounPhrase(std::string role, std::set<std::string> selrestrs, std::set<std::string> synrestrs)
26 { 30 {
27 part p(part_type::noun_phrase); 31 part p(part_type::noun_phrase);
28 32
29 new(&p.noun_phrase_.role) std::string(std::move(role)); 33 new(&p.noun_phrase_.role) std::string(std::move(role));
30 new(&p.noun_phrase_.selrestrs) selrestr(std::move(selrestrs)); 34 new(&p.noun_phrase_.selrestrs) std::set<std::string>(std::move(selrestrs));
31 new(&p.noun_phrase_.synrestrs) std::set<std::string>(std::move(synrestrs)); 35 new(&p.noun_phrase_.synrestrs) std::set<std::string>(std::move(synrestrs));
32 36
33 return p; 37 return p;
@@ -78,7 +82,7 @@ namespace verbly {
78 case part_type::noun_phrase: 82 case part_type::noun_phrase:
79 { 83 {
80 new(&noun_phrase_.role) std::string(reinterpret_cast<const char*>(sqlite3_column_blob(row, 4))); 84 new(&noun_phrase_.role) std::string(reinterpret_cast<const char*>(sqlite3_column_blob(row, 4)));
81 new(&noun_phrase_.selrestrs) selrestr(nlohmann::json::parse(reinterpret_cast<const char*>(sqlite3_column_blob(row, 5)))); 85 new(&noun_phrase_.selrestrs) std::set<std::string>(db.selrestrs(id));
82 new(&noun_phrase_.synrestrs) std::set<std::string>(db.synrestrs(id)); 86 new(&noun_phrase_.synrestrs) std::set<std::string>(db.synrestrs(id));
83 87
84 break; 88 break;
@@ -86,22 +90,17 @@ namespace verbly {
86 90
87 case part_type::preposition: 91 case part_type::preposition:
88 { 92 {
89 new(&preposition_.choices) std::vector<std::string>(); 93 std::string serializedChoices(reinterpret_cast<const char*>(sqlite3_column_blob(row, 5)));
90 preposition_.literal = (sqlite3_column_int(row, 7) == 1); 94 new(&preposition_.choices) std::vector<std::string>(split<std::vector<std::string>>(serializedChoices, ","));
91 95
92 std::string choicesJsonStr(reinterpret_cast<const char*>(sqlite3_column_blob(row, 6))); 96 preposition_.literal = (sqlite3_column_int(row, 6) == 1);
93 nlohmann::json choicesJson = nlohmann::json::parse(std::move(choicesJsonStr));
94 for (const nlohmann::json& choiceJson : choicesJson)
95 {
96 preposition_.choices.push_back(choiceJson.get<std::string>());
97 }
98 97
99 break; 98 break;
100 } 99 }
101 100
102 case part_type::literal: 101 case part_type::literal:
103 { 102 {
104 new(&literal_) std::string(reinterpret_cast<const char*>(sqlite3_column_blob(row, 8))); 103 new(&literal_) std::string(reinterpret_cast<const char*>(sqlite3_column_blob(row, 7)));
105 104
106 break; 105 break;
107 } 106 }
@@ -125,7 +124,7 @@ namespace verbly {
125 case part_type::noun_phrase: 124 case part_type::noun_phrase:
126 { 125 {
127 new(&noun_phrase_.role) std::string(other.noun_phrase_.role); 126 new(&noun_phrase_.role) std::string(other.noun_phrase_.role);
128 new(&noun_phrase_.selrestrs) selrestr(other.noun_phrase_.selrestrs); 127 new(&noun_phrase_.selrestrs) std::set<std::string>(other.noun_phrase_.selrestrs);
129 new(&noun_phrase_.synrestrs) std::set<std::string>(other.noun_phrase_.synrestrs); 128 new(&noun_phrase_.synrestrs) std::set<std::string>(other.noun_phrase_.synrestrs);
130 129
131 break; 130 break;
@@ -174,7 +173,7 @@ namespace verbly {
174 173
175 type tempType = first.type_; 174 type tempType = first.type_;
176 std::string tempRole; 175 std::string tempRole;
177 selrestr tempSelrestrs; 176 std::set<std::string> tempSelrestrs;
178 std::set<std::string> tempSynrestrs; 177 std::set<std::string> tempSynrestrs;
179 std::vector<std::string> tempChoices; 178 std::vector<std::string> tempChoices;
180 bool tempPrepLiteral; 179 bool tempPrepLiteral;
@@ -224,7 +223,7 @@ namespace verbly {
224 case type::noun_phrase: 223 case type::noun_phrase:
225 { 224 {
226 new(&first.noun_phrase_.role) std::string(std::move(second.noun_phrase_.role)); 225 new(&first.noun_phrase_.role) std::string(std::move(second.noun_phrase_.role));
227 new(&first.noun_phrase_.selrestrs) selrestr(std::move(second.noun_phrase_.selrestrs)); 226 new(&first.noun_phrase_.selrestrs) std::set<std::string>(std::move(second.noun_phrase_.selrestrs));
228 new(&first.noun_phrase_.synrestrs) std::set<std::string>(std::move(second.noun_phrase_.synrestrs)); 227 new(&first.noun_phrase_.synrestrs) std::set<std::string>(std::move(second.noun_phrase_.synrestrs));
229 228
230 break; 229 break;
@@ -263,7 +262,7 @@ namespace verbly {
263 case type::noun_phrase: 262 case type::noun_phrase:
264 { 263 {
265 new(&second.noun_phrase_.role) std::string(std::move(tempRole)); 264 new(&second.noun_phrase_.role) std::string(std::move(tempRole));
266 new(&second.noun_phrase_.selrestrs) selrestr(std::move(tempSelrestrs)); 265 new(&second.noun_phrase_.selrestrs) std::set<std::string>(std::move(tempSelrestrs));
267 new(&second.noun_phrase_.synrestrs) std::set<std::string>(std::move(tempSynrestrs)); 266 new(&second.noun_phrase_.synrestrs) std::set<std::string>(std::move(tempSynrestrs));
268 267
269 break; 268 break;
@@ -304,7 +303,7 @@ namespace verbly {
304 using set_type = std::set<std::string>; 303 using set_type = std::set<std::string>;
305 304
306 noun_phrase_.role.~string_type(); 305 noun_phrase_.role.~string_type();
307 noun_phrase_.selrestrs.~selrestr(); 306 noun_phrase_.selrestrs.~set_type();
308 noun_phrase_.synrestrs.~set_type(); 307 noun_phrase_.synrestrs.~set_type();
309 308
310 break; 309 break;
@@ -348,7 +347,7 @@ namespace verbly {
348 } 347 }
349 } 348 }
350 349
351 selrestr part::getNounSelrestrs() const 350 std::set<std::string> part::getNounSelrestrs() const
352 { 351 {
353 if (type_ == part_type::noun_phrase) 352 if (type_ == part_type::noun_phrase)
354 { 353 {
diff --git a/lib/part.h b/lib/part.h index 7180f57..450db3d 100644 --- a/lib/part.h +++ b/lib/part.h
@@ -5,7 +5,6 @@
5#include <vector> 5#include <vector>
6#include <set> 6#include <set>
7#include <list> 7#include <list>
8#include "selrestr.h"
9#include "field.h" 8#include "field.h"
10#include "filter.h" 9#include "filter.h"
11#include "enums.h" 10#include "enums.h"
@@ -21,7 +20,7 @@ namespace verbly {
21 20
22 // Static factories 21 // Static factories
23 22
24 static part createNounPhrase(std::string role, selrestr selrestrs, std::set<std::string> synrestrs); 23 static part createNounPhrase(std::string role, std::set<std::string> selrestrs, std::set<std::string> synrestrs);
25 24
26 static part createVerb(); 25 static part createVerb();
27 26
@@ -77,7 +76,7 @@ namespace verbly {
77 76
78 std::string getNounRole() const; 77 std::string getNounRole() const;
79 78
80 selrestr getNounSelrestrs() const; 79 std::set<std::string> getNounSelrestrs() const;
81 80
82 std::set<std::string> getNounSynrestrs() const; 81 std::set<std::string> getNounSynrestrs() const;
83 82
@@ -110,8 +109,21 @@ namespace verbly {
110 109
111 static const field frames; 110 static const field frames;
112 111
113 // Noun synrestr relationship 112 // Noun selrestr and synrestr relationships
114 113
114 class selrestr_field {
115 public:
116
117 filter operator%=(std::string selrestr) const;
118
119 private:
120
121 static const field selrestrJoin;
122 static const field selrestrField;
123 };
124
125 static const selrestr_field selrestrs;
126
115 class synrestr_field { 127 class synrestr_field {
116 public: 128 public:
117 129
@@ -138,7 +150,7 @@ namespace verbly {
138 union { 150 union {
139 struct { 151 struct {
140 std::string role; 152 std::string role;
141 selrestr selrestrs; 153 std::set<std::string> selrestrs;
142 std::set<std::string> synrestrs; 154 std::set<std::string> synrestrs;
143 } noun_phrase_; 155 } noun_phrase_;
144 struct { 156 struct {
diff --git a/lib/selrestr.cpp b/lib/selrestr.cpp deleted file mode 100644 index 8646871..0000000 --- a/lib/selrestr.cpp +++ /dev/null
@@ -1,309 +0,0 @@
1#include "selrestr.h"
2
3namespace verbly {
4
5 selrestr::selrestr(nlohmann::json data)
6 {
7 if (data.find("children") != data.end())
8 {
9 type_ = type::group;
10 new(&group_.children) std::list<selrestr>();
11
12 for (const nlohmann::json& child : data["children"])
13 {
14 group_.children.emplace_back(child);
15 }
16
17 group_.orlogic = (data["logic"] == "or");
18 } else if (data.find("type") != data.end())
19 {
20 type_ = type::singleton;
21 singleton_.pos = data["pos"].get<bool>();
22 new(&singleton_.restriction) std::string(data["type"].get<std::string>());
23 } else {
24 type_ = type::empty;
25 }
26 }
27
28 selrestr::selrestr(const selrestr& other)
29 {
30 type_ = other.type_;
31
32 switch (type_)
33 {
34 case type::singleton:
35 {
36 singleton_.pos = other.singleton_.pos;
37 new(&singleton_.restriction) std::string(other.singleton_.restriction);
38
39 break;
40 }
41
42 case type::group:
43 {
44 new(&group_.children) std::list<selrestr>(other.group_.children);
45 group_.orlogic = other.group_.orlogic;
46
47 break;
48 }
49
50 case type::empty:
51 {
52 break;
53 }
54 }
55 }
56
57 selrestr::selrestr(selrestr&& other) : selrestr()
58 {
59 swap(*this, other);
60 }
61
62 selrestr& selrestr::operator=(selrestr other)
63 {
64 swap(*this, other);
65
66 return *this;
67 }
68
69 void swap(selrestr& first, selrestr& second)
70 {
71 using type = selrestr::type;
72
73 type tempType = first.type_;
74 int tempPos;
75 std::string tempRestriction;
76 std::list<selrestr> tempChildren;
77 bool tempOrlogic;
78
79 switch (tempType)
80 {
81 case type::singleton:
82 {
83 tempPos = first.singleton_.pos;
84 tempRestriction = std::move(first.singleton_.restriction);
85
86 break;
87 }
88
89 case type::group:
90 {
91 tempChildren = std::move(first.group_.children);
92 tempOrlogic = first.group_.orlogic;
93
94 break;
95 }
96
97 case type::empty:
98 {
99 break;
100 }
101 }
102
103 first.~selrestr();
104
105 first.type_ = second.type_;
106
107 switch (first.type_)
108 {
109 case type::singleton:
110 {
111 first.singleton_.pos = second.singleton_.pos;
112 new(&first.singleton_.restriction) std::string(std::move(second.singleton_.restriction));
113
114 break;
115 }
116
117 case type::group:
118 {
119 new(&first.group_.children) std::list<selrestr>(std::move(second.group_.children));
120 first.group_.orlogic = second.group_.orlogic;
121
122 break;
123 }
124
125 case type::empty:
126 {
127 break;
128 }
129 }
130
131 second.~selrestr();
132
133 second.type_ = tempType;
134
135 switch (second.type_)
136 {
137 case type::singleton:
138 {
139 second.singleton_.pos = tempPos;
140 new(&second.singleton_.restriction) std::string(std::move(tempRestriction));
141
142 break;
143 }
144
145 case type::group:
146 {
147 new(&second.group_.children) std::list<selrestr>(std::move(tempChildren));
148 second.group_.orlogic = tempOrlogic;
149
150 break;
151 }
152
153 case type::empty:
154 {
155 break;
156 }
157 }
158 }
159
160 selrestr::~selrestr()
161 {
162 switch (type_)
163 {
164 case type::singleton:
165 {
166 using string_type = std::string;
167 singleton_.restriction.~string_type();
168
169 break;
170 }
171
172 case type::group:
173 {
174 using list_type = std::list<selrestr>;
175 group_.children.~list_type();
176
177 break;
178 }
179
180 case type::empty:
181 {
182 break;
183 }
184 }
185 }
186
187 selrestr::selrestr() : type_(type::empty)
188 {
189 }
190
191 selrestr::selrestr(
192 std::string restriction,
193 bool pos) :
194 type_(type::singleton)
195 {
196 new(&singleton_.restriction) std::string(std::move(restriction));
197 singleton_.pos = pos;
198 }
199
200 std::string selrestr::getRestriction() const
201 {
202 if (type_ == type::singleton)
203 {
204 return singleton_.restriction;
205 } else {
206 throw std::domain_error("Only singleton selrestrs have restrictions");
207 }
208 }
209
210 bool selrestr::getPos() const
211 {
212 if (type_ == type::singleton)
213 {
214 return singleton_.pos;
215 } else {
216 throw std::domain_error("Only singleton selrestrs have positivity flags");
217 }
218 }
219
220 selrestr::selrestr(
221 std::list<selrestr> children,
222 bool orlogic) :
223 type_(type::group)
224 {
225 new(&group_.children) std::list<selrestr>(std::move(children));
226 group_.orlogic = orlogic;
227 }
228
229 std::list<selrestr> selrestr::getChildren() const
230 {
231 if (type_ == type::group)
232 {
233 return group_.children;
234 } else {
235 throw std::domain_error("Only group selrestrs have children");
236 }
237 }
238
239 std::list<selrestr>::const_iterator selrestr::begin() const
240 {
241 if (type_ == type::group)
242 {
243 return std::begin(group_.children);
244 } else {
245 throw std::domain_error("Only group selrestrs have children");
246 }
247 }
248
249 std::list<selrestr>::const_iterator selrestr::end() const
250 {
251 if (type_ == type::group)
252 {
253 return std::end(group_.children);
254 } else {
255 throw std::domain_error("Only group selrestrs have children");
256 }
257 }
258
259 bool selrestr::getOrlogic() const
260 {
261 if (type_ == type::group)
262 {
263 return group_.orlogic;
264 } else {
265 throw std::domain_error("Only group selrestrs have logic");
266 }
267 }
268
269 nlohmann::json selrestr::toJson() const
270 {
271 switch (type_)
272 {
273 case type::empty:
274 {
275 return {};
276 }
277
278 case type::singleton:
279 {
280 return {
281 {"type", singleton_.restriction},
282 {"pos", singleton_.pos}
283 };
284 }
285
286 case type::group:
287 {
288 std::string logic;
289 if (group_.orlogic)
290 {
291 logic = "or";
292 } else {
293 logic = "and";
294 }
295
296 std::list<nlohmann::json> children;
297 std::transform(std::begin(group_.children), std::end(group_.children), std::back_inserter(children), [] (const selrestr& child) {
298 return child.toJson();
299 });
300
301 return {
302 {"logic", logic},
303 {"children", children}
304 };
305 }
306 }
307 }
308
309};
diff --git a/lib/selrestr.h b/lib/selrestr.h deleted file mode 100644 index a7cde0a..0000000 --- a/lib/selrestr.h +++ /dev/null
@@ -1,90 +0,0 @@
1#ifndef SELRESTR_H_50652FB7
2#define SELRESTR_H_50652FB7
3
4#include <list>
5#include <string>
6#include "../vendor/json/json.hpp"
7
8namespace verbly {
9
10 class selrestr {
11 public:
12 enum class type {
13 empty,
14 singleton,
15 group
16 };
17
18 // Construct from json
19
20 explicit selrestr(nlohmann::json json);
21
22 // Copy and move constructors
23
24 selrestr(const selrestr& other);
25 selrestr(selrestr&& other);
26
27 // Assignment
28
29 selrestr& operator=(selrestr other);
30
31 // Swap
32
33 friend void swap(selrestr& first, selrestr& second);
34
35 // Destructor
36
37 ~selrestr();
38
39 // Generic accessors
40
41 type getType() const
42 {
43 return type_;
44 }
45
46 // Empty
47
48 selrestr();
49
50 // Singleton
51
52 selrestr(std::string restriction, bool pos);
53
54 std::string getRestriction() const;
55
56 bool getPos() const;
57
58 // Group
59
60 selrestr(std::list<selrestr> children, bool orlogic);
61
62 std::list<selrestr> getChildren() const;
63
64 std::list<selrestr>::const_iterator begin() const;
65
66 std::list<selrestr>::const_iterator end() const;
67
68 bool getOrlogic() const;
69
70 // Helpers
71
72 nlohmann::json toJson() const;
73
74 private:
75 union {
76 struct {
77 bool pos;
78 std::string restriction;
79 } singleton_;
80 struct {
81 std::list<selrestr> children;
82 bool orlogic;
83 } group_;
84 };
85 type type_;
86 };
87
88};
89
90#endif /* end of include guard: SELRESTR_H_50652FB7 */
diff --git a/lib/verbly.h b/lib/verbly.h index 112907b..0f48a8c 100644 --- a/lib/verbly.h +++ b/lib/verbly.h
@@ -15,6 +15,5 @@
15#include "form.h" 15#include "form.h"
16#include "pronunciation.h" 16#include "pronunciation.h"
17#include "token.h" 17#include "token.h"
18#include "selrestr.h"
19 18
20#endif /* end of include guard: VERBLY_H_5B39CE50 */ 19#endif /* end of include guard: VERBLY_H_5B39CE50 */