diff options
author | Kelly Rauchenberger <fefferburbia@gmail.com> | 2016-03-24 23:16:07 -0400 |
---|---|---|
committer | Kelly Rauchenberger <fefferburbia@gmail.com> | 2016-03-24 23:16:07 -0400 |
commit | eef5de613c75661e5d94baa086f6f2ddc26c7ed0 (patch) | |
tree | 180230f6a245c5bca94d894273f5d2b93ded3f04 /lib/data.h | |
parent | d5ee4e39e5b5b3b8daa85cd972802195ad35e965 (diff) | |
download | verbly-eef5de613c75661e5d94baa086f6f2ddc26c7ed0.tar.gz verbly-eef5de613c75661e5d94baa086f6f2ddc26c7ed0.tar.bz2 verbly-eef5de613c75661e5d94baa086f6f2ddc26c7ed0.zip |
Added verb frames
In addition: - Added prepositions. - Rewrote a lot of the query interface. It now, for a lot of relationships, supports nested AND, OR, and NOT logic. - Rewrote the token class. It is now a union-like class instead of being polymorphic, which means smart pointers are no longer necessary. - Querying with regards to word derivation has been temporarily removed. - Sentinel values are now supported for all word types. - The VerbNet data retrieved from http://verbs.colorado.edu/~mpalmer/projects/verbnet/downloads.html was found to not be perfectly satisfactory in some regards, especially regarding adjective phrases. A patch file is now included in the repository describing the changes made to the VerbNet v3.2 download for the canonical verbly datafile.
Diffstat (limited to 'lib/data.h')
-rw-r--r-- | lib/data.h | 299 |
1 files changed, 298 insertions, 1 deletions
diff --git a/lib/data.h b/lib/data.h index 37092d7..6c2d580 100644 --- a/lib/data.h +++ b/lib/data.h | |||
@@ -2,7 +2,6 @@ | |||
2 | #define DATA_H_C4AEC3DD | 2 | #define DATA_H_C4AEC3DD |
3 | 3 | ||
4 | #include <sqlite3.h> | 4 | #include <sqlite3.h> |
5 | #include <stdexcept> | ||
6 | 5 | ||
7 | namespace verbly { | 6 | namespace verbly { |
8 | 7 | ||
@@ -12,10 +11,13 @@ namespace verbly { | |||
12 | class noun; | 11 | class noun; |
13 | class verb; | 12 | class verb; |
14 | class adverb; | 13 | class adverb; |
14 | class frame; | ||
15 | class adjective_query; | 15 | class adjective_query; |
16 | class adverb_query; | 16 | class adverb_query; |
17 | class noun_query; | 17 | class noun_query; |
18 | class verb_query; | 18 | class verb_query; |
19 | class frame_query; | ||
20 | class preposition_query; | ||
19 | 21 | ||
20 | class data { | 22 | class data { |
21 | private: | 23 | private: |
@@ -25,6 +27,8 @@ namespace verbly { | |||
25 | friend class noun_query; | 27 | friend class noun_query; |
26 | friend class verb_query; | 28 | friend class verb_query; |
27 | friend class adverb_query; | 29 | friend class adverb_query; |
30 | friend class frame_query; | ||
31 | friend class preposition_query; | ||
28 | 32 | ||
29 | public: | 33 | public: |
30 | data(std::string datafile); | 34 | data(std::string datafile); |
@@ -41,9 +45,302 @@ namespace verbly { | |||
41 | adjective_query adjectives() const; | 45 | adjective_query adjectives() const; |
42 | adverb_query adverbs() const; | 46 | adverb_query adverbs() const; |
43 | noun_query nouns() const; | 47 | noun_query nouns() const; |
48 | frame_query frames() const; | ||
49 | preposition_query prepositions() const; | ||
44 | 50 | ||
45 | }; | 51 | }; |
46 | 52 | ||
/**
 * A node in a tree of filter terms over values of type T.
 *
 * A filter is a union-like class with two shapes, reported by get_type():
 *   - type::singleton: holds exactly one T (see get_elem / set_elem).
 *   - type::group:     holds a std::list of child filters plus an `orlogic`
 *                      flag (see begin / end / push_back / get_orlogic).
 * Every node additionally carries a `notlogic` flag.
 *
 * NOTE(review): the flags presumably mean "negate this node" (notlogic) and
 * "combine children with OR rather than AND" (orlogic); the interpretation
 * lives in the query classes, not here -- confirm against the callers.
 *
 * The active union member is constructed and destroyed manually with
 * placement new and explicit destructor calls, always keyed on _type.
 * The shape-specific accessors assert() that the node has the right shape.
 */
template <class T>
class filter {
  public:
    enum class type {
      singleton,
      group
    };

    typedef filter<T> value_type;

    /** Returns which shape (singleton or group) this node currently has. */
    type get_type() const
    {
      return _type;
    }

    /** Deep-copies `other`, including all of its descendants. */
    filter(const filter<T>& other)
    {
      _type = other._type;
      _notlogic = other._notlogic;

      copy_payload(other);
    }

    /**
     * Replaces this node with a deep copy of `other`.
     *
     * `other` is allowed to be *this or one of this node's own descendants
     * (clean() assigns a group's only child to the group itself). Because the
     * current payload owns such a descendant, `other` is copied into a local
     * snapshot before the current payload is destroyed.
     */
    filter<T>& operator=(const filter<T>& other)
    {
      if (this == &other)
      {
        return *this;
      }

      // Must happen before destroy_payload(): `other` may be stored inside
      // our own _group.elems and would die together with it.
      filter<T> snapshot(other);

      destroy_payload();

      _type = snapshot._type;
      _notlogic = snapshot._notlogic;

      switch (_type)
      {
        case type::singleton:
        {
          new(&_singleton.elem) T(snapshot._singleton.elem);

          break;
        }

        case type::group:
        {
          // Steal the snapshot's children instead of copying them a second time.
          new(&_group.elems) std::list<filter<T>>();
          _group.elems.swap(snapshot._group.elems);
          _group.orlogic = snapshot._group.orlogic;

          break;
        }
      }

      return *this;
    }

    /** Destroys whichever union member is active. */
    ~filter()
    {
      destroy_payload();
    }

    /** Returns the notlogic flag of this node. */
    bool get_notlogic() const
    {
      return _notlogic;
    }

    /** Sets the notlogic flag of this node. */
    void set_notlogic(bool _nl)
    {
      _notlogic = _nl;
    }

    /**
     * Returns every singleton value in the tree in depth-first,
     * left-to-right order. Duplicates are kept; the notlogic/orlogic flags
     * are ignored.
     */
    std::list<T> inorder_flatten() const
    {
      std::list<T> result;

      if (_type == type::singleton)
      {
        result.push_back(_singleton.elem);
      } else if (_type == type::group)
      {
        // Iterate by reference: copying a child would deep-copy its subtree.
        for (const auto& child : _group.elems)
        {
          // Splicing relinks the temporary's nodes rather than copying them.
          result.splice(result.end(), child.inorder_flatten());
        }
      }

      return result;
    }

    /**
     * Returns the set of distinct singleton values in the tree. The
     * notlogic/orlogic flags are ignored. Requires T to be usable as a
     * std::set key.
     */
    std::set<T> uniq_flatten() const
    {
      std::set<T> result;

      if (_type == type::singleton)
      {
        result.insert(_singleton.elem);
      } else if (_type == type::group)
      {
        for (const auto& child : _group.elems)
        {
          const std::set<T> sub = child.uniq_flatten();
          result.insert(sub.begin(), sub.end());
        }
      }

      return result;
    }

    /**
     * Simplifies the tree in place. For a group node:
     *   - every child is cleaned first;
     *   - child groups that end up empty are removed;
     *   - a child group with exactly one element is replaced by that element;
     *   - finally, if this group itself has exactly one element left, this
     *     node is replaced by that element.
     * When a group is replaced by its only element, the resulting notlogic
     * flag is the XOR of the group's flag and the element's flag.
     * Singleton nodes are left untouched.
     */
    void clean()
    {
      if (_type != type::group)
      {
        return;
      }

      for (auto it = _group.elems.begin(); it != _group.elems.end(); )
      {
        it->clean();

        if (it->get_type() == type::group)
        {
          if (it->_group.elems.empty())
          {
            // erase() hands back the next valid iterator.
            it = _group.elems.erase(it);
            continue;
          }

          if (it->_group.elems.size() == 1)
          {
            it->absorb_only_child();
          }
        }

        ++it;
      }

      if (_group.elems.size() == 1)
      {
        absorb_only_child();
      }
    }

    // Singleton

    /** Builds a singleton node holding a copy of `_elem`. */
    filter(T _elem, bool _notlogic = false) : _type(type::singleton)
    {
      new(&_singleton.elem) T(_elem);
      this->_notlogic = _notlogic;
    }

    /**
     * Turns this node into a singleton holding `_elem` with notlogic cleared.
     *
     * Parentheses are required here: `filter<T>{_elem}` would select the
     * std::initializer_list constructor and produce a one-element group.
     */
    filter<T>& operator=(T _elem)
    {
      *this = filter<T>(_elem);

      return *this;
    }

    /** Returns a copy of the held value. Asserts that this is a singleton. */
    T get_elem() const
    {
      assert(_type == type::singleton);

      return _singleton.elem;
    }

    /** Overwrites the held value. Asserts that this is a singleton. */
    void set_elem(T _elem)
    {
      assert(_type == type::singleton);

      _singleton.elem = _elem;
    }

    // Group
    typedef typename std::list<filter<T>>::iterator iterator;

    /** Builds an empty group with orlogic and notlogic both false. */
    filter() : _type(type::group)
    {
      new(&_group.elems) std::list<filter<T>>();
      _group.orlogic = false;
    }

    /** Builds a group from the listed children, with orlogic false. */
    filter(std::initializer_list<filter<T>> _init) : _type(type::group)
    {
      new(&_group.elems) std::list<filter<T>>(_init);
      _group.orlogic = false;
    }

    /** Iterator to the first child. Asserts that this is a group. */
    iterator begin()
    {
      assert(_type == type::group);

      return _group.elems.begin();
    }

    /** Past-the-end child iterator. Asserts that this is a group. */
    iterator end()
    {
      assert(_type == type::group);

      return _group.elems.end();
    }

    /** Appends a child and returns *this for chaining. Asserts group. */
    filter<T>& operator<<(filter<T> _elem)
    {
      assert(_type == type::group);

      _group.elems.push_back(_elem);

      return *this;
    }

    /** Appends a child. Asserts that this is a group. */
    void push_back(filter<T> _elem)
    {
      assert(_type == type::group);

      _group.elems.push_back(_elem);
    }

    /** Returns the orlogic flag. Asserts that this is a group. */
    bool get_orlogic() const
    {
      assert(_type == type::group);

      return _group.orlogic;
    }

    /** Sets the orlogic flag. Asserts that this is a group. */
    void set_orlogic(bool _ol)
    {
      assert(_type == type::group);

      _group.orlogic = _ol;
    }

    /** True only for a group without children; a singleton is never empty. */
    bool empty() const
    {
      if (_type == type::group)
      {
        return _group.elems.empty();
      } else {
        return false;
      }
    }

    /** Number of direct children. Asserts that this is a group. */
    int size() const
    {
      assert(_type == type::group);

      // The return type stays int for existing callers; narrow explicitly.
      return static_cast<int>(_group.elems.size());
    }

  private:
    // Named payload types: defining unnamed struct types directly inside an
    // anonymous union is accepted by common compilers only as an extension.
    struct singleton_storage {
      T elem;
    };

    struct group_storage {
      std::list<filter<T>> elems;
      bool orlogic;
    };

    // Copy-constructs the payload selected by _type from `src`.
    // _type must already equal src._type and no payload may be alive.
    void copy_payload(const filter<T>& src)
    {
      switch (_type)
      {
        case type::singleton:
        {
          new(&_singleton.elem) T(src._singleton.elem);

          break;
        }

        case type::group:
        {
          new(&_group.elems) std::list<filter<T>>(src._group.elems);
          _group.orlogic = src._group.orlogic;

          break;
        }
      }
    }

    // Runs the destructor of the payload selected by _type.
    void destroy_payload()
    {
      switch (_type)
      {
        case type::singleton:
        {
          _singleton.elem.~T();

          break;
        }

        case type::group:
        {
          using list_type = std::list<filter<T>>;
          _group.elems.~list_type();

          break;
        }
      }
    }

    // Replaces this one-element group by its element; the new notlogic flag
    // is the XOR of the two flags. Caller guarantees group with size() == 1.
    void absorb_only_child()
    {
      const bool combined = _notlogic != _group.elems.front()._notlogic;
      *this = _group.elems.front(); // operator= tolerates a source we own
      _notlogic = combined;
    }

    type _type;
    bool _notlogic = false;
    union {
      singleton_storage _singleton;
      group_storage _group;
    };
};
343 | |||
47 | }; | 344 | }; |
48 | 345 | ||
49 | #endif /* end of include guard: DATA_H_C4AEC3DD */ | 346 | #endif /* end of include guard: DATA_H_C4AEC3DD */ |