diff options
author | Kelly Rauchenberger <fefferburbia@gmail.com> | 2017-01-23 11:59:23 -0500 |
---|---|---|
committer | Kelly Rauchenberger <fefferburbia@gmail.com> | 2017-01-23 11:59:23 -0500 |
commit | 4cbe7a42a685bc2449f1adb7c37144c9496eab5f (patch) | |
tree | cc635814bb6a42222dda78bf3b4a0d6a2c9b81fb | |
parent | 9bd863c9002b525b7827f9158d9136143393be5c (diff) | |
download | verbly-4cbe7a42a685bc2449f1adb7c37144c9496eab5f.tar.gz verbly-4cbe7a42a685bc2449f1adb7c37144c9496eab5f.tar.bz2 verbly-4cbe7a42a685bc2449f1adb7c37144c9496eab5f.zip |
Rewrote tokens
-rw-r--r-- | CMakeLists.txt | 2 | ||||
-rw-r--r-- | lib/token.cpp | 813 | ||||
-rw-r--r-- | lib/token.h | 236 | ||||
-rw-r--r-- | lib/verbly.h | 4 |
4 files changed, 405 insertions, 650 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 8d42fdd..5c15e79 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt | |||
@@ -7,7 +7,7 @@ pkg_check_modules(sqlite3 sqlite3>=3.8.3 REQUIRED) | |||
7 | set(CMAKE_BUILD_TYPE Debug) | 7 | set(CMAKE_BUILD_TYPE Debug) |
8 | 8 | ||
9 | include_directories(vendor/json) | 9 | include_directories(vendor/json) |
10 | add_library(verbly lib/filter.cpp lib/field.cpp lib/notion.cpp lib/word.cpp lib/group.cpp lib/frame.cpp lib/lemma.cpp lib/form.cpp lib/pronunciation.cpp lib/statement.cpp lib/binding.cpp lib/database.cpp lib/selrestr.cpp lib/part.cpp) | 10 | add_library(verbly lib/filter.cpp lib/field.cpp lib/notion.cpp lib/word.cpp lib/group.cpp lib/frame.cpp lib/lemma.cpp lib/form.cpp lib/pronunciation.cpp lib/statement.cpp lib/binding.cpp lib/database.cpp lib/token.cpp lib/selrestr.cpp lib/part.cpp) |
11 | set_property(TARGET verbly PROPERTY CXX_STANDARD 11) | 11 | set_property(TARGET verbly PROPERTY CXX_STANDARD 11) |
12 | set_property(TARGET verbly PROPERTY CXX_STANDARD_REQUIRED ON) | 12 | set_property(TARGET verbly PROPERTY CXX_STANDARD_REQUIRED ON) |
13 | target_link_libraries(verbly ${sqlite3_LIBRARIES}) | 13 | target_link_libraries(verbly ${sqlite3_LIBRARIES}) |
diff --git a/lib/token.cpp b/lib/token.cpp index 3cc77e5..769acad 100644 --- a/lib/token.cpp +++ b/lib/token.cpp | |||
@@ -1,645 +1,450 @@ | |||
1 | #include "verbly.h" | 1 | #include "token.h" |
2 | #include <stdexcept> | ||
3 | #include "util.h" | ||
2 | 4 | ||
3 | namespace verbly { | 5 | namespace verbly { |
4 | 6 | ||
5 | token::type token::get_type() const | ||
6 | { | ||
7 | return _type; | ||
8 | } | ||
9 | |||
10 | int token::get_extra() const | ||
11 | { | ||
12 | return _extra; | ||
13 | } | ||
14 | |||
15 | void token::set_extra(int _arg) | ||
16 | { | ||
17 | _extra = _arg; | ||
18 | } | ||
19 | |||
20 | token::token(const token& other) | 7 | token::token(const token& other) |
21 | { | 8 | { |
22 | _type = other._type; | 9 | type_ = other.type_; |
23 | _extra = other._extra; | 10 | |
24 | 11 | switch (type_) | |
25 | switch (_type) | ||
26 | { | 12 | { |
27 | case token::type::verb: | 13 | case type::word: |
28 | { | 14 | { |
29 | new(&_verb._verb) verb(other._verb._verb); | 15 | new(&word_.word_) word(other.word_.word_); |
30 | _verb._infl = other._verb._infl; | 16 | word_.category_ = other.word_.category_; |
31 | 17 | ||
32 | break; | ||
33 | } | ||
34 | |||
35 | case token::type::noun: | ||
36 | { | ||
37 | new(&_noun._noun) noun(other._noun._noun); | ||
38 | _noun._infl = other._noun._infl; | ||
39 | |||
40 | break; | 18 | break; |
41 | } | 19 | } |
42 | 20 | ||
43 | case token::type::adjective: | 21 | case type::literal: |
44 | { | 22 | { |
45 | new(&_adjective._adjective) adjective(other._adjective._adjective); | 23 | new(&literal_) std::string(other.literal_); |
46 | _adjective._infl = other._adjective._infl; | 24 | |
47 | |||
48 | break; | 25 | break; |
49 | } | 26 | } |
50 | 27 | ||
51 | case token::type::adverb: | 28 | case type::part: |
52 | { | 29 | { |
53 | new(&_adverb._adverb) adverb(other._adverb._adverb); | 30 | new(&part_) part(other.part_); |
54 | _adverb._infl = other._adverb._infl; | 31 | |
55 | |||
56 | break; | 32 | break; |
57 | } | 33 | } |
58 | 34 | ||
59 | case token::type::preposition: | 35 | case type::fillin: |
60 | { | 36 | { |
61 | new(&_preposition._preposition) preposition(other._preposition._preposition); | 37 | new(&fillin_) std::set<std::string>(other.fillin_); |
62 | 38 | ||
63 | break; | 39 | break; |
64 | } | 40 | } |
65 | 41 | ||
66 | case token::type::fillin: | 42 | case type::utterance: |
67 | { | 43 | { |
68 | _fillin._type = other._fillin._type; | 44 | new(&utterance_) std::list<token>(other.utterance_); |
69 | 45 | ||
70 | break; | 46 | break; |
71 | } | 47 | } |
72 | 48 | } | |
73 | case token::type::string: | 49 | } |
50 | |||
51 | token::token(token&& other) : token() | ||
52 | { | ||
53 | swap(*this, other); | ||
54 | } | ||
55 | |||
56 | token& token::operator=(token other) | ||
57 | { | ||
58 | swap(*this, other); | ||
59 | |||
60 | return *this; | ||
61 | } | ||
62 | |||
63 | void swap(token& first, token& second) | ||
64 | { | ||
65 | using type = token::type; | ||
66 | |||
67 | type tempType = first.type_; | ||
68 | word tempWord; | ||
69 | inflection tempCategory; | ||
70 | std::string tempLiteral; | ||
71 | part tempPart; | ||
72 | std::set<std::string> tempFillin; | ||
73 | std::list<token> tempUtterance; | ||
74 | |||
75 | switch (tempType) | ||
76 | { | ||
77 | case type::word: | ||
74 | { | 78 | { |
75 | new(&_string._str) std::string(other._string._str); | 79 | tempWord = std::move(first.word_.word_); |
76 | 80 | tempCategory = first.word_.category_; | |
81 | |||
77 | break; | 82 | break; |
78 | } | 83 | } |
79 | 84 | ||
80 | case token::type::utterance: | 85 | case type::literal: |
81 | { | 86 | { |
82 | new(&_utterance._utterance) std::list<token>(other._utterance._utterance); | 87 | tempLiteral = std::move(first.literal_); |
83 | 88 | ||
84 | break; | 89 | break; |
85 | } | 90 | } |
86 | } | 91 | |
87 | } | 92 | case type::part: |
88 | |||
89 | token& token::operator=(const token& other) | ||
90 | { | ||
91 | this->~token(); | ||
92 | |||
93 | _type = other._type; | ||
94 | _extra = other._extra; | ||
95 | |||
96 | switch (_type) | ||
97 | { | ||
98 | case token::type::verb: | ||
99 | { | 93 | { |
100 | new(&_verb._verb) verb(other._verb._verb); | 94 | tempPart = std::move(first.part_); |
101 | _verb._infl = other._verb._infl; | 95 | |
102 | |||
103 | break; | 96 | break; |
104 | } | 97 | } |
105 | 98 | ||
106 | case token::type::noun: | 99 | case type::fillin: |
107 | { | 100 | { |
108 | new(&_noun._noun) noun(other._noun._noun); | 101 | tempFillin = std::move(first.fillin_); |
109 | _noun._infl = other._noun._infl; | 102 | |
110 | |||
111 | break; | 103 | break; |
112 | } | 104 | } |
113 | 105 | ||
114 | case token::type::adjective: | 106 | case type::utterance: |
115 | { | 107 | { |
116 | new(&_adjective._adjective) adjective(other._adjective._adjective); | 108 | tempUtterance = std::move(first.utterance_); |
117 | _adjective._infl = other._adjective._infl; | 109 | |
118 | |||
119 | break; | 110 | break; |
120 | } | 111 | } |
121 | 112 | } | |
122 | case token::type::adverb: | 113 | |
114 | first.~token(); | ||
115 | |||
116 | first.type_ = second.type_; | ||
117 | |||
118 | switch (first.type_) | ||
119 | { | ||
120 | case type::word: | ||
123 | { | 121 | { |
124 | new(&_adverb._adverb) adverb(other._adverb._adverb); | 122 | new(&first.word_.word_) word(std::move(second.word_.word_)); |
125 | _adverb._infl = other._adverb._infl; | 123 | first.word_.category_ = second.word_.category_; |
126 | 124 | ||
127 | break; | 125 | break; |
128 | } | 126 | } |
129 | 127 | ||
130 | case token::type::preposition: | 128 | case type::literal: |
131 | { | 129 | { |
132 | new(&_preposition._preposition) preposition(other._preposition._preposition); | 130 | new(&first.literal_) std::string(std::move(second.literal_)); |
133 | 131 | ||
134 | break; | 132 | break; |
135 | } | 133 | } |
136 | 134 | ||
137 | case token::type::fillin: | 135 | case type::part: |
138 | { | 136 | { |
139 | _fillin._type = other._fillin._type; | 137 | new(&first.part_) part(std::move(second.part_)); |
140 | 138 | ||
141 | break; | 139 | break; |
142 | } | 140 | } |
143 | 141 | ||
144 | case token::type::string: | 142 | case type::fillin: |
145 | { | 143 | { |
146 | new(&_string._str) std::string(other._string._str); | 144 | new(&first.fillin_) std::set<std::string>(std::move(second.fillin_)); |
147 | 145 | ||
148 | break; | 146 | break; |
149 | } | 147 | } |
150 | 148 | ||
151 | case token::type::utterance: | 149 | case type::utterance: |
152 | { | 150 | { |
153 | new(&_utterance._utterance) std::list<token>(other._utterance._utterance); | 151 | new(&first.utterance_) std::list<token>(std::move(second.utterance_)); |
154 | 152 | ||
155 | break; | 153 | break; |
156 | } | 154 | } |
157 | } | 155 | } |
158 | 156 | ||
159 | return *this; | 157 | second.~token(); |
160 | } | 158 | |
161 | 159 | second.type_ = tempType; | |
162 | token::~token() | 160 | |
163 | { | 161 | switch (second.type_) |
164 | switch (_type) | ||
165 | { | 162 | { |
166 | case token::type::verb: | 163 | case type::word: |
167 | { | 164 | { |
168 | _verb._verb.~verb(); | 165 | new(&second.word_.word_) word(std::move(tempWord)); |
169 | 166 | second.word_.category_ = tempCategory; | |
167 | |||
170 | break; | 168 | break; |
171 | } | 169 | } |
172 | 170 | ||
173 | case token::type::noun: | 171 | case type::literal: |
174 | { | 172 | { |
175 | _noun._noun.~noun(); | 173 | new(&second.literal_) std::string(std::move(tempLiteral)); |
176 | 174 | ||
177 | break; | 175 | break; |
178 | } | 176 | } |
179 | 177 | ||
180 | case token::type::adjective: | 178 | case type::part: |
181 | { | 179 | { |
182 | _adjective._adjective.~adjective(); | 180 | new(&second.part_) part(std::move(tempPart)); |
183 | 181 | ||
184 | break; | 182 | break; |
185 | } | 183 | } |
186 | 184 | ||
187 | case token::type::adverb: | 185 | case type::fillin: |
188 | { | 186 | { |
189 | _adverb._adverb.~adverb(); | 187 | new(&second.fillin_) std::set<std::string>(std::move(tempFillin)); |
190 | 188 | ||
191 | break; | 189 | break; |
192 | } | 190 | } |
193 | 191 | ||
194 | case token::type::preposition: | 192 | case type::utterance: |
195 | { | 193 | { |
196 | _preposition._preposition.~preposition(); | 194 | new(&second.utterance_) std::list<token>(std::move(tempUtterance)); |
197 | 195 | ||
198 | break; | 196 | break; |
199 | } | 197 | } |
200 | 198 | } | |
201 | case token::type::fillin: | 199 | } |
200 | |||
201 | token::~token() | ||
202 | { | ||
203 | switch (type_) | ||
204 | { | ||
205 | case type::word: | ||
202 | { | 206 | { |
203 | // Nothing! | 207 | word_.word_.~word(); |
204 | 208 | ||
205 | break; | 209 | break; |
206 | } | 210 | } |
207 | 211 | ||
208 | case token::type::string: | 212 | case type::literal: |
209 | { | 213 | { |
210 | using string_type = std::string; | 214 | using string_type = std::string; |
211 | _string._str.~string_type(); | 215 | literal_.~string_type(); |
212 | 216 | ||
217 | break; | ||
218 | } | ||
219 | |||
220 | case type::part: | ||
221 | { | ||
222 | part_.~part(); | ||
223 | |||
224 | break; | ||
225 | } | ||
226 | |||
227 | case type::fillin: | ||
228 | { | ||
229 | using set_type = std::set<std::string>; | ||
230 | fillin_.~set_type(); | ||
231 | |||
213 | break; | 232 | break; |
214 | } | 233 | } |
215 | 234 | ||
216 | case token::type::utterance: | 235 | case type::utterance: |
217 | { | 236 | { |
218 | using list_type = std::list<token>; | 237 | using list_type = std::list<token>; |
219 | _utterance._utterance.~list_type(); | 238 | utterance_.~list_type(); |
220 | 239 | ||
221 | break; | 240 | break; |
222 | } | 241 | } |
223 | } | 242 | } |
224 | } | 243 | } |
225 | 244 | ||
226 | bool token::is_complete() const | 245 | bool token::isComplete() const |
227 | { | 246 | { |
228 | if (_type == token::type::utterance) | 247 | switch (type_) |
229 | { | 248 | { |
230 | return std::all_of(std::begin(_utterance._utterance), std::end(_utterance._utterance), [] (const token& tkn) { | 249 | case type::word: return true; |
231 | return tkn.is_complete(); | 250 | case type::literal: return true; |
251 | case type::part: return false; | ||
252 | case type::fillin: return false; | ||
253 | case type::utterance: return std::all_of(std::begin(utterance_), std::end(utterance_), [] (const token& tkn) { | ||
254 | return tkn.isComplete(); | ||
232 | }); | 255 | }); |
233 | } else if (_type == token::type::fillin) | ||
234 | { | ||
235 | return false; | ||
236 | } else { | ||
237 | return true; | ||
238 | } | 256 | } |
239 | } | 257 | } |
240 | 258 | ||
241 | std::string token::compile() const | 259 | std::string token::compile() const |
242 | { | 260 | { |
243 | switch (_type) | 261 | switch (type_) |
244 | { | 262 | { |
245 | case token::type::verb: | 263 | case type::word: return word_.word_.getInflections(word_.category_).front(); |
246 | { | 264 | case type::literal: return literal_; |
247 | switch (_verb._infl) | 265 | case type::part: throw std::domain_error("Cannot compile incomplete token"); |
248 | { | 266 | case type::fillin: throw std::domain_error("Cannot compile incomplete token"); |
249 | case token::verb_inflection::infinitive: return _verb._verb.infinitive_form(); | 267 | |
250 | case token::verb_inflection::past_tense: return _verb._verb.past_tense_form(); | 268 | case type::utterance: |
251 | case token::verb_inflection::past_participle: return _verb._verb.past_participle_form(); | ||
252 | case token::verb_inflection::ing_form: return _verb._verb.ing_form(); | ||
253 | case token::verb_inflection::s_form: return _verb._verb.s_form(); | ||
254 | } | ||
255 | } | ||
256 | |||
257 | case token::type::noun: | ||
258 | { | ||
259 | switch (_noun._infl) | ||
260 | { | ||
261 | case token::noun_inflection::singular: return _noun._noun.singular_form(); | ||
262 | case token::noun_inflection::plural: return _noun._noun.plural_form(); | ||
263 | } | ||
264 | } | ||
265 | |||
266 | case token::type::adjective: | ||
267 | { | ||
268 | switch (_adjective._infl) | ||
269 | { | ||
270 | case token::adjective_inflection::base: return _adjective._adjective.base_form(); | ||
271 | case token::adjective_inflection::comparative: return _adjective._adjective.comparative_form(); | ||
272 | case token::adjective_inflection::superlative: return _adjective._adjective.superlative_form(); | ||
273 | } | ||
274 | } | ||
275 | |||
276 | case token::type::adverb: | ||
277 | { | 269 | { |
278 | switch (_adverb._infl) | 270 | std::list<std::string> compiled; |
271 | for (const token& tkn : utterance_) | ||
279 | { | 272 | { |
280 | case token::adverb_inflection::base: return _adverb._adverb.base_form(); | 273 | compiled.push_back(tkn.compile()); |
281 | case token::adverb_inflection::comparative: return _adverb._adverb.comparative_form(); | ||
282 | case token::adverb_inflection::superlative: return _adverb._adverb.superlative_form(); | ||
283 | } | 274 | } |
284 | } | 275 | |
285 | 276 | return implode(std::begin(compiled), std::end(compiled), " "); | |
286 | case token::type::preposition: return _preposition._preposition.get_form(); | ||
287 | case token::type::string: return _string._str; | ||
288 | |||
289 | case token::type::fillin: | ||
290 | { | ||
291 | throw std::runtime_error("Cannot compile a fillin token."); | ||
292 | } | ||
293 | |||
294 | case token::type::utterance: | ||
295 | { | ||
296 | std::list<std::string> compiled; | ||
297 | std::transform(std::begin(_utterance._utterance), std::end(_utterance._utterance), std::back_inserter(compiled), [] (token tkn) { | ||
298 | return tkn.compile(); | ||
299 | }); | ||
300 | |||
301 | return verbly::implode(std::begin(compiled), std::end(compiled), " "); | ||
302 | } | 277 | } |
303 | } | 278 | } |
304 | } | 279 | } |
305 | 280 | ||
306 | token::token(verb _verb) : _type(type::verb) | 281 | token::token(word arg, inflection category) : type_(type::word) |
307 | { | ||
308 | new(&this->_verb._verb) verb(_verb); | ||
309 | this->_verb._infl = verb_inflection::infinitive; | ||
310 | } | ||
311 | |||
312 | token::token(verb _verb, verb_inflection _infl) : token(_verb) | ||
313 | { | ||
314 | this->_verb._infl = _infl; | ||
315 | } | ||
316 | |||
317 | token& token::operator=(verb _verb) | ||
318 | { | ||
319 | *this = token{_verb}; | ||
320 | |||
321 | return *this; | ||
322 | } | ||
323 | |||
324 | verb token::get_verb() const | ||
325 | { | ||
326 | assert(_type == type::verb); | ||
327 | |||
328 | return _verb._verb; | ||
329 | } | ||
330 | |||
331 | void token::set_verb(verb _verb) | ||
332 | { | ||
333 | assert(_type == type::verb); | ||
334 | |||
335 | this->_verb._verb = _verb; | ||
336 | } | ||
337 | |||
338 | token::verb_inflection token::get_verb_inflection() const | ||
339 | { | ||
340 | assert(_type == type::verb); | ||
341 | |||
342 | return _verb._infl; | ||
343 | } | ||
344 | |||
345 | void token::set_verb_inflection(verb_inflection _infl) | ||
346 | { | ||
347 | assert(_type == type::verb); | ||
348 | |||
349 | _verb._infl = _infl; | ||
350 | } | ||
351 | |||
352 | token::token(noun _noun) : _type(type::noun) | ||
353 | { | ||
354 | new(&this->_noun._noun) noun(_noun); | ||
355 | this->_noun._infl = noun_inflection::singular; | ||
356 | } | ||
357 | |||
358 | token::token(noun _noun, noun_inflection _infl) : token(_noun) | ||
359 | { | ||
360 | this->_noun._infl = _infl; | ||
361 | } | ||
362 | |||
363 | token& token::operator=(noun _noun) | ||
364 | { | ||
365 | *this = token{_noun}; | ||
366 | |||
367 | return *this; | ||
368 | } | ||
369 | |||
370 | noun token::get_noun() const | ||
371 | { | ||
372 | assert(_type == type::noun); | ||
373 | |||
374 | return _noun._noun; | ||
375 | } | ||
376 | |||
377 | void token::set_noun(noun _noun) | ||
378 | { | ||
379 | assert(_type == type::noun); | ||
380 | |||
381 | this->_noun._noun = _noun; | ||
382 | } | ||
383 | |||
384 | token::noun_inflection token::get_noun_inflection() const | ||
385 | { | ||
386 | assert(_type == type::noun); | ||
387 | |||
388 | return _noun._infl; | ||
389 | } | ||
390 | |||
391 | void token::set_noun_inflection(noun_inflection _infl) | ||
392 | { | ||
393 | assert(_type == type::noun); | ||
394 | |||
395 | _noun._infl = _infl; | ||
396 | } | ||
397 | |||
398 | token::token(adjective _adjective) : _type(type::adjective) | ||
399 | { | ||
400 | new(&this->_adjective._adjective) adjective(_adjective); | ||
401 | this->_adjective._infl = adjective_inflection::base; | ||
402 | } | ||
403 | |||
404 | token::token(adjective _adjective, adjective_inflection _infl) : token(_adjective) | ||
405 | { | ||
406 | this->_adjective._infl = _infl; | ||
407 | } | ||
408 | |||
409 | token& token::operator=(adjective _adjective) | ||
410 | { | ||
411 | *this = token{_adjective}; | ||
412 | |||
413 | return *this; | ||
414 | } | ||
415 | |||
416 | adjective token::get_adjective() const | ||
417 | { | ||
418 | assert(_type == type::adjective); | ||
419 | |||
420 | return _adjective._adjective; | ||
421 | } | ||
422 | |||
423 | void token::set_adjective(adjective _adjective) | ||
424 | { | ||
425 | assert(_type == type::adjective); | ||
426 | |||
427 | this->_adjective._adjective = _adjective; | ||
428 | } | ||
429 | |||
430 | token::adjective_inflection token::get_adjective_inflection() const | ||
431 | { | ||
432 | assert(_type == type::adjective); | ||
433 | |||
434 | return _adjective._infl; | ||
435 | } | ||
436 | |||
437 | void token::set_adjective_inflection(adjective_inflection _infl) | ||
438 | { | ||
439 | assert(_type == type::adjective); | ||
440 | |||
441 | _adjective._infl = _infl; | ||
442 | } | ||
443 | |||
444 | token::token(adverb _adverb) : _type(type::adverb) | ||
445 | { | ||
446 | new(&this->_adverb._adverb) adverb(_adverb); | ||
447 | this->_adverb._infl = adverb_inflection::base; | ||
448 | } | ||
449 | |||
450 | token::token(adverb _adverb, adverb_inflection _infl) : token(_adverb) | ||
451 | { | ||
452 | this->_adverb._infl = _infl; | ||
453 | } | ||
454 | |||
455 | token& token::operator=(adverb _adverb) | ||
456 | { | ||
457 | *this = token{_adverb}; | ||
458 | |||
459 | return *this; | ||
460 | } | ||
461 | |||
462 | adverb token::get_adverb() const | ||
463 | { | 282 | { |
464 | assert(_type == type::adverb); | 283 | new(&word_.word_) word(std::move(arg)); |
465 | 284 | word_.category_ = category; | |
466 | return _adverb._adverb; | ||
467 | } | 285 | } |
468 | 286 | ||
469 | void token::set_adverb(adverb _adverb) | 287 | const word& token::getWord() const |
470 | { | 288 | { |
471 | assert(_type == type::adverb); | 289 | if (type_ != type::word) |
472 | 290 | { | |
473 | this->_adverb._adverb = _adverb; | 291 | throw std::domain_error("Token is not a word"); |
292 | } | ||
293 | |||
294 | return word_.word_; | ||
474 | } | 295 | } |
475 | 296 | ||
476 | token::adverb_inflection token::get_adverb_inflection() const | 297 | token token::inflect(inflection category) const |
477 | { | 298 | { |
478 | assert(_type == type::adverb); | 299 | if (type_ != type::word) |
479 | 300 | { | |
480 | return _adverb._infl; | 301 | throw std::domain_error("Token is not a word"); |
302 | } | ||
303 | |||
304 | return token(word_.word_, category); | ||
481 | } | 305 | } |
482 | 306 | ||
483 | void token::set_adverb_inflection(adverb_inflection _infl) | 307 | token::token(std::string arg) : type_(type::literal) |
484 | { | 308 | { |
485 | assert(_type == type::adverb); | 309 | new(&literal_) std::string(std::move(arg)); |
486 | |||
487 | _adverb._infl = _infl; | ||
488 | } | 310 | } |
489 | 311 | ||
490 | token::token(preposition _preposition) : _type(type::preposition) | 312 | token::token(const char* arg) : token(std::string(arg)) |
491 | { | 313 | { |
492 | new(&this->_preposition._preposition) preposition(_preposition); | ||
493 | } | 314 | } |
494 | 315 | ||
495 | token& token::operator=(preposition _preposition) | 316 | std::string token::getLiteral() const |
496 | { | 317 | { |
497 | *this = token{_preposition}; | 318 | if (type_ != type::literal) |
498 | 319 | { | |
499 | return *this; | 320 | throw std::domain_error("Token is not a literal"); |
321 | } | ||
322 | |||
323 | return literal_; | ||
500 | } | 324 | } |
501 | 325 | ||
502 | preposition token::get_preposition() const | 326 | token::token(part arg) : type_(type::part) |
503 | { | 327 | { |
504 | assert(_type == type::preposition); | 328 | new(&part_) part(std::move(arg)); |
505 | |||
506 | return _preposition._preposition; | ||
507 | } | 329 | } |
508 | 330 | ||
509 | void token::set_preposition(preposition _preposition) | 331 | part token::getPart() const |
510 | { | 332 | { |
511 | assert(_type == type::preposition); | 333 | if (type_ != type::part) |
512 | 334 | { | |
513 | this->_preposition._preposition = _preposition; | 335 | throw std::domain_error("Token is not a part"); |
336 | } | ||
337 | |||
338 | return part_; | ||
514 | } | 339 | } |
515 | 340 | ||
516 | token::token(fillin_type _ft) : _type(type::fillin) | 341 | token::token(std::set<std::string> synrestrs) : type_(type::fillin) |
517 | { | 342 | { |
518 | _fillin._type = _ft; | 343 | new(&fillin_) std::set<std::string>(std::move(synrestrs)); |
519 | } | 344 | } |
520 | 345 | ||
521 | token& token::operator=(fillin_type _ft) | 346 | const std::set<std::string>& token::getSynrestrs() const |
522 | { | 347 | { |
523 | *this = token{_ft}; | 348 | if (type_ != type::fillin) |
524 | 349 | { | |
525 | return *this; | 350 | throw std::domain_error("Token is not a fillin"); |
351 | } | ||
352 | |||
353 | return fillin_; | ||
526 | } | 354 | } |
527 | 355 | ||
528 | token::fillin_type token::get_fillin_type() const | 356 | bool token::hasSynrestr(std::string synrestr) const |
529 | { | 357 | { |
530 | assert(_type == type::fillin); | 358 | if (type_ != type::fillin) |
531 | 359 | { | |
532 | return _fillin._type; | 360 | throw std::domain_error("Token is not a fillin"); |
361 | } | ||
362 | |||
363 | return (fillin_.count(synrestr) == 1); | ||
533 | } | 364 | } |
534 | 365 | ||
535 | void token::set_fillin_type(fillin_type _ft) | 366 | void token::addSynrestr(std::string synrestr) |
536 | { | 367 | { |
537 | assert(_type == type::fillin); | 368 | if (type_ != type::fillin) |
538 | 369 | { | |
539 | _fillin._type = _ft; | 370 | throw std::domain_error("Token is not a fillin"); |
371 | } | ||
372 | |||
373 | fillin_.insert(std::move(synrestr)); | ||
540 | } | 374 | } |
541 | 375 | ||
542 | token::token() : _type(type::utterance) | 376 | token::token() : type_(type::utterance) |
543 | { | 377 | { |
544 | new(&_utterance._utterance) std::list<token>(); | 378 | new(&utterance_) std::list<token>(); |
545 | } | 379 | } |
546 | 380 | ||
547 | token::token(std::initializer_list<token> _init) : _type(type::utterance) | 381 | token::token(std::vector<part> parts) : type_(type::utterance) |
548 | { | 382 | { |
549 | new(&_utterance._utterance) std::list<token>(_init); | 383 | new(&utterance_) std::list<token>(std::begin(parts), std::end(parts)); |
550 | } | 384 | } |
551 | 385 | ||
552 | token::iterator token::begin() | 386 | token::iterator token::begin() |
553 | { | 387 | { |
554 | assert(_type == type::utterance); | 388 | if (type_ != type::utterance) |
555 | 389 | { | |
556 | return _utterance._utterance.begin(); | 390 | throw std::domain_error("Token is not an utterance"); |
557 | } | 391 | } |
558 | 392 | ||
559 | token::iterator token::end() | 393 | return std::begin(utterance_); |
560 | { | ||
561 | assert(_type == type::utterance); | ||
562 | |||
563 | return _utterance._utterance.end(); | ||
564 | } | ||
565 | |||
566 | token& token::operator<<(token _tkn) | ||
567 | { | ||
568 | assert(_type == type::utterance); | ||
569 | |||
570 | _utterance._utterance.push_back(_tkn); | ||
571 | |||
572 | return *this; | ||
573 | } | ||
574 | |||
575 | void token::push_back(token _tkn) | ||
576 | { | ||
577 | assert(_type == type::utterance); | ||
578 | |||
579 | _utterance._utterance.push_back(_tkn); | ||
580 | } | ||
581 | |||
582 | void token::insert(iterator before, token _tkn) | ||
583 | { | ||
584 | assert(_type == type::utterance); | ||
585 | |||
586 | _utterance._utterance.insert(before, _tkn); | ||
587 | } | 394 | } |
588 | 395 | ||
589 | void token::replace(iterator torepl, token _tkn) | 396 | token::const_iterator token::begin() const |
590 | { | 397 | { |
591 | assert(_type == type::utterance); | 398 | if (type_ != type::utterance) |
592 | 399 | { | |
593 | _utterance._utterance.insert(torepl, _tkn); | 400 | throw std::domain_error("Token is not an utterance"); |
594 | _utterance._utterance.erase(torepl); | 401 | } |
402 | |||
403 | return std::begin(utterance_); | ||
595 | } | 404 | } |
596 | 405 | ||
597 | void token::erase(iterator toer) | 406 | token::iterator token::end() |
598 | { | 407 | { |
599 | assert(_type == type::utterance); | 408 | if (type_ != type::utterance) |
600 | 409 | { | |
601 | _utterance._utterance.erase(toer); | 410 | throw std::domain_error("Token is not an utterance"); |
411 | } | ||
412 | |||
413 | return std::end(utterance_); | ||
602 | } | 414 | } |
603 | 415 | ||
604 | token::token(std::string _str) : _type(type::string) | 416 | token::const_iterator token::end() const |
605 | { | 417 | { |
606 | new(&_string._str) std::string(_str); | 418 | if (type_ != type::utterance) |
419 | { | ||
420 | throw std::domain_error("Token is not an utterance"); | ||
421 | } | ||
422 | |||
423 | return std::end(utterance_); | ||
607 | } | 424 | } |
608 | 425 | ||
609 | token& token::operator=(std::string _str) | 426 | token& token::operator<<(token arg) |
610 | { | 427 | { |
611 | *this = token{_str}; | 428 | if (type_ != type::utterance) |
612 | 429 | { | |
430 | throw std::domain_error("Token is not an utterance"); | ||
431 | } | ||
432 | |||
433 | utterance_.push_back(std::move(arg)); | ||
434 | |||
613 | return *this; | 435 | return *this; |
614 | } | 436 | } |
615 | 437 | ||
616 | std::string token::get_string() const | 438 | std::ostream& operator<<(std::ostream& os, token::type type) |
617 | { | ||
618 | assert(_type == type::string); | ||
619 | |||
620 | return _string._str; | ||
621 | } | ||
622 | |||
623 | void token::set_string(std::string _str) | ||
624 | { | ||
625 | assert(_type == type::string); | ||
626 | |||
627 | _string._str = _str; | ||
628 | } | ||
629 | |||
630 | std::ostream& operator<<(std::ostream& os, token::type _type) | ||
631 | { | 439 | { |
632 | switch (_type) | 440 | switch (type) |
633 | { | 441 | { |
634 | case token::type::verb: return os << "verb"; | 442 | case token::type::word: return os << "word"; |
635 | case token::type::noun: return os << "noun"; | 443 | case token::type::literal: return os << "literal"; |
636 | case token::type::adjective: return os << "adjective"; | 444 | case token::type::part: return os << "part"; |
637 | case token::type::adverb: return os << "adverb"; | ||
638 | case token::type::preposition: return os << "preposition"; | ||
639 | case token::type::fillin: return os << "fillin"; | 445 | case token::type::fillin: return os << "fillin"; |
640 | case token::type::utterance: return os << "utterance"; | 446 | case token::type::utterance: return os << "utterance"; |
641 | case token::type::string: return os << "string"; | ||
642 | } | 447 | } |
643 | } | 448 | } |
644 | 449 | ||
645 | }; | 450 | }; |
diff --git a/lib/token.h b/lib/token.h index ff3c37b..e7f8c28 100644 --- a/lib/token.h +++ b/lib/token.h | |||
@@ -1,170 +1,116 @@ | |||
1 | #ifndef TOKEN_H_AD62C505 | 1 | #ifndef TOKEN_H_AD62C505 |
2 | #define TOKEN_H_AD62C505 | 2 | #define TOKEN_H_AD62C505 |
3 | 3 | ||
4 | #include <ostream> | ||
5 | #include <string> | ||
6 | #include <list> | ||
7 | #include <set> | ||
8 | #include "enums.h" | ||
9 | #include "word.h" | ||
10 | #include "part.h" | ||
11 | |||
4 | namespace verbly { | 12 | namespace verbly { |
5 | 13 | ||
6 | class token { | 14 | class token { |
7 | public: | 15 | public: |
8 | enum class type { | 16 | enum class type { |
9 | verb, | 17 | word, |
10 | noun, | 18 | literal, |
11 | adjective, | 19 | part, |
12 | adverb, | ||
13 | preposition, | ||
14 | fillin, | 20 | fillin, |
15 | utterance, | 21 | utterance |
16 | string | ||
17 | }; | ||
18 | |||
19 | enum class verb_inflection { | ||
20 | infinitive, | ||
21 | past_tense, | ||
22 | past_participle, | ||
23 | s_form, | ||
24 | ing_form | ||
25 | }; | ||
26 | |||
27 | enum class noun_inflection { | ||
28 | singular, | ||
29 | plural | ||
30 | }; | ||
31 | |||
32 | enum class adjective_inflection { | ||
33 | base, | ||
34 | comparative, | ||
35 | superlative | ||
36 | }; | ||
37 | |||
38 | enum class adverb_inflection { | ||
39 | base, | ||
40 | comparative, | ||
41 | superlative | ||
42 | }; | ||
43 | |||
44 | enum class fillin_type { | ||
45 | generic, | ||
46 | noun_phrase, | ||
47 | adjective_phrase, | ||
48 | adverb_phrase, | ||
49 | participle_phrase, | ||
50 | infinitive_phrase | ||
51 | }; | 22 | }; |
52 | 23 | ||
53 | type get_type() const; | 24 | // Copy & move constructors |
54 | 25 | ||
55 | int get_extra() const; | ||
56 | void set_extra(int _arg); | ||
57 | |||
58 | token(const token& other); | 26 | token(const token& other); |
59 | token& operator=(const token& other); | 27 | token(token&& other); |
28 | |||
29 | // Assignment operator | ||
30 | |||
31 | token& operator=(token other); | ||
32 | |||
33 | // Swap | ||
34 | |||
35 | friend void swap(token& first, token& second); | ||
36 | |||
37 | // Destructor | ||
38 | |||
60 | ~token(); | 39 | ~token(); |
61 | 40 | ||
62 | bool is_complete() const; | 41 | // Accessors |
42 | |||
43 | type getType() const | ||
44 | { | ||
45 | return type_; | ||
46 | } | ||
47 | |||
48 | bool isComplete() const; | ||
49 | |||
63 | std::string compile() const; | 50 | std::string compile() const; |
64 | 51 | ||
65 | // Verb | 52 | // Word |
66 | token(verb _verb); | 53 | |
67 | token(verb _verb, verb_inflection _infl); | 54 | token(word arg, inflection category = inflection::base); |
68 | token& operator=(verb _verb); | 55 | |
69 | verb get_verb() const; | 56 | const word& getWord() const; |
70 | void set_verb(verb _verb); | 57 | |
71 | verb_inflection get_verb_inflection() const; | 58 | token inflect(inflection category) const; |
72 | void set_verb_inflection(verb_inflection _infl); | 59 | |
73 | 60 | // Literal | |
74 | // Noun | 61 | |
75 | token(noun _noun); | 62 | token(std::string arg); |
76 | token(noun _noun, noun_inflection _infl); | 63 | token(const char* arg); |
77 | token& operator=(noun _noun); | 64 | |
78 | noun get_noun() const; | 65 | std::string getLiteral() const; |
79 | void set_noun(noun _noun); | 66 | |
80 | noun_inflection get_noun_inflection() const; | 67 | // Part |
81 | void set_noun_inflection(noun_inflection _infl); | 68 | |
82 | 69 | token(part arg); | |
83 | // Adjective | 70 | |
84 | token(adjective _adjective); | 71 | part getPart() const; |
85 | token(adjective _adjective, adjective_inflection _infl); | 72 | |
86 | token& operator=(adjective _adjective); | ||
87 | adjective get_adjective() const; | ||
88 | void set_adjective(adjective _adjective); | ||
89 | adjective_inflection get_adjective_inflection() const; | ||
90 | void set_adjective_inflection(adjective_inflection _infl); | ||
91 | |||
92 | // Adverb | ||
93 | token(adverb _adverb); | ||
94 | token(adverb _adverb, adverb_inflection _infl); | ||
95 | token& operator=(adverb _adverb); | ||
96 | adverb get_adverb() const; | ||
97 | void set_adverb(adverb _adverb); | ||
98 | adverb_inflection get_adverb_inflection() const; | ||
99 | void set_adverb_inflection(adverb_inflection _infl); | ||
100 | |||
101 | // Preposition | ||
102 | token(preposition _preposition); | ||
103 | token& operator=(preposition _preposition); | ||
104 | preposition get_preposition() const; | ||
105 | void set_preposition(preposition _preposition); | ||
106 | |||
107 | // Fillin | 73 | // Fillin |
108 | token(fillin_type _ft); | 74 | |
109 | token& operator=(fillin_type _ft); | 75 | token(std::set<std::string> synrestrs); |
110 | fillin_type get_fillin_type() const; | 76 | |
111 | void set_fillin_type(fillin_type _ft); | 77 | const std::set<std::string>& getSynrestrs() const; |
112 | 78 | ||
79 | bool hasSynrestr(std::string synrestr) const; | ||
80 | |||
81 | void addSynrestr(std::string synrestr); | ||
82 | |||
113 | // Utterance | 83 | // Utterance |
114 | typedef std::list<token>::iterator iterator; | 84 | |
115 | 85 | using iterator = std::list<token>::iterator; | |
86 | using const_iterator = std::list<token>::const_iterator; | ||
87 | |||
116 | token(); | 88 | token(); |
117 | token(std::initializer_list<token> _init); | 89 | token(std::vector<part> parts); |
90 | |||
118 | iterator begin(); | 91 | iterator begin(); |
92 | const_iterator begin() const; | ||
93 | |||
119 | iterator end(); | 94 | iterator end(); |
120 | token& operator<<(token _tkn); | 95 | const_iterator end() const; |
121 | void push_back(token _tkn); | 96 | |
122 | void insert(iterator before, token _tkn); | 97 | token& operator<<(token arg); |
123 | void replace(iterator torepl, token _tkn); | 98 | |
124 | void erase(iterator toer); | ||
125 | |||
126 | // String | ||
127 | token(std::string _str); | ||
128 | token& operator=(std::string _str); | ||
129 | std::string get_string() const; | ||
130 | void set_string(std::string _str); | ||
131 | |||
132 | private: | 99 | private: |
133 | type _type; | ||
134 | int _extra = 0; | ||
135 | union { | 100 | union { |
136 | struct { | 101 | struct { |
137 | verb _verb; | 102 | word word_; |
138 | verb_inflection _infl; | 103 | inflection category_; |
139 | } _verb; | 104 | } word_; |
140 | struct { | 105 | std::string literal_; |
141 | noun _noun; | 106 | part part_; |
142 | noun_inflection _infl; | 107 | std::set<std::string> fillin_; |
143 | } _noun; | 108 | std::list<token> utterance_; |
144 | struct { | ||
145 | adjective _adjective; | ||
146 | adjective_inflection _infl; | ||
147 | } _adjective; | ||
148 | struct { | ||
149 | adverb _adverb; | ||
150 | adverb_inflection _infl; | ||
151 | } _adverb; | ||
152 | struct { | ||
153 | preposition _preposition; | ||
154 | } _preposition; | ||
155 | struct { | ||
156 | fillin_type _type; | ||
157 | } _fillin; | ||
158 | struct { | ||
159 | std::string _str; | ||
160 | } _string; | ||
161 | struct { | ||
162 | std::list<token> _utterance; | ||
163 | } _utterance; | ||
164 | }; | 109 | }; |
110 | type type_; | ||
165 | }; | 111 | }; |
166 | 112 | ||
167 | std::ostream& operator<<(std::ostream& os, token::type _type); | 113 | std::ostream& operator<<(std::ostream& os, token::type type); |
168 | 114 | ||
169 | }; | 115 | }; |
170 | 116 | ||
diff --git a/lib/verbly.h b/lib/verbly.h index 6dfc01a..d8875b3 100644 --- a/lib/verbly.h +++ b/lib/verbly.h | |||
@@ -13,5 +13,9 @@ | |||
13 | #include "lemma.h" | 13 | #include "lemma.h" |
14 | #include "form.h" | 14 | #include "form.h" |
15 | #include "pronunciation.h" | 15 | #include "pronunciation.h" |
16 | #include "token.h" | ||
17 | #include "selrestr.h" | ||
18 | #include "part.h" | ||
19 | #include "role.h" | ||
16 | 20 | ||
17 | #endif /* end of include guard: VERBLY_H_5B39CE50 */ | 21 | #endif /* end of include guard: VERBLY_H_5B39CE50 */ |