summary refs log tree commit diff stats
path: root/lib/token.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/token.cpp')
-rw-r--r--lib/token.cpp813
1 files changed, 309 insertions, 504 deletions
diff --git a/lib/token.cpp b/lib/token.cpp index 3cc77e5..769acad 100644 --- a/lib/token.cpp +++ b/lib/token.cpp
@@ -1,645 +1,450 @@
1#include "verbly.h" 1#include "token.h"
2#include <stdexcept>
3#include "util.h"
2 4
3namespace verbly { 5namespace verbly {
4 6
5 token::type token::get_type() const
6 {
7 return _type;
8 }
9
10 int token::get_extra() const
11 {
12 return _extra;
13 }
14
15 void token::set_extra(int _arg)
16 {
17 _extra = _arg;
18 }
19
20 token::token(const token& other) 7 token::token(const token& other)
21 { 8 {
22 _type = other._type; 9 type_ = other.type_;
23 _extra = other._extra; 10
24 11 switch (type_)
25 switch (_type)
26 { 12 {
27 case token::type::verb: 13 case type::word:
28 { 14 {
29 new(&_verb._verb) verb(other._verb._verb); 15 new(&word_.word_) word(other.word_.word_);
30 _verb._infl = other._verb._infl; 16 word_.category_ = other.word_.category_;
31 17
32 break;
33 }
34
35 case token::type::noun:
36 {
37 new(&_noun._noun) noun(other._noun._noun);
38 _noun._infl = other._noun._infl;
39
40 break; 18 break;
41 } 19 }
42 20
43 case token::type::adjective: 21 case type::literal:
44 { 22 {
45 new(&_adjective._adjective) adjective(other._adjective._adjective); 23 new(&literal_) std::string(other.literal_);
46 _adjective._infl = other._adjective._infl; 24
47
48 break; 25 break;
49 } 26 }
50 27
51 case token::type::adverb: 28 case type::part:
52 { 29 {
53 new(&_adverb._adverb) adverb(other._adverb._adverb); 30 new(&part_) part(other.part_);
54 _adverb._infl = other._adverb._infl; 31
55
56 break; 32 break;
57 } 33 }
58 34
59 case token::type::preposition: 35 case type::fillin:
60 { 36 {
61 new(&_preposition._preposition) preposition(other._preposition._preposition); 37 new(&fillin_) std::set<std::string>(other.fillin_);
62 38
63 break; 39 break;
64 } 40 }
65 41
66 case token::type::fillin: 42 case type::utterance:
67 { 43 {
68 _fillin._type = other._fillin._type; 44 new(&utterance_) std::list<token>(other.utterance_);
69 45
70 break; 46 break;
71 } 47 }
72 48 }
73 case token::type::string: 49 }
50
51 token::token(token&& other) : token()
52 {
53 swap(*this, other);
54 }
55
56 token& token::operator=(token other)
57 {
58 swap(*this, other);
59
60 return *this;
61 }
62
63 void swap(token& first, token& second)
64 {
65 using type = token::type;
66
67 type tempType = first.type_;
68 word tempWord;
69 inflection tempCategory;
70 std::string tempLiteral;
71 part tempPart;
72 std::set<std::string> tempFillin;
73 std::list<token> tempUtterance;
74
75 switch (tempType)
76 {
77 case type::word:
74 { 78 {
75 new(&_string._str) std::string(other._string._str); 79 tempWord = std::move(first.word_.word_);
76 80 tempCategory = first.word_.category_;
81
77 break; 82 break;
78 } 83 }
79 84
80 case token::type::utterance: 85 case type::literal:
81 { 86 {
82 new(&_utterance._utterance) std::list<token>(other._utterance._utterance); 87 tempLiteral = std::move(first.literal_);
83 88
84 break; 89 break;
85 } 90 }
86 } 91
87 } 92 case type::part:
88
89 token& token::operator=(const token& other)
90 {
91 this->~token();
92
93 _type = other._type;
94 _extra = other._extra;
95
96 switch (_type)
97 {
98 case token::type::verb:
99 { 93 {
100 new(&_verb._verb) verb(other._verb._verb); 94 tempPart = std::move(first.part_);
101 _verb._infl = other._verb._infl; 95
102
103 break; 96 break;
104 } 97 }
105 98
106 case token::type::noun: 99 case type::fillin:
107 { 100 {
108 new(&_noun._noun) noun(other._noun._noun); 101 tempFillin = std::move(first.fillin_);
109 _noun._infl = other._noun._infl; 102
110
111 break; 103 break;
112 } 104 }
113 105
114 case token::type::adjective: 106 case type::utterance:
115 { 107 {
116 new(&_adjective._adjective) adjective(other._adjective._adjective); 108 tempUtterance = std::move(first.utterance_);
117 _adjective._infl = other._adjective._infl; 109
118
119 break; 110 break;
120 } 111 }
121 112 }
122 case token::type::adverb: 113
114 first.~token();
115
116 first.type_ = second.type_;
117
118 switch (first.type_)
119 {
120 case type::word:
123 { 121 {
124 new(&_adverb._adverb) adverb(other._adverb._adverb); 122 new(&first.word_.word_) word(std::move(second.word_.word_));
125 _adverb._infl = other._adverb._infl; 123 first.word_.category_ = second.word_.category_;
126 124
127 break; 125 break;
128 } 126 }
129 127
130 case token::type::preposition: 128 case type::literal:
131 { 129 {
132 new(&_preposition._preposition) preposition(other._preposition._preposition); 130 new(&first.literal_) std::string(std::move(second.literal_));
133 131
134 break; 132 break;
135 } 133 }
136 134
137 case token::type::fillin: 135 case type::part:
138 { 136 {
139 _fillin._type = other._fillin._type; 137 new(&first.part_) part(std::move(second.part_));
140 138
141 break; 139 break;
142 } 140 }
143 141
144 case token::type::string: 142 case type::fillin:
145 { 143 {
146 new(&_string._str) std::string(other._string._str); 144 new(&first.fillin_) std::set<std::string>(std::move(second.fillin_));
147 145
148 break; 146 break;
149 } 147 }
150 148
151 case token::type::utterance: 149 case type::utterance:
152 { 150 {
153 new(&_utterance._utterance) std::list<token>(other._utterance._utterance); 151 new(&first.utterance_) std::list<token>(std::move(second.utterance_));
154 152
155 break; 153 break;
156 } 154 }
157 } 155 }
158 156
159 return *this; 157 second.~token();
160 } 158
161 159 second.type_ = tempType;
162 token::~token() 160
163 { 161 switch (second.type_)
164 switch (_type)
165 { 162 {
166 case token::type::verb: 163 case type::word:
167 { 164 {
168 _verb._verb.~verb(); 165 new(&second.word_.word_) word(std::move(tempWord));
169 166 second.word_.category_ = tempCategory;
167
170 break; 168 break;
171 } 169 }
172 170
173 case token::type::noun: 171 case type::literal:
174 { 172 {
175 _noun._noun.~noun(); 173 new(&second.literal_) std::string(std::move(tempLiteral));
176 174
177 break; 175 break;
178 } 176 }
179 177
180 case token::type::adjective: 178 case type::part:
181 { 179 {
182 _adjective._adjective.~adjective(); 180 new(&second.part_) part(std::move(tempPart));
183 181
184 break; 182 break;
185 } 183 }
186 184
187 case token::type::adverb: 185 case type::fillin:
188 { 186 {
189 _adverb._adverb.~adverb(); 187 new(&second.fillin_) std::set<std::string>(std::move(tempFillin));
190 188
191 break; 189 break;
192 } 190 }
193 191
194 case token::type::preposition: 192 case type::utterance:
195 { 193 {
196 _preposition._preposition.~preposition(); 194 new(&second.utterance_) std::list<token>(std::move(tempUtterance));
197 195
198 break; 196 break;
199 } 197 }
200 198 }
201 case token::type::fillin: 199 }
200
201 token::~token()
202 {
203 switch (type_)
204 {
205 case type::word:
202 { 206 {
203 // Nothing! 207 word_.word_.~word();
204 208
205 break; 209 break;
206 } 210 }
207 211
208 case token::type::string: 212 case type::literal:
209 { 213 {
210 using string_type = std::string; 214 using string_type = std::string;
211 _string._str.~string_type(); 215 literal_.~string_type();
212 216
217 break;
218 }
219
220 case type::part:
221 {
222 part_.~part();
223
224 break;
225 }
226
227 case type::fillin:
228 {
229 using set_type = std::set<std::string>;
230 fillin_.~set_type();
231
213 break; 232 break;
214 } 233 }
215 234
216 case token::type::utterance: 235 case type::utterance:
217 { 236 {
218 using list_type = std::list<token>; 237 using list_type = std::list<token>;
219 _utterance._utterance.~list_type(); 238 utterance_.~list_type();
220 239
221 break; 240 break;
222 } 241 }
223 } 242 }
224 } 243 }
225 244
226 bool token::is_complete() const 245 bool token::isComplete() const
227 { 246 {
228 if (_type == token::type::utterance) 247 switch (type_)
229 { 248 {
230 return std::all_of(std::begin(_utterance._utterance), std::end(_utterance._utterance), [] (const token& tkn) { 249 case type::word: return true;
231 return tkn.is_complete(); 250 case type::literal: return true;
251 case type::part: return false;
252 case type::fillin: return false;
253 case type::utterance: return std::all_of(std::begin(utterance_), std::end(utterance_), [] (const token& tkn) {
254 return tkn.isComplete();
232 }); 255 });
233 } else if (_type == token::type::fillin)
234 {
235 return false;
236 } else {
237 return true;
238 } 256 }
239 } 257 }
240 258
241 std::string token::compile() const 259 std::string token::compile() const
242 { 260 {
243 switch (_type) 261 switch (type_)
244 { 262 {
245 case token::type::verb: 263 case type::word: return word_.word_.getInflections(word_.category_).front();
246 { 264 case type::literal: return literal_;
247 switch (_verb._infl) 265 case type::part: throw std::domain_error("Cannot compile incomplete token");
248 { 266 case type::fillin: throw std::domain_error("Cannot compile incomplete token");
249 case token::verb_inflection::infinitive: return _verb._verb.infinitive_form(); 267
250 case token::verb_inflection::past_tense: return _verb._verb.past_tense_form(); 268 case type::utterance:
251 case token::verb_inflection::past_participle: return _verb._verb.past_participle_form();
252 case token::verb_inflection::ing_form: return _verb._verb.ing_form();
253 case token::verb_inflection::s_form: return _verb._verb.s_form();
254 }
255 }
256
257 case token::type::noun:
258 {
259 switch (_noun._infl)
260 {
261 case token::noun_inflection::singular: return _noun._noun.singular_form();
262 case token::noun_inflection::plural: return _noun._noun.plural_form();
263 }
264 }
265
266 case token::type::adjective:
267 {
268 switch (_adjective._infl)
269 {
270 case token::adjective_inflection::base: return _adjective._adjective.base_form();
271 case token::adjective_inflection::comparative: return _adjective._adjective.comparative_form();
272 case token::adjective_inflection::superlative: return _adjective._adjective.superlative_form();
273 }
274 }
275
276 case token::type::adverb:
277 { 269 {
278 switch (_adverb._infl) 270 std::list<std::string> compiled;
271 for (const token& tkn : utterance_)
279 { 272 {
280 case token::adverb_inflection::base: return _adverb._adverb.base_form(); 273 compiled.push_back(tkn.compile());
281 case token::adverb_inflection::comparative: return _adverb._adverb.comparative_form();
282 case token::adverb_inflection::superlative: return _adverb._adverb.superlative_form();
283 } 274 }
284 } 275
285 276 return implode(std::begin(compiled), std::end(compiled), " ");
286 case token::type::preposition: return _preposition._preposition.get_form();
287 case token::type::string: return _string._str;
288
289 case token::type::fillin:
290 {
291 throw std::runtime_error("Cannot compile a fillin token.");
292 }
293
294 case token::type::utterance:
295 {
296 std::list<std::string> compiled;
297 std::transform(std::begin(_utterance._utterance), std::end(_utterance._utterance), std::back_inserter(compiled), [] (token tkn) {
298 return tkn.compile();
299 });
300
301 return verbly::implode(std::begin(compiled), std::end(compiled), " ");
302 } 277 }
303 } 278 }
304 } 279 }
305 280
306 token::token(verb _verb) : _type(type::verb) 281 token::token(word arg, inflection category) : type_(type::word)
307 {
308 new(&this->_verb._verb) verb(_verb);
309 this->_verb._infl = verb_inflection::infinitive;
310 }
311
312 token::token(verb _verb, verb_inflection _infl) : token(_verb)
313 {
314 this->_verb._infl = _infl;
315 }
316
317 token& token::operator=(verb _verb)
318 {
319 *this = token{_verb};
320
321 return *this;
322 }
323
324 verb token::get_verb() const
325 {
326 assert(_type == type::verb);
327
328 return _verb._verb;
329 }
330
331 void token::set_verb(verb _verb)
332 {
333 assert(_type == type::verb);
334
335 this->_verb._verb = _verb;
336 }
337
338 token::verb_inflection token::get_verb_inflection() const
339 {
340 assert(_type == type::verb);
341
342 return _verb._infl;
343 }
344
345 void token::set_verb_inflection(verb_inflection _infl)
346 {
347 assert(_type == type::verb);
348
349 _verb._infl = _infl;
350 }
351
352 token::token(noun _noun) : _type(type::noun)
353 {
354 new(&this->_noun._noun) noun(_noun);
355 this->_noun._infl = noun_inflection::singular;
356 }
357
358 token::token(noun _noun, noun_inflection _infl) : token(_noun)
359 {
360 this->_noun._infl = _infl;
361 }
362
363 token& token::operator=(noun _noun)
364 {
365 *this = token{_noun};
366
367 return *this;
368 }
369
370 noun token::get_noun() const
371 {
372 assert(_type == type::noun);
373
374 return _noun._noun;
375 }
376
377 void token::set_noun(noun _noun)
378 {
379 assert(_type == type::noun);
380
381 this->_noun._noun = _noun;
382 }
383
384 token::noun_inflection token::get_noun_inflection() const
385 {
386 assert(_type == type::noun);
387
388 return _noun._infl;
389 }
390
391 void token::set_noun_inflection(noun_inflection _infl)
392 {
393 assert(_type == type::noun);
394
395 _noun._infl = _infl;
396 }
397
398 token::token(adjective _adjective) : _type(type::adjective)
399 {
400 new(&this->_adjective._adjective) adjective(_adjective);
401 this->_adjective._infl = adjective_inflection::base;
402 }
403
404 token::token(adjective _adjective, adjective_inflection _infl) : token(_adjective)
405 {
406 this->_adjective._infl = _infl;
407 }
408
409 token& token::operator=(adjective _adjective)
410 {
411 *this = token{_adjective};
412
413 return *this;
414 }
415
416 adjective token::get_adjective() const
417 {
418 assert(_type == type::adjective);
419
420 return _adjective._adjective;
421 }
422
423 void token::set_adjective(adjective _adjective)
424 {
425 assert(_type == type::adjective);
426
427 this->_adjective._adjective = _adjective;
428 }
429
430 token::adjective_inflection token::get_adjective_inflection() const
431 {
432 assert(_type == type::adjective);
433
434 return _adjective._infl;
435 }
436
437 void token::set_adjective_inflection(adjective_inflection _infl)
438 {
439 assert(_type == type::adjective);
440
441 _adjective._infl = _infl;
442 }
443
444 token::token(adverb _adverb) : _type(type::adverb)
445 {
446 new(&this->_adverb._adverb) adverb(_adverb);
447 this->_adverb._infl = adverb_inflection::base;
448 }
449
450 token::token(adverb _adverb, adverb_inflection _infl) : token(_adverb)
451 {
452 this->_adverb._infl = _infl;
453 }
454
455 token& token::operator=(adverb _adverb)
456 {
457 *this = token{_adverb};
458
459 return *this;
460 }
461
462 adverb token::get_adverb() const
463 { 282 {
464 assert(_type == type::adverb); 283 new(&word_.word_) word(std::move(arg));
465 284 word_.category_ = category;
466 return _adverb._adverb;
467 } 285 }
468 286
469 void token::set_adverb(adverb _adverb) 287 const word& token::getWord() const
470 { 288 {
471 assert(_type == type::adverb); 289 if (type_ != type::word)
472 290 {
473 this->_adverb._adverb = _adverb; 291 throw std::domain_error("Token is not a word");
292 }
293
294 return word_.word_;
474 } 295 }
475 296
476 token::adverb_inflection token::get_adverb_inflection() const 297 token token::inflect(inflection category) const
477 { 298 {
478 assert(_type == type::adverb); 299 if (type_ != type::word)
479 300 {
480 return _adverb._infl; 301 throw std::domain_error("Token is not a word");
302 }
303
304 return token(word_.word_, category);
481 } 305 }
482 306
483 void token::set_adverb_inflection(adverb_inflection _infl) 307 token::token(std::string arg) : type_(type::literal)
484 { 308 {
485 assert(_type == type::adverb); 309 new(&literal_) std::string(std::move(arg));
486
487 _adverb._infl = _infl;
488 } 310 }
489 311
490 token::token(preposition _preposition) : _type(type::preposition) 312 token::token(const char* arg) : token(std::string(arg))
491 { 313 {
492 new(&this->_preposition._preposition) preposition(_preposition);
493 } 314 }
494 315
495 token& token::operator=(preposition _preposition) 316 std::string token::getLiteral() const
496 { 317 {
497 *this = token{_preposition}; 318 if (type_ != type::literal)
498 319 {
499 return *this; 320 throw std::domain_error("Token is not a literal");
321 }
322
323 return literal_;
500 } 324 }
501 325
502 preposition token::get_preposition() const 326 token::token(part arg) : type_(type::part)
503 { 327 {
504 assert(_type == type::preposition); 328 new(&part_) part(std::move(arg));
505
506 return _preposition._preposition;
507 } 329 }
508 330
509 void token::set_preposition(preposition _preposition) 331 part token::getPart() const
510 { 332 {
511 assert(_type == type::preposition); 333 if (type_ != type::part)
512 334 {
513 this->_preposition._preposition = _preposition; 335 throw std::domain_error("Token is not a part");
336 }
337
338 return part_;
514 } 339 }
515 340
516 token::token(fillin_type _ft) : _type(type::fillin) 341 token::token(std::set<std::string> synrestrs) : type_(type::fillin)
517 { 342 {
518 _fillin._type = _ft; 343 new(&fillin_) std::set<std::string>(std::move(synrestrs));
519 } 344 }
520 345
521 token& token::operator=(fillin_type _ft) 346 const std::set<std::string>& token::getSynrestrs() const
522 { 347 {
523 *this = token{_ft}; 348 if (type_ != type::fillin)
524 349 {
525 return *this; 350 throw std::domain_error("Token is not a fillin");
351 }
352
353 return fillin_;
526 } 354 }
527 355
528 token::fillin_type token::get_fillin_type() const 356 bool token::hasSynrestr(std::string synrestr) const
529 { 357 {
530 assert(_type == type::fillin); 358 if (type_ != type::fillin)
531 359 {
532 return _fillin._type; 360 throw std::domain_error("Token is not a fillin");
361 }
362
363 return (fillin_.count(synrestr) == 1);
533 } 364 }
534 365
535 void token::set_fillin_type(fillin_type _ft) 366 void token::addSynrestr(std::string synrestr)
536 { 367 {
537 assert(_type == type::fillin); 368 if (type_ != type::fillin)
538 369 {
539 _fillin._type = _ft; 370 throw std::domain_error("Token is not a fillin");
371 }
372
373 fillin_.insert(std::move(synrestr));
540 } 374 }
541 375
542 token::token() : _type(type::utterance) 376 token::token() : type_(type::utterance)
543 { 377 {
544 new(&_utterance._utterance) std::list<token>(); 378 new(&utterance_) std::list<token>();
545 } 379 }
546 380
547 token::token(std::initializer_list<token> _init) : _type(type::utterance) 381 token::token(std::vector<part> parts) : type_(type::utterance)
548 { 382 {
549 new(&_utterance._utterance) std::list<token>(_init); 383 new(&utterance_) std::list<token>(std::begin(parts), std::end(parts));
550 } 384 }
551 385
552 token::iterator token::begin() 386 token::iterator token::begin()
553 { 387 {
554 assert(_type == type::utterance); 388 if (type_ != type::utterance)
555 389 {
556 return _utterance._utterance.begin(); 390 throw std::domain_error("Token is not an utterance");
557 } 391 }
558 392
559 token::iterator token::end() 393 return std::begin(utterance_);
560 {
561 assert(_type == type::utterance);
562
563 return _utterance._utterance.end();
564 }
565
566 token& token::operator<<(token _tkn)
567 {
568 assert(_type == type::utterance);
569
570 _utterance._utterance.push_back(_tkn);
571
572 return *this;
573 }
574
575 void token::push_back(token _tkn)
576 {
577 assert(_type == type::utterance);
578
579 _utterance._utterance.push_back(_tkn);
580 }
581
582 void token::insert(iterator before, token _tkn)
583 {
584 assert(_type == type::utterance);
585
586 _utterance._utterance.insert(before, _tkn);
587 } 394 }
588 395
589 void token::replace(iterator torepl, token _tkn) 396 token::const_iterator token::begin() const
590 { 397 {
591 assert(_type == type::utterance); 398 if (type_ != type::utterance)
592 399 {
593 _utterance._utterance.insert(torepl, _tkn); 400 throw std::domain_error("Token is not an utterance");
594 _utterance._utterance.erase(torepl); 401 }
402
403 return std::begin(utterance_);
595 } 404 }
596 405
597 void token::erase(iterator toer) 406 token::iterator token::end()
598 { 407 {
599 assert(_type == type::utterance); 408 if (type_ != type::utterance)
600 409 {
601 _utterance._utterance.erase(toer); 410 throw std::domain_error("Token is not an utterance");
411 }
412
413 return std::end(utterance_);
602 } 414 }
603 415
604 token::token(std::string _str) : _type(type::string) 416 token::const_iterator token::end() const
605 { 417 {
606 new(&_string._str) std::string(_str); 418 if (type_ != type::utterance)
419 {
420 throw std::domain_error("Token is not an utterance");
421 }
422
423 return std::end(utterance_);
607 } 424 }
608 425
609 token& token::operator=(std::string _str) 426 token& token::operator<<(token arg)
610 { 427 {
611 *this = token{_str}; 428 if (type_ != type::utterance)
612 429 {
430 throw std::domain_error("Token is not an utterance");
431 }
432
433 utterance_.push_back(std::move(arg));
434
613 return *this; 435 return *this;
614 } 436 }
615 437
616 std::string token::get_string() const 438 std::ostream& operator<<(std::ostream& os, token::type type)
617 {
618 assert(_type == type::string);
619
620 return _string._str;
621 }
622
623 void token::set_string(std::string _str)
624 {
625 assert(_type == type::string);
626
627 _string._str = _str;
628 }
629
630 std::ostream& operator<<(std::ostream& os, token::type _type)
631 { 439 {
632 switch (_type) 440 switch (type)
633 { 441 {
634 case token::type::verb: return os << "verb"; 442 case token::type::word: return os << "word";
635 case token::type::noun: return os << "noun"; 443 case token::type::literal: return os << "literal";
636 case token::type::adjective: return os << "adjective"; 444 case token::type::part: return os << "part";
637 case token::type::adverb: return os << "adverb";
638 case token::type::preposition: return os << "preposition";
639 case token::type::fillin: return os << "fillin"; 445 case token::type::fillin: return os << "fillin";
640 case token::type::utterance: return os << "utterance"; 446 case token::type::utterance: return os << "utterance";
641 case token::type::string: return os << "string";
642 } 447 }
643 } 448 }
644 449
645}; 450};