summary refs log tree commit diff stats
path: root/lib/token.cpp
diff options
context:
space:
mode:
author Kelly Rauchenberger <fefferburbia@gmail.com> 2018-09-27 21:40:52 -0400
committer Kelly Rauchenberger <fefferburbia@gmail.com> 2018-09-27 21:40:52 -0400
commit 38c17f093615a16a4b4ec6dc2b5d3edb5c1d3895 (patch)
tree 8da5a3d0eacf5e2fd04c33f57d592e4c1ca303ad /lib/token.cpp
parent 3a8bfa95a5df04d97f05545d5bb8df5f3c3f96a3 (diff)
download verbly-38c17f093615a16a4b4ec6dc2b5d3edb5c1d3895.tar.gz
verbly-38c17f093615a16a4b4ec6dc2b5d3edb5c1d3895.tar.bz2
verbly-38c17f093615a16a4b4ec6dc2b5d3edb5c1d3895.zip
More hkutil refactoring
All database access goes through hatkirby::database now.

verbly::token, verbly::statement::condition, and verbly::part have been converted to use mpark::variant. verbly::binding has been deleted and replaced with an mpark::variant typedef in statement.h. This means that the only remaining tagged union class is verbly::generator::part.

refs #5
Diffstat (limited to 'lib/token.cpp')
-rw-r--r-- lib/token.cpp 495
1 files changed, 128 insertions, 367 deletions
diff --git a/lib/token.cpp b/lib/token.cpp index 7b1d1fa..b3c7062 100644 --- a/lib/token.cpp +++ b/lib/token.cpp
@@ -5,322 +5,43 @@
5 5
6namespace verbly { 6namespace verbly {
7 7
8 token::token(const token& other) 8 bool token::isComplete() const
9 {
10 type_ = other.type_;
11
12 switch (type_)
13 {
14 case type::word:
15 {
16 new(&word_.word_) word(other.word_.word_);
17 word_.category_ = other.word_.category_;
18
19 break;
20 }
21
22 case type::literal:
23 {
24 new(&literal_) std::string(other.literal_);
25
26 break;
27 }
28
29 case type::part:
30 {
31 new(&part_) part(other.part_);
32
33 break;
34 }
35
36 case type::fillin:
37 {
38 new(&fillin_) std::set<std::string>(other.fillin_);
39
40 break;
41 }
42
43 case type::utterance:
44 {
45 new(&utterance_) std::list<token>(other.utterance_);
46
47 break;
48 }
49
50 case type::transform:
51 {
52 transform_.type_ = other.transform_.type_;
53 new(&transform_.strParam_) std::string(other.transform_.strParam_);
54 new(&transform_.strParam2_) std::string(other.transform_.strParam2_);
55 transform_.casingParam_ = other.transform_.casingParam_;
56 new(&transform_.inner_) std::unique_ptr<token>(new token(*other.transform_.inner_));
57
58 break;
59 }
60 }
61 }
62
63 token::token(token&& other) : token()
64 {
65 swap(*this, other);
66 }
67
68 token& token::operator=(token other)
69 {
70 swap(*this, other);
71
72 return *this;
73 }
74
75 void swap(token& first, token& second)
76 {
77 using type = token::type;
78 using transform_type = token::transform_type;
79 using casing = token::casing;
80
81 type tempType = first.type_;
82 word tempWord;
83 inflection tempCategory;
84 std::string tempLiteral;
85 part tempPart;
86 std::set<std::string> tempFillin;
87 std::list<token> tempUtterance;
88 transform_type tempTransformType;
89 std::string tempTransformStrParam;
90 std::string tempTransformStrParam2;
91 casing tempTransformCasingParam;
92 std::unique_ptr<token> tempTransformInner;
93
94 switch (tempType)
95 {
96 case type::word:
97 {
98 tempWord = std::move(first.word_.word_);
99 tempCategory = first.word_.category_;
100
101 break;
102 }
103
104 case type::literal:
105 {
106 tempLiteral = std::move(first.literal_);
107
108 break;
109 }
110
111 case type::part:
112 {
113 tempPart = std::move(first.part_);
114
115 break;
116 }
117
118 case type::fillin:
119 {
120 tempFillin = std::move(first.fillin_);
121
122 break;
123 }
124
125 case type::utterance:
126 {
127 tempUtterance = std::move(first.utterance_);
128
129 break;
130 }
131
132 case type::transform:
133 {
134 tempTransformType = first.transform_.type_;
135 tempTransformStrParam = std::move(first.transform_.strParam_);
136 tempTransformStrParam2 = std::move(first.transform_.strParam2_);
137 tempTransformCasingParam = first.transform_.casingParam_;
138 tempTransformInner = std::move(first.transform_.inner_);
139
140 break;
141 }
142 }
143
144 first.~token();
145
146 first.type_ = second.type_;
147
148 switch (first.type_)
149 {
150 case type::word:
151 {
152 new(&first.word_.word_) word(std::move(second.word_.word_));
153 first.word_.category_ = second.word_.category_;
154
155 break;
156 }
157
158 case type::literal:
159 {
160 new(&first.literal_) std::string(std::move(second.literal_));
161
162 break;
163 }
164
165 case type::part:
166 {
167 new(&first.part_) part(std::move(second.part_));
168
169 break;
170 }
171
172 case type::fillin:
173 {
174 new(&first.fillin_) std::set<std::string>(std::move(second.fillin_));
175
176 break;
177 }
178
179 case type::utterance:
180 {
181 new(&first.utterance_) std::list<token>(std::move(second.utterance_));
182
183 break;
184 }
185
186 case type::transform:
187 {
188 first.transform_.type_ = second.transform_.type_;
189 new(&first.transform_.strParam_) std::string(std::move(second.transform_.strParam_));
190 new(&first.transform_.strParam2_) std::string(std::move(second.transform_.strParam2_));
191 first.transform_.casingParam_ = second.transform_.casingParam_;
192 new(&first.transform_.inner_) std::unique_ptr<token>(std::move(second.transform_.inner_));
193
194 break;
195 }
196 }
197
198 second.~token();
199
200 second.type_ = tempType;
201
202 switch (second.type_)
203 {
204 case type::word:
205 {
206 new(&second.word_.word_) word(std::move(tempWord));
207 second.word_.category_ = tempCategory;
208
209 break;
210 }
211
212 case type::literal:
213 {
214 new(&second.literal_) std::string(std::move(tempLiteral));
215
216 break;
217 }
218
219 case type::part:
220 {
221 new(&second.part_) part(std::move(tempPart));
222
223 break;
224 }
225
226 case type::fillin:
227 {
228 new(&second.fillin_) std::set<std::string>(std::move(tempFillin));
229
230 break;
231 }
232
233 case type::utterance:
234 {
235 new(&second.utterance_) std::list<token>(std::move(tempUtterance));
236
237 break;
238 }
239
240 case type::transform:
241 {
242 second.transform_.type_ = tempTransformType;
243 new(&second.transform_.strParam_) std::string(std::move(tempTransformStrParam));
244 new(&second.transform_.strParam2_) std::string(std::move(tempTransformStrParam2));
245 second.transform_.casingParam_ = tempTransformCasingParam;
246 new(&second.transform_.inner_) std::unique_ptr<token>(std::move(tempTransformInner));
247
248 break;
249 }
250 }
251 }
252
253 token::~token()
254 { 9 {
255 switch (type_) 10 switch (type_)
256 { 11 {
257 case type::word: 12 case type::word:
258 {
259 word_.word_.~word();
260
261 break;
262 }
263
264 case type::literal: 13 case type::literal:
265 { 14 {
266 using string_type = std::string; 15 return true;
267 literal_.~string_type();
268
269 break;
270 } 16 }
271 17
272 case type::part: 18 case type::part:
273 {
274 part_.~part();
275
276 break;
277 }
278
279 case type::fillin: 19 case type::fillin:
280 { 20 {
281 using set_type = std::set<std::string>; 21 return false;
282 fillin_.~set_type();
283
284 break;
285 } 22 }
286 23
287 case type::utterance: 24 case type::utterance:
288 { 25 {
289 using list_type = std::list<token>; 26 const utterance_type& utterance = mpark::get<utterance_type>(variant_);
290 utterance_.~list_type();
291 27
292 break; 28 return std::all_of(
29 std::begin(utterance),
30 std::end(utterance),
31 [] (const token& tkn) {
32 return tkn.isComplete();
33 });
293 } 34 }
294 35
295 case type::transform: 36 case type::transform:
296 { 37 {
297 using string_type = std::string; 38 const transform_type& transform = mpark::get<transform_type>(variant_);
298 using ptr_type = std::unique_ptr<token>;
299
300 transform_.strParam_.~string_type();
301 transform_.strParam2_.~string_type();
302 transform_.inner_.~ptr_type();
303 39
304 break; 40 return transform.inner->isComplete();
305 } 41 }
306 } 42 }
307 } 43 }
308 44
309 bool token::isComplete() const
310 {
311 switch (type_)
312 {
313 case type::word: return true;
314 case type::literal: return true;
315 case type::part: return false;
316 case type::fillin: return false;
317 case type::utterance: return std::all_of(std::begin(utterance_), std::end(utterance_), [] (const token& tkn) {
318 return tkn.isComplete();
319 });
320 case type::transform: return transform_.inner_->isComplete();
321 }
322 }
323
324 std::string token::compile() const 45 std::string token::compile() const
325 { 46 {
326 return compileHelper(" ", false, casing::normal); 47 return compileHelper(" ", false, casing::normal);
@@ -335,8 +56,9 @@ namespace verbly {
335 { 56 {
336 case type::word: 57 case type::word:
337 { 58 {
338 const form& wordForm = word_.word_.getInflections(word_.category_) 59 const word_type& w = mpark::get<word_type>(variant_);
339 .front(); 60
61 const form& wordForm = w.value.getInflections(w.category).front();
340 62
341 std::string result = wordForm.getText(); 63 std::string result = wordForm.getText();
342 64
@@ -381,13 +103,12 @@ namespace verbly {
381 } 103 }
382 } 104 }
383 105
384
385 return result; 106 return result;
386 } 107 }
387 108
388 case type::literal: 109 case type::literal:
389 { 110 {
390 std::string result = literal_; 111 std::string result = mpark::get<literal_type>(variant_);
391 112
392 if (indefiniteArticle && std::isalpha(result[0])) 113 if (indefiniteArticle && std::isalpha(result[0]))
393 { 114 {
@@ -435,14 +156,19 @@ namespace verbly {
435 return result; 156 return result;
436 } 157 }
437 158
438 case type::part: throw std::domain_error("Cannot compile incomplete token"); 159 case type::part:
439 case type::fillin: throw std::domain_error("Cannot compile incomplete token"); 160 case type::fillin:
161 {
162 throw std::domain_error("Cannot compile incomplete token");
163 }
440 164
441 case type::utterance: 165 case type::utterance:
442 { 166 {
167 const utterance_type& utterance = mpark::get<utterance_type>(variant_);
168
443 bool first = true; 169 bool first = true;
444 std::list<std::string> compiled; 170 std::list<std::string> compiled;
445 for (const token& tkn : utterance_) 171 for (const token& tkn : utterance)
446 { 172 {
447 casing propagateCasing = capitalization; 173 casing propagateCasing = capitalization;
448 if ((capitalization == casing::capitalize) && (!first)) 174 if ((capitalization == casing::capitalize) && (!first))
@@ -458,58 +184,70 @@ namespace verbly {
458 first = false; 184 first = false;
459 } 185 }
460 186
461 return hatkirby::implode(std::begin(compiled), std::end(compiled), separator); 187 return hatkirby::implode(
188 std::begin(compiled),
189 std::end(compiled),
190 separator);
462 } 191 }
463 192
464 case type::transform: 193 case type::transform:
465 { 194 {
466 switch (transform_.type_) 195 const transform_type& transform = mpark::get<transform_type>(variant_);
196
197 switch (transform.type)
467 { 198 {
468 case transform_type::separator: 199 case transform_mode::separator:
469 { 200 {
470 return transform_.inner_->compileHelper( 201 return transform.inner->compileHelper(
471 transform_.strParam_, indefiniteArticle, capitalization); 202 transform.strParam,
203 indefiniteArticle,
204 capitalization);
472 } 205 }
473 206
474 case transform_type::punctuation: 207 case transform_mode::punctuation:
475 { 208 {
476 return transform_.inner_->compileHelper( 209 return transform.inner->compileHelper(
477 separator, indefiniteArticle, capitalization) 210 separator,
478 + transform_.strParam_; 211 indefiniteArticle,
212 capitalization) + transform.strParam;
479 } 213 }
480 214
481 case transform_type::indefinite_article: 215 case transform_mode::indefinite_article:
482 { 216 {
483 return transform_.inner_->compileHelper( 217 return transform.inner->compileHelper(
484 separator, true, capitalization); 218 separator,
219 true,
220 capitalization);
485 } 221 }
486 222
487 case transform_type::capitalize: 223 case transform_mode::capitalize:
488 { 224 {
489 return transform_.inner_->compileHelper( 225 return transform.inner->compileHelper(
490 separator, 226 separator,
491 indefiniteArticle, 227 indefiniteArticle,
492 transform_.casingParam_); 228 transform.casingParam);
493 } 229 }
494 230
495 case transform_type::quote: 231 case transform_mode::quote:
496 { 232 {
497 return transform_.strParam_ + 233 return transform.strParam +
498 transform_.inner_->compileHelper( 234 transform.inner->compileHelper(
499 separator, 235 separator,
500 indefiniteArticle, 236 indefiniteArticle,
501 capitalization) + 237 capitalization) +
502 transform_.strParam2_; 238 transform.strParam2;
503 } 239 }
504 } 240 }
505 } 241 }
506 } 242 }
507 } 243 }
508 244
509 token::token(word arg, inflection category) : type_(type::word) 245 token::token(
246 word arg,
247 inflection category) :
248 type_(type::word),
249 variant_(word_type { std::move(arg), category })
510 { 250 {
511 new(&word_.word_) word(std::move(arg));
512 word_.category_ = category;
513 } 251 }
514 252
515 const word& token::getWord() const 253 const word& token::getWord() const
@@ -519,7 +257,7 @@ namespace verbly {
519 throw std::domain_error("Token is not a word"); 257 throw std::domain_error("Token is not a word");
520 } 258 }
521 259
522 return word_.word_; 260 return mpark::get<word_type>(variant_).value;
523 } 261 }
524 262
525 token token::inflect(inflection category) const 263 token token::inflect(inflection category) const
@@ -529,46 +267,57 @@ namespace verbly {
529 throw std::domain_error("Token is not a word"); 267 throw std::domain_error("Token is not a word");
530 } 268 }
531 269
532 return token(word_.word_, category); 270 return {
271 mpark::get<word_type>(variant_).value,
272 category
273 };
533 } 274 }
534 275
535 token::token(std::string arg) : type_(type::literal) 276 token::token(
277 std::string arg) :
278 type_(type::literal),
279 variant_(std::move(arg))
536 { 280 {
537 new(&literal_) std::string(std::move(arg));
538 } 281 }
539 282
540 token::token(const char* arg) : token(std::string(arg)) 283 token::token(
284 const char* arg) :
285 token(std::string(arg))
541 { 286 {
542 } 287 }
543 288
544 std::string token::getLiteral() const 289 const std::string& token::getLiteral() const
545 { 290 {
546 if (type_ != type::literal) 291 if (type_ != type::literal)
547 { 292 {
548 throw std::domain_error("Token is not a literal"); 293 throw std::domain_error("Token is not a literal");
549 } 294 }
550 295
551 return literal_; 296 return mpark::get<literal_type>(variant_);
552 } 297 }
553 298
554 token::token(part arg) : type_(type::part) 299 token::token(
300 part arg) :
301 type_(type::part),
302 variant_(std::move(arg))
555 { 303 {
556 new(&part_) part(std::move(arg));
557 } 304 }
558 305
559 part token::getPart() const 306 const part& token::getPart() const
560 { 307 {
561 if (type_ != type::part) 308 if (type_ != type::part)
562 { 309 {
563 throw std::domain_error("Token is not a part"); 310 throw std::domain_error("Token is not a part");
564 } 311 }
565 312
566 return part_; 313 return mpark::get<part>(variant_);
567 } 314 }
568 315
569 token::token(std::set<std::string> synrestrs) : type_(type::fillin) 316 token::token(
317 std::set<std::string> synrestrs) :
318 type_(type::fillin),
319 variant_(std::move(synrestrs))
570 { 320 {
571 new(&fillin_) std::set<std::string>(std::move(synrestrs));
572 } 321 }
573 322
574 const std::set<std::string>& token::getSynrestrs() const 323 const std::set<std::string>& token::getSynrestrs() const
@@ -578,7 +327,7 @@ namespace verbly {
578 throw std::domain_error("Token is not a fillin"); 327 throw std::domain_error("Token is not a fillin");
579 } 328 }
580 329
581 return fillin_; 330 return mpark::get<fillin_type>(variant_);
582 } 331 }
583 332
584 bool token::hasSynrestr(std::string synrestr) const 333 bool token::hasSynrestr(std::string synrestr) const
@@ -588,7 +337,7 @@ namespace verbly {
588 throw std::domain_error("Token is not a fillin"); 337 throw std::domain_error("Token is not a fillin");
589 } 338 }
590 339
591 return (fillin_.count(synrestr) == 1); 340 return mpark::get<fillin_type>(variant_).count(synrestr);
592 } 341 }
593 342
594 void token::addSynrestr(std::string synrestr) 343 void token::addSynrestr(std::string synrestr)
@@ -598,22 +347,28 @@ namespace verbly {
598 throw std::domain_error("Token is not a fillin"); 347 throw std::domain_error("Token is not a fillin");
599 } 348 }
600 349
601 fillin_.insert(std::move(synrestr)); 350 fillin_type& fillin = mpark::get<fillin_type>(variant_);
351 fillin.insert(std::move(synrestr));
602 } 352 }
603 353
604 token::token() : type_(type::utterance) 354 token::token() :
355 type_(type::utterance),
356 variant_(utterance_type {})
605 { 357 {
606 new(&utterance_) std::list<token>();
607 } 358 }
608 359
609 token::token(std::vector<part> parts) : type_(type::utterance) 360 token::token(
361 std::vector<part> parts) :
362 type_(type::utterance),
363 variant_(utterance_type { std::begin(parts), std::end(parts) })
610 { 364 {
611 new(&utterance_) std::list<token>(std::begin(parts), std::end(parts));
612 } 365 }
613 366
614 token::token(std::initializer_list<token> parts) : type_(type::utterance) 367 token::token(
368 std::initializer_list<token> parts) :
369 type_(type::utterance),
370 variant_(utterance_type { std::move(parts) })
615 { 371 {
616 new(&utterance_) std::list<token>(std::move(parts));
617 } 372 }
618 373
619 token::iterator token::begin() 374 token::iterator token::begin()
@@ -623,7 +378,7 @@ namespace verbly {
623 throw std::domain_error("Token is not an utterance"); 378 throw std::domain_error("Token is not an utterance");
624 } 379 }
625 380
626 return std::begin(utterance_); 381 return std::begin(mpark::get<utterance_type>(variant_));
627 } 382 }
628 383
629 token::const_iterator token::begin() const 384 token::const_iterator token::begin() const
@@ -633,7 +388,7 @@ namespace verbly {
633 throw std::domain_error("Token is not an utterance"); 388 throw std::domain_error("Token is not an utterance");
634 } 389 }
635 390
636 return std::begin(utterance_); 391 return std::begin(mpark::get<utterance_type>(variant_));
637 } 392 }
638 393
639 token::iterator token::end() 394 token::iterator token::end()
@@ -643,7 +398,7 @@ namespace verbly {
643 throw std::domain_error("Token is not an utterance"); 398 throw std::domain_error("Token is not an utterance");
644 } 399 }
645 400
646 return std::end(utterance_); 401 return std::end(mpark::get<utterance_type>(variant_));
647 } 402 }
648 403
649 token::const_iterator token::end() const 404 token::const_iterator token::end() const
@@ -653,7 +408,7 @@ namespace verbly {
653 throw std::domain_error("Token is not an utterance"); 408 throw std::domain_error("Token is not an utterance");
654 } 409 }
655 410
656 return std::end(utterance_); 411 return std::end(mpark::get<utterance_type>(variant_));
657 } 412 }
658 413
659 token& token::operator<<(token arg) 414 token& token::operator<<(token arg)
@@ -663,35 +418,36 @@ namespace verbly {
663 throw std::domain_error("Token is not an utterance"); 418 throw std::domain_error("Token is not an utterance");
664 } 419 }
665 420
666 utterance_.push_back(std::move(arg)); 421 utterance_type& utterance = mpark::get<utterance_type>(variant_);
422 utterance.push_back(std::move(arg));
667 423
668 return *this; 424 return *this;
669 } 425 }
670 426
671 token token::separator(std::string param, token inner) 427 token token::separator(std::string param, token inner)
672 { 428 {
673 return token(transform_type::separator, std::move(param), "", std::move(inner)); 429 return token(transform_mode::separator, std::move(param), "", std::move(inner));
674 } 430 }
675 431
676 token token::punctuation(std::string param, token inner) 432 token token::punctuation(std::string param, token inner)
677 { 433 {
678 return token(transform_type::punctuation, std::move(param), "", std::move(inner)); 434 return token(transform_mode::punctuation, std::move(param), "", std::move(inner));
679 } 435 }
680 436
681 token token::indefiniteArticle(token inner) 437 token token::indefiniteArticle(token inner)
682 { 438 {
683 return token(transform_type::indefinite_article, "", "", std::move(inner)); 439 return token(transform_mode::indefinite_article, "", "", std::move(inner));
684 } 440 }
685 441
686 token token::capitalize(casing param, token inner) 442 token token::capitalize(casing param, token inner)
687 { 443 {
688 return token(transform_type::capitalize, param, std::move(inner)); 444 return token(transform_mode::capitalize, param, std::move(inner));
689 } 445 }
690 446
691 token token::quote(std::string opening, std::string closing, token inner) 447 token token::quote(std::string opening, std::string closing, token inner)
692 { 448 {
693 return token( 449 return token(
694 transform_type::quote, 450 transform_mode::quote,
695 std::move(opening), 451 std::move(opening),
696 std::move(closing), 452 std::move(closing),
697 std::move(inner)); 453 std::move(inner));
@@ -704,7 +460,7 @@ namespace verbly {
704 throw std::domain_error("Invalid access on non-tranform token"); 460 throw std::domain_error("Invalid access on non-tranform token");
705 } 461 }
706 462
707 return *transform_.inner_; 463 return *mpark::get<transform_type>(variant_).inner;
708 } 464 }
709 465
710 const token& token::getInnerToken() const 466 const token& token::getInnerToken() const
@@ -714,33 +470,38 @@ namespace verbly {
714 throw std::domain_error("Invalid access on non-tranform token"); 470 throw std::domain_error("Invalid access on non-tranform token");
715 } 471 }
716 472
717 return *transform_.inner_; 473 return *mpark::get<transform_type>(variant_).inner;
718 } 474 }
719 475
720 token::token( 476 token::token(
721 transform_type type, 477 transform_mode type,
722 std::string param1, 478 std::string param1,
723 std::string param2, 479 std::string param2,
724 token inner) : 480 token inner) :
725 type_(type::transform) 481 type_(type::transform),
482 variant_(transform_type {
483 type,
484 std::move(param1),
485 std::move(param2),
486 casing::normal,
487 new token(std::move(inner))
488 })
726 { 489 {
727 transform_.type_ = type;
728 new(&transform_.strParam_) std::string(std::move(param1));
729 new(&transform_.strParam2_) std::string(std::move(param2));
730 new(&transform_.inner_) std::unique_ptr<token>(new token(std::move(inner)));
731 } 490 }
732 491
733 token::token( 492 token::token(
734 transform_type type, 493 transform_mode type,
735 casing param, 494 casing param,
736 token inner) : 495 token inner) :
737 type_(type::transform) 496 type_(type::transform),
497 variant_(transform_type {
498 type,
499 {},
500 {},
501 param,
502 new token(std::move(inner))
503 })
738 { 504 {
739 transform_.type_ = type;
740 new(&transform_.strParam_) std::string();
741 new(&transform_.strParam2_) std::string();
742 transform_.casingParam_ = param;
743 new(&transform_.inner_) std::unique_ptr<token>(new token(std::move(inner)));
744 } 505 }
745 506
746 std::ostream& operator<<(std::ostream& os, token::type type) 507 std::ostream& operator<<(std::ostream& os, token::type type)