diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/token.cpp | 212 | ||||
-rw-r--r-- | lib/token.h | 37 |
2 files changed, 244 insertions, 5 deletions
diff --git a/lib/token.cpp b/lib/token.cpp index 735aa7e..91591d3 100644 --- a/lib/token.cpp +++ b/lib/token.cpp | |||
@@ -46,6 +46,15 @@ namespace verbly { | |||
46 | 46 | ||
47 | break; | 47 | break; |
48 | } | 48 | } |
49 | |||
50 | case type::transform: | ||
51 | { | ||
52 | transform_.type_ = other.transform_.type_; | ||
53 | new(&transform_.param_) std::string(other.transform_.param_); | ||
54 | new(&transform_.inner_) std::unique_ptr<token>(new token(*other.transform_.inner_)); | ||
55 | |||
56 | break; | ||
57 | } | ||
49 | } | 58 | } |
50 | } | 59 | } |
51 | 60 | ||
@@ -64,6 +73,7 @@ namespace verbly { | |||
64 | void swap(token& first, token& second) | 73 | void swap(token& first, token& second) |
65 | { | 74 | { |
66 | using type = token::type; | 75 | using type = token::type; |
76 | using transform_type = token::transform_type; | ||
67 | 77 | ||
68 | type tempType = first.type_; | 78 | type tempType = first.type_; |
69 | word tempWord; | 79 | word tempWord; |
@@ -72,6 +82,9 @@ namespace verbly { | |||
72 | part tempPart; | 82 | part tempPart; |
73 | std::set<std::string> tempFillin; | 83 | std::set<std::string> tempFillin; |
74 | std::list<token> tempUtterance; | 84 | std::list<token> tempUtterance; |
85 | transform_type tempTransformType; | ||
86 | std::string tempTransformParam; | ||
87 | std::unique_ptr<token> tempTransformInner; | ||
75 | 88 | ||
76 | switch (tempType) | 89 | switch (tempType) |
77 | { | 90 | { |
@@ -110,6 +123,15 @@ namespace verbly { | |||
110 | 123 | ||
111 | break; | 124 | break; |
112 | } | 125 | } |
126 | |||
127 | case type::transform: | ||
128 | { | ||
129 | tempTransformType = first.transform_.type_; | ||
130 | tempTransformParam = std::move(first.transform_.param_); | ||
131 | tempTransformInner = std::move(first.transform_.inner_); | ||
132 | |||
133 | break; | ||
134 | } | ||
113 | } | 135 | } |
114 | 136 | ||
115 | first.~token(); | 137 | first.~token(); |
@@ -153,6 +175,15 @@ namespace verbly { | |||
153 | 175 | ||
154 | break; | 176 | break; |
155 | } | 177 | } |
178 | |||
179 | case type::transform: | ||
180 | { | ||
181 | first.transform_.type_ = second.transform_.type_; | ||
182 | new(&first.transform_.param_) std::string(std::move(second.transform_.param_)); | ||
183 | new(&first.transform_.inner_) std::unique_ptr<token>(std::move(second.transform_.inner_)); | ||
184 | |||
185 | break; | ||
186 | } | ||
156 | } | 187 | } |
157 | 188 | ||
158 | second.~token(); | 189 | second.~token(); |
@@ -196,6 +227,15 @@ namespace verbly { | |||
196 | 227 | ||
197 | break; | 228 | break; |
198 | } | 229 | } |
230 | |||
231 | case type::transform: | ||
232 | { | ||
233 | second.transform_.type_ = tempTransformType; | ||
234 | new(&second.transform_.param_) std::string(std::move(tempTransformParam)); | ||
235 | new(&second.transform_.inner_) std::unique_ptr<token>(std::move(tempTransformInner)); | ||
236 | |||
237 | break; | ||
238 | } | ||
199 | } | 239 | } |
200 | } | 240 | } |
201 | 241 | ||
@@ -240,6 +280,17 @@ namespace verbly { | |||
240 | 280 | ||
241 | break; | 281 | break; |
242 | } | 282 | } |
283 | |||
284 | case type::transform: | ||
285 | { | ||
286 | using string_type = std::string; | ||
287 | using ptr_type = std::unique_ptr<token>; | ||
288 | |||
289 | transform_.param_.~string_type(); | ||
290 | transform_.inner_.~ptr_type(); | ||
291 | |||
292 | break; | ||
293 | } | ||
243 | } | 294 | } |
244 | } | 295 | } |
245 | 296 | ||
@@ -254,27 +305,123 @@ namespace verbly { | |||
254 | case type::utterance: return std::all_of(std::begin(utterance_), std::end(utterance_), [] (const token& tkn) { | 305 | case type::utterance: return std::all_of(std::begin(utterance_), std::end(utterance_), [] (const token& tkn) { |
255 | return tkn.isComplete(); | 306 | return tkn.isComplete(); |
256 | }); | 307 | }); |
308 | case type::transform: return transform_.inner_->isComplete(); | ||
257 | } | 309 | } |
258 | } | 310 | } |
259 | 311 | ||
260 | std::string token::compile() const | 312 | std::string token::compile() const |
261 | { | 313 | { |
314 | return compileHelper(" ", false, false); | ||
315 | } | ||
316 | |||
317 | std::string token::compileHelper( | ||
318 | std::string separator, | ||
319 | bool definiteArticle, | ||
320 | bool capitalize) const | ||
321 | { | ||
262 | switch (type_) | 322 | switch (type_) |
263 | { | 323 | { |
264 | case type::word: return word_.word_.getInflections(word_.category_).front().getText(); | 324 | case type::word: |
265 | case type::literal: return literal_; | 325 | { |
326 | const form& wordForm = word_.word_.getInflections(word_.category_) | ||
327 | .front(); | ||
328 | |||
329 | std::string result = wordForm.getText(); | ||
330 | |||
331 | if (definiteArticle) | ||
332 | { | ||
333 | if (wordForm.startsWithVowelSound()) | ||
334 | { | ||
335 | result = "an " + result; | ||
336 | } else { | ||
337 | result = "a " + result; | ||
338 | } | ||
339 | } | ||
340 | |||
341 | if (capitalize) | ||
342 | { | ||
343 | if (std::isalpha(result[0])) | ||
344 | { | ||
345 | result[0] = std::toupper(result[0]); | ||
346 | } | ||
347 | } | ||
348 | |||
349 | return result; | ||
350 | } | ||
351 | |||
352 | case type::literal: | ||
353 | { | ||
354 | std::string result = literal_; | ||
355 | |||
356 | if (definiteArticle && std::isalpha(result[0])) | ||
357 | { | ||
358 | char canon = std::tolower(result[0]); | ||
359 | if ((canon == 'a') || (canon == 'e') || (canon == 'i') | ||
360 | || (canon == 'o') || (canon == 'u')) | ||
361 | { | ||
362 | result = "an " + result; | ||
363 | } else { | ||
364 | result = "a " + result; | ||
365 | } | ||
366 | } | ||
367 | |||
368 | if (capitalize) | ||
369 | { | ||
370 | if (std::isalpha(result[0])) | ||
371 | { | ||
372 | result[0] = std::toupper(result[0]); | ||
373 | } | ||
374 | } | ||
375 | |||
376 | return result; | ||
377 | } | ||
378 | |||
266 | case type::part: throw std::domain_error("Cannot compile incomplete token"); | 379 | case type::part: throw std::domain_error("Cannot compile incomplete token"); |
267 | case type::fillin: throw std::domain_error("Cannot compile incomplete token"); | 380 | case type::fillin: throw std::domain_error("Cannot compile incomplete token"); |
268 | 381 | ||
269 | case type::utterance: | 382 | case type::utterance: |
270 | { | 383 | { |
384 | bool first = true; | ||
271 | std::list<std::string> compiled; | 385 | std::list<std::string> compiled; |
272 | for (const token& tkn : utterance_) | 386 | for (const token& tkn : utterance_) |
273 | { | 387 | { |
274 | compiled.push_back(tkn.compile()); | 388 | compiled.push_back( |
389 | tkn.compileHelper(" ", | ||
390 | first && definiteArticle, | ||
391 | first && capitalize)); | ||
392 | |||
393 | first = false; | ||
275 | } | 394 | } |
276 | 395 | ||
277 | return implode(std::begin(compiled), std::end(compiled), " "); | 396 | return implode(std::begin(compiled), std::end(compiled), separator); |
397 | } | ||
398 | |||
399 | case type::transform: | ||
400 | { | ||
401 | switch (transform_.type_) | ||
402 | { | ||
403 | case transform_type::separator: | ||
404 | { | ||
405 | return transform_.inner_->compileHelper( | ||
406 | transform_.param_, definiteArticle, capitalize); | ||
407 | } | ||
408 | |||
409 | case transform_type::punctuation: | ||
410 | { | ||
411 | return transform_.inner_->compileHelper( | ||
412 | separator, definiteArticle, capitalize) + transform_.param_; | ||
413 | } | ||
414 | |||
415 | case transform_type::definite_article: | ||
416 | { | ||
417 | return transform_.inner_->compileHelper(separator, true, capitalize); | ||
418 | } | ||
419 | |||
420 | case transform_type::capitalize: | ||
421 | { | ||
422 | return transform_.inner_->compileHelper(separator, definiteArticle, true); | ||
423 | } | ||
424 | } | ||
278 | } | 425 | } |
279 | } | 426 | } |
280 | } | 427 | } |
@@ -384,6 +531,11 @@ namespace verbly { | |||
384 | new(&utterance_) std::list<token>(std::begin(parts), std::end(parts)); | 531 | new(&utterance_) std::list<token>(std::begin(parts), std::end(parts)); |
385 | } | 532 | } |
386 | 533 | ||
534 | token::token(std::initializer_list<token> parts) : type_(type::utterance) | ||
535 | { | ||
536 | new(&utterance_) std::list<token>(std::move(parts)); | ||
537 | } | ||
538 | |||
387 | token::iterator token::begin() | 539 | token::iterator token::begin() |
388 | { | 540 | { |
389 | if (type_ != type::utterance) | 541 | if (type_ != type::utterance) |
@@ -436,6 +588,57 @@ namespace verbly { | |||
436 | return *this; | 588 | return *this; |
437 | } | 589 | } |
438 | 590 | ||
591 | token token::separator(std::string param, token inner) | ||
592 | { | ||
593 | return token(transform_type::separator, std::move(param), std::move(inner)); | ||
594 | } | ||
595 | |||
596 | token token::punctuation(std::string param, token inner) | ||
597 | { | ||
598 | return token(transform_type::punctuation, std::move(param), std::move(inner)); | ||
599 | } | ||
600 | |||
601 | token token::definiteArticle(token inner) | ||
602 | { | ||
603 | return token(transform_type::definite_article, "", std::move(inner)); | ||
604 | } | ||
605 | |||
606 | token token::capitalize(token inner) | ||
607 | { | ||
608 | return token(transform_type::capitalize, "", std::move(inner)); | ||
609 | } | ||
610 | |||
611 | token& token::getInnerToken() | ||
612 | { | ||
613 | if (type_ != type::transform) | ||
614 | { | ||
615 | throw std::domain_error("Invalid access on non-tranform token"); | ||
616 | } | ||
617 | |||
618 | return *transform_.inner_; | ||
619 | } | ||
620 | |||
621 | const token& token::getInnerToken() const | ||
622 | { | ||
623 | if (type_ != type::transform) | ||
624 | { | ||
625 | throw std::domain_error("Invalid access on non-tranform token"); | ||
626 | } | ||
627 | |||
628 | return *transform_.inner_; | ||
629 | } | ||
630 | |||
631 | token::token( | ||
632 | transform_type type, | ||
633 | std::string param, | ||
634 | token inner) : | ||
635 | type_(type::transform) | ||
636 | { | ||
637 | transform_.type_ = type; | ||
638 | new(&transform_.param_) std::string(std::move(param)); | ||
639 | new(&transform_.inner_) std::unique_ptr<token>(new token(std::move(inner))); | ||
640 | } | ||
641 | |||
439 | std::ostream& operator<<(std::ostream& os, token::type type) | 642 | std::ostream& operator<<(std::ostream& os, token::type type) |
440 | { | 643 | { |
441 | switch (type) | 644 | switch (type) |
@@ -445,6 +648,7 @@ namespace verbly { | |||
445 | case token::type::part: return os << "part"; | 648 | case token::type::part: return os << "part"; |
446 | case token::type::fillin: return os << "fillin"; | 649 | case token::type::fillin: return os << "fillin"; |
447 | case token::type::utterance: return os << "utterance"; | 650 | case token::type::utterance: return os << "utterance"; |
651 | case token::type::transform: return os << "transform"; | ||
448 | } | 652 | } |
449 | } | 653 | } |
450 | 654 | ||
diff --git a/lib/token.h b/lib/token.h index e7f8c28..f3188c9 100644 --- a/lib/token.h +++ b/lib/token.h | |||
@@ -18,7 +18,8 @@ namespace verbly { | |||
18 | literal, | 18 | literal, |
19 | part, | 19 | part, |
20 | fillin, | 20 | fillin, |
21 | utterance | 21 | utterance, |
22 | transform | ||
22 | }; | 23 | }; |
23 | 24 | ||
24 | // Copy & move constructors | 25 | // Copy & move constructors |
@@ -87,6 +88,7 @@ namespace verbly { | |||
87 | 88 | ||
88 | token(); | 89 | token(); |
89 | token(std::vector<part> parts); | 90 | token(std::vector<part> parts); |
// Builds an utterance token whose children are the given tokens, in order.
91 | token(std::initializer_list<token> pieces); | ||
90 | 92 | ||
91 | iterator begin(); | 93 | iterator begin(); |
92 | const_iterator begin() const; | 94 | const_iterator begin() const; |
@@ -96,7 +98,35 @@ namespace verbly { | |||
96 | 98 | ||
97 | token& operator<<(token arg); | 99 | token& operator<<(token arg); |
98 | 100 | ||
101 | // Transform | ||
102 | |||
// Factory: wraps inner so that, when compiled, param is used as the
// separator passed to inner (an inner utterance joins its children with it).
103 | static token separator(std::string param, token inner); | ||
// Factory: wraps inner so that, when compiled, param is appended after the
// text produced by inner.
104 | static token punctuation(std::string param, token inner); | ||
// Factory: wraps inner so that, when compiled, "a " or "an " is prepended to
// its leading word or literal (the indefinite article, despite the name).
105 | static token definiteArticle(token inner); | ||
// Factory: wraps inner so that, when compiled, the first character of its
// text is upper-cased if it is a letter.
106 | static token capitalize(token inner); | ||
107 | |||
// Access the token wrapped by a transform token. Both overloads throw
// std::domain_error when called on a token that is not of type transform.
108 | token& getInnerToken(); | ||
109 | const token& getInnerToken() const; | ||
110 | |||
99 | private: | 111 | private: |
112 | |||
// Recursive worker behind compile(). separator is the string placed between
// the direct children of an utterance; definiteArticle requests an "a "/"an "
// prefix on the leading word or literal; capitalize requests an upper-case
// first letter. compile() calls it with (" ", false, false).
113 | std::string compileHelper( | ||
114 | std::string separator, | ||
115 | bool definiteArticle, | ||
116 | bool capitalize) const; | ||
117 | |||
// Kinds of transform a transform token can apply to its inner token when it
// is compiled.
118 | enum class transform_type { | ||
// Replace the separator handed to the inner token with the parameter string.
119 | separator, | ||
// Append the parameter string after the inner token's text.
120 | punctuation, | ||
// Turn on the article flag for the inner token ("a "/"an " prefix).
121 | definite_article, | ||
// Turn on the capitalize flag for the inner token.
122 | capitalize | ||
123 | }; | ||
124 | |||
// Private constructor used by the transform factories: builds a token of
// type transform with the given kind, parameter string and wrapped token.
125 | token( | ||
126 | transform_type type, | ||
127 | std::string param, | ||
128 | token inner); | ||
129 | |||
100 | union { | 130 | union { |
101 | struct { | 131 | struct { |
102 | word word_; | 132 | word word_; |
@@ -106,6 +136,11 @@ namespace verbly { | |||
106 | part part_; | 136 | part part_; |
107 | std::set<std::string> fillin_; | 137 | std::set<std::string> fillin_; |
108 | std::list<token> utterance_; | 138 | std::list<token> utterance_; |
// Payload of a token whose type_ is type::transform. param_ and inner_ are
// constructed with placement new and destroyed explicitly by the token's
// special member functions.
139 | struct { | ||
// Which transform to apply at compile time.
140 | transform_type type_; | ||
// Parameter of the transform: the separator or punctuation string; the
// factories pass an empty string for the other kinds.
141 | std::string param_; | ||
// The wrapped token the transform is applied to.
142 | std::unique_ptr<token> inner_; | ||
143 | } transform_; | ||
109 | }; | 144 | }; |
110 | type type_; | 145 | type type_; |
111 | }; | 146 | }; |