diff options
Diffstat (limited to 'lib')
| -rw-r--r-- | lib/token.cpp | 212 | ||||
| -rw-r--r-- | lib/token.h | 37 |
2 files changed, 244 insertions, 5 deletions
| diff --git a/lib/token.cpp b/lib/token.cpp index 735aa7e..91591d3 100644 --- a/lib/token.cpp +++ b/lib/token.cpp | |||
| @@ -46,6 +46,15 @@ namespace verbly { | |||
| 46 | 46 | ||
| 47 | break; | 47 | break; |
| 48 | } | 48 | } |
| 49 | |||
| 50 | case type::transform: | ||
| 51 | { | ||
| 52 | transform_.type_ = other.transform_.type_; | ||
| 53 | new(&transform_.param_) std::string(other.transform_.param_); | ||
| 54 | new(&transform_.inner_) std::unique_ptr<token>(new token(*other.transform_.inner_)); | ||
| 55 | |||
| 56 | break; | ||
| 57 | } | ||
| 49 | } | 58 | } |
| 50 | } | 59 | } |
| 51 | 60 | ||
| @@ -64,6 +73,7 @@ namespace verbly { | |||
| 64 | void swap(token& first, token& second) | 73 | void swap(token& first, token& second) |
| 65 | { | 74 | { |
| 66 | using type = token::type; | 75 | using type = token::type; |
| 76 | using transform_type = token::transform_type; | ||
| 67 | 77 | ||
| 68 | type tempType = first.type_; | 78 | type tempType = first.type_; |
| 69 | word tempWord; | 79 | word tempWord; |
| @@ -72,6 +82,9 @@ namespace verbly { | |||
| 72 | part tempPart; | 82 | part tempPart; |
| 73 | std::set<std::string> tempFillin; | 83 | std::set<std::string> tempFillin; |
| 74 | std::list<token> tempUtterance; | 84 | std::list<token> tempUtterance; |
| 85 | transform_type tempTransformType; | ||
| 86 | std::string tempTransformParam; | ||
| 87 | std::unique_ptr<token> tempTransformInner; | ||
| 75 | 88 | ||
| 76 | switch (tempType) | 89 | switch (tempType) |
| 77 | { | 90 | { |
| @@ -110,6 +123,15 @@ namespace verbly { | |||
| 110 | 123 | ||
| 111 | break; | 124 | break; |
| 112 | } | 125 | } |
| 126 | |||
| 127 | case type::transform: | ||
| 128 | { | ||
| 129 | tempTransformType = first.transform_.type_; | ||
| 130 | tempTransformParam = std::move(first.transform_.param_); | ||
| 131 | tempTransformInner = std::move(first.transform_.inner_); | ||
| 132 | |||
| 133 | break; | ||
| 134 | } | ||
| 113 | } | 135 | } |
| 114 | 136 | ||
| 115 | first.~token(); | 137 | first.~token(); |
| @@ -153,6 +175,15 @@ namespace verbly { | |||
| 153 | 175 | ||
| 154 | break; | 176 | break; |
| 155 | } | 177 | } |
| 178 | |||
| 179 | case type::transform: | ||
| 180 | { | ||
| 181 | first.transform_.type_ = second.transform_.type_; | ||
| 182 | new(&first.transform_.param_) std::string(std::move(second.transform_.param_)); | ||
| 183 | new(&first.transform_.inner_) std::unique_ptr<token>(std::move(second.transform_.inner_)); | ||
| 184 | |||
| 185 | break; | ||
| 186 | } | ||
| 156 | } | 187 | } |
| 157 | 188 | ||
| 158 | second.~token(); | 189 | second.~token(); |
| @@ -196,6 +227,15 @@ namespace verbly { | |||
| 196 | 227 | ||
| 197 | break; | 228 | break; |
| 198 | } | 229 | } |
| 230 | |||
| 231 | case type::transform: | ||
| 232 | { | ||
| 233 | second.transform_.type_ = tempTransformType; | ||
| 234 | new(&second.transform_.param_) std::string(std::move(tempTransformParam)); | ||
| 235 | new(&second.transform_.inner_) std::unique_ptr<token>(std::move(tempTransformInner)); | ||
| 236 | |||
| 237 | break; | ||
| 238 | } | ||
| 199 | } | 239 | } |
| 200 | } | 240 | } |
| 201 | 241 | ||
| @@ -240,6 +280,17 @@ namespace verbly { | |||
| 240 | 280 | ||
| 241 | break; | 281 | break; |
| 242 | } | 282 | } |
| 283 | |||
| 284 | case type::transform: | ||
| 285 | { | ||
| 286 | using string_type = std::string; | ||
| 287 | using ptr_type = std::unique_ptr<token>; | ||
| 288 | |||
| 289 | transform_.param_.~string_type(); | ||
| 290 | transform_.inner_.~ptr_type(); | ||
| 291 | |||
| 292 | break; | ||
| 293 | } | ||
| 243 | } | 294 | } |
| 244 | } | 295 | } |
| 245 | 296 | ||
| @@ -254,27 +305,123 @@ namespace verbly { | |||
| 254 | case type::utterance: return std::all_of(std::begin(utterance_), std::end(utterance_), [] (const token& tkn) { | 305 | case type::utterance: return std::all_of(std::begin(utterance_), std::end(utterance_), [] (const token& tkn) { |
| 255 | return tkn.isComplete(); | 306 | return tkn.isComplete(); |
| 256 | }); | 307 | }); |
| 308 | case type::transform: return transform_.inner_->isComplete(); | ||
| 257 | } | 309 | } |
| 258 | } | 310 | } |
| 259 | 311 | ||
| 260 | std::string token::compile() const | 312 | std::string token::compile() const |
| 261 | { | 313 | { |
| 314 | return compileHelper(" ", false, false); | ||
| 315 | } | ||
| 316 | |||
| 317 | std::string token::compileHelper( | ||
| 318 | std::string separator, | ||
| 319 | bool definiteArticle, | ||
| 320 | bool capitalize) const | ||
| 321 | { | ||
| 262 | switch (type_) | 322 | switch (type_) |
| 263 | { | 323 | { |
| 264 | case type::word: return word_.word_.getInflections(word_.category_).front().getText(); | 324 | case type::word: |
| 265 | case type::literal: return literal_; | 325 | { |
| 326 | const form& wordForm = word_.word_.getInflections(word_.category_) | ||
| 327 | .front(); | ||
| 328 | |||
| 329 | std::string result = wordForm.getText(); | ||
| 330 | |||
| 331 | if (definiteArticle) | ||
| 332 | { | ||
| 333 | if (wordForm.startsWithVowelSound()) | ||
| 334 | { | ||
| 335 | result = "an " + result; | ||
| 336 | } else { | ||
| 337 | result = "a " + result; | ||
| 338 | } | ||
| 339 | } | ||
| 340 | |||
| 341 | if (capitalize) | ||
| 342 | { | ||
| 343 | if (std::isalpha(result[0])) | ||
| 344 | { | ||
| 345 | result[0] = std::toupper(result[0]); | ||
| 346 | } | ||
| 347 | } | ||
| 348 | |||
| 349 | return result; | ||
| 350 | } | ||
| 351 | |||
| 352 | case type::literal: | ||
| 353 | { | ||
| 354 | std::string result = literal_; | ||
| 355 | |||
| 356 | if (definiteArticle && std::isalpha(result[0])) | ||
| 357 | { | ||
| 358 | char canon = std::tolower(result[0]); | ||
| 359 | if ((canon == 'a') || (canon == 'e') || (canon == 'i') | ||
| 360 | || (canon == 'o') || (canon == 'u')) | ||
| 361 | { | ||
| 362 | result = "an " + result; | ||
| 363 | } else { | ||
| 364 | result = "a " + result; | ||
| 365 | } | ||
| 366 | } | ||
| 367 | |||
| 368 | if (capitalize) | ||
| 369 | { | ||
| 370 | if (std::isalpha(result[0])) | ||
| 371 | { | ||
| 372 | result[0] = std::toupper(result[0]); | ||
| 373 | } | ||
| 374 | } | ||
| 375 | |||
| 376 | return result; | ||
| 377 | } | ||
| 378 | |||
| 266 | case type::part: throw std::domain_error("Cannot compile incomplete token"); | 379 | case type::part: throw std::domain_error("Cannot compile incomplete token"); |
| 267 | case type::fillin: throw std::domain_error("Cannot compile incomplete token"); | 380 | case type::fillin: throw std::domain_error("Cannot compile incomplete token"); |
| 268 | 381 | ||
| 269 | case type::utterance: | 382 | case type::utterance: |
| 270 | { | 383 | { |
| 384 | bool first = true; | ||
| 271 | std::list<std::string> compiled; | 385 | std::list<std::string> compiled; |
| 272 | for (const token& tkn : utterance_) | 386 | for (const token& tkn : utterance_) |
| 273 | { | 387 | { |
| 274 | compiled.push_back(tkn.compile()); | 388 | compiled.push_back( |
| 389 | tkn.compileHelper(" ", | ||
| 390 | first && definiteArticle, | ||
| 391 | first && capitalize)); | ||
| 392 | |||
| 393 | first = false; | ||
| 275 | } | 394 | } |
| 276 | 395 | ||
| 277 | return implode(std::begin(compiled), std::end(compiled), " "); | 396 | return implode(std::begin(compiled), std::end(compiled), separator); |
| 397 | } | ||
| 398 | |||
| 399 | case type::transform: | ||
| 400 | { | ||
| 401 | switch (transform_.type_) | ||
| 402 | { | ||
| 403 | case transform_type::separator: | ||
| 404 | { | ||
| 405 | return transform_.inner_->compileHelper( | ||
| 406 | transform_.param_, definiteArticle, capitalize); | ||
| 407 | } | ||
| 408 | |||
| 409 | case transform_type::punctuation: | ||
| 410 | { | ||
| 411 | return transform_.inner_->compileHelper( | ||
| 412 | separator, definiteArticle, capitalize) + transform_.param_; | ||
| 413 | } | ||
| 414 | |||
| 415 | case transform_type::definite_article: | ||
| 416 | { | ||
| 417 | return transform_.inner_->compileHelper(separator, true, capitalize); | ||
| 418 | } | ||
| 419 | |||
| 420 | case transform_type::capitalize: | ||
| 421 | { | ||
| 422 | return transform_.inner_->compileHelper(separator, definiteArticle, true); | ||
| 423 | } | ||
| 424 | } | ||
| 278 | } | 425 | } |
| 279 | } | 426 | } |
| 280 | } | 427 | } |
| @@ -384,6 +531,11 @@ namespace verbly { | |||
| 384 | new(&utterance_) std::list<token>(std::begin(parts), std::end(parts)); | 531 | new(&utterance_) std::list<token>(std::begin(parts), std::end(parts)); |
| 385 | } | 532 | } |
| 386 | 533 | ||
| 534 | token::token(std::initializer_list<token> parts) : type_(type::utterance) | ||
| 535 | { | ||
| 536 | new(&utterance_) std::list<token>(std::move(parts)); | ||
| 537 | } | ||
| 538 | |||
| 387 | token::iterator token::begin() | 539 | token::iterator token::begin() |
| 388 | { | 540 | { |
| 389 | if (type_ != type::utterance) | 541 | if (type_ != type::utterance) |
| @@ -436,6 +588,57 @@ namespace verbly { | |||
| 436 | return *this; | 588 | return *this; |
| 437 | } | 589 | } |
| 438 | 590 | ||
| 591 | token token::separator(std::string param, token inner) | ||
| 592 | { | ||
| 593 | return token(transform_type::separator, std::move(param), std::move(inner)); | ||
| 594 | } | ||
| 595 | |||
| 596 | token token::punctuation(std::string param, token inner) | ||
| 597 | { | ||
| 598 | return token(transform_type::punctuation, std::move(param), std::move(inner)); | ||
| 599 | } | ||
| 600 | |||
| 601 | token token::definiteArticle(token inner) | ||
| 602 | { | ||
| 603 | return token(transform_type::definite_article, "", std::move(inner)); | ||
| 604 | } | ||
| 605 | |||
| 606 | token token::capitalize(token inner) | ||
| 607 | { | ||
| 608 | return token(transform_type::capitalize, "", std::move(inner)); | ||
| 609 | } | ||
| 610 | |||
| 611 | token& token::getInnerToken() | ||
| 612 | { | ||
| 613 | if (type_ != type::transform) | ||
| 614 | { | ||
| 615 | throw std::domain_error("Invalid access on non-tranform token"); | ||
| 616 | } | ||
| 617 | |||
| 618 | return *transform_.inner_; | ||
| 619 | } | ||
| 620 | |||
| 621 | const token& token::getInnerToken() const | ||
| 622 | { | ||
| 623 | if (type_ != type::transform) | ||
| 624 | { | ||
| 625 | throw std::domain_error("Invalid access on non-tranform token"); | ||
| 626 | } | ||
| 627 | |||
| 628 | return *transform_.inner_; | ||
| 629 | } | ||
| 630 | |||
| 631 | token::token( | ||
| 632 | transform_type type, | ||
| 633 | std::string param, | ||
| 634 | token inner) : | ||
| 635 | type_(type::transform) | ||
| 636 | { | ||
| 637 | transform_.type_ = type; | ||
| 638 | new(&transform_.param_) std::string(std::move(param)); | ||
| 639 | new(&transform_.inner_) std::unique_ptr<token>(new token(std::move(inner))); | ||
| 640 | } | ||
| 641 | |||
| 439 | std::ostream& operator<<(std::ostream& os, token::type type) | 642 | std::ostream& operator<<(std::ostream& os, token::type type) |
| 440 | { | 643 | { |
| 441 | switch (type) | 644 | switch (type) |
| @@ -445,6 +648,7 @@ namespace verbly { | |||
| 445 | case token::type::part: return os << "part"; | 648 | case token::type::part: return os << "part"; |
| 446 | case token::type::fillin: return os << "fillin"; | 649 | case token::type::fillin: return os << "fillin"; |
| 447 | case token::type::utterance: return os << "utterance"; | 650 | case token::type::utterance: return os << "utterance"; |
| 651 | case token::type::transform: return os << "transform"; | ||
| 448 | } | 652 | } |
| 449 | } | 653 | } |
| 450 | 654 | ||
| diff --git a/lib/token.h b/lib/token.h index e7f8c28..f3188c9 100644 --- a/lib/token.h +++ b/lib/token.h | |||
| @@ -18,7 +18,8 @@ namespace verbly { | |||
| 18 | literal, | 18 | literal, |
| 19 | part, | 19 | part, |
| 20 | fillin, | 20 | fillin, |
| 21 | utterance | 21 | utterance, |
| 22 | transform | ||
| 22 | }; | 23 | }; |
| 23 | 24 | ||
| 24 | // Copy & move constructors | 25 | // Copy & move constructors |
| @@ -87,6 +88,7 @@ namespace verbly { | |||
| 87 | 88 | ||
| 88 | token(); | 89 | token(); |
| 89 | token(std::vector<part> parts); | 90 | token(std::vector<part> parts); |
| 91 | token(std::initializer_list<token> pieces); | ||
| 90 | 92 | ||
| 91 | iterator begin(); | 93 | iterator begin(); |
| 92 | const_iterator begin() const; | 94 | const_iterator begin() const; |
| @@ -96,7 +98,35 @@ namespace verbly { | |||
| 96 | 98 | ||
| 97 | token& operator<<(token arg); | 99 | token& operator<<(token arg); |
| 98 | 100 | ||
| 101 | // Transform | ||
| 102 | |||
| 103 | static token separator(std::string param, token inner); | ||
| 104 | static token punctuation(std::string param, token inner); | ||
| 105 | static token definiteArticle(token inner); | ||
| 106 | static token capitalize(token inner); | ||
| 107 | |||
| 108 | token& getInnerToken(); | ||
| 109 | const token& getInnerToken() const; | ||
| 110 | |||
| 99 | private: | 111 | private: |
| 112 | |||
| 113 | std::string compileHelper( | ||
| 114 | std::string separator, | ||
| 115 | bool definiteArticle, | ||
| 116 | bool capitalize) const; | ||
| 117 | |||
// Kinds of transform a transform token can apply to its inner token when it
// is compiled:
//   separator        - the transform's parameter replaces the separator
//                      passed down to the inner token.
//   punctuation      - the transform's parameter is appended after the
//                      compiled inner token.
//   definite_article - the inner token is compiled with the article flag
//                      forced on.
//   capitalize       - the inner token is compiled with the capitalize flag
//                      forced on.
| 118 | enum class transform_type { | ||
| 119 | separator, | ||
| 120 | punctuation, | ||
| 121 | definite_article, | ||
| 122 | capitalize | ||
| 123 | }; | ||
| 124 | |||
| 125 | token( | ||
| 126 | transform_type type, | ||
| 127 | std::string param, | ||
| 128 | token inner); | ||
| 129 | |||
| 100 | union { | 130 | union { |
| 101 | struct { | 131 | struct { |
| 102 | word word_; | 132 | word word_; |
| @@ -106,6 +136,11 @@ namespace verbly { | |||
| 106 | part part_; | 136 | part part_; |
| 107 | std::set<std::string> fillin_; | 137 | std::set<std::string> fillin_; |
| 108 | std::list<token> utterance_; | 138 | std::list<token> utterance_; |
| 139 | struct { | ||
| 140 | transform_type type_; | ||
| 141 | std::string param_; | ||
| 142 | std::unique_ptr<token> inner_; | ||
| 143 | } transform_; | ||
| 109 | }; | 144 | }; |
| 110 | type type_; | 145 | type type_; |
| 111 | }; | 146 | }; |
