diff options
author | Kelly Rauchenberger <fefferburbia@gmail.com> | 2017-02-24 11:13:45 -0500 |
---|---|---|
committer | Kelly Rauchenberger <fefferburbia@gmail.com> | 2017-02-24 11:13:45 -0500 |
commit | 59eab842de02b2b2ba8bf53e2214b558457e6356 (patch) | |
tree | d0cf11fddf476c70f2f8d377a7f6777a3f02762b | |
parent | 8916c6911c23ca7ff7e78a90b30a295c2b9b9c22 (diff) | |
download | verbly-59eab842de02b2b2ba8bf53e2214b558457e6356.tar.gz verbly-59eab842de02b2b2ba8bf53e2214b558457e6356.tar.bz2 verbly-59eab842de02b2b2ba8bf53e2214b558457e6356.zip |
Added more casing options to tokens
-rw-r--r-- | lib/token.cpp | 82 | ||||
-rw-r--r-- | lib/token.h | 19 |
2 files changed, 80 insertions, 21 deletions
diff --git a/lib/token.cpp b/lib/token.cpp index 91591d3..ce201b4 100644 --- a/lib/token.cpp +++ b/lib/token.cpp | |||
@@ -50,7 +50,8 @@ namespace verbly { | |||
50 | case type::transform: | 50 | case type::transform: |
51 | { | 51 | { |
52 | transform_.type_ = other.transform_.type_; | 52 | transform_.type_ = other.transform_.type_; |
53 | new(&transform_.param_) std::string(other.transform_.param_); | 53 | new(&transform_.strParam_) std::string(other.transform_.strParam_); |
54 | transform_.casingParam_ = other.transform_.casingParam_; | ||
54 | new(&transform_.inner_) std::unique_ptr<token>(new token(*other.transform_.inner_)); | 55 | new(&transform_.inner_) std::unique_ptr<token>(new token(*other.transform_.inner_)); |
55 | 56 | ||
56 | break; | 57 | break; |
@@ -74,6 +75,7 @@ namespace verbly { | |||
74 | { | 75 | { |
75 | using type = token::type; | 76 | using type = token::type; |
76 | using transform_type = token::transform_type; | 77 | using transform_type = token::transform_type; |
78 | using casing = token::casing; | ||
77 | 79 | ||
78 | type tempType = first.type_; | 80 | type tempType = first.type_; |
79 | word tempWord; | 81 | word tempWord; |
@@ -83,7 +85,8 @@ namespace verbly { | |||
83 | std::set<std::string> tempFillin; | 85 | std::set<std::string> tempFillin; |
84 | std::list<token> tempUtterance; | 86 | std::list<token> tempUtterance; |
85 | transform_type tempTransformType; | 87 | transform_type tempTransformType; |
86 | std::string tempTransformParam; | 88 | std::string tempTransformStrParam; |
89 | casing tempTransformCasingParam; | ||
87 | std::unique_ptr<token> tempTransformInner; | 90 | std::unique_ptr<token> tempTransformInner; |
88 | 91 | ||
89 | switch (tempType) | 92 | switch (tempType) |
@@ -127,7 +130,8 @@ namespace verbly { | |||
127 | case type::transform: | 130 | case type::transform: |
128 | { | 131 | { |
129 | tempTransformType = first.transform_.type_; | 132 | tempTransformType = first.transform_.type_; |
130 | tempTransformParam = std::move(first.transform_.param_); | 133 | tempTransformStrParam = std::move(first.transform_.strParam_); |
134 | tempTransformCasingParam = first.transform_.casingParam_; | ||
131 | tempTransformInner = std::move(first.transform_.inner_); | 135 | tempTransformInner = std::move(first.transform_.inner_); |
132 | 136 | ||
133 | break; | 137 | break; |
@@ -179,7 +183,8 @@ namespace verbly { | |||
179 | case type::transform: | 183 | case type::transform: |
180 | { | 184 | { |
181 | first.transform_.type_ = second.transform_.type_; | 185 | first.transform_.type_ = second.transform_.type_; |
182 | new(&first.transform_.param_) std::string(std::move(second.transform_.param_)); | 186 | new(&first.transform_.strParam_) std::string(std::move(second.transform_.strParam_)); |
187 | first.transform_.casingParam_ = second.transform_.casingParam_; | ||
183 | new(&first.transform_.inner_) std::unique_ptr<token>(std::move(second.transform_.inner_)); | 188 | new(&first.transform_.inner_) std::unique_ptr<token>(std::move(second.transform_.inner_)); |
184 | 189 | ||
185 | break; | 190 | break; |
@@ -231,7 +236,8 @@ namespace verbly { | |||
231 | case type::transform: | 236 | case type::transform: |
232 | { | 237 | { |
233 | second.transform_.type_ = tempTransformType; | 238 | second.transform_.type_ = tempTransformType; |
234 | new(&second.transform_.param_) std::string(std::move(tempTransformParam)); | 239 | new(&second.transform_.strParam_) std::string(std::move(tempTransformStrParam)); |
240 | second.transform_.casingParam_ = tempTransformCasingParam; | ||
235 | new(&second.transform_.inner_) std::unique_ptr<token>(std::move(tempTransformInner)); | 241 | new(&second.transform_.inner_) std::unique_ptr<token>(std::move(tempTransformInner)); |
236 | 242 | ||
237 | break; | 243 | break; |
@@ -286,7 +292,7 @@ namespace verbly { | |||
286 | using string_type = std::string; | 292 | using string_type = std::string; |
287 | using ptr_type = std::unique_ptr<token>; | 293 | using ptr_type = std::unique_ptr<token>; |
288 | 294 | ||
289 | transform_.param_.~string_type(); | 295 | transform_.strParam_.~string_type(); |
290 | transform_.inner_.~ptr_type(); | 296 | transform_.inner_.~ptr_type(); |
291 | 297 | ||
292 | break; | 298 | break; |
@@ -311,13 +317,13 @@ namespace verbly { | |||
311 | 317 | ||
312 | std::string token::compile() const | 318 | std::string token::compile() const |
313 | { | 319 | { |
314 | return compileHelper(" ", false, false); | 320 | return compileHelper(" ", false, casing::normal); |
315 | } | 321 | } |
316 | 322 | ||
317 | std::string token::compileHelper( | 323 | std::string token::compileHelper( |
318 | std::string separator, | 324 | std::string separator, |
319 | bool definiteArticle, | 325 | bool definiteArticle, |
320 | bool capitalize) const | 326 | casing capitalization) const |
321 | { | 327 | { |
322 | switch (type_) | 328 | switch (type_) |
323 | { | 329 | { |
@@ -338,14 +344,24 @@ namespace verbly { | |||
338 | } | 344 | } |
339 | } | 345 | } |
340 | 346 | ||
341 | if (capitalize) | 347 | if ((capitalization == casing::capitalize) || (capitalization == casing::title_case)) |
342 | { | 348 | { |
343 | if (std::isalpha(result[0])) | 349 | if (std::isalpha(result[0])) |
344 | { | 350 | { |
345 | result[0] = std::toupper(result[0]); | 351 | result[0] = std::toupper(result[0]); |
346 | } | 352 | } |
353 | } else if (capitalization == casing::all_caps) | ||
354 | { | ||
355 | for (char& ch : result) | ||
356 | { | ||
357 | if (std::isalpha(ch)) | ||
358 | { | ||
359 | ch = std::toupper(ch); | ||
360 | } | ||
361 | } | ||
347 | } | 362 | } |
348 | 363 | ||
364 | |||
349 | return result; | 365 | return result; |
350 | } | 366 | } |
351 | 367 | ||
@@ -365,12 +381,21 @@ namespace verbly { | |||
365 | } | 381 | } |
366 | } | 382 | } |
367 | 383 | ||
368 | if (capitalize) | 384 | if ((capitalization == casing::capitalize) || (capitalization == casing::title_case)) |
369 | { | 385 | { |
370 | if (std::isalpha(result[0])) | 386 | if (std::isalpha(result[0])) |
371 | { | 387 | { |
372 | result[0] = std::toupper(result[0]); | 388 | result[0] = std::toupper(result[0]); |
373 | } | 389 | } |
390 | } else if (capitalization == casing::all_caps) | ||
391 | { | ||
392 | for (char& ch : result) | ||
393 | { | ||
394 | if (std::isalpha(ch)) | ||
395 | { | ||
396 | ch = std::toupper(ch); | ||
397 | } | ||
398 | } | ||
374 | } | 399 | } |
375 | 400 | ||
376 | return result; | 401 | return result; |
@@ -385,10 +410,16 @@ namespace verbly { | |||
385 | std::list<std::string> compiled; | 410 | std::list<std::string> compiled; |
386 | for (const token& tkn : utterance_) | 411 | for (const token& tkn : utterance_) |
387 | { | 412 | { |
413 | casing propagateCasing = capitalization; | ||
414 | if ((capitalization == casing::capitalize) && (!first)) | ||
415 | { | ||
416 | propagateCasing = casing::normal; | ||
417 | } | ||
418 | |||
388 | compiled.push_back( | 419 | compiled.push_back( |
389 | tkn.compileHelper(" ", | 420 | tkn.compileHelper(" ", |
390 | first && definiteArticle, | 421 | first && definiteArticle, |
391 | first && capitalize)); | 422 | propagateCasing)); |
392 | 423 | ||
393 | first = false; | 424 | first = false; |
394 | } | 425 | } |
@@ -403,23 +434,26 @@ namespace verbly { | |||
403 | case transform_type::separator: | 434 | case transform_type::separator: |
404 | { | 435 | { |
405 | return transform_.inner_->compileHelper( | 436 | return transform_.inner_->compileHelper( |
406 | transform_.param_, definiteArticle, capitalize); | 437 | transform_.strParam_, definiteArticle, capitalization); |
407 | } | 438 | } |
408 | 439 | ||
409 | case transform_type::punctuation: | 440 | case transform_type::punctuation: |
410 | { | 441 | { |
411 | return transform_.inner_->compileHelper( | 442 | return transform_.inner_->compileHelper( |
412 | separator, definiteArticle, capitalize) + transform_.param_; | 443 | separator, definiteArticle, capitalization) + transform_.strParam_; |
413 | } | 444 | } |
414 | 445 | ||
415 | case transform_type::definite_article: | 446 | case transform_type::definite_article: |
416 | { | 447 | { |
417 | return transform_.inner_->compileHelper(separator, true, capitalize); | 448 | return transform_.inner_->compileHelper(separator, true, capitalization); |
418 | } | 449 | } |
419 | 450 | ||
420 | case transform_type::capitalize: | 451 | case transform_type::capitalize: |
421 | { | 452 | { |
422 | return transform_.inner_->compileHelper(separator, definiteArticle, true); | 453 | return transform_.inner_->compileHelper( |
454 | separator, | ||
455 | definiteArticle, | ||
456 | transform_.casingParam_); | ||
423 | } | 457 | } |
424 | } | 458 | } |
425 | } | 459 | } |
@@ -603,9 +637,9 @@ namespace verbly { | |||
603 | return token(transform_type::definite_article, "", std::move(inner)); | 637 | return token(transform_type::definite_article, "", std::move(inner)); |
604 | } | 638 | } |
605 | 639 | ||
606 | token token::capitalize(token inner) | 640 | token token::capitalize(casing param, token inner) |
607 | { | 641 | { |
608 | return token(transform_type::capitalize, "", std::move(inner)); | 642 | return token(transform_type::capitalize, param, std::move(inner)); |
609 | } | 643 | } |
610 | 644 | ||
611 | token& token::getInnerToken() | 645 | token& token::getInnerToken() |
@@ -635,7 +669,19 @@ namespace verbly { | |||
635 | type_(type::transform) | 669 | type_(type::transform) |
636 | { | 670 | { |
637 | transform_.type_ = type; | 671 | transform_.type_ = type; |
638 | new(&transform_.param_) std::string(std::move(param)); | 672 | new(&transform_.strParam_) std::string(std::move(param)); |
673 | new(&transform_.inner_) std::unique_ptr<token>(new token(std::move(inner))); | ||
674 | } | ||
675 | |||
676 | token::token( | ||
677 | transform_type type, | ||
678 | casing param, | ||
679 | token inner) : | ||
680 | type_(type::transform) | ||
681 | { | ||
682 | transform_.type_ = type; | ||
683 | new(&transform_.strParam_) std::string(); | ||
684 | transform_.casingParam_ = param; | ||
639 | new(&transform_.inner_) std::unique_ptr<token>(new token(std::move(inner))); | 685 | new(&transform_.inner_) std::unique_ptr<token>(new token(std::move(inner))); |
640 | } | 686 | } |
641 | 687 | ||
diff --git a/lib/token.h b/lib/token.h index a4072e0..6df2fb5 100644 --- a/lib/token.h +++ b/lib/token.h | |||
@@ -105,10 +105,17 @@ namespace verbly { | |||
105 | 105 | ||
106 | // Transform | 106 | // Transform |
107 | 107 | ||
108 | enum class casing { | ||
109 | normal, | ||
110 | capitalize, | ||
111 | all_caps, | ||
112 | title_case | ||
113 | }; | ||
114 | |||
108 | static token separator(std::string param, token inner); | 115 | static token separator(std::string param, token inner); |
109 | static token punctuation(std::string param, token inner); | 116 | static token punctuation(std::string param, token inner); |
110 | static token definiteArticle(token inner); | 117 | static token definiteArticle(token inner); |
111 | static token capitalize(token inner); | 118 | static token capitalize(casing param, token inner); |
112 | 119 | ||
113 | token& getInnerToken(); | 120 | token& getInnerToken(); |
114 | const token& getInnerToken() const; | 121 | const token& getInnerToken() const; |
@@ -118,7 +125,7 @@ namespace verbly { | |||
118 | std::string compileHelper( | 125 | std::string compileHelper( |
119 | std::string separator, | 126 | std::string separator, |
120 | bool definiteArticle, | 127 | bool definiteArticle, |
121 | bool capitalize) const; | 128 | casing capitalization) const; |
122 | 129 | ||
123 | enum class transform_type { | 130 | enum class transform_type { |
124 | separator, | 131 | separator, |
@@ -132,6 +139,11 @@ namespace verbly { | |||
132 | std::string param, | 139 | std::string param, |
133 | token inner); | 140 | token inner); |
134 | 141 | ||
142 | token( | ||
143 | transform_type type, | ||
144 | casing param, | ||
145 | token inner); | ||
146 | |||
135 | union { | 147 | union { |
136 | struct { | 148 | struct { |
137 | word word_; | 149 | word word_; |
@@ -143,7 +155,8 @@ namespace verbly { | |||
143 | std::list<token> utterance_; | 155 | std::list<token> utterance_; |
144 | struct { | 156 | struct { |
145 | transform_type type_; | 157 | transform_type type_; |
146 | std::string param_; | 158 | std::string strParam_; |
159 | casing casingParam_; | ||
147 | std::unique_ptr<token> inner_; | 160 | std::unique_ptr<token> inner_; |
148 | } transform_; | 161 | } transform_; |
149 | }; | 162 | }; |