summary refs log tree commit diff stats
path: root/lib
diff options
context:
space:
mode:
author Kelly Rauchenberger <fefferburbia@gmail.com> 2017-02-16 20:03:48 -0500
committer Kelly Rauchenberger <fefferburbia@gmail.com> 2017-02-16 20:03:48 -0500
commit 49d6f0387f7b7cc3c6a51ef8fcdf519da98487f6 (patch)
tree bfddbfdd38317e4f66c4600729b7ea85a94f1061 /lib
parent 7cb714b57d1cbebb572e2279e2058f5fc94ba171 (diff)
download verbly-49d6f0387f7b7cc3c6a51ef8fcdf519da98487f6.tar.gz
verbly-49d6f0387f7b7cc3c6a51ef8fcdf519da98487f6.tar.bz2
verbly-49d6f0387f7b7cc3c6a51ef8fcdf519da98487f6.zip
Added transform tokens
Diffstat (limited to 'lib')
-rw-r--r-- lib/token.cpp 212
-rw-r--r-- lib/token.h 37
2 files changed, 244 insertions, 5 deletions
diff --git a/lib/token.cpp b/lib/token.cpp index 735aa7e..91591d3 100644 --- a/lib/token.cpp +++ b/lib/token.cpp
@@ -46,6 +46,15 @@ namespace verbly {
46 46
47 break; 47 break;
48 } 48 }
49
50 case type::transform:
51 {
52 transform_.type_ = other.transform_.type_;
53 new(&transform_.param_) std::string(other.transform_.param_);
54 new(&transform_.inner_) std::unique_ptr<token>(new token(*other.transform_.inner_));
55
56 break;
57 }
49 } 58 }
50 } 59 }
51 60
@@ -64,6 +73,7 @@ namespace verbly {
64 void swap(token& first, token& second) 73 void swap(token& first, token& second)
65 { 74 {
66 using type = token::type; 75 using type = token::type;
76 using transform_type = token::transform_type;
67 77
68 type tempType = first.type_; 78 type tempType = first.type_;
69 word tempWord; 79 word tempWord;
@@ -72,6 +82,9 @@ namespace verbly {
72 part tempPart; 82 part tempPart;
73 std::set<std::string> tempFillin; 83 std::set<std::string> tempFillin;
74 std::list<token> tempUtterance; 84 std::list<token> tempUtterance;
85 transform_type tempTransformType;
86 std::string tempTransformParam;
87 std::unique_ptr<token> tempTransformInner;
75 88
76 switch (tempType) 89 switch (tempType)
77 { 90 {
@@ -110,6 +123,15 @@ namespace verbly {
110 123
111 break; 124 break;
112 } 125 }
126
127 case type::transform:
128 {
129 tempTransformType = first.transform_.type_;
130 tempTransformParam = std::move(first.transform_.param_);
131 tempTransformInner = std::move(first.transform_.inner_);
132
133 break;
134 }
113 } 135 }
114 136
115 first.~token(); 137 first.~token();
@@ -153,6 +175,15 @@ namespace verbly {
153 175
154 break; 176 break;
155 } 177 }
178
179 case type::transform:
180 {
181 first.transform_.type_ = second.transform_.type_;
182 new(&first.transform_.param_) std::string(std::move(second.transform_.param_));
183 new(&first.transform_.inner_) std::unique_ptr<token>(std::move(second.transform_.inner_));
184
185 break;
186 }
156 } 187 }
157 188
158 second.~token(); 189 second.~token();
@@ -196,6 +227,15 @@ namespace verbly {
196 227
197 break; 228 break;
198 } 229 }
230
231 case type::transform:
232 {
233 second.transform_.type_ = tempTransformType;
234 new(&second.transform_.param_) std::string(std::move(tempTransformParam));
235 new(&second.transform_.inner_) std::unique_ptr<token>(std::move(tempTransformInner));
236
237 break;
238 }
199 } 239 }
200 } 240 }
201 241
@@ -240,6 +280,17 @@ namespace verbly {
240 280
241 break; 281 break;
242 } 282 }
283
284 case type::transform:
285 {
286 using string_type = std::string;
287 using ptr_type = std::unique_ptr<token>;
288
289 transform_.param_.~string_type();
290 transform_.inner_.~ptr_type();
291
292 break;
293 }
243 } 294 }
244 } 295 }
245 296
@@ -254,27 +305,123 @@ namespace verbly {
254 case type::utterance: return std::all_of(std::begin(utterance_), std::end(utterance_), [] (const token& tkn) { 305 case type::utterance: return std::all_of(std::begin(utterance_), std::end(utterance_), [] (const token& tkn) {
255 return tkn.isComplete(); 306 return tkn.isComplete();
256 }); 307 });
308 case type::transform: return transform_.inner_->isComplete();
257 } 309 }
258 } 310 }
259 311
260 std::string token::compile() const 312 std::string token::compile() const
261 { 313 {
314 return compileHelper(" ", false, false);
315 }
316
317 std::string token::compileHelper(
318 std::string separator,
319 bool definiteArticle,
320 bool capitalize) const
321 {
262 switch (type_) 322 switch (type_)
263 { 323 {
264 case type::word: return word_.word_.getInflections(word_.category_).front().getText(); 324 case type::word:
265 case type::literal: return literal_; 325 {
326 const form& wordForm = word_.word_.getInflections(word_.category_)
327 .front();
328
329 std::string result = wordForm.getText();
330
331 if (definiteArticle)
332 {
333 if (wordForm.startsWithVowelSound())
334 {
335 result = "an " + result;
336 } else {
337 result = "a " + result;
338 }
339 }
340
341 if (capitalize)
342 {
343 if (std::isalpha(result[0]))
344 {
345 result[0] = std::toupper(result[0]);
346 }
347 }
348
349 return result;
350 }
351
352 case type::literal:
353 {
354 std::string result = literal_;
355
356 if (definiteArticle && std::isalpha(result[0]))
357 {
358 char canon = std::tolower(result[0]);
359 if ((canon == 'a') || (canon == 'e') || (canon == 'i')
360 || (canon == 'o') || (canon == 'u'))
361 {
362 result = "an " + result;
363 } else {
364 result = "a " + result;
365 }
366 }
367
368 if (capitalize)
369 {
370 if (std::isalpha(result[0]))
371 {
372 result[0] = std::toupper(result[0]);
373 }
374 }
375
376 return result;
377 }
378
266 case type::part: throw std::domain_error("Cannot compile incomplete token"); 379 case type::part: throw std::domain_error("Cannot compile incomplete token");
267 case type::fillin: throw std::domain_error("Cannot compile incomplete token"); 380 case type::fillin: throw std::domain_error("Cannot compile incomplete token");
268 381
269 case type::utterance: 382 case type::utterance:
270 { 383 {
384 bool first = true;
271 std::list<std::string> compiled; 385 std::list<std::string> compiled;
272 for (const token& tkn : utterance_) 386 for (const token& tkn : utterance_)
273 { 387 {
274 compiled.push_back(tkn.compile()); 388 compiled.push_back(
389 tkn.compileHelper(" ",
390 first && definiteArticle,
391 first && capitalize));
392
393 first = false;
275 } 394 }
276 395
277 return implode(std::begin(compiled), std::end(compiled), " "); 396 return implode(std::begin(compiled), std::end(compiled), separator);
397 }
398
399 case type::transform:
400 {
401 switch (transform_.type_)
402 {
403 case transform_type::separator:
404 {
405 return transform_.inner_->compileHelper(
406 transform_.param_, definiteArticle, capitalize);
407 }
408
409 case transform_type::punctuation:
410 {
411 return transform_.inner_->compileHelper(
412 separator, definiteArticle, capitalize) + transform_.param_;
413 }
414
415 case transform_type::definite_article:
416 {
417 return transform_.inner_->compileHelper(separator, true, capitalize);
418 }
419
420 case transform_type::capitalize:
421 {
422 return transform_.inner_->compileHelper(separator, definiteArticle, true);
423 }
424 }
278 } 425 }
279 } 426 }
280 } 427 }
@@ -384,6 +531,11 @@ namespace verbly {
384 new(&utterance_) std::list<token>(std::begin(parts), std::end(parts)); 531 new(&utterance_) std::list<token>(std::begin(parts), std::end(parts));
385 } 532 }
386 533
534 token::token(std::initializer_list<token> parts) : type_(type::utterance)
535 {
536 new(&utterance_) std::list<token>(std::move(parts));
537 }
538
387 token::iterator token::begin() 539 token::iterator token::begin()
388 { 540 {
389 if (type_ != type::utterance) 541 if (type_ != type::utterance)
@@ -436,6 +588,57 @@ namespace verbly {
436 return *this; 588 return *this;
437 } 589 }
438 590
591 token token::separator(std::string param, token inner)
592 {
593 return token(transform_type::separator, std::move(param), std::move(inner));
594 }
595
596 token token::punctuation(std::string param, token inner)
597 {
598 return token(transform_type::punctuation, std::move(param), std::move(inner));
599 }
600
601 token token::definiteArticle(token inner)
602 {
603 return token(transform_type::definite_article, "", std::move(inner));
604 }
605
606 token token::capitalize(token inner)
607 {
608 return token(transform_type::capitalize, "", std::move(inner));
609 }
610
611 token& token::getInnerToken()
612 {
613 if (type_ != type::transform)
614 {
615 throw std::domain_error("Invalid access on non-tranform token");
616 }
617
618 return *transform_.inner_;
619 }
620
621 const token& token::getInnerToken() const
622 {
623 if (type_ != type::transform)
624 {
625 throw std::domain_error("Invalid access on non-tranform token");
626 }
627
628 return *transform_.inner_;
629 }
630
631 token::token(
632 transform_type type,
633 std::string param,
634 token inner) :
635 type_(type::transform)
636 {
637 transform_.type_ = type;
638 new(&transform_.param_) std::string(std::move(param));
639 new(&transform_.inner_) std::unique_ptr<token>(new token(std::move(inner)));
640 }
641
439 std::ostream& operator<<(std::ostream& os, token::type type) 642 std::ostream& operator<<(std::ostream& os, token::type type)
440 { 643 {
441 switch (type) 644 switch (type)
@@ -445,6 +648,7 @@ namespace verbly {
445 case token::type::part: return os << "part"; 648 case token::type::part: return os << "part";
446 case token::type::fillin: return os << "fillin"; 649 case token::type::fillin: return os << "fillin";
447 case token::type::utterance: return os << "utterance"; 650 case token::type::utterance: return os << "utterance";
651 case token::type::transform: return os << "transform";
448 } 652 }
449 } 653 }
450 654
diff --git a/lib/token.h b/lib/token.h index e7f8c28..f3188c9 100644 --- a/lib/token.h +++ b/lib/token.h
@@ -18,7 +18,8 @@ namespace verbly {
18 literal, 18 literal,
19 part, 19 part,
20 fillin, 20 fillin,
21 utterance 21 utterance,
22 transform
22 }; 23 };
23 24
24 // Copy & move constructors 25 // Copy & move constructors
@@ -87,6 +88,7 @@ namespace verbly {
87 88
88 token(); 89 token();
89 token(std::vector<part> parts); 90 token(std::vector<part> parts);
91 token(std::initializer_list<token> pieces);
90 92
91 iterator begin(); 93 iterator begin();
92 const_iterator begin() const; 94 const_iterator begin() const;
@@ -96,7 +98,35 @@ namespace verbly {
96 98
97 token& operator<<(token arg); 99 token& operator<<(token arg);
98 100
101 // Transform
102
103 static token separator(std::string param, token inner);
104 static token punctuation(std::string param, token inner);
105 static token definiteArticle(token inner);
106 static token capitalize(token inner);
107
108 token& getInnerToken();
109 const token& getInnerToken() const;
110
99 private: 111 private:
112
113 std::string compileHelper(
114 std::string separator,
115 bool definiteArticle,
116 bool capitalize) const;
117
118 enum class transform_type {
119 separator,
120 punctuation,
121 definite_article,
122 capitalize
123 };
124
125 token(
126 transform_type type,
127 std::string param,
128 token inner);
129
100 union { 130 union {
101 struct { 131 struct {
102 word word_; 132 word word_;
@@ -106,6 +136,11 @@ namespace verbly {
106 part part_; 136 part part_;
107 std::set<std::string> fillin_; 137 std::set<std::string> fillin_;
108 std::list<token> utterance_; 138 std::list<token> utterance_;
139 struct {
140 transform_type type_;
141 std::string param_;
142 std::unique_ptr<token> inner_;
143 } transform_;
109 }; 144 };
110 type type_; 145 type type_;
111 }; 146 };