summary refs log tree commit diff stats
path: root/patterner.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'patterner.cpp')
-rw-r--r--patterner.cpp138
1 files changed, 138 insertions, 0 deletions
diff --git a/patterner.cpp b/patterner.cpp new file mode 100644 index 0000000..2c92428 --- /dev/null +++ b/patterner.cpp
@@ -0,0 +1,138 @@
#include "patterner.h"
#include <algorithm>
#include <cctype>
#include <fstream>
#include <iterator>
#include <list>
#include <random>
#include <stdexcept>
#include <string>
4
5patterner::patterner(
6 std::string datapath,
7 verbly::database& data,
8 std::mt19937& rng) :
9 data_(data),
10 rng_(rng)
11{
12 std::ifstream datafile(datapath);
13 if (!datafile.is_open())
14 {
15 throw std::invalid_argument("Could not find datafile");
16 }
17
18 bool newgroup = true;
19 std::string line;
20 std::list<std::string> curgroups;
21 while (getline(datafile, line))
22 {
23 if (line.back() == '\r')
24 {
25 line.pop_back();
26 }
27
28 if (newgroup)
29 {
30 curgroups = verbly::split<std::list<std::string>>(line, ",");
31 newgroup = false;
32 } else {
33 if (line.empty())
34 {
35 newgroup = true;
36 } else {
37 for (std::string curgroup : curgroups)
38 {
39 groups_[curgroup].push_back(line);
40 }
41 }
42 }
43 }
44}
45
46std::string patterner::generate()
47{
48 std::string action = "{MAIN}";
49 int tknloc;
50 while ((tknloc = action.find("{")) != std::string::npos)
51 {
52 std::string token = action.substr(tknloc+1, action.find("}")-tknloc-1);
53 std::string modifier;
54 int modloc;
55 if ((modloc = token.find(":")) != std::string::npos)
56 {
57 modifier = token.substr(modloc+1);
58 token = token.substr(0, modloc);
59 }
60
61 std::string canontkn;
62 std::transform(std::begin(token), std::end(token),
63 std::back_inserter(canontkn), [] (char ch) {
64 return std::toupper(ch);
65 });
66
67 std::string result;
68 if (canontkn == "WORD")
69 {
70 result = data_.words(
71 (verbly::word::forms(verbly::inflection::base) %=
72 (verbly::form::complexity == 1)
73 && (verbly::form::length == 4)
74 && (verbly::form::proper == false)
75 && (verbly::pronunciation::numOfSyllables == 1))
76 && !(verbly::word::usageDomains %=
77 (verbly::notion::wnid == 106718862))) // Blacklist ethnic slurs
78 .first().getBaseForm().getText();
79 } else if (canontkn == "\\N")
80 {
81 result = "\n";
82 } else {
83 auto group = groups_[canontkn];
84 std::uniform_int_distribution<int> groupdist(0, group.size()-1);
85 int groupind = groupdist(rng_);
86 result = group[groupind];
87 }
88
89 if (modifier == "indefinite")
90 {
91 if ((result.length() > 1) && (isupper(result[0])) && (isupper(result[1])))
92 {
93 result = "an " + result;
94 } else if ((result[0] == 'a') || (result[0] == 'e') || (result[0] == 'i') || (result[0] == 'o') || (result[0] == 'u'))
95 {
96 result = "an " + result;
97 } else {
98 result = "a " + result;
99 }
100 }
101
102 std::string finalresult;
103 if (islower(token[0]))
104 {
105 std::transform(std::begin(result), std::end(result), std::back_inserter(finalresult), [] (char ch) {
106 return std::tolower(ch);
107 });
108 } else if (isupper(token[0]) && !isupper(token[1]))
109 {
110 auto words = verbly::split<std::list<std::string>>(result, " ");
111 for (auto& word : words)
112 {
113 if (word[0] == '{')
114 {
115 word[1] = std::toupper(word[1]);
116
117 for (int k=2; k<word.length(); k++)
118 {
119 if (std::isalpha(word[k]))
120 {
121 word[k] = std::tolower(word[k]);
122 }
123 }
124 } else {
125 word[0] = std::toupper(word[0]);
126 }
127 }
128
129 finalresult = verbly::implode(std::begin(words), std::end(words), " ");
130 } else {
131 finalresult = result;
132 }
133
134 action.replace(tknloc, action.find("}")-tknloc+1, finalresult);
135 }
136
137 return action;
138}