diff options
Diffstat (limited to 'patterner.cpp')
-rw-r--r-- | patterner.cpp | 138 |
1 files changed, 138 insertions, 0 deletions
diff --git a/patterner.cpp b/patterner.cpp new file mode 100644 index 0000000..2c92428 --- /dev/null +++ b/patterner.cpp | |||
@@ -0,0 +1,138 @@ | |||
1 | #include "patterner.h" | ||
2 | #include <fstream> | ||
3 | #include <stdexcept> | ||
4 | |||
5 | patterner::patterner( | ||
6 | std::string datapath, | ||
7 | verbly::database& data, | ||
8 | std::mt19937& rng) : | ||
9 | data_(data), | ||
10 | rng_(rng) | ||
11 | { | ||
12 | std::ifstream datafile(datapath); | ||
13 | if (!datafile.is_open()) | ||
14 | { | ||
15 | throw std::invalid_argument("Could not find datafile"); | ||
16 | } | ||
17 | |||
18 | bool newgroup = true; | ||
19 | std::string line; | ||
20 | std::list<std::string> curgroups; | ||
21 | while (getline(datafile, line)) | ||
22 | { | ||
23 | if (line.back() == '\r') | ||
24 | { | ||
25 | line.pop_back(); | ||
26 | } | ||
27 | |||
28 | if (newgroup) | ||
29 | { | ||
30 | curgroups = verbly::split<std::list<std::string>>(line, ","); | ||
31 | newgroup = false; | ||
32 | } else { | ||
33 | if (line.empty()) | ||
34 | { | ||
35 | newgroup = true; | ||
36 | } else { | ||
37 | for (std::string curgroup : curgroups) | ||
38 | { | ||
39 | groups_[curgroup].push_back(line); | ||
40 | } | ||
41 | } | ||
42 | } | ||
43 | } | ||
44 | } | ||
45 | |||
46 | std::string patterner::generate() | ||
47 | { | ||
48 | std::string action = "{MAIN}"; | ||
49 | int tknloc; | ||
50 | while ((tknloc = action.find("{")) != std::string::npos) | ||
51 | { | ||
52 | std::string token = action.substr(tknloc+1, action.find("}")-tknloc-1); | ||
53 | std::string modifier; | ||
54 | int modloc; | ||
55 | if ((modloc = token.find(":")) != std::string::npos) | ||
56 | { | ||
57 | modifier = token.substr(modloc+1); | ||
58 | token = token.substr(0, modloc); | ||
59 | } | ||
60 | |||
61 | std::string canontkn; | ||
62 | std::transform(std::begin(token), std::end(token), | ||
63 | std::back_inserter(canontkn), [] (char ch) { | ||
64 | return std::toupper(ch); | ||
65 | }); | ||
66 | |||
67 | std::string result; | ||
68 | if (canontkn == "WORD") | ||
69 | { | ||
70 | result = data_.words( | ||
71 | (verbly::word::forms(verbly::inflection::base) %= | ||
72 | (verbly::form::complexity == 1) | ||
73 | && (verbly::form::length == 4) | ||
74 | && (verbly::form::proper == false) | ||
75 | && (verbly::pronunciation::numOfSyllables == 1)) | ||
76 | && !(verbly::word::usageDomains %= | ||
77 | (verbly::notion::wnid == 106718862))) // Blacklist ethnic slurs | ||
78 | .first().getBaseForm().getText(); | ||
79 | } else if (canontkn == "\\N") | ||
80 | { | ||
81 | result = "\n"; | ||
82 | } else { | ||
83 | auto group = groups_[canontkn]; | ||
84 | std::uniform_int_distribution<int> groupdist(0, group.size()-1); | ||
85 | int groupind = groupdist(rng_); | ||
86 | result = group[groupind]; | ||
87 | } | ||
88 | |||
89 | if (modifier == "indefinite") | ||
90 | { | ||
91 | if ((result.length() > 1) && (isupper(result[0])) && (isupper(result[1]))) | ||
92 | { | ||
93 | result = "an " + result; | ||
94 | } else if ((result[0] == 'a') || (result[0] == 'e') || (result[0] == 'i') || (result[0] == 'o') || (result[0] == 'u')) | ||
95 | { | ||
96 | result = "an " + result; | ||
97 | } else { | ||
98 | result = "a " + result; | ||
99 | } | ||
100 | } | ||
101 | |||
102 | std::string finalresult; | ||
103 | if (islower(token[0])) | ||
104 | { | ||
105 | std::transform(std::begin(result), std::end(result), std::back_inserter(finalresult), [] (char ch) { | ||
106 | return std::tolower(ch); | ||
107 | }); | ||
108 | } else if (isupper(token[0]) && !isupper(token[1])) | ||
109 | { | ||
110 | auto words = verbly::split<std::list<std::string>>(result, " "); | ||
111 | for (auto& word : words) | ||
112 | { | ||
113 | if (word[0] == '{') | ||
114 | { | ||
115 | word[1] = std::toupper(word[1]); | ||
116 | |||
117 | for (int k=2; k<word.length(); k++) | ||
118 | { | ||
119 | if (std::isalpha(word[k])) | ||
120 | { | ||
121 | word[k] = std::tolower(word[k]); | ||
122 | } | ||
123 | } | ||
124 | } else { | ||
125 | word[0] = std::toupper(word[0]); | ||
126 | } | ||
127 | } | ||
128 | |||
129 | finalresult = verbly::implode(std::begin(words), std::end(words), " "); | ||
130 | } else { | ||
131 | finalresult = result; | ||
132 | } | ||
133 | |||
134 | action.replace(tknloc, action.find("}")-tknloc+1, finalresult); | ||
135 | } | ||
136 | |||
137 | return action; | ||
138 | } | ||