diff options
Diffstat (limited to 'patterner.cpp')
-rw-r--r-- | patterner.cpp | 161 |
1 files changed, 96 insertions, 65 deletions
diff --git a/patterner.cpp b/patterner.cpp index af844cf..1deffb8 100644 --- a/patterner.cpp +++ b/patterner.cpp | |||
@@ -47,92 +47,123 @@ patterner::patterner( | |||
47 | std::string patterner::generate() | 47 | std::string patterner::generate() |
48 | { | 48 | { |
49 | std::string action = "{MAIN}"; | 49 | std::string action = "{MAIN}"; |
50 | int tknloc; | ||
51 | while ((tknloc = action.find("{")) != std::string::npos) | ||
52 | { | ||
53 | std::string token = action.substr(tknloc+1, action.find("}")-tknloc-1); | ||
54 | std::string modifier; | ||
55 | int modloc; | ||
56 | if ((modloc = token.find(":")) != std::string::npos) | ||
57 | { | ||
58 | modifier = token.substr(modloc+1); | ||
59 | token = token.substr(0, modloc); | ||
60 | } | ||
61 | 50 | ||
62 | std::string canontkn; | 51 | verbly::filter slurBlacklist = |
63 | std::transform(std::begin(token), std::end(token), | 52 | (verbly::word::usageDomains %= ( |
64 | std::back_inserter(canontkn), [] (char ch) { | 53 | (verbly::notion::wnid == 106718862) // ethnic slur |
65 | return std::toupper(ch); | 54 | || (verbly::notion::wnid == 106717170) // disparagement (other slurs) |
66 | }); | 55 | || (verbly::notion::wnid == 107124340))); // obscenity (other profanity) |
67 | 56 | ||
68 | std::string result; | 57 | while (action == "{MAIN}") |
69 | if (canontkn == "WORD") | 58 | { |
70 | { | 59 | int tknloc; |
71 | result = data_.words( | 60 | while ((tknloc = action.find("{")) != std::string::npos) |
72 | (verbly::word::forms(verbly::inflection::base) %= | ||
73 | (verbly::form::complexity == 1) | ||
74 | && (verbly::form::length == 4) | ||
75 | && (verbly::form::proper == false) | ||
76 | && (verbly::pronunciation::numOfSyllables == 1)) | ||
77 | && !(verbly::word::usageDomains %= | ||
78 | (verbly::notion::wnid == 106718862))) // Blacklist ethnic slurs | ||
79 | .first().getBaseForm().getText(); | ||
80 | } else if (canontkn == "\\N") | ||
81 | { | 61 | { |
82 | result = "\n"; | 62 | std::string token = action.substr(tknloc+1, action.find("}")-tknloc-1); |
83 | } else { | 63 | std::string modifier; |
84 | auto group = groups_[canontkn]; | 64 | int modloc; |
85 | std::uniform_int_distribution<int> groupdist(0, group.size()-1); | 65 | if ((modloc = token.find(":")) != std::string::npos) |
86 | int groupind = groupdist(rng_); | 66 | { |
87 | result = group[groupind]; | 67 | modifier = token.substr(modloc+1); |
88 | } | 68 | token = token.substr(0, modloc); |
69 | } | ||
89 | 70 | ||
90 | if (modifier == "indefinite") | 71 | std::string canontkn; |
91 | { | 72 | std::transform(std::begin(token), std::end(token), |
92 | if ((result.length() > 1) && (isupper(result[0])) && (isupper(result[1]))) | 73 | std::back_inserter(canontkn), [] (char ch) { |
74 | return std::toupper(ch); | ||
75 | }); | ||
76 | |||
77 | std::string result; | ||
78 | if (canontkn == "WORD2") | ||
93 | { | 79 | { |
94 | result = "an " + result; | 80 | result = data_.words( |
95 | } else if ((result[0] == 'a') || (result[0] == 'e') || (result[0] == 'i') || (result[0] == 'o') || (result[0] == 'u')) | 81 | (verbly::notion::partOfSpeech == verbly::part_of_speech::noun) |
82 | && (verbly::word::forms(verbly::inflection::base) %= | ||
83 | (verbly::form::complexity == 1) | ||
84 | && (verbly::form::length == 4) | ||
85 | && (verbly::form::proper == false) | ||
86 | && (verbly::pronunciation::numOfSyllables == 1)) | ||
87 | && !slurBlacklist) | ||
88 | .first().getBaseForm().getText(); | ||
89 | } else if (canontkn == "\\N") | ||
96 | { | 90 | { |
97 | result = "an " + result; | 91 | result = "\n"; |
98 | } else { | 92 | } else { |
99 | result = "a " + result; | 93 | auto group = groups_[canontkn]; |
94 | std::uniform_int_distribution<int> groupdist(0, group.size()-1); | ||
95 | int groupind = groupdist(rng_); | ||
96 | result = group[groupind]; | ||
100 | } | 97 | } |
101 | } | ||
102 | 98 | ||
103 | std::string finalresult; | 99 | if (modifier == "indefinite") |
104 | if (islower(token[0])) | ||
105 | { | ||
106 | std::transform(std::begin(result), std::end(result), std::back_inserter(finalresult), [] (char ch) { | ||
107 | return std::tolower(ch); | ||
108 | }); | ||
109 | } else if (isupper(token[0]) && !isupper(token[1])) | ||
110 | { | ||
111 | auto words = verbly::split<std::list<std::string>>(result, " "); | ||
112 | for (auto& word : words) | ||
113 | { | 100 | { |
114 | if (word[0] == '{') | 101 | if ((result.length() > 1) && (isupper(result[0])) && (isupper(result[1]))) |
115 | { | 102 | { |
116 | word[1] = std::toupper(word[1]); | 103 | result = "an " + result; |
104 | } else if ((result[0] == 'a') || (result[0] == 'e') || (result[0] == 'i') || (result[0] == 'o') || (result[0] == 'u')) | ||
105 | { | ||
106 | result = "an " + result; | ||
107 | } else { | ||
108 | result = "a " + result; | ||
109 | } | ||
110 | } | ||
117 | 111 | ||
118 | for (int k=2; k<word.length(); k++) | 112 | std::string finalresult; |
113 | if (islower(token[0])) | ||
114 | { | ||
115 | std::transform(std::begin(result), std::end(result), std::back_inserter(finalresult), [] (char ch) { | ||
116 | return std::tolower(ch); | ||
117 | }); | ||
118 | } else if (isupper(token[0]) && !isupper(token[1])) | ||
119 | { | ||
120 | auto words = verbly::split<std::list<std::string>>(result, " "); | ||
121 | for (auto& word : words) | ||
122 | { | ||
123 | if (word[0] == '{') | ||
119 | { | 124 | { |
120 | if (std::isalpha(word[k])) | 125 | word[1] = std::toupper(word[1]); |
126 | |||
127 | for (int k=2; k<word.length(); k++) | ||
121 | { | 128 | { |
122 | word[k] = std::tolower(word[k]); | 129 | if (std::isalpha(word[k])) |
130 | { | ||
131 | word[k] = std::tolower(word[k]); | ||
132 | } | ||
123 | } | 133 | } |
134 | } else { | ||
135 | word[0] = std::toupper(word[0]); | ||
124 | } | 136 | } |
125 | } else { | ||
126 | word[0] = std::toupper(word[0]); | ||
127 | } | 137 | } |
138 | |||
139 | finalresult = verbly::implode(std::begin(words), std::end(words), " "); | ||
140 | } else { | ||
141 | finalresult = result; | ||
128 | } | 142 | } |
129 | 143 | ||
130 | finalresult = verbly::implode(std::begin(words), std::end(words), " "); | 144 | action.replace(tknloc, action.find("}")-tknloc+1, finalresult); |
131 | } else { | ||
132 | finalresult = result; | ||
133 | } | 145 | } |
134 | 146 | ||
135 | action.replace(tknloc, action.find("}")-tknloc+1, finalresult); | 147 | std::string canonical; |
148 | std::transform(std::begin(action), std::end(action), | ||
149 | std::back_inserter(canonical), [] (char ch) | ||
150 | { | ||
151 | return std::tolower(ch); | ||
152 | }); | ||
153 | |||
154 | std::list<std::string> words = | ||
155 | verbly::split<std::list<std::string>>(canonical, " "); | ||
156 | |||
157 | for (std::string word : words) | ||
158 | { | ||
159 | if (!data_.forms( | ||
160 | (verbly::form::text == word) | ||
161 | && slurBlacklist).all().empty()) | ||
162 | { | ||
163 | action = "{MAIN}"; | ||
164 | break; | ||
165 | } | ||
166 | } | ||
136 | } | 167 | } |
137 | 168 | ||
138 | return action; | 169 | return action; |