diff options
Diffstat (limited to 'sentence.cpp')
| -rw-r--r-- | sentence.cpp | 297 |
1 files changed, 124 insertions, 173 deletions
| diff --git a/sentence.cpp b/sentence.cpp index c31d226..83592d9 100644 --- a/sentence.cpp +++ b/sentence.cpp | |||
| @@ -101,179 +101,23 @@ std::string sentence::generate() const | |||
| 101 | return verbly::implode(std::begin(words), std::end(words), " "); | 101 | return verbly::implode(std::begin(words), std::end(words), " "); |
| 102 | } | 102 | } |
| 103 | 103 | ||
| 104 | verbly::filter sentence::parseSelrestrs( | 104 | bool sentence::chooseSelrestr(std::set<std::string> selrestrs, std::set<std::string> choices) const |
| 105 | verbly::selrestr selrestr) const | ||
| 106 | { | 105 | { |
| 107 | switch (selrestr.getType()) | 106 | int validChoices = 0; |
| 107 | for (const std::string& choice : choices) | ||
| 108 | { | 108 | { |
| 109 | case verbly::selrestr::type::empty: | 109 | if (selrestrs.count(choice)) |
| 110 | { | 110 | { |
| 111 | return {}; | 111 | validChoices++; |
| 112 | } | ||
| 113 | |||
| 114 | case verbly::selrestr::type::singleton: | ||
| 115 | { | ||
| 116 | verbly::filter result; | ||
| 117 | |||
| 118 | if (selrestr.getRestriction() == "concrete") | ||
| 119 | { | ||
| 120 | result = (verbly::notion::wnid == 100001930); // physical entity | ||
| 121 | } else if (selrestr.getRestriction() == "time") | ||
| 122 | { | ||
| 123 | result = (verbly::notion::wnid == 100028270); // time | ||
| 124 | } else if (selrestr.getRestriction() == "state") | ||
| 125 | { | ||
| 126 | result = (verbly::notion::wnid == 100024720); // state | ||
| 127 | } else if (selrestr.getRestriction() == "abstract") | ||
| 128 | { | ||
| 129 | result = (verbly::notion::wnid == 100002137); // abstract entity | ||
| 130 | } else if (selrestr.getRestriction() == "scalar") | ||
| 131 | { | ||
| 132 | result = (verbly::notion::wnid == 103835412); // number | ||
| 133 | } else if (selrestr.getRestriction() == "currency") | ||
| 134 | { | ||
| 135 | result = (verbly::notion::wnid == 105050379); // currency | ||
| 136 | } else if (selrestr.getRestriction() == "location") | ||
| 137 | { | ||
| 138 | result = (verbly::notion::wnid == 100027167); // location | ||
| 139 | } else if (selrestr.getRestriction() == "organization") | ||
| 140 | { | ||
| 141 | result = (verbly::notion::wnid == 100237078); // organization | ||
| 142 | } else if (selrestr.getRestriction() == "int_control") | ||
| 143 | { | ||
| 144 | result = (verbly::notion::wnid == 100007347); // causal agent | ||
| 145 | } else if (selrestr.getRestriction() == "natural") | ||
| 146 | { | ||
| 147 | result = (verbly::notion::wnid == 100019128); // natural object | ||
| 148 | } else if (selrestr.getRestriction() == "phys_obj") | ||
| 149 | { | ||
| 150 | result = (verbly::notion::wnid == 100002684); // physical object | ||
| 151 | } else if (selrestr.getRestriction() == "solid") | ||
| 152 | { | ||
| 153 | result = (verbly::notion::wnid == 113860793); // solid | ||
| 154 | } else if (selrestr.getRestriction() == "shape") | ||
| 155 | { | ||
| 156 | result = (verbly::notion::wnid == 100027807); // shape | ||
| 157 | } else if (selrestr.getRestriction() == "substance") | ||
| 158 | { | ||
| 159 | result = (verbly::notion::wnid == 100019613); // substance | ||
| 160 | } else if (selrestr.getRestriction() == "idea") | ||
| 161 | { | ||
| 162 | result = (verbly::notion::wnid == 105803379); // idea | ||
| 163 | } else if (selrestr.getRestriction() == "sound") | ||
| 164 | { | ||
| 165 | result = (verbly::notion::wnid == 107111047); // sound | ||
| 166 | } else if (selrestr.getRestriction() == "communication") | ||
| 167 | { | ||
| 168 | result = (verbly::notion::wnid == 100033020); // communication | ||
| 169 | } else if (selrestr.getRestriction() == "region") | ||
| 170 | { | ||
| 171 | result = (verbly::notion::wnid == 105221895); // region | ||
| 172 | } else if (selrestr.getRestriction() == "place") | ||
| 173 | { | ||
| 174 | result = (verbly::notion::wnid == 100586262); // place | ||
| 175 | } else if (selrestr.getRestriction() == "machine") | ||
| 176 | { | ||
| 177 | result = (verbly::notion::wnid == 102958343); // machine | ||
| 178 | } else if (selrestr.getRestriction() == "animate") | ||
| 179 | { | ||
| 180 | result = (verbly::notion::wnid == 100004258); // animate thing | ||
| 181 | } else if (selrestr.getRestriction() == "plant") | ||
| 182 | { | ||
| 183 | result = (verbly::notion::wnid == 103956922); // plant | ||
| 184 | } else if (selrestr.getRestriction() == "comestible") | ||
| 185 | { | ||
| 186 | result = (verbly::notion::wnid == 100021265); // food | ||
| 187 | } else if (selrestr.getRestriction() == "artifact") | ||
| 188 | { | ||
| 189 | result = (verbly::notion::wnid == 100021939); // artifact | ||
| 190 | } else if (selrestr.getRestriction() == "vehicle") | ||
| 191 | { | ||
| 192 | result = (verbly::notion::wnid == 104524313); // vehicle | ||
| 193 | } else if (selrestr.getRestriction() == "human") | ||
| 194 | { | ||
| 195 | result = (verbly::notion::wnid == 100007846); // person | ||
| 196 | } else if (selrestr.getRestriction() == "animal") | ||
| 197 | { | ||
| 198 | result = (verbly::notion::wnid == 100015388); // animal | ||
| 199 | } else if (selrestr.getRestriction() == "body_part") | ||
| 200 | { | ||
| 201 | result = (verbly::notion::wnid == 105220461); // body part | ||
| 202 | } else if (selrestr.getRestriction() == "garment") | ||
| 203 | { | ||
| 204 | result = (verbly::notion::wnid == 103051540); // clothing | ||
| 205 | } else if (selrestr.getRestriction() == "tool") | ||
| 206 | { | ||
| 207 | result = (verbly::notion::wnid == 104451818); // tool | ||
| 208 | } else { | ||
| 209 | return {}; | ||
| 210 | } | ||
| 211 | |||
| 212 | std::cout << selrestr.getRestriction() << " (" << selrestr.getPos() << ")" << std::endl; | ||
| 213 | |||
| 214 | if (selrestr.getPos()) | ||
| 215 | { | ||
| 216 | return (verbly::notion::fullHypernyms %= result); | ||
| 217 | } else { | ||
| 218 | return !(verbly::notion::fullHypernyms %= result); | ||
| 219 | } | ||
| 220 | } | ||
| 221 | |||
| 222 | case verbly::selrestr::type::group: | ||
| 223 | { | ||
| 224 | std::cout << "or: " << selrestr.getOrlogic() << std::endl; | ||
| 225 | verbly::filter ret(selrestr.getOrlogic()); | ||
| 226 | |||
| 227 | for (const verbly::selrestr& child : selrestr) | ||
| 228 | { | ||
| 229 | ret += parseSelrestrs(child); | ||
| 230 | } | ||
| 231 | |||
| 232 | return ret; | ||
| 233 | } | ||
| 234 | } | ||
| 235 | } | ||
| 236 | |||
| 237 | bool sentence::requiresSelrestr( | ||
| 238 | std::string restriction, | ||
| 239 | verbly::selrestr selrestr) const | ||
| 240 | { | ||
| 241 | switch (selrestr.getType()) | ||
| 242 | { | ||
| 243 | case verbly::selrestr::type::empty: | ||
| 244 | { | ||
| 245 | return false; | ||
| 246 | } | ||
| 247 | |||
| 248 | case verbly::selrestr::type::singleton: | ||
| 249 | { | ||
| 250 | if (selrestr.getRestriction() == restriction) | ||
| 251 | { | ||
| 252 | return selrestr.getPos(); | ||
| 253 | } else { | ||
| 254 | return false; | ||
| 255 | } | ||
| 256 | } | ||
| 257 | |||
| 258 | case verbly::selrestr::type::group: | ||
| 259 | { | ||
| 260 | if (selrestr.getOrlogic()) | ||
| 261 | { | ||
| 262 | return std::all_of(std::begin(selrestr), std::end(selrestr), [=] (const verbly::selrestr& s) { | ||
| 263 | return requiresSelrestr(restriction, s); | ||
| 264 | }); | ||
| 265 | } else { | ||
| 266 | return std::any_of(std::begin(selrestr), std::end(selrestr), [=] (const verbly::selrestr& s) { | ||
| 267 | return requiresSelrestr(restriction, s); | ||
| 268 | }); | ||
| 269 | } | ||
| 270 | } | 112 | } |
| 271 | } | 113 | } |
| 114 | |||
| 115 | return std::bernoulli_distribution(static_cast<double>(validChoices)/static_cast<double>(selrestrs.size()))(rng_); | ||
| 272 | } | 116 | } |
| 273 | 117 | ||
| 274 | verbly::word sentence::generateStandardNoun( | 118 | verbly::word sentence::generateStandardNoun( |
| 275 | std::string role, | 119 | std::string role, |
| 276 | verbly::selrestr selrestrs) const | 120 | std::set<std::string> selrestrs) const |
| 277 | { | 121 | { |
| 278 | std::geometric_distribution<int> tagdist(0.5); // 0.06 | 122 | std::geometric_distribution<int> tagdist(0.5); // 0.06 |
| 279 | std::vector<verbly::word> result; | 123 | std::vector<verbly::word> result; |
| @@ -292,11 +136,122 @@ verbly::word sentence::generateStandardNoun( | |||
| 292 | // Only use selection restrictions for a first attempt. | 136 | // Only use selection restrictions for a first attempt. |
| 293 | if (trySelection) | 137 | if (trySelection) |
| 294 | { | 138 | { |
| 295 | verbly::filter selrestrCondition = parseSelrestrs(selrestrs).compact(); | 139 | verbly::filter selection(true); |
| 296 | 140 | ||
| 297 | if (selrestrCondition.getType() != verbly::filter::type::empty) | 141 | for (const std::string& selrestr : selrestrs) |
| 298 | { | 142 | { |
| 299 | condition &= std::move(selrestrCondition); | 143 | if (selrestr == "concrete") |
| 144 | { | ||
| 145 | selection += (verbly::notion::wnid == 100001930); // physical entity | ||
| 146 | } else if (selrestr == "time") | ||
| 147 | { | ||
| 148 | selection += (verbly::notion::wnid == 100028270); // time | ||
| 149 | } else if (selrestr == "state") | ||
| 150 | { | ||
| 151 | selection += (verbly::notion::wnid == 100024720); // state | ||
| 152 | } else if (selrestr == "abstract") | ||
| 153 | { | ||
| 154 | selection += (verbly::notion::wnid == 100002137); // abstract entity | ||
| 155 | } else if (selrestr == "scalar") | ||
| 156 | { | ||
| 157 | selection += (verbly::notion::wnid == 103835412); // number | ||
| 158 | } else if (selrestr == "currency") | ||
| 159 | { | ||
| 160 | selection += (verbly::notion::wnid == 105050379); // currency | ||
| 161 | } else if (selrestr == "location") | ||
| 162 | { | ||
| 163 | selection += (verbly::notion::wnid == 100027167); // location | ||
| 164 | } else if (selrestr == "organization") | ||
| 165 | { | ||
| 166 | selection += (verbly::notion::wnid == 100237078); // organization | ||
| 167 | } else if (selrestr == "int_control") | ||
| 168 | { | ||
| 169 | selection += (verbly::notion::wnid == 100007347); // causal agent | ||
| 170 | } else if (selrestr == "natural") | ||
| 171 | { | ||
| 172 | selection += (verbly::notion::wnid == 100019128); // natural object | ||
| 173 | } else if (selrestr == "phys_obj") | ||
| 174 | { | ||
| 175 | selection += (verbly::notion::wnid == 100002684); // physical object | ||
| 176 | } else if (selrestr == "solid") | ||
| 177 | { | ||
| 178 | selection += (verbly::notion::wnid == 113860793); // solid | ||
| 179 | } else if (selrestr == "shape") | ||
| 180 | { | ||
| 181 | selection += (verbly::notion::wnid == 100027807); // shape | ||
| 182 | } else if (selrestr == "substance") | ||
| 183 | { | ||
| 184 | selection += (verbly::notion::wnid == 100019613); // substance | ||
| 185 | } else if (selrestr == "idea") | ||
| 186 | { | ||
| 187 | selection += (verbly::notion::wnid == 105803379); // idea | ||
| 188 | } else if (selrestr == "sound") | ||
| 189 | { | ||
| 190 | selection += (verbly::notion::wnid == 107111047); // sound | ||
| 191 | } else if (selrestr == "communication") | ||
| 192 | { | ||
| 193 | selection += (verbly::notion::wnid == 100033020); // communication | ||
| 194 | } else if (selrestr == "region") | ||
| 195 | { | ||
| 196 | selection += (verbly::notion::wnid == 105221895); // region | ||
| 197 | } else if (selrestr == "place") | ||
| 198 | { | ||
| 199 | selection += (verbly::notion::wnid == 100586262); // place | ||
| 200 | } else if (selrestr == "machine") | ||
| 201 | { | ||
| 202 | selection += (verbly::notion::wnid == 102958343); // machine | ||
| 203 | } else if (selrestr == "animate") | ||
| 204 | { | ||
| 205 | selection += (verbly::notion::wnid == 100004258); // animate thing | ||
| 206 | } else if (selrestr == "plant") | ||
| 207 | { | ||
| 208 | selection += (verbly::notion::wnid == 103956922); // plant | ||
| 209 | } else if (selrestr == "comestible") | ||
| 210 | { | ||
| 211 | selection += (verbly::notion::wnid == 100021265); // food | ||
| 212 | } else if (selrestr == "artifact") | ||
| 213 | { | ||
| 214 | selection += (verbly::notion::wnid == 100021939); // artifact | ||
| 215 | } else if (selrestr == "vehicle") | ||
| 216 | { | ||
| 217 | selection += (verbly::notion::wnid == 104524313); // vehicle | ||
| 218 | } else if (selrestr == "human") | ||
| 219 | { | ||
| 220 | selection += (verbly::notion::wnid == 100007846); // person | ||
| 221 | } else if (selrestr == "animal") | ||
| 222 | { | ||
| 223 | selection += (verbly::notion::wnid == 100015388); // animal | ||
| 224 | } else if (selrestr == "body_part") | ||
| 225 | { | ||
| 226 | selection += (verbly::notion::wnid == 105220461); // body part | ||
| 227 | } else if (selrestr == "garment") | ||
| 228 | { | ||
| 229 | selection += (verbly::notion::wnid == 103051540); // clothing | ||
| 230 | } else if (selrestr == "tool") | ||
| 231 | { | ||
| 232 | selection += (verbly::notion::wnid == 104451818); // tool | ||
| 233 | } else if ((selrestr == "concrete_inanimate") || (selrestr == "inanimate")) | ||
| 234 | { | ||
| 235 | selection += (verbly::notion::wnid == 100021939); // artifact | ||
| 236 | selection += (verbly::notion::wnid == 100019128); // natural object | ||
| 237 | } else if (selrestr == "non_region_location") | ||
| 238 | { | ||
| 239 | selection += (verbly::notion::wnid == 102913152); // building | ||
| 240 | } else if (selrestr == "non_solid_food") | ||
| 241 | { | ||
| 242 | selection += (verbly::notion::wnid == 107881800); // beverage | ||
| 243 | } else if (selrestr == "solid_food") | ||
| 244 | { | ||
| 245 | selection += (verbly::notion::wnid == 107555863); // solid food | ||
| 246 | } else if (selrestr == "slinky") | ||
| 247 | { | ||
| 248 | selection += (verbly::notion::wnid == 103670849); // line | ||
| 249 | } | ||
| 250 | } | ||
| 251 | |||
| 252 | if (selection.compact().getType() != verbly::filter::type::empty) | ||
| 253 | { | ||
| 254 | condition &= std::move(selection); | ||
| 300 | } else if (role == "Attribute") | 255 | } else if (role == "Attribute") |
| 301 | { | 256 | { |
| 302 | condition &= (verbly::notion::fullHypernyms %= (verbly::notion::wnid == 100024264)); // attribute | 257 | condition &= (verbly::notion::fullHypernyms %= (verbly::notion::wnid == 100024264)); // attribute |
| @@ -457,7 +412,7 @@ verbly::token sentence::generateClause( | |||
| 457 | } | 412 | } |
| 458 | std::cout << std::endl; | 413 | std::cout << std::endl; |
| 459 | 414 | ||
| 460 | if (requiresSelrestr("currency", part.getNounSelrestrs())) | 415 | if (chooseSelrestr(part.getNounSelrestrs(), {"currency"})) |
| 461 | { | 416 | { |
| 462 | int lead = std::uniform_int_distribution<int>(1,9)(rng_); | 417 | int lead = std::uniform_int_distribution<int>(1,9)(rng_); |
| 463 | int tail = std::uniform_int_distribution<int>(0,6)(rng_); | 418 | int tail = std::uniform_int_distribution<int>(0,6)(rng_); |
| @@ -559,11 +514,7 @@ verbly::token sentence::generateClause( | |||
| 559 | } else { | 514 | } else { |
| 560 | verbly::word noun = generateStandardNoun(part.getNounRole(), part.getNounSelrestrs()); | 515 | verbly::word noun = generateStandardNoun(part.getNounRole(), part.getNounSelrestrs()); |
| 561 | 516 | ||
| 562 | bool plural = part.nounHasSynrestr("plural"); | 517 | bool plural = part.nounHasSynrestr("plural") || chooseSelrestr(part.getNounSelrestrs(), {"group", "plural"}); |
| 563 | if (!plural) | ||
| 564 | { | ||
| 565 | plural = requiresSelrestr("plural", part.getNounSelrestrs()); | ||
| 566 | } | ||
| 567 | 518 | ||
| 568 | utter << generateStandardNounPhrase( | 519 | utter << generateStandardNounPhrase( |
| 569 | noun, | 520 | noun, |
