diff options
author | Kelly Rauchenberger <fefferburbia@gmail.com> | 2017-02-05 08:56:39 -0500 |
---|---|---|
committer | Kelly Rauchenberger <fefferburbia@gmail.com> | 2017-02-05 08:56:39 -0500 |
commit | e4fa0cb86d97c23c24cd7bdd62c23f03eed312da (patch) | |
tree | 70a20fdf684b1724659196a7de8d21a4a6ca194f /generator/generator.cpp | |
parent | bea3673ae1b3d19585dec56e96dbcd8a56b96e6d (diff) | |
download | verbly-e4fa0cb86d97c23c24cd7bdd62c23f03eed312da.tar.gz verbly-e4fa0cb86d97c23c24cd7bdd62c23f03eed312da.tar.bz2 verbly-e4fa0cb86d97c23c24cd7bdd62c23f03eed312da.zip |
Flattened selrestrs
Selrestrs are no longer logically a tree of positive/negative restrictions that are ANDed/ORed together; they are now a flat set of positive restrictions that are ORed together. They are stored as strings in a table called selrestrs, just like synrestrs, which also makes them a lot more queryable. This change required some changes to the VerbNet data, because we needed to consolidate any ANDed clauses into single selrestrs, as well as convert any negative selrestrs into positive ones. The changes made are detailed on the wiki. Preposition choices are now encoded as comma-separated lists instead of using JSON. This change, along with the selrestrs one, allows us to remove verbly's dependency on nlohmann::json.
Diffstat (limited to 'generator/generator.cpp')
-rw-r--r-- | generator/generator.cpp | 82 |
1 files changed, 21 insertions, 61 deletions
diff --git a/generator/generator.cpp b/generator/generator.cpp index 4cc9f64..e125b4a 100644 --- a/generator/generator.cpp +++ b/generator/generator.cpp | |||
@@ -7,7 +7,6 @@ | |||
7 | #include <fstream> | 7 | #include <fstream> |
8 | #include "../lib/enums.h" | 8 | #include "../lib/enums.h" |
9 | #include "progress.h" | 9 | #include "progress.h" |
10 | #include "../lib/selrestr.h" | ||
11 | #include "role.h" | 10 | #include "role.h" |
12 | #include "part.h" | 11 | #include "part.h" |
13 | #include "field.h" | 12 | #include "field.h" |
@@ -1303,12 +1302,20 @@ namespace verbly { | |||
1303 | std::string roleName = reinterpret_cast<const char*>(key); | 1302 | std::string roleName = reinterpret_cast<const char*>(key); |
1304 | xmlFree(key); | 1303 | xmlFree(key); |
1305 | 1304 | ||
1306 | selrestr roleSelrestrs; | 1305 | std::set<std::string> roleSelrestrs; |
1307 | for (xmlNodePtr rolenode = roletopnode->xmlChildrenNode; rolenode != nullptr; rolenode = rolenode->next) | 1306 | for (xmlNodePtr rolenode = roletopnode->xmlChildrenNode; rolenode != nullptr; rolenode = rolenode->next) |
1308 | { | 1307 | { |
1309 | if (!xmlStrcmp(rolenode->name, reinterpret_cast<const xmlChar*>("SELRESTRS"))) | 1308 | if (!xmlStrcmp(rolenode->name, reinterpret_cast<const xmlChar*>("SELRESTRS"))) |
1310 | { | 1309 | { |
1311 | roleSelrestrs = parseSelrestr(rolenode); | 1310 | for (xmlNodePtr selrestrnode = rolenode->xmlChildrenNode; selrestrnode != nullptr; selrestrnode = selrestrnode->next) |
1311 | { | ||
1312 | if (!xmlStrcmp(selrestrnode->name, reinterpret_cast<const xmlChar*>("SELRESTR"))) | ||
1313 | { | ||
1314 | key = xmlGetProp(selrestrnode, reinterpret_cast<const xmlChar*>("type")); | ||
1315 | roleSelrestrs.insert(std::string(reinterpret_cast<const char*>(key))); | ||
1316 | xmlFree(key); | ||
1317 | } | ||
1318 | } | ||
1312 | } | 1319 | } |
1313 | } | 1320 | } |
1314 | 1321 | ||
@@ -1335,7 +1342,7 @@ namespace verbly { | |||
1335 | std::string partRole = reinterpret_cast<const char*>(key); | 1342 | std::string partRole = reinterpret_cast<const char*>(key); |
1336 | xmlFree(key); | 1343 | xmlFree(key); |
1337 | 1344 | ||
1338 | selrestr partSelrestrs; | 1345 | std::set<std::string> partSelrestrs; |
1339 | std::set<std::string> partSynrestrs; | 1346 | std::set<std::string> partSynrestrs; |
1340 | 1347 | ||
1341 | for (xmlNodePtr npnode = syntaxnode->xmlChildrenNode; npnode != nullptr; npnode = npnode->next) | 1348 | for (xmlNodePtr npnode = syntaxnode->xmlChildrenNode; npnode != nullptr; npnode = npnode->next) |
@@ -1351,11 +1358,17 @@ namespace verbly { | |||
1351 | xmlFree(key); | 1358 | xmlFree(key); |
1352 | } | 1359 | } |
1353 | } | 1360 | } |
1354 | } | 1361 | } else if (!xmlStrcmp(npnode->name, reinterpret_cast<const xmlChar*>("SELRESTRS"))) |
1355 | |||
1356 | if (!xmlStrcmp(npnode->name, reinterpret_cast<const xmlChar*>("SELRESTRS"))) | ||
1357 | { | 1362 | { |
1358 | partSelrestrs = parseSelrestr(npnode); | 1363 | for (xmlNodePtr selrestrnode = npnode->xmlChildrenNode; selrestrnode != nullptr; selrestrnode = selrestrnode->next) |
1364 | { | ||
1365 | if (!xmlStrcmp(selrestrnode->name, reinterpret_cast<const xmlChar*>("SELRESTR"))) | ||
1366 | { | ||
1367 | key = xmlGetProp(selrestrnode, reinterpret_cast<const xmlChar*>("type")); | ||
1368 | partSelrestrs.insert(std::string(reinterpret_cast<const char*>(key))); | ||
1369 | xmlFree(key); | ||
1370 | } | ||
1371 | } | ||
1359 | } | 1372 | } |
1360 | } | 1373 | } |
1361 | 1374 | ||
@@ -1434,58 +1447,5 @@ namespace verbly { | |||
1434 | } | 1447 | } |
1435 | } | 1448 | } |
1436 | 1449 | ||
1437 | selrestr generator::parseSelrestr(xmlNodePtr top) | ||
1438 | { | ||
1439 | xmlChar* key; | ||
1440 | |||
1441 | if (!xmlStrcmp(top->name, reinterpret_cast<const xmlChar*>("SELRESTRS"))) | ||
1442 | { | ||
1443 | if (xmlChildElementCount(top) == 0) | ||
1444 | { | ||
1445 | return {}; | ||
1446 | } else if (xmlChildElementCount(top) == 1) | ||
1447 | { | ||
1448 | return parseSelrestr(xmlFirstElementChild(top)); | ||
1449 | } else { | ||
1450 | bool orlogic = false; | ||
1451 | if (xmlHasProp(top, reinterpret_cast<const xmlChar*>("logic"))) | ||
1452 | { | ||
1453 | key = xmlGetProp(top, reinterpret_cast<const xmlChar*>("logic")); | ||
1454 | if (!xmlStrcmp(key, reinterpret_cast<const xmlChar*>("or"))) | ||
1455 | { | ||
1456 | orlogic = true; | ||
1457 | } | ||
1458 | |||
1459 | xmlFree(key); | ||
1460 | } | ||
1461 | |||
1462 | std::list<selrestr> children; | ||
1463 | for (xmlNodePtr selrestr = top->xmlChildrenNode; selrestr != nullptr; selrestr = selrestr->next) | ||
1464 | { | ||
1465 | if (!xmlStrcmp(selrestr->name, reinterpret_cast<const xmlChar*>("SELRESTRS")) | ||
1466 | || !xmlStrcmp(selrestr->name, reinterpret_cast<const xmlChar*>("SELRESTR"))) | ||
1467 | { | ||
1468 | children.push_back(parseSelrestr(selrestr)); | ||
1469 | } | ||
1470 | } | ||
1471 | |||
1472 | return selrestr(children, orlogic); | ||
1473 | } | ||
1474 | } else if (!xmlStrcmp(top->name, reinterpret_cast<const xmlChar*>("SELRESTR"))) | ||
1475 | { | ||
1476 | key = xmlGetProp(top, reinterpret_cast<const xmlChar*>("Value")); | ||
1477 | bool selPos = (std::string(reinterpret_cast<const char*>(key)) == "+"); | ||
1478 | xmlFree(key); | ||
1479 | |||
1480 | key = xmlGetProp(top, reinterpret_cast<const xmlChar*>("type")); | ||
1481 | std::string selRestriction = reinterpret_cast<const char*>(key); | ||
1482 | xmlFree(key); | ||
1483 | |||
1484 | return selrestr(selRestriction, selPos); | ||
1485 | } else { | ||
1486 | throw std::logic_error("Badly formatted selrestr"); | ||
1487 | } | ||
1488 | } | ||
1489 | |||
1490 | }; | 1450 | }; |
1491 | }; | 1451 | }; |