summary refs log tree commit diff stats
path: root/generator/generator.cpp
diff options
context:
space:
mode:
author Kelly Rauchenberger <fefferburbia@gmail.com> 2017-01-28 12:59:42 -0500
committer Kelly Rauchenberger <fefferburbia@gmail.com> 2017-01-28 12:59:42 -0500
commit a7645346293ed6a912c26d0c50b6f7943f1f3072 (patch)
tree d4d144e03a5e2dfcebbad2692fa71e790719d8fd /generator/generator.cpp
parent 6ba8989bbbd497f949a3e8b17abed1d0bd048347 (diff)
download verbly-a7645346293ed6a912c26d0c50b6f7943f1f3072.tar.gz
verbly-a7645346293ed6a912c26d0c50b6f7943f1f3072.tar.bz2
verbly-a7645346293ed6a912c26d0c50b6f7943f1f3072.zip
Restructured verb frame schema to be more queryable
Groups are much less significant now: they no longer have a database
table, nor are they considered top-level objects anymore. Instead of
groups containing their own role data, that data is folded into the
frames so that it's easier to query; as a result, each group has its own
copy of the frames that it contains. Additionally, parts are considered
top-level objects now, and you can query for frames based on attributes
of their indexed parts. Synrestrs are also contained in their own table
now, so that parts can be filtered against their synrestrs; they are,
however, not considered top-level objects.

Created a new type of field, the "join where" or "condition join" field,
which is a normal join field that has a built-in condition on a
specified field. This is used to allow creating multiple distinct join
fields from one object to another. This is required for the lemma::form
and frame::part joins, because filters for forms of separate inflections
should not be coalesced; similarly, filters on differently indexed frame
parts should not be coalesced.

Queries can now be ordered, ascending or descending, by a field, in
addition to randomly as before. This is necessary for accessing the
parts of a verb frame in the correct order, but may be useful to an end
user as well.

Fixed a bug in statement generation where condition groups were not
surrounded by parentheses, which made mixing OR groups and AND groups
generate inaccurate statements. Condition groups are now parenthesized;
additionally, parentheses are not placed around the top-level condition,
and nested condition groups with the same logic type are coalesced, to
make query strings as easy to read as possible.

Also simplified the form::lemma field; it no longer conditions on the
inflection of the form like the lemma::form field does.

Also added a debug flag to statement::getQueryString that makes it
return a query string with all of the bindings filled in, for debug use
only.
Diffstat (limited to 'generator/generator.cpp')
-rw-r--r-- generator/generator.cpp 39
1 files changed, 17 insertions, 22 deletions
diff --git a/generator/generator.cpp b/generator/generator.cpp
index 610a602..4cc9f64 100644
--- a/generator/generator.cpp
+++ b/generator/generator.cpp
@@ -8,7 +8,7 @@
8#include "../lib/enums.h" 8#include "../lib/enums.h"
9#include "progress.h" 9#include "progress.h"
10#include "../lib/selrestr.h" 10#include "../lib/selrestr.h"
11#include "../lib/role.h" 11#include "role.h"
12#include "part.h" 12#include "part.h"
13#include "field.h" 13#include "field.h"
14#include "../lib/util.h" 14#include "../lib/util.h"
@@ -640,7 +640,7 @@ namespace verbly {
640 } 640 }
641 641
642 { 642 {
643 progress ppgs("Writing verb groups...", groups_.size()); 643 progress ppgs("Writing verb frames...", groups_.size());
644 644
645 for (group& g : groups_) 645 for (group& g : groups_)
646 { 646 {
@@ -649,17 +649,6 @@ namespace verbly {
649 ppgs.update(); 649 ppgs.update();
650 } 650 }
651 } 651 }
652
653 {
654 progress ppgs("Writing verb frames...", frames_.size());
655
656 for (frame& f : frames_)
657 {
658 db_ << f;
659
660 ppgs.update();
661 }
662 }
663 } 652 }
664 653
665 void generator::readWordNetAntonymy() 654 void generator::readWordNetAntonymy()
@@ -1212,9 +1201,15 @@ namespace verbly {
1212 return w; 1201 return w;
1213 } 1202 }
1214 1203
1215 group& generator::createGroup(xmlNodePtr top) 1204 void generator::createGroup(xmlNodePtr top, const group* parent)
1216 { 1205 {
1217 groups_.emplace_back(); 1206 if (parent != nullptr)
1207 {
1208 groups_.emplace_back(*parent);
1209 } else {
1210 groups_.emplace_back();
1211 }
1212
1218 group& grp = groups_.back(); 1213 group& grp = groups_.back();
1219 1214
1220 xmlChar* key; 1215 xmlChar* key;
@@ -1229,8 +1224,11 @@ namespace verbly {
1229 { 1224 {
1230 try 1225 try
1231 { 1226 {
1232 group& subgrp = createGroup(subclass); 1227 // Parsing a subgroup starts by making a copy of everything in
1233 subgrp.setParent(grp); 1228 // the parent. This is okay to do at this point because in the
1229 // VerbNet data, subgroups are always defined after everything
1230 // else.
1231 createGroup(subclass, &grp);
1234 } catch (const std::exception& e) 1232 } catch (const std::exception& e)
1235 { 1233 {
1236 key = xmlGetProp(subclass, reinterpret_cast<const xmlChar*>("ID")); 1234 key = xmlGetProp(subclass, reinterpret_cast<const xmlChar*>("ID"));
@@ -1323,8 +1321,7 @@ namespace verbly {
1323 { 1321 {
1324 if (!xmlStrcmp(frametopnode->name, reinterpret_cast<const xmlChar*>("FRAME"))) 1322 if (!xmlStrcmp(frametopnode->name, reinterpret_cast<const xmlChar*>("FRAME")))
1325 { 1323 {
1326 frames_.emplace_back(); 1324 frame fr;
1327 frame& fr = frames_.back();
1328 1325
1329 for (xmlNodePtr framenode = frametopnode->xmlChildrenNode; framenode != nullptr; framenode = framenode->next) 1326 for (xmlNodePtr framenode = frametopnode->xmlChildrenNode; framenode != nullptr; framenode = framenode->next)
1330 { 1327 {
@@ -1428,15 +1425,13 @@ namespace verbly {
1428 } 1425 }
1429 } 1426 }
1430 1427
1431 grp.addFrame(fr); 1428 grp.addFrame(std::move(fr));
1432 } 1429 }
1433 } 1430 }
1434 } 1431 }
1435 } 1432 }
1436 } 1433 }
1437 } 1434 }
1438
1439 return grp;
1440 } 1435 }
1441 1436
1442 selrestr generator::parseSelrestr(xmlNodePtr top) 1437 selrestr generator::parseSelrestr(xmlNodePtr top)