summary refs log tree commit diff stats
path: root/generator/group.cpp
diff options
context:
space:
mode:
author Kelly Rauchenberger <fefferburbia@gmail.com> 2017-01-28 12:59:42 -0500
committer Kelly Rauchenberger <fefferburbia@gmail.com> 2017-01-28 12:59:42 -0500
commit a7645346293ed6a912c26d0c50b6f7943f1f3072 (patch)
tree d4d144e03a5e2dfcebbad2692fa71e790719d8fd /generator/group.cpp
parent 6ba8989bbbd497f949a3e8b17abed1d0bd048347 (diff)
download verbly-a7645346293ed6a912c26d0c50b6f7943f1f3072.tar.gz
verbly-a7645346293ed6a912c26d0c50b6f7943f1f3072.tar.bz2
verbly-a7645346293ed6a912c26d0c50b6f7943f1f3072.zip
Restructured verb frame schema to be more queryable
Groups are much less significant now, and they no longer have a database
table, nor are they considered a top level object anymore. Instead of
containing their own role data, that data is folded into the frames so
that it's easier to query; as a result, each group has its own copy of
the frames that it contains. Additionally, parts are considered top
level objects now, and you can query for frames based on attributes of
their indexed parts. Synrestrs are also contained in their own table
now, so that parts can be filtered against their synrestrs; they are,
however, not considered top-level objects.

Created a new type of field, the "join where" or "condition join" field,
which is a normal join field that has a built-in condition on a
specified field. This is used to allow creating multiple distinct join
fields from one object to another. This is required for the lemma::form
and frame::part joins, because filters for forms of separate inflections
should not be coalesced; similarly, filters on differently indexed frame
parts should not be coalesced.

Queries can now be ordered, ascending or descending, by a field, in
addition to randomly as before. This is necessary for accessing the
parts of a verb frame in the correct order, but may be useful to an end
user as well.

Fixed a bug in statement generation where condition groups were not
being surrounded by parentheses, which made mixing OR groups and AND
groups generate inaccurate statements. Additionally, parentheses are
not placed around the top-level condition, and nested condition groups
with the same logic type are coalesced, to make query strings as easy
to read as possible.

Also simplified the form::lemma field; it no longer conditions on the
inflection of the form like the lemma::form field does.

Also added a debug flag to statement::getQueryString that makes it
return a query string with all of the bindings filled in, for debug use
only.
Diffstat (limited to 'generator/group.cpp')
-rw-r--r-- generator/group.cpp 166
1 files changed, 98 insertions, 68 deletions
diff --git a/generator/group.cpp b/generator/group.cpp
index cebe2b9..aa28d42 100644
--- a/generator/group.cpp
+++ b/generator/group.cpp
@@ -15,12 +15,15 @@ namespace verbly {
15 { 15 {
16 } 16 }
17 17
18 void group::setParent(const group& parent) 18 group::group(const group& parent) :
19 id_(nextId_++),
20 roles_(parent.roles_),
21 roleNames_(parent.roleNames_)
19 { 22 {
20 // Adding a group to itself is nonsensical. 23 for (const frame& f : parent.frames_)
21 assert(&parent != this); 24 {
22 25 frames_.push_back(frame::duplicate(f));
23 parent_ = &parent; 26 }
24 } 27 }
25 28
26 void group::addRole(role r) 29 void group::addRole(role r)
@@ -30,87 +33,114 @@ namespace verbly {
30 roleNames_.insert(std::move(name)); 33 roleNames_.insert(std::move(name));
31 } 34 }
32 35
33 void group::addFrame(const frame& f) 36 void group::addFrame(frame f)
34 { 37 {
35 frames_.insert(&f); 38 frames_.push_back(std::move(f));
36 } 39 }
37 40
38 std::set<std::string> group::getRoles() const 41 bool group::hasRole(std::string name) const
39 { 42 {
40 std::set<std::string> fullRoles = roleNames_; 43 // Rarely, a noun phrase part may use a role that is not defined in the
41 44 // group. See confess-37.10 "NP V NP ADJ".
42 if (hasParent()) 45 return (roles_.count(name) == 1);
43 {
44 for (std::string name : getParent().getRoles())
45 {
46 fullRoles.insert(name);
47 }
48 }
49
50 return fullRoles;
51 } 46 }
52 47
53 const role& group::getRole(std::string name) const 48 const role& group::getRole(std::string name) const
54 { 49 {
55 if (roles_.count(name)) 50 return roles_.at(name);
56 {
57 return roles_.at(name);
58 } else if (hasParent())
59 {
60 return getParent().getRole(name);
61 } else {
62 throw std::invalid_argument("Specified role not found in verb group");
63 }
64 }
65
66 std::set<const frame*> group::getFrames() const
67 {
68 std::set<const frame*> fullFrames = frames_;
69
70 if (hasParent())
71 {
72 for (const frame* f : getParent().getFrames())
73 {
74 fullFrames.insert(f);
75 }
76 }
77
78 return fullFrames;
79 } 51 }
80 52
81 database& operator<<(database& db, const group& arg) 53 database& operator<<(database& db, const group& arg)
82 { 54 {
83 // Serialize the group first 55 // Serialize each frame
56 for (const frame& f : arg.getFrames())
84 { 57 {
85 std::list<field> fields; 58 // First, serialize the group/frame relationship
86 fields.emplace_back("group_id", arg.getId());
87
88 nlohmann::json jsonRoles;
89 for (std::string name : arg.getRoles())
90 { 59 {
91 const role& r = arg.getRole(name); 60 std::list<field> fields;
92 61
93 nlohmann::json jsonRole; 62 fields.emplace_back("frame_id", f.getId());
94 jsonRole["type"] = name; 63 fields.emplace_back("group_id", arg.getId());
95 jsonRole["selrestrs"] = r.getSelrestrs().toJson(); 64 fields.emplace_back("length", f.getLength());
96 65
97 jsonRoles.emplace_back(std::move(jsonRole)); 66 db.insertIntoTable("frames", std::move(fields));
98 } 67 }
99 68
100 fields.emplace_back("data", jsonRoles.dump()); 69 // Then, serialize the frame parts in the context of the group
101 70 for (int partIndex = 0; partIndex < f.getLength(); partIndex++)
102 db.insertIntoTable("groups", std::move(fields)); 71 {
103 } 72 const part& p = f[partIndex];
104 73
105 // Then, serialize the group/frame relationship 74 std::list<field> fields;
106 for (const frame* f : arg.getFrames()) 75 fields.emplace_back("part_id", p.getId());
107 { 76 fields.emplace_back("frame_id", f.getId());
108 std::list<field> fields; 77 fields.emplace_back("part_index", partIndex);
109 78 fields.emplace_back("type", static_cast<int>(p.getType()));
110 fields.emplace_back("group_id", arg.getId()); 79
111 fields.emplace_back("frame_id", f->getId()); 80 switch (p.getType())
112 81 {
113 db.insertIntoTable("groups_frames", std::move(fields)); 82 case part::type::noun_phrase:
83 {
84 fields.emplace_back("role", p.getNounRole());
85
86 selrestr partSelrestr;
87 if (p.getNounSelrestrs().getType() != selrestr::type::empty)
88 {
89 partSelrestr = p.getNounSelrestrs();
90 } else if (arg.hasRole(p.getNounRole()))
91 {
92 partSelrestr = arg.getRole(p.getNounRole()).getSelrestrs();
93 }
94
95 fields.emplace_back("selrestrs", partSelrestr.toJson().dump());
96
97 // Short interlude to serialize the synrestrs
98 for (const std::string& s : p.getNounSynrestrs())
99 {
100 std::list<field> synrestrFields;
101
102 synrestrFields.emplace_back("part_id", p.getId());
103 synrestrFields.emplace_back("synrestr", s);
104
105 db.insertIntoTable("synrestrs", std::move(synrestrFields));
106 }
107
108 break;
109 }
110
111 case part::type::preposition:
112 {
113 fields.emplace_back("prepositions", nlohmann::json(p.getPrepositionChoices()).dump());
114 fields.emplace_back("preposition_literality", p.isPrepositionLiteral() ? 1 : 0);
115
116 break;
117 }
118
119 case part::type::literal:
120 {
121 fields.emplace_back("literal_value", p.getLiteralValue());
122
123 break;
124 }
125
126 case part::type::verb:
127 case part::type::adjective:
128 case part::type::adverb:
129 {
130 break;
131 }
132
133 case part::type::invalid:
134 {
135 // Invalid parts should not be serialized.
136 assert(false);
137
138 break;
139 }
140 }
141
142 db.insertIntoTable("parts", std::move(fields));
143 }
114 } 144 }
115 145
116 return db; 146 return db;