diff options
author | Kelly Rauchenberger <fefferburbia@gmail.com> | 2017-01-28 12:59:42 -0500 |
---|---|---|
committer | Kelly Rauchenberger <fefferburbia@gmail.com> | 2017-01-28 12:59:42 -0500 |
commit | a7645346293ed6a912c26d0c50b6f7943f1f3072 (patch) | |
tree | d4d144e03a5e2dfcebbad2692fa71e790719d8fd /generator | |
parent | 6ba8989bbbd497f949a3e8b17abed1d0bd048347 (diff) | |
download | verbly-a7645346293ed6a912c26d0c50b6f7943f1f3072.tar.gz verbly-a7645346293ed6a912c26d0c50b6f7943f1f3072.tar.bz2 verbly-a7645346293ed6a912c26d0c50b6f7943f1f3072.zip |
Restructured verb frame schema to be more queryable
Groups are much less significant now, and they no longer have a database table, nor are they considered a top-level object anymore. Instead of groups containing their own role data, that data is folded into the frames so that it's easier to query; as a result, each group has its own copy of the frames that it contains. Additionally, parts are considered top-level objects now, and you can query for frames based on attributes of their indexed parts. Synrestrs are also contained in their own table now, so that parts can be filtered against their synrestrs; they are, however, not considered top-level objects. Created a new type of field, the "join where" or "condition join" field, which is a normal join field that has a built-in condition on a specified field. This is used to allow creating multiple distinct join fields from one object to another. This is required for the lemma::form and frame::part joins, because filters for forms of separate inflections should not be coalesced; similarly, filters on differently indexed frame parts should not be coalesced. Queries can now be ordered, ascending or descending, by a field, in addition to randomly as before. This is necessary for accessing the parts of a verb frame in the correct order, but may be useful to an end user as well. Fixed a bug in statement generation: condition groups were not being surrounded by parentheses, which made mixing OR groups and AND groups generate inaccurate statements. This has been fixed; additionally, parentheses are not placed around the top-level condition, and nested condition groups with the same logic type are coalesced, to make query strings as easy to read as possible. Also simplified the form::lemma field; it no longer conditions on the inflection of the form like the lemma::form field does. Also added a debug flag to statement::getQueryString that makes it return a query string with all of the bindings filled in, for debug use only.
Diffstat (limited to 'generator')
-rw-r--r-- | generator/frame.cpp | 69 | ||||
-rw-r--r-- | generator/frame.h | 20 | ||||
-rw-r--r-- | generator/generator.cpp | 39 | ||||
-rw-r--r-- | generator/generator.h | 3 | ||||
-rw-r--r-- | generator/group.cpp | 166 | ||||
-rw-r--r-- | generator/group.h | 30 | ||||
-rw-r--r-- | generator/part.cpp | 48 | ||||
-rw-r--r-- | generator/part.h | 30 | ||||
-rw-r--r-- | generator/role.h | 60 | ||||
-rw-r--r-- | generator/schema.sql | 33 | ||||
-rw-r--r-- | generator/word.h | 1 |
11 files changed, 300 insertions, 199 deletions
diff --git a/generator/frame.cpp b/generator/frame.cpp index f75e3ba..4e4ac5f 100644 --- a/generator/frame.cpp +++ b/generator/frame.cpp | |||
@@ -11,72 +11,21 @@ namespace verbly { | |||
11 | { | 11 | { |
12 | } | 12 | } |
13 | 13 | ||
14 | void frame::push_back(part fp) | 14 | frame frame::duplicate(const frame& other) |
15 | { | ||
16 | parts_.push_back(std::move(fp)); | ||
17 | } | ||
18 | |||
19 | database& operator<<(database& db, const frame& arg) | ||
20 | { | 15 | { |
21 | std::list<field> fields; | 16 | frame result; |
22 | fields.emplace_back("frame_id", arg.getId()); | ||
23 | 17 | ||
24 | nlohmann::json jsonParts; | 18 | for (const part& p : other.parts_) |
25 | for (const part& p : arg) | ||
26 | { | 19 | { |
27 | nlohmann::json jsonPart; | 20 | result.push_back(part::duplicate(p)); |
28 | jsonPart["type"] = static_cast<int>(p.getType()); | ||
29 | |||
30 | switch (p.getType()) | ||
31 | { | ||
32 | case part::type::noun_phrase: | ||
33 | { | ||
34 | jsonPart["role"] = p.getNounRole(); | ||
35 | jsonPart["selrestrs"] = p.getNounSelrestrs().toJson(); | ||
36 | jsonPart["synrestrs"] = p.getNounSynrestrs(); | ||
37 | |||
38 | break; | ||
39 | } | ||
40 | |||
41 | case part::type::preposition: | ||
42 | { | ||
43 | jsonPart["choices"] = p.getPrepositionChoices(); | ||
44 | jsonPart["literal"] = p.isPrepositionLiteral(); | ||
45 | |||
46 | break; | ||
47 | } | ||
48 | |||
49 | case part::type::literal: | ||
50 | { | ||
51 | jsonPart["value"] = p.getLiteralValue(); | ||
52 | |||
53 | break; | ||
54 | } | ||
55 | |||
56 | case part::type::verb: | ||
57 | case part::type::adjective: | ||
58 | case part::type::adverb: | ||
59 | { | ||
60 | break; | ||
61 | } | ||
62 | |||
63 | case part::type::invalid: | ||
64 | { | ||
65 | // Invalid parts should not be serialized. | ||
66 | assert(false); | ||
67 | |||
68 | break; | ||
69 | } | ||
70 | } | ||
71 | |||
72 | jsonParts.push_back(std::move(jsonPart)); | ||
73 | } | 21 | } |
74 | 22 | ||
75 | fields.emplace_back("data", jsonParts.dump()); | 23 | return result; |
76 | 24 | } | |
77 | db.insertIntoTable("frames", std::move(fields)); | ||
78 | 25 | ||
79 | return db; | 26 | void frame::push_back(part fp) |
27 | { | ||
28 | parts_.push_back(std::move(fp)); | ||
80 | } | 29 | } |
81 | 30 | ||
82 | }; | 31 | }; |
diff --git a/generator/frame.h b/generator/frame.h index 764564d..ba266f0 100644 --- a/generator/frame.h +++ b/generator/frame.h | |||
@@ -19,6 +19,10 @@ namespace verbly { | |||
19 | // Constructor | 19 | // Constructor |
20 | 20 | ||
21 | frame(); | 21 | frame(); |
22 | |||
23 | // Duplication | ||
24 | |||
25 | static frame duplicate(const frame& other); | ||
22 | 26 | ||
23 | // Mutators | 27 | // Mutators |
24 | 28 | ||
@@ -30,15 +34,15 @@ namespace verbly { | |||
30 | { | 34 | { |
31 | return id_; | 35 | return id_; |
32 | } | 36 | } |
33 | 37 | ||
34 | const_iterator begin() const | 38 | int getLength() const |
35 | { | 39 | { |
36 | return std::begin(parts_); | 40 | return parts_.size(); |
37 | } | 41 | } |
38 | 42 | ||
39 | const_iterator end() const | 43 | const part& operator[](int index) const |
40 | { | 44 | { |
41 | return std::end(parts_); | 45 | return parts_.at(index); |
42 | } | 46 | } |
43 | 47 | ||
44 | private: | 48 | private: |
@@ -47,12 +51,10 @@ namespace verbly { | |||
47 | 51 | ||
48 | const int id_; | 52 | const int id_; |
49 | 53 | ||
50 | std::list<part> parts_; | 54 | std::vector<part> parts_; |
51 | 55 | ||
52 | }; | 56 | }; |
53 | 57 | ||
54 | database& operator<<(database& db, const frame& arg); | ||
55 | |||
56 | }; | 58 | }; |
57 | }; | 59 | }; |
58 | 60 | ||
diff --git a/generator/generator.cpp b/generator/generator.cpp index 610a602..4cc9f64 100644 --- a/generator/generator.cpp +++ b/generator/generator.cpp | |||
@@ -8,7 +8,7 @@ | |||
8 | #include "../lib/enums.h" | 8 | #include "../lib/enums.h" |
9 | #include "progress.h" | 9 | #include "progress.h" |
10 | #include "../lib/selrestr.h" | 10 | #include "../lib/selrestr.h" |
11 | #include "../lib/role.h" | 11 | #include "role.h" |
12 | #include "part.h" | 12 | #include "part.h" |
13 | #include "field.h" | 13 | #include "field.h" |
14 | #include "../lib/util.h" | 14 | #include "../lib/util.h" |
@@ -640,7 +640,7 @@ namespace verbly { | |||
640 | } | 640 | } |
641 | 641 | ||
642 | { | 642 | { |
643 | progress ppgs("Writing verb groups...", groups_.size()); | 643 | progress ppgs("Writing verb frames...", groups_.size()); |
644 | 644 | ||
645 | for (group& g : groups_) | 645 | for (group& g : groups_) |
646 | { | 646 | { |
@@ -649,17 +649,6 @@ namespace verbly { | |||
649 | ppgs.update(); | 649 | ppgs.update(); |
650 | } | 650 | } |
651 | } | 651 | } |
652 | |||
653 | { | ||
654 | progress ppgs("Writing verb frames...", frames_.size()); | ||
655 | |||
656 | for (frame& f : frames_) | ||
657 | { | ||
658 | db_ << f; | ||
659 | |||
660 | ppgs.update(); | ||
661 | } | ||
662 | } | ||
663 | } | 652 | } |
664 | 653 | ||
665 | void generator::readWordNetAntonymy() | 654 | void generator::readWordNetAntonymy() |
@@ -1212,9 +1201,15 @@ namespace verbly { | |||
1212 | return w; | 1201 | return w; |
1213 | } | 1202 | } |
1214 | 1203 | ||
1215 | group& generator::createGroup(xmlNodePtr top) | 1204 | void generator::createGroup(xmlNodePtr top, const group* parent) |
1216 | { | 1205 | { |
1217 | groups_.emplace_back(); | 1206 | if (parent != nullptr) |
1207 | { | ||
1208 | groups_.emplace_back(*parent); | ||
1209 | } else { | ||
1210 | groups_.emplace_back(); | ||
1211 | } | ||
1212 | |||
1218 | group& grp = groups_.back(); | 1213 | group& grp = groups_.back(); |
1219 | 1214 | ||
1220 | xmlChar* key; | 1215 | xmlChar* key; |
@@ -1229,8 +1224,11 @@ namespace verbly { | |||
1229 | { | 1224 | { |
1230 | try | 1225 | try |
1231 | { | 1226 | { |
1232 | group& subgrp = createGroup(subclass); | 1227 | // Parsing a subgroup starts by making a copy of everything in |
1233 | subgrp.setParent(grp); | 1228 | // the parent. This is okay to do at this point because in the |
1229 | // VerbNet data, subgroups are always defined after everything | ||
1230 | // else. | ||
1231 | createGroup(subclass, &grp); | ||
1234 | } catch (const std::exception& e) | 1232 | } catch (const std::exception& e) |
1235 | { | 1233 | { |
1236 | key = xmlGetProp(subclass, reinterpret_cast<const xmlChar*>("ID")); | 1234 | key = xmlGetProp(subclass, reinterpret_cast<const xmlChar*>("ID")); |
@@ -1323,8 +1321,7 @@ namespace verbly { | |||
1323 | { | 1321 | { |
1324 | if (!xmlStrcmp(frametopnode->name, reinterpret_cast<const xmlChar*>("FRAME"))) | 1322 | if (!xmlStrcmp(frametopnode->name, reinterpret_cast<const xmlChar*>("FRAME"))) |
1325 | { | 1323 | { |
1326 | frames_.emplace_back(); | 1324 | frame fr; |
1327 | frame& fr = frames_.back(); | ||
1328 | 1325 | ||
1329 | for (xmlNodePtr framenode = frametopnode->xmlChildrenNode; framenode != nullptr; framenode = framenode->next) | 1326 | for (xmlNodePtr framenode = frametopnode->xmlChildrenNode; framenode != nullptr; framenode = framenode->next) |
1330 | { | 1327 | { |
@@ -1428,15 +1425,13 @@ namespace verbly { | |||
1428 | } | 1425 | } |
1429 | } | 1426 | } |
1430 | 1427 | ||
1431 | grp.addFrame(fr); | 1428 | grp.addFrame(std::move(fr)); |
1432 | } | 1429 | } |
1433 | } | 1430 | } |
1434 | } | 1431 | } |
1435 | } | 1432 | } |
1436 | } | 1433 | } |
1437 | } | 1434 | } |
1438 | |||
1439 | return grp; | ||
1440 | } | 1435 | } |
1441 | 1436 | ||
1442 | selrestr generator::parseSelrestr(xmlNodePtr top) | 1437 | selrestr generator::parseSelrestr(xmlNodePtr top) |
diff --git a/generator/generator.h b/generator/generator.h index 8352693..bc9b3c7 100644 --- a/generator/generator.h +++ b/generator/generator.h | |||
@@ -105,7 +105,7 @@ namespace verbly { | |||
105 | 105 | ||
106 | template <typename... Args> word& createWord(Args&&... args); | 106 | template <typename... Args> word& createWord(Args&&... args); |
107 | 107 | ||
108 | group& createGroup(xmlNodePtr top); | 108 | void createGroup(xmlNodePtr top, const group* parent = nullptr); |
109 | 109 | ||
110 | selrestr parseSelrestr(xmlNodePtr top); | 110 | selrestr parseSelrestr(xmlNodePtr top); |
111 | 111 | ||
@@ -128,7 +128,6 @@ namespace verbly { | |||
128 | std::list<lemma> lemmas_; | 128 | std::list<lemma> lemmas_; |
129 | std::list<form> forms_; | 129 | std::list<form> forms_; |
130 | std::list<pronunciation> pronunciations_; | 130 | std::list<pronunciation> pronunciations_; |
131 | std::list<frame> frames_; | ||
132 | std::list<group> groups_; | 131 | std::list<group> groups_; |
133 | 132 | ||
134 | // Indexes | 133 | // Indexes |
diff --git a/generator/group.cpp b/generator/group.cpp index cebe2b9..aa28d42 100644 --- a/generator/group.cpp +++ b/generator/group.cpp | |||
@@ -15,12 +15,15 @@ namespace verbly { | |||
15 | { | 15 | { |
16 | } | 16 | } |
17 | 17 | ||
18 | void group::setParent(const group& parent) | 18 | group::group(const group& parent) : |
19 | id_(nextId_++), | ||
20 | roles_(parent.roles_), | ||
21 | roleNames_(parent.roleNames_) | ||
19 | { | 22 | { |
20 | // Adding a group to itself is nonsensical. | 23 | for (const frame& f : parent.frames_) |
21 | assert(&parent != this); | 24 | { |
22 | 25 | frames_.push_back(frame::duplicate(f)); | |
23 | parent_ = &parent; | 26 | } |
24 | } | 27 | } |
25 | 28 | ||
26 | void group::addRole(role r) | 29 | void group::addRole(role r) |
@@ -30,87 +33,114 @@ namespace verbly { | |||
30 | roleNames_.insert(std::move(name)); | 33 | roleNames_.insert(std::move(name)); |
31 | } | 34 | } |
32 | 35 | ||
33 | void group::addFrame(const frame& f) | 36 | void group::addFrame(frame f) |
34 | { | 37 | { |
35 | frames_.insert(&f); | 38 | frames_.push_back(std::move(f)); |
36 | } | 39 | } |
37 | 40 | ||
38 | std::set<std::string> group::getRoles() const | 41 | bool group::hasRole(std::string name) const |
39 | { | 42 | { |
40 | std::set<std::string> fullRoles = roleNames_; | 43 | // Rarely, a noun phrase part may use a role that is not defined in the |
41 | 44 | // group. See confess-37.10 "NP V NP ADJ". | |
42 | if (hasParent()) | 45 | return (roles_.count(name) == 1); |
43 | { | ||
44 | for (std::string name : getParent().getRoles()) | ||
45 | { | ||
46 | fullRoles.insert(name); | ||
47 | } | ||
48 | } | ||
49 | |||
50 | return fullRoles; | ||
51 | } | 46 | } |
52 | 47 | ||
53 | const role& group::getRole(std::string name) const | 48 | const role& group::getRole(std::string name) const |
54 | { | 49 | { |
55 | if (roles_.count(name)) | 50 | return roles_.at(name); |
56 | { | ||
57 | return roles_.at(name); | ||
58 | } else if (hasParent()) | ||
59 | { | ||
60 | return getParent().getRole(name); | ||
61 | } else { | ||
62 | throw std::invalid_argument("Specified role not found in verb group"); | ||
63 | } | ||
64 | } | ||
65 | |||
66 | std::set<const frame*> group::getFrames() const | ||
67 | { | ||
68 | std::set<const frame*> fullFrames = frames_; | ||
69 | |||
70 | if (hasParent()) | ||
71 | { | ||
72 | for (const frame* f : getParent().getFrames()) | ||
73 | { | ||
74 | fullFrames.insert(f); | ||
75 | } | ||
76 | } | ||
77 | |||
78 | return fullFrames; | ||
79 | } | 51 | } |
80 | 52 | ||
81 | database& operator<<(database& db, const group& arg) | 53 | database& operator<<(database& db, const group& arg) |
82 | { | 54 | { |
83 | // Serialize the group first | 55 | // Serialize each frame |
56 | for (const frame& f : arg.getFrames()) | ||
84 | { | 57 | { |
85 | std::list<field> fields; | 58 | // First, serialize the group/frame relationship |
86 | fields.emplace_back("group_id", arg.getId()); | ||
87 | |||
88 | nlohmann::json jsonRoles; | ||
89 | for (std::string name : arg.getRoles()) | ||
90 | { | 59 | { |
91 | const role& r = arg.getRole(name); | 60 | std::list<field> fields; |
92 | 61 | ||
93 | nlohmann::json jsonRole; | 62 | fields.emplace_back("frame_id", f.getId()); |
94 | jsonRole["type"] = name; | 63 | fields.emplace_back("group_id", arg.getId()); |
95 | jsonRole["selrestrs"] = r.getSelrestrs().toJson(); | 64 | fields.emplace_back("length", f.getLength()); |
96 | 65 | ||
97 | jsonRoles.emplace_back(std::move(jsonRole)); | 66 | db.insertIntoTable("frames", std::move(fields)); |
98 | } | 67 | } |
99 | 68 | ||
100 | fields.emplace_back("data", jsonRoles.dump()); | 69 | // Then, serialize the frame parts in the context of the group |
101 | 70 | for (int partIndex = 0; partIndex < f.getLength(); partIndex++) | |
102 | db.insertIntoTable("groups", std::move(fields)); | 71 | { |
103 | } | 72 | const part& p = f[partIndex]; |
104 | 73 | ||
105 | // Then, serialize the group/frame relationship | 74 | std::list<field> fields; |
106 | for (const frame* f : arg.getFrames()) | 75 | fields.emplace_back("part_id", p.getId()); |
107 | { | 76 | fields.emplace_back("frame_id", f.getId()); |
108 | std::list<field> fields; | 77 | fields.emplace_back("part_index", partIndex); |
109 | 78 | fields.emplace_back("type", static_cast<int>(p.getType())); | |
110 | fields.emplace_back("group_id", arg.getId()); | 79 | |
111 | fields.emplace_back("frame_id", f->getId()); | 80 | switch (p.getType()) |
112 | 81 | { | |
113 | db.insertIntoTable("groups_frames", std::move(fields)); | 82 | case part::type::noun_phrase: |
83 | { | ||
84 | fields.emplace_back("role", p.getNounRole()); | ||
85 | |||
86 | selrestr partSelrestr; | ||
87 | if (p.getNounSelrestrs().getType() != selrestr::type::empty) | ||
88 | { | ||
89 | partSelrestr = p.getNounSelrestrs(); | ||
90 | } else if (arg.hasRole(p.getNounRole())) | ||
91 | { | ||
92 | partSelrestr = arg.getRole(p.getNounRole()).getSelrestrs(); | ||
93 | } | ||
94 | |||
95 | fields.emplace_back("selrestrs", partSelrestr.toJson().dump()); | ||
96 | |||
97 | // Short interlude to serialize the synrestrs | ||
98 | for (const std::string& s : p.getNounSynrestrs()) | ||
99 | { | ||
100 | std::list<field> synrestrFields; | ||
101 | |||
102 | synrestrFields.emplace_back("part_id", p.getId()); | ||
103 | synrestrFields.emplace_back("synrestr", s); | ||
104 | |||
105 | db.insertIntoTable("synrestrs", std::move(synrestrFields)); | ||
106 | } | ||
107 | |||
108 | break; | ||
109 | } | ||
110 | |||
111 | case part::type::preposition: | ||
112 | { | ||
113 | fields.emplace_back("prepositions", nlohmann::json(p.getPrepositionChoices()).dump()); | ||
114 | fields.emplace_back("preposition_literality", p.isPrepositionLiteral() ? 1 : 0); | ||
115 | |||
116 | break; | ||
117 | } | ||
118 | |||
119 | case part::type::literal: | ||
120 | { | ||
121 | fields.emplace_back("literal_value", p.getLiteralValue()); | ||
122 | |||
123 | break; | ||
124 | } | ||
125 | |||
126 | case part::type::verb: | ||
127 | case part::type::adjective: | ||
128 | case part::type::adverb: | ||
129 | { | ||
130 | break; | ||
131 | } | ||
132 | |||
133 | case part::type::invalid: | ||
134 | { | ||
135 | // Invalid parts should not be serialized. | ||
136 | assert(false); | ||
137 | |||
138 | break; | ||
139 | } | ||
140 | } | ||
141 | |||
142 | db.insertIntoTable("parts", std::move(fields)); | ||
143 | } | ||
114 | } | 144 | } |
115 | 145 | ||
116 | return db; | 146 | return db; |
diff --git a/generator/group.h b/generator/group.h index 83f40c2..5486fbe 100644 --- a/generator/group.h +++ b/generator/group.h | |||
@@ -5,7 +5,7 @@ | |||
5 | #include <set> | 5 | #include <set> |
6 | #include <string> | 6 | #include <string> |
7 | #include <cassert> | 7 | #include <cassert> |
8 | #include "../lib/role.h" | 8 | #include "role.h" |
9 | 9 | ||
10 | namespace verbly { | 10 | namespace verbly { |
11 | namespace generator { | 11 | namespace generator { |
@@ -20,13 +20,13 @@ namespace verbly { | |||
20 | 20 | ||
21 | group(); | 21 | group(); |
22 | 22 | ||
23 | // Mutators | 23 | explicit group(const group& parent); |
24 | 24 | ||
25 | void setParent(const group& parent); | 25 | // Mutators |
26 | 26 | ||
27 | void addRole(role r); | 27 | void addRole(role r); |
28 | 28 | ||
29 | void addFrame(const frame& f); | 29 | void addFrame(frame f); |
30 | 30 | ||
31 | // Accessors | 31 | // Accessors |
32 | 32 | ||
@@ -35,24 +35,19 @@ namespace verbly { | |||
35 | return id_; | 35 | return id_; |
36 | } | 36 | } |
37 | 37 | ||
38 | bool hasParent() const | 38 | const std::set<std::string>& getRoles() const |
39 | { | ||
40 | return (parent_ != nullptr); | ||
41 | } | ||
42 | |||
43 | const group& getParent() const | ||
44 | { | 39 | { |
45 | // Calling code should always call hasParent first | 40 | return roleNames_; |
46 | assert(parent_ != nullptr); | ||
47 | |||
48 | return *parent_; | ||
49 | } | 41 | } |
50 | 42 | ||
51 | std::set<std::string> getRoles() const; | 43 | bool hasRole(std::string name) const; |
52 | 44 | ||
53 | const role& getRole(std::string name) const; | 45 | const role& getRole(std::string name) const; |
54 | 46 | ||
55 | std::set<const frame*> getFrames() const; | 47 | const std::list<frame>& getFrames() const |
48 | { | ||
49 | return frames_; | ||
50 | } | ||
56 | 51 | ||
57 | private: | 52 | private: |
58 | 53 | ||
@@ -60,9 +55,8 @@ namespace verbly { | |||
60 | 55 | ||
61 | const int id_; | 56 | const int id_; |
62 | 57 | ||
63 | const group* parent_ = nullptr; | ||
64 | std::map<std::string, role> roles_; | 58 | std::map<std::string, role> roles_; |
65 | std::set<const frame*> frames_; | 59 | std::list<frame> frames_; |
66 | 60 | ||
67 | // Caches | 61 | // Caches |
68 | 62 | ||
diff --git a/generator/part.cpp b/generator/part.cpp index 8a75ed4..07618a8 100644 --- a/generator/part.cpp +++ b/generator/part.cpp | |||
@@ -4,6 +4,8 @@ | |||
4 | namespace verbly { | 4 | namespace verbly { |
5 | namespace generator { | 5 | namespace generator { |
6 | 6 | ||
7 | int part::nextId_ = 0; | ||
8 | |||
7 | part part::createNounPhrase(std::string role, selrestr selrestrs, std::set<std::string> synrestrs) | 9 | part part::createNounPhrase(std::string role, selrestr selrestrs, std::set<std::string> synrestrs) |
8 | { | 10 | { |
9 | part p(type::noun_phrase); | 11 | part p(type::noun_phrase); |
@@ -49,9 +51,52 @@ namespace verbly { | |||
49 | return p; | 51 | return p; |
50 | } | 52 | } |
51 | 53 | ||
54 | part part::duplicate(const part& other) | ||
55 | { | ||
56 | part result(other.type_); | ||
57 | |||
58 | switch (result.type_) | ||
59 | { | ||
60 | case type::noun_phrase: | ||
61 | { | ||
62 | new(&result.noun_phrase_.role) std::string(other.noun_phrase_.role); | ||
63 | new(&result.noun_phrase_.selrestrs) selrestr(other.noun_phrase_.selrestrs); | ||
64 | new(&result.noun_phrase_.synrestrs) std::set<std::string>(other.noun_phrase_.synrestrs); | ||
65 | |||
66 | break; | ||
67 | } | ||
68 | |||
69 | case type::preposition: | ||
70 | { | ||
71 | new(&result.preposition_.choices) std::set<std::string>(other.preposition_.choices); | ||
72 | result.preposition_.literal = other.preposition_.literal; | ||
73 | |||
74 | break; | ||
75 | } | ||
76 | |||
77 | case type::literal: | ||
78 | { | ||
79 | new(&result.literal_) std::string(other.literal_); | ||
80 | |||
81 | break; | ||
82 | } | ||
83 | |||
84 | case type::verb: | ||
85 | case type::adjective: | ||
86 | case type::adverb: | ||
87 | case type::invalid: | ||
88 | { | ||
89 | break; | ||
90 | } | ||
91 | } | ||
92 | |||
93 | return result; | ||
94 | } | ||
95 | |||
52 | part::part(const part& other) | 96 | part::part(const part& other) |
53 | { | 97 | { |
54 | type_ = other.type_; | 98 | type_ = other.type_; |
99 | id_ = other.id_; | ||
55 | 100 | ||
56 | switch (type_) | 101 | switch (type_) |
57 | { | 102 | { |
@@ -106,6 +151,7 @@ namespace verbly { | |||
106 | using type = part::type; | 151 | using type = part::type; |
107 | 152 | ||
108 | type tempType = first.type_; | 153 | type tempType = first.type_; |
154 | int tempId = first.id_; | ||
109 | std::string tempRole; | 155 | std::string tempRole; |
110 | selrestr tempSelrestrs; | 156 | selrestr tempSelrestrs; |
111 | std::set<std::string> tempSynrestrs; | 157 | std::set<std::string> tempSynrestrs; |
@@ -151,6 +197,7 @@ namespace verbly { | |||
151 | first.~part(); | 197 | first.~part(); |
152 | 198 | ||
153 | first.type_ = second.type_; | 199 | first.type_ = second.type_; |
200 | first.id_ = second.id_; | ||
154 | 201 | ||
155 | switch (first.type_) | 202 | switch (first.type_) |
156 | { | 203 | { |
@@ -190,6 +237,7 @@ namespace verbly { | |||
190 | second.~part(); | 237 | second.~part(); |
191 | 238 | ||
192 | second.type_ = tempType; | 239 | second.type_ = tempType; |
240 | second.id_ = tempId; | ||
193 | 241 | ||
194 | switch (second.type_) | 242 | switch (second.type_) |
195 | { | 243 | { |
diff --git a/generator/part.h b/generator/part.h index b010f62..39ba1e7 100644 --- a/generator/part.h +++ b/generator/part.h | |||
@@ -4,21 +4,16 @@ | |||
4 | #include <string> | 4 | #include <string> |
5 | #include <set> | 5 | #include <set> |
6 | #include "../lib/selrestr.h" | 6 | #include "../lib/selrestr.h" |
7 | #include "../lib/enums.h" | ||
7 | 8 | ||
8 | namespace verbly { | 9 | namespace verbly { |
10 | |||
9 | namespace generator { | 11 | namespace generator { |
10 | 12 | ||
11 | class part { | 13 | class part { |
12 | public: | 14 | public: |
13 | enum class type { | 15 | |
14 | invalid = -1, | 16 | using type = part_type; |
15 | noun_phrase = 0, | ||
16 | verb = 1, | ||
17 | preposition = 2, | ||
18 | adjective = 3, | ||
19 | adverb = 4, | ||
20 | literal = 5 | ||
21 | }; | ||
22 | 17 | ||
23 | // Static factories | 18 | // Static factories |
24 | 19 | ||
@@ -34,6 +29,10 @@ namespace verbly { | |||
34 | 29 | ||
35 | static part createLiteral(std::string value); | 30 | static part createLiteral(std::string value); |
36 | 31 | ||
32 | // Duplication | ||
33 | |||
34 | static part duplicate(const part& other); | ||
35 | |||
37 | // Copy and move constructors | 36 | // Copy and move constructors |
38 | 37 | ||
39 | part(const part& other); | 38 | part(const part& other); |
@@ -54,6 +53,11 @@ namespace verbly { | |||
54 | 53 | ||
55 | // General accessors | 54 | // General accessors |
56 | 55 | ||
56 | int getId() const | ||
57 | { | ||
58 | return id_; | ||
59 | } | ||
60 | |||
57 | type getType() const | 61 | type getType() const |
58 | { | 62 | { |
59 | return type_; | 63 | return type_; |
@@ -79,13 +83,19 @@ namespace verbly { | |||
79 | 83 | ||
80 | private: | 84 | private: |
81 | 85 | ||
86 | static int nextId_; | ||
87 | |||
88 | int id_; | ||
89 | |||
82 | // Private constructors | 90 | // Private constructors |
83 | 91 | ||
84 | part() | 92 | part() |
85 | { | 93 | { |
86 | } | 94 | } |
87 | 95 | ||
88 | part(type t) : type_(t) | 96 | part(type t) : |
97 | id_(nextId_++), | ||
98 | type_(t) | ||
89 | { | 99 | { |
90 | } | 100 | } |
91 | 101 | ||
diff --git a/generator/role.h b/generator/role.h new file mode 100644 index 0000000..4884ef3 --- /dev/null +++ b/generator/role.h | |||
@@ -0,0 +1,60 @@ | |||
1 | #ifndef ROLE_H_249F9A9C | ||
2 | #define ROLE_H_249F9A9C | ||
3 | |||
4 | #include <stdexcept> | ||
5 | #include <string> | ||
6 | #include "../lib/selrestr.h" | ||
7 | |||
8 | namespace verbly { | ||
9 | |||
10 | class role { | ||
11 | public: | ||
12 | |||
13 | // Default constructor | ||
14 | |||
15 | role() = default; | ||
16 | |||
17 | // Constructor | ||
18 | |||
19 | role( | ||
20 | std::string name, | ||
21 | selrestr selrestrs = {}) : | ||
22 | valid_(true), | ||
23 | name_(name), | ||
24 | selrestrs_(selrestrs) | ||
25 | { | ||
26 | } | ||
27 | |||
28 | // Accessors | ||
29 | |||
30 | const std::string& getName() const | ||
31 | { | ||
32 | if (!valid_) | ||
33 | { | ||
34 | throw std::domain_error("Bad access to invalid role"); | ||
35 | } | ||
36 | |||
37 | return name_; | ||
38 | } | ||
39 | |||
40 | const selrestr& getSelrestrs() const | ||
41 | { | ||
42 | if (!valid_) | ||
43 | { | ||
44 | throw std::domain_error("Bad access to invalid role"); | ||
45 | } | ||
46 | |||
47 | return selrestrs_; | ||
48 | } | ||
49 | |||
50 | private: | ||
51 | |||
52 | bool valid_ = false; | ||
53 | std::string name_; | ||
54 | selrestr selrestrs_; | ||
55 | |||
56 | }; | ||
57 | |||
58 | }; | ||
59 | |||
60 | #endif /* end of include guard: ROLE_H_249F9A9C */ | ||
diff --git a/generator/schema.sql b/generator/schema.sql index c3e54d8..33ebc28 100644 --- a/generator/schema.sql +++ b/generator/schema.sql | |||
@@ -186,19 +186,32 @@ CREATE TABLE `forms_pronunciations` ( | |||
186 | CREATE INDEX `pronunciation_of` ON `forms_pronunciations`(`form_id`); | 186 | CREATE INDEX `pronunciation_of` ON `forms_pronunciations`(`form_id`); |
187 | CREATE INDEX `spelling_of` ON `forms_pronunciations`(`pronunciation_id`); | 187 | CREATE INDEX `spelling_of` ON `forms_pronunciations`(`pronunciation_id`); |
188 | 188 | ||
189 | CREATE TABLE `groups` ( | 189 | CREATE TABLE `frames` ( |
190 | `group_id` INTEGER PRIMARY KEY, | 190 | `frame_id` INTEGER NOT NULL, |
191 | `data` BLOB NOT NULL | 191 | `group_id` INTEGER NOT NULL, |
192 | 'length' INTEGER NOT NULL | ||
192 | ); | 193 | ); |
193 | 194 | ||
194 | CREATE TABLE `frames` ( | 195 | CREATE INDEX `frames_in` ON `frames`(`group_id`); |
195 | `frame_id` INTEGER PRIMARY KEY, | 196 | |
196 | `data` BLOB NOT NULL | 197 | CREATE TABLE `parts` ( |
198 | `part_id` INTEGER PRIMARY KEY, | ||
199 | `frame_id` INTEGER NOT NULL, | ||
200 | `part_index` INTEGER NOT NULL, | ||
201 | `type` INTEGER NOT NULL, | ||
202 | `role` VARCHAR(16), | ||
203 | `selrestrs` BLOB, | ||
204 | `prepositions` BLOB, | ||
205 | `preposition_literality` SMALLINT, | ||
206 | `literal_value` VARCHAR(64) | ||
197 | ); | 207 | ); |
198 | 208 | ||
199 | CREATE TABLE `groups_frames` ( | 209 | CREATE INDEX `parts_of` ON `parts`(`frame_id`); |
200 | `group_id` INTEGER NOT NULL, | 210 | CREATE UNIQUE INDEX `part_by_frame_index` ON `parts`(`frame_id`, `part_index`); |
201 | `frame_id` INTEGER NOT NULL | 211 | |
212 | CREATE TABLE `synrestrs` ( | ||
213 | `part_id` INTEGER NOT NULL, | ||
214 | `synrestr` VARCHAR(32) NOT NULL | ||
202 | ); | 215 | ); |
203 | 216 | ||
204 | CREATE INDEX `frames_in` ON `groups_frames`(`group_id`); | 217 | CREATE INDEX `synrestrs_for` ON `synrestrs`(`part_id`); |
diff --git a/generator/word.h b/generator/word.h index a994ec3..c6d7b20 100644 --- a/generator/word.h +++ b/generator/word.h | |||
@@ -5,6 +5,7 @@ | |||
5 | #include "../lib/enums.h" | 5 | #include "../lib/enums.h" |
6 | 6 | ||
7 | namespace verbly { | 7 | namespace verbly { |
8 | |||
8 | namespace generator { | 9 | namespace generator { |
9 | 10 | ||
10 | class notion; | 11 | class notion; |