summary refs log tree commit diff stats
path: root/generator/notion.cpp
diff options
context:
space:
mode:
author Kelly Rauchenberger <fefferburbia@gmail.com> 2017-01-16 18:02:50 -0500
committer Kelly Rauchenberger <fefferburbia@gmail.com> 2017-01-16 18:02:50 -0500
commit 6746da6edd7d9d50efe374eabbb79a3cac882d81 (patch)
tree ff20917e08b08d36b9541c1371106596e7bec442 /generator/notion.cpp
parent 4af7e55733098ca42f75a4ffaca1b0f6bab4dd36 (diff)
download verbly-6746da6edd7d9d50efe374eabbb79a3cac882d81.tar.gz
verbly-6746da6edd7d9d50efe374eabbb79a3cac882d81.tar.bz2
verbly-6746da6edd7d9d50efe374eabbb79a3cac882d81.zip
Started structural rewrite
The new object structure was designed to build on the existing WordNet
structure, while also adding in all of the data that we get from other sources.
More information about this can be found on the project wiki.

The generator has already been completely rewritten to generate a
datafile that uses the new structure. In addition, a number of indexes
are created, which does double the size of the datafile, but also allows
for much faster lookups. Finally, the new generator is written modularly
and is a lot more readable than the old one.

The verbly interface to the new object structure has mostly been
completed, but has not been tested fully. There is a completely new
search API which utilizes a lot of operator overloading; documentation
on how to use it should go up at some point.

Token processing and verb frames are currently unimplemented. The source for
these has been left in the repository for now.
Diffstat (limited to 'generator/notion.cpp')
-rw-r--r-- generator/notion.cpp 85
1 files changed, 85 insertions, 0 deletions
diff --git a/generator/notion.cpp b/generator/notion.cpp
new file mode 100644
index 0000000..290d982
--- /dev/null
+++ b/generator/notion.cpp
@@ -0,0 +1,85 @@
1#include "notion.h"
2#include <string>
3#include <list>
4#include "database.h"
5#include "field.h"
6
7namespace verbly {
8 namespace generator {
9
10 int notion::nextId_ = 0;
11
12 notion::notion(
13 part_of_speech partOfSpeech) :
14 id_(nextId_++),
15 partOfSpeech_(partOfSpeech)
16 {
17 }
18
19 notion::notion(
20 part_of_speech partOfSpeech,
21 int wnid) :
22 id_(nextId_++),
23 partOfSpeech_(partOfSpeech),
24 wnid_(wnid),
25 hasWnid_(true)
26 {
27 }
28
29 void notion::incrementNumOfImages()
30 {
31 // Calling code should always call hasWnid and check that the notion is a noun first.
32 assert(hasWnid_ && (partOfSpeech_ == part_of_speech::noun));
33
34 numOfImages_++;
35 }
36
37 void notion::setPrepositionGroups(std::list<std::string> groups)
38 {
39 // Calling code should always check that the notion is a preposition first.
40 assert(partOfSpeech_ == part_of_speech::preposition);
41
42 prepositionGroups_ = groups;
43 }
44
45 database& operator<<(database& db, const notion& arg)
46 {
47 // First, serialize the notion
48 {
49 std::list<field> fields;
50
51 fields.emplace_back("notion_id", arg.getId());
52 fields.emplace_back("part_of_speech", static_cast<int>(arg.getPartOfSpeech()));
53
54 if (arg.hasWnid())
55 {
56 fields.emplace_back("wnid", arg.getWnid());
57
58 if (arg.getPartOfSpeech() == part_of_speech::noun)
59 {
60 fields.emplace_back("images", arg.getNumOfImages());
61 }
62 }
63
64 db.insertIntoTable("notions", std::move(fields));
65 }
66
67 // Next, serialize the is_a relationship if this is a preposition
68 if (arg.getPartOfSpeech() == part_of_speech::preposition)
69 {
70 for (std::string group : arg.getPrepositionGroups())
71 {
72 std::list<field> fields;
73
74 fields.emplace_back("notion_id", arg.getId());
75 fields.emplace_back("groupname", group);
76
77 db.insertIntoTable("is_a", std::move(fields));
78 }
79 }
80
81 return db;
82 }
83
84 };
85};