summary refs log tree commit diff stats
path: root/lib/statement.cpp
diff options
context:
space:
mode:
author Kelly Rauchenberger <fefferburbia@gmail.com> 2017-01-16 18:02:50 -0500
committer Kelly Rauchenberger <fefferburbia@gmail.com> 2017-01-16 18:02:50 -0500
commit 6746da6edd7d9d50efe374eabbb79a3cac882d81 (patch)
tree ff20917e08b08d36b9541c1371106596e7bec442 /lib/statement.cpp
parent 4af7e55733098ca42f75a4ffaca1b0f6bab4dd36 (diff)
downloadverbly-6746da6edd7d9d50efe374eabbb79a3cac882d81.tar.gz
verbly-6746da6edd7d9d50efe374eabbb79a3cac882d81.tar.bz2
verbly-6746da6edd7d9d50efe374eabbb79a3cac882d81.zip
Started structural rewrite
The new object structure was designed to build on the existing WordNet
structure, while also adding in all of the data that we get from other sources.
More information about this can be found on the project wiki.

The generator has already been completely rewritten to generate a
datafile that uses the new structure. In addition, a number of indexes
are created, which does double the size of the datafile, but also allows
for much faster lookups. Finally, the new generator is written modularly
and is a lot more readable than the old one.

The verbly interface to the new object structure has mostly been
completed, but has not been tested fully. There is a completely new
search API which utilizes a lot of operator overloading; documentation
on how to use it should go up at some point.

Token processing and verb frames are currently unimplemented. Source for
these has been left in the repository for now.
Diffstat (limited to 'lib/statement.cpp')
-rw-r--r--lib/statement.cpp806
1 files changed, 806 insertions, 0 deletions
diff --git a/lib/statement.cpp b/lib/statement.cpp new file mode 100644 index 0000000..52fa00d --- /dev/null +++ b/lib/statement.cpp
@@ -0,0 +1,806 @@
#include "statement.h"
#include <sstream>
#include <stdexcept>
#include <utility>
#include "filter.h"
#include "util.h"
#include "notion.h"
#include "word.h"
#include "group.h"
#include "frame.h"
#include "lemma.h"
#include "form.h"
#include "pronunciation.h"
13
14namespace verbly {
15
16 statement::statement(
17 object context,
18 filter queryFilter) :
19 statement(getTableForContext(context), queryFilter.normalize(context))
20 {
21 }
22
23 std::string statement::getQueryString(std::list<std::string> select, bool random, int limit) const
24 {
25 std::stringstream queryStream;
26
27 if (!withs_.empty())
28 {
29 queryStream << "WITH RECURSIVE ";
30
31 std::list<std::string> ctes;
32 for (const with& cte : withs_)
33 {
34 std::stringstream cteStream;
35 cteStream << cte.getIdentifier();
36 cteStream << " AS (SELECT ";
37 cteStream << cte.getTopTable();
38 cteStream << ".* FROM ";
39 cteStream << cte.getTableForId(cte.getTopTable());
40 cteStream << " AS ";
41 cteStream << cte.getTopTable();
42
43 for (const join& j : cte.getJoins())
44 {
45 cteStream << " ";
46 cteStream << j;
47 }
48
49 if (cte.getCondition().getType() != condition::type::empty)
50 {
51 cteStream << " WHERE ";
52 cteStream << cte.getCondition().toSql();
53 }
54
55 cteStream << " UNION SELECT l.* FROM ";
56 cteStream << cte.getIdentifier();
57 cteStream << " AS t INNER JOIN ";
58 cteStream << cte.getField().getTable();
59 cteStream << " AS j ON t.";
60 cteStream << cte.getField().getColumn();
61 cteStream << " = j.";
62 cteStream << cte.getField().getForeignJoinColumn();
63 cteStream << " INNER JOIN ";
64 cteStream << cte.getTableForId(cte.getTopTable());
65 cteStream << " AS l ON j.";
66 cteStream << cte.getField().getJoinColumn();
67 cteStream << " = l.";
68 cteStream << cte.getField().getColumn();
69 cteStream << ")";
70
71 ctes.push_back(cteStream.str());
72 }
73
74 queryStream << implode(std::begin(ctes), std::end(ctes), ", ");
75 queryStream << " ";
76 }
77
78 std::list<std::string> realSelect;
79 for (std::string& s : select)
80 {
81 realSelect.push_back(topTable_ + "." + s);
82 }
83
84 queryStream << "SELECT ";
85 queryStream << implode(std::begin(realSelect), std::end(realSelect), ", ");
86 queryStream << " FROM ";
87 queryStream << tables_.at(topTable_);
88 queryStream << " AS ";
89 queryStream << topTable_;
90
91 for (const join& j : joins_)
92 {
93 queryStream << " ";
94 queryStream << j;
95 }
96
97 if (topCondition_.getType() != condition::type::empty)
98 {
99 queryStream << " WHERE ";
100 queryStream << topCondition_.toSql();
101 }
102
103 if (random)
104 {
105 queryStream << " ORDER BY RANDOM()";
106 }
107
108 if (limit > 0)
109 {
110 queryStream << " LIMIT ";
111 queryStream << limit;
112 }
113
114 return queryStream.str();
115 }
116
117 std::list<binding> statement::getBindings() const
118 {
119 std::list<binding> result;
120
121 for (const with& w : withs_)
122 {
123 for (binding value : w.getCondition().flattenBindings())
124 {
125 result.push_back(std::move(value));
126 }
127 }
128
129 for (binding value : topCondition_.flattenBindings())
130 {
131 result.push_back(std::move(value));
132 }
133
134 return result;
135 }
136
137 statement::statement(
138 std::string tableName,
139 filter clause,
140 int nextTableId,
141 int nextWithId) :
142 nextTableId_(nextTableId),
143 nextWithId_(nextWithId),
144 topTable_(instantiateTable(std::move(tableName))),
145 topCondition_(parseFilter(std::move(clause)))
146 {
147 }
148
149 statement::condition statement::parseFilter(filter clause)
150 {
151 switch (clause.getType())
152 {
153 case filter::type::empty:
154 {
155 return {};
156 }
157
158 case filter::type::singleton:
159 {
160 switch (clause.getField().getType())
161 {
162 case field::type::undefined:
163 {
164 return {};
165 }
166
167 case field::type::string:
168 case field::type::integer:
169 case field::type::boolean:
170 {
171 switch (clause.getComparison())
172 {
173 case filter::comparison::is_null:
174 {
175 return condition(topTable_, clause.getField().getColumn(), true);
176 }
177
178 case filter::comparison::is_not_null:
179 {
180 return condition(topTable_, clause.getField().getColumn(), false);
181 }
182
183 case filter::comparison::int_equals:
184 {
185 return condition(topTable_, clause.getField().getColumn(), condition::comparison::equals, clause.getIntegerArgument());
186 }
187
188 case filter::comparison::int_does_not_equal:
189 {
190 return condition(topTable_, clause.getField().getColumn(), condition::comparison::does_not_equal, clause.getIntegerArgument());
191 }
192
193 case filter::comparison::int_is_at_least:
194 {
195 return condition(topTable_, clause.getField().getColumn(), condition::comparison::is_at_least, clause.getIntegerArgument());
196 }
197
198 case filter::comparison::int_is_greater_than:
199 {
200 return condition(topTable_, clause.getField().getColumn(), condition::comparison::is_greater_than, clause.getIntegerArgument());
201 }
202
203 case filter::comparison::int_is_at_most:
204 {
205 return condition(topTable_, clause.getField().getColumn(), condition::comparison::is_at_most, clause.getIntegerArgument());
206 }
207
208 case filter::comparison::int_is_less_than:
209 {
210 return condition(topTable_, clause.getField().getColumn(), condition::comparison::is_less_than, clause.getIntegerArgument());
211 }
212
213 case filter::comparison::boolean_equals:
214 {
215 return condition(topTable_, clause.getField().getColumn(), condition::comparison::equals, clause.getBooleanArgument() ? 1 : 0);
216 }
217
218 case filter::comparison::string_equals:
219 {
220 return condition(topTable_, clause.getField().getColumn(), condition::comparison::equals, clause.getStringArgument());
221 }
222
223 case filter::comparison::string_does_not_equal:
224 {
225 return condition(topTable_, clause.getField().getColumn(), condition::comparison::does_not_equal, clause.getStringArgument());
226 }
227
228 case filter::comparison::string_is_like:
229 {
230 return condition(topTable_, clause.getField().getColumn(), condition::comparison::is_like, clause.getStringArgument());
231 }
232
233 case filter::comparison::string_is_not_like:
234 {
235 return condition(topTable_, clause.getField().getColumn(), condition::comparison::is_not_like, clause.getStringArgument());
236 }
237
238 case filter::comparison::matches:
239 case filter::comparison::does_not_match:
240 case filter::comparison::hierarchally_matches:
241 case filter::comparison::does_not_hierarchally_match:
242 {
243 throw std::logic_error("Invalid comparison type for field");
244 }
245 }
246 }
247
248 case field::type::join:
249 {
250 std::string joinTableName;
251 if (clause.getField().hasTable())
252 {
253 joinTableName = clause.getField().getTable();
254 } else {
255 joinTableName = getTableForContext(clause.getField().getJoinObject());
256 }
257
258 statement joinStmt(
259 joinTableName,
260 clause.getJoinCondition().normalize(clause.getField().getJoinObject()),
261 nextTableId_,
262 nextWithId_);
263
264 std::string joinTable = joinStmt.topTable_;
265 condition curCond = integrate(std::move(joinStmt));
266
267 bool outer = false;
268 if (clause.getComparison() == filter::comparison::does_not_match)
269 {
270 outer = true;
271
272 curCond &= condition(joinTable, clause.getField().getColumn(), true);
273 }
274
275 joins_.emplace_back(outer, joinTableName, topTable_, clause.getField().getColumn(), joinTable, clause.getField().getColumn());
276
277 return curCond;
278 }
279
280 case field::type::join_through:
281 {
282 statement joinStmt(
283 getTableForContext(clause.getField().getJoinObject()),
284 clause.getJoinCondition().normalize(clause.getField().getJoinObject()),
285 nextTableId_,
286 nextWithId_);
287
288 std::string joinTable = joinStmt.topTable_;
289 std::string throughTable = instantiateTable(clause.getField().getTable());
290 condition curCond = integrate(std::move(joinStmt));
291
292 bool outer = false;
293 if (clause.getComparison() == filter::comparison::does_not_match)
294 {
295 outer = true;
296
297 curCond &= condition(throughTable, clause.getField().getJoinColumn(), true);
298 }
299
300 joins_.emplace_back(outer, clause.getField().getTable(), topTable_, clause.getField().getColumn(), throughTable, clause.getField().getJoinColumn());
301 joins_.emplace_back(false, getTableForContext(clause.getField().getJoinObject()), throughTable, clause.getField().getForeignJoinColumn(), joinTable, clause.getField().getForeignColumn());
302
303 return curCond;
304 }
305
306 case field::type::hierarchal_join:
307 {
308 std::string withName = std::string(clause.getField().getTable()) + "_tree_" + std::to_string(nextWithId_++);
309 std::string withInstName = instantiateTable(withName);
310
311 bool outer = false;
312 if (clause.getComparison() == filter::comparison::does_not_hierarchally_match)
313 {
314 outer = true;
315 }
316
317 joins_.emplace_back(outer, withName, topTable_, clause.getField().getColumn(), withInstName, clause.getField().getColumn());
318
319 statement withStmt(
320 getTableForContext(clause.getField().getObject()),
321 clause.getJoinCondition().normalize(clause.getField().getObject()),
322 nextTableId_,
323 nextWithId_);
324
325 for (auto& w : withStmt.withs_)
326 {
327 withs_.push_back(std::move(w));
328 }
329
330 nextTableId_ = withStmt.nextTableId_;
331 nextWithId_ = withStmt.nextWithId_;
332
333 withs_.emplace_back(
334 withName,
335 clause.getField(),
336 std::move(withStmt.tables_),
337 std::move(withStmt.topTable_),
338 std::move(withStmt.topCondition_),
339 std::move(withStmt.joins_));
340
341 if (clause.getComparison() == filter::comparison::does_not_hierarchally_match)
342 {
343 return condition(withInstName, clause.getField().getColumn(), true);
344 } else {
345 return {};
346 }
347 }
348 }
349 }
350
351 case filter::type::group:
352 {
353 condition grp(clause.getOrlogic());
354
355 for (const filter& child : clause)
356 {
357 condition newChild = parseFilter(child);
358 if (newChild.getType() != condition::type::empty)
359 {
360 grp += std::move(newChild);
361 }
362 }
363
364 if (grp.getChildren().empty())
365 {
366 grp = {};
367 }
368
369 return grp;
370 }
371 }
372 }
373
374 std::string statement::instantiateTable(std::string name)
375 {
376 std::string identifier = name + "_" + std::to_string(nextTableId_++);
377 tables_[identifier] = name;
378
379 return identifier;
380 }
381
382 statement::condition statement::integrate(statement subStmt)
383 {
384 for (auto& mapping : subStmt.tables_)
385 {
386 tables_[mapping.first] = mapping.second;
387 }
388
389 for (auto& j : subStmt.joins_)
390 {
391 joins_.push_back(j);
392 }
393
394 for (auto& w : subStmt.withs_)
395 {
396 withs_.push_back(w);
397 }
398
399 nextTableId_ = subStmt.nextTableId_;
400 nextWithId_ = subStmt.nextWithId_;
401
402 return subStmt.topCondition_;
403 }
404
405 std::ostream& operator<<(std::ostream& oss, const statement::join& j)
406 {
407 if (j.isOuterJoin())
408 {
409 oss << "LEFT";
410 } else {
411 oss << "INNER";
412 }
413
414 return oss
415 << " JOIN "
416 << j.getForeignTableName()
417 << " AS "
418 << j.getForeignTable()
419 << " ON "
420 << j.getForeignTable()
421 << "."
422 << j.getForeignColumn()
423 << " = "
424 << j.getJoinTable()
425 << "."
426 << j.getJoinColumn();
427 }
428
429 statement::condition::condition(const condition& other)
430 {
431 type_ = other.type_;
432
433 switch (type_)
434 {
435 case type::empty:
436 {
437 break;
438 }
439
440 case type::singleton:
441 {
442 new(&singleton_.table_) std::string(other.singleton_.table_);
443 new(&singleton_.column_) std::string(other.singleton_.column_);
444 singleton_.comparison_ = other.singleton_.comparison_;
445 new(&singleton_.value_) binding(other.singleton_.value_);
446
447 break;
448 }
449
450 case type::group:
451 {
452 new(&group_.children_) std::list<condition>(other.group_.children_);
453 group_.orlogic_ = other.group_.orlogic_;
454
455 break;
456 }
457 }
458 }
459
460 statement::condition::condition(condition&& other) : condition()
461 {
462 swap(*this, other);
463 }
464
465 statement::condition& statement::condition::operator=(condition other)
466 {
467 swap(*this, other);
468
469 return *this;
470 }
471
472 void swap(statement::condition& first, statement::condition& second)
473 {
474 using type = statement::condition::type;
475 using condition = statement::condition;
476
477 type tempType = first.type_;
478 std::string tempTable;
479 std::string tempColumn;
480 condition::comparison tempComparison;
481 binding tempBinding;
482 std::list<condition> tempChildren;
483 bool tempOrlogic;
484
485 switch (tempType)
486 {
487 case type::empty:
488 {
489 break;
490 }
491
492 case type::singleton:
493 {
494 tempTable = std::move(first.singleton_.table_);
495 tempColumn = std::move(first.singleton_.column_);
496 tempComparison = first.singleton_.comparison_;
497 tempBinding = std::move(first.singleton_.value_);
498
499 break;
500 }
501
502 case type::group:
503 {
504 tempChildren = std::move(first.group_.children_);
505 tempOrlogic = first.group_.orlogic_;
506
507 break;
508 }
509 }
510
511 first.~condition();
512
513 first.type_ = second.type_;
514
515 switch (first.type_)
516 {
517 case type::empty:
518 {
519 break;
520 }
521
522 case type::singleton:
523 {
524 new(&first.singleton_.table_) std::string(std::move(second.singleton_.table_));
525 new(&first.singleton_.column_) std::string(std::move(second.singleton_.column_));
526 first.singleton_.comparison_ = second.singleton_.comparison_;
527 new(&first.singleton_.value_) binding(std::move(second.singleton_.value_));
528
529 break;
530 }
531
532 case type::group:
533 {
534 new(&first.group_.children_) std::list<condition>(std::move(second.group_.children_));
535 first.group_.orlogic_ = second.group_.orlogic_;
536
537 break;
538 }
539 }
540
541 second.~condition();
542
543 second.type_ = tempType;
544
545 switch (second.type_)
546 {
547 case type::empty:
548 {
549 break;
550 }
551
552 case type::singleton:
553 {
554 new(&second.singleton_.table_) std::string(std::move(tempTable));
555 new(&second.singleton_.column_) std::string(std::move(tempColumn));
556 second.singleton_.comparison_ = tempComparison;
557 new(&second.singleton_.value_) binding(std::move(tempBinding));
558
559 break;
560 }
561
562 case type::group:
563 {
564 new(&second.group_.children_) std::list<condition>(std::move(tempChildren));
565 second.group_.orlogic_ = tempOrlogic;
566
567 break;
568 }
569 }
570 }
571
572 statement::condition::~condition()
573 {
574 switch (type_)
575 {
576 case type::empty:
577 {
578 break;
579 }
580
581 case type::singleton:
582 {
583 using string_type = std::string;
584
585 singleton_.table_.~string_type();
586 singleton_.column_.~string_type();
587 singleton_.value_.~binding();
588
589 break;
590 }
591
592 case type::group:
593 {
594 using list_type = std::list<condition>;
595
596 group_.children_.~list_type();
597
598 break;
599 }
600 }
601 }
602
603 statement::condition::condition() : type_(type::empty)
604 {
605 }
606
607 statement::condition::condition(
608 std::string table,
609 std::string column,
610 bool isNull) :
611 type_(type::singleton)
612 {
613 new(&singleton_.table_) std::string(std::move(table));
614 new(&singleton_.column_) std::string(std::move(column));
615
616 if (isNull)
617 {
618 singleton_.comparison_ = comparison::is_null;
619 } else {
620 singleton_.comparison_ = comparison::is_not_null;
621 }
622 }
623
624 statement::condition::condition(
625 std::string table,
626 std::string column,
627 comparison comp,
628 binding value) :
629 type_(type::singleton)
630 {
631 new(&singleton_.table_) std::string(std::move(table));
632 new(&singleton_.column_) std::string(std::move(column));
633 singleton_.comparison_ = comp;
634 new(&singleton_.value_) binding(std::move(value));
635 }
636
637 std::string statement::condition::toSql() const
638 {
639 switch (type_)
640 {
641 case type::empty:
642 {
643 return "";
644 }
645
646 case type::singleton:
647 {
648 switch (singleton_.comparison_)
649 {
650 case comparison::equals:
651 {
652 return singleton_.table_ + "." + singleton_.column_ + " = ?";
653 }
654
655 case comparison::does_not_equal:
656 {
657 return singleton_.table_ + "." + singleton_.column_ + " != ?";
658 }
659
660 case comparison::is_greater_than:
661 {
662 return singleton_.table_ + "." + singleton_.column_ + " > ?";
663 }
664
665 case comparison::is_at_most:
666 {
667 return singleton_.table_ + "." + singleton_.column_ + " <= ?";
668 }
669
670 case comparison::is_less_than:
671 {
672 return singleton_.table_ + "." + singleton_.column_ + " < ?";
673 }
674
675 case comparison::is_at_least:
676 {
677 return singleton_.table_ + "." + singleton_.column_ + " >= ?";
678 }
679
680 case comparison::is_like:
681 {
682 return singleton_.table_ + "." + singleton_.column_ + " LIKE ?";
683 }
684
685 case comparison::is_not_like:
686 {
687 return singleton_.table_ + "." + singleton_.column_ + " NOT LIKE ?";
688 }
689
690 case comparison::is_not_null:
691 {
692 return singleton_.table_ + "." + singleton_.column_ + " IS NOT NULL";
693 }
694
695 case comparison::is_null:
696 {
697 return singleton_.table_ + "." + singleton_.column_ + " IS NULL";
698 }
699 }
700 }
701
702 case type::group:
703 {
704 std::list<std::string> clauses;
705 for (const condition& cond : group_.children_)
706 {
707 clauses.push_back(cond.toSql());
708 }
709
710 return implode(std::begin(clauses), std::end(clauses), group_.orlogic_ ? " OR " : " AND ");
711 }
712 }
713 }
714
715 std::list<binding> statement::condition::flattenBindings() const
716 {
717 switch (type_)
718 {
719 case type::empty:
720 {
721 return {};
722 }
723
724 case type::singleton:
725 {
726 return {singleton_.value_};
727 }
728
729 case type::group:
730 {
731 std::list<binding> bindings;
732 for (const condition& cond : group_.children_)
733 {
734 for (binding value : cond.flattenBindings())
735 {
736 bindings.push_back(std::move(value));
737 }
738 }
739
740 return bindings;
741 }
742 }
743 }
744
745 statement::condition::condition(bool orlogic) : type_(type::group)
746 {
747 new(&group_.children_) std::list<condition>();
748 group_.orlogic_ = orlogic;
749 }
750
751 statement::condition& statement::condition::operator+=(condition n)
752 {
753 if (type_ == type::group)
754 {
755 group_.children_.push_back(std::move(n));
756
757 return *this;
758 } else {
759 throw std::domain_error("Cannot add condition to non-group condition");
760 }
761 }
762
763 statement::condition& statement::condition::operator&=(condition n)
764 {
765 switch (type_)
766 {
767 case type::empty:
768 {
769 *this = std::move(n);
770
771 break;
772 }
773
774 case type::singleton:
775 {
776 condition grp(false);
777 grp += *this;
778 grp += std::move(n);
779
780 *this = grp;
781
782 break;
783 }
784
785 case type::group:
786 {
787 *this += std::move(n);
788
789 break;
790 }
791 }
792
793 return *this;
794 }
795
796 const std::list<statement::condition>& statement::condition::getChildren() const
797 {
798 if (type_ == type::group)
799 {
800 return group_.children_;
801 } else {
802 throw std::domain_error("Cannot get children of non-group condition");
803 }
804 }
805
806};