summary refs log tree commit diff stats
diff options
context:
space:
mode:
authorKelly Rauchenberger <fefferburbia@gmail.com>2017-01-21 19:00:45 -0500
committerKelly Rauchenberger <fefferburbia@gmail.com>2017-01-21 19:00:45 -0500
commit9423ab7f075afd3e1501a2d1438fc8330811e40e (patch)
tree71f1b207711907aad2e02643838eb2e479714526
parent2a1f319b37dc3af45c136bfdc9226515c2fefaf2 (diff)
downloadverbly-9423ab7f075afd3e1501a2d1438fc8330811e40e.tar.gz
verbly-9423ab7f075afd3e1501a2d1438fc8330811e40e.tar.bz2
verbly-9423ab7f075afd3e1501a2d1438fc8330811e40e.zip
Fixed statement generation involving negative subqueries
Previously, we generated negative subqueries by integrating them into
the main statement normally, and then making the connecting join be a
LEFT JOIN instead of an INNER JOIN, and by adding a condition that the
join column be NULL. The problem with this is that if the top table of
the subquery joins against any other table (which join throughs always
do), then no rows will be returned. This was solved by putting the
subquery into a CTE and then LEFT JOINing as before with the CTE.
-rw-r--r--lib/statement.cpp275
-rw-r--r--lib/statement.h16
2 files changed, 234 insertions, 57 deletions
diff --git a/lib/statement.cpp b/lib/statement.cpp index f8d5346..64a4311 100644 --- a/lib/statement.cpp +++ b/lib/statement.cpp
@@ -52,20 +52,24 @@ namespace verbly {
52 cteStream << cte.getCondition().toSql(); 52 cteStream << cte.getCondition().toSql();
53 } 53 }
54 54
55 cteStream << " UNION SELECT l.* FROM "; 55 if (cte.isRecursive())
56 cteStream << cte.getIdentifier(); 56 {
57 cteStream << " AS t INNER JOIN "; 57 cteStream << " UNION SELECT l.* FROM ";
58 cteStream << cte.getField().getTable(); 58 cteStream << cte.getIdentifier();
59 cteStream << " AS j ON t."; 59 cteStream << " AS t INNER JOIN ";
60 cteStream << cte.getField().getColumn(); 60 cteStream << cte.getField().getTable();
61 cteStream << " = j."; 61 cteStream << " AS j ON t.";
62 cteStream << cte.getField().getForeignJoinColumn(); 62 cteStream << cte.getField().getColumn();
63 cteStream << " INNER JOIN "; 63 cteStream << " = j.";
64 cteStream << cte.getTableForId(cte.getTopTable()); 64 cteStream << cte.getField().getForeignJoinColumn();
65 cteStream << " AS l ON j."; 65 cteStream << " INNER JOIN ";
66 cteStream << cte.getField().getJoinColumn(); 66 cteStream << cte.getTableForId(cte.getTopTable());
67 cteStream << " = l."; 67 cteStream << " AS l ON j.";
68 cteStream << cte.getField().getColumn(); 68 cteStream << cte.getField().getJoinColumn();
69 cteStream << " = l.";
70 cteStream << cte.getField().getColumn();
71 }
72
69 cteStream << ")"; 73 cteStream << ")";
70 74
71 ctes.push_back(cteStream.str()); 75 ctes.push_back(cteStream.str());
@@ -146,6 +150,13 @@ namespace verbly {
146 { 150 {
147 } 151 }
148 152
153 /**
154 * This function recursively parses the query's filter condition. It is not
155 * idempotent. It returns a condition object representing the passed filter,
156 * but it also modifies the statement object, specifically by adding any joins
157 * and CTEs that may be required to represent the passed filter. This may also
158 * involve instantiating tables.
159 */
149 statement::condition statement::parseFilter(filter clause) 160 statement::condition statement::parseFilter(filter clause)
150 { 161 {
151 switch (clause.getType()) 162 switch (clause.getType())
@@ -164,6 +175,9 @@ namespace verbly {
164 return {}; 175 return {};
165 } 176 }
166 177
178 // For primitive type filters, all we need to do is translate the
179 // filter object directly into a condition object. No joins are
180 // necessary.
167 case field::type::string: 181 case field::type::string:
168 case field::type::integer: 182 case field::type::integer:
169 case field::type::boolean: 183 case field::type::boolean:
@@ -247,6 +261,7 @@ namespace verbly {
247 261
248 case field::type::join: 262 case field::type::join:
249 { 263 {
264 // First, figure out what table we need to join against.
250 std::string joinTableName; 265 std::string joinTableName;
251 if (clause.getField().hasTable()) 266 if (clause.getField().hasTable())
252 { 267 {
@@ -255,6 +270,9 @@ namespace verbly {
255 joinTableName = getTableForContext(clause.getField().getJoinObject()); 270 joinTableName = getTableForContext(clause.getField().getJoinObject());
256 } 271 }
257 272
273 // Recursively parse the subquery, and therefore obtain an
274 // instantiated table to join against, as well as any joins or CTEs
275 // that the subquery may require to function.
258 statement joinStmt( 276 statement joinStmt(
259 joinTableName, 277 joinTableName,
260 clause.getJoinCondition().normalize(clause.getField().getJoinObject()), 278 clause.getJoinCondition().normalize(clause.getField().getJoinObject()),
@@ -262,23 +280,71 @@ namespace verbly {
262 nextWithId_); 280 nextWithId_);
263 281
264 std::string joinTable = joinStmt.topTable_; 282 std::string joinTable = joinStmt.topTable_;
265 condition curCond = integrate(std::move(joinStmt));
266 283
267 bool outer = false;
268 if (clause.getComparison() == filter::comparison::does_not_match) 284 if (clause.getComparison() == filter::comparison::does_not_match)
269 { 285 {
270 outer = true; 286 // If the comparison is actually a negative filter, we can't just
287 // integrate the subquery statement into this statement and then
288 // join against it. Even if we LEFT JOIN against the subquery's
289 // top level table and then condition on the join column being
290 // NULL, if that table joins against any other table, the query
291 // will return zero results. Instead, we create a non-recursive
292 // CTE that represents the subquery, then LEFT JOIN against it and
293 // condition on the join column being NULL as before.
294 std::string withName = instantiateWith(clause.getField().getTable());
295 std::string withInstName = instantiateTable(withName);
271 296
272 curCond &= condition(joinTable, clause.getField().getColumn(), true); 297 // LEFT JOIN against the CTE.
273 } 298 joins_.emplace_back(
274 299 true,
275 joins_.emplace_back(outer, joinTableName, topTable_, clause.getField().getColumn(), joinTable, clause.getField().getColumn()); 300 withName,
301 topTable_,
302 clause.getField().getColumn(),
303 withInstName,
304 clause.getField().getColumn());
305
306 // All CTEs have to be in the main statement, so integrate any
307 // CTEs that our subquery uses. Also, retrieve the table mapping,
308 // joins list, and subquery condition, and use them to create the
309 // CTE.
310 std::map<std::string, std::string> cteTables = std::move(joinStmt.tables_);
311 std::list<join> cteJoins = std::move(joinStmt.joins_);
312 condition cteCondition = integrate(std::move(joinStmt), true);
276 313
277 return curCond; 314 withs_.emplace_back(
315 std::move(withName),
316 clause.getField(),
317 std::move(cteTables),
318 std::move(joinTable),
319 std::move(cteCondition),
320 std::move(cteJoins),
321 false);
322
323 // Condition on the join column being NULL, which causes the query
324 // to only return results that do not match the subquery.
325 return condition(std::move(withInstName), clause.getField().getColumn(), true);
326 } else {
327 // INNER JOIN against the top table of the subquery.
328 joins_.emplace_back(
329 false,
330 std::move(joinTableName),
331 topTable_,
332 clause.getField().getColumn(),
333 std::move(joinTable),
334 clause.getField().getColumn());
335
336 // Integrate the subquery's table mappings, joins, and CTEs into
337 // this statement, and return the subquery condition as our
338 // condition.
339 return integrate(std::move(joinStmt));
340 }
278 } 341 }
279 342
280 case field::type::join_through: 343 case field::type::join_through:
281 { 344 {
345 // Recursively parse the subquery, and therefore obtain an
346 // instantiated table to join against, as well as any joins or CTEs
347 // that the subquery may require to function.
282 statement joinStmt( 348 statement joinStmt(
283 getTableForContext(clause.getField().getJoinObject()), 349 getTableForContext(clause.getField().getJoinObject()),
284 clause.getJoinCondition().normalize(clause.getField().getJoinObject()), 350 clause.getJoinCondition().normalize(clause.getField().getJoinObject()),
@@ -286,58 +352,143 @@ namespace verbly {
286 nextWithId_); 352 nextWithId_);
287 353
288 std::string joinTable = joinStmt.topTable_; 354 std::string joinTable = joinStmt.topTable_;
289 std::string throughTable = instantiateTable(clause.getField().getTable());
290 condition curCond = integrate(std::move(joinStmt));
291 355
292 bool outer = false;
293 if (clause.getComparison() == filter::comparison::does_not_match) 356 if (clause.getComparison() == filter::comparison::does_not_match)
294 { 357 {
295 outer = true; 358 // If the comparison is actually a negative filter, we can't just
359 // integrate the subquery statement into this statement and then
360 // join against it. Even if we LEFT JOIN against the subquery's
361 // through table and then condition on the join column being NULL,
362 // the query will return zero results because the through table
363 // joins against the subquery's top level table. Instead, we
364 // create a non-recursive CTE that represents the through table
365 // joined against the subquery, then LEFT JOIN against it and
366 // condition on the join column being NULL as before.
367 std::string withName = instantiateWith(clause.getField().getTable());
368 std::string withInstName = instantiateTable(withName);
296 369
297 curCond &= condition(throughTable, clause.getField().getJoinColumn(), true); 370 // LEFT JOIN against the CTE.
298 } 371 joins_.emplace_back(
299 372 true,
300 joins_.emplace_back(outer, clause.getField().getTable(), topTable_, clause.getField().getColumn(), throughTable, clause.getField().getJoinColumn()); 373 withName,
301 joins_.emplace_back(false, getTableForContext(clause.getField().getJoinObject()), throughTable, clause.getField().getForeignJoinColumn(), joinTable, clause.getField().getForeignColumn()); 374 topTable_,
375 clause.getField().getColumn(),
376 withInstName,
377 clause.getField().getJoinColumn());
378
379 // Modify the substatement such that the through table is the top
380 // table, and such that it joins against the previous top table.
381 std::string throughTable = joinStmt.instantiateTable(clause.getField().getTable());
382 joinStmt.joins_.emplace_back(
383 false,
384 getTableForContext(clause.getField().getJoinObject()),
385 throughTable,
386 clause.getField().getForeignJoinColumn(),
387 std::move(joinTable),
388 clause.getField().getForeignColumn());
389
390 joinStmt.topTable_ = throughTable;
391
392 // All CTEs have to be in the main statement, so integrate any
393 // CTEs that our subquery uses. Also, retrieve the table mapping,
394 // joins list, and subquery condition, and use them to create the
395 // CTE.
396 std::map<std::string, std::string> cteTables = std::move(joinStmt.tables_);
397 std::list<join> cteJoins = std::move(joinStmt.joins_);
398 condition cteCondition = integrate(std::move(joinStmt), true);
302 399
303 return curCond; 400 withs_.emplace_back(
401 std::move(withName),
402 clause.getField(),
403 std::move(cteTables),
404 std::move(throughTable),
405 std::move(cteCondition),
406 std::move(cteJoins),
407 false);
408
409 // Condition on the join column being NULL, which causes the query
410 // to only return results that do not match the subquery.
411 return condition(std::move(withInstName), clause.getField().getJoinColumn(), true);
412 } else {
413 // Instantiate the through table.
414 std::string throughTable = instantiateTable(clause.getField().getTable());
415
416 // INNER JOIN against the through table.
417 joins_.emplace_back(
418 false,
419 clause.getField().getTable(),
420 topTable_,
421 clause.getField().getColumn(),
422 throughTable,
423 clause.getField().getJoinColumn());
424
425 // INNER JOIN from the through table to the top table of the subquery.
426 joins_.emplace_back(
427 false,
428 getTableForContext(clause.getField().getJoinObject()),
429 std::move(throughTable),
430 clause.getField().getForeignJoinColumn(),
431 std::move(joinTable),
432 clause.getField().getForeignColumn());
433
434 // Integrate the subquery's table mappings, joins, and CTEs into
435 // this statement, and return the subquery condition as our
436 // condition.
437 return integrate(std::move(joinStmt));
438 }
304 } 439 }
305 440
306 case field::type::hierarchal_join: 441 case field::type::hierarchal_join:
307 { 442 {
308 std::string withName = std::string(clause.getField().getTable()) + "_tree_" + std::to_string(nextWithId_++); 443 // Create a recursive CTE that represents the results of the subquery.
444 std::string withName = instantiateWith(clause.getField().getTable());
309 std::string withInstName = instantiateTable(withName); 445 std::string withInstName = instantiateTable(withName);
310 446
447 // If we are matching against the subquery, we INNER JOIN with the
448 // CTE. If we are negatively matching the subquery, we LEFT JOIN
449 // with the CTE.
311 bool outer = false; 450 bool outer = false;
312 if (clause.getComparison() == filter::comparison::does_not_hierarchally_match) 451 if (clause.getComparison() == filter::comparison::does_not_hierarchally_match)
313 { 452 {
314 outer = true; 453 outer = true;
315 } 454 }
316 455
317 joins_.emplace_back(outer, withName, topTable_, clause.getField().getColumn(), withInstName, clause.getField().getColumn()); 456 // Join against the CTE.
457 joins_.emplace_back(
458 outer,
459 withName,
460 topTable_,
461 clause.getField().getColumn(),
462 withInstName,
463 clause.getField().getColumn());
318 464
465 // Recursively parse the subquery in order to create the CTE.
319 statement withStmt( 466 statement withStmt(
320 getTableForContext(clause.getField().getObject()), 467 getTableForContext(clause.getField().getObject()),
321 clause.getJoinCondition().normalize(clause.getField().getObject()), 468 clause.getJoinCondition().normalize(clause.getField().getObject()),
322 nextTableId_, 469 nextTableId_,
323 nextWithId_); 470 nextWithId_);
324 471
325 for (auto& w : withStmt.withs_) 472 // All CTEs have to be in the main statement, so integrate any CTEs
326 { 473 // that our subquery uses. Also, retrieve the table mapping, joins
327 withs_.push_back(std::move(w)); 474 // list, and subquery condition, and use them to create the CTE.
328 } 475 std::string cteTopTable = std::move(withStmt.topTable_);
329 476 std::map<std::string, std::string> cteTables = std::move(withStmt.tables_);
330 nextTableId_ = withStmt.nextTableId_; 477 std::list<join> cteJoins = std::move(withStmt.joins_);
331 nextWithId_ = withStmt.nextWithId_; 478 condition cteCondition = integrate(std::move(withStmt), true);
332 479
333 withs_.emplace_back( 480 withs_.emplace_back(
334 withName, 481 std::move(withName),
335 clause.getField(), 482 clause.getField(),
336 std::move(withStmt.tables_), 483 std::move(cteTables),
337 std::move(withStmt.topTable_), 484 std::move(cteTopTable),
338 std::move(withStmt.topCondition_), 485 std::move(cteCondition),
339 std::move(withStmt.joins_)); 486 std::move(cteJoins),
487 true);
340 488
489 // If we are matching against the subquery, no condition is
490 // necessary. If we are negatively matching the subquery, we
491 // condition on the join column being NULL.
341 if (clause.getComparison() == filter::comparison::does_not_hierarchally_match) 492 if (clause.getComparison() == filter::comparison::does_not_hierarchally_match)
342 { 493 {
343 return condition(withInstName, clause.getField().getColumn(), true); 494 return condition(withInstName, clause.getField().getColumn(), true);
@@ -379,16 +530,32 @@ namespace verbly {
379 return identifier; 530 return identifier;
380 } 531 }
381 532
382 statement::condition statement::integrate(statement subStmt) 533 std::string statement::instantiateWith(std::string name)
383 { 534 {
384 for (auto& mapping : subStmt.tables_) 535 return name + "_tree_" + std::to_string(nextWithId_++);
536 }
537
538 /**
539 * This method integrates the parts of a recursively generated statement into
540 * this statement. This is used because filters are recursive objects, but
541 * statements need to be flat to be compiled into a SQL query. Thus, all CTEs
542 * have to be in the main statement, and all table mappings & joins that
543 * aren't part of a CTE have to be in the main statement as well. Finally, we
544 * need to copy up the next ID fields in order to properly prevent ID reuse.
545 */
546 statement::condition statement::integrate(statement subStmt, bool cte)
547 {
548 if (!cte)
385 { 549 {
386 tables_[mapping.first] = mapping.second; 550 for (auto& mapping : subStmt.tables_)
387 } 551 {
552 tables_[mapping.first] = mapping.second;
553 }
388 554
389 for (auto& j : subStmt.joins_) 555 for (auto& j : subStmt.joins_)
390 { 556 {
391 joins_.push_back(j); 557 joins_.push_back(j);
558 }
392 } 559 }
393 560
394 for (auto& w : subStmt.withs_) 561 for (auto& w : subStmt.withs_)
diff --git a/lib/statement.h b/lib/statement.h index a528d60..8188ec0 100644 --- a/lib/statement.h +++ b/lib/statement.h
@@ -184,13 +184,15 @@ namespace verbly {
184 std::map<std::string, std::string> tables, 184 std::map<std::string, std::string> tables,
185 std::string topTable, 185 std::string topTable,
186 condition where, 186 condition where,
187 std::list<join> joins) : 187 std::list<join> joins,
188 bool recursive) :
188 identifier_(std::move(identifier)), 189 identifier_(std::move(identifier)),
189 field_(f), 190 field_(f),
190 tables_(std::move(tables)), 191 tables_(std::move(tables)),
191 topTable_(std::move(topTable)), 192 topTable_(std::move(topTable)),
192 topCondition_(std::move(where)), 193 topCondition_(std::move(where)),
193 joins_(std::move(joins)) 194 joins_(std::move(joins)),
195 recursive_(recursive)
194 { 196 {
195 } 197 }
196 198
@@ -224,6 +226,11 @@ namespace verbly {
224 return joins_; 226 return joins_;
225 } 227 }
226 228
229 bool isRecursive() const
230 {
231 return recursive_;
232 }
233
227 private: 234 private:
228 std::string identifier_; 235 std::string identifier_;
229 field field_; 236 field field_;
@@ -231,6 +238,7 @@ namespace verbly {
231 std::string topTable_; 238 std::string topTable_;
232 condition topCondition_; 239 condition topCondition_;
233 std::list<join> joins_; 240 std::list<join> joins_;
241 bool recursive_;
234 242
235 }; 243 };
236 244
@@ -254,7 +262,9 @@ namespace verbly {
254 262
255 std::string instantiateTable(std::string name); 263 std::string instantiateTable(std::string name);
256 264
257 condition integrate(statement subStmt); 265 std::string instantiateWith(std::string name);
266
267 condition integrate(statement subStmt, bool cte = false);
258 268
259 int nextTableId_; 269 int nextTableId_;
260 int nextWithId_; 270 int nextWithId_;