diff options
author | Kelly Rauchenberger <fefferburbia@gmail.com> | 2017-01-21 19:00:45 -0500 |
---|---|---|
committer | Kelly Rauchenberger <fefferburbia@gmail.com> | 2017-01-21 19:00:45 -0500 |
commit | 9423ab7f075afd3e1501a2d1438fc8330811e40e (patch) | |
tree | 71f1b207711907aad2e02643838eb2e479714526 | |
parent | 2a1f319b37dc3af45c136bfdc9226515c2fefaf2 (diff) | |
download | verbly-9423ab7f075afd3e1501a2d1438fc8330811e40e.tar.gz verbly-9423ab7f075afd3e1501a2d1438fc8330811e40e.tar.bz2 verbly-9423ab7f075afd3e1501a2d1438fc8330811e40e.zip |
Fixed statement generation involving negative subqueries
Previously, we generated negative subqueries by integrating them into the main statement normally, and then making the connecting join be a LEFT JOIN instead of an INNER JOIN, and by adding a condition that the join column be NULL. The problem with this is that if the top table of the subquery joins against any other table (which join throughs always do), then no rows will be returned. This was solved by putting the subquery into a CTE and then LEFT JOINing as before with the CTE.
-rw-r--r-- | lib/statement.cpp | 275 | ||||
-rw-r--r-- | lib/statement.h | 16 |
2 files changed, 234 insertions, 57 deletions
diff --git a/lib/statement.cpp b/lib/statement.cpp index f8d5346..64a4311 100644 --- a/lib/statement.cpp +++ b/lib/statement.cpp | |||
@@ -52,20 +52,24 @@ namespace verbly { | |||
52 | cteStream << cte.getCondition().toSql(); | 52 | cteStream << cte.getCondition().toSql(); |
53 | } | 53 | } |
54 | 54 | ||
55 | cteStream << " UNION SELECT l.* FROM "; | 55 | if (cte.isRecursive()) |
56 | cteStream << cte.getIdentifier(); | 56 | { |
57 | cteStream << " AS t INNER JOIN "; | 57 | cteStream << " UNION SELECT l.* FROM "; |
58 | cteStream << cte.getField().getTable(); | 58 | cteStream << cte.getIdentifier(); |
59 | cteStream << " AS j ON t."; | 59 | cteStream << " AS t INNER JOIN "; |
60 | cteStream << cte.getField().getColumn(); | 60 | cteStream << cte.getField().getTable(); |
61 | cteStream << " = j."; | 61 | cteStream << " AS j ON t."; |
62 | cteStream << cte.getField().getForeignJoinColumn(); | 62 | cteStream << cte.getField().getColumn(); |
63 | cteStream << " INNER JOIN "; | 63 | cteStream << " = j."; |
64 | cteStream << cte.getTableForId(cte.getTopTable()); | 64 | cteStream << cte.getField().getForeignJoinColumn(); |
65 | cteStream << " AS l ON j."; | 65 | cteStream << " INNER JOIN "; |
66 | cteStream << cte.getField().getJoinColumn(); | 66 | cteStream << cte.getTableForId(cte.getTopTable()); |
67 | cteStream << " = l."; | 67 | cteStream << " AS l ON j."; |
68 | cteStream << cte.getField().getColumn(); | 68 | cteStream << cte.getField().getJoinColumn(); |
69 | cteStream << " = l."; | ||
70 | cteStream << cte.getField().getColumn(); | ||
71 | } | ||
72 | |||
69 | cteStream << ")"; | 73 | cteStream << ")"; |
70 | 74 | ||
71 | ctes.push_back(cteStream.str()); | 75 | ctes.push_back(cteStream.str()); |
@@ -146,6 +150,13 @@ namespace verbly { | |||
146 | { | 150 | { |
147 | } | 151 | } |
148 | 152 | ||
153 | /** | ||
154 | * This function recursively parses the query's filter condition. It is not | ||
155 | * idempotent. It returns a condition object representing the passed filter, | ||
156 | * but it also modifies the statement object, specifically by adding any joins | ||
157 | * and CTEs that may be required to represent the passed filter. This may also | ||
158 | * involve instantiating tables. | ||
159 | */ | ||
149 | statement::condition statement::parseFilter(filter clause) | 160 | statement::condition statement::parseFilter(filter clause) |
150 | { | 161 | { |
151 | switch (clause.getType()) | 162 | switch (clause.getType()) |
@@ -164,6 +175,9 @@ namespace verbly { | |||
164 | return {}; | 175 | return {}; |
165 | } | 176 | } |
166 | 177 | ||
178 | // For primitive type filters, all we need to do is translate the | ||
179 | // filter object directly into a condition object. No joins are | ||
180 | // necessary. | ||
167 | case field::type::string: | 181 | case field::type::string: |
168 | case field::type::integer: | 182 | case field::type::integer: |
169 | case field::type::boolean: | 183 | case field::type::boolean: |
@@ -247,6 +261,7 @@ namespace verbly { | |||
247 | 261 | ||
248 | case field::type::join: | 262 | case field::type::join: |
249 | { | 263 | { |
264 | // First, figure out what table we need to join against. | ||
250 | std::string joinTableName; | 265 | std::string joinTableName; |
251 | if (clause.getField().hasTable()) | 266 | if (clause.getField().hasTable()) |
252 | { | 267 | { |
@@ -255,6 +270,9 @@ namespace verbly { | |||
255 | joinTableName = getTableForContext(clause.getField().getJoinObject()); | 270 | joinTableName = getTableForContext(clause.getField().getJoinObject()); |
256 | } | 271 | } |
257 | 272 | ||
273 | // Recursively parse the subquery, and therefore obtain an | ||
274 | // instantiated table to join against, as well as any joins or CTEs | ||
275 | // that the subquery may require to function. | ||
258 | statement joinStmt( | 276 | statement joinStmt( |
259 | joinTableName, | 277 | joinTableName, |
260 | clause.getJoinCondition().normalize(clause.getField().getJoinObject()), | 278 | clause.getJoinCondition().normalize(clause.getField().getJoinObject()), |
@@ -262,23 +280,71 @@ namespace verbly { | |||
262 | nextWithId_); | 280 | nextWithId_); |
263 | 281 | ||
264 | std::string joinTable = joinStmt.topTable_; | 282 | std::string joinTable = joinStmt.topTable_; |
265 | condition curCond = integrate(std::move(joinStmt)); | ||
266 | 283 | ||
267 | bool outer = false; | ||
268 | if (clause.getComparison() == filter::comparison::does_not_match) | 284 | if (clause.getComparison() == filter::comparison::does_not_match) |
269 | { | 285 | { |
270 | outer = true; | 286 | // If the comparison is actually a negative filter, we can't just |
287 | // integrate the subquery statement into this statement and then | ||
288 | // join against it. Even if we LEFT JOIN against the subquery's | ||
289 | // top level table and then condition on the join column being | ||
290 | // NULL, if that table joins against any other table, the query | ||
291 | // will return zero results. Instead, we create a non-recursive | ||
292 | // CTE that represents the subquery, then LEFT JOIN against it and | ||
293 | // condition on the join column being NULL as before. | ||
294 | std::string withName = instantiateWith(clause.getField().getTable()); | ||
295 | std::string withInstName = instantiateTable(withName); | ||
271 | 296 | ||
272 | curCond &= condition(joinTable, clause.getField().getColumn(), true); | 297 | // LEFT JOIN against the CTE. |
273 | } | 298 | joins_.emplace_back( |
274 | 299 | true, | |
275 | joins_.emplace_back(outer, joinTableName, topTable_, clause.getField().getColumn(), joinTable, clause.getField().getColumn()); | 300 | withName, |
301 | topTable_, | ||
302 | clause.getField().getColumn(), | ||
303 | withInstName, | ||
304 | clause.getField().getColumn()); | ||
305 | |||
306 | // All CTEs have to be in the main statement, so integrate any | ||
307 | // CTEs that our subquery uses. Also, retrieve the table mapping, | ||
308 | // joins list, and subquery condition, and use them to create the | ||
309 | // CTE. | ||
310 | std::map<std::string, std::string> cteTables = std::move(joinStmt.tables_); | ||
311 | std::list<join> cteJoins = std::move(joinStmt.joins_); | ||
312 | condition cteCondition = integrate(std::move(joinStmt), true); | ||
276 | 313 | ||
277 | return curCond; | 314 | withs_.emplace_back( |
315 | std::move(withName), | ||
316 | clause.getField(), | ||
317 | std::move(cteTables), | ||
318 | std::move(joinTable), | ||
319 | std::move(cteCondition), | ||
320 | std::move(cteJoins), | ||
321 | false); | ||
322 | |||
323 | // Condition on the join column being NULL, which causes the query | ||
324 | // to only return results that do not match the subquery. | ||
325 | return condition(std::move(withInstName), clause.getField().getColumn(), true); | ||
326 | } else { | ||
327 | // INNER JOIN against the top table of the subquery. | ||
328 | joins_.emplace_back( | ||
329 | false, | ||
330 | std::move(joinTableName), | ||
331 | topTable_, | ||
332 | clause.getField().getColumn(), | ||
333 | std::move(joinTable), | ||
334 | clause.getField().getColumn()); | ||
335 | |||
336 | // Integrate the subquery's table mappings, joins, and CTEs into | ||
337 | // this statement, and return the subquery condition as our | ||
338 | // condition. | ||
339 | return integrate(std::move(joinStmt)); | ||
340 | } | ||
278 | } | 341 | } |
279 | 342 | ||
280 | case field::type::join_through: | 343 | case field::type::join_through: |
281 | { | 344 | { |
345 | // Recursively parse the subquery, and therefore obtain an | ||
346 | // instantiated table to join against, as well as any joins or CTEs | ||
347 | // that the subquery may require to function. | ||
282 | statement joinStmt( | 348 | statement joinStmt( |
283 | getTableForContext(clause.getField().getJoinObject()), | 349 | getTableForContext(clause.getField().getJoinObject()), |
284 | clause.getJoinCondition().normalize(clause.getField().getJoinObject()), | 350 | clause.getJoinCondition().normalize(clause.getField().getJoinObject()), |
@@ -286,58 +352,143 @@ namespace verbly { | |||
286 | nextWithId_); | 352 | nextWithId_); |
287 | 353 | ||
288 | std::string joinTable = joinStmt.topTable_; | 354 | std::string joinTable = joinStmt.topTable_; |
289 | std::string throughTable = instantiateTable(clause.getField().getTable()); | ||
290 | condition curCond = integrate(std::move(joinStmt)); | ||
291 | 355 | ||
292 | bool outer = false; | ||
293 | if (clause.getComparison() == filter::comparison::does_not_match) | 356 | if (clause.getComparison() == filter::comparison::does_not_match) |
294 | { | 357 | { |
295 | outer = true; | 358 | // If the comparison is actually a negative filter, we can't just |
359 | // integrate the subquery statement into this statement and then | ||
360 | // join against it. Even if we LEFT JOIN against the subquery's | ||
361 | // through table and then condition on the join column being NULL, | ||
362 | // the query will return zero results because the through table | ||
363 | // joins against the subquery's top level table. Instead, we | ||
364 | // create a non-recursive CTE that represents the through table | ||
365 | // joined against the subquery, then LEFT JOIN against it and | ||
366 | // condition on the join column being NULL as before. | ||
367 | std::string withName = instantiateWith(clause.getField().getTable()); | ||
368 | std::string withInstName = instantiateTable(withName); | ||
296 | 369 | ||
297 | curCond &= condition(throughTable, clause.getField().getJoinColumn(), true); | 370 | // LEFT JOIN against the CTE. |
298 | } | 371 | joins_.emplace_back( |
299 | 372 | true, | |
300 | joins_.emplace_back(outer, clause.getField().getTable(), topTable_, clause.getField().getColumn(), throughTable, clause.getField().getJoinColumn()); | 373 | withName, |
301 | joins_.emplace_back(false, getTableForContext(clause.getField().getJoinObject()), throughTable, clause.getField().getForeignJoinColumn(), joinTable, clause.getField().getForeignColumn()); | 374 | topTable_, |
375 | clause.getField().getColumn(), | ||
376 | withInstName, | ||
377 | clause.getField().getJoinColumn()); | ||
378 | |||
379 | // Modify the substatement such that the through table is the top | ||
380 | // table, and such that it joins against the previous top table. | ||
381 | std::string throughTable = joinStmt.instantiateTable(clause.getField().getTable()); | ||
382 | joinStmt.joins_.emplace_back( | ||
383 | false, | ||
384 | getTableForContext(clause.getField().getJoinObject()), | ||
385 | throughTable, | ||
386 | clause.getField().getForeignJoinColumn(), | ||
387 | std::move(joinTable), | ||
388 | clause.getField().getForeignColumn()); | ||
389 | |||
390 | joinStmt.topTable_ = throughTable; | ||
391 | |||
392 | // All CTEs have to be in the main statement, so integrate any | ||
393 | // CTEs that our subquery uses. Also, retrieve the table mapping, | ||
394 | // joins list, and subquery condition, and use them to create the | ||
395 | // CTE. | ||
396 | std::map<std::string, std::string> cteTables = std::move(joinStmt.tables_); | ||
397 | std::list<join> cteJoins = std::move(joinStmt.joins_); | ||
398 | condition cteCondition = integrate(std::move(joinStmt), true); | ||
302 | 399 | ||
303 | return curCond; | 400 | withs_.emplace_back( |
401 | std::move(withName), | ||
402 | clause.getField(), | ||
403 | std::move(cteTables), | ||
404 | std::move(throughTable), | ||
405 | std::move(cteCondition), | ||
406 | std::move(cteJoins), | ||
407 | false); | ||
408 | |||
409 | // Condition on the join column being NULL, which causes the query | ||
410 | // to only return results that do not match the subquery. | ||
411 | return condition(std::move(withInstName), clause.getField().getJoinColumn(), true); | ||
412 | } else { | ||
413 | // Instantiate the through table. | ||
414 | std::string throughTable = instantiateTable(clause.getField().getTable()); | ||
415 | |||
416 | // INNER JOIN against the through table. | ||
417 | joins_.emplace_back( | ||
418 | false, | ||
419 | clause.getField().getTable(), | ||
420 | topTable_, | ||
421 | clause.getField().getColumn(), | ||
422 | throughTable, | ||
423 | clause.getField().getJoinColumn()); | ||
424 | |||
425 | // INNER JOIN from the through table to the top table of the subquery. | ||
426 | joins_.emplace_back( | ||
427 | false, | ||
428 | getTableForContext(clause.getField().getJoinObject()), | ||
429 | std::move(throughTable), | ||
430 | clause.getField().getForeignJoinColumn(), | ||
431 | std::move(joinTable), | ||
432 | clause.getField().getForeignColumn()); | ||
433 | |||
434 | // Integrate the subquery's table mappings, joins, and CTEs into | ||
435 | // this statement, and return the subquery condition as our | ||
436 | // condition. | ||
437 | return integrate(std::move(joinStmt)); | ||
438 | } | ||
304 | } | 439 | } |
305 | 440 | ||
306 | case field::type::hierarchal_join: | 441 | case field::type::hierarchal_join: |
307 | { | 442 | { |
308 | std::string withName = std::string(clause.getField().getTable()) + "_tree_" + std::to_string(nextWithId_++); | 443 | // Create a recursive CTE that represents the results of the subquery. |
444 | std::string withName = instantiateWith(clause.getField().getTable()); | ||
309 | std::string withInstName = instantiateTable(withName); | 445 | std::string withInstName = instantiateTable(withName); |
310 | 446 | ||
447 | // If we are matching against the subquery, we INNER JOIN with the | ||
448 | // CTE. If we are negatively matching the subquery, we LEFT JOIN | ||
449 | // with the CTE. | ||
311 | bool outer = false; | 450 | bool outer = false; |
312 | if (clause.getComparison() == filter::comparison::does_not_hierarchally_match) | 451 | if (clause.getComparison() == filter::comparison::does_not_hierarchally_match) |
313 | { | 452 | { |
314 | outer = true; | 453 | outer = true; |
315 | } | 454 | } |
316 | 455 | ||
317 | joins_.emplace_back(outer, withName, topTable_, clause.getField().getColumn(), withInstName, clause.getField().getColumn()); | 456 | // Join against the CTE. |
457 | joins_.emplace_back( | ||
458 | outer, | ||
459 | withName, | ||
460 | topTable_, | ||
461 | clause.getField().getColumn(), | ||
462 | withInstName, | ||
463 | clause.getField().getColumn()); | ||
318 | 464 | ||
465 | // Recursively parse the subquery in order to create the CTE. | ||
319 | statement withStmt( | 466 | statement withStmt( |
320 | getTableForContext(clause.getField().getObject()), | 467 | getTableForContext(clause.getField().getObject()), |
321 | clause.getJoinCondition().normalize(clause.getField().getObject()), | 468 | clause.getJoinCondition().normalize(clause.getField().getObject()), |
322 | nextTableId_, | 469 | nextTableId_, |
323 | nextWithId_); | 470 | nextWithId_); |
324 | 471 | ||
325 | for (auto& w : withStmt.withs_) | 472 | // All CTEs have to be in the main statement, so integrate any CTEs |
326 | { | 473 | // that our subquery uses. Also, retrieve the table mapping, joins |
327 | withs_.push_back(std::move(w)); | 474 | // list, and subquery condition, and use them to create the CTE. |
328 | } | 475 | std::string cteTopTable = std::move(withStmt.topTable_); |
329 | 476 | std::map<std::string, std::string> cteTables = std::move(withStmt.tables_); | |
330 | nextTableId_ = withStmt.nextTableId_; | 477 | std::list<join> cteJoins = std::move(withStmt.joins_); |
331 | nextWithId_ = withStmt.nextWithId_; | 478 | condition cteCondition = integrate(std::move(withStmt), true); |
332 | 479 | ||
333 | withs_.emplace_back( | 480 | withs_.emplace_back( |
334 | withName, | 481 | std::move(withName), |
335 | clause.getField(), | 482 | clause.getField(), |
336 | std::move(withStmt.tables_), | 483 | std::move(cteTables), |
337 | std::move(withStmt.topTable_), | 484 | std::move(cteTopTable), |
338 | std::move(withStmt.topCondition_), | 485 | std::move(cteCondition), |
339 | std::move(withStmt.joins_)); | 486 | std::move(cteJoins), |
487 | true); | ||
340 | 488 | ||
489 | // If we are matching against the subquery, no condition is | ||
490 | // necessary. If we are negatively matching the subquery, we | ||
491 | // condition on the join column being NULL. | ||
341 | if (clause.getComparison() == filter::comparison::does_not_hierarchally_match) | 492 | if (clause.getComparison() == filter::comparison::does_not_hierarchally_match) |
342 | { | 493 | { |
343 | return condition(withInstName, clause.getField().getColumn(), true); | 494 | return condition(withInstName, clause.getField().getColumn(), true); |
@@ -379,16 +530,32 @@ namespace verbly { | |||
379 | return identifier; | 530 | return identifier; |
380 | } | 531 | } |
381 | 532 | ||
382 | statement::condition statement::integrate(statement subStmt) | 533 | std::string statement::instantiateWith(std::string name) |
383 | { | 534 | { |
384 | for (auto& mapping : subStmt.tables_) | 535 | return name + "_tree_" + std::to_string(nextWithId_++); |
536 | } | ||
537 | |||
538 | /** | ||
539 | * This method integrates the parts of a recursively generated statement into | ||
540 | * this statement. This is used because filters are recursive objects, but | ||
541 | * statements need to be flat to be compiled into a SQL query. Thus, all CTEs | ||
542 | * have to be in the main statement, and all table mappings & joins that | ||
543 | * aren't part of a CTE have to be in the main statement as well. Finally, we | ||
544 | * need to copy up the next ID fields in order to properly prevent ID reuse. | ||
545 | */ | ||
546 | statement::condition statement::integrate(statement subStmt, bool cte) | ||
547 | { | ||
548 | if (!cte) | ||
385 | { | 549 | { |
386 | tables_[mapping.first] = mapping.second; | 550 | for (auto& mapping : subStmt.tables_) |
387 | } | 551 | { |
552 | tables_[mapping.first] = mapping.second; | ||
553 | } | ||
388 | 554 | ||
389 | for (auto& j : subStmt.joins_) | 555 | for (auto& j : subStmt.joins_) |
390 | { | 556 | { |
391 | joins_.push_back(j); | 557 | joins_.push_back(j); |
558 | } | ||
392 | } | 559 | } |
393 | 560 | ||
394 | for (auto& w : subStmt.withs_) | 561 | for (auto& w : subStmt.withs_) |
diff --git a/lib/statement.h b/lib/statement.h index a528d60..8188ec0 100644 --- a/lib/statement.h +++ b/lib/statement.h | |||
@@ -184,13 +184,15 @@ namespace verbly { | |||
184 | std::map<std::string, std::string> tables, | 184 | std::map<std::string, std::string> tables, |
185 | std::string topTable, | 185 | std::string topTable, |
186 | condition where, | 186 | condition where, |
187 | std::list<join> joins) : | 187 | std::list<join> joins, |
188 | bool recursive) : | ||
188 | identifier_(std::move(identifier)), | 189 | identifier_(std::move(identifier)), |
189 | field_(f), | 190 | field_(f), |
190 | tables_(std::move(tables)), | 191 | tables_(std::move(tables)), |
191 | topTable_(std::move(topTable)), | 192 | topTable_(std::move(topTable)), |
192 | topCondition_(std::move(where)), | 193 | topCondition_(std::move(where)), |
193 | joins_(std::move(joins)) | 194 | joins_(std::move(joins)), |
195 | recursive_(recursive) | ||
194 | { | 196 | { |
195 | } | 197 | } |
196 | 198 | ||
@@ -224,6 +226,11 @@ namespace verbly { | |||
224 | return joins_; | 226 | return joins_; |
225 | } | 227 | } |
226 | 228 | ||
229 | bool isRecursive() const | ||
230 | { | ||
231 | return recursive_; | ||
232 | } | ||
233 | |||
227 | private: | 234 | private: |
228 | std::string identifier_; | 235 | std::string identifier_; |
229 | field field_; | 236 | field field_; |
@@ -231,6 +238,7 @@ namespace verbly { | |||
231 | std::string topTable_; | 238 | std::string topTable_; |
232 | condition topCondition_; | 239 | condition topCondition_; |
233 | std::list<join> joins_; | 240 | std::list<join> joins_; |
241 | bool recursive_; | ||
234 | 242 | ||
235 | }; | 243 | }; |
236 | 244 | ||
@@ -254,7 +262,9 @@ namespace verbly { | |||
254 | 262 | ||
255 | std::string instantiateTable(std::string name); | 263 | std::string instantiateTable(std::string name); |
256 | 264 | ||
257 | condition integrate(statement subStmt); | 265 | std::string instantiateWith(std::string name); |
266 | |||
267 | condition integrate(statement subStmt, bool cte = false); | ||
258 | 268 | ||
259 | int nextTableId_; | 269 | int nextTableId_; |
260 | int nextWithId_; | 270 | int nextWithId_; |