Mercurial > hg > MonetDB
changeset 86146:c5e9e0a524cc select-window-pushdown
Merge with Jan2022 branch.
| author | Sjoerd Mullender <sjoerd@acm.org> |
|---|---|
| date | Fri, 22 Jul 2022 09:35:41 +0200 |
| parents | 85d2040d989d (diff) 3824de3a5f26 (current diff) |
| children | 2a90add101fb |
| files | |
| diffstat | 3 files changed, 397 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- a/sql/server/rel_optimizer.c +++ b/sql/server/rel_optimizer.c @@ -4457,6 +4457,94 @@ rel_push_groupby_down(visitor *v, sql_re return rel; } +/* + * Gets the column expressions of a diff function and adds them to "columns". + * The diff function has two possible argument types: either a sql_exp representing a column + * or a sql_exp representing another diff function, therefore this function is recursive. + */ +static void +get_diff_function_columns(sql_exp *diffExp, list *columns) { + list *args = diffExp->l; + + for (node *arg = args->h; arg; arg = arg->next) { + sql_exp *exp = arg->data; + + // diff function + if (exp->type == e_func) { + get_diff_function_columns(exp, columns); + } + // column + else { + list_append(columns, exp); + } + } +} + +/* + * Builds a list of aggregation key columns to be used by the select push down algorithm, namely for + * window functions. Returns NULL if the window function does not partition by any column + */ +static list * +get_aggregation_key_columns(sql_allocator *sa, sql_rel *r) { + for (node* n = r->exps->h; n; n = n->next) { + sql_exp *e = n->data; + + if (e->type == e_func) { + sql_subfunc *f = e->f; + + // aggregation function + if (!strcmp(f->func->base.name, "rank")) { + list* rankArguments = e->l; + // the partition key is the second argument + sql_exp *partitionExp = rankArguments->h->next->data; + + // check if the key contains any columns, i.e., is a diff function + if (partitionExp->type == e_func) { + // get columns to list + list *aggColumns = sa_list(sa); + get_diff_function_columns(partitionExp, aggColumns); + return aggColumns; + } + // the function has no aggregation columns (e_atom of boolean) + else { + return NULL; + } + + } + } + } + return NULL; +} + +/* + * Checks if a filter column is also used as an aggregation key, so it can be later safely pushed down. + */ +static int +filter_column_in_aggregation_columns(sql_exp *column, list *aggColumns) { + /* check if it is a column or an e_convert, and get the actual column if it is the latter */ + if (column->type == e_convert) { + column = column->l; + } + + char *tableName = column->l; + char *columnName = column->r; + + for (node *n = aggColumns->h; n; n = n->next) { + sql_exp *aggCol = n->data; + char *aggColTableName = aggCol->l; + char *aggColColumnName = aggCol->r; + + if (!strcmp(tableName, aggColTableName) && !strcmp(columnName, aggColColumnName)) { + /* match */ + return 1; + } + } + + /* no matches found */ + return 0; +} + + /* * Push select down, pushes the selects through (simple) projections. Also * it cleans up the projections which become useless. @@ -4605,6 +4693,48 @@ rel_push_select_down(visitor *v, sql_rel n = next; } } + + /* push filters if they match the aggregation key on a window function */ + else if (pl && pl->op != op_ddl && exps_have_unsafe(r->exps, 0)) { + set_processed(pl); + /* list of aggregation key columns */ + list *aggColumns = get_aggregation_key_columns(v->sql->sa, r); + + /* aggregation keys found, check if any filter matches them */ + if (aggColumns) { + for (n = exps->h; n;) { + node *next = n->next; + sql_exp *e = n->data, *ne = NULL; + + if (e->type == e_cmp) { + /* simple comparison filter */ + if (e->flag == cmp_gt || e->flag == cmp_gte || e->flag == cmp_lte || e->flag == cmp_lt + || e->flag == cmp_equal || e->flag == cmp_notequal || e->flag == cmp_in || e->flag == cmp_notin) { + sql_exp* column = e->l; + + /* check if the expression matches any aggregation key, meaning we can + try to safely push it down */ + if (filter_column_in_aggregation_columns(column, aggColumns)) { + ne = exp_push_down_prj(v->sql, e, r, pl); + + /* can we move it down */ + if (ne && ne != e && pl->exps) { + if (!is_select(pl->op) || rel_is_ref(pl)) + r->l = pl = rel_select(v->sql->sa, pl, NULL); + rel_select_add_exp(v->sql->sa, pl, ne); + list_remove_node(exps, NULL, n); + v->changes++; + } + } + } + } + n = next; + } + + /* cleanup list */ + list_destroy(aggColumns); + } + } } /* try push select under set relation */
--- a/sql/test/Tests/All +++ b/sql/test/Tests/All @@ -122,6 +122,7 @@ constant-not-in unicode window_functions +select_window_pushdown !NOWAL?hot_snapshot HAVE_LIBZ&!NOWAL?hot_snapshot_gz
new file mode 100644 --- /dev/null +++ b/sql/test/Tests/select_window_pushdown.test @@ -0,0 +1,266 @@ +# init +statement ok +CREATE TABLE Test (k int, v int); + +statement ok +INSERT INTO Test SELECT value % 10 as k, value as v FROM generate_series(1, 100); + + +# simple eq filter on the partition key, must be pushed down, +# while the flag filter cannot be safely pushed down +query T nosort +plan SELECT * +FROM ( + SELECT *, rank() OVER (PARTITION BY k ORDER BY v DESC) AS rank + FROM ( + SELECT k, v, v % 2 = 0 AS flag + FROM Test + ) t1 +) t2 +WHERE rank = 1 AND NOT flag AND k = 10; +---- +project ( +| select ( +| | project ( +| | | project ( +| | | | project ( +| | | | | select ( +| | | | | | table("sys"."test") [ "test"."k", "test"."v" ] +| | | | | ) [ ("test"."k") = (int(32) "10") ] +| | | | ) [ "test"."k" as "t1"."k", "test"."v" as "t1"."v", "sys"."mod"("test"."v", int(32) "2") as "%1"."%1", "sys"."="("%1"."%1", int(32) "0") as "t1"."flag" ] +| | | ) [ "t1"."k", "t1"."v", "t1"."flag" ] [ "t1"."k" ASC, "t1"."v" NULLS LAST ] +| | ) [ "t1"."k", "t1"."v", "t1"."flag", "sys"."rank"("sys"."star"(), "sys"."diff"("t1"."k"), "sys"."diff"("t1"."v")) as "t2"."rank" ] +| ) [ ("t1"."flag") = (boolean(1) "false"), ("t2"."rank") = (int(32) "1") ] +) [ "t1"."k" as "t2"."k", "t1"."v" as "t2"."v", "t1"."flag" as "t2"."flag", "t2"."rank" ] + + +# simple range filter on the partition key +query T nosort +plan SELECT * +FROM ( + SELECT *, rank() OVER (PARTITION BY k ORDER BY v DESC) AS rank + FROM ( + SELECT k, v, v % 2 = 0 AS flag + FROM Test + ) t1 +) t2 +WHERE rank = 1 AND NOT flag AND k >= 10 AND k <= 50; +---- +project ( +| select ( +| | project ( +| | | project ( +| | | | project ( +| | | | | select ( +| | | | | | table("sys"."test") [ "test"."k", "test"."v" ] +| | | | | ) [ (int(32) "10") <= ("test"."k") <= (int(32) "50") ] +| | | | ) [ "test"."k" as "t1"."k", "test"."v" as "t1"."v", "sys"."mod"("test"."v", int(32) "2") as "%1"."%1", "sys"."="("%1"."%1", int(32) "0") as "t1"."flag" ] +| | | ) [ "t1"."k", "t1"."v", "t1"."flag" ] [ "t1"."k" ASC, "t1"."v" NULLS LAST ] +| | ) [ "t1"."k", "t1"."v", "t1"."flag", "sys"."rank"("sys"."star"(), "sys"."diff"("t1"."k"), "sys"."diff"("t1"."v")) as "t2"."rank" ] +| ) [ ("t1"."flag") = (boolean(1) "false"), ("t2"."rank") = (int(32) "1") ] +) [ "t1"."k" as "t2"."k", "t1"."v" as "t2"."v", "t1"."flag" as "t2"."flag", "t2"."rank" ] + + +# simple not in filter on the partition key +query T nosort +plan SELECT * +FROM ( + SELECT *, rank() OVER (PARTITION BY k ORDER BY v DESC) AS rank + FROM ( + SELECT k, v, v % 2 = 0 AS flag + FROM Test + ) t1 +) t2 +WHERE rank = 1 AND NOT flag AND k NOT IN (10, 20, 30); +---- +project ( +| select ( +| | project ( +| | | project ( +| | | | project ( +| | | | | select ( +| | | | | | table("sys"."test") [ "test"."k", "test"."v" ] +| | | | | ) [ ("test"."k") notin (int(32) "10", int(32) "20", int(32) "30") ] +| | | | ) [ "test"."k" as "t1"."k", "test"."v" as "t1"."v", "sys"."mod"("test"."v", int(32) "2") as "%2"."%2", "sys"."="("%2"."%2", int(32) "0") as "t1"."flag" ] +| | | ) [ "t1"."k", "t1"."v", "t1"."flag" ] [ "t1"."k" ASC, "t1"."v" NULLS LAST ] +| | ) [ "t1"."k", "t1"."v", "t1"."flag", "sys"."rank"("sys"."star"(), "sys"."diff"("t1"."k"), "sys"."diff"("t1"."v")) as "t2"."rank" ] +| ) [ ("t1"."flag") = (boolean(1) "false"), ("t2"."rank") = (int(32) "1") ] +) [ "t1"."k" as "t2"."k", "t1"."v" as "t2"."v", "t1"."flag" as "t2"."flag", "t2"."rank" ] + + +# another filter also not on the partition key, must not be pushed down +query T nosort +plan SELECT * +FROM ( + SELECT *, rank() OVER (PARTITION BY k ORDER BY v DESC) AS rank + FROM ( + SELECT k, v, v % 2 = 0 AS flag + FROM Test + ) t1 +) t2 +WHERE rank = 1 AND NOT flag AND k = 10 AND v = 15; +---- +project ( +| select ( +| | project ( +| | | project ( +| | | | project ( +| | | | | select ( +| | | | | | table("sys"."test") [ "test"."k", "test"."v" ] +| | | | | ) [ ("test"."k") = (int(32) "10") ] +| | | | ) [ "test"."k" as "t1"."k", "test"."v" as "t1"."v", "sys"."mod"("test"."v", int(32) "2") as "%1"."%1", "sys"."="("%1"."%1", int(32) "0") as "t1"."flag" ] +| | | ) [ "t1"."k", "t1"."v", "t1"."flag" ] [ "t1"."k" ASC, "t1"."v" NULLS LAST ] +| | ) [ "t1"."k", "t1"."v", "t1"."flag", "sys"."rank"("sys"."star"(), "sys"."diff"("t1"."k"), "sys"."diff"("t1"."v")) as "t2"."rank" ] +| ) [ ("t1"."v") = (int(32) "15"), ("t1"."flag") = (boolean(1) "false"), ("t2"."rank") = (int(32) "1") ] +) [ "t1"."k" as "t2"."k", "t1"."v" as "t2"."v", "t1"."flag" as "t2"."flag", "t2"."rank" ] + + +# swapping k with v just to test for hardcoded optimizations, +# v is pushed down but not k +query T nosort +plan SELECT * +FROM ( + SELECT *, rank() OVER (PARTITION BY v ORDER BY v DESC) AS rank + FROM ( + SELECT k, v, v % 2 = 0 AS flag + FROM Test + ) t1 +) t2 +WHERE rank = 1 AND NOT flag AND k = 10 AND v = 15; +---- +project ( +| select ( +| | project ( +| | | project ( +| | | | project ( +| | | | | select ( +| | | | | | table("sys"."test") [ "test"."k", "test"."v" ] +| | | | | ) [ ("test"."v") = (int(32) "15") ] +| | | | ) [ "test"."k" as "t1"."k", "test"."v" as "t1"."v", "sys"."mod"("test"."v", int(32) "2") as "%1"."%1", "sys"."="("%1"."%1", int(32) "0") as "t1"."flag" ] +| | | ) [ "t1"."k", "t1"."v", "t1"."flag" ] [ "t1"."v" NULLS LAST ] +| | ) [ "t1"."k", "t1"."v", "t1"."flag", "sys"."rank"("sys"."star"(), "sys"."diff"("t1"."v"), "sys"."diff"("t1"."v")) as "t2"."rank" ] +| ) [ ("t1"."flag") = (boolean(1) "false"), ("t2"."rank") = (int(32) "1"), ("t1"."k") = (int(32) "10") ] +) [ "t1"."k" as "t2"."k", "t1"."v" as "t2"."v", "t1"."flag" as "t2"."flag", "t2"."rank" ] + + +# performing some additional computation on the partition key, +# filter cannot be pushed down +plan SELECT * +FROM ( + SELECT k * 10 as k, v, flag, rank() OVER (PARTITION BY k ORDER BY v DESC) AS rank + FROM ( + SELECT k, v, v % 2 = 0 AS flag + FROM Test + ) t1 +) t2 +WHERE rank = 1 AND NOT flag AND k = 10; +---- +project ( +| select ( +| | project ( +| | | select ( +| | | | project ( +| | | | | project ( +| | | | | | project ( +| | | | | | | table("sys"."test") [ "test"."k", "test"."v" ] +| | | | | | ) [ "test"."k" as "t1"."k", "test"."v" as "t1"."v", "sys"."mod"("test"."v", int(32) "2") as "%1"."%1", "sys"."="("%1"."%1", int(32) "0") as "t1"."flag" ] +| | | | | ) [ "t1"."k", "t1"."v", "t1"."flag" ] [ "t1"."k" ASC, "t1"."v" NULLS LAST ] +| | | | ) [ "t1"."k", "t1"."v", "t1"."flag", "sys"."rank"("sys"."star"(), "sys"."diff"("t1"."k"), "sys"."diff"("t1"."v")) as "t2"."rank" ] +| | | ) [ ("t1"."flag") = (boolean(1) "false"), ("t2"."rank") = (int(32) "1") ] +| | ) [ "sys"."sql_mul"("t1"."k", tinyint(4) "10") as "t2"."k", "t1"."v" as "t2"."v", "t1"."flag" as "t2"."flag", "t2"."rank" ] +| ) [ ("t2"."k") = (bigint(36) "10") ] +) [ "t2"."k", "t2"."v", "t2"."flag", "t2"."rank" ] + + +# filter [partition column OR flag], cannot be safely pushed down +query T nosort +plan SELECT * +FROM ( + SELECT *, rank() OVER (PARTITION BY k ORDER BY v DESC) AS rank + FROM ( + SELECT k, v, v % 2 = 0 AS flag + FROM Test + ) t1 +) t2 +WHERE rank = 1 AND (NOT flag OR k = 10); +---- +project ( +| select ( +| | project ( +| | | project ( +| | | | project ( +| | | | | table("sys"."test") [ "test"."k", "test"."v" ] +| | | | ) [ "test"."k" as "t1"."k", "test"."v" as "t1"."v", "sys"."mod"("test"."v", int(32) "2") as "%1"."%1", "sys"."="("%1"."%1", int(32) "0") as "t1"."flag" ] +| | | ) [ "t1"."k", "t1"."v", "t1"."flag" ] [ "t1"."k" ASC, "t1"."v" NULLS LAST ] +| | ) [ "t1"."k", "t1"."v", "t1"."flag", "sys"."rank"("sys"."star"(), "sys"."diff"("t1"."k"), "sys"."diff"("t1"."v")) as "t2"."rank" ] +| ) [ ("t2"."rank") = (int(32) "1"), (("t1"."flag") = (boolean(1) "false")) or (("t1"."k") = (int(32) "10")) ] +) [ "t1"."k" as "t2"."k", "t1"."v" as "t2"."v", "t1"."flag" as "t2"."flag", "t2"."rank" ] + + +# filter on k and v and both are partition columns, both filters can be pushed down +query T nosort +plan SELECT * +FROM ( + SELECT *, rank() OVER (PARTITION BY k, v ORDER BY v DESC) AS rank + FROM ( + SELECT k, v, v % 2 = 0 AS flag + FROM Test + ) t1 +) t2 +WHERE rank = 1 AND NOT flag AND k <= 10 AND v IN (1, 2, 3); +---- +project ( +| select ( +| | project ( +| | | project ( +| | | | project ( +| | | | | select ( +| | | | | | table("sys"."test") [ "test"."k", "test"."v" ] +| | | | | ) [ ("test"."k") <= (int(32) "10"), ("test"."v") in (int(32) "1", int(32) "2", int(32) "3") ] +| | | | ) [ "test"."k" as "t1"."k", "test"."v" as "t1"."v", "sys"."mod"("test"."v", int(32) "2") as "%2"."%2", "sys"."="("%2"."%2", int(32) "0") as "t1"."flag" ] +| | | ) [ "t1"."k", "t1"."v", "t1"."flag" ] [ "t1"."k" ASC, "t1"."v" NULLS LAST ] +| | ) [ "t1"."k", "t1"."v", "t1"."flag", "sys"."rank"("sys"."star"(), "sys"."diff"("sys"."diff"("t1"."k"), "t1"."v"), "sys"."diff"("t1"."v")) as "t2"."rank" ] +| ) [ ("t1"."flag") = (boolean(1) "false"), ("t2"."rank") = (int(32) "1") ] +) [ "t1"."k" as "t2"."k", "t1"."v" as "t2"."v", "t1"."flag" as "t2"."flag", "t2"."rank" ] + + +statement ok +DROP TABLE Test + + +# test with string filters +# (previously e_convert were not being considered) +statement ok +CREATE TABLE Test (k varchar(100), v int); + +statement ok +INSERT INTO Test SELECT value % 10 as k, value as v FROM generate_series(1, 100); + +query T nosort +plan SELECT * +FROM ( + SELECT *, rank() OVER (PARTITION BY k ORDER BY v DESC) AS rank + FROM ( + SELECT k, v, v % 2 = 0 AS flag + FROM Test + ) t1 +) t2 +WHERE rank = 1 AND NOT flag AND k = '10'; +---- +project ( +| select ( +| | project ( +| | | project ( +| | | | project ( +| | | | | select ( +| | | | | | table("sys"."test") [ "test"."k", "test"."v" ] +| | | | | ) [ (char(100)["test"."k"]) = (char(100) "10") ] +| | | | ) [ "test"."k" as "t1"."k", "test"."v" as "t1"."v", "sys"."mod"("test"."v", int(32) "2") as "%1"."%1", "sys"."="("%1"."%1", int(32) "0") as "t1"."flag" ] +| | | ) [ "t1"."k", "t1"."v", "t1"."flag" ] [ "t1"."k" ASC, "t1"."v" NULLS LAST ] +| | ) [ "t1"."k", "t1"."v", "t1"."flag", "sys"."rank"("sys"."star"(), "sys"."diff"("t1"."k"), "sys"."diff"("t1"."v")) as "t2"."rank" ] +| ) [ ("t1"."flag") = (boolean(1) "false"), ("t2"."rank") = (int(32) "1") ] +) [ "t1"."k" as "t2"."k", "t1"."v" as "t2"."v", "t1"."flag" as "t2"."flag", "t2"."rank" ] + + +statement ok +DROP TABLE Test
