changeset 86146:c5e9e0a524cc select-window-pushdown

Merge with Jan2022 branch.
author Sjoerd Mullender <sjoerd@acm.org>
date Fri, 22 Jul 2022 09:35:41 +0200
parents 85d2040d989d (diff) 3824de3a5f26 (current diff)
children 2a90add101fb
files
diffstat 3 files changed, 397 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- a/sql/server/rel_optimizer.c
+++ b/sql/server/rel_optimizer.c
@@ -4457,6 +4457,94 @@ rel_push_groupby_down(visitor *v, sql_re
 	return rel;
 }
 
+/* 
+ * Gets the column expressions of a diff function and adds them to "columns".
+ * The diff function has two possible argument types: either a sql_exp representing a column
+ * or a sql_exp representing another diff function, therefore this function is recursive.
+ */
+static void
+get_diff_function_columns(sql_exp *diffExp, list *columns) {
+	list *args = diffExp->l;
+	
+	for (node *arg = args->h; arg; arg = arg->next) {
+		sql_exp *exp = arg->data;
+		
+		// diff function
+		if (exp->type == e_func) {
+			get_diff_function_columns(exp, columns);
+		}
+		// column
+		else {
+			list_append(columns, exp);
+		}
+	}
+}
+
+/* 
+ * Builds a list of aggregation key columns to be used by the select push down algorithm, namely for
+ * window functions. Returns NULL if the window function does not partition by any column
+ */
+static list *
+get_aggregation_key_columns(sql_allocator *sa, sql_rel *r) {
+	for (node* n = r->exps->h; n; n = n->next) {
+		sql_exp *e = n->data;
+		
+		if (e->type == e_func) {
+			sql_subfunc *f = e->f;
+			
+			// aggregation function
+			if (!strcmp(f->func->base.name, "rank")) {
+				list* rankArguments = e->l;
+				// the partition key is the second argument
+				sql_exp *partitionExp = rankArguments->h->next->data;
+
+				// check if the key contains any columns, i.e., is a diff function
+				if (partitionExp->type == e_func) {
+					// get columns to list
+					list *aggColumns = sa_list(sa);
+					get_diff_function_columns(partitionExp, aggColumns);
+					return aggColumns;
+				}
+				// the function has no aggregation columns (e_atom of boolean)
+				else {
+					return NULL;
+				}
+
+			}
+		}
+	}
+	return NULL;
+}
+
+/*
+ * Checks if a filter column is also used as an aggregation key, so it can be later safely pushed down.
+ */
+static int
+filter_column_in_aggregation_columns(sql_exp *column, list *aggColumns) {
+	/* check if it is a column or an e_convert, and get the actual column if it is the latter */
+	if (column->type == e_convert) {
+		column = column->l;
+	}
+
+	char *tableName = column->l;
+	char *columnName = column->r;
+
+	for (node *n = aggColumns->h; n; n = n->next) {
+		sql_exp *aggCol = n->data;
+		char *aggColTableName = aggCol->l;
+		char *aggColColumnName = aggCol->r;
+		
+		if (!strcmp(tableName, aggColTableName) && !strcmp(columnName, aggColColumnName)) {
+			/* match */
+			return 1;
+		}
+	}
+
+	/* no matches found */
+	return 0;
+}
+
+
 /*
  * Push select down, pushes the selects through (simple) projections. Also
  * it cleans up the projections which become useless.
@@ -4605,6 +4693,48 @@ rel_push_select_down(visitor *v, sql_rel
 				n = next;
 			}
 		}
+		
+		/* push filters if they match the aggregation key on a window function */
+		else if (pl && pl->op != op_ddl && exps_have_unsafe(r->exps, 0)) {
+			set_processed(pl);
+			/* list of aggregation key columns */
+			list *aggColumns = get_aggregation_key_columns(v->sql->sa, r);
+
+			/* aggregation keys found, check if any filter matches them */
+			if (aggColumns) {
+				for (n = exps->h; n;) {
+					node *next = n->next;
+					sql_exp *e = n->data, *ne = NULL;
+					
+					if (e->type == e_cmp) {
+						/* simple comparison filter */
+						if (e->flag == cmp_gt || e->flag == cmp_gte || e->flag == cmp_lte || e->flag == cmp_lt
+							|| e->flag == cmp_equal || e->flag == cmp_notequal || e->flag == cmp_in || e->flag == cmp_notin) {
+							sql_exp* column = e->l;
+
+							/* check if the expression matches any aggregation key, meaning we can 
+							   try to safely push it down */
+							if (filter_column_in_aggregation_columns(column, aggColumns)) {
+								ne = exp_push_down_prj(v->sql, e, r, pl);
+
+								/* can we move it down */
+								if (ne && ne != e && pl->exps) {
+									if (!is_select(pl->op) || rel_is_ref(pl))
+										r->l = pl = rel_select(v->sql->sa, pl, NULL);
+									rel_select_add_exp(v->sql->sa, pl, ne);
+									list_remove_node(exps, NULL, n);
+									v->changes++;
+								}
+							}
+						}
+					}
+					n = next;
+				}
+
+				/* cleanup list */
+				list_destroy(aggColumns);
+			}
+		}
 	}
 
 	/* try push select under set relation */
--- a/sql/test/Tests/All
+++ b/sql/test/Tests/All
@@ -122,6 +122,7 @@ constant-not-in
 unicode
 
 window_functions
+select_window_pushdown
 
 !NOWAL?hot_snapshot
 HAVE_LIBZ&!NOWAL?hot_snapshot_gz
new file mode 100644
--- /dev/null
+++ b/sql/test/Tests/select_window_pushdown.test
@@ -0,0 +1,266 @@
+# init
+statement ok
+CREATE TABLE Test (k int, v int);
+
+statement ok
+INSERT INTO Test SELECT value % 10 as k, value as v FROM generate_series(1, 100);
+
+
+# simple eq filter on the partition key, must be pushed down,
+# while the flag filter cannot be safely pushed down
+query T nosort
+plan SELECT *
+FROM (
+    SELECT *, rank() OVER (PARTITION BY k ORDER BY v DESC) AS rank
+    FROM (
+        SELECT k, v, v % 2 = 0 AS flag
+        FROM Test
+    ) t1
+) t2
+WHERE rank = 1 AND NOT flag AND k = 10;
+----
+project (
+| select (
+| | project (
+| | | project (
+| | | | project (
+| | | | | select (
+| | | | | | table("sys"."test") [ "test"."k", "test"."v" ]
+| | | | | ) [ ("test"."k") = (int(32) "10") ]
+| | | | ) [ "test"."k" as "t1"."k", "test"."v" as "t1"."v", "sys"."mod"("test"."v", int(32) "2") as "%1"."%1", "sys"."="("%1"."%1", int(32) "0") as "t1"."flag" ]
+| | | ) [ "t1"."k", "t1"."v", "t1"."flag" ] [ "t1"."k" ASC, "t1"."v" NULLS LAST ]
+| | ) [ "t1"."k", "t1"."v", "t1"."flag", "sys"."rank"("sys"."star"(), "sys"."diff"("t1"."k"), "sys"."diff"("t1"."v")) as "t2"."rank" ]
+| ) [ ("t1"."flag") = (boolean(1) "false"), ("t2"."rank") = (int(32) "1") ]
+) [ "t1"."k" as "t2"."k", "t1"."v" as "t2"."v", "t1"."flag" as "t2"."flag", "t2"."rank" ]
+
+
+# simple range filter on the partition key
+query T nosort
+plan SELECT *
+FROM (
+    SELECT *, rank() OVER (PARTITION BY k ORDER BY v DESC) AS rank
+    FROM (
+        SELECT k, v, v % 2 = 0 AS flag
+        FROM Test
+    ) t1
+) t2
+WHERE rank = 1 AND NOT flag AND k >= 10 AND k <= 50;
+----
+project (
+| select (
+| | project (
+| | | project (
+| | | | project (
+| | | | | select (
+| | | | | | table("sys"."test") [ "test"."k", "test"."v" ]
+| | | | | ) [ (int(32) "10") <= ("test"."k") <= (int(32) "50") ]
+| | | | ) [ "test"."k" as "t1"."k", "test"."v" as "t1"."v", "sys"."mod"("test"."v", int(32) "2") as "%1"."%1", "sys"."="("%1"."%1", int(32) "0") as "t1"."flag" ]
+| | | ) [ "t1"."k", "t1"."v", "t1"."flag" ] [ "t1"."k" ASC, "t1"."v" NULLS LAST ]
+| | ) [ "t1"."k", "t1"."v", "t1"."flag", "sys"."rank"("sys"."star"(), "sys"."diff"("t1"."k"), "sys"."diff"("t1"."v")) as "t2"."rank" ]
+| ) [ ("t1"."flag") = (boolean(1) "false"), ("t2"."rank") = (int(32) "1") ]
+) [ "t1"."k" as "t2"."k", "t1"."v" as "t2"."v", "t1"."flag" as "t2"."flag", "t2"."rank" ]
+
+
+# simple not in filter on the partition key
+query T nosort
+plan SELECT *
+FROM (
+    SELECT *, rank() OVER (PARTITION BY k ORDER BY v DESC) AS rank
+    FROM (
+        SELECT k, v, v % 2 = 0 AS flag
+        FROM Test
+    ) t1
+) t2
+WHERE rank = 1 AND NOT flag AND k NOT IN (10, 20, 30);
+----
+project (
+| select (
+| | project (
+| | | project (
+| | | | project (
+| | | | | select (
+| | | | | | table("sys"."test") [ "test"."k", "test"."v" ]
+| | | | | ) [ ("test"."k") notin (int(32) "10", int(32) "20", int(32) "30") ]
+| | | | ) [ "test"."k" as "t1"."k", "test"."v" as "t1"."v", "sys"."mod"("test"."v", int(32) "2") as "%2"."%2", "sys"."="("%2"."%2", int(32) "0") as "t1"."flag" ]
+| | | ) [ "t1"."k", "t1"."v", "t1"."flag" ] [ "t1"."k" ASC, "t1"."v" NULLS LAST ]
+| | ) [ "t1"."k", "t1"."v", "t1"."flag", "sys"."rank"("sys"."star"(), "sys"."diff"("t1"."k"), "sys"."diff"("t1"."v")) as "t2"."rank" ]
+| ) [ ("t1"."flag") = (boolean(1) "false"), ("t2"."rank") = (int(32) "1") ]
+) [ "t1"."k" as "t2"."k", "t1"."v" as "t2"."v", "t1"."flag" as "t2"."flag", "t2"."rank" ]
+
+
+# another filter also not on the partition key, must not be pushed down
+query T nosort
+plan SELECT *
+FROM (
+    SELECT *, rank() OVER (PARTITION BY k ORDER BY v DESC) AS rank
+    FROM (
+        SELECT k, v, v % 2 = 0 AS flag
+        FROM Test
+    ) t1
+) t2
+WHERE rank = 1 AND NOT flag AND k = 10 AND v = 15;
+----
+project (
+| select (
+| | project (
+| | | project (
+| | | | project (
+| | | | | select (
+| | | | | | table("sys"."test") [ "test"."k", "test"."v" ]
+| | | | | ) [ ("test"."k") = (int(32) "10") ]
+| | | | ) [ "test"."k" as "t1"."k", "test"."v" as "t1"."v", "sys"."mod"("test"."v", int(32) "2") as "%1"."%1", "sys"."="("%1"."%1", int(32) "0") as "t1"."flag" ]
+| | | ) [ "t1"."k", "t1"."v", "t1"."flag" ] [ "t1"."k" ASC, "t1"."v" NULLS LAST ]
+| | ) [ "t1"."k", "t1"."v", "t1"."flag", "sys"."rank"("sys"."star"(), "sys"."diff"("t1"."k"), "sys"."diff"("t1"."v")) as "t2"."rank" ]
+| ) [ ("t1"."v") = (int(32) "15"), ("t1"."flag") = (boolean(1) "false"), ("t2"."rank") = (int(32) "1") ]
+) [ "t1"."k" as "t2"."k", "t1"."v" as "t2"."v", "t1"."flag" as "t2"."flag", "t2"."rank" ]
+
+
+# swapping k with v just to test for hardcoded optimizations,
+# v is pushed down but not k
+query T nosort
+plan SELECT *
+FROM (
+    SELECT *, rank() OVER (PARTITION BY v ORDER BY v DESC) AS rank
+    FROM (
+        SELECT k, v, v % 2 = 0 AS flag
+        FROM Test
+    ) t1
+) t2
+WHERE rank = 1 AND NOT flag AND k = 10 AND v = 15;
+----
+project (
+| select (
+| | project (
+| | | project (
+| | | | project (
+| | | | | select (
+| | | | | | table("sys"."test") [ "test"."k", "test"."v" ]
+| | | | | ) [ ("test"."v") = (int(32) "15") ]
+| | | | ) [ "test"."k" as "t1"."k", "test"."v" as "t1"."v", "sys"."mod"("test"."v", int(32) "2") as "%1"."%1", "sys"."="("%1"."%1", int(32) "0") as "t1"."flag" ]
+| | | ) [ "t1"."k", "t1"."v", "t1"."flag" ] [ "t1"."v" NULLS LAST ]
+| | ) [ "t1"."k", "t1"."v", "t1"."flag", "sys"."rank"("sys"."star"(), "sys"."diff"("t1"."v"), "sys"."diff"("t1"."v")) as "t2"."rank" ]
+| ) [ ("t1"."flag") = (boolean(1) "false"), ("t2"."rank") = (int(32) "1"), ("t1"."k") = (int(32) "10") ]
+) [ "t1"."k" as "t2"."k", "t1"."v" as "t2"."v", "t1"."flag" as "t2"."flag", "t2"."rank" ]
+
+
+# performing some additional computation on the partition key,
+# filter cannot be pushed down
+plan SELECT *
+FROM (
+    SELECT k * 10 as k, v, flag, rank() OVER (PARTITION BY k ORDER BY v DESC) AS rank
+    FROM (
+        SELECT k, v, v % 2 = 0 AS flag
+        FROM Test
+    ) t1
+) t2
+WHERE rank = 1 AND NOT flag AND k = 10;
+----
+project (
+| select (
+| | project (
+| | | select (
+| | | | project (
+| | | | | project (
+| | | | | | project (
+| | | | | | | table("sys"."test") [ "test"."k", "test"."v" ]
+| | | | | | ) [ "test"."k" as "t1"."k", "test"."v" as "t1"."v", "sys"."mod"("test"."v", int(32) "2") as "%1"."%1", "sys"."="("%1"."%1", int(32) "0") as "t1"."flag" ]
+| | | | | ) [ "t1"."k", "t1"."v", "t1"."flag" ] [ "t1"."k" ASC, "t1"."v" NULLS LAST ]
+| | | | ) [ "t1"."k", "t1"."v", "t1"."flag", "sys"."rank"("sys"."star"(), "sys"."diff"("t1"."k"), "sys"."diff"("t1"."v")) as "t2"."rank" ]
+| | | ) [ ("t1"."flag") = (boolean(1) "false"), ("t2"."rank") = (int(32) "1") ]
+| | ) [ "sys"."sql_mul"("t1"."k", tinyint(4) "10") as "t2"."k", "t1"."v" as "t2"."v", "t1"."flag" as "t2"."flag", "t2"."rank" ]
+| ) [ ("t2"."k") = (bigint(36) "10") ]
+) [ "t2"."k", "t2"."v", "t2"."flag", "t2"."rank" ]
+
+
+# filter [partition column OR flag], cannot be safely pushed down
+query T nosort
+plan SELECT *
+FROM (
+    SELECT *, rank() OVER (PARTITION BY k ORDER BY v DESC) AS rank
+    FROM (
+        SELECT k, v, v % 2 = 0 AS flag
+        FROM Test
+    ) t1
+) t2
+WHERE rank = 1 AND (NOT flag OR k = 10);
+----
+project (
+| select (
+| | project (
+| | | project (
+| | | | project (
+| | | | | table("sys"."test") [ "test"."k", "test"."v" ]
+| | | | ) [ "test"."k" as "t1"."k", "test"."v" as "t1"."v", "sys"."mod"("test"."v", int(32) "2") as "%1"."%1", "sys"."="("%1"."%1", int(32) "0") as "t1"."flag" ]
+| | | ) [ "t1"."k", "t1"."v", "t1"."flag" ] [ "t1"."k" ASC, "t1"."v" NULLS LAST ]
+| | ) [ "t1"."k", "t1"."v", "t1"."flag", "sys"."rank"("sys"."star"(), "sys"."diff"("t1"."k"), "sys"."diff"("t1"."v")) as "t2"."rank" ]
+| ) [ ("t2"."rank") = (int(32) "1"), (("t1"."flag") = (boolean(1) "false")) or (("t1"."k") = (int(32) "10")) ]
+) [ "t1"."k" as "t2"."k", "t1"."v" as "t2"."v", "t1"."flag" as "t2"."flag", "t2"."rank" ]
+
+
+# filter on k and v and both are partition columns, both filters can be pushed down 
+query T nosort
+plan SELECT *
+FROM (
+    SELECT *, rank() OVER (PARTITION BY k, v ORDER BY v DESC) AS rank
+    FROM (
+        SELECT k, v, v % 2 = 0 AS flag
+        FROM Test
+    ) t1
+) t2
+WHERE rank = 1 AND NOT flag AND k <= 10 AND v IN (1, 2, 3);
+----
+project (
+| select (
+| | project (
+| | | project (
+| | | | project (
+| | | | | select (
+| | | | | | table("sys"."test") [ "test"."k", "test"."v" ]
+| | | | | ) [ ("test"."k") <= (int(32) "10"), ("test"."v") in (int(32) "1", int(32) "2", int(32) "3") ]
+| | | | ) [ "test"."k" as "t1"."k", "test"."v" as "t1"."v", "sys"."mod"("test"."v", int(32) "2") as "%2"."%2", "sys"."="("%2"."%2", int(32) "0") as "t1"."flag" ]
+| | | ) [ "t1"."k", "t1"."v", "t1"."flag" ] [ "t1"."k" ASC, "t1"."v" NULLS LAST ]
+| | ) [ "t1"."k", "t1"."v", "t1"."flag", "sys"."rank"("sys"."star"(), "sys"."diff"("sys"."diff"("t1"."k"), "t1"."v"), "sys"."diff"("t1"."v")) as "t2"."rank" ]
+| ) [ ("t1"."flag") = (boolean(1) "false"), ("t2"."rank") = (int(32) "1") ]
+) [ "t1"."k" as "t2"."k", "t1"."v" as "t2"."v", "t1"."flag" as "t2"."flag", "t2"."rank" ]
+
+
+statement ok
+DROP TABLE Test
+
+
+# test with string filters
+# (previously e_convert were not being considered)
+statement ok
+CREATE TABLE Test (k varchar(100), v int);
+
+statement ok
+INSERT INTO Test SELECT value % 10 as k, value as v FROM generate_series(1, 100);
+
+query T nosort
+plan SELECT *
+FROM (
+    SELECT *, rank() OVER (PARTITION BY k ORDER BY v DESC) AS rank
+    FROM (
+        SELECT k, v, v % 2 = 0 AS flag
+        FROM Test
+    ) t1
+) t2
+WHERE rank = 1 AND NOT flag AND k = '10';
+----
+project (
+| select (
+| | project (
+| | | project (
+| | | | project (
+| | | | | select (
+| | | | | | table("sys"."test") [ "test"."k", "test"."v" ]
+| | | | | ) [ (char(100)["test"."k"]) = (char(100) "10") ]
+| | | | ) [ "test"."k" as "t1"."k", "test"."v" as "t1"."v", "sys"."mod"("test"."v", int(32) "2") as "%1"."%1", "sys"."="("%1"."%1", int(32) "0") as "t1"."flag" ]
+| | | ) [ "t1"."k", "t1"."v", "t1"."flag" ] [ "t1"."k" ASC, "t1"."v" NULLS LAST ]
+| | ) [ "t1"."k", "t1"."v", "t1"."flag", "sys"."rank"("sys"."star"(), "sys"."diff"("t1"."k"), "sys"."diff"("t1"."v")) as "t2"."rank" ]
+| ) [ ("t1"."flag") = (boolean(1) "false"), ("t2"."rank") = (int(32) "1") ]
+) [ "t1"."k" as "t2"."k", "t1"."v" as "t2"."v", "t1"."flag" as "t2"."flag", "t2"."rank" ]
+
+
+statement ok
+DROP TABLE Test