From 7e701befde6ab23523ad9569263e81daaf464cc7 Mon Sep 17 00:00:00 2001 From: Colm Date: Thu, 21 Nov 2024 19:22:30 +0000 Subject: [PATCH] PG17 compatibility: add helper function for EXPLAIN diffs in scalar subquery output (#7757) PG17 changed how scalar subquery outputs appear in EXPLAIN output (*). This commit changes impacted regress goldfiles to the PG17 format, and adds a helper function to covert pre-PG17 plans to the PG17 format. The conversion is required when testing Citus on pgversions prior to 17. The helper function can and should be removed when 17 becomes the minimum supported version. (*) https://git.postgresql.org/gitweb/?p=postgresql.git;a=commitdiff;h=fd0398fcb --- src/test/regress/expected/ch_bench_having.out | 42 ++++++++----- .../regress/expected/ch_bench_having_mx.out | 44 +++++++------ src/test/regress/expected/having_subquery.out | 8 ++- src/test/regress/expected/multi_explain.out | 18 +++--- src/test/regress/expected/multi_explain_0.out | 18 +++--- .../expected/multi_reference_table.out | 6 +- src/test/regress/expected/multi_subquery.out | 24 +++++--- .../regress/expected/multi_test_helpers.out | 59 ++++++++++++++++++ src/test/regress/sql/ch_bench_having.sql | 11 +++- src/test/regress/sql/ch_bench_having_mx.sql | 11 +++- src/test/regress/sql/having_subquery.sql | 2 + src/test/regress/sql/multi_explain.sql | 6 +- .../regress/sql/multi_reference_table.sql | 2 + src/test/regress/sql/multi_subquery.sql | 8 +++ src/test/regress/sql/multi_test_helpers.sql | 61 +++++++++++++++++++ 15 files changed, 257 insertions(+), 63 deletions(-) diff --git a/src/test/regress/expected/ch_bench_having.out b/src/test/regress/expected/ch_bench_having.out index 29feb0305fa..02fba11c8b4 100644 --- a/src/test/regress/expected/ch_bench_having.out +++ b/src/test/regress/expected/ch_bench_having.out @@ -12,6 +12,7 @@ SELECT create_distributed_table('stock','s_w_id'); (1 row) +SELECT public.explain_with_pg17_initplan_format($Q$ explain (costs false, summary false, timing false) select s_i_id, sum(s_order_cnt) as ordercount from stock @@ -19,15 +20,16 @@ where s_order_cnt > (select sum(s_order_cnt) * .005 as where_query from stock) group by s_i_id having sum(s_order_cnt) > (select max(s_order_cnt) - 3 as having_query from stock) order by s_i_id; +$Q$) as "QUERY PLAN"; QUERY PLAN --------------------------------------------------------------------- Sort Sort Key: remote_scan.s_i_id - InitPlan 1 (returns $0) + InitPlan 1 -> Function Scan on read_intermediate_result intermediate_result -> HashAggregate Group Key: remote_scan.s_i_id - Filter: ((pg_catalog.sum(remote_scan.worker_column_3))::bigint > $0) + Filter: ((pg_catalog.sum(remote_scan.worker_column_3))::bigint > (InitPlan 1).col1) -> Custom Scan (Citus Adaptive) -> Distributed Subplan XXX_1 -> Aggregate @@ -53,27 +55,29 @@ order by s_i_id; Node: host=localhost port=xxxxx dbname=regression -> HashAggregate Group Key: stock.s_i_id - InitPlan 1 (returns $0) + InitPlan 1 -> Function Scan on read_intermediate_result intermediate_result -> Seq Scan on stock_1640000 stock - Filter: ((s_order_cnt)::numeric > $0) + Filter: ((s_order_cnt)::numeric > (InitPlan 1).col1) (36 rows) +SELECT public.explain_with_pg17_initplan_format($Q$ explain (costs false, summary false, timing false) select s_i_id, sum(s_order_cnt) as ordercount from stock group by s_i_id having sum(s_order_cnt) > (select max(s_order_cnt) - 3 as having_query from stock) order by s_i_id; - QUERY PLAN +$Q$) as "QUERY PLAN"; + QUERY PLAN --------------------------------------------------------------------- Sort Sort Key: remote_scan.s_i_id - InitPlan 1 (returns $0) + InitPlan 1 -> Function Scan on read_intermediate_result intermediate_result -> HashAggregate Group Key: remote_scan.s_i_id - Filter: ((pg_catalog.sum(remote_scan.worker_column_3))::bigint > $0) + Filter: ((pg_catalog.sum(remote_scan.worker_column_3))::bigint > (InitPlan 1).col1) -> Custom Scan (Citus Adaptive) -> Distributed Subplan XXX_1 -> Aggregate @@ -93,17 +97,19 @@ order by s_i_id; -> Seq Scan on stock_1640000 stock (24 rows) +SELECT public.explain_with_pg17_initplan_format($Q$ explain (costs false, summary false, timing false) select s_i_id, sum(s_order_cnt) as ordercount from stock group by s_i_id having sum(s_order_cnt) > (select max(s_order_cnt) - 3 as having_query from stock); - QUERY PLAN +$Q$) as "QUERY PLAN"; + QUERY PLAN --------------------------------------------------------------------- HashAggregate Group Key: remote_scan.s_i_id - Filter: ((pg_catalog.sum(remote_scan.worker_column_3))::bigint > $0) - InitPlan 1 (returns $0) + Filter: ((pg_catalog.sum(remote_scan.worker_column_3))::bigint > (InitPlan 1).col1) + InitPlan 1 -> Function Scan on read_intermediate_result intermediate_result -> Custom Scan (Citus Adaptive) -> Distributed Subplan XXX_1 @@ -124,24 +130,26 @@ having sum(s_order_cnt) > (select max(s_order_cnt) - 3 as having_query from st -> Seq Scan on stock_1640000 stock (22 rows) +SELECT public.explain_with_pg17_initplan_format($Q$ explain (costs false) select s_i_id, sum(s_order_cnt) as ordercount from stock s group by s_i_id having (select true) order by s_i_id; +$Q$) as "QUERY PLAN"; QUERY PLAN --------------------------------------------------------------------- Sort Sort Key: remote_scan.s_i_id - InitPlan 1 (returns $0) + InitPlan 1 -> Result -> HashAggregate Group Key: remote_scan.s_i_id -> Result - One-Time Filter: $0 + One-Time Filter: (InitPlan 1).col1 -> Custom Scan (Citus Adaptive) - Filter: $0 + Filter: (InitPlan 1).col1 Task Count: 4 Tasks Shown: One of 4 -> Task @@ -151,21 +159,23 @@ order by s_i_id; -> Seq Scan on stock_1640000 s (17 rows) +SELECT public.explain_with_pg17_initplan_format($Q$ explain (costs false) select s_i_id, sum(s_order_cnt) as ordercount from stock s group by s_i_id having (select true); +$Q$) as "QUERY PLAN"; QUERY PLAN --------------------------------------------------------------------- HashAggregate Group Key: remote_scan.s_i_id - InitPlan 1 (returns $0) + InitPlan 1 -> Result -> Result - One-Time Filter: $0 + One-Time Filter: (InitPlan 1).col1 -> Custom Scan (Citus Adaptive) - Filter: $0 + Filter: (InitPlan 1).col1 Task Count: 4 Tasks Shown: One of 4 -> Task diff --git a/src/test/regress/expected/ch_bench_having_mx.out b/src/test/regress/expected/ch_bench_having_mx.out index 90c4334a017..f4664fba53c 100644 --- a/src/test/regress/expected/ch_bench_having_mx.out +++ b/src/test/regress/expected/ch_bench_having_mx.out @@ -16,6 +16,7 @@ SELECT create_distributed_table('stock','s_w_id'); \c - - - :worker_1_port SET search_path = ch_bench_having; +SELECT public.explain_with_pg17_initplan_format($Q$ explain (costs false, summary false, timing false) select s_i_id, sum(s_order_cnt) as ordercount from stock @@ -23,15 +24,16 @@ where s_order_cnt > (select sum(s_order_cnt) * .005 as where_query from stock) group by s_i_id having sum(s_order_cnt) > (select max(s_order_cnt) - 3 as having_query from stock) order by s_i_id; +$Q$) as "QUERY PLAN"; QUERY PLAN --------------------------------------------------------------------- Sort Sort Key: remote_scan.s_i_id - InitPlan 1 (returns $0) + InitPlan 1 -> Function Scan on read_intermediate_result intermediate_result -> HashAggregate Group Key: remote_scan.s_i_id - Filter: ((pg_catalog.sum(remote_scan.worker_column_3))::bigint > $0) + Filter: ((pg_catalog.sum(remote_scan.worker_column_3))::bigint > (InitPlan 1).col1) -> Custom Scan (Citus Adaptive) -> Distributed Subplan XXX_1 -> Aggregate @@ -57,27 +59,29 @@ order by s_i_id; Node: host=localhost port=xxxxx dbname=regression -> HashAggregate Group Key: stock.s_i_id - InitPlan 1 (returns $0) + InitPlan 1 -> Function Scan on read_intermediate_result intermediate_result -> Seq Scan on stock_1640000 stock - Filter: ((s_order_cnt)::numeric > $0) + Filter: ((s_order_cnt)::numeric > (InitPlan 1).col1) (36 rows) +SELECT public.explain_with_pg17_initplan_format($Q$ explain (costs false, summary false, timing false) select s_i_id, sum(s_order_cnt) as ordercount from stock group by s_i_id having sum(s_order_cnt) > (select max(s_order_cnt) - 3 as having_query from stock) order by s_i_id; - QUERY PLAN +$Q$) as "QUERY PLAN"; + QUERY PLAN --------------------------------------------------------------------- Sort Sort Key: remote_scan.s_i_id - InitPlan 1 (returns $0) + InitPlan 1 -> Function Scan on read_intermediate_result intermediate_result -> HashAggregate Group Key: remote_scan.s_i_id - Filter: ((pg_catalog.sum(remote_scan.worker_column_3))::bigint > $0) + Filter: ((pg_catalog.sum(remote_scan.worker_column_3))::bigint > (InitPlan 1).col1) -> Custom Scan (Citus Adaptive) -> Distributed Subplan XXX_1 -> Aggregate @@ -97,17 +101,19 @@ order by s_i_id; -> Seq Scan on stock_1640000 stock (24 rows) +SELECT public.explain_with_pg17_initplan_format($Q$ explain (costs false, summary false, timing false) select s_i_id, sum(s_order_cnt) as ordercount from stock group by s_i_id having sum(s_order_cnt) > (select max(s_order_cnt) - 3 as having_query from stock); - QUERY PLAN +$Q$) as "QUERY PLAN"; + QUERY PLAN --------------------------------------------------------------------- HashAggregate Group Key: remote_scan.s_i_id - Filter: ((pg_catalog.sum(remote_scan.worker_column_3))::bigint > $0) - InitPlan 1 (returns $0) + Filter: ((pg_catalog.sum(remote_scan.worker_column_3))::bigint > (InitPlan 1).col1) + InitPlan 1 -> Function Scan on read_intermediate_result intermediate_result -> Custom Scan (Citus Adaptive) -> Distributed Subplan XXX_1 @@ -128,24 +134,26 @@ having sum(s_order_cnt) > (select max(s_order_cnt) - 3 as having_query from st -> Seq Scan on stock_1640000 stock (22 rows) +SELECT public.explain_with_pg17_initplan_format($Q$ explain (costs false) select s_i_id, sum(s_order_cnt) as ordercount from stock s group by s_i_id having (select true) order by s_i_id; - QUERY PLAN +$Q$) as "QUERY PLAN"; + QUERY PLAN --------------------------------------------------------------------- Sort Sort Key: remote_scan.s_i_id - InitPlan 1 (returns $0) + InitPlan 1 -> Result -> HashAggregate Group Key: remote_scan.s_i_id -> Result - One-Time Filter: $0 + One-Time Filter: (InitPlan 1).col1 -> Custom Scan (Citus Adaptive) - Filter: $0 + Filter: (InitPlan 1).col1 Task Count: 4 Tasks Shown: One of 4 -> Task @@ -155,20 +163,22 @@ order by s_i_id; -> Seq Scan on stock_1640000 s (17 rows) +SELECT public.explain_with_pg17_initplan_format($Q$ explain select s_i_id, sum(s_order_cnt) as ordercount from stock s group by s_i_id having (select true); +$Q$) as "QUERY PLAN"; QUERY PLAN --------------------------------------------------------------------- HashAggregate (cost=500.01..503.01 rows=200 width=12) Group Key: remote_scan.s_i_id - InitPlan 1 (returns $0) + InitPlan 1 -> Result (cost=0.00..0.01 rows=1 width=1) -> Result (cost=0.00..0.00 rows=100000 width=12) - One-Time Filter: $0 + One-Time Filter: (InitPlan 1).col1 -> Custom Scan (Citus Adaptive) (cost=0.00..0.00 rows=100000 width=12) - Filter: $0 + Filter: (InitPlan 1).col1 Task Count: 4 Tasks Shown: One of 4 -> Task diff --git a/src/test/regress/expected/having_subquery.out b/src/test/regress/expected/having_subquery.out index a67d441b3ea..83e582791f6 100644 --- a/src/test/regress/expected/having_subquery.out +++ b/src/test/regress/expected/having_subquery.out @@ -22,6 +22,7 @@ HAVING ( (3 rows) -- lets pin the plan in the test as well +SELECT public.explain_with_pg17_initplan_format($Q$ EXPLAIN (COSTS OFF) SELECT count(*), o_orderstatus @@ -31,16 +32,17 @@ HAVING ( SELECT count(*) FROM customer ) > 0; +$Q$) as "QUERY PLAN"; QUERY PLAN --------------------------------------------------------------------- HashAggregate Group Key: remote_scan.o_orderstatus - InitPlan 1 (returns $0) + InitPlan 1 -> Function Scan on read_intermediate_result intermediate_result -> Result - One-Time Filter: ($0 > 0) + One-Time Filter: ((InitPlan 1).col1 > 0) -> Custom Scan (Citus Adaptive) - Filter: ($0 > 0) + Filter: ((InitPlan 1).col1 > 0) -> Distributed Subplan XXX_1 -> Custom Scan (Citus Adaptive) Task Count: 1 diff --git a/src/test/regress/expected/multi_explain.out b/src/test/regress/expected/multi_explain.out index 17b67360774..906add24cd0 100644 --- a/src/test/regress/expected/multi_explain.out +++ b/src/test/regress/expected/multi_explain.out @@ -1456,25 +1456,27 @@ Custom Scan (Citus INSERT ... SELECT) INSERT/SELECT method: pull to coordinator -> Function Scan on generate_series s -- WHERE EXISTS forces pg12 to materialize cte +SELECT public.explain_with_pg17_initplan_format($Q$ EXPLAIN (COSTS OFF) WITH cte1 AS (SELECT s FROM generate_series(1,10) s) INSERT INTO lineitem_hash_part WITH cte1 AS (SELECT * FROM cte1 WHERE EXISTS (SELECT * FROM cte1) LIMIT 5) SELECT s FROM cte1 WHERE EXISTS (SELECT * FROM cte1); +$Q$); Custom Scan (Citus INSERT ... SELECT) INSERT/SELECT method: pull to coordinator -> Result - One-Time Filter: $3 + One-Time Filter: (InitPlan 4).col1 CTE cte1 -> Function Scan on generate_series s CTE cte1 -> Limit - InitPlan 2 (returns $1) + InitPlan 2 -> CTE Scan on cte1 cte1_1 -> Result - One-Time Filter: $1 + One-Time Filter: (InitPlan 2).col1 -> CTE Scan on cte1 cte1_2 - InitPlan 4 (returns $3) + InitPlan 4 -> CTE Scan on cte1 cte1_3 -> CTE Scan on cte1 EXPLAIN (COSTS OFF) @@ -2425,9 +2427,11 @@ Aggregate (actual rows=1 loops=1) Sort Key: ref_table.a Sort Method: quicksort Memory: 25kB -> Seq Scan on ref_table_570021 ref_table (actual rows=10 loops=1) -EXPLAIN :default_analyze_flags +SELECT public.explain_with_pg17_initplan_format($Q$ +EXPLAIN (ANALYZE on, COSTS off, TIMING off, SUMMARY off) SELECT count(distinct a) FROM dist_table WHERE EXISTS(SELECT random() < 2 FROM dist_table NATURAL JOIN ref_table); +$Q$); Aggregate (actual rows=1 loops=1) -> Custom Scan (Citus Adaptive) (actual rows=4 loops=1) -> Distributed Subplan XXX_1 @@ -2457,13 +2461,13 @@ Aggregate (actual rows=1 loops=1) Tuple data received from node: 8 bytes Node: host=localhost port=xxxxx dbname=regression -> Aggregate (actual rows=1 loops=1) - InitPlan 1 (returns $0) + InitPlan 1 -> Function Scan on read_intermediate_result intermediate_result (actual rows=1 loops=1) -> Sort (actual rows=4 loops=1) Sort Key: dist_table.a Sort Method: quicksort Memory: 25kB -> Result (actual rows=4 loops=1) - One-Time Filter: $0 + One-Time Filter: (InitPlan 1).col1 -> Seq Scan on dist_table_570017 dist_table (actual rows=4 loops=1) BEGIN; EXPLAIN :default_analyze_flags diff --git a/src/test/regress/expected/multi_explain_0.out b/src/test/regress/expected/multi_explain_0.out index 9534cefb8c1..5ba5e056f30 100644 --- a/src/test/regress/expected/multi_explain_0.out +++ b/src/test/regress/expected/multi_explain_0.out @@ -1456,25 +1456,27 @@ Custom Scan (Citus INSERT ... SELECT) INSERT/SELECT method: pull to coordinator -> Function Scan on generate_series s -- WHERE EXISTS forces pg12 to materialize cte +SELECT public.explain_with_pg17_initplan_format($Q$ EXPLAIN (COSTS OFF) WITH cte1 AS (SELECT s FROM generate_series(1,10) s) INSERT INTO lineitem_hash_part WITH cte1 AS (SELECT * FROM cte1 WHERE EXISTS (SELECT * FROM cte1) LIMIT 5) SELECT s FROM cte1 WHERE EXISTS (SELECT * FROM cte1); +$Q$); Custom Scan (Citus INSERT ... SELECT) INSERT/SELECT method: pull to coordinator -> Result - One-Time Filter: $3 + One-Time Filter: (InitPlan 4).col1 CTE cte1 -> Function Scan on generate_series s CTE cte1 -> Limit - InitPlan 2 (returns $1) + InitPlan 2 -> CTE Scan on cte1 cte1_1 -> Result - One-Time Filter: $1 + One-Time Filter: (InitPlan 2).col1 -> CTE Scan on cte1 cte1_2 - InitPlan 4 (returns $3) + InitPlan 4 -> CTE Scan on cte1 cte1_3 -> CTE Scan on cte1 EXPLAIN (COSTS OFF) @@ -2420,9 +2422,11 @@ Aggregate (actual rows=1 loops=1) Sort Key: ref_table.a Sort Method: quicksort Memory: 25kB -> Seq Scan on ref_table_570021 ref_table (actual rows=10 loops=1) -EXPLAIN :default_analyze_flags +SELECT public.explain_with_pg17_initplan_format($Q$ +EXPLAIN (ANALYZE on, COSTS off, TIMING off, SUMMARY off) SELECT count(distinct a) FROM dist_table WHERE EXISTS(SELECT random() < 2 FROM dist_table NATURAL JOIN ref_table); +$Q$); Aggregate (actual rows=1 loops=1) -> Custom Scan (Citus Adaptive) (actual rows=4 loops=1) -> Distributed Subplan XXX_1 @@ -2452,10 +2456,10 @@ Aggregate (actual rows=1 loops=1) Tuple data received from node: 8 bytes Node: host=localhost port=xxxxx dbname=regression -> Aggregate (actual rows=1 loops=1) - InitPlan 1 (returns $0) + InitPlan 1 -> Function Scan on read_intermediate_result intermediate_result (actual rows=1 loops=1) -> Result (actual rows=4 loops=1) - One-Time Filter: $0 + One-Time Filter: (InitPlan 1).col1 -> Seq Scan on dist_table_570017 dist_table (actual rows=4 loops=1) BEGIN; EXPLAIN :default_analyze_flags diff --git a/src/test/regress/expected/multi_reference_table.out b/src/test/regress/expected/multi_reference_table.out index 75a9c3b64b3..68835be409a 100644 --- a/src/test/regress/expected/multi_reference_table.out +++ b/src/test/regress/expected/multi_reference_table.out @@ -1574,9 +1574,11 @@ ALTER TABLE reference_table_test ADD COLUMN value_dummy INT; INSERT INTO reference_table_test VALUES (2, 2.0, '2', '2016-12-02'); ROLLBACK; -- Previous issue failed to rename reference tables in subqueries +SELECT public.explain_with_pg17_initplan_format($Q$ EXPLAIN (COSTS OFF) SELECT value_1, count(*) FROM colocated_table_test GROUP BY value_1 HAVING (SELECT rt.value_2 FROM reference_table_test rt where rt.value_2 = 2) > 0 ORDER BY 1; +$Q$) as "QUERY PLAN"; QUERY PLAN --------------------------------------------------------------------- Sort @@ -1596,10 +1598,10 @@ ORDER BY 1; Node: host=localhost port=xxxxx dbname=regression -> HashAggregate Group Key: colocated_table_test.value_1 - InitPlan 1 (returns $0) + InitPlan 1 -> Function Scan on read_intermediate_result intermediate_result -> Result - One-Time Filter: ($0 > '0'::double precision) + One-Time Filter: ((InitPlan 1).col1 > '0'::double precision) -> Seq Scan on colocated_table_test_1250005 colocated_table_test (22 rows) diff --git a/src/test/regress/expected/multi_subquery.out b/src/test/regress/expected/multi_subquery.out index 60f978f5e77..0a074e882b8 100644 --- a/src/test/regress/expected/multi_subquery.out +++ b/src/test/regress/expected/multi_subquery.out @@ -973,8 +973,10 @@ SELECT create_reference_table('keyvalref'); (1 row) +SELECT public.explain_with_pg17_initplan_format($Q$ EXPLAIN (COSTS OFF) SELECT count(*) FROM keyval1 GROUP BY key HAVING sum(value) > (SELECT sum(value) FROM keyvalref GROUP BY key); +$Q$) as "QUERY PLAN"; QUERY PLAN --------------------------------------------------------------------- Custom Scan (Citus Adaptive) @@ -993,15 +995,17 @@ SELECT count(*) FROM keyval1 GROUP BY key HAVING sum(value) > (SELECT sum(value) Node: host=localhost port=xxxxx dbname=regression -> HashAggregate Group Key: keyval1.key - Filter: (sum(keyval1.value) > $0) - InitPlan 1 (returns $0) + Filter: (sum(keyval1.value) > (InitPlan 1).col1) + InitPlan 1 -> Function Scan on read_intermediate_result intermediate_result -> Seq Scan on keyval1_xxxxxxx keyval1 (20 rows) -- For some reason 'ORDER BY 1 DESC LIMIT 1' triggers recursive planning +SELECT public.explain_with_pg17_initplan_format($Q$ EXPLAIN (COSTS OFF) SELECT count(*) FROM keyval1 GROUP BY key HAVING sum(value) > (SELECT sum(value) FROM keyvalref GROUP BY key ORDER BY 1 DESC LIMIT 1); +$Q$) as "QUERY PLAN"; QUERY PLAN --------------------------------------------------------------------- Custom Scan (Citus Adaptive) @@ -1023,14 +1027,16 @@ SELECT count(*) FROM keyval1 GROUP BY key HAVING sum(value) > (SELECT sum(value) Node: host=localhost port=xxxxx dbname=regression -> HashAggregate Group Key: keyval1.key - Filter: (sum(keyval1.value) > $0) - InitPlan 1 (returns $0) + Filter: (sum(keyval1.value) > (InitPlan 1).col1) + InitPlan 1 -> Function Scan on read_intermediate_result intermediate_result -> Seq Scan on keyval1_xxxxxxx keyval1 (23 rows) +SELECT public.explain_with_pg17_initplan_format($Q$ EXPLAIN (COSTS OFF) SELECT count(*) FROM keyval1 GROUP BY key HAVING sum(value) > (SELECT sum(value) FROM keyval2 GROUP BY key ORDER BY 1 DESC LIMIT 1); +$Q$) as "QUERY PLAN"; QUERY PLAN --------------------------------------------------------------------- Custom Scan (Citus Adaptive) @@ -1055,14 +1061,16 @@ SELECT count(*) FROM keyval1 GROUP BY key HAVING sum(value) > (SELECT sum(value) Node: host=localhost port=xxxxx dbname=regression -> HashAggregate Group Key: keyval1.key - Filter: (sum(keyval1.value) > $0) - InitPlan 1 (returns $0) + Filter: (sum(keyval1.value) > (InitPlan 1).col1) + InitPlan 1 -> Function Scan on read_intermediate_result intermediate_result -> Seq Scan on keyval1_xxxxxxx keyval1 (26 rows) +SELECT public.explain_with_pg17_initplan_format($Q$ EXPLAIN (COSTS OFF) SELECT count(*) FROM keyval1 k1 WHERE k1.key = 2 HAVING sum(value) > (SELECT sum(value) FROM keyval2 k2 WHERE k2.key = 2 ORDER BY 1 DESC LIMIT 1); +$Q$) as "QUERY PLAN"; QUERY PLAN --------------------------------------------------------------------- Custom Scan (Citus Adaptive) @@ -1071,8 +1079,8 @@ SELECT count(*) FROM keyval1 k1 WHERE k1.key = 2 HAVING sum(value) > (SELECT sum -> Task Node: host=localhost port=xxxxx dbname=regression -> Aggregate - Filter: (sum(k1.value) > $0) - InitPlan 1 (returns $0) + Filter: (sum(k1.value) > (InitPlan 1).col1) + InitPlan 1 -> Limit -> Sort Sort Key: (sum(k2.value)) DESC diff --git a/src/test/regress/expected/multi_test_helpers.out b/src/test/regress/expected/multi_test_helpers.out index b8758e561bd..f66ff1e47b8 100644 --- a/src/test/regress/expected/multi_test_helpers.out +++ b/src/test/regress/expected/multi_test_helpers.out @@ -526,3 +526,62 @@ BEGIN RETURN result; END; $func$ LANGUAGE plpgsql; +CREATE or REPLACE FUNCTION initplan_references_to_pg17(text) returns text AS $$ +DECLARE + expr_parts text[]; + initplan_refs text[]; + n_initplan_refs int = 0; + i int := 1; + rv text := ''; + expr_part text; +BEGIN + -- Split the line on each $x; there must be at least one + -- For example 'foo = $0 and bar < $1' is split to: [ 'foo =', 'bar <' ] + expr_parts := regexp_split_to_array($1, '\$\d+'); + + -- Construct the PG17 formatted names in the given text + -- for example 'foo = $0 and bar < $1' yields [ '(InitPlan1).col1', '(InitPlan2).col1' ] + initplan_refs := ARRAY(select '(InitPlan ' || substr(x[1],2)::int + 1 || ').col1' from regexp_matches($1, '\$\d', 'g') x); + n_initplan_refs := array_length(initplan_refs, 1); + + -- Combine expression parts with PG17 formatted names + FOREACH expr_part IN ARRAY expr_parts + LOOP + rv := rv || expr_part; + -- There should be more expr parts than init plan refs so + -- check init plan refs boundary each time + IF i <= n_initplan_refs THEN + rv := rv || initplan_refs[i]; + END IF; + i := i + 1; + END LOOP; + RETURN rv; +END; +$$ LANGUAGE plpgsql; +-- This function formats EXPLAIN output to conform to how PG17 EXPLAIN shows +-- scalar subquery outputs if the pg version is less than 17 (*). When 17 +-- becomes the minimum supported pgversion this function can be retired. +-- +-- (*) https://git.postgresql.org/gitweb/?p=postgresql.git;a=commitdiff;h=fd0398fcb +CREATE OR REPLACE FUNCTION explain_with_pg17_initplan_format(explain_command text, out query_plan text) +RETURNS SETOF TEXT AS $$ +DECLARE + pgversion int = 0; +BEGIN + pgversion = substring(version(), '\d+')::int ; + FOR query_plan IN execute explain_command LOOP + IF pgversion < 17 THEN + -- Two types of format changes are needed: + -- 1) 'Init Plan 1 (returns $0)' becomes just 'Init Plan 1' + -- 2) 'foo = $0' becomes 'foo = (InitPlan 1).col1' + IF query_plan ~ 'InitPlan \d \(returns' THEN + query_plan = regexp_replace(query_plan, '\(returns \$\d\)', '', 'g'); + ELSIF query_plan ~ '\$\d' THEN + -- This line contains at least one InitPlan reference + -- Replace it to have PG17 style InitPlan references + query_plan = public.initplan_references_to_pg17(query_plan); + END IF; + END IF; + RETURN NEXT; + END LOOP; +END; $$ language plpgsql; diff --git a/src/test/regress/sql/ch_bench_having.sql b/src/test/regress/sql/ch_bench_having.sql index b6996cbe352..53838b9c0f9 100644 --- a/src/test/regress/sql/ch_bench_having.sql +++ b/src/test/regress/sql/ch_bench_having.sql @@ -10,6 +10,7 @@ CREATE TABLE stock ( SELECT create_distributed_table('stock','s_w_id'); +SELECT public.explain_with_pg17_initplan_format($Q$ explain (costs false, summary false, timing false) select s_i_id, sum(s_order_cnt) as ordercount from stock @@ -17,33 +18,41 @@ where s_order_cnt > (select sum(s_order_cnt) * .005 as where_query from stock) group by s_i_id having sum(s_order_cnt) > (select max(s_order_cnt) - 3 as having_query from stock) order by s_i_id; +$Q$) as "QUERY PLAN"; +SELECT public.explain_with_pg17_initplan_format($Q$ explain (costs false, summary false, timing false) select s_i_id, sum(s_order_cnt) as ordercount from stock group by s_i_id having sum(s_order_cnt) > (select max(s_order_cnt) - 3 as having_query from stock) order by s_i_id; +$Q$) as "QUERY PLAN"; +SELECT public.explain_with_pg17_initplan_format($Q$ explain (costs false, summary false, timing false) select s_i_id, sum(s_order_cnt) as ordercount from stock group by s_i_id having sum(s_order_cnt) > (select max(s_order_cnt) - 3 as having_query from stock); +$Q$) as "QUERY PLAN"; - +SELECT public.explain_with_pg17_initplan_format($Q$ explain (costs false) select s_i_id, sum(s_order_cnt) as ordercount from stock s group by s_i_id having (select true) order by s_i_id; +$Q$) as "QUERY PLAN"; +SELECT public.explain_with_pg17_initplan_format($Q$ explain (costs false) select s_i_id, sum(s_order_cnt) as ordercount from stock s group by s_i_id having (select true); +$Q$) as "QUERY PLAN"; select s_i_id, sum(s_order_cnt) as ordercount from stock diff --git a/src/test/regress/sql/ch_bench_having_mx.sql b/src/test/regress/sql/ch_bench_having_mx.sql index 7374ba4431b..798591a5f87 100644 --- a/src/test/regress/sql/ch_bench_having_mx.sql +++ b/src/test/regress/sql/ch_bench_having_mx.sql @@ -15,6 +15,7 @@ SELECT create_distributed_table('stock','s_w_id'); \c - - - :worker_1_port SET search_path = ch_bench_having; +SELECT public.explain_with_pg17_initplan_format($Q$ explain (costs false, summary false, timing false) select s_i_id, sum(s_order_cnt) as ordercount from stock @@ -22,32 +23,40 @@ where s_order_cnt > (select sum(s_order_cnt) * .005 as where_query from stock) group by s_i_id having sum(s_order_cnt) > (select max(s_order_cnt) - 3 as having_query from stock) order by s_i_id; +$Q$) as "QUERY PLAN"; +SELECT public.explain_with_pg17_initplan_format($Q$ explain (costs false, summary false, timing false) select s_i_id, sum(s_order_cnt) as ordercount from stock group by s_i_id having sum(s_order_cnt) > (select max(s_order_cnt) - 3 as having_query from stock) order by s_i_id; +$Q$) as "QUERY PLAN"; +SELECT public.explain_with_pg17_initplan_format($Q$ explain (costs false, summary false, timing false) select s_i_id, sum(s_order_cnt) as ordercount from stock group by s_i_id having sum(s_order_cnt) > (select max(s_order_cnt) - 3 as having_query from stock); +$Q$) as "QUERY PLAN"; - +SELECT public.explain_with_pg17_initplan_format($Q$ explain (costs false) select s_i_id, sum(s_order_cnt) as ordercount from stock s group by s_i_id having (select true) order by s_i_id; +$Q$) as "QUERY PLAN"; +SELECT public.explain_with_pg17_initplan_format($Q$ explain select s_i_id, sum(s_order_cnt) as ordercount from stock s group by s_i_id having (select true); +$Q$) as "QUERY PLAN"; select s_i_id, sum(s_order_cnt) as ordercount from stock diff --git a/src/test/regress/sql/having_subquery.sql b/src/test/regress/sql/having_subquery.sql index ced678d3903..f1e1c691164 100644 --- a/src/test/regress/sql/having_subquery.sql +++ b/src/test/regress/sql/having_subquery.sql @@ -16,6 +16,7 @@ HAVING ( ) > 0; -- lets pin the plan in the test as well +SELECT public.explain_with_pg17_initplan_format($Q$ EXPLAIN (COSTS OFF) SELECT count(*), o_orderstatus @@ -25,3 +26,4 @@ HAVING ( SELECT count(*) FROM customer ) > 0; +$Q$) as "QUERY PLAN"; diff --git a/src/test/regress/sql/multi_explain.sql b/src/test/regress/sql/multi_explain.sql index 7fa75c8be9b..4fc16fbd8ec 100644 --- a/src/test/regress/sql/multi_explain.sql +++ b/src/test/regress/sql/multi_explain.sql @@ -630,11 +630,13 @@ INSERT INTO lineitem_hash_part (l_orderkey) SELECT s FROM generate_series(1,5) s; -- WHERE EXISTS forces pg12 to materialize cte +SELECT public.explain_with_pg17_initplan_format($Q$ EXPLAIN (COSTS OFF) WITH cte1 AS (SELECT s FROM generate_series(1,10) s) INSERT INTO lineitem_hash_part WITH cte1 AS (SELECT * FROM cte1 WHERE EXISTS (SELECT * FROM cte1) LIMIT 5) SELECT s FROM cte1 WHERE EXISTS (SELECT * FROM cte1); +$Q$); EXPLAIN (COSTS OFF) INSERT INTO lineitem_hash_part @@ -949,9 +951,11 @@ SELECT count(distinct a) from r NATURAL JOIN ref_table; EXPLAIN :default_analyze_flags SELECT count(distinct a) FROM (SELECT GREATEST(random(), 2) r, a FROM dist_table) t NATURAL JOIN ref_table; -EXPLAIN :default_analyze_flags +SELECT public.explain_with_pg17_initplan_format($Q$ +EXPLAIN (ANALYZE on, COSTS off, TIMING off, SUMMARY off) SELECT count(distinct a) FROM dist_table WHERE EXISTS(SELECT random() < 2 FROM dist_table NATURAL JOIN ref_table); +$Q$); BEGIN; EXPLAIN :default_analyze_flags diff --git a/src/test/regress/sql/multi_reference_table.sql b/src/test/regress/sql/multi_reference_table.sql index 42e3c283b06..d538effe63e 100644 --- a/src/test/regress/sql/multi_reference_table.sql +++ b/src/test/regress/sql/multi_reference_table.sql @@ -993,9 +993,11 @@ INSERT INTO reference_table_test VALUES (2, 2.0, '2', '2016-12-02'); ROLLBACK; -- Previous issue failed to rename reference tables in subqueries +SELECT public.explain_with_pg17_initplan_format($Q$ EXPLAIN (COSTS OFF) SELECT value_1, count(*) FROM colocated_table_test GROUP BY value_1 HAVING (SELECT rt.value_2 FROM reference_table_test rt where rt.value_2 = 2) > 0 ORDER BY 1; +$Q$) as "QUERY PLAN"; WITH a as (SELECT rt.value_2 FROM reference_table_test rt where rt.value_2 = 2) SELECT ct.value_1, count(*) FROM colocated_table_test ct join a on ct.value_1 = a.value_2 diff --git a/src/test/regress/sql/multi_subquery.sql b/src/test/regress/sql/multi_subquery.sql index e5d8aa17cec..8dd2d479183 100644 --- a/src/test/regress/sql/multi_subquery.sql +++ b/src/test/regress/sql/multi_subquery.sql @@ -665,18 +665,26 @@ SELECT create_distributed_table('keyval2', 'key'); CREATE TABLE keyvalref (key int, value int); SELECT create_reference_table('keyvalref'); +SELECT public.explain_with_pg17_initplan_format($Q$ EXPLAIN (COSTS OFF) SELECT count(*) FROM keyval1 GROUP BY key HAVING sum(value) > (SELECT sum(value) FROM keyvalref GROUP BY key); +$Q$) as "QUERY PLAN"; -- For some reason 'ORDER BY 1 DESC LIMIT 1' triggers recursive planning +SELECT public.explain_with_pg17_initplan_format($Q$ EXPLAIN (COSTS OFF) SELECT count(*) FROM keyval1 GROUP BY key HAVING sum(value) > (SELECT sum(value) FROM keyvalref GROUP BY key ORDER BY 1 DESC LIMIT 1); +$Q$) as "QUERY PLAN"; +SELECT public.explain_with_pg17_initplan_format($Q$ EXPLAIN (COSTS OFF) SELECT count(*) FROM keyval1 GROUP BY key HAVING sum(value) > (SELECT sum(value) FROM keyval2 GROUP BY key ORDER BY 1 DESC LIMIT 1); +$Q$) as "QUERY PLAN"; +SELECT public.explain_with_pg17_initplan_format($Q$ EXPLAIN (COSTS OFF) SELECT count(*) FROM keyval1 k1 WHERE k1.key = 2 HAVING sum(value) > (SELECT sum(value) FROM keyval2 k2 WHERE k2.key = 2 ORDER BY 1 DESC LIMIT 1); +$Q$) as "QUERY PLAN"; -- Simple join subquery pushdown SELECT diff --git a/src/test/regress/sql/multi_test_helpers.sql b/src/test/regress/sql/multi_test_helpers.sql index f7c97f1b2d3..e39ae451b2e 100644 --- a/src/test/regress/sql/multi_test_helpers.sql +++ b/src/test/regress/sql/multi_test_helpers.sql @@ -550,3 +550,64 @@ BEGIN RETURN result; END; $func$ LANGUAGE plpgsql; + +CREATE or REPLACE FUNCTION initplan_references_to_pg17(text) returns text AS $$ +DECLARE + expr_parts text[]; + initplan_refs text[]; + n_initplan_refs int = 0; + i int := 1; + rv text := ''; + expr_part text; +BEGIN + -- Split the line on each $x; there must be at least one + -- For example 'foo = $0 and bar < $1' is split to: [ 'foo =', 'bar <' ] + expr_parts := regexp_split_to_array($1, '\$\d+'); + + -- Construct the PG17 formatted names in the given text + -- for example 'foo = $0 and bar < $1' yields [ '(InitPlan1).col1', '(InitPlan2).col1' ] + initplan_refs := ARRAY(select '(InitPlan ' || substr(x[1],2)::int + 1 || ').col1' from regexp_matches($1, '\$\d', 'g') x); + n_initplan_refs := array_length(initplan_refs, 1); + + -- Combine expression parts with PG17 formatted names + FOREACH expr_part IN ARRAY expr_parts + LOOP + rv := rv || expr_part; + -- There should be more expr parts than init plan refs so + -- check init plan refs boundary each time + IF i <= n_initplan_refs THEN + rv := rv || initplan_refs[i]; + END IF; + i := i + 1; + END LOOP; + RETURN rv; +END; +$$ LANGUAGE plpgsql; + +-- This function formats EXPLAIN output to conform to how PG17 EXPLAIN shows +-- scalar subquery outputs if the pg version is less than 17 (*). When 17 +-- becomes the minimum supported pgversion this function can be retired. +-- +-- (*) https://git.postgresql.org/gitweb/?p=postgresql.git;a=commitdiff;h=fd0398fcb +CREATE OR REPLACE FUNCTION explain_with_pg17_initplan_format(explain_command text, out query_plan text) +RETURNS SETOF TEXT AS $$ +DECLARE + pgversion int = 0; +BEGIN + pgversion = substring(version(), '\d+')::int ; + FOR query_plan IN execute explain_command LOOP + IF pgversion < 17 THEN + -- Two types of format changes are needed: + -- 1) 'Init Plan 1 (returns $0)' becomes just 'Init Plan 1' + -- 2) 'foo = $0' becomes 'foo = (InitPlan 1).col1' + IF query_plan ~ 'InitPlan \d \(returns' THEN + query_plan = regexp_replace(query_plan, '\(returns \$\d\)', '', 'g'); + ELSIF query_plan ~ '\$\d' THEN + -- This line contains at least one InitPlan reference + -- Replace it to have PG17 style InitPlan references + query_plan = public.initplan_references_to_pg17(query_plan); + END IF; + END IF; + RETURN NEXT; + END LOOP; +END; $$ language plpgsql;