postgresql/contrib/pg_plan_advice/sql/join_order.sql
Robert Haas 5883ff30b0 Add pg_plan_advice contrib module.
Provide a facility that (1) can be used to stabilize certain plan choices
so that the planner cannot reverse course without authorization and
(2) can be used by knowledgeable users to insist on plan choices contrary
to what the planner believes best. In both cases, terrible outcomes are
possible: users should think twice and perhaps three times before
constraining the planner's ability to do as it thinks best; nevertheless,
there are problems that are much more easily solved with these facilities
than without them.

This patch takes the approach of analyzing a finished plan to produce
textual output, which we call "plan advice", that describes key
decisions made during planning; if that plan advice is provided during
future planning cycles, it will force those key decisions to be made in
the same way.  Not all planner decisions can be controlled using advice;
for example, decisions about how to perform aggregation are currently
out of scope, as is choice of sort order. Plan advice can also be edited
by the user, or even written from scratch in simple cases, making it
possible to generate outcomes that the planner would not have produced.
Partial advice can be provided to control some planner outcomes but not
others.

Currently, plan advice is focused only on specific outcomes, such as
the choice to use a sequential scan for a particular relation, and not
on estimates that might contribute to those outcomes, such as a
possibly-incorrect selectivity estimate. While it would be useful to
users to be able to provide plan advice that affects selectivity
estimates or other aspects of costing, that is out of scope for this
commit.

Reviewed-by: Lukas Fittl <lukas@fittl.com>
Reviewed-by: Jakub Wartak <jakub.wartak@enterprisedb.com>
Reviewed-by: Greg Burd <greg@burd.me>
Reviewed-by: Jacob Champion <jacob.champion@enterprisedb.com>
Reviewed-by: Haibo Yan <tristan.yim@gmail.com>
Reviewed-by: Dian Fay <di@nmfay.com>
Reviewed-by: Ajay Pal <ajay.pal.k@gmail.com>
Reviewed-by: John Naylor <johncnaylorls@gmail.com>
Reviewed-by: Alexandra Wang <alexandra.wang.oss@gmail.com>
Discussion: http://postgr.es/m/CA+TgmoZ-Jh1T6QyWoCODMVQdhTUPYkaZjWztzP1En4=ZHoKPzw@mail.gmail.com
2026-03-12 13:00:43 -04:00

145 lines
5.4 KiB
PL/PgSQL

-- Test fixture for the join_order advice tests: two dimension tables
-- (jo_dim1, jo_dim2) and one fact table (jo_fact) that references both.
LOAD 'pg_plan_advice';
-- A value of 0 disables parallel query, so the plans below are serial.
SET max_parallel_workers_per_gather = 0;
-- Autovacuum is switched off on every table created here, so statistics are
-- gathered only by the explicit VACUUM ANALYZE that follows each load
-- (presumably to keep the plans stable from run to run).
-- jo_dim1: 100 rows; val1 cycles through the values 1..3.
CREATE TABLE jo_dim1 (id integer primary key, dim1 text, val1 int)
WITH (autovacuum_enabled = false);
INSERT INTO jo_dim1 (id, dim1, val1)
SELECT g, 'some filler text ' || g, (g % 3) + 1
FROM generate_series(1,100) g;
VACUUM ANALYZE jo_dim1;
-- jo_dim2: 1000 rows; val2 cycles through the values 1..53.
CREATE TABLE jo_dim2 (id integer primary key, dim2 text, val2 int)
WITH (autovacuum_enabled = false);
INSERT INTO jo_dim2 (id, dim2, val2)
SELECT g, 'some filler text ' || g, (g % 53) + 1
FROM generate_series(1,1000) g;
VACUUM ANALYZE jo_dim2;
-- jo_fact: 100000 rows. dim1_id and dim2_id are both (g % 100) + 1, so each
-- takes the values 1..100; only ids 1..100 of jo_dim2 are ever referenced.
CREATE TABLE jo_fact (
id int primary key,
dim1_id integer not null references jo_dim1 (id),
dim2_id integer not null references jo_dim2 (id)
) WITH (autovacuum_enabled = false);
-- Name the target columns explicitly, as the two INSERTs above do, so the
-- load does not depend on the column order of the table definition.
INSERT INTO jo_fact (id, dim1_id, dim2_id)
SELECT g, (g%100)+1, (g%100)+1 FROM generate_series(1,100000) g;
VACUUM ANALYZE jo_fact;
-- Baseline: pg_plan_advice.advice has not been set at this point in the
-- script, so this shows the plan the planner picks on its own.
-- We expect to join to d2 first and then d1, since the condition on d2
-- is more selective.
-- (val2 = 1 matches 18 of the 1000 jo_dim2 rows, those with id % 53 = 0;
-- val1 = 1 matches 33 of the 100 jo_dim1 rows, those with id % 3 = 0.)
-- The WHERE conditions on val1 and val2 discard any NULL-extended rows the
-- LEFT JOINs would produce; presumably that is what lets the planner
-- reorder these joins freely.
EXPLAIN (COSTS OFF, PLAN_ADVICE)
SELECT * FROM jo_fact f
LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id
LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id
WHERE val1 = 1 AND val2 = 1;
-- Force a few different join orders. Some of these are very inefficient,
-- but the planner considers them all viable.
-- The same query is planned under each advice string. SET LOCAL limits each
-- setting to the enclosing transaction, so nothing persists after COMMIT.
BEGIN;
-- f as the driving table, then d1, then d2.
SET LOCAL pg_plan_advice.advice = 'join_order(f d1 d2)';
EXPLAIN (COSTS OFF, PLAN_ADVICE)
SELECT * FROM jo_fact f
LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id
LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id
WHERE val1 = 1 AND val2 = 1;
-- f as the driving table, then d2, then d1.
SET LOCAL pg_plan_advice.advice = 'join_order(f d2 d1)';
EXPLAIN (COSTS OFF, PLAN_ADVICE)
SELECT * FROM jo_fact f
LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id
LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id
WHERE val1 = 1 AND val2 = 1;
-- d1 as the driving table, then f, then d2.
SET LOCAL pg_plan_advice.advice = 'join_order(d1 f d2)';
EXPLAIN (COSTS OFF, PLAN_ADVICE)
SELECT * FROM jo_fact f
LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id
LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id
WHERE val1 = 1 AND val2 = 1;
-- NOTE(review): the parenthesized sublist presumably asks for d1 and d2 to
-- be joined to each other first, in that order, with f then joined to the
-- result -- confirm against the pg_plan_advice documentation. The query has
-- no join clause between d1 and d2.
SET LOCAL pg_plan_advice.advice = 'join_order(f (d1 d2))';
EXPLAIN (COSTS OFF, PLAN_ADVICE)
SELECT * FROM jo_fact f
LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id
LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id
WHERE val1 = 1 AND val2 = 1;
-- NOTE(review): the braces presumably group d1 and d2 like the parentheses
-- above but leave their relative order up to the planner -- confirm against
-- the pg_plan_advice documentation.
SET LOCAL pg_plan_advice.advice = 'join_order(f {d1 d2})';
EXPLAIN (COSTS OFF, PLAN_ADVICE)
SELECT * FROM jo_fact f
LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id
LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id
WHERE val1 = 1 AND val2 = 1;
COMMIT;
-- Force a join order by mentioning just a prefix of the join list.
-- Relations the advice does not name are presumably left for the planner
-- to place after the named prefix.
BEGIN;
-- Only the driving table (d2) is specified; f and d1 are not mentioned.
SET LOCAL pg_plan_advice.advice = 'join_order(d2)';
EXPLAIN (COSTS OFF, PLAN_ADVICE)
SELECT * FROM jo_fact f
LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id
LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id
WHERE val1 = 1 AND val2 = 1;
-- d2 as the driving table, then d1; f is not mentioned. The query has no
-- join clause between d2 and d1.
SET LOCAL pg_plan_advice.advice = 'join_order(d2 d1)';
EXPLAIN (COSTS OFF, PLAN_ADVICE)
SELECT * FROM jo_fact f
LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id
LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id
WHERE val1 = 1 AND val2 = 1;
COMMIT;
-- jo_fact is not partitioned, but let's try pretending that it is and
-- verifying that the advice does not apply.
-- NOTE(review): 'f/d1' looks like the syntax for naming a partition (here
-- called d1) of the relation aliased f -- confirm against the
-- pg_plan_advice documentation. No such partition exists in this schema.
BEGIN;
-- Flat list: the nonexistent partition first, then d1, then d2.
SET LOCAL pg_plan_advice.advice = 'join_order(f/d1 d1 d2)';
EXPLAIN (COSTS OFF, PLAN_ADVICE)
SELECT * FROM jo_fact f
LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id
LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id
WHERE val1 = 1 AND val2 = 1;
-- Same nonexistent partition, this time followed by a parenthesized sublist.
SET LOCAL pg_plan_advice.advice = 'join_order(f/d1 (d1 d2))';
EXPLAIN (COSTS OFF, PLAN_ADVICE)
SELECT * FROM jo_fact f
LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id
LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id
WHERE val1 = 1 AND val2 = 1;
COMMIT;
-- The unusual formulation of this query is intended to prevent the query
-- planner from reducing the FULL JOIN to some other join type, so that we
-- can test what happens with a join type that cannot be reordered.
-- The "OR f.dim1_id IS NULL" arm lets rows with a NULL f.dim1_id through the
-- outer join condition, including the NULL-extended rows the FULL JOIN
-- produces for unmatched d2 rows, so that condition does not discard them.
-- NOTE(review): the "+ 0" on both sides of the FULL JOIN condition is
-- presumably also part of the unusual formulation; its exact purpose is not
-- stated here -- confirm.
-- This first run is the baseline: no advice is set outside a transaction.
EXPLAIN (COSTS OFF, PLAN_ADVICE)
SELECT * FROM jo_dim1 d1
INNER JOIN (jo_fact f FULL JOIN jo_dim2 d2 ON f.dim2_id + 0 = d2.id + 0)
ON d1.id = f.dim1_id OR f.dim1_id IS NULL;
-- We should not be able to force the planner to join f to d1 first, because
-- that is not a valid join order, but we should be able to force the planner
-- to make either d2 or f the driving table.
BEGIN;
-- Invalid order: f joined to d1 before d2. Expected not to be honored.
SET LOCAL pg_plan_advice.advice = 'join_order(f d1 d2)';
EXPLAIN (COSTS OFF, PLAN_ADVICE)
SELECT * FROM jo_dim1 d1
INNER JOIN (jo_fact f FULL JOIN jo_dim2 d2 ON f.dim2_id + 0 = d2.id + 0)
ON d1.id = f.dim1_id OR f.dim1_id IS NULL;
-- Valid order: f is the driving table of the FULL JOIN, then d1 is joined.
SET LOCAL pg_plan_advice.advice = 'join_order(f d2 d1)';
EXPLAIN (COSTS OFF, PLAN_ADVICE)
SELECT * FROM jo_dim1 d1
INNER JOIN (jo_fact f FULL JOIN jo_dim2 d2 ON f.dim2_id + 0 = d2.id + 0)
ON d1.id = f.dim1_id OR f.dim1_id IS NULL;
-- Valid order: d2 is the driving table of the FULL JOIN, then d1 is joined.
SET LOCAL pg_plan_advice.advice = 'join_order(d2 f d1)';
EXPLAIN (COSTS OFF, PLAN_ADVICE)
SELECT * FROM jo_dim1 d1
INNER JOIN (jo_fact f FULL JOIN jo_dim2 d2 ON f.dim2_id + 0 = d2.id + 0)
ON d1.id = f.dim1_id OR f.dim1_id IS NULL;
COMMIT;
-- Two incompatible join orders should conflict. In the second case,
-- the conflict is implicit: if d1 is on the inner side of a join of any
-- type, it cannot also be the driving table.
BEGIN;
-- Explicit conflict: two join_order items that name different driving
-- tables (f and d1).
SET LOCAL pg_plan_advice.advice = 'join_order(f) join_order(d1)';
EXPLAIN (COSTS OFF, PLAN_ADVICE)
SELECT * FROM jo_dim1 d1
INNER JOIN (jo_fact f FULL JOIN jo_dim2 d2 ON f.dim2_id + 0 = d2.id + 0)
ON d1.id = f.dim1_id OR f.dim1_id IS NULL;
-- Implicit conflict: join_order(d1) makes d1 the driving table, while
-- hash_join(d1) presumably asks for d1 to be the inner side of a hash join.
SET LOCAL pg_plan_advice.advice = 'join_order(d1) hash_join(d1)';
EXPLAIN (COSTS OFF, PLAN_ADVICE)
SELECT * FROM jo_dim1 d1
INNER JOIN (jo_fact f FULL JOIN jo_dim2 d2 ON f.dim2_id + 0 = d2.id + 0)
ON d1.id = f.dim1_id OR f.dim1_id IS NULL;
COMMIT;