postgresql/contrib/pg_plan_advice/sql/semijoin.sql

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

119 lines
4.4 KiB
MySQL
Raw Normal View History

Add pg_plan_advice contrib module. Provide a facility that (1) can be used to stabilize certain plan choices so that the planner cannot reverse course without authorization and (2) can be used by knowledgeable users to insist on plan choices contrary to what the planner believes best. In both cases, terrible outcomes are possible: users should think twice and perhaps three times before constraining the planner's ability to do as it thinks best; nevertheless, there are problems that are much more easily solved with these facilities than without them. This patch takes the approach of analyzing a finished plan to produce textual output, which we call "plan advice", that describes key decisions made during plan; if that plan advice is provided during future planning cycles, it will force those key decisions to be made in the same way. Not all planner decisions can be controlled using advice; for example, decisions about how to perform aggregation are currently out of scope, as is choice of sort order. Plan advice can also be edited by the user, or even written from scratch in simple cases, making it possible to generate outcomes that the planner would not have produced. Partial advice can be provided to control some planner outcomes but not others. Currently, plan advice is focused only on specific outcomes, such as the choice to use a sequential scan for a particular relation, and not on estimates that might contribute to those outcomes, such as a possibly-incorrect selectivity estimate. While it would be useful to users to be able to provide plan advice that affects selectivity estimates or other aspects of costing, that is out of scope for this commit. 
Reviewed-by: Lukas Fittl <lukas@fittl.com> Reviewed-by: Jakub Wartak <jakub.wartak@enterprisedb.com> Reviewed-by: Greg Burd <greg@burd.me> Reviewed-by: Jacob Champion <jacob.champion@enterprisedb.com> Reviewed-by: Haibo Yan <tristan.yim@gmail.com> Reviewed-by: Dian Fay <di@nmfay.com> Reviewed-by: Ajay Pal <ajay.pal.k@gmail.com> Reviewed-by: John Naylor <johncnaylorls@gmail.com> Reviewed-by: Alexandra Wang <alexandra.wang.oss@gmail.com> Discussion: http://postgr.es/m/CA+TgmoZ-Jh1T6QyWoCODMVQdhTUPYkaZjWztzP1En4=ZHoKPzw@mail.gmail.com
2026-03-12 12:59:52 -04:00
-- Session setup: load the module under test and turn off parallel workers
-- under Gather (presumably so the EXPLAIN output below does not depend on
-- parallel plan choices).
LOAD 'pg_plan_advice';
SET max_parallel_workers_per_gather = 0;

-- Wide fixture table: 1000 rows with val1 cycling through 1..10, each row
-- carrying 300 spaces of padding declared with plain storage. Autovacuum is
-- disabled for the table; statistics come from the explicit VACUUM below.
CREATE TABLE sj_wide
(
    id integer PRIMARY KEY,
    val1 integer,
    padding text STORAGE PLAIN
)
WITH (autovacuum_enabled = false);

INSERT INTO sj_wide (id, val1, padding)
    SELECT g, g % 10 + 1, repeat(' ', 300)
    FROM generate_series(1, 1000) AS g;

CREATE INDEX ON sj_wide (val1);

VACUUM (ANALYZE) sj_wide;
-- Narrow fixture table: same 1000 ids and the same val1 distribution (1..10)
-- as sj_wide, but with no padding column. Autovacuum is disabled for the
-- table; statistics come from the explicit VACUUM below.
CREATE TABLE sj_narrow
(
    id integer PRIMARY KEY,
    val1 integer
)
WITH (autovacuum_enabled = false);

INSERT INTO sj_narrow (id, val1)
    SELECT g, g % 10 + 1
    FROM generate_series(1, 1000) AS g;

CREATE INDEX ON sj_narrow (val1);

VACUUM (ANALYZE) sj_narrow;
-- Baseline plan for sj_wide. The expectation is that the VALUES list is made
-- unique and then drives index lookups into sj_wide, so that sj_wide does not
-- have to be scanned in full.
EXPLAIN (COSTS OFF, PLAN_ADVICE)
    SELECT *
    FROM sj_wide
    WHERE (id, val1) IN (
        VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5)
    );
-- Repeat the sj_wide query under each semijoin strategy. Requesting a unique
-- semijoin should reproduce the plan chosen without advice; requesting a
-- non-unique semijoin should put a Semi Join operation into the plan tree.
-- SET LOCAL keeps each advice string scoped to this transaction.
BEGIN;

-- Unique semijoin on the VALUES list.
SET LOCAL pg_plan_advice.advice = 'semijoin_unique("*VALUES*")';
EXPLAIN (COSTS OFF, PLAN_ADVICE)
    SELECT *
    FROM sj_wide
    WHERE (id, val1) IN (
        VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5)
    );

-- Non-unique semijoin on the VALUES list.
SET LOCAL pg_plan_advice.advice = 'semijoin_non_unique("*VALUES*")';
EXPLAIN (COSTS OFF, PLAN_ADVICE)
    SELECT *
    FROM sj_wide
    WHERE (id, val1) IN (
        VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5)
    );

COMMIT;
-- Baseline plan for sj_narrow. This table is narrower than sj_wide, so a
-- sequential scan costs less and the default choice is a straightforward
-- Semi Join plan. (The outcome is also very sensitive to the length of the
-- IN list, since that determines how many index lookups the alternative
-- plan would need.)
EXPLAIN (COSTS OFF, PLAN_ADVICE)
    SELECT *
    FROM sj_narrow
    WHERE (id, val1) IN (
        VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5)
    );
-- Here, we expect advising a unique semijoin to switch to the same plan that
-- we got with sj_wide, and advising a non-unique semijoin should not change
-- the plan.
-- Run the sj_narrow query once per semijoin strategy; SET LOCAL keeps each
-- advice string scoped to this transaction.
BEGIN;

-- Unique semijoin on the VALUES list.
SET LOCAL pg_plan_advice.advice = 'semijoin_unique("*VALUES*")';
EXPLAIN (COSTS OFF, PLAN_ADVICE)
    SELECT *
    FROM sj_narrow
    WHERE (id, val1) IN (
        VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5)
    );

-- Non-unique semijoin on the VALUES list.
SET LOCAL pg_plan_advice.advice = 'semijoin_non_unique("*VALUES*")';
EXPLAIN (COSTS OFF, PLAN_ADVICE)
    SELECT *
    FROM sj_narrow
    WHERE (id, val1) IN (
        VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5)
    );

COMMIT;
-- Baseline plan for a semijoin between a set-returning function and
-- sj_narrow. The VALUES-list queries earlier in this file made the outer side
-- of the join unique; for this query, the inner side is the one that should
-- be made unique.
EXPLAIN (COSTS OFF, PLAN_ADVICE)
    SELECT *
    FROM generate_series(1, 1000) AS g
    WHERE g IN (SELECT val1 FROM sj_narrow);
-- It should be possible to force a plan with or without the make-unique
-- strategy, and with either side as the driving table. The four cases below
-- cover both strategies, first on their own and then combined with
-- join_order(sj_narrow).
BEGIN;

-- Case 1: make-unique strategy, no join order advice.
SET LOCAL pg_plan_advice.advice = 'semijoin_unique(sj_narrow)';
EXPLAIN (COSTS OFF, PLAN_ADVICE)
    SELECT *
    FROM generate_series(1, 1000) AS g
    WHERE g IN (SELECT val1 FROM sj_narrow);

-- Case 2: non-unique strategy, no join order advice.
SET LOCAL pg_plan_advice.advice = 'semijoin_non_unique(sj_narrow)';
EXPLAIN (COSTS OFF, PLAN_ADVICE)
    SELECT *
    FROM generate_series(1, 1000) AS g
    WHERE g IN (SELECT val1 FROM sj_narrow);

-- Case 3: make-unique strategy plus join_order(sj_narrow).
SET LOCAL pg_plan_advice.advice = 'semijoin_unique(sj_narrow) join_order(sj_narrow)';
EXPLAIN (COSTS OFF, PLAN_ADVICE)
    SELECT *
    FROM generate_series(1, 1000) AS g
    WHERE g IN (SELECT val1 FROM sj_narrow);

-- Case 4: non-unique strategy plus join_order(sj_narrow).
SET LOCAL pg_plan_advice.advice = 'semijoin_non_unique(sj_narrow) join_order(sj_narrow)';
EXPLAIN (COSTS OFF, PLAN_ADVICE)
    SELECT *
    FROM generate_series(1, 1000) AS g
    WHERE g IN (SELECT val1 FROM sj_narrow);

COMMIT;
-- Naming the wrong side of the join (g rather than sj_narrow) in semijoin
-- advice should be reported as an advice failure, for both strategies.
BEGIN;

-- Unique strategy aimed at g.
SET LOCAL pg_plan_advice.advice = 'semijoin_unique(g)';
EXPLAIN (COSTS OFF, PLAN_ADVICE)
    SELECT *
    FROM generate_series(1, 1000) AS g
    WHERE g IN (SELECT val1 FROM sj_narrow);

-- Non-unique strategy aimed at g.
SET LOCAL pg_plan_advice.advice = 'semijoin_non_unique(g)';
EXPLAIN (COSTS OFF, PLAN_ADVICE)
    SELECT *
    FROM generate_series(1, 1000) AS g
    WHERE g IN (SELECT val1 FROM sj_narrow);

COMMIT;
-- Conflicting advice: request both the unique and the non-unique semijoin
-- strategy for sj_narrow in a single advice string.
BEGIN;

SET LOCAL pg_plan_advice.advice = 'semijoin_unique(sj_narrow) semijoin_non_unique(sj_narrow)';
EXPLAIN (COSTS OFF, PLAN_ADVICE)
    SELECT *
    FROM generate_series(1, 1000) AS g
    WHERE g IN (SELECT val1 FROM sj_narrow);

COMMIT;
-- Apply SEMIJOIN_UNIQUE() to a query that contains no semijoin at all: here
-- g and sj_narrow are combined by an ordinary join on g = s.val1.
BEGIN;

SET LOCAL pg_plan_advice.advice = 'semijoin_unique(g)';
EXPLAIN (COSTS OFF, PLAN_ADVICE)
    SELECT *
    FROM generate_series(1, 1000) AS g, sj_narrow AS s
    WHERE g = s.val1;

COMMIT;