postgresql/contrib/pg_plan_advice/sql/join_strategy.sql

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

85 lines
3.6 KiB
MySQL
Raw Permalink Normal View History

Add pg_plan_advice contrib module. Provide a facility that (1) can be used to stabilize certain plan choices so that the planner cannot reverse course without authorization and (2) can be used by knowledgeable users to insist on plan choices contrary to what the planner believes best. In both cases, terrible outcomes are possible: users should think twice and perhaps three times before constraining the planner's ability to do as it thinks best; nevertheless, there are problems that are much more easily solved with these facilities than without them. This patch takes the approach of analyzing a finished plan to produce textual output, which we call "plan advice", that describes key decisions made during planning; if that plan advice is provided during future planning cycles, it will force those key decisions to be made in the same way. Not all planner decisions can be controlled using advice; for example, decisions about how to perform aggregation are currently out of scope, as is choice of sort order. Plan advice can also be edited by the user, or even written from scratch in simple cases, making it possible to generate outcomes that the planner would not have produced. Partial advice can be provided to control some planner outcomes but not others. Currently, plan advice is focused only on specific outcomes, such as the choice to use a sequential scan for a particular relation, and not on estimates that might contribute to those outcomes, such as a possibly-incorrect selectivity estimate. While it would be useful to users to be able to provide plan advice that affects selectivity estimates or other aspects of costing, that is out of scope for this commit.
Reviewed-by: Lukas Fittl <lukas@fittl.com> Reviewed-by: Jakub Wartak <jakub.wartak@enterprisedb.com> Reviewed-by: Greg Burd <greg@burd.me> Reviewed-by: Jacob Champion <jacob.champion@enterprisedb.com> Reviewed-by: Haibo Yan <tristan.yim@gmail.com> Reviewed-by: Dian Fay <di@nmfay.com> Reviewed-by: Ajay Pal <ajay.pal.k@gmail.com> Reviewed-by: John Naylor <johncnaylorls@gmail.com> Reviewed-by: Alexandra Wang <alexandra.wang.oss@gmail.com> Discussion: http://postgr.es/m/CA+TgmoZ-Jh1T6QyWoCODMVQdhTUPYkaZjWztzP1En4=ZHoKPzw@mail.gmail.com
2026-03-12 12:59:52 -04:00
-- Session setup: load the module under test and disable parallel query so
-- that none of the plans below involve parallel workers.
LOAD 'pg_plan_advice';
SET max_parallel_workers_per_gather = 0;

-- Dimension table: 100 rows whose ids come from the serial column and whose
-- payload is arbitrary text.  Autovacuum is turned off, presumably so that
-- the statistics used below come only from the explicit VACUUM ANALYZE.
CREATE TABLE join_dim (id serial primary key, dim text)
    WITH (autovacuum_enabled = false);
INSERT INTO join_dim (dim)
    SELECT random()::text FROM generate_series(1,100) g;
VACUUM ANALYZE join_dim;

-- Fact table: 100,000 rows, each referencing one of just three dimension
-- rows (dim_id only takes the values 1, 2 and 3).
CREATE TABLE join_fact (
    id int primary key,
    dim_id integer not null references join_dim (id)
) WITH (autovacuum_enabled = false);
-- Name the target columns explicitly, as the join_dim insert does, so the
-- statement does not depend on the table's column order.
INSERT INTO join_fact (id, dim_id)
    SELECT g, (g%3)+1 FROM generate_series(1,100000) g;
CREATE INDEX join_fact_dim_id ON join_fact (dim_id);
VACUUM ANALYZE join_fact;
-- Baseline: with no advice in effect, the planner is expected to choose a
-- hash join for this query.
EXPLAIN (COSTS OFF, PLAN_ADVICE)
    SELECT *
    FROM join_fact AS f
    INNER JOIN join_dim AS d ON f.dim_id = d.id;
-- Force each join method in turn, with d (join_dim) as the inner table.
-- Every method is expected to work except MERGE_JOIN_MATERIALIZE: the
-- planner knows that join_dim (id) is unique and will refuse to add
-- mark/restore overhead, so that piece of advice fails.
--
-- SET LOCAL keeps each advice string scoped to this transaction.
BEGIN;

SET LOCAL pg_plan_advice.advice TO 'HASH_JOIN(d)';
EXPLAIN (COSTS OFF, PLAN_ADVICE)
    SELECT *
    FROM join_fact AS f
    INNER JOIN join_dim AS d ON f.dim_id = d.id;

-- This is the one case in this group that is expected to fail.
SET LOCAL pg_plan_advice.advice TO 'MERGE_JOIN_MATERIALIZE(d)';
EXPLAIN (COSTS OFF, PLAN_ADVICE)
    SELECT *
    FROM join_fact AS f
    INNER JOIN join_dim AS d ON f.dim_id = d.id;

SET LOCAL pg_plan_advice.advice TO 'MERGE_JOIN_PLAIN(d)';
EXPLAIN (COSTS OFF, PLAN_ADVICE)
    SELECT *
    FROM join_fact AS f
    INNER JOIN join_dim AS d ON f.dim_id = d.id;

SET LOCAL pg_plan_advice.advice TO 'NESTED_LOOP_MATERIALIZE(d)';
EXPLAIN (COSTS OFF, PLAN_ADVICE)
    SELECT *
    FROM join_fact AS f
    INNER JOIN join_dim AS d ON f.dim_id = d.id;

SET LOCAL pg_plan_advice.advice TO 'NESTED_LOOP_MEMOIZE(d)';
EXPLAIN (COSTS OFF, PLAN_ADVICE)
    SELECT *
    FROM join_fact AS f
    INNER JOIN join_dim AS d ON f.dim_id = d.id;

SET LOCAL pg_plan_advice.advice TO 'NESTED_LOOP_PLAIN(d)';
EXPLAIN (COSTS OFF, PLAN_ADVICE)
    SELECT *
    FROM join_fact AS f
    INNER JOIN join_dim AS d ON f.dim_id = d.id;

COMMIT;
-- Repeat the exercise with f (join_fact) as the inner table instead.
-- All six join methods are expected to work in this direction.
--
-- SET LOCAL keeps each advice string scoped to this transaction.
BEGIN;

SET LOCAL pg_plan_advice.advice TO 'HASH_JOIN(f)';
EXPLAIN (COSTS OFF, PLAN_ADVICE)
    SELECT *
    FROM join_fact AS f
    INNER JOIN join_dim AS d ON f.dim_id = d.id;

SET LOCAL pg_plan_advice.advice TO 'MERGE_JOIN_MATERIALIZE(f)';
EXPLAIN (COSTS OFF, PLAN_ADVICE)
    SELECT *
    FROM join_fact AS f
    INNER JOIN join_dim AS d ON f.dim_id = d.id;

SET LOCAL pg_plan_advice.advice TO 'MERGE_JOIN_PLAIN(f)';
EXPLAIN (COSTS OFF, PLAN_ADVICE)
    SELECT *
    FROM join_fact AS f
    INNER JOIN join_dim AS d ON f.dim_id = d.id;

SET LOCAL pg_plan_advice.advice TO 'NESTED_LOOP_MATERIALIZE(f)';
EXPLAIN (COSTS OFF, PLAN_ADVICE)
    SELECT *
    FROM join_fact AS f
    INNER JOIN join_dim AS d ON f.dim_id = d.id;

SET LOCAL pg_plan_advice.advice TO 'NESTED_LOOP_MEMOIZE(f)';
EXPLAIN (COSTS OFF, PLAN_ADVICE)
    SELECT *
    FROM join_fact AS f
    INNER JOIN join_dim AS d ON f.dim_id = d.id;

SET LOCAL pg_plan_advice.advice TO 'NESTED_LOOP_PLAIN(f)';
EXPLAIN (COSTS OFF, PLAN_ADVICE)
    SELECT *
    FROM join_fact AS f
    INNER JOIN join_dim AS d ON f.dim_id = d.id;

COMMIT;
-- Advice that is not expected to work.  Three cases are covered: a foreign
-- join between tables that are not foreign tables, two different strategies
-- for the same table, and both tables on the inner side of the same join.
BEGIN;

-- Neither f nor d is a foreign table, so a foreign join cannot be forced.
SET LOCAL pg_plan_advice.advice TO 'FOREIGN_JOIN((f d))';
EXPLAIN (COSTS OFF, PLAN_ADVICE)
    SELECT *
    FROM join_fact AS f
    INNER JOIN join_dim AS d ON f.dim_id = d.id;

-- Two different join strategies are requested for the same table.
SET LOCAL pg_plan_advice.advice TO 'NESTED_LOOP_PLAIN(f) NESTED_LOOP_MATERIALIZE(f)';
EXPLAIN (COSTS OFF, PLAN_ADVICE)
    SELECT *
    FROM join_fact AS f
    INNER JOIN join_dim AS d ON f.dim_id = d.id;

-- Both tables are requested on the inner side of the same join.
SET LOCAL pg_plan_advice.advice TO 'NESTED_LOOP_PLAIN(f d)';
EXPLAIN (COSTS OFF, PLAN_ADVICE)
    SELECT *
    FROM join_fact AS f
    INNER JOIN join_dim AS d ON f.dim_id = d.id;

COMMIT;