postgresql/contrib/pg_plan_advice/sql/join_strategy.sql
Robert Haas 5883ff30b0 Add pg_plan_advice contrib module.
Provide a facility that (1) can be used to stabilize certain plan choices
so that the planner cannot reverse course without authorization and
(2) can be used by knowledgeable users to insist on plan choices contrary
to what the planner believes best. In both cases, terrible outcomes are
possible: users should think twice and perhaps three times before
constraining the planner's ability to do as it thinks best; nevertheless,
there are problems that are much more easily solved with these facilities
than without them.

This patch takes the approach of analyzing a finished plan to produce
textual output, which we call "plan advice", that describes key
decisions made during planning; if that plan advice is provided during
future planning cycles, it will force those key decisions to be made in
the same way.  Not all planner decisions can be controlled using advice;
for example, decisions about how to perform aggregation are currently
out of scope, as is choice of sort order. Plan advice can also be edited
by the user, or even written from scratch in simple cases, making it
possible to generate outcomes that the planner would not have produced.
Partial advice can be provided to control some planner outcomes but not
others.

Currently, plan advice is focused only on specific outcomes, such as
the choice to use a sequential scan for a particular relation, and not
on estimates that might contribute to those outcomes, such as a
possibly-incorrect selectivity estimate. While it would be useful to
users to be able to provide plan advice that affects selectivity
estimates or other aspects of costing, that is out of scope for this
commit.

Reviewed-by: Lukas Fittl <lukas@fittl.com>
Reviewed-by: Jakub Wartak <jakub.wartak@enterprisedb.com>
Reviewed-by: Greg Burd <greg@burd.me>
Reviewed-by: Jacob Champion <jacob.champion@enterprisedb.com>
Reviewed-by: Haibo Yan <tristan.yim@gmail.com>
Reviewed-by: Dian Fay <di@nmfay.com>
Reviewed-by: Ajay Pal <ajay.pal.k@gmail.com>
Reviewed-by: John Naylor <johncnaylorls@gmail.com>
Reviewed-by: Alexandra Wang <alexandra.wang.oss@gmail.com>
Discussion: http://postgr.es/m/CA+TgmoZ-Jh1T6QyWoCODMVQdhTUPYkaZjWztzP1En4=ZHoKPzw@mail.gmail.com
2026-03-12 13:00:43 -04:00

84 lines
3.6 KiB
PL/PgSQL

-- Load the module under test into this session. The pg_plan_advice.advice
-- setting and the PLAN_ADVICE option to EXPLAIN used below presumably come
-- from it.
LOAD 'pg_plan_advice';
-- Setting this to 0 disables parallel query, which keeps Gather nodes out
-- of the plans shown below.
SET max_parallel_workers_per_gather = 0;
-- Dimension table: 100 rows of random text. Autovacuum is disabled for the
-- table, so within this script it is only analyzed by the explicit
-- VACUUM ANALYZE that follows the INSERT.
CREATE TABLE join_dim (id serial primary key, dim text)
WITH (autovacuum_enabled = false);
INSERT INTO join_dim (dim) SELECT random()::text FROM generate_series(1,100) g;
VACUUM ANALYZE join_dim;
-- Fact table: 100,000 rows. dim_id is computed as (g%3)+1, so it only ever
-- takes the values 1, 2, and 3. Autovacuum is disabled here as well, and
-- the table is analyzed explicitly once the index exists.
CREATE TABLE join_fact (
id int primary key,
dim_id integer not null references join_dim (id)
) WITH (autovacuum_enabled = false);
INSERT INTO join_fact
SELECT g, (g%3)+1 FROM generate_series(1,100000) g;
CREATE INDEX join_fact_dim_id ON join_fact (dim_id);
VACUUM ANALYZE join_fact;
-- We expect a hash join by default.
-- This script has not set any advice yet, so this is the baseline plan that
-- the forced plans below can be compared against. COSTS OFF leaves cost
-- estimates out of the EXPLAIN output.
EXPLAIN (COSTS OFF, PLAN_ADVICE)
SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
-- Try forcing each join method in turn with join_dim as the inner table.
-- All of these should work except for MERGE_JOIN_MATERIALIZE; that will
-- fail, because the planner knows that join_dim (id) is unique, and will
-- refuse to add mark/restore overhead.
--
-- The same query is explained after every setting change; "d" in each
-- advice string is the query's alias for join_dim. Each SET LOCAL replaces
-- the previous advice string and lasts only until the end of the enclosing
-- transaction, so nothing carries over past COMMIT.
BEGIN;
SET LOCAL pg_plan_advice.advice = 'HASH_JOIN(d)';
EXPLAIN (COSTS OFF, PLAN_ADVICE)
SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
-- This is the one case in this group that is expected not to be honored.
SET LOCAL pg_plan_advice.advice = 'MERGE_JOIN_MATERIALIZE(d)';
EXPLAIN (COSTS OFF, PLAN_ADVICE)
SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
SET LOCAL pg_plan_advice.advice = 'MERGE_JOIN_PLAIN(d)';
EXPLAIN (COSTS OFF, PLAN_ADVICE)
SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
SET LOCAL pg_plan_advice.advice = 'NESTED_LOOP_MATERIALIZE(d)';
EXPLAIN (COSTS OFF, PLAN_ADVICE)
SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
SET LOCAL pg_plan_advice.advice = 'NESTED_LOOP_MEMOIZE(d)';
EXPLAIN (COSTS OFF, PLAN_ADVICE)
SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
SET LOCAL pg_plan_advice.advice = 'NESTED_LOOP_PLAIN(d)';
EXPLAIN (COSTS OFF, PLAN_ADVICE)
SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
COMMIT;
-- Now try forcing each join method in turn with join_fact as the inner
-- table. All of these should work.
--
-- The same query is explained after every setting change; "f" in each
-- advice string is the query's alias for join_fact. Each SET LOCAL replaces
-- the previous advice string and lasts only until the end of the enclosing
-- transaction, so nothing carries over past COMMIT.
BEGIN;
SET LOCAL pg_plan_advice.advice = 'HASH_JOIN(f)';
EXPLAIN (COSTS OFF, PLAN_ADVICE)
SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
SET LOCAL pg_plan_advice.advice = 'MERGE_JOIN_MATERIALIZE(f)';
EXPLAIN (COSTS OFF, PLAN_ADVICE)
SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
SET LOCAL pg_plan_advice.advice = 'MERGE_JOIN_PLAIN(f)';
EXPLAIN (COSTS OFF, PLAN_ADVICE)
SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
SET LOCAL pg_plan_advice.advice = 'NESTED_LOOP_MATERIALIZE(f)';
EXPLAIN (COSTS OFF, PLAN_ADVICE)
SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
SET LOCAL pg_plan_advice.advice = 'NESTED_LOOP_MEMOIZE(f)';
EXPLAIN (COSTS OFF, PLAN_ADVICE)
SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
SET LOCAL pg_plan_advice.advice = 'NESTED_LOOP_PLAIN(f)';
EXPLAIN (COSTS OFF, PLAN_ADVICE)
SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
COMMIT;
-- Non-working cases. We can't force a foreign join between these tables,
-- because they aren't foreign tables. We also can't use two different
-- strategies on the same table, nor can we put both tables on the inner
-- side of the same join.
--
-- The three cases below appear in the same order as the reasons above.
-- As in the earlier groups, SET LOCAL keeps each advice string scoped to
-- this transaction.
BEGIN;
-- Case 1: foreign join advice for two ordinary tables. The nested
-- parentheses presumably name the f/d join as a single unit -- confirm
-- against the advice grammar.
SET LOCAL pg_plan_advice.advice = 'FOREIGN_JOIN((f d))';
EXPLAIN (COSTS OFF, PLAN_ADVICE)
SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
-- Case 2: two different nested loop strategies requested for f.
SET LOCAL pg_plan_advice.advice = 'NESTED_LOOP_PLAIN(f) NESTED_LOOP_MATERIALIZE(f)';
EXPLAIN (COSTS OFF, PLAN_ADVICE)
SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
-- Case 3: both f and d are requested as the inner side, but the query
-- contains only one join.
SET LOCAL pg_plan_advice.advice = 'NESTED_LOOP_PLAIN(f d)';
EXPLAIN (COSTS OFF, PLAN_ADVICE)
SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
COMMIT;