diff --git a/contrib/Makefile b/contrib/Makefile index 2f0a88d3f77..dd04c20acd2 100644 --- a/contrib/Makefile +++ b/contrib/Makefile @@ -34,6 +34,7 @@ SUBDIRS = \ pg_freespacemap \ pg_logicalinspect \ pg_overexplain \ + pg_plan_advice \ pg_prewarm \ pg_stat_statements \ pg_surgery \ diff --git a/contrib/meson.build b/contrib/meson.build index def13257cbe..5a752eac347 100644 --- a/contrib/meson.build +++ b/contrib/meson.build @@ -48,6 +48,7 @@ subdir('pgcrypto') subdir('pg_freespacemap') subdir('pg_logicalinspect') subdir('pg_overexplain') +subdir('pg_plan_advice') subdir('pg_prewarm') subdir('pgrowlocks') subdir('pg_stat_statements') diff --git a/contrib/pg_plan_advice/.gitignore b/contrib/pg_plan_advice/.gitignore new file mode 100644 index 00000000000..3f46c27b147 --- /dev/null +++ b/contrib/pg_plan_advice/.gitignore @@ -0,0 +1,7 @@ +/pgpa_parser.h +/pgpa_parser.c +/pgpa_scanner.c +# Generated subdirectories +/log/ +/results/ +/tmp_check/ diff --git a/contrib/pg_plan_advice/Makefile b/contrib/pg_plan_advice/Makefile new file mode 100644 index 00000000000..d2a8233f387 --- /dev/null +++ b/contrib/pg_plan_advice/Makefile @@ -0,0 +1,43 @@ +# contrib/pg_plan_advice/Makefile + +MODULE_big = pg_plan_advice +OBJS = \ + $(WIN32RES) \ + pg_plan_advice.o \ + pgpa_ast.o \ + pgpa_identifier.o \ + pgpa_join.o \ + pgpa_output.o \ + pgpa_parser.o \ + pgpa_planner.o \ + pgpa_scan.o \ + pgpa_scanner.o \ + pgpa_trove.o \ + pgpa_walker.o + +PGFILEDESC = "pg_plan_advice - help the planner get the right plan" + +REGRESS = gather join_order join_strategy partitionwise prepared \ + scan semijoin syntax + +EXTRA_CLEAN = pgpa_parser.h pgpa_parser.c pgpa_scanner.c + +ifdef USE_PGXS +PG_CONFIG = pg_config +PGXS := $(shell $(PG_CONFIG) --pgxs) +include $(PGXS) +else +subdir = contrib/pg_plan_advice +top_builddir = ../.. +include $(top_builddir)/src/Makefile.global +include $(top_srcdir)/contrib/contrib-global.mk +endif + +# See notes in src/backend/parser/Makefile about the following two rules +pgpa_parser.h: pgpa_parser.c + touch $@ + +pgpa_parser.c: BISONFLAGS += -d + +# Force these dependencies to be known even without dependency info built: +pgpa_parser.o pgpa_scanner.o: pgpa_parser.h diff --git a/contrib/pg_plan_advice/README b/contrib/pg_plan_advice/README new file mode 100644 index 00000000000..b0e4fd1d6e1 --- /dev/null +++ b/contrib/pg_plan_advice/README @@ -0,0 +1,260 @@ +contrib/pg_plan_advice/README + +Plan Advice +=========== + +This module implements a mini-language for "plan advice" that allows for +control of certain key planner decisions. Goals include (1) enforcing plan +stability (my previous plan was good and I would like to keep getting a +similar one) and (2) allowing users to experiment with plans other than +the one preferred by the optimizer. Non-goals include (1) controlling +every possible planner decision and (2) forcing consideration of plans +that the optimizer rejects for reasons other than cost. (There is some +room for bikeshedding about what exactly this non-goal means: what if +we skip path generation entirely for a certain case on the theory that +we know it cannot win on cost? Does that count as a cost-based rejection +even though no cost was ever computed?) + +Generally, plan advice is a series of whitespace-separated advice items, +each of which applies an advice tag to a list of advice targets. For +example, "SEQ_SCAN(foo) HASH_JOIN(bar@ss)" contains two items of advice, +the first of which applies the SEQ_SCAN tag to "foo" and the second of +which applies the HASH_JOIN tag to "bar@ss". In this simple example, each +target identifies a single relation; see "Relation Identifiers", below. +Advice tags can also be applied to groups of relations; for example, +"HASH_JOIN(baz (bletch quux))" applies the HASH_JOIN tag to the single +relation identifier "baz" as well as to the 2-item list containing +"bletch" and "quux". + +Critically, this module knows both how to generate plan advice from an +already-existing plan, and also how to enforce it during future planning +cycles. Everything it does is intended to be "round-trip safe": if you +generate advice from a plan and then feed that back into a future planning +cycle, each piece of advice should be guaranteed to apply to exactly the +same part of the query from which it was generated without ambiguity or +guesswork, and it should successfully enforce the same planning decision that +led to it being generated in the first place. Note that there is no +intention that these guarantees hold in the presence of intervening DDL; +e.g. if you change the properties of a function so that a subquery is no +longer inlined, or if you drop an index named in the plan advice, the advice +isn't going to work any more. That's expected. + +This module aims to force the planner to follow any provided advice without +regard to whether it appears to be good advice or bad advice. If the +user provides bad advice, whether derived from a previously-generated plan +or manually written, they may get a bad plan. We regard this as user error, +not a defect in this module. It seems likely that applying advice +judiciously and only when truly required to avoid problems will be a more +successful strategy than applying it with a broad brush, but users are free +to experiment with whatever strategies they think best. + +Relation Identifiers +==================== + +Uniquely identifying the part of a query to which a certain piece of +advice applies is harder than it sounds. Our basic approach is to use +relation aliases as a starting point, and then disambiguate. There are +three ways that the same relation alias can occur multiple times: + +1. It can appear in more than one subquery. + +2. It can appear more than once in the same subquery, + e.g. (foo JOIN bar) x JOIN foo. + +3. The table can be partitioned. + +Any combination of these things can occur simultaneously. Therefore, our +general syntax for a relation identifier is: + +alias_name#occurrence_number/partition_schema.partition_name@plan_name + +All components except for the alias_name are optional and included only +when required. When a component is omitted, the associated punctuation +must also be omitted. Occurrence numbers are counted ignoring children of +partitioned tables. When the generated occurrence number is 1, we omit +the occurrence number. The partition schema and partition name are included +only for children of partitioned tables. In generated advice, the +partition_schema is always included whenever there is a partition_name, +but user-written advice may mention the name and omit the schema. The +plan_name is omitted for the top-level PlannerInfo. + +Scan Advice +=========== + +For many types of scan, no advice is generated or possible; for instance, +a subquery is always scanned using a subquery scan. While that scan may be +elided via setrefs processing, this doesn't change the fact that only one +basic approach exists. Hence, scan advice applies mostly to relations, which +can be scanned in multiple ways. + +We tend to think of a scan as targeting a single relation, and that's +normally the case, but it doesn't have to be. For instance, if a join is +proven empty, the whole thing may be replaced with a single Result node +which, in effect, is a degenerate scan of every relation in the collapsed +portion of the join tree. Similarly, it's possible to inject a custom scan +in such a way that it replaces an entire join. If we ever emit advice +for these cases, it would target sets of relation identifiers surrounded +by parentheses, e.g., SOME_SORT_OF_SCAN(foo (bar baz)) would mean that +the given scan type would be used for foo as a single relation and also the +combination of bar and baz as a join product. We have no such cases at +present. + +For index and index-only scans, both the relation being scanned and the +index or indexes being used must be specified. For example, INDEX_SCAN(foo +foo_a_idx bar bar_b_idx) indicates that an index scan (not an index-only +scan) should be used on foo_a_idx when scanning foo, and that an index scan +should be used on bar_b_idx when scanning bar. + +Bitmap heap scans currently do not allow for an index specification: +BITMAP_HEAP_SCAN(foo bar) simply means that each of foo and bar should use +some sort of bitmap heap scan. + +Join Order Advice +================= + +The JOIN_ORDER tag specifies the order in which several tables that are +part of the same join problem should be joined. Each subquery (except for +those that are inlined) is a separate join problem. Within a subquery, +partitionwise joins can create additional, separate join problems. Hence, +queries involving partitionwise joins may use JOIN_ORDER() many times. + +We take the canonical join structure to be an outer-deep tree, so +JOIN_ORDER(t1 t2 t3) says that t1 is the driving table and should be joined +first to t2 and then to t3. If the join problem involves additional tables, +they can be joined in any order after the join between t1, t2, and t3 has +been constructed. Generated join advice always mentions all tables +in the join problem, but manually written join advice need not do so. + +For trees which are not outer-deep, parentheses can be used. For example, +JOIN_ORDER(t1 (t2 t3)) says that the top-level join should have t1 on the +outer side and a join between t2 and t3 on the inner side. That join should +be constructed so that t2 is on the outer side and t3 is on the inner side. + +In some cases, it's not possible to fully specify the join order in this way. +For example, if t2 and t3 are being scanned by a single custom scan or foreign +scan, or if a partitionwise join is being performed between those tables, then +it's impossible to say that t2 is the outer table and t3 is the inner table, +or the other way around; it's just undefined. In such cases, we generate +join advice that uses curly braces, intending to indicate a lack of ordering: +JOIN_ORDER(t1 {t2 t3}) says that the uppermost join should have t1 on the outer +side and some kind of join between t2 and t3 on the inner side, but without +saying how that join must be performed or anything about which relation should +appear on which side of the join, or even whether this kind of join has sides. + +Join Method Advice +================== + +Tags such as NESTED_LOOP_PLAIN specify the method that should be used to +perform a certain join. More specifically, NESTED_LOOP_PLAIN(x (y z)) says +that the plan should put the relation whose identifier is "x" on the inner +side of a plain nested loop (one without materialization or memoization) +and that it should also put a join between the relation whose identifier is +"y" and the relation whose identifier is "z" on the inner side of a nested +loop. Hence, for an N-table join problem, there will be N-1 pieces of join +method advice; no join method advice is required for the outermost +table in the join problem. + +Considering that we have both join order advice and join method advice, +it might seem natural to say that NESTED_LOOP_PLAIN(x) should be redefined +to mean that x should appear by itself on one side or the other of a nested +loop, rather than specifically on the inner side, but this definition appears +useless in practice. It gives the planner too much freedom to do things that +bear little resemblance to what the user probably had in mind. This makes +only a limited amount of practical difference in the case of a merge join or +unparameterized nested loop, but for a parameterized nested loop or a hash +join, the two sides are treated very differently, and saying that a certain +relation should be involved in one of those operations without saying which +role it should take isn't saying much. + +This choice of definition implies that join method advice also imposes some +join order constraints. For example, given a join between foo and bar, +HASH_JOIN(bar) implies that foo is the driving table. Otherwise, it would +be impossible to put bar beneath the inner side of a Hash Join. + +Note that, given this definition, it's reasonable to consider deleting the +join order advice but applying the join method advice. For example, +consider a star schema with tables fact, dim1, dim2, dim3, dim4, and dim5. +The automatically generated advice might specify JOIN_ORDER(fact dim1 dim3 +dim4 dim2 dim5) HASH_JOIN(dim2 dim4) NESTED_LOOP_PLAIN(dim1 dim3 dim5). +Deleting the JOIN_ORDER advice allows the planner to reorder the joins +however it likes while still forcing the same choice of join method. This +seems potentially useful, and is one reason why a unified syntax that controls +both join order and join method in a single locution was not chosen. + +Advice Completeness +=================== + +An essential guiding principle is that no inference may be made on the basis +of the absence of advice. The user is entitled to remove any portion of the +generated advice which they deem unsuitable or counterproductive and the +result should only be to increase the flexibility afforded to the planner. +This means that if advice can say that a certain optimization or technique +should be used, it should also be able to say that the optimization or +technique should not be used. We should never assume that the absence of an +instruction to do a certain thing means that it should not be done; all +instructions must be explicit. + +Semijoin Uniqueness +=================== + +Faced with a semijoin, the planner considers both a direct implementation +and a plan where the one side is made unique and then an inner join is +performed. We emit SEMIJOIN_UNIQUE() advice when this transformation occurs +and SEMIJOIN_NON_UNIQUE() advice when it doesn't. These items work like +join method advice: the inner side of the relevant join is named, and the +chosen join order must be compatible with the advice having some effect. + +Partitionwise +============= + +PARTITIONWISE() advice can be used to specify both those partitionwise joins +which should be performed and those which should not be performed; the idea +is that each argument to PARTITIONWISE specifies a set of relations that +should be scanned partitionwise after being joined to each other and nothing +else. Hence, for example, PARTITIONWISE((t1 t2) t3) specifies that the +query should contain a partitionwise join between t1 and t2 and that t3 +should not be part of any partitionwise join. If there are no other rels +in the query, specifying just PARTITIONWISE((t1 t2)) would have the same +effect, since there would be no other rels to which t3 could be joined in +a partitionwise fashion. + +Parallel Query (Gather, etc.) +============================= + +Each argument to GATHER() or GATHER_MERGE() is a single relation or an +exact set of relations on top of which a Gather or Gather Merge node, +respectively, should be placed. Each argument to NO_GATHER() is a single +relation that should not appear beneath any Gather or Gather Merge node; +that is, parallelism should not be used. + +Implicit Join Order Constraints +=============================== + +When JOIN_ORDER() advice is not provided for a particular join problem, +other pieces of advice may still incidentally constrain the join order. +For example, a user who specifies HASH_JOIN((foo bar)) is explicitly saying +that there should be a hash join with exactly foo and bar on the inner +side of it, but that also implies that foo and bar must be joined to +each other before either of them is joined to anything else. Otherwise, +the join the user is attempting to constrain won't actually occur in the +query, which ends up looking like the system has just decided to ignore +the advice altogether. + +Future Work +=========== + +We don't handle choice of aggregation: it would be nice to be able to force +sorted or grouped aggregation. I'm guessing this can be left to future work. + +More seriously, we don't know anything about eager aggregation, which could +have a large impact on the shape of the plan tree. XXX: This needs some study +to determine how large a problem it is, and might need to be fixed sooner +rather than later. + +We don't offer any control over estimates, only outcomes. It seems like a +good idea to incorporate that ability at some future point, as pg_hint_plan +does. However, since the primary goal of the initial development work is to be +able to induce the planner to recreate a desired plan that worked well in +the past, this has not been included in the initial development effort. + +XXX Need to investigate whether and how well supplying advice works with GEQO diff --git a/contrib/pg_plan_advice/expected/gather.out b/contrib/pg_plan_advice/expected/gather.out new file mode 100644 index 00000000000..0cc0dedf859 --- /dev/null +++ b/contrib/pg_plan_advice/expected/gather.out @@ -0,0 +1,371 @@ +LOAD 'pg_plan_advice'; +SET max_parallel_workers_per_gather = 1; +SET parallel_setup_cost = 0; +SET parallel_tuple_cost = 0; +SET min_parallel_table_scan_size = 0; +SET debug_parallel_query = off; +CREATE TABLE gt_dim (id serial primary key, dim text) + WITH (autovacuum_enabled = false); +INSERT INTO gt_dim (dim) SELECT random()::text FROM generate_series(1,100) g; +VACUUM ANALYZE gt_dim; +CREATE TABLE gt_fact ( + id int not null, + dim_id integer not null references gt_dim (id) +) WITH (autovacuum_enabled = false); +INSERT INTO gt_fact + SELECT g, (g%3)+1 FROM generate_series(1,100000) g; +VACUUM ANALYZE gt_fact; +-- By default, we expect Gather Merge with a parallel hash join. +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id; + QUERY PLAN +------------------------------------------------------- + Gather Merge + Workers Planned: 1 + -> Sort + Sort Key: f.dim_id + -> Parallel Hash Join + Hash Cond: (f.dim_id = d.id) + -> Parallel Seq Scan on gt_fact f + -> Parallel Hash + -> Parallel Seq Scan on gt_dim d + Generated Plan Advice: + JOIN_ORDER(f d) + HASH_JOIN(d) + SEQ_SCAN(f d) + GATHER_MERGE((f d)) +(14 rows) + +-- Force Gather or Gather Merge of both relations together. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'gather_merge((f d))'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id; + QUERY PLAN +------------------------------------------------------- + Gather Merge + Workers Planned: 1 + -> Sort + Sort Key: f.dim_id + -> Parallel Hash Join + Hash Cond: (f.dim_id = d.id) + -> Parallel Seq Scan on gt_fact f + -> Parallel Hash + -> Parallel Seq Scan on gt_dim d + Supplied Plan Advice: + GATHER_MERGE((f d)) /* matched */ + Generated Plan Advice: + JOIN_ORDER(f d) + HASH_JOIN(d) + SEQ_SCAN(f d) + GATHER_MERGE((f d)) +(16 rows) + +SET LOCAL pg_plan_advice.advice = 'gather((f d))'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id; + QUERY PLAN +------------------------------------------------------- + Sort + Sort Key: f.dim_id + -> Gather + Workers Planned: 1 + -> Parallel Hash Join + Hash Cond: (f.dim_id = d.id) + -> Parallel Seq Scan on gt_fact f + -> Parallel Hash + -> Parallel Seq Scan on gt_dim d + Supplied Plan Advice: + GATHER((f d)) /* matched */ + Generated Plan Advice: + JOIN_ORDER(f d) + HASH_JOIN(d) + SEQ_SCAN(f d) + GATHER((f d)) +(16 rows) + +COMMIT; +-- Force a separate Gather or Gather Merge operation for each relation. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'gather_merge(f d)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id; + QUERY PLAN +-------------------------------------------------- + Merge Join + Merge Cond: (f.dim_id = d.id) + -> Gather Merge + Workers Planned: 1 + -> Sort + Sort Key: f.dim_id + -> Parallel Seq Scan on gt_fact f + -> Gather Merge + Workers Planned: 1 + -> Sort + Sort Key: d.id + -> Parallel Seq Scan on gt_dim d + Supplied Plan Advice: + GATHER_MERGE(f) /* matched */ + GATHER_MERGE(d) /* matched */ + Generated Plan Advice: + JOIN_ORDER(f d) + MERGE_JOIN_PLAIN(d) + SEQ_SCAN(f d) + GATHER_MERGE(f d) +(20 rows) + +SET LOCAL pg_plan_advice.advice = 'gather(f d)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id; + QUERY PLAN +-------------------------------------------------- + Merge Join + Merge Cond: (f.dim_id = d.id) + -> Sort + Sort Key: f.dim_id + -> Gather + Workers Planned: 1 + -> Parallel Seq Scan on gt_fact f + -> Sort + Sort Key: d.id + -> Gather + Workers Planned: 1 + -> Parallel Seq Scan on gt_dim d + Supplied Plan Advice: + GATHER(f) /* matched */ + GATHER(d) /* matched */ + Generated Plan Advice: + JOIN_ORDER(f d) + MERGE_JOIN_PLAIN(d) + SEQ_SCAN(f d) + GATHER(f d) +(20 rows) + +SET LOCAL pg_plan_advice.advice = 'gather((d d/d.d))'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id; + QUERY PLAN +-------------------------------------------------- + Merge Join + Merge Cond: (f.dim_id = d.id) + -> Gather Merge + Workers Planned: 1 + -> Sort + Sort Key: f.dim_id + -> Parallel Seq Scan on gt_fact f + -> Index Scan using gt_dim_pkey on gt_dim d + Supplied Plan Advice: + GATHER((d d/d.d)) /* partially matched */ + Generated Plan Advice: + JOIN_ORDER(f d) + MERGE_JOIN_PLAIN(d) + SEQ_SCAN(f) + INDEX_SCAN(d public.gt_dim_pkey) + GATHER_MERGE(f) + NO_GATHER(d) +(17 rows) + +COMMIT; +-- Force a Gather or Gather Merge on one relation but no parallelism on other. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'gather_merge(f) no_gather(d)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id; + QUERY PLAN +-------------------------------------------------- + Merge Join + Merge Cond: (f.dim_id = d.id) + -> Gather Merge + Workers Planned: 1 + -> Sort + Sort Key: f.dim_id + -> Parallel Seq Scan on gt_fact f + -> Index Scan using gt_dim_pkey on gt_dim d + Supplied Plan Advice: + GATHER_MERGE(f) /* matched */ + NO_GATHER(d) /* matched */ + Generated Plan Advice: + JOIN_ORDER(f d) + MERGE_JOIN_PLAIN(d) + SEQ_SCAN(f) + INDEX_SCAN(d public.gt_dim_pkey) + GATHER_MERGE(f) + NO_GATHER(d) +(18 rows) + +SET LOCAL pg_plan_advice.advice = 'gather_merge(d) no_gather(f)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id; + QUERY PLAN +------------------------------------------------- + Merge Join + Merge Cond: (f.dim_id = d.id) + -> Sort + Sort Key: f.dim_id + -> Seq Scan on gt_fact f + -> Gather Merge + Workers Planned: 1 + -> Sort + Sort Key: d.id + -> Parallel Seq Scan on gt_dim d + Supplied Plan Advice: + GATHER_MERGE(d) /* matched */ + NO_GATHER(f) /* matched */ + Generated Plan Advice: + JOIN_ORDER(f d) + MERGE_JOIN_PLAIN(d) + SEQ_SCAN(f d) + GATHER_MERGE(d) + NO_GATHER(f) +(19 rows) + +SET LOCAL pg_plan_advice.advice = 'gather(f) no_gather(d)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id; + QUERY PLAN +-------------------------------------------------- + Merge Join + Merge Cond: (d.id = f.dim_id) + -> Index Scan using gt_dim_pkey on gt_dim d + -> Sort + Sort Key: f.dim_id + -> Gather + Workers Planned: 1 + -> Parallel Seq Scan on gt_fact f + Supplied Plan Advice: + GATHER(f) /* matched */ + NO_GATHER(d) /* matched */ + Generated Plan Advice: + JOIN_ORDER(d f) + MERGE_JOIN_PLAIN(f) + SEQ_SCAN(f) + INDEX_SCAN(d public.gt_dim_pkey) + GATHER(f) + NO_GATHER(d) +(18 rows) + +SET LOCAL pg_plan_advice.advice = 'gather(d) no_gather(f)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id; + QUERY PLAN +------------------------------------------------- + Merge Join + Merge Cond: (f.dim_id = d.id) + -> Sort + Sort Key: f.dim_id + -> Seq Scan on gt_fact f + -> Sort + Sort Key: d.id + -> Gather + Workers Planned: 1 + -> Parallel Seq Scan on gt_dim d + Supplied Plan Advice: + GATHER(d) /* matched */ + NO_GATHER(f) /* matched */ + Generated Plan Advice: + JOIN_ORDER(f d) + MERGE_JOIN_PLAIN(d) + SEQ_SCAN(f d) + GATHER(d) + NO_GATHER(f) +(19 rows) + +COMMIT; +-- Force no Gather or Gather Merge use at all. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'no_gather(f d)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id; + QUERY PLAN +------------------------------------------------ + Merge Join + Merge Cond: (d.id = f.dim_id) + -> Index Scan using gt_dim_pkey on gt_dim d + -> Sort + Sort Key: f.dim_id + -> Seq Scan on gt_fact f + Supplied Plan Advice: + NO_GATHER(f) /* matched */ + NO_GATHER(d) /* matched */ + Generated Plan Advice: + JOIN_ORDER(d f) + MERGE_JOIN_PLAIN(f) + SEQ_SCAN(f) + INDEX_SCAN(d public.gt_dim_pkey) + NO_GATHER(f d) +(15 rows) + +COMMIT; +-- Can't force Gather Merge without the ORDER BY clause, but just Gather is OK. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'gather_merge((f d))'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id; + QUERY PLAN +------------------------------------------------- + Gather + Disabled: true + Workers Planned: 1 + -> Parallel Hash Join + Hash Cond: (f.dim_id = d.id) + -> Parallel Seq Scan on gt_fact f + -> Parallel Hash + -> Parallel Seq Scan on gt_dim d + Supplied Plan Advice: + GATHER_MERGE((f d)) /* matched, failed */ + Generated Plan Advice: + JOIN_ORDER(f d) + HASH_JOIN(d) + SEQ_SCAN(f d) + GATHER((f d)) +(15 rows) + +SET LOCAL pg_plan_advice.advice = 'gather((f d))'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id; + QUERY PLAN +------------------------------------------------- + Gather + Workers Planned: 1 + -> Parallel Hash Join + Hash Cond: (f.dim_id = d.id) + -> Parallel Seq Scan on gt_fact f + -> Parallel Hash + -> Parallel Seq Scan on gt_dim d + Supplied Plan Advice: + GATHER((f d)) /* matched */ + Generated Plan Advice: + JOIN_ORDER(f d) + HASH_JOIN(d) + SEQ_SCAN(f d) + GATHER((f d)) +(14 rows) + +COMMIT; +-- Test conflicting advice. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'gather((f d)) no_gather(f)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id; + QUERY PLAN +------------------------------------------------------- + Gather Merge + Workers Planned: 1 + -> Sort + Sort Key: f.dim_id + -> Parallel Hash Join + Hash Cond: (f.dim_id = d.id) + -> Parallel Seq Scan on gt_fact f + -> Parallel Hash + -> Parallel Seq Scan on gt_dim d + Supplied Plan Advice: + GATHER((f d)) /* matched, conflicting, failed */ + NO_GATHER(f) /* matched, conflicting, failed */ + Generated Plan Advice: + JOIN_ORDER(f d) + HASH_JOIN(d) + SEQ_SCAN(f d) + GATHER_MERGE((f d)) +(17 rows) + +COMMIT; diff --git a/contrib/pg_plan_advice/expected/join_order.out b/contrib/pg_plan_advice/expected/join_order.out new file mode 100644 index 00000000000..a5a9728e3fd --- /dev/null +++ b/contrib/pg_plan_advice/expected/join_order.out @@ -0,0 +1,500 @@ +LOAD 'pg_plan_advice'; +SET max_parallel_workers_per_gather = 0; +CREATE TABLE jo_dim1 (id integer primary key, dim1 text, val1 int) + WITH (autovacuum_enabled = false); +INSERT INTO jo_dim1 (id, dim1, val1) + SELECT g, 'some filler text ' || g, (g % 3) + 1 + FROM generate_series(1,100) g; +VACUUM ANALYZE jo_dim1; +CREATE TABLE jo_dim2 (id integer primary key, dim2 text, val2 int) + WITH (autovacuum_enabled = false); +INSERT INTO jo_dim2 (id, dim2, val2) + SELECT g, 'some filler text ' || g, (g % 53) + 1 + FROM generate_series(1,1000) g; +VACUUM ANALYZE jo_dim2; +CREATE TABLE jo_fact ( + id int primary key, + dim1_id integer not null references jo_dim1 (id), + dim2_id integer not null references jo_dim2 (id) +) WITH (autovacuum_enabled = false); +INSERT INTO jo_fact + SELECT g, (g%100)+1, (g%100)+1 FROM generate_series(1,100000) g; +VACUUM ANALYZE jo_fact; +-- We expect to join to d2 first and then d1, since the condition on d2 +-- is more selective. +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM jo_fact f + LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id + LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id + WHERE val1 = 1 AND val2 = 1; + QUERY PLAN +------------------------------------------ + Hash Join + Hash Cond: (f.dim1_id = d1.id) + -> Hash Join + Hash Cond: (f.dim2_id = d2.id) + -> Seq Scan on jo_fact f + -> Hash + -> Seq Scan on jo_dim2 d2 + Filter: (val2 = 1) + -> Hash + -> Seq Scan on jo_dim1 d1 + Filter: (val1 = 1) + Generated Plan Advice: + JOIN_ORDER(f d2 d1) + HASH_JOIN(d2 d1) + SEQ_SCAN(f d2 d1) + NO_GATHER(f d1 d2) +(16 rows) + +-- Force a few different join orders. Some of these are very inefficient, +-- but the planner considers them all viable. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'join_order(f d1 d2)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM jo_fact f + LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id + LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id + WHERE val1 = 1 AND val2 = 1; + QUERY PLAN +------------------------------------------ + Hash Join + Hash Cond: (f.dim2_id = d2.id) + -> Hash Join + Hash Cond: (f.dim1_id = d1.id) + -> Seq Scan on jo_fact f + -> Hash + -> Seq Scan on jo_dim1 d1 + Filter: (val1 = 1) + -> Hash + -> Seq Scan on jo_dim2 d2 + Filter: (val2 = 1) + Supplied Plan Advice: + JOIN_ORDER(f d1 d2) /* matched */ + Generated Plan Advice: + JOIN_ORDER(f d1 d2) + HASH_JOIN(d1 d2) + SEQ_SCAN(f d1 d2) + NO_GATHER(f d1 d2) +(18 rows) + +SET LOCAL pg_plan_advice.advice = 'join_order(f d2 d1)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM jo_fact f + LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id + LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id + WHERE val1 = 1 AND val2 = 1; + QUERY PLAN +------------------------------------------ + Hash Join + Hash Cond: (f.dim1_id = d1.id) + -> Hash Join + Hash Cond: (f.dim2_id = d2.id) + -> Seq Scan on jo_fact f + -> Hash + -> Seq Scan on jo_dim2 d2 + Filter: (val2 = 1) + -> Hash + -> Seq Scan on jo_dim1 d1 + Filter: (val1 = 1) + Supplied Plan Advice: + JOIN_ORDER(f d2 d1) /* matched */ + Generated Plan Advice: + JOIN_ORDER(f d2 d1) + HASH_JOIN(d2 d1) + SEQ_SCAN(f d2 d1) + NO_GATHER(f d1 d2) +(18 rows) + +SET LOCAL pg_plan_advice.advice = 'join_order(d1 f d2)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM jo_fact f + LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id + LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id + WHERE val1 = 1 AND val2 = 1; + QUERY PLAN +----------------------------------------- + Hash Join + Hash Cond: (f.dim2_id = d2.id) + -> Hash Join + Hash Cond: (d1.id = f.dim1_id) + -> Seq Scan on jo_dim1 d1 + Filter: (val1 = 1) + -> Hash + -> Seq Scan on jo_fact f + -> Hash + -> Seq Scan on jo_dim2 d2 + Filter: (val2 = 1) + Supplied Plan Advice: + JOIN_ORDER(d1 f d2) /* matched */ + Generated Plan Advice: + JOIN_ORDER(d1 f d2) + HASH_JOIN(f d2) + SEQ_SCAN(d1 f d2) + NO_GATHER(f d1 d2) +(18 rows) + +SET LOCAL pg_plan_advice.advice = 'join_order(f (d1 d2))'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM jo_fact f + LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id + LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id + WHERE val1 = 1 AND val2 = 1; + QUERY PLAN +------------------------------------------------------------ + Hash Join + Hash Cond: ((f.dim1_id = d1.id) AND (f.dim2_id = d2.id)) + -> Seq Scan on jo_fact f + -> Hash + -> Nested Loop + -> Seq Scan on jo_dim1 d1 + Filter: (val1 = 1) + -> Materialize + -> Seq Scan on jo_dim2 d2 + Filter: (val2 = 1) + Supplied Plan Advice: + JOIN_ORDER(f (d1 d2)) /* matched */ + Generated Plan Advice: + JOIN_ORDER(f (d1 d2)) + NESTED_LOOP_MATERIALIZE(d2) + HASH_JOIN((d1 d2)) + SEQ_SCAN(f d1 d2) + NO_GATHER(f d1 d2) +(18 rows) + +SET LOCAL pg_plan_advice.advice = 'join_order(f {d1 d2})'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM jo_fact f + LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id + LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id + WHERE val1 = 1 AND val2 = 1; + QUERY PLAN +------------------------------------------------------------ + Hash Join + Hash Cond: ((f.dim1_id = d1.id) AND (f.dim2_id = d2.id)) + -> Seq Scan on jo_fact f + -> Hash + -> Nested Loop + -> Seq Scan on jo_dim1 d1 + Filter: (val1 = 1) + -> Materialize + -> Seq Scan on jo_dim2 d2 + Filter: (val2 = 1) + Supplied Plan Advice: + JOIN_ORDER(f {d1 d2}) /* matched, failed */ + Generated Plan Advice: + JOIN_ORDER(f (d1 d2)) + NESTED_LOOP_MATERIALIZE(d2) + HASH_JOIN((d1 d2)) + SEQ_SCAN(f d1 d2) + NO_GATHER(f d1 d2) +(18 rows) + +COMMIT; +-- Force a join order by mentioning just a prefix of the join list. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'join_order(d2)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM jo_fact f + LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id + LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id + WHERE val1 = 1 AND val2 = 1; + QUERY PLAN +------------------------------------------------ + Hash Join + Hash Cond: (d2.id = f.dim2_id) + -> Seq Scan on jo_dim2 d2 + Filter: (val2 = 1) + -> Hash + -> Hash Join + Hash Cond: (f.dim1_id = d1.id) + -> Seq Scan on jo_fact f + -> Hash + -> Seq Scan on jo_dim1 d1 + Filter: (val1 = 1) + Supplied Plan Advice: + JOIN_ORDER(d2) /* matched */ + Generated Plan Advice: + JOIN_ORDER(d2 (f d1)) + HASH_JOIN(d1 (f d1)) + SEQ_SCAN(d2 f d1) + NO_GATHER(f d1 d2) +(18 rows) + +SET LOCAL pg_plan_advice.advice = 'join_order(d2 d1)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM jo_fact f + LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id + LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id + WHERE val1 = 1 AND val2 = 1; + QUERY PLAN +------------------------------------------------------------ + Hash Join + Hash Cond: ((d1.id = f.dim1_id) AND (d2.id = f.dim2_id)) + -> Nested Loop + -> Seq Scan on jo_dim2 d2 + Filter: (val2 = 1) + -> Materialize + -> Seq Scan on jo_dim1 d1 + Filter: (val1 = 1) + -> Hash + -> Seq Scan on jo_fact f + Supplied Plan Advice: + JOIN_ORDER(d2 d1) /* matched */ + Generated Plan Advice: + JOIN_ORDER(d2 d1 f) + NESTED_LOOP_MATERIALIZE(d1) + HASH_JOIN(f) + SEQ_SCAN(d2 d1 f) + NO_GATHER(f d1 d2) +(18 rows) + +COMMIT; +-- jo_fact is not partitioned, but let's try pretending that it is and +-- verifying that the advice does not apply. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'join_order(f/d1 d1 d2)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM jo_fact f + LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id + LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id + WHERE val1 = 1 AND val2 = 1; + QUERY PLAN +------------------------------------------------------------- + Nested Loop + Disabled: true + -> Nested Loop + Disabled: true + -> Seq Scan on jo_fact f + -> Index Scan using jo_dim1_pkey on jo_dim1 d1 + Index Cond: (id = f.dim1_id) + Filter: (val1 = 1) + -> Index Scan using jo_dim2_pkey on jo_dim2 d2 + Index Cond: (id = f.dim2_id) + Filter: (val2 = 1) + Supplied Plan Advice: + JOIN_ORDER(f/d1 d1 d2) /* partially matched */ + Generated Plan Advice: + JOIN_ORDER(f d1 d2) + NESTED_LOOP_PLAIN(d1 d2) + SEQ_SCAN(f) + INDEX_SCAN(d1 public.jo_dim1_pkey d2 public.jo_dim2_pkey) + NO_GATHER(f d1 d2) +(19 rows) + +SET LOCAL pg_plan_advice.advice = 'join_order(f/d1 (d1 d2))'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM jo_fact f + LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id + LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id + WHERE val1 = 1 AND val2 = 1; + QUERY PLAN +-------------------------------------------------------------- + Nested Loop + Disabled: true + Join Filter: ((d1.id = f.dim1_id) AND (d2.id = f.dim2_id)) + -> Nested Loop + -> Seq Scan on jo_dim1 d1 + Filter: (val1 = 1) + -> Materialize + -> Seq Scan on jo_dim2 d2 + Filter: (val2 = 1) + -> Seq Scan on jo_fact f + Supplied Plan Advice: + JOIN_ORDER(f/d1 (d1 d2)) /* partially matched */ + Generated Plan Advice: + JOIN_ORDER(d1 d2 f) + NESTED_LOOP_PLAIN(f) + NESTED_LOOP_MATERIALIZE(d2) + SEQ_SCAN(d1 d2 f) + NO_GATHER(f d1 d2) +(18 rows) + +COMMIT; +-- The unusual formulation of this query is intended to prevent the query +-- planner from reducing the FULL JOIN to some other join type, so that we +-- can test what happens with a join type that cannot be reordered. +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM jo_dim1 d1 + INNER JOIN (jo_fact f FULL JOIN jo_dim2 d2 ON f.dim2_id + 0 = d2.id + 0) + ON d1.id = f.dim1_id OR f.dim1_id IS NULL; + QUERY PLAN +------------------------------------------------------------- + Nested Loop + Join Filter: ((d1.id = f.dim1_id) OR (f.dim1_id IS NULL)) + -> Merge Full Join + Merge Cond: (((d2.id + 0)) = ((f.dim2_id + 0))) + -> Sort + Sort Key: ((d2.id + 0)) + -> Seq Scan on jo_dim2 d2 + -> Sort + Sort Key: ((f.dim2_id + 0)) + -> Seq Scan on jo_fact f + -> Materialize + -> Seq Scan on jo_dim1 d1 + Generated Plan Advice: + JOIN_ORDER(d2 f d1) + MERGE_JOIN_PLAIN(f) + NESTED_LOOP_MATERIALIZE(d1) + SEQ_SCAN(d2 f d1) + NO_GATHER(d1 f d2) +(18 rows) + +-- We should not be able to force the planner to join f to d1 first, because +-- that is not a valid join order, but we should be able to force the planner +-- to make either d2 or f the driving table. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'join_order(f d1 d2)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM jo_dim1 d1 + INNER JOIN (jo_fact f FULL JOIN jo_dim2 d2 ON f.dim2_id + 0 = d2.id + 0) + ON d1.id = f.dim1_id OR f.dim1_id IS NULL; + QUERY PLAN +------------------------------------------------------------- + Nested Loop + Disabled: true + Join Filter: ((d1.id = f.dim1_id) OR (f.dim1_id IS NULL)) + -> Merge Full Join + Disabled: true + Merge Cond: (((d2.id + 0)) = ((f.dim2_id + 0))) + -> Sort + Sort Key: ((d2.id + 0)) + -> Seq Scan on jo_dim2 d2 + -> Sort + Sort Key: ((f.dim2_id + 0)) + -> Seq Scan on jo_fact f + -> Seq Scan on jo_dim1 d1 + Supplied Plan Advice: + JOIN_ORDER(f d1 d2) /* partially matched */ + Generated Plan Advice: + JOIN_ORDER(d2 f d1) + MERGE_JOIN_PLAIN(f) + NESTED_LOOP_PLAIN(d1) + SEQ_SCAN(d2 f d1) + NO_GATHER(d1 f d2) +(21 rows) + +SET LOCAL pg_plan_advice.advice = 'join_order(f d2 d1)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM jo_dim1 d1 + INNER JOIN (jo_fact f FULL JOIN jo_dim2 d2 ON f.dim2_id + 0 = d2.id + 0) + ON d1.id = f.dim1_id OR f.dim1_id IS NULL; + QUERY PLAN +------------------------------------------------------------- + Nested Loop + Join Filter: ((d1.id = f.dim1_id) OR (f.dim1_id IS NULL)) + -> Merge Full Join + Merge Cond: (((f.dim2_id + 0)) = ((d2.id + 0))) + -> Sort + Sort Key: ((f.dim2_id + 0)) + -> Seq Scan on jo_fact f + -> Sort + Sort Key: ((d2.id + 0)) + -> Seq Scan on jo_dim2 d2 + -> Materialize + -> Seq Scan on jo_dim1 d1 + Supplied Plan Advice: + JOIN_ORDER(f d2 d1) /* matched */ + Generated Plan Advice: + JOIN_ORDER(f d2 d1) + MERGE_JOIN_PLAIN(d2) + NESTED_LOOP_MATERIALIZE(d1) + SEQ_SCAN(f d2 d1) + NO_GATHER(d1 f d2) +(20 rows) + +SET LOCAL pg_plan_advice.advice = 'join_order(d2 f d1)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM jo_dim1 d1 + INNER JOIN (jo_fact f FULL JOIN jo_dim2 d2 ON f.dim2_id + 0 = d2.id + 0) + ON d1.id = f.dim1_id OR f.dim1_id IS NULL; + QUERY PLAN +------------------------------------------------------------- + Nested Loop + Join Filter: ((d1.id = f.dim1_id) OR (f.dim1_id IS NULL)) + -> Merge Full Join + Merge Cond: (((d2.id + 0)) = ((f.dim2_id + 0))) + -> Sort + Sort Key: ((d2.id + 0)) + -> Seq Scan on jo_dim2 d2 + -> Sort + Sort Key: ((f.dim2_id + 0)) + -> Seq Scan on jo_fact f + -> Materialize + -> Seq Scan on jo_dim1 d1 + Supplied Plan Advice: + JOIN_ORDER(d2 f d1) /* matched */ + Generated Plan Advice: + JOIN_ORDER(d2 f d1) + MERGE_JOIN_PLAIN(f) + NESTED_LOOP_MATERIALIZE(d1) + SEQ_SCAN(d2 f d1) + NO_GATHER(d1 f d2) +(20 rows) + +COMMIT; +-- Two incompatible join orders should conflict. In the second case, +-- the conflict is implicit: if d1 is on the inner side of a join of any +-- type, it cannot also be the driving table. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'join_order(f) join_order(d1)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM jo_dim1 d1 + INNER JOIN (jo_fact f FULL JOIN jo_dim2 d2 ON f.dim2_id + 0 = d2.id + 0) + ON d1.id = f.dim1_id OR f.dim1_id IS NULL; + QUERY PLAN +------------------------------------------------------------- + Nested Loop + Join Filter: ((d1.id = f.dim1_id) OR (f.dim1_id IS NULL)) + -> Merge Full Join + Merge Cond: (((f.dim2_id + 0)) = ((d2.id + 0))) + -> Sort + Sort Key: ((f.dim2_id + 0)) + -> Seq Scan on jo_fact f + -> Sort + Sort Key: ((d2.id + 0)) + -> Seq Scan on jo_dim2 d2 + -> Materialize + -> Seq Scan on jo_dim1 d1 + Supplied Plan Advice: + JOIN_ORDER(f) /* matched, conflicting */ + JOIN_ORDER(d1) /* matched, conflicting, failed */ + Generated Plan Advice: + JOIN_ORDER(f d2 d1) + MERGE_JOIN_PLAIN(d2) + NESTED_LOOP_MATERIALIZE(d1) + SEQ_SCAN(f d2 d1) + NO_GATHER(d1 f d2) +(21 rows) + +SET LOCAL pg_plan_advice.advice = 'join_order(d1) hash_join(d1)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM jo_dim1 d1 + INNER JOIN (jo_fact f FULL JOIN jo_dim2 d2 ON f.dim2_id + 0 = d2.id + 0) + ON d1.id = f.dim1_id OR f.dim1_id IS NULL; + QUERY PLAN +--------------------------------------------------------------- + Nested Loop + Join Filter: ((d1.id = f.dim1_id) OR (f.dim1_id IS NULL)) + -> Seq Scan on jo_dim1 d1 + -> Materialize + -> Merge Full Join + Merge Cond: (((d2.id + 0)) = ((f.dim2_id + 0))) + -> Sort + Sort Key: ((d2.id + 0)) + -> Seq Scan on jo_dim2 d2 + -> Sort + Sort Key: ((f.dim2_id + 0)) + -> Seq Scan on jo_fact f + Supplied Plan Advice: + JOIN_ORDER(d1) /* matched, conflicting */ + HASH_JOIN(d1) /* matched, conflicting, failed */ + Generated Plan Advice: + JOIN_ORDER(d1 (d2 f)) + MERGE_JOIN_PLAIN(f) + NESTED_LOOP_MATERIALIZE((f d2)) + SEQ_SCAN(d1 d2 f) + NO_GATHER(d1 f d2) +(21 rows) + +COMMIT; diff --git a/contrib/pg_plan_advice/expected/join_strategy.out b/contrib/pg_plan_advice/expected/join_strategy.out new file mode 100644 index 00000000000..0f9db692190 --- /dev/null +++ b/contrib/pg_plan_advice/expected/join_strategy.out @@ -0,0 +1,339 @@ +LOAD 'pg_plan_advice'; +SET max_parallel_workers_per_gather = 0; +CREATE TABLE join_dim (id serial primary key, dim text) + WITH (autovacuum_enabled = false); +INSERT INTO join_dim (dim) SELECT random()::text FROM generate_series(1,100) g; +VACUUM ANALYZE join_dim; +CREATE TABLE join_fact ( + id int primary key, + dim_id integer not null references join_dim (id) +) WITH (autovacuum_enabled = false); +INSERT INTO join_fact + SELECT g, (g%3)+1 FROM generate_series(1,100000) g; +CREATE INDEX join_fact_dim_id ON join_fact (dim_id); +VACUUM ANALYZE join_fact; +-- We expect a hash join by default. +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id; + QUERY PLAN +------------------------------------ + Hash Join + Hash Cond: (f.dim_id = d.id) + -> Seq Scan on join_fact f + -> Hash + -> Seq Scan on join_dim d + Generated Plan Advice: + JOIN_ORDER(f d) + HASH_JOIN(d) + SEQ_SCAN(f d) + NO_GATHER(f d) +(10 rows) + +-- Try forcing each join method in turn with join_dim as the inner table. +-- All of these should work except for MERGE_JOIN_MATERIALIZE; that will +-- fail, because the planner knows that join_dim (id) is unique, and will +-- refuse to add mark/restore overhead. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'HASH_JOIN(d)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id; + QUERY PLAN +------------------------------------ + Hash Join + Hash Cond: (f.dim_id = d.id) + -> Seq Scan on join_fact f + -> Hash + -> Seq Scan on join_dim d + Supplied Plan Advice: + HASH_JOIN(d) /* matched */ + Generated Plan Advice: + JOIN_ORDER(f d) + HASH_JOIN(d) + SEQ_SCAN(f d) + NO_GATHER(f d) +(12 rows) + +SET LOCAL pg_plan_advice.advice = 'MERGE_JOIN_MATERIALIZE(d)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id; + QUERY PLAN +---------------------------------------------------------------- + Merge Join + Disabled: true + Merge Cond: (f.dim_id = d.id) + -> Index Scan using join_fact_dim_id on join_fact f + -> Index Scan using join_dim_pkey on join_dim d + Supplied Plan Advice: + MERGE_JOIN_MATERIALIZE(d) /* matched, failed */ + Generated Plan Advice: + JOIN_ORDER(f d) + MERGE_JOIN_PLAIN(d) + INDEX_SCAN(f public.join_fact_dim_id d public.join_dim_pkey) + NO_GATHER(f d) +(12 rows) + +SET LOCAL pg_plan_advice.advice = 'MERGE_JOIN_PLAIN(d)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id; + QUERY PLAN +---------------------------------------------------------------- + Merge Join + Merge Cond: (f.dim_id = d.id) + -> Index Scan using join_fact_dim_id on join_fact f + -> Index Scan using join_dim_pkey on join_dim d + Supplied Plan Advice: + MERGE_JOIN_PLAIN(d) /* matched */ + Generated Plan Advice: + JOIN_ORDER(f d) + MERGE_JOIN_PLAIN(d) + INDEX_SCAN(f public.join_fact_dim_id d public.join_dim_pkey) + NO_GATHER(f d) +(11 rows) + +SET LOCAL pg_plan_advice.advice = 'NESTED_LOOP_MATERIALIZE(d)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id; + QUERY PLAN +-------------------------------------------- + Nested Loop + Join Filter: (f.dim_id = d.id) + -> Seq Scan on join_fact f + -> Materialize + -> Seq Scan on join_dim d + Supplied Plan Advice: + NESTED_LOOP_MATERIALIZE(d) /* matched */ + Generated Plan Advice: + JOIN_ORDER(f d) + NESTED_LOOP_MATERIALIZE(d) + SEQ_SCAN(f d) + NO_GATHER(f d) +(12 rows) + +SET LOCAL pg_plan_advice.advice = 'NESTED_LOOP_MEMOIZE(d)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id; + QUERY PLAN +---------------------------------------------------------- + Nested Loop + -> Seq Scan on join_fact f + -> Memoize + Cache Key: f.dim_id + Cache Mode: logical + -> Index Scan using join_dim_pkey on join_dim d + Index Cond: (id = f.dim_id) + Supplied Plan Advice: + NESTED_LOOP_MEMOIZE(d) /* matched */ + Generated Plan Advice: + JOIN_ORDER(f d) + NESTED_LOOP_MEMOIZE(d) + SEQ_SCAN(f) + INDEX_SCAN(d public.join_dim_pkey) + NO_GATHER(f d) +(15 rows) + +SET LOCAL pg_plan_advice.advice = 'NESTED_LOOP_PLAIN(d)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id; + QUERY PLAN +---------------------------------------------------- + Nested Loop + -> Seq Scan on join_fact f + -> Index Scan using join_dim_pkey on join_dim d + Index Cond: (id = f.dim_id) + Supplied Plan Advice: + NESTED_LOOP_PLAIN(d) /* matched */ + Generated Plan Advice: + JOIN_ORDER(f d) + NESTED_LOOP_PLAIN(d) + SEQ_SCAN(f) + INDEX_SCAN(d public.join_dim_pkey) + NO_GATHER(f d) +(12 rows) + +COMMIT; +-- Now try forcing each join method in turn with join_fact as the inner +-- table. All of these should work. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'HASH_JOIN(f)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id; + QUERY PLAN +------------------------------------- + Hash Join + Hash Cond: (d.id = f.dim_id) + -> Seq Scan on join_dim d + -> Hash + -> Seq Scan on join_fact f + Supplied Plan Advice: + HASH_JOIN(f) /* matched */ + Generated Plan Advice: + JOIN_ORDER(d f) + HASH_JOIN(f) + SEQ_SCAN(d f) + NO_GATHER(f d) +(12 rows) + +SET LOCAL pg_plan_advice.advice = 'MERGE_JOIN_MATERIALIZE(f)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id; + QUERY PLAN +---------------------------------------------------------------- + Merge Join + Merge Cond: (d.id = f.dim_id) + -> Index Scan using join_dim_pkey on join_dim d + -> Materialize + -> Index Scan using join_fact_dim_id on join_fact f + Supplied Plan Advice: + MERGE_JOIN_MATERIALIZE(f) /* matched */ + Generated Plan Advice: + JOIN_ORDER(d f) + MERGE_JOIN_MATERIALIZE(f) + INDEX_SCAN(d public.join_dim_pkey f public.join_fact_dim_id) + NO_GATHER(f d) +(12 rows) + +SET LOCAL pg_plan_advice.advice = 'MERGE_JOIN_PLAIN(f)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id; + QUERY PLAN +---------------------------------------------------------------- + Merge Join + Merge Cond: (d.id = f.dim_id) + -> Index Scan using join_dim_pkey on join_dim d + -> Index Scan using join_fact_dim_id on join_fact f + Supplied Plan Advice: + MERGE_JOIN_PLAIN(f) /* matched */ + Generated Plan Advice: + JOIN_ORDER(d f) + MERGE_JOIN_PLAIN(f) + INDEX_SCAN(d public.join_dim_pkey f public.join_fact_dim_id) + NO_GATHER(f d) +(11 rows) + +SET LOCAL pg_plan_advice.advice = 'NESTED_LOOP_MATERIALIZE(f)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id; + QUERY PLAN +-------------------------------------------- + Nested Loop + Join Filter: (f.dim_id = d.id) + -> Seq Scan on join_dim d + -> Materialize + -> Seq Scan on join_fact f + Supplied Plan Advice: + NESTED_LOOP_MATERIALIZE(f) /* matched */ + Generated Plan Advice: + JOIN_ORDER(d f) + NESTED_LOOP_MATERIALIZE(f) + SEQ_SCAN(d f) + NO_GATHER(f d) +(12 rows) + +SET LOCAL pg_plan_advice.advice = 'NESTED_LOOP_MEMOIZE(f)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id; + QUERY PLAN +-------------------------------------------------------------- + Nested Loop + -> Seq Scan on join_dim d + -> Memoize + Cache Key: d.id + Cache Mode: logical + -> Index Scan using join_fact_dim_id on join_fact f + Index Cond: (dim_id = d.id) + Supplied Plan Advice: + NESTED_LOOP_MEMOIZE(f) /* matched */ + Generated Plan Advice: + JOIN_ORDER(d f) + NESTED_LOOP_MEMOIZE(f) + SEQ_SCAN(d) + INDEX_SCAN(f public.join_fact_dim_id) + NO_GATHER(f d) +(15 rows) + +SET LOCAL pg_plan_advice.advice = 'NESTED_LOOP_PLAIN(f)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id; + QUERY PLAN +-------------------------------------------------------- + Nested Loop + -> Seq Scan on join_dim d + -> Index Scan using join_fact_dim_id on join_fact f + Index Cond: (dim_id = d.id) + Supplied Plan Advice: + NESTED_LOOP_PLAIN(f) /* matched */ + Generated Plan Advice: + JOIN_ORDER(d f) + NESTED_LOOP_PLAIN(f) + SEQ_SCAN(d) + INDEX_SCAN(f public.join_fact_dim_id) + NO_GATHER(f d) +(12 rows) + +COMMIT; +-- Non-working cases. We can't force a foreign join between these tables, +-- because they aren't foreign tables. We also can't use two different +-- strategies on the same table, nor can we put both tables on the inner +-- side of the same join. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'FOREIGN_JOIN((f d))'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id; + QUERY PLAN +---------------------------------------------------- + Nested Loop + Disabled: true + -> Seq Scan on join_fact f + -> Index Scan using join_dim_pkey on join_dim d + Index Cond: (id = f.dim_id) + Supplied Plan Advice: + FOREIGN_JOIN((f d)) /* matched, failed */ + Generated Plan Advice: + JOIN_ORDER(f d) + NESTED_LOOP_PLAIN(d) + SEQ_SCAN(f) + INDEX_SCAN(d public.join_dim_pkey) + NO_GATHER(f d) +(13 rows) + +SET LOCAL pg_plan_advice.advice = 'NESTED_LOOP_PLAIN(f) NESTED_LOOP_MATERIALIZE(f)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id; + QUERY PLAN +----------------------------------------------------------------- + Merge Join + Merge Cond: (d.id = f.dim_id) + -> Index Scan using join_dim_pkey on join_dim d + -> Index Scan using join_fact_dim_id on join_fact f + Supplied Plan Advice: + NESTED_LOOP_PLAIN(f) /* matched, conflicting, failed */ + NESTED_LOOP_MATERIALIZE(f) /* matched, conflicting, failed */ + Generated Plan Advice: + JOIN_ORDER(d f) + MERGE_JOIN_PLAIN(f) + INDEX_SCAN(d public.join_dim_pkey f public.join_fact_dim_id) + NO_GATHER(f d) +(12 rows) + +SET LOCAL pg_plan_advice.advice = 'NESTED_LOOP_PLAIN(f d)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id; + QUERY PLAN +---------------------------------------------------- + Nested Loop + Disabled: true + -> Seq Scan on join_fact f + -> Index Scan using join_dim_pkey on join_dim d + Index Cond: (id = f.dim_id) + Supplied Plan Advice: + NESTED_LOOP_PLAIN(f) /* matched, failed */ + NESTED_LOOP_PLAIN(d) /* matched */ + Generated Plan Advice: + JOIN_ORDER(f d) + NESTED_LOOP_PLAIN(d) + SEQ_SCAN(f) + INDEX_SCAN(d public.join_dim_pkey) + NO_GATHER(f d) +(14 rows) + +COMMIT; diff --git a/contrib/pg_plan_advice/expected/partitionwise.out b/contrib/pg_plan_advice/expected/partitionwise.out new file mode 100644 index 00000000000..2b3d0a82443 --- /dev/null +++ b/contrib/pg_plan_advice/expected/partitionwise.out @@ -0,0 +1,426 @@ +LOAD 'pg_plan_advice'; +SET max_parallel_workers_per_gather = 0; +SET enable_partitionwise_join = true; +CREATE TABLE pt1 (id integer primary key, dim1 text, val1 int) + PARTITION BY RANGE (id); +CREATE TABLE pt1a PARTITION OF pt1 FOR VALUES FROM (1) to (1001) + WITH (autovacuum_enabled = false); +CREATE TABLE pt1b PARTITION OF pt1 FOR VALUES FROM (1001) to (2001) + WITH (autovacuum_enabled = false); +CREATE TABLE pt1c PARTITION OF pt1 FOR VALUES FROM (2001) to (3001) + WITH (autovacuum_enabled = false); +INSERT INTO pt1 (id, dim1, val1) + SELECT g, 'some filler text ' || g, (g % 3) + 1 + FROM generate_series(1,3000) g; +VACUUM ANALYZE pt1; +CREATE TABLE pt2 (id integer primary key, dim2 text, val2 int) + PARTITION BY RANGE (id); +CREATE TABLE pt2a PARTITION OF pt2 FOR VALUES FROM (1) to (1001) + WITH (autovacuum_enabled = false); +CREATE TABLE pt2b PARTITION OF pt2 FOR VALUES FROM (1001) to (2001) + WITH (autovacuum_enabled = false); +CREATE TABLE pt2c PARTITION OF pt2 FOR VALUES FROM (2001) to (3001) + WITH (autovacuum_enabled = false); +INSERT INTO pt2 (id, dim2, val2) + SELECT g, 'some other text ' || g, (g % 5) + 1 + FROM generate_series(1,3000,2) g; +VACUUM ANALYZE pt2; +CREATE TABLE pt3 (id integer primary key, dim3 text, val3 int) + PARTITION BY RANGE (id); +CREATE TABLE pt3a PARTITION OF pt3 FOR VALUES FROM (1) to (1001) + WITH (autovacuum_enabled = false); +CREATE TABLE pt3b PARTITION OF pt3 FOR VALUES FROM (1001) to (2001) + WITH (autovacuum_enabled = false); +CREATE TABLE pt3c PARTITION OF pt3 FOR VALUES FROM (2001) to (3001) + WITH (autovacuum_enabled = false); +INSERT INTO pt3 (id, dim3, val3) + SELECT g, 'a third random text ' || g, (g % 7) + 1 + FROM generate_series(1,3000,3) g; +VACUUM ANALYZE pt3; +CREATE TABLE ptmismatch (id integer primary key, dimm text, valm int) + PARTITION BY RANGE (id); +CREATE TABLE ptmismatcha PARTITION OF ptmismatch + FOR VALUES FROM (1) to (1501) + WITH (autovacuum_enabled = false); +CREATE TABLE ptmismatchb PARTITION OF ptmismatch + FOR VALUES FROM (1501) to (3001) + WITH (autovacuum_enabled = false); +INSERT INTO ptmismatch (id, dimm, valm) + SELECT g, 'yet another text ' || g, (g % 2) + 1 + FROM generate_series(1,3000) g; +VACUUM ANALYZE ptmismatch; +EXPLAIN (PLAN_ADVICE, COSTS OFF) +SELECT * FROM pt1, pt2, pt3 WHERE pt1.id = pt2.id AND pt2.id = pt3.id + AND val1 = 1 AND val2 = 1 AND val3 = 1; + QUERY PLAN +------------------------------------------------------------------------------------- + Append + -> Nested Loop + -> Hash Join + Hash Cond: (pt2_1.id = pt3_1.id) + -> Seq Scan on pt2a pt2_1 + Filter: (val2 = 1) + -> Hash + -> Seq Scan on pt3a pt3_1 + Filter: (val3 = 1) + -> Index Scan using pt1a_pkey on pt1a pt1_1 + Index Cond: (id = pt2_1.id) + Filter: (val1 = 1) + -> Nested Loop + -> Hash Join + Hash Cond: (pt2_2.id = pt3_2.id) + -> Seq Scan on pt2b pt2_2 + Filter: (val2 = 1) + -> Hash + -> Seq Scan on pt3b pt3_2 + Filter: (val3 = 1) + -> Index Scan using pt1b_pkey on pt1b pt1_2 + Index Cond: (id = pt2_2.id) + Filter: (val1 = 1) + -> Nested Loop + -> Hash Join + Hash Cond: (pt2_3.id = pt3_3.id) + -> Seq Scan on pt2c pt2_3 + Filter: (val2 = 1) + -> Hash + -> Seq Scan on pt3c pt3_3 + Filter: (val3 = 1) + -> Index Scan using pt1c_pkey on pt1c pt1_3 + Index Cond: (id = pt2_3.id) + Filter: (val1 = 1) + Generated Plan Advice: + JOIN_ORDER(pt2/public.pt2a pt3/public.pt3a pt1/public.pt1a) + JOIN_ORDER(pt2/public.pt2b pt3/public.pt3b pt1/public.pt1b) + JOIN_ORDER(pt2/public.pt2c pt3/public.pt3c pt1/public.pt1c) + NESTED_LOOP_PLAIN(pt1/public.pt1a pt1/public.pt1b pt1/public.pt1c) + HASH_JOIN(pt3/public.pt3a pt3/public.pt3b pt3/public.pt3c) + SEQ_SCAN(pt2/public.pt2a pt3/public.pt3a pt2/public.pt2b pt3/public.pt3b + pt2/public.pt2c pt3/public.pt3c) + INDEX_SCAN(pt1/public.pt1a public.pt1a_pkey pt1/public.pt1b public.pt1b_pkey + pt1/public.pt1c public.pt1c_pkey) + PARTITIONWISE((pt1 pt2 pt3)) + NO_GATHER(pt1/public.pt1a pt1/public.pt1b pt1/public.pt1c pt2/public.pt2a + pt2/public.pt2b pt2/public.pt2c pt3/public.pt3a pt3/public.pt3b pt3/public.pt3c) +(47 rows) + +-- Suppress partitionwise join, or do it just partially. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'PARTITIONWISE(pt1 pt2 pt3)'; +EXPLAIN (PLAN_ADVICE, COSTS OFF) +SELECT * FROM pt1, pt2, pt3 WHERE pt1.id = pt2.id AND pt2.id = pt3.id + AND val1 = 1 AND val2 = 1 AND val3 = 1; + QUERY PLAN +------------------------------------------------------------------------------------- + Nested Loop + -> Hash Join + Hash Cond: (pt2.id = pt3.id) + -> Append + -> Seq Scan on pt2a pt2_1 + Filter: (val2 = 1) + -> Seq Scan on pt2b pt2_2 + Filter: (val2 = 1) + -> Seq Scan on pt2c pt2_3 + Filter: (val2 = 1) + -> Hash + -> Append + -> Seq Scan on pt3a pt3_1 + Filter: (val3 = 1) + -> Seq Scan on pt3b pt3_2 + Filter: (val3 = 1) + -> Seq Scan on pt3c pt3_3 + Filter: (val3 = 1) + -> Append + -> Index Scan using pt1a_pkey on pt1a pt1_1 + Index Cond: (id = pt2.id) + Filter: (val1 = 1) + -> Index Scan using pt1b_pkey on pt1b pt1_2 + Index Cond: (id = pt2.id) + Filter: (val1 = 1) + -> Index Scan using pt1c_pkey on pt1c pt1_3 + Index Cond: (id = pt2.id) + Filter: (val1 = 1) + Supplied Plan Advice: + PARTITIONWISE(pt1) /* matched */ + PARTITIONWISE(pt2) /* matched */ + PARTITIONWISE(pt3) /* matched */ + Generated Plan Advice: + JOIN_ORDER(pt2 pt3 pt1) + NESTED_LOOP_PLAIN(pt1) + HASH_JOIN(pt3) + SEQ_SCAN(pt2/public.pt2a pt2/public.pt2b pt2/public.pt2c pt3/public.pt3a + pt3/public.pt3b pt3/public.pt3c) + INDEX_SCAN(pt1/public.pt1a public.pt1a_pkey pt1/public.pt1b public.pt1b_pkey + pt1/public.pt1c public.pt1c_pkey) + PARTITIONWISE(pt2 pt3 pt1) + NO_GATHER(pt1/public.pt1a pt1/public.pt1b pt1/public.pt1c pt2/public.pt2a + pt2/public.pt2b pt2/public.pt2c pt3/public.pt3a pt3/public.pt3b pt3/public.pt3c) +(43 rows) + +SET LOCAL pg_plan_advice.advice = 'PARTITIONWISE((pt1 pt2) pt3)'; +EXPLAIN (PLAN_ADVICE, COSTS OFF) +SELECT * FROM pt1, pt2, pt3 WHERE pt1.id = pt2.id AND pt2.id = pt3.id + AND val1 = 1 AND val2 = 1 AND val3 = 1; + QUERY PLAN +------------------------------------------------------------------------------------- + Hash Join + Hash Cond: (pt1.id = pt3.id) + -> Append + -> Hash Join + Hash Cond: (pt1_1.id = pt2_1.id) + -> Seq Scan on pt1a pt1_1 + Filter: (val1 = 1) + -> Hash + -> Seq Scan on pt2a pt2_1 + Filter: (val2 = 1) + -> Hash Join + Hash Cond: (pt1_2.id = pt2_2.id) + -> Seq Scan on pt1b pt1_2 + Filter: (val1 = 1) + -> Hash + -> Seq Scan on pt2b pt2_2 + Filter: (val2 = 1) + -> Hash Join + Hash Cond: (pt1_3.id = pt2_3.id) + -> Seq Scan on pt1c pt1_3 + Filter: (val1 = 1) + -> Hash + -> Seq Scan on pt2c pt2_3 + Filter: (val2 = 1) + -> Hash + -> Append + -> Seq Scan on pt3a pt3_1 + Filter: (val3 = 1) + -> Seq Scan on pt3b pt3_2 + Filter: (val3 = 1) + -> Seq Scan on pt3c pt3_3 + Filter: (val3 = 1) + Supplied Plan Advice: + PARTITIONWISE((pt1 pt2)) /* matched */ + PARTITIONWISE(pt3) /* matched */ + Generated Plan Advice: + JOIN_ORDER(pt1/public.pt1a pt2/public.pt2a) + JOIN_ORDER(pt1/public.pt1b pt2/public.pt2b) + JOIN_ORDER(pt1/public.pt1c pt2/public.pt2c) + JOIN_ORDER({pt1 pt2} pt3) + HASH_JOIN(pt2/public.pt2a pt2/public.pt2b pt2/public.pt2c pt3) + SEQ_SCAN(pt1/public.pt1a pt2/public.pt2a pt1/public.pt1b pt2/public.pt2b + pt1/public.pt1c pt2/public.pt2c pt3/public.pt3a pt3/public.pt3b + pt3/public.pt3c) + PARTITIONWISE((pt1 pt2) pt3) + NO_GATHER(pt1/public.pt1a pt1/public.pt1b pt1/public.pt1c pt2/public.pt2a + pt2/public.pt2b pt2/public.pt2c pt3/public.pt3a pt3/public.pt3b pt3/public.pt3c) +(47 rows) + +COMMIT; +-- Test conflicting advice. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'PARTITIONWISE((pt1 pt2) (pt1 pt3))'; +EXPLAIN (PLAN_ADVICE, COSTS OFF) +SELECT * FROM pt1, pt2, pt3 WHERE pt1.id = pt2.id AND pt2.id = pt3.id + AND val1 = 1 AND val2 = 1 AND val3 = 1; + QUERY PLAN +------------------------------------------------------------------------------------- + Append + Disabled: true + -> Nested Loop + -> Hash Join + Hash Cond: (pt2_1.id = pt3_1.id) + -> Seq Scan on pt2a pt2_1 + Filter: (val2 = 1) + -> Hash + -> Seq Scan on pt3a pt3_1 + Filter: (val3 = 1) + -> Index Scan using pt1a_pkey on pt1a pt1_1 + Index Cond: (id = pt2_1.id) + Filter: (val1 = 1) + -> Nested Loop + -> Hash Join + Hash Cond: (pt2_2.id = pt3_2.id) + -> Seq Scan on pt2b pt2_2 + Filter: (val2 = 1) + -> Hash + -> Seq Scan on pt3b pt3_2 + Filter: (val3 = 1) + -> Index Scan using pt1b_pkey on pt1b pt1_2 + Index Cond: (id = pt2_2.id) + Filter: (val1 = 1) + -> Nested Loop + -> Hash Join + Hash Cond: (pt2_3.id = pt3_3.id) + -> Seq Scan on pt2c pt2_3 + Filter: (val2 = 1) + -> Hash + -> Seq Scan on pt3c pt3_3 + Filter: (val3 = 1) + -> Index Scan using pt1c_pkey on pt1c pt1_3 + Index Cond: (id = pt2_3.id) + Filter: (val1 = 1) + Supplied Plan Advice: + PARTITIONWISE((pt1 pt2)) /* matched, conflicting, failed */ + PARTITIONWISE((pt1 pt3)) /* matched, conflicting, failed */ + Generated Plan Advice: + JOIN_ORDER(pt2/public.pt2a pt3/public.pt3a pt1/public.pt1a) + JOIN_ORDER(pt2/public.pt2b pt3/public.pt3b pt1/public.pt1b) + JOIN_ORDER(pt2/public.pt2c pt3/public.pt3c pt1/public.pt1c) + NESTED_LOOP_PLAIN(pt1/public.pt1a pt1/public.pt1b pt1/public.pt1c) + HASH_JOIN(pt3/public.pt3a pt3/public.pt3b pt3/public.pt3c) + SEQ_SCAN(pt2/public.pt2a pt3/public.pt3a pt2/public.pt2b pt3/public.pt3b + pt2/public.pt2c pt3/public.pt3c) + INDEX_SCAN(pt1/public.pt1a public.pt1a_pkey pt1/public.pt1b public.pt1b_pkey + pt1/public.pt1c public.pt1c_pkey) + PARTITIONWISE((pt1 pt2 pt3)) + NO_GATHER(pt1/public.pt1a pt1/public.pt1b pt1/public.pt1c pt2/public.pt2a + pt2/public.pt2b pt2/public.pt2c pt3/public.pt3a pt3/public.pt3b pt3/public.pt3c) +(51 rows) + +COMMIT; +-- Can't force a partitionwise join with a mismatched table. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'PARTITIONWISE((pt1 ptmismatch))'; +EXPLAIN (PLAN_ADVICE, COSTS OFF) +SELECT * FROM pt1, ptmismatch WHERE pt1.id = ptmismatch.id; + QUERY PLAN +--------------------------------------------------------------------------- + Nested Loop + Disabled: true + -> Append + -> Seq Scan on pt1a pt1_1 + -> Seq Scan on pt1b pt1_2 + -> Seq Scan on pt1c pt1_3 + -> Append + -> Index Scan using ptmismatcha_pkey on ptmismatcha ptmismatch_1 + Index Cond: (id = pt1.id) + -> Index Scan using ptmismatchb_pkey on ptmismatchb ptmismatch_2 + Index Cond: (id = pt1.id) + Supplied Plan Advice: + PARTITIONWISE((pt1 ptmismatch)) /* matched, failed */ + Generated Plan Advice: + JOIN_ORDER(pt1 ptmismatch) + NESTED_LOOP_PLAIN(ptmismatch) + SEQ_SCAN(pt1/public.pt1a pt1/public.pt1b pt1/public.pt1c) + INDEX_SCAN(ptmismatch/public.ptmismatcha public.ptmismatcha_pkey + ptmismatch/public.ptmismatchb public.ptmismatchb_pkey) + PARTITIONWISE(pt1 ptmismatch) + NO_GATHER(pt1/public.pt1a pt1/public.pt1b pt1/public.pt1c + ptmismatch/public.ptmismatcha ptmismatch/public.ptmismatchb) +(22 rows) + +COMMIT; +-- Force join order for a particular branch of the partitionwise join with +-- and without mentioning the schema name. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'JOIN_ORDER(pt3/public.pt3a pt2/public.pt2a pt1/public.pt1a)'; +EXPLAIN (PLAN_ADVICE, COSTS OFF) +SELECT * FROM pt1, pt2, pt3 WHERE pt1.id = pt2.id AND pt2.id = pt3.id + AND val1 = 1 AND val2 = 1 AND val3 = 1; + QUERY PLAN +------------------------------------------------------------------------------------- + Append + -> Nested Loop + -> Hash Join + Hash Cond: (pt3_1.id = pt2_1.id) + -> Seq Scan on pt3a pt3_1 + Filter: (val3 = 1) + -> Hash + -> Seq Scan on pt2a pt2_1 + Filter: (val2 = 1) + -> Index Scan using pt1a_pkey on pt1a pt1_1 + Index Cond: (id = pt2_1.id) + Filter: (val1 = 1) + -> Nested Loop + -> Hash Join + Hash Cond: (pt2_2.id = pt3_2.id) + -> Seq Scan on pt2b pt2_2 + Filter: (val2 = 1) + -> Hash + -> Seq Scan on pt3b pt3_2 + Filter: (val3 = 1) + -> Index Scan using pt1b_pkey on pt1b pt1_2 + Index Cond: (id = pt2_2.id) + Filter: (val1 = 1) + -> Nested Loop + -> Hash Join + Hash Cond: (pt2_3.id = pt3_3.id) + -> Seq Scan on pt2c pt2_3 + Filter: (val2 = 1) + -> Hash + -> Seq Scan on pt3c pt3_3 + Filter: (val3 = 1) + -> Index Scan using pt1c_pkey on pt1c pt1_3 + Index Cond: (id = pt2_3.id) + Filter: (val1 = 1) + Supplied Plan Advice: + JOIN_ORDER(pt3/public.pt3a pt2/public.pt2a pt1/public.pt1a) /* matched */ + Generated Plan Advice: + JOIN_ORDER(pt3/public.pt3a pt2/public.pt2a pt1/public.pt1a) + JOIN_ORDER(pt2/public.pt2b pt3/public.pt3b pt1/public.pt1b) + JOIN_ORDER(pt2/public.pt2c pt3/public.pt3c pt1/public.pt1c) + NESTED_LOOP_PLAIN(pt1/public.pt1a pt1/public.pt1b pt1/public.pt1c) + HASH_JOIN(pt2/public.pt2a pt3/public.pt3b pt3/public.pt3c) + SEQ_SCAN(pt3/public.pt3a pt2/public.pt2a pt2/public.pt2b pt3/public.pt3b + pt2/public.pt2c pt3/public.pt3c) + INDEX_SCAN(pt1/public.pt1a public.pt1a_pkey pt1/public.pt1b public.pt1b_pkey + pt1/public.pt1c public.pt1c_pkey) + PARTITIONWISE((pt1 pt2 pt3)) + NO_GATHER(pt1/public.pt1a pt1/public.pt1b pt1/public.pt1c pt2/public.pt2a + pt2/public.pt2b pt2/public.pt2c pt3/public.pt3a pt3/public.pt3b pt3/public.pt3c) +(49 rows) + +SET LOCAL pg_plan_advice.advice = 'JOIN_ORDER(pt3/pt3a pt2/pt2a pt1/pt1a)'; +EXPLAIN (PLAN_ADVICE, COSTS OFF) +SELECT * FROM pt1, pt2, pt3 WHERE pt1.id = pt2.id AND pt2.id = pt3.id + AND val1 = 1 AND val2 = 1 AND val3 = 1; + QUERY PLAN +------------------------------------------------------------------------------------- + Append + -> Nested Loop + -> Hash Join + Hash Cond: (pt3_1.id = pt2_1.id) + -> Seq Scan on pt3a pt3_1 + Filter: (val3 = 1) + -> Hash + -> Seq Scan on pt2a pt2_1 + Filter: (val2 = 1) + -> Index Scan using pt1a_pkey on pt1a pt1_1 + Index Cond: (id = pt2_1.id) + Filter: (val1 = 1) + -> Nested Loop + -> Hash Join + Hash Cond: (pt2_2.id = pt3_2.id) + -> Seq Scan on pt2b pt2_2 + Filter: (val2 = 1) + -> Hash + -> Seq Scan on pt3b pt3_2 + Filter: (val3 = 1) + -> Index Scan using pt1b_pkey on pt1b pt1_2 + Index Cond: (id = pt2_2.id) + Filter: (val1 = 1) + -> Nested Loop + -> Hash Join + Hash Cond: (pt2_3.id = pt3_3.id) + -> Seq Scan on pt2c pt2_3 + Filter: (val2 = 1) + -> Hash + -> Seq Scan on pt3c pt3_3 + Filter: (val3 = 1) + -> Index Scan using pt1c_pkey on pt1c pt1_3 + Index Cond: (id = pt2_3.id) + Filter: (val1 = 1) + Supplied Plan Advice: + JOIN_ORDER(pt3/pt3a pt2/pt2a pt1/pt1a) /* matched */ + Generated Plan Advice: + JOIN_ORDER(pt3/public.pt3a pt2/public.pt2a pt1/public.pt1a) + JOIN_ORDER(pt2/public.pt2b pt3/public.pt3b pt1/public.pt1b) + JOIN_ORDER(pt2/public.pt2c pt3/public.pt3c pt1/public.pt1c) + NESTED_LOOP_PLAIN(pt1/public.pt1a pt1/public.pt1b pt1/public.pt1c) + HASH_JOIN(pt2/public.pt2a pt3/public.pt3b pt3/public.pt3c) + SEQ_SCAN(pt3/public.pt3a pt2/public.pt2a pt2/public.pt2b pt3/public.pt3b + pt2/public.pt2c pt3/public.pt3c) + INDEX_SCAN(pt1/public.pt1a public.pt1a_pkey pt1/public.pt1b public.pt1b_pkey + pt1/public.pt1c public.pt1c_pkey) + PARTITIONWISE((pt1 pt2 pt3)) + NO_GATHER(pt1/public.pt1a pt1/public.pt1b pt1/public.pt1c pt2/public.pt2a + pt2/public.pt2b pt2/public.pt2c pt3/public.pt3a pt3/public.pt3b pt3/public.pt3c) +(49 rows) + +COMMIT; diff --git a/contrib/pg_plan_advice/expected/prepared.out b/contrib/pg_plan_advice/expected/prepared.out new file mode 100644 index 00000000000..07a7c623659 --- /dev/null +++ b/contrib/pg_plan_advice/expected/prepared.out @@ -0,0 +1,67 @@ +LOAD 'pg_plan_advice'; +SET max_parallel_workers_per_gather = 0; +CREATE TABLE ptab (id integer, val text) WITH (autovacuum_enabled = false); +SET pg_plan_advice.always_store_advice_details = false; +-- Not prepared, so advice should be generated. +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM ptab; + QUERY PLAN +------------------------ + Seq Scan on ptab + Generated Plan Advice: + SEQ_SCAN(ptab) + NO_GATHER(ptab) +(4 rows) + +-- Prepared, so advice should not be generated. +PREPARE pt1 AS SELECT * FROM ptab; +EXPLAIN (COSTS OFF, PLAN_ADVICE) EXECUTE pt1; + QUERY PLAN +------------------ + Seq Scan on ptab +(1 row) + +SET pg_plan_advice.always_store_advice_details = true; +-- Prepared, but always_store_advice_details = true, so should show advice. +PREPARE pt2 AS SELECT * FROM ptab; +EXPLAIN (COSTS OFF, PLAN_ADVICE) EXECUTE pt2; + QUERY PLAN +------------------------ + Seq Scan on ptab + Generated Plan Advice: + SEQ_SCAN(ptab) + NO_GATHER(ptab) +(4 rows) + +-- Not prepared, so feedback should be generated. +SET pg_plan_advice.always_store_advice_details = false; +SET pg_plan_advice.advice = 'SEQ_SCAN(ptab)'; +EXPLAIN (COSTS OFF) +SELECT * FROM ptab; + QUERY PLAN +-------------------------------- + Seq Scan on ptab + Supplied Plan Advice: + SEQ_SCAN(ptab) /* matched */ +(3 rows) + +-- Prepared, so advice should not be generated. +PREPARE pt3 AS SELECT * FROM ptab; +EXPLAIN (COSTS OFF) EXECUTE pt1; + QUERY PLAN +------------------ + Seq Scan on ptab +(1 row) + +SET pg_plan_advice.always_store_advice_details = true; +-- Prepared, but always_store_advice_details = true, so should show feedback. +PREPARE pt4 AS SELECT * FROM ptab; +EXPLAIN (COSTS OFF, PLAN_ADVICE) EXECUTE pt2; + QUERY PLAN +------------------------ + Seq Scan on ptab + Generated Plan Advice: + SEQ_SCAN(ptab) + NO_GATHER(ptab) +(4 rows) + diff --git a/contrib/pg_plan_advice/expected/scan.out b/contrib/pg_plan_advice/expected/scan.out new file mode 100644 index 00000000000..3f9e13b6d41 --- /dev/null +++ b/contrib/pg_plan_advice/expected/scan.out @@ -0,0 +1,757 @@ +LOAD 'pg_plan_advice'; +SET max_parallel_workers_per_gather = 0; +SET seq_page_cost = 0.1; +SET random_page_cost = 0.1; +SET cpu_tuple_cost = 0; +SET cpu_index_tuple_cost = 0; +CREATE TABLE scan_table (a int primary key, b text) + WITH (autovacuum_enabled = false); +INSERT INTO scan_table + SELECT g, 'some text ' || g FROM generate_series(1, 100000) g; +CREATE INDEX scan_table_b ON scan_table USING brin (b); +VACUUM ANALYZE scan_table; +-- Sequential scan +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table; + QUERY PLAN +------------------------- + Seq Scan on scan_table + Generated Plan Advice: + SEQ_SCAN(scan_table) + NO_GATHER(scan_table) +(4 rows) + +-- Index scan +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1; + QUERY PLAN +------------------------------------------------- + Index Scan using scan_table_pkey on scan_table + Index Cond: (a = 1) + Generated Plan Advice: + INDEX_SCAN(scan_table public.scan_table_pkey) + NO_GATHER(scan_table) +(5 rows) + +-- Index-only scan +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT a FROM scan_table WHERE a = 1; + QUERY PLAN +------------------------------------------------------ + Index Only Scan using scan_table_pkey on scan_table + Index Cond: (a = 1) + Generated Plan Advice: + INDEX_ONLY_SCAN(scan_table public.scan_table_pkey) + NO_GATHER(scan_table) +(5 rows) + +-- Bitmap heap scan +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table + WHERE b > 'some text 8'; + QUERY PLAN +----------------------------------------------- + Bitmap Heap Scan on scan_table + Recheck Cond: (b > 'some text 8'::text) + -> Bitmap Index Scan on scan_table_b + Index Cond: (b > 'some text 8'::text) + Generated Plan Advice: + BITMAP_HEAP_SCAN(scan_table) + NO_GATHER(scan_table) +(7 rows) + +-- TID scan +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE ctid = '(0,1)'; + QUERY PLAN +----------------------------------- + Tid Scan on scan_table + TID Cond: (ctid = '(0,1)'::tid) + Generated Plan Advice: + TID_SCAN(scan_table) + NO_GATHER(scan_table) +(5 rows) + +-- TID range scan +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table + WHERE ctid > '(1,1)' AND ctid < '(2,1)'; + QUERY PLAN +--------------------------------------------------------------- + Tid Range Scan on scan_table + TID Cond: ((ctid > '(1,1)'::tid) AND (ctid < '(2,1)'::tid)) + Generated Plan Advice: + TID_SCAN(scan_table) + NO_GATHER(scan_table) +(5 rows) + +-- Try forcing each of our test queries to use the scan type they +-- wanted to use anyway. This should succeed. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(scan_table)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table; + QUERY PLAN +-------------------------------------- + Seq Scan on scan_table + Supplied Plan Advice: + SEQ_SCAN(scan_table) /* matched */ + Generated Plan Advice: + SEQ_SCAN(scan_table) + NO_GATHER(scan_table) +(6 rows) + +SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table scan_table_pkey)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1; + QUERY PLAN +-------------------------------------------------------- + Index Scan using scan_table_pkey on scan_table + Index Cond: (a = 1) + Supplied Plan Advice: + INDEX_SCAN(scan_table scan_table_pkey) /* matched */ + Generated Plan Advice: + INDEX_SCAN(scan_table public.scan_table_pkey) + NO_GATHER(scan_table) +(7 rows) + +SET LOCAL pg_plan_advice.advice = 'INDEX_ONLY_SCAN(scan_table scan_table_pkey)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT a FROM scan_table WHERE a = 1; + QUERY PLAN +------------------------------------------------------------- + Index Only Scan using scan_table_pkey on scan_table + Index Cond: (a = 1) + Supplied Plan Advice: + INDEX_ONLY_SCAN(scan_table scan_table_pkey) /* matched */ + Generated Plan Advice: + INDEX_ONLY_SCAN(scan_table public.scan_table_pkey) + NO_GATHER(scan_table) +(7 rows) + +SET LOCAL pg_plan_advice.advice = 'BITMAP_HEAP_SCAN(scan_table)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table + WHERE b > 'some text 8'; + QUERY PLAN +----------------------------------------------- + Bitmap Heap Scan on scan_table + Recheck Cond: (b > 'some text 8'::text) + -> Bitmap Index Scan on scan_table_b + Index Cond: (b > 'some text 8'::text) + Supplied Plan Advice: + BITMAP_HEAP_SCAN(scan_table) /* matched */ + Generated Plan Advice: + BITMAP_HEAP_SCAN(scan_table) + NO_GATHER(scan_table) +(9 rows) + +SET LOCAL pg_plan_advice.advice = 'TID_SCAN(scan_table)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE ctid = '(0,1)'; + QUERY PLAN +-------------------------------------- + Tid Scan on scan_table + TID Cond: (ctid = '(0,1)'::tid) + Supplied Plan Advice: + TID_SCAN(scan_table) /* matched */ + Generated Plan Advice: + TID_SCAN(scan_table) + NO_GATHER(scan_table) +(7 rows) + +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table + WHERE ctid > '(1,1)' AND ctid < '(2,1)'; + QUERY PLAN +--------------------------------------------------------------- + Tid Range Scan on scan_table + TID Cond: ((ctid > '(1,1)'::tid) AND (ctid < '(2,1)'::tid)) + Supplied Plan Advice: + TID_SCAN(scan_table) /* matched */ + Generated Plan Advice: + TID_SCAN(scan_table) + NO_GATHER(scan_table) +(7 rows) + +COMMIT; +-- Try to force a full scan of the table to use some other scan type. All +-- of these will fail. An index scan or bitmap heap scan could potentially +-- generate the correct answer, but the planner does not even consider these +-- possibilities due to the lack of a WHERE clause. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table scan_table_pkey)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table; + QUERY PLAN +---------------------------------------------------------------- + Seq Scan on scan_table + Disabled: true + Supplied Plan Advice: + INDEX_SCAN(scan_table scan_table_pkey) /* matched, failed */ + Generated Plan Advice: + SEQ_SCAN(scan_table) + NO_GATHER(scan_table) +(7 rows) + +SET LOCAL pg_plan_advice.advice = 'INDEX_ONLY_SCAN(scan_table scan_table_pkey)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table; + QUERY PLAN +--------------------------------------------------------------------- + Seq Scan on scan_table + Disabled: true + Supplied Plan Advice: + INDEX_ONLY_SCAN(scan_table scan_table_pkey) /* matched, failed */ + Generated Plan Advice: + SEQ_SCAN(scan_table) + NO_GATHER(scan_table) +(7 rows) + +SET LOCAL pg_plan_advice.advice = 'BITMAP_HEAP_SCAN(scan_table)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table; + QUERY PLAN +------------------------------------------------------ + Seq Scan on scan_table + Disabled: true + Supplied Plan Advice: + BITMAP_HEAP_SCAN(scan_table) /* matched, failed */ + Generated Plan Advice: + SEQ_SCAN(scan_table) + NO_GATHER(scan_table) +(7 rows) + +SET LOCAL pg_plan_advice.advice = 'TID_SCAN(scan_table)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table; + QUERY PLAN +---------------------------------------------- + Seq Scan on scan_table + Disabled: true + Supplied Plan Advice: + TID_SCAN(scan_table) /* matched, failed */ + Generated Plan Advice: + SEQ_SCAN(scan_table) + NO_GATHER(scan_table) +(7 rows) + +COMMIT; +-- Try again to force index use. This should now succeed for the INDEX_SCAN +-- and BITMAP_HEAP_SCAN, but the INDEX_ONLY_SCAN can't be forced because the +-- query fetches columns not included in the index. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table scan_table_pkey)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a > 0; + QUERY PLAN +-------------------------------------------------------- + Index Scan using scan_table_pkey on scan_table + Index Cond: (a > 0) + Supplied Plan Advice: + INDEX_SCAN(scan_table scan_table_pkey) /* matched */ + Generated Plan Advice: + INDEX_SCAN(scan_table public.scan_table_pkey) + NO_GATHER(scan_table) +(7 rows) + +SET LOCAL pg_plan_advice.advice = 'INDEX_ONLY_SCAN(scan_table scan_table_pkey)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a > 0; + QUERY PLAN +--------------------------------------------------------------------- + Seq Scan on scan_table + Disabled: true + Filter: (a > 0) + Supplied Plan Advice: + INDEX_ONLY_SCAN(scan_table scan_table_pkey) /* matched, failed */ + Generated Plan Advice: + SEQ_SCAN(scan_table) + NO_GATHER(scan_table) +(8 rows) + +SET LOCAL pg_plan_advice.advice = 'BITMAP_HEAP_SCAN(scan_table)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a > 0; + QUERY PLAN +---------------------------------------------- + Bitmap Heap Scan on scan_table + Recheck Cond: (a > 0) + -> Bitmap Index Scan on scan_table_pkey + Index Cond: (a > 0) + Supplied Plan Advice: + BITMAP_HEAP_SCAN(scan_table) /* matched */ + Generated Plan Advice: + BITMAP_HEAP_SCAN(scan_table) + NO_GATHER(scan_table) +(9 rows) + +COMMIT; +-- We can force a primary key lookup to use a sequential scan, but we +-- can't force it to use an index-only scan (due to the column list) +-- or a TID scan (due to the absence of a TID qual). +BEGIN; +SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(scan_table)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1; + QUERY PLAN +-------------------------------------- + Seq Scan on scan_table + Filter: (a = 1) + Supplied Plan Advice: + SEQ_SCAN(scan_table) /* matched */ + Generated Plan Advice: + SEQ_SCAN(scan_table) + NO_GATHER(scan_table) +(7 rows) + +SET LOCAL pg_plan_advice.advice = 'INDEX_ONLY_SCAN(scan_table scan_table_pkey)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1; + QUERY PLAN +--------------------------------------------------------------------- + Index Scan using scan_table_pkey on scan_table + Disabled: true + Index Cond: (a = 1) + Supplied Plan Advice: + INDEX_ONLY_SCAN(scan_table scan_table_pkey) /* matched, failed */ + Generated Plan Advice: + INDEX_SCAN(scan_table public.scan_table_pkey) + NO_GATHER(scan_table) +(8 rows) + +SET LOCAL pg_plan_advice.advice = 'TID_SCAN(scan_table)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1; + QUERY PLAN +------------------------------------------------- + Index Scan using scan_table_pkey on scan_table + Disabled: true + Index Cond: (a = 1) + Supplied Plan Advice: + TID_SCAN(scan_table) /* matched, failed */ + Generated Plan Advice: + INDEX_SCAN(scan_table public.scan_table_pkey) + NO_GATHER(scan_table) +(8 rows) + +COMMIT; +-- We can forcibly downgrade an index-only scan to an index scan, but we can't +-- force the use of an index that the planner thinks is inapplicable. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table scan_table_pkey)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1; + QUERY PLAN +-------------------------------------------------------- + Index Scan using scan_table_pkey on scan_table + Index Cond: (a = 1) + Supplied Plan Advice: + INDEX_SCAN(scan_table scan_table_pkey) /* matched */ + Generated Plan Advice: + INDEX_SCAN(scan_table public.scan_table_pkey) + NO_GATHER(scan_table) +(7 rows) + +SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table public.scan_table_pkey)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1; + QUERY PLAN +--------------------------------------------------------------- + Index Scan using scan_table_pkey on scan_table + Index Cond: (a = 1) + Supplied Plan Advice: + INDEX_SCAN(scan_table public.scan_table_pkey) /* matched */ + Generated Plan Advice: + INDEX_SCAN(scan_table public.scan_table_pkey) + NO_GATHER(scan_table) +(7 rows) + +SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table scan_table_b)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1; + QUERY PLAN +------------------------------------------------------------- + Index Scan using scan_table_pkey on scan_table + Disabled: true + Index Cond: (a = 1) + Supplied Plan Advice: + INDEX_SCAN(scan_table scan_table_b) /* matched, failed */ + Generated Plan Advice: + INDEX_SCAN(scan_table public.scan_table_pkey) + NO_GATHER(scan_table) +(8 rows) + +COMMIT; +-- We can force the use of a sequential scan in place of a bitmap heap scan, +-- but a plain index scan on a BRIN index is not possible. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(scan_table)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table + WHERE b > 'some text 8'; + QUERY PLAN +-------------------------------------- + Seq Scan on scan_table + Filter: (b > 'some text 8'::text) + Supplied Plan Advice: + SEQ_SCAN(scan_table) /* matched */ + Generated Plan Advice: + SEQ_SCAN(scan_table) + NO_GATHER(scan_table) +(7 rows) + +SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table scan_table_b)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1; + QUERY PLAN +------------------------------------------------------------- + Index Scan using scan_table_pkey on scan_table + Disabled: true + Index Cond: (a = 1) + Supplied Plan Advice: + INDEX_SCAN(scan_table scan_table_b) /* matched, failed */ + Generated Plan Advice: + INDEX_SCAN(scan_table public.scan_table_pkey) + NO_GATHER(scan_table) +(8 rows) + +COMMIT; +-- We can force the use of a sequential scan rather than a TID scan or +-- TID range scan. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(scan_table)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE ctid = '(0,1)'; + QUERY PLAN +-------------------------------------- + Seq Scan on scan_table + Filter: (ctid = '(0,1)'::tid) + Supplied Plan Advice: + SEQ_SCAN(scan_table) /* matched */ + Generated Plan Advice: + SEQ_SCAN(scan_table) + NO_GATHER(scan_table) +(7 rows) + +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table + WHERE ctid > '(1,1)' AND ctid < '(2,1)'; + QUERY PLAN +------------------------------------------------------------- + Seq Scan on scan_table + Filter: ((ctid > '(1,1)'::tid) AND (ctid < '(2,1)'::tid)) + Supplied Plan Advice: + SEQ_SCAN(scan_table) /* matched */ + Generated Plan Advice: + SEQ_SCAN(scan_table) + NO_GATHER(scan_table) +(7 rows) + +COMMIT; +-- Test more complex scenarios with index scans. +BEGIN; +-- Should still work if we mention the schema. +SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table public.scan_table_pkey)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1; + QUERY PLAN +--------------------------------------------------------------- + Index Scan using scan_table_pkey on scan_table + Index Cond: (a = 1) + Supplied Plan Advice: + INDEX_SCAN(scan_table public.scan_table_pkey) /* matched */ + Generated Plan Advice: + INDEX_SCAN(scan_table public.scan_table_pkey) + NO_GATHER(scan_table) +(7 rows) + +-- But not if we mention the wrong schema. +SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table cilbup.scan_table_pkey)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1; + QUERY PLAN +------------------------------------------------------------------------------------- + Index Scan using scan_table_pkey on scan_table + Index Cond: (a = 1) + Supplied Plan Advice: + INDEX_SCAN(scan_table cilbup.scan_table_pkey) /* matched, inapplicable, failed */ + Generated Plan Advice: + INDEX_SCAN(scan_table public.scan_table_pkey) + NO_GATHER(scan_table) +(7 rows) + +-- It's OK to repeat the same advice. +SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table scan_table_pkey scan_table scan_table_pkey)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1; + QUERY PLAN +-------------------------------------------------------- + Index Scan using scan_table_pkey on scan_table + Index Cond: (a = 1) + Supplied Plan Advice: + INDEX_SCAN(scan_table scan_table_pkey) /* matched */ + INDEX_SCAN(scan_table scan_table_pkey) /* matched */ + Generated Plan Advice: + INDEX_SCAN(scan_table public.scan_table_pkey) + NO_GATHER(scan_table) +(8 rows) + +-- But it doesn't work if the index target is even notionally different. +SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table scan_table_pkey scan_table public.scan_table_pkey)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1; + QUERY PLAN +---------------------------------------------------------------------------- + Index Scan using scan_table_pkey on scan_table + Index Cond: (a = 1) + Supplied Plan Advice: + INDEX_SCAN(scan_table scan_table_pkey) /* matched, conflicting */ + INDEX_SCAN(scan_table public.scan_table_pkey) /* matched, conflicting */ + Generated Plan Advice: + INDEX_SCAN(scan_table public.scan_table_pkey) + NO_GATHER(scan_table) +(8 rows) + +COMMIT; +-- Test assorted incorrect advice. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(nothing)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT a FROM scan_table WHERE a = 1; + QUERY PLAN +------------------------------------------------------ + Index Only Scan using scan_table_pkey on scan_table + Index Cond: (a = 1) + Supplied Plan Advice: + SEQ_SCAN(nothing) /* not matched */ + Generated Plan Advice: + INDEX_ONLY_SCAN(scan_table public.scan_table_pkey) + NO_GATHER(scan_table) +(7 rows) + +SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(nothing whatsoever)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT a FROM scan_table WHERE a = 1; + QUERY PLAN +------------------------------------------------------ + Index Only Scan using scan_table_pkey on scan_table + Index Cond: (a = 1) + Supplied Plan Advice: + INDEX_SCAN(nothing whatsoever) /* not matched */ + Generated Plan Advice: + INDEX_ONLY_SCAN(scan_table public.scan_table_pkey) + NO_GATHER(scan_table) +(7 rows) + +SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table bogus)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT a FROM scan_table WHERE a = 1; + QUERY PLAN +-------------------------------------------------------------------- + Index Only Scan using scan_table_pkey on scan_table + Index Cond: (a = 1) + Supplied Plan Advice: + INDEX_SCAN(scan_table bogus) /* matched, inapplicable, failed */ + Generated Plan Advice: + INDEX_ONLY_SCAN(scan_table public.scan_table_pkey) + NO_GATHER(scan_table) +(7 rows) + +SET LOCAL pg_plan_advice.advice = 'INDEX_ONLY_SCAN(nothing whatsoever)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT a FROM scan_table WHERE a = 1; + QUERY PLAN +--------------------------------------------------------- + Index Only Scan using scan_table_pkey on scan_table + Index Cond: (a = 1) + Supplied Plan Advice: + INDEX_ONLY_SCAN(nothing whatsoever) /* not matched */ + Generated Plan Advice: + INDEX_ONLY_SCAN(scan_table public.scan_table_pkey) + NO_GATHER(scan_table) +(7 rows) + +SET LOCAL pg_plan_advice.advice = 'INDEX_ONLY_SCAN(scan_table bogus)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT a FROM scan_table WHERE a = 1; + QUERY PLAN +------------------------------------------------------------------------- + Index Only Scan using scan_table_pkey on scan_table + Index Cond: (a = 1) + Supplied Plan Advice: + INDEX_ONLY_SCAN(scan_table bogus) /* matched, inapplicable, failed */ + Generated Plan Advice: + INDEX_ONLY_SCAN(scan_table public.scan_table_pkey) + NO_GATHER(scan_table) +(7 rows) + +COMMIT; +-- Test our ability to refer to multiple instances of the same alias. +BEGIN; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM (generate_series(1,10) g LEFT JOIN scan_table s ON g = s.a) x + LEFT JOIN scan_table s ON g = s.a; + QUERY PLAN +------------------------------------------------------------------- + Nested Loop Left Join + -> Nested Loop Left Join + -> Function Scan on generate_series g + -> Index Scan using scan_table_pkey on scan_table s + Index Cond: (a = g.g) + -> Index Scan using scan_table_pkey on scan_table s_1 + Index Cond: (a = g.g) + Generated Plan Advice: + JOIN_ORDER(g s s#2) + NESTED_LOOP_PLAIN(s s#2) + INDEX_SCAN(s public.scan_table_pkey s#2 public.scan_table_pkey) + NO_GATHER(g s s#2) +(12 rows) + +SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(s)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM (generate_series(1,10) g LEFT JOIN scan_table s ON g = s.a) x + LEFT JOIN scan_table s ON g = s.a; + QUERY PLAN +---------------------------------------------------------- + Nested Loop Left Join + -> Hash Left Join + Hash Cond: (g.g = s.a) + -> Function Scan on generate_series g + -> Hash + -> Seq Scan on scan_table s + -> Index Scan using scan_table_pkey on scan_table s_1 + Index Cond: (a = g.g) + Supplied Plan Advice: + SEQ_SCAN(s) /* matched */ + Generated Plan Advice: + JOIN_ORDER(g s s#2) + NESTED_LOOP_PLAIN(s#2) + HASH_JOIN(s) + SEQ_SCAN(s) + INDEX_SCAN(s#2 public.scan_table_pkey) + NO_GATHER(g s s#2) +(17 rows) + +SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(s#2)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM (generate_series(1,10) g LEFT JOIN scan_table s ON g = s.a) x + LEFT JOIN scan_table s ON g = s.a; + QUERY PLAN +-------------------------------------------------------------- + Hash Left Join + Hash Cond: (g.g = s_1.a) + -> Nested Loop Left Join + -> Function Scan on generate_series g + -> Index Scan using scan_table_pkey on scan_table s + Index Cond: (a = g.g) + -> Hash + -> Seq Scan on scan_table s_1 + Supplied Plan Advice: + SEQ_SCAN(s#2) /* matched */ + Generated Plan Advice: + JOIN_ORDER(g s s#2) + NESTED_LOOP_PLAIN(s) + HASH_JOIN(s#2) + SEQ_SCAN(s#2) + INDEX_SCAN(s public.scan_table_pkey) + NO_GATHER(g s s#2) +(17 rows) + +SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(s) SEQ_SCAN(s#2)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM (generate_series(1,10) g LEFT JOIN scan_table s ON g = s.a) x + LEFT JOIN scan_table s ON g = s.a; + QUERY PLAN +------------------------------------------------ + Hash Left Join + Hash Cond: (g.g = s_1.a) + -> Hash Left Join + Hash Cond: (g.g = s.a) + -> Function Scan on generate_series g + -> Hash + -> Seq Scan on scan_table s + -> Hash + -> Seq Scan on scan_table s_1 + Supplied Plan Advice: + SEQ_SCAN(s) /* matched */ + SEQ_SCAN(s#2) /* matched */ + Generated Plan Advice: + JOIN_ORDER(g s s#2) + HASH_JOIN(s s#2) + SEQ_SCAN(s s#2) + NO_GATHER(g s s#2) +(17 rows) + +COMMIT; +-- Test our ability to refer to scans within a subquery. +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM (SELECT * FROM scan_table s WHERE a = 1 OFFSET 0) x; + QUERY PLAN +-------------------------------------------------- + Index Scan using scan_table_pkey on scan_table s + Index Cond: (a = 1) + Generated Plan Advice: + INDEX_SCAN(s@x public.scan_table_pkey) + NO_GATHER(x s@x) +(5 rows) + +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM (SELECT * FROM scan_table s WHERE a = 1 OFFSET 0); + QUERY PLAN +--------------------------------------------------------- + Index Scan using scan_table_pkey on scan_table s + Index Cond: (a = 1) + Generated Plan Advice: + INDEX_SCAN(s@unnamed_subquery public.scan_table_pkey) + NO_GATHER(unnamed_subquery s@unnamed_subquery) +(5 rows) + +BEGIN; +-- Should not match. +SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(s)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM (SELECT * FROM scan_table s WHERE a = 1 OFFSET 0) x; + QUERY PLAN +-------------------------------------------------- + Index Scan using scan_table_pkey on scan_table s + Index Cond: (a = 1) + Supplied Plan Advice: + SEQ_SCAN(s) /* not matched */ + Generated Plan Advice: + INDEX_SCAN(s@x public.scan_table_pkey) + NO_GATHER(x s@x) +(7 rows) + +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM (SELECT * FROM scan_table s WHERE a = 1 OFFSET 0); + QUERY PLAN +--------------------------------------------------------- + Index Scan using scan_table_pkey on scan_table s + Index Cond: (a = 1) + Supplied Plan Advice: + SEQ_SCAN(s) /* not matched */ + Generated Plan Advice: + INDEX_SCAN(s@unnamed_subquery public.scan_table_pkey) + NO_GATHER(unnamed_subquery s@unnamed_subquery) +(7 rows) + +-- Should match first query only. +SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(s@x)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM (SELECT * FROM scan_table s WHERE a = 1 OFFSET 0) x; + QUERY PLAN +------------------------------- + Seq Scan on scan_table s + Filter: (a = 1) + Supplied Plan Advice: + SEQ_SCAN(s@x) /* matched */ + Generated Plan Advice: + SEQ_SCAN(s@x) + NO_GATHER(x s@x) +(7 rows) + +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM (SELECT * FROM scan_table s WHERE a = 1 OFFSET 0); + QUERY PLAN +--------------------------------------------------------- + Index Scan using scan_table_pkey on scan_table s + Index Cond: (a = 1) + Supplied Plan Advice: + SEQ_SCAN(s@x) /* not matched */ + Generated Plan Advice: + INDEX_SCAN(s@unnamed_subquery public.scan_table_pkey) + NO_GATHER(unnamed_subquery s@unnamed_subquery) +(7 rows) + +-- Should match second query only. +SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(s@unnamed_subquery)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM (SELECT * FROM scan_table s WHERE a = 1 OFFSET 0) x; + QUERY PLAN +-------------------------------------------------- + Index Scan using scan_table_pkey on scan_table s + Index Cond: (a = 1) + Supplied Plan Advice: + SEQ_SCAN(s@unnamed_subquery) /* not matched */ + Generated Plan Advice: + INDEX_SCAN(s@x public.scan_table_pkey) + NO_GATHER(x s@x) +(7 rows) + +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM (SELECT * FROM scan_table s WHERE a = 1 OFFSET 0); + QUERY PLAN +-------------------------------------------------- + Seq Scan on scan_table s + Filter: (a = 1) + Supplied Plan Advice: + SEQ_SCAN(s@unnamed_subquery) /* matched */ + Generated Plan Advice: + SEQ_SCAN(s@unnamed_subquery) + NO_GATHER(unnamed_subquery s@unnamed_subquery) +(7 rows) + +COMMIT; diff --git a/contrib/pg_plan_advice/expected/semijoin.out b/contrib/pg_plan_advice/expected/semijoin.out new file mode 100644 index 00000000000..5551c028a1f --- /dev/null +++ b/contrib/pg_plan_advice/expected/semijoin.out @@ -0,0 +1,377 @@ +LOAD 'pg_plan_advice'; +SET max_parallel_workers_per_gather = 0; +CREATE TABLE sj_wide ( + id integer primary key, + val1 integer, + padding text storage plain +) WITH (autovacuum_enabled = false); +INSERT INTO sj_wide + SELECT g, g%10+1, repeat(' ', 300) FROM generate_series(1, 1000) g; +CREATE INDEX ON sj_wide (val1); +VACUUM ANALYZE sj_wide; +CREATE TABLE sj_narrow ( + id integer primary key, + val1 integer +) WITH (autovacuum_enabled = false); +INSERT INTO sj_narrow + SELECT g, g%10+1 FROM generate_series(1, 1000) g; +CREATE INDEX ON sj_narrow (val1); +VACUUM ANALYZE sj_narrow; +-- We expect this to make the VALUES list unique and use index lookups to +-- find the rows in sj_wide, so as to avoid a full scan of sj_wide. +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM sj_wide + WHERE (id, val1) IN (VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5)); + QUERY PLAN +----------------------------------------------------------- + Nested Loop + -> HashAggregate + Group Key: "*VALUES*".column1, "*VALUES*".column2 + -> Values Scan on "*VALUES*" + -> Index Scan using sj_wide_pkey on sj_wide + Index Cond: (id = "*VALUES*".column1) + Filter: (val1 = "*VALUES*".column2) + Generated Plan Advice: + JOIN_ORDER("*VALUES*" sj_wide) + NESTED_LOOP_PLAIN(sj_wide) + INDEX_SCAN(sj_wide public.sj_wide_pkey) + SEMIJOIN_UNIQUE("*VALUES*") + NO_GATHER(sj_wide "*VALUES*") +(13 rows) + +-- If we ask for a unique semijoin, we should get the same plan as with +-- no advice. If we ask for a non-unique semijoin, we should see a Semi +-- Join operation in the plan tree. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'semijoin_unique("*VALUES*")'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM sj_wide + WHERE (id, val1) IN (VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5)); + QUERY PLAN +----------------------------------------------------------- + Nested Loop + -> HashAggregate + Group Key: "*VALUES*".column1, "*VALUES*".column2 + -> Values Scan on "*VALUES*" + -> Index Scan using sj_wide_pkey on sj_wide + Index Cond: (id = "*VALUES*".column1) + Filter: (val1 = "*VALUES*".column2) + Supplied Plan Advice: + SEMIJOIN_UNIQUE("*VALUES*") /* matched */ + Generated Plan Advice: + JOIN_ORDER("*VALUES*" sj_wide) + NESTED_LOOP_PLAIN(sj_wide) + INDEX_SCAN(sj_wide public.sj_wide_pkey) + SEMIJOIN_UNIQUE("*VALUES*") + NO_GATHER(sj_wide "*VALUES*") +(15 rows) + +SET LOCAL pg_plan_advice.advice = 'semijoin_non_unique("*VALUES*")'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM sj_wide + WHERE (id, val1) IN (VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5)); + QUERY PLAN +------------------------------------------------------------------------------------------ + Hash Semi Join + Hash Cond: ((sj_wide.id = "*VALUES*".column1) AND (sj_wide.val1 = "*VALUES*".column2)) + -> Seq Scan on sj_wide + -> Hash + -> Values Scan on "*VALUES*" + Supplied Plan Advice: + SEMIJOIN_NON_UNIQUE("*VALUES*") /* matched */ + Generated Plan Advice: + JOIN_ORDER(sj_wide "*VALUES*") + HASH_JOIN("*VALUES*") + SEQ_SCAN(sj_wide) + SEMIJOIN_NON_UNIQUE("*VALUES*") + NO_GATHER(sj_wide "*VALUES*") +(13 rows) + +COMMIT; +-- Because this table is narrower than the previous one, a sequential scan +-- is less expensive, and we choose a straightforward Semi Join plan by +-- default. (Note that this is also very sensitive to the length of the IN +-- list, which affects how many index lookups the alternative plan will need.) +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM sj_narrow + WHERE (id, val1) IN (VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5)); + QUERY PLAN +---------------------------------------------------------------------------------------------- + Hash Semi Join + Hash Cond: ((sj_narrow.id = "*VALUES*".column1) AND (sj_narrow.val1 = "*VALUES*".column2)) + -> Seq Scan on sj_narrow + -> Hash + -> Values Scan on "*VALUES*" + Generated Plan Advice: + JOIN_ORDER(sj_narrow "*VALUES*") + HASH_JOIN("*VALUES*") + SEQ_SCAN(sj_narrow) + SEMIJOIN_NON_UNIQUE("*VALUES*") + NO_GATHER(sj_narrow "*VALUES*") +(11 rows) + +-- Here, we expect advising a unique semijoin to swith to the same plan that +-- we got with sj_wide, and advising a non-unique semijoin should not change +-- the plan. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'semijoin_unique("*VALUES*")'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM sj_narrow + WHERE (id, val1) IN (VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5)); + QUERY PLAN +---------------------------------------------------------------------------------------------- + Hash Join + Hash Cond: ((sj_narrow.id = "*VALUES*".column1) AND (sj_narrow.val1 = "*VALUES*".column2)) + -> Seq Scan on sj_narrow + -> Hash + -> HashAggregate + Group Key: "*VALUES*".column1, "*VALUES*".column2 + -> Values Scan on "*VALUES*" + Supplied Plan Advice: + SEMIJOIN_UNIQUE("*VALUES*") /* matched */ + Generated Plan Advice: + JOIN_ORDER(sj_narrow "*VALUES*") + HASH_JOIN("*VALUES*") + SEQ_SCAN(sj_narrow) + SEMIJOIN_UNIQUE("*VALUES*") + NO_GATHER(sj_narrow "*VALUES*") +(15 rows) + +SET LOCAL pg_plan_advice.advice = 'semijoin_non_unique("*VALUES*")'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM sj_narrow + WHERE (id, val1) IN (VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5)); + QUERY PLAN +---------------------------------------------------------------------------------------------- + Hash Semi Join + Hash Cond: ((sj_narrow.id = "*VALUES*".column1) AND (sj_narrow.val1 = "*VALUES*".column2)) + -> Seq Scan on sj_narrow + -> Hash + -> Values Scan on "*VALUES*" + Supplied Plan Advice: + SEMIJOIN_NON_UNIQUE("*VALUES*") /* matched */ + Generated Plan Advice: + JOIN_ORDER(sj_narrow "*VALUES*") + HASH_JOIN("*VALUES*") + SEQ_SCAN(sj_narrow) + SEMIJOIN_NON_UNIQUE("*VALUES*") + NO_GATHER(sj_narrow "*VALUES*") +(13 rows) + +COMMIT; +-- In the above example, we made the outer side of the join unique, but here, +-- we should make the inner side unique. +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM generate_series(1,1000) g + WHERE g in (select val1 from sj_narrow); + QUERY PLAN +------------------------------------------ + Hash Join + Hash Cond: (g.g = sj_narrow.val1) + -> Function Scan on generate_series g + -> Hash + -> HashAggregate + Group Key: sj_narrow.val1 + -> Seq Scan on sj_narrow + Generated Plan Advice: + JOIN_ORDER(g sj_narrow) + HASH_JOIN(sj_narrow) + SEQ_SCAN(sj_narrow) + SEMIJOIN_UNIQUE(sj_narrow) + NO_GATHER(g sj_narrow) +(13 rows) + +-- We should be able to force a plan with or without the make-unique strategy, +-- with either side as the driving table. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'semijoin_unique(sj_narrow)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM generate_series(1,1000) g + WHERE g in (select val1 from sj_narrow); + QUERY PLAN +-------------------------------------------- + Hash Join + Hash Cond: (g.g = sj_narrow.val1) + -> Function Scan on generate_series g + -> Hash + -> HashAggregate + Group Key: sj_narrow.val1 + -> Seq Scan on sj_narrow + Supplied Plan Advice: + SEMIJOIN_UNIQUE(sj_narrow) /* matched */ + Generated Plan Advice: + JOIN_ORDER(g sj_narrow) + HASH_JOIN(sj_narrow) + SEQ_SCAN(sj_narrow) + SEMIJOIN_UNIQUE(sj_narrow) + NO_GATHER(g sj_narrow) +(15 rows) + +SET LOCAL pg_plan_advice.advice = 'semijoin_non_unique(sj_narrow)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM generate_series(1,1000) g + WHERE g in (select val1 from sj_narrow); + QUERY PLAN +------------------------------------------------ + Hash Semi Join + Hash Cond: (g.g = sj_narrow.val1) + -> Function Scan on generate_series g + -> Hash + -> Seq Scan on sj_narrow + Supplied Plan Advice: + SEMIJOIN_NON_UNIQUE(sj_narrow) /* matched */ + Generated Plan Advice: + JOIN_ORDER(g sj_narrow) + HASH_JOIN(sj_narrow) + SEQ_SCAN(sj_narrow) + SEMIJOIN_NON_UNIQUE(sj_narrow) + NO_GATHER(g sj_narrow) +(13 rows) + +SET LOCAL pg_plan_advice.advice = 'semijoin_unique(sj_narrow) join_order(sj_narrow)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM generate_series(1,1000) g + WHERE g in (select val1 from sj_narrow); + QUERY PLAN +------------------------------------------------ + Hash Join + Hash Cond: (sj_narrow.val1 = g.g) + -> HashAggregate + Group Key: sj_narrow.val1 + -> Seq Scan on sj_narrow + -> Hash + -> Function Scan on generate_series g + Supplied Plan Advice: + SEMIJOIN_UNIQUE(sj_narrow) /* matched */ + JOIN_ORDER(sj_narrow) /* matched */ + Generated Plan Advice: + JOIN_ORDER(sj_narrow g) + HASH_JOIN(g) + SEQ_SCAN(sj_narrow) + SEMIJOIN_UNIQUE(sj_narrow) + NO_GATHER(g sj_narrow) +(16 rows) + +SET LOCAL pg_plan_advice.advice = 'semijoin_non_unique(sj_narrow) join_order(sj_narrow)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM generate_series(1,1000) g + WHERE g in (select val1 from sj_narrow); + QUERY PLAN +------------------------------------------------ + Hash Right Semi Join + Hash Cond: (sj_narrow.val1 = g.g) + -> Seq Scan on sj_narrow + -> Hash + -> Function Scan on generate_series g + Supplied Plan Advice: + SEMIJOIN_NON_UNIQUE(sj_narrow) /* matched */ + JOIN_ORDER(sj_narrow) /* matched */ + Generated Plan Advice: + JOIN_ORDER(sj_narrow g) + HASH_JOIN(g) + SEQ_SCAN(sj_narrow) + SEMIJOIN_NON_UNIQUE(sj_narrow) + NO_GATHER(g sj_narrow) +(14 rows) + +COMMIT; +-- However, mentioning the wrong side of the join should result in an advice +-- failure. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'semijoin_unique(g)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM generate_series(1,1000) g + WHERE g in (select val1 from sj_narrow); + QUERY PLAN +-------------------------------------------- + Nested Loop + Disabled: true + Join Filter: (g.g = sj_narrow.val1) + -> HashAggregate + Group Key: sj_narrow.val1 + -> Seq Scan on sj_narrow + -> Function Scan on generate_series g + Supplied Plan Advice: + SEMIJOIN_UNIQUE(g) /* matched, failed */ + Generated Plan Advice: + JOIN_ORDER(sj_narrow g) + NESTED_LOOP_PLAIN(g) + SEQ_SCAN(sj_narrow) + SEMIJOIN_UNIQUE(sj_narrow) + NO_GATHER(g sj_narrow) +(15 rows) + +SET LOCAL pg_plan_advice.advice = 'semijoin_non_unique(g)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM generate_series(1,1000) g + WHERE g in (select val1 from sj_narrow); + QUERY PLAN +------------------------------------------------ + Nested Loop + Disabled: true + Join Filter: (g.g = sj_narrow.val1) + -> HashAggregate + Group Key: sj_narrow.val1 + -> Seq Scan on sj_narrow + -> Function Scan on generate_series g + Supplied Plan Advice: + SEMIJOIN_NON_UNIQUE(g) /* matched, failed */ + Generated Plan Advice: + JOIN_ORDER(sj_narrow g) + NESTED_LOOP_PLAIN(g) + SEQ_SCAN(sj_narrow) + SEMIJOIN_UNIQUE(sj_narrow) + NO_GATHER(g sj_narrow) +(15 rows) + +COMMIT; +-- Test conflicting advice. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'semijoin_unique(sj_narrow) semijoin_non_unique(sj_narrow)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM generate_series(1,1000) g + WHERE g in (select val1 from sj_narrow); + QUERY PLAN +--------------------------------------------------------------------- + Hash Join + Hash Cond: (g.g = sj_narrow.val1) + -> Function Scan on generate_series g + -> Hash + -> HashAggregate + Group Key: sj_narrow.val1 + -> Seq Scan on sj_narrow + Supplied Plan Advice: + SEMIJOIN_UNIQUE(sj_narrow) /* matched, conflicting */ + SEMIJOIN_NON_UNIQUE(sj_narrow) /* matched, conflicting, failed */ + Generated Plan Advice: + JOIN_ORDER(g sj_narrow) + HASH_JOIN(sj_narrow) + SEQ_SCAN(sj_narrow) + SEMIJOIN_UNIQUE(sj_narrow) + NO_GATHER(g sj_narrow) +(16 rows) + +COMMIT; +-- Try applying SEMIJOIN_UNIQUE() to a non-semijoin. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'semijoin_unique(g)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM generate_series(1,1000) g, sj_narrow s WHERE g = s.val1; + QUERY PLAN +---------------------------------------------------------- + Merge Join + Merge Cond: (s.val1 = g.g) + -> Index Scan using sj_narrow_val1_idx on sj_narrow s + -> Sort + Sort Key: g.g + -> Function Scan on generate_series g + Supplied Plan Advice: + SEMIJOIN_UNIQUE(g) /* matched, inapplicable, failed */ + Generated Plan Advice: + JOIN_ORDER(s g) + MERGE_JOIN_PLAIN(g) + INDEX_SCAN(s public.sj_narrow_val1_idx) + NO_GATHER(g s) +(13 rows) + +COMMIT; diff --git a/contrib/pg_plan_advice/expected/syntax.out b/contrib/pg_plan_advice/expected/syntax.out new file mode 100644 index 00000000000..be61402b569 --- /dev/null +++ b/contrib/pg_plan_advice/expected/syntax.out @@ -0,0 +1,192 @@ +LOAD 'pg_plan_advice'; +-- An empty string is allowed. Empty target lists are allowed for most advice +-- tags, but not for JOIN_ORDER. "Supplied Plan Advice" should be omitted in +-- text format when there is no actual advice, but not in non-text format. +SET pg_plan_advice.advice = ''; +EXPLAIN (COSTS OFF) SELECT 1; + QUERY PLAN +------------ + Result +(1 row) + +SET pg_plan_advice.advice = 'SEQ_SCAN()'; +EXPLAIN (COSTS OFF) SELECT 1; + QUERY PLAN +------------ + Result +(1 row) + +SET pg_plan_advice.advice = 'NESTED_LOOP_PLAIN()'; +EXPLAIN (COSTS OFF, FORMAT JSON) SELECT 1; + QUERY PLAN +-------------------------------- + [ + + { + + "Plan": { + + "Node Type": "Result", + + "Parallel Aware": false,+ + "Async Capable": false, + + "Disabled": false + + }, + + "Supplied Plan Advice": ""+ + } + + ] +(1 row) + +SET pg_plan_advice.advice = 'JOIN_ORDER()'; +ERROR: invalid value for parameter "pg_plan_advice.advice": "JOIN_ORDER()" +DETAIL: Could not parse advice: JOIN_ORDER must have at least one target at or near ")" +-- Test assorted variations in capitalization, whitespace, and which parts of +-- the relation identifier are included. These should all work. +SET pg_plan_advice.advice = 'SEQ_SCAN(x)'; +EXPLAIN (COSTS OFF) SELECT 1; + QUERY PLAN +--------------------------------- + Result + Supplied Plan Advice: + SEQ_SCAN(x) /* not matched */ +(3 rows) + +SET pg_plan_advice.advice = 'seq_scan(x@y)'; +EXPLAIN (COSTS OFF) SELECT 1; + QUERY PLAN +----------------------------------- + Result + Supplied Plan Advice: + SEQ_SCAN(x@y) /* not matched */ +(3 rows) + +SET pg_plan_advice.advice = 'SEQ_scan(x#2)'; +EXPLAIN (COSTS OFF) SELECT 1; + QUERY PLAN +----------------------------------- + Result + Supplied Plan Advice: + SEQ_SCAN(x#2) /* not matched */ +(3 rows) + +SET pg_plan_advice.advice = 'SEQ_SCAN (x/y)'; +EXPLAIN (COSTS OFF) SELECT 1; + QUERY PLAN +----------------------------------- + Result + Supplied Plan Advice: + SEQ_SCAN(x/y) /* not matched */ +(3 rows) + +SET pg_plan_advice.advice = ' SEQ_SCAN ( x / y . z ) '; +EXPLAIN (COSTS OFF) SELECT 1; + QUERY PLAN +------------------------------------- + Result + Supplied Plan Advice: + SEQ_SCAN(x/y.z) /* not matched */ +(3 rows) + +SET pg_plan_advice.advice = 'SEQ_SCAN("x"#2/"y"."z"@"t")'; +EXPLAIN (COSTS OFF) SELECT 1; + QUERY PLAN +----------------------------------------- + Result + Supplied Plan Advice: + SEQ_SCAN(x#2/y.z@t) /* not matched */ +(3 rows) + +-- Syntax errors. +SET pg_plan_advice.advice = 'SEQUENTIAL_SCAN(x)'; +ERROR: invalid value for parameter "pg_plan_advice.advice": "SEQUENTIAL_SCAN(x)" +DETAIL: Could not parse advice: syntax error at or near "SEQUENTIAL_SCAN" +SET pg_plan_advice.advice = 'SEQ_SCAN'; +ERROR: invalid value for parameter "pg_plan_advice.advice": "SEQ_SCAN" +DETAIL: Could not parse advice: syntax error at end of input +SET pg_plan_advice.advice = 'SEQ_SCAN('; +ERROR: invalid value for parameter "pg_plan_advice.advice": "SEQ_SCAN(" +DETAIL: Could not parse advice: syntax error at end of input +SET pg_plan_advice.advice = 'SEQ_SCAN("'; +ERROR: invalid value for parameter "pg_plan_advice.advice": "SEQ_SCAN("" +DETAIL: Could not parse advice: unterminated quoted identifier at end of input +SET pg_plan_advice.advice = 'SEQ_SCAN("")'; +ERROR: invalid value for parameter "pg_plan_advice.advice": "SEQ_SCAN("")" +DETAIL: Could not parse advice: zero-length delimited identifier at or near """ +SET pg_plan_advice.advice = 'SEQ_SCAN("a"'; +ERROR: invalid value for parameter "pg_plan_advice.advice": "SEQ_SCAN("a"" +DETAIL: Could not parse advice: syntax error at end of input +SET pg_plan_advice.advice = 'SEQ_SCAN(#'; +ERROR: invalid value for parameter "pg_plan_advice.advice": "SEQ_SCAN(#" +DETAIL: Could not parse advice: syntax error at or near "#" +SET pg_plan_advice.advice = '()'; +ERROR: invalid value for parameter "pg_plan_advice.advice": "()" +DETAIL: Could not parse advice: syntax error at or near "(" +SET pg_plan_advice.advice = '123'; +ERROR: invalid value for parameter "pg_plan_advice.advice": "123" +DETAIL: Could not parse advice: syntax error at or near "123" +-- Tags like SEQ_SCAN and NO_GATHER don't allow sublists at all; other tags, +-- except for JOIN_ORDER, allow at most one level of sublist. Hence, these +-- examples should error out. +SET pg_plan_advice.advice = 'SEQ_SCAN((x))'; +ERROR: invalid value for parameter "pg_plan_advice.advice": "SEQ_SCAN((x))" +DETAIL: Could not parse advice: syntax error at or near "(" +SET pg_plan_advice.advice = 'GATHER(((x)))'; +ERROR: invalid value for parameter "pg_plan_advice.advice": "GATHER(((x)))" +DETAIL: Could not parse advice: syntax error at or near "(" +-- Legal comments. +SET pg_plan_advice.advice = '/**/'; +EXPLAIN (COSTS OFF) SELECT 1; + QUERY PLAN +------------ + Result +(1 row) + +SET pg_plan_advice.advice = 'HASH_JOIN(_)/***/'; +EXPLAIN (COSTS OFF) SELECT 1; + QUERY PLAN +---------------------------------- + Result + Supplied Plan Advice: + HASH_JOIN(_) /* not matched */ +(3 rows) + +SET pg_plan_advice.advice = '/* comment */ HASH_JOIN(/*x*/y)'; +EXPLAIN (COSTS OFF) SELECT 1; + QUERY PLAN +---------------------------------- + Result + Supplied Plan Advice: + HASH_JOIN(y) /* not matched */ +(3 rows) + +SET pg_plan_advice.advice = '/* comment */ HASH_JOIN(y//*x*/z)'; +EXPLAIN (COSTS OFF) SELECT 1; + QUERY PLAN +------------------------------------ + Result + Supplied Plan Advice: + HASH_JOIN(y/z) /* not matched */ +(3 rows) + +-- Unterminated comments. +SET pg_plan_advice.advice = '/*'; +ERROR: invalid value for parameter "pg_plan_advice.advice": "/*" +DETAIL: Could not parse advice: unterminated comment at end of input +SET pg_plan_advice.advice = 'JOIN_ORDER("fOO") /* oops'; +ERROR: invalid value for parameter "pg_plan_advice.advice": "JOIN_ORDER("fOO") /* oops" +DETAIL: Could not parse advice: unterminated comment at end of input +-- Nested comments are not supported, so the first of these is legal and +-- the second is not. +SET pg_plan_advice.advice = '/*/*/'; +EXPLAIN (COSTS OFF) SELECT 1; + QUERY PLAN +------------ + Result +(1 row) + +SET pg_plan_advice.advice = '/*/* stuff */*/'; +ERROR: invalid value for parameter "pg_plan_advice.advice": "/*/* stuff */*/" +DETAIL: Could not parse advice: syntax error at or near "*" +-- Foreign join requires multiple relation identifiers. +SET pg_plan_advice.advice = 'FOREIGN_JOIN(a)'; +ERROR: invalid value for parameter "pg_plan_advice.advice": "FOREIGN_JOIN(a)" +DETAIL: Could not parse advice: FOREIGN_JOIN targets must contain more than one relation identifier at or near ")" +SET pg_plan_advice.advice = 'FOREIGN_JOIN((a))'; +ERROR: invalid value for parameter "pg_plan_advice.advice": "FOREIGN_JOIN((a))" +DETAIL: Could not parse advice: FOREIGN_JOIN targets must contain more than one relation identifier at or near ")" diff --git a/contrib/pg_plan_advice/meson.build b/contrib/pg_plan_advice/meson.build new file mode 100644 index 00000000000..cf948ffaa13 --- /dev/null +++ b/contrib/pg_plan_advice/meson.build @@ -0,0 +1,66 @@ +# Copyright (c) 2022-2026, PostgreSQL Global Development Group + +pg_plan_advice_sources = files( + 'pg_plan_advice.c', + 'pgpa_ast.c', + 'pgpa_identifier.c', + 'pgpa_join.c', + 'pgpa_output.c', + 'pgpa_planner.c', + 'pgpa_scan.c', + 'pgpa_trove.c', + 'pgpa_walker.c', +) + +pgpa_scanner = custom_target('pgpa_scanner', + input: 'pgpa_scanner.l', + output: 'pgpa_scanner.c', + command: flex_cmd, +) +generated_sources += pgpa_scanner +pg_plan_advice_sources += pgpa_scanner + +pgpa_parser = custom_target('pgpa_parser', + input: 'pgpa_parser.y', + kwargs: bison_kw, +) +generated_sources += pgpa_parser.to_list() +pg_plan_advice_sources += pgpa_parser + +if host_system == 'windows' + pg_plan_advice_sources += rc_lib_gen.process(win32ver_rc, extra_args: [ + '--NAME', 'pg_plan_advice', + '--FILEDESC', 'pg_plan_advice - help the planner get the right plan',]) +endif + +pg_plan_advice_inc = include_directories('.') + +pg_plan_advice = shared_module('pg_plan_advice', + pg_plan_advice_sources, + include_directories: pg_plan_advice_inc, + kwargs: contrib_mod_args, +) +contrib_targets += pg_plan_advice + +install_headers( + 'pg_plan_advice.h', + install_dir: dir_include_extension / 'pg_plan_advice', +) + +tests += { + 'name': 'pg_plan_advice', + 'sd': meson.current_source_dir(), + 'bd': meson.current_build_dir(), + 'regress': { + 'sql': [ + 'gather', + 'join_order', + 'join_strategy', + 'partitionwise', + 'prepared', + 'scan', + 'semijoin', + 'syntax', + ], + }, +} diff --git a/contrib/pg_plan_advice/pg_plan_advice.c b/contrib/pg_plan_advice/pg_plan_advice.c new file mode 100644 index 00000000000..2393ed55518 --- /dev/null +++ b/contrib/pg_plan_advice/pg_plan_advice.c @@ -0,0 +1,456 @@ +/*------------------------------------------------------------------------- + * + * pg_plan_advice.c + * main entrypoints for generating and applying planner advice + * + * Copyright (c) 2016-2026, PostgreSQL Global Development Group + * + * contrib/pg_plan_advice/pg_plan_advice.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "pg_plan_advice.h" +#include "pgpa_ast.h" +#include "pgpa_identifier.h" +#include "pgpa_output.h" +#include "pgpa_planner.h" +#include "pgpa_trove.h" +#include "pgpa_walker.h" + +#include "commands/defrem.h" +#include "commands/explain.h" +#include "commands/explain_format.h" +#include "commands/explain_state.h" +#include "funcapi.h" +#include "optimizer/planner.h" +#include "storage/dsm_registry.h" +#include "utils/guc.h" + +PG_MODULE_MAGIC; + +/* GUC variables */ +char *pg_plan_advice_advice = NULL; +bool pg_plan_advice_always_store_advice_details = false; +static bool pg_plan_advice_always_explain_supplied_advice = true; +bool pg_plan_advice_feedback_warnings = false; +bool pg_plan_advice_trace_mask = false; + +/* Saved hook value */ +static explain_per_plan_hook_type prev_explain_per_plan = NULL; + +/* Other file-level globals */ +static int es_extension_id; +static MemoryContext pgpa_memory_context = NULL; +static List *advisor_hook_list = NIL; + +static void pg_plan_advice_explain_option_handler(ExplainState *es, + DefElem *opt, + ParseState *pstate); +static void pg_plan_advice_explain_per_plan_hook(PlannedStmt *plannedstmt, + IntoClause *into, + ExplainState *es, + const char *queryString, + ParamListInfo params, + QueryEnvironment *queryEnv); +static bool pg_plan_advice_advice_check_hook(char **newval, void **extra, + GucSource source); +static DefElem *find_defelem_by_defname(List *deflist, char *defname); + +/* + * Initialize this module. + */ +void +_PG_init(void) +{ + DefineCustomStringVariable("pg_plan_advice.advice", + "advice to apply during query planning", + NULL, + &pg_plan_advice_advice, + NULL, + PGC_USERSET, + 0, + pg_plan_advice_advice_check_hook, + NULL, + NULL); + + DefineCustomBoolVariable("pg_plan_advice.always_explain_supplied_advice", + "EXPLAIN output includes supplied advice even without EXPLAIN (PLAN_ADVICE)", + NULL, + &pg_plan_advice_always_explain_supplied_advice, + true, + PGC_USERSET, + 0, + NULL, + NULL, + NULL); + + DefineCustomBoolVariable("pg_plan_advice.always_store_advice_details", + "Generate advice strings even when seemingly not required", + "Use this option to see generated advice for prepared queries.", + &pg_plan_advice_always_store_advice_details, + false, + PGC_USERSET, + 0, + NULL, + NULL, + NULL); + + DefineCustomBoolVariable("pg_plan_advice.feedback_warnings", + "Warn when supplied advice does not apply cleanly", + NULL, + &pg_plan_advice_feedback_warnings, + false, + PGC_USERSET, + 0, + NULL, + NULL, + NULL); + + DefineCustomBoolVariable("pg_plan_advice.trace_mask", + "Emit debugging messages showing the computed strategy mask for each relation", + NULL, + &pg_plan_advice_trace_mask, + false, + PGC_USERSET, + 0, + NULL, + NULL, + NULL); + + MarkGUCPrefixReserved("pg_plan_advice"); + + /* Get an ID that we can use to cache data in an ExplainState. */ + es_extension_id = GetExplainExtensionId("pg_plan_advice"); + + /* Register the new EXPLAIN options implemented by this module. */ + RegisterExtensionExplainOption("plan_advice", + pg_plan_advice_explain_option_handler); + + /* Install hooks */ + pgpa_planner_install_hooks(); + prev_explain_per_plan = explain_per_plan_hook; + explain_per_plan_hook = pg_plan_advice_explain_per_plan_hook; +} + +/* + * Return a pointer to a memory context where long-lived data managed by this + * module can be stored. + */ +MemoryContext +pg_plan_advice_get_mcxt(void) +{ + if (pgpa_memory_context == NULL) + pgpa_memory_context = AllocSetContextCreate(TopMemoryContext, + "pg_plan_advice", + ALLOCSET_DEFAULT_SIZES); + + return pgpa_memory_context; +} + +/* + * Was the PLAN_ADVICE option specified and not set to false? + */ +bool +pg_plan_advice_should_explain(ExplainState *es) +{ + bool *plan_advice = NULL; + + if (es != NULL) + plan_advice = GetExplainExtensionState(es, es_extension_id); + return plan_advice != NULL && *plan_advice; +} + +/* + * Get the advice that should be used while planning a particular query. + */ +char * +pg_plan_advice_get_supplied_query_advice(PlannerGlobal *glob, + Query *parse, + const char *query_string, + int cursorOptions, + ExplainState *es) +{ + ListCell *lc; + + /* + * If any advisors are loaded, consult them. The first one that produces a + * non-NULL string wins. + */ + foreach(lc, advisor_hook_list) + { + pg_plan_advice_advisor_hook hook = lfirst(lc); + char *advice_string; + + advice_string = (*hook) (glob, parse, query_string, cursorOptions, es); + if (advice_string != NULL) + return advice_string; + } + + /* Otherwise, just use the value of the GUC. */ + return pg_plan_advice_advice; +} + +/* + * Add an advisor, which can supply advice strings to be used during future + * query planning operations. + * + * The advisor should return NULL if it has no advice string to offer for a + * given query. If multiple advisors are added, they will be consulted in the + * order added until one of them returns a non-NULL value. + */ +void +pg_plan_advice_add_advisor(pg_plan_advice_advisor_hook hook) +{ + MemoryContext oldcontext; + + oldcontext = MemoryContextSwitchTo(pg_plan_advice_get_mcxt()); + advisor_hook_list = lappend(advisor_hook_list, hook); + MemoryContextSwitchTo(oldcontext); +} + +/* + * Remove an advisor. + */ +void +pg_plan_advice_remove_advisor(pg_plan_advice_advisor_hook hook) +{ + MemoryContext oldcontext; + + oldcontext = MemoryContextSwitchTo(pg_plan_advice_get_mcxt()); + advisor_hook_list = list_delete_ptr(advisor_hook_list, hook); + MemoryContextSwitchTo(oldcontext); +} + +/* + * Other loadable modules can use this function to trigger advice generation. + * + * Calling this function with activate = true requests that any queries + * planned afterwards should generate plan advice, which will be stored in the + * PlannedStmt. Calling this function with activate = false revokes that + * request. Multiple loadable modules could be using this simultaneously, so + * make sure to only revoke your own requests. + * + * Note that you can't use this function to *suppress* advice generation, + * which can occur for other reasons, such as the use of EXPLAIN (PLAN_ADVICE), + * regardless. It's a way of turning advice generation on, not a way of turning + * it off. + */ +void +pg_plan_advice_request_advice_generation(bool activate) +{ + if (activate) + pgpa_planner_generate_advice++; + else + { + Assert(pgpa_planner_generate_advice > 0); + pgpa_planner_generate_advice--; + } +} + +/* + * Handler for EXPLAIN (PLAN_ADVICE). + */ +static void +pg_plan_advice_explain_option_handler(ExplainState *es, DefElem *opt, + ParseState *pstate) +{ + bool *plan_advice; + + plan_advice = GetExplainExtensionState(es, es_extension_id); + + if (plan_advice == NULL) + { + plan_advice = palloc0_object(bool); + SetExplainExtensionState(es, es_extension_id, plan_advice); + } + + *plan_advice = defGetBoolean(opt); +} + +/* + * Display a string that is likely to consist of multiple lines in EXPLAIN + * output. + */ +static void +pg_plan_advice_explain_text_multiline(ExplainState *es, char *qlabel, + char *value) +{ + char *s; + + /* For non-text formats, it's best not to add any special handling. */ + if (es->format != EXPLAIN_FORMAT_TEXT) + { + ExplainPropertyText(qlabel, value, es); + return; + } + + /* In text format, if there is no data, display nothing. */ + if (*value == '\0') + return; + + /* + * It looks nicest to indent each line of the advice separately, beginning + * on the line below the label. + */ + ExplainIndentText(es); + appendStringInfo(es->str, "%s:\n", qlabel); + es->indent++; + while ((s = strchr(value, '\n')) != NULL) + { + ExplainIndentText(es); + appendBinaryStringInfo(es->str, value, (s - value) + 1); + value = s + 1; + } + + /* Don't interpret a terminal newline as a request for an empty line. */ + if (*value != '\0') + { + ExplainIndentText(es); + appendStringInfo(es->str, "%s\n", value); + } + + es->indent--; +} + +/* + * Add advice feedback to the EXPLAIN output. + */ +static void +pg_plan_advice_explain_feedback(ExplainState *es, List *feedback) +{ + StringInfoData buf; + + initStringInfo(&buf); + foreach_node(DefElem, item, feedback) + { + int flags = defGetInt32(item); + + appendStringInfo(&buf, "%s /* ", item->defname); + pgpa_trove_append_flags(&buf, flags); + appendStringInfo(&buf, " */\n"); + } + + pg_plan_advice_explain_text_multiline(es, "Supplied Plan Advice", + buf.data); +} + +/* + * Add relevant details, if any, to the EXPLAIN output for a single plan. + */ +static void +pg_plan_advice_explain_per_plan_hook(PlannedStmt *plannedstmt, + IntoClause *into, + ExplainState *es, + const char *queryString, + ParamListInfo params, + QueryEnvironment *queryEnv) +{ + bool should_explain; + DefElem *pgpa_item; + List *pgpa_list; + + if (prev_explain_per_plan) + prev_explain_per_plan(plannedstmt, into, es, queryString, params, + queryEnv); + + /* Should an advice string be part of the EXPLAIN output? */ + should_explain = pg_plan_advice_should_explain(es); + + /* Find any data pgpa_planner_shutdown stashed in the PlannedStmt. */ + pgpa_item = find_defelem_by_defname(plannedstmt->extension_state, + "pg_plan_advice"); + pgpa_list = pgpa_item == NULL ? NULL : (List *) pgpa_item->arg; + + /* + * By default, if there is a record of attempting to apply advice during + * query planning, we always output that information, but the user can set + * pg_plan_advice.always_explain_supplied_advice = false to suppress that + * behavior. If they do, we'll only display it when the PLAN_ADVICE option + * was specified and not set to false. + * + * NB: If we're explaining a query planned beforehand -- i.e. a prepared + * statement -- the application of query advice may not have been + * recorded, and therefore this won't be able to show anything. Use + * pg_plan_advice.always_store_advice_details = true to work around this. + */ + if (pgpa_list != NULL && (pg_plan_advice_always_explain_supplied_advice || + should_explain)) + { + DefElem *feedback; + + feedback = find_defelem_by_defname(pgpa_list, "feedback"); + if (feedback != NULL) + pg_plan_advice_explain_feedback(es, (List *) feedback->arg); + } + + /* + * If the PLAN_ADVICE option was specified -- and not set to FALSE -- show + * generated advice. + */ + if (should_explain) + { + DefElem *advice_string_item; + char *advice_string = NULL; + + advice_string_item = + find_defelem_by_defname(pgpa_list, "advice_string"); + if (advice_string_item != NULL) + { + advice_string = strVal(advice_string_item->arg); + pg_plan_advice_explain_text_multiline(es, "Generated Plan Advice", + advice_string); + } + } +} + +/* + * Check hook for pg_plan_advice.advice + */ +static bool +pg_plan_advice_advice_check_hook(char **newval, void **extra, GucSource source) +{ + MemoryContext oldcontext; + MemoryContext tmpcontext; + char *error; + + if (*newval == NULL) + return true; + + tmpcontext = AllocSetContextCreate(CurrentMemoryContext, + "pg_plan_advice.advice", + ALLOCSET_DEFAULT_SIZES); + oldcontext = MemoryContextSwitchTo(tmpcontext); + + /* + * It would be nice to save the parse tree that we construct here for + * eventual use when planning with this advice, but *extra can only point + * to a single guc_malloc'd chunk, and our parse tree involves an + * arbitrary number of memory allocations. + */ + (void) pgpa_parse(*newval, &error); + + if (error != NULL) + GUC_check_errdetail("Could not parse advice: %s", error); + + MemoryContextSwitchTo(oldcontext); + MemoryContextDelete(tmpcontext); + + return (error == NULL); +} + +/* + * Search a list of DefElem objects for a given defname. + */ +static DefElem * +find_defelem_by_defname(List *deflist, char *defname) +{ + foreach_node(DefElem, item, deflist) + { + if (strcmp(item->defname, defname) == 0) + return item; + } + + return NULL; +} diff --git a/contrib/pg_plan_advice/pg_plan_advice.h b/contrib/pg_plan_advice/pg_plan_advice.h new file mode 100644 index 00000000000..749331b6b8a --- /dev/null +++ b/contrib/pg_plan_advice/pg_plan_advice.h @@ -0,0 +1,45 @@ +/*------------------------------------------------------------------------- + * + * pg_plan_advice.h + * main header file for pg_plan_advice contrib module + * + * Copyright (c) 2016-2026, PostgreSQL Global Development Group + * + * contrib/pg_plan_advice/pg_plan_advice.h + * + *------------------------------------------------------------------------- + */ +#ifndef PG_PLAN_ADVICE_H +#define PG_PLAN_ADVICE_H + +#include "commands/explain_state.h" +#include "nodes/pathnodes.h" + +/* Hook for other plugins to supply advice strings */ +typedef char *(*pg_plan_advice_advisor_hook) (PlannerGlobal *glob, + Query *parse, + const char *query_string, + int cursorOptions, + ExplainState *es); + +/* GUC variables */ +extern char *pg_plan_advice_advice; +extern bool pg_plan_advice_always_store_advice_details; +extern bool pg_plan_advice_feedback_warnings; +extern bool pg_plan_advice_trace_mask; + +/* Function prototypes (for use by pg_plan_advice itself) */ +extern MemoryContext pg_plan_advice_get_mcxt(void); +extern bool pg_plan_advice_should_explain(ExplainState *es); +extern char *pg_plan_advice_get_supplied_query_advice(PlannerGlobal *glob, + Query *parse, + const char *query_string, + int cursorOptions, + ExplainState *es); + +/* Function prototypes (for use by other plugins) */ +extern PGDLLEXPORT void pg_plan_advice_add_advisor(pg_plan_advice_advisor_hook hook); +extern PGDLLEXPORT void pg_plan_advice_remove_advisor(pg_plan_advice_advisor_hook hook); +extern PGDLLEXPORT void pg_plan_advice_request_advice_generation(bool activate); + +#endif diff --git a/contrib/pg_plan_advice/pgpa_ast.c b/contrib/pg_plan_advice/pgpa_ast.c new file mode 100644 index 00000000000..f4fa6a626d4 --- /dev/null +++ b/contrib/pg_plan_advice/pgpa_ast.c @@ -0,0 +1,351 @@ +/*------------------------------------------------------------------------- + * + * pgpa_ast.c + * additional supporting code related to plan advice parsing + * + * Copyright (c) 2016-2026, PostgreSQL Global Development Group + * + * contrib/pg_plan_advice/pgpa_ast.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "pgpa_ast.h" + +#include "funcapi.h" +#include "utils/array.h" +#include "utils/builtins.h" + +static bool pgpa_identifiers_cover_target(int nrids, pgpa_identifier *rids, + pgpa_advice_target *target, + bool *rids_used); + +/* + * Get a C string that corresponds to the specified advice tag. + */ +char * +pgpa_cstring_advice_tag(pgpa_advice_tag_type advice_tag) +{ + switch (advice_tag) + { + case PGPA_TAG_BITMAP_HEAP_SCAN: + return "BITMAP_HEAP_SCAN"; + case PGPA_TAG_FOREIGN_JOIN: + return "FOREIGN_JOIN"; + case PGPA_TAG_GATHER: + return "GATHER"; + case PGPA_TAG_GATHER_MERGE: + return "GATHER_MERGE"; + case PGPA_TAG_HASH_JOIN: + return "HASH_JOIN"; + case PGPA_TAG_INDEX_ONLY_SCAN: + return "INDEX_ONLY_SCAN"; + case PGPA_TAG_INDEX_SCAN: + return "INDEX_SCAN"; + case PGPA_TAG_JOIN_ORDER: + return "JOIN_ORDER"; + case PGPA_TAG_MERGE_JOIN_MATERIALIZE: + return "MERGE_JOIN_MATERIALIZE"; + case PGPA_TAG_MERGE_JOIN_PLAIN: + return "MERGE_JOIN_PLAIN"; + case PGPA_TAG_NESTED_LOOP_MATERIALIZE: + return "NESTED_LOOP_MATERIALIZE"; + case PGPA_TAG_NESTED_LOOP_MEMOIZE: + return "NESTED_LOOP_MEMOIZE"; + case PGPA_TAG_NESTED_LOOP_PLAIN: + return "NESTED_LOOP_PLAIN"; + case PGPA_TAG_NO_GATHER: + return "NO_GATHER"; + case PGPA_TAG_PARTITIONWISE: + return "PARTITIONWISE"; + case PGPA_TAG_SEMIJOIN_NON_UNIQUE: + return "SEMIJOIN_NON_UNIQUE"; + case PGPA_TAG_SEMIJOIN_UNIQUE: + return "SEMIJOIN_UNIQUE"; + case PGPA_TAG_SEQ_SCAN: + return "SEQ_SCAN"; + case PGPA_TAG_TID_SCAN: + return "TID_SCAN"; + } + + pg_unreachable(); + return NULL; +} + +/* + * Convert an advice tag, formatted as a string that has already been + * downcased as appropriate, to a pgpa_advice_tag_type. + * + * If we succeed, set *fail = false and return the result; if we fail, + * set *fail = true and return an arbitrary value. + */ +pgpa_advice_tag_type +pgpa_parse_advice_tag(const char *tag, bool *fail) +{ + *fail = false; + + switch (tag[0]) + { + case 'b': + if (strcmp(tag, "bitmap_heap_scan") == 0) + return PGPA_TAG_BITMAP_HEAP_SCAN; + break; + case 'f': + if (strcmp(tag, "foreign_join") == 0) + return PGPA_TAG_FOREIGN_JOIN; + break; + case 'g': + if (strcmp(tag, "gather") == 0) + return PGPA_TAG_GATHER; + if (strcmp(tag, "gather_merge") == 0) + return PGPA_TAG_GATHER_MERGE; + break; + case 'h': + if (strcmp(tag, "hash_join") == 0) + return PGPA_TAG_HASH_JOIN; + break; + case 'i': + if (strcmp(tag, "index_scan") == 0) + return PGPA_TAG_INDEX_SCAN; + if (strcmp(tag, "index_only_scan") == 0) + return PGPA_TAG_INDEX_ONLY_SCAN; + break; + case 'j': + if (strcmp(tag, "join_order") == 0) + return PGPA_TAG_JOIN_ORDER; + break; + case 'm': + if (strcmp(tag, "merge_join_materialize") == 0) + return PGPA_TAG_MERGE_JOIN_MATERIALIZE; + if (strcmp(tag, "merge_join_plain") == 0) + return PGPA_TAG_MERGE_JOIN_PLAIN; + break; + case 'n': + if (strcmp(tag, "nested_loop_materialize") == 0) + return PGPA_TAG_NESTED_LOOP_MATERIALIZE; + if (strcmp(tag, "nested_loop_memoize") == 0) + return PGPA_TAG_NESTED_LOOP_MEMOIZE; + if (strcmp(tag, "nested_loop_plain") == 0) + return PGPA_TAG_NESTED_LOOP_PLAIN; + if (strcmp(tag, "no_gather") == 0) + return PGPA_TAG_NO_GATHER; + break; + case 'p': + if (strcmp(tag, "partitionwise") == 0) + return PGPA_TAG_PARTITIONWISE; + break; + case 's': + if (strcmp(tag, "semijoin_non_unique") == 0) + return PGPA_TAG_SEMIJOIN_NON_UNIQUE; + if (strcmp(tag, "semijoin_unique") == 0) + return PGPA_TAG_SEMIJOIN_UNIQUE; + if (strcmp(tag, "seq_scan") == 0) + return PGPA_TAG_SEQ_SCAN; + break; + case 't': + if (strcmp(tag, "tid_scan") == 0) + return PGPA_TAG_TID_SCAN; + break; + } + + /* didn't work out */ + *fail = true; + + /* return an arbitrary value to unwind the call stack */ + return PGPA_TAG_SEQ_SCAN; +} + +/* + * Format a pgpa_advice_target as a string and append result to a StringInfo. + */ +void +pgpa_format_advice_target(StringInfo str, pgpa_advice_target *target) +{ + if (target->ttype != PGPA_TARGET_IDENTIFIER) + { + bool first = true; + char *delims; + + if (target->ttype == PGPA_TARGET_UNORDERED_LIST) + delims = "{}"; + else + delims = "()"; + + appendStringInfoChar(str, delims[0]); + foreach_ptr(pgpa_advice_target, child_target, target->children) + { + if (first) + first = false; + else + appendStringInfoChar(str, ' '); + pgpa_format_advice_target(str, child_target); + } + appendStringInfoChar(str, delims[1]); + } + else + { + const char *rt_identifier; + + rt_identifier = pgpa_identifier_string(&target->rid); + appendStringInfoString(str, rt_identifier); + } +} + +/* + * Format a pgpa_index_target as a string and append result to a StringInfo. + */ +void +pgpa_format_index_target(StringInfo str, pgpa_index_target *itarget) +{ + if (itarget->indnamespace != NULL) + appendStringInfo(str, "%s.", + quote_identifier(itarget->indnamespace)); + appendStringInfoString(str, quote_identifier(itarget->indname)); +} + +/* + * Determine whether two pgpa_index_target objects are exactly identical. + */ +bool +pgpa_index_targets_equal(pgpa_index_target *i1, pgpa_index_target *i2) +{ + /* indnamespace can be NULL, and two NULL values are equal */ + if ((i1->indnamespace != NULL || i2->indnamespace != NULL) && + (i1->indnamespace == NULL || i2->indnamespace == NULL || + strcmp(i1->indnamespace, i2->indnamespace) != 0)) + return false; + if (strcmp(i1->indname, i2->indname) != 0) + return false; + + return true; +} + +/* + * Check whether an identifier matches an any part of an advice target. + */ +bool +pgpa_identifier_matches_target(pgpa_identifier *rid, pgpa_advice_target *target) +{ + /* For non-identifiers, check all descendants. */ + if (target->ttype != PGPA_TARGET_IDENTIFIER) + { + foreach_ptr(pgpa_advice_target, child_target, target->children) + { + if (pgpa_identifier_matches_target(rid, child_target)) + return true; + } + return false; + } + + /* Straightforward comparisons of alias name and occurrence number. */ + if (strcmp(rid->alias_name, target->rid.alias_name) != 0) + return false; + if (rid->occurrence != target->rid.occurrence) + return false; + + /* + * If a relation identifier mentions a partition name, it should also + * specify a partition schema. But the target may leave the schema NULL to + * match anything. + */ + Assert(rid->partnsp != NULL || rid->partrel == NULL); + if (rid->partnsp != NULL && target->rid.partnsp != NULL && + strcmp(rid->partnsp, target->rid.partnsp) != 0) + return false; + + /* + * These fields can be NULL on either side, but NULL only matches another + * NULL. + */ + if (!strings_equal_or_both_null(rid->partrel, target->rid.partrel)) + return false; + if (!strings_equal_or_both_null(rid->plan_name, target->rid.plan_name)) + return false; + + return true; +} + +/* + * Match identifiers to advice targets and return an enum value indicating + * the relationship between the set of keys and the set of targets. + * + * See the comments for pgpa_itm_type. + */ +pgpa_itm_type +pgpa_identifiers_match_target(int nrids, pgpa_identifier *rids, + pgpa_advice_target *target) +{ + bool all_rids_used = true; + bool any_rids_used = false; + bool all_targets_used; + bool *rids_used = palloc0_array(bool, nrids); + + all_targets_used = + pgpa_identifiers_cover_target(nrids, rids, target, rids_used); + + for (int i = 0; i < nrids; ++i) + { + if (rids_used[i]) + any_rids_used = true; + else + all_rids_used = false; + } + + if (all_rids_used) + { + if (all_targets_used) + return PGPA_ITM_EQUAL; + else + return PGPA_ITM_KEYS_ARE_SUBSET; + } + else + { + if (all_targets_used) + return PGPA_ITM_TARGETS_ARE_SUBSET; + else if (any_rids_used) + return PGPA_ITM_INTERSECTING; + else + return PGPA_ITM_DISJOINT; + } +} + +/* + * Returns true if every target or sub-target is matched by at least one + * identifier, and otherwise false. + * + * Also sets rids_used[i] = true for each idenifier that matches at least one + * target. + */ +static bool +pgpa_identifiers_cover_target(int nrids, pgpa_identifier *rids, + pgpa_advice_target *target, bool *rids_used) +{ + bool result = false; + + if (target->ttype != PGPA_TARGET_IDENTIFIER) + { + result = true; + + foreach_ptr(pgpa_advice_target, child_target, target->children) + { + if (!pgpa_identifiers_cover_target(nrids, rids, child_target, + rids_used)) + result = false; + } + } + else + { + for (int i = 0; i < nrids; ++i) + { + if (pgpa_identifier_matches_target(&rids[i], target)) + { + rids_used[i] = true; + result = true; + } + } + } + + return result; +} diff --git a/contrib/pg_plan_advice/pgpa_ast.h b/contrib/pg_plan_advice/pgpa_ast.h new file mode 100644 index 00000000000..3c3db801926 --- /dev/null +++ b/contrib/pg_plan_advice/pgpa_ast.h @@ -0,0 +1,185 @@ +/*------------------------------------------------------------------------- + * + * pgpa_ast.h + * abstract syntax trees for plan advice, plus parser/scanner support + * + * Copyright (c) 2016-2026, PostgreSQL Global Development Group + * + * contrib/pg_plan_advice/pgpa_ast.h + * + *------------------------------------------------------------------------- + */ +#ifndef PGPA_AST_H +#define PGPA_AST_H + +#include "pgpa_identifier.h" + +#include "nodes/pg_list.h" + +/* + * Advice items generally take the form SOME_TAG(item [...]), where an item + * can take various forms. The simplest case is a relation identifier, but + * some tags allow sublists, and JOIN_ORDER() allows both ordered and unordered + * sublists. + */ +typedef enum +{ + PGPA_TARGET_IDENTIFIER, /* relation identifier */ + PGPA_TARGET_ORDERED_LIST, /* (item ...) */ + PGPA_TARGET_UNORDERED_LIST /* {item ...} */ +} pgpa_target_type; + +/* + * An index specification. + */ +typedef struct pgpa_index_target +{ + /* Index schema and name */ + char *indnamespace; + char *indname; +} pgpa_index_target; + +/* + * A single item about which advice is being given, which could be either + * a relation identifier that we want to break out into its constituent fields, + * or a sublist of some kind. + */ +typedef struct pgpa_advice_target +{ + pgpa_target_type ttype; + + /* + * This field is meaningful when ttype is PGPA_TARGET_IDENTIFIER. + * + * All identifiers must have an alias name and an occurrence number; the + * remaining fields can be NULL. Note that it's possible to specify a + * partition name without a partition schema, but not the reverse. + */ + pgpa_identifier rid; + + /* + * This field is set when ttype is PGPA_TARGET_IDENTIFIER and the advice + * tag is PGPA_TAG_INDEX_SCAN or PGPA_TAG_INDEX_ONLY_SCAN. + */ + pgpa_index_target *itarget; + + /* + * When the ttype is PGPA_TARGET__LIST, this field contains a + * list of additional pgpa_advice_target objects. Otherwise, it is unused. + */ + List *children; +} pgpa_advice_target; + +/* + * These are all the kinds of advice that we know how to parse. If a keyword + * is found at the top level, it must be in this list. + * + * If you change anything here, also update pgpa_parse_advice_tag and + * pgpa_cstring_advice_tag. + */ +typedef enum pgpa_advice_tag_type +{ + PGPA_TAG_BITMAP_HEAP_SCAN, + PGPA_TAG_FOREIGN_JOIN, + PGPA_TAG_GATHER, + PGPA_TAG_GATHER_MERGE, + PGPA_TAG_HASH_JOIN, + PGPA_TAG_INDEX_ONLY_SCAN, + PGPA_TAG_INDEX_SCAN, + PGPA_TAG_JOIN_ORDER, + PGPA_TAG_MERGE_JOIN_MATERIALIZE, + PGPA_TAG_MERGE_JOIN_PLAIN, + PGPA_TAG_NESTED_LOOP_MATERIALIZE, + PGPA_TAG_NESTED_LOOP_MEMOIZE, + PGPA_TAG_NESTED_LOOP_PLAIN, + PGPA_TAG_NO_GATHER, + PGPA_TAG_PARTITIONWISE, + PGPA_TAG_SEMIJOIN_NON_UNIQUE, + PGPA_TAG_SEMIJOIN_UNIQUE, + PGPA_TAG_SEQ_SCAN, + PGPA_TAG_TID_SCAN +} pgpa_advice_tag_type; + +/* + * An item of advice, meaning a tag and the list of all targets to which + * it is being applied. + * + * "targets" is a list of pgpa_advice_target objects. + * + * The List returned from pgpa_yyparse is list of pgpa_advice_item objects. + */ +typedef struct pgpa_advice_item +{ + pgpa_advice_tag_type tag; + List *targets; +} pgpa_advice_item; + +/* + * Result of comparing an array of pgpa_relation_identifier objects to a + * pgpa_advice_target. + * + * PGPA_ITM_EQUAL means all targets are matched by some identifier, and + * all identifiers were matched to a target. + * + * PGPA_ITM_KEYS_ARE_SUBSET means that all identifiers matched to a target, + * but there were leftover targets. Generally, this means that the advice is + * looking to apply to all of the rels we have plus some additional ones that + * we don't have. + * + * PGPA_ITM_TARGETS_ARE_SUBSET means that all targets are matched by + * identifiers, but there were leftover identifiers. Generally, this means + * that the advice is looking to apply to some but not all of the rels we have. + * + * PGPA_ITM_INTERSECTING means that some identifiers and targets were matched, + * but neither all identifiers nor all targets could be matched to items in + * the other set. + * + * PGPA_ITM_DISJOINT means that no matches between identifiers and targets were + * found. + */ +typedef enum +{ + PGPA_ITM_EQUAL, + PGPA_ITM_KEYS_ARE_SUBSET, + PGPA_ITM_TARGETS_ARE_SUBSET, + PGPA_ITM_INTERSECTING, + PGPA_ITM_DISJOINT +} pgpa_itm_type; + +/* for pgpa_scanner.l and pgpa_parser.y */ +union YYSTYPE; +#ifndef YY_TYPEDEF_YY_SCANNER_T +#define YY_TYPEDEF_YY_SCANNER_T +typedef void *yyscan_t; +#endif + +/* in pgpa_scanner.l */ +extern int pgpa_yylex(union YYSTYPE *yylval_param, List **result, + char **parse_error_msg_p, yyscan_t yyscanner); +extern void pgpa_yyerror(List **result, char **parse_error_msg_p, + yyscan_t yyscanner, + const char *message); +extern void pgpa_scanner_init(const char *str, yyscan_t *yyscannerp); +extern void pgpa_scanner_finish(yyscan_t yyscanner); + +/* in pgpa_parser.y */ +extern int pgpa_yyparse(List **result, char **parse_error_msg_p, + yyscan_t yyscanner); +extern List *pgpa_parse(const char *advice_string, char **error_p); + +/* in pgpa_ast.c */ +extern char *pgpa_cstring_advice_tag(pgpa_advice_tag_type advice_tag); +extern bool pgpa_identifier_matches_target(pgpa_identifier *rid, + pgpa_advice_target *target); +extern pgpa_itm_type pgpa_identifiers_match_target(int nrids, + pgpa_identifier *rids, + pgpa_advice_target *target); +extern bool pgpa_index_targets_equal(pgpa_index_target *i1, + pgpa_index_target *i2); +extern pgpa_advice_tag_type pgpa_parse_advice_tag(const char *tag, bool *fail); +extern void pgpa_format_advice_target(StringInfo str, + pgpa_advice_target *target); +extern void pgpa_format_index_target(StringInfo str, + pgpa_index_target *itarget); + +#endif diff --git a/contrib/pg_plan_advice/pgpa_identifier.c b/contrib/pg_plan_advice/pgpa_identifier.c new file mode 100644 index 00000000000..0cfc4aa4f7e --- /dev/null +++ b/contrib/pg_plan_advice/pgpa_identifier.c @@ -0,0 +1,481 @@ +/*------------------------------------------------------------------------- + * + * pgpa_identifier.c + * create appropriate identifiers for range table entries + * + * The goal of this module is to be able to produce identifiers for range + * table entries that are unique, understandable to human beings, and + * able to be reconstructed during future planning cycles. As an + * exception, we do not care about, or want to produce, identifiers for + * RTE_JOIN entries. This is because (1) we would end up with a ton of + * RTEs with unhelpful names like unnamed_join_17; (2) not all joins have + * RTEs; and (3) we intend to refer to joins by their constituent members + * rather than by reference to the join RTE. + * + * In general, we construct identifiers of the following form: + * + * alias_name#occurrence_number/child_table_name@subquery_name + * + * However, occurrence_number is omitted when it is the first occurrence + * within the same subquery, child_table_name is omitted for relations that + * are not child tables, and subquery_name is omitted for the topmost + * query level. Whenever an item is omitted, the preceding punctuation mark + * is also omitted. Identifier-style escaping is applied to alias_name and + * subquery_name. In generated advice, child table names are always + * schema-qualified, but users can supply advice where the schema name is + * not mentioned. Identifier-style escaping is applied to the schema and to + * the relation name separately. + * + * The upshot of all of these rules is that in simple cases, the relation + * identifier is textually identical to the alias name, making life easier + * for users. However, even in complex cases, every relation identifier + * for a given query will be unique (or at least we hope so: if not, this + * code is buggy and the identifier format might need to be rethought). + * + * A key goal of this system is that we want to be able to reconstruct the + * same identifiers during a future planning cycle for the same query, so + * that if a certain behavior is specified for a certain identifier, we can + * properly identify the RTI for which that behavior is mandated. In order + * for this to work, subquery names must be unique and known before the + * subquery is planned, and the remainder of the identifier must not depend + * on any part of the query outside of the current subquery level. In + * particular, occurrence_number must be calculated relative to the range + * table for the relevant subquery, not the final flattened range table. + * + * NB: All of this code must use rt_fetch(), not planner_rt_fetch()! + * Join removal and self-join elimination remove rels from the arrays + * that planner_rt_fetch() uses; using rt_fetch() is necessary to get + * stable results. + * + * Copyright (c) 2016-2026, PostgreSQL Global Development Group + * + * contrib/pg_plan_advice/pgpa_identifier.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "pgpa_identifier.h" + +#include "parser/parsetree.h" +#include "utils/builtins.h" +#include "utils/lsyscache.h" + +static Index *pgpa_create_top_rti_map(Index rtable_length, List *rtable, + List *appinfos); +static int pgpa_occurrence_number(List *rtable, Index *top_rti_map, + SubPlanRTInfo *rtinfo, Index rti); + +/* + * Create a range table identifier from scratch. + * + * This function leaves the caller to do all the heavy lifting, so it's + * generally better to use one of the functions below instead. + * + * See the file header comments for more details on the format of an + * identifier. + */ +const char * +pgpa_identifier_string(const pgpa_identifier *rid) +{ + const char *result; + + Assert(rid->alias_name != NULL); + result = quote_identifier(rid->alias_name); + + Assert(rid->occurrence >= 0); + if (rid->occurrence > 1) + result = psprintf("%s#%d", result, rid->occurrence); + + if (rid->partrel != NULL) + { + if (rid->partnsp == NULL) + result = psprintf("%s/%s", result, + quote_identifier(rid->partrel)); + else + result = psprintf("%s/%s.%s", result, + quote_identifier(rid->partnsp), + quote_identifier(rid->partrel)); + } + + if (rid->plan_name != NULL) + result = psprintf("%s@%s", result, quote_identifier(rid->plan_name)); + + return result; +} + +/* + * Compute a relation identifier for a particular RTI. + * + * The caller provides root and rti, and gets the necessary details back via + * the remaining parameters. + */ +void +pgpa_compute_identifier_by_rti(PlannerInfo *root, Index rti, + pgpa_identifier *rid) +{ + Index top_rti = rti; + int occurrence = 1; + RangeTblEntry *rte; + RangeTblEntry *top_rte; + char *partnsp = NULL; + char *partrel = NULL; + + /* + * If this is a child RTE, find the topmost parent that is still of type + * RTE_RELATION. We do this because we identify children of partitioned + * tables by the name of the child table, but subqueries can also have + * child rels and we don't care about those here. + */ + for (;;) + { + AppendRelInfo *appinfo; + RangeTblEntry *parent_rte; + + /* append_rel_array can be NULL if there are no children */ + if (root->append_rel_array == NULL || + (appinfo = root->append_rel_array[top_rti]) == NULL) + break; + + parent_rte = rt_fetch(appinfo->parent_relid, root->parse->rtable); + if (parent_rte->rtekind != RTE_RELATION) + break; + + top_rti = appinfo->parent_relid; + } + + /* Get the range table entries for the RTI and top RTI. */ + rte = rt_fetch(rti, root->parse->rtable); + top_rte = rt_fetch(top_rti, root->parse->rtable); + Assert(rte->rtekind != RTE_JOIN); + Assert(top_rte->rtekind != RTE_JOIN); + + /* Work out the correct occurrence number. */ + for (Index prior_rti = 1; prior_rti < top_rti; ++prior_rti) + { + RangeTblEntry *prior_rte; + AppendRelInfo *appinfo; + + /* + * If this is a child rel of a parent that is a relation, skip it. + * + * Such range table entries are disambiguated by mentioning the schema + * and name of the table, not by counting them as separate occurrences + * of the same table. + * + * NB: append_rel_array can be NULL if there are no children + */ + if (root->append_rel_array != NULL && + (appinfo = root->append_rel_array[prior_rti]) != NULL) + { + RangeTblEntry *parent_rte; + + parent_rte = rt_fetch(appinfo->parent_relid, root->parse->rtable); + if (parent_rte->rtekind == RTE_RELATION) + continue; + } + + /* Skip NULL entries and joins. */ + prior_rte = rt_fetch(prior_rti, root->parse->rtable); + if (prior_rte == NULL || prior_rte->rtekind == RTE_JOIN) + continue; + + /* Skip if the alias name differs. */ + if (strcmp(prior_rte->eref->aliasname, rte->eref->aliasname) != 0) + continue; + + /* Looks like a true duplicate. */ + ++occurrence; + } + + /* If this is a child table, get the schema and relation names. */ + if (rti != top_rti) + { + partnsp = get_namespace_name_or_temp(get_rel_namespace(rte->relid)); + partrel = get_rel_name(rte->relid); + } + + /* OK, we have all the answers we need. Return them to the caller. */ + rid->alias_name = top_rte->eref->aliasname; + rid->occurrence = occurrence; + rid->partnsp = partnsp; + rid->partrel = partrel; + rid->plan_name = root->plan_name; +} + +/* + * Compute a relation identifier for a set of RTIs, except for any RTE_JOIN + * RTIs that may be present. + * + * RTE_JOIN entries are excluded because they cannot be mentioned by plan + * advice. + * + * The caller is responsible for making sure that the tkeys array is large + * enough to store the results. + * + * The return value is the number of identifiers computed. + */ +int +pgpa_compute_identifiers_by_relids(PlannerInfo *root, Bitmapset *relids, + pgpa_identifier *rids) +{ + int count = 0; + int rti = -1; + + while ((rti = bms_next_member(relids, rti)) >= 0) + { + RangeTblEntry *rte = rt_fetch(rti, root->parse->rtable); + + if (rte->rtekind == RTE_JOIN) + continue; + pgpa_compute_identifier_by_rti(root, rti, &rids[count++]); + } + + Assert(count > 0); + return count; +} + +/* + * Create an array of range table identifiers for all the non-NULL, + * non-RTE_JOIN entries in the PlannedStmt's range table. + */ +pgpa_identifier * +pgpa_create_identifiers_for_planned_stmt(PlannedStmt *pstmt) +{ + Index rtable_length = list_length(pstmt->rtable); + pgpa_identifier *result = palloc0_array(pgpa_identifier, rtable_length); + Index *top_rti_map; + int rtinfoindex = 0; + SubPlanRTInfo *rtinfo = NULL; + SubPlanRTInfo *nextrtinfo = NULL; + + /* + * Account for relations added by inheritance expansion of partitioned + * tables. + */ + top_rti_map = pgpa_create_top_rti_map(rtable_length, pstmt->rtable, + pstmt->appendRelations); + + /* + * When we begin iterating, we're processing the portion of the range + * table that originated from the top-level PlannerInfo, so subrtinfo is + * NULL. Later, subrtinfo will be the SubPlanRTInfo for the subquery whose + * portion of the range table we are processing. nextrtinfo is always the + * SubPlanRTInfo that follows the current one, if any, so when we're + * processing the top-level query's portion of the range table, the next + * SubPlanRTInfo is the very first one. + */ + if (pstmt->subrtinfos != NULL) + nextrtinfo = linitial(pstmt->subrtinfos); + + /* Main loop over the range table. */ + for (Index rti = 1; rti <= rtable_length; rti++) + { + const char *plan_name; + Index top_rti; + RangeTblEntry *rte; + RangeTblEntry *top_rte; + char *partnsp = NULL; + char *partrel = NULL; + int occurrence; + pgpa_identifier *rid; + + /* + * Advance to the next SubPlanRTInfo, if it's time to do that. + * + * This loop probably shouldn't ever iterate more than once, because + * that would imply that a subquery was planned but added nothing to + * the range table; but let's be defensive and assume it can happen. + */ + while (nextrtinfo != NULL && rti > nextrtinfo->rtoffset) + { + rtinfo = nextrtinfo; + if (++rtinfoindex >= list_length(pstmt->subrtinfos)) + nextrtinfo = NULL; + else + nextrtinfo = list_nth(pstmt->subrtinfos, rtinfoindex); + } + + /* Fetch the range table entry, if any. */ + rte = rt_fetch(rti, pstmt->rtable); + + /* + * We can't and don't need to identify null entries, and we don't want + * to identify join entries. + */ + if (rte == NULL || rte->rtekind == RTE_JOIN) + continue; + + /* + * If this is not a relation added by partitioned table expansion, + * then the top RTI/RTE are just the same as this RTI/RTE. Otherwise, + * we need the information for the top RTI/RTE, and must also fetch + * the partition schema and name. + */ + top_rti = top_rti_map[rti - 1]; + if (rti == top_rti) + top_rte = rte; + else + { + top_rte = rt_fetch(top_rti, pstmt->rtable); + partnsp = + get_namespace_name_or_temp(get_rel_namespace(rte->relid)); + partrel = get_rel_name(rte->relid); + } + + /* Compute the correct occurrence number. */ + occurrence = pgpa_occurrence_number(pstmt->rtable, top_rti_map, + rtinfo, top_rti); + + /* Get the name of the current plan (NULL for toplevel query). */ + plan_name = rtinfo == NULL ? NULL : rtinfo->plan_name; + + /* Save all the details we've derived. */ + rid = &result[rti - 1]; + rid->alias_name = top_rte->eref->aliasname; + rid->occurrence = occurrence; + rid->partnsp = partnsp; + rid->partrel = partrel; + rid->plan_name = plan_name; + } + + return result; +} + +/* + * Search for a pgpa_identifier in the array of identifiers computed for the + * range table. If exactly one match is found, return the matching RTI; else + * return 0. + */ +Index +pgpa_compute_rti_from_identifier(int rtable_length, + pgpa_identifier *rt_identifiers, + pgpa_identifier *rid) +{ + Index result = 0; + + for (Index rti = 1; rti <= rtable_length; ++rti) + { + pgpa_identifier *rti_rid = &rt_identifiers[rti - 1]; + + /* If there's no identifier for this RTI, skip it. */ + if (rti_rid->alias_name == NULL) + continue; + + /* + * If it matches, return this RTI. As usual, an omitted partition + * schema matches anything, but partition and plan names must either + * match exactly or be omitted on both sides. + */ + if (strcmp(rid->alias_name, rti_rid->alias_name) == 0 && + rid->occurrence == rti_rid->occurrence && + (rid->partnsp == NULL || rti_rid->partnsp == NULL || + strcmp(rid->partnsp, rti_rid->partnsp) == 0) && + strings_equal_or_both_null(rid->partrel, rti_rid->partrel) && + strings_equal_or_both_null(rid->plan_name, rti_rid->plan_name)) + { + if (result != 0) + { + /* Multiple matches were found. */ + return 0; + } + result = rti; + } + } + + return result; +} + +/* + * Build a mapping from each RTI to the RTI whose alias_name will be used to + * construct the range table identifier. + * + * For child relations, this is the topmost parent that is still of type + * RTE_RELATION. For other relations, it's just the original RTI. + * + * Since we're eventually going to need this information for every RTI in + * the range table, it's best to compute all the answers in a single pass over + * the AppendRelInfo list. Otherwise, we might end up searching through that + * list repeatedly for entries of interest. + * + * Note that the returned array is uses zero-based indexing, while RTIs use + * 1-based indexing, so subtract 1 from the RTI before looking it up in the + * array. + */ +static Index * +pgpa_create_top_rti_map(Index rtable_length, List *rtable, List *appinfos) +{ + Index *top_rti_map = palloc0_array(Index, rtable_length); + + /* Initially, make every RTI point to itself. */ + for (Index rti = 1; rti <= rtable_length; ++rti) + top_rti_map[rti - 1] = rti; + + /* Update the map for each AppendRelInfo object. */ + foreach_node(AppendRelInfo, appinfo, appinfos) + { + Index parent_rti = appinfo->parent_relid; + RangeTblEntry *parent_rte = rt_fetch(parent_rti, rtable); + + /* If the parent is not RTE_RELATION, ignore this entry. */ + if (parent_rte->rtekind != RTE_RELATION) + continue; + + /* + * Map the child to wherever we mapped the parent. Parents always + * precede their children in the AppendRelInfo list, so this should + * work out. + */ + top_rti_map[appinfo->child_relid - 1] = top_rti_map[parent_rti - 1]; + } + + return top_rti_map; +} + +/* + * Find the occurrence number of a certain relation within a certain subquery. + * + * The same alias name can occur multiple times within a subquery, but we want + * to disambiguate by giving different occurrences different integer indexes. + * However, child tables are disambiguated by including the table name rather + * than by incrementing the occurrence number; and joins are not named and so + * shouldn't increment the occurrence number either. + */ +static int +pgpa_occurrence_number(List *rtable, Index *top_rti_map, + SubPlanRTInfo *rtinfo, Index rti) +{ + Index rtoffset = (rtinfo == NULL) ? 0 : rtinfo->rtoffset; + int occurrence = 1; + RangeTblEntry *rte = rt_fetch(rti, rtable); + + for (Index prior_rti = rtoffset + 1; prior_rti < rti; ++prior_rti) + { + RangeTblEntry *prior_rte; + + /* + * If this is a child rel of a parent that is a relation, skip it. + * + * Such range table entries are disambiguated by mentioning the schema + * and name of the table, not by counting them as separate occurrences + * of the same table. + */ + if (top_rti_map[prior_rti - 1] != prior_rti) + continue; + + /* Skip joins. */ + prior_rte = rt_fetch(prior_rti, rtable); + if (prior_rte->rtekind == RTE_JOIN) + continue; + + /* Skip if the alias name differs. */ + if (strcmp(prior_rte->eref->aliasname, rte->eref->aliasname) != 0) + continue; + + /* Looks like a true duplicate. */ + ++occurrence; + } + + return occurrence; +} diff --git a/contrib/pg_plan_advice/pgpa_identifier.h b/contrib/pg_plan_advice/pgpa_identifier.h new file mode 100644 index 00000000000..393a83dc78c --- /dev/null +++ b/contrib/pg_plan_advice/pgpa_identifier.h @@ -0,0 +1,52 @@ +/*------------------------------------------------------------------------- + * + * pgpa_identifier.h + * create appropriate identifiers for range table entries + * + * Copyright (c) 2016-2026, PostgreSQL Global Development Group + * + * contrib/pg_plan_advice/pgpa_identifier.h + * + *------------------------------------------------------------------------- + */ + +#ifndef PGPA_IDENTIFIER_H +#define PGPA_IDENTIFIER_H + +#include "nodes/pathnodes.h" +#include "nodes/plannodes.h" + +typedef struct pgpa_identifier +{ + const char *alias_name; + int occurrence; + const char *partnsp; + const char *partrel; + const char *plan_name; +} pgpa_identifier; + +/* Convenience function for comparing possibly-NULL strings. */ +static inline bool +strings_equal_or_both_null(const char *a, const char *b) +{ + if (a == b) + return true; + else if (a == NULL || b == NULL) + return false; + else + return strcmp(a, b) == 0; +} + +extern const char *pgpa_identifier_string(const pgpa_identifier *rid); +extern void pgpa_compute_identifier_by_rti(PlannerInfo *root, Index rti, + pgpa_identifier *rid); +extern int pgpa_compute_identifiers_by_relids(PlannerInfo *root, + Bitmapset *relids, + pgpa_identifier *rids); +extern pgpa_identifier *pgpa_create_identifiers_for_planned_stmt(PlannedStmt *pstmt); + +extern Index pgpa_compute_rti_from_identifier(int rtable_length, + pgpa_identifier *rt_identifiers, + pgpa_identifier *rid); + +#endif diff --git a/contrib/pg_plan_advice/pgpa_join.c b/contrib/pg_plan_advice/pgpa_join.c new file mode 100644 index 00000000000..4610d02356f --- /dev/null +++ b/contrib/pg_plan_advice/pgpa_join.c @@ -0,0 +1,638 @@ +/*------------------------------------------------------------------------- + * + * pgpa_join.c + * analysis of joins in Plan trees + * + * Copyright (c) 2016-2026, PostgreSQL Global Development Group + * + * contrib/pg_plan_advice/pgpa_join.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "pgpa_join.h" +#include "pgpa_scan.h" +#include "pgpa_walker.h" + +#include "nodes/pathnodes.h" +#include "nodes/print.h" +#include "parser/parsetree.h" + +/* + * Temporary object used when unrolling a join tree. + */ +struct pgpa_join_unroller +{ + unsigned nallocated; + unsigned nused; + Plan *outer_subplan; + ElidedNode *outer_elided_node; + bool outer_beneath_any_gather; + pgpa_join_strategy *strategy; + Plan **inner_subplans; + ElidedNode **inner_elided_nodes; + pgpa_join_unroller **inner_unrollers; + bool *inner_beneath_any_gather; +}; + +static pgpa_join_strategy pgpa_decompose_join(pgpa_plan_walker_context *walker, + Plan *plan, + Plan **realouter, + Plan **realinner, + ElidedNode **elidedrealouter, + ElidedNode **elidedrealinner, + bool *found_any_outer_gather, + bool *found_any_inner_gather); +static ElidedNode *pgpa_descend_node(PlannedStmt *pstmt, Plan **plan); +static ElidedNode *pgpa_descend_any_gather(PlannedStmt *pstmt, Plan **plan, + bool *found_any_gather); +static bool pgpa_descend_any_unique(PlannedStmt *pstmt, Plan **plan, + ElidedNode **elided_node); + +static bool is_result_node_with_child(Plan *plan); +static bool is_sorting_plan(Plan *plan); + +/* + * Create an initially-empty object for unrolling joins. + * + * This function creates a helper object that can later be used to create a + * pgpa_unrolled_join, after first calling pgpa_unroll_join one or more times. + */ +pgpa_join_unroller * +pgpa_create_join_unroller(void) +{ + pgpa_join_unroller *join_unroller; + + join_unroller = palloc0_object(pgpa_join_unroller); + join_unroller->nallocated = 4; + join_unroller->strategy = + palloc_array(pgpa_join_strategy, join_unroller->nallocated); + join_unroller->inner_subplans = + palloc_array(Plan *, join_unroller->nallocated); + join_unroller->inner_elided_nodes = + palloc_array(ElidedNode *, join_unroller->nallocated); + join_unroller->inner_unrollers = + palloc_array(pgpa_join_unroller *, join_unroller->nallocated); + join_unroller->inner_beneath_any_gather = + palloc_array(bool, join_unroller->nallocated); + + return join_unroller; +} + +/* + * Unroll one level of an unrollable join tree. + * + * Our basic goal here is to unroll join trees as they occur in the Plan + * tree into a simpler and more regular structure that we can more easily + * use for further processing. Unrolling is outer-deep, so if the plan tree + * has Join1(Join2(A,B),Join3(C,D)), the same join unroller object should be + * used for Join1 and Join2, but a different one will be needed for Join3, + * since that involves a join within the *inner* side of another join. + * + * pgpa_plan_walker creates a "top level" join unroller object when it + * encounters a join in a portion of the plan tree in which no join unroller + * is already active. From there, this function is responsible for determing + * to what portion of the plan tree that join unroller applies, and for + * creating any subordinate join unroller objects that are needed as a result + * of non-outer-deep join trees. We do this by returning the join unroller + * objects that should be used for further traversal of the outer and inner + * subtrees of the current plan node via *outer_join_unroller and + * *inner_join_unroller, respectively. + */ +void +pgpa_unroll_join(pgpa_plan_walker_context *walker, Plan *plan, + bool beneath_any_gather, + pgpa_join_unroller *join_unroller, + pgpa_join_unroller **outer_join_unroller, + pgpa_join_unroller **inner_join_unroller) +{ + pgpa_join_strategy strategy; + Plan *realinner, + *realouter; + ElidedNode *elidedinner, + *elidedouter; + int n; + bool found_any_outer_gather = false; + bool found_any_inner_gather = false; + + Assert(join_unroller != NULL); + + /* + * We need to pass the join_unroller object down through certain types of + * plan nodes -- anything that's considered part of the join strategy, and + * any other nodes that can occur in a join tree despite not being scans + * or joins. + * + * This includes: + * + * (1) Materialize, Memoize, and Hash nodes, which are part of the join + * strategy, + * + * (2) Gather and Gather Merge nodes, which can occur at any point in the + * join tree where the planner decided to initiate parallelism, + * + * (3) Sort and IncrementalSort nodes, which can occur beneath MergeJoin + * or GatherMerge, + * + * (4) Agg and Unique nodes, which can occur when we decide to make the + * nullable side of a semijoin unique and then join the result, and + * + * (5) Result nodes with children, which can be added either to project to + * enforce a one-time filter (but Result nodes without children are + * degenerate scans or joins). + */ + if (IsA(plan, Material) || IsA(plan, Memoize) || IsA(plan, Hash) + || IsA(plan, Gather) || IsA(plan, GatherMerge) + || is_sorting_plan(plan) || IsA(plan, Agg) || IsA(plan, Unique) + || is_result_node_with_child(plan)) + { + *outer_join_unroller = join_unroller; + return; + } + + /* + * Since we've already handled nodes that require pass-through treatment, + * this should be an unrollable join. + */ + strategy = pgpa_decompose_join(walker, plan, + &realouter, &realinner, + &elidedouter, &elidedinner, + &found_any_outer_gather, + &found_any_inner_gather); + + /* If our workspace is full, expand it. */ + if (join_unroller->nused >= join_unroller->nallocated) + { + join_unroller->nallocated *= 2; + join_unroller->strategy = + repalloc_array(join_unroller->strategy, + pgpa_join_strategy, + join_unroller->nallocated); + join_unroller->inner_subplans = + repalloc_array(join_unroller->inner_subplans, + Plan *, + join_unroller->nallocated); + join_unroller->inner_elided_nodes = + repalloc_array(join_unroller->inner_elided_nodes, + ElidedNode *, + join_unroller->nallocated); + join_unroller->inner_beneath_any_gather = + repalloc_array(join_unroller->inner_beneath_any_gather, + bool, + join_unroller->nallocated); + join_unroller->inner_unrollers = + repalloc_array(join_unroller->inner_unrollers, + pgpa_join_unroller *, + join_unroller->nallocated); + } + + /* + * Since we're flattening outer-deep join trees, it follows that if the + * outer side is still an unrollable join, it should be unrolled into this + * same object. Otherwise, we've reached the limit of what we can unroll + * into this object and must remember the outer side as the final outer + * subplan. + */ + if (elidedouter == NULL && pgpa_is_join(realouter)) + *outer_join_unroller = join_unroller; + else + { + join_unroller->outer_subplan = realouter; + join_unroller->outer_elided_node = elidedouter; + join_unroller->outer_beneath_any_gather = + beneath_any_gather || found_any_outer_gather; + } + + /* + * Store the inner subplan. If it's an unrollable join, it needs to be + * flattened in turn, but into a new unroller object, not this one. + */ + n = join_unroller->nused++; + join_unroller->strategy[n] = strategy; + join_unroller->inner_subplans[n] = realinner; + join_unroller->inner_elided_nodes[n] = elidedinner; + join_unroller->inner_beneath_any_gather[n] = + beneath_any_gather || found_any_inner_gather; + if (elidedinner == NULL && pgpa_is_join(realinner)) + *inner_join_unroller = pgpa_create_join_unroller(); + else + *inner_join_unroller = NULL; + join_unroller->inner_unrollers[n] = *inner_join_unroller; +} + +/* + * Use the data we've accumulated in a pgpa_join_unroller object to construct + * a pgpa_unrolled_join. + */ +pgpa_unrolled_join * +pgpa_build_unrolled_join(pgpa_plan_walker_context *walker, + pgpa_join_unroller *join_unroller) +{ + pgpa_unrolled_join *ujoin; + int i; + + /* + * We shouldn't have gone even so far as to create a join unroller unless + * we found at least one unrollable join. + */ + Assert(join_unroller->nused > 0); + + /* Allocate result structures. */ + ujoin = palloc0_object(pgpa_unrolled_join); + ujoin->ninner = join_unroller->nused; + ujoin->strategy = palloc0_array(pgpa_join_strategy, join_unroller->nused); + ujoin->inner = palloc0_array(pgpa_join_member, join_unroller->nused); + + /* Handle the outermost join. */ + ujoin->outer.plan = join_unroller->outer_subplan; + ujoin->outer.elided_node = join_unroller->outer_elided_node; + ujoin->outer.scan = + pgpa_build_scan(walker, ujoin->outer.plan, + ujoin->outer.elided_node, + join_unroller->outer_beneath_any_gather, + true); + + /* + * We want the joins from the deepest part of the plan tree to appear + * first in the result object, but the join unroller adds them in exactly + * the reverse of that order, so we need to flip the order of the arrays + * when constructing the final result. + */ + for (i = 0; i < join_unroller->nused; ++i) + { + int k = join_unroller->nused - i - 1; + + /* Copy strategy, Plan, and ElidedNode. */ + ujoin->strategy[i] = join_unroller->strategy[k]; + ujoin->inner[i].plan = join_unroller->inner_subplans[k]; + ujoin->inner[i].elided_node = join_unroller->inner_elided_nodes[k]; + + /* + * Fill in remaining details, using either the nested join unroller, + * or by deriving them from the plan and elided nodes. + */ + if (join_unroller->inner_unrollers[k] != NULL) + ujoin->inner[i].unrolled_join = + pgpa_build_unrolled_join(walker, + join_unroller->inner_unrollers[k]); + else + ujoin->inner[i].scan = + pgpa_build_scan(walker, ujoin->inner[i].plan, + ujoin->inner[i].elided_node, + join_unroller->inner_beneath_any_gather[k], + true); + } + + return ujoin; +} + +/* + * Free memory allocated for pgpa_join_unroller. + */ +void +pgpa_destroy_join_unroller(pgpa_join_unroller *join_unroller) +{ + pfree(join_unroller->strategy); + pfree(join_unroller->inner_subplans); + pfree(join_unroller->inner_elided_nodes); + pfree(join_unroller->inner_unrollers); + pfree(join_unroller->inner_beneath_any_gather); + pfree(join_unroller); +} + +/* + * Identify the join strategy used by a join and the "real" inner and outer + * plans. + * + * For example, a Hash Join always has a Hash node on the inner side, but + * for all intents and purposes the real inner input is the Hash node's child, + * not the Hash node itself. + * + * Likewise, a Merge Join may have Sort node on the inner or outer side; if + * it does, the real input to the join is the Sort node's child, not the + * Sort node itself. + * + * In addition, with a Merge Join or a Nested Loop, the join planning code + * may add additional nodes such as Materialize or Memoize. We regard these + * as an aspect of the join strategy. As in the previous cases, the true input + * to the join is the underlying node. + * + * However, if any involved child node previously had a now-elided node stacked + * on top, then we can't "look through" that node -- indeed, what's going to be + * relevant for our purposes is the ElidedNode on top of that plan node, rather + * than the plan node itself. + * + * If there are multiple elided nodes, we want that one that would have been + * uppermost in the plan tree prior to setrefs processing; we expect to find + * that one last in the list of elided nodes. + * + * On return *realouter and *realinner will have been set to the real inner + * and real outer plans that we identified, and *elidedrealouter and + * *elidedrealinner to the last of any corresponding elided nodes. + * Additionally, *found_any_outer_gather and *found_any_inner_gather will + * be set to true if we looked through a Gather or Gather Merge node on + * that side of the join, and false otherwise. + */ +static pgpa_join_strategy +pgpa_decompose_join(pgpa_plan_walker_context *walker, Plan *plan, + Plan **realouter, Plan **realinner, + ElidedNode **elidedrealouter, ElidedNode **elidedrealinner, + bool *found_any_outer_gather, bool *found_any_inner_gather) +{ + PlannedStmt *pstmt = walker->pstmt; + JoinType jointype = ((Join *) plan)->jointype; + Plan *outerplan = plan->lefttree; + Plan *innerplan = plan->righttree; + ElidedNode *elidedouter; + ElidedNode *elidedinner; + pgpa_join_strategy strategy; + bool uniqueouter; + bool uniqueinner; + + elidedouter = pgpa_last_elided_node(pstmt, outerplan); + elidedinner = pgpa_last_elided_node(pstmt, innerplan); + *found_any_outer_gather = false; + *found_any_inner_gather = false; + + switch (nodeTag(plan)) + { + case T_MergeJoin: + + /* + * The planner may have chosen to place a Material node on the + * inner side of the MergeJoin; if this is present, we record it + * as part of the join strategy. + */ + if (elidedinner == NULL && IsA(innerplan, Material)) + { + elidedinner = pgpa_descend_node(pstmt, &innerplan); + strategy = JSTRAT_MERGE_JOIN_MATERIALIZE; + } + else + strategy = JSTRAT_MERGE_JOIN_PLAIN; + + /* + * For a MergeJoin, either the outer or the inner subplan, or + * both, may have needed to be sorted; we must disregard any Sort + * or IncrementalSort node to find the real inner or outer + * subplan. + */ + if (elidedouter == NULL && is_sorting_plan(outerplan)) + elidedouter = pgpa_descend_node(pstmt, &outerplan); + if (elidedinner == NULL && is_sorting_plan(innerplan)) + elidedinner = pgpa_descend_node(pstmt, &innerplan); + break; + + case T_NestLoop: + + /* + * The planner may have chosen to place a Material or Memoize node + * on the inner side of the NestLoop; if this is present, we + * record it as part of the join strategy. + */ + if (elidedinner == NULL && IsA(innerplan, Material)) + { + elidedinner = pgpa_descend_node(pstmt, &innerplan); + strategy = JSTRAT_NESTED_LOOP_MATERIALIZE; + } + else if (elidedinner == NULL && IsA(innerplan, Memoize)) + { + elidedinner = pgpa_descend_node(pstmt, &innerplan); + strategy = JSTRAT_NESTED_LOOP_MEMOIZE; + } + else + strategy = JSTRAT_NESTED_LOOP_PLAIN; + break; + + case T_HashJoin: + + /* + * The inner subplan of a HashJoin is always a Hash node; the real + * inner subplan is the Hash node's child. + */ + Assert(IsA(innerplan, Hash)); + Assert(elidedinner == NULL); + elidedinner = pgpa_descend_node(pstmt, &innerplan); + strategy = JSTRAT_HASH_JOIN; + break; + + default: + elog(ERROR, "unrecognized node type: %d", (int) nodeTag(plan)); + } + + /* + * The planner may have decided to implement a semijoin by first making + * the nullable side of the plan unique, and then performing a normal join + * against the result. Therefore, we might need to descend through a + * unique node on either side of the plan. + */ + uniqueouter = pgpa_descend_any_unique(pstmt, &outerplan, &elidedouter); + uniqueinner = pgpa_descend_any_unique(pstmt, &innerplan, &elidedinner); + + /* + * Can we see a Result node here, to project above a Gather? So far I've + * found no example that behaves that way; rather, the Gather or Gather + * Merge is made to project. Hence, don't test is_result_node_with_child() + * at this point. + */ + + /* + * The planner may have decided to parallelize part of the join tree, so + * we could find a Gather or Gather Merge node here. Note that, if + * present, this will appear below nodes we considered as part of the join + * strategy, but we could find another uniqueness-enforcing node below the + * Gather or Gather Merge, if present. + */ + if (elidedouter == NULL) + { + elidedouter = pgpa_descend_any_gather(pstmt, &outerplan, + found_any_outer_gather); + if (*found_any_outer_gather && + pgpa_descend_any_unique(pstmt, &outerplan, &elidedouter)) + uniqueouter = true; + } + if (elidedinner == NULL) + { + elidedinner = pgpa_descend_any_gather(pstmt, &innerplan, + found_any_inner_gather); + if (*found_any_inner_gather && + pgpa_descend_any_unique(pstmt, &innerplan, &elidedinner)) + uniqueinner = true; + } + + /* + * It's possible that a Result node has been inserted either to project a + * target list or to implement a one-time filter. If so, we can descend + * through it. Note that a Result node without a child would be a + * degenerate scan or join, and not something we could descend through. + */ + if (elidedouter == NULL && is_result_node_with_child(outerplan)) + elidedouter = pgpa_descend_node(pstmt, &outerplan); + if (elidedinner == NULL && is_result_node_with_child(innerplan)) + elidedinner = pgpa_descend_node(pstmt, &innerplan); + + /* + * If this is a semijoin that was converted to an inner join by making one + * side or the other unique, make a note that the inner or outer subplan, + * as appropriate, should be treated as a query plan feature when the main + * tree traversal reaches it. + * + * Conversely, if the planner could have made one side of the join unique + * and thereby converted it to an inner join, and chose not to do so, that + * is also worth noting. + * + * NB: This code could appear slightly higher up in this function, but + * none of the nodes through which we just descended should have + * associated RTIs. + * + * NB: This seems like a somewhat hacky way of passing information up to + * the main tree walk, but I don't currently have a better idea. + */ + if (uniqueouter) + pgpa_add_future_feature(walker, PGPAQF_SEMIJOIN_UNIQUE, outerplan); + else if (jointype == JOIN_RIGHT_SEMI) + pgpa_add_future_feature(walker, PGPAQF_SEMIJOIN_NON_UNIQUE, outerplan); + if (uniqueinner) + pgpa_add_future_feature(walker, PGPAQF_SEMIJOIN_UNIQUE, innerplan); + else if (jointype == JOIN_SEMI) + pgpa_add_future_feature(walker, PGPAQF_SEMIJOIN_NON_UNIQUE, innerplan); + + /* Set output parameters. */ + *realouter = outerplan; + *realinner = innerplan; + *elidedrealouter = elidedouter; + *elidedrealinner = elidedinner; + return strategy; +} + +/* + * Descend through a Plan node in a join tree that the caller has determined + * to be irrelevant. + * + * Updates *plan, and returns the last of any elided nodes pertaining to the + * new plan node. + */ +static ElidedNode * +pgpa_descend_node(PlannedStmt *pstmt, Plan **plan) +{ + *plan = (*plan)->lefttree; + return pgpa_last_elided_node(pstmt, *plan); +} + +/* + * Descend through a Gather or Gather Merge node, if present, and any Sort + * or IncrementalSort node occurring under a Gather Merge. + * + * Caller should have verified that there is no ElidedNode pertaining to + * the initial value of *plan. + * + * Updates *plan, and returns the last of any elided nodes pertaining to the + * new plan node. Sets *found_any_gather = true if either Gather or + * Gather Merge was found, and otherwise leaves it unchanged. + */ +static ElidedNode * +pgpa_descend_any_gather(PlannedStmt *pstmt, Plan **plan, + bool *found_any_gather) +{ + if (IsA(*plan, Gather)) + { + *found_any_gather = true; + return pgpa_descend_node(pstmt, plan); + } + + if (IsA(*plan, GatherMerge)) + { + ElidedNode *elided = pgpa_descend_node(pstmt, plan); + + if (elided == NULL && is_sorting_plan(*plan)) + elided = pgpa_descend_node(pstmt, plan); + + *found_any_gather = true; + return elided; + } + + return NULL; +} + +/* + * If *plan is an Agg or Unique node, we want to descend through it, unless + * it has a corresponding elided node. If its immediate child is a Sort or + * IncrementalSort, we also want to descend through that, unless it has a + * corresponding elided node. + * + * On entry, *elided_node must be the last of any elided nodes corresponding + * to *plan; on exit, this will still be true, but *plan may have been updated. + * + * The reason we don't want to descend through elided nodes is that a single + * join tree can't cross through any sort of elided node: subqueries are + * planned separately, and planning inside an Append or MergeAppend is + * separate from planning outside of it. + * + * The return value is true if we descend through a node that we believe is + * making one side of a semijoin unique, and otherwise false. + */ +static bool +pgpa_descend_any_unique(PlannedStmt *pstmt, Plan **plan, + ElidedNode **elided_node) +{ + bool descend = false; + bool sjunique = false; + + if (*elided_node != NULL) + return sjunique; + + if (IsA(*plan, Unique)) + { + descend = true; + sjunique = true; + } + else if (IsA(*plan, Agg)) + { + /* + * If this is a simple Agg node, then assume it's here to implement + * semijoin uniqueness. Otherwise, assume it's completing an eager + * aggregation or partitionwise aggregation operation that began at a + * higher level of the plan tree. + * + * (Note that when we're using an Agg node for uniqueness, there's no + * need for any case other than AGGSPLIT_SIMPLE, because there's no + * aggregated column being computed. However, the fact that + * AGGSPLIT_SIMPLE is in use doesn't prove that this Agg is here for + * the semijoin uniqueness. Maybe we should adjust an Agg node to + * carry a "purpose" field so that code like this can be more certain + * of its analysis.) + */ + descend = true; + sjunique = (((Agg *) *plan)->aggsplit == AGGSPLIT_SIMPLE); + } + + if (descend) + { + *elided_node = pgpa_descend_node(pstmt, plan); + + if (*elided_node == NULL && is_sorting_plan(*plan)) + *elided_node = pgpa_descend_node(pstmt, plan); + } + + return sjunique; +} + +/* + * Is this a Result node that has a child? + */ +static bool +is_result_node_with_child(Plan *plan) +{ + return IsA(plan, Result) && plan->lefttree != NULL; +} + +/* + * Is this a Plan node whose purpose is to put the data in a certain order? + */ +static bool +is_sorting_plan(Plan *plan) +{ + return IsA(plan, Sort) || IsA(plan, IncrementalSort); +} diff --git a/contrib/pg_plan_advice/pgpa_join.h b/contrib/pg_plan_advice/pgpa_join.h new file mode 100644 index 00000000000..db8509c07cc --- /dev/null +++ b/contrib/pg_plan_advice/pgpa_join.h @@ -0,0 +1,105 @@ +/*------------------------------------------------------------------------- + * + * pgpa_join.h + * analysis of joins in Plan trees + * + * Copyright (c) 2016-2026, PostgreSQL Global Development Group + * + * contrib/pg_plan_advice/pgpa_join.h + * + *------------------------------------------------------------------------- + */ +#ifndef PGPA_JOIN_H +#define PGPA_JOIN_H + +#include "nodes/plannodes.h" + +typedef struct pgpa_plan_walker_context pgpa_plan_walker_context; +typedef struct pgpa_join_unroller pgpa_join_unroller; +typedef struct pgpa_unrolled_join pgpa_unrolled_join; + +/* + * Although there are three main join strategies, we try to classify things + * more precisely here: merge joins have the option of using materialization + * on the inner side, and nested loops can use either materialization or + * memoization. + */ +typedef enum +{ + JSTRAT_MERGE_JOIN_PLAIN = 0, + JSTRAT_MERGE_JOIN_MATERIALIZE, + JSTRAT_NESTED_LOOP_PLAIN, + JSTRAT_NESTED_LOOP_MATERIALIZE, + JSTRAT_NESTED_LOOP_MEMOIZE, + JSTRAT_HASH_JOIN + /* update NUM_PGPA_JOIN_STRATEGY if you add anything here */ +} pgpa_join_strategy; + +#define NUM_PGPA_JOIN_STRATEGY ((int) JSTRAT_HASH_JOIN + 1) + +/* + * In an outer-deep join tree, every member of an unrolled join will be a scan, + * but join trees with other shapes can contain unrolled joins. + * + * The plan node we store here will be the inner or outer child of the join + * node, as appropriate, except that we look through subnodes that we regard as + * part of the join method itself. For instance, for a Nested Loop that + * materializes the inner input, we'll store the child of the Materialize node, + * not the Materialize node itself. + * + * If setrefs processing elided one or more nodes from the plan tree, then + * we'll store details about the topmost of those in elided_node; otherwise, + * it will be NULL. + * + * Exactly one of scan and unrolled_join will be non-NULL. + */ +typedef struct +{ + Plan *plan; + ElidedNode *elided_node; + struct pgpa_scan *scan; + pgpa_unrolled_join *unrolled_join; +} pgpa_join_member; + +/* + * We convert outer-deep join trees to a flat structure; that is, ((A JOIN B) + * JOIN C) JOIN D gets converted to outer = A, inner = . When joins + * aren't outer-deep, substructure is required, e.g. (A JOIN B) JOIN (C JOIN D) + * is represented as outer = A, inner = , where X is a pgpa_unrolled_join + * covering C-D. + */ +struct pgpa_unrolled_join +{ + /* Outermost member; must not itself be an unrolled join. */ + pgpa_join_member outer; + + /* Number of inner members. Length of the strategy and inner arrays. */ + unsigned ninner; + + /* Array of strategies, one per non-outermost member. */ + pgpa_join_strategy *strategy; + + /* Array of members, excluding the outermost. Deepest first. */ + pgpa_join_member *inner; +}; + +/* + * Does this plan node inherit from Join? + */ +static inline bool +pgpa_is_join(Plan *plan) +{ + return IsA(plan, NestLoop) || IsA(plan, MergeJoin) || IsA(plan, HashJoin); +} + +extern pgpa_join_unroller *pgpa_create_join_unroller(void); +extern void pgpa_unroll_join(pgpa_plan_walker_context *walker, + Plan *plan, bool beneath_any_gather, + pgpa_join_unroller *join_unroller, + pgpa_join_unroller **outer_join_unroller, + pgpa_join_unroller **inner_join_unroller); +extern pgpa_unrolled_join *pgpa_build_unrolled_join(pgpa_plan_walker_context *walker, + pgpa_join_unroller *join_unroller); +extern void pgpa_destroy_join_unroller(pgpa_join_unroller *join_unroller); + +#endif diff --git a/contrib/pg_plan_advice/pgpa_output.c b/contrib/pg_plan_advice/pgpa_output.c new file mode 100644 index 00000000000..28d2839ce1a --- /dev/null +++ b/contrib/pg_plan_advice/pgpa_output.c @@ -0,0 +1,571 @@ +/*------------------------------------------------------------------------- + * + * pgpa_output.c + * produce textual output from the results of a plan tree walk + * + * Copyright (c) 2016-2026, PostgreSQL Global Development Group + * + * contrib/pg_plan_advice/pgpa_output.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "pgpa_output.h" +#include "pgpa_scan.h" + +#include "nodes/parsenodes.h" +#include "parser/parsetree.h" +#include "utils/builtins.h" +#include "utils/lsyscache.h" + +/* + * Context object for textual advice generation. + * + * rt_identifiers is the caller-provided array of range table identifiers. + * See the comments at the top of pgpa_identifier.c for more details. + * + * buf is the caller-provided output buffer. + * + * wrap_column is the wrap column, so that we don't create output that is + * too wide. See pgpa_maybe_linebreak() and comments in pgpa_output_advice. + */ +typedef struct pgpa_output_context +{ + const char **rid_strings; + StringInfo buf; + int wrap_column; +} pgpa_output_context; + +static void pgpa_output_unrolled_join(pgpa_output_context *context, + pgpa_unrolled_join *join); +static void pgpa_output_join_member(pgpa_output_context *context, + pgpa_join_member *member); +static void pgpa_output_scan_strategy(pgpa_output_context *context, + pgpa_scan_strategy strategy, + List *scans); +static void pgpa_output_relation_name(pgpa_output_context *context, Oid relid); +static void pgpa_output_query_feature(pgpa_output_context *context, + pgpa_qf_type type, + List *query_features); +static void pgpa_output_simple_strategy(pgpa_output_context *context, + char *strategy, + List *relid_sets); +static void pgpa_output_no_gather(pgpa_output_context *context, + Bitmapset *relids); +static void pgpa_output_relations(pgpa_output_context *context, StringInfo buf, + Bitmapset *relids); + +static char *pgpa_cstring_join_strategy(pgpa_join_strategy strategy); +static char *pgpa_cstring_scan_strategy(pgpa_scan_strategy strategy); +static char *pgpa_cstring_query_feature_type(pgpa_qf_type type); + +static void pgpa_maybe_linebreak(StringInfo buf, int wrap_column); + +/* + * Append query advice to the provided buffer. + * + * Before calling this function, 'walker' must be used to iterate over the + * main plan tree and all subplans from the PlannedStmt. + * + * 'rt_identifiers' is a table of unique identifiers, one for each RTI. + * See pgpa_create_identifiers_for_planned_stmt(). + * + * Results will be appended to 'buf'. + */ +void +pgpa_output_advice(StringInfo buf, pgpa_plan_walker_context *walker, + pgpa_identifier *rt_identifiers) +{ + Index rtable_length = list_length(walker->pstmt->rtable); + ListCell *lc; + pgpa_output_context context; + + /* Basic initialization. */ + memset(&context, 0, sizeof(pgpa_output_context)); + context.buf = buf; + + /* + * Convert identifiers to string form. Note that the loop variable here is + * not an RTI, because RTIs are 1-based. Some RTIs will have no + * identifier, either because the reloptkind is RTE_JOIN or because that + * portion of the query didn't make it into the final plan. + */ + context.rid_strings = palloc0_array(const char *, rtable_length); + for (int i = 0; i < rtable_length; ++i) + if (rt_identifiers[i].alias_name != NULL) + context.rid_strings[i] = pgpa_identifier_string(&rt_identifiers[i]); + + /* + * If the user chooses to use EXPLAIN (PLAN_ADVICE) in an 80-column window + * from a psql client with default settings, psql will add one space to + * the left of the output and EXPLAIN will add two more to the left of the + * advice. Thus, lines of more than 77 characters will wrap. We set the + * wrap limit to 76 here so that the output won't reach all the way to the + * very last column of the terminal. + * + * Of course, this is fairly arbitrary set of assumptions, and one could + * well make an argument for a different wrap limit, or for a configurable + * one. + */ + context.wrap_column = 76; + + /* + * Each piece of JOIN_ORDER() advice fully describes the join order for a + * a single unrolled join. Merging is not permitted, because that would + * change the meaning, e.g. SEQ_SCAN(a b c d) means simply that sequential + * scans should be used for all of those relations, and is thus equivalent + * to SEQ_SCAN(a b) SEQ_SCAN(c d), but JOIN_ORDER(a b c d) means that "a" + * is the driving table which is then joined to "b" then "c" then "d", + * which is totally different from JOIN_ORDER(a b) and JOIN_ORDER(c d). + */ + foreach(lc, walker->toplevel_unrolled_joins) + { + pgpa_unrolled_join *ujoin = lfirst(lc); + + if (buf->len > 0) + appendStringInfoChar(buf, '\n'); + appendStringInfo(context.buf, "JOIN_ORDER("); + pgpa_output_unrolled_join(&context, ujoin); + appendStringInfoChar(context.buf, ')'); + pgpa_maybe_linebreak(context.buf, context.wrap_column); + } + + /* Emit join strategy advice. */ + for (int s = 0; s < NUM_PGPA_JOIN_STRATEGY; ++s) + { + char *strategy = pgpa_cstring_join_strategy(s); + + pgpa_output_simple_strategy(&context, + strategy, + walker->join_strategies[s]); + } + + /* + * Emit scan strategy advice (but not for ordinary scans, which are + * definitionally uninteresting). + */ + for (int c = 0; c < NUM_PGPA_SCAN_STRATEGY; ++c) + if (c != PGPA_SCAN_ORDINARY) + pgpa_output_scan_strategy(&context, c, walker->scans[c]); + + /* Emit query feature advice. */ + for (int t = 0; t < NUM_PGPA_QF_TYPES; ++t) + pgpa_output_query_feature(&context, t, walker->query_features[t]); + + /* Emit NO_GATHER advice. */ + pgpa_output_no_gather(&context, walker->no_gather_scans); +} + +/* + * Output the members of an unrolled join, first the outermost member, and + * then the inner members one by one, as part of JOIN_ORDER() advice. + */ +static void +pgpa_output_unrolled_join(pgpa_output_context *context, + pgpa_unrolled_join *join) +{ + pgpa_output_join_member(context, &join->outer); + + for (int k = 0; k < join->ninner; ++k) + { + pgpa_join_member *member = &join->inner[k]; + + pgpa_maybe_linebreak(context->buf, context->wrap_column); + appendStringInfoChar(context->buf, ' '); + pgpa_output_join_member(context, member); + } +} + +/* + * Output a single member of an unrolled join as part of JOIN_ORDER() advice. + */ +static void +pgpa_output_join_member(pgpa_output_context *context, + pgpa_join_member *member) +{ + if (member->unrolled_join != NULL) + { + appendStringInfoChar(context->buf, '('); + pgpa_output_unrolled_join(context, member->unrolled_join); + appendStringInfoChar(context->buf, ')'); + } + else + { + pgpa_scan *scan = member->scan; + + Assert(scan != NULL); + if (bms_membership(scan->relids) == BMS_SINGLETON) + pgpa_output_relations(context, context->buf, scan->relids); + else + { + appendStringInfoChar(context->buf, '{'); + pgpa_output_relations(context, context->buf, scan->relids); + appendStringInfoChar(context->buf, '}'); + } + } +} + +/* + * Output advice for a List of pgpa_scan objects. + * + * All the scans must use the strategy specified by the "strategy" argument. + */ +static void +pgpa_output_scan_strategy(pgpa_output_context *context, + pgpa_scan_strategy strategy, + List *scans) +{ + bool first = true; + + if (scans == NIL) + return; + + if (context->buf->len > 0) + appendStringInfoChar(context->buf, '\n'); + appendStringInfo(context->buf, "%s(", + pgpa_cstring_scan_strategy(strategy)); + + foreach_ptr(pgpa_scan, scan, scans) + { + Plan *plan = scan->plan; + + if (first) + first = false; + else + { + pgpa_maybe_linebreak(context->buf, context->wrap_column); + appendStringInfoChar(context->buf, ' '); + } + + /* Output the relation identifiers. */ + if (bms_membership(scan->relids) == BMS_SINGLETON) + pgpa_output_relations(context, context->buf, scan->relids); + else + { + appendStringInfoChar(context->buf, '('); + pgpa_output_relations(context, context->buf, scan->relids); + appendStringInfoChar(context->buf, ')'); + } + + /* For index or index-only scans, output index information. */ + if (strategy == PGPA_SCAN_INDEX) + { + Assert(IsA(plan, IndexScan)); + pgpa_maybe_linebreak(context->buf, context->wrap_column); + appendStringInfoChar(context->buf, ' '); + pgpa_output_relation_name(context, ((IndexScan *) plan)->indexid); + } + else if (strategy == PGPA_SCAN_INDEX_ONLY) + { + Assert(IsA(plan, IndexOnlyScan)); + pgpa_maybe_linebreak(context->buf, context->wrap_column); + appendStringInfoChar(context->buf, ' '); + pgpa_output_relation_name(context, + ((IndexOnlyScan *) plan)->indexid); + } + } + + appendStringInfoChar(context->buf, ')'); + pgpa_maybe_linebreak(context->buf, context->wrap_column); +} + +/* + * Output a schema-qualified relation name. + */ +static void +pgpa_output_relation_name(pgpa_output_context *context, Oid relid) +{ + Oid nspoid = get_rel_namespace(relid); + char *relnamespace = get_namespace_name_or_temp(nspoid); + char *relname = get_rel_name(relid); + + appendStringInfoString(context->buf, quote_identifier(relnamespace)); + appendStringInfoChar(context->buf, '.'); + appendStringInfoString(context->buf, quote_identifier(relname)); +} + +/* + * Output advice for a List of pgpa_query_feature objects. + * + * All features must be of the type specified by the "type" argument. + */ +static void +pgpa_output_query_feature(pgpa_output_context *context, pgpa_qf_type type, + List *query_features) +{ + bool first = true; + + if (query_features == NIL) + return; + + if (context->buf->len > 0) + appendStringInfoChar(context->buf, '\n'); + appendStringInfo(context->buf, "%s(", + pgpa_cstring_query_feature_type(type)); + + foreach_ptr(pgpa_query_feature, qf, query_features) + { + if (first) + first = false; + else + { + pgpa_maybe_linebreak(context->buf, context->wrap_column); + appendStringInfoChar(context->buf, ' '); + } + + if (bms_membership(qf->relids) == BMS_SINGLETON) + pgpa_output_relations(context, context->buf, qf->relids); + else + { + appendStringInfoChar(context->buf, '('); + pgpa_output_relations(context, context->buf, qf->relids); + appendStringInfoChar(context->buf, ')'); + } + } + + appendStringInfoChar(context->buf, ')'); + pgpa_maybe_linebreak(context->buf, context->wrap_column); +} + +/* + * Output "simple" advice for a List of Bitmapset objects each of which + * contains one or more RTIs. + * + * By simple, we just mean that the advice emitted follows the most + * straightforward pattern: the strategy name, followed by a list of items + * separated by spaces and surrounded by parentheses. Individual items in + * the list are a single relation identifier for a Bitmapset that contains + * just one member, or a sub-list again separated by spaces and surrounded + * by parentheses for a Bitmapset with multiple members. Bitmapsets with + * no members probably shouldn't occur here, but if they do they'll be + * rendered as an empty sub-list. + */ +static void +pgpa_output_simple_strategy(pgpa_output_context *context, char *strategy, + List *relid_sets) +{ + bool first = true; + + if (relid_sets == NIL) + return; + + if (context->buf->len > 0) + appendStringInfoChar(context->buf, '\n'); + appendStringInfo(context->buf, "%s(", strategy); + + foreach_node(Bitmapset, relids, relid_sets) + { + if (first) + first = false; + else + { + pgpa_maybe_linebreak(context->buf, context->wrap_column); + appendStringInfoChar(context->buf, ' '); + } + + if (bms_membership(relids) == BMS_SINGLETON) + pgpa_output_relations(context, context->buf, relids); + else + { + appendStringInfoChar(context->buf, '('); + pgpa_output_relations(context, context->buf, relids); + appendStringInfoChar(context->buf, ')'); + } + } + + appendStringInfoChar(context->buf, ')'); + pgpa_maybe_linebreak(context->buf, context->wrap_column); +} + +/* + * Output NO_GATHER advice for all relations not appearing beneath any + * Gather or Gather Merge node. + */ +static void +pgpa_output_no_gather(pgpa_output_context *context, Bitmapset *relids) +{ + if (relids == NULL) + return; + if (context->buf->len > 0) + appendStringInfoChar(context->buf, '\n'); + appendStringInfoString(context->buf, "NO_GATHER("); + pgpa_output_relations(context, context->buf, relids); + appendStringInfoChar(context->buf, ')'); +} + +/* + * Output the identifiers for each RTI in the provided set. + * + * Identifiers are separated by spaces, and a line break is possible after + * each one. + */ +static void +pgpa_output_relations(pgpa_output_context *context, StringInfo buf, + Bitmapset *relids) +{ + int rti = -1; + bool first = true; + + while ((rti = bms_next_member(relids, rti)) >= 0) + { + const char *rid_string = context->rid_strings[rti - 1]; + + if (rid_string == NULL) + elog(ERROR, "no identifier for RTI %d", rti); + + if (first) + { + first = false; + appendStringInfoString(buf, rid_string); + } + else + { + pgpa_maybe_linebreak(buf, context->wrap_column); + appendStringInfo(buf, " %s", rid_string); + } + } +} + +/* + * Get a C string that corresponds to the specified join strategy. + */ +static char * +pgpa_cstring_join_strategy(pgpa_join_strategy strategy) +{ + switch (strategy) + { + case JSTRAT_MERGE_JOIN_PLAIN: + return "MERGE_JOIN_PLAIN"; + case JSTRAT_MERGE_JOIN_MATERIALIZE: + return "MERGE_JOIN_MATERIALIZE"; + case JSTRAT_NESTED_LOOP_PLAIN: + return "NESTED_LOOP_PLAIN"; + case JSTRAT_NESTED_LOOP_MATERIALIZE: + return "NESTED_LOOP_MATERIALIZE"; + case JSTRAT_NESTED_LOOP_MEMOIZE: + return "NESTED_LOOP_MEMOIZE"; + case JSTRAT_HASH_JOIN: + return "HASH_JOIN"; + } + + pg_unreachable(); + return NULL; +} + +/* + * Get a C string that corresponds to the specified scan strategy. + */ +static char * +pgpa_cstring_scan_strategy(pgpa_scan_strategy strategy) +{ + switch (strategy) + { + case PGPA_SCAN_ORDINARY: + return "ORDINARY_SCAN"; + case PGPA_SCAN_SEQ: + return "SEQ_SCAN"; + case PGPA_SCAN_BITMAP_HEAP: + return "BITMAP_HEAP_SCAN"; + case PGPA_SCAN_FOREIGN: + return "FOREIGN_JOIN"; + case PGPA_SCAN_INDEX: + return "INDEX_SCAN"; + case PGPA_SCAN_INDEX_ONLY: + return "INDEX_ONLY_SCAN"; + case PGPA_SCAN_PARTITIONWISE: + return "PARTITIONWISE"; + case PGPA_SCAN_TID: + return "TID_SCAN"; + } + + pg_unreachable(); + return NULL; +} + +/* + * Get a C string that corresponds to the query feature type. + */ +static char * +pgpa_cstring_query_feature_type(pgpa_qf_type type) +{ + switch (type) + { + case PGPAQF_GATHER: + return "GATHER"; + case PGPAQF_GATHER_MERGE: + return "GATHER_MERGE"; + case PGPAQF_SEMIJOIN_NON_UNIQUE: + return "SEMIJOIN_NON_UNIQUE"; + case PGPAQF_SEMIJOIN_UNIQUE: + return "SEMIJOIN_UNIQUE"; + } + + + pg_unreachable(); + return NULL; +} + +/* + * Insert a line break into the StringInfoData, if needed. + * + * If wrap_column is zero or negative, this does nothing. Otherwise, we + * consider inserting a newline. We only insert a newline if the length of + * the last line in the buffer exceeds wrap_column, and not if we'd be + * inserting a newline at or before the beginning of the current line. + * + * The position at which the newline is inserted is simply wherever the + * buffer ended the last time this function was called. In other words, + * the caller is expected to call this function every time we reach a good + * place for a line break. + */ +static void +pgpa_maybe_linebreak(StringInfo buf, int wrap_column) +{ + char *trailing_nl; + int line_start; + int save_cursor; + + /* If line wrapping is disabled, exit quickly. */ + if (wrap_column <= 0) + return; + + /* + * Set line_start to the byte offset within buf->data of the first + * character of the current line, where the current line means the last + * one in the buffer. Note that line_start could be the offset of the + * trailing '\0' if the last character in the buffer is a line break. + */ + trailing_nl = strrchr(buf->data, '\n'); + if (trailing_nl == NULL) + line_start = 0; + else + line_start = (trailing_nl - buf->data) + 1; + + /* + * Remember that the current end of the buffer is a potential location to + * insert a line break on a future call to this function. + */ + save_cursor = buf->cursor; + buf->cursor = buf->len; + + /* If we haven't passed the wrap column, we don't need a newline. */ + if (buf->len - line_start <= wrap_column) + return; + + /* + * It only makes sense to insert a newline at a position later than the + * beginning of the current line. + */ + if (save_cursor <= line_start) + return; + + /* Insert a newline at the previous cursor location. */ + enlargeStringInfo(buf, 1); + memmove(&buf->data[save_cursor] + 1, &buf->data[save_cursor], + buf->len - save_cursor); + ++buf->cursor; + buf->data[++buf->len] = '\0'; + buf->data[save_cursor] = '\n'; +} diff --git a/contrib/pg_plan_advice/pgpa_output.h b/contrib/pg_plan_advice/pgpa_output.h new file mode 100644 index 00000000000..fd35103bfbd --- /dev/null +++ b/contrib/pg_plan_advice/pgpa_output.h @@ -0,0 +1,22 @@ +/*------------------------------------------------------------------------- + * + * pgpa_output.h + * produce textual output from the results of a plan tree walk + * + * Copyright (c) 2016-2026, PostgreSQL Global Development Group + * + * contrib/pg_plan_advice/pgpa_output.h + * + *------------------------------------------------------------------------- + */ +#ifndef PGPA_OUTPUT_H +#define PGPA_OUTPUT_H + +#include "pgpa_identifier.h" +#include "pgpa_walker.h" + +extern void pgpa_output_advice(StringInfo buf, + pgpa_plan_walker_context *walker, + pgpa_identifier *rt_identifiers); + +#endif diff --git a/contrib/pg_plan_advice/pgpa_parser.y b/contrib/pg_plan_advice/pgpa_parser.y new file mode 100644 index 00000000000..8bfd7666d07 --- /dev/null +++ b/contrib/pg_plan_advice/pgpa_parser.y @@ -0,0 +1,301 @@ +%{ +/* + * Parser for plan advice + * + * Copyright (c) 2000-2026, PostgreSQL Global Development Group + * + * contrib/pg_plan_advice/pgpa_parser.y + */ + +#include "postgres.h" + +#include +#include + +#include "fmgr.h" +#include "nodes/miscnodes.h" +#include "utils/builtins.h" +#include "utils/float.h" + +#include "pgpa_ast.h" +#include "pgpa_parser.h" + +/* + * Bison doesn't allocate anything that needs to live across parser calls, + * so we can easily have it use palloc instead of malloc. This prevents + * memory leaks if we error out during parsing. + */ +#define YYMALLOC palloc +#define YYFREE pfree +%} + +/* BISON Declarations */ +%parse-param {List **result} +%parse-param {char **parse_error_msg_p} +%parse-param {yyscan_t yyscanner} +%lex-param {List **result} +%lex-param {char **parse_error_msg_p} +%lex-param {yyscan_t yyscanner} +%pure-parser +%expect 0 +%name-prefix="pgpa_yy" + +%union +{ + char *str; + int integer; + List *list; + pgpa_advice_item *item; + pgpa_advice_target *target; + pgpa_index_target *itarget; +} +%token TOK_IDENT TOK_TAG_JOIN_ORDER TOK_TAG_INDEX +%token TOK_TAG_SIMPLE TOK_TAG_GENERIC +%token TOK_INTEGER + +%type opt_ri_occurrence +%type advice_item +%type advice_item_list generic_target_list +%type index_target_list join_order_target_list +%type opt_partition simple_target_list +%type identifier opt_plan_name +%type generic_sublist join_order_sublist +%type relation_identifier +%type index_name + +%start parse_toplevel + +/* Grammar follows */ +%% + +parse_toplevel: advice_item_list + { + (void) yynerrs; /* suppress compiler warning */ + *result = $1; + } + ; + +advice_item_list: advice_item_list advice_item + { $$ = lappend($1, $2); } + | + { $$ = NIL; } + ; + +advice_item: TOK_TAG_JOIN_ORDER '(' join_order_target_list ')' + { + $$ = palloc0_object(pgpa_advice_item); + $$->tag = PGPA_TAG_JOIN_ORDER; + $$->targets = $3; + if ($3 == NIL) + pgpa_yyerror(result, parse_error_msg_p, yyscanner, + "JOIN_ORDER must have at least one target"); + } + | TOK_TAG_INDEX '(' index_target_list ')' + { + $$ = palloc0_object(pgpa_advice_item); + if (strcmp($1, "index_only_scan") == 0) + $$->tag = PGPA_TAG_INDEX_ONLY_SCAN; + else if (strcmp($1, "index_scan") == 0) + $$->tag = PGPA_TAG_INDEX_SCAN; + else + elog(ERROR, "tag parsing failed: %s", $1); + $$->targets = $3; + } + | TOK_TAG_SIMPLE '(' simple_target_list ')' + { + $$ = palloc0_object(pgpa_advice_item); + if (strcmp($1, "bitmap_heap_scan") == 0) + $$->tag = PGPA_TAG_BITMAP_HEAP_SCAN; + else if (strcmp($1, "no_gather") == 0) + $$->tag = PGPA_TAG_NO_GATHER; + else if (strcmp($1, "seq_scan") == 0) + $$->tag = PGPA_TAG_SEQ_SCAN; + else if (strcmp($1, "tid_scan") == 0) + $$->tag = PGPA_TAG_TID_SCAN; + else + elog(ERROR, "tag parsing failed: %s", $1); + $$->targets = $3; + } + | TOK_TAG_GENERIC '(' generic_target_list ')' + { + bool fail; + + $$ = palloc0_object(pgpa_advice_item); + $$->tag = pgpa_parse_advice_tag($1, &fail); + if (fail) + { + pgpa_yyerror(result, parse_error_msg_p, yyscanner, + "unrecognized advice tag"); + } + + if ($$->tag == PGPA_TAG_FOREIGN_JOIN) + { + foreach_ptr(pgpa_advice_target, target, $3) + { + if (target->ttype == PGPA_TARGET_IDENTIFIER || + list_length(target->children) == 1) + pgpa_yyerror(result, parse_error_msg_p, yyscanner, + "FOREIGN_JOIN targets must contain more than one relation identifier"); + } + } + + $$->targets = $3; + } + ; + +relation_identifier: identifier opt_ri_occurrence opt_partition opt_plan_name + { + $$ = palloc0_object(pgpa_advice_target); + $$->ttype = PGPA_TARGET_IDENTIFIER; + $$->rid.alias_name = $1; + $$->rid.occurrence = $2; + if (list_length($3) == 2) + { + $$->rid.partnsp = linitial($3); + $$->rid.partrel = lsecond($3); + } + else if ($3 != NIL) + $$->rid.partrel = linitial($3); + $$->rid.plan_name = $4; + } + ; + +index_name: identifier + { + $$ = palloc0_object(pgpa_index_target); + $$->indname = $1; + } + | identifier '.' identifier + { + $$ = palloc0_object(pgpa_index_target); + $$->indnamespace = $1; + $$->indname = $3; + } + ; + +opt_ri_occurrence: + '#' TOK_INTEGER + { + if ($2 <= 0) + pgpa_yyerror(result, parse_error_msg_p, yyscanner, + "only positive occurrence numbers are permitted"); + $$ = $2; + } + | + { + /* The default occurrence number is 1. */ + $$ = 1; + } + ; + +identifier: TOK_IDENT + | TOK_TAG_JOIN_ORDER + | TOK_TAG_INDEX + | TOK_TAG_SIMPLE + | TOK_TAG_GENERIC + ; + +/* + * When generating advice, we always schema-qualify the partition name, but + * when parsing advice, we accept a specification that lacks one. + */ +opt_partition: + '/' TOK_IDENT '.' TOK_IDENT + { $$ = list_make2($2, $4); } + | '/' TOK_IDENT + { $$ = list_make1($2); } + | + { $$ = NIL; } + ; + +opt_plan_name: + '@' TOK_IDENT + { $$ = $2; } + | + { $$ = NULL; } + ; + +generic_target_list: generic_target_list relation_identifier + { $$ = lappend($1, $2); } + | generic_target_list generic_sublist + { $$ = lappend($1, $2); } + | + { $$ = NIL; } + ; + +generic_sublist: '(' simple_target_list ')' + { + $$ = palloc0_object(pgpa_advice_target); + $$->ttype = PGPA_TARGET_ORDERED_LIST; + $$->children = $2; + } + ; + +index_target_list: + index_target_list relation_identifier index_name + { + $2->itarget = $3; + $$ = lappend($1, $2); + } + | + { $$ = NIL; } + ; + +join_order_target_list: join_order_target_list relation_identifier + { $$ = lappend($1, $2); } + | join_order_target_list join_order_sublist + { $$ = lappend($1, $2); } + | + { $$ = NIL; } + ; + +join_order_sublist: + '(' join_order_target_list ')' + { + $$ = palloc0_object(pgpa_advice_target); + $$->ttype = PGPA_TARGET_ORDERED_LIST; + $$->children = $2; + } + | '{' simple_target_list '}' + { + $$ = palloc0_object(pgpa_advice_target); + $$->ttype = PGPA_TARGET_UNORDERED_LIST; + $$->children = $2; + } + ; + +simple_target_list: simple_target_list relation_identifier + { $$ = lappend($1, $2); } + | + { $$ = NIL; } + ; + +%% + +/* + * Parse an advice_string and return the resulting list of pgpa_advice_item + * objects. If a parse error occurs, instead return NULL. + * + * If the return value is NULL, *error_p will be set to the error message; + * otherwise, *error_p will be set to NULL. + */ +List * +pgpa_parse(const char *advice_string, char **error_p) +{ + yyscan_t scanner; + List *result; + char *error = NULL; + + pgpa_scanner_init(advice_string, &scanner); + pgpa_yyparse(&result, &error, scanner); + pgpa_scanner_finish(scanner); + + if (error != NULL) + { + *error_p = error; + return NULL; + } + + *error_p = NULL; + return result; +} diff --git a/contrib/pg_plan_advice/pgpa_planner.c b/contrib/pg_plan_advice/pgpa_planner.c new file mode 100644 index 00000000000..5508b8af707 --- /dev/null +++ b/contrib/pg_plan_advice/pgpa_planner.c @@ -0,0 +1,2198 @@ +/*------------------------------------------------------------------------- + * + * pgpa_planner.c + * Use planner hooks to observe and modify planner behavior + * + * All interaction with the core planner happens here. Much of it has to + * do with enforcing supplied advice, but we also need these hooks to + * generate advice strings (though the heavy lifting in that case is + * mostly done by pgpa_walker.c). + * + * Copyright (c) 2016-2026, PostgreSQL Global Development Group + * + * contrib/pg_plan_advice/pgpa_planner.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "pg_plan_advice.h" +#include "pgpa_identifier.h" +#include "pgpa_output.h" +#include "pgpa_planner.h" +#include "pgpa_trove.h" +#include "pgpa_walker.h" + +#include "commands/defrem.h" +#include "common/hashfn_unstable.h" +#include "nodes/makefuncs.h" +#include "optimizer/extendplan.h" +#include "optimizer/pathnode.h" +#include "optimizer/paths.h" +#include "optimizer/plancat.h" +#include "optimizer/planner.h" +#include "parser/parsetree.h" +#include "utils/lsyscache.h" + +#ifdef USE_ASSERT_CHECKING + +/* + * When assertions are enabled, we try generating relation identifiers during + * planning, saving them in a hash table, and then cross-checking them against + * the ones generated after planning is complete. + */ +typedef struct pgpa_ri_checker_key +{ + char *plan_name; + Index rti; +} pgpa_ri_checker_key; + +typedef struct pgpa_ri_checker +{ + pgpa_ri_checker_key key; + uint32 status; + const char *rid_string; +} pgpa_ri_checker; + +static uint32 pgpa_ri_checker_hash_key(pgpa_ri_checker_key key); + +static inline bool +pgpa_ri_checker_compare_key(pgpa_ri_checker_key a, pgpa_ri_checker_key b) +{ + if (a.rti != b.rti) + return false; + if (a.plan_name == NULL) + return (b.plan_name == NULL); + if (b.plan_name == NULL) + return false; + return strcmp(a.plan_name, b.plan_name) == 0; +} + +#define SH_PREFIX pgpa_ri_check +#define SH_ELEMENT_TYPE pgpa_ri_checker +#define SH_KEY_TYPE pgpa_ri_checker_key +#define SH_KEY key +#define SH_HASH_KEY(tb, key) pgpa_ri_checker_hash_key(key) +#define SH_EQUAL(tb, a, b) pgpa_ri_checker_compare_key(a, b) +#define SH_SCOPE static inline +#define SH_DECLARE +#define SH_DEFINE +#include "lib/simplehash.h" + +#endif + +typedef enum pgpa_jo_outcome +{ + PGPA_JO_PERMITTED, /* permit this join order */ + PGPA_JO_DENIED, /* deny this join order */ + PGPA_JO_INDIFFERENT /* do neither */ +} pgpa_jo_outcome; + +typedef struct pgpa_planner_state +{ + bool generate_advice_feedback; + bool generate_advice_string; + pgpa_trove *trove; + List *sj_unique_rels; + +#ifdef USE_ASSERT_CHECKING + pgpa_ri_check_hash *ri_check_hash; +#endif +} pgpa_planner_state; + +typedef struct pgpa_join_state +{ + /* Most-recently-considered outer rel. */ + RelOptInfo *outerrel; + + /* Most-recently-considered inner rel. */ + RelOptInfo *innerrel; + + /* + * Array of relation identifiers for all members of this joinrel, with + * outerrel identifiers before innerrel identifiers. + */ + pgpa_identifier *rids; + + /* Number of outer rel identifiers. */ + int outer_count; + + /* Number of inner rel identifiers. */ + int inner_count; + + /* + * Trove lookup results. + * + * join_entries and rel_entries are arrays of entries, and join_indexes + * and rel_indexes are the integer offsets within those arrays of entries + * potentially relevant to us. The "join" fields correspond to a lookup + * using PGPA_TROVE_LOOKUP_JOIN and the "rel" fields to a lookup using + * PGPA_TROVE_LOOKUP_REL. + */ + pgpa_trove_entry *join_entries; + Bitmapset *join_indexes; + pgpa_trove_entry *rel_entries; + Bitmapset *rel_indexes; +} pgpa_join_state; + +/* Saved hook values */ +static build_simple_rel_hook_type prev_build_simple_rel = NULL; +static join_path_setup_hook_type prev_join_path_setup = NULL; +static joinrel_setup_hook_type prev_joinrel_setup = NULL; +static planner_setup_hook_type prev_planner_setup = NULL; +static planner_shutdown_hook_type prev_planner_shutdown = NULL; + +/* Other global variables */ +int pgpa_planner_generate_advice = 0; +static int planner_extension_id = -1; + +/* Function prototypes. */ +static void pgpa_planner_setup(PlannerGlobal *glob, Query *parse, + const char *query_string, + int cursorOptions, + double *tuple_fraction, + ExplainState *es); +static void pgpa_planner_shutdown(PlannerGlobal *glob, Query *parse, + const char *query_string, PlannedStmt *pstmt); +static void pgpa_build_simple_rel(PlannerInfo *root, + RelOptInfo *rel, + RangeTblEntry *rte); +static void pgpa_joinrel_setup(PlannerInfo *root, + RelOptInfo *joinrel, + RelOptInfo *outerrel, + RelOptInfo *innerrel, + SpecialJoinInfo *sjinfo, + List *restrictlist); +static void pgpa_join_path_setup(PlannerInfo *root, + RelOptInfo *joinrel, + RelOptInfo *outerrel, + RelOptInfo *innerrel, + JoinType jointype, + JoinPathExtraData *extra); +static pgpa_join_state *pgpa_get_join_state(PlannerInfo *root, + RelOptInfo *joinrel, + RelOptInfo *outerrel, + RelOptInfo *innerrel); +static void pgpa_planner_apply_joinrel_advice(uint64 *pgs_mask_p, + char *plan_name, + pgpa_join_state *pjs); +static void pgpa_planner_apply_join_path_advice(JoinType jointype, + uint64 *pgs_mask_p, + char *plan_name, + pgpa_join_state *pjs); +static void pgpa_planner_apply_scan_advice(RelOptInfo *rel, + pgpa_trove_entry *scan_entries, + Bitmapset *scan_indexes, + pgpa_trove_entry *rel_entries, + Bitmapset *rel_indexes); +static uint64 pgpa_join_strategy_mask_from_advice_tag(pgpa_advice_tag_type tag); +static pgpa_jo_outcome pgpa_join_order_permits_join(int outer_count, + int inner_count, + pgpa_identifier *rids, + pgpa_trove_entry *entry); +static bool pgpa_join_method_permits_join(int outer_count, int inner_count, + pgpa_identifier *rids, + pgpa_trove_entry *entry, + bool *restrict_method); +static bool pgpa_opaque_join_permits_join(int outer_count, int inner_count, + pgpa_identifier *rids, + pgpa_trove_entry *entry, + bool *restrict_method); +static bool pgpa_semijoin_permits_join(int outer_count, int inner_count, + pgpa_identifier *rids, + pgpa_trove_entry *entry, + bool outer_is_nullable, + bool *restrict_method); + +static List *pgpa_planner_append_feedback(List *list, pgpa_trove *trove, + pgpa_trove_lookup_type type, + pgpa_identifier *rt_identifiers, + pgpa_plan_walker_context *walker); +static void pgpa_planner_feedback_warning(List *feedback); + +static inline void pgpa_ri_checker_save(pgpa_planner_state *pps, + PlannerInfo *root, + RelOptInfo *rel); +static void pgpa_ri_checker_validate(pgpa_planner_state *pps, + PlannedStmt *pstmt); + +static char *pgpa_bms_to_cstring(Bitmapset *bms); +static const char *pgpa_jointype_to_cstring(JoinType jointype); + +/* + * Install planner-related hooks. + */ +void +pgpa_planner_install_hooks(void) +{ + planner_extension_id = GetPlannerExtensionId("pg_plan_advice"); + prev_planner_setup = planner_setup_hook; + planner_setup_hook = pgpa_planner_setup; + prev_planner_shutdown = planner_shutdown_hook; + planner_shutdown_hook = pgpa_planner_shutdown; + prev_build_simple_rel = build_simple_rel_hook; + build_simple_rel_hook = pgpa_build_simple_rel; + prev_joinrel_setup = joinrel_setup_hook; + joinrel_setup_hook = pgpa_joinrel_setup; + prev_join_path_setup = join_path_setup_hook; + join_path_setup_hook = pgpa_join_path_setup; +} + +/* + * Carry out whatever setup work we need to do before planning. + */ +static void +pgpa_planner_setup(PlannerGlobal *glob, Query *parse, const char *query_string, + int cursorOptions, double *tuple_fraction, + ExplainState *es) +{ + pgpa_trove *trove = NULL; + pgpa_planner_state *pps; + char *supplied_advice; + bool generate_advice_feedback = false; + bool generate_advice_string = false; + bool needs_pps = false; + + /* + * Decide whether we need to generate an advice string. We must do this if + * the user has told us to do it categorically, or if another loadable + * module has requested it, or if the user has requested it using the + * EXPLAIN (PLAN_ADVICE) option. + */ + generate_advice_string = (pg_plan_advice_always_store_advice_details || + pgpa_planner_generate_advice || + pg_plan_advice_should_explain(es)); + if (generate_advice_string) + needs_pps = true; + + /* + * If any advice was provided, build a trove of advice for use during + * planning. + */ + supplied_advice = pg_plan_advice_get_supplied_query_advice(glob, parse, + query_string, + cursorOptions, + es); + if (supplied_advice != NULL && supplied_advice[0] != '\0') + { + List *advice_items; + char *error; + + /* + * If the supplied advice string comes from pg_plan_advice.advice, + * parsing shouldn't fail here, because we must have previously parsed + * successfully in pg_plan_advice_advice_check_hook. However, it might + * also come from a hook registered via pg_plan_advice_add_advisor, + * and we can't be sure whether that's valid. (Plus, having an error + * check here seems like a good idea anyway, just for safety.) + */ + advice_items = pgpa_parse(supplied_advice, &error); + if (error) + ereport(WARNING, + errmsg("could not parse supplied advice: %s", error)); + + /* + * It's possible that the advice string was non-empty but contained no + * actual advice, e.g. it was all whitespace. + */ + if (advice_items != NIL) + { + trove = pgpa_build_trove(advice_items); + needs_pps = true; + + /* + * If we know that we're running under EXPLAIN, or if the user has + * told us to always do the work, generate advice feedback. + */ + if (es != NULL || pg_plan_advice_feedback_warnings || + pg_plan_advice_always_store_advice_details) + generate_advice_feedback = true; + } + } + +#ifdef USE_ASSERT_CHECKING + + /* + * If asserts are enabled, always build a private state object for + * cross-checks. + */ + needs_pps = true; +#endif + + /* + * We only create and initialize a private state object if it's needed for + * some purpose. That could be (1) recording that we will need to generate + * an advice string, (2) storing a trove of supplied advice, or (3) + * facilitating debugging cross-checks when asserts are enabled. + */ + if (needs_pps) + { + pps = palloc0_object(pgpa_planner_state); + pps->generate_advice_feedback = generate_advice_feedback; + pps->generate_advice_string = generate_advice_string; + pps->trove = trove; +#ifdef USE_ASSERT_CHECKING + pps->ri_check_hash = + pgpa_ri_check_create(CurrentMemoryContext, 1024, NULL); +#endif + SetPlannerGlobalExtensionState(glob, planner_extension_id, pps); + } + + /* Pass call to previous hook. */ + if (prev_planner_setup) + (*prev_planner_setup) (glob, parse, query_string, cursorOptions, + tuple_fraction, es); +} + +/* + * Carry out whatever work we want to do after planning is complete. + */ +static void +pgpa_planner_shutdown(PlannerGlobal *glob, Query *parse, + const char *query_string, PlannedStmt *pstmt) +{ + pgpa_planner_state *pps; + pgpa_trove *trove = NULL; + pgpa_plan_walker_context walker = {0}; /* placate compiler */ + bool generate_advice_feedback = false; + bool generate_advice_string = false; + List *pgpa_items = NIL; + pgpa_identifier *rt_identifiers = NULL; + + /* Fetch our private state, set up by pgpa_planner_setup(). */ + pps = GetPlannerGlobalExtensionState(glob, planner_extension_id); + if (pps != NULL) + { + trove = pps->trove; + generate_advice_feedback = pps->generate_advice_feedback; + generate_advice_string = pps->generate_advice_string; + } + + /* + * If we're trying to generate an advice string or if we're trying to + * provide advice feedback, then we will need to create range table + * identifiers. + */ + if (generate_advice_string || generate_advice_feedback) + { + pgpa_plan_walker(&walker, pstmt, pps->sj_unique_rels); + rt_identifiers = pgpa_create_identifiers_for_planned_stmt(pstmt); + } + + /* Generate the advice string, if we need to do so. */ + if (generate_advice_string) + { + char *advice_string; + StringInfoData buf; + + /* Generate a textual advice string. */ + initStringInfo(&buf); + pgpa_output_advice(&buf, &walker, rt_identifiers); + advice_string = buf.data; + + /* Save the advice string in the final plan. */ + pgpa_items = lappend(pgpa_items, + makeDefElem("advice_string", + (Node *) makeString(advice_string), + -1)); + } + + /* + * If we're trying to provide advice feedback, then we will need to + * analyze how successful the advice was. + */ + if (generate_advice_feedback) + { + List *feedback = NIL; + + /* + * Inject a Node-tree representation of all the trove-entry flags into + * the PlannedStmt. + */ + feedback = pgpa_planner_append_feedback(feedback, + trove, + PGPA_TROVE_LOOKUP_SCAN, + rt_identifiers, &walker); + feedback = pgpa_planner_append_feedback(feedback, + trove, + PGPA_TROVE_LOOKUP_JOIN, + rt_identifiers, &walker); + feedback = pgpa_planner_append_feedback(feedback, + trove, + PGPA_TROVE_LOOKUP_REL, + rt_identifiers, &walker); + + pgpa_items = lappend(pgpa_items, makeDefElem("feedback", + (Node *) feedback, -1)); + + /* If we were asked to generate feedback warnings, do so. */ + if (pg_plan_advice_feedback_warnings) + pgpa_planner_feedback_warning(feedback); + } + + /* Push whatever data we're saving into the PlannedStmt. */ + if (pgpa_items != NIL) + pstmt->extension_state = + lappend(pstmt->extension_state, + makeDefElem("pg_plan_advice", (Node *) pgpa_items, -1)); + + /* + * If assertions are enabled, cross-check the generated range table + * identifiers. + */ + if (pps != NULL) + pgpa_ri_checker_validate(pps, pstmt); + + /* Pass call to previous hook. */ + if (prev_planner_shutdown) + (*prev_planner_shutdown) (glob, parse, query_string, pstmt); +} + +/* + * Hook function for build_simple_rel(). + * + * We can apply scan advice at this point, and we also use this as an + * opportunity to do range-table identifier cross-checking in assert-enabled + * builds. + */ +static void +pgpa_build_simple_rel(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) +{ + pgpa_planner_state *pps; + + /* Fetch our private state, set up by pgpa_planner_setup(). */ + pps = GetPlannerGlobalExtensionState(root->glob, planner_extension_id); + + /* Save details needed for range table identifier cross-checking. */ + if (pps != NULL) + pgpa_ri_checker_save(pps, root, rel); + + /* If query advice was provided, search for relevant entries. */ + if (pps != NULL && pps->trove != NULL) + { + pgpa_identifier rid; + pgpa_trove_result tresult_scan; + pgpa_trove_result tresult_rel; + + /* Search for scan advice and general rel advice. */ + pgpa_compute_identifier_by_rti(root, rel->relid, &rid); + pgpa_trove_lookup(pps->trove, PGPA_TROVE_LOOKUP_SCAN, 1, &rid, + &tresult_scan); + pgpa_trove_lookup(pps->trove, PGPA_TROVE_LOOKUP_REL, 1, &rid, + &tresult_rel); + + /* If relevant entries were found, apply them. */ + if (tresult_scan.indexes != NULL || tresult_rel.indexes != NULL) + { + uint64 original_mask = rel->pgs_mask; + + pgpa_planner_apply_scan_advice(rel, + tresult_scan.entries, + tresult_scan.indexes, + tresult_rel.entries, + tresult_rel.indexes); + + /* Emit debugging message, if enabled. */ + if (pg_plan_advice_trace_mask && original_mask != rel->pgs_mask) + { + if (root->plan_name != NULL) + ereport(WARNING, + (errmsg("strategy mask for RTI %u in subplan \"%s\" changed from 0x%" PRIx64 " to 0x%" PRIx64, + rel->relid, root->plan_name, + original_mask, rel->pgs_mask))); + else + ereport(WARNING, + (errmsg("strategy mask for RTI %u changed from 0x%" PRIx64 " to 0x%" PRIx64, + rel->relid, original_mask, + rel->pgs_mask))); + } + } + } + + /* Pass call to previous hook. */ + if (prev_build_simple_rel) + (*prev_build_simple_rel) (root, rel, rte); +} + +/* + * Enforce any provided advice that is relevant to any method of implementing + * this join. + * + * Although we're passed the outerrel and innerrel here, those are just + * whatever values happened to prompt the creation of this joinrel; they + * shouldn't really influence our choice of what advice to apply. + */ +static void +pgpa_joinrel_setup(PlannerInfo *root, RelOptInfo *joinrel, + RelOptInfo *outerrel, RelOptInfo *innerrel, + SpecialJoinInfo *sjinfo, List *restrictlist) +{ + pgpa_join_state *pjs; + + Assert(bms_membership(joinrel->relids) == BMS_MULTIPLE); + + /* Get our private state information for this join. */ + pjs = pgpa_get_join_state(root, joinrel, outerrel, innerrel); + + /* If there is relevant advice, call a helper function to apply it. */ + if (pjs != NULL) + { + uint64 original_mask = joinrel->pgs_mask; + + pgpa_planner_apply_joinrel_advice(&joinrel->pgs_mask, + root->plan_name, + pjs); + + /* Emit debugging message, if enabled. */ + if (pg_plan_advice_trace_mask && original_mask != joinrel->pgs_mask) + { + if (root->plan_name != NULL) + ereport(WARNING, + (errmsg("strategy mask for join on RTIs %s in subplan \"%s\" changed from 0x%" PRIx64 " to 0x%" PRIx64, + pgpa_bms_to_cstring(joinrel->relids), + root->plan_name, + original_mask, + joinrel->pgs_mask))); + else + ereport(WARNING, + (errmsg("strategy mask for join on RTIs %s changed from 0x%" PRIx64 " to 0x%" PRIx64, + pgpa_bms_to_cstring(joinrel->relids), + original_mask, + joinrel->pgs_mask))); + } + } + + /* Pass call to previous hook. */ + if (prev_joinrel_setup) + (*prev_joinrel_setup) (root, joinrel, outerrel, innerrel, + sjinfo, restrictlist); +} + +/* + * Enforce any provided advice that is relevant to this particular method of + * implementing this particular join. + */ +static void +pgpa_join_path_setup(PlannerInfo *root, RelOptInfo *joinrel, + RelOptInfo *outerrel, RelOptInfo *innerrel, + JoinType jointype, JoinPathExtraData *extra) +{ + pgpa_join_state *pjs; + + Assert(bms_membership(joinrel->relids) == BMS_MULTIPLE); + + /* + * If we're considering implementing a semijoin by making one side unique, + * make a note of it in the pgpa_planner_state. See comments for + * pgpa_sj_unique_rel for why we do this. + */ + if (jointype == JOIN_UNIQUE_OUTER || jointype == JOIN_UNIQUE_INNER) + { + pgpa_planner_state *pps; + RelOptInfo *uniquerel; + + uniquerel = jointype == JOIN_UNIQUE_OUTER ? outerrel : innerrel; + pps = GetPlannerGlobalExtensionState(root->glob, planner_extension_id); + if (pps != NULL && + (pps->generate_advice_string || pps->generate_advice_feedback)) + { + bool found = false; + + /* Avoid adding duplicates. */ + foreach_ptr(pgpa_sj_unique_rel, ur, pps->sj_unique_rels) + { + /* + * We should always use the same pointer for the same plan + * name, so we need not use strcmp() here. + */ + if (root->plan_name == ur->plan_name && + bms_equal(uniquerel->relids, ur->relids)) + { + found = true; + break; + } + } + + /* If not a duplicate, append to the list. */ + if (!found) + { + pgpa_sj_unique_rel *ur = palloc_object(pgpa_sj_unique_rel); + + ur->plan_name = root->plan_name; + ur->relids = uniquerel->relids; + pps->sj_unique_rels = lappend(pps->sj_unique_rels, ur); + } + } + } + + /* Get our private state information for this join. */ + pjs = pgpa_get_join_state(root, joinrel, outerrel, innerrel); + + /* If there is relevant advice, call a helper function to apply it. */ + if (pjs != NULL) + { + uint64 original_mask = extra->pgs_mask; + + pgpa_planner_apply_join_path_advice(jointype, + &extra->pgs_mask, + root->plan_name, + pjs); + + /* Emit debugging message, if enabled. */ + if (pg_plan_advice_trace_mask && original_mask != extra->pgs_mask) + { + if (root->plan_name != NULL) + ereport(WARNING, + (errmsg("strategy mask for %s join on %s with outer %s and inner %s in subplan \"%s\" changed from 0x%" PRIx64 " to 0x%" PRIx64, + pgpa_jointype_to_cstring(jointype), + pgpa_bms_to_cstring(joinrel->relids), + pgpa_bms_to_cstring(outerrel->relids), + pgpa_bms_to_cstring(innerrel->relids), + root->plan_name, + original_mask, + extra->pgs_mask))); + else + ereport(WARNING, + (errmsg("strategy mask for %s join on %s with outer %s and inner %s changed from 0x%" PRIx64 " to 0x%" PRIx64, + pgpa_jointype_to_cstring(jointype), + pgpa_bms_to_cstring(joinrel->relids), + pgpa_bms_to_cstring(outerrel->relids), + pgpa_bms_to_cstring(innerrel->relids), + original_mask, + extra->pgs_mask))); + } + } + + /* Pass call to previous hook. */ + if (prev_join_path_setup) + (*prev_join_path_setup) (root, joinrel, outerrel, innerrel, + jointype, extra); +} + +/* + * Search for advice pertaining to a proposed join. + */ +static pgpa_join_state * +pgpa_get_join_state(PlannerInfo *root, RelOptInfo *joinrel, + RelOptInfo *outerrel, RelOptInfo *innerrel) +{ + pgpa_planner_state *pps; + pgpa_join_state *pjs; + bool new_pjs = false; + + /* Fetch our private state, set up by pgpa_planner_setup(). */ + pps = GetPlannerGlobalExtensionState(root->glob, planner_extension_id); + if (pps == NULL || pps->trove == NULL) + { + /* No advice applies to this query, hence none to this joinrel. */ + return NULL; + } + + /* + * See whether we've previously associated a pgpa_join_state with this + * joinrel. If we have not, we need to try to construct one. If we have, + * then there are two cases: (a) if innerrel and outerrel are unchanged, + * we can simply use it, and (b) if they have changed, we need to rejigger + * the array of identifiers but can still skip the trove lookup. + */ + pjs = GetRelOptInfoExtensionState(joinrel, planner_extension_id); + if (pjs != NULL) + { + if (pjs->join_indexes == NULL && pjs->rel_indexes == NULL) + { + /* + * If there's no potentially relevant advice, then the presence of + * this pgpa_join_state acts like a negative cache entry: it tells + * us not to bother searching the trove for advice, because we + * will not find any. + */ + return NULL; + } + + if (pjs->outerrel == outerrel && pjs->innerrel == innerrel) + { + /* No updates required, so just return. */ + /* XXX. Does this need to do something different under GEQO? */ + return pjs; + } + } + + /* + * If there's no pgpa_join_state yet, we need to allocate one. Trove keys + * will not get built for RTE_JOIN RTEs, so the array may end up being + * larger than needed. It's not worth trying to compute a perfectly + * accurate count here. + */ + if (pjs == NULL) + { + int pessimistic_count = bms_num_members(joinrel->relids); + + pjs = palloc0_object(pgpa_join_state); + pjs->rids = palloc_array(pgpa_identifier, pessimistic_count); + new_pjs = true; + } + + /* + * Either we just allocated a new pgpa_join_state, or the existing one + * needs reconfiguring for a new innerrel and outerrel. The required array + * size can't change, so we can overwrite the existing one. + */ + pjs->outerrel = outerrel; + pjs->innerrel = innerrel; + pjs->outer_count = + pgpa_compute_identifiers_by_relids(root, outerrel->relids, pjs->rids); + pjs->inner_count = + pgpa_compute_identifiers_by_relids(root, innerrel->relids, + pjs->rids + pjs->outer_count); + + /* + * If we allocated a new pgpa_join_state, search our trove of advice for + * relevant entries. The trove lookup will return the same results for + * every outerrel/innerrel combination, so we don't need to repeat that + * work every time. + */ + if (new_pjs) + { + pgpa_trove_result tresult; + + /* Find join entries. */ + pgpa_trove_lookup(pps->trove, PGPA_TROVE_LOOKUP_JOIN, + pjs->outer_count + pjs->inner_count, + pjs->rids, &tresult); + pjs->join_entries = tresult.entries; + pjs->join_indexes = tresult.indexes; + + /* Find rel entries. */ + pgpa_trove_lookup(pps->trove, PGPA_TROVE_LOOKUP_REL, + pjs->outer_count + pjs->inner_count, + pjs->rids, &tresult); + pjs->rel_entries = tresult.entries; + pjs->rel_indexes = tresult.indexes; + + /* Now that the new pgpa_join_state is fully valid, save a pointer. */ + SetRelOptInfoExtensionState(joinrel, planner_extension_id, pjs); + + /* + * If there was no relevant advice found, just return NULL. This + * pgpa_join_state will stick around as a sort of negative cache + * entry, so that future calls for this same joinrel quickly return + * NULL. + */ + if (pjs->join_indexes == NULL && pjs->rel_indexes == NULL) + return NULL; + } + + return pjs; +} + +/* + * Enforce overall restrictions on a join relation that apply uniformly + * regardless of the choice of inner and outer rel. + */ +static void +pgpa_planner_apply_joinrel_advice(uint64 *pgs_mask_p, char *plan_name, + pgpa_join_state *pjs) +{ + int i = -1; + int flags; + bool gather_conflict = false; + uint64 gather_mask = 0; + Bitmapset *gather_partial_match = NULL; + Bitmapset *gather_full_match = NULL; + bool partitionwise_conflict = false; + int partitionwise_outcome = 0; + Bitmapset *partitionwise_partial_match = NULL; + Bitmapset *partitionwise_full_match = NULL; + + /* Iterate over all possibly-relevant advice. */ + while ((i = bms_next_member(pjs->rel_indexes, i)) >= 0) + { + pgpa_trove_entry *entry = &pjs->rel_entries[i]; + pgpa_itm_type itm; + bool full_match = false; + uint64 my_gather_mask = 0; + int my_partitionwise_outcome = 0; /* >0 yes, <0 no */ + + /* + * For GATHER and GATHER_MERGE, if the specified relations exactly + * match this joinrel, do whatever the advice says; otherwise, don't + * allow Gather or Gather Merge at this level. For NO_GATHER, there + * must be a single target relation which must be included in this + * joinrel, so just don't allow Gather or Gather Merge here, full + * stop. + */ + if (entry->tag == PGPA_TAG_NO_GATHER) + { + my_gather_mask = PGS_CONSIDER_NONPARTIAL; + full_match = true; + } + else + { + int total_count; + + total_count = pjs->outer_count + pjs->inner_count; + itm = pgpa_identifiers_match_target(total_count, pjs->rids, + entry->target); + Assert(itm != PGPA_ITM_DISJOINT); + + if (itm == PGPA_ITM_EQUAL) + { + full_match = true; + if (entry->tag == PGPA_TAG_PARTITIONWISE) + my_partitionwise_outcome = 1; + else if (entry->tag == PGPA_TAG_GATHER) + my_gather_mask = PGS_GATHER; + else if (entry->tag == PGPA_TAG_GATHER_MERGE) + my_gather_mask = PGS_GATHER_MERGE; + else + elog(ERROR, "unexpected advice tag: %d", + (int) entry->tag); + } + else + { + /* + * If specified relations don't exactly match this joinrel, + * then we should do the opposite of whatever the advice says. + * For instance, if we have PARTITIONWISE((a b c)) or + * GATHER((a b c)) and this joinrel covers {a, b} or {a, b, c, + * d} or {a, d}, we shouldn't plan it partitionwise or put a + * Gather or Gather Merge on it here. + * + * Also, we can't put a Gather or Gather Merge at this level + * if there is PARTITIONWISE advice that overlaps with it, + * unless the PARTITIONWISE advice covers a subset of the + * relations in the joinrel. To continue the previous example, + * PARTITIONWISE((a b c)) is logically incompatible with + * GATHER((a b)) or GATHER((a d)), but not with GATHER((a b c + * d)). + * + * Conversely, we can't proceed partitionwise at this level if + * there is overlapping GATHER or GATHER_MERGE advice, unless + * that advice covers a superset of the relations in this + * joinrel. This is just the flip side of the preceding point. + */ + if (entry->tag == PGPA_TAG_PARTITIONWISE) + { + my_partitionwise_outcome = -1; + if (itm != PGPA_ITM_TARGETS_ARE_SUBSET) + my_gather_mask = PGS_CONSIDER_NONPARTIAL; + } + else if (entry->tag == PGPA_TAG_GATHER || + entry->tag == PGPA_TAG_GATHER_MERGE) + { + my_gather_mask = PGS_CONSIDER_NONPARTIAL; + if (itm != PGPA_ITM_KEYS_ARE_SUBSET) + my_partitionwise_outcome = -1; + } + else + elog(ERROR, "unexpected advice tag: %d", + (int) entry->tag); + } + } + + /* + * If we set my_gather_mask up above, then we (1) make a note if the + * advice conflicted, (2) remember the mask value, and (3) remember + * whether this was a full or partial match. + */ + if (my_gather_mask != 0) + { + if (gather_mask != 0 && gather_mask != my_gather_mask) + gather_conflict = true; + gather_mask = my_gather_mask; + if (full_match) + gather_full_match = bms_add_member(gather_full_match, i); + else + gather_partial_match = bms_add_member(gather_partial_match, i); + } + + /* + * Likewise, if we set my_partitionwise_outcome up above, then we (1) + * make a note if the advice conflicted, (2) remember what the desired + * outcome was, and (3) remember whether this was a full or partial + * match. + */ + if (my_partitionwise_outcome != 0) + { + if (partitionwise_outcome != 0 && + partitionwise_outcome != my_partitionwise_outcome) + partitionwise_conflict = true; + partitionwise_outcome = my_partitionwise_outcome; + if (full_match) + partitionwise_full_match = + bms_add_member(partitionwise_full_match, i); + else + partitionwise_partial_match = + bms_add_member(partitionwise_partial_match, i); + } + } + + /* + * Mark every Gather-related piece of advice as partially matched, and if + * the set of targets exactly matched this relation, fully matched. If + * there was a conflict, mark them all as conflicting. + */ + flags = PGPA_TE_MATCH_PARTIAL; + if (gather_conflict) + flags |= PGPA_TE_CONFLICTING; + pgpa_trove_set_flags(pjs->rel_entries, gather_partial_match, flags); + flags |= PGPA_TE_MATCH_FULL; + pgpa_trove_set_flags(pjs->rel_entries, gather_full_match, flags); + + /* Likewise for partitionwise advice. */ + flags = PGPA_TE_MATCH_PARTIAL; + if (partitionwise_conflict) + flags |= PGPA_TE_CONFLICTING; + pgpa_trove_set_flags(pjs->rel_entries, partitionwise_partial_match, flags); + flags |= PGPA_TE_MATCH_FULL; + pgpa_trove_set_flags(pjs->rel_entries, partitionwise_full_match, flags); + + /* + * Enforce restrictions on the Gather/Gather Merge. Only clear bits here, + * so that we still respect the enable_* GUCs. Do nothing if the advice + * conflicts. + */ + if (gather_mask != 0 && !gather_conflict) + { + uint64 all_gather_mask; + + all_gather_mask = + PGS_GATHER | PGS_GATHER_MERGE | PGS_CONSIDER_NONPARTIAL; + *pgs_mask_p &= ~(all_gather_mask & ~gather_mask); + } + + /* + * As above, but for partitionwise advice. + * + * To induce a partitionwise join, we disable all the ordinary means of + * performing a join, so that an Append or MergeAppend path will hopefully + * be chosen. + * + * To prevent one, we just disable Append and MergeAppend. Note that we + * must not unset PGS_CONSIDER_PARTITIONWISE even when we don't want a + * partitionwise join here, because we might want one at a higher level + * that will construct its own paths using the ones from this level. + */ + if (partitionwise_outcome != 0 && !partitionwise_conflict) + { + if (partitionwise_outcome > 0) + *pgs_mask_p = (*pgs_mask_p & ~PGS_JOIN_ANY); + else + *pgs_mask_p &= ~(PGS_APPEND | PGS_MERGE_APPEND); + } +} + +/* + * Enforce restrictions on the join order or join method. + */ +static void +pgpa_planner_apply_join_path_advice(JoinType jointype, uint64 *pgs_mask_p, + char *plan_name, + pgpa_join_state *pjs) +{ + int i = -1; + Bitmapset *jo_permit_indexes = NULL; + Bitmapset *jo_deny_indexes = NULL; + Bitmapset *jo_deny_rel_indexes = NULL; + Bitmapset *jm_indexes = NULL; + bool jm_conflict = false; + uint32 join_mask = 0; + Bitmapset *sj_permit_indexes = NULL; + Bitmapset *sj_deny_indexes = NULL; + + /* + * Reconsider PARTITIONWISE(...) advice. + * + * We already thought about this for the joinrel as a whole, but in some + * cases, partitionwise advice can also constrain the join order. For + * instance, if the advice says PARTITIONWISE((t1 t2)), we shouldn't build + * join paths for any joinrel that includes t1 or t2 unless it also + * includes the other. In general, the partitionwise operation must have + * already been completed within one side of the current join or the + * other, else the join order is impermissible. + * + * NB: It might seem tempting to try to deal with PARTITIONWISE advice + * entirely in this function, but that doesn't work. Here, we can only + * affect the pgs_mask within a particular JoinPathExtraData, that is, for + * a particular choice of innerrel and outerrel. Partitionwise paths are + * not built that way, so we must set pgs_mask for the RelOptInfo, which + * is best done in pgpa_planner_apply_joinrel_advice. + */ + while ((i = bms_next_member(pjs->rel_indexes, i)) >= 0) + { + pgpa_trove_entry *entry = &pjs->rel_entries[i]; + pgpa_itm_type inner_itm; + pgpa_itm_type outer_itm; + + if (entry->tag != PGPA_TAG_PARTITIONWISE) + continue; + + outer_itm = pgpa_identifiers_match_target(pjs->outer_count, + pjs->rids, entry->target); + if (outer_itm == PGPA_ITM_EQUAL || + outer_itm == PGPA_ITM_TARGETS_ARE_SUBSET) + continue; + + inner_itm = pgpa_identifiers_match_target(pjs->inner_count, + pjs->rids + pjs->outer_count, + entry->target); + if (inner_itm == PGPA_ITM_EQUAL || + inner_itm == PGPA_ITM_TARGETS_ARE_SUBSET) + continue; + + jo_deny_rel_indexes = bms_add_member(jo_deny_rel_indexes, i); + } + + /* Iterate over advice that pertains to the join order and method. */ + i = -1; + while ((i = bms_next_member(pjs->join_indexes, i)) >= 0) + { + pgpa_trove_entry *entry = &pjs->join_entries[i]; + uint32 my_join_mask; + + /* Handle join order advice. */ + if (entry->tag == PGPA_TAG_JOIN_ORDER) + { + pgpa_jo_outcome jo_outcome; + + jo_outcome = pgpa_join_order_permits_join(pjs->outer_count, + pjs->inner_count, + pjs->rids, + entry); + if (jo_outcome == PGPA_JO_PERMITTED) + jo_permit_indexes = bms_add_member(jo_permit_indexes, i); + else if (jo_outcome == PGPA_JO_DENIED) + jo_deny_indexes = bms_add_member(jo_deny_indexes, i); + continue; + } + + /* Handle join method advice. */ + my_join_mask = pgpa_join_strategy_mask_from_advice_tag(entry->tag); + if (my_join_mask != 0) + { + bool permit; + bool restrict_method; + + if (entry->tag == PGPA_TAG_FOREIGN_JOIN) + permit = pgpa_opaque_join_permits_join(pjs->outer_count, + pjs->inner_count, + pjs->rids, + entry, + &restrict_method); + else + permit = pgpa_join_method_permits_join(pjs->outer_count, + pjs->inner_count, + pjs->rids, + entry, + &restrict_method); + if (!permit) + jo_deny_indexes = bms_add_member(jo_deny_indexes, i); + else if (restrict_method) + { + jm_indexes = bms_add_member(jm_indexes, i); + if (join_mask != 0 && join_mask != my_join_mask) + jm_conflict = true; + join_mask = my_join_mask; + } + continue; + } + + /* Handle semijoin uniqueness advice. */ + if (entry->tag == PGPA_TAG_SEMIJOIN_UNIQUE || + entry->tag == PGPA_TAG_SEMIJOIN_NON_UNIQUE) + { + bool outer_side_nullable; + bool restrict_method; + + /* Planner has nullable side of the semijoin on the outer side? */ + outer_side_nullable = (jointype == JOIN_UNIQUE_OUTER || + jointype == JOIN_RIGHT_SEMI); + + if (!pgpa_semijoin_permits_join(pjs->outer_count, + pjs->inner_count, + pjs->rids, + entry, + outer_side_nullable, + &restrict_method)) + jo_deny_indexes = bms_add_member(jo_deny_indexes, i); + else if (restrict_method) + { + bool advice_unique; + bool jt_unique; + bool jt_non_unique; + + /* Advice wants to unique-ify and use a regular join? */ + advice_unique = (entry->tag == PGPA_TAG_SEMIJOIN_UNIQUE); + + /* Planner is trying to unique-ify and use a regular join? */ + jt_unique = (jointype == JOIN_UNIQUE_INNER || + jointype == JOIN_UNIQUE_OUTER); + + /* Planner is trying a semi-join, without unique-ifying? */ + jt_non_unique = (jointype == JOIN_SEMI || + jointype == JOIN_RIGHT_SEMI); + + if (!jt_unique && !jt_non_unique) + { + /* + * This doesn't seem to be a semijoin to which SJ_UNIQUE + * or SJ_NON_UNIQUE can be applied. + */ + entry->flags |= PGPA_TE_INAPPLICABLE; + } + else if (advice_unique != jt_unique) + sj_deny_indexes = bms_add_member(sj_deny_indexes, i); + else + sj_permit_indexes = bms_add_member(sj_permit_indexes, i); + } + continue; + } + } + + /* + * If the advice indicates both that this join order is permissible and + * also that it isn't, then mark advice related to the join order as + * conflicting. + */ + if (jo_permit_indexes != NULL && + (jo_deny_indexes != NULL || jo_deny_rel_indexes != NULL)) + { + pgpa_trove_set_flags(pjs->join_entries, jo_permit_indexes, + PGPA_TE_CONFLICTING); + pgpa_trove_set_flags(pjs->join_entries, jo_deny_indexes, + PGPA_TE_CONFLICTING); + pgpa_trove_set_flags(pjs->rel_entries, jo_deny_rel_indexes, + PGPA_TE_CONFLICTING); + } + + /* + * If more than one join method specification is relevant here and they + * differ, mark them all as conflicting. + */ + if (jm_conflict) + pgpa_trove_set_flags(pjs->join_entries, jm_indexes, + PGPA_TE_CONFLICTING); + + /* If semijoin advice says both yes and no, mark it all as conflicting. */ + if (sj_permit_indexes != NULL && sj_deny_indexes != NULL) + { + pgpa_trove_set_flags(pjs->join_entries, sj_permit_indexes, + PGPA_TE_CONFLICTING); + pgpa_trove_set_flags(pjs->join_entries, sj_deny_indexes, + PGPA_TE_CONFLICTING); + } + + /* + * Enforce restrictions on the join order and join method, and any + * semijoin-related restrictions. Only clear bits here, so that we still + * respect the enable_* GUCs. Do nothing in cases where the advice on a + * single topic conflicts. + */ + if ((jo_deny_indexes != NULL || jo_deny_rel_indexes != NULL) && + jo_permit_indexes == NULL) + *pgs_mask_p &= ~PGS_JOIN_ANY; + if (join_mask != 0 && !jm_conflict) + *pgs_mask_p &= ~(PGS_JOIN_ANY & ~join_mask); + if (sj_deny_indexes != NULL && sj_permit_indexes == NULL) + *pgs_mask_p &= ~PGS_JOIN_ANY; +} + +/* + * Translate an advice tag into a path generation strategy mask. + * + * This function can be called with tag types that don't represent join + * strategies. In such cases, we just return 0, which can't be confused with + * a valid mask. + */ +static uint64 +pgpa_join_strategy_mask_from_advice_tag(pgpa_advice_tag_type tag) +{ + switch (tag) + { + case PGPA_TAG_FOREIGN_JOIN: + return PGS_FOREIGNJOIN; + case PGPA_TAG_MERGE_JOIN_PLAIN: + return PGS_MERGEJOIN_PLAIN; + case PGPA_TAG_MERGE_JOIN_MATERIALIZE: + return PGS_MERGEJOIN_MATERIALIZE; + case PGPA_TAG_NESTED_LOOP_PLAIN: + return PGS_NESTLOOP_PLAIN; + case PGPA_TAG_NESTED_LOOP_MATERIALIZE: + return PGS_NESTLOOP_MATERIALIZE; + case PGPA_TAG_NESTED_LOOP_MEMOIZE: + return PGS_NESTLOOP_MEMOIZE; + case PGPA_TAG_HASH_JOIN: + return PGS_HASHJOIN; + default: + return 0; + } +} + +/* + * Does a certain item of join order advice permit a certain join? + * + * Returns PGPA_JO_DENIED if the advice is incompatible with the proposed + * join order. + * + * Returns PGPA_JO_PERMITTED if the advice specifies exactly the proposed + * join order. This implies that a partitionwise join should not be + * performed at this level; rather, one of the traditional join methods + * should be used. + * + * Returns PGPA_JO_INDIFFERENT if the advice does not care what happens. + * We use this for unordered JOIN_ORDER sublists, which are compatible with + * partitionwise join but do not mandate it. + */ +static pgpa_jo_outcome +pgpa_join_order_permits_join(int outer_count, int inner_count, + pgpa_identifier *rids, + pgpa_trove_entry *entry) +{ + bool loop = true; + bool sublist = false; + int length; + int outer_length; + pgpa_advice_target *target = entry->target; + pgpa_advice_target *prefix_target; + + /* We definitely have at least a partial match for this trove entry. */ + entry->flags |= PGPA_TE_MATCH_PARTIAL; + + /* + * Find the innermost sublist that contains all keys; if no sublist does, + * then continue processing with the toplevel list. + * + * For example, if the advice says JOIN_ORDER(t1 t2 (t3 t4 t5)), then we + * should evaluate joins that only involve t3, t4, and/or t5 against the + * (t3 t4 t5) sublist, and others against the full list. + * + * Note that (1) outermost sublist is always ordered and (2) whenever we + * zoom into an unordered sublist, we instantly return + * PGPA_JO_INDIFFERENT. + */ + while (loop) + { + Assert(target->ttype == PGPA_TARGET_ORDERED_LIST); + + loop = false; + foreach_ptr(pgpa_advice_target, child_target, target->children) + { + pgpa_itm_type itm; + + if (child_target->ttype == PGPA_TARGET_IDENTIFIER) + continue; + + itm = pgpa_identifiers_match_target(outer_count + inner_count, + rids, child_target); + if (itm == PGPA_ITM_EQUAL || itm == PGPA_ITM_KEYS_ARE_SUBSET) + { + if (child_target->ttype == PGPA_TARGET_ORDERED_LIST) + { + target = child_target; + sublist = true; + loop = true; + break; + } + else + { + Assert(child_target->ttype == PGPA_TARGET_UNORDERED_LIST); + return PGPA_JO_INDIFFERENT; + } + } + } + } + + /* + * Try to find a prefix of the selected join order list that is exactly + * equal to the outer side of the proposed join. + */ + length = list_length(target->children); + prefix_target = palloc0_object(pgpa_advice_target); + prefix_target->ttype = PGPA_TARGET_ORDERED_LIST; + for (outer_length = 1; outer_length <= length; ++outer_length) + { + pgpa_itm_type itm; + + /* Avoid leaking memory in every loop iteration. */ + if (prefix_target->children != NULL) + list_free(prefix_target->children); + prefix_target->children = list_copy_head(target->children, + outer_length); + + /* Search, hoping to find an exact match. */ + itm = pgpa_identifiers_match_target(outer_count, rids, prefix_target); + if (itm == PGPA_ITM_EQUAL) + break; + + /* + * If the prefix of the join order list that we're considering + * includes some but not all of the outer rels, we can make the prefix + * longer to find an exact match. But if the advice hasn't mentioned + * everything that's part of our outer rel yet, but has mentioned + * things that are not, then this join doesn't match the join order + * list. + */ + if (itm != PGPA_ITM_TARGETS_ARE_SUBSET) + return PGPA_JO_DENIED; + } + + /* + * If the previous loop stopped before the prefix_target included the + * entire join order list, then the next member of the join order list + * must exactly match the inner side of the join. + * + * Example: Given JOIN_ORDER(t1 t2 (t3 t4 t5)), if the outer side of the + * current join includes only t1, then the inner side must be exactly t2; + * if the outer side includes both t1 and t2, then the inner side must + * include exactly t3, t4, and t5. + */ + if (outer_length < length) + { + pgpa_advice_target *inner_target; + pgpa_itm_type itm; + + inner_target = list_nth(target->children, outer_length); + + itm = pgpa_identifiers_match_target(inner_count, rids + outer_count, + inner_target); + + /* + * Before returning, consider whether we need to mark this entry as + * fully matched. If we're considering the full list rather than a + * sublist, and if we found every item but one on the outer side of + * the join and the last item on the inner side of the join, then the + * answer is yes. + */ + if (!sublist && outer_length + 1 == length && itm == PGPA_ITM_EQUAL) + entry->flags |= PGPA_TE_MATCH_FULL; + + return (itm == PGPA_ITM_EQUAL) ? PGPA_JO_PERMITTED : PGPA_JO_DENIED; + } + + /* + * If we get here, then the outer side of the join includes the entirety + * of the join order list. In this case, we behave differently depending + * on whether we're looking at the top-level join order list or sublist. + * At the top-level, we treat the specified list as mandating that the + * actual join order has the given list as a prefix, but a sublist + * requires an exact match. + * + * Example: Given JOIN_ORDER(t1 t2 (t3 t4 t5)), we must start by joining + * all five of those relations and in that sequence, but once that is + * done, it's OK to join any other rels that are part of the join problem. + * This allows a user to specify the driving table and perhaps the first + * few things to which it should be joined while leaving the rest of the + * join order up the optimizer. But it seems like it would be surprising, + * given that specification, if the user could add t6 to the (t3 t4 t5) + * sub-join, so we don't allow that. If we did want to allow it, the logic + * earlier in this function would require substantial adjustment: we could + * allow the t3-t4-t5-t6 join to be built here, but the next step of + * joining t1-t2 to the result would still be rejected. + */ + if (!sublist) + entry->flags |= PGPA_TE_MATCH_FULL; + return sublist ? PGPA_JO_DENIED : PGPA_JO_PERMITTED; +} + +/* + * Does a certain item of join method advice permit a certain join? + * + * Advice such as HASH_JOIN((x y)) means that there should be a hash join with + * exactly x and y on the inner side. Obviously, this means that if we are + * considering a join with exactly x and y on the inner side, we should enforce + * the use of a hash join. However, it also means that we must reject some + * incompatible join orders entirely. For example, a join with exactly x + * and y on the outer side shouldn't be allowed, because such paths might win + * over the advice-driven path on cost. + * + * To accommodate these requirements, this function returns true if the join + * should be allowed and false if it should not. Furthermore, *restrict_method + * is set to true if the join method should be enforced and false if not. + */ +static bool +pgpa_join_method_permits_join(int outer_count, int inner_count, + pgpa_identifier *rids, + pgpa_trove_entry *entry, + bool *restrict_method) +{ + pgpa_advice_target *target = entry->target; + pgpa_itm_type inner_itm; + pgpa_itm_type outer_itm; + pgpa_itm_type join_itm; + + /* We definitely have at least a partial match for this trove entry. */ + entry->flags |= PGPA_TE_MATCH_PARTIAL; + + *restrict_method = false; + + /* + * If our inner rel mentions exactly the same relations as the advice + * target, allow the join and enforce the join method restriction. + * + * If our inner rel mentions a superset of the target relations, allow the + * join. The join we care about has already taken place, and this advice + * imposes no further restrictions. + */ + inner_itm = pgpa_identifiers_match_target(inner_count, + rids + outer_count, + target); + if (inner_itm == PGPA_ITM_EQUAL) + { + entry->flags |= PGPA_TE_MATCH_FULL; + *restrict_method = true; + return true; + } + else if (inner_itm == PGPA_ITM_TARGETS_ARE_SUBSET) + return true; + + /* + * If our outer rel mentions a superset of the relations in the advice + * target, no restrictions apply, because the join we care about has + * already taken place. + * + * On the other hand, if our outer rel mentions exactly the relations + * mentioned in the advice target, the planner is trying to reverse the + * sides of the join as compared with our desired outcome. Reject that. + */ + outer_itm = pgpa_identifiers_match_target(outer_count, + rids, target); + if (outer_itm == PGPA_ITM_TARGETS_ARE_SUBSET) + return true; + else if (outer_itm == PGPA_ITM_EQUAL) + return false; + + /* + * If the advice target mentions only a single relation, the test below + * cannot ever pass, so save some work by exiting now. + */ + if (target->ttype == PGPA_TARGET_IDENTIFIER) + return false; + + /* + * If everything in the joinrel appears in the advice target, we're below + * the level of the join we want to control. + * + * For example, HASH_JOIN((x y)) doesn't restrict how x and y can be + * joined. + * + * This lookup shouldn't return PGPA_ITM_DISJOINT, because any such advice + * should not have been returned from the trove in the first place. + */ + join_itm = pgpa_identifiers_match_target(outer_count + inner_count, + rids, target); + Assert(join_itm != PGPA_ITM_DISJOINT); + if (join_itm == PGPA_ITM_KEYS_ARE_SUBSET || + join_itm == PGPA_ITM_EQUAL) + return true; + + /* + * We've already permitted all allowable cases, so reject this. + * + * If we reach this point, then the advice overlaps with this join but + * isn't entirely contained within either side, and there's also at least + * one relation present in the join that isn't mentioned by the advice. + * + * For instance, in the HASH_JOIN((x y)) example, we would reach here if x + * were on one side of the join, y on the other, and at least one of the + * two sides also included some other relation, say t. In that case, + * accepting this join would allow the (x y t) joinrel to contain + * non-disabled paths that do not put (x y) on the inner side of a hash + * join; we could instead end up with something like (x JOIN t) JOIN y. + */ + return false; +} + +/* + * Does advice concerning an opaque join permit a certain join? + * + * By an opaque join, we mean one where the exact mechanism by which the + * join is performed is not visible to PostgreSQL. Currently this is the + * case only for foreign joins: FOREIGN_JOIN((x y z)) means that x, y, and + * z are joined on the remote side, but we know nothing about the join order + * or join methods used over there. + * + * The logic here needs to differ from pgpa_join_method_permits_join because, + * for other join types, the advice target is the set of inner rels; here, it + * includes both inner and outer rels. + */ +static bool +pgpa_opaque_join_permits_join(int outer_count, int inner_count, + pgpa_identifier *rids, + pgpa_trove_entry *entry, + bool *restrict_method) +{ + pgpa_advice_target *target = entry->target; + pgpa_itm_type join_itm; + + /* We definitely have at least a partial match for this trove entry. */ + entry->flags |= PGPA_TE_MATCH_PARTIAL; + + *restrict_method = false; + + join_itm = pgpa_identifiers_match_target(outer_count + inner_count, + rids, target); + if (join_itm == PGPA_ITM_EQUAL) + { + /* + * We have an exact match, and should therefore allow the join and + * enforce the use of the relevant opaque join method. + */ + entry->flags |= PGPA_TE_MATCH_FULL; + *restrict_method = true; + return true; + } + + if (join_itm == PGPA_ITM_KEYS_ARE_SUBSET || + join_itm == PGPA_ITM_TARGETS_ARE_SUBSET) + { + /* + * If join_itm == PGPA_ITM_TARGETS_ARE_SUBSET, then the join we care + * about has already taken place and no further restrictions apply. + * + * If join_itm == PGPA_ITM_KEYS_ARE_SUBSET, we're still building up to + * the join we care about and have not introduced any extraneous + * relations not named in the advice. Note that ForeignScan paths for + * joins are built up from ForeignScan paths from underlying joins and + * scans, so we must not disable this join when considering a subset + * of the relations we ultimately want. + */ + return true; + } + + /* + * The advice overlaps the join, but at least one relation is present in + * the join that isn't mentioned by the advice. We want to disable such + * paths so that we actually push down the join as intended. + */ + return false; +} + +/* + * Does advice concerning a semijoin permit a certain join? + * + * Unlike join method advice, which lists the rels on the inner side of the + * join, semijoin uniqueness advice lists the rels on the nullable side of the + * join. Those can be the same, if the join type is JOIN_UNIQUE_INNER or + * JOIN_SEMI, or they can be different, in case of JOIN_UNIQUE_OUTER or + * JOIN_RIGHT_SEMI. + * + * We don't know here whether the caller specified SEMIJOIN_UNIQUE or + * SEMIJOIN_NON_UNIQUE. The caller should check the join type against the + * advice type if and only if we set *restrict_method to true. + */ +static bool +pgpa_semijoin_permits_join(int outer_count, int inner_count, + pgpa_identifier *rids, + pgpa_trove_entry *entry, + bool outer_is_nullable, + bool *restrict_method) +{ + pgpa_advice_target *target = entry->target; + pgpa_itm_type join_itm; + pgpa_itm_type inner_itm; + pgpa_itm_type outer_itm; + + *restrict_method = false; + + /* We definitely have at least a partial match for this trove entry. */ + entry->flags |= PGPA_TE_MATCH_PARTIAL; + + /* + * If outer rel is the nullable side and contains exactly the same + * relations as the advice target, then the join order is allowable, but + * the caller must check whether the advice tag (either SEMIJOIN_UNIQUE or + * SEMIJOIN_NON_UNIQUE) matches the join type. + * + * If the outer rel is a superset of the target relations, the join we + * care about has already taken place, so we should impose no further + * restrictions. + */ + outer_itm = pgpa_identifiers_match_target(outer_count, + rids, target); + if (outer_itm == PGPA_ITM_EQUAL) + { + entry->flags |= PGPA_TE_MATCH_FULL; + if (outer_is_nullable) + { + *restrict_method = true; + return true; + } + } + else if (outer_itm == PGPA_ITM_TARGETS_ARE_SUBSET) + return true; + + /* As above, but for the inner rel. */ + inner_itm = pgpa_identifiers_match_target(inner_count, + rids + outer_count, + target); + if (inner_itm == PGPA_ITM_EQUAL) + { + entry->flags |= PGPA_TE_MATCH_FULL; + if (!outer_is_nullable) + { + *restrict_method = true; + return true; + } + } + else if (inner_itm == PGPA_ITM_TARGETS_ARE_SUBSET) + return true; + + /* + * If everything in the joinrel appears in the advice target, we're below + * the level of the join we want to control. + */ + join_itm = pgpa_identifiers_match_target(outer_count + inner_count, + rids, target); + Assert(join_itm != PGPA_ITM_DISJOINT); + if (join_itm == PGPA_ITM_KEYS_ARE_SUBSET || + join_itm == PGPA_ITM_EQUAL) + return true; + + /* + * We've tested for all allowable possibilities, and so must reject this + * join order. This can happen in two ways. + * + * First, we might be considering a semijoin that overlaps incompletely + * with one or both sides of the join. For example, if the user has + * specified SEMIJOIN_UNIQUE((t1 t2)) or SEMIJOIN_NON_UNIQUE((t1 t2)), we + * should reject a proposed t2-t3 join, since that could not result in a + * final plan compatible with the advice. + * + * Second, we might be considering a semijoin where the advice target + * perfectly matches one side of the join, but it's the wrong one. For + * example, in the example above, we might see a 3-way join between t1, + * t2, and t3, with (t1 t2) on the non-nullable side. That, too, would be + * incompatible with the advice. + */ + return false; +} + +/* + * Apply scan advice to a RelOptInfo. + */ +static void +pgpa_planner_apply_scan_advice(RelOptInfo *rel, + pgpa_trove_entry *scan_entries, + Bitmapset *scan_indexes, + pgpa_trove_entry *rel_entries, + Bitmapset *rel_indexes) +{ + bool gather_conflict = false; + Bitmapset *gather_partial_match = NULL; + Bitmapset *gather_full_match = NULL; + int i = -1; + pgpa_trove_entry *scan_entry = NULL; + int flags; + bool scan_type_conflict = false; + Bitmapset *scan_type_indexes = NULL; + Bitmapset *scan_type_rel_indexes = NULL; + uint64 gather_mask = 0; + uint64 scan_type = 0; + + /* Scrutinize available scan advice. */ + while ((i = bms_next_member(scan_indexes, i)) >= 0) + { + pgpa_trove_entry *my_entry = &scan_entries[i]; + uint64 my_scan_type = 0; + + /* Translate our advice tags to a scan strategy advice value. */ + if (my_entry->tag == PGPA_TAG_BITMAP_HEAP_SCAN) + { + /* + * Currently, PGS_CONSIDER_INDEXONLY can suppress Bitmap Heap + * Scans, so don't clear it when such a scan is requested. This + * happens because build_index_scan() thinks that the possibility + * of an index-only scan is a sufficient reason to consider using + * an otherwise-useless index, and get_index_paths() thinks that + * the same paths that are useful for index or index-only scans + * should also be considered for bitmap scans. Perhaps that logic + * should be tightened up, but until then we need to include + * PGS_CONSIDER_INDEXONLY in my_scan_type here. + */ + my_scan_type = PGS_BITMAPSCAN | PGS_CONSIDER_INDEXONLY; + } + else if (my_entry->tag == PGPA_TAG_INDEX_ONLY_SCAN) + my_scan_type = PGS_INDEXONLYSCAN | PGS_CONSIDER_INDEXONLY; + else if (my_entry->tag == PGPA_TAG_INDEX_SCAN) + my_scan_type = PGS_INDEXSCAN; + else if (my_entry->tag == PGPA_TAG_SEQ_SCAN) + my_scan_type = PGS_SEQSCAN; + else if (my_entry->tag == PGPA_TAG_TID_SCAN) + my_scan_type = PGS_TIDSCAN; + + /* + * If this is understandable scan advice, hang on to the entry, the + * inferred scan type, and the index at which we found it. + * + * Also make a note if we see conflicting scan type advice. Note that + * we regard two index specifications as conflicting unless they match + * exactly. In theory, perhaps we could regard INDEX_SCAN(a c) and + * INDEX_SCAN(a b.c) as non-conflicting if it happens that the only + * index named c is in schema b, but it doesn't seem worth the code. + */ + if (my_scan_type != 0) + { + if (scan_type != 0 && scan_type != my_scan_type) + scan_type_conflict = true; + if (!scan_type_conflict && scan_entry != NULL && + my_entry->target->itarget != NULL && + scan_entry->target->itarget != NULL && + !pgpa_index_targets_equal(scan_entry->target->itarget, + my_entry->target->itarget)) + scan_type_conflict = true; + scan_entry = my_entry; + scan_type = my_scan_type; + scan_type_indexes = bms_add_member(scan_type_indexes, i); + } + } + + /* Scrutinize available gather-related and partitionwise advice. */ + i = -1; + while ((i = bms_next_member(rel_indexes, i)) >= 0) + { + pgpa_trove_entry *my_entry = &rel_entries[i]; + uint64 my_gather_mask = 0; + bool just_one_rel; + + just_one_rel = my_entry->target->ttype == PGPA_TARGET_IDENTIFIER + || list_length(my_entry->target->children) == 1; + + /* + * PARTITIONWISE behaves like a scan type, except that if there's more + * than one relation targeted, it has no effect at this level. + */ + if (my_entry->tag == PGPA_TAG_PARTITIONWISE) + { + if (just_one_rel) + { + const uint64 my_scan_type = PGS_APPEND | PGS_MERGE_APPEND; + + if (scan_type != 0 && scan_type != my_scan_type) + scan_type_conflict = true; + scan_entry = my_entry; + scan_type = my_scan_type; + scan_type_rel_indexes = + bms_add_member(scan_type_rel_indexes, i); + } + continue; + } + + /* + * GATHER and GATHER_MERGE applied to a single rel mean that we should + * use the corresponding strategy here, while applying either to more + * than one rel means we should not use those strategies here, but + * rather at the level of the joinrel that corresponds to what was + * specified. NO_GATHER can only be applied to single rels. + * + * Note that setting PGS_CONSIDER_NONPARTIAL in my_gather_mask is + * equivalent to allowing the non-use of either form of Gather here. + */ + if (my_entry->tag == PGPA_TAG_GATHER || + my_entry->tag == PGPA_TAG_GATHER_MERGE) + { + if (!just_one_rel) + my_gather_mask = PGS_CONSIDER_NONPARTIAL; + else if (my_entry->tag == PGPA_TAG_GATHER) + my_gather_mask = PGS_GATHER; + else + my_gather_mask = PGS_GATHER_MERGE; + } + else if (my_entry->tag == PGPA_TAG_NO_GATHER) + { + Assert(just_one_rel); + my_gather_mask = PGS_CONSIDER_NONPARTIAL; + } + + /* + * If we set my_gather_mask up above, then we (1) make a note if the + * advice conflicted, (2) remember the mask value, and (3) remember + * whether this was a full or partial match. + */ + if (my_gather_mask != 0) + { + if (gather_mask != 0 && gather_mask != my_gather_mask) + gather_conflict = true; + gather_mask = my_gather_mask; + if (just_one_rel) + gather_full_match = bms_add_member(gather_full_match, i); + else + gather_partial_match = bms_add_member(gather_partial_match, i); + } + } + + /* Enforce choice of index. */ + if (scan_entry != NULL && !scan_type_conflict && + (scan_entry->tag == PGPA_TAG_INDEX_SCAN || + scan_entry->tag == PGPA_TAG_INDEX_ONLY_SCAN)) + { + pgpa_index_target *itarget = scan_entry->target->itarget; + IndexOptInfo *matched_index = NULL; + + foreach_node(IndexOptInfo, index, rel->indexlist) + { + char *relname = get_rel_name(index->indexoid); + Oid nspoid = get_rel_namespace(index->indexoid); + char *relnamespace = get_namespace_name_or_temp(nspoid); + + if (strcmp(itarget->indname, relname) == 0 && + (itarget->indnamespace == NULL || + strcmp(itarget->indnamespace, relnamespace) == 0)) + { + matched_index = index; + break; + } + } + + if (matched_index == NULL) + { + /* Don't force the scan type if the index doesn't exist. */ + scan_type = 0; + + /* Mark advice as inapplicable. */ + pgpa_trove_set_flags(scan_entries, scan_type_indexes, + PGPA_TE_INAPPLICABLE); + } + else + { + /* Disable every other index. */ + foreach_node(IndexOptInfo, index, rel->indexlist) + { + if (index != matched_index) + index->disabled = true; + } + } + } + + /* + * Mark all the scan method entries as fully matched; and if they specify + * different things, mark them all as conflicting. + */ + flags = PGPA_TE_MATCH_PARTIAL | PGPA_TE_MATCH_FULL; + if (scan_type_conflict) + flags |= PGPA_TE_CONFLICTING; + pgpa_trove_set_flags(scan_entries, scan_type_indexes, flags); + pgpa_trove_set_flags(rel_entries, scan_type_rel_indexes, flags); + + /* + * Mark every Gather-related piece of advice as partially matched. Mark + * the ones that included this relation as a target by itself as fully + * matched. If there was a conflict, mark them all as conflicting. + */ + flags = PGPA_TE_MATCH_PARTIAL; + if (gather_conflict) + flags |= PGPA_TE_CONFLICTING; + pgpa_trove_set_flags(rel_entries, gather_partial_match, flags); + flags |= PGPA_TE_MATCH_FULL; + pgpa_trove_set_flags(rel_entries, gather_full_match, flags); + + /* + * Enforce restrictions on the scan type and use of Gather/Gather Merge. + * Only clear bits here, so that we still respect the enable_* GUCs. Do + * nothing in cases where the advice on a single topic conflicts. + */ + if (scan_type != 0 && !scan_type_conflict) + { + uint64 all_scan_mask; + + all_scan_mask = PGS_SCAN_ANY | PGS_APPEND | PGS_MERGE_APPEND | + PGS_CONSIDER_INDEXONLY; + rel->pgs_mask &= ~(all_scan_mask & ~scan_type); + } + if (gather_mask != 0 && !gather_conflict) + { + uint64 all_gather_mask; + + all_gather_mask = + PGS_GATHER | PGS_GATHER_MERGE | PGS_CONSIDER_NONPARTIAL; + rel->pgs_mask &= ~(all_gather_mask & ~gather_mask); + } +} + +/* + * Add feedback entries for one trove slice to the provided list and + * return the resulting list. + * + * Feedback entries are generated from the trove entry's flags. It's assumed + * that the caller has already set all relevant flags with the exception of + * PGPA_TE_FAILED. We set that flag here if appropriate. + */ +static List * +pgpa_planner_append_feedback(List *list, pgpa_trove *trove, + pgpa_trove_lookup_type type, + pgpa_identifier *rt_identifiers, + pgpa_plan_walker_context *walker) +{ + pgpa_trove_entry *entries; + int nentries; + + pgpa_trove_lookup_all(trove, type, &entries, &nentries); + for (int i = 0; i < nentries; ++i) + { + pgpa_trove_entry *entry = &entries[i]; + DefElem *item; + + /* + * If this entry was fully matched, check whether generating advice + * from this plan would produce such an entry. If not, label the entry + * as failed. + */ + if ((entry->flags & PGPA_TE_MATCH_FULL) != 0 && + !pgpa_walker_would_advise(walker, rt_identifiers, + entry->tag, entry->target)) + entry->flags |= PGPA_TE_FAILED; + + item = makeDefElem(pgpa_cstring_trove_entry(entry), + (Node *) makeInteger(entry->flags), -1); + list = lappend(list, item); + } + + return list; +} + +/* + * Emit a WARNING to tell the user about a problem with the supplied plan + * advice. + */ +static void +pgpa_planner_feedback_warning(List *feedback) +{ + StringInfoData detailbuf; + StringInfoData flagbuf; + + /* Quick exit if there's no feedback. */ + if (feedback == NIL) + return; + + /* Initialize buffers. */ + initStringInfo(&detailbuf); + initStringInfo(&flagbuf); + + /* Main loop. */ + foreach_node(DefElem, item, feedback) + { + int flags = defGetInt32(item); + + /* + * Don't emit anything if it was fully matched with no problems found. + * + * NB: Feedback should never be marked fully matched without also + * being marked partially matched. + */ + if (flags == (PGPA_TE_MATCH_PARTIAL | PGPA_TE_MATCH_FULL)) + continue; + + /* + * Terminate each detail line except the last with a newline. This is + * also a convenient place to reset flagbuf. + */ + if (detailbuf.len > 0) + { + appendStringInfoChar(&detailbuf, '\n'); + resetStringInfo(&flagbuf); + } + + /* Generate output. */ + pgpa_trove_append_flags(&flagbuf, flags); + appendStringInfo(&detailbuf, "advice %s feedback is \"%s\"", + item->defname, flagbuf.data); + } + + /* Emit the warning, if any problems were found. */ + if (detailbuf.len > 0) + ereport(WARNING, + errmsg("supplied plan advice was not enforced"), + errdetail("%s", detailbuf.data)); +} + +#ifdef USE_ASSERT_CHECKING + +/* + * Fast hash function for a key consisting of an RTI and plan name. + */ +static uint32 +pgpa_ri_checker_hash_key(pgpa_ri_checker_key key) +{ + fasthash_state hs; + int sp_len; + + fasthash_init(&hs, 0); + + hs.accum = key.rti; + fasthash_combine(&hs); + + /* plan_name can be NULL */ + if (key.plan_name == NULL) + sp_len = 0; + else + sp_len = fasthash_accum_cstring(&hs, key.plan_name); + + /* hashfn_unstable.h recommends using string length as tweak */ + return fasthash_final32(&hs, sp_len); +} + +#endif + +/* + * Save the range table identifier for one relation for future cross-checking. + */ +static void +pgpa_ri_checker_save(pgpa_planner_state *pps, PlannerInfo *root, + RelOptInfo *rel) +{ +#ifdef USE_ASSERT_CHECKING + pgpa_ri_checker_key key; + pgpa_ri_checker *check; + pgpa_identifier rid; + const char *rid_string; + bool found; + + key.rti = bms_singleton_member(rel->relids); + key.plan_name = root->plan_name; + pgpa_compute_identifier_by_rti(root, key.rti, &rid); + rid_string = pgpa_identifier_string(&rid); + check = pgpa_ri_check_insert(pps->ri_check_hash, key, &found); + Assert(!found || strcmp(check->rid_string, rid_string) == 0); + check->rid_string = rid_string; +#endif +} + +/* + * Validate that the range table identifiers we were able to generate during + * planning match the ones we generated from the final plan. + */ +static void +pgpa_ri_checker_validate(pgpa_planner_state *pps, PlannedStmt *pstmt) +{ +#ifdef USE_ASSERT_CHECKING + pgpa_identifier *rt_identifiers; + pgpa_ri_check_iterator it; + pgpa_ri_checker *check; + + /* Create identifiers from the planned statement. */ + rt_identifiers = pgpa_create_identifiers_for_planned_stmt(pstmt); + + /* Iterate over identifiers created during planning, so we can compare. */ + pgpa_ri_check_start_iterate(pps->ri_check_hash, &it); + while ((check = pgpa_ri_check_iterate(pps->ri_check_hash, &it)) != NULL) + { + int rtoffset = 0; + const char *rid_string; + Index flat_rti; + + /* + * If there's no plan name associated with this entry, then the + * rtoffset is 0. Otherwise, we can search the SubPlanRTInfo list to + * find the rtoffset. + */ + if (check->key.plan_name != NULL) + { + foreach_node(SubPlanRTInfo, rtinfo, pstmt->subrtinfos) + { + /* + * If rtinfo->dummy is set, then the subquery's range table + * will only have been partially copied to the final range + * table. Specifically, only RTE_RELATION entries and + * RTE_SUBQUERY entries that were once RTE_RELATION entries + * will be copied, as per add_rtes_to_flat_rtable. Therefore, + * there's no fixed rtoffset that we can apply to the RTIs + * used during planning to locate the corresponding relations + * in the final rtable. + * + * With more complex logic, we could work around that problem + * by remembering the whole contents of the subquery's rtable + * during planning, determining which of those would have been + * copied to the final rtable, and matching them up. But it + * doesn't seem like a worthwhile endeavor for right now, + * because RTIs from such subqueries won't appear in the plan + * tree itself, just in the range table. Hence, we can neither + * generate nor accept advice for them. + */ + if (strcmp(check->key.plan_name, rtinfo->plan_name) == 0 + && !rtinfo->dummy) + { + rtoffset = rtinfo->rtoffset; + Assert(rtoffset > 0); + break; + } + } + + /* + * It's not an error if we don't find the plan name: that just + * means that we planned a subplan by this name but it ended up + * being a dummy subplan and so wasn't included in the final plan + * tree. + */ + if (rtoffset == 0) + continue; + } + + /* + * check->key.rti is the RTI that we saw prior to range-table + * flattening, so we must add the appropriate RT offset to get the + * final RTI. + */ + flat_rti = check->key.rti + rtoffset; + Assert(flat_rti <= list_length(pstmt->rtable)); + + /* Assert that the string we compute now matches the previous one. */ + rid_string = pgpa_identifier_string(&rt_identifiers[flat_rti - 1]); + Assert(strcmp(rid_string, check->rid_string) == 0); + } +#endif +} + +/* + * Convert a bitmapset to a C string of comma-separated integers. + */ +static char * +pgpa_bms_to_cstring(Bitmapset *bms) +{ + StringInfoData buf; + int x = -1; + + if (bms_is_empty(bms)) + return "none"; + + initStringInfo(&buf); + while ((x = bms_next_member(bms, x)) >= 0) + { + if (buf.len > 0) + appendStringInfo(&buf, ", %d", x); + else + appendStringInfo(&buf, "%d", x); + } + + return buf.data; +} + +/* + * Convert a JoinType to a C string. + */ +static const char * +pgpa_jointype_to_cstring(JoinType jointype) +{ + switch (jointype) + { + case JOIN_INNER: + return "inner"; + case JOIN_LEFT: + return "left"; + case JOIN_FULL: + return "full"; + case JOIN_RIGHT: + return "right"; + case JOIN_SEMI: + return "semi"; + case JOIN_ANTI: + return "anti"; + case JOIN_RIGHT_SEMI: + return "right semi"; + case JOIN_RIGHT_ANTI: + return "right anti"; + case JOIN_UNIQUE_OUTER: + return "unique outer"; + case JOIN_UNIQUE_INNER: + return "unique inner"; + } + return "???"; +} diff --git a/contrib/pg_plan_advice/pgpa_planner.h b/contrib/pg_plan_advice/pgpa_planner.h new file mode 100644 index 00000000000..c70e486a7f3 --- /dev/null +++ b/contrib/pg_plan_advice/pgpa_planner.h @@ -0,0 +1,19 @@ +/*------------------------------------------------------------------------- + * + * pgpa_planner.h + * planner integration for pg_plan_advice + * + * Copyright (c) 2016-2026, PostgreSQL Global Development Group + * + * contrib/pg_plan_advice/pgpa_planner.h + * + *------------------------------------------------------------------------- + */ +#ifndef PGPA_PLANNER_H +#define PGPA_PLANNER_H + +extern void pgpa_planner_install_hooks(void); + +extern int pgpa_planner_generate_advice; + +#endif diff --git a/contrib/pg_plan_advice/pgpa_scan.c b/contrib/pg_plan_advice/pgpa_scan.c new file mode 100644 index 00000000000..14bde3e149a --- /dev/null +++ b/contrib/pg_plan_advice/pgpa_scan.c @@ -0,0 +1,271 @@ +/*------------------------------------------------------------------------- + * + * pgpa_scan.c + * analysis of scans in Plan trees + * + * Copyright (c) 2016-2026, PostgreSQL Global Development Group + * + * contrib/pg_plan_advice/pgpa_scan.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "pgpa_scan.h" +#include "pgpa_walker.h" + +#include "nodes/parsenodes.h" +#include "parser/parsetree.h" + +static pgpa_scan *pgpa_make_scan(pgpa_plan_walker_context *walker, Plan *plan, + pgpa_scan_strategy strategy, + Bitmapset *relids); + + +static RTEKind unique_nonjoin_rtekind(Bitmapset *relids, List *rtable); + +/* + * Build a pgpa_scan object for a Plan node and update the plan walker + * context as appropriate. If this is an Append or MergeAppend scan, also + * build pgpa_scan for any scans that were consolidated into this one by + * Append/MergeAppend pull-up. + * + * If there is at least one ElidedNode for this plan node, pass the uppermost + * one as elided_node, else pass NULL. + * + * Set the 'beneath_any_gather' node if we are underneath a Gather or + * Gather Merge node (except for a single-copy Gather node, for which + * GATHER or GATHER_MERGE advice should not be emitted). + * + * Set the 'within_join_problem' flag if we're inside of a join problem and + * not otherwise. + */ +pgpa_scan * +pgpa_build_scan(pgpa_plan_walker_context *walker, Plan *plan, + ElidedNode *elided_node, + bool beneath_any_gather, bool within_join_problem) +{ + pgpa_scan_strategy strategy = PGPA_SCAN_ORDINARY; + Bitmapset *relids = NULL; + int rti = -1; + List *child_append_relid_sets = NIL; + NodeTag nodetype = nodeTag(plan); + + if (elided_node != NULL) + { + nodetype = elided_node->elided_type; + relids = elided_node->relids; + + /* + * If setrefs processing elided an Append or MergeAppend node that had + * only one surviving child, it could be either a partitionwise + * operation or a setop over subqueries, depending on the rtekind. + * + * A setop over subqueries, or a trivial SubqueryScan that was elided, + * is an "ordinary" scan i.e. one for which we do not need to generate + * advice because the planner has not made any meaningful choice. + * + * Note that the PGPA_SCAN_PARTITIONWISE case also includes + * partitionwise joins; this module considers those to be a form of + * scan, since they lack internal structure that we can decompose. + */ + if ((nodetype == T_Append || nodetype == T_MergeAppend) && + unique_nonjoin_rtekind(relids, + walker->pstmt->rtable) == RTE_RELATION) + strategy = PGPA_SCAN_PARTITIONWISE; + else + strategy = PGPA_SCAN_ORDINARY; + + /* Join RTIs can be present, but advice never refers to them. */ + relids = pgpa_filter_out_join_relids(relids, walker->pstmt->rtable); + } + else if ((rti = pgpa_scanrelid(plan)) != 0) + { + relids = bms_make_singleton(rti); + + switch (nodeTag(plan)) + { + case T_SeqScan: + strategy = PGPA_SCAN_SEQ; + break; + case T_BitmapHeapScan: + strategy = PGPA_SCAN_BITMAP_HEAP; + break; + case T_IndexScan: + strategy = PGPA_SCAN_INDEX; + break; + case T_IndexOnlyScan: + strategy = PGPA_SCAN_INDEX_ONLY; + break; + case T_TidScan: + case T_TidRangeScan: + strategy = PGPA_SCAN_TID; + break; + default: + + /* + * This case includes a ForeignScan targeting a single + * relation; no other strategy is possible in that case, but + * see below, where things are different in multi-relation + * cases. + */ + strategy = PGPA_SCAN_ORDINARY; + break; + } + } + else if ((relids = pgpa_relids(plan)) != NULL) + { + switch (nodeTag(plan)) + { + case T_ForeignScan: + + /* + * If multiple relations are being targeted by a single + * foreign scan, then the foreign join has been pushed to the + * remote side, and we want that to be reflected in the + * generated advice. + */ + strategy = PGPA_SCAN_FOREIGN; + break; + case T_Append: + + /* + * Append nodes can represent partitionwise scans of a + * relation, but when they implement a set operation, they are + * just ordinary scans. + */ + if (unique_nonjoin_rtekind(relids, walker->pstmt->rtable) + == RTE_RELATION) + strategy = PGPA_SCAN_PARTITIONWISE; + else + strategy = PGPA_SCAN_ORDINARY; + + /* Be sure to account for pulled-up scans. */ + child_append_relid_sets = + ((Append *) plan)->child_append_relid_sets; + break; + case T_MergeAppend: + /* Same logic here as for Append, above. */ + if (unique_nonjoin_rtekind(relids, walker->pstmt->rtable) + == RTE_RELATION) + strategy = PGPA_SCAN_PARTITIONWISE; + else + strategy = PGPA_SCAN_ORDINARY; + + /* Be sure to account for pulled-up scans. */ + child_append_relid_sets = + ((MergeAppend *) plan)->child_append_relid_sets; + break; + default: + strategy = PGPA_SCAN_ORDINARY; + break; + } + + + /* Join RTIs can be present, but advice never refers to them. */ + relids = pgpa_filter_out_join_relids(relids, walker->pstmt->rtable); + } + + /* + * If this is an Append or MergeAppend node into which subordinate Append + * or MergeAppend paths were merged, each of those merged paths is + * effectively another scan for which we need to account. + */ + foreach_node(Bitmapset, child_relids, child_append_relid_sets) + { + Bitmapset *child_nonjoin_relids; + + child_nonjoin_relids = + pgpa_filter_out_join_relids(child_relids, + walker->pstmt->rtable); + (void) pgpa_make_scan(walker, plan, strategy, + child_nonjoin_relids); + } + + /* + * If this plan node has no associated RTIs, it's not a scan. When the + * 'within_join_problem' flag is set, that's unexpected, so throw an + * error, else return quietly. + */ + if (relids == NULL) + { + if (within_join_problem) + elog(ERROR, "plan node has no RTIs: %d", (int) nodeTag(plan)); + return NULL; + } + + /* + * Add the appropriate set of RTIs to walker->no_gather_scans. + * + * Add nothing if we're beneath a Gather or Gather Merge node, since + * NO_GATHER advice is clearly inappropriate in that situation. + * + * Add nothing if this is an Append or MergeAppend node, whether or not + * elided. We'll emit NO_GATHER() for the underlying scan, which is good + * enough. + */ + if (!beneath_any_gather && nodetype != T_Append && + nodetype != T_MergeAppend) + walker->no_gather_scans = + bms_add_members(walker->no_gather_scans, relids); + + /* Caller tells us whether NO_GATHER() advice for this scan is needed. */ + return pgpa_make_scan(walker, plan, strategy, relids); +} + +/* + * Create a single pgpa_scan object and update the pgpa_plan_walker_context. + */ +static pgpa_scan * +pgpa_make_scan(pgpa_plan_walker_context *walker, Plan *plan, + pgpa_scan_strategy strategy, Bitmapset *relids) +{ + pgpa_scan *scan; + + /* Create the scan object. */ + scan = palloc(sizeof(pgpa_scan)); + scan->plan = plan; + scan->strategy = strategy; + scan->relids = relids; + + /* Add it to the appropriate list. */ + walker->scans[scan->strategy] = lappend(walker->scans[scan->strategy], + scan); + + return scan; +} + +/* + * Determine the unique rtekind of a set of relids. + */ +static RTEKind +unique_nonjoin_rtekind(Bitmapset *relids, List *rtable) +{ + int rti = -1; + bool first = true; + RTEKind rtekind; + + Assert(relids != NULL); + + while ((rti = bms_next_member(relids, rti)) >= 0) + { + RangeTblEntry *rte = rt_fetch(rti, rtable); + + if (rte->rtekind == RTE_JOIN) + continue; + + if (first) + { + rtekind = rte->rtekind; + first = false; + } + else if (rtekind != rte->rtekind) + elog(ERROR, "rtekind mismatch: %d vs. %d", + rtekind, rte->rtekind); + } + + if (first) + elog(ERROR, "no non-RTE_JOIN RTEs found"); + + return rtekind; +} diff --git a/contrib/pg_plan_advice/pgpa_scan.h b/contrib/pg_plan_advice/pgpa_scan.h new file mode 100644 index 00000000000..391c1a4b596 --- /dev/null +++ b/contrib/pg_plan_advice/pgpa_scan.h @@ -0,0 +1,85 @@ +/*------------------------------------------------------------------------- + * + * pgpa_scan.h + * analysis of scans in Plan trees + * + * For purposes of this module, a "scan" includes (1) single plan nodes that + * scan multiple RTIs, such as a degenerate Result node that replaces what + * would otherwise have been a join, and (2) Append and MergeAppend nodes + * implementing a partitionwise scan or a partitionwise join. Said + * differently, scans are the leaves of the join tree for a single join + * problem. + * + * Copyright (c) 2016-2026, PostgreSQL Global Development Group + * + * contrib/pg_plan_advice/pgpa_scan.h + * + *------------------------------------------------------------------------- + */ +#ifndef PGPA_SCAN_H +#define PGPA_SCAN_H + +#include "nodes/plannodes.h" + +typedef struct pgpa_plan_walker_context pgpa_plan_walker_context; + +/* + * Scan strategies. + * + * PGPA_SCAN_ORDINARY is any scan strategy that isn't interesting to us + * because there is no meaningful planner decision involved. For example, + * the only way to scan a subquery is a SubqueryScan, and the only way to + * scan a VALUES construct is a ValuesScan. We need not care exactly which + * type of planner node was used in such cases, because the same thing will + * happen when replanning. + * + * PGPA_SCAN_ORDINARY also includes Result nodes that correspond to scans + * or even joins that are proved empty. We don't know whether or not the scan + * or join will still be provably empty at replanning time, but if it is, + * then no scan-type advice is needed, and if it's not, we can't recommend + * a scan type based on the current plan. + * + * PGPA_SCAN_PARTITIONWISE also lumps together scans and joins: this can + * be either a partitionwise scan of a partitioned table or a partitionwise + * join between several partitioned tables. Note that all decisions about + * whether or not to use partitionwise join are meaningful: no matter what + * we decided this time, we could do more or fewer things partitionwise the + * next time. + * + * PGPA_SCAN_FOREIGN is only used when there's more than one relation involved; + * a single-table foreign scan is classified as ordinary, since there is no + * decision to make in that case. + * + * Other scan strategies map one-to-one to plan nodes. + */ +typedef enum +{ + PGPA_SCAN_ORDINARY = 0, + PGPA_SCAN_SEQ, + PGPA_SCAN_BITMAP_HEAP, + PGPA_SCAN_FOREIGN, + PGPA_SCAN_INDEX, + PGPA_SCAN_INDEX_ONLY, + PGPA_SCAN_PARTITIONWISE, + PGPA_SCAN_TID + /* update NUM_PGPA_SCAN_STRATEGY if you add anything here */ +} pgpa_scan_strategy; + +#define NUM_PGPA_SCAN_STRATEGY ((int) PGPA_SCAN_TID + 1) + +/* + * All of the details we need regarding a scan. + */ +typedef struct pgpa_scan +{ + Plan *plan; + pgpa_scan_strategy strategy; + Bitmapset *relids; +} pgpa_scan; + +extern pgpa_scan *pgpa_build_scan(pgpa_plan_walker_context *walker, Plan *plan, + ElidedNode *elided_node, + bool beneath_any_gather, + bool within_join_problem); + +#endif diff --git a/contrib/pg_plan_advice/pgpa_scanner.l b/contrib/pg_plan_advice/pgpa_scanner.l new file mode 100644 index 00000000000..3b3be6eb727 --- /dev/null +++ b/contrib/pg_plan_advice/pgpa_scanner.l @@ -0,0 +1,297 @@ +%top{ +/* + * Scanner for plan advice + * + * Copyright (c) 2000-2026, PostgreSQL Global Development Group + * + * contrib/pg_plan_advice/pgpa_scanner.l + */ +#include "postgres.h" + +#include "common/string.h" +#include "nodes/miscnodes.h" +#include "parser/scansup.h" + +#include "pgpa_ast.h" +#include "pgpa_parser.h" + +/* + * Extra data that we pass around when during scanning. + * + * 'litbuf' is used to implement the exclusive state, which handles + * double-quoted identifiers. + */ +typedef struct pgpa_yy_extra_type +{ + StringInfoData litbuf; +} pgpa_yy_extra_type; + +} + +%{ +/* LCOV_EXCL_START */ + +#define YY_DECL \ + extern int pgpa_yylex(union YYSTYPE *yylval_param, List **result, \ + char **parse_error_msg_p, yyscan_t yyscanner) + +/* No reason to constrain amount of data slurped */ +#define YY_READ_BUF_SIZE 16777216 + +/* Avoid exit() on fatal scanner errors (a bit ugly -- see yy_fatal_error) */ +#undef fprintf +#define fprintf(file, fmt, msg) fprintf_to_ereport(fmt, msg) + +static void +fprintf_to_ereport(const char *fmt, const char *msg) +{ + ereport(ERROR, (errmsg_internal("%s", msg))); +} +%} + +%option reentrant +%option bison-bridge +%option 8bit +%option never-interactive +%option nodefault +%option noinput +%option nounput +%option noyywrap +%option noyyalloc +%option noyyrealloc +%option noyyfree +%option warn +%option prefix="pgpa_yy" +%option extra-type="pgpa_yy_extra_type *" + +/* + * What follows is a severely stripped-down version of the core scanner. We + * only care about recognizing identifiers with or without identifier quoting + * (i.e. double-quoting), decimal integers, and a small handful of other + * things. Keep these rules in sync with src/backend/parser/scan.l. As in that + * file, we use an exclusive state called 'xc' for C-style comments, and an + * exclusive state called 'xd' for double-quoted identifiers. + */ +%x xc +%x xd + +ident_start [A-Za-z\200-\377_] +ident_cont [A-Za-z\200-\377_0-9\$] + +identifier {ident_start}{ident_cont}* + +decdigit [0-9] +decinteger {decdigit}(_?{decdigit})* + +space [ \t\n\r\f\v] +whitespace {space}+ + +dquote \" +xdstart {dquote} +xdstop {dquote} +xddouble {dquote}{dquote} +xdinside [^"]+ + +xcstart \/\* +xcstop \*+\/ +xcinside [^*/]+ + +%% + +{whitespace} { /* ignore */ } + +{identifier} { + char *str; + bool fail; + pgpa_advice_tag_type tag; + + /* + * Unlike the core scanner, we don't truncate identifiers + * here. There is no obvious reason to do so. + */ + str = downcase_identifier(yytext, yyleng, false, false); + yylval->str = str; + + /* + * If it's not a tag, just return TOK_IDENT; else, return + * a token type based on how further parsing should + * proceed. + */ + tag = pgpa_parse_advice_tag(str, &fail); + if (fail) + return TOK_IDENT; + else if (tag == PGPA_TAG_JOIN_ORDER) + return TOK_TAG_JOIN_ORDER; + else if (tag == PGPA_TAG_INDEX_SCAN || + tag == PGPA_TAG_INDEX_ONLY_SCAN) + return TOK_TAG_INDEX; + else if (tag == PGPA_TAG_SEQ_SCAN || + tag == PGPA_TAG_TID_SCAN || + tag == PGPA_TAG_BITMAP_HEAP_SCAN || + tag == PGPA_TAG_NO_GATHER) + return TOK_TAG_SIMPLE; + else + return TOK_TAG_GENERIC; + } + +{decinteger} { + char *endptr; + + errno = 0; + yylval->integer = strtoint(yytext, &endptr, 10); + if (*endptr != '\0' || errno == ERANGE) + pgpa_yyerror(result, parse_error_msg_p, yyscanner, + "integer out of range"); + return TOK_INTEGER; + } + +{xcstart} { + BEGIN(xc); + } + +{xdstart} { + BEGIN(xd); + resetStringInfo(&yyextra->litbuf); + } + +. { return yytext[0]; } + +{xcstop} { + BEGIN(INITIAL); + } + +{xcinside} { + /* discard multiple characters without slash or asterisk */ + } + +. { + /* + * Discard any single character. flex prefers longer + * matches, so this rule will never be picked when we could + * have matched xcstop. + * + * NB: At present, we don't bother to support nested + * C-style comments here, but this logic could be extended + * if that restriction poses a problem. + */ + } + +<> { + BEGIN(INITIAL); + pgpa_yyerror(result, parse_error_msg_p, yyscanner, + "unterminated comment"); + } + +{xdstop} { + BEGIN(INITIAL); + if (yyextra->litbuf.len == 0) + pgpa_yyerror(result, parse_error_msg_p, yyscanner, + "zero-length delimited identifier"); + yylval->str = pstrdup(yyextra->litbuf.data); + return TOK_IDENT; + } + +{xddouble} { + appendStringInfoChar(&yyextra->litbuf, '"'); + } + +{xdinside} { + appendBinaryStringInfo(&yyextra->litbuf, yytext, yyleng); + } + +<> { + BEGIN(INITIAL); + pgpa_yyerror(result, parse_error_msg_p, yyscanner, + "unterminated quoted identifier"); + } + +%% + +/* LCOV_EXCL_STOP */ + +/* + * Handler for errors while scanning or parsing advice. + * + * bison passes the error message to us via 'message', and the context is + * available via the 'yytext' macro. We assemble those values into a final + * error text and then arrange to pass it back to the caller of pgpa_yyparse() + * by storing it into *parse_error_msg_p. + */ +void +pgpa_yyerror(List **result, char **parse_error_msg_p, yyscan_t yyscanner, + const char *message) +{ + struct yyguts_t *yyg = (struct yyguts_t *) yyscanner; /* needed for yytext + * macro */ + + + /* report only the first error in a parse operation */ + if (*parse_error_msg_p) + return; + + if (yytext[0]) + *parse_error_msg_p = psprintf("%s at or near \"%s\"", message, yytext); + else + *parse_error_msg_p = psprintf("%s at end of input", message); +} + +/* + * Initialize the advice scanner. + * + * This should be called before parsing begins. + */ +void +pgpa_scanner_init(const char *str, yyscan_t *yyscannerp) +{ + yyscan_t yyscanner; + pgpa_yy_extra_type *yyext = palloc0_object(pgpa_yy_extra_type); + + if (yylex_init(yyscannerp) != 0) + elog(ERROR, "yylex_init() failed: %m"); + + yyscanner = *yyscannerp; + + initStringInfo(&yyext->litbuf); + pgpa_yyset_extra(yyext, yyscanner); + + yy_scan_string(str, yyscanner); +} + + +/* + * Shut down the advice scanner. + * + * This should be called after parsing is complete. + */ +void +pgpa_scanner_finish(yyscan_t yyscanner) +{ + yylex_destroy(yyscanner); +} + +/* + * Interface functions to make flex use palloc() instead of malloc(). + * It'd be better to make these static, but flex insists otherwise. + */ + +void * +yyalloc(yy_size_t size, yyscan_t yyscanner) +{ + return palloc(size); +} + +void * +yyrealloc(void *ptr, yy_size_t size, yyscan_t yyscanner) +{ + if (ptr) + return repalloc(ptr, size); + else + return palloc(size); +} + +void +yyfree(void *ptr, yyscan_t yyscanner) +{ + if (ptr) + pfree(ptr); +} diff --git a/contrib/pg_plan_advice/pgpa_trove.c b/contrib/pg_plan_advice/pgpa_trove.c new file mode 100644 index 00000000000..634ec5c4c6e --- /dev/null +++ b/contrib/pg_plan_advice/pgpa_trove.c @@ -0,0 +1,516 @@ +/*------------------------------------------------------------------------- + * + * pgpa_trove.c + * All of the advice given for a particular query, appropriately + * organized for convenient access. + * + * This name comes from the English expression "trove of advice", which + * means a collection of wisdom. This slightly unusual term is chosen + * partly because it seems to fit and partly because it's not presently + * used for anything else, making it easy to grep. Note that, while we + * don't know whether the provided advice is actually wise, it's not our + * job to question the user's choices. + * + * The goal of this module is to make it easy to locate the specific + * bits of advice that pertain to any given part of a query, or to + * determine that there are none. + * + * Copyright (c) 2016-2026, PostgreSQL Global Development Group + * + * contrib/pg_plan_advice/pgpa_trove.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "pgpa_trove.h" + +#include "common/hashfn_unstable.h" + +/* + * An advice trove is organized into a series of "slices", each of which + * contains information about one topic e.g. scan methods. Each slice consists + * of an array of trove entries plus a hash table that we can use to determine + * which ones are relevant to a particular part of the query. + */ +typedef struct pgpa_trove_slice +{ + unsigned nallocated; + unsigned nused; + pgpa_trove_entry *entries; + struct pgpa_trove_entry_hash *hash; +} pgpa_trove_slice; + +/* + * Scan advice is stored into 'scan'; join advice is stored into 'join'; and + * advice that can apply to both cases is stored into 'rel'. This lets callers + * ask just for what's relevant. These slices correspond to the possible values + * of pgpa_trove_lookup_type. + */ +struct pgpa_trove +{ + pgpa_trove_slice join; + pgpa_trove_slice rel; + pgpa_trove_slice scan; +}; + +/* + * We're going to build a hash table to allow clients of this module to find + * relevant advice for a given part of the query quickly. However, we're going + * to use only three of the five key fields as hash keys. There are two reasons + * for this. + * + * First, it's allowable to set partition_schema to NULL to match a partition + * with the correct name in any schema. + * + * Second, we expect the "occurrence" and "partition_schema" portions of the + * relation identifiers to be mostly uninteresting. Most of the time, the + * occurrence field will be 1 and the partition_schema values will all be the + * same. Even when there is some variation, the absolute number of entries + * that have the same values for all three of these key fields should be + * quite small. + */ +typedef struct +{ + const char *alias_name; + const char *partition_name; + const char *plan_name; +} pgpa_trove_entry_key; + +typedef struct +{ + pgpa_trove_entry_key key; + int status; + Bitmapset *indexes; +} pgpa_trove_entry_element; + +static uint32 pgpa_trove_entry_hash_key(pgpa_trove_entry_key key); + +static inline bool +pgpa_trove_entry_compare_key(pgpa_trove_entry_key a, pgpa_trove_entry_key b) +{ + if (strcmp(a.alias_name, b.alias_name) != 0) + return false; + + if (!strings_equal_or_both_null(a.partition_name, b.partition_name)) + return false; + + if (!strings_equal_or_both_null(a.plan_name, b.plan_name)) + return false; + + return true; +} + +#define SH_PREFIX pgpa_trove_entry +#define SH_ELEMENT_TYPE pgpa_trove_entry_element +#define SH_KEY_TYPE pgpa_trove_entry_key +#define SH_KEY key +#define SH_HASH_KEY(tb, key) pgpa_trove_entry_hash_key(key) +#define SH_EQUAL(tb, a, b) pgpa_trove_entry_compare_key(a, b) +#define SH_SCOPE static inline +#define SH_DECLARE +#define SH_DEFINE +#include "lib/simplehash.h" + +static void pgpa_init_trove_slice(pgpa_trove_slice *tslice); +static void pgpa_trove_add_to_slice(pgpa_trove_slice *tslice, + pgpa_advice_tag_type tag, + pgpa_advice_target *target); +static void pgpa_trove_add_to_hash(pgpa_trove_entry_hash *hash, + pgpa_advice_target *target, + int index); +static Bitmapset *pgpa_trove_slice_lookup(pgpa_trove_slice *tslice, + pgpa_identifier *rid); + +/* + * Build a trove of advice from a list of advice items. + * + * Caller can obtain a list of advice items to pass to this function by + * calling pgpa_parse(). + */ +pgpa_trove * +pgpa_build_trove(List *advice_items) +{ + pgpa_trove *trove = palloc_object(pgpa_trove); + + pgpa_init_trove_slice(&trove->join); + pgpa_init_trove_slice(&trove->rel); + pgpa_init_trove_slice(&trove->scan); + + foreach_ptr(pgpa_advice_item, item, advice_items) + { + switch (item->tag) + { + case PGPA_TAG_JOIN_ORDER: + { + pgpa_advice_target *target; + + /* + * For most advice types, each element in the top-level + * list is a separate target, but it's most convenient to + * regard the entirety of a JOIN_ORDER specification as a + * single target. Since it wasn't represented that way + * during parsing, build a surrogate object now. + */ + target = palloc0_object(pgpa_advice_target); + target->ttype = PGPA_TARGET_ORDERED_LIST; + target->children = item->targets; + + pgpa_trove_add_to_slice(&trove->join, + item->tag, target); + } + break; + + case PGPA_TAG_BITMAP_HEAP_SCAN: + case PGPA_TAG_INDEX_ONLY_SCAN: + case PGPA_TAG_INDEX_SCAN: + case PGPA_TAG_SEQ_SCAN: + case PGPA_TAG_TID_SCAN: + + /* + * Scan advice. + */ + foreach_ptr(pgpa_advice_target, target, item->targets) + { + /* + * For now, all of our scan types target single relations, + * but in the future this might not be true, e.g. a custom + * scan could replace a join. + */ + Assert(target->ttype == PGPA_TARGET_IDENTIFIER); + pgpa_trove_add_to_slice(&trove->scan, + item->tag, target); + } + break; + + case PGPA_TAG_FOREIGN_JOIN: + case PGPA_TAG_HASH_JOIN: + case PGPA_TAG_MERGE_JOIN_MATERIALIZE: + case PGPA_TAG_MERGE_JOIN_PLAIN: + case PGPA_TAG_NESTED_LOOP_MATERIALIZE: + case PGPA_TAG_NESTED_LOOP_MEMOIZE: + case PGPA_TAG_NESTED_LOOP_PLAIN: + case PGPA_TAG_SEMIJOIN_NON_UNIQUE: + case PGPA_TAG_SEMIJOIN_UNIQUE: + + /* + * Join strategy advice. + */ + foreach_ptr(pgpa_advice_target, target, item->targets) + { + pgpa_trove_add_to_slice(&trove->join, + item->tag, target); + } + break; + + case PGPA_TAG_PARTITIONWISE: + case PGPA_TAG_GATHER: + case PGPA_TAG_GATHER_MERGE: + case PGPA_TAG_NO_GATHER: + + /* + * Advice about a RelOptInfo relevant to both scans and joins. + */ + foreach_ptr(pgpa_advice_target, target, item->targets) + { + pgpa_trove_add_to_slice(&trove->rel, + item->tag, target); + } + break; + } + } + + return trove; +} + +/* + * Search a trove of advice for relevant entries. + * + * All parameters are input parameters except for *result, which is an output + * parameter used to return results to the caller. + */ +void +pgpa_trove_lookup(pgpa_trove *trove, pgpa_trove_lookup_type type, + int nrids, pgpa_identifier *rids, pgpa_trove_result *result) +{ + pgpa_trove_slice *tslice; + Bitmapset *indexes; + + Assert(nrids > 0); + + if (type == PGPA_TROVE_LOOKUP_SCAN) + tslice = &trove->scan; + else if (type == PGPA_TROVE_LOOKUP_JOIN) + tslice = &trove->join; + else + tslice = &trove->rel; + + indexes = pgpa_trove_slice_lookup(tslice, &rids[0]); + for (int i = 1; i < nrids; ++i) + { + Bitmapset *other_indexes; + + /* + * If the caller is asking about two relations that aren't part of the + * same subquery, they've messed up. + */ + Assert(strings_equal_or_both_null(rids[0].plan_name, + rids[i].plan_name)); + + other_indexes = pgpa_trove_slice_lookup(tslice, &rids[i]); + indexes = bms_union(indexes, other_indexes); + } + + result->entries = tslice->entries; + result->indexes = indexes; +} + +/* + * Return all entries in a trove slice to the caller. + * + * The first two arguments are input arguments, and the remainder are output + * arguments. + */ +void +pgpa_trove_lookup_all(pgpa_trove *trove, pgpa_trove_lookup_type type, + pgpa_trove_entry **entries, int *nentries) +{ + pgpa_trove_slice *tslice; + + if (type == PGPA_TROVE_LOOKUP_SCAN) + tslice = &trove->scan; + else if (type == PGPA_TROVE_LOOKUP_JOIN) + tslice = &trove->join; + else + tslice = &trove->rel; + + *entries = tslice->entries; + *nentries = tslice->nused; +} + +/* + * Convert a trove entry to an item of plan advice that would produce it. + */ +char * +pgpa_cstring_trove_entry(pgpa_trove_entry *entry) +{ + StringInfoData buf; + + initStringInfo(&buf); + appendStringInfo(&buf, "%s", pgpa_cstring_advice_tag(entry->tag)); + + /* JOIN_ORDER tags are transformed by pgpa_build_trove; undo that here */ + if (entry->tag != PGPA_TAG_JOIN_ORDER) + appendStringInfoChar(&buf, '('); + else + Assert(entry->target->ttype == PGPA_TARGET_ORDERED_LIST); + + pgpa_format_advice_target(&buf, entry->target); + + if (entry->target->itarget != NULL) + { + appendStringInfoChar(&buf, ' '); + pgpa_format_index_target(&buf, entry->target->itarget); + } + + if (entry->tag != PGPA_TAG_JOIN_ORDER) + appendStringInfoChar(&buf, ')'); + + return buf.data; +} + +/* + * Set PGPA_TE_* flags on a set of trove entries. + */ +void +pgpa_trove_set_flags(pgpa_trove_entry *entries, Bitmapset *indexes, int flags) +{ + int i = -1; + + while ((i = bms_next_member(indexes, i)) >= 0) + { + pgpa_trove_entry *entry = &entries[i]; + + entry->flags |= flags; + } +} + +/* + * Append a string representation of the specified PGPA_TE_* flags to the + * given StringInfo. + */ +void +pgpa_trove_append_flags(StringInfo buf, int flags) +{ + if ((flags & PGPA_TE_MATCH_FULL) != 0) + { + Assert((flags & PGPA_TE_MATCH_PARTIAL) != 0); + appendStringInfo(buf, "matched"); + } + else if ((flags & PGPA_TE_MATCH_PARTIAL) != 0) + appendStringInfo(buf, "partially matched"); + else + appendStringInfo(buf, "not matched"); + if ((flags & PGPA_TE_INAPPLICABLE) != 0) + appendStringInfo(buf, ", inapplicable"); + if ((flags & PGPA_TE_CONFLICTING) != 0) + appendStringInfo(buf, ", conflicting"); + if ((flags & PGPA_TE_FAILED) != 0) + appendStringInfo(buf, ", failed"); +} + +/* + * Add a new advice target to an existing pgpa_trove_slice object. + */ +static void +pgpa_trove_add_to_slice(pgpa_trove_slice *tslice, + pgpa_advice_tag_type tag, + pgpa_advice_target *target) +{ + pgpa_trove_entry *entry; + + if (tslice->nused >= tslice->nallocated) + { + int new_allocated; + + new_allocated = tslice->nallocated * 2; + tslice->entries = repalloc_array(tslice->entries, pgpa_trove_entry, + new_allocated); + tslice->nallocated = new_allocated; + } + + entry = &tslice->entries[tslice->nused]; + entry->tag = tag; + entry->target = target; + entry->flags = 0; + + pgpa_trove_add_to_hash(tslice->hash, target, tslice->nused); + + tslice->nused++; +} + +/* + * Update the hash table for a newly-added advice target. + */ +static void +pgpa_trove_add_to_hash(pgpa_trove_entry_hash *hash, pgpa_advice_target *target, + int index) +{ + pgpa_trove_entry_key key; + pgpa_trove_entry_element *element; + bool found; + + /* For non-identifiers, add entries for all descendants. */ + if (target->ttype != PGPA_TARGET_IDENTIFIER) + { + foreach_ptr(pgpa_advice_target, child_target, target->children) + { + pgpa_trove_add_to_hash(hash, child_target, index); + } + return; + } + + /* Sanity checks. */ + Assert(target->rid.occurrence > 0); + Assert(target->rid.alias_name != NULL); + + /* Add an entry for this relation identifier. */ + key.alias_name = target->rid.alias_name; + key.partition_name = target->rid.partrel; + key.plan_name = target->rid.plan_name; + element = pgpa_trove_entry_insert(hash, key, &found); + if (!found) + element->indexes = NULL; + element->indexes = bms_add_member(element->indexes, index); +} + +/* + * Create and initialize a new pgpa_trove_slice object. + */ +static void +pgpa_init_trove_slice(pgpa_trove_slice *tslice) +{ + /* + * In an ideal world, we'll make tslice->nallocated big enough that the + * array and hash table will be large enough to contain the number of + * advice items in this trove slice, but a generous default value is not + * good for performance, because pgpa_init_trove_slice() has to zero an + * amount of memory proportional to tslice->nallocated. Hence, we keep the + * starting value quite small, on the theory that advice strings will + * often be relatively short. + */ + tslice->nallocated = 16; + tslice->nused = 0; + tslice->entries = palloc_array(pgpa_trove_entry, tslice->nallocated); + tslice->hash = pgpa_trove_entry_create(CurrentMemoryContext, + tslice->nallocated, NULL); +} + +/* + * Fast hash function for a key consisting of alias_name, partition_name, + * and plan_name. + */ +static uint32 +pgpa_trove_entry_hash_key(pgpa_trove_entry_key key) +{ + fasthash_state hs; + int sp_len; + + fasthash_init(&hs, 0); + + /* alias_name may not be NULL */ + sp_len = fasthash_accum_cstring(&hs, key.alias_name); + + /* partition_name and plan_name, however, can be NULL */ + if (key.partition_name != NULL) + sp_len += fasthash_accum_cstring(&hs, key.partition_name); + if (key.plan_name != NULL) + sp_len += fasthash_accum_cstring(&hs, key.plan_name); + + /* + * hashfn_unstable.h recommends using string length as tweak. It's not + * clear to me what to do if there are multiple strings, so for now I'm + * just using the total of all of the lengths. + */ + return fasthash_final32(&hs, sp_len); +} + +/* + * Look for matching entries. + */ +static Bitmapset * +pgpa_trove_slice_lookup(pgpa_trove_slice *tslice, pgpa_identifier *rid) +{ + pgpa_trove_entry_key key; + pgpa_trove_entry_element *element; + Bitmapset *result = NULL; + + Assert(rid->occurrence >= 1); + + key.alias_name = rid->alias_name; + key.partition_name = rid->partrel; + key.plan_name = rid->plan_name; + + element = pgpa_trove_entry_lookup(tslice->hash, key); + + if (element != NULL) + { + int i = -1; + + while ((i = bms_next_member(element->indexes, i)) >= 0) + { + pgpa_trove_entry *entry = &tslice->entries[i]; + + /* + * We know that this target or one of its descendants matches the + * identifier on the three key fields above, but we don't know + * which descendant or whether the occurrence and schema also + * match. + */ + if (pgpa_identifier_matches_target(rid, entry->target)) + result = bms_add_member(result, i); + } + } + + return result; +} diff --git a/contrib/pg_plan_advice/pgpa_trove.h b/contrib/pg_plan_advice/pgpa_trove.h new file mode 100644 index 00000000000..22fe3a620f7 --- /dev/null +++ b/contrib/pg_plan_advice/pgpa_trove.h @@ -0,0 +1,114 @@ +/*------------------------------------------------------------------------- + * + * pgpa_trove.h + * All of the advice given for a particular query, appropriately + * organized for convenient access. + * + * Copyright (c) 2016-2026, PostgreSQL Global Development Group + * + * contrib/pg_plan_advice/pgpa_trove.h + * + *------------------------------------------------------------------------- + */ +#ifndef PGPA_TROVE_H +#define PGPA_TROVE_H + +#include "pgpa_ast.h" + +#include "nodes/bitmapset.h" + +typedef struct pgpa_trove pgpa_trove; + +/* + * Flags that can be set on a pgpa_trove_entry to indicate what happened when + * trying to plan using advice. + * + * PGPA_TE_MATCH_PARTIAL means that we found some part of the query that at + * least partially matched the target; e.g. given JOIN_ORDER(a b), this would + * be set if we ever saw any joinrel including either "a" or "b". + * + * PGPA_TE_MATCH_FULL means that we found an exact match for the target; e.g. + * given JOIN_ORDER(a b), this would be set if we saw a joinrel containing + * exactly "a" and "b" and nothing else. + * + * PGPA_TE_INAPPLICABLE means that the advice doesn't properly apply to the + * target; e.g. INDEX_SCAN(foo bar_idx) would be so marked if bar_idx does not + * exist on foo. The fact that this bit has been set does not mean that the + * advice had no effect. + * + * PGPA_TE_CONFLICTING means that a conflict was detected between what this + * advice wants and what some other plan advice wants; e.g. JOIN_ORDER(a b) + * would conflict with HASH_JOIN(a), because the former requires "a" to be the + * outer table while the latter requires it to be the inner table. + * + * PGPA_TE_FAILED means that the resulting plan did not conform to the advice. + */ +#define PGPA_TE_MATCH_PARTIAL 0x0001 +#define PGPA_TE_MATCH_FULL 0x0002 +#define PGPA_TE_INAPPLICABLE 0x0004 +#define PGPA_TE_CONFLICTING 0x0008 +#define PGPA_TE_FAILED 0x0010 + +/* + * Each entry in a trove of advice represents the application of a tag to + * a single target. + */ +typedef struct pgpa_trove_entry +{ + pgpa_advice_tag_type tag; + pgpa_advice_target *target; + int flags; +} pgpa_trove_entry; + +/* + * What kind of information does the caller want to find in a trove? + * + * PGPA_TROVE_LOOKUP_SCAN means we're looking for scan advice. + * + * PGPA_TROVE_LOOKUP_JOIN means we're looking for join-related advice. + * This includes join order advice, join method advice, and semijoin-uniqueness + * advice. + * + * PGPA_TROVE_LOOKUP_REL means we're looking for general advice about this + * a RelOptInfo that may correspond to either a scan or a join. This includes + * gather-related advice and partitionwise advice. Note that partitionwise + * advice might seem like join advice, but that's not a helpful way of viewing + * the matter because (1) partitionwise advice is also relevant at the scan + * level and (2) other types of join advice affect only what to do from + * join_path_setup_hook, but partitionwise advice affects what to do in + * joinrel_setup_hook. + */ +typedef enum pgpa_trove_lookup_type +{ + PGPA_TROVE_LOOKUP_JOIN, + PGPA_TROVE_LOOKUP_REL, + PGPA_TROVE_LOOKUP_SCAN +} pgpa_trove_lookup_type; + +/* + * This struct is used to store the result of a trove lookup. For each member + * of "indexes", the entry at the corresponding offset within "entries" is one + * of the results. + */ +typedef struct pgpa_trove_result +{ + pgpa_trove_entry *entries; + Bitmapset *indexes; +} pgpa_trove_result; + +extern pgpa_trove *pgpa_build_trove(List *advice_items); +extern void pgpa_trove_lookup(pgpa_trove *trove, + pgpa_trove_lookup_type type, + int nrids, + pgpa_identifier *rids, + pgpa_trove_result *result); +extern void pgpa_trove_lookup_all(pgpa_trove *trove, + pgpa_trove_lookup_type type, + pgpa_trove_entry **entries, + int *nentries); +extern char *pgpa_cstring_trove_entry(pgpa_trove_entry *entry); +extern void pgpa_trove_set_flags(pgpa_trove_entry *entries, + Bitmapset *indexes, int flags); +extern void pgpa_trove_append_flags(StringInfo buf, int flags); + +#endif diff --git a/contrib/pg_plan_advice/pgpa_walker.c b/contrib/pg_plan_advice/pgpa_walker.c new file mode 100644 index 00000000000..7b86cc5e6f9 --- /dev/null +++ b/contrib/pg_plan_advice/pgpa_walker.c @@ -0,0 +1,1029 @@ +/*------------------------------------------------------------------------- + * + * pgpa_walker.c + * Main entrypoints for analyzing a plan to generate an advice string + * + * Copyright (c) 2016-2026, PostgreSQL Global Development Group + * + * contrib/pg_plan_advice/pgpa_walker.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "pgpa_join.h" +#include "pgpa_scan.h" +#include "pgpa_walker.h" + +#include "nodes/plannodes.h" +#include "parser/parsetree.h" +#include "utils/lsyscache.h" + +static void pgpa_walk_recursively(pgpa_plan_walker_context *walker, Plan *plan, + bool within_join_problem, + pgpa_join_unroller *join_unroller, + List *active_query_features, + bool beneath_any_gather); +static Bitmapset *pgpa_process_unrolled_join(pgpa_plan_walker_context *walker, + pgpa_unrolled_join *ujoin); + +static pgpa_query_feature *pgpa_add_feature(pgpa_plan_walker_context *walker, + pgpa_qf_type type, + Plan *plan); + +static void pgpa_qf_add_rti(List *active_query_features, Index rti); +static void pgpa_qf_add_rtis(List *active_query_features, Bitmapset *relids); +static void pgpa_qf_add_plan_rtis(List *active_query_features, Plan *plan, + List *rtable); + +static bool pgpa_walker_join_order_matches(pgpa_unrolled_join *ujoin, + Index rtable_length, + pgpa_identifier *rt_identifiers, + pgpa_advice_target *target, + bool toplevel); +static bool pgpa_walker_join_order_matches_member(pgpa_join_member *member, + Index rtable_length, + pgpa_identifier *rt_identifiers, + pgpa_advice_target *target); +static pgpa_scan *pgpa_walker_find_scan(pgpa_plan_walker_context *walker, + pgpa_scan_strategy strategy, + Bitmapset *relids); +static bool pgpa_walker_index_target_matches_plan(pgpa_index_target *itarget, + Plan *plan); +static bool pgpa_walker_contains_feature(pgpa_plan_walker_context *walker, + pgpa_qf_type type, + Bitmapset *relids); +static bool pgpa_walker_contains_join(pgpa_plan_walker_context *walker, + pgpa_join_strategy strategy, + Bitmapset *relids); +static bool pgpa_walker_contains_no_gather(pgpa_plan_walker_context *walker, + Bitmapset *relids); + +/* + * Top-level entrypoint for the plan tree walk. + * + * Populates walker based on a traversal of the Plan trees in pstmt. + * + * sj_unique_rels is a list of pgpa_sj_unique_rel objects, one for each + * relation we considered making unique as part of semijoin planning. + */ +void +pgpa_plan_walker(pgpa_plan_walker_context *walker, PlannedStmt *pstmt, + List *sj_unique_rels) +{ + ListCell *lc; + List *sj_unique_rtis = NULL; + List *sj_nonunique_qfs = NULL; + + /* Initialization. */ + memset(walker, 0, sizeof(pgpa_plan_walker_context)); + walker->pstmt = pstmt; + + /* Walk the main plan tree. */ + pgpa_walk_recursively(walker, pstmt->planTree, false, NULL, NIL, false); + + /* Main plan tree walk won't reach subplans, so walk those. */ + foreach(lc, pstmt->subplans) + { + Plan *plan = lfirst(lc); + + if (plan != NULL) + pgpa_walk_recursively(walker, plan, false, NULL, NIL, false); + } + + /* Adjust RTIs from sj_unique_rels for the flattened range table. */ + foreach_ptr(pgpa_sj_unique_rel, ur, sj_unique_rels) + { + int rtindex = -1; + int rtoffset = 0; + bool dummy = false; + Bitmapset *relids = NULL; + + /* If this is a subplan, find the range table offset. */ + if (ur->plan_name != NULL) + { + foreach_node(SubPlanRTInfo, rtinfo, pstmt->subrtinfos) + { + if (strcmp(ur->plan_name, rtinfo->plan_name) == 0) + { + rtoffset = rtinfo->rtoffset; + dummy = rtinfo->dummy; + break; + } + } + + if (rtoffset == 0) + elog(ERROR, "no rtoffset for plan %s", ur->plan_name); + } + + /* If this entry pertains to a dummy subquery, ignore it. */ + if (dummy) + continue; + + /* Offset each entry from the original set. */ + while ((rtindex = bms_next_member(ur->relids, rtindex)) >= 0) + relids = bms_add_member(relids, rtindex + rtoffset); + + /* Store the resulting set. */ + sj_unique_rtis = lappend(sj_unique_rtis, relids); + } + + /* + * Remove any non-unique semijoin query features for which making the rel + * unique wasn't considered. + */ + foreach_ptr(pgpa_query_feature, qf, + walker->query_features[PGPAQF_SEMIJOIN_NON_UNIQUE]) + { + if (list_member(sj_unique_rtis, qf->relids)) + sj_nonunique_qfs = lappend(sj_nonunique_qfs, qf); + } + walker->query_features[PGPAQF_SEMIJOIN_NON_UNIQUE] = sj_nonunique_qfs; + + /* + * If we find any cases where analysis of the Plan tree shows that the + * semijoin was made unique but this possibility was never observed to be + * considered during planning, then we have a bug somewhere. + */ + foreach_ptr(pgpa_query_feature, qf, + walker->query_features[PGPAQF_SEMIJOIN_UNIQUE]) + { + if (!list_member(sj_unique_rtis, qf->relids)) + { + StringInfoData buf; + + initStringInfo(&buf); + outBitmapset(&buf, qf->relids); + elog(ERROR, + "unique semijoin found for relids %s but not observed during planning", + buf.data); + } + } + + /* + * It's possible for a Gather or Gather Merge query feature to find no + * RTIs when partitionwise aggregation is in use. We shouldn't emit + * something like GATHER_MERGE(()), so instead emit nothing. This means + * that we won't advise either GATHER or GATHER_MERGE or NO_GATHER in such + * cases, which might be something we want to improve in the future. + * + * (Should the Partial Aggregates in such a case be created in an + * UPPERREL_GROUP_AGG with a non-empty relid set? Right now that doesn't + * happen, but it seems like it would make life easier for us if it did.) + */ + for (int t = 0; t < NUM_PGPA_QF_TYPES; ++t) + { + List *query_features = NIL; + + foreach_ptr(pgpa_query_feature, qf, walker->query_features[t]) + { + if (qf->relids != NULL) + query_features = lappend(query_features, qf); + else + Assert(t == PGPAQF_GATHER || t == PGPAQF_GATHER_MERGE); + } + + walker->query_features[t] = query_features; + } +} + +/* + * Main workhorse for the plan tree walk. + * + * If within_join_problem is true, we encountered a join at some higher level + * of the tree walk and haven't yet descended out of the portion of the plan + * tree that is part of that same join problem. We're no longer in the same + * join problem if (1) we cross into a different subquery or (2) we descend + * through an Append or MergeAppend node, below which any further joins would + * be partitionwise joins planned separately from the outer join problem. + * + * If join_unroller != NULL, the join unroller code expects us to find a join + * that should be unrolled into that object. This implies that we're within a + * join problem, but the reverse is not true: when we've traversed all the + * joins but are still looking for the scan that is the leaf of the join tree, + * join_unroller will be NULL but within_join_problem will be true. + * + * Each element of active_query_features corresponds to some item of advice + * that needs to enumerate all the relations it affects. We add RTIs we find + * during tree traversal to each of these query features. + * + * If beneath_any_gather == true, some higher level of the tree traversal found + * a Gather or Gather Merge node. + */ +static void +pgpa_walk_recursively(pgpa_plan_walker_context *walker, Plan *plan, + bool within_join_problem, + pgpa_join_unroller *join_unroller, + List *active_query_features, + bool beneath_any_gather) +{ + pgpa_join_unroller *outer_join_unroller = NULL; + pgpa_join_unroller *inner_join_unroller = NULL; + bool join_unroller_toplevel = false; + ListCell *lc; + List *extraplans = NIL; + List *elided_nodes = NIL; + + Assert(within_join_problem || join_unroller == NULL); + + /* + * Check the future_query_features list to see whether this was previously + * identified as a plan node that needs to be treated as a query feature. + * We must do this before handling elided nodes, because if there's an + * elided node associated with a future query feature, the RTIs associated + * with the elided node should be the only ones attributed to the query + * feature. + */ + foreach_ptr(pgpa_query_feature, qf, walker->future_query_features) + { + if (qf->plan == plan) + { + active_query_features = list_copy(active_query_features); + active_query_features = lappend(active_query_features, qf); + walker->future_query_features = + list_delete_ptr(walker->future_query_features, qf); + break; + } + } + + /* + * Find all elided nodes for this Plan node. + */ + foreach_node(ElidedNode, n, walker->pstmt->elidedNodes) + { + if (n->plan_node_id == plan->plan_node_id) + elided_nodes = lappend(elided_nodes, n); + } + + /* If we found any elided_nodes, handle them. */ + if (elided_nodes != NIL) + { + int num_elided_nodes = list_length(elided_nodes); + ElidedNode *last_elided_node; + + /* + * RTIs for the final -- and thus logically uppermost -- elided node + * should be collected for query features passed down by the caller. + * However, elided nodes act as barriers to query features, which + * means that (1) the remaining elided nodes, if any, should be + * ignored for purposes of query features and (2) the list of active + * query features should be reset to empty so that we do not add RTIs + * from the plan node that is logically beneath the elided node to the + * query features passed down from the caller. + */ + last_elided_node = list_nth(elided_nodes, num_elided_nodes - 1); + pgpa_qf_add_rtis(active_query_features, + pgpa_filter_out_join_relids(last_elided_node->relids, + walker->pstmt->rtable)); + active_query_features = NIL; + + /* + * If we're within a join problem, the join_unroller is responsible + * for building the scan for the final elided node, so throw it out. + */ + if (within_join_problem) + elided_nodes = list_truncate(elided_nodes, num_elided_nodes - 1); + + /* Build scans for all (or the remaining) elided nodes. */ + foreach_node(ElidedNode, elided_node, elided_nodes) + { + (void) pgpa_build_scan(walker, plan, elided_node, + beneath_any_gather, within_join_problem); + } + + /* + * If there were any elided nodes, then everything beneath those nodes + * is not part of the same join problem. + * + * In more detail, if an Append or MergeAppend was elided, then a + * partitionwise join was chosen and only a single child survived; if + * a SubqueryScan was elided, the subquery was planned without + * flattening it into the parent. + */ + within_join_problem = false; + join_unroller = NULL; + } + + /* + * If this is a Gather or Gather Merge node, directly add it to the list + * of currently-active query features. We must do this after handling + * elided nodes, since the Gather or Gather Merge node occurs logically + * beneath any associated elided nodes. + * + * Exception: We disregard any single_copy Gather nodes. These are created + * by debug_parallel_query, and having them affect the plan advice is + * counterproductive, as the result will be to advise the use of a real + * Gather node, rather than a single copy one. + */ + if (IsA(plan, Gather) && !((Gather *) plan)->single_copy) + { + active_query_features = + lappend(list_copy(active_query_features), + pgpa_add_feature(walker, PGPAQF_GATHER, plan)); + beneath_any_gather = true; + } + else if (IsA(plan, GatherMerge)) + { + active_query_features = + lappend(list_copy(active_query_features), + pgpa_add_feature(walker, PGPAQF_GATHER_MERGE, plan)); + beneath_any_gather = true; + } + + /* + * If we're within a join problem, the join unroller is responsible for + * building any required scan for this node. If not, we do it here. + */ + if (!within_join_problem) + (void) pgpa_build_scan(walker, plan, NULL, beneath_any_gather, false); + + /* + * If this join needs to be unrolled but there's no join unroller already + * available, create one. + */ + if (join_unroller == NULL && pgpa_is_join(plan)) + { + join_unroller = pgpa_create_join_unroller(); + join_unroller_toplevel = true; + within_join_problem = true; + } + + /* + * If this join is to be unrolled, pgpa_unroll_join() will return the join + * unroller object that should be passed down when we recurse into the + * outer and inner sides of the plan. + */ + if (join_unroller != NULL) + pgpa_unroll_join(walker, plan, beneath_any_gather, join_unroller, + &outer_join_unroller, &inner_join_unroller); + + /* Add RTIs from the plan node to all active query features. */ + pgpa_qf_add_plan_rtis(active_query_features, plan, walker->pstmt->rtable); + + /* + * Recurse into the outer and inner subtrees. + * + * As an exception, if this is a ForeignScan, don't recurse. postgres_fdw + * sometimes stores an EPQ recheck plan in plan->lefttree, but that's + * going to mention the same set of relations as the ForeignScan itself, + * and we have no way to emit advice targeting the EPQ case vs. the + * non-EPQ case. Moreover, it's not entirely clear what other FDWs might + * do with the left and right subtrees. Maybe some better handling is + * needed here, but for now, we just punt. + */ + if (!IsA(plan, ForeignScan)) + { + if (plan->lefttree != NULL) + pgpa_walk_recursively(walker, plan->lefttree, within_join_problem, + outer_join_unroller, active_query_features, + beneath_any_gather); + if (plan->righttree != NULL) + pgpa_walk_recursively(walker, plan->righttree, within_join_problem, + inner_join_unroller, active_query_features, + beneath_any_gather); + } + + /* + * If we created a join unroller up above, then it's also our join to use + * it to build the final pgpa_unrolled_join, and to destroy the object. + */ + if (join_unroller_toplevel) + { + pgpa_unrolled_join *ujoin; + + ujoin = pgpa_build_unrolled_join(walker, join_unroller); + walker->toplevel_unrolled_joins = + lappend(walker->toplevel_unrolled_joins, ujoin); + pgpa_destroy_join_unroller(join_unroller); + (void) pgpa_process_unrolled_join(walker, ujoin); + } + + /* + * Some plan types can have additional children. Nodes like Append that + * can have any number of children store them in a List; a SubqueryScan + * just has a field for a single additional Plan. + */ + switch (nodeTag(plan)) + { + case T_Append: + { + Append *aplan = (Append *) plan; + + extraplans = aplan->appendplans; + } + break; + case T_MergeAppend: + { + MergeAppend *maplan = (MergeAppend *) plan; + + extraplans = maplan->mergeplans; + } + break; + case T_BitmapAnd: + extraplans = ((BitmapAnd *) plan)->bitmapplans; + break; + case T_BitmapOr: + extraplans = ((BitmapOr *) plan)->bitmapplans; + break; + case T_SubqueryScan: + + /* + * We don't pass down active_query_features across here, because + * those are specific to a subquery level. + */ + pgpa_walk_recursively(walker, ((SubqueryScan *) plan)->subplan, + 0, NULL, NIL, beneath_any_gather); + break; + case T_CustomScan: + extraplans = ((CustomScan *) plan)->custom_plans; + break; + default: + break; + } + + /* If we found a list of extra children, iterate over it. */ + foreach(lc, extraplans) + { + Plan *subplan = lfirst(lc); + + pgpa_walk_recursively(walker, subplan, false, NULL, NIL, + beneath_any_gather); + } +} + +/* + * Perform final processing of a newly-constructed pgpa_unrolled_join. This + * only needs to be called for toplevel pgpa_unrolled_join objects, since it + * recurses to sub-joins as needed. + * + * Our goal is to add the set of inner relids to the relevant join_strategies + * list, and to do the same for any sub-joins. To that end, the return value + * is the set of relids found beneath the join, but it is expected that + * the toplevel caller will ignore this. + */ +static Bitmapset * +pgpa_process_unrolled_join(pgpa_plan_walker_context *walker, + pgpa_unrolled_join *ujoin) +{ + Bitmapset *all_relids = bms_copy(ujoin->outer.scan->relids); + + /* If this fails, we didn't unroll properly. */ + Assert(ujoin->outer.unrolled_join == NULL); + + for (int k = 0; k < ujoin->ninner; ++k) + { + pgpa_join_member *member = &ujoin->inner[k]; + Bitmapset *relids; + + if (member->unrolled_join != NULL) + relids = pgpa_process_unrolled_join(walker, + member->unrolled_join); + else + { + Assert(member->scan != NULL); + relids = member->scan->relids; + } + walker->join_strategies[ujoin->strategy[k]] = + lappend(walker->join_strategies[ujoin->strategy[k]], relids); + all_relids = bms_add_members(all_relids, relids); + } + + return all_relids; +} + +/* + * Arrange for the given plan node to be treated as a query feature when the + * tree walk reaches it. + * + * Make sure to only use this for nodes that the tree walk can't have reached + * yet! + */ +void +pgpa_add_future_feature(pgpa_plan_walker_context *walker, + pgpa_qf_type type, Plan *plan) +{ + pgpa_query_feature *qf = pgpa_add_feature(walker, type, plan); + + walker->future_query_features = + lappend(walker->future_query_features, qf); +} + +/* + * Return the last of any elided nodes associated with this plan node ID. + * + * The last elided node is the one that would have been uppermost in the plan + * tree had it not been removed during setrefs processing. + */ +ElidedNode * +pgpa_last_elided_node(PlannedStmt *pstmt, Plan *plan) +{ + ElidedNode *elided_node = NULL; + + foreach_node(ElidedNode, n, pstmt->elidedNodes) + { + if (n->plan_node_id == plan->plan_node_id) + elided_node = n; + } + + return elided_node; +} + +/* + * Certain plan nodes can refer to a set of RTIs. Extract and return the set. + */ +Bitmapset * +pgpa_relids(Plan *plan) +{ + if (IsA(plan, Result)) + return ((Result *) plan)->relids; + else if (IsA(plan, ForeignScan)) + return ((ForeignScan *) plan)->fs_relids; + else if (IsA(plan, Append)) + return ((Append *) plan)->apprelids; + else if (IsA(plan, MergeAppend)) + return ((MergeAppend *) plan)->apprelids; + + return NULL; +} + +/* + * Extract the scanned RTI from a plan node. + * + * Returns 0 if there isn't one. + */ +Index +pgpa_scanrelid(Plan *plan) +{ + switch (nodeTag(plan)) + { + case T_SeqScan: + case T_SampleScan: + case T_BitmapHeapScan: + case T_TidScan: + case T_TidRangeScan: + case T_SubqueryScan: + case T_FunctionScan: + case T_TableFuncScan: + case T_ValuesScan: + case T_CteScan: + case T_NamedTuplestoreScan: + case T_WorkTableScan: + case T_ForeignScan: + case T_CustomScan: + case T_IndexScan: + case T_IndexOnlyScan: + return ((Scan *) plan)->scanrelid; + default: + return 0; + } +} + +/* + * Construct a new Bitmapset containing non-RTE_JOIN members of 'relids'. + */ +Bitmapset * +pgpa_filter_out_join_relids(Bitmapset *relids, List *rtable) +{ + int rti = -1; + Bitmapset *result = NULL; + + while ((rti = bms_next_member(relids, rti)) >= 0) + { + RangeTblEntry *rte = rt_fetch(rti, rtable); + + if (rte->rtekind != RTE_JOIN) + result = bms_add_member(result, rti); + } + + return result; +} + +/* + * Create a pgpa_query_feature and add it to the list of all query features + * for this plan. + */ +static pgpa_query_feature * +pgpa_add_feature(pgpa_plan_walker_context *walker, + pgpa_qf_type type, Plan *plan) +{ + pgpa_query_feature *qf = palloc0_object(pgpa_query_feature); + + qf->type = type; + qf->plan = plan; + + walker->query_features[qf->type] = + lappend(walker->query_features[qf->type], qf); + + return qf; +} + +/* + * Add a single RTI to each active query feature. + */ +static void +pgpa_qf_add_rti(List *active_query_features, Index rti) +{ + foreach_ptr(pgpa_query_feature, qf, active_query_features) + { + qf->relids = bms_add_member(qf->relids, rti); + } +} + +/* + * Add a set of RTIs to each active query feature. + */ +static void +pgpa_qf_add_rtis(List *active_query_features, Bitmapset *relids) +{ + foreach_ptr(pgpa_query_feature, qf, active_query_features) + { + qf->relids = bms_add_members(qf->relids, relids); + } +} + +/* + * Add RTIs directly contained in a plan node to each active query feature, + * but filter out any join RTIs, since advice doesn't mention those. + */ +static void +pgpa_qf_add_plan_rtis(List *active_query_features, Plan *plan, List *rtable) +{ + Bitmapset *relids; + Index rti; + + if ((relids = pgpa_relids(plan)) != NULL) + { + relids = pgpa_filter_out_join_relids(relids, rtable); + pgpa_qf_add_rtis(active_query_features, relids); + } + else if ((rti = pgpa_scanrelid(plan)) != 0) + pgpa_qf_add_rti(active_query_features, rti); +} + +/* + * If we generated plan advice using the provided walker object and array + * of identifiers, would we generate the specified tag/target combination? + * + * If yes, the plan conforms to the advice; if no, it does not. Note that + * we have no way of knowing whether the planner was forced to emit a plan + * that conformed to the advice or just happened to do so. + */ +bool +pgpa_walker_would_advise(pgpa_plan_walker_context *walker, + pgpa_identifier *rt_identifiers, + pgpa_advice_tag_type tag, + pgpa_advice_target *target) +{ + Index rtable_length = list_length(walker->pstmt->rtable); + Bitmapset *relids = NULL; + + if (tag == PGPA_TAG_JOIN_ORDER) + { + foreach_ptr(pgpa_unrolled_join, ujoin, walker->toplevel_unrolled_joins) + { + if (pgpa_walker_join_order_matches(ujoin, rtable_length, + rt_identifiers, target, true)) + return true; + } + + return false; + } + + if (target->ttype == PGPA_TARGET_IDENTIFIER) + { + Index rti; + + rti = pgpa_compute_rti_from_identifier(rtable_length, rt_identifiers, + &target->rid); + if (rti == 0) + return false; + relids = bms_make_singleton(rti); + } + else + { + Assert(target->ttype == PGPA_TARGET_ORDERED_LIST); + foreach_ptr(pgpa_advice_target, child_target, target->children) + { + Index rti; + + Assert(child_target->ttype == PGPA_TARGET_IDENTIFIER); + rti = pgpa_compute_rti_from_identifier(rtable_length, + rt_identifiers, + &child_target->rid); + if (rti == 0) + return false; + relids = bms_add_member(relids, rti); + } + } + + switch (tag) + { + case PGPA_TAG_JOIN_ORDER: + /* should have been handled above */ + pg_unreachable(); + break; + case PGPA_TAG_BITMAP_HEAP_SCAN: + return pgpa_walker_find_scan(walker, + PGPA_SCAN_BITMAP_HEAP, + relids) != NULL; + case PGPA_TAG_FOREIGN_JOIN: + return pgpa_walker_find_scan(walker, + PGPA_SCAN_FOREIGN, + relids) != NULL; + case PGPA_TAG_INDEX_ONLY_SCAN: + { + pgpa_scan *scan; + + scan = pgpa_walker_find_scan(walker, PGPA_SCAN_INDEX_ONLY, + relids); + if (scan == NULL) + return false; + + return pgpa_walker_index_target_matches_plan(target->itarget, scan->plan); + } + case PGPA_TAG_INDEX_SCAN: + { + pgpa_scan *scan; + + scan = pgpa_walker_find_scan(walker, PGPA_SCAN_INDEX, + relids); + if (scan == NULL) + return false; + + return pgpa_walker_index_target_matches_plan(target->itarget, scan->plan); + } + case PGPA_TAG_PARTITIONWISE: + return pgpa_walker_find_scan(walker, + PGPA_SCAN_PARTITIONWISE, + relids) != NULL; + case PGPA_TAG_SEQ_SCAN: + return pgpa_walker_find_scan(walker, + PGPA_SCAN_SEQ, + relids) != NULL; + case PGPA_TAG_TID_SCAN: + return pgpa_walker_find_scan(walker, + PGPA_SCAN_TID, + relids) != NULL; + case PGPA_TAG_GATHER: + return pgpa_walker_contains_feature(walker, + PGPAQF_GATHER, + relids); + case PGPA_TAG_GATHER_MERGE: + return pgpa_walker_contains_feature(walker, + PGPAQF_GATHER_MERGE, + relids); + case PGPA_TAG_SEMIJOIN_NON_UNIQUE: + return pgpa_walker_contains_feature(walker, + PGPAQF_SEMIJOIN_NON_UNIQUE, + relids); + case PGPA_TAG_SEMIJOIN_UNIQUE: + return pgpa_walker_contains_feature(walker, + PGPAQF_SEMIJOIN_UNIQUE, + relids); + case PGPA_TAG_HASH_JOIN: + return pgpa_walker_contains_join(walker, + JSTRAT_HASH_JOIN, + relids); + case PGPA_TAG_MERGE_JOIN_MATERIALIZE: + return pgpa_walker_contains_join(walker, + JSTRAT_MERGE_JOIN_MATERIALIZE, + relids); + case PGPA_TAG_MERGE_JOIN_PLAIN: + return pgpa_walker_contains_join(walker, + JSTRAT_MERGE_JOIN_PLAIN, + relids); + case PGPA_TAG_NESTED_LOOP_MATERIALIZE: + return pgpa_walker_contains_join(walker, + JSTRAT_NESTED_LOOP_MATERIALIZE, + relids); + case PGPA_TAG_NESTED_LOOP_MEMOIZE: + return pgpa_walker_contains_join(walker, + JSTRAT_NESTED_LOOP_MEMOIZE, + relids); + case PGPA_TAG_NESTED_LOOP_PLAIN: + return pgpa_walker_contains_join(walker, + JSTRAT_NESTED_LOOP_PLAIN, + relids); + case PGPA_TAG_NO_GATHER: + return pgpa_walker_contains_no_gather(walker, relids); + } + + /* should not get here */ + return false; +} + +/* + * Does the index target match the Plan? + * + * Should only be called when we know that itarget mandates an Index Scan or + * Index Only Scan and this corresponds to the type of Plan. Here, our job is + * just to check whether it's the same index. + */ +static bool +pgpa_walker_index_target_matches_plan(pgpa_index_target *itarget, Plan *plan) +{ + Oid indexoid = InvalidOid; + + /* Retrieve the index OID from the plan. */ + if (IsA(plan, IndexScan)) + indexoid = ((IndexScan *) plan)->indexid; + else if (IsA(plan, IndexOnlyScan)) + indexoid = ((IndexOnlyScan *) plan)->indexid; + else + elog(ERROR, "unrecognized node type: %d", (int) nodeTag(plan)); + + /* Check whether schema name matches, if specified in index target. */ + if (itarget->indnamespace != NULL) + { + Oid nspoid = get_rel_namespace(indexoid); + char *relnamespace = get_namespace_name_or_temp(nspoid); + + if (strcmp(itarget->indnamespace, relnamespace) != 0) + return false; + } + + /* Check whether relation name matches. */ + return (strcmp(itarget->indname, get_rel_name(indexoid)) == 0); +} + +/* + * Does an unrolled join match the join order specified by an advice target? + */ +static bool +pgpa_walker_join_order_matches(pgpa_unrolled_join *ujoin, + Index rtable_length, + pgpa_identifier *rt_identifiers, + pgpa_advice_target *target, + bool toplevel) +{ + int nchildren = list_length(target->children); + + Assert(target->ttype == PGPA_TARGET_ORDERED_LIST); + + /* At toplevel, we allow a prefix match. */ + if (toplevel) + { + if (nchildren > ujoin->ninner + 1) + return false; + } + else + { + if (nchildren != ujoin->ninner + 1) + return false; + } + + /* Outermost rel must match. */ + if (!pgpa_walker_join_order_matches_member(&ujoin->outer, + rtable_length, + rt_identifiers, + linitial(target->children))) + return false; + + /* Each inner rel must match. */ + for (int n = 0; n < nchildren - 1; ++n) + { + pgpa_advice_target *child_target = list_nth(target->children, n + 1); + + if (!pgpa_walker_join_order_matches_member(&ujoin->inner[n], + rtable_length, + rt_identifiers, + child_target)) + return false; + } + + return true; +} + +/* + * Does one member of an unrolled join match an advice target? + */ +static bool +pgpa_walker_join_order_matches_member(pgpa_join_member *member, + Index rtable_length, + pgpa_identifier *rt_identifiers, + pgpa_advice_target *target) +{ + Bitmapset *relids = NULL; + + if (member->unrolled_join != NULL) + { + if (target->ttype != PGPA_TARGET_ORDERED_LIST) + return false; + return pgpa_walker_join_order_matches(member->unrolled_join, + rtable_length, + rt_identifiers, + target, + false); + } + + Assert(member->scan != NULL); + switch (target->ttype) + { + case PGPA_TARGET_ORDERED_LIST: + /* Could only match an unrolled join */ + return false; + + case PGPA_TARGET_UNORDERED_LIST: + { + foreach_ptr(pgpa_advice_target, child_target, target->children) + { + Index rti; + + rti = pgpa_compute_rti_from_identifier(rtable_length, + rt_identifiers, + &child_target->rid); + if (rti == 0) + return false; + relids = bms_add_member(relids, rti); + } + break; + } + + case PGPA_TARGET_IDENTIFIER: + { + Index rti; + + rti = pgpa_compute_rti_from_identifier(rtable_length, + rt_identifiers, + &target->rid); + if (rti == 0) + return false; + relids = bms_make_singleton(rti); + break; + } + } + + return bms_equal(member->scan->relids, relids); +} + +/* + * Find the scan where the walker says that the given scan strategy should be + * used for the given relid set, if one exists. + * + * Returns the pgpa_scan object, or NULL if none was found. + */ +static pgpa_scan * +pgpa_walker_find_scan(pgpa_plan_walker_context *walker, + pgpa_scan_strategy strategy, + Bitmapset *relids) +{ + List *scans = walker->scans[strategy]; + + foreach_ptr(pgpa_scan, scan, scans) + { + if (bms_equal(scan->relids, relids)) + return scan; + } + + return NULL; +} + +/* + * Does this walker say that the given query feature applies to the given + * relid set? + */ +static bool +pgpa_walker_contains_feature(pgpa_plan_walker_context *walker, + pgpa_qf_type type, + Bitmapset *relids) +{ + List *query_features = walker->query_features[type]; + + foreach_ptr(pgpa_query_feature, qf, query_features) + { + if (bms_equal(qf->relids, relids)) + return true; + } + + return false; +} + +/* + * Does the walker say that the given join strategy should be used for the + * given relid set? + */ +static bool +pgpa_walker_contains_join(pgpa_plan_walker_context *walker, + pgpa_join_strategy strategy, + Bitmapset *relids) +{ + List *join_strategies = walker->join_strategies[strategy]; + + foreach_ptr(Bitmapset, jsrelids, join_strategies) + { + if (bms_equal(jsrelids, relids)) + return true; + } + + return false; +} + +/* + * Does the walker say that the given relids should be marked as NO_GATHER? + */ +static bool +pgpa_walker_contains_no_gather(pgpa_plan_walker_context *walker, + Bitmapset *relids) +{ + return bms_is_subset(relids, walker->no_gather_scans); +} diff --git a/contrib/pg_plan_advice/pgpa_walker.h b/contrib/pg_plan_advice/pgpa_walker.h new file mode 100644 index 00000000000..4890d554dd3 --- /dev/null +++ b/contrib/pg_plan_advice/pgpa_walker.h @@ -0,0 +1,141 @@ +/*------------------------------------------------------------------------- + * + * pgpa_walker.h + * Main entrypoints for analyzing a plan to generate an advice string + * + * Copyright (c) 2016-2026, PostgreSQL Global Development Group + * + * contrib/pg_plan_advice/pgpa_walker.h + * + *------------------------------------------------------------------------- + */ +#ifndef PGPA_WALKER_H +#define PGPA_WALKER_H + +#include "pgpa_ast.h" +#include "pgpa_join.h" +#include "pgpa_scan.h" + +/* + * When generating advice, we should emit either SEMIJOIN_UNIQUE advice or + * SEMIJOIN_NON_UNIQUE advice for each semijoin depending on whether we chose + * to implement it as a semijoin or whether we instead chose to make the + * nullable side unique and then perform an inner join. When the make-unique + * strategy is not chosen, it's not easy to tell from the final plan tree + * whether it was considered. That's awkward, because we don't want to emit + * useless SEMIJOIN_NON_UNIQUE advice when there was no decision to be made. + * + * To avoid that, during planning, we create a pgpa_sj_unique_rel for each + * relation that we considered making unique for purposes of semijoin planning. + */ +typedef struct pgpa_sj_unique_rel +{ + char *plan_name; + Bitmapset *relids; +} pgpa_sj_unique_rel; + +/* + * We use the term "query feature" to refer to plan nodes that are interesting + * in the following way: to generate advice, we'll need to know the set of + * same-subquery, non-join RTIs occurring at or below that plan node, without + * admixture of parent and child RTIs. + * + * For example, Gather nodes, designated by PGPAQF_GATHER, and Gather Merge + * nodes, designated by PGPAQF_GATHER_MERGE, are query features, because we'll + * want to admit some kind of advice that describes the portion of the plan + * tree that appears beneath those nodes. + * + * Each semijoin can be implemented either by directly performing a semijoin, + * or by making one side unique and then performing a normal join. Either way, + * we use a query feature to notice what decision was made, so that we can + * describe it by enumerating the RTIs on that side of the join. + * + * To elaborate on the "no admixture of parent and child RTIs" rule, in all of + * these cases, if the entirety of an inheritance hierarchy appears beneath + * the query feature, we only want to name the parent table. But it's also + * possible to have cases where we must name child tables. This is particularly + * likely to happen when partitionwise join is in use, but could happen for + * Gather or Gather Merge even without that, if one of those appears below + * an Append or MergeAppend node for a single table. + */ +typedef enum pgpa_qf_type +{ + PGPAQF_GATHER, + PGPAQF_GATHER_MERGE, + PGPAQF_SEMIJOIN_NON_UNIQUE, + PGPAQF_SEMIJOIN_UNIQUE + /* update NUM_PGPA_QF_TYPES if you add anything here */ +} pgpa_qf_type; + +#define NUM_PGPA_QF_TYPES ((int) PGPAQF_SEMIJOIN_UNIQUE + 1) + +/* + * For each query feature, we keep track of the feature type and the set of + * relids that we found underneath the relevant plan node. See the comments + * on pgpa_qf_type, above, for additional details. + */ +typedef struct pgpa_query_feature +{ + pgpa_qf_type type; + Plan *plan; + Bitmapset *relids; +} pgpa_query_feature; + +/* + * Context object for plan tree walk. + * + * pstmt is the PlannedStmt we're studying. + * + * scans is an array of lists of pgpa_scan objects. The array is indexed by + * the scan's pgpa_scan_strategy. + * + * no_gather_scans is the set of scan RTIs that do not appear beneath any + * Gather or Gather Merge node. + * + * toplevel_unrolled_joins is a list of all pgpa_unrolled_join objects that + * are not a child of some other pgpa_unrolled_join. + * + * join_strategy is an array of lists of Bitmapset objects. Each Bitmapset + * is the set of relids that appears on the inner side of some join (excluding + * RTIs from partition children and subqueries). The array is indexed by + * pgpa_join_strategy. + * + * query_features is an array lists of pgpa_query_feature objects, indexed + * by pgpa_qf_type. + * + * future_query_features is only used during the plan tree walk and should + * be empty when the tree walk concludes. It is a list of pgpa_query_feature + * objects for Plan nodes that the plan tree walk has not yet encountered; + * when encountered, they will be moved to the list of active query features + * that is propagated via the call stack. + */ +typedef struct pgpa_plan_walker_context +{ + PlannedStmt *pstmt; + List *scans[NUM_PGPA_SCAN_STRATEGY]; + Bitmapset *no_gather_scans; + List *toplevel_unrolled_joins; + List *join_strategies[NUM_PGPA_JOIN_STRATEGY]; + List *query_features[NUM_PGPA_QF_TYPES]; + List *future_query_features; +} pgpa_plan_walker_context; + +extern void pgpa_plan_walker(pgpa_plan_walker_context *walker, + PlannedStmt *pstmt, + List *sj_unique_rels); + +extern void pgpa_add_future_feature(pgpa_plan_walker_context *walker, + pgpa_qf_type type, + Plan *plan); + +extern ElidedNode *pgpa_last_elided_node(PlannedStmt *pstmt, Plan *plan); +extern Bitmapset *pgpa_relids(Plan *plan); +extern Index pgpa_scanrelid(Plan *plan); +extern Bitmapset *pgpa_filter_out_join_relids(Bitmapset *relids, List *rtable); + +extern bool pgpa_walker_would_advise(pgpa_plan_walker_context *walker, + pgpa_identifier *rt_identifiers, + pgpa_advice_tag_type tag, + pgpa_advice_target *target); + +#endif diff --git a/contrib/pg_plan_advice/sql/gather.sql b/contrib/pg_plan_advice/sql/gather.sql new file mode 100644 index 00000000000..776666bf196 --- /dev/null +++ b/contrib/pg_plan_advice/sql/gather.sql @@ -0,0 +1,86 @@ +LOAD 'pg_plan_advice'; +SET max_parallel_workers_per_gather = 1; +SET parallel_setup_cost = 0; +SET parallel_tuple_cost = 0; +SET min_parallel_table_scan_size = 0; +SET debug_parallel_query = off; + +CREATE TABLE gt_dim (id serial primary key, dim text) + WITH (autovacuum_enabled = false); +INSERT INTO gt_dim (dim) SELECT random()::text FROM generate_series(1,100) g; +VACUUM ANALYZE gt_dim; + +CREATE TABLE gt_fact ( + id int not null, + dim_id integer not null references gt_dim (id) +) WITH (autovacuum_enabled = false); +INSERT INTO gt_fact + SELECT g, (g%3)+1 FROM generate_series(1,100000) g; +VACUUM ANALYZE gt_fact; + +-- By default, we expect Gather Merge with a parallel hash join. +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id; + +-- Force Gather or Gather Merge of both relations together. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'gather_merge((f d))'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id; +SET LOCAL pg_plan_advice.advice = 'gather((f d))'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id; +COMMIT; + +-- Force a separate Gather or Gather Merge operation for each relation. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'gather_merge(f d)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id; +SET LOCAL pg_plan_advice.advice = 'gather(f d)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id; +SET LOCAL pg_plan_advice.advice = 'gather((d d/d.d))'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id; +COMMIT; + +-- Force a Gather or Gather Merge on one relation but no parallelism on other. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'gather_merge(f) no_gather(d)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id; +SET LOCAL pg_plan_advice.advice = 'gather_merge(d) no_gather(f)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id; +SET LOCAL pg_plan_advice.advice = 'gather(f) no_gather(d)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id; +SET LOCAL pg_plan_advice.advice = 'gather(d) no_gather(f)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id; +COMMIT; + +-- Force no Gather or Gather Merge use at all. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'no_gather(f d)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id; +COMMIT; + +-- Can't force Gather Merge without the ORDER BY clause, but just Gather is OK. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'gather_merge((f d))'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id; +SET LOCAL pg_plan_advice.advice = 'gather((f d))'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id; +COMMIT; + +-- Test conflicting advice. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'gather((f d)) no_gather(f)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id; +COMMIT; diff --git a/contrib/pg_plan_advice/sql/join_order.sql b/contrib/pg_plan_advice/sql/join_order.sql new file mode 100644 index 00000000000..88d90de9cc6 --- /dev/null +++ b/contrib/pg_plan_advice/sql/join_order.sql @@ -0,0 +1,145 @@ +LOAD 'pg_plan_advice'; +SET max_parallel_workers_per_gather = 0; + +CREATE TABLE jo_dim1 (id integer primary key, dim1 text, val1 int) + WITH (autovacuum_enabled = false); +INSERT INTO jo_dim1 (id, dim1, val1) + SELECT g, 'some filler text ' || g, (g % 3) + 1 + FROM generate_series(1,100) g; +VACUUM ANALYZE jo_dim1; +CREATE TABLE jo_dim2 (id integer primary key, dim2 text, val2 int) + WITH (autovacuum_enabled = false); +INSERT INTO jo_dim2 (id, dim2, val2) + SELECT g, 'some filler text ' || g, (g % 53) + 1 + FROM generate_series(1,1000) g; +VACUUM ANALYZE jo_dim2; + +CREATE TABLE jo_fact ( + id int primary key, + dim1_id integer not null references jo_dim1 (id), + dim2_id integer not null references jo_dim2 (id) +) WITH (autovacuum_enabled = false); +INSERT INTO jo_fact + SELECT g, (g%100)+1, (g%100)+1 FROM generate_series(1,100000) g; +VACUUM ANALYZE jo_fact; + +-- We expect to join to d2 first and then d1, since the condition on d2 +-- is more selective. +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM jo_fact f + LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id + LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id + WHERE val1 = 1 AND val2 = 1; + +-- Force a few different join orders. Some of these are very inefficient, +-- but the planner considers them all viable. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'join_order(f d1 d2)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM jo_fact f + LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id + LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id + WHERE val1 = 1 AND val2 = 1; +SET LOCAL pg_plan_advice.advice = 'join_order(f d2 d1)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM jo_fact f + LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id + LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id + WHERE val1 = 1 AND val2 = 1; +SET LOCAL pg_plan_advice.advice = 'join_order(d1 f d2)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM jo_fact f + LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id + LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id + WHERE val1 = 1 AND val2 = 1; +SET LOCAL pg_plan_advice.advice = 'join_order(f (d1 d2))'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM jo_fact f + LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id + LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id + WHERE val1 = 1 AND val2 = 1; +SET LOCAL pg_plan_advice.advice = 'join_order(f {d1 d2})'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM jo_fact f + LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id + LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id + WHERE val1 = 1 AND val2 = 1; +COMMIT; + +-- Force a join order by mentioning just a prefix of the join list. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'join_order(d2)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM jo_fact f + LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id + LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id + WHERE val1 = 1 AND val2 = 1; +SET LOCAL pg_plan_advice.advice = 'join_order(d2 d1)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM jo_fact f + LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id + LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id + WHERE val1 = 1 AND val2 = 1; +COMMIT; + +-- jo_fact is not partitioned, but let's try pretending that it is and +-- verifying that the advice does not apply. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'join_order(f/d1 d1 d2)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM jo_fact f + LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id + LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id + WHERE val1 = 1 AND val2 = 1; +SET LOCAL pg_plan_advice.advice = 'join_order(f/d1 (d1 d2))'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM jo_fact f + LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id + LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id + WHERE val1 = 1 AND val2 = 1; +COMMIT; + +-- The unusual formulation of this query is intended to prevent the query +-- planner from reducing the FULL JOIN to some other join type, so that we +-- can test what happens with a join type that cannot be reordered. +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM jo_dim1 d1 + INNER JOIN (jo_fact f FULL JOIN jo_dim2 d2 ON f.dim2_id + 0 = d2.id + 0) + ON d1.id = f.dim1_id OR f.dim1_id IS NULL; + +-- We should not be able to force the planner to join f to d1 first, because +-- that is not a valid join order, but we should be able to force the planner +-- to make either d2 or f the driving table. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'join_order(f d1 d2)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM jo_dim1 d1 + INNER JOIN (jo_fact f FULL JOIN jo_dim2 d2 ON f.dim2_id + 0 = d2.id + 0) + ON d1.id = f.dim1_id OR f.dim1_id IS NULL; +SET LOCAL pg_plan_advice.advice = 'join_order(f d2 d1)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM jo_dim1 d1 + INNER JOIN (jo_fact f FULL JOIN jo_dim2 d2 ON f.dim2_id + 0 = d2.id + 0) + ON d1.id = f.dim1_id OR f.dim1_id IS NULL; +SET LOCAL pg_plan_advice.advice = 'join_order(d2 f d1)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM jo_dim1 d1 + INNER JOIN (jo_fact f FULL JOIN jo_dim2 d2 ON f.dim2_id + 0 = d2.id + 0) + ON d1.id = f.dim1_id OR f.dim1_id IS NULL; +COMMIT; + +-- Two incompatible join orders should conflict. In the second case, +-- the conflict is implicit: if d1 is on the inner side of a join of any +-- type, it cannot also be the driving table. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'join_order(f) join_order(d1)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM jo_dim1 d1 + INNER JOIN (jo_fact f FULL JOIN jo_dim2 d2 ON f.dim2_id + 0 = d2.id + 0) + ON d1.id = f.dim1_id OR f.dim1_id IS NULL; +SET LOCAL pg_plan_advice.advice = 'join_order(d1) hash_join(d1)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM jo_dim1 d1 + INNER JOIN (jo_fact f FULL JOIN jo_dim2 d2 ON f.dim2_id + 0 = d2.id + 0) + ON d1.id = f.dim1_id OR f.dim1_id IS NULL; +COMMIT; diff --git a/contrib/pg_plan_advice/sql/join_strategy.sql b/contrib/pg_plan_advice/sql/join_strategy.sql new file mode 100644 index 00000000000..edd5c4c0e14 --- /dev/null +++ b/contrib/pg_plan_advice/sql/join_strategy.sql @@ -0,0 +1,84 @@ +LOAD 'pg_plan_advice'; +SET max_parallel_workers_per_gather = 0; + +CREATE TABLE join_dim (id serial primary key, dim text) + WITH (autovacuum_enabled = false); +INSERT INTO join_dim (dim) SELECT random()::text FROM generate_series(1,100) g; +VACUUM ANALYZE join_dim; + +CREATE TABLE join_fact ( + id int primary key, + dim_id integer not null references join_dim (id) +) WITH (autovacuum_enabled = false); +INSERT INTO join_fact + SELECT g, (g%3)+1 FROM generate_series(1,100000) g; +CREATE INDEX join_fact_dim_id ON join_fact (dim_id); +VACUUM ANALYZE join_fact; + +-- We expect a hash join by default. +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id; + +-- Try forcing each join method in turn with join_dim as the inner table. +-- All of these should work except for MERGE_JOIN_MATERIALIZE; that will +-- fail, because the planner knows that join_dim (id) is unique, and will +-- refuse to add mark/restore overhead. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'HASH_JOIN(d)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id; +SET LOCAL pg_plan_advice.advice = 'MERGE_JOIN_MATERIALIZE(d)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id; +SET LOCAL pg_plan_advice.advice = 'MERGE_JOIN_PLAIN(d)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id; +SET LOCAL pg_plan_advice.advice = 'NESTED_LOOP_MATERIALIZE(d)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id; +SET LOCAL pg_plan_advice.advice = 'NESTED_LOOP_MEMOIZE(d)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id; +SET LOCAL pg_plan_advice.advice = 'NESTED_LOOP_PLAIN(d)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id; +COMMIT; + +-- Now try forcing each join method in turn with join_fact as the inner +-- table. All of these should work. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'HASH_JOIN(f)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id; +SET LOCAL pg_plan_advice.advice = 'MERGE_JOIN_MATERIALIZE(f)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id; +SET LOCAL pg_plan_advice.advice = 'MERGE_JOIN_PLAIN(f)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id; +SET LOCAL pg_plan_advice.advice = 'NESTED_LOOP_MATERIALIZE(f)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id; +SET LOCAL pg_plan_advice.advice = 'NESTED_LOOP_MEMOIZE(f)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id; +SET LOCAL pg_plan_advice.advice = 'NESTED_LOOP_PLAIN(f)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id; +COMMIT; + +-- Non-working cases. We can't force a foreign join between these tables, +-- because they aren't foreign tables. We also can't use two different +-- strategies on the same table, nor can we put both tables on the inner +-- side of the same join. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'FOREIGN_JOIN((f d))'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id; +SET LOCAL pg_plan_advice.advice = 'NESTED_LOOP_PLAIN(f) NESTED_LOOP_MATERIALIZE(f)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id; +SET LOCAL pg_plan_advice.advice = 'NESTED_LOOP_PLAIN(f d)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id; +COMMIT; diff --git a/contrib/pg_plan_advice/sql/partitionwise.sql b/contrib/pg_plan_advice/sql/partitionwise.sql new file mode 100644 index 00000000000..c51456dbbb5 --- /dev/null +++ b/contrib/pg_plan_advice/sql/partitionwise.sql @@ -0,0 +1,99 @@ +LOAD 'pg_plan_advice'; +SET max_parallel_workers_per_gather = 0; +SET enable_partitionwise_join = true; + +CREATE TABLE pt1 (id integer primary key, dim1 text, val1 int) + PARTITION BY RANGE (id); +CREATE TABLE pt1a PARTITION OF pt1 FOR VALUES FROM (1) to (1001) + WITH (autovacuum_enabled = false); +CREATE TABLE pt1b PARTITION OF pt1 FOR VALUES FROM (1001) to (2001) + WITH (autovacuum_enabled = false); +CREATE TABLE pt1c PARTITION OF pt1 FOR VALUES FROM (2001) to (3001) + WITH (autovacuum_enabled = false); +INSERT INTO pt1 (id, dim1, val1) + SELECT g, 'some filler text ' || g, (g % 3) + 1 + FROM generate_series(1,3000) g; +VACUUM ANALYZE pt1; + +CREATE TABLE pt2 (id integer primary key, dim2 text, val2 int) + PARTITION BY RANGE (id); +CREATE TABLE pt2a PARTITION OF pt2 FOR VALUES FROM (1) to (1001) + WITH (autovacuum_enabled = false); +CREATE TABLE pt2b PARTITION OF pt2 FOR VALUES FROM (1001) to (2001) + WITH (autovacuum_enabled = false); +CREATE TABLE pt2c PARTITION OF pt2 FOR VALUES FROM (2001) to (3001) + WITH (autovacuum_enabled = false); +INSERT INTO pt2 (id, dim2, val2) + SELECT g, 'some other text ' || g, (g % 5) + 1 + FROM generate_series(1,3000,2) g; +VACUUM ANALYZE pt2; + +CREATE TABLE pt3 (id integer primary key, dim3 text, val3 int) + PARTITION BY RANGE (id); +CREATE TABLE pt3a PARTITION OF pt3 FOR VALUES FROM (1) to (1001) + WITH (autovacuum_enabled = false); +CREATE TABLE pt3b PARTITION OF pt3 FOR VALUES FROM (1001) to (2001) + WITH (autovacuum_enabled = false); +CREATE TABLE pt3c PARTITION OF pt3 FOR VALUES FROM (2001) to (3001) + WITH (autovacuum_enabled = false); +INSERT INTO pt3 (id, dim3, val3) + SELECT g, 'a third random text ' || g, (g % 7) + 1 + FROM generate_series(1,3000,3) g; +VACUUM ANALYZE pt3; + +CREATE TABLE ptmismatch (id integer primary key, dimm text, valm int) + PARTITION BY RANGE (id); +CREATE TABLE ptmismatcha PARTITION OF ptmismatch + FOR VALUES FROM (1) to (1501) + WITH (autovacuum_enabled = false); +CREATE TABLE ptmismatchb PARTITION OF ptmismatch + FOR VALUES FROM (1501) to (3001) + WITH (autovacuum_enabled = false); +INSERT INTO ptmismatch (id, dimm, valm) + SELECT g, 'yet another text ' || g, (g % 2) + 1 + FROM generate_series(1,3000) g; +VACUUM ANALYZE ptmismatch; + +EXPLAIN (PLAN_ADVICE, COSTS OFF) +SELECT * FROM pt1, pt2, pt3 WHERE pt1.id = pt2.id AND pt2.id = pt3.id + AND val1 = 1 AND val2 = 1 AND val3 = 1; + +-- Suppress partitionwise join, or do it just partially. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'PARTITIONWISE(pt1 pt2 pt3)'; +EXPLAIN (PLAN_ADVICE, COSTS OFF) +SELECT * FROM pt1, pt2, pt3 WHERE pt1.id = pt2.id AND pt2.id = pt3.id + AND val1 = 1 AND val2 = 1 AND val3 = 1; +SET LOCAL pg_plan_advice.advice = 'PARTITIONWISE((pt1 pt2) pt3)'; +EXPLAIN (PLAN_ADVICE, COSTS OFF) +SELECT * FROM pt1, pt2, pt3 WHERE pt1.id = pt2.id AND pt2.id = pt3.id + AND val1 = 1 AND val2 = 1 AND val3 = 1; +COMMIT; + +-- Test conflicting advice. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'PARTITIONWISE((pt1 pt2) (pt1 pt3))'; +EXPLAIN (PLAN_ADVICE, COSTS OFF) +SELECT * FROM pt1, pt2, pt3 WHERE pt1.id = pt2.id AND pt2.id = pt3.id + AND val1 = 1 AND val2 = 1 AND val3 = 1; +COMMIT; + +-- Can't force a partitionwise join with a mismatched table. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'PARTITIONWISE((pt1 ptmismatch))'; +EXPLAIN (PLAN_ADVICE, COSTS OFF) +SELECT * FROM pt1, ptmismatch WHERE pt1.id = ptmismatch.id; +COMMIT; + +-- Force join order for a particular branch of the partitionwise join with +-- and without mentioning the schema name. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'JOIN_ORDER(pt3/public.pt3a pt2/public.pt2a pt1/public.pt1a)'; +EXPLAIN (PLAN_ADVICE, COSTS OFF) +SELECT * FROM pt1, pt2, pt3 WHERE pt1.id = pt2.id AND pt2.id = pt3.id + AND val1 = 1 AND val2 = 1 AND val3 = 1; +SET LOCAL pg_plan_advice.advice = 'JOIN_ORDER(pt3/pt3a pt2/pt2a pt1/pt1a)'; +EXPLAIN (PLAN_ADVICE, COSTS OFF) +SELECT * FROM pt1, pt2, pt3 WHERE pt1.id = pt2.id AND pt2.id = pt3.id + AND val1 = 1 AND val2 = 1 AND val3 = 1; +COMMIT; diff --git a/contrib/pg_plan_advice/sql/prepared.sql b/contrib/pg_plan_advice/sql/prepared.sql new file mode 100644 index 00000000000..3ec30eedee5 --- /dev/null +++ b/contrib/pg_plan_advice/sql/prepared.sql @@ -0,0 +1,37 @@ +LOAD 'pg_plan_advice'; +SET max_parallel_workers_per_gather = 0; + +CREATE TABLE ptab (id integer, val text) WITH (autovacuum_enabled = false); + +SET pg_plan_advice.always_store_advice_details = false; + +-- Not prepared, so advice should be generated. +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM ptab; + +-- Prepared, so advice should not be generated. +PREPARE pt1 AS SELECT * FROM ptab; +EXPLAIN (COSTS OFF, PLAN_ADVICE) EXECUTE pt1; + +SET pg_plan_advice.always_store_advice_details = true; + +-- Prepared, but always_store_advice_details = true, so should show advice. +PREPARE pt2 AS SELECT * FROM ptab; +EXPLAIN (COSTS OFF, PLAN_ADVICE) EXECUTE pt2; + +-- Not prepared, so feedback should be generated. +SET pg_plan_advice.always_store_advice_details = false; +SET pg_plan_advice.advice = 'SEQ_SCAN(ptab)'; +EXPLAIN (COSTS OFF) +SELECT * FROM ptab; + +-- Prepared, so advice should not be generated. +PREPARE pt3 AS SELECT * FROM ptab; +EXPLAIN (COSTS OFF) EXECUTE pt1; + +SET pg_plan_advice.always_store_advice_details = true; + +-- Prepared, but always_store_advice_details = true, so should show feedback. +PREPARE pt4 AS SELECT * FROM ptab; +EXPLAIN (COSTS OFF, PLAN_ADVICE) EXECUTE pt2; + diff --git a/contrib/pg_plan_advice/sql/scan.sql b/contrib/pg_plan_advice/sql/scan.sql new file mode 100644 index 00000000000..4fc494c7d8e --- /dev/null +++ b/contrib/pg_plan_advice/sql/scan.sql @@ -0,0 +1,195 @@ +LOAD 'pg_plan_advice'; +SET max_parallel_workers_per_gather = 0; +SET seq_page_cost = 0.1; +SET random_page_cost = 0.1; +SET cpu_tuple_cost = 0; +SET cpu_index_tuple_cost = 0; + +CREATE TABLE scan_table (a int primary key, b text) + WITH (autovacuum_enabled = false); +INSERT INTO scan_table + SELECT g, 'some text ' || g FROM generate_series(1, 100000) g; +CREATE INDEX scan_table_b ON scan_table USING brin (b); +VACUUM ANALYZE scan_table; + +-- Sequential scan +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table; + +-- Index scan +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1; + +-- Index-only scan +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT a FROM scan_table WHERE a = 1; + +-- Bitmap heap scan +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table + WHERE b > 'some text 8'; + +-- TID scan +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE ctid = '(0,1)'; + +-- TID range scan +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table + WHERE ctid > '(1,1)' AND ctid < '(2,1)'; + +-- Try forcing each of our test queries to use the scan type they +-- wanted to use anyway. This should succeed. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(scan_table)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table; +SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table scan_table_pkey)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1; +SET LOCAL pg_plan_advice.advice = 'INDEX_ONLY_SCAN(scan_table scan_table_pkey)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT a FROM scan_table WHERE a = 1; +SET LOCAL pg_plan_advice.advice = 'BITMAP_HEAP_SCAN(scan_table)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table + WHERE b > 'some text 8'; +SET LOCAL pg_plan_advice.advice = 'TID_SCAN(scan_table)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE ctid = '(0,1)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table + WHERE ctid > '(1,1)' AND ctid < '(2,1)'; +COMMIT; + +-- Try to force a full scan of the table to use some other scan type. All +-- of these will fail. An index scan or bitmap heap scan could potentially +-- generate the correct answer, but the planner does not even consider these +-- possibilities due to the lack of a WHERE clause. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table scan_table_pkey)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table; +SET LOCAL pg_plan_advice.advice = 'INDEX_ONLY_SCAN(scan_table scan_table_pkey)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table; +SET LOCAL pg_plan_advice.advice = 'BITMAP_HEAP_SCAN(scan_table)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table; +SET LOCAL pg_plan_advice.advice = 'TID_SCAN(scan_table)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table; +COMMIT; + +-- Try again to force index use. This should now succeed for the INDEX_SCAN +-- and BITMAP_HEAP_SCAN, but the INDEX_ONLY_SCAN can't be forced because the +-- query fetches columns not included in the index. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table scan_table_pkey)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a > 0; +SET LOCAL pg_plan_advice.advice = 'INDEX_ONLY_SCAN(scan_table scan_table_pkey)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a > 0; +SET LOCAL pg_plan_advice.advice = 'BITMAP_HEAP_SCAN(scan_table)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a > 0; +COMMIT; + +-- We can force a primary key lookup to use a sequential scan, but we +-- can't force it to use an index-only scan (due to the column list) +-- or a TID scan (due to the absence of a TID qual). +BEGIN; +SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(scan_table)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1; +SET LOCAL pg_plan_advice.advice = 'INDEX_ONLY_SCAN(scan_table scan_table_pkey)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1; +SET LOCAL pg_plan_advice.advice = 'TID_SCAN(scan_table)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1; +COMMIT; + +-- We can forcibly downgrade an index-only scan to an index scan, but we can't +-- force the use of an index that the planner thinks is inapplicable. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table scan_table_pkey)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1; +SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table public.scan_table_pkey)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1; +SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table scan_table_b)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1; +COMMIT; + +-- We can force the use of a sequential scan in place of a bitmap heap scan, +-- but a plain index scan on a BRIN index is not possible. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(scan_table)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table + WHERE b > 'some text 8'; +SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table scan_table_b)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1; +COMMIT; + +-- We can force the use of a sequential scan rather than a TID scan or +-- TID range scan. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(scan_table)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE ctid = '(0,1)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table + WHERE ctid > '(1,1)' AND ctid < '(2,1)'; +COMMIT; + +-- Test more complex scenarios with index scans. +BEGIN; +-- Should still work if we mention the schema. +SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table public.scan_table_pkey)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1; +-- But not if we mention the wrong schema. +SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table cilbup.scan_table_pkey)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1; +-- It's OK to repeat the same advice. +SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table scan_table_pkey scan_table scan_table_pkey)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1; +-- But it doesn't work if the index target is even notionally different. +SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table scan_table_pkey scan_table public.scan_table_pkey)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1; +COMMIT; + +-- Test assorted incorrect advice. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(nothing)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT a FROM scan_table WHERE a = 1; +SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(nothing whatsoever)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT a FROM scan_table WHERE a = 1; +SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table bogus)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT a FROM scan_table WHERE a = 1; +SET LOCAL pg_plan_advice.advice = 'INDEX_ONLY_SCAN(nothing whatsoever)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT a FROM scan_table WHERE a = 1; +SET LOCAL pg_plan_advice.advice = 'INDEX_ONLY_SCAN(scan_table bogus)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT a FROM scan_table WHERE a = 1; +COMMIT; + +-- Test our ability to refer to multiple instances of the same alias. +BEGIN; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM (generate_series(1,10) g LEFT JOIN scan_table s ON g = s.a) x + LEFT JOIN scan_table s ON g = s.a; +SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(s)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM (generate_series(1,10) g LEFT JOIN scan_table s ON g = s.a) x + LEFT JOIN scan_table s ON g = s.a; +SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(s#2)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM (generate_series(1,10) g LEFT JOIN scan_table s ON g = s.a) x + LEFT JOIN scan_table s ON g = s.a; +SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(s) SEQ_SCAN(s#2)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM (generate_series(1,10) g LEFT JOIN scan_table s ON g = s.a) x + LEFT JOIN scan_table s ON g = s.a; +COMMIT; + +-- Test our ability to refer to scans within a subquery. +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM (SELECT * FROM scan_table s WHERE a = 1 OFFSET 0) x; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM (SELECT * FROM scan_table s WHERE a = 1 OFFSET 0); +BEGIN; +-- Should not match. +SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(s)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM (SELECT * FROM scan_table s WHERE a = 1 OFFSET 0) x; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM (SELECT * FROM scan_table s WHERE a = 1 OFFSET 0); +-- Should match first query only. +SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(s@x)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM (SELECT * FROM scan_table s WHERE a = 1 OFFSET 0) x; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM (SELECT * FROM scan_table s WHERE a = 1 OFFSET 0); +-- Should match second query only. +SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(s@unnamed_subquery)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM (SELECT * FROM scan_table s WHERE a = 1 OFFSET 0) x; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM (SELECT * FROM scan_table s WHERE a = 1 OFFSET 0); +COMMIT; diff --git a/contrib/pg_plan_advice/sql/semijoin.sql b/contrib/pg_plan_advice/sql/semijoin.sql new file mode 100644 index 00000000000..5a4ae52d1d9 --- /dev/null +++ b/contrib/pg_plan_advice/sql/semijoin.sql @@ -0,0 +1,118 @@ +LOAD 'pg_plan_advice'; +SET max_parallel_workers_per_gather = 0; + +CREATE TABLE sj_wide ( + id integer primary key, + val1 integer, + padding text storage plain +) WITH (autovacuum_enabled = false); +INSERT INTO sj_wide + SELECT g, g%10+1, repeat(' ', 300) FROM generate_series(1, 1000) g; +CREATE INDEX ON sj_wide (val1); +VACUUM ANALYZE sj_wide; + +CREATE TABLE sj_narrow ( + id integer primary key, + val1 integer +) WITH (autovacuum_enabled = false); +INSERT INTO sj_narrow + SELECT g, g%10+1 FROM generate_series(1, 1000) g; +CREATE INDEX ON sj_narrow (val1); +VACUUM ANALYZE sj_narrow; + +-- We expect this to make the VALUES list unique and use index lookups to +-- find the rows in sj_wide, so as to avoid a full scan of sj_wide. +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM sj_wide + WHERE (id, val1) IN (VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5)); + +-- If we ask for a unique semijoin, we should get the same plan as with +-- no advice. If we ask for a non-unique semijoin, we should see a Semi +-- Join operation in the plan tree. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'semijoin_unique("*VALUES*")'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM sj_wide + WHERE (id, val1) IN (VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5)); +SET LOCAL pg_plan_advice.advice = 'semijoin_non_unique("*VALUES*")'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM sj_wide + WHERE (id, val1) IN (VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5)); +COMMIT; + +-- Because this table is narrower than the previous one, a sequential scan +-- is less expensive, and we choose a straightforward Semi Join plan by +-- default. (Note that this is also very sensitive to the length of the IN +-- list, which affects how many index lookups the alternative plan will need.) +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM sj_narrow + WHERE (id, val1) IN (VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5)); + +-- Here, we expect advising a unique semijoin to swith to the same plan that +-- we got with sj_wide, and advising a non-unique semijoin should not change +-- the plan. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'semijoin_unique("*VALUES*")'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM sj_narrow + WHERE (id, val1) IN (VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5)); +SET LOCAL pg_plan_advice.advice = 'semijoin_non_unique("*VALUES*")'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM sj_narrow + WHERE (id, val1) IN (VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5)); +COMMIT; + +-- In the above example, we made the outer side of the join unique, but here, +-- we should make the inner side unique. +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM generate_series(1,1000) g + WHERE g in (select val1 from sj_narrow); + +-- We should be able to force a plan with or without the make-unique strategy, +-- with either side as the driving table. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'semijoin_unique(sj_narrow)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM generate_series(1,1000) g + WHERE g in (select val1 from sj_narrow); +SET LOCAL pg_plan_advice.advice = 'semijoin_non_unique(sj_narrow)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM generate_series(1,1000) g + WHERE g in (select val1 from sj_narrow); +SET LOCAL pg_plan_advice.advice = 'semijoin_unique(sj_narrow) join_order(sj_narrow)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM generate_series(1,1000) g + WHERE g in (select val1 from sj_narrow); +SET LOCAL pg_plan_advice.advice = 'semijoin_non_unique(sj_narrow) join_order(sj_narrow)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM generate_series(1,1000) g + WHERE g in (select val1 from sj_narrow); +COMMIT; + +-- However, mentioning the wrong side of the join should result in an advice +-- failure. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'semijoin_unique(g)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM generate_series(1,1000) g + WHERE g in (select val1 from sj_narrow); +SET LOCAL pg_plan_advice.advice = 'semijoin_non_unique(g)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM generate_series(1,1000) g + WHERE g in (select val1 from sj_narrow); +COMMIT; + +-- Test conflicting advice. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'semijoin_unique(sj_narrow) semijoin_non_unique(sj_narrow)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM generate_series(1,1000) g + WHERE g in (select val1 from sj_narrow); +COMMIT; + +-- Try applying SEMIJOIN_UNIQUE() to a non-semijoin. +BEGIN; +SET LOCAL pg_plan_advice.advice = 'semijoin_unique(g)'; +EXPLAIN (COSTS OFF, PLAN_ADVICE) +SELECT * FROM generate_series(1,1000) g, sj_narrow s WHERE g = s.val1; +COMMIT; diff --git a/contrib/pg_plan_advice/sql/syntax.sql b/contrib/pg_plan_advice/sql/syntax.sql new file mode 100644 index 00000000000..56a5d54e2b5 --- /dev/null +++ b/contrib/pg_plan_advice/sql/syntax.sql @@ -0,0 +1,68 @@ +LOAD 'pg_plan_advice'; + +-- An empty string is allowed. Empty target lists are allowed for most advice +-- tags, but not for JOIN_ORDER. "Supplied Plan Advice" should be omitted in +-- text format when there is no actual advice, but not in non-text format. +SET pg_plan_advice.advice = ''; +EXPLAIN (COSTS OFF) SELECT 1; +SET pg_plan_advice.advice = 'SEQ_SCAN()'; +EXPLAIN (COSTS OFF) SELECT 1; +SET pg_plan_advice.advice = 'NESTED_LOOP_PLAIN()'; +EXPLAIN (COSTS OFF, FORMAT JSON) SELECT 1; +SET pg_plan_advice.advice = 'JOIN_ORDER()'; + +-- Test assorted variations in capitalization, whitespace, and which parts of +-- the relation identifier are included. These should all work. +SET pg_plan_advice.advice = 'SEQ_SCAN(x)'; +EXPLAIN (COSTS OFF) SELECT 1; +SET pg_plan_advice.advice = 'seq_scan(x@y)'; +EXPLAIN (COSTS OFF) SELECT 1; +SET pg_plan_advice.advice = 'SEQ_scan(x#2)'; +EXPLAIN (COSTS OFF) SELECT 1; +SET pg_plan_advice.advice = 'SEQ_SCAN (x/y)'; +EXPLAIN (COSTS OFF) SELECT 1; +SET pg_plan_advice.advice = ' SEQ_SCAN ( x / y . z ) '; +EXPLAIN (COSTS OFF) SELECT 1; +SET pg_plan_advice.advice = 'SEQ_SCAN("x"#2/"y"."z"@"t")'; +EXPLAIN (COSTS OFF) SELECT 1; + +-- Syntax errors. +SET pg_plan_advice.advice = 'SEQUENTIAL_SCAN(x)'; +SET pg_plan_advice.advice = 'SEQ_SCAN'; +SET pg_plan_advice.advice = 'SEQ_SCAN('; +SET pg_plan_advice.advice = 'SEQ_SCAN("'; +SET pg_plan_advice.advice = 'SEQ_SCAN("")'; +SET pg_plan_advice.advice = 'SEQ_SCAN("a"'; +SET pg_plan_advice.advice = 'SEQ_SCAN(#'; +SET pg_plan_advice.advice = '()'; +SET pg_plan_advice.advice = '123'; + +-- Tags like SEQ_SCAN and NO_GATHER don't allow sublists at all; other tags, +-- except for JOIN_ORDER, allow at most one level of sublist. Hence, these +-- examples should error out. +SET pg_plan_advice.advice = 'SEQ_SCAN((x))'; +SET pg_plan_advice.advice = 'GATHER(((x)))'; + +-- Legal comments. +SET pg_plan_advice.advice = '/**/'; +EXPLAIN (COSTS OFF) SELECT 1; +SET pg_plan_advice.advice = 'HASH_JOIN(_)/***/'; +EXPLAIN (COSTS OFF) SELECT 1; +SET pg_plan_advice.advice = '/* comment */ HASH_JOIN(/*x*/y)'; +EXPLAIN (COSTS OFF) SELECT 1; +SET pg_plan_advice.advice = '/* comment */ HASH_JOIN(y//*x*/z)'; +EXPLAIN (COSTS OFF) SELECT 1; + +-- Unterminated comments. +SET pg_plan_advice.advice = '/*'; +SET pg_plan_advice.advice = 'JOIN_ORDER("fOO") /* oops'; + +-- Nested comments are not supported, so the first of these is legal and +-- the second is not. +SET pg_plan_advice.advice = '/*/*/'; +EXPLAIN (COSTS OFF) SELECT 1; +SET pg_plan_advice.advice = '/*/* stuff */*/'; + +-- Foreign join requires multiple relation identifiers. +SET pg_plan_advice.advice = 'FOREIGN_JOIN(a)'; +SET pg_plan_advice.advice = 'FOREIGN_JOIN((a))'; diff --git a/doc/src/sgml/contrib.sgml b/doc/src/sgml/contrib.sgml index 24b706b29ad..bdd4865f53f 100644 --- a/doc/src/sgml/contrib.sgml +++ b/doc/src/sgml/contrib.sgml @@ -156,6 +156,7 @@ CREATE EXTENSION extension_name; &pgfreespacemap; &pglogicalinspect; &pgoverexplain; + &pgplanadvice; &pgprewarm; &pgrowlocks; &pgstatstatements; diff --git a/doc/src/sgml/filelist.sgml b/doc/src/sgml/filelist.sgml index ac66fcbdb57..d90b4338d2a 100644 --- a/doc/src/sgml/filelist.sgml +++ b/doc/src/sgml/filelist.sgml @@ -149,6 +149,7 @@ + diff --git a/doc/src/sgml/pgplanadvice.sgml b/doc/src/sgml/pgplanadvice.sgml new file mode 100644 index 00000000000..8df8a978ecf --- /dev/null +++ b/doc/src/sgml/pgplanadvice.sgml @@ -0,0 +1,813 @@ + + + + pg_plan_advice — help the planner get the right plan + + + pg_plan_advice + + + + The pg_plan_advice module allows key planner decisions + to be described, reproduced, and altered using a special-purpose "plan + advice" mini-language. It is intended to allow stabilization of plan choices + that the user believes to be good, as well as experimentation with plans that + the planner believes to be non-optimal. + + + + Note that, since the planner often makes good decisions, overriding its + judgment can easily backfire. For example, if the distribution of the + underlying data changes, the planner normally has the option to adjust the + plan in an attempt to preserve good performance. If the plan advice prevents + this, a very poor plan may be chosen. It is important to use plan advice + only when the risks of constraining the planner's choices are outweighed by + the benefits. + + + + Getting Started + + + First, you must arrange to load the pg_plan_advice + module. You can do this on a system-wide basis by adding + pg_plan_advice to + and restarting the + server, or by adding it to + and starting a new session, + or by loading it into an individual session using the + LOAD command. + + + + Once the pg_plan_advice module is loaded, + EXPLAIN will support + a PLAN_ADVICE option. You can use this option to see + a plan advice string for the chosen plan. For example: + + + +EXPLAIN (COSTS OFF, PLAN_ADVICE) + SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id; + QUERY PLAN +------------------------------------ + Hash Join + Hash Cond: (f.dim_id = d.id) + -> Seq Scan on join_fact f + -> Hash + -> Seq Scan on join_dim d + Generated Plan Advice: + JOIN_ORDER(f d) + HASH_JOIN(d) + SEQ_SCAN(f d) + NO_GATHER(f d) + + + + In this example, the user has not specified any advice; instead, the + planner has been permitted to make whatever decisions it thinks best, and + those decisions are memorialized in the form of an advice string. + JOIN_ORDER(f d) means that f should + be the driving table, and the first table to which it should be joined is + d. HASH_JOIN(d) means that + d should appear on the inner side of a hash join. + SEQ_SCAN(f d) means that both f + and d should be accessed via a sequential scan. + NO_GATHER(f d) means that neither f + nor d should appear beneath a Gather + or Gather Merge node. For more details on the plan + advice mini-language, see the information on + advice targets and + advice tags, below. + + + + Once you have an advice string for a query, you can use it to control how + that query is planned. You can do this by setting + pg_plan_advice.advice to the advice string you've + chosen. This can be an advice string that was generated by the system, + or one you've written yourself. One good way of creating your own advice + string is to take the string generated by the system and pick out just + those elements that you wish to enforce. In the example above, + pg_plan_advice emits advice for the join order, the + join method, the scan method, and the use of parallelism, but you might + only want to control the join order: + + + +SET pg_plan_advice.advice = 'JOIN_ORDER(f d)'; +EXPLAIN (COSTS OFF) + SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id; + QUERY PLAN +------------------------------------ + Hash Join + Hash Cond: (f.dim_id = d.id) + -> Seq Scan on join_fact f + -> Hash + -> Seq Scan on join_dim d + Supplied Plan Advice: + JOIN_ORDER(f d) /* matched */ + + + + Since the PLAN_ADVICE option to + EXPLAIN was not specified, no advice string is generated + for the plan. However, the supplied plan advice is still shown so that + anyone looking at the EXPLAIN output knows that the + chosen plan was influenced by plan advice. If information about supplied + plan advice is not desired, it can be suppressed by configuring + pg_plan_advice.always_explain_supplied_advice = false. + For each piece of supplied advice, the output shows + advice feedback indicating + whether or not the advice was successfully applied to the query. In this + case, the feedback says /* matched */, which means that + f and d were found in the query and + that the resulting query plan conforms to the specified advice. + + + + + + How It Works + + + Plan advice is written imperatively; that is, it specifies what should be + done. However, at an implementation level, + pg_plan_advice works by telling the core planner what + should not be done. In other words, it operates by constraining the + planner's choices, not by replacing it. Therefore, no matter what advice + you provide, you will only ever get a plan that the core planner would have + considered for the query in question. If you attempt to force what you + believe to be the correct plan by supplying an advice string, and the + planner still fails to produce the desired plan, this means that either + there is a bug in your advice string, or the plan in question was not + considered viable by the core planner. This commonly happens for one of two + reasons. First, it might be that the planner believes that the plan you're + trying to force would be semantically incorrect - that is, it would produce + the wrong results - and for that reason it wasn't considered. Second, it + might be that the planner rejected the plan you were hoping to generate on + some grounds other than cost. For example, given a very simple query such as + SELECT * FROM some_table, the query planner will + decide that the use of an index is worthless here before it performs any + costing calculations. You cannot force it to use an index for this query + even if you set enable_seqscan = false, and you can't + force it to use an index using plan advice, either. + + + + Specifying plan advice should never cause planner failure. However, if you + specify plan advice that asks for something impossible, you may get a plan + where some plan nodes are flagged as Disabled: true in + the EXPLAIN output. In some cases, such plans will be + basically the same plan you would have gotten with no supplied advice at + all, but in other cases, they may be much worse. For example: + + + +SET pg_plan_advice.advice = 'JOIN_ORDER(x f d)'; +EXPLAIN (COSTS OFF) + SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id; + QUERY PLAN +---------------------------------------------------- + Nested Loop + Disabled: true + -> Seq Scan on join_fact f + -> Index Scan using join_dim_pkey on join_dim d + Index Cond: (id = f.dim_id) + Supplied Plan Advice: + JOIN_ORDER(x f d) /* partially matched */ + + + + Because neither f nor d is the + first table in the JOIN_ORDER() specification, the + planner disables all direct joins between the two of them, thinking that + the join to x should happen first. Since planning isn't + allowed to fail, a disabled plan between the two relations is eventually + selected anyway, but here it's a Nested Loop rather than + the Hash Join that was chosen in the above example where + no advice was specified. There are several different ways that this kind + of thing can happen; when it does, the resulting plan is generally worse + than if no advice had been specified at all. Therefore, it is a good idea + to validate that the advice you specify applies to the query to which it + is applied and that the results are as expected. + + + + + + Advice Targets + + + An advice target uniquely identifies a particular + instance of a particular relation involved in a particular query. In simple + cases, such as the examples shown above, the advice target is simply the + relation alias. However, a more complex syntax is required when subqueries + are used, when tables are partitioned, or when the same relation alias is + mentioned more than once in the same subquery (e.g., (foo JOIN bar + ON foo.a = bar.a) x JOIN foo ON x.b = foo.b). Any combination of + these three things can occur simultaneously: a relation could be mentioned + more than once, be partitioned, and be used inside of a subquery. + + + + Because of this, the general syntax for a relation identifier is: + + + +alias_name#occurrence_number/partition_schema.partition_name@plan_name + + + + All components except for the alias_name are optional + and are included only when required. When a component is omitted, the + preceding punctuation must also be omitted. For the first occurrence of a + relation within a given subquery, generated advice will omit the occurrence + number, but it is legal to write #1, if desired. The + partition schema and partition name are included only for children of + partitioned tables. In generated advice, pg_plan_advice + always includes both, but it is legal to omit the schema. The plan name is + omitted for the top-level plan, and must be included for any subplan. + + + + It is not always easy to determine the correct advice target by examining + the query. For instance, if the planner pulls up a subquery into the parent + query level, everything inside of it becomes part of the parent query level, + and uses the parent query's subplan name (or no subplan name, if pulled up + to the top level). Furthermore, the correct subquery name is sometimes not + obvious. For example, when two queries are joined using an operation such as + UNION or INTERSECT, no name for the + subqueries is present in the SQL syntax; instead, a system-generated name is + assigned to each branch. The easiest way to discover the proper advice + targets is to use EXPLAIN (PLAN_ADVICE) and examine the + generated advice. + + + + + + Advice Tags + + + An advice tag specifies a particular behavior that + should be enforced for some portion of the query, such as a particular + join order or join method. All advice tags take + advice targets as arguments, + and many allow lists of advice targets, which in some cases can be nested + multiple levels deep. Several different classes of advice tags exist, + each controlling a different aspect of query planning. + + + + Scan Method Advice + +SEQ_SCAN(target [ ... ]) +TID_SCAN(target [ ... ]) +INDEX_SCAN(target index_name [ ... ]) +INDEX_ONLY_SCAN(target index_name [ ... ]) +FOREIGN_SCAN((target [ ... ]) [ ... ]) +BITMAP_HEAP_SCAN(target [ ... ]) + + + SEQ_SCAN specifies that each target should be + scanned using a Seq Scan. TID_SCAN + specifies that each target should be scanned using a + TID Scan or TID Range Scan. + BITMAP_HEAP_SCAN specifies that each target + should be scanned using a Bitmap Heap Scan. + + + + INDEX_SCAN specifies that each target should + be scanned using an Index Scan on the given index + name. INDEX_ONLY_SCAN is similar, but specifies the + use of an Index Only Scan. In either case, the index + name can be, but does not have to be, schema-qualified. + + + + FOREIGN_SCAN specifies that a join between two or + more foreign tables should be pushed down to a remote server so + that it can be implemented as a single Foreign Scan. + Specifying FOREIGN_SCAN for a single foreign table is + neither necessary nor permissible: a Foreign Scan will + need to be used regardless. If you want to prevent a join from being + pushed down, consider using the JOIN_ORDER tag for + that purpose. + + + + The planner supports many types of scans other than those listed here; + however, in most of those cases, there is no meaningful decision to be + made, and hence no need for advice. For example, the output of a + set-returning function that appears in the FROM clause + can only ever be scanned using a Function Scan, so + there is no opportunity for advice to change anything. + + + + + + Join Order Advice + +JOIN_ORDER(join_order_item [ ... ]) + +where join_order_item is: + +advice_target | +( join_order_item [ ... ] ) | +{ join_order_item [ ... ] } + + + When JOIN_ORDER is used without any sublists, it + specifies an outer-deep join with the first advice target as the driving + table, joined to each subsequent advice target in turn in the order + specified. For instance, JOIN_ORDER(a b c) means that + a should be the driving table, and that it should be + joined first to b and then to c. + If there are more relations in the query than a, + b, and c, the rest can be joined + afterwards in any manner. + + + + If a JOIN_ORDER list contains a parenthesized sublist, + it specifies a non-outer-deep join. The relations in the sublist must first + be joined to each other much as if the sublist were a top-level + JOIN_ORDER list, and the resulting join product must + then appear on the inner side of a join at the appropriate point in the + join order. For example, JOIN_ORDER(a (b c) d) requires + a plan of this form: + + + +Join + -> Join + -> Scan on a + -> Join + -> Scan on b + -> Scan on c + -> Scan on d + + + + If a JOIN_ORDER list contains a sublist surrounded by + curly braces, this also specifies a non-outer-deep join. However, the join + order within the sublist is not constrained. For example, specifying + JOIN_ORDER(a {b c} d) would allow the scans of + b and c to be swapped in the + previous example, which is not allowed when parentheses are used. + + + + Parenthesized sublists can be arbitrarily nested, but sublists surrounded + by curly braces cannot themselves contain sublists. + + + + Multiple instances of JOIN_ORDER() can sometimes be + needed in order to fully constrain the join order. This occurs when there + are multiple join problems that are optimized separately by the planner. + This can happen due to the presence of subqueries, or because there is a + partitionwise join. In the latter case, each branch of the partitionwise + join can have its own join order, independent of every other branch. + + + + + + Join Method Advice + +join_method_name(join_method_item [ ... ]) + +where join_method_name is: + +{ MERGE_JOIN_MATERIALIZE | MERGE_JOIN_PLAIN | NESTED_LOOP_MATERIALIZE | NESTED_LOOP_PLAIN | HASH_JOIN } + +and join_method_item is: + +{ advice_target | +( advice_target [ ... ] ) } + + + Join method advice specifies the relation, or set of relations, that should + appear on the inner side of a join using the named join method. For + example, HASH_JOIN(a b) means that each of + a and b should appear on the inner + side of a hash join; a conforming plan must contain at least two hash + joins, one of which has a and nothing else on the + inner side, and the other of which has b and nothing + else on the inner side. On the other hand, + HASH_JOIN((a b)) means that the join product of + a and b should appear together + on the inner side of a single hash join. + + + + Note that join method advice implies a negative join order constraint. + Since the named relation or relations must be on the inner side of a join + using the specified method, none of them can be the driving table for the + entire join problem. Moreover, no relation inside the set should be joined + to any relation outside the set until all relations within the set have + been joined to each other. For example, if the advice specifies + HASH_JOIN((a b)) and the system begins by joining either + of those to some third relation c, the resulting + plan could never be compliant with the request to put exactly those two + relations on the inner side of a hash join. When using both join order + advice and join method advice for the same query, it is a good idea to make + sure that they do not mandate incompatible join orders. + + + + + + Partitionwise Advice + +PARTITIONWISE(partitionwise_item [ ... ]) + +where partitionwise_item is: + +{ advice_target | +( advice_target [ ... ] ) } + + + When applied to a single target, PARTITIONWISE + specifies that the specified table should not be part of any partitionwise + join. When applied to a list of targets, PARTITIONWISE + specifies that exactly that set of relations should be joined in + partitionwise fashion. Note that, regardless of what advice is specified, + no partitionwise joins will be possible if + enable_partitionwise_join = off. + + + + + + Semijoin Uniqueness Advice + +SEMIJOIN_UNIQUE(sj_unique_item [ ... ]) +SEMIJOIN_NON_UNIQUE(sj_unique_item [ ... ]) + +where sj_unique_item is: + +{ advice_target | +( advice_target [ ... ] ) } + + + The planner sometimes has a choice between implementing a semijoin + directly and implementing a semijoin by making the nullable side unique + and then performing an inner join. SEMIJOIN_UNIQUE + specifies the latter strategy, while SEMIJOIN_NON_UNIQUE + specifies the former strategy. In either case, the argument is the single + relation or list of relations that appear beneath the nullable side of the + join. + + + + + + Parallel Query Advice + +GATHER(gather_item [ ... ]) +GATHER_MERGE(gather_item [ ... ]) +NO_GATHER(advice_target [ ... ]) + +where gather_item is: + +{ advice_target | +( advice_target [ ... ] ) } + + + GATHER or GATHER_MERGE specifies + that Gather or Gather Merge, + respectively, should be placed on top of the single relation specified as + a target, or on top of the join between the list of relations specified as + a target. This means that GATHER(a b c) is a request + for three different Gather nodes, while + GATHER((a b c)) is a request for a single + Gather node on top of a 3-way join. + + + + NO_GATHER specifies that no Gather or + Gather Merge node should appear above any of the + targets, but it only constrains the planning of an individual subquery, + and outer subquery levels can still use parallel query. For example, + NO_GATHER(inner_example@any_1) precludes using a + Parallel Seq Scan to access the + inner_example table within the any_1 + subquery, but it does not prevent the planner from placing + SubPlan any_1 beneath a Gather + or Gather Merge node. The following plan is + compatible with NO_GATHER(inner_example@any_1), but + not with NO_GATHER(outer_example): + + + + Finalize Aggregate + -> Gather + -> Partial Aggregate + -> Parallel Seq Scan on outer_example + Filter: (something = (hashed SubPlan any_1).col1) + SubPlan any_1 + -> Seq Scan on inner_example + Filter: (something_else > 100) + + + + Here is the reverse case, that is, a plan compatible with + NO_GATHER(outer_example) but not with + NO_GATHER(inner_example@any_1): + + + + Aggregate + -> Seq Scan on outer_example + Filter: (something = (hashed SubPlan any_1).col1) + SubPlan any_1 + -> Gather + -> Parallel Seq Scan on inner_example + Filter: (something_else > 100) + + + + + + + Advice Feedback + + + EXPLAIN provides feedback on whether supplied advice was + successfully applied to the query in the form of a comment on each piece + of supplied advice. For example: + + + +SET pg_plan_advice.advice = 'hash_join(f g) join_order(f g) index_scan(f no_such_index)'; +EXPLAIN (COSTS OFF) + SELECT * FROM jo_fact f + LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id + LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id + WHERE val1 = 1 AND val2 = 1; + QUERY PLAN +------------------------------------------------------------------- + Hash Join + Hash Cond: ((d1.id = f.dim1_id) AND (d2.id = f.dim2_id)) + -> Nested Loop + -> Seq Scan on jo_dim2 d2 + Filter: (val2 = 1) + -> Materialize + -> Seq Scan on jo_dim1 d1 + Filter: (val1 = 1) + -> Hash + -> Seq Scan on jo_fact f + Supplied Plan Advice: + INDEX_SCAN(f no_such_index) /* matched, inapplicable, failed */ + HASH_JOIN(f) /* matched */ + HASH_JOIN(g) /* not matched */ + JOIN_ORDER(f g) /* partially matched */ + + + + For this query, f is a valid advice target, but + g is not. Therefore, the request to place + f on the inner side of a hash join is listed as + matched, but the request to place g + on the inner side of a hash join is listed as + not matched. The JOIN_ORDER advice + tag involves one valid target and one invalid target, and so is listed as + partially matched. Note that + HASH_JOIN(f g) is actually a request for two logically + separate behaviors, so in the feedback it is decomposed into + HASH_JOIN(f) and HASH_JOIN(g). + By contrast, JOIN_ORDER(f g) is a single request and + appears as-is. + + + + Advice feedback can include any of the following: + + + + + + + matched means that all of the specified advice targets + were observed together during query planning, at a time at which the + advice could be enforced. + + + + + + partially matched means that some but not all of the + specified advice targets were observed during query planning, or all + of the advice targets were observed but not together. For example, this + may happen if all the targets of JOIN_ORDER advice + individually match the query, but the proposed join order is not legal. + + + + + + not matched means that none of the + specified advice targets were observed during query planning. This may + happen if the advice simply doesn't match the query, or it may + occur if the relevant portion of the query was not planned, perhaps + because it was gated by a condition that was simplified to constant false. + + + + + + inapplicable means that the advice tag could not + be applied to the advice targets for some reason. For example, this will + happen if the use of a nonexistent index is requested, or if an attempt + is made to control semijoin uniqueness for a non-semijoin. + + + + + + conflicting means that two or more pieces of advice + request incompatible behaviors. For example, if you advise a sequential + scan and an index scan for the same table, both requests will be flagged + as conflicting. This also commonly happens if join method advice or + semijoin uniqueness advice implies a join order incompatible with the + one explicitly specified; see + . + + + + + + failed means that the query plan does not comply with + the advice. This only occurs for entries that are also shown as + matched. It frequently occurs for entries that are + also marked as conflicting or + inapplicable. However, it can also occur when the + advice is valid insofar as pg_plan_advice is able + to determine, but the planner is not able to construct a legal + plan that can comply with the advice. It is important to note that the + sanity checks performed by pg_plan_advice are fairly + superficial and focused mostly on looking for logical inconsistencies in + the advice string; only the planner knows what will actually work. + + + + + + + All advice should be marked as exactly one of matched, + partially matched, or not matched. + + + + + + Configuration Parameters + + + + + + pg_plan_advice.advice (string) + + pg_plan_advice.advice configuration parameter + + + + + + pg_plan_advice.advice is an advice string to be + used during query planning. + + + + + + + pg_plan_advice.always_explain_supplied_advice (boolean) + + pg_plan_advice.always_explain_supplied_advice configuration parameter + + + + + + pg_plan_advice.always_explain_supplied_advice causes + EXPLAIN to always show any supplied advice and the + associated + advice feedback. + The default value is true. If set to + false, this information will be displayed only when + EXPLAIN (PLAN_ADVICE) is used. + + + + + + + pg_plan_advice.always_store_advice_details (boolean) + + pg_plan_advice.always_store_advice_details configuration parameter + + + + + + pg_plan_advice.always_store_advice_details allows + EXPLAIN to show details related to plan advice even + when prepared queries are used. The default value is + false. When planning a prepared query, it is not + possible to know whether EXPLAIN will later be used, + so by default, to reduce overhead, pg_plan_advice + will not generate plan advice or feedback on supplied advice. This means + that if EXPLAIN EXECUTE is used on the prepared query, + it will not be able to show this information. Changing this setting to + true avoids this problem, but adds additional + overhead. It is probably a good idea to enable this option only in + sessions where it is needed, rather than on a system-wide basis. + + + + + + + pg_plan_advice.feedback_warnings (boolean) + + pg_plan_advice.feedback_warnings configuration parameter + + + + + + When set to true, pg_plan_advice.feedback_warnings + emits a warning whenever supplied plan advice is not successfully + enforced. The default value is false. + + + + + + + pg_plan_advice.trace_mask (boolean) + + pg_plan_advice.trace_mask configuration parameter + + + + + + When pg_plan_advice.trace_mask is + true, pg_plan_advice will print + messages during query planning each time that + pg_plan_advice alters the mask of allowable query + plan types in response to supplied plan advice. The default value is + false. The messages printed by this setting are not + expected to be useful except for purposes of debugging this module. + + + + + + + + + + Limitations + + + It is currently not possible to control any aspect of the planner's behavior + with respect to aggregation. This includes both whether aggregates are + computed by sorting or hashing, and also whether strategies such as + eager aggregation or + partitionwise + aggregation are used. + + + + It also is currently not possible to control any aspect of the planner's + behavior with respect to set operations such as UNION + or INTERSECT. + + + + As discussed above under How + It Works, the use of plan advice can only affect which plan + the planner chooses from among those it believes to be viable. It can never + force the choice of a plan which the planner refused to consider in the + first place. + + + + + Author + + + Robert Haas rhaas@postgresql.org + + + + diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 141b9d6e077..bf1697b90a2 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -3988,6 +3988,39 @@ pg_uuid_t pg_wchar pg_wchar_tbl pgp_armor_headers_state +pgpa_advice_item +pgpa_advice_tag_type +pgpa_advice_target +pgpa_identifier +pgpa_index_target +pgpa_index_type +pgpa_itm_type +pgpa_jo_outcome +pgpa_join_class +pgpa_join_member +pgpa_join_state +pgpa_join_strategy +pgpa_join_unroller +pgpa_output_context +pgpa_plan_walker_context +pgpa_planner_state +pgpa_qf_type +pgpa_query_feature +pgpa_ri_checker +pgpa_ri_checker_key +pgpa_scan +pgpa_scan_strategy +pgpa_sj_unique_rel +pgpa_target_type +pgpa_trove +pgpa_trove_entry +pgpa_trove_entry_element +pgpa_trove_entry_hash +pgpa_trove_entry_key +pgpa_trove_lookup_type +pgpa_trove_result +pgpa_trove_slice +pgpa_unrolled_join pgsocket pgsql_thing_t pgssEntry