postgresql/src/include/optimizer/planmain.h
Alexander Korotkov fc069a3a63 Implement Self-Join Elimination
The Self-Join Elimination (SJE) feature removes an inner join of a plain
table to itself in the query tree if it is proven that the join can be
replaced with a scan without impacting the query result.  Self-join and
inner relation get replaced with the outer in query, equivalence classes,
and planner info structures.  Also, the inner restrictlist moves to the
outer one with the removal of duplicated clauses.  Thus, this optimization
reduces the length of the range table list (this especially makes sense for
partitioned relations), reduces the number of restriction clauses and,
in turn, selectivity estimations, and potentially improves total planner
prediction for the query.

This feature is dedicated to avoiding redundancy, which can appear after
pull-up transformations or the creation of an EquivalenceClass-derived clause
like the below.

  SELECT * FROM t1 WHERE x IN (SELECT t3.x FROM t1 t3);
  SELECT * FROM t1 WHERE EXISTS (SELECT t3.x FROM t1 t3 WHERE t3.x = t1.x);
  SELECT * FROM t1,t2, t1 t3 WHERE t1.x = t2.x AND t2.x = t3.x;

In the future, we could also reduce redundancy caused by subquery pull-up
after unnecessary outer join removal in cases like the one below.

  SELECT * FROM t1 WHERE x IN
    (SELECT t3.x FROM t1 t3 LEFT JOIN t2 ON t2.x = t1.x);

Also, it can drastically help to join partitioned tables, removing entries
even before their expansion.

The SJE proof is based on innerrel_is_unique() machinery.

We can remove a self-join when for each outer row:

 1. At most, one inner row matches the join clause;
 2. Each matched inner row must be (physically) the same as the outer one;
 3. Inner and outer rows have the same row mark.

In this patch, we use the next approach to identify a self-join:

 1. Collect all merge-joinable join quals which look like a.x = b.x;
 2. Add to the list above the baseretrictinfo of the inner table;
 3. Check innerrel_is_unique() for the qual list.  If it returns false, skip
    this pair of joining tables;
 4. Check uniqueness, proved by the baserestrictinfo clauses. To prove the
    possibility of self-join elimination, the inner and outer clauses must
    match exactly.

The relation replacement procedure is not trivial and is partly combined
with the one used to remove useless left joins.  Tests covering this feature
were added to join.sql.  Some of the existing regression tests changed due
to self-join removal logic.

Discussion: https://postgr.es/m/flat/64486b0b-0404-e39e-322d-0801154901f3%40postgrespro.ru
Author: Andrey Lepikhov <a.lepikhov@postgrespro.ru>
Author: Alexander Kuzmenkov <a.kuzmenkov@postgrespro.ru>
Co-authored-by: Alexander Korotkov <aekorotkov@gmail.com>
Co-authored-by: Alena Rybakina <lena.ribackina@yandex.ru>
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Reviewed-by: Robert Haas <robertmhaas@gmail.com>
Reviewed-by: Andres Freund <andres@anarazel.de>
Reviewed-by: Simon Riggs <simon@2ndquadrant.com>
Reviewed-by: Jonathan S. Katz <jkatz@postgresql.org>
Reviewed-by: David Rowley <david.rowley@2ndquadrant.com>
Reviewed-by: Thomas Munro <thomas.munro@enterprisedb.com>
Reviewed-by: Konstantin Knizhnik <k.knizhnik@postgrespro.ru>
Reviewed-by: Heikki Linnakangas <hlinnaka@iki.fi>
Reviewed-by: Hywel Carver <hywel@skillerwhale.com>
Reviewed-by: Laurenz Albe <laurenz.albe@cybertec.at>
Reviewed-by: Ronan Dunklau <ronan.dunklau@aiven.io>
Reviewed-by: vignesh C <vignesh21@gmail.com>
Reviewed-by: Zhihong Yu <zyu@yugabyte.com>
Reviewed-by: Greg Stark <stark@mit.edu>
Reviewed-by: Jaime Casanova <jcasanov@systemguards.com.ec>
Reviewed-by: Michał Kłeczek <michal@kleczek.org>
Reviewed-by: Alena Rybakina <lena.ribackina@yandex.ru>
Reviewed-by: Alexander Korotkov <aekorotkov@gmail.com>
2025-02-17 12:44:12 +02:00

134 lines
5.2 KiB
C

/*-------------------------------------------------------------------------
*
* planmain.h
* prototypes for various files in optimizer/plan
*
*
* Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* src/include/optimizer/planmain.h
*
*-------------------------------------------------------------------------
*/
#ifndef PLANMAIN_H
#define PLANMAIN_H
#include "nodes/pathnodes.h"
#include "nodes/plannodes.h"
/* GUC parameters */
#define DEFAULT_CURSOR_TUPLE_FRACTION 0.1
extern PGDLLIMPORT double cursor_tuple_fraction;
extern PGDLLIMPORT bool enable_self_join_elimination;
/* query_planner callback to compute query_pathkeys */
typedef void (*query_pathkeys_callback) (PlannerInfo *root, void *extra);
/*
* prototypes for plan/planmain.c
*/
extern RelOptInfo *query_planner(PlannerInfo *root,
query_pathkeys_callback qp_callback, void *qp_extra);
/*
* prototypes for plan/planagg.c
*/
extern void preprocess_minmax_aggregates(PlannerInfo *root);
/*
* prototypes for plan/createplan.c
*/
extern Plan *create_plan(PlannerInfo *root, Path *best_path);
extern ForeignScan *make_foreignscan(List *qptlist, List *qpqual,
Index scanrelid, List *fdw_exprs, List *fdw_private,
List *fdw_scan_tlist, List *fdw_recheck_quals,
Plan *outer_plan);
extern Plan *change_plan_targetlist(Plan *subplan, List *tlist,
bool tlist_parallel_safe);
extern Plan *materialize_finished_plan(Plan *subplan);
extern bool is_projection_capable_path(Path *path);
extern bool is_projection_capable_plan(Plan *plan);
/* External use of these functions is deprecated: */
extern Sort *make_sort_from_sortclauses(List *sortcls, Plan *lefttree);
extern Agg *make_agg(List *tlist, List *qual,
AggStrategy aggstrategy, AggSplit aggsplit,
int numGroupCols, AttrNumber *grpColIdx, Oid *grpOperators, Oid *grpCollations,
List *groupingSets, List *chain, double dNumGroups,
Size transitionSpace, Plan *lefttree);
extern Limit *make_limit(Plan *lefttree, Node *limitOffset, Node *limitCount,
LimitOption limitOption, int uniqNumCols,
AttrNumber *uniqColIdx, Oid *uniqOperators,
Oid *uniqCollations);
/*
* prototypes for plan/initsplan.c
*/
extern PGDLLIMPORT int from_collapse_limit;
extern PGDLLIMPORT int join_collapse_limit;
extern void add_base_rels_to_query(PlannerInfo *root, Node *jtnode);
extern void add_other_rels_to_query(PlannerInfo *root);
extern void build_base_rel_tlists(PlannerInfo *root, List *final_tlist);
extern void add_vars_to_targetlist(PlannerInfo *root, List *vars,
Relids where_needed);
extern void add_vars_to_attr_needed(PlannerInfo *root, List *vars,
Relids where_needed);
extern void remove_useless_groupby_columns(PlannerInfo *root);
extern void find_lateral_references(PlannerInfo *root);
extern void rebuild_lateral_attr_needed(PlannerInfo *root);
extern void create_lateral_join_info(PlannerInfo *root);
extern List *deconstruct_jointree(PlannerInfo *root);
extern bool restriction_is_always_true(PlannerInfo *root,
RestrictInfo *restrictinfo);
extern bool restriction_is_always_false(PlannerInfo *root,
RestrictInfo *restrictinfo);
extern void distribute_restrictinfo_to_rels(PlannerInfo *root,
RestrictInfo *restrictinfo);
extern RestrictInfo *process_implied_equality(PlannerInfo *root,
Oid opno,
Oid collation,
Expr *item1,
Expr *item2,
Relids qualscope,
Index security_level,
bool both_const);
extern RestrictInfo *build_implied_join_equality(PlannerInfo *root,
Oid opno,
Oid collation,
Expr *item1,
Expr *item2,
Relids qualscope,
Index security_level);
extern void rebuild_joinclause_attr_needed(PlannerInfo *root);
extern void match_foreign_keys_to_quals(PlannerInfo *root);
/*
* prototypes for plan/analyzejoins.c
*/
extern List *remove_useless_joins(PlannerInfo *root, List *joinlist);
extern void reduce_unique_semijoins(PlannerInfo *root);
extern bool query_supports_distinctness(Query *query);
extern bool query_is_distinct_for(Query *query, List *colnos, List *opids);
extern bool innerrel_is_unique(PlannerInfo *root,
Relids joinrelids, Relids outerrelids, RelOptInfo *innerrel,
JoinType jointype, List *restrictlist, bool force_cache);
extern bool innerrel_is_unique_ext(PlannerInfo *root, Relids joinrelids,
Relids outerrelids, RelOptInfo *innerrel,
JoinType jointype, List *restrictlist,
bool force_cache, List **uclauses);
extern List *remove_useless_self_joins(PlannerInfo *root, List *jointree);
/*
* prototypes for plan/setrefs.c
*/
extern Plan *set_plan_references(PlannerInfo *root, Plan *plan);
extern bool trivial_subqueryscan(SubqueryScan *plan);
extern Param *find_minmax_agg_replacement_param(PlannerInfo *root,
Aggref *aggref);
extern void record_plan_function_dependency(PlannerInfo *root, Oid funcid);
extern void record_plan_type_dependency(PlannerInfo *root, Oid typid);
extern bool extract_query_dependencies_walker(Node *node, PlannerInfo *context);
#endif /* PLANMAIN_H */