diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index 90275e25872..5eceb321828 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -4279,6 +4279,11 @@ check_output_expressions(Query *subquery, pushdown_safety_info *safetyInfo) */ if (subquery->hasGroupRTE) { + /* + * We can safely pass NULL for the root here. This function uses the + * expanded expressions solely to check for volatile or set-returning + * functions, which is independent of the Vars' nullingrels. + */ flattened_targetList = (List *) flatten_group_exprs(NULL, subquery, (Node *) subquery->targetList); } diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c index 97ea95a4eb8..9aaf1c4e5ca 100644 --- a/src/backend/optimizer/plan/initsplan.c +++ b/src/backend/optimizer/plan/initsplan.c @@ -3447,7 +3447,7 @@ restriction_is_always_true(PlannerInfo *root, if (nulltest->argisrow) return false; - return expr_is_nonnullable(root, nulltest->arg, true); + return expr_is_nonnullable(root, nulltest->arg, NOTNULL_SOURCE_RELOPT); } /* If it's an OR, check its sub-clauses */ @@ -3512,7 +3512,7 @@ restriction_is_always_false(PlannerInfo *root, if (nulltest->argisrow) return false; - return expr_is_nonnullable(root, nulltest->arg, true); + return expr_is_nonnullable(root, nulltest->arg, NOTNULL_SOURCE_RELOPT); } /* If it's an OR, check its sub-clauses */ diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c index d7f3cedf3d5..0d31861da7f 100644 --- a/src/backend/optimizer/plan/subselect.c +++ b/src/backend/optimizer/plan/subselect.c @@ -91,6 +91,7 @@ static bool contain_outer_selfref(Node *node); static bool contain_outer_selfref_walker(Node *node, Index *depth); static void inline_cte(PlannerInfo *root, CommonTableExpr *cte); static bool inline_cte_walker(Node *node, inline_cte_walker_context *context); +static bool 
sublink_testexpr_is_not_nullable(PlannerInfo *root, SubLink *sublink); static bool simplify_EXISTS_query(PlannerInfo *root, Query *query); static Query *convert_EXISTS_to_ANY(PlannerInfo *root, Query *subselect, Node **testexpr, List **paramIds); @@ -1306,11 +1307,14 @@ convert_VALUES_to_ANY(PlannerInfo *root, Node *testexpr, Query *values) * If so, form a JoinExpr and return it. Return NULL if the SubLink cannot * be converted to a join. * - * The only non-obvious input parameter is available_rels: this is the set - * of query rels that can safely be referenced in the sublink expression. - * (We must restrict this to avoid changing the semantics when a sublink - * is present in an outer join's ON qual.) The conversion must fail if - * the converted qual would reference any but these parent-query relids. + * If under_not is true, the caller actually found NOT (ANY SubLink), so + * that what we must try to build is an ANTI not SEMI join. + * + * available_rels is the set of query rels that can safely be referenced + * in the sublink expression. (We must restrict this to avoid changing + * the semantics when a sublink is present in an outer join's ON qual.) + * The conversion must fail if the converted qual would reference any but + * these parent-query relids. * * On success, the returned JoinExpr has larg = NULL and rarg = the jointree * item representing the pulled-up subquery. The caller must set larg to @@ -1333,7 +1337,7 @@ convert_VALUES_to_ANY(PlannerInfo *root, Node *testexpr, Query *values) */ JoinExpr * convert_ANY_sublink_to_join(PlannerInfo *root, SubLink *sublink, - Relids available_rels) + bool under_not, Relids available_rels) { JoinExpr *result; Query *parse = root->parse; @@ -1351,6 +1355,19 @@ convert_ANY_sublink_to_join(PlannerInfo *root, SubLink *sublink, Assert(sublink->subLinkType == ANY_SUBLINK); + /* + * Per SQL spec, NOT IN is not ordinarily equivalent to an anti-join, so + * that by default we have to fail when under_not. 
However, if we can + * prove that neither the outer query's expressions nor the sub-select's + * output columns can be NULL, and further that the operator itself cannot + * return NULL for non-null inputs, then the logic is identical and it's + * safe to convert NOT IN to an anti-join. + */ + if (under_not && + (!sublink_testexpr_is_not_nullable(root, sublink) || + !query_outputs_are_not_nullable(subselect))) + return NULL; + /* * If the sub-select contains any Vars of the parent query, we treat it as * LATERAL. (Vars from higher levels don't matter here.) @@ -1428,7 +1445,7 @@ convert_ANY_sublink_to_join(PlannerInfo *root, SubLink *sublink, * And finally, build the JoinExpr node. */ result = makeNode(JoinExpr); - result->jointype = JOIN_SEMI; + result->jointype = under_not ? JOIN_ANTI : JOIN_SEMI; result->isNatural = false; result->larg = NULL; /* caller must fill this in */ result->rarg = (Node *) rtr; @@ -1441,12 +1458,134 @@ convert_ANY_sublink_to_join(PlannerInfo *root, SubLink *sublink, return result; } +/* + * sublink_testexpr_is_not_nullable: verify that testexpr of an ANY_SUBLINK + * guarantees a non-null result, assuming the inner side is also non-null. + * + * To ensure the expression never returns NULL, we require both that the outer + * expressions are provably non-nullable and that the operator itself is safe. + * We validate operator safety by checking for membership in a standard index + * operator family (B-tree or Hash); this acts as a proxy for standard boolean + * behavior, ensuring the operator does not produce NULL results from non-null + * inputs. + * + * We handle the three standard parser representations for ANY sublinks: a + * single OpExpr for single-column comparisons, a BoolExpr containing a list of + * OpExprs for multi-column equality or inequality checks (where equality + * becomes an AND and inequality becomes an OR), and a RowCompareExpr for + * multi-column ordering checks. 
In all cases, we validate the operators and + * the outer expressions. + * + * It is acceptable for this check not to be exhaustive. We can err on the + * side of conservatism: if we're not sure, it's okay to return FALSE. + */ +static bool +sublink_testexpr_is_not_nullable(PlannerInfo *root, SubLink *sublink) +{ + Node *testexpr = sublink->testexpr; + List *outer_exprs = NIL; + + /* Punt if sublink is not in the expected format */ + if (sublink->subLinkType != ANY_SUBLINK || testexpr == NULL) + return false; + + if (IsA(testexpr, OpExpr)) + { + /* single-column comparison */ + OpExpr *opexpr = (OpExpr *) testexpr; + + /* standard ANY structure should be op(outer_var, param) */ + if (list_length(opexpr->args) != 2) + return false; + + /* + * We rely on membership in a B-tree or Hash operator family as a + * guarantee that the operator acts as a proper boolean comparison and + * does not yield NULL for valid non-null inputs. + */ + if (!op_is_safe_index_member(opexpr->opno)) + return false; + + outer_exprs = lappend(outer_exprs, linitial(opexpr->args)); + } + else if (is_andclause(testexpr) || is_orclause(testexpr)) + { + /* multi-column equality or inequality checks */ + BoolExpr *bexpr = (BoolExpr *) testexpr; + + foreach_ptr(OpExpr, opexpr, bexpr->args) + { + if (!IsA(opexpr, OpExpr)) + return false; + + /* standard ANY structure should be op(outer_var, param) */ + if (list_length(opexpr->args) != 2) + return false; + + /* verify operator safety; see comment above */ + if (!op_is_safe_index_member(opexpr->opno)) + return false; + + outer_exprs = lappend(outer_exprs, linitial(opexpr->args)); + } + } + else if (IsA(testexpr, RowCompareExpr)) + { + /* multi-column ordering checks */ + RowCompareExpr *rcexpr = (RowCompareExpr *) testexpr; + + foreach_oid(opno, rcexpr->opnos) + { + /* verify operator safety; see comment above */ + if (!op_is_safe_index_member(opno)) + return false; + } + + outer_exprs = list_concat(outer_exprs, rcexpr->largs); + } + else + { + /* Punt 
if other node types */ + return false; + } + + /* + * Since the query hasn't yet been through expression preprocessing, we + * must apply flatten_join_alias_vars to the outer expressions to avoid + * being fooled by join aliases. + * + * We do not need to apply flatten_group_exprs though, since grouping Vars + * cannot appear in jointree quals. + */ + outer_exprs = (List *) + flatten_join_alias_vars(root, root->parse, (Node *) outer_exprs); + + /* Check that every outer expression is non-nullable */ + foreach_ptr(Expr, expr, outer_exprs) + { + /* + * We have already collected relation-level not-null constraints for + * the outer query, so we can consult the global hash table for + * nullability information. + */ + if (!expr_is_nonnullable(root, expr, NOTNULL_SOURCE_HASHTABLE)) + return false; + + /* + * Note: It is possible to further prove non-nullability by examining + * the qual clauses available at or below the jointree node where this + * NOT IN clause is evaluated, but for the moment it doesn't seem + * worth the extra complication. + */ + } + + return true; +} + /* * convert_EXISTS_sublink_to_join: try to convert an EXISTS SubLink to a join * - * The API of this function is identical to convert_ANY_sublink_to_join's, - * except that we also support the case where the caller has found NOT EXISTS, - * so we need an additional input parameter "under_not". + * The API of this function is identical to convert_ANY_sublink_to_join's. 
*/ JoinExpr * convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink, diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c index c90f4b32733..b2beb0a0d68 100644 --- a/src/backend/optimizer/prep/prepjointree.c +++ b/src/backend/optimizer/prep/prepjointree.c @@ -852,14 +852,15 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node, if ((saop = convert_VALUES_to_ANY(root, sublink->testexpr, (Query *) sublink->subselect)) != NULL) - + { /* * The VALUES sequence was simplified. Nothing more to do * here. */ return (Node *) saop; + } - if ((j = convert_ANY_sublink_to_join(root, sublink, + if ((j = convert_ANY_sublink_to_join(root, sublink, false, available_rels1)) != NULL) { /* Yes; insert the new join node into the join tree */ @@ -885,7 +886,7 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node, return NULL; } if (available_rels2 != NULL && - (j = convert_ANY_sublink_to_join(root, sublink, + (j = convert_ANY_sublink_to_join(root, sublink, false, available_rels2)) != NULL) { /* Yes; insert the new join node into the join tree */ @@ -970,14 +971,68 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node, } if (is_notclause(node)) { - /* If the immediate argument of NOT is EXISTS, try to convert */ + /* If the immediate argument of NOT is ANY or EXISTS, try to convert */ SubLink *sublink = (SubLink *) get_notclausearg((Expr *) node); JoinExpr *j; Relids child_rels; if (sublink && IsA(sublink, SubLink)) { - if (sublink->subLinkType == EXISTS_SUBLINK) + if (sublink->subLinkType == ANY_SUBLINK) + { + if ((j = convert_ANY_sublink_to_join(root, sublink, true, + available_rels1)) != NULL) + { + /* Yes; insert the new join node into the join tree */ + j->larg = *jtlink1; + *jtlink1 = (Node *) j; + /* Recursively process pulled-up jointree nodes */ + j->rarg = pull_up_sublinks_jointree_recurse(root, + j->rarg, + &child_rels); + + /* + * Now recursively process the pulled-up quals. 
Because + * we are underneath a NOT, we can't pull up sublinks that + * reference the left-hand stuff, but it's still okay to + * pull up sublinks referencing j->rarg. + */ + j->quals = pull_up_sublinks_qual_recurse(root, + j->quals, + &j->rarg, + child_rels, + NULL, NULL); + /* Return NULL representing constant TRUE */ + return NULL; + } + if (available_rels2 != NULL && + (j = convert_ANY_sublink_to_join(root, sublink, true, + available_rels2)) != NULL) + { + /* Yes; insert the new join node into the join tree */ + j->larg = *jtlink2; + *jtlink2 = (Node *) j; + /* Recursively process pulled-up jointree nodes */ + j->rarg = pull_up_sublinks_jointree_recurse(root, + j->rarg, + &child_rels); + + /* + * Now recursively process the pulled-up quals. Because + * we are underneath a NOT, we can't pull up sublinks that + * reference the left-hand stuff, but it's still okay to + * pull up sublinks referencing j->rarg. + */ + j->quals = pull_up_sublinks_qual_recurse(root, + j->quals, + &j->rarg, + child_rels, + NULL, NULL); + /* Return NULL representing constant TRUE */ + return NULL; + } + } + else if (sublink->subLinkType == EXISTS_SUBLINK) { if ((j = convert_EXISTS_sublink_to_join(root, sublink, true, available_rels1)) != NULL) @@ -3706,6 +3761,13 @@ has_notnull_forced_var(PlannerInfo *root, List *forced_null_vars, rte = rt_fetch(varno, root->parse->rtable); + /* We can only reason about ordinary relations */ + if (rte->rtekind != RTE_RELATION) + { + bms_free(forcednullattnums); + continue; + } + /* * We must skip inheritance parent tables, as some child tables may * have a NOT NULL constraint for a column while others may not. 
This diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c index a41d81734cf..f0f8e2515ec 100644 --- a/src/backend/optimizer/util/clauses.c +++ b/src/backend/optimizer/util/clauses.c @@ -21,6 +21,7 @@ #include "access/htup_details.h" #include "catalog/pg_class.h" +#include "catalog/pg_inherits.h" #include "catalog/pg_language.h" #include "catalog/pg_operator.h" #include "catalog/pg_proc.h" @@ -112,6 +113,7 @@ static bool contain_context_dependent_node_walker(Node *node, int *flags); static bool contain_leaked_vars_walker(Node *node, void *context); static Relids find_nonnullable_rels_walker(Node *node, bool top_level); static List *find_nonnullable_vars_walker(Node *node, bool top_level); +static void find_subquery_safe_quals(Node *jtnode, List **safe_quals); static bool is_strict_saop(ScalarArrayOpExpr *expr, bool falseOK); static bool convert_saop_to_hashed_saop_walker(Node *node, void *context); static Node *eval_const_expressions_mutator(Node *node, @@ -1433,6 +1435,10 @@ contain_leaked_vars_walker(Node *node, void *context) context); } +/***************************************************************************** + * Nullability analysis + *****************************************************************************/ + /* * find_nonnullable_rels * Determine which base rels are forced nonnullable by given clause. @@ -1701,7 +1707,7 @@ find_nonnullable_rels_walker(Node *node, bool top_level) * but here we assume that the input is a Boolean expression, and wish to * see if NULL inputs will provably cause a FALSE-or-NULL result. We expect * the expression to have been AND/OR flattened and converted to implicit-AND - * format. + * format (but the results are still good if it wasn't AND/OR flattened). * * Attnos of the identified Vars are returned in a multibitmapset (a List of * Bitmapsets). 
List indexes correspond to relids (varnos), while the per-rel
@@ -2021,6 +2027,240 @@ find_forced_null_var(Node *node)
 	return NULL;
 }
 
+/*
+ * query_outputs_are_not_nullable
+ *		Returns TRUE if the output values of the Query are certainly not NULL.
+ *		All output columns must return non-NULL to answer TRUE.
+ *
+ * The reason this takes a Query, and not just an individual tlist expression,
+ * is so that we can make use of the query's WHERE/ON clauses to prove it does
+ * not return nulls.
+ *
+ * In current usage, the passed sub-Query hasn't yet been through any planner
+ * processing. This means that applying find_nonnullable_vars() to its WHERE
+ * clauses isn't really ideal: for lack of const-simplification, we might be
+ * unable to prove not-nullness in some cases where we could have proved it
+ * afterwards. However, we should not get any false positive results.
+ *
+ * Like the other forms of nullability analysis above, we can err on the
+ * side of conservatism: if we're not sure, it's okay to return FALSE.
+ */
+bool
+query_outputs_are_not_nullable(Query *query)
+{
+	PlannerInfo subroot;
+	List	   *safe_quals = NIL;
+	List	   *nonnullable_vars = NIL;
+	bool		computed_nonnullable_vars = false;
+
+	/*
+	 * If the query contains set operations, punt. The set ops themselves
+	 * couldn't introduce nulls that weren't in their inputs, but the tlist
+	 * present in the top-level query is just dummy and won't give us useful
+	 * info. We could get an answer by recursing to examine each leaf query,
+	 * but for the moment it doesn't seem worth the extra complication.
+	 */
+	if (query->setOperations)
+		return false;
+
+	/*
+	 * If the query contains grouping sets, punt. Grouping sets can introduce
+	 * NULL values, and we currently lack the PlannerInfo needed to flatten
+	 * grouping Vars in the query's outputs.
+	 */
+	if (query->groupingSets)
+		return false;
+
+	/*
+	 * We need a PlannerInfo to pass to expr_is_nonnullable. Fortunately, we
+	 * can cons up an entirely dummy one, because only the "parse" link in the
+	 * struct is used by expr_is_nonnullable.
+	 */
+	MemSet(&subroot, 0, sizeof(subroot));
+	subroot.parse = query;
+
+	/*
+	 * Examine each targetlist entry to prove that it can't produce NULL.
+	 */
+	foreach_node(TargetEntry, tle, query->targetList)
+	{
+		Expr	   *expr = tle->expr;
+
+		/* Resjunk columns can be ignored: they don't produce output values */
+		if (tle->resjunk)
+			continue;
+
+		/*
+		 * Look through binary relabelings, since we know those don't
+		 * introduce nulls.
+		 */
+		while (expr && IsA(expr, RelabelType))
+			expr = ((RelabelType *) expr)->arg;
+
+		if (expr == NULL)		/* paranoia */
+			return false;
+
+		/*
+		 * Since the subquery hasn't yet been through expression
+		 * preprocessing, we must explicitly flatten grouping Vars and join
+		 * alias Vars in the given expression. Note that flatten_group_exprs
+		 * must be applied before flatten_join_alias_vars, as grouping Vars
+		 * can wrap join alias Vars.
+		 *
+		 * We must also apply flatten_join_alias_vars to the quals extracted
+		 * by find_subquery_safe_quals. We do not need to apply
+		 * flatten_group_exprs to these quals, though, because grouping Vars
+		 * cannot appear in jointree quals.
+		 */
+
+		/*
+		 * We have verified that the query does not contain grouping sets,
+		 * meaning the grouping Vars will not have varnullingrels that need
+		 * preserving, so it's safe to use NULL as the root here.
+		 */
+		if (query->hasGroupRTE)
+			expr = (Expr *) flatten_group_exprs(NULL, query, (Node *) expr);
+
+		/*
+		 * We won't be dealing with arbitrary expressions, so it's safe to use
+		 * NULL as the root, so long as adjust_standard_join_alias_expression
+		 * can handle everything the parser would make as a join alias
+		 * expression.
+		 */
+		expr = (Expr *) flatten_join_alias_vars(NULL, query, (Node *) expr);
+
+		/*
+		 * Check to see if the expr cannot be NULL. Since we're on a raw
+		 * parse tree, we need to look up the not-null constraints from the
+		 * system catalogs.
+		 */
+		if (expr_is_nonnullable(&subroot, expr, NOTNULL_SOURCE_SYSCACHE))
+			continue;
+
+		if (IsA(expr, Var))
+		{
+			Var		   *var = (Var *) expr;
+
+			/*
+			 * Punt for Vars that belong to an outer query level. The
+			 * multibitmapset returned by find_nonnullable_vars is indexed by
+			 * varno and attno only, with no record of query level, so an
+			 * outer-level Var could otherwise match a local Var by accident.
+			 */
+			if (var->varlevelsup != 0)
+				return false;
+
+			/*
+			 * For a plain Var, even if that didn't work, we can conclude that
+			 * the Var is not nullable if find_nonnullable_vars can find a
+			 * "var IS NOT NULL" or similarly strict condition among the quals
+			 * on non-outerjoined-rels. Compute the list of Vars having such
+			 * quals if we didn't already.
+			 */
+			if (!computed_nonnullable_vars)
+			{
+				find_subquery_safe_quals((Node *) query->jointree, &safe_quals);
+				safe_quals = (List *)
+					flatten_join_alias_vars(NULL, query, (Node *) safe_quals);
+				nonnullable_vars = find_nonnullable_vars((Node *) safe_quals);
+				computed_nonnullable_vars = true;
+			}
+
+			if (!mbms_is_member(var->varno,
+								var->varattno - FirstLowInvalidHeapAttributeNumber,
+								nonnullable_vars))
+				return false;	/* we failed to prove the Var non-null */
+		}
+		else
+		{
+			/* Punt otherwise */
+			return false;
+		}
+	}
+
+	return true;
+}
+
+/*
+ * find_subquery_safe_quals
+ *		Traverse jointree to locate quals on non-outerjoined-rels.
+ *
+ * We locate all WHERE and JOIN/ON quals that constrain the rels that are not
+ * below the nullable side of any outer join, and add them to the *safe_quals
+ * list (forming a list with implicit-AND semantics). These quals can be used
+ * to prove non-nullability of the subquery's outputs.
+ *
+ * Top-level caller must initialize *safe_quals to NIL.
+ */
+static void
+find_subquery_safe_quals(Node *jtnode, List **safe_quals)
+{
+	if (jtnode == NULL)
+		return;
+	if (IsA(jtnode, RangeTblRef))
+	{
+		/* Leaf node: nothing to do */
+		return;
+	}
+	else if (IsA(jtnode, FromExpr))
+	{
+		FromExpr   *f = (FromExpr *) jtnode;
+
+		/* All elements of the FROM list are allowable */
+		foreach_ptr(Node, child_node, f->fromlist)
+			find_subquery_safe_quals(child_node, safe_quals);
+		/* ... and its WHERE quals are too */
+		if (f->quals)
+			*safe_quals = lappend(*safe_quals, f->quals);
+	}
+	else if (IsA(jtnode, JoinExpr))
+	{
+		JoinExpr   *j = (JoinExpr *) jtnode;
+
+		switch (j->jointype)
+		{
+			case JOIN_INNER:
+				/* visit both children */
+				find_subquery_safe_quals(j->larg, safe_quals);
+				find_subquery_safe_quals(j->rarg, safe_quals);
+				/* and grab the ON quals too */
+				if (j->quals)
+					*safe_quals = lappend(*safe_quals, j->quals);
+				break;
+
+			case JOIN_LEFT:
+			case JOIN_SEMI:
+			case JOIN_ANTI:
+
+				/*
+				 * Only the left input is possibly non-nullable; furthermore,
+				 * the quals of this join don't constrain the left input.
+				 * Note: we probably can't see SEMI or ANTI joins at this
+				 * point, but if we do, we can treat them like LEFT joins.
+				 */
+				find_subquery_safe_quals(j->larg, safe_quals);
+				break;
+
+			case JOIN_RIGHT:
+				/* Reverse of the above case */
+				find_subquery_safe_quals(j->rarg, safe_quals);
+				break;
+
+			case JOIN_FULL:
+				/* Neither side is non-nullable, so stop descending */
+				break;
+
+			default:
+				elog(ERROR, "unrecognized join type: %d",
+					 (int) j->jointype);
+				break;
+		}
+	}
+	else
+		elog(ERROR, "unrecognized node type: %d",
+			 (int) nodeTag(jtnode));
+}
+
 /*
  * Can we treat a ScalarArrayOpExpr as strict?
* @@ -2739,7 +2970,8 @@ eval_const_expressions_mutator(Node *node, if (!has_nullable_nonconst && !expr_is_nonnullable(context->root, - (Expr *) lfirst(arg), false)) + (Expr *) lfirst(arg), + NOTNULL_SOURCE_HASHTABLE)) has_nullable_nonconst = true; } } @@ -3418,7 +3650,8 @@ eval_const_expressions_mutator(Node *node, newargs = lappend(newargs, e); break; } - if (expr_is_nonnullable(context->root, (Expr *) e, false)) + if (expr_is_nonnullable(context->root, (Expr *) e, + NOTNULL_SOURCE_HASHTABLE)) { if (newargs == NIL) return e; /* first expr */ @@ -3612,7 +3845,7 @@ eval_const_expressions_mutator(Node *node, */ if (relem && expr_is_nonnullable(context->root, (Expr *) relem, - false)) + NOTNULL_SOURCE_HASHTABLE)) { if (ntest->nulltesttype == IS_NULL) return makeBoolConst(false, false); @@ -3664,7 +3897,8 @@ eval_const_expressions_mutator(Node *node, return makeBoolConst(result, false); } if (!ntest->argisrow && arg && - expr_is_nonnullable(context->root, (Expr *) arg, false)) + expr_is_nonnullable(context->root, (Expr *) arg, + NOTNULL_SOURCE_HASHTABLE)) { bool result; @@ -3749,7 +3983,9 @@ eval_const_expressions_mutator(Node *node, return makeBoolConst(result, false); } - if (arg && expr_is_nonnullable(context->root, (Expr *) arg, false)) + if (arg && + expr_is_nonnullable(context->root, (Expr *) arg, + NOTNULL_SOURCE_HASHTABLE)) { /* * If arg is proven non-nullable, simplify to boolean @@ -4384,14 +4620,11 @@ simplify_aggref(Aggref *aggref, eval_const_expressions_context *context) * If the Var is defined NOT NULL and meanwhile is not nulled by any outer * joins or grouping sets, then we can know that it cannot be NULL. * - * use_rel_info indicates whether the corresponding RelOptInfo is available for - * use. + * "source" specifies where we should look for NOT NULL proofs. 
*/ bool -var_is_nonnullable(PlannerInfo *root, Var *var, bool use_rel_info) +var_is_nonnullable(PlannerInfo *root, Var *var, NotNullSource source) { - Bitmapset *notnullattnums = NULL; - Assert(IsA(var, Var)); /* skip upper-level Vars */ @@ -4406,35 +4639,89 @@ var_is_nonnullable(PlannerInfo *root, Var *var, bool use_rel_info) if (var->varattno < 0) return true; - /* - * Check if the Var is defined as NOT NULL. We retrieve the column NOT - * NULL constraint information from the corresponding RelOptInfo if it is - * available; otherwise, we search the hash table for this information. - */ - if (use_rel_info) + /* we don't trust whole-row Vars */ + if (var->varattno == 0) + return false; + + /* Check if the Var is defined as NOT NULL. */ + switch (source) { - RelOptInfo *rel = find_base_rel(root, var->varno); + case NOTNULL_SOURCE_RELOPT: + { + /* + * We retrieve the column NOT NULL constraint information from + * the corresponding RelOptInfo. + */ + RelOptInfo *rel; + Bitmapset *notnullattnums; - notnullattnums = rel->notnullattnums; + rel = find_base_rel(root, var->varno); + notnullattnums = rel->notnullattnums; + + return bms_is_member(var->varattno, notnullattnums); + } + case NOTNULL_SOURCE_HASHTABLE: + { + /* + * We retrieve the column NOT NULL constraint information from + * the hash table. + */ + RangeTblEntry *rte; + Bitmapset *notnullattnums; + + rte = planner_rt_fetch(var->varno, root); + + /* We can only reason about ordinary relations */ + if (rte->rtekind != RTE_RELATION) + return false; + + /* + * We must skip inheritance parent tables, as some child + * tables may have a NOT NULL constraint for a column while + * others may not. This cannot happen with partitioned + * tables, though. 
+ */ + if (rte->inh && rte->relkind != RELKIND_PARTITIONED_TABLE) + return false; + + notnullattnums = find_relation_notnullatts(root, rte->relid); + + return bms_is_member(var->varattno, notnullattnums); + } + case NOTNULL_SOURCE_SYSCACHE: + { + /* + * We look up the "attnotnull" field in the attribute + * relation. + */ + RangeTblEntry *rte; + + rte = planner_rt_fetch(var->varno, root); + + /* We can only reason about ordinary relations */ + if (rte->rtekind != RTE_RELATION) + return false; + + /* + * We must skip inheritance parent tables, as some child + * tables may have a NOT NULL constraint for a column while + * others may not. This cannot happen with partitioned + * tables, though. + * + * Note that we need to check if the relation actually has any + * children, as we might not have done that yet. + */ + if (rte->inh && has_subclass(rte->relid) && + rte->relkind != RELKIND_PARTITIONED_TABLE) + return false; + + return get_attnotnull(rte->relid, var->varattno); + } + default: + elog(ERROR, "unrecognized NotNullSource: %d", + (int) source); + break; } - else - { - RangeTblEntry *rte = planner_rt_fetch(var->varno, root); - - /* - * We must skip inheritance parent tables, as some child tables may - * have a NOT NULL constraint for a column while others may not. This - * cannot happen with partitioned tables, though. - */ - if (rte->inh && rte->relkind != RELKIND_PARTITIONED_TABLE) - return false; - - notnullattnums = find_relation_notnullatts(root, rte->relid); - } - - if (var->varattno > 0 && - bms_is_member(var->varattno, notnullattnums)) - return true; return false; } @@ -4444,16 +4731,22 @@ var_is_nonnullable(PlannerInfo *root, Var *var, bool use_rel_info) * * Returns true iff the given 'expr' cannot produce SQL NULLs. * - * If 'use_rel_info' is true, nullability of Vars is checked via the - * corresponding RelOptInfo for the given Var. Some callers require - * nullability information before RelOptInfos are generated. 
These should - * pass 'use_rel_info' as false. + * source: specifies where we should look for NOT NULL proofs for Vars. + * - NOTNULL_SOURCE_RELOPT: Used when RelOptInfos have been generated. We + * retrieve nullability information directly from the RelOptInfo corresponding + * to the Var. + * - NOTNULL_SOURCE_HASHTABLE: Used when RelOptInfos are not yet available, + * but we have already collected relation-level not-null constraints into the + * global hash table. + * - NOTNULL_SOURCE_SYSCACHE: Used for raw parse trees where neither + * RelOptInfos nor the hash table are available. In this case, we have to + * look up the 'attnotnull' field directly in the system catalogs. * * For now, we support only a limited set of expression types. Support for * additional node types can be added in the future. */ bool -expr_is_nonnullable(PlannerInfo *root, Expr *expr, bool use_rel_info) +expr_is_nonnullable(PlannerInfo *root, Expr *expr, NotNullSource source) { /* since this function recurses, it could be driven to stack overflow */ check_stack_depth(); @@ -4463,7 +4756,7 @@ expr_is_nonnullable(PlannerInfo *root, Expr *expr, bool use_rel_info) case T_Var: { if (root) - return var_is_nonnullable(root, (Var *) expr, use_rel_info); + return var_is_nonnullable(root, (Var *) expr, source); } break; case T_Const: @@ -4480,7 +4773,7 @@ expr_is_nonnullable(PlannerInfo *root, Expr *expr, bool use_rel_info) foreach_ptr(Expr, arg, coalesceexpr->args) { - if (expr_is_nonnullable(root, arg, use_rel_info)) + if (expr_is_nonnullable(root, arg, source)) return true; } } @@ -4495,7 +4788,7 @@ expr_is_nonnullable(PlannerInfo *root, Expr *expr, bool use_rel_info) foreach_ptr(Expr, arg, minmaxexpr->args) { - if (expr_is_nonnullable(root, arg, use_rel_info)) + if (expr_is_nonnullable(root, arg, source)) return true; } } @@ -4511,13 +4804,13 @@ expr_is_nonnullable(PlannerInfo *root, Expr *expr, bool use_rel_info) /* The default result must be present and non-nullable */ if (caseexpr->defresult == 
NULL || - !expr_is_nonnullable(root, caseexpr->defresult, use_rel_info)) + !expr_is_nonnullable(root, caseexpr->defresult, source)) return false; /* All branch results must be non-nullable */ foreach_ptr(CaseWhen, casewhen, caseexpr->args) { - if (!expr_is_nonnullable(root, casewhen->result, use_rel_info)) + if (!expr_is_nonnullable(root, casewhen->result, source)) return false; } @@ -4565,7 +4858,7 @@ expr_is_nonnullable(PlannerInfo *root, Expr *expr, bool use_rel_info) * non-nullable. */ return expr_is_nonnullable(root, ((RelabelType *) expr)->arg, - use_rel_info); + source); } default: break; diff --git a/src/backend/optimizer/util/var.c b/src/backend/optimizer/util/var.c index 2a792e3223a..907a255c36f 100644 --- a/src/backend/optimizer/util/var.c +++ b/src/backend/optimizer/util/var.c @@ -988,15 +988,12 @@ flatten_join_alias_vars_mutator(Node *node, * existing nullingrels field(s); otherwise we have to add a PlaceHolderVar * wrapper. * - * NOTE: this is also used by ruleutils.c, to deparse one query parsetree back - * to source text, and by check_output_expressions() to check for unsafe - * pushdowns. For these use-cases, root will be NULL, which is why we have to - * pass the Query separately. We need the root itself only for preserving - * varnullingrels. We can avoid preserving varnullingrels in the ruleutils.c's - * usage because it does not make any difference to the deparsed source text. - * We can also avoid it in check_output_expressions() because that function - * uses the expanded expressions solely to check for volatile or set-returning - * functions, which is independent of the Vars' nullingrels. + * NOTE: root may be passed as NULL, which is why we have to pass the Query + * separately. We need the root itself only for preserving varnullingrels. 
+ * Callers can safely pass NULL if preserving varnullingrels is unnecessary for + * their specific use case (e.g., deparsing source text, or scanning for + * volatile functions), or if it is already guaranteed that the query cannot + * contain grouping sets. */ Node * flatten_group_exprs(PlannerInfo *root, Query *query, Node *node) diff --git a/src/backend/utils/adt/int8.c b/src/backend/utils/adt/int8.c index 37d34685b93..6c8fb7b7275 100644 --- a/src/backend/utils/adt/int8.c +++ b/src/backend/utils/adt/int8.c @@ -834,7 +834,7 @@ int8inc_support(PG_FUNCTION_ARGS) PG_RETURN_POINTER(NULL); /* If the arg isn't NULLable, do the conversion */ - if (expr_is_nonnullable(req->root, arg, false)) + if (expr_is_nonnullable(req->root, arg, NOTNULL_SOURCE_HASHTABLE)) { Aggref *newagg; diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c index f16f1535785..6298a37f88e 100644 --- a/src/backend/utils/adt/ruleutils.c +++ b/src/backend/utils/adt/ruleutils.c @@ -5644,6 +5644,9 @@ get_query_def(Query *query, StringInfo buf, List *parentnamespace, /* * Replace any Vars in the query's targetlist and havingQual that * reference GROUP outputs with the underlying grouping expressions. + * + * We can safely pass NULL for the root here. Preserving varnullingrels + * makes no difference to the deparsed source text. */ if (query->hasGroupRTE) { diff --git a/src/backend/utils/cache/lsyscache.c b/src/backend/utils/cache/lsyscache.c index 1913b009d40..f10948483b9 100644 --- a/src/backend/utils/cache/lsyscache.c +++ b/src/backend/utils/cache/lsyscache.c @@ -858,6 +858,47 @@ comparison_ops_are_compatible(Oid opno1, Oid opno2) return result; } +/* + * op_is_safe_index_member + * Check if the operator is a member of a B-tree or Hash operator family. 
+ * + * We use this check as a proxy for "null-safety": if an operator is trusted by + * the btree or hash opfamily, it implies that the operator adheres to standard + * boolean behavior, and would not return NULL when given valid non-null + * inputs, as doing so would break index integrity. + */ +bool +op_is_safe_index_member(Oid opno) +{ + bool result = false; + CatCList *catlist; + int i; + + /* + * Search pg_amop to see if the target operator is registered for any + * btree or hash opfamily. + */ + catlist = SearchSysCacheList1(AMOPOPID, ObjectIdGetDatum(opno)); + + for (i = 0; i < catlist->n_members; i++) + { + HeapTuple tuple = &catlist->members[i]->tuple; + Form_pg_amop aform = (Form_pg_amop) GETSTRUCT(tuple); + + /* Check if the AM is B-tree or Hash */ + if (aform->amopmethod == BTREE_AM_OID || + aform->amopmethod == HASH_AM_OID) + { + result = true; + break; + } + } + + ReleaseSysCacheList(catlist); + + return result; +} + /* ---------- AMPROC CACHES ---------- */ @@ -1071,6 +1112,33 @@ get_attoptions(Oid relid, int16 attnum) return result; } +/* + * get_attnotnull + * + * Given the relation id and the attribute number, + * return the "attnotnull" field from the attribute relation. 
+ */ +bool +get_attnotnull(Oid relid, AttrNumber attnum) +{ + HeapTuple tp; + bool result = false; + + tp = SearchSysCache2(ATTNUM, + ObjectIdGetDatum(relid), + Int16GetDatum(attnum)); + + if (HeapTupleIsValid(tp)) + { + Form_pg_attribute att_tup = (Form_pg_attribute) GETSTRUCT(tp); + + result = att_tup->attnotnull; + ReleaseSysCache(tp); + } + + return result; +} + /* ---------- PG_CAST CACHE ---------- */ /* diff --git a/src/include/optimizer/clauses.h b/src/include/optimizer/clauses.h index a64034e8a6d..853a28c0007 100644 --- a/src/include/optimizer/clauses.h +++ b/src/include/optimizer/clauses.h @@ -42,6 +42,7 @@ extern Relids find_nonnullable_rels(Node *clause); extern List *find_nonnullable_vars(Node *clause); extern List *find_forced_null_vars(Node *node); extern Var *find_forced_null_var(Node *node); +extern bool query_outputs_are_not_nullable(Query *query); extern bool is_pseudo_constant_clause(Node *clause); extern bool is_pseudo_constant_clause_relids(Node *clause, Relids relids); diff --git a/src/include/optimizer/optimizer.h b/src/include/optimizer/optimizer.h index b562ca380a8..e8b409afb7f 100644 --- a/src/include/optimizer/optimizer.h +++ b/src/include/optimizer/optimizer.h @@ -130,6 +130,14 @@ extern Expr *canonicalize_qual(Expr *qual, bool is_check); /* in util/clauses.c: */ +/* Enum to specify where var_is_nonnullable should look for NOT NULL proofs */ +typedef enum +{ + NOTNULL_SOURCE_RELOPT, /* Use RelOptInfo */ + NOTNULL_SOURCE_HASHTABLE, /* Use Global Hash Table */ + NOTNULL_SOURCE_SYSCACHE, /* Use System Catalog */ +} NotNullSource; + extern bool contain_mutable_functions(Node *clause); extern bool contain_mutable_functions_after_planning(Expr *expr); extern bool contain_volatile_functions(Node *clause); @@ -145,10 +153,11 @@ extern Node *estimate_expression_value(PlannerInfo *root, Node *node); extern Expr *evaluate_expr(Expr *expr, Oid result_type, int32 result_typmod, Oid result_collation); -extern bool var_is_nonnullable(PlannerInfo 
*root, Var *var, bool use_rel_info); +extern bool var_is_nonnullable(PlannerInfo *root, Var *var, + NotNullSource source); extern bool expr_is_nonnullable(PlannerInfo *root, Expr *expr, - bool use_rel_info); + NotNullSource source); extern List *expand_function_arguments(List *args, bool include_out_arguments, Oid result_type, diff --git a/src/include/optimizer/subselect.h b/src/include/optimizer/subselect.h index 8a5503eb973..4ecccf46bd3 100644 --- a/src/include/optimizer/subselect.h +++ b/src/include/optimizer/subselect.h @@ -22,6 +22,7 @@ extern ScalarArrayOpExpr *convert_VALUES_to_ANY(PlannerInfo *root, Query *values); extern JoinExpr *convert_ANY_sublink_to_join(PlannerInfo *root, SubLink *sublink, + bool under_not, Relids available_rels); extern JoinExpr *convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink, diff --git a/src/include/utils/lsyscache.h b/src/include/utils/lsyscache.h index 5655aca4c14..b9ad84ecd41 100644 --- a/src/include/utils/lsyscache.h +++ b/src/include/utils/lsyscache.h @@ -89,6 +89,7 @@ extern bool get_op_hash_functions(Oid opno, extern List *get_op_index_interpretation(Oid opno); extern bool equality_ops_are_compatible(Oid opno1, Oid opno2); extern bool comparison_ops_are_compatible(Oid opno1, Oid opno2); +extern bool op_is_safe_index_member(Oid opno); extern Oid get_opfamily_proc(Oid opfamily, Oid lefttype, Oid righttype, int16 procnum); extern char *get_attname(Oid relid, AttrNumber attnum, bool missing_ok); @@ -98,6 +99,7 @@ extern Oid get_atttype(Oid relid, AttrNumber attnum); extern void get_atttypetypmodcoll(Oid relid, AttrNumber attnum, Oid *typid, int32 *typmod, Oid *collid); extern Datum get_attoptions(Oid relid, int16 attnum); +extern bool get_attnotnull(Oid relid, AttrNumber attnum); extern Oid get_cast_oid(Oid sourcetypeid, Oid targettypeid, bool missing_ok); extern char *get_collation_name(Oid colloid); extern bool get_collation_isdeterministic(Oid colloid); diff --git a/src/test/regress/expected/subselect.out 
b/src/test/regress/expected/subselect.out index 2135d82884d..200236a0a69 100644 --- a/src/test/regress/expected/subselect.out +++ b/src/test/regress/expected/subselect.out @@ -3323,3 +3323,442 @@ SELECT ten FROM onek t WHERE 1.0::integer IN ((VALUES (1), (3))); Seq Scan on onek t (1 row) +-- +-- Check NOT IN performs an ANTI JOIN when both the outer query's expressions +-- and the sub-select's output columns are provably non-nullable, and the +-- operator itself cannot return NULL for non-null inputs. +-- +BEGIN; +CREATE TEMP TABLE not_null_tab (id int NOT NULL, val int NOT NULL); +CREATE TEMP TABLE null_tab (id int, val int); +-- ANTI JOIN: both sides are defined NOT NULL +EXPLAIN (COSTS OFF) +SELECT * FROM not_null_tab +WHERE id NOT IN (SELECT id FROM not_null_tab); + QUERY PLAN +----------------------------------------------------- + Hash Anti Join + Hash Cond: (not_null_tab.id = not_null_tab_1.id) + -> Seq Scan on not_null_tab + -> Hash + -> Seq Scan on not_null_tab not_null_tab_1 +(5 rows) + +-- No ANTI JOIN: outer side is nullable +EXPLAIN (COSTS OFF) +SELECT * FROM null_tab +WHERE id NOT IN (SELECT id FROM not_null_tab); + QUERY PLAN +---------------------------------------------------------- + Seq Scan on null_tab + Filter: (NOT (ANY (id = (hashed SubPlan any_1).col1))) + SubPlan any_1 + -> Seq Scan on not_null_tab +(4 rows) + +-- No ANTI JOIN: inner side is nullable +EXPLAIN (COSTS OFF) +SELECT * FROM not_null_tab +WHERE id NOT IN (SELECT id FROM null_tab); + QUERY PLAN +---------------------------------------------------------- + Seq Scan on not_null_tab + Filter: (NOT (ANY (id = (hashed SubPlan any_1).col1))) + SubPlan any_1 + -> Seq Scan on null_tab +(4 rows) + +-- ANTI JOIN: outer side is defined NOT NULL, inner side is forced nonnullable +-- by qual clause +EXPLAIN (COSTS OFF) +SELECT * FROM not_null_tab +WHERE id NOT IN (SELECT id FROM null_tab WHERE id IS NOT NULL); + QUERY PLAN +---------------------------------------------- + Hash Anti Join + Hash 
Cond: (not_null_tab.id = null_tab.id) + -> Seq Scan on not_null_tab + -> Hash + -> Seq Scan on null_tab + Filter: (id IS NOT NULL) +(6 rows) + +-- No ANTI JOIN: outer side is nullable (we don't check outer query quals for now) +EXPLAIN (COSTS OFF) +SELECT * FROM null_tab +WHERE id IS NOT NULL + AND id NOT IN (SELECT id FROM not_null_tab); + QUERY PLAN +--------------------------------------------------------------------------------- + Seq Scan on null_tab + Filter: ((id IS NOT NULL) AND (NOT (ANY (id = (hashed SubPlan any_1).col1)))) + SubPlan any_1 + -> Seq Scan on not_null_tab +(4 rows) + +-- ANTI JOIN: outer side is defined NOT NULL, inner side is defined NOT NULL +-- and is not nulled by outer join +EXPLAIN (COSTS OFF) +SELECT * FROM not_null_tab +WHERE id NOT IN ( + SELECT t1.id + FROM not_null_tab t1 + LEFT JOIN not_null_tab t2 ON t1.id = t2.id +); + QUERY PLAN +----------------------------------------------------- + Hash Anti Join + Hash Cond: (not_null_tab.id = t1.id) + -> Seq Scan on not_null_tab + -> Hash + -> Merge Left Join + Merge Cond: (t1.id = t2.id) + -> Sort + Sort Key: t1.id + -> Seq Scan on not_null_tab t1 + -> Sort + Sort Key: t2.id + -> Seq Scan on not_null_tab t2 +(12 rows) + +-- No ANTI JOIN: inner side is defined NOT NULL but is nulled by outer join +EXPLAIN (COSTS OFF) +SELECT * FROM not_null_tab +WHERE id NOT IN ( + SELECT t2.id + FROM not_null_tab t1 + LEFT JOIN not_null_tab t2 ON t1.id = t2.id +); + QUERY PLAN +---------------------------------------------------------- + Seq Scan on not_null_tab + Filter: (NOT (ANY (id = (hashed SubPlan any_1).col1))) + SubPlan any_1 + -> Merge Left Join + Merge Cond: (t1.id = t2.id) + -> Sort + Sort Key: t1.id + -> Seq Scan on not_null_tab t1 + -> Sort + Sort Key: t2.id + -> Seq Scan on not_null_tab t2 +(11 rows) + +-- ANTI JOIN: outer side is defined NOT NULL, inner side is forced nonnullable +-- by qual clause +EXPLAIN (COSTS OFF) +SELECT * FROM not_null_tab +WHERE id NOT IN ( + SELECT t2.id + FROM 
not_null_tab t1 + LEFT JOIN not_null_tab t2 ON t1.id = t2.id + WHERE t2.id IS NOT NULL +); + QUERY PLAN +----------------------------------------------------- + Hash Anti Join + Hash Cond: (not_null_tab.id = t2.id) + -> Seq Scan on not_null_tab + -> Hash + -> Merge Join + Merge Cond: (t1.id = t2.id) + -> Sort + Sort Key: t1.id + -> Seq Scan on not_null_tab t1 + -> Sort + Sort Key: t2.id + -> Seq Scan on not_null_tab t2 +(12 rows) + +-- ANTI JOIN: outer side is defined NOT NULL, inner side is forced nonnullable +-- by qual clause +EXPLAIN (COSTS OFF) +SELECT * FROM not_null_tab +WHERE id NOT IN ( + SELECT t1.id + FROM null_tab t1 + LEFT JOIN null_tab t2 ON t1.id = t2.id + WHERE t1.id IS NOT NULL +); + QUERY PLAN +---------------------------------------------------- + Hash Anti Join + Hash Cond: (not_null_tab.id = t1.id) + -> Seq Scan on not_null_tab + -> Hash + -> Merge Left Join + Merge Cond: (t1.id = t2.id) + -> Sort + Sort Key: t1.id + -> Seq Scan on null_tab t1 + Filter: (id IS NOT NULL) + -> Sort + Sort Key: t2.id + -> Seq Scan on null_tab t2 +(13 rows) + +-- ANTI JOIN: outer side is defined NOT NULL, inner side is forced nonnullable +-- by qual clause +EXPLAIN (COSTS OFF) +SELECT * FROM not_null_tab +WHERE id NOT IN ( + SELECT t1.id + FROM null_tab t1 + INNER JOIN null_tab t2 ON t1.id = t2.id + LEFT JOIN null_tab t3 ON TRUE +); + QUERY PLAN +------------------------------------------------- + Merge Anti Join + Merge Cond: (not_null_tab.id = t1.id) + -> Sort + Sort Key: not_null_tab.id + -> Seq Scan on not_null_tab + -> Nested Loop Left Join + -> Merge Join + Merge Cond: (t1.id = t2.id) + -> Sort + Sort Key: t1.id + -> Seq Scan on null_tab t1 + -> Sort + Sort Key: t2.id + -> Seq Scan on null_tab t2 + -> Materialize + -> Seq Scan on null_tab t3 +(16 rows) + +-- ANTI JOIN: outer side is defined NOT NULL and is not nulled by outer join, +-- inner side is defined NOT NULL +EXPLAIN (COSTS OFF) +SELECT * FROM not_null_tab t1 +LEFT JOIN not_null_tab t2 ON t1.id = 
t2.id +WHERE t1.id NOT IN (SELECT id FROM not_null_tab); + QUERY PLAN +---------------------------------------------------- + Merge Left Join + Merge Cond: (t1.id = t2.id) + -> Sort + Sort Key: t1.id + -> Hash Anti Join + Hash Cond: (t1.id = not_null_tab.id) + -> Seq Scan on not_null_tab t1 + -> Hash + -> Seq Scan on not_null_tab + -> Sort + Sort Key: t2.id + -> Seq Scan on not_null_tab t2 +(12 rows) + +-- No ANTI JOIN: outer side is nulled by outer join +EXPLAIN (COSTS OFF) +SELECT * FROM not_null_tab t1 +LEFT JOIN not_null_tab t2 ON t1.id = t2.id +WHERE t2.id NOT IN (SELECT id FROM not_null_tab); + QUERY PLAN +------------------------------------------------------------- + Merge Left Join + Merge Cond: (t1.id = t2.id) + Filter: (NOT (ANY (t2.id = (hashed SubPlan any_1).col1))) + -> Sort + Sort Key: t1.id + -> Seq Scan on not_null_tab t1 + -> Sort + Sort Key: t2.id + -> Seq Scan on not_null_tab t2 + SubPlan any_1 + -> Seq Scan on not_null_tab +(11 rows) + +-- No ANTI JOIN: sublink is in an outer join's ON qual and references the +-- non-nullable side +EXPLAIN (COSTS OFF) +SELECT * FROM not_null_tab t1 +LEFT JOIN not_null_tab t2 +ON t1.id NOT IN (SELECT id FROM not_null_tab); + QUERY PLAN +------------------------------------------------------------------ + Nested Loop Left Join + Join Filter: (NOT (ANY (t1.id = (hashed SubPlan any_1).col1))) + -> Seq Scan on not_null_tab t1 + -> Materialize + -> Seq Scan on not_null_tab t2 + SubPlan any_1 + -> Seq Scan on not_null_tab +(7 rows) + +-- ANTI JOIN: outer side is defined NOT NULL and is not nulled by outer join, +-- inner side is defined NOT NULL +EXPLAIN (COSTS OFF) +SELECT * FROM not_null_tab t1 +LEFT JOIN not_null_tab t2 +ON t2.id NOT IN (SELECT id FROM not_null_tab); + QUERY PLAN +---------------------------------------------------- + Nested Loop Left Join + -> Seq Scan on not_null_tab t1 + -> Materialize + -> Hash Anti Join + Hash Cond: (t2.id = not_null_tab.id) + -> Seq Scan on not_null_tab t2 + -> Hash + -> Seq 
Scan on not_null_tab +(8 rows) + +-- ANTI JOIN: both sides are defined NOT NULL +EXPLAIN (COSTS OFF) +SELECT * FROM not_null_tab +WHERE (id, val) NOT IN (SELECT id, val FROM not_null_tab); + QUERY PLAN +--------------------------------------------------------------------------------------------------- + Merge Anti Join + Merge Cond: ((not_null_tab.id = not_null_tab_1.id) AND (not_null_tab.val = not_null_tab_1.val)) + -> Sort + Sort Key: not_null_tab.id, not_null_tab.val + -> Seq Scan on not_null_tab + -> Sort + Sort Key: not_null_tab_1.id, not_null_tab_1.val + -> Seq Scan on not_null_tab not_null_tab_1 +(8 rows) + +-- ANTI JOIN: both sides are defined NOT NULL +EXPLAIN (COSTS OFF) +SELECT * FROM not_null_tab +WHERE NOT (id, val) > ANY (SELECT id, val FROM not_null_tab); + QUERY PLAN +------------------------------------------------------------------------------------------------------ + Nested Loop Anti Join + Join Filter: (ROW(not_null_tab.id, not_null_tab.val) > ROW(not_null_tab_1.id, not_null_tab_1.val)) + -> Seq Scan on not_null_tab + -> Materialize + -> Seq Scan on not_null_tab not_null_tab_1 +(5 rows) + +-- No ANTI JOIN: one column of the outer side is nullable +EXPLAIN (COSTS OFF) +SELECT * FROM not_null_tab t1, null_tab t2 +WHERE (t1.id, t2.id) NOT IN (SELECT id, val FROM not_null_tab); + QUERY PLAN +-------------------------------------------------------------------------------------------------------------- + Nested Loop + Join Filter: (NOT (ANY ((t1.id = (hashed SubPlan any_1).col1) AND (t2.id = (hashed SubPlan any_1).col2)))) + -> Seq Scan on not_null_tab t1 + -> Materialize + -> Seq Scan on null_tab t2 + SubPlan any_1 + -> Seq Scan on not_null_tab +(7 rows) + +-- No ANTI JOIN: one column of the inner side is nullable +EXPLAIN (COSTS OFF) +SELECT * FROM not_null_tab +WHERE (id, val) NOT IN (SELECT t1.id, t2.id FROM not_null_tab t1, null_tab t2); + QUERY PLAN +-------------------------------------------------------------------------------------- + Seq 
Scan on not_null_tab + Filter: (NOT (ANY ((id = (SubPlan any_1).col1) AND (val = (SubPlan any_1).col2)))) + SubPlan any_1 + -> Materialize + -> Nested Loop + -> Seq Scan on not_null_tab t1 + -> Materialize + -> Seq Scan on null_tab t2 +(8 rows) + +-- ANTI JOIN: COALESCE(nullable, constant) is non-nullable +EXPLAIN (COSTS OFF) +SELECT * FROM null_tab +WHERE COALESCE(id, -1) NOT IN (SELECT id FROM not_null_tab); + QUERY PLAN +----------------------------------------------------------------------- + Hash Anti Join + Hash Cond: (COALESCE(null_tab.id, '-1'::integer) = not_null_tab.id) + -> Seq Scan on null_tab + -> Hash + -> Seq Scan on not_null_tab +(5 rows) + +EXPLAIN (COSTS OFF) +SELECT * FROM not_null_tab +WHERE id NOT IN (SELECT COALESCE(id, -1) FROM null_tab); + QUERY PLAN +----------------------------------------------------------------------- + Hash Anti Join + Hash Cond: (not_null_tab.id = COALESCE(null_tab.id, '-1'::integer)) + -> Seq Scan on not_null_tab + -> Hash + -> Seq Scan on null_tab +(5 rows) + +-- ANTI JOIN: GROUP BY (without Grouping Sets) preserves the non-nullability of +-- the column +EXPLAIN (COSTS OFF) +SELECT * FROM not_null_tab +WHERE id NOT IN (SELECT id FROM not_null_tab GROUP BY id); + QUERY PLAN +----------------------------------------------------------- + Hash Anti Join + Hash Cond: (not_null_tab.id = not_null_tab_1.id) + -> Seq Scan on not_null_tab + -> Hash + -> HashAggregate + Group Key: not_null_tab_1.id + -> Seq Scan on not_null_tab not_null_tab_1 +(7 rows) + +-- No ANTI JOIN: GROUP BY on a nullable column +EXPLAIN (COSTS OFF) +SELECT * FROM not_null_tab +WHERE id NOT IN (SELECT id FROM null_tab GROUP BY id); + QUERY PLAN +---------------------------------------------------------- + Seq Scan on not_null_tab + Filter: (NOT (ANY (id = (hashed SubPlan any_1).col1))) + SubPlan any_1 + -> HashAggregate + Group Key: null_tab.id + -> Seq Scan on null_tab +(6 rows) + +-- No ANTI JOIN: Grouping Sets can introduce NULLs +EXPLAIN (COSTS OFF) 
+SELECT * FROM not_null_tab +WHERE id NOT IN ( + SELECT id + FROM not_null_tab + GROUP BY GROUPING SETS ((id), (val)) +); + QUERY PLAN +---------------------------------------------------------- + Seq Scan on not_null_tab + Filter: (NOT (ANY (id = (hashed SubPlan any_1).col1))) + SubPlan any_1 + -> HashAggregate + Hash Key: not_null_tab_1.id + Hash Key: not_null_tab_1.val + -> Seq Scan on not_null_tab not_null_tab_1 +(7 rows) + +-- create a custom "unsafe" equality operator +CREATE FUNCTION int4eq_unsafe(int4, int4) + RETURNS bool + AS 'int4eq' + LANGUAGE internal IMMUTABLE; +CREATE OPERATOR ?= ( + PROCEDURE = int4eq_unsafe, + LEFTARG = int4, + RIGHTARG = int4 +); +-- No ANTI JOIN: the operator is not safe +EXPLAIN (COSTS OFF) +SELECT * FROM not_null_tab +WHERE NOT id ?= ANY (SELECT id FROM not_null_tab); + QUERY PLAN +------------------------------------------------------- + Seq Scan on not_null_tab + Filter: (NOT (ANY (id ?= (SubPlan any_1).col1))) + SubPlan any_1 + -> Materialize + -> Seq Scan on not_null_tab not_null_tab_1 +(5 rows) + +ROLLBACK; diff --git a/src/test/regress/sql/subselect.sql b/src/test/regress/sql/subselect.sql index cadc3293687..4cd016f4ac3 100644 --- a/src/test/regress/sql/subselect.sql +++ b/src/test/regress/sql/subselect.sql @@ -1448,3 +1448,188 @@ SELECT * FROM onek t1, lateral (SELECT * FROM onek t2 WHERE t2.ten IN (values (t -- VtA causes the whole expression to be evaluated as a constant EXPLAIN (COSTS OFF) SELECT ten FROM onek t WHERE 1.0::integer IN ((VALUES (1), (3))); + +-- +-- Check NOT IN performs an ANTI JOIN when both the outer query's expressions +-- and the sub-select's output columns are provably non-nullable, and the +-- operator itself cannot return NULL for non-null inputs. 
+-- + +BEGIN; + +CREATE TEMP TABLE not_null_tab (id int NOT NULL, val int NOT NULL); +CREATE TEMP TABLE null_tab (id int, val int); + +-- ANTI JOIN: both sides are defined NOT NULL +EXPLAIN (COSTS OFF) +SELECT * FROM not_null_tab +WHERE id NOT IN (SELECT id FROM not_null_tab); + +-- No ANTI JOIN: outer side is nullable +EXPLAIN (COSTS OFF) +SELECT * FROM null_tab +WHERE id NOT IN (SELECT id FROM not_null_tab); + +-- No ANTI JOIN: inner side is nullable +EXPLAIN (COSTS OFF) +SELECT * FROM not_null_tab +WHERE id NOT IN (SELECT id FROM null_tab); + +-- ANTI JOIN: outer side is defined NOT NULL, inner side is forced nonnullable +-- by qual clause +EXPLAIN (COSTS OFF) +SELECT * FROM not_null_tab +WHERE id NOT IN (SELECT id FROM null_tab WHERE id IS NOT NULL); + +-- No ANTI JOIN: outer side is nullable (we don't check outer query quals for now) +EXPLAIN (COSTS OFF) +SELECT * FROM null_tab +WHERE id IS NOT NULL + AND id NOT IN (SELECT id FROM not_null_tab); + +-- ANTI JOIN: outer side is defined NOT NULL, inner side is defined NOT NULL +-- and is not nulled by outer join +EXPLAIN (COSTS OFF) +SELECT * FROM not_null_tab +WHERE id NOT IN ( + SELECT t1.id + FROM not_null_tab t1 + LEFT JOIN not_null_tab t2 ON t1.id = t2.id +); + +-- No ANTI JOIN: inner side is defined NOT NULL but is nulled by outer join +EXPLAIN (COSTS OFF) +SELECT * FROM not_null_tab +WHERE id NOT IN ( + SELECT t2.id + FROM not_null_tab t1 + LEFT JOIN not_null_tab t2 ON t1.id = t2.id +); + +-- ANTI JOIN: outer side is defined NOT NULL, inner side is forced nonnullable +-- by qual clause +EXPLAIN (COSTS OFF) +SELECT * FROM not_null_tab +WHERE id NOT IN ( + SELECT t2.id + FROM not_null_tab t1 + LEFT JOIN not_null_tab t2 ON t1.id = t2.id + WHERE t2.id IS NOT NULL +); + +-- ANTI JOIN: outer side is defined NOT NULL, inner side is forced nonnullable +-- by qual clause +EXPLAIN (COSTS OFF) +SELECT * FROM not_null_tab +WHERE id NOT IN ( + SELECT t1.id + FROM null_tab t1 + LEFT JOIN null_tab t2 ON t1.id = t2.id + 
WHERE t1.id IS NOT NULL +); + +-- ANTI JOIN: outer side is defined NOT NULL, inner side is forced nonnullable +-- by qual clause +EXPLAIN (COSTS OFF) +SELECT * FROM not_null_tab +WHERE id NOT IN ( + SELECT t1.id + FROM null_tab t1 + INNER JOIN null_tab t2 ON t1.id = t2.id + LEFT JOIN null_tab t3 ON TRUE +); + +-- ANTI JOIN: outer side is defined NOT NULL and is not nulled by outer join, +-- inner side is defined NOT NULL +EXPLAIN (COSTS OFF) +SELECT * FROM not_null_tab t1 +LEFT JOIN not_null_tab t2 ON t1.id = t2.id +WHERE t1.id NOT IN (SELECT id FROM not_null_tab); + +-- No ANTI JOIN: outer side is nulled by outer join +EXPLAIN (COSTS OFF) +SELECT * FROM not_null_tab t1 +LEFT JOIN not_null_tab t2 ON t1.id = t2.id +WHERE t2.id NOT IN (SELECT id FROM not_null_tab); + +-- No ANTI JOIN: sublink is in an outer join's ON qual and references the +-- non-nullable side +EXPLAIN (COSTS OFF) +SELECT * FROM not_null_tab t1 +LEFT JOIN not_null_tab t2 +ON t1.id NOT IN (SELECT id FROM not_null_tab); + +-- ANTI JOIN: outer side is defined NOT NULL and is not nulled by outer join, +-- inner side is defined NOT NULL +EXPLAIN (COSTS OFF) +SELECT * FROM not_null_tab t1 +LEFT JOIN not_null_tab t2 +ON t2.id NOT IN (SELECT id FROM not_null_tab); + +-- ANTI JOIN: both sides are defined NOT NULL +EXPLAIN (COSTS OFF) +SELECT * FROM not_null_tab +WHERE (id, val) NOT IN (SELECT id, val FROM not_null_tab); + +-- ANTI JOIN: both sides are defined NOT NULL +EXPLAIN (COSTS OFF) +SELECT * FROM not_null_tab +WHERE NOT (id, val) > ANY (SELECT id, val FROM not_null_tab); + +-- No ANTI JOIN: one column of the outer side is nullable +EXPLAIN (COSTS OFF) +SELECT * FROM not_null_tab t1, null_tab t2 +WHERE (t1.id, t2.id) NOT IN (SELECT id, val FROM not_null_tab); + +-- No ANTI JOIN: one column of the inner side is nullable +EXPLAIN (COSTS OFF) +SELECT * FROM not_null_tab +WHERE (id, val) NOT IN (SELECT t1.id, t2.id FROM not_null_tab t1, null_tab t2); + +-- ANTI JOIN: COALESCE(nullable, constant) is 
non-nullable +EXPLAIN (COSTS OFF) +SELECT * FROM null_tab +WHERE COALESCE(id, -1) NOT IN (SELECT id FROM not_null_tab); + +EXPLAIN (COSTS OFF) +SELECT * FROM not_null_tab +WHERE id NOT IN (SELECT COALESCE(id, -1) FROM null_tab); + +-- ANTI JOIN: GROUP BY (without Grouping Sets) preserves the non-nullability of +-- the column +EXPLAIN (COSTS OFF) +SELECT * FROM not_null_tab +WHERE id NOT IN (SELECT id FROM not_null_tab GROUP BY id); + +-- No ANTI JOIN: GROUP BY on a nullable column +EXPLAIN (COSTS OFF) +SELECT * FROM not_null_tab +WHERE id NOT IN (SELECT id FROM null_tab GROUP BY id); + +-- No ANTI JOIN: Grouping Sets can introduce NULLs +EXPLAIN (COSTS OFF) +SELECT * FROM not_null_tab +WHERE id NOT IN ( + SELECT id + FROM not_null_tab + GROUP BY GROUPING SETS ((id), (val)) +); + +-- create a custom "unsafe" equality operator +CREATE FUNCTION int4eq_unsafe(int4, int4) + RETURNS bool + AS 'int4eq' + LANGUAGE internal IMMUTABLE; + +CREATE OPERATOR ?= ( + PROCEDURE = int4eq_unsafe, + LEFTARG = int4, + RIGHTARG = int4 +); + +-- No ANTI JOIN: the operator is not safe +EXPLAIN (COSTS OFF) +SELECT * FROM not_null_tab +WHERE NOT id ?= ANY (SELECT id FROM not_null_tab); + +ROLLBACK; diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 3da19d41413..141b9d6e077 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -1788,6 +1788,7 @@ Node NodeTag NonEmptyRange NoneCompressorState +NotNullSource Notification NotificationList NotifyStmt