postgresql/src/backend/optimizer/plan/subselect.c

2134 lines
63 KiB
C
Raw Normal View History

/*-------------------------------------------------------------------------
*
* subselect.c
* Planning routines for subselects and parameters.
*
2009-01-01 12:24:05 -05:00
* Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/plan/subselect.c,v 1.146 2009/02/25 03:30:37 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "catalog/pg_operator.h"
#include "catalog/pg_type.h"
#include "miscadmin.h"
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
#include "optimizer/clauses.h"
#include "optimizer/cost.h"
#include "optimizer/planmain.h"
1999-07-16 01:00:38 -04:00
#include "optimizer/planner.h"
#include "optimizer/prep.h"
1999-07-16 01:00:38 -04:00
#include "optimizer/subselect.h"
#include "optimizer/var.h"
#include "parser/parse_relation.h"
#include "parser/parsetree.h"
#include "rewrite/rewriteManip.h"
#include "utils/builtins.h"
#include "utils/lsyscache.h"
#include "utils/syscache.h"
typedef struct convert_testexpr_context
{
PlannerInfo *root;
List *subst_nodes; /* Nodes to substitute for Params */
} convert_testexpr_context;
typedef struct process_sublinks_context
{
PlannerInfo *root;
bool isTopQual;
} process_sublinks_context;
typedef struct finalize_primnode_context
{
PlannerInfo *root;
Fix mis-calculation of extParam/allParam sets for plan nodes, as seen in bug #4290. The fundamental bug is that masking extParam by outer_params, as finalize_plan had been doing, caused us to lose the information that an initPlan depended on the output of a sibling initPlan. On reflection the best thing to do seemed to be not to try to adjust outer_params for this case but get rid of it entirely. The only thing it was really doing for us was to filter out param IDs associated with SubPlan nodes, and that can be done (with greater accuracy) while processing individual SubPlan nodes in finalize_primnode. This approach was vindicated by the discovery that the masking method was hiding a second bug: SS_finalize_plan failed to remove extParam bits for initPlan output params that were referenced in the main plan tree (it only got rid of those referenced by other initPlans). It's not clear that this caused any real problems, given the limited use of extParam by the executor, but it's certainly not what was intended. I originally thought that there was also a problem with needing to include indirect dependencies on external params in initPlans' param sets, but it turns out that the executor handles this correctly so long as the depended-on initPlan is earlier in the initPlans list than the one using its output. That seems a bit of a fragile assumption, but it is true at the moment, so I just documented it in some code comments rather than making what would be rather invasive changes to remove the assumption. Back-patch to 8.1. Previous versions don't have the case of initPlans referring to other initPlans' outputs, so while the existing logic is still questionable for them, there are not any known bugs to be fixed. So I'll refrain from changing them for now.
2008-07-09 21:17:29 -04:00
Bitmapset *paramids; /* Non-local PARAM_EXEC paramids found */
} finalize_primnode_context;
static Node *build_subplan(PlannerInfo *root, Plan *plan, List *rtable,
SubLinkType subLinkType, Node *testexpr,
bool adjust_testexpr, bool unknownEqFalse);
static List *generate_subquery_params(PlannerInfo *root, List *tlist,
List **paramIds);
static List *generate_subquery_vars(PlannerInfo *root, List *tlist,
Index varno);
static Node *convert_testexpr(PlannerInfo *root,
2007-11-15 16:14:46 -05:00
Node *testexpr,
List *subst_nodes);
static Node *convert_testexpr_mutator(Node *node,
2006-10-03 20:30:14 -04:00
convert_testexpr_context *context);
static bool subplan_is_hashable(Plan *plan);
static bool testexpr_is_hashable(Node *testexpr);
static bool hash_ok_operator(OpExpr *expr);
static bool simplify_EXISTS_query(Query *query);
static Query *convert_EXISTS_to_ANY(PlannerInfo *root, Query *subselect,
Node **testexpr, List **paramIds);
static Node *replace_correlation_vars_mutator(Node *node, PlannerInfo *root);
static Node *process_sublinks_mutator(Node *node,
process_sublinks_context *context);
static Bitmapset *finalize_plan(PlannerInfo *root,
Plan *plan,
Bitmapset *valid_params);
static bool finalize_primnode(Node *node, finalize_primnode_context *context);
/*
* Generate a Param node to replace the given Var,
* which is expected to have varlevelsup > 0 (ie, it is not local).
*/
static Param *
replace_outer_var(PlannerInfo *root, Var *var)
{
Param *retval;
ListCell *ppl;
PlannerParamItem *pitem;
Index abslevel;
int i;
Assert(var->varlevelsup > 0 && var->varlevelsup < root->query_level);
abslevel = root->query_level - var->varlevelsup;
/*
2007-11-15 16:14:46 -05:00
* If there's already a paramlist entry for this same Var, just use it.
* NOTE: in sufficiently complex querytrees, it is possible for the same
* varno/abslevel to refer to different RTEs in different parts of the
* parsetree, so that different fields might end up sharing the same Param
* number. As long as we check the vartype/typmod as well, I believe that
* this sort of aliasing will cause no trouble. The correct field should
* get stored into the Param slot at execution in each part of the tree.
*/
i = 0;
foreach(ppl, root->glob->paramlist)
{
pitem = (PlannerParamItem *) lfirst(ppl);
if (pitem->abslevel == abslevel && IsA(pitem->item, Var))
{
2003-08-03 20:43:34 -04:00
Var *pvar = (Var *) pitem->item;
if (pvar->varno == var->varno &&
pvar->varattno == var->varattno &&
pvar->vartype == var->vartype &&
pvar->vartypmod == var->vartypmod)
break;
}
i++;
}
if (!ppl)
{
/* Nope, so make a new one */
var = (Var *) copyObject(var);
var->varlevelsup = 0;
pitem = makeNode(PlannerParamItem);
pitem->item = (Node *) var;
pitem->abslevel = abslevel;
root->glob->paramlist = lappend(root->glob->paramlist, pitem);
/* i is already the correct index for the new item */
}
retval = makeNode(Param);
retval->paramkind = PARAM_EXEC;
retval->paramid = i;
retval->paramtype = var->vartype;
retval->paramtypmod = var->vartypmod;
retval->location = -1;
1998-08-31 23:29:17 -04:00
return retval;
}
/*
* Generate a Param node to replace the given Aggref
* which is expected to have agglevelsup > 0 (ie, it is not local).
*/
static Param *
replace_outer_agg(PlannerInfo *root, Aggref *agg)
{
Param *retval;
PlannerParamItem *pitem;
Index abslevel;
int i;
Assert(agg->agglevelsup > 0 && agg->agglevelsup < root->query_level);
abslevel = root->query_level - agg->agglevelsup;
/*
2005-10-14 22:49:52 -04:00
* It does not seem worthwhile to try to match duplicate outer aggs. Just
* make a new slot every time.
*/
agg = (Aggref *) copyObject(agg);
2003-08-03 20:43:34 -04:00
IncrementVarSublevelsUp((Node *) agg, -((int) agg->agglevelsup), 0);
Assert(agg->agglevelsup == 0);
pitem = makeNode(PlannerParamItem);
pitem->item = (Node *) agg;
pitem->abslevel = abslevel;
root->glob->paramlist = lappend(root->glob->paramlist, pitem);
i = list_length(root->glob->paramlist) - 1;
retval = makeNode(Param);
retval->paramkind = PARAM_EXEC;
retval->paramid = i;
retval->paramtype = agg->aggtype;
retval->paramtypmod = -1;
retval->location = -1;
return retval;
}
/*
* Generate a new Param node that will not conflict with any other.
*
* This is used to allocate PARAM_EXEC slots for subplan outputs.
*/
static Param *
generate_new_param(PlannerInfo *root, Oid paramtype, int32 paramtypmod)
{
Param *retval;
PlannerParamItem *pitem;
retval = makeNode(Param);
retval->paramkind = PARAM_EXEC;
retval->paramid = list_length(root->glob->paramlist);
retval->paramtype = paramtype;
retval->paramtypmod = paramtypmod;
retval->location = -1;
pitem = makeNode(PlannerParamItem);
pitem->item = (Node *) retval;
pitem->abslevel = root->query_level;
root->glob->paramlist = lappend(root->glob->paramlist, pitem);
return retval;
}
/*
* Assign a (nonnegative) PARAM_EXEC ID for a recursive query's worktable.
*/
int
SS_assign_worktable_param(PlannerInfo *root)
{
Param *param;
/* We generate a Param of datatype INTERNAL */
param = generate_new_param(root, INTERNALOID, -1);
/* ... but the caller only cares about its ID */
return param->paramid;
}
/*
* Get the datatype of the first column of the plan's output.
*
* This is stored for ARRAY_SUBLINK and for exprType(), which doesn't have any
* way to get at the plan associated with a SubPlan node. We really only need
* the value for EXPR_SUBLINK and ARRAY_SUBLINK subplans, but for consistency
* we set it always.
*/
static Oid
get_first_col_type(Plan *plan)
{
/* In cases such as EXISTS, tlist might be empty; arbitrarily use VOID */
if (plan->targetlist)
{
TargetEntry *tent = (TargetEntry *) linitial(plan->targetlist);
Assert(IsA(tent, TargetEntry));
if (!tent->resjunk)
return exprType((Node *) tent->expr);
}
return VOIDOID;
}
/*
* Convert a SubLink (as created by the parser) into a SubPlan.
*
* We are given the SubLink's contained query, type, and testexpr. We are
* also told if this expression appears at top level of a WHERE/HAVING qual.
*
* Note: we assume that the testexpr has been AND/OR flattened (actually,
* it's been through eval_const_expressions), but not converted to
* implicit-AND form; and any SubLinks in it should already have been
* converted to SubPlans. The subquery is as yet untouched, however.
*
* The result is whatever we need to substitute in place of the SubLink
* node in the executable expression. This will be either the SubPlan
* node (if we have to do the subplan as a subplan), or a Param node
* representing the result of an InitPlan, or a row comparison expression
* tree containing InitPlan Param nodes.
*/
static Node *
make_subplan(PlannerInfo *root, Query *orig_subquery, SubLinkType subLinkType,
Node *testexpr, bool isTopQual)
{
Query *subquery;
bool simple_exists = false;
double tuple_fraction;
Plan *plan;
PlannerInfo *subroot;
Node *result;
/*
2005-10-14 22:49:52 -04:00
* Copy the source Query node. This is a quick and dirty kluge to resolve
* the fact that the parser can generate trees with multiple links to the
* same sub-Query node, but the planner wants to scribble on the Query.
* Try to clean this up when we do querytree redesign...
*/
subquery = (Query *) copyObject(orig_subquery);
/*
* If it's an EXISTS subplan, we might be able to simplify it.
*/
if (subLinkType == EXISTS_SUBLINK)
simple_exists = simplify_EXISTS_query(subquery);
/*
2005-10-14 22:49:52 -04:00
* For an EXISTS subplan, tell lower-level planner to expect that only the
* first tuple will be retrieved. For ALL and ANY subplans, we will be
* able to stop evaluating if the test condition fails or matches, so very
* often not all the tuples will be retrieved; for lack of a better idea,
* specify 50% retrieval. For EXPR and ROWCOMPARE subplans, use default
* behavior (we're only expecting one row out, anyway).
*
* NOTE: if you change these numbers, also change cost_subplan() in
* path/costsize.c.
*
* XXX If an ANY subplan is uncorrelated, build_subplan may decide to hash
* its output. In that case it would've been better to specify full
* retrieval. At present, however, we can only check hashability after
* we've made the subplan :-(. (Determining whether it'll fit in work_mem
* is the really hard part.) Therefore, we don't want to be too
* optimistic about the percentage of tuples retrieved, for fear of
* selecting a plan that's bad for the materialization case.
*/
if (subLinkType == EXISTS_SUBLINK)
tuple_fraction = 1.0; /* just like a LIMIT 1 */
else if (subLinkType == ALL_SUBLINK ||
subLinkType == ANY_SUBLINK)
tuple_fraction = 0.5; /* 50% */
else
tuple_fraction = 0.0; /* default behavior */
/*
* Generate the plan for the subquery.
*/
plan = subquery_planner(root->glob, subquery,
root,
false, tuple_fraction,
&subroot);
/* And convert to SubPlan or InitPlan format. */
result = build_subplan(root, plan, subroot->parse->rtable,
subLinkType, testexpr, true, isTopQual);
/*
* If it's a correlated EXISTS with an unimportant targetlist, we might be
* able to transform it to the equivalent of an IN and then implement it
* by hashing. We don't have enough information yet to tell which way
* is likely to be better (it depends on the expected number of executions
* of the EXISTS qual, and we are much too early in planning the outer
* query to be able to guess that). So we generate both plans, if
* possible, and leave it to the executor to decide which to use.
*/
if (simple_exists && IsA(result, SubPlan))
{
Node *newtestexpr;
List *paramIds;
/* Make a second copy of the original subquery */
subquery = (Query *) copyObject(orig_subquery);
/* and re-simplify */
simple_exists = simplify_EXISTS_query(subquery);
Assert(simple_exists);
/* See if it can be converted to an ANY query */
subquery = convert_EXISTS_to_ANY(root, subquery,
&newtestexpr, &paramIds);
if (subquery)
{
/* Generate the plan for the ANY subquery; we'll need all rows */
plan = subquery_planner(root->glob, subquery,
root,
false, 0.0,
&subroot);
/* Now we can check if it'll fit in work_mem */
if (subplan_is_hashable(plan))
{
SubPlan *hashplan;
AlternativeSubPlan *asplan;
/* OK, convert to SubPlan format. */
hashplan = (SubPlan *) build_subplan(root, plan,
subroot->parse->rtable,
ANY_SUBLINK, newtestexpr,
false, true);
/* Check we got what we expected */
Assert(IsA(hashplan, SubPlan));
Assert(hashplan->parParam == NIL);
Assert(hashplan->useHashTable);
/* build_subplan won't have filled in paramIds */
hashplan->paramIds = paramIds;
/* Leave it to the executor to decide which plan to use */
asplan = makeNode(AlternativeSubPlan);
asplan->subplans = list_make2(result, hashplan);
result = (Node *) asplan;
}
}
}
return result;
}
/*
* Build a SubPlan node given the raw inputs --- subroutine for make_subplan
*
* Returns either the SubPlan, or an expression using initplan output Params,
* as explained in the comments for make_subplan.
*/
static Node *
build_subplan(PlannerInfo *root, Plan *plan, List *rtable,
SubLinkType subLinkType, Node *testexpr,
bool adjust_testexpr, bool unknownEqFalse)
{
Node *result;
SubPlan *splan;
bool isInitPlan;
Bitmapset *tmpset;
int paramid;
/*
* Initialize the SubPlan node. Note plan_id isn't set till further down,
* likewise the cost fields.
*/
splan = makeNode(SubPlan);
splan->subLinkType = subLinkType;
splan->testexpr = NULL;
splan->paramIds = NIL;
splan->firstColType = get_first_col_type(plan);
splan->useHashTable = false;
splan->unknownEqFalse = unknownEqFalse;
splan->setParam = NIL;
splan->parParam = NIL;
splan->args = NIL;
/*
* Make parParam and args lists of param IDs and expressions that current
* query level will pass to this child plan.
*/
tmpset = bms_copy(plan->extParam);
while ((paramid = bms_first_member(tmpset)) >= 0)
{
PlannerParamItem *pitem = list_nth(root->glob->paramlist, paramid);
if (pitem->abslevel == root->query_level)
{
splan->parParam = lappend_int(splan->parParam, paramid);
/*
* The Var or Aggref has already been adjusted to have the correct
* varlevelsup or agglevelsup. We probably don't even need to
* copy it again, but be safe.
*/
splan->args = lappend(splan->args, copyObject(pitem->item));
}
}
bms_free(tmpset);
/*
2005-10-14 22:49:52 -04:00
* Un-correlated or undirect correlated plans of EXISTS, EXPR, ARRAY, or
* ROWCOMPARE types can be used as initPlans. For EXISTS, EXPR, or ARRAY,
2005-10-14 22:49:52 -04:00
* we just produce a Param referring to the result of evaluating the
* initPlan. For ROWCOMPARE, we must modify the testexpr tree to contain
* PARAM_EXEC Params instead of the PARAM_SUBLINK Params emitted by the
* parser.
*/
if (splan->parParam == NIL && subLinkType == EXISTS_SUBLINK)
{
Param *prm;
Assert(testexpr == NULL);
prm = generate_new_param(root, BOOLOID, -1);
splan->setParam = list_make1_int(prm->paramid);
isInitPlan = true;
result = (Node *) prm;
}
else if (splan->parParam == NIL && subLinkType == EXPR_SUBLINK)
{
TargetEntry *te = linitial(plan->targetlist);
Param *prm;
Assert(!te->resjunk);
Assert(testexpr == NULL);
prm = generate_new_param(root,
exprType((Node *) te->expr),
exprTypmod((Node *) te->expr));
splan->setParam = list_make1_int(prm->paramid);
isInitPlan = true;
result = (Node *) prm;
}
else if (splan->parParam == NIL && subLinkType == ARRAY_SUBLINK)
{
TargetEntry *te = linitial(plan->targetlist);
Oid arraytype;
Param *prm;
Assert(!te->resjunk);
Assert(testexpr == NULL);
arraytype = get_array_type(exprType((Node *) te->expr));
if (!OidIsValid(arraytype))
elog(ERROR, "could not find array type for datatype %s",
format_type_be(exprType((Node *) te->expr)));
prm = generate_new_param(root,
arraytype,
exprTypmod((Node *) te->expr));
splan->setParam = list_make1_int(prm->paramid);
isInitPlan = true;
result = (Node *) prm;
}
else if (splan->parParam == NIL && subLinkType == ROWCOMPARE_SUBLINK)
{
/* Adjust the Params */
List *params;
Assert(testexpr != NULL);
params = generate_subquery_params(root,
plan->targetlist,
&splan->paramIds);
result = convert_testexpr(root,
testexpr,
params);
splan->setParam = list_copy(splan->paramIds);
isInitPlan = true;
2003-08-03 20:43:34 -04:00
/*
* The executable expression is returned to become part of the outer
* plan's expression tree; it is not kept in the initplan node.
*/
}
else
{
/*
* Adjust the Params in the testexpr, unless caller said it's not
* needed.
*/
if (testexpr && adjust_testexpr)
Fix mis-calculation of extParam/allParam sets for plan nodes, as seen in bug #4290. The fundamental bug is that masking extParam by outer_params, as finalize_plan had been doing, caused us to lose the information that an initPlan depended on the output of a sibling initPlan. On reflection the best thing to do seemed to be not to try to adjust outer_params for this case but get rid of it entirely. The only thing it was really doing for us was to filter out param IDs associated with SubPlan nodes, and that can be done (with greater accuracy) while processing individual SubPlan nodes in finalize_primnode. This approach was vindicated by the discovery that the masking method was hiding a second bug: SS_finalize_plan failed to remove extParam bits for initPlan output params that were referenced in the main plan tree (it only got rid of those referenced by other initPlans). It's not clear that this caused any real problems, given the limited use of extParam by the executor, but it's certainly not what was intended. I originally thought that there was also a problem with needing to include indirect dependencies on external params in initPlans' param sets, but it turns out that the executor handles this correctly so long as the depended-on initPlan is earlier in the initPlans list than the one using its output. That seems a bit of a fragile assumption, but it is true at the moment, so I just documented it in some code comments rather than making what would be rather invasive changes to remove the assumption. Back-patch to 8.1. Previous versions don't have the case of initPlans referring to other initPlans' outputs, so while the existing logic is still questionable for them, there are not any known bugs to be fixed. So I'll refrain from changing them for now.
2008-07-09 21:17:29 -04:00
{
List *params;
params = generate_subquery_params(root,
plan->targetlist,
&splan->paramIds);
splan->testexpr = convert_testexpr(root,
testexpr,
params);
}
else
splan->testexpr = testexpr;
/*
2005-10-14 22:49:52 -04:00
* We can't convert subplans of ALL_SUBLINK or ANY_SUBLINK types to
* initPlans, even when they are uncorrelated or undirect correlated,
* because we need to scan the output of the subplan for each outer
* tuple. But if it's a not-direct-correlated IN (= ANY) test, we
* might be able to use a hashtable to avoid comparing all the tuples.
*/
if (subLinkType == ANY_SUBLINK &&
splan->parParam == NIL &&
subplan_is_hashable(plan) &&
testexpr_is_hashable(splan->testexpr))
splan->useHashTable = true;
2003-08-03 20:43:34 -04:00
/*
2005-10-14 22:49:52 -04:00
* Otherwise, we have the option to tack a MATERIAL node onto the top
* of the subplan, to reduce the cost of reading it repeatedly. This
* is pointless for a direct-correlated subplan, since we'd have to
* recompute its results each time anyway. For uncorrelated/undirect
* correlated subplans, we add MATERIAL unless the subplan's top plan
* node would materialize its output anyway.
*/
else if (splan->parParam == NIL)
{
bool use_material;
switch (nodeTag(plan))
{
case T_Material:
case T_FunctionScan:
case T_CteScan:
case T_WorkTableScan:
case T_Sort:
use_material = false;
break;
default:
use_material = true;
break;
}
if (use_material)
plan = materialize_finished_plan(plan);
}
result = (Node *) splan;
isInitPlan = false;
}
/*
* Add the subplan and its rtable to the global lists.
*/
root->glob->subplans = lappend(root->glob->subplans, plan);
root->glob->subrtables = lappend(root->glob->subrtables, rtable);
splan->plan_id = list_length(root->glob->subplans);
if (isInitPlan)
root->init_plans = lappend(root->init_plans, splan);
/*
* A parameterless subplan (not initplan) should be prepared to handle
2007-11-15 16:14:46 -05:00
* REWIND efficiently. If it has direct parameters then there's no point
* since it'll be reset on each scan anyway; and if it's an initplan then
* there's no point since it won't get re-run without parameter changes
* anyway. The input of a hashed subplan doesn't need REWIND either.
*/
if (splan->parParam == NIL && !isInitPlan && !splan->useHashTable)
root->glob->rewindPlanIDs = bms_add_member(root->glob->rewindPlanIDs,
splan->plan_id);
/* Lastly, fill in the cost estimates for use later */
cost_subplan(root, splan, plan);
return result;
}
/*
* generate_subquery_params: build a list of Params representing the output
* columns of a sublink's sub-select, given the sub-select's targetlist.
*
* We also return an integer list of the paramids of the Params.
*/
static List *
generate_subquery_params(PlannerInfo *root, List *tlist, List **paramIds)
{
List *result;
List *ids;
ListCell *lc;
result = ids = NIL;
foreach(lc, tlist)
{
TargetEntry *tent = (TargetEntry *) lfirst(lc);
Param *param;
if (tent->resjunk)
continue;
param = generate_new_param(root,
exprType((Node *) tent->expr),
exprTypmod((Node *) tent->expr));
result = lappend(result, param);
ids = lappend_int(ids, param->paramid);
}
*paramIds = ids;
return result;
}
/*
* generate_subquery_vars: build a list of Vars representing the output
* columns of a sublink's sub-select, given the sub-select's targetlist.
* The Vars have the specified varno (RTE index).
*/
static List *
generate_subquery_vars(PlannerInfo *root, List *tlist, Index varno)
{
List *result;
ListCell *lc;
result = NIL;
foreach(lc, tlist)
{
TargetEntry *tent = (TargetEntry *) lfirst(lc);
Var *var;
if (tent->resjunk)
continue;
var = makeVar(varno,
tent->resno,
exprType((Node *) tent->expr),
exprTypmod((Node *) tent->expr),
0);
result = lappend(result, var);
}
return result;
}
/*
* convert_testexpr: convert the testexpr given by the parser into
* actually executable form. This entails replacing PARAM_SUBLINK Params
* with Params or Vars representing the results of the sub-select. The
* nodes to be substituted are passed in as the List result from
* generate_subquery_params or generate_subquery_vars.
*
* The given testexpr has already been recursively processed by
* process_sublinks_mutator. Hence it can no longer contain any
* PARAM_SUBLINK Params for lower SubLink nodes; we can safely assume that
* any we find are for our own level of SubLink.
*/
static Node *
convert_testexpr(PlannerInfo *root,
Node *testexpr,
List *subst_nodes)
{
convert_testexpr_context context;
context.root = root;
context.subst_nodes = subst_nodes;
return convert_testexpr_mutator(testexpr, &context);
}
static Node *
convert_testexpr_mutator(Node *node,
convert_testexpr_context *context)
{
if (node == NULL)
return NULL;
if (IsA(node, Param))
{
2006-10-03 20:30:14 -04:00
Param *param = (Param *) node;
if (param->paramkind == PARAM_SUBLINK)
{
if (param->paramid <= 0 ||
param->paramid > list_length(context->subst_nodes))
elog(ERROR, "unexpected PARAM_SUBLINK ID: %d", param->paramid);
/*
* We copy the list item to avoid having doubly-linked
* substructure in the modified parse tree. This is probably
* unnecessary when it's a Param, but be safe.
*/
return (Node *) copyObject(list_nth(context->subst_nodes,
param->paramid - 1));
}
}
return expression_tree_mutator(node,
convert_testexpr_mutator,
(void *) context);
}
/*
* subplan_is_hashable: can we implement an ANY subplan by hashing?
*/
static bool
subplan_is_hashable(Plan *plan)
{
double subquery_size;
2003-08-03 20:43:34 -04:00
/*
2006-10-03 20:30:14 -04:00
* The estimated size of the subquery result must fit in work_mem. (Note:
* we use sizeof(HeapTupleHeaderData) here even though the tuples will
* actually be stored as MinimalTuples; this provides some fudge factor
* for hashtable overhead.)
*/
subquery_size = plan->plan_rows *
(MAXALIGN(plan->plan_width) + MAXALIGN(sizeof(HeapTupleHeaderData)));
if (subquery_size > work_mem * 1024L)
return false;
2003-08-03 20:43:34 -04:00
return true;
}
/*
* testexpr_is_hashable: is an ANY SubLink's test expression hashable?
*/
static bool
testexpr_is_hashable(Node *testexpr)
{
/*
* The testexpr must be a single OpExpr, or an AND-clause containing
* only OpExprs.
*
2007-11-15 16:14:46 -05:00
* The combining operators must be hashable and strict. The need for
* hashability is obvious, since we want to use hashing. Without
* strictness, behavior in the presence of nulls is too unpredictable. We
* actually must assume even more than plain strictness: they can't yield
* NULL for non-null inputs, either (see nodeSubplan.c). However, hash
* indexes and hash joins assume that too.
*/
if (testexpr && IsA(testexpr, OpExpr))
{
if (hash_ok_operator((OpExpr *) testexpr))
return true;
}
else if (and_clause(testexpr))
{
ListCell *l;
foreach(l, ((BoolExpr *) testexpr)->args)
{
2006-10-03 20:30:14 -04:00
Node *andarg = (Node *) lfirst(l);
if (!IsA(andarg, OpExpr))
return false;
if (!hash_ok_operator((OpExpr *) andarg))
return false;
}
return true;
}
return false;
}
static bool
hash_ok_operator(OpExpr *expr)
{
Oid opid = expr->opno;
HeapTuple tup;
Form_pg_operator optup;
/* quick out if not a binary operator */
if (list_length(expr->args) != 2)
return false;
/* else must look up the operator properties */
tup = SearchSysCache(OPEROID,
ObjectIdGetDatum(opid),
0, 0, 0);
if (!HeapTupleIsValid(tup))
elog(ERROR, "cache lookup failed for operator %u", opid);
optup = (Form_pg_operator) GETSTRUCT(tup);
if (!optup->oprcanhash || !func_strict(optup->oprcode))
{
ReleaseSysCache(tup);
return false;
}
ReleaseSysCache(tup);
return true;
}
/*
* SS_process_ctes: process a query's WITH list
*
* We plan each interesting WITH item and convert it to an initplan.
* A side effect is to fill in root->cte_plan_ids with a list that
* parallels root->parse->cteList and provides the subplan ID for
* each CTE's initplan.
*/
void
SS_process_ctes(PlannerInfo *root)
{
ListCell *lc;
Assert(root->cte_plan_ids == NIL);
foreach(lc, root->parse->cteList)
{
CommonTableExpr *cte = (CommonTableExpr *) lfirst(lc);
Query *subquery;
Plan *plan;
PlannerInfo *subroot;
SubPlan *splan;
Bitmapset *tmpset;
int paramid;
Param *prm;
/*
* Ignore CTEs that are not actually referenced anywhere.
*/
if (cte->cterefcount == 0)
{
/* Make a dummy entry in cte_plan_ids */
root->cte_plan_ids = lappend_int(root->cte_plan_ids, -1);
continue;
}
/*
* Copy the source Query node. Probably not necessary, but let's
* keep this similar to make_subplan.
*/
subquery = (Query *) copyObject(cte->ctequery);
/*
* Generate the plan for the CTE query. Always plan for full
* retrieval --- we don't have enough info to predict otherwise.
*/
plan = subquery_planner(root->glob, subquery,
root,
cte->cterecursive, 0.0,
&subroot);
/*
* Make a SubPlan node for it. This is just enough unlike
* build_subplan that we can't share code.
*
* Note plan_id isn't set till further down, likewise the cost fields.
*/
splan = makeNode(SubPlan);
splan->subLinkType = CTE_SUBLINK;
splan->testexpr = NULL;
splan->paramIds = NIL;
splan->firstColType = get_first_col_type(plan);
splan->useHashTable = false;
splan->unknownEqFalse = false;
splan->setParam = NIL;
splan->parParam = NIL;
splan->args = NIL;
/*
* Make parParam and args lists of param IDs and expressions that
* current query level will pass to this child plan. Even though
* this is an initplan, there could be side-references to earlier
* initplan's outputs, specifically their CTE output parameters.
*/
tmpset = bms_copy(plan->extParam);
while ((paramid = bms_first_member(tmpset)) >= 0)
{
PlannerParamItem *pitem = list_nth(root->glob->paramlist, paramid);
if (pitem->abslevel == root->query_level)
{
prm = (Param *) pitem->item;
if (!IsA(prm, Param) ||
prm->paramtype != INTERNALOID)
elog(ERROR, "bogus local parameter passed to WITH query");
splan->parParam = lappend_int(splan->parParam, paramid);
splan->args = lappend(splan->args, copyObject(prm));
}
}
bms_free(tmpset);
/*
* Assign a param to represent the query output. We only really
* care about reserving a parameter ID number.
*/
prm = generate_new_param(root, INTERNALOID, -1);
splan->setParam = list_make1_int(prm->paramid);
/*
* Add the subplan and its rtable to the global lists.
*/
root->glob->subplans = lappend(root->glob->subplans, plan);
root->glob->subrtables = lappend(root->glob->subrtables,
subroot->parse->rtable);
splan->plan_id = list_length(root->glob->subplans);
root->init_plans = lappend(root->init_plans, splan);
root->cte_plan_ids = lappend_int(root->cte_plan_ids, splan->plan_id);
/* Lastly, fill in the cost estimates for use later */
cost_subplan(root, splan, plan);
}
}
/*
* convert_ANY_sublink_to_join: try to convert an ANY SubLink to a join
*
* The caller has found an ANY SubLink at the top level of one of the query's
* qual clauses, but has not checked the properties of the SubLink further.
* Decide whether it is appropriate to process this SubLink in join style.
* If so, form a JoinExpr and return it. Return NULL if the SubLink cannot
* be converted to a join.
*
* The only non-obvious input parameter is available_rels: this is the set
* of query rels that can safely be referenced in the sublink expression.
* (We must restrict this to avoid changing the semantics when a sublink
* is present in an outer join's ON qual.) The conversion must fail if
* the converted qual would reference any but these parent-query relids.
*
* On success, the returned JoinExpr has larg = NULL and rarg = the jointree
* item representing the pulled-up subquery. The caller must set larg to
* represent the relation(s) on the lefthand side of the new join, and insert
* the JoinExpr into the upper query's jointree at an appropriate place
* (typically, where the lefthand relation(s) had been). Note that the
* passed-in SubLink must also be removed from its original position in the
* query quals, since the quals of the returned JoinExpr replace it.
* (Notionally, we replace the SubLink with a constant TRUE, then elide the
* redundant constant from the qual.)
*
* Side effects of a successful conversion include adding the SubLink's
* subselect to the query's rangetable, so that it can be referenced in
* the JoinExpr's rarg.
*/
JoinExpr *
convert_ANY_sublink_to_join(PlannerInfo *root, SubLink *sublink,
Relids available_rels)
{
JoinExpr *result;
Query *parse = root->parse;
Query *subselect = (Query *) sublink->subselect;
Relids upper_varnos;
int rtindex;
RangeTblEntry *rte;
RangeTblRef *rtr;
List *subquery_vars;
Node *quals;
Assert(sublink->subLinkType == ANY_SUBLINK);
2003-08-03 20:43:34 -04:00
/*
2005-10-14 22:49:52 -04:00
* The sub-select must not refer to any Vars of the parent query. (Vars of
* higher levels should be okay, though.)
*/
if (contain_vars_of_level((Node *) subselect, 1))
return NULL;
2003-08-03 20:43:34 -04:00
/*
* The test expression must contain some Vars of the parent query,
* else it's not gonna be a join. (Note that it won't have Vars
* referring to the subquery, rather Params.)
*/
upper_varnos = pull_varnos(sublink->testexpr);
if (bms_is_empty(upper_varnos))
return NULL;
/*
* However, it can't refer to anything outside available_rels.
*/
if (!bms_is_subset(upper_varnos, available_rels))
return NULL;
2003-08-03 20:43:34 -04:00
/*
* The combining operators and left-hand expressions mustn't be volatile.
*/
if (contain_volatile_functions(sublink->testexpr))
return NULL;
2003-08-03 20:43:34 -04:00
/*
* Okay, pull up the sub-select into upper range table.
*
* We rely here on the assumption that the outer query has no references
* to the inner (necessarily true, other than the Vars that we build
* below). Therefore this is a lot easier than what pull_up_subqueries has
* to go through.
*/
rte = addRangeTableEntryForSubquery(NULL,
subselect,
makeAlias("ANY_subquery", NIL),
false);
parse->rtable = lappend(parse->rtable, rte);
rtindex = list_length(parse->rtable);
/*
* Form a RangeTblRef for the pulled-up sub-select.
*/
rtr = makeNode(RangeTblRef);
rtr->rtindex = rtindex;
2003-08-03 20:43:34 -04:00
/*
* Build a list of Vars representing the subselect outputs.
*/
subquery_vars = generate_subquery_vars(root,
subselect->targetList,
rtindex);
/*
* Build the new join's qual expression, replacing Params with these Vars.
*/
quals = convert_testexpr(root, sublink->testexpr, subquery_vars);
/*
* And finally, build the JoinExpr node.
*/
result = makeNode(JoinExpr);
result->jointype = JOIN_SEMI;
result->isNatural = false;
result->larg = NULL; /* caller must fill this in */
result->rarg = (Node *) rtr;
result->using = NIL;
result->quals = quals;
result->alias = NULL;
result->rtindex = 0; /* we don't need an RTE for it */
2003-08-03 20:43:34 -04:00
return result;
}
/*
* convert_EXISTS_sublink_to_join: try to convert an EXISTS SubLink to a join
*
* The API of this function is identical to convert_ANY_sublink_to_join's,
* except that we also support the case where the caller has found NOT EXISTS,
* so we need an additional input parameter "under_not".
*/
JoinExpr *
convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
bool under_not, Relids available_rels)
{
JoinExpr *result;
Query *parse = root->parse;
Query *subselect = (Query *) sublink->subselect;
Node *whereClause;
int rtoffset;
int varno;
Relids clause_varnos;
Relids upper_varnos;
Assert(sublink->subLinkType == EXISTS_SUBLINK);
/*
* Copy the subquery so we can modify it safely (see comments in
* make_subplan).
*/
subselect = (Query *) copyObject(subselect);
/*
* See if the subquery can be simplified based on the knowledge that
* it's being used in EXISTS(). If we aren't able to get rid of its
* targetlist, we have to fail, because the pullup operation leaves
* us with noplace to evaluate the targetlist.
*/
if (!simplify_EXISTS_query(subselect))
return NULL;
/*
* The subquery must have a nonempty jointree, else we won't have a join.
*/
if (subselect->jointree->fromlist == NIL)
return NULL;
/*
* Separate out the WHERE clause. (We could theoretically also remove
* top-level plain JOIN/ON clauses, but it's probably not worth the
* trouble.)
*/
whereClause = subselect->jointree->quals;
subselect->jointree->quals = NULL;
/*
* The rest of the sub-select must not refer to any Vars of the parent
* query. (Vars of higher levels should be okay, though.)
*/
if (contain_vars_of_level((Node *) subselect, 1))
return NULL;
/*
* On the other hand, the WHERE clause must contain some Vars of the
* parent query, else it's not gonna be a join.
*/
if (!contain_vars_of_level(whereClause, 1))
return NULL;
/*
* We don't risk optimizing if the WHERE clause is volatile, either.
*/
if (contain_volatile_functions(whereClause))
return NULL;
/*
* Prepare to pull up the sub-select into top range table.
*
* We rely here on the assumption that the outer query has no references
* to the inner (necessarily true). Therefore this is a lot easier than
* what pull_up_subqueries has to go through.
*
* In fact, it's even easier than what convert_ANY_sublink_to_join has
* to do. The machinations of simplify_EXISTS_query ensured that there
* is nothing interesting in the subquery except an rtable and jointree,
* and even the jointree FromExpr no longer has quals. So we can just
* append the rtable to our own and use the FromExpr in our jointree.
* But first, adjust all level-zero varnos in the subquery to account
* for the rtable merger.
*/
rtoffset = list_length(parse->rtable);
OffsetVarNodes((Node *) subselect, rtoffset, 0);
OffsetVarNodes(whereClause, rtoffset, 0);
/*
* Upper-level vars in subquery will now be one level closer to their
* parent than before; in particular, anything that had been level 1
* becomes level zero.
*/
IncrementVarSublevelsUp((Node *) subselect, -1, 1);
IncrementVarSublevelsUp(whereClause, -1, 1);
/*
* Now that the WHERE clause is adjusted to match the parent query
* environment, we can easily identify all the level-zero rels it uses.
* The ones <= rtoffset belong to the upper query; the ones > rtoffset
* do not.
*/
clause_varnos = pull_varnos(whereClause);
upper_varnos = NULL;
while ((varno = bms_first_member(clause_varnos)) >= 0)
{
if (varno <= rtoffset)
upper_varnos = bms_add_member(upper_varnos, varno);
}
bms_free(clause_varnos);
Assert(!bms_is_empty(upper_varnos));
/*
* Now that we've got the set of upper-level varnos, we can make the
* last check: only available_rels can be referenced.
*/
if (!bms_is_subset(upper_varnos, available_rels))
return NULL;
/* Now we can attach the modified subquery rtable to the parent */
parse->rtable = list_concat(parse->rtable, subselect->rtable);
/*
* And finally, build the JoinExpr node.
*/
result = makeNode(JoinExpr);
result->jointype = under_not ? JOIN_ANTI : JOIN_SEMI;
result->isNatural = false;
result->larg = NULL; /* caller must fill this in */
/* flatten out the FromExpr node if it's useless */
if (list_length(subselect->jointree->fromlist) == 1)
result->rarg = (Node *) linitial(subselect->jointree->fromlist);
else
result->rarg = (Node *) subselect->jointree;
result->using = NIL;
result->quals = whereClause;
result->alias = NULL;
result->rtindex = 0; /* we don't need an RTE for it */
return result;
}
/*
* simplify_EXISTS_query: remove any useless stuff in an EXISTS's subquery
*
* The only thing that matters about an EXISTS query is whether it returns
* zero or more than zero rows. Therefore, we can remove certain SQL features
* that won't affect that. The only part that is really likely to matter in
* typical usage is simplifying the targetlist: it's a common habit to write
* "SELECT * FROM" even though there is no need to evaluate any columns.
*
* Note: by suppressing the targetlist we could cause an observable behavioral
* change, namely that any errors that might occur in evaluating the tlist
* won't occur, nor will other side-effects of volatile functions. This seems
* unlikely to bother anyone in practice.
*
* Returns TRUE if was able to discard the targetlist, else FALSE.
*/
static bool
simplify_EXISTS_query(Query *query)
{
/*
* We don't try to simplify at all if the query uses set operations,
* aggregates, HAVING, LIMIT/OFFSET, or FOR UPDATE/SHARE; none of these
* seem likely in normal usage and their possible effects are complex.
*/
if (query->commandType != CMD_SELECT ||
query->intoClause ||
query->setOperations ||
query->hasAggs ||
query->hasWindowFuncs ||
query->havingQual ||
query->limitOffset ||
query->limitCount ||
query->rowMarks)
return false;
/*
* Mustn't throw away the targetlist if it contains set-returning
* functions; those could affect whether zero rows are returned!
*/
if (expression_returns_set((Node *) query->targetList))
return false;
/*
* Otherwise, we can throw away the targetlist, as well as any GROUP,
* WINDOW, DISTINCT, and ORDER BY clauses; none of those clauses will
* change a nonzero-rows result to zero rows or vice versa. (Furthermore,
* since our parsetree representation of these clauses depends on the
* targetlist, we'd better throw them away if we drop the targetlist.)
*/
query->targetList = NIL;
query->groupClause = NIL;
query->windowClause = NIL;
query->distinctClause = NIL;
query->sortClause = NIL;
query->hasDistinctOn = false;
return true;
}
/*
* convert_EXISTS_to_ANY: try to convert EXISTS to a hashable ANY sublink
*
* The subselect is expected to be a fresh copy that we can munge up,
* and to have been successfully passed through simplify_EXISTS_query.
*
* On success, the modified subselect is returned, and we store a suitable
* upper-level test expression at *testexpr, plus a list of the subselect's
* output Params at *paramIds. (The test expression is already Param-ified
* and hence need not go through convert_testexpr, which is why we have to
* deal with the Param IDs specially.)
*
* On failure, returns NULL.
*/
static Query *
convert_EXISTS_to_ANY(PlannerInfo *root, Query *subselect,
Node **testexpr, List **paramIds)
{
Node *whereClause;
List *leftargs,
*rightargs,
*opids,
*newWhere,
*tlist,
*testlist,
*paramids;
ListCell *lc,
*rc,
*oc;
AttrNumber resno;
/*
* Query must not require a targetlist, since we have to insert a new one.
* Caller should have dealt with the case already.
*/
Assert(subselect->targetList == NIL);
/*
* Separate out the WHERE clause. (We could theoretically also remove
* top-level plain JOIN/ON clauses, but it's probably not worth the
* trouble.)
*/
whereClause = subselect->jointree->quals;
subselect->jointree->quals = NULL;
/*
* The rest of the sub-select must not refer to any Vars of the parent
* query. (Vars of higher levels should be okay, though.)
*
* Note: we need not check for Aggrefs separately because we know the
* sub-select is as yet unoptimized; any uplevel Aggref must therefore
* contain an uplevel Var reference. This is not the case below ...
*/
if (contain_vars_of_level((Node *) subselect, 1))
return NULL;
/*
* We don't risk optimizing if the WHERE clause is volatile, either.
*/
if (contain_volatile_functions(whereClause))
return NULL;
/*
* Clean up the WHERE clause by doing const-simplification etc on it.
* Aside from simplifying the processing we're about to do, this is
* important for being able to pull chunks of the WHERE clause up into
* the parent query. Since we are invoked partway through the parent's
* preprocess_expression() work, earlier steps of preprocess_expression()
* wouldn't get applied to the pulled-up stuff unless we do them here.
* For the parts of the WHERE clause that get put back into the child
* query, this work is partially duplicative, but it shouldn't hurt.
*
* Note: we do not run flatten_join_alias_vars. This is OK because
* any parent aliases were flattened already, and we're not going to
* pull any child Vars (of any description) into the parent.
*
* Note: passing the parent's root to eval_const_expressions is technically
* wrong, but we can get away with it since only the boundParams (if any)
* are used, and those would be the same in a subroot.
*/
whereClause = eval_const_expressions(root, whereClause);
whereClause = (Node *) canonicalize_qual((Expr *) whereClause);
whereClause = (Node *) make_ands_implicit((Expr *) whereClause);
/*
* We now have a flattened implicit-AND list of clauses, which we
* try to break apart into "outervar = innervar" hash clauses.
* Anything that can't be broken apart just goes back into the
* newWhere list. Note that we aren't trying hard yet to ensure
* that we have only outer or only inner on each side; we'll check
* that if we get to the end.
*/
leftargs = rightargs = opids = newWhere = NIL;
foreach(lc, (List *) whereClause)
{
OpExpr *expr = (OpExpr *) lfirst(lc);
if (IsA(expr, OpExpr) &&
hash_ok_operator(expr))
{
Node *leftarg = (Node *) linitial(expr->args);
Node *rightarg = (Node *) lsecond(expr->args);
if (contain_vars_of_level(leftarg, 1))
{
leftargs = lappend(leftargs, leftarg);
rightargs = lappend(rightargs, rightarg);
opids = lappend_oid(opids, expr->opno);
continue;
}
if (contain_vars_of_level(rightarg, 1))
{
/*
* We must commute the clause to put the outer var on the
* left, because the hashing code in nodeSubplan.c expects
* that. This probably shouldn't ever fail, since hashable
* operators ought to have commutators, but be paranoid.
*/
expr->opno = get_commutator(expr->opno);
if (OidIsValid(expr->opno) && hash_ok_operator(expr))
{
leftargs = lappend(leftargs, rightarg);
rightargs = lappend(rightargs, leftarg);
opids = lappend_oid(opids, expr->opno);
continue;
}
/* If no commutator, no chance to optimize the WHERE clause */
return NULL;
}
}
/* Couldn't handle it as a hash clause */
newWhere = lappend(newWhere, expr);
}
/*
* If we didn't find anything we could convert, fail.
*/
if (leftargs == NIL)
return NULL;
/*
* There mustn't be any parent Vars or Aggs in the stuff that we intend to
* put back into the child query. Note: you might think we don't need to
* check for Aggs separately, because an uplevel Agg must contain an
* uplevel Var in its argument. But it is possible that the uplevel Var
* got optimized away by eval_const_expressions. Consider
*
* SUM(CASE WHEN false THEN uplevelvar ELSE 0 END)
*/
if (contain_vars_of_level((Node *) newWhere, 1) ||
contain_vars_of_level((Node *) rightargs, 1))
return NULL;
if (root->parse->hasAggs &&
(contain_aggs_of_level((Node *) newWhere, 1) ||
contain_aggs_of_level((Node *) rightargs, 1)))
return NULL;
/*
* And there can't be any child Vars in the stuff we intend to pull up.
* (Note: we'd need to check for child Aggs too, except we know the
* child has no aggs at all because of simplify_EXISTS_query's check.
* The same goes for window functions.)
*/
if (contain_vars_of_level((Node *) leftargs, 0))
return NULL;
/*
* Also reject sublinks in the stuff we intend to pull up. (It might be
* possible to support this, but doesn't seem worth the complication.)
*/
if (contain_subplans((Node *) leftargs))
return NULL;
/*
* Okay, adjust the sublevelsup in the stuff we're pulling up.
*/
IncrementVarSublevelsUp((Node *) leftargs, -1, 1);
/*
* Put back any child-level-only WHERE clauses.
*/
if (newWhere)
subselect->jointree->quals = (Node *) make_ands_explicit(newWhere);
/*
* Build a new targetlist for the child that emits the expressions
* we need. Concurrently, build a testexpr for the parent using
* Params to reference the child outputs. (Since we generate Params
* directly here, there will be no need to convert the testexpr in
* build_subplan.)
*/
tlist = testlist = paramids = NIL;
resno = 1;
/* there's no "for3" so we have to chase one of the lists manually */
oc = list_head(opids);
forboth(lc, leftargs, rc, rightargs)
{
Node *leftarg = (Node *) lfirst(lc);
Node *rightarg = (Node *) lfirst(rc);
Oid opid = lfirst_oid(oc);
Param *param;
oc = lnext(oc);
param = generate_new_param(root,
exprType(rightarg),
exprTypmod(rightarg));
tlist = lappend(tlist,
makeTargetEntry((Expr *) rightarg,
resno++,
NULL,
false));
testlist = lappend(testlist,
make_opclause(opid, BOOLOID, false,
(Expr *) leftarg, (Expr *) param));
paramids = lappend_int(paramids, param->paramid);
}
/* Put everything where it should go, and we're done */
subselect->targetList = tlist;
*testexpr = (Node *) make_ands_explicit(testlist);
*paramIds = paramids;
return subselect;
}
/*
* Replace correlation vars (uplevel vars) with Params.
*
* Uplevel aggregates are replaced, too.
*
* Note: it is critical that this runs immediately after SS_process_sublinks.
* Since we do not recurse into the arguments of uplevel aggregates, they will
* get copied to the appropriate subplan args list in the parent query with
* uplevel vars not replaced by Params, but only adjusted in level (see
2003-08-03 20:43:34 -04:00
* replace_outer_agg). That's exactly what we want for the vars of the parent
* level --- but if an aggregate's argument contains any further-up variables,
2003-08-03 20:43:34 -04:00
* they have to be replaced with Params in their turn. That will happen when
* the parent level runs SS_replace_correlation_vars. Therefore it must do
* so after expanding its sublinks to subplans. And we don't want any steps
* in between, else those steps would never get applied to the aggregate
* argument expressions, either in the parent or the child level.
*/
Node *
SS_replace_correlation_vars(PlannerInfo *root, Node *expr)
{
/* No setup needed for tree walk, so away we go */
return replace_correlation_vars_mutator(expr, root);
}
static Node *
replace_correlation_vars_mutator(Node *node, PlannerInfo *root)
{
if (node == NULL)
return NULL;
if (IsA(node, Var))
{
if (((Var *) node)->varlevelsup > 0)
return (Node *) replace_outer_var(root, (Var *) node);
}
if (IsA(node, Aggref))
{
if (((Aggref *) node)->agglevelsup > 0)
return (Node *) replace_outer_agg(root, (Aggref *) node);
}
return expression_tree_mutator(node,
replace_correlation_vars_mutator,
(void *) root);
}
/*
* Expand SubLinks to SubPlans in the given expression.
*
* The isQual argument tells whether or not this expression is a WHERE/HAVING
* qualifier expression. If it is, any sublinks appearing at top level need
* not distinguish FALSE from UNKNOWN return values.
*/
Node *
SS_process_sublinks(PlannerInfo *root, Node *expr, bool isQual)
{
process_sublinks_context context;
context.root = root;
context.isTopQual = isQual;
return process_sublinks_mutator(expr, &context);
}
static Node *
process_sublinks_mutator(Node *node, process_sublinks_context *context)
{
process_sublinks_context locContext;
locContext.root = context->root;
if (node == NULL)
1998-08-31 23:29:17 -04:00
return NULL;
if (IsA(node, SubLink))
{
SubLink *sublink = (SubLink *) node;
Node *testexpr;
/*
2005-10-14 22:49:52 -04:00
* First, recursively process the lefthand-side expressions, if any.
* They're not top-level anymore.
*/
locContext.isTopQual = false;
testexpr = process_sublinks_mutator(sublink->testexpr, &locContext);
2003-08-03 20:43:34 -04:00
/*
* Now build the SubPlan node and make the expr to return.
*/
return make_subplan(context->root,
(Query *) sublink->subselect,
sublink->subLinkType,
testexpr,
context->isTopQual);
}
/*
2005-10-14 22:49:52 -04:00
* We should never see a SubPlan expression in the input (since this is
* the very routine that creates 'em to begin with). We shouldn't find
* ourselves invoked directly on a Query, either.
*/
Assert(!IsA(node, SubPlan));
Assert(!IsA(node, AlternativeSubPlan));
Assert(!IsA(node, Query));
/*
* Because make_subplan() could return an AND or OR clause, we have to
2005-10-14 22:49:52 -04:00
* take steps to preserve AND/OR flatness of a qual. We assume the input
* has been AND/OR flattened and so we need no recursion here.
*
* (Due to the coding here, we will not get called on the List subnodes of
* an AND; and the input is *not* yet in implicit-AND format. So no check
* is needed for a bare List.)
*
* Anywhere within the top-level AND/OR clause structure, we can tell
* make_subplan() that NULL and FALSE are interchangeable. So isTopQual
* propagates down in both cases. (Note that this is unlike the meaning
* of "top level qual" used in most other places in Postgres.)
*/
if (and_clause(node))
{
2004-08-29 01:07:03 -04:00
List *newargs = NIL;
ListCell *l;
/* Still at qual top-level */
locContext.isTopQual = context->isTopQual;
foreach(l, ((BoolExpr *) node)->args)
{
2004-08-29 01:07:03 -04:00
Node *newarg;
newarg = process_sublinks_mutator(lfirst(l), &locContext);
if (and_clause(newarg))
newargs = list_concat(newargs, ((BoolExpr *) newarg)->args);
else
newargs = lappend(newargs, newarg);
}
return (Node *) make_andclause(newargs);
}
if (or_clause(node))
{
2004-08-29 01:07:03 -04:00
List *newargs = NIL;
ListCell *l;
/* Still at qual top-level */
locContext.isTopQual = context->isTopQual;
foreach(l, ((BoolExpr *) node)->args)
{
2004-08-29 01:07:03 -04:00
Node *newarg;
newarg = process_sublinks_mutator(lfirst(l), &locContext);
if (or_clause(newarg))
newargs = list_concat(newargs, ((BoolExpr *) newarg)->args);
else
newargs = lappend(newargs, newarg);
}
return (Node *) make_orclause(newargs);
}
/*
* If we recurse down through anything other than an AND or OR node,
* we are definitely not at top qual level anymore.
*/
locContext.isTopQual = false;
return expression_tree_mutator(node,
process_sublinks_mutator,
(void *) &locContext);
}
/*
* SS_finalize_plan - do final sublink processing for a completed Plan.
*
* This recursively computes the extParam and allParam sets for every Plan
* node in the given plan tree. It also optionally attaches any previously
* generated InitPlans to the top plan node. (Any InitPlans should already
* have been put through SS_finalize_plan.)
*/
void
SS_finalize_plan(PlannerInfo *root, Plan *plan, bool attach_initplans)
{
Fix mis-calculation of extParam/allParam sets for plan nodes, as seen in bug #4290. The fundamental bug is that masking extParam by outer_params, as finalize_plan had been doing, caused us to lose the information that an initPlan depended on the output of a sibling initPlan. On reflection the best thing to do seemed to be not to try to adjust outer_params for this case but get rid of it entirely. The only thing it was really doing for us was to filter out param IDs associated with SubPlan nodes, and that can be done (with greater accuracy) while processing individual SubPlan nodes in finalize_primnode. This approach was vindicated by the discovery that the masking method was hiding a second bug: SS_finalize_plan failed to remove extParam bits for initPlan output params that were referenced in the main plan tree (it only got rid of those referenced by other initPlans). It's not clear that this caused any real problems, given the limited use of extParam by the executor, but it's certainly not what was intended. I originally thought that there was also a problem with needing to include indirect dependencies on external params in initPlans' param sets, but it turns out that the executor handles this correctly so long as the depended-on initPlan is earlier in the initPlans list than the one using its output. That seems a bit of a fragile assumption, but it is true at the moment, so I just documented it in some code comments rather than making what would be rather invasive changes to remove the assumption. Back-patch to 8.1. Previous versions don't have the case of initPlans referring to other initPlans' outputs, so while the existing logic is still questionable for them, there are not any known bugs to be fixed. So I'll refrain from changing them for now.
2008-07-09 21:17:29 -04:00
Bitmapset *valid_params,
*initExtParam,
*initSetParam;
Cost initplan_cost;
int paramid;
ListCell *l;
/*
* Examine any initPlans to determine the set of external params they
* reference, the set of output params they supply, and their total cost.
* We'll use at least some of this info below. (Note we are assuming that
* finalize_plan doesn't touch the initPlans.)
*
* In the case where attach_initplans is false, we are assuming that the
* existing initPlans are siblings that might supply params needed by the
* current plan.
*/
initExtParam = initSetParam = NULL;
initplan_cost = 0;
foreach(l, root->init_plans)
{
SubPlan *initsubplan = (SubPlan *) lfirst(l);
Plan *initplan = planner_subplan_get_plan(root, initsubplan);
ListCell *l2;
initExtParam = bms_add_members(initExtParam, initplan->extParam);
foreach(l2, initsubplan->setParam)
{
initSetParam = bms_add_member(initSetParam, lfirst_int(l2));
}
initplan_cost += initsubplan->startup_cost + initsubplan->per_call_cost;
}
/*
* Now determine the set of params that are validly referenceable in this
* query level; to wit, those available from outer query levels plus the
* output parameters of any initPlans. (We do not include output
* parameters of regular subplans. Those should only appear within the
* testexpr of SubPlan nodes, and are taken care of locally within
* finalize_primnode.)
*
* Note: this is a bit overly generous since some parameters of upper
* query levels might belong to query subtrees that don't include this
* query. However, valid_params is only a debugging crosscheck, so it
* doesn't seem worth expending lots of cycles to try to be exact.
*/
valid_params = bms_copy(initSetParam);
paramid = 0;
foreach(l, root->glob->paramlist)
{
PlannerParamItem *pitem = (PlannerParamItem *) lfirst(l);
if (pitem->abslevel < root->query_level)
{
/* valid outer-level parameter */
valid_params = bms_add_member(valid_params, paramid);
}
paramid++;
}
/* Also include the recursion working table, if any */
if (root->wt_param_id >= 0)
valid_params = bms_add_member(valid_params, root->wt_param_id);
/*
* Now recurse through plan tree.
*/
Fix mis-calculation of extParam/allParam sets for plan nodes, as seen in bug #4290. The fundamental bug is that masking extParam by outer_params, as finalize_plan had been doing, caused us to lose the information that an initPlan depended on the output of a sibling initPlan. On reflection the best thing to do seemed to be not to try to adjust outer_params for this case but get rid of it entirely. The only thing it was really doing for us was to filter out param IDs associated with SubPlan nodes, and that can be done (with greater accuracy) while processing individual SubPlan nodes in finalize_primnode. This approach was vindicated by the discovery that the masking method was hiding a second bug: SS_finalize_plan failed to remove extParam bits for initPlan output params that were referenced in the main plan tree (it only got rid of those referenced by other initPlans). It's not clear that this caused any real problems, given the limited use of extParam by the executor, but it's certainly not what was intended. I originally thought that there was also a problem with needing to include indirect dependencies on external params in initPlans' param sets, but it turns out that the executor handles this correctly so long as the depended-on initPlan is earlier in the initPlans list than the one using its output. That seems a bit of a fragile assumption, but it is true at the moment, so I just documented it in some code comments rather than making what would be rather invasive changes to remove the assumption. Back-patch to 8.1. Previous versions don't have the case of initPlans referring to other initPlans' outputs, so while the existing logic is still questionable for them, there are not any known bugs to be fixed. So I'll refrain from changing them for now.
2008-07-09 21:17:29 -04:00
(void) finalize_plan(root, plan, valid_params);
bms_free(valid_params);
/*
2005-10-14 22:49:52 -04:00
* Finally, attach any initPlans to the topmost plan node, and add their
* extParams to the topmost node's, too. However, any setParams of the
* initPlans should not be present in the topmost node's extParams, only
* in its allParams. (As of PG 8.1, it's possible that some initPlans
* have extParams that are setParams of other initPlans, so we have to
* take care of this situation explicitly.)
*
* We also add the eval cost of each initPlan to the startup cost of the
* top node. This is a conservative overestimate, since in fact each
* initPlan might be executed later than plan startup, or even not at all.
*/
if (attach_initplans)
{
plan->initPlan = root->init_plans;
root->init_plans = NIL; /* make sure they're not attached twice */
/* allParam must include all these params */
plan->allParam = bms_add_members(plan->allParam, initExtParam);
plan->allParam = bms_add_members(plan->allParam, initSetParam);
/* extParam must include any child extParam */
plan->extParam = bms_add_members(plan->extParam, initExtParam);
/* but extParam shouldn't include any setParams */
plan->extParam = bms_del_members(plan->extParam, initSetParam);
/* ensure extParam is exactly NULL if it's empty */
if (bms_is_empty(plan->extParam))
plan->extParam = NULL;
plan->startup_cost += initplan_cost;
plan->total_cost += initplan_cost;
}
}
/*
* Recursive processing of all nodes in the plan tree
*
* The return value is the computed allParam set for the given Plan node.
* This is just an internal notational convenience.
*/
static Bitmapset *
Fix mis-calculation of extParam/allParam sets for plan nodes, as seen in bug #4290. The fundamental bug is that masking extParam by outer_params, as finalize_plan had been doing, caused us to lose the information that an initPlan depended on the output of a sibling initPlan. On reflection the best thing to do seemed to be not to try to adjust outer_params for this case but get rid of it entirely. The only thing it was really doing for us was to filter out param IDs associated with SubPlan nodes, and that can be done (with greater accuracy) while processing individual SubPlan nodes in finalize_primnode. This approach was vindicated by the discovery that the masking method was hiding a second bug: SS_finalize_plan failed to remove extParam bits for initPlan output params that were referenced in the main plan tree (it only got rid of those referenced by other initPlans). It's not clear that this caused any real problems, given the limited use of extParam by the executor, but it's certainly not what was intended. I originally thought that there was also a problem with needing to include indirect dependencies on external params in initPlans' param sets, but it turns out that the executor handles this correctly so long as the depended-on initPlan is earlier in the initPlans list than the one using its output. That seems a bit of a fragile assumption, but it is true at the moment, so I just documented it in some code comments rather than making what would be rather invasive changes to remove the assumption. Back-patch to 8.1. Previous versions don't have the case of initPlans referring to other initPlans' outputs, so while the existing logic is still questionable for them, there are not any known bugs to be fixed. So I'll refrain from changing them for now.
2008-07-09 21:17:29 -04:00
finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params)
{
finalize_primnode_context context;
if (plan == NULL)
return NULL;
context.root = root;
context.paramids = NULL; /* initialize set to empty */
/*
2005-10-14 22:49:52 -04:00
* When we call finalize_primnode, context.paramids sets are automatically
* merged together. But when recursing to self, we have to do it the hard
* way. We want the paramids set to include params in subplans as well as
* at this level.
*/
/* Find params in targetlist and qual */
finalize_primnode((Node *) plan->targetlist, &context);
finalize_primnode((Node *) plan->qual, &context);
/* Check additional node-type-specific fields */
switch (nodeTag(plan))
{
case T_Result:
finalize_primnode(((Result *) plan)->resconstantqual,
&context);
break;
case T_IndexScan:
finalize_primnode((Node *) ((IndexScan *) plan)->indexqual,
&context);
/*
2005-10-14 22:49:52 -04:00
* we need not look at indexqualorig, since it will have the same
* param references as indexqual.
*/
break;
case T_BitmapIndexScan:
finalize_primnode((Node *) ((BitmapIndexScan *) plan)->indexqual,
&context);
2005-10-14 22:49:52 -04:00
/*
2005-10-14 22:49:52 -04:00
* we need not look at indexqualorig, since it will have the same
* param references as indexqual.
*/
break;
case T_BitmapHeapScan:
finalize_primnode((Node *) ((BitmapHeapScan *) plan)->bitmapqualorig,
&context);
break;
case T_TidScan:
finalize_primnode((Node *) ((TidScan *) plan)->tidquals,
&context);
break;
case T_SubqueryScan:
2001-03-21 23:01:46 -05:00
/*
2005-10-14 22:49:52 -04:00
* In a SubqueryScan, SS_finalize_plan has already been run on the
* subplan by the inner invocation of subquery_planner, so there's
* no need to do it again. Instead, just pull out the subplan's
* extParams list, which represents the params it needs from my
* level and higher levels.
*/
context.paramids = bms_add_members(context.paramids,
2005-10-14 22:49:52 -04:00
((SubqueryScan *) plan)->subplan->extParam);
break;
case T_FunctionScan:
finalize_primnode(((FunctionScan *) plan)->funcexpr,
&context);
break;
case T_ValuesScan:
finalize_primnode((Node *) ((ValuesScan *) plan)->values_lists,
&context);
break;
case T_CteScan:
context.paramids =
bms_add_member(context.paramids,
((CteScan *) plan)->cteParam);
break;
case T_WorkTableScan:
context.paramids =
bms_add_member(context.paramids,
((WorkTableScan *) plan)->wtParam);
break;
case T_Append:
{
2004-08-29 01:07:03 -04:00
ListCell *l;
foreach(l, ((Append *) plan)->appendplans)
{
context.paramids =
bms_add_members(context.paramids,
finalize_plan(root,
(Plan *) lfirst(l),
valid_params));
}
}
break;
case T_BitmapAnd:
{
ListCell *l;
foreach(l, ((BitmapAnd *) plan)->bitmapplans)
{
context.paramids =
bms_add_members(context.paramids,
finalize_plan(root,
(Plan *) lfirst(l),
valid_params));
}
}
break;
case T_BitmapOr:
{
ListCell *l;
foreach(l, ((BitmapOr *) plan)->bitmapplans)
{
context.paramids =
bms_add_members(context.paramids,
finalize_plan(root,
(Plan *) lfirst(l),
valid_params));
}
}
break;
case T_NestLoop:
finalize_primnode((Node *) ((Join *) plan)->joinqual,
&context);
break;
case T_MergeJoin:
finalize_primnode((Node *) ((Join *) plan)->joinqual,
&context);
finalize_primnode((Node *) ((MergeJoin *) plan)->mergeclauses,
&context);
break;
case T_HashJoin:
finalize_primnode((Node *) ((Join *) plan)->joinqual,
&context);
finalize_primnode((Node *) ((HashJoin *) plan)->hashclauses,
&context);
break;
case T_Limit:
finalize_primnode(((Limit *) plan)->limitOffset,
&context);
finalize_primnode(((Limit *) plan)->limitCount,
&context);
break;
case T_RecursiveUnion:
case T_Hash:
case T_Agg:
case T_WindowAgg:
case T_SeqScan:
case T_Material:
case T_Sort:
case T_Unique:
case T_SetOp:
case T_Group:
break;
default:
elog(ERROR, "unrecognized node type: %d",
(int) nodeTag(plan));
}
/* Process left and right child plans, if any */
context.paramids = bms_add_members(context.paramids,
finalize_plan(root,
plan->lefttree,
valid_params));
context.paramids = bms_add_members(context.paramids,
finalize_plan(root,
plan->righttree,
valid_params));
/*
* RecursiveUnion *generates* its worktable param, so don't bubble that up
*/
if (IsA(plan, RecursiveUnion))
{
context.paramids = bms_del_member(context.paramids,
((RecursiveUnion *) plan)->wtParam);
}
/* Now we have all the paramids */
if (!bms_is_subset(context.paramids, valid_params))
elog(ERROR, "plan should not reference subplan's variable");
/*
Fix mis-calculation of extParam/allParam sets for plan nodes, as seen in bug #4290. The fundamental bug is that masking extParam by outer_params, as finalize_plan had been doing, caused us to lose the information that an initPlan depended on the output of a sibling initPlan. On reflection the best thing to do seemed to be not to try to adjust outer_params for this case but get rid of it entirely. The only thing it was really doing for us was to filter out param IDs associated with SubPlan nodes, and that can be done (with greater accuracy) while processing individual SubPlan nodes in finalize_primnode. This approach was vindicated by the discovery that the masking method was hiding a second bug: SS_finalize_plan failed to remove extParam bits for initPlan output params that were referenced in the main plan tree (it only got rid of those referenced by other initPlans). It's not clear that this caused any real problems, given the limited use of extParam by the executor, but it's certainly not what was intended. I originally thought that there was also a problem with needing to include indirect dependencies on external params in initPlans' param sets, but it turns out that the executor handles this correctly so long as the depended-on initPlan is earlier in the initPlans list than the one using its output. That seems a bit of a fragile assumption, but it is true at the moment, so I just documented it in some code comments rather than making what would be rather invasive changes to remove the assumption. Back-patch to 8.1. Previous versions don't have the case of initPlans referring to other initPlans' outputs, so while the existing logic is still questionable for them, there are not any known bugs to be fixed. So I'll refrain from changing them for now.
2008-07-09 21:17:29 -04:00
* Note: by definition, extParam and allParam should have the same value
* in any plan node that doesn't have child initPlans. We set them
* equal here, and later SS_finalize_plan will update them properly
* in node(s) that it attaches initPlans to.
*
2005-10-14 22:49:52 -04:00
* For speed at execution time, make sure extParam/allParam are actually
* NULL if they are empty sets.
*/
Fix mis-calculation of extParam/allParam sets for plan nodes, as seen in bug #4290. The fundamental bug is that masking extParam by outer_params, as finalize_plan had been doing, caused us to lose the information that an initPlan depended on the output of a sibling initPlan. On reflection the best thing to do seemed to be not to try to adjust outer_params for this case but get rid of it entirely. The only thing it was really doing for us was to filter out param IDs associated with SubPlan nodes, and that can be done (with greater accuracy) while processing individual SubPlan nodes in finalize_primnode. This approach was vindicated by the discovery that the masking method was hiding a second bug: SS_finalize_plan failed to remove extParam bits for initPlan output params that were referenced in the main plan tree (it only got rid of those referenced by other initPlans). It's not clear that this caused any real problems, given the limited use of extParam by the executor, but it's certainly not what was intended. I originally thought that there was also a problem with needing to include indirect dependencies on external params in initPlans' param sets, but it turns out that the executor handles this correctly so long as the depended-on initPlan is earlier in the initPlans list than the one using its output. That seems a bit of a fragile assumption, but it is true at the moment, so I just documented it in some code comments rather than making what would be rather invasive changes to remove the assumption. Back-patch to 8.1. Previous versions don't have the case of initPlans referring to other initPlans' outputs, so while the existing logic is still questionable for them, there are not any known bugs to be fixed. So I'll refrain from changing them for now.
2008-07-09 21:17:29 -04:00
if (bms_is_empty(context.paramids))
{
plan->extParam = NULL;
Fix mis-calculation of extParam/allParam sets for plan nodes, as seen in bug #4290. The fundamental bug is that masking extParam by outer_params, as finalize_plan had been doing, caused us to lose the information that an initPlan depended on the output of a sibling initPlan. On reflection the best thing to do seemed to be not to try to adjust outer_params for this case but get rid of it entirely. The only thing it was really doing for us was to filter out param IDs associated with SubPlan nodes, and that can be done (with greater accuracy) while processing individual SubPlan nodes in finalize_primnode. This approach was vindicated by the discovery that the masking method was hiding a second bug: SS_finalize_plan failed to remove extParam bits for initPlan output params that were referenced in the main plan tree (it only got rid of those referenced by other initPlans). It's not clear that this caused any real problems, given the limited use of extParam by the executor, but it's certainly not what was intended. I originally thought that there was also a problem with needing to include indirect dependencies on external params in initPlans' param sets, but it turns out that the executor handles this correctly so long as the depended-on initPlan is earlier in the initPlans list than the one using its output. That seems a bit of a fragile assumption, but it is true at the moment, so I just documented it in some code comments rather than making what would be rather invasive changes to remove the assumption. Back-patch to 8.1. Previous versions don't have the case of initPlans referring to other initPlans' outputs, so while the existing logic is still questionable for them, there are not any known bugs to be fixed. So I'll refrain from changing them for now.
2008-07-09 21:17:29 -04:00
plan->allParam = NULL;
}
Fix mis-calculation of extParam/allParam sets for plan nodes, as seen in bug #4290. The fundamental bug is that masking extParam by outer_params, as finalize_plan had been doing, caused us to lose the information that an initPlan depended on the output of a sibling initPlan. On reflection the best thing to do seemed to be not to try to adjust outer_params for this case but get rid of it entirely. The only thing it was really doing for us was to filter out param IDs associated with SubPlan nodes, and that can be done (with greater accuracy) while processing individual SubPlan nodes in finalize_primnode. This approach was vindicated by the discovery that the masking method was hiding a second bug: SS_finalize_plan failed to remove extParam bits for initPlan output params that were referenced in the main plan tree (it only got rid of those referenced by other initPlans). It's not clear that this caused any real problems, given the limited use of extParam by the executor, but it's certainly not what was intended. I originally thought that there was also a problem with needing to include indirect dependencies on external params in initPlans' param sets, but it turns out that the executor handles this correctly so long as the depended-on initPlan is earlier in the initPlans list than the one using its output. That seems a bit of a fragile assumption, but it is true at the moment, so I just documented it in some code comments rather than making what would be rather invasive changes to remove the assumption. Back-patch to 8.1. Previous versions don't have the case of initPlans referring to other initPlans' outputs, so while the existing logic is still questionable for them, there are not any known bugs to be fixed. So I'll refrain from changing them for now.
2008-07-09 21:17:29 -04:00
else
{
Fix mis-calculation of extParam/allParam sets for plan nodes, as seen in bug #4290. The fundamental bug is that masking extParam by outer_params, as finalize_plan had been doing, caused us to lose the information that an initPlan depended on the output of a sibling initPlan. On reflection the best thing to do seemed to be not to try to adjust outer_params for this case but get rid of it entirely. The only thing it was really doing for us was to filter out param IDs associated with SubPlan nodes, and that can be done (with greater accuracy) while processing individual SubPlan nodes in finalize_primnode. This approach was vindicated by the discovery that the masking method was hiding a second bug: SS_finalize_plan failed to remove extParam bits for initPlan output params that were referenced in the main plan tree (it only got rid of those referenced by other initPlans). It's not clear that this caused any real problems, given the limited use of extParam by the executor, but it's certainly not what was intended. I originally thought that there was also a problem with needing to include indirect dependencies on external params in initPlans' param sets, but it turns out that the executor handles this correctly so long as the depended-on initPlan is earlier in the initPlans list than the one using its output. That seems a bit of a fragile assumption, but it is true at the moment, so I just documented it in some code comments rather than making what would be rather invasive changes to remove the assumption. Back-patch to 8.1. Previous versions don't have the case of initPlans referring to other initPlans' outputs, so while the existing logic is still questionable for them, there are not any known bugs to be fixed. So I'll refrain from changing them for now.
2008-07-09 21:17:29 -04:00
plan->extParam = context.paramids;
plan->allParam = bms_copy(context.paramids);
}
return plan->allParam;
}
/*
* finalize_primnode: add IDs of all PARAM_EXEC params appearing in the given
* expression tree to the result set.
*/
static bool
finalize_primnode(Node *node, finalize_primnode_context *context)
{
if (node == NULL)
return false;
if (IsA(node, Param))
{
if (((Param *) node)->paramkind == PARAM_EXEC)
{
int paramid = ((Param *) node)->paramid;
context->paramids = bms_add_member(context->paramids, paramid);
}
return false; /* no more to do here */
}
if (IsA(node, SubPlan))
{
2003-08-03 20:43:34 -04:00
SubPlan *subplan = (SubPlan *) node;
Plan *plan = planner_subplan_get_plan(context->root, subplan);
Fix mis-calculation of extParam/allParam sets for plan nodes, as seen in bug #4290. The fundamental bug is that masking extParam by outer_params, as finalize_plan had been doing, caused us to lose the information that an initPlan depended on the output of a sibling initPlan. On reflection the best thing to do seemed to be not to try to adjust outer_params for this case but get rid of it entirely. The only thing it was really doing for us was to filter out param IDs associated with SubPlan nodes, and that can be done (with greater accuracy) while processing individual SubPlan nodes in finalize_primnode. This approach was vindicated by the discovery that the masking method was hiding a second bug: SS_finalize_plan failed to remove extParam bits for initPlan output params that were referenced in the main plan tree (it only got rid of those referenced by other initPlans). It's not clear that this caused any real problems, given the limited use of extParam by the executor, but it's certainly not what was intended. I originally thought that there was also a problem with needing to include indirect dependencies on external params in initPlans' param sets, but it turns out that the executor handles this correctly so long as the depended-on initPlan is earlier in the initPlans list than the one using its output. That seems a bit of a fragile assumption, but it is true at the moment, so I just documented it in some code comments rather than making what would be rather invasive changes to remove the assumption. Back-patch to 8.1. Previous versions don't have the case of initPlans referring to other initPlans' outputs, so while the existing logic is still questionable for them, there are not any known bugs to be fixed. So I'll refrain from changing them for now.
2008-07-09 21:17:29 -04:00
ListCell *lc;
Bitmapset *subparamids;
/* Recurse into the testexpr, but not into the Plan */
finalize_primnode(subplan->testexpr, context);
/*
* Remove any param IDs of output parameters of the subplan that were
* referenced in the testexpr. These are not interesting for
* parameter change signaling since we always re-evaluate the subplan.
* Note that this wouldn't work too well if there might be uses of the
* same param IDs elsewhere in the plan, but that can't happen because
* generate_new_param never tries to merge params.
*/
foreach(lc, subplan->paramIds)
{
context->paramids = bms_del_member(context->paramids,
lfirst_int(lc));
}
Fix mis-calculation of extParam/allParam sets for plan nodes, as seen in bug #4290. The fundamental bug is that masking extParam by outer_params, as finalize_plan had been doing, caused us to lose the information that an initPlan depended on the output of a sibling initPlan. On reflection the best thing to do seemed to be not to try to adjust outer_params for this case but get rid of it entirely. The only thing it was really doing for us was to filter out param IDs associated with SubPlan nodes, and that can be done (with greater accuracy) while processing individual SubPlan nodes in finalize_primnode. This approach was vindicated by the discovery that the masking method was hiding a second bug: SS_finalize_plan failed to remove extParam bits for initPlan output params that were referenced in the main plan tree (it only got rid of those referenced by other initPlans). It's not clear that this caused any real problems, given the limited use of extParam by the executor, but it's certainly not what was intended. I originally thought that there was also a problem with needing to include indirect dependencies on external params in initPlans' param sets, but it turns out that the executor handles this correctly so long as the depended-on initPlan is earlier in the initPlans list than the one using its output. That seems a bit of a fragile assumption, but it is true at the moment, so I just documented it in some code comments rather than making what would be rather invasive changes to remove the assumption. Back-patch to 8.1. Previous versions don't have the case of initPlans referring to other initPlans' outputs, so while the existing logic is still questionable for them, there are not any known bugs to be fixed. So I'll refrain from changing them for now.
2008-07-09 21:17:29 -04:00
/* Also examine args list */
finalize_primnode((Node *) subplan->args, context);
/*
* Add params needed by the subplan to paramids, but excluding those
* we will pass down to it.
*/
subparamids = bms_copy(plan->extParam);
foreach(lc, subplan->parParam)
{
subparamids = bms_del_member(subparamids, lfirst_int(lc));
}
context->paramids = bms_join(context->paramids, subparamids);
return false; /* no more to do here */
}
return expression_tree_walker(node, finalize_primnode,
(void *) context);
}
/*
* SS_make_initplan_from_plan - given a plan tree, make it an InitPlan
*
* The plan is expected to return a scalar value of the indicated type.
* We build an EXPR_SUBLINK SubPlan node and put it into the initplan
* list for the current query level. A Param that represents the initplan's
* output is returned.
*
* We assume the plan hasn't been put through SS_finalize_plan.
*/
Param *
SS_make_initplan_from_plan(PlannerInfo *root, Plan *plan,
Oid resulttype, int32 resulttypmod)
{
SubPlan *node;
Param *prm;
/*
* We must run SS_finalize_plan(), since that's normally done before a
* subplan gets put into the initplan list. Tell it not to attach any
* pre-existing initplans to this one, since they are siblings not
* children of this initplan. (This is something else that could perhaps
* be cleaner if we did extParam/allParam processing in setrefs.c instead
* of here? See notes for materialize_finished_plan.)
*/
/*
* Build extParam/allParam sets for plan nodes.
*/
SS_finalize_plan(root, plan, false);
/*
* Add the subplan and its rtable to the global lists.
*/
root->glob->subplans = lappend(root->glob->subplans,
plan);
root->glob->subrtables = lappend(root->glob->subrtables,
root->parse->rtable);
/*
* Create a SubPlan node and add it to the outer list of InitPlans.
* Note it has to appear after any other InitPlans it might depend on
* (see comments in ExecReScan).
*/
node = makeNode(SubPlan);
node->subLinkType = EXPR_SUBLINK;
node->firstColType = get_first_col_type(plan);
node->plan_id = list_length(root->glob->subplans);
root->init_plans = lappend(root->init_plans, node);
/*
* The node can't have any inputs (since it's an initplan), so the
* parParam and args lists remain empty.
*/
cost_subplan(root, node, plan);
/*
* Make a Param that will be the subplan's output.
*/
prm = generate_new_param(root, resulttype, resulttypmod);
node->setParam = list_make1_int(prm->paramid);
return prm;
}