2008-10-04 17:56:55 -04:00
|
|
|
/*-------------------------------------------------------------------------
|
|
|
|
|
*
|
|
|
|
|
* nodeWorktablescan.c
|
|
|
|
|
* routines to handle WorkTableScan nodes.
|
|
|
|
|
*
|
2026-01-01 13:24:10 -05:00
|
|
|
* Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
|
2008-10-04 17:56:55 -04:00
|
|
|
* Portions Copyright (c) 1994, Regents of the University of California
|
|
|
|
|
*
|
|
|
|
|
*
|
|
|
|
|
* IDENTIFICATION
|
2010-09-20 16:08:53 -04:00
|
|
|
* src/backend/executor/nodeWorktablescan.c
|
2008-10-04 17:56:55 -04:00
|
|
|
*
|
|
|
|
|
*-------------------------------------------------------------------------
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
#include "postgres.h"
|
|
|
|
|
|
2024-03-04 06:00:11 -05:00
|
|
|
#include "executor/executor.h"
|
2008-10-04 17:56:55 -04:00
|
|
|
#include "executor/nodeWorktablescan.h"
|
|
|
|
|
|
|
|
|
|
static TupleTableSlot *WorkTableScanNext(WorkTableScanState *node);
|
|
|
|
|
|
|
|
|
|
/* ----------------------------------------------------------------
|
|
|
|
|
* WorkTableScanNext
|
|
|
|
|
*
|
|
|
|
|
* This is a workhorse for ExecWorkTableScan
|
|
|
|
|
* ----------------------------------------------------------------
|
|
|
|
|
*/
|
|
|
|
|
static TupleTableSlot *
|
|
|
|
|
WorkTableScanNext(WorkTableScanState *node)
|
|
|
|
|
{
|
|
|
|
|
TupleTableSlot *slot;
|
|
|
|
|
Tuplestorestate *tuplestorestate;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* get information from the estate and scan state
|
2008-10-28 13:13:51 -04:00
|
|
|
*
|
|
|
|
|
* Note: we intentionally do not support backward scan. Although it would
|
|
|
|
|
* take only a couple more lines here, it would force nodeRecursiveunion.c
|
|
|
|
|
* to create the tuplestore with backward scan enabled, which has a
|
|
|
|
|
* performance cost. In practice backward scan is never useful for a
|
|
|
|
|
* worktable plan node, since it cannot appear high enough in the plan
|
|
|
|
|
* tree of a scrollable cursor to be exposed to a backward-scan
|
|
|
|
|
* requirement. So it's not worth expending effort to support it.
|
2009-03-27 14:30:21 -04:00
|
|
|
*
|
|
|
|
|
* Note: we are also assuming that this node is the only reader of the
|
|
|
|
|
* worktable. Therefore, we don't need a private read pointer for the
|
|
|
|
|
* tuplestore, nor do we need to tell tuplestore_gettupleslot to copy.
|
2008-10-04 17:56:55 -04:00
|
|
|
*/
|
2012-03-21 17:30:14 -04:00
|
|
|
Assert(ScanDirectionIsForward(node->ss.ps.state->es_direction));
|
2008-10-04 17:56:55 -04:00
|
|
|
|
|
|
|
|
tuplestorestate = node->rustate->working_table;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Get the next tuple from tuplestore. Return NULL if no more tuples.
|
|
|
|
|
*/
|
|
|
|
|
slot = node->ss.ss_ScanTupleSlot;
|
2009-03-27 14:30:21 -04:00
|
|
|
(void) tuplestore_gettupleslot(tuplestorestate, true, false, slot);
|
2008-10-04 17:56:55 -04:00
|
|
|
return slot;
|
|
|
|
|
}
|
|
|
|
|
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-25 22:26:45 -04:00
|
|
|
/*
|
|
|
|
|
* WorkTableScanRecheck -- access method routine to recheck a tuple in EvalPlanQual
|
|
|
|
|
*/
|
|
|
|
|
static bool
|
|
|
|
|
WorkTableScanRecheck(WorkTableScanState *node, TupleTableSlot *slot)
|
|
|
|
|
{
|
|
|
|
|
/* nothing to check */
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
2008-10-04 17:56:55 -04:00
|
|
|
/* ----------------------------------------------------------------
|
|
|
|
|
* ExecWorkTableScan(node)
|
|
|
|
|
*
|
|
|
|
|
* Scans the worktable sequentially and returns the next qualifying tuple.
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-25 22:26:45 -04:00
|
|
|
* We call the ExecScan() routine and pass it the appropriate
|
|
|
|
|
* access method functions.
|
2008-10-04 17:56:55 -04:00
|
|
|
* ----------------------------------------------------------------
|
|
|
|
|
*/
|
2017-07-17 03:33:49 -04:00
|
|
|
static TupleTableSlot *
|
|
|
|
|
ExecWorkTableScan(PlanState *pstate)
|
2008-10-04 17:56:55 -04:00
|
|
|
{
|
2017-07-17 03:33:49 -04:00
|
|
|
WorkTableScanState *node = castNode(WorkTableScanState, pstate);
|
|
|
|
|
|
2008-10-12 20:41:41 -04:00
|
|
|
/*
|
|
|
|
|
* On the first call, find the ancestor RecursiveUnion's state via the
|
|
|
|
|
* Param slot reserved for it. (We can't do this during node init because
|
|
|
|
|
* there are corner cases where we'll get the init call before the
|
|
|
|
|
* RecursiveUnion does.)
|
|
|
|
|
*/
|
|
|
|
|
if (node->rustate == NULL)
|
|
|
|
|
{
|
|
|
|
|
WorkTableScan *plan = (WorkTableScan *) node->ss.ps.plan;
|
|
|
|
|
EState *estate = node->ss.ps.state;
|
|
|
|
|
ParamExecData *param;
|
|
|
|
|
|
|
|
|
|
param = &(estate->es_param_exec_vals[plan->wtParam]);
|
|
|
|
|
Assert(param->execPlan == NULL);
|
|
|
|
|
Assert(!param->isnull);
|
2017-01-26 19:47:03 -05:00
|
|
|
node->rustate = castNode(RecursiveUnionState, DatumGetPointer(param->value));
|
|
|
|
|
Assert(node->rustate);
|
2008-10-12 20:41:41 -04:00
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* The scan tuple type (ie, the rowtype we expect to find in the work
|
|
|
|
|
* table) is the same as the result rowtype of the ancestor
|
|
|
|
|
* RecursiveUnion node. Note this depends on the assumption that
|
|
|
|
|
* RecursiveUnion doesn't allow projection.
|
|
|
|
|
*/
|
|
|
|
|
ExecAssignScanType(&node->ss,
|
|
|
|
|
ExecGetResultType(&node->rustate->ps));
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Now we can initialize the projection info. This must be completed
|
|
|
|
|
* before we can call ExecScan().
|
|
|
|
|
*/
|
|
|
|
|
ExecAssignScanProjectionInfo(&node->ss);
|
|
|
|
|
}
|
|
|
|
|
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-25 22:26:45 -04:00
|
|
|
return ExecScan(&node->ss,
|
|
|
|
|
(ExecScanAccessMtd) WorkTableScanNext,
|
|
|
|
|
(ExecScanRecheckMtd) WorkTableScanRecheck);
|
2008-10-04 17:56:55 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* ----------------------------------------------------------------
|
|
|
|
|
* ExecInitWorkTableScan
|
|
|
|
|
* ----------------------------------------------------------------
|
|
|
|
|
*/
|
|
|
|
|
WorkTableScanState *
|
|
|
|
|
ExecInitWorkTableScan(WorkTableScan *node, EState *estate, int eflags)
|
|
|
|
|
{
|
|
|
|
|
WorkTableScanState *scanstate;
|
|
|
|
|
|
|
|
|
|
/* check for unsupported flags */
|
2008-10-28 13:13:51 -04:00
|
|
|
Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
|
2008-10-04 17:56:55 -04:00
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* WorkTableScan should not have any children.
|
|
|
|
|
*/
|
|
|
|
|
Assert(outerPlan(node) == NULL);
|
|
|
|
|
Assert(innerPlan(node) == NULL);
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* create new WorkTableScanState for node
|
|
|
|
|
*/
|
|
|
|
|
scanstate = makeNode(WorkTableScanState);
|
|
|
|
|
scanstate->ss.ps.plan = (Plan *) node;
|
|
|
|
|
scanstate->ss.ps.state = estate;
|
2017-07-17 03:33:49 -04:00
|
|
|
scanstate->ss.ps.ExecProcNode = ExecWorkTableScan;
|
2008-10-12 20:41:41 -04:00
|
|
|
scanstate->rustate = NULL; /* we'll set this later */
|
2008-10-04 17:56:55 -04:00
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Miscellaneous initialization
|
|
|
|
|
*
|
|
|
|
|
* create expression context for node
|
|
|
|
|
*/
|
|
|
|
|
ExecAssignExprContext(estate, &scanstate->ss.ps);
|
|
|
|
|
|
|
|
|
|
/*
|
2018-02-17 00:17:38 -05:00
|
|
|
* tuple table initialization
|
2008-10-04 17:56:55 -04:00
|
|
|
*/
|
Don't require return slots for nodes without projection.
In a lot of nodes the return slot is not required. That can either be
because the node doesn't do any projection (say an Append node), or
because the node does perform projections but the projection is
optimized away because the projection would yield an identical row.
Slots aren't that small, especially for wide rows, so it's worthwhile
to avoid creating them. It's not possible to just skip creating the
slot - it's currently used to determine the tuple descriptor returned
by ExecGetResultType(). So separate the determination of the result
type from the slot creation. The work previously done internally
ExecInitResultTupleSlotTL() can now also be done separately with
ExecInitResultTypeTL() and ExecInitResultSlot(). That way nodes that
aren't guaranteed to need a result slot, can use
ExecInitResultTypeTL() to determine the result type of the node, and
ExecAssignScanProjectionInfo() (via
ExecConditionalAssignProjectionInfo()) determines that a result slot
is needed, it is created with ExecInitResultSlot().
Besides the advantage of avoiding to create slots that then are
unused, this is necessary preparation for later patches around tuple
table slot abstraction. In particular separating the return descriptor
and slot is a prerequisite to allow JITing of tuple deforming with
knowledge of the underlying tuple format, and to avoid unnecessarily
creating JITed tuple deforming for virtual slots.
This commit removes a redundant argument from
ExecInitResultTupleSlotTL(). While this commit touches a lot of the
relevant lines anyway, it'd normally still not worthwhile to cause
breakage, except that aforementioned later commits will touch *all*
ExecInitResultTupleSlotTL() callers anyway (but fits worse
thematically).
Author: Andres Freund
Discussion: https://postgr.es/m/20181105210039.hh4vvi4vwoq5ba2q@alap3.anarazel.de
2018-11-09 20:19:39 -05:00
|
|
|
ExecInitResultTypeTL(&scanstate->ss.ps);
|
Introduce notion of different types of slots (without implementing them).
Upcoming work intends to allow pluggable ways to introduce new ways of
storing table data. Accessing those table access methods from the
executor requires TupleTableSlots to be carry tuples in the native
format of such storage methods; otherwise there'll be a significant
conversion overhead.
Different access methods will require different data to store tuples
efficiently (just like virtual, minimal, heap already require fields
in TupleTableSlot). To allow that without requiring additional pointer
indirections, we want to have different structs (embedding
TupleTableSlot) for different types of slots. Thus different types of
slots are needed, which requires adapting creators of slots.
The slot that most efficiently can represent a type of tuple in an
executor node will often depend on the type of slot a child node
uses. Therefore we need to track the type of slot is returned by
nodes, so parent slots can create slots based on that.
Relatedly, JIT compilation of tuple deforming needs to know which type
of slot a certain expression refers to, so it can create an
appropriate deforming function for the type of tuple in the slot.
But not all nodes will only return one type of slot, e.g. an append
node will potentially return different types of slots for each of its
subplans.
Therefore add function that allows to query the type of a node's
result slot, and whether it'll always be the same type (whether it's
fixed). This can be queried using ExecGetResultSlotOps().
The scan, result, inner, outer type of slots are automatically
inferred from ExecInitScanTupleSlot(), ExecInitResultSlot(),
left/right subtrees respectively. If that's not correct for a node,
that can be overwritten using new fields in PlanState.
This commit does not introduce the actually abstracted implementation
of different kind of TupleTableSlots, that will be left for a followup
commit. The different types of slots introduced will, for now, still
use the same backing implementation.
While this already partially invalidates the big comment in
tuptable.h, it seems to make more sense to update it later, when the
different TupleTableSlot implementations actually exist.
Author: Ashutosh Bapat and Andres Freund, with changes by Amit Khandekar
Discussion: https://postgr.es/m/20181105210039.hh4vvi4vwoq5ba2q@alap3.anarazel.de
2018-11-16 01:00:30 -05:00
|
|
|
|
|
|
|
|
/* signal that return type is not yet known */
|
|
|
|
|
scanstate->ss.ps.resultopsset = true;
|
|
|
|
|
scanstate->ss.ps.resultopsfixed = false;
|
|
|
|
|
|
Optimize tuple deformation
This commit includes various optimizations to improve the performance of
tuple deformation.
We now precalculate CompactAttribute's attcacheoff, which allows us to
remove the code from the deform routines which was setting the
attcacheoff. Setting the attcacheoff is now handled by
TupleDescFinalize(), which must be called before the TupleDesc is used for
anything. Having TupleDescFinalize() means we can store the first
attribute in the TupleDesc which does not have an offset cached. That
allows us to add a dedicated deforming loop to deform all attributes up
to the final one with an attcacheoff set, or up to the first NULL
attribute, whichever comes first.
Here we also improve tuple deformation performance of tuples with NULLs.
Previously, if the HEAP_HASNULL bit was set in the tuple's t_infomask,
deforming would, one-by-one, check each and every bit in the NULL bitmap
to see if it was zero. Now, we process the NULL bitmap 1 byte at a time
rather than 1 bit at a time to find the attnum with the first NULL. We
can now deform the tuple without checking for NULLs up to just before that
attribute.
We also record the maximum attribute number which is guaranteed to exist
in the tuple, that is, has a NOT NULL constraint and isn't an
atthasmissing attribute. When deforming only attributes prior to the
guaranteed attnum, we've no need to access the tuple's natt count. As an
additional optimization, we only count fixed-width columns when
calculating the maximum guaranteed column, as this eliminates the need to
emit code to fetch byref types in the deformation loop for guaranteed
attributes.
Some locations in the code deform tuples that have yet to go through NOT
NULL constraint validation. We're unable to perform the guaranteed
attribute optimization when that's the case. This optimization is opt-in
via the TupleTableSlot using the TTS_FLAG_OBEYS_NOT_NULL_CONSTRAINTS
flag.
This commit also adds a more efficient way of populating the isnull
array by using a bit-wise SWAR trick which performs multiplication on the
inverse of the tuple's bitmap byte and masking out all but the lower bit
of each of the boolean's byte. This results in much more optimal code
when compared to determining the NULLness via att_isnull(). 8 isnull
elements are processed at once using this method, which means we need to
round the tts_isnull array size up to the next 8 bytes. The palloc code
does this anyway, but the round-up needed to be formalized so as not to
overwrite the sentinel byte in MEMORY_CONTEXT_CHECKING builds. Doing
this also allows the NULL-checking deforming loop to more efficiently
check the isnull array, rather than doing the bit-wise processing for each
attribute that att_isnull() does.
The level of performance improvement from these changes seems to vary
depending on the CPU architecture. Apple's M chips seem particularly
fond of the changes, with some of the tested deform-heavy queries going
over twice as fast as before. With x86-64, the speedups aren't quite as
large. With tables containing only a small number of columns, the
speedups will be less.
Author: David Rowley <dgrowleyml@gmail.com>
Reviewed-by: Chao Li <li.evan.chao@gmail.com>
Reviewed-by: Andres Freund <andres@anarazel.de>
Reviewed-by: John Naylor <johncnaylorls@gmail.com>
Reviewed-by: Amit Langote <amitlangote09@gmail.com>
Reviewed-by: Zsolt Parragi <zsolt.parragi@percona.com>
Reviewed-by: Álvaro Herrera <alvherre@kurilemu.de>
Reviewed-by: Junwang Zhao <zhjwpku@gmail.com>
Discussion: https://postgr.es/m/CAApHDvpoFjaj3%2Bw_jD5uPnGazaw41A71tVJokLDJg2zfcigpMQ%40mail.gmail.com
2026-03-15 18:46:00 -04:00
|
|
|
ExecInitScanTupleSlot(estate, &scanstate->ss, NULL, &TTSOpsMinimalTuple, 0);
|
2008-10-04 17:56:55 -04:00
|
|
|
|
|
|
|
|
/*
|
2018-02-17 00:17:38 -05:00
|
|
|
* initialize child expressions
|
2008-10-04 17:56:55 -04:00
|
|
|
*/
|
2018-02-17 00:17:38 -05:00
|
|
|
scanstate->ss.ps.qual =
|
|
|
|
|
ExecInitQual(node->scan.plan.qual, (PlanState *) scanstate);
|
2008-10-04 17:56:55 -04:00
|
|
|
|
|
|
|
|
/*
|
2018-02-17 00:17:38 -05:00
|
|
|
* Do not yet initialize projection info, see ExecWorkTableScan() for
|
|
|
|
|
* details.
|
2008-10-04 17:56:55 -04:00
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
return scanstate;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* ----------------------------------------------------------------
|
2010-07-12 13:01:06 -04:00
|
|
|
* ExecReScanWorkTableScan
|
2008-10-04 17:56:55 -04:00
|
|
|
*
|
|
|
|
|
* Rescans the relation.
|
|
|
|
|
* ----------------------------------------------------------------
|
|
|
|
|
*/
|
|
|
|
|
void
|
2010-07-12 13:01:06 -04:00
|
|
|
ExecReScanWorkTableScan(WorkTableScanState *node)
|
2008-10-04 17:56:55 -04:00
|
|
|
{
|
Don't require return slots for nodes without projection.
In a lot of nodes the return slot is not required. That can either be
because the node doesn't do any projection (say an Append node), or
because the node does perform projections but the projection is
optimized away because the projection would yield an identical row.
Slots aren't that small, especially for wide rows, so it's worthwhile
to avoid creating them. It's not possible to just skip creating the
slot - it's currently used to determine the tuple descriptor returned
by ExecGetResultType(). So separate the determination of the result
type from the slot creation. The work previously done internally
ExecInitResultTupleSlotTL() can now also be done separately with
ExecInitResultTypeTL() and ExecInitResultSlot(). That way nodes that
aren't guaranteed to need a result slot, can use
ExecInitResultTypeTL() to determine the result type of the node, and
ExecAssignScanProjectionInfo() (via
ExecConditionalAssignProjectionInfo()) determines that a result slot
is needed, it is created with ExecInitResultSlot().
Besides the advantage of avoiding to create slots that then are
unused, this is necessary preparation for later patches around tuple
table slot abstraction. In particular separating the return descriptor
and slot is a prerequisite to allow JITing of tuple deforming with
knowledge of the underlying tuple format, and to avoid unnecessarily
creating JITed tuple deforming for virtual slots.
This commit removes a redundant argument from
ExecInitResultTupleSlotTL(). While this commit touches a lot of the
relevant lines anyway, it'd normally still not worthwhile to cause
breakage, except that aforementioned later commits will touch *all*
ExecInitResultTupleSlotTL() callers anyway (but fits worse
thematically).
Author: Andres Freund
Discussion: https://postgr.es/m/20181105210039.hh4vvi4vwoq5ba2q@alap3.anarazel.de
2018-11-09 20:19:39 -05:00
|
|
|
if (node->ss.ps.ps_ResultTupleSlot)
|
|
|
|
|
ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-25 22:26:45 -04:00
|
|
|
|
|
|
|
|
ExecScanReScan(&node->ss);
|
2008-10-23 11:29:23 -04:00
|
|
|
|
2008-10-12 20:41:41 -04:00
|
|
|
/* No need (or way) to rescan if ExecWorkTableScan not called yet */
|
|
|
|
|
if (node->rustate)
|
|
|
|
|
tuplestore_rescan(node->rustate->working_table);
|
2008-10-04 17:56:55 -04:00
|
|
|
}
|