Thread flags through begin-scan APIs

Add a caller-settable flags parameter to several of the table scan
functions, one table AM callback (index_fetch_begin), index_beginscan(),
and index_beginscan_parallel(). This allows users of the table AM API to
pass additional context to be used when building the scan descriptors.

For index scans, a new flags field is added to IndexFetchTableData, and
the heap AM saves the caller-provided flags there.

This introduces an extension point for follow-up work to pass per-scan
information (such as whether the relation is read-only for the current
query) from the executor to the AM layer.

Author: Melanie Plageman <melanieplageman@gmail.com>
Reviewed-by: Andres Freund <andres@anarazel.de>
Reviewed-by: Tomas Vondra <tomas@vondra.me>
Reviewed-by: David Rowley <dgrowleyml@gmail.com>
Reviewed-by: Chao Li <li.evan.chao@gmail.com>
Discussion: https://postgr.es/m/2be31f17-5405-4de9-8d73-90ebc322f7d8%40vondra.me
This commit is contained in:
Melanie Plageman 2026-03-30 12:27:24 -04:00
parent 095555daf1
commit dcd8cc1c85
26 changed files with 185 additions and 86 deletions

View file

@ -115,7 +115,7 @@ pgrowlocks(PG_FUNCTION_ARGS)
RelationGetRelationName(rel));
/* Scan the relation */
scan = table_beginscan(rel, GetActiveSnapshot(), 0, NULL);
scan = table_beginscan(rel, GetActiveSnapshot(), 0, NULL, SO_NONE);
hscan = (HeapScanDesc) scan;
attinmeta = TupleDescGetAttInMetadata(rsinfo->setDesc);

View file

@ -2844,7 +2844,8 @@ _brin_parallel_scan_and_build(BrinBuildState *state,
indexInfo->ii_Concurrent = brinshared->isconcurrent;
scan = table_beginscan_parallel(heap,
ParallelTableScanFromBrinShared(brinshared));
ParallelTableScanFromBrinShared(brinshared),
SO_NONE);
reltuples = table_index_build_scan(heap, index, indexInfo, true, true,
brinbuildCallbackParallel, state, scan);

View file

@ -2068,7 +2068,8 @@ _gin_parallel_scan_and_build(GinBuildState *state,
indexInfo->ii_Concurrent = ginshared->isconcurrent;
scan = table_beginscan_parallel(heap,
ParallelTableScanFromGinBuildShared(ginshared));
ParallelTableScanFromGinBuildShared(ginshared),
SO_NONE);
reltuples = table_index_build_scan(heap, index, indexInfo, true, progress,
ginBuildCallbackParallel, state, scan);

View file

@ -81,11 +81,12 @@ heapam_slot_callbacks(Relation relation)
*/
static IndexFetchTableData *
heapam_index_fetch_begin(Relation rel)
heapam_index_fetch_begin(Relation rel, uint32 flags)
{
IndexFetchHeapData *hscan = palloc0_object(IndexFetchHeapData);
hscan->xs_base.rel = rel;
hscan->xs_base.flags = flags;
hscan->xs_cbuf = InvalidBuffer;
hscan->xs_vmbuffer = InvalidBuffer;
@ -763,7 +764,8 @@ heapam_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap,
tableScan = NULL;
heapScan = NULL;
indexScan = index_beginscan(OldHeap, OldIndex, SnapshotAny, NULL, 0, 0);
indexScan = index_beginscan(OldHeap, OldIndex, SnapshotAny, NULL, 0, 0,
SO_NONE);
index_rescan(indexScan, NULL, 0, NULL, 0);
}
else
@ -772,7 +774,8 @@ heapam_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap,
pgstat_progress_update_param(PROGRESS_REPACK_PHASE,
PROGRESS_REPACK_PHASE_SEQ_SCAN_HEAP);
tableScan = table_beginscan(OldHeap, SnapshotAny, 0, (ScanKey) NULL);
tableScan = table_beginscan(OldHeap, SnapshotAny, 0, (ScanKey) NULL,
SO_NONE);
heapScan = (HeapScanDesc) tableScan;
indexScan = NULL;

View file

@ -455,7 +455,8 @@ systable_beginscan(Relation heapRelation,
}
sysscan->iscan = index_beginscan(heapRelation, irel,
snapshot, NULL, nkeys, 0);
snapshot, NULL, nkeys, 0,
SO_NONE);
index_rescan(sysscan->iscan, idxkey, nkeys, NULL, 0);
sysscan->scan = NULL;
@ -716,7 +717,8 @@ systable_beginscan_ordered(Relation heapRelation,
bsysscan = true;
sysscan->iscan = index_beginscan(heapRelation, indexRelation,
snapshot, NULL, nkeys, 0);
snapshot, NULL, nkeys, 0,
SO_NONE);
index_rescan(sysscan->iscan, idxkey, nkeys, NULL, 0);
sysscan->scan = NULL;

View file

@ -258,7 +258,8 @@ index_beginscan(Relation heapRelation,
Relation indexRelation,
Snapshot snapshot,
IndexScanInstrumentation *instrument,
int nkeys, int norderbys)
int nkeys, int norderbys,
uint32 flags)
{
IndexScanDesc scan;
@ -285,7 +286,7 @@ index_beginscan(Relation heapRelation,
scan->instrument = instrument;
/* prepare to fetch index matches from table */
scan->xs_heapfetch = table_index_fetch_begin(heapRelation);
scan->xs_heapfetch = table_index_fetch_begin(heapRelation, flags);
return scan;
}
@ -588,13 +589,17 @@ index_parallelrescan(IndexScanDesc scan)
/*
* index_beginscan_parallel - join parallel index scan
*
* flags is a bitmask of ScanOptions affecting the underlying table scan. No
* SO_INTERNAL_FLAGS are permitted.
*
* Caller must be holding suitable locks on the heap and the index.
*/
IndexScanDesc
index_beginscan_parallel(Relation heaprel, Relation indexrel,
IndexScanInstrumentation *instrument,
int nkeys, int norderbys,
ParallelIndexScanDesc pscan)
ParallelIndexScanDesc pscan,
uint32 flags)
{
Snapshot snapshot;
IndexScanDesc scan;
@ -616,7 +621,7 @@ index_beginscan_parallel(Relation heaprel, Relation indexrel,
scan->instrument = instrument;
/* prepare to fetch index matches from table */
scan->xs_heapfetch = table_index_fetch_begin(heaprel);
scan->xs_heapfetch = table_index_fetch_begin(heaprel, flags);
return scan;
}

View file

@ -1928,7 +1928,8 @@ _bt_parallel_scan_and_sort(BTSpool *btspool, BTSpool *btspool2,
indexInfo = BuildIndexInfo(btspool->index);
indexInfo->ii_Concurrent = btshared->isconcurrent;
scan = table_beginscan_parallel(btspool->heap,
ParallelTableScanFromBTShared(btshared));
ParallelTableScanFromBTShared(btshared),
SO_NONE);
reltuples = table_index_build_scan(btspool->heap, btspool->index, indexInfo,
true, progress, _bt_build_callback,
&buildstate, scan);

View file

@ -118,7 +118,7 @@ table_beginscan_catalog(Relation relation, int nkeys, ScanKeyData *key)
Snapshot snapshot = RegisterSnapshot(GetCatalogSnapshot(relid));
return table_beginscan_common(relation, snapshot, nkeys, key,
NULL, flags);
NULL, flags, SO_NONE);
}
@ -163,10 +163,11 @@ table_parallelscan_initialize(Relation rel, ParallelTableScanDesc pscan,
}
TableScanDesc
table_beginscan_parallel(Relation relation, ParallelTableScanDesc pscan)
table_beginscan_parallel(Relation relation, ParallelTableScanDesc pscan,
uint32 flags)
{
Snapshot snapshot;
uint32 flags = SO_TYPE_SEQSCAN |
uint32 internal_flags = SO_TYPE_SEQSCAN |
SO_ALLOW_STRAT | SO_ALLOW_SYNC | SO_ALLOW_PAGEMODE;
Assert(RelFileLocatorEquals(relation->rd_locator, pscan->phs_locator));
@ -176,7 +177,7 @@ table_beginscan_parallel(Relation relation, ParallelTableScanDesc pscan)
/* Snapshot was serialized -- restore it */
snapshot = RestoreSnapshot((char *) pscan + pscan->phs_snapshot_off);
RegisterSnapshot(snapshot);
flags |= SO_TEMP_SNAPSHOT;
internal_flags |= SO_TEMP_SNAPSHOT;
}
else
{
@ -185,16 +186,17 @@ table_beginscan_parallel(Relation relation, ParallelTableScanDesc pscan)
}
return table_beginscan_common(relation, snapshot, 0, NULL,
pscan, flags);
pscan, internal_flags, flags);
}
TableScanDesc
table_beginscan_parallel_tidrange(Relation relation,
ParallelTableScanDesc pscan)
ParallelTableScanDesc pscan,
uint32 flags)
{
Snapshot snapshot;
uint32 flags = SO_TYPE_TIDRANGESCAN | SO_ALLOW_PAGEMODE;
TableScanDesc sscan;
uint32 internal_flags = SO_TYPE_TIDRANGESCAN | SO_ALLOW_PAGEMODE;
Assert(RelFileLocatorEquals(relation->rd_locator, pscan->phs_locator));
@ -206,7 +208,7 @@ table_beginscan_parallel_tidrange(Relation relation,
/* Snapshot was serialized -- restore it */
snapshot = RestoreSnapshot((char *) pscan + pscan->phs_snapshot_off);
RegisterSnapshot(snapshot);
flags |= SO_TEMP_SNAPSHOT;
internal_flags |= SO_TEMP_SNAPSHOT;
}
else
{
@ -215,7 +217,7 @@ table_beginscan_parallel_tidrange(Relation relation,
}
sscan = table_beginscan_common(relation, snapshot, 0, NULL,
pscan, flags);
pscan, internal_flags, flags);
return sscan;
}
@ -248,7 +250,7 @@ table_index_fetch_tuple_check(Relation rel,
bool found;
slot = table_slot_create(rel, NULL);
scan = table_index_fetch_begin(rel);
scan = table_index_fetch_begin(rel, SO_NONE);
found = table_index_fetch_tuple(scan, tid, snapshot, slot, &call_again,
all_dead);
table_index_fetch_end(scan);

View file

@ -106,7 +106,8 @@ unique_key_recheck(PG_FUNCTION_ARGS)
*/
tmptid = checktid;
{
IndexFetchTableData *scan = table_index_fetch_begin(trigdata->tg_relation);
IndexFetchTableData *scan = table_index_fetch_begin(trigdata->tg_relation,
SO_NONE);
bool call_again = false;
if (!table_index_fetch_tuple(scan, &tmptid, SnapshotSelf, slot,

View file

@ -1336,7 +1336,8 @@ CopyRelationTo(CopyToState cstate, Relation rel, Relation root_rel, uint64 *proc
AttrMap *map = NULL;
TupleTableSlot *root_slot = NULL;
scandesc = table_beginscan(rel, GetActiveSnapshot(), 0, NULL);
scandesc = table_beginscan(rel, GetActiveSnapshot(), 0, NULL,
SO_NONE);
slot = table_slot_create(rel, NULL);
/*

View file

@ -6411,7 +6411,8 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap)
* checking all the constraints.
*/
snapshot = RegisterSnapshot(GetLatestSnapshot());
scan = table_beginscan(oldrel, snapshot, 0, NULL);
scan = table_beginscan(oldrel, snapshot, 0, NULL,
SO_NONE);
/*
* Switch to per-tuple memory context and reset it for each tuple
@ -13982,8 +13983,8 @@ validateForeignKeyConstraint(char *conname,
*/
snapshot = RegisterSnapshot(GetLatestSnapshot());
slot = table_slot_create(rel, NULL);
scan = table_beginscan(rel, snapshot, 0, NULL);
scan = table_beginscan(rel, snapshot, 0, NULL,
SO_NONE);
perTupCxt = AllocSetContextCreate(CurrentMemoryContext,
"validateForeignKeyConstraint",
ALLOCSET_SMALL_SIZES);
@ -22884,7 +22885,8 @@ MergePartitionsMoveRows(List **wqueue, List *mergingPartitions, Relation newPart
/* Scan through the rows. */
snapshot = RegisterSnapshot(GetLatestSnapshot());
scan = table_beginscan(mergingPartition, snapshot, 0, NULL);
scan = table_beginscan(mergingPartition, snapshot, 0, NULL,
SO_NONE);
/*
* Switch to per-tuple memory context and reset it for each tuple
@ -23348,7 +23350,8 @@ SplitPartitionMoveRows(List **wqueue, Relation rel, Relation splitRel,
/* Scan through the rows. */
snapshot = RegisterSnapshot(GetLatestSnapshot());
scan = table_beginscan(splitRel, snapshot, 0, NULL);
scan = table_beginscan(splitRel, snapshot, 0, NULL,
SO_NONE);
/*
* Switch to per-tuple memory context and reset it for each tuple

View file

@ -3185,7 +3185,8 @@ validateDomainNotNullConstraint(Oid domainoid)
/* Scan all tuples in this relation */
snapshot = RegisterSnapshot(GetLatestSnapshot());
scan = table_beginscan(testrel, snapshot, 0, NULL);
scan = table_beginscan(testrel, snapshot, 0, NULL,
SO_NONE);
slot = table_slot_create(testrel, NULL);
while (table_scan_getnextslot(scan, ForwardScanDirection, slot))
{
@ -3266,7 +3267,8 @@ validateDomainCheckConstraint(Oid domainoid, const char *ccbin, LOCKMODE lockmod
/* Scan all tuples in this relation */
snapshot = RegisterSnapshot(GetLatestSnapshot());
scan = table_beginscan(testrel, snapshot, 0, NULL);
scan = table_beginscan(testrel, snapshot, 0, NULL,
SO_NONE);
slot = table_slot_create(testrel, NULL);
while (table_scan_getnextslot(scan, ForwardScanDirection, slot))
{

View file

@ -815,7 +815,9 @@ check_exclusion_or_unique_constraint(Relation heap, Relation index,
retry:
conflict = false;
found_self = false;
index_scan = index_beginscan(heap, index, &DirtySnapshot, NULL, indnkeyatts, 0);
index_scan = index_beginscan(heap, index,
&DirtySnapshot, NULL, indnkeyatts, 0,
SO_NONE);
index_rescan(index_scan, scankeys, indnkeyatts, NULL, 0);
while (index_getnext_slot(index_scan, ForwardScanDirection, existing_slot))

View file

@ -205,7 +205,8 @@ RelationFindReplTupleByIndex(Relation rel, Oid idxoid,
skey_attoff = build_replindex_scan_key(skey, rel, idxrel, searchslot);
/* Start an index scan. */
scan = index_beginscan(rel, idxrel, &snap, NULL, skey_attoff, 0);
scan = index_beginscan(rel, idxrel,
&snap, NULL, skey_attoff, 0, SO_NONE);
retry:
found = false;
@ -383,7 +384,8 @@ RelationFindReplTupleSeq(Relation rel, LockTupleMode lockmode,
/* Start a heap scan. */
InitDirtySnapshot(snap);
scan = table_beginscan(rel, &snap, 0, NULL);
scan = table_beginscan(rel, &snap, 0, NULL,
SO_NONE);
scanslot = table_slot_create(rel, NULL);
retry:
@ -602,7 +604,8 @@ RelationFindDeletedTupleInfoSeq(Relation rel, TupleTableSlot *searchslot,
* not yet committed or those just committed prior to the scan are
* excluded in update_most_recent_deletion_info().
*/
scan = table_beginscan(rel, SnapshotAny, 0, NULL);
scan = table_beginscan(rel, SnapshotAny, 0, NULL,
SO_NONE);
scanslot = table_slot_create(rel, NULL);
table_rescan(scan, NULL);
@ -666,7 +669,8 @@ RelationFindDeletedTupleInfoByIndex(Relation rel, Oid idxoid,
* not yet committed or those just committed prior to the scan are
* excluded in update_most_recent_deletion_info().
*/
scan = index_beginscan(rel, idxrel, SnapshotAny, NULL, skey_attoff, 0);
scan = index_beginscan(rel, idxrel,
SnapshotAny, NULL, skey_attoff, 0, SO_NONE);
index_rescan(scan, skey, skey_attoff, NULL, 0);

View file

@ -148,7 +148,8 @@ BitmapTableScanSetup(BitmapHeapScanState *node)
table_beginscan_bm(node->ss.ss_currentRelation,
node->ss.ps.state->es_snapshot,
0,
NULL);
NULL,
SO_NONE);
}
node->ss.ss_currentScanDesc->st.rs_tbmiterator = tbmiterator;

View file

@ -95,7 +95,8 @@ IndexOnlyNext(IndexOnlyScanState *node)
estate->es_snapshot,
node->ioss_Instrument,
node->ioss_NumScanKeys,
node->ioss_NumOrderByKeys);
node->ioss_NumOrderByKeys,
SO_NONE);
node->ioss_ScanDesc = scandesc;
@ -794,7 +795,8 @@ ExecIndexOnlyScanInitializeDSM(IndexOnlyScanState *node,
node->ioss_Instrument,
node->ioss_NumScanKeys,
node->ioss_NumOrderByKeys,
piscan);
piscan,
SO_NONE);
node->ioss_ScanDesc->xs_want_itup = true;
node->ioss_VMBuffer = InvalidBuffer;
@ -860,7 +862,8 @@ ExecIndexOnlyScanInitializeWorker(IndexOnlyScanState *node,
node->ioss_Instrument,
node->ioss_NumScanKeys,
node->ioss_NumOrderByKeys,
piscan);
piscan,
SO_NONE);
node->ioss_ScanDesc->xs_want_itup = true;
/*

View file

@ -113,7 +113,8 @@ IndexNext(IndexScanState *node)
estate->es_snapshot,
node->iss_Instrument,
node->iss_NumScanKeys,
node->iss_NumOrderByKeys);
node->iss_NumOrderByKeys,
SO_NONE);
node->iss_ScanDesc = scandesc;
@ -209,7 +210,8 @@ IndexNextWithReorder(IndexScanState *node)
estate->es_snapshot,
node->iss_Instrument,
node->iss_NumScanKeys,
node->iss_NumOrderByKeys);
node->iss_NumOrderByKeys,
SO_NONE);
node->iss_ScanDesc = scandesc;
@ -1730,7 +1732,8 @@ ExecIndexScanInitializeDSM(IndexScanState *node,
node->iss_Instrument,
node->iss_NumScanKeys,
node->iss_NumOrderByKeys,
piscan);
piscan,
SO_NONE);
/*
* If no run-time keys to calculate or they are ready, go ahead and pass
@ -1794,7 +1797,8 @@ ExecIndexScanInitializeWorker(IndexScanState *node,
node->iss_Instrument,
node->iss_NumScanKeys,
node->iss_NumOrderByKeys,
piscan);
piscan,
SO_NONE);
/*
* If no run-time keys to calculate or they are ready, go ahead and pass

View file

@ -298,7 +298,8 @@ tablesample_init(SampleScanState *scanstate)
0, NULL,
scanstate->use_bulkread,
allow_sync,
scanstate->use_pagemode);
scanstate->use_pagemode,
SO_NONE);
}
else
{

View file

@ -71,7 +71,8 @@ SeqNext(SeqScanState *node)
*/
scandesc = table_beginscan(node->ss.ss_currentRelation,
estate->es_snapshot,
0, NULL);
0, NULL,
SO_NONE);
node->ss.ss_currentScanDesc = scandesc;
}
@ -375,7 +376,8 @@ ExecSeqScanInitializeDSM(SeqScanState *node,
estate->es_snapshot);
shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, pscan);
node->ss.ss_currentScanDesc =
table_beginscan_parallel(node->ss.ss_currentRelation, pscan);
table_beginscan_parallel(node->ss.ss_currentRelation, pscan,
SO_NONE);
}
/* ----------------------------------------------------------------
@ -408,5 +410,6 @@ ExecSeqScanInitializeWorker(SeqScanState *node,
pscan = shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, false);
node->ss.ss_currentScanDesc =
table_beginscan_parallel(node->ss.ss_currentRelation, pscan);
table_beginscan_parallel(node->ss.ss_currentRelation, pscan,
SO_NONE);
}

View file

@ -245,7 +245,8 @@ TidRangeNext(TidRangeScanState *node)
scandesc = table_beginscan_tidrange(node->ss.ss_currentRelation,
estate->es_snapshot,
&node->trss_mintid,
&node->trss_maxtid);
&node->trss_maxtid,
SO_NONE);
node->ss.ss_currentScanDesc = scandesc;
}
else
@ -460,7 +461,7 @@ ExecTidRangeScanInitializeDSM(TidRangeScanState *node, ParallelContext *pcxt)
shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, pscan);
node->ss.ss_currentScanDesc =
table_beginscan_parallel_tidrange(node->ss.ss_currentRelation,
pscan);
pscan, SO_NONE);
}
/* ----------------------------------------------------------------
@ -494,5 +495,5 @@ ExecTidRangeScanInitializeWorker(TidRangeScanState *node,
pscan = shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, false);
node->ss.ss_currentScanDesc =
table_beginscan_parallel_tidrange(node->ss.ss_currentRelation,
pscan);
pscan, SO_NONE);
}

View file

@ -3362,7 +3362,8 @@ check_default_partition_contents(Relation parent, Relation default_rel,
econtext = GetPerTupleExprContext(estate);
snapshot = RegisterSnapshot(GetLatestSnapshot());
tupslot = table_slot_create(part_rel, &estate->es_tupleTable);
scan = table_beginscan(part_rel, snapshot, 0, NULL);
scan = table_beginscan(part_rel, snapshot, 0, NULL,
SO_NONE);
/*
* Switch to per-tuple memory context and reset it for each tuple

View file

@ -7178,7 +7178,8 @@ get_actual_variable_endpoint(Relation heapRel,
index_scan = index_beginscan(heapRel, indexRel,
&SnapshotNonVacuumable, NULL,
1, 0);
1, 0,
SO_NONE);
/* Set it up for index-only scan */
index_scan->xs_want_itup = true;
index_rescan(index_scan, scankeys, 1, NULL, 0);

View file

@ -158,7 +158,8 @@ extern IndexScanDesc index_beginscan(Relation heapRelation,
Relation indexRelation,
Snapshot snapshot,
IndexScanInstrumentation *instrument,
int nkeys, int norderbys);
int nkeys, int norderbys,
uint32 flags);
extern IndexScanDesc index_beginscan_bitmap(Relation indexRelation,
Snapshot snapshot,
IndexScanInstrumentation *instrument,
@ -184,7 +185,8 @@ extern IndexScanDesc index_beginscan_parallel(Relation heaprel,
Relation indexrel,
IndexScanInstrumentation *instrument,
int nkeys, int norderbys,
ParallelIndexScanDesc pscan);
ParallelIndexScanDesc pscan,
uint32 flags);
extern ItemPointer index_getnext_tid(IndexScanDesc scan,
ScanDirection direction);
extern bool index_fetch_heap(IndexScanDesc scan, TupleTableSlot *slot);

View file

@ -95,10 +95,7 @@ typedef struct HeapScanDescData
*/
ParallelBlockTableScanWorkerData *rs_parallelworkerdata;
/*
* For sequential scans and bitmap heap scans. The current heap block's
* corresponding page in the visibility map.
*/
/* Current heap block's corresponding page in the visibility map */
Buffer rs_vmbuffer;
/* these fields only used in page-at-a-time mode and for bitmap scans */

View file

@ -122,6 +122,12 @@ typedef struct ParallelBlockTableScanWorkerData *ParallelBlockTableScanWorker;
typedef struct IndexFetchTableData
{
Relation rel;
/*
* Bitmask of ScanOptions affecting the relation. No SO_INTERNAL_FLAGS are
* permitted.
*/
uint32 flags;
} IndexFetchTableData;
struct IndexScanInstrumentation;

View file

@ -45,6 +45,8 @@ typedef struct ValidateIndexState ValidateIndexState;
*/
typedef enum ScanOptions
{
SO_NONE = 0,
/* one of SO_TYPE_* may be specified */
SO_TYPE_SEQSCAN = 1 << 0,
SO_TYPE_BITMAPSCAN = 1 << 1,
@ -65,6 +67,19 @@ typedef enum ScanOptions
SO_TEMP_SNAPSHOT = 1 << 9,
} ScanOptions;
/*
* Mask of flags that are set internally by the table scan functions and
* shouldn't be passed by callers. Some of these are effectively set by callers
* through parameters to table scan functions (e.g. SO_ALLOW_STRAT via
* allow_strat). For now, however, retain tight control over them and don't
* allow callers to pass these flags directly to table scan functions.
*/
#define SO_INTERNAL_FLAGS \
(SO_TYPE_SEQSCAN | SO_TYPE_BITMAPSCAN | SO_TYPE_SAMPLESCAN | \
SO_TYPE_TIDSCAN | SO_TYPE_TIDRANGESCAN | SO_TYPE_ANALYZE | \
SO_ALLOW_STRAT | SO_ALLOW_SYNC | SO_ALLOW_PAGEMODE | \
SO_TEMP_SNAPSHOT)
/*
* Result codes for table_{update,delete,lock_tuple}, and for visibility
* routines inside table AMs.
@ -324,8 +339,9 @@ typedef struct TableAmRoutine
* `flags` is a bitmask indicating the type of scan (ScanOptions's
* SO_TYPE_*, currently only one may be specified), options controlling
* the scan's behaviour (ScanOptions's SO_ALLOW_*, several may be
* specified, an AM may ignore unsupported ones) and whether the snapshot
* needs to be deallocated at scan_end (ScanOptions's SO_TEMP_SNAPSHOT).
* specified, an AM may ignore unsupported ones), whether the snapshot
* needs to be deallocated at scan_end (ScanOptions's SO_TEMP_SNAPSHOT),
* and any number of the other ScanOptions values.
*/
TableScanDesc (*scan_begin) (Relation rel,
Snapshot snapshot,
@ -421,9 +437,12 @@ typedef struct TableAmRoutine
* IndexFetchTableData, which the AM will typically embed in a larger
* structure with additional information.
*
* flags is a bitmask of ScanOptions affecting underlying table scan
* behavior. See scan_begin() for more information on passing these.
*
* Tuples for an index scan can then be fetched via index_fetch_tuple.
*/
struct IndexFetchTableData *(*index_fetch_begin) (Relation rel);
struct IndexFetchTableData *(*index_fetch_begin) (Relation rel, uint32 flags);
/*
* Reset index fetch. Typically this will release cross index fetch
@ -874,12 +893,19 @@ extern TupleTableSlot *table_slot_create(Relation relation, List **reglist);
* A wrapper around the Table Access Method scan_begin callback, to centralize
* error checking. All calls to ->scan_begin() should go through this
* function.
*
* The caller-provided user_flags are checked with an Assert against
* SO_INTERNAL_FLAGS to catch callers that accidentally pass scan-type or other
* internal flags. A second Assert checks that flags contains only bits from
* SO_INTERNAL_FLAGS.
*/
static TableScanDesc
table_beginscan_common(Relation rel, Snapshot snapshot, int nkeys,
ScanKeyData *key, ParallelTableScanDesc pscan,
uint32 flags)
uint32 flags, uint32 user_flags)
{
Assert((user_flags & SO_INTERNAL_FLAGS) == 0);
Assert((flags & ~SO_INTERNAL_FLAGS) == 0);
flags |= user_flags;
/*
* We don't allow scans to be started while CheckXidAlive is set, except
* via systable_beginscan() et al. See detailed comments in xact.c where
@ -894,15 +920,18 @@ table_beginscan_common(Relation rel, Snapshot snapshot, int nkeys,
/*
* Start a scan of `rel`. Returned tuples pass a visibility test of
* `snapshot`, and if nkeys != 0, the results are filtered by those scan keys.
*
* flags is a bitmask of ScanOptions. No SO_INTERNAL_FLAGS are permitted.
*/
static inline TableScanDesc
table_beginscan(Relation rel, Snapshot snapshot,
int nkeys, ScanKeyData *key)
int nkeys, ScanKeyData *key, uint32 flags)
{
uint32 flags = SO_TYPE_SEQSCAN |
uint32 internal_flags = SO_TYPE_SEQSCAN |
SO_ALLOW_STRAT | SO_ALLOW_SYNC | SO_ALLOW_PAGEMODE;
return table_beginscan_common(rel, snapshot, nkeys, key, NULL, flags);
return table_beginscan_common(rel, snapshot, nkeys, key, NULL,
internal_flags, flags);
}
/*
@ -931,7 +960,8 @@ table_beginscan_strat(Relation rel, Snapshot snapshot,
if (allow_sync)
flags |= SO_ALLOW_SYNC;
return table_beginscan_common(rel, snapshot, nkeys, key, NULL, flags);
return table_beginscan_common(rel, snapshot, nkeys, key, NULL,
flags, SO_NONE);
}
/*
@ -939,14 +969,17 @@ table_beginscan_strat(Relation rel, Snapshot snapshot,
* TableScanDesc for a bitmap heap scan. Although that scan technology is
* really quite unlike a standard seqscan, there is just enough commonality to
* make it worth using the same data structure.
*
* flags is a bitmask of ScanOptions. No SO_INTERNAL_FLAGS are permitted.
*/
static inline TableScanDesc
table_beginscan_bm(Relation rel, Snapshot snapshot,
int nkeys, ScanKeyData *key)
int nkeys, ScanKeyData *key, uint32 flags)
{
uint32 flags = SO_TYPE_BITMAPSCAN | SO_ALLOW_PAGEMODE;
uint32 internal_flags = SO_TYPE_BITMAPSCAN | SO_ALLOW_PAGEMODE;
return table_beginscan_common(rel, snapshot, nkeys, key, NULL, flags);
return table_beginscan_common(rel, snapshot, nkeys, key, NULL,
internal_flags, flags);
}
/*
@ -955,23 +988,26 @@ table_beginscan_bm(Relation rel, Snapshot snapshot,
* using the same data structure although the behavior is rather different.
* In addition to the options offered by table_beginscan_strat, this call
* also allows control of whether page-mode visibility checking is used.
*
* flags is a bitmask of ScanOptions. No SO_INTERNAL_FLAGS are permitted.
*/
static inline TableScanDesc
table_beginscan_sampling(Relation rel, Snapshot snapshot,
int nkeys, ScanKeyData *key,
bool allow_strat, bool allow_sync,
bool allow_pagemode)
bool allow_pagemode, uint32 flags)
{
uint32 flags = SO_TYPE_SAMPLESCAN;
uint32 internal_flags = SO_TYPE_SAMPLESCAN;
if (allow_strat)
flags |= SO_ALLOW_STRAT;
internal_flags |= SO_ALLOW_STRAT;
if (allow_sync)
flags |= SO_ALLOW_SYNC;
internal_flags |= SO_ALLOW_SYNC;
if (allow_pagemode)
flags |= SO_ALLOW_PAGEMODE;
internal_flags |= SO_ALLOW_PAGEMODE;
return table_beginscan_common(rel, snapshot, nkeys, key, NULL, flags);
return table_beginscan_common(rel, snapshot, nkeys, key, NULL,
internal_flags, flags);
}
/*
@ -984,7 +1020,8 @@ table_beginscan_tid(Relation rel, Snapshot snapshot)
{
uint32 flags = SO_TYPE_TIDSCAN;
return table_beginscan_common(rel, snapshot, 0, NULL, NULL, flags);
return table_beginscan_common(rel, snapshot, 0, NULL, NULL,
flags, SO_NONE);
}
/*
@ -997,7 +1034,8 @@ table_beginscan_analyze(Relation rel)
{
uint32 flags = SO_TYPE_ANALYZE;
return table_beginscan_common(rel, NULL, 0, NULL, NULL, flags);
return table_beginscan_common(rel, NULL, 0, NULL, NULL,
flags, SO_NONE);
}
/*
@ -1058,16 +1096,19 @@ table_scan_getnextslot(TableScanDesc sscan, ScanDirection direction, TupleTableS
/*
* table_beginscan_tidrange is the entry point for setting up a TableScanDesc
* for a TID range scan.
*
* flags is a bitmask of ScanOptions. No SO_INTERNAL_FLAGS are permitted.
*/
static inline TableScanDesc
table_beginscan_tidrange(Relation rel, Snapshot snapshot,
ItemPointer mintid,
ItemPointer maxtid)
ItemPointer maxtid, uint32 flags)
{
TableScanDesc sscan;
uint32 flags = SO_TYPE_TIDRANGESCAN | SO_ALLOW_PAGEMODE;
uint32 internal_flags = SO_TYPE_TIDRANGESCAN | SO_ALLOW_PAGEMODE;
sscan = table_beginscan_common(rel, snapshot, 0, NULL, NULL, flags);
sscan = table_beginscan_common(rel, snapshot, 0, NULL, NULL,
internal_flags, flags);
/* Set the range of TIDs to scan */
sscan->rs_rd->rd_tableam->scan_set_tidrange(sscan, mintid, maxtid);
@ -1139,20 +1180,26 @@ extern void table_parallelscan_initialize(Relation rel,
* table_parallelscan_initialize(), for the same relation. The initialization
* does not need to have happened in this backend.
*
* flags is a bitmask of ScanOptions. No SO_INTERNAL_FLAGS are permitted.
*
* Caller must hold a suitable lock on the relation.
*/
extern TableScanDesc table_beginscan_parallel(Relation relation,
ParallelTableScanDesc pscan);
ParallelTableScanDesc pscan,
uint32 flags);
/*
* Begin a parallel tid range scan. `pscan` needs to have been initialized
* with table_parallelscan_initialize(), for the same relation. The
* initialization does not need to have happened in this backend.
*
* flags is a bitmask of ScanOptions. No SO_INTERNAL_FLAGS are permitted.
*
* Caller must hold a suitable lock on the relation.
*/
extern TableScanDesc table_beginscan_parallel_tidrange(Relation relation,
ParallelTableScanDesc pscan);
ParallelTableScanDesc pscan,
uint32 flags);
/*
* Restart a parallel scan. Call this in the leader process. Caller is
@ -1175,11 +1222,15 @@ table_parallelscan_reinitialize(Relation rel, ParallelTableScanDesc pscan)
* Prepare to fetch tuples from the relation, as needed when fetching tuples
* for an index scan.
*
* flags is a bitmask of ScanOptions. No SO_INTERNAL_FLAGS are permitted.
*
* Tuples for an index scan can then be fetched via table_index_fetch_tuple().
*/
static inline IndexFetchTableData *
table_index_fetch_begin(Relation rel)
table_index_fetch_begin(Relation rel, uint32 flags)
{
Assert((flags & SO_INTERNAL_FLAGS) == 0);
/*
* We don't allow scans to be started while CheckXidAlive is set, except
* via systable_beginscan() et al. See detailed comments in xact.c where
@ -1188,7 +1239,7 @@ table_index_fetch_begin(Relation rel)
if (unlikely(TransactionIdIsValid(CheckXidAlive) && !bsysscan))
elog(ERROR, "scan started during logical decoding");
return rel->rd_tableam->index_fetch_begin(rel);
return rel->rd_tableam->index_fetch_begin(rel, flags);
}
/*