From dcd8cc1c852cac4e65d336a79afb6b9eb9700ae1 Mon Sep 17 00:00:00 2001 From: Melanie Plageman Date: Mon, 30 Mar 2026 12:27:24 -0400 Subject: [PATCH] Thread flags through begin-scan APIs Add an AM user-settable flags parameter to several of the table scan functions, one table AM callback, and index_beginscan(). This allows users to pass additional context to be used when building the scan descriptors. For index scans, a new flags field is added to IndexFetchTableData, and the heap AM saves the caller-provided flags there. This introduces an extension point for follow-up work to pass per-scan information (such as whether the relation is read-only for the current query) from the executor to the AM layer. Author: Melanie Plageman Reviewed-by: Andres Freund Reviewed-by: Tomas Vondra Reviewed-by: David Rowley Reviewed-by: Chao Li Discussion: https://postgr.es/m/2be31f17-5405-4de9-8d73-90ebc322f7d8%40vondra.me --- contrib/pgrowlocks/pgrowlocks.c | 2 +- src/backend/access/brin/brin.c | 3 +- src/backend/access/gin/gininsert.c | 3 +- src/backend/access/heap/heapam_handler.c | 9 +- src/backend/access/index/genam.c | 6 +- src/backend/access/index/indexam.c | 13 ++- src/backend/access/nbtree/nbtsort.c | 3 +- src/backend/access/table/tableam.c | 22 ++--- src/backend/commands/constraint.c | 3 +- src/backend/commands/copyto.c | 3 +- src/backend/commands/tablecmds.c | 13 +-- src/backend/commands/typecmds.c | 6 +- src/backend/executor/execIndexing.c | 4 +- src/backend/executor/execReplication.c | 12 ++- src/backend/executor/nodeBitmapHeapscan.c | 3 +- src/backend/executor/nodeIndexonlyscan.c | 9 +- src/backend/executor/nodeIndexscan.c | 12 ++- src/backend/executor/nodeSamplescan.c | 3 +- src/backend/executor/nodeSeqscan.c | 9 +- src/backend/executor/nodeTidrangescan.c | 7 +- src/backend/partitioning/partbounds.c | 3 +- src/backend/utils/adt/selfuncs.c | 3 +- src/include/access/genam.h | 6 +- src/include/access/heapam.h | 5 +- src/include/access/relscan.h | 6 ++ src/include/access/tableam.h | 103 ++++++++++++++++------ 26 files changed, 185 insertions(+), 86 deletions(-) diff --git a/contrib/pgrowlocks/pgrowlocks.c b/contrib/pgrowlocks/pgrowlocks.c index ff3692c87c4..d164c4c03ad 100644 --- a/contrib/pgrowlocks/pgrowlocks.c +++ b/contrib/pgrowlocks/pgrowlocks.c @@ -115,7 +115,7 @@ pgrowlocks(PG_FUNCTION_ARGS) RelationGetRelationName(rel)); /* Scan the relation */ - scan = table_beginscan(rel, GetActiveSnapshot(), 0, NULL); + scan = table_beginscan(rel, GetActiveSnapshot(), 0, NULL, SO_NONE); hscan = (HeapScanDesc) scan; attinmeta = TupleDescGetAttInMetadata(rsinfo->setDesc); diff --git a/src/backend/access/brin/brin.c b/src/backend/access/brin/brin.c index 2a0f8c8e3b8..bdb30752e09 100644 --- a/src/backend/access/brin/brin.c +++ b/src/backend/access/brin/brin.c @@ -2844,7 +2844,8 @@ _brin_parallel_scan_and_build(BrinBuildState *state, indexInfo->ii_Concurrent = brinshared->isconcurrent; scan = table_beginscan_parallel(heap, - ParallelTableScanFromBrinShared(brinshared)); + ParallelTableScanFromBrinShared(brinshared), + SO_NONE); reltuples = table_index_build_scan(heap, index, indexInfo, true, true, brinbuildCallbackParallel, state, scan); diff --git a/src/backend/access/gin/gininsert.c b/src/backend/access/gin/gininsert.c index e54782d9dd8..9d83a495775 100644 --- a/src/backend/access/gin/gininsert.c +++ b/src/backend/access/gin/gininsert.c @@ -2068,7 +2068,8 @@ _gin_parallel_scan_and_build(GinBuildState *state, indexInfo->ii_Concurrent = ginshared->isconcurrent; scan = table_beginscan_parallel(heap, - ParallelTableScanFromGinBuildShared(ginshared)); + ParallelTableScanFromGinBuildShared(ginshared), + SO_NONE); reltuples = table_index_build_scan(heap, index, indexInfo, true, progress, ginBuildCallbackParallel, state, scan); diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c index 7c2194b152b..4de1eab4a73 100644 --- a/src/backend/access/heap/heapam_handler.c +++ b/src/backend/access/heap/heapam_handler.c @@ -81,11 +81,12 @@ heapam_slot_callbacks(Relation relation) */ static IndexFetchTableData * -heapam_index_fetch_begin(Relation rel) +heapam_index_fetch_begin(Relation rel, uint32 flags) { IndexFetchHeapData *hscan = palloc0_object(IndexFetchHeapData); hscan->xs_base.rel = rel; + hscan->xs_base.flags = flags; hscan->xs_cbuf = InvalidBuffer; hscan->xs_vmbuffer = InvalidBuffer; @@ -763,7 +764,8 @@ heapam_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap, tableScan = NULL; heapScan = NULL; - indexScan = index_beginscan(OldHeap, OldIndex, SnapshotAny, NULL, 0, 0); + indexScan = index_beginscan(OldHeap, OldIndex, SnapshotAny, NULL, 0, 0, + SO_NONE); index_rescan(indexScan, NULL, 0, NULL, 0); } else @@ -772,7 +774,8 @@ heapam_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap, pgstat_progress_update_param(PROGRESS_REPACK_PHASE, PROGRESS_REPACK_PHASE_SEQ_SCAN_HEAP); - tableScan = table_beginscan(OldHeap, SnapshotAny, 0, (ScanKey) NULL); + tableScan = table_beginscan(OldHeap, SnapshotAny, 0, (ScanKey) NULL, + SO_NONE); heapScan = (HeapScanDesc) tableScan; indexScan = NULL; diff --git a/src/backend/access/index/genam.c b/src/backend/access/index/genam.c index 5e89b86a62c..1408989c568 100644 --- a/src/backend/access/index/genam.c +++ b/src/backend/access/index/genam.c @@ -455,7 +455,8 @@ systable_beginscan(Relation heapRelation, } sysscan->iscan = index_beginscan(heapRelation, irel, - snapshot, NULL, nkeys, 0); + snapshot, NULL, nkeys, 0, + SO_NONE); index_rescan(sysscan->iscan, idxkey, nkeys, NULL, 0); sysscan->scan = NULL; @@ -716,7 +717,8 @@ systable_beginscan_ordered(Relation heapRelation, bsysscan = true; sysscan->iscan = index_beginscan(heapRelation, indexRelation, - snapshot, NULL, nkeys, 0); + snapshot, NULL, nkeys, 0, + SO_NONE); index_rescan(sysscan->iscan, idxkey, nkeys, NULL, 0); sysscan->scan = NULL; diff --git a/src/backend/access/index/indexam.c b/src/backend/access/index/indexam.c index fbfc33159eb..44496ae0963 100644 --- a/src/backend/access/index/indexam.c +++ b/src/backend/access/index/indexam.c @@ -258,7 +258,8 @@ index_beginscan(Relation heapRelation, Relation indexRelation, Snapshot snapshot, IndexScanInstrumentation *instrument, - int nkeys, int norderbys) + int nkeys, int norderbys, + uint32 flags) { IndexScanDesc scan; @@ -285,7 +286,7 @@ index_beginscan(Relation heapRelation, scan->instrument = instrument; /* prepare to fetch index matches from table */ - scan->xs_heapfetch = table_index_fetch_begin(heapRelation); + scan->xs_heapfetch = table_index_fetch_begin(heapRelation, flags); return scan; } @@ -588,13 +589,17 @@ index_parallelrescan(IndexScanDesc scan) /* * index_beginscan_parallel - join parallel index scan * + * flags is a bitmask of ScanOptions affecting the underlying table scan. No + * SO_INTERNAL_FLAGS are permitted. + * * Caller must be holding suitable locks on the heap and the index. */ IndexScanDesc index_beginscan_parallel(Relation heaprel, Relation indexrel, IndexScanInstrumentation *instrument, int nkeys, int norderbys, - ParallelIndexScanDesc pscan) + ParallelIndexScanDesc pscan, + uint32 flags) { Snapshot snapshot; IndexScanDesc scan; @@ -616,7 +621,7 @@ index_beginscan_parallel(Relation heaprel, Relation indexrel, scan->instrument = instrument; /* prepare to fetch index matches from table */ - scan->xs_heapfetch = table_index_fetch_begin(heaprel); + scan->xs_heapfetch = table_index_fetch_begin(heaprel, flags); return scan; } diff --git a/src/backend/access/nbtree/nbtsort.c b/src/backend/access/nbtree/nbtsort.c index 47a9bda30c9..756dfa3dcf4 100644 --- a/src/backend/access/nbtree/nbtsort.c +++ b/src/backend/access/nbtree/nbtsort.c @@ -1928,7 +1928,8 @@ _bt_parallel_scan_and_sort(BTSpool *btspool, BTSpool *btspool2, indexInfo = BuildIndexInfo(btspool->index); indexInfo->ii_Concurrent = btshared->isconcurrent; scan = table_beginscan_parallel(btspool->heap, - ParallelTableScanFromBTShared(btshared)); + ParallelTableScanFromBTShared(btshared), + SO_NONE); reltuples = table_index_build_scan(btspool->heap, btspool->index, indexInfo, true, progress, _bt_build_callback, &buildstate, scan); diff --git a/src/backend/access/table/tableam.c b/src/backend/access/table/tableam.c index dfda1af412e..86481d7c029 100644 --- a/src/backend/access/table/tableam.c +++ b/src/backend/access/table/tableam.c @@ -118,7 +118,7 @@ table_beginscan_catalog(Relation relation, int nkeys, ScanKeyData *key) Snapshot snapshot = RegisterSnapshot(GetCatalogSnapshot(relid)); return table_beginscan_common(relation, snapshot, nkeys, key, - NULL, flags); + NULL, flags, SO_NONE); } @@ -163,10 +163,11 @@ table_parallelscan_initialize(Relation rel, ParallelTableScanDesc pscan, } TableScanDesc -table_beginscan_parallel(Relation relation, ParallelTableScanDesc pscan) +table_beginscan_parallel(Relation relation, ParallelTableScanDesc pscan, + uint32 flags) { Snapshot snapshot; - uint32 flags = SO_TYPE_SEQSCAN | + uint32 internal_flags = SO_TYPE_SEQSCAN | SO_ALLOW_STRAT | SO_ALLOW_SYNC | SO_ALLOW_PAGEMODE; Assert(RelFileLocatorEquals(relation->rd_locator, pscan->phs_locator)); @@ -176,7 +177,7 @@ table_beginscan_parallel(Relation relation, ParallelTableScanDesc pscan) /* Snapshot was serialized -- restore it */ snapshot = RestoreSnapshot((char *) pscan + pscan->phs_snapshot_off); RegisterSnapshot(snapshot); - flags |= SO_TEMP_SNAPSHOT; + internal_flags |= SO_TEMP_SNAPSHOT; } else { @@ -185,16 +186,17 @@ table_beginscan_parallel(Relation relation, ParallelTableScanDesc pscan) } return table_beginscan_common(relation, snapshot, 0, NULL, - pscan, flags); + pscan, internal_flags, flags); } TableScanDesc table_beginscan_parallel_tidrange(Relation relation, - ParallelTableScanDesc pscan) + ParallelTableScanDesc pscan, + uint32 flags) { Snapshot snapshot; - uint32 flags = SO_TYPE_TIDRANGESCAN | SO_ALLOW_PAGEMODE; TableScanDesc sscan; + uint32 internal_flags = SO_TYPE_TIDRANGESCAN | SO_ALLOW_PAGEMODE; Assert(RelFileLocatorEquals(relation->rd_locator, pscan->phs_locator)); @@ -206,7 +208,7 @@ table_beginscan_parallel_tidrange(Relation relation, /* Snapshot was serialized -- restore it */ snapshot = RestoreSnapshot((char *) pscan + pscan->phs_snapshot_off); RegisterSnapshot(snapshot); - flags |= SO_TEMP_SNAPSHOT; + internal_flags |= SO_TEMP_SNAPSHOT; } else { @@ -215,7 +217,7 @@ table_beginscan_parallel_tidrange(Relation relation, } sscan = table_beginscan_common(relation, snapshot, 0, NULL, - pscan, flags); + pscan, internal_flags, flags); return sscan; } @@ -248,7 +250,7 @@ table_index_fetch_tuple_check(Relation rel, bool found; slot = table_slot_create(rel, NULL); - scan = table_index_fetch_begin(rel); + scan = table_index_fetch_begin(rel, SO_NONE); found = table_index_fetch_tuple(scan, tid, snapshot, slot, &call_again, all_dead); table_index_fetch_end(scan); diff --git a/src/backend/commands/constraint.c b/src/backend/commands/constraint.c index cc11c47b6f2..421d8c359f0 100644 --- a/src/backend/commands/constraint.c +++ b/src/backend/commands/constraint.c @@ -106,7 +106,8 @@ unique_key_recheck(PG_FUNCTION_ARGS) */ tmptid = checktid; { - IndexFetchTableData *scan = table_index_fetch_begin(trigdata->tg_relation); + IndexFetchTableData *scan = table_index_fetch_begin(trigdata->tg_relation, + SO_NONE); bool call_again = false; if (!table_index_fetch_tuple(scan, &tmptid, SnapshotSelf, slot, diff --git a/src/backend/commands/copyto.c b/src/backend/commands/copyto.c index faf62d959b4..f0e0147c665 100644 --- a/src/backend/commands/copyto.c +++ b/src/backend/commands/copyto.c @@ -1336,7 +1336,8 @@ CopyRelationTo(CopyToState cstate, Relation rel, Relation root_rel, uint64 *proc AttrMap *map = NULL; TupleTableSlot *root_slot = NULL; - scandesc = table_beginscan(rel, GetActiveSnapshot(), 0, NULL); + scandesc = table_beginscan(rel, GetActiveSnapshot(), 0, NULL, + SO_NONE); slot = table_slot_create(rel, NULL); /* diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index dd00e36c69d..cfb60940f4d 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -6411,7 +6411,8 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap) * checking all the constraints. */ snapshot = RegisterSnapshot(GetLatestSnapshot()); - scan = table_beginscan(oldrel, snapshot, 0, NULL); + scan = table_beginscan(oldrel, snapshot, 0, NULL, + SO_NONE); /* * Switch to per-tuple memory context and reset it for each tuple @@ -13982,8 +13983,8 @@ validateForeignKeyConstraint(char *conname, */ snapshot = RegisterSnapshot(GetLatestSnapshot()); slot = table_slot_create(rel, NULL); - scan = table_beginscan(rel, snapshot, 0, NULL); - + scan = table_beginscan(rel, snapshot, 0, NULL, + SO_NONE); perTupCxt = AllocSetContextCreate(CurrentMemoryContext, "validateForeignKeyConstraint", ALLOCSET_SMALL_SIZES); @@ -22884,7 +22885,8 @@ MergePartitionsMoveRows(List **wqueue, List *mergingPartitions, Relation newPart /* Scan through the rows. */ snapshot = RegisterSnapshot(GetLatestSnapshot()); - scan = table_beginscan(mergingPartition, snapshot, 0, NULL); + scan = table_beginscan(mergingPartition, snapshot, 0, NULL, + SO_NONE); /* * Switch to per-tuple memory context and reset it for each tuple @@ -23348,7 +23350,8 @@ SplitPartitionMoveRows(List **wqueue, Relation rel, Relation splitRel, /* Scan through the rows. */ snapshot = RegisterSnapshot(GetLatestSnapshot()); - scan = table_beginscan(splitRel, snapshot, 0, NULL); + scan = table_beginscan(splitRel, snapshot, 0, NULL, + SO_NONE); /* * Switch to per-tuple memory context and reset it for each tuple diff --git a/src/backend/commands/typecmds.c b/src/backend/commands/typecmds.c index 3dab6bb5a79..cd38e9cddf4 100644 --- a/src/backend/commands/typecmds.c +++ b/src/backend/commands/typecmds.c @@ -3185,7 +3185,8 @@ validateDomainNotNullConstraint(Oid domainoid) /* Scan all tuples in this relation */ snapshot = RegisterSnapshot(GetLatestSnapshot()); - scan = table_beginscan(testrel, snapshot, 0, NULL); + scan = table_beginscan(testrel, snapshot, 0, NULL, + SO_NONE); slot = table_slot_create(testrel, NULL); while (table_scan_getnextslot(scan, ForwardScanDirection, slot)) { @@ -3266,7 +3267,8 @@ validateDomainCheckConstraint(Oid domainoid, const char *ccbin, LOCKMODE lockmod /* Scan all tuples in this relation */ snapshot = RegisterSnapshot(GetLatestSnapshot()); - scan = table_beginscan(testrel, snapshot, 0, NULL); + scan = table_beginscan(testrel, snapshot, 0, NULL, + SO_NONE); slot = table_slot_create(testrel, NULL); while (table_scan_getnextslot(scan, ForwardScanDirection, slot)) { diff --git a/src/backend/executor/execIndexing.c b/src/backend/executor/execIndexing.c index 9d071e495c6..cc6eb3a6ee9 100644 --- a/src/backend/executor/execIndexing.c +++ b/src/backend/executor/execIndexing.c @@ -815,7 +815,9 @@ check_exclusion_or_unique_constraint(Relation heap, Relation index, retry: conflict = false; found_self = false; - index_scan = index_beginscan(heap, index, &DirtySnapshot, NULL, indnkeyatts, 0); + index_scan = index_beginscan(heap, index, + &DirtySnapshot, NULL, indnkeyatts, 0, + SO_NONE); index_rescan(index_scan, scankeys, indnkeyatts, NULL, 0); while (index_getnext_slot(index_scan, ForwardScanDirection, existing_slot)) diff --git a/src/backend/executor/execReplication.c b/src/backend/executor/execReplication.c index 2497ee7edc5..fea8991cb04 100644 --- a/src/backend/executor/execReplication.c +++ b/src/backend/executor/execReplication.c @@ -205,7 +205,8 @@ RelationFindReplTupleByIndex(Relation rel, Oid idxoid, skey_attoff = build_replindex_scan_key(skey, rel, idxrel, searchslot); /* Start an index scan. */ - scan = index_beginscan(rel, idxrel, &snap, NULL, skey_attoff, 0); + scan = index_beginscan(rel, idxrel, + &snap, NULL, skey_attoff, 0, SO_NONE); retry: found = false; @@ -383,7 +384,8 @@ RelationFindReplTupleSeq(Relation rel, LockTupleMode lockmode, /* Start a heap scan. */ InitDirtySnapshot(snap); - scan = table_beginscan(rel, &snap, 0, NULL); + scan = table_beginscan(rel, &snap, 0, NULL, + SO_NONE); scanslot = table_slot_create(rel, NULL); retry: @@ -602,7 +604,8 @@ RelationFindDeletedTupleInfoSeq(Relation rel, TupleTableSlot *searchslot, * not yet committed or those just committed prior to the scan are * excluded in update_most_recent_deletion_info(). */ - scan = table_beginscan(rel, SnapshotAny, 0, NULL); + scan = table_beginscan(rel, SnapshotAny, 0, NULL, + SO_NONE); scanslot = table_slot_create(rel, NULL); table_rescan(scan, NULL); @@ -666,7 +669,8 @@ RelationFindDeletedTupleInfoByIndex(Relation rel, Oid idxoid, * not yet committed or those just committed prior to the scan are * excluded in update_most_recent_deletion_info(). */ - scan = index_beginscan(rel, idxrel, SnapshotAny, NULL, skey_attoff, 0); + scan = index_beginscan(rel, idxrel, + SnapshotAny, NULL, skey_attoff, 0, SO_NONE); index_rescan(scan, skey, skey_attoff, NULL, 0); diff --git a/src/backend/executor/nodeBitmapHeapscan.c b/src/backend/executor/nodeBitmapHeapscan.c index 7cf8d23c742..69683d81527 100644 --- a/src/backend/executor/nodeBitmapHeapscan.c +++ b/src/backend/executor/nodeBitmapHeapscan.c @@ -148,7 +148,8 @@ BitmapTableScanSetup(BitmapHeapScanState *node) table_beginscan_bm(node->ss.ss_currentRelation, node->ss.ps.state->es_snapshot, 0, - NULL); + NULL, + SO_NONE); } node->ss.ss_currentScanDesc->st.rs_tbmiterator = tbmiterator; diff --git a/src/backend/executor/nodeIndexonlyscan.c b/src/backend/executor/nodeIndexonlyscan.c index 9eab81fd1c8..02df40f32c5 100644 --- a/src/backend/executor/nodeIndexonlyscan.c +++ b/src/backend/executor/nodeIndexonlyscan.c @@ -95,7 +95,8 @@ IndexOnlyNext(IndexOnlyScanState *node) estate->es_snapshot, node->ioss_Instrument, node->ioss_NumScanKeys, - node->ioss_NumOrderByKeys); + node->ioss_NumOrderByKeys, + SO_NONE); node->ioss_ScanDesc = scandesc; @@ -794,7 +795,8 @@ ExecIndexOnlyScanInitializeDSM(IndexOnlyScanState *node, node->ioss_Instrument, node->ioss_NumScanKeys, node->ioss_NumOrderByKeys, - piscan); + piscan, + SO_NONE); node->ioss_ScanDesc->xs_want_itup = true; node->ioss_VMBuffer = InvalidBuffer; @@ -860,7 +862,8 @@ ExecIndexOnlyScanInitializeWorker(IndexOnlyScanState *node, node->ioss_Instrument, node->ioss_NumScanKeys, node->ioss_NumOrderByKeys, - piscan); + piscan, + SO_NONE); node->ioss_ScanDesc->xs_want_itup = true; /* diff --git a/src/backend/executor/nodeIndexscan.c b/src/backend/executor/nodeIndexscan.c index 06143e94c5a..3c0b8daf664 100644 --- a/src/backend/executor/nodeIndexscan.c +++ b/src/backend/executor/nodeIndexscan.c @@ -113,7 +113,8 @@ IndexNext(IndexScanState *node) estate->es_snapshot, node->iss_Instrument, node->iss_NumScanKeys, - node->iss_NumOrderByKeys); + node->iss_NumOrderByKeys, + SO_NONE); node->iss_ScanDesc = scandesc; @@ -209,7 +210,8 @@ IndexNextWithReorder(IndexScanState *node) estate->es_snapshot, node->iss_Instrument, node->iss_NumScanKeys, - node->iss_NumOrderByKeys); + node->iss_NumOrderByKeys, + SO_NONE); node->iss_ScanDesc = scandesc; @@ -1730,7 +1732,8 @@ ExecIndexScanInitializeDSM(IndexScanState *node, node->iss_Instrument, node->iss_NumScanKeys, node->iss_NumOrderByKeys, - piscan); + piscan, + SO_NONE); /* * If no run-time keys to calculate or they are ready, go ahead and pass @@ -1794,7 +1797,8 @@ ExecIndexScanInitializeWorker(IndexScanState *node, node->iss_Instrument, node->iss_NumScanKeys, node->iss_NumOrderByKeys, - piscan); + piscan, + SO_NONE); /* * If no run-time keys to calculate or they are ready, go ahead and pass diff --git a/src/backend/executor/nodeSamplescan.c b/src/backend/executor/nodeSamplescan.c index 6b0d65f752f..cf32df33d82 100644 --- a/src/backend/executor/nodeSamplescan.c +++ b/src/backend/executor/nodeSamplescan.c @@ -298,7 +298,8 @@ tablesample_init(SampleScanState *scanstate) 0, NULL, scanstate->use_bulkread, allow_sync, - scanstate->use_pagemode); + scanstate->use_pagemode, + SO_NONE); } else { diff --git a/src/backend/executor/nodeSeqscan.c b/src/backend/executor/nodeSeqscan.c index 8f219f60a93..09ccc65de1c 100644 --- a/src/backend/executor/nodeSeqscan.c +++ b/src/backend/executor/nodeSeqscan.c @@ -71,7 +71,8 @@ SeqNext(SeqScanState *node) */ scandesc = table_beginscan(node->ss.ss_currentRelation, estate->es_snapshot, - 0, NULL); + 0, NULL, + SO_NONE); node->ss.ss_currentScanDesc = scandesc; } @@ -375,7 +376,8 @@ ExecSeqScanInitializeDSM(SeqScanState *node, estate->es_snapshot); shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, pscan); node->ss.ss_currentScanDesc = - table_beginscan_parallel(node->ss.ss_currentRelation, pscan); + table_beginscan_parallel(node->ss.ss_currentRelation, pscan, + SO_NONE); } /* ---------------------------------------------------------------- @@ -408,5 +410,6 @@ ExecSeqScanInitializeWorker(SeqScanState *node, pscan = shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, false); node->ss.ss_currentScanDesc = - table_beginscan_parallel(node->ss.ss_currentRelation, pscan); + table_beginscan_parallel(node->ss.ss_currentRelation, pscan, + SO_NONE); } diff --git a/src/backend/executor/nodeTidrangescan.c b/src/backend/executor/nodeTidrangescan.c index 617713bde04..084e4c6ec90 100644 --- a/src/backend/executor/nodeTidrangescan.c +++ b/src/backend/executor/nodeTidrangescan.c @@ -245,7 +245,8 @@ TidRangeNext(TidRangeScanState *node) scandesc = table_beginscan_tidrange(node->ss.ss_currentRelation, estate->es_snapshot, &node->trss_mintid, - &node->trss_maxtid); + &node->trss_maxtid, + SO_NONE); node->ss.ss_currentScanDesc = scandesc; } else @@ -460,7 +461,7 @@ ExecTidRangeScanInitializeDSM(TidRangeScanState *node, ParallelContext *pcxt) shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, pscan); node->ss.ss_currentScanDesc = table_beginscan_parallel_tidrange(node->ss.ss_currentRelation, - pscan); + pscan, SO_NONE); } /* ---------------------------------------------------------------- @@ -494,5 +495,5 @@ ExecTidRangeScanInitializeWorker(TidRangeScanState *node, pscan = shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, false); node->ss.ss_currentScanDesc = table_beginscan_parallel_tidrange(node->ss.ss_currentRelation, - pscan); + pscan, SO_NONE); } diff --git a/src/backend/partitioning/partbounds.c b/src/backend/partitioning/partbounds.c index 0ca312ac27d..f867d1b75a5 100644 --- a/src/backend/partitioning/partbounds.c +++ b/src/backend/partitioning/partbounds.c @@ -3362,7 +3362,8 @@ check_default_partition_contents(Relation parent, Relation default_rel, econtext = GetPerTupleExprContext(estate); snapshot = RegisterSnapshot(GetLatestSnapshot()); tupslot = table_slot_create(part_rel, &estate->es_tupleTable); - scan = table_beginscan(part_rel, snapshot, 0, NULL); + scan = table_beginscan(part_rel, snapshot, 0, NULL, + SO_NONE); /* * Switch to per-tuple memory context and reset it for each tuple diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index 53f85ccde01..4160d2d6e24 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -7178,7 +7178,8 @@ get_actual_variable_endpoint(Relation heapRel, index_scan = index_beginscan(heapRel, indexRel, &SnapshotNonVacuumable, NULL, - 1, 0); + 1, 0, + SO_NONE); /* Set it up for index-only scan */ index_scan->xs_want_itup = true; index_rescan(index_scan, scankeys, 1, NULL, 0); diff --git a/src/include/access/genam.h b/src/include/access/genam.h index 1a27bf060b3..b69320a7fc8 100644 --- a/src/include/access/genam.h +++ b/src/include/access/genam.h @@ -158,7 +158,8 @@ extern IndexScanDesc index_beginscan(Relation heapRelation, Relation indexRelation, Snapshot snapshot, IndexScanInstrumentation *instrument, - int nkeys, int norderbys); + int nkeys, int norderbys, + uint32 flags); extern IndexScanDesc index_beginscan_bitmap(Relation indexRelation, Snapshot snapshot, IndexScanInstrumentation *instrument, @@ -184,7 +185,8 @@ extern IndexScanDesc index_beginscan_parallel(Relation heaprel, Relation indexrel, IndexScanInstrumentation *instrument, int nkeys, int norderbys, - ParallelIndexScanDesc pscan); + ParallelIndexScanDesc pscan, + uint32 flags); extern ItemPointer index_getnext_tid(IndexScanDesc scan, ScanDirection direction); extern bool index_fetch_heap(IndexScanDesc scan, TupleTableSlot *slot); diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h index 53e86c0d6e1..4e982d76889 100644 --- a/src/include/access/heapam.h +++ b/src/include/access/heapam.h @@ -95,10 +95,7 @@ typedef struct HeapScanDescData */ ParallelBlockTableScanWorkerData *rs_parallelworkerdata; - /* - * For sequential scans and bitmap heap scans. The current heap block's - * corresponding page in the visibility map. - */ + /* Current heap block's corresponding page in the visibility map */ Buffer rs_vmbuffer; /* these fields only used in page-at-a-time mode and for bitmap scans */ diff --git a/src/include/access/relscan.h b/src/include/access/relscan.h index ce340c076f8..960abf6c214 100644 --- a/src/include/access/relscan.h +++ b/src/include/access/relscan.h @@ -122,6 +122,12 @@ typedef struct ParallelBlockTableScanWorkerData *ParallelBlockTableScanWorker; typedef struct IndexFetchTableData { Relation rel; + + /* + * Bitmask of ScanOptions affecting the relation. No SO_INTERNAL_FLAGS are + * permitted. + */ + uint32 flags; } IndexFetchTableData; struct IndexScanInstrumentation; diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h index 3b017e043a0..cd14f242d65 100644 --- a/src/include/access/tableam.h +++ b/src/include/access/tableam.h @@ -45,6 +45,8 @@ typedef struct ValidateIndexState ValidateIndexState; */ typedef enum ScanOptions { + SO_NONE = 0, + /* one of SO_TYPE_* may be specified */ SO_TYPE_SEQSCAN = 1 << 0, SO_TYPE_BITMAPSCAN = 1 << 1, @@ -65,6 +67,19 @@ typedef enum ScanOptions SO_TEMP_SNAPSHOT = 1 << 9, } ScanOptions; +/* + * Mask of flags that are set internally by the table scan functions and + * shouldn't be passed by callers. Some of these are effectively set by callers + * through parameters to table scan functions (e.g. SO_ALLOW_STRAT/allow_strat), + * however, for now, retain tight control over them and don't allow users to + * pass these themselves to table scan functions. + */ +#define SO_INTERNAL_FLAGS \ + (SO_TYPE_SEQSCAN | SO_TYPE_BITMAPSCAN | SO_TYPE_SAMPLESCAN | \ + SO_TYPE_TIDSCAN | SO_TYPE_TIDRANGESCAN | SO_TYPE_ANALYZE | \ + SO_ALLOW_STRAT | SO_ALLOW_SYNC | SO_ALLOW_PAGEMODE | \ + SO_TEMP_SNAPSHOT) + /* * Result codes for table_{update,delete,lock_tuple}, and for visibility * routines inside table AMs. @@ -324,8 +339,9 @@ typedef struct TableAmRoutine * `flags` is a bitmask indicating the type of scan (ScanOptions's * SO_TYPE_*, currently only one may be specified), options controlling * the scan's behaviour (ScanOptions's SO_ALLOW_*, several may be - * specified, an AM may ignore unsupported ones) and whether the snapshot - * needs to be deallocated at scan_end (ScanOptions's SO_TEMP_SNAPSHOT). + * specified, an AM may ignore unsupported ones), whether the snapshot + * needs to be deallocated at scan_end (ScanOptions's SO_TEMP_SNAPSHOT), + * and any number of the other ScanOptions values. */ TableScanDesc (*scan_begin) (Relation rel, Snapshot snapshot, @@ -421,9 +437,12 @@ typedef struct TableAmRoutine * IndexFetchTableData, which the AM will typically embed in a larger * structure with additional information. * + * flags is a bitmask of ScanOptions affecting underlying table scan + * behavior. See scan_begin() for more information on passing these. + * * Tuples for an index scan can then be fetched via index_fetch_tuple. */ - struct IndexFetchTableData *(*index_fetch_begin) (Relation rel); + struct IndexFetchTableData *(*index_fetch_begin) (Relation rel, uint32 flags); /* * Reset index fetch. Typically this will release cross index fetch @@ -874,12 +893,19 @@ extern TupleTableSlot *table_slot_create(Relation relation, List **reglist); * A wrapper around the Table Access Method scan_begin callback, to centralize * error checking. All calls to ->scan_begin() should go through this * function. + * + * The caller-provided user_flags are validated against SO_INTERNAL_FLAGS to + * catch callers that accidentally pass scan-type or other internal flags. */ static TableScanDesc table_beginscan_common(Relation rel, Snapshot snapshot, int nkeys, ScanKeyData *key, ParallelTableScanDesc pscan, - uint32 flags) + uint32 flags, uint32 user_flags) { + Assert((user_flags & SO_INTERNAL_FLAGS) == 0); + Assert((flags & ~SO_INTERNAL_FLAGS) == 0); + flags |= user_flags; + /* * We don't allow scans to be started while CheckXidAlive is set, except * via systable_beginscan() et al. See detailed comments in xact.c where @@ -894,15 +920,18 @@ table_beginscan_common(Relation rel, Snapshot snapshot, int nkeys, /* * Start a scan of `rel`. Returned tuples pass a visibility test of * `snapshot`, and if nkeys != 0, the results are filtered by those scan keys. + * + * flags is a bitmask of ScanOptions. No SO_INTERNAL_FLAGS are permitted. */ static inline TableScanDesc table_beginscan(Relation rel, Snapshot snapshot, - int nkeys, ScanKeyData *key) + int nkeys, ScanKeyData *key, uint32 flags) { - uint32 flags = SO_TYPE_SEQSCAN | + uint32 internal_flags = SO_TYPE_SEQSCAN | SO_ALLOW_STRAT | SO_ALLOW_SYNC | SO_ALLOW_PAGEMODE; - return table_beginscan_common(rel, snapshot, nkeys, key, NULL, flags); + return table_beginscan_common(rel, snapshot, nkeys, key, NULL, + internal_flags, flags); } /* @@ -931,7 +960,8 @@ table_beginscan_strat(Relation rel, Snapshot snapshot, if (allow_sync) flags |= SO_ALLOW_SYNC; - return table_beginscan_common(rel, snapshot, nkeys, key, NULL, flags); + return table_beginscan_common(rel, snapshot, nkeys, key, NULL, + flags, SO_NONE); } /* @@ -939,14 +969,17 @@ table_beginscan_strat(Relation rel, Snapshot snapshot, * TableScanDesc for a bitmap heap scan. Although that scan technology is * really quite unlike a standard seqscan, there is just enough commonality to * make it worth using the same data structure. + * + * flags is a bitmask of ScanOptions. No SO_INTERNAL_FLAGS are permitted. */ static inline TableScanDesc table_beginscan_bm(Relation rel, Snapshot snapshot, - int nkeys, ScanKeyData *key) + int nkeys, ScanKeyData *key, uint32 flags) { - uint32 flags = SO_TYPE_BITMAPSCAN | SO_ALLOW_PAGEMODE; + uint32 internal_flags = SO_TYPE_BITMAPSCAN | SO_ALLOW_PAGEMODE; - return table_beginscan_common(rel, snapshot, nkeys, key, NULL, flags); + return table_beginscan_common(rel, snapshot, nkeys, key, NULL, + internal_flags, flags); } /* @@ -955,23 +988,26 @@ table_beginscan_bm(Relation rel, Snapshot snapshot, * using the same data structure although the behavior is rather different. * In addition to the options offered by table_beginscan_strat, this call * also allows control of whether page-mode visibility checking is used. + * + * flags is a bitmask of ScanOptions. No SO_INTERNAL_FLAGS are permitted. */ static inline TableScanDesc table_beginscan_sampling(Relation rel, Snapshot snapshot, int nkeys, ScanKeyData *key, bool allow_strat, bool allow_sync, - bool allow_pagemode) + bool allow_pagemode, uint32 flags) { - uint32 flags = SO_TYPE_SAMPLESCAN; + uint32 internal_flags = SO_TYPE_SAMPLESCAN; if (allow_strat) - flags |= SO_ALLOW_STRAT; + internal_flags |= SO_ALLOW_STRAT; if (allow_sync) - flags |= SO_ALLOW_SYNC; + internal_flags |= SO_ALLOW_SYNC; if (allow_pagemode) - flags |= SO_ALLOW_PAGEMODE; + internal_flags |= SO_ALLOW_PAGEMODE; - return table_beginscan_common(rel, snapshot, nkeys, key, NULL, flags); + return table_beginscan_common(rel, snapshot, nkeys, key, NULL, + internal_flags, flags); } /* @@ -984,7 +1020,8 @@ table_beginscan_tid(Relation rel, Snapshot snapshot) { uint32 flags = SO_TYPE_TIDSCAN; - return table_beginscan_common(rel, snapshot, 0, NULL, NULL, flags); + return table_beginscan_common(rel, snapshot, 0, NULL, NULL, + flags, SO_NONE); } /* @@ -997,7 +1034,8 @@ table_beginscan_analyze(Relation rel) { uint32 flags = SO_TYPE_ANALYZE; - return table_beginscan_common(rel, NULL, 0, NULL, NULL, flags); + return table_beginscan_common(rel, NULL, 0, NULL, NULL, + flags, SO_NONE); } /* @@ -1058,16 +1096,19 @@ table_scan_getnextslot(TableScanDesc sscan, ScanDirection direction, TupleTableS /* * table_beginscan_tidrange is the entry point for setting up a TableScanDesc * for a TID range scan. + * + * flags is a bitmask of ScanOptions. No SO_INTERNAL_FLAGS are permitted. */ static inline TableScanDesc table_beginscan_tidrange(Relation rel, Snapshot snapshot, ItemPointer mintid, - ItemPointer maxtid) + ItemPointer maxtid, uint32 flags) { TableScanDesc sscan; - uint32 flags = SO_TYPE_TIDRANGESCAN | SO_ALLOW_PAGEMODE; + uint32 internal_flags = SO_TYPE_TIDRANGESCAN | SO_ALLOW_PAGEMODE; - sscan = table_beginscan_common(rel, snapshot, 0, NULL, NULL, flags); + sscan = table_beginscan_common(rel, snapshot, 0, NULL, NULL, + internal_flags, flags); /* Set the range of TIDs to scan */ sscan->rs_rd->rd_tableam->scan_set_tidrange(sscan, mintid, maxtid); @@ -1139,20 +1180,26 @@ extern void table_parallelscan_initialize(Relation rel, * table_parallelscan_initialize(), for the same relation. The initialization * does not need to have happened in this backend. * + * flags is a bitmask of ScanOptions. No SO_INTERNAL_FLAGS are permitted. + * * Caller must hold a suitable lock on the relation. */ extern TableScanDesc table_beginscan_parallel(Relation relation, - ParallelTableScanDesc pscan); + ParallelTableScanDesc pscan, + uint32 flags); /* * Begin a parallel tid range scan. `pscan` needs to have been initialized * with table_parallelscan_initialize(), for the same relation. The * initialization does not need to have happened in this backend. * + * flags is a bitmask of ScanOptions. No SO_INTERNAL_FLAGS are permitted. + * * Caller must hold a suitable lock on the relation. */ extern TableScanDesc table_beginscan_parallel_tidrange(Relation relation, - ParallelTableScanDesc pscan); + ParallelTableScanDesc pscan, + uint32 flags); /* * Restart a parallel scan. Call this in the leader process. Caller is @@ -1175,11 +1222,15 @@ table_parallelscan_reinitialize(Relation rel, ParallelTableScanDesc pscan) * Prepare to fetch tuples from the relation, as needed when fetching tuples * for an index scan. * + * flags is a bitmask of ScanOptions. No SO_INTERNAL_FLAGS are permitted. + * * Tuples for an index scan can then be fetched via table_index_fetch_tuple(). */ static inline IndexFetchTableData * -table_index_fetch_begin(Relation rel) +table_index_fetch_begin(Relation rel, uint32 flags) { + Assert((flags & SO_INTERNAL_FLAGS) == 0); + /* * We don't allow scans to be started while CheckXidAlive is set, except * via systable_beginscan() et al. See detailed comments in xact.c where @@ -1188,7 +1239,7 @@ table_index_fetch_begin(Relation rel) if (unlikely(TransactionIdIsValid(CheckXidAlive) && !bsysscan)) elog(ERROR, "scan started during logical decoding"); - return rel->rd_tableam->index_fetch_begin(rel); + return rel->rd_tableam->index_fetch_begin(rel, flags); } /*