From b46e1e54d078def33b840ae1fa6c5236a7b12ec2 Mon Sep 17 00:00:00 2001 From: Melanie Plageman Date: Mon, 30 Mar 2026 15:47:07 -0400 Subject: [PATCH] Allow on-access pruning to set pages all-visible Many queries do not modify the underlying relation. For such queries, if on-access pruning occurs during the scan, we can check whether the page has become all-visible and update the visibility map accordingly. Previously, only vacuum and COPY FREEZE marked pages as all-visible or all-frozen. This commit implements on-access VM setting for sequential scans, tid range scans, sample scans, bitmap heap scans, and the underlying heap relation in index scans. Setting the visibility map on-access can avoid write amplification caused by vacuum later needing to set the page all-visible, which could trigger a write and potentially an FPI. It also allows more frequent index-only scans, since they require pages to be marked all-visible in the VM. Author: Melanie Plageman Reviewed-by: Andres Freund Reviewed-by: Kirill Reshke Reviewed-by: Chao Li Discussion: https://postgr.es/m/flat/CAAKRu_ZMw6Npd_qm2KM%2BFwQ3cMOMx1Dh3VMhp8-V7SOLxdK9-g%40mail.gmail.com --- src/backend/access/heap/heapam.c | 3 +- src/backend/access/heap/heapam_handler.c | 6 ++- src/backend/access/heap/pruneheap.c | 66 ++++++++++++++++++------ src/backend/access/heap/vacuumlazy.c | 2 +- src/include/access/heapam.h | 3 +- 5 files changed, 59 insertions(+), 21 deletions(-) diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 4db4a2068ee..129b01da864 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -633,7 +633,8 @@ heap_prepare_pagescan(TableScanDesc sscan) /* * Prune and repair fragmentation for the whole page, if possible. 
*/ - heap_page_prune_opt(scan->rs_base.rs_rd, buffer, &scan->rs_vmbuffer); + heap_page_prune_opt(scan->rs_base.rs_rd, buffer, &scan->rs_vmbuffer, + sscan->rs_flags & SO_HINT_REL_READ_ONLY); /* * We must hold share lock on the buffer content while examining tuple diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c index e63b12c3c61..cdd153c6b6d 100644 --- a/src/backend/access/heap/heapam_handler.c +++ b/src/backend/access/heap/heapam_handler.c @@ -149,7 +149,8 @@ heapam_index_fetch_tuple(struct IndexFetchTableData *scan, */ if (prev_buf != hscan->xs_cbuf) heap_page_prune_opt(hscan->xs_base.rel, hscan->xs_cbuf, - &hscan->xs_vmbuffer); + &hscan->xs_vmbuffer, + hscan->xs_base.flags & SO_HINT_REL_READ_ONLY); } /* Obtain share-lock on the buffer so we can examine visibility */ @@ -2546,7 +2547,8 @@ BitmapHeapScanNextBlock(TableScanDesc scan, /* * Prune and repair fragmentation for the whole page, if possible. */ - heap_page_prune_opt(scan->rs_rd, buffer, &hscan->rs_vmbuffer); + heap_page_prune_opt(scan->rs_rd, buffer, &hscan->rs_vmbuffer, + scan->rs_flags & SO_HINT_REL_READ_ONLY); /* * We must hold share lock on the buffer content while examining tuple diff --git a/src/backend/access/heap/pruneheap.c b/src/backend/access/heap/pruneheap.c index 6693af8da7f..6a2c3513497 100644 --- a/src/backend/access/heap/pruneheap.c +++ b/src/backend/access/heap/pruneheap.c @@ -44,6 +44,8 @@ typedef struct bool mark_unused_now; /* whether to attempt freezing tuples */ bool attempt_freeze; + /* whether to attempt setting the VM */ + bool attempt_set_vm; struct VacuumCutoffs *cutoffs; Relation relation; @@ -75,7 +77,8 @@ typedef struct /* * set_all_visible and set_all_frozen indicate if the all-visible and * all-frozen bits in the visibility map can be set for this page after - * pruning. + * pruning. They are only tracked when the caller requests VM updates + * (attempt_set_vm); otherwise they remain false throughout. 
* * NOTE: set_all_visible and set_all_frozen initially don't include * LP_DEAD items. That's convenient for heap_page_prune_and_freeze() to @@ -232,7 +235,8 @@ static void page_verify_redirects(Page page); static bool heap_page_will_freeze(bool did_tuple_hint_fpi, bool do_prune, bool do_hint_prune, PruneState *prstate); -static bool heap_page_will_set_vm(PruneState *prstate, PruneReason reason); +static bool heap_page_will_set_vm(PruneState *prstate, PruneReason reason, + bool do_prune, bool do_freeze); /* @@ -251,9 +255,21 @@ static bool heap_page_will_set_vm(PruneState *prstate, PruneReason reason); * reuse the pin across calls, avoiding repeated pin/unpin cycles. If we find * VM corruption during pruning, we will fix it. Caller is responsible for * unpinning *vmbuffer. + * + * rel_read_only is true if we determined at plan time that the query does not + * modify the relation. It is counterproductive to set the VM if the query + * will immediately clear it. + * + * As noted in ScanRelIsReadOnly(), INSERT ... SELECT from the same table will + * report the scan relation as read-only. This is usually harmless in + * practice. It is useful to set scanned pages all-visible that won't be + * inserted into. Pages it does insert to will rarely meet the criteria for + * pruning, and those that do are likely to contain in-progress inserts which + * make the page not fully all-visible. */ void -heap_page_prune_opt(Relation relation, Buffer buffer, Buffer *vmbuffer) +heap_page_prune_opt(Relation relation, Buffer buffer, Buffer *vmbuffer, + bool rel_read_only) { Page page = BufferGetPage(buffer); TransactionId prune_xid; @@ -336,6 +352,8 @@ heap_page_prune_opt(Relation relation, Buffer buffer, Buffer *vmbuffer) * current implementation. 
*/ params.options = HEAP_PAGE_PRUNE_ALLOW_FAST_PATH; + if (rel_read_only) + params.options |= HEAP_PAGE_PRUNE_SET_VM; heap_page_prune_and_freeze(&params, &presult, &dummy_off_loc, NULL, NULL); @@ -392,6 +410,7 @@ prune_freeze_setup(PruneFreezeParams *params, /* cutoffs must be provided if we will attempt freezing */ Assert(!(params->options & HEAP_PAGE_PRUNE_FREEZE) || params->cutoffs); prstate->attempt_freeze = (params->options & HEAP_PAGE_PRUNE_FREEZE) != 0; + prstate->attempt_set_vm = (params->options & HEAP_PAGE_PRUNE_SET_VM) != 0; prstate->cutoffs = params->cutoffs; prstate->relation = params->relation; prstate->block = BufferGetBlockNumber(params->buffer); @@ -461,14 +480,13 @@ prune_freeze_setup(PruneFreezeParams *params, * We track whether the page will be all-visible/all-frozen at the end of * pruning and freezing. While examining tuple visibility, we'll set * set_all_visible to false if there are tuples on the page not visible to - * all running and future transactions. set_all_visible is always - * maintained but only VACUUM will set the VM if the page ends up being - * all-visible. + * all running and future transactions. If setting the VM is enabled for + * this scan, we will do so if the page ends up being all-visible. * * We also keep track of the newest live XID, which is used to calculate * the snapshot conflict horizon for a WAL record setting the VM. */ - prstate->set_all_visible = true; + prstate->set_all_visible = prstate->attempt_set_vm; prstate->newest_live_xid = InvalidTransactionId; /* @@ -477,7 +495,9 @@ prune_freeze_setup(PruneFreezeParams *params, * caller passed HEAP_PAGE_PRUNE_FREEZE, because if they did not, we won't * call heap_prepare_freeze_tuple() for each tuple, and set_all_frozen * will never be cleared for tuples that need freezing. This would lead to - * incorrectly setting the visibility map all-frozen for this page. + * incorrectly setting the visibility map all-frozen for this page.
We + * can't set the page all-frozen in the VM if the caller didn't pass + * HEAP_PAGE_PRUNE_SET_VM. * * When freezing is not required (no XIDs/MXIDs older than the freeze * cutoff), we may still choose to "opportunistically" freeze if doing so @@ -494,7 +514,7 @@ prune_freeze_setup(PruneFreezeParams *params, * whether to freeze, but before updating the VM, to avoid setting the VM * bits incorrectly. */ - prstate->set_all_frozen = prstate->attempt_freeze; + prstate->set_all_frozen = prstate->attempt_freeze && prstate->attempt_set_vm; } /* @@ -920,21 +940,34 @@ heap_page_fix_vm_corruption(PruneState *prstate, OffsetNumber offnum, * This function does not actually set the VM bits or page-level visibility * hint, PD_ALL_VISIBLE. * + * This should be called only after do_freeze has been decided (and do_prune + * has been set), as these factor into our heuristic-based decision. + * * Returns true if one or both VM bits should be set and false otherwise. */ static bool -heap_page_will_set_vm(PruneState *prstate, PruneReason reason) +heap_page_will_set_vm(PruneState *prstate, PruneReason reason, + bool do_prune, bool do_freeze) { - /* - * Though on-access pruning maintains prstate->set_all_visible, we don't - * set the VM on-access for now. - */ - if (reason == PRUNE_ON_ACCESS) + if (!prstate->attempt_set_vm) return false; if (!prstate->set_all_visible) return false; + /* + * If this is an on-access call and we're not actually pruning, avoid + * setting the visibility map if it would newly dirty the heap page or, if + * the page is already dirty, if doing so would require including a + * full-page image (FPI) of the heap page in the WAL. 
+ */ + if (reason == PRUNE_ON_ACCESS && !do_prune && !do_freeze && + (!BufferIsDirty(prstate->buffer) || XLogCheckBufferNeedsBackup(prstate->buffer))) + { + prstate->set_all_visible = prstate->set_all_frozen = false; + return false; + } + prstate->new_vmbits = VISIBILITYMAP_ALL_VISIBLE; if (prstate->set_all_frozen) @@ -1165,9 +1198,10 @@ heap_page_prune_and_freeze(PruneFreezeParams *params, prstate.set_all_visible = prstate.set_all_frozen = false; Assert(!prstate.set_all_frozen || prstate.set_all_visible); + Assert(!prstate.set_all_visible || prstate.attempt_set_vm); Assert(!prstate.set_all_visible || (prstate.lpdead_items == 0)); - do_set_vm = heap_page_will_set_vm(&prstate, params->reason); + do_set_vm = heap_page_will_set_vm(&prstate, params->reason, do_prune, do_freeze); /* * new_vmbits should be 0 regardless of whether or not the page is diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c index f698c2d899b..24001b27387 100644 --- a/src/backend/access/heap/vacuumlazy.c +++ b/src/backend/access/heap/vacuumlazy.c @@ -2021,7 +2021,7 @@ lazy_scan_prune(LVRelState *vacrel, .buffer = buf, .vmbuffer = vmbuffer, .reason = PRUNE_VACUUM_SCAN, - .options = HEAP_PAGE_PRUNE_FREEZE, + .options = HEAP_PAGE_PRUNE_FREEZE | HEAP_PAGE_PRUNE_SET_VM, .vistest = vacrel->vistest, .cutoffs = &vacrel->cutoffs, }; diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h index 696b1f49a9d..6018dacf0f7 100644 --- a/src/include/access/heapam.h +++ b/src/include/access/heapam.h @@ -43,6 +43,7 @@ #define HEAP_PAGE_PRUNE_MARK_UNUSED_NOW (1 << 0) #define HEAP_PAGE_PRUNE_FREEZE (1 << 1) #define HEAP_PAGE_PRUNE_ALLOW_FAST_PATH (1 << 2) +#define HEAP_PAGE_PRUNE_SET_VM (1 << 3) typedef struct BulkInsertStateData *BulkInsertState; typedef struct GlobalVisState GlobalVisState; @@ -431,7 +432,7 @@ extern TransactionId heap_index_delete_tuples(Relation rel, /* in heap/pruneheap.c */ extern void heap_page_prune_opt(Relation relation, Buffer buffer, - 
Buffer *vmbuffer); + Buffer *vmbuffer, bool rel_read_only); extern void heap_page_prune_and_freeze(PruneFreezeParams *params, PruneFreezeResult *presult, OffsetNumber *off_loc,