WAL log VM setting during vacuum phase I in XLOG_HEAP2_PRUNE_VACUUM_SCAN

Vacuum no longer emits a separate WAL record for each page set
all-visible or all-frozen during phase I. Instead, visibility map
updates are now included in the XLOG_HEAP2_PRUNE_VACUUM_SCAN record that
is already emitted for pruning and freezing.

Previously, heap_page_prune_and_freeze() determined whether a page was
all-visible, but the corresponding VM bits were only set later in
lazy_scan_prune(). Now the VM is updated immediately in
heap_page_prune_and_freeze(), at the same time as the heap
modifications. This reduces WAL volume produced by vacuum.

For now, vacuum is still the only user of heap_page_prune_and_freeze()
allowed to set the VM. On-access pruning is not yet able to set the VM.

Author: Melanie Plageman <melanieplageman@gmail.com>
Reviewed-by: Andres Freund <andres@anarazel.de>
Reviewed-by: Kirill Reshke <reshkekirill@gmail.com>
Reviewed-by: Chao Li <li.evan.chao@gmail.com>
Earlier version Reviewed-by: Robert Haas <robertmhaas@gmail.com>
Discussion: https://postgr.es/m/flat/CAAKRu_ZMw6Npd_qm2KM%2BFwQ3cMOMx1Dh3VMhp8-V7SOLxdK9-g%40mail.gmail.com
This commit is contained in:
Melanie Plageman 2026-03-24 16:49:46 -04:00
parent dc47beacaa
commit 1252a4ee28
3 changed files with 205 additions and 190 deletions

View file

@@ -72,6 +72,21 @@ typedef struct
OffsetNumber nowunused[MaxHeapTuplesPerPage];
HeapTupleFreeze frozen[MaxHeapTuplesPerPage];
/*
* set_all_visible and set_all_frozen indicate if the all-visible and
* all-frozen bits in the visibility map can be set for this page after
* pruning.
*
* NOTE: set_all_visible and set_all_frozen initially don't include
* LP_DEAD items. That's convenient for heap_page_prune_and_freeze() to
* use them to decide whether to opportunistically freeze the page or not.
* The set_all_visible and set_all_frozen values ultimately used to set
* the VM are adjusted to include LP_DEAD items after we determine whether
* or not to opportunistically freeze.
*/
bool set_all_visible;
bool set_all_frozen;
/*-------------------------------------------------------
* Working state for HOT chain processing
*-------------------------------------------------------
@@ -122,12 +137,16 @@ typedef struct
/*
* Caller must provide a pinned vmbuffer corresponding to the heap block
* passed to heap_page_prune_and_freeze(). We will fix any corruption
* found in the VM.
* found in the VM and set the VM if the page is all-visible/all-frozen.
*/
Buffer vmbuffer;
/* Bits in the vmbuffer for this heap page */
/*
* The state of the VM bits at the beginning of pruning and the state they
* will be in at the end.
*/
uint8 old_vmbits;
uint8 new_vmbits;
/* The newest xmin of live tuples on the page */
TransactionId newest_live_xid;
@@ -157,21 +176,6 @@ typedef struct
*/
int lpdead_items; /* number of items in the array */
OffsetNumber *deadoffsets; /* points directly to presult->deadoffsets */
/*
* set_all_visible and set_all_frozen indicate if the all-visible and
* all-frozen bits in the visibility map can be set for this page after
* pruning.
*
* NOTE: set_all_visible and set_all_frozen initially don't include
* LP_DEAD items. That's convenient for heap_page_prune_and_freeze() to
* use them to decide whether to freeze the page or not. The
* set_all_visible and set_all_frozen values returned to the caller are
* adjusted to include LP_DEAD items after we determine whether to
* opportunistically freeze.
*/
bool set_all_visible;
bool set_all_frozen;
} PruneState;
/*
@@ -228,6 +232,7 @@ static void page_verify_redirects(Page page);
static bool heap_page_will_freeze(bool did_tuple_hint_fpi, bool do_prune, bool do_hint_prune,
PruneState *prstate);
static bool heap_page_will_set_vm(PruneState *prstate, PruneReason reason);
/*
@@ -395,6 +400,7 @@ prune_freeze_setup(PruneFreezeParams *params,
Assert(BufferIsValid(params->vmbuffer));
prstate->vmbuffer = params->vmbuffer;
prstate->new_vmbits = 0;
prstate->old_vmbits = visibilitymap_get_status(prstate->relation,
prstate->block,
&prstate->vmbuffer);
@@ -907,6 +913,42 @@ heap_page_fix_vm_corruption(PruneState *prstate, OffsetNumber offnum,
}
}
/*
* Decide whether to set the visibility map bits (all-visible and all-frozen)
* for the current page using information from the PruneState and VM.
*
* This function does not actually set the VM bits or page-level visibility
* hint, PD_ALL_VISIBLE.
*
* Returns true if one or both VM bits should be set and false otherwise.
*/
static bool
heap_page_will_set_vm(PruneState *prstate, PruneReason reason)
{
/*
* Though on-access pruning maintains prstate->set_all_visible, we don't
* set the VM on-access for now.
*/
if (reason == PRUNE_ON_ACCESS)
return false;
/* Page isn't all-visible, so there is nothing to set in the VM. */
if (!prstate->set_all_visible)
return false;
/* Compute the VM bits the page should have after pruning/freezing. */
prstate->new_vmbits = VISIBILITYMAP_ALL_VISIBLE;
if (prstate->set_all_frozen)
prstate->new_vmbits |= VISIBILITYMAP_ALL_FROZEN;
/*
* If the VM already has exactly these bits, there is no update to make.
* Reset new_vmbits to 0 so that it is nonzero only when the VM will
* actually be modified (callers assert this invariant).
*/
if (prstate->new_vmbits == prstate->old_vmbits)
{
prstate->new_vmbits = 0;
return false;
}
return true;
}
/*
* If the page is already all-frozen, or already all-visible and freezing
* won't be attempted, there is no remaining work and we can use the fast path
@@ -940,8 +982,6 @@ prune_freeze_fast_path(PruneState *prstate, PruneFreezeResult *presult)
/* We'll fill in presult for the caller */
memset(presult, 0, sizeof(PruneFreezeResult));
presult->old_vmbits = prstate->old_vmbits;
/* Clear any stale prune hint */
if (TransactionIdIsValid(PageGetPruneXid(page)))
{
@@ -976,7 +1016,8 @@ prune_freeze_fast_path(PruneState *prstate, PruneFreezeResult *presult)
/*
* Prune and repair fragmentation and potentially freeze tuples on the
* specified page.
* specified page. If the page's visibility status has changed, update it in
* the VM.
*
* Caller must have pin and buffer cleanup lock on the page. Note that we
* don't update the FSM information for page on caller's behalf. Caller might
@@ -991,12 +1032,10 @@ prune_freeze_fast_path(PruneState *prstate, PruneFreezeResult *presult)
* tuples if it's required in order to advance relfrozenxid / relminmxid, or
* if it's considered advantageous for overall system performance to do so
* now. The 'params.cutoffs', 'presult', 'new_relfrozen_xid' and
* 'new_relmin_mxid' arguments are required when freezing. When
* HEAP_PAGE_PRUNE_FREEZE option is passed, we also set
* presult->set_all_visible and presult->set_all_frozen after determining
* whether or not to opportunistically freeze, to indicate if the VM bits can
* be set. 'set_all_frozen' is always set to false when the
* HEAP_PAGE_PRUNE_FREEZE option is not passed.
* 'new_relmin_mxid' arguments are required when freezing.
*
* A vmbuffer corresponding to the heap page is also passed and if the page is
* found to be all-visible/all-frozen, we will set it in the VM.
*
* presult contains output parameters needed by callers, such as the number of
* tuples removed and the offsets of dead items on the page after pruning.
@@ -1024,8 +1063,10 @@ heap_page_prune_and_freeze(PruneFreezeParams *params,
bool do_freeze;
bool do_prune;
bool do_hint_prune;
bool do_set_vm;
bool did_tuple_hint_fpi;
int64 fpi_before = pgWalUsage.wal_fpi;
TransactionId conflict_xid;
/* Initialize prstate */
prune_freeze_setup(params,
@@ -1124,6 +1165,31 @@ heap_page_prune_and_freeze(PruneFreezeParams *params,
prstate.set_all_visible = prstate.set_all_frozen = false;
Assert(!prstate.set_all_frozen || prstate.set_all_visible);
Assert(!prstate.set_all_visible || (prstate.lpdead_items == 0));
do_set_vm = heap_page_will_set_vm(&prstate, params->reason);
/*
* new_vmbits should be 0 regardless of whether or not the page is
* all-visible if we do not intend to set the VM.
*/
Assert(do_set_vm || prstate.new_vmbits == 0);
/*
* The snapshot conflict horizon for the whole record is the most
* conservative (newest) horizon required by any change in the record.
*/
conflict_xid = InvalidTransactionId;
if (do_set_vm)
conflict_xid = prstate.newest_live_xid;
if (do_freeze && TransactionIdFollows(prstate.pagefrz.FreezePageConflictXid, conflict_xid))
conflict_xid = prstate.pagefrz.FreezePageConflictXid;
if (do_prune && TransactionIdFollows(prstate.latest_xid_removed, conflict_xid))
conflict_xid = prstate.latest_xid_removed;
/* Lock vmbuffer before entering a critical section */
if (do_set_vm)
LockBuffer(prstate.vmbuffer, BUFFER_LOCK_EXCLUSIVE);
/* Any error while applying the changes is critical */
START_CRIT_SECTION();
@@ -1145,14 +1211,17 @@ heap_page_prune_and_freeze(PruneFreezeParams *params,
/*
* If that's all we had to do to the page, this is a non-WAL-logged
* hint. If we are going to freeze or prune the page, we will mark
* the buffer dirty below.
* hint. If we are going to freeze or prune the page or set
* PD_ALL_VISIBLE, we will mark the buffer dirty below.
*
* Setting PD_ALL_VISIBLE is fully WAL-logged because it is forbidden
* for the VM to be set and PD_ALL_VISIBLE to be clear.
*/
if (!do_freeze && !do_prune)
if (!do_freeze && !do_prune && !do_set_vm)
MarkBufferDirtyHint(prstate.buffer, true);
}
if (do_prune || do_freeze)
if (do_prune || do_freeze || do_set_vm)
{
/* Apply the planned item changes and repair page fragmentation. */
if (do_prune)
@@ -1166,6 +1235,27 @@ heap_page_prune_and_freeze(PruneFreezeParams *params,
if (do_freeze)
heap_freeze_prepared_tuples(prstate.buffer, prstate.frozen, prstate.nfrozen);
/* Set the visibility map and page visibility hint */
if (do_set_vm)
{
/*
* While it is valid for PD_ALL_VISIBLE to be set when the
* corresponding VM bit is clear, we strongly prefer to keep them
* in sync.
*
* The heap buffer must be marked dirty before adding it to the
* WAL chain when setting the VM. We don't worry about
* unnecessarily dirtying the heap buffer if PD_ALL_VISIBLE is
* already set, though. It is extremely rare to have a clean heap
* buffer with PD_ALL_VISIBLE already set and the VM bits clear,
* so there is no point in optimizing it.
*/
PageSetAllVisible(prstate.page);
PageClearPrunable(prstate.page);
visibilitymap_set_vmbits(prstate.block, prstate.vmbuffer, prstate.new_vmbits,
prstate.relation->rd_locator);
}
MarkBufferDirty(prstate.buffer);
/*
@@ -1173,29 +1263,12 @@ heap_page_prune_and_freeze(PruneFreezeParams *params,
*/
if (RelationNeedsWAL(prstate.relation))
{
/*
* The snapshotConflictHorizon for the whole record should be the
* most conservative of all the horizons calculated for any of the
* possible modifications. If this record will prune tuples, any
* queries on the standby older than the newest xid of the most
* recently removed tuple this record will prune will conflict. If
* this record will freeze tuples, any queries on the standby with
* xids older than the newest tuple this record will freeze will
* conflict.
*/
TransactionId conflict_xid;
if (TransactionIdFollows(prstate.pagefrz.FreezePageConflictXid,
prstate.latest_xid_removed))
conflict_xid = prstate.pagefrz.FreezePageConflictXid;
else
conflict_xid = prstate.latest_xid_removed;
log_heap_prune_and_freeze(prstate.relation, prstate.buffer,
InvalidBuffer, /* vmbuffer */
0, /* vmflags */
do_set_vm ? prstate.vmbuffer : InvalidBuffer,
do_set_vm ? prstate.new_vmbits : 0,
conflict_xid,
true, params->reason,
true, /* cleanup lock */
params->reason,
prstate.frozen, prstate.nfrozen,
prstate.redirected, prstate.nredirected,
prstate.nowdead, prstate.ndead,
@@ -1205,33 +1278,71 @@ heap_page_prune_and_freeze(PruneFreezeParams *params,
END_CRIT_SECTION();
if (do_set_vm)
LockBuffer(prstate.vmbuffer, BUFFER_LOCK_UNLOCK);
/*
* During its second pass over the heap, VACUUM calls
* heap_page_would_be_all_visible() to determine whether a page is
* all-visible and all-frozen. The logic here is similar. After completing
* pruning and freezing, use an assertion to verify that our results
* remain consistent with heap_page_would_be_all_visible(). It's also a
* valuable cross-check of the page state after pruning and freezing.
*/
#ifdef USE_ASSERT_CHECKING
if (prstate.set_all_visible)
{
TransactionId debug_cutoff;
bool debug_all_frozen;
Assert(prstate.lpdead_items == 0);
Assert(heap_page_is_all_visible(prstate.relation, prstate.buffer,
prstate.vistest,
&debug_all_frozen,
&debug_cutoff, off_loc));
Assert(!TransactionIdIsValid(debug_cutoff) ||
debug_cutoff == prstate.newest_live_xid);
/*
* It's possible the page is composed entirely of frozen tuples but is
* not set all-frozen in the VM and did not pass
* HEAP_PAGE_PRUNE_FREEZE. In this case, it's possible
* heap_page_is_all_visible() finds the page completely frozen, even
* though prstate.set_all_frozen is false.
*/
Assert(!prstate.set_all_frozen || debug_all_frozen);
}
#endif
/* Copy information back for caller */
presult->ndeleted = prstate.ndeleted;
presult->nnewlpdead = prstate.ndead;
presult->nfrozen = prstate.nfrozen;
presult->live_tuples = prstate.live_tuples;
presult->recently_dead_tuples = prstate.recently_dead_tuples;
presult->set_all_visible = prstate.set_all_visible;
presult->set_all_frozen = prstate.set_all_frozen;
presult->hastup = prstate.hastup;
presult->old_vmbits = prstate.old_vmbits;
/*
* For callers planning to update the visibility map, the conflict horizon
* for that record must be the newest xmin on the page. However, if the
* page is completely frozen, there can be no conflict and the
* vm_conflict_horizon should remain InvalidTransactionId. This includes
* the case that we just froze all the tuples; the prune-freeze record
* included the conflict XID already so the caller doesn't need it.
*/
if (presult->set_all_frozen)
presult->vm_conflict_horizon = InvalidTransactionId;
else
presult->vm_conflict_horizon = prstate.newest_live_xid;
presult->lpdead_items = prstate.lpdead_items;
/* the presult->deadoffsets array was already filled in */
presult->newly_all_visible = false;
presult->newly_all_frozen = false;
presult->newly_all_visible_frozen = false;
if (do_set_vm)
{
if ((prstate.old_vmbits & VISIBILITYMAP_ALL_VISIBLE) == 0)
{
presult->newly_all_visible = true;
if (prstate.set_all_frozen)
presult->newly_all_visible_frozen = true;
}
else if ((prstate.old_vmbits & VISIBILITYMAP_ALL_FROZEN) == 0 &&
prstate.set_all_frozen)
presult->newly_all_frozen = true;
}
if (prstate.attempt_freeze)
{
if (presult->nfrozen > 0)

View file

@@ -466,13 +466,6 @@ static void dead_items_add(LVRelState *vacrel, BlockNumber blkno, OffsetNumber *
static void dead_items_reset(LVRelState *vacrel);
static void dead_items_cleanup(LVRelState *vacrel);
#ifdef USE_ASSERT_CHECKING
static bool heap_page_is_all_visible(Relation rel, Buffer buf,
GlobalVisState *vistest,
bool *all_frozen,
TransactionId *newest_live_xid,
OffsetNumber *logging_offnum);
#endif
static bool heap_page_would_be_all_visible(Relation rel, Buffer buf,
GlobalVisState *vistest,
bool allow_update_vistest,
@@ -2022,8 +2015,6 @@ lazy_scan_prune(LVRelState *vacrel,
.vistest = vacrel->vistest,
.cutoffs = &vacrel->cutoffs,
};
uint8 old_vmbits = 0;
uint8 new_vmbits = 0;
Assert(BufferGetBlockNumber(buf) == blkno);
@@ -2074,32 +2065,6 @@ lazy_scan_prune(LVRelState *vacrel,
vacrel->new_frozen_tuple_pages++;
}
/*
* VACUUM will call heap_page_is_all_visible() during the second pass over
* the heap to determine all_visible and all_frozen for the page -- this
* is a specialized version of the logic from this function. Now that
* we've finished pruning and freezing, make sure that we're in total
* agreement with heap_page_is_all_visible() using an assertion.
*/
#ifdef USE_ASSERT_CHECKING
if (presult.set_all_visible)
{
TransactionId debug_cutoff;
bool debug_all_frozen;
Assert(presult.lpdead_items == 0);
Assert(heap_page_is_all_visible(vacrel->rel, buf,
vacrel->vistest, &debug_all_frozen,
&debug_cutoff, &vacrel->offnum));
Assert(presult.set_all_frozen == debug_all_frozen);
Assert(!TransactionIdIsValid(debug_cutoff) ||
debug_cutoff == presult.vm_conflict_horizon);
}
#endif
/*
* Now save details of the LP_DEAD items from the page in vacrel
*/
@@ -2120,6 +2085,17 @@ lazy_scan_prune(LVRelState *vacrel,
}
/* Finally, add page-local counts to whole-VACUUM counts */
if (presult.newly_all_visible)
vacrel->new_all_visible_pages++;
if (presult.newly_all_visible_frozen)
vacrel->new_all_visible_all_frozen_pages++;
if (presult.newly_all_frozen)
vacrel->new_all_frozen_pages++;
/* Capture if the page was newly set frozen */
*vm_page_frozen = presult.newly_all_visible_frozen ||
presult.newly_all_frozen;
vacrel->tuples_deleted += presult.ndeleted;
vacrel->tuples_frozen += presult.nfrozen;
vacrel->lpdead_items += presult.lpdead_items;
@@ -2133,71 +2109,6 @@ lazy_scan_prune(LVRelState *vacrel,
/* Did we find LP_DEAD items? */
*has_lpdead_items = (presult.lpdead_items > 0);
Assert(!presult.set_all_visible || !(*has_lpdead_items));
Assert(!presult.set_all_frozen || presult.set_all_visible);
if (!presult.set_all_visible)
return presult.ndeleted;
/* Set the visibility map and page visibility hint */
old_vmbits = presult.old_vmbits;
new_vmbits = VISIBILITYMAP_ALL_VISIBLE;
if (presult.set_all_frozen)
new_vmbits |= VISIBILITYMAP_ALL_FROZEN;
/* Nothing to do */
if (old_vmbits == new_vmbits)
return presult.ndeleted;
/*
* It should never be the case that the visibility map page is set while
* the page-level bit is clear (and if so, we cleared it above), but the
* reverse is allowed (if checksums are not enabled). Regardless, set both
* bits so that we get back in sync.
*
* The heap buffer must be marked dirty before adding it to the WAL chain
* when setting the VM. We don't worry about unnecessarily dirtying the
* heap buffer if PD_ALL_VISIBLE is already set, though. It is extremely
* rare to have a clean heap buffer with PD_ALL_VISIBLE already set and
* the VM bits clear, so there is no point in optimizing it.
*/
PageSetAllVisible(page);
PageClearPrunable(page);
MarkBufferDirty(buf);
/*
* If the page is being set all-frozen, we pass InvalidTransactionId as
* the cutoff_xid, since a snapshot conflict horizon sufficient to make
* everything safe for REDO was logged when the page's tuples were frozen.
*/
Assert(!presult.set_all_frozen ||
!TransactionIdIsValid(presult.vm_conflict_horizon));
visibilitymap_set(vacrel->rel, blkno, buf,
InvalidXLogRecPtr,
vmbuffer, presult.vm_conflict_horizon,
new_vmbits);
/*
* If the page wasn't already set all-visible and/or all-frozen in the VM,
* count it as newly set for logging.
*/
if ((old_vmbits & VISIBILITYMAP_ALL_VISIBLE) == 0)
{
vacrel->new_all_visible_pages++;
if (presult.set_all_frozen)
{
vacrel->new_all_visible_all_frozen_pages++;
*vm_page_frozen = true;
}
}
else if ((old_vmbits & VISIBILITYMAP_ALL_FROZEN) == 0 &&
presult.set_all_frozen)
{
vacrel->new_all_frozen_pages++;
*vm_page_frozen = true;
}
return presult.ndeleted;
}
@@ -3613,7 +3524,7 @@ dead_items_cleanup(LVRelState *vacrel)
* that expect no LP_DEAD on the page. Currently assert-only, but there is no
* reason not to use it outside of asserts.
*/
static bool
bool
heap_page_is_all_visible(Relation rel, Buffer buf,
GlobalVisState *vistest,
bool *all_frozen,

View file

@@ -265,7 +265,8 @@ typedef struct PruneFreezeParams
/*
* Callers should provide a pinned vmbuffer corresponding to the heap
* block in buffer. We will check for and repair any corruption in the VM.
* block in buffer. We will check for and repair any corruption in the VM
* and set the VM after pruning if the page is all-visible/all-frozen.
*/
Buffer vmbuffer;
@@ -281,8 +282,7 @@ typedef struct PruneFreezeParams
* HEAP_PAGE_PRUNE_MARK_UNUSED_NOW indicates that dead items can be set
* LP_UNUSED during pruning.
*
* HEAP_PAGE_PRUNE_FREEZE indicates that we will also freeze tuples, and
* will return 'all_visible', 'all_frozen' flags to the caller.
* HEAP_PAGE_PRUNE_FREEZE indicates that we will also freeze tuples.
*/
int options;
@@ -316,26 +316,12 @@ typedef struct PruneFreezeResult
int recently_dead_tuples;
/*
* set_all_visible and set_all_frozen indicate if the all-visible and
* all-frozen bits in the visibility map should be set for this page after
* pruning.
*
* vm_conflict_horizon is the newest xmin of live tuples on the page. The
* caller can use it as the conflict horizon when setting the VM bits. It
* is only valid if we froze some tuples (nfrozen > 0), and set_all_frozen
* is true.
*
* These are only set if the HEAP_PAGE_PRUNE_FREEZE option is set.
* Whether or not the page was newly set all-visible and all-frozen during
* phase I of vacuuming.
*/
bool set_all_visible;
bool set_all_frozen;
TransactionId vm_conflict_horizon;
/*
* The value of the vmbuffer's vmbits at the beginning of pruning. It is
* cleared if VM corruption is found and corrected.
*/
uint8 old_vmbits;
bool newly_all_visible;
bool newly_all_visible_frozen;
bool newly_all_frozen;
/*
* Whether or not the page makes rel truncation unsafe. This is set to
@@ -472,6 +458,13 @@ extern void log_heap_prune_and_freeze(Relation relation, Buffer buffer,
/* in heap/vacuumlazy.c */
extern void heap_vacuum_rel(Relation rel,
const VacuumParams params, BufferAccessStrategy bstrategy);
#ifdef USE_ASSERT_CHECKING
extern bool heap_page_is_all_visible(Relation rel, Buffer buf,
GlobalVisState *vistest,
bool *all_frozen,
TransactionId *newest_live_xid,
OffsetNumber *logging_offnum);
#endif
/* in heap/heapam_visibility.c */
extern bool HeapTupleSatisfiesVisibility(HeapTuple htup, Snapshot snapshot,