mirror of
https://github.com/postgres/postgres.git
synced 2026-04-15 22:10:45 -04:00
Allow on-access pruning to set pages all-visible
Many queries do not modify the underlying relation. For such queries, if on-access pruning occurs during the scan, we can check whether the page has become all-visible and update the visibility map accordingly. Previously, only vacuum and COPY FREEZE marked pages as all-visible or all-frozen. This commit implements on-access VM setting for sequential scans, TID range scans, sample scans, bitmap heap scans, and the underlying heap relation in index scans. Setting the visibility map on-access can avoid write amplification caused by vacuum later needing to set the page all-visible, which could trigger a write and potentially an FPI. It also allows more frequent index-only scans, since they require pages to be marked all-visible in the VM. Author: Melanie Plageman <melanieplageman@gmail.com> Reviewed-by: Andres Freund <andres@anarazel.de> Reviewed-by: Kirill Reshke <reshkekirill@gmail.com> Reviewed-by: Chao Li <li.evan.chao@gmail.com> Discussion: https://postgr.es/m/flat/CAAKRu_ZMw6Npd_qm2KM%2BFwQ3cMOMx1Dh3VMhp8-V7SOLxdK9-g%40mail.gmail.com
This commit is contained in:
parent
e3637a05dc
commit
b46e1e54d0
5 changed files with 59 additions and 21 deletions
|
|
@ -633,7 +633,8 @@ heap_prepare_pagescan(TableScanDesc sscan)
|
||||||
/*
|
/*
|
||||||
* Prune and repair fragmentation for the whole page, if possible.
|
* Prune and repair fragmentation for the whole page, if possible.
|
||||||
*/
|
*/
|
||||||
heap_page_prune_opt(scan->rs_base.rs_rd, buffer, &scan->rs_vmbuffer);
|
heap_page_prune_opt(scan->rs_base.rs_rd, buffer, &scan->rs_vmbuffer,
|
||||||
|
sscan->rs_flags & SO_HINT_REL_READ_ONLY);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We must hold share lock on the buffer content while examining tuple
|
* We must hold share lock on the buffer content while examining tuple
|
||||||
|
|
|
||||||
|
|
@ -149,7 +149,8 @@ heapam_index_fetch_tuple(struct IndexFetchTableData *scan,
|
||||||
*/
|
*/
|
||||||
if (prev_buf != hscan->xs_cbuf)
|
if (prev_buf != hscan->xs_cbuf)
|
||||||
heap_page_prune_opt(hscan->xs_base.rel, hscan->xs_cbuf,
|
heap_page_prune_opt(hscan->xs_base.rel, hscan->xs_cbuf,
|
||||||
&hscan->xs_vmbuffer);
|
&hscan->xs_vmbuffer,
|
||||||
|
hscan->xs_base.flags & SO_HINT_REL_READ_ONLY);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Obtain share-lock on the buffer so we can examine visibility */
|
/* Obtain share-lock on the buffer so we can examine visibility */
|
||||||
|
|
@ -2546,7 +2547,8 @@ BitmapHeapScanNextBlock(TableScanDesc scan,
|
||||||
/*
|
/*
|
||||||
* Prune and repair fragmentation for the whole page, if possible.
|
* Prune and repair fragmentation for the whole page, if possible.
|
||||||
*/
|
*/
|
||||||
heap_page_prune_opt(scan->rs_rd, buffer, &hscan->rs_vmbuffer);
|
heap_page_prune_opt(scan->rs_rd, buffer, &hscan->rs_vmbuffer,
|
||||||
|
scan->rs_flags & SO_HINT_REL_READ_ONLY);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We must hold share lock on the buffer content while examining tuple
|
* We must hold share lock on the buffer content while examining tuple
|
||||||
|
|
|
||||||
|
|
@ -44,6 +44,8 @@ typedef struct
|
||||||
bool mark_unused_now;
|
bool mark_unused_now;
|
||||||
/* whether to attempt freezing tuples */
|
/* whether to attempt freezing tuples */
|
||||||
bool attempt_freeze;
|
bool attempt_freeze;
|
||||||
|
/* whether to attempt setting the VM */
|
||||||
|
bool attempt_set_vm;
|
||||||
struct VacuumCutoffs *cutoffs;
|
struct VacuumCutoffs *cutoffs;
|
||||||
Relation relation;
|
Relation relation;
|
||||||
|
|
||||||
|
|
@ -75,7 +77,8 @@ typedef struct
|
||||||
/*
|
/*
|
||||||
* set_all_visible and set_all_frozen indicate if the all-visible and
|
* set_all_visible and set_all_frozen indicate if the all-visible and
|
||||||
* all-frozen bits in the visibility map can be set for this page after
|
* all-frozen bits in the visibility map can be set for this page after
|
||||||
* pruning.
|
* pruning. They are only tracked when the caller requests VM updates
|
||||||
|
* (attempt_set_vm); otherwise they remain false throughout.
|
||||||
*
|
*
|
||||||
* NOTE: set_all_visible and set_all_frozen initially don't include
|
* NOTE: set_all_visible and set_all_frozen initially don't include
|
||||||
* LP_DEAD items. That's convenient for heap_page_prune_and_freeze() to
|
* LP_DEAD items. That's convenient for heap_page_prune_and_freeze() to
|
||||||
|
|
@ -232,7 +235,8 @@ static void page_verify_redirects(Page page);
|
||||||
|
|
||||||
static bool heap_page_will_freeze(bool did_tuple_hint_fpi, bool do_prune, bool do_hint_prune,
|
static bool heap_page_will_freeze(bool did_tuple_hint_fpi, bool do_prune, bool do_hint_prune,
|
||||||
PruneState *prstate);
|
PruneState *prstate);
|
||||||
static bool heap_page_will_set_vm(PruneState *prstate, PruneReason reason);
|
static bool heap_page_will_set_vm(PruneState *prstate, PruneReason reason,
|
||||||
|
bool do_prune, bool do_freeze);
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
@ -251,9 +255,21 @@ static bool heap_page_will_set_vm(PruneState *prstate, PruneReason reason);
|
||||||
* reuse the pin across calls, avoiding repeated pin/unpin cycles. If we find
|
* reuse the pin across calls, avoiding repeated pin/unpin cycles. If we find
|
||||||
* VM corruption during pruning, we will fix it. Caller is responsible for
|
* VM corruption during pruning, we will fix it. Caller is responsible for
|
||||||
* unpinning *vmbuffer.
|
* unpinning *vmbuffer.
|
||||||
|
*
|
||||||
|
* rel_read_only is true if we determined at plan time that the query does not
|
||||||
|
* modify the relation. It is counterproductive to set the VM if the query
|
||||||
|
* will immediately clear it.
|
||||||
|
*
|
||||||
|
* As noted in ScanRelIsReadOnly(), INSERT ... SELECT from the same table will
|
||||||
|
* report the scan relation as read-only. This is usually harmless in
|
||||||
|
* practice. It is useful to set scanned pages all-visible that won't be
|
||||||
|
* inserted into. Pages it does insert to will rarely meet the criteria for
|
||||||
|
* pruning, and those that do are likely to contain in-progress inserts which
|
||||||
|
* make the page not fully all-visible.
|
||||||
*/
|
*/
|
||||||
void
|
void
|
||||||
heap_page_prune_opt(Relation relation, Buffer buffer, Buffer *vmbuffer)
|
heap_page_prune_opt(Relation relation, Buffer buffer, Buffer *vmbuffer,
|
||||||
|
bool rel_read_only)
|
||||||
{
|
{
|
||||||
Page page = BufferGetPage(buffer);
|
Page page = BufferGetPage(buffer);
|
||||||
TransactionId prune_xid;
|
TransactionId prune_xid;
|
||||||
|
|
@ -336,6 +352,8 @@ heap_page_prune_opt(Relation relation, Buffer buffer, Buffer *vmbuffer)
|
||||||
* current implementation.
|
* current implementation.
|
||||||
*/
|
*/
|
||||||
params.options = HEAP_PAGE_PRUNE_ALLOW_FAST_PATH;
|
params.options = HEAP_PAGE_PRUNE_ALLOW_FAST_PATH;
|
||||||
|
if (rel_read_only)
|
||||||
|
params.options |= HEAP_PAGE_PRUNE_SET_VM;
|
||||||
|
|
||||||
heap_page_prune_and_freeze(¶ms, &presult, &dummy_off_loc,
|
heap_page_prune_and_freeze(¶ms, &presult, &dummy_off_loc,
|
||||||
NULL, NULL);
|
NULL, NULL);
|
||||||
|
|
@ -392,6 +410,7 @@ prune_freeze_setup(PruneFreezeParams *params,
|
||||||
/* cutoffs must be provided if we will attempt freezing */
|
/* cutoffs must be provided if we will attempt freezing */
|
||||||
Assert(!(params->options & HEAP_PAGE_PRUNE_FREEZE) || params->cutoffs);
|
Assert(!(params->options & HEAP_PAGE_PRUNE_FREEZE) || params->cutoffs);
|
||||||
prstate->attempt_freeze = (params->options & HEAP_PAGE_PRUNE_FREEZE) != 0;
|
prstate->attempt_freeze = (params->options & HEAP_PAGE_PRUNE_FREEZE) != 0;
|
||||||
|
prstate->attempt_set_vm = (params->options & HEAP_PAGE_PRUNE_SET_VM) != 0;
|
||||||
prstate->cutoffs = params->cutoffs;
|
prstate->cutoffs = params->cutoffs;
|
||||||
prstate->relation = params->relation;
|
prstate->relation = params->relation;
|
||||||
prstate->block = BufferGetBlockNumber(params->buffer);
|
prstate->block = BufferGetBlockNumber(params->buffer);
|
||||||
|
|
@ -461,14 +480,13 @@ prune_freeze_setup(PruneFreezeParams *params,
|
||||||
* We track whether the page will be all-visible/all-frozen at the end of
|
* We track whether the page will be all-visible/all-frozen at the end of
|
||||||
* pruning and freezing. While examining tuple visibility, we'll set
|
* pruning and freezing. While examining tuple visibility, we'll set
|
||||||
* set_all_visible to false if there are tuples on the page not visible to
|
* set_all_visible to false if there are tuples on the page not visible to
|
||||||
* all running and future transactions. set_all_visible is always
|
* all running and future transactions. If setting the VM is enabled for
|
||||||
* maintained but only VACUUM will set the VM if the page ends up being
|
* this scan, we will do so if the page ends up being all-visible.
|
||||||
* all-visible.
|
|
||||||
*
|
*
|
||||||
* We also keep track of the newest live XID, which is used to calculate
|
* We also keep track of the newest live XID, which is used to calculate
|
||||||
* the snapshot conflict horizon for a WAL record setting the VM.
|
* the snapshot conflict horizon for a WAL record setting the VM.
|
||||||
*/
|
*/
|
||||||
prstate->set_all_visible = true;
|
prstate->set_all_visible = prstate->attempt_set_vm;
|
||||||
prstate->newest_live_xid = InvalidTransactionId;
|
prstate->newest_live_xid = InvalidTransactionId;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
@ -477,7 +495,9 @@ prune_freeze_setup(PruneFreezeParams *params,
|
||||||
* caller passed HEAP_PAGE_PRUNE_FREEZE, because if they did not, we won't
|
* caller passed HEAP_PAGE_PRUNE_FREEZE, because if they did not, we won't
|
||||||
* call heap_prepare_freeze_tuple() for each tuple, and set_all_frozen
|
* call heap_prepare_freeze_tuple() for each tuple, and set_all_frozen
|
||||||
* will never be cleared for tuples that need freezing. This would lead to
|
* will never be cleared for tuples that need freezing. This would lead to
|
||||||
* incorrectly setting the visibility map all-frozen for this page.
|
* incorrectly setting the visibility map all-frozen for this page. We
|
||||||
|
* can't set the page all-frozen in the VM if the caller didn't pass
|
||||||
|
* HEAP_PAGE_PRUNE_SET_VM.
|
||||||
*
|
*
|
||||||
* When freezing is not required (no XIDs/MXIDs older than the freeze
|
* When freezing is not required (no XIDs/MXIDs older than the freeze
|
||||||
* cutoff), we may still choose to "opportunistically" freeze if doing so
|
* cutoff), we may still choose to "opportunistically" freeze if doing so
|
||||||
|
|
@ -494,7 +514,7 @@ prune_freeze_setup(PruneFreezeParams *params,
|
||||||
* whether to freeze, but before updating the VM, to avoid setting the VM
|
* whether to freeze, but before updating the VM, to avoid setting the VM
|
||||||
* bits incorrectly.
|
* bits incorrectly.
|
||||||
*/
|
*/
|
||||||
prstate->set_all_frozen = prstate->attempt_freeze;
|
prstate->set_all_frozen = prstate->attempt_freeze && prstate->attempt_set_vm;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
@ -920,21 +940,34 @@ heap_page_fix_vm_corruption(PruneState *prstate, OffsetNumber offnum,
|
||||||
* This function does not actually set the VM bits or page-level visibility
|
* This function does not actually set the VM bits or page-level visibility
|
||||||
* hint, PD_ALL_VISIBLE.
|
* hint, PD_ALL_VISIBLE.
|
||||||
*
|
*
|
||||||
|
* This should be called only after do_freeze has been decided (and do_prune
|
||||||
|
* has been set), as these factor into our heuristic-based decision.
|
||||||
|
*
|
||||||
* Returns true if one or both VM bits should be set and false otherwise.
|
* Returns true if one or both VM bits should be set and false otherwise.
|
||||||
*/
|
*/
|
||||||
static bool
|
static bool
|
||||||
heap_page_will_set_vm(PruneState *prstate, PruneReason reason)
|
heap_page_will_set_vm(PruneState *prstate, PruneReason reason,
|
||||||
|
bool do_prune, bool do_freeze)
|
||||||
{
|
{
|
||||||
/*
|
if (!prstate->attempt_set_vm)
|
||||||
* Though on-access pruning maintains prstate->set_all_visible, we don't
|
|
||||||
* set the VM on-access for now.
|
|
||||||
*/
|
|
||||||
if (reason == PRUNE_ON_ACCESS)
|
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
if (!prstate->set_all_visible)
|
if (!prstate->set_all_visible)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If this is an on-access call and we're not actually pruning, avoid
|
||||||
|
* setting the visibility map if it would newly dirty the heap page or, if
|
||||||
|
* the page is already dirty, if doing so would require including a
|
||||||
|
* full-page image (FPI) of the heap page in the WAL.
|
||||||
|
*/
|
||||||
|
if (reason == PRUNE_ON_ACCESS && !do_prune && !do_freeze &&
|
||||||
|
(!BufferIsDirty(prstate->buffer) || XLogCheckBufferNeedsBackup(prstate->buffer)))
|
||||||
|
{
|
||||||
|
prstate->set_all_visible = prstate->set_all_frozen = false;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
prstate->new_vmbits = VISIBILITYMAP_ALL_VISIBLE;
|
prstate->new_vmbits = VISIBILITYMAP_ALL_VISIBLE;
|
||||||
|
|
||||||
if (prstate->set_all_frozen)
|
if (prstate->set_all_frozen)
|
||||||
|
|
@ -1165,9 +1198,10 @@ heap_page_prune_and_freeze(PruneFreezeParams *params,
|
||||||
prstate.set_all_visible = prstate.set_all_frozen = false;
|
prstate.set_all_visible = prstate.set_all_frozen = false;
|
||||||
|
|
||||||
Assert(!prstate.set_all_frozen || prstate.set_all_visible);
|
Assert(!prstate.set_all_frozen || prstate.set_all_visible);
|
||||||
|
Assert(!prstate.set_all_visible || prstate.attempt_set_vm);
|
||||||
Assert(!prstate.set_all_visible || (prstate.lpdead_items == 0));
|
Assert(!prstate.set_all_visible || (prstate.lpdead_items == 0));
|
||||||
|
|
||||||
do_set_vm = heap_page_will_set_vm(&prstate, params->reason);
|
do_set_vm = heap_page_will_set_vm(&prstate, params->reason, do_prune, do_freeze);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* new_vmbits should be 0 regardless of whether or not the page is
|
* new_vmbits should be 0 regardless of whether or not the page is
|
||||||
|
|
|
||||||
|
|
@ -2021,7 +2021,7 @@ lazy_scan_prune(LVRelState *vacrel,
|
||||||
.buffer = buf,
|
.buffer = buf,
|
||||||
.vmbuffer = vmbuffer,
|
.vmbuffer = vmbuffer,
|
||||||
.reason = PRUNE_VACUUM_SCAN,
|
.reason = PRUNE_VACUUM_SCAN,
|
||||||
.options = HEAP_PAGE_PRUNE_FREEZE,
|
.options = HEAP_PAGE_PRUNE_FREEZE | HEAP_PAGE_PRUNE_SET_VM,
|
||||||
.vistest = vacrel->vistest,
|
.vistest = vacrel->vistest,
|
||||||
.cutoffs = &vacrel->cutoffs,
|
.cutoffs = &vacrel->cutoffs,
|
||||||
};
|
};
|
||||||
|
|
|
||||||
|
|
@ -43,6 +43,7 @@
|
||||||
#define HEAP_PAGE_PRUNE_MARK_UNUSED_NOW (1 << 0)
|
#define HEAP_PAGE_PRUNE_MARK_UNUSED_NOW (1 << 0)
|
||||||
#define HEAP_PAGE_PRUNE_FREEZE (1 << 1)
|
#define HEAP_PAGE_PRUNE_FREEZE (1 << 1)
|
||||||
#define HEAP_PAGE_PRUNE_ALLOW_FAST_PATH (1 << 2)
|
#define HEAP_PAGE_PRUNE_ALLOW_FAST_PATH (1 << 2)
|
||||||
|
#define HEAP_PAGE_PRUNE_SET_VM (1 << 3)
|
||||||
|
|
||||||
typedef struct BulkInsertStateData *BulkInsertState;
|
typedef struct BulkInsertStateData *BulkInsertState;
|
||||||
typedef struct GlobalVisState GlobalVisState;
|
typedef struct GlobalVisState GlobalVisState;
|
||||||
|
|
@ -431,7 +432,7 @@ extern TransactionId heap_index_delete_tuples(Relation rel,
|
||||||
|
|
||||||
/* in heap/pruneheap.c */
|
/* in heap/pruneheap.c */
|
||||||
extern void heap_page_prune_opt(Relation relation, Buffer buffer,
|
extern void heap_page_prune_opt(Relation relation, Buffer buffer,
|
||||||
Buffer *vmbuffer);
|
Buffer *vmbuffer, bool rel_read_only);
|
||||||
extern void heap_page_prune_and_freeze(PruneFreezeParams *params,
|
extern void heap_page_prune_and_freeze(PruneFreezeParams *params,
|
||||||
PruneFreezeResult *presult,
|
PruneFreezeResult *presult,
|
||||||
OffsetNumber *off_loc,
|
OffsetNumber *off_loc,
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue