From d774072f0040f9acf148cc9740b79857fbde7042 Mon Sep 17 00:00:00 2001 From: Peter Geoghegan Date: Fri, 13 Mar 2026 19:38:17 -0400 Subject: [PATCH] Move fake LSN infrastructure out of GiST. Move utility functions used by GiST to generate fake LSNs into xlog.c and xloginsert.c, so that other index AMs can also generate fake LSNs. Preparation for an upcoming commit that will add support for fake LSNs to nbtree, allowing its dropPin optimization to be used during scans of unlogged relations. That commit is itself preparation for another upcoming commit that will add a new amgetbatch/btgetbatch interface to enable I/O prefetching. Bump XLOG_PAGE_MAGIC due to XLOG_GIST_ASSIGN_LSN becoming XLOG_ASSIGN_LSN. Author: Peter Geoghegan Reviewed-By: Andres Freund Reviewed-By: Tomas Vondra Discussion: https://postgr.es/m/CAH2-WzkehuhxyuA8quc7rRN3EtNXpiKsjPfO8mhb+0Dr2K0Dtg@mail.gmail.com --- src/backend/access/gist/gist.c | 6 +-- src/backend/access/gist/gistutil.c | 50 ------------------------ src/backend/access/gist/gistvacuum.c | 8 ++-- src/backend/access/gist/gistxlog.c | 21 ---------- src/backend/access/rmgrdesc/gistdesc.c | 6 --- src/backend/access/rmgrdesc/xlogdesc.c | 7 ++++ src/backend/access/transam/xlog.c | 28 +++++++++++++ src/backend/access/transam/xloginsert.c | 52 +++++++++++++++++++++++++ src/backend/storage/buffer/bufmgr.c | 6 +-- src/include/access/gist_private.h | 4 -- src/include/access/gistxlog.h | 2 +- src/include/access/xlog.h | 1 + src/include/access/xlog_internal.h | 2 +- src/include/access/xloginsert.h | 2 + src/include/catalog/pg_control.h | 2 +- 15 files changed, 103 insertions(+), 94 deletions(-) diff --git a/src/backend/access/gist/gist.c b/src/backend/access/gist/gist.c index dfffce3e396..8565e225be7 100644 --- a/src/backend/access/gist/gist.c +++ b/src/backend/access/gist/gist.c @@ -517,7 +517,7 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate, dist, oldrlink, oldnsn, leftchildbuf, markfollowright); else - recptr = gistGetFakeLSN(rel); + recptr = XLogGetFakeLSN(rel); } for (ptr = dist; ptr; ptr = ptr->next) @@ -594,7 +594,7 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate, leftchildbuf); } else - recptr = gistGetFakeLSN(rel); + recptr = XLogGetFakeLSN(rel); } PageSetLSN(page, recptr); @@ -1733,7 +1733,7 @@ gistprunepage(Relation rel, Page page, Buffer buffer, Relation heapRel) PageSetLSN(page, recptr); } else - PageSetLSN(page, gistGetFakeLSN(rel)); + PageSetLSN(page, XLogGetFakeLSN(rel)); END_CRIT_SECTION(); } diff --git a/src/backend/access/gist/gistutil.c b/src/backend/access/gist/gistutil.c index e7e4b273e19..0f58f61879f 100644 --- a/src/backend/access/gist/gistutil.c +++ b/src/backend/access/gist/gistutil.c @@ -1007,56 +1007,6 @@ gistproperty(Oid index_oid, int attno, return true; } -/* - * Some indexes are not WAL-logged, but we need LSNs to detect concurrent page - * splits anyway. This function provides a fake sequence of LSNs for that - * purpose. - */ -XLogRecPtr -gistGetFakeLSN(Relation rel) -{ - if (rel->rd_rel->relpersistence == RELPERSISTENCE_TEMP) - { - /* - * Temporary relations are only accessible in our session, so a simple - * backend-local counter will do. - */ - static XLogRecPtr counter = FirstNormalUnloggedLSN; - - return counter++; - } - else if (RelationIsPermanent(rel)) - { - /* - * WAL-logging on this relation will start after commit, so its LSNs - * must be distinct numbers smaller than the LSN at the next commit. - * Emit a dummy WAL record if insert-LSN hasn't advanced after the - * last call. - */ - static XLogRecPtr lastlsn = InvalidXLogRecPtr; - XLogRecPtr currlsn = GetXLogInsertEndRecPtr(); - - /* Shouldn't be called for WAL-logging relations */ - Assert(!RelationNeedsWAL(rel)); - - /* No need for an actual record if we already have a distinct LSN */ - if (XLogRecPtrIsValid(lastlsn) && lastlsn == currlsn) - currlsn = gistXLogAssignLSN(); - - lastlsn = currlsn; - return currlsn; - } - else - { - /* - * Unlogged relations are accessible from other backends, and survive - * (clean) restarts. GetFakeLSNForUnloggedRel() handles that for us. - */ - Assert(rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED); - return GetFakeLSNForUnloggedRel(); - } -} - /* * This is a stratnum translation support function for GiST opclasses that use * the RT*StrategyNumber constants. diff --git a/src/backend/access/gist/gistvacuum.c b/src/backend/access/gist/gistvacuum.c index 9e714980d26..686a0418054 100644 --- a/src/backend/access/gist/gistvacuum.c +++ b/src/backend/access/gist/gistvacuum.c @@ -16,7 +16,7 @@ #include "access/genam.h" #include "access/gist_private.h" -#include "access/transam.h" +#include "access/xloginsert.h" #include "commands/vacuum.h" #include "lib/integerset.h" #include "miscadmin.h" @@ -182,7 +182,7 @@ gistvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, if (RelationNeedsWAL(rel)) vstate.startNSN = GetInsertRecPtr(); else - vstate.startNSN = gistGetFakeLSN(rel); + vstate.startNSN = XLogGetFakeLSN(rel); /* * The outer loop iterates over all index pages, in physical order (we @@ -413,7 +413,7 @@ restart: PageSetLSN(page, recptr); } else - PageSetLSN(page, gistGetFakeLSN(rel)); + PageSetLSN(page, XLogGetFakeLSN(rel)); END_CRIT_SECTION(); @@ -707,7 +707,7 @@ gistdeletepage(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, if (RelationNeedsWAL(info->index)) recptr = gistXLogPageDelete(leafBuffer, txid, parentBuffer, downlink); else - recptr = gistGetFakeLSN(info->index); + recptr = XLogGetFakeLSN(info->index); PageSetLSN(parentPage, recptr); PageSetLSN(leafPage, recptr); diff --git a/src/backend/access/gist/gistxlog.c b/src/backend/access/gist/gistxlog.c index c783838495f..ae538dc81ca 100644 --- a/src/backend/access/gist/gistxlog.c +++ b/src/backend/access/gist/gistxlog.c @@ -421,9 +421,6 @@ gist_redo(XLogReaderState *record) case XLOG_GIST_PAGE_DELETE: gistRedoPageDelete(record); break; - case XLOG_GIST_ASSIGN_LSN: - /* nop. See gistGetFakeLSN(). */ - break; default: elog(PANIC, "gist_redo: unknown op code %u", info); } @@ -567,24 +564,6 @@ gistXLogPageDelete(Buffer buffer, FullTransactionId xid, return recptr; } -/* - * Write an empty XLOG record to assign a distinct LSN. - */ -XLogRecPtr -gistXLogAssignLSN(void) -{ - int dummy = 0; - - /* - * Records other than XLOG_SWITCH must have content. We use an integer 0 - * to follow the restriction. - */ - XLogBeginInsert(); - XLogSetRecordFlags(XLOG_MARK_UNIMPORTANT); - XLogRegisterData(&dummy, sizeof(dummy)); - return XLogInsert(RM_GIST_ID, XLOG_GIST_ASSIGN_LSN); -} - /* * Write XLOG record about reuse of a deleted page. */ diff --git a/src/backend/access/rmgrdesc/gistdesc.c b/src/backend/access/rmgrdesc/gistdesc.c index 79a839cc24b..67789e0253b 100644 --- a/src/backend/access/rmgrdesc/gistdesc.c +++ b/src/backend/access/rmgrdesc/gistdesc.c @@ -80,9 +80,6 @@ gist_desc(StringInfo buf, XLogReaderState *record) case XLOG_GIST_PAGE_DELETE: out_gistxlogPageDelete(buf, (gistxlogPageDelete *) rec); break; - case XLOG_GIST_ASSIGN_LSN: - /* No details to write out */ - break; } } @@ -108,9 +105,6 @@ gist_identify(uint8 info) case XLOG_GIST_PAGE_DELETE: id = "PAGE_DELETE"; break; - case XLOG_GIST_ASSIGN_LSN: - id = "ASSIGN_LSN"; - break; } return id; diff --git a/src/backend/access/rmgrdesc/xlogdesc.c b/src/backend/access/rmgrdesc/xlogdesc.c index ff078f22264..9044b952117 100644 --- a/src/backend/access/rmgrdesc/xlogdesc.c +++ b/src/backend/access/rmgrdesc/xlogdesc.c @@ -175,6 +175,10 @@ xlog_desc(StringInfo buf, XLogReaderState *record) memcpy(&enabled, rec, sizeof(bool)); appendStringInfoString(buf, enabled ? "true" : "false"); } + else if (info == XLOG_ASSIGN_LSN) + { + /* no further information to print */ + } } const char * @@ -229,6 +233,9 @@ xlog_identify(uint8 info) case XLOG_LOGICAL_DECODING_STATUS_CHANGE: id = "LOGICAL_DECODING_STATUS_CHANGE"; break; + case XLOG_ASSIGN_LSN: + id = "ASSIGN_LSN"; + break; } return id; diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index f760291e10c..f5c9a34374d 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -8224,6 +8224,30 @@ XLogRestorePoint(const char *rpName) return RecPtr; } +/* + * Write an empty XLOG record to assign a distinct LSN. + * + * This is used by some index AMs when building indexes on permanent relations + * with wal_level=minimal. In that scenario, WAL-logging will start after + * commit, but the index AM needs distinct LSNs to detect concurrent page + * modifications. When the current WAL insert position hasn't advanced since + * the last call, we emit a dummy record to ensure we get a new, distinct LSN. + */ +XLogRecPtr +XLogAssignLSN(void) +{ + int dummy = 0; + + /* + * Records other than XLOG_SWITCH must have content. We use an integer 0 + * to satisfy this restriction. + */ + XLogBeginInsert(); + XLogSetRecordFlags(XLOG_MARK_UNIMPORTANT); + XLogRegisterData(&dummy, sizeof(dummy)); + return XLogInsert(RM_XLOG_ID, XLOG_ASSIGN_LSN); +} + /* * Check if any of the GUC parameters that are critical for hot standby * have changed, and update the value in pg_control file if necessary. @@ -8591,6 +8615,10 @@ xlog_redo(XLogReaderState *record) { /* nothing to do here, handled in xlogrecovery.c */ } + else if (info == XLOG_ASSIGN_LSN) + { + /* nothing to do here, see XLogGetFakeLSN() */ + } else if (info == XLOG_FPI || info == XLOG_FPI_FOR_HINT) { /* diff --git a/src/backend/access/transam/xloginsert.c b/src/backend/access/transam/xloginsert.c index ac3c1a78396..e4a819efeeb 100644 --- a/src/backend/access/transam/xloginsert.c +++ b/src/backend/access/transam/xloginsert.c @@ -41,6 +41,7 @@ #include "storage/proc.h" #include "utils/memutils.h" #include "utils/pgstat_internal.h" +#include "utils/rel.h" /* * Guess the maximum buffer size required to store a compressed version of @@ -547,6 +548,57 @@ XLogSimpleInsertInt64(RmgrId rmid, uint8 info, int64 value) return XLogInsert(rmid, info); } +/* + * XLogGetFakeLSN - get a fake LSN for an index page that isn't WAL-logged. + * + * Some index AMs use LSNs to detect concurrent page modifications, but not + * all index pages are WAL-logged. This function provides a sequence of fake + * LSNs for that purpose. + */ +XLogRecPtr +XLogGetFakeLSN(Relation rel) +{ + if (rel->rd_rel->relpersistence == RELPERSISTENCE_TEMP) + { + /* + * Temporary relations are only accessible in our session, so a simple + * backend-local counter will do. + */ + static XLogRecPtr counter = FirstNormalUnloggedLSN; + + return counter++; + } + else if (rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED) + { + /* + * Unlogged relations are accessible from other backends, and survive + * (clean) restarts. GetFakeLSNForUnloggedRel() handles that for us. + */ + return GetFakeLSNForUnloggedRel(); + } + else + { + /* + * WAL-logging on this relation will start after commit, so its LSNs + * must be distinct numbers smaller than the LSN at the next commit. + * Emit a dummy WAL record if insert-LSN hasn't advanced after the + * last call. + */ + static XLogRecPtr lastlsn = InvalidXLogRecPtr; + XLogRecPtr currlsn = GetXLogInsertEndRecPtr(); + + Assert(!RelationNeedsWAL(rel)); + Assert(RelationIsPermanent(rel)); + + /* No need for an actual record if we already have a distinct LSN */ + if (XLogRecPtrIsValid(lastlsn) && lastlsn == currlsn) + currlsn = XLogAssignLSN(); + + lastlsn = currlsn; + return currlsn; + } +} + /* * Assemble a WAL record from the registered data and buffers into an * XLogRecData chain, ready for insertion with XLogInsertRecord(). diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index 6f30a253779..00bc609529a 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -4462,9 +4462,9 @@ FlushBuffer(BufferDesc *buf, SMgrRelation reln, IOObject io_object, * lost after a crash anyway. Most unlogged relation pages do not bear * LSNs since we never emit WAL records for them, and therefore flushing * up through the buffer LSN would be useless, but harmless. However, - * GiST indexes use LSNs internally to track page-splits, and therefore - * unlogged GiST pages bear "fake" LSNs generated by - * GetFakeLSNForUnloggedRel. It is unlikely but possible that the fake + * some index AMs use LSNs internally to detect concurrent page + * modifications, and therefore unlogged index pages bear "fake" LSNs + * generated by XLogGetFakeLSN. It is unlikely but possible that the fake * LSN counter could advance past the WAL insertion point; and if it did * happen, attempting to flush WAL through that location would fail, with * disastrous system-wide consequences. To make sure that can't happen, diff --git a/src/include/access/gist_private.h b/src/include/access/gist_private.h index 552f605c0aa..44514f1cb8d 100644 --- a/src/include/access/gist_private.h +++ b/src/include/access/gist_private.h @@ -457,8 +457,6 @@ extern XLogRecPtr gistXLogSplit(bool page_is_leaf, BlockNumber origrlink, GistNSN orignsn, Buffer leftchildbuf, bool markfollowright); -extern XLogRecPtr gistXLogAssignLSN(void); - /* gistget.c */ extern bool gistgettuple(IndexScanDesc scan, ScanDirection dir); extern int64 gistgetbitmap(IndexScanDesc scan, TIDBitmap *tbm); @@ -531,8 +529,6 @@ extern void gistMakeUnionKey(GISTSTATE *giststate, int attno, GISTENTRY *entry2, bool isnull2, Datum *dst, bool *dstisnull); -extern XLogRecPtr gistGetFakeLSN(Relation rel); - /* gistvacuum.c */ extern IndexBulkDeleteResult *gistbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, diff --git a/src/include/access/gistxlog.h b/src/include/access/gistxlog.h index d3d1c6549be..1c2cf6e813a 100644 --- a/src/include/access/gistxlog.h +++ b/src/include/access/gistxlog.h @@ -26,7 +26,7 @@ /* #define XLOG_GIST_INSERT_COMPLETE 0x40 */ /* not used anymore */ /* #define XLOG_GIST_CREATE_INDEX 0x50 */ /* not used anymore */ #define XLOG_GIST_PAGE_DELETE 0x60 -#define XLOG_GIST_ASSIGN_LSN 0x70 /* nop, assign new LSN */ + /* #define XLOG_GIST_ASSIGN_LSN 0x70 */ /* not used anymore */ /* * Backup Blk 0: updated page. diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index 958f39edda4..dcc12eb8cbe 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -259,6 +259,7 @@ extern bool CreateRestartPoint(int flags); extern WALAvailability GetWALAvailability(XLogRecPtr targetLSN); extern void XLogPutNextOid(Oid nextOid); extern XLogRecPtr XLogRestorePoint(const char *rpName); +extern XLogRecPtr XLogAssignLSN(void); extern void UpdateFullPageWrites(void); extern void GetFullPageWriteInfo(XLogRecPtr *RedoRecPtr_p, bool *doPageWrites_p); extern XLogRecPtr GetRedoRecPtr(void); diff --git a/src/include/access/xlog_internal.h b/src/include/access/xlog_internal.h index 58ae12bb20f..629ac3a7d3e 100644 --- a/src/include/access/xlog_internal.h +++ b/src/include/access/xlog_internal.h @@ -31,7 +31,7 @@ /* * Each page of XLOG file has a header like this: */ -#define XLOG_PAGE_MAGIC 0xD11C /* can be used as WAL version indicator */ +#define XLOG_PAGE_MAGIC 0xD11D /* can be used as WAL version indicator */ typedef struct XLogPageHeaderData { diff --git a/src/include/access/xloginsert.h b/src/include/access/xloginsert.h index 16ebc76e743..91dfbd5627f 100644 --- a/src/include/access/xloginsert.h +++ b/src/include/access/xloginsert.h @@ -64,6 +64,8 @@ extern void log_newpage_range(Relation rel, ForkNumber forknum, BlockNumber startblk, BlockNumber endblk, bool page_std); extern XLogRecPtr XLogSaveBufferForHint(Buffer buffer, bool buffer_std); +extern XLogRecPtr XLogGetFakeLSN(Relation rel); + extern void InitXLogInsert(void); #endif /* XLOGINSERT_H */ diff --git a/src/include/catalog/pg_control.h b/src/include/catalog/pg_control.h index 7503db1af51..77a661e818b 100644 --- a/src/include/catalog/pg_control.h +++ b/src/include/catalog/pg_control.h @@ -78,7 +78,7 @@ typedef struct CheckPoint #define XLOG_END_OF_RECOVERY 0x90 #define XLOG_FPI_FOR_HINT 0xA0 #define XLOG_FPI 0xB0 -/* 0xC0 is used in Postgres 9.5-11 */ +#define XLOG_ASSIGN_LSN 0xC0 #define XLOG_OVERWRITE_CONTRECORD 0xD0 #define XLOG_CHECKPOINT_REDO 0xE0 #define XLOG_LOGICAL_DECODING_STATUS_CHANGE 0xF0