Move fake LSN infrastructure out of GiST.

Move utility functions used by GiST to generate fake LSNs into xlog.c
and xloginsert.c, so that other index AMs can also generate fake LSNs.

Preparation for an upcoming commit that will add support for fake LSNs
to nbtree, allowing its dropPin optimization to be used during scans of
unlogged relations.  That commit is itself preparation for another
upcoming commit that will add a new amgetbatch/btgetbatch interface to
enable I/O prefetching.

Bump XLOG_PAGE_MAGIC due to XLOG_GIST_ASSIGN_LSN becoming
XLOG_ASSIGN_LSN.

Author: Peter Geoghegan <pg@bowt.ie>
Reviewed-By: Andres Freund <andres@anarazel.de>
Reviewed-By: Tomas Vondra <tomas@vondra.me>
Discussion: https://postgr.es/m/CAH2-WzkehuhxyuA8quc7rRN3EtNXpiKsjPfO8mhb+0Dr2K0Dtg@mail.gmail.com
This commit is contained in:
Peter Geoghegan 2026-03-13 19:38:17 -04:00
parent 9b860373da
commit d774072f00
15 changed files with 103 additions and 94 deletions

View file

@ -517,7 +517,7 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
dist, oldrlink, oldnsn, leftchildbuf,
markfollowright);
else
recptr = gistGetFakeLSN(rel);
recptr = XLogGetFakeLSN(rel);
}
for (ptr = dist; ptr; ptr = ptr->next)
@ -594,7 +594,7 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
leftchildbuf);
}
else
recptr = gistGetFakeLSN(rel);
recptr = XLogGetFakeLSN(rel);
}
PageSetLSN(page, recptr);
@ -1733,7 +1733,7 @@ gistprunepage(Relation rel, Page page, Buffer buffer, Relation heapRel)
PageSetLSN(page, recptr);
}
else
PageSetLSN(page, gistGetFakeLSN(rel));
PageSetLSN(page, XLogGetFakeLSN(rel));
END_CRIT_SECTION();
}

View file

@ -1007,56 +1007,6 @@ gistproperty(Oid index_oid, int attno,
return true;
}
/*
 * Some indexes are not WAL-logged, but we need LSNs to detect concurrent page
 * splits anyway. This function provides a fake sequence of LSNs for that
 * purpose.
 *
 * Never call this for relations where RelationNeedsWAL() holds; those get
 * real LSNs from WAL insertion.
 */
XLogRecPtr
gistGetFakeLSN(Relation rel)
{
	if (rel->rd_rel->relpersistence == RELPERSISTENCE_TEMP)
	{
		/*
		 * Temporary relations are only accessible in our session, so a simple
		 * backend-local counter will do.
		 */
		static XLogRecPtr counter = FirstNormalUnloggedLSN;

		return counter++;
	}
	else if (RelationIsPermanent(rel))
	{
		/*
		 * WAL-logging on this relation will start after commit, so its LSNs
		 * must be distinct numbers smaller than the LSN at the next commit.
		 * Emit a dummy WAL record if insert-LSN hasn't advanced after the
		 * last call.
		 *
		 * lastlsn is backend-local; that suffices because a permanent
		 * relation in this state (wal_level=minimal, created in this
		 * transaction) is only visible to our own backend.
		 */
		static XLogRecPtr lastlsn = InvalidXLogRecPtr;
		XLogRecPtr	currlsn = GetXLogInsertEndRecPtr();

		/* Shouldn't be called for WAL-logging relations */
		Assert(!RelationNeedsWAL(rel));

		/* No need for an actual record if we already have a distinct LSN */
		if (XLogRecPtrIsValid(lastlsn) && lastlsn == currlsn)
			currlsn = gistXLogAssignLSN();

		lastlsn = currlsn;
		return currlsn;
	}
	else
	{
		/*
		 * Unlogged relations are accessible from other backends, and survive
		 * (clean) restarts. GetFakeLSNForUnloggedRel() handles that for us.
		 */
		Assert(rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED);
		return GetFakeLSNForUnloggedRel();
	}
}
/*
* This is a stratnum translation support function for GiST opclasses that use
* the RT*StrategyNumber constants.

View file

@ -16,7 +16,7 @@
#include "access/genam.h"
#include "access/gist_private.h"
#include "access/transam.h"
#include "access/xloginsert.h"
#include "commands/vacuum.h"
#include "lib/integerset.h"
#include "miscadmin.h"
@ -182,7 +182,7 @@ gistvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
if (RelationNeedsWAL(rel))
vstate.startNSN = GetInsertRecPtr();
else
vstate.startNSN = gistGetFakeLSN(rel);
vstate.startNSN = XLogGetFakeLSN(rel);
/*
* The outer loop iterates over all index pages, in physical order (we
@ -413,7 +413,7 @@ restart:
PageSetLSN(page, recptr);
}
else
PageSetLSN(page, gistGetFakeLSN(rel));
PageSetLSN(page, XLogGetFakeLSN(rel));
END_CRIT_SECTION();
@ -707,7 +707,7 @@ gistdeletepage(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
if (RelationNeedsWAL(info->index))
recptr = gistXLogPageDelete(leafBuffer, txid, parentBuffer, downlink);
else
recptr = gistGetFakeLSN(info->index);
recptr = XLogGetFakeLSN(info->index);
PageSetLSN(parentPage, recptr);
PageSetLSN(leafPage, recptr);

View file

@ -421,9 +421,6 @@ gist_redo(XLogReaderState *record)
case XLOG_GIST_PAGE_DELETE:
gistRedoPageDelete(record);
break;
case XLOG_GIST_ASSIGN_LSN:
/* nop. See gistGetFakeLSN(). */
break;
default:
elog(PANIC, "gist_redo: unknown op code %u", info);
}
@ -567,24 +564,6 @@ gistXLogPageDelete(Buffer buffer, FullTransactionId xid,
return recptr;
}
/*
 * Write an empty XLOG record to assign a distinct LSN.
 *
 * Used by gistGetFakeLSN() when the insert position hasn't advanced since
 * its last call, to guarantee each call returns a distinct value.
 */
XLogRecPtr
gistXLogAssignLSN(void)
{
	int			dummy = 0;

	/*
	 * Records other than XLOG_SWITCH must have content. We use an integer 0
	 * to follow the restriction.
	 */
	XLogBeginInsert();
	XLogSetRecordFlags(XLOG_MARK_UNIMPORTANT);
	XLogRegisterData(&dummy, sizeof(dummy));
	return XLogInsert(RM_GIST_ID, XLOG_GIST_ASSIGN_LSN);
}
/*
* Write XLOG record about reuse of a deleted page.
*/

View file

@ -80,9 +80,6 @@ gist_desc(StringInfo buf, XLogReaderState *record)
case XLOG_GIST_PAGE_DELETE:
out_gistxlogPageDelete(buf, (gistxlogPageDelete *) rec);
break;
case XLOG_GIST_ASSIGN_LSN:
/* No details to write out */
break;
}
}
@ -108,9 +105,6 @@ gist_identify(uint8 info)
case XLOG_GIST_PAGE_DELETE:
id = "PAGE_DELETE";
break;
case XLOG_GIST_ASSIGN_LSN:
id = "ASSIGN_LSN";
break;
}
return id;

View file

@ -175,6 +175,10 @@ xlog_desc(StringInfo buf, XLogReaderState *record)
memcpy(&enabled, rec, sizeof(bool));
appendStringInfoString(buf, enabled ? "true" : "false");
}
else if (info == XLOG_ASSIGN_LSN)
{
/* no further information to print */
}
}
const char *
@ -229,6 +233,9 @@ xlog_identify(uint8 info)
case XLOG_LOGICAL_DECODING_STATUS_CHANGE:
id = "LOGICAL_DECODING_STATUS_CHANGE";
break;
case XLOG_ASSIGN_LSN:
id = "ASSIGN_LSN";
break;
}
return id;

View file

@ -8224,6 +8224,30 @@ XLogRestorePoint(const char *rpName)
return RecPtr;
}
/*
 * Write an empty XLOG record to assign a distinct LSN.
 *
 * This is used by some index AMs when building indexes on permanent relations
 * with wal_level=minimal. In that scenario, WAL-logging will start after
 * commit, but the index AM needs distinct LSNs to detect concurrent page
 * modifications. XLogGetFakeLSN() calls this when the current WAL insert
 * position hasn't advanced since its last call, ensuring it can hand out a
 * new, distinct LSN.
 */
XLogRecPtr
XLogAssignLSN(void)
{
	int			dummy = 0;

	/*
	 * Records other than XLOG_SWITCH must have content. We use an integer 0
	 * to satisfy this restriction.
	 */
	XLogBeginInsert();
	XLogSetRecordFlags(XLOG_MARK_UNIMPORTANT);
	XLogRegisterData(&dummy, sizeof(dummy));
	return XLogInsert(RM_XLOG_ID, XLOG_ASSIGN_LSN);
}
/*
* Check if any of the GUC parameters that are critical for hot standby
* have changed, and update the value in pg_control file if necessary.
@ -8591,6 +8615,10 @@ xlog_redo(XLogReaderState *record)
{
/* nothing to do here, handled in xlogrecovery.c */
}
else if (info == XLOG_ASSIGN_LSN)
{
/* nothing to do here, see XLogGetFakeLSN() */
}
else if (info == XLOG_FPI || info == XLOG_FPI_FOR_HINT)
{
/*

View file

@ -41,6 +41,7 @@
#include "storage/proc.h"
#include "utils/memutils.h"
#include "utils/pgstat_internal.h"
#include "utils/rel.h"
/*
* Guess the maximum buffer size required to store a compressed version of
@ -547,6 +548,57 @@ XLogSimpleInsertInt64(RmgrId rmid, uint8 info, int64 value)
return XLogInsert(rmid, info);
}
/*
 * XLogGetFakeLSN - get a fake LSN for an index page that isn't WAL-logged.
 *
 * Some index AMs use LSNs to detect concurrent page modifications, but not
 * all index pages are WAL-logged. This function provides a sequence of fake
 * LSNs for that purpose.
 *
 * The strategy depends on the relation's persistence; in every case each
 * call returns a value distinct from the previous call's.
 */
XLogRecPtr
XLogGetFakeLSN(Relation rel)
{
	if (rel->rd_rel->relpersistence == RELPERSISTENCE_TEMP)
	{
		/*
		 * Temporary relations are only accessible in our session, so a simple
		 * backend-local counter will do.
		 */
		static XLogRecPtr counter = FirstNormalUnloggedLSN;

		return counter++;
	}
	else if (rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED)
	{
		/*
		 * Unlogged relations are accessible from other backends, and survive
		 * (clean) restarts. GetFakeLSNForUnloggedRel() handles that for us.
		 */
		return GetFakeLSNForUnloggedRel();
	}
	else
	{
		/*
		 * WAL-logging on this relation will start after commit, so its LSNs
		 * must be distinct numbers smaller than the LSN at the next commit.
		 * Emit a dummy WAL record if insert-LSN hasn't advanced after the
		 * last call.
		 *
		 * lastlsn is backend-local, which suffices: a permanent relation in
		 * this state is only visible to our own backend.
		 */
		static XLogRecPtr lastlsn = InvalidXLogRecPtr;
		XLogRecPtr	currlsn = GetXLogInsertEndRecPtr();

		/* Shouldn't be called for WAL-logging relations */
		Assert(!RelationNeedsWAL(rel));
		Assert(RelationIsPermanent(rel));

		/* No need for an actual record if we already have a distinct LSN */
		if (XLogRecPtrIsValid(lastlsn) && lastlsn == currlsn)
			currlsn = XLogAssignLSN();

		lastlsn = currlsn;
		return currlsn;
	}
}
/*
* Assemble a WAL record from the registered data and buffers into an
* XLogRecData chain, ready for insertion with XLogInsertRecord().

View file

@ -4462,9 +4462,9 @@ FlushBuffer(BufferDesc *buf, SMgrRelation reln, IOObject io_object,
* lost after a crash anyway. Most unlogged relation pages do not bear
* LSNs since we never emit WAL records for them, and therefore flushing
* up through the buffer LSN would be useless, but harmless. However,
* GiST indexes use LSNs internally to track page-splits, and therefore
* unlogged GiST pages bear "fake" LSNs generated by
* GetFakeLSNForUnloggedRel. It is unlikely but possible that the fake
* some index AMs use LSNs internally to detect concurrent page
* modifications, and therefore unlogged index pages bear "fake" LSNs
* generated by XLogGetFakeLSN. It is unlikely but possible that the fake
* LSN counter could advance past the WAL insertion point; and if it did
* happen, attempting to flush WAL through that location would fail, with
* disastrous system-wide consequences. To make sure that can't happen,

View file

@ -457,8 +457,6 @@ extern XLogRecPtr gistXLogSplit(bool page_is_leaf,
BlockNumber origrlink, GistNSN orignsn,
Buffer leftchildbuf, bool markfollowright);
extern XLogRecPtr gistXLogAssignLSN(void);
/* gistget.c */
extern bool gistgettuple(IndexScanDesc scan, ScanDirection dir);
extern int64 gistgetbitmap(IndexScanDesc scan, TIDBitmap *tbm);
@ -531,8 +529,6 @@ extern void gistMakeUnionKey(GISTSTATE *giststate, int attno,
GISTENTRY *entry2, bool isnull2,
Datum *dst, bool *dstisnull);
extern XLogRecPtr gistGetFakeLSN(Relation rel);
/* gistvacuum.c */
extern IndexBulkDeleteResult *gistbulkdelete(IndexVacuumInfo *info,
IndexBulkDeleteResult *stats,

View file

@ -26,7 +26,7 @@
/* #define XLOG_GIST_INSERT_COMPLETE 0x40 */ /* not used anymore */
/* #define XLOG_GIST_CREATE_INDEX 0x50 */ /* not used anymore */
#define XLOG_GIST_PAGE_DELETE 0x60
#define XLOG_GIST_ASSIGN_LSN 0x70 /* nop, assign new LSN */
/* #define XLOG_GIST_ASSIGN_LSN 0x70 */ /* not used anymore */
/*
* Backup Blk 0: updated page.

View file

@ -259,6 +259,7 @@ extern bool CreateRestartPoint(int flags);
extern WALAvailability GetWALAvailability(XLogRecPtr targetLSN);
extern void XLogPutNextOid(Oid nextOid);
extern XLogRecPtr XLogRestorePoint(const char *rpName);
extern XLogRecPtr XLogAssignLSN(void);
extern void UpdateFullPageWrites(void);
extern void GetFullPageWriteInfo(XLogRecPtr *RedoRecPtr_p, bool *doPageWrites_p);
extern XLogRecPtr GetRedoRecPtr(void);

View file

@ -31,7 +31,7 @@
/*
* Each page of XLOG file has a header like this:
*/
#define XLOG_PAGE_MAGIC 0xD11C /* can be used as WAL version indicator */
#define XLOG_PAGE_MAGIC 0xD11D /* can be used as WAL version indicator */
typedef struct XLogPageHeaderData
{

View file

@ -64,6 +64,8 @@ extern void log_newpage_range(Relation rel, ForkNumber forknum,
BlockNumber startblk, BlockNumber endblk, bool page_std);
extern XLogRecPtr XLogSaveBufferForHint(Buffer buffer, bool buffer_std);
extern XLogRecPtr XLogGetFakeLSN(Relation rel);
extern void InitXLogInsert(void);
#endif /* XLOGINSERT_H */

View file

@ -78,7 +78,7 @@ typedef struct CheckPoint
#define XLOG_END_OF_RECOVERY 0x90
#define XLOG_FPI_FOR_HINT 0xA0
#define XLOG_FPI 0xB0
/* 0xC0 is used in Postgres 9.5-11 */
#define XLOG_ASSIGN_LSN 0xC0
#define XLOG_OVERWRITE_CONTRECORD 0xD0
#define XLOG_CHECKPOINT_REDO 0xE0
#define XLOG_LOGICAL_DECODING_STATUS_CHANGE 0xF0