postgresql/src/include/access/xlogutils.h
Fujii Masao d2980067bc Fix unlogged sequence corruption after standby promotion
Previously, if an unlogged sequence was created on the primary and
replicated to a standby, reading the sequence after promoting the
standby (for example, with nextval()) could trigger the following
assertion failure:

    TRAP: failed Assert("((const PageHeaderData *) page)->pd_special >= SizeOfPageHeaderData")

In non-assert builds, the same operation could instead fail with an
error such as:

    ERROR:  bad magic number in sequence

The problem was that seq_redo() updated the init fork page in shared
buffers but did not flush it to disk. During promotion,
ResetUnloggedRelations() recreates the main fork of unlogged
relations by copying the init fork from disk, bypassing shared
buffers. As a result, the main fork could be recreated from a stale
init fork instead of the WAL-replayed page.

Fix this by introducing a helper to flush init fork buffers
immediately, and make seq_redo() use it. As a result, the main fork
of an unlogged sequence is recreated from the up-to-date init fork on
disk, allowing the unlogged sequence to be read successfully after
standby promotion.

Backpatch to v15, where unlogged sequences were introduced.

Author: Fujii Masao <masao.fujii@gmail.com>
Reviewed-by: vignesh C <vignesh21@gmail.com>
Discussion: https://postgr.es/m/CAHGQGwH1Ssze3XM6wjoTjSLVOR041c6xP+vsdLP951=w8oG8bA@mail.gmail.com
Backpatch-through: 15
2026-06-30 08:52:50 +09:00

120 lines
4.2 KiB
C

/*
* xlogutils.h
*
* Utilities for replaying WAL records.
*
* Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* src/include/access/xlogutils.h
*/
#ifndef XLOG_UTILS_H
#define XLOG_UTILS_H
#include "access/xlogreader.h"
#include "storage/bufmgr.h"
/*
* Prior to 8.4, all activity during recovery was carried out by the startup
* process. This local variable continues to be used in many parts of the
* code to indicate actions taken by RecoveryManagers. Other processes that
* potentially perform work during recovery should check RecoveryInProgress().
* See XLogCtl notes in xlog.c.
*
* "Local" here means per-process: like standbyState below, the flag is only
* valid in the startup process. This line is a declaration only; the
* variable is defined elsewhere.
*/
extern PGDLLIMPORT bool InRecovery;
/*
* Like InRecovery, standbyState is only valid in the startup process.
* In all other processes it will have the value STANDBY_DISABLED (so
* InHotStandby will read as false).
*
* In DISABLED state, we're performing crash recovery or hot standby was
* disabled in postgresql.conf.
*
* In INITIALIZED state, we've run InitRecoveryTransactionEnvironment, but
* we haven't yet processed a RUNNING_XACTS or shutdown-checkpoint WAL record
* to initialize our primary-transaction tracking system.
*
* When the transaction tracking is initialized, we enter the SNAPSHOT_PENDING
* state. The tracked information might still be incomplete, so we can't allow
* connections yet, but redo functions must update the in-memory state when
* appropriate.
*
* In SNAPSHOT_READY mode, we have full knowledge of transactions that are
* (or were) running on the primary at the current WAL location. Snapshots
* can be taken, and read-only queries can be run.
*/
/*
 * Hot-standby progress states, in the order they are reached.
 *
 * The numeric values are spelled out because InHotStandby orders states
 * with ">=": every state from STANDBY_SNAPSHOT_PENDING upward counts as
 * "in hot standby".
 */
typedef enum
{
    /* crash recovery, or hot standby disabled in postgresql.conf */
    STANDBY_DISABLED = 0,

    /*
     * InitRecoveryTransactionEnvironment has run, but no RUNNING_XACTS or
     * shutdown-checkpoint WAL record has been processed yet
     */
    STANDBY_INITIALIZED = 1,

    /*
     * transaction tracking is initialized but may still be incomplete;
     * connections are not allowed yet
     */
    STANDBY_SNAPSHOT_PENDING = 2,

    /* snapshots can be taken and read-only queries can be run */
    STANDBY_SNAPSHOT_READY = 3
} HotStandbyState;
/*
* Current hot-standby state of this process (declaration only). Only valid
* in the startup process; elsewhere it reads as STANDBY_DISABLED.
*/
extern PGDLLIMPORT HotStandbyState standbyState;
/*
* True once the state has reached STANDBY_SNAPSHOT_PENDING. The ">="
* comparison relies on the declaration order of HotStandbyState, so
* STANDBY_SNAPSHOT_READY is covered as well.
*/
#define InHotStandby (standbyState >= STANDBY_SNAPSHOT_PENDING)
/*
* Invalid-page bookkeeping plus relation/database drop and truncate hooks.
*
* Only the prototypes are visible in this header, so the one-line
* descriptions below are inferred from the names.
* NOTE(review): confirm each against its definition.
*/
/* Presumably reports whether any invalid-page entries are outstanding. */
extern bool XLogHaveInvalidPages(void);
/* Presumably validates the outstanding invalid-page entries, if any. */
extern void XLogCheckInvalidPages(void);
/* Drop hook for one fork (forknum) of the relation identified by rnode. */
extern void XLogDropRelation(RelFileNode rnode, ForkNumber forknum);
/* Drop hook for the whole database identified by dbid. */
extern void XLogDropDatabase(Oid dbid);
/*
* Truncate hook for one fork of a relation; nblocks is presumably the new
* length of the fork in blocks -- TODO confirm.
*/
extern void XLogTruncateRelation(RelFileNode rnode, ForkNumber forkNum,
BlockNumber nblocks);
/*
 * Result codes for XLogReadBufferForRedo[Extended].
 *
 * Each code tells the redo routine what, if anything, is left to do for
 * the block it asked about.
 */
typedef enum
{
    /* changes from WAL record need to be applied */
    BLK_NEEDS_REDO = 0,

    /* block is already up-to-date */
    BLK_DONE = 1,

    /* block was restored from a full-page image */
    BLK_RESTORED = 2,

    /* block was not found (and hence does not need to be replayed) */
    BLK_NOTFOUND = 3
} XLogRedoAction;
/*
* Private data of the read_local_xlog_page_no_wait callback.
*
* The callback itself is declared further down in this header. How this
* struct is attached to the reader state is not visible here.
*/
typedef struct ReadLocalXLogPageNoWaitPrivate
{
bool end_of_wal; /* true, when end of WAL is reached */
} ReadLocalXLogPageNoWaitPrivate;
/*
* Buffer access helpers for redo routines.
*
* All four take the WAL record being replayed plus a uint8 block_id that
* selects one of the record's blocks. The parameter is named block_id
* throughout; prototype parameter names have no effect on callers.
*/

/*
* Returns an XLogRedoAction telling the caller whether the block still needs
* the record's changes applied. "buf" is an output parameter; presumably it
* receives the buffer holding the block -- TODO confirm locking/pin state
* against the definition.
*/
extern XLogRedoAction XLogReadBufferForRedo(XLogReaderState *record,
uint8 block_id, Buffer *buf);

/*
* Returns a Buffer for a block that the redo routine will initialize itself.
* NOTE(review): inferred from the name; confirm against the definition.
*/
extern Buffer XLogInitBufferForRedo(XLogReaderState *record, uint8 block_id);

/*
* Flushes "buffer" immediately if the referenced block belongs to an init
* fork (per the function name -- confirm the exact condition in the
* definition).
*
* Needed because ResetUnloggedRelations() recreates the main fork of an
* unlogged relation by copying the init fork from disk, bypassing shared
* buffers; without the flush, a WAL-replayed init page that exists only in
* shared buffers would be missed after standby promotion.
*/
extern void XLogFlushBufferForRedoIfInit(XLogReaderState *record,
uint8 block_id, Buffer buffer);

/*
* Variant of XLogReadBufferForRedo that additionally lets the caller choose
* the ReadBufferMode and request a cleanup lock via get_cleanup_lock.
*/
extern XLogRedoAction XLogReadBufferForRedoExtended(XLogReaderState *record,
uint8 block_id,
ReadBufferMode mode, bool get_cleanup_lock,
Buffer *buf);
/*
* Reads block blkno of fork forknum of the relation identified by rnode,
* using the given ReadBufferMode, and returns a Buffer.
* NOTE(review): recent_buffer looks like a hint naming a buffer that
* recently held this block -- confirm against the definition.
*/
extern Buffer XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum,
BlockNumber blkno, ReadBufferMode mode,
Buffer recent_buffer);
/*
* Create and free a "fake" Relation built from a RelFileNode alone.
* NOTE(review): presumably every entry returned by CreateFakeRelcacheEntry
* must be released with FreeFakeRelcacheEntry -- confirm.
*/
extern Relation CreateFakeRelcacheEntry(RelFileNode rnode);
extern void FreeFakeRelcacheEntry(Relation fakerel);
/*
* Page-read callbacks for an XLogReaderState reading local WAL. Both
* variants take the same arguments and return int.
* NOTE(review): the meaning of the return value (bytes read vs. error code)
* is not visible here -- confirm against the definitions.
*/
extern int read_local_xlog_page(XLogReaderState *state,
XLogRecPtr targetPagePtr, int reqLen,
XLogRecPtr targetRecPtr, char *cur_page);
/*
* "no_wait" variant; its private data is ReadLocalXLogPageNoWaitPrivate,
* whose end_of_wal flag is true when end of WAL is reached. Presumably it
* sets that flag instead of waiting for more WAL -- TODO confirm.
*/
extern int read_local_xlog_page_no_wait(XLogReaderState *state,
XLogRecPtr targetPagePtr, int reqLen,
XLogRecPtr targetRecPtr,
char *cur_page);
/*
* Segment open/close callbacks for an XLogReaderState. tli_p is a pointer
* to a TimeLineID; whether it is input, output or both is not visible here.
*/
extern void wal_segment_open(XLogReaderState *state,
XLogSegNo nextSegNo,
TimeLineID *tli_p);
extern void wal_segment_close(XLogReaderState *state);
/*
* Determines the timeline to read from for the wantLength bytes starting at
* wantPage, given the current timeline currTLI.
* NOTE(review): inferred from the name and parameters -- confirm.
*/
extern void XLogReadDetermineTimeline(XLogReaderState *state,
XLogRecPtr wantPage,
uint32 wantLength,
TimeLineID currTLI);
/* Raises an error describing the WAL read failure recorded in *errinfo. */
extern void WALReadRaiseError(WALReadError *errinfo);
#endif