mirror of
https://github.com/postgres/postgres.git
synced 2026-07-01 05:34:32 -04:00
Fix unlogged sequence corruption after standby promotion
Previously, if an unlogged sequence was created on the primary and
replicated to a standby, reading the sequence after promoting the
standby (for example, with nextval()) could trigger the following
assertion failure:
TRAP: failed Assert("((const PageHeaderData *) page)->pd_special >= SizeOfPageHeaderData")
In non-assert builds, the same operation could instead fail with an
error such as:
ERROR: bad magic number in sequence
The problem was that seq_redo() updated the init fork page in shared
buffers but did not flush it to disk. During promotion,
ResetUnloggedRelations() recreates the main fork of unlogged
relations by copying the init fork from disk, bypassing shared
buffers. As a result, the main fork could be recreated from a stale
init fork instead of the WAL-replayed page.
Fix this by introducing a helper to flush init fork buffers
immediately, and making seq_redo() use it. As a result, the main fork
of an unlogged sequence is recreated from the up-to-date init fork on
disk, allowing the unlogged sequence to be read successfully after
standby promotion.
Backpatch to v15, where unlogged sequences were introduced.
Author: Fujii Masao <masao.fujii@gmail.com>
Reviewed-by: vignesh C <vignesh21@gmail.com>
Discussion: https://postgr.es/m/CAHGQGwH1Ssze3XM6wjoTjSLVOR041c6xP+vsdLP951=w8oG8bA@mail.gmail.com
Backpatch-through: 15
This commit is contained in:
parent
12bff46ff3
commit
d2980067bc
5 changed files with 67 additions and 27 deletions
|
|
@ -32,7 +32,6 @@ hash_xlog_init_meta_page(XLogReaderState *record)
|
|||
XLogRecPtr lsn = record->EndRecPtr;
|
||||
Page page;
|
||||
Buffer metabuf;
|
||||
ForkNumber forknum;
|
||||
|
||||
xl_hash_init_meta_page *xlrec = (xl_hash_init_meta_page *) XLogRecGetData(record);
|
||||
|
||||
|
|
@ -44,16 +43,7 @@ hash_xlog_init_meta_page(XLogReaderState *record)
|
|||
page = (Page) BufferGetPage(metabuf);
|
||||
PageSetLSN(page, lsn);
|
||||
MarkBufferDirty(metabuf);
|
||||
|
||||
/*
|
||||
* Force the on-disk state of init forks to always be in sync with the
|
||||
* state in shared buffers. See XLogReadBufferForRedoExtended. We need
|
||||
* special handling for init forks as create index operations don't log a
|
||||
* full page image of the metapage.
|
||||
*/
|
||||
XLogRecGetBlockTag(record, 0, NULL, &forknum, NULL);
|
||||
if (forknum == INIT_FORKNUM)
|
||||
FlushOneBuffer(metabuf);
|
||||
XLogFlushBufferForRedoIfInit(record, 0, metabuf);
|
||||
|
||||
/* all done */
|
||||
UnlockReleaseBuffer(metabuf);
|
||||
|
|
@ -71,7 +61,6 @@ hash_xlog_init_bitmap_page(XLogReaderState *record)
|
|||
Page page;
|
||||
HashMetaPage metap;
|
||||
uint32 num_buckets;
|
||||
ForkNumber forknum;
|
||||
|
||||
xl_hash_init_bitmap_page *xlrec = (xl_hash_init_bitmap_page *) XLogRecGetData(record);
|
||||
|
||||
|
|
@ -82,16 +71,7 @@ hash_xlog_init_bitmap_page(XLogReaderState *record)
|
|||
_hash_initbitmapbuffer(bitmapbuf, xlrec->bmsize, true);
|
||||
PageSetLSN(BufferGetPage(bitmapbuf), lsn);
|
||||
MarkBufferDirty(bitmapbuf);
|
||||
|
||||
/*
|
||||
* Force the on-disk state of init forks to always be in sync with the
|
||||
* state in shared buffers. See XLogReadBufferForRedoExtended. We need
|
||||
* special handling for init forks as create index operations don't log a
|
||||
* full page image of the metapage.
|
||||
*/
|
||||
XLogRecGetBlockTag(record, 0, NULL, &forknum, NULL);
|
||||
if (forknum == INIT_FORKNUM)
|
||||
FlushOneBuffer(bitmapbuf);
|
||||
XLogFlushBufferForRedoIfInit(record, 0, bitmapbuf);
|
||||
UnlockReleaseBuffer(bitmapbuf);
|
||||
|
||||
/* add the new bitmap page to the metapage's list of bitmaps */
|
||||
|
|
@ -112,10 +92,7 @@ hash_xlog_init_bitmap_page(XLogReaderState *record)
|
|||
|
||||
PageSetLSN(page, lsn);
|
||||
MarkBufferDirty(metabuf);
|
||||
|
||||
XLogRecGetBlockTag(record, 1, NULL, &forknum, NULL);
|
||||
if (forknum == INIT_FORKNUM)
|
||||
FlushOneBuffer(metabuf);
|
||||
XLogFlushBufferForRedoIfInit(record, 1, metabuf);
|
||||
}
|
||||
if (BufferIsValid(metabuf))
|
||||
UnlockReleaseBuffer(metabuf);
|
||||
|
|
|
|||
|
|
@ -334,6 +334,28 @@ XLogInitBufferForRedo(XLogReaderState *record, uint8 block_id)
|
|||
return buf;
|
||||
}
|
||||
|
||||
/*
|
||||
* If a redo routine modified an init fork, flush the buffer immediately.
|
||||
*
|
||||
* At the end of crash recovery the init forks of unlogged relations are
|
||||
* copied to the main fork directly from disk, without going through shared
|
||||
* buffers. Therefore, redo routines that update init forks without
|
||||
* restoring a full-page image must call this after setting the page LSN and
|
||||
* marking the buffer dirty.
|
||||
*/
|
||||
void
|
||||
XLogFlushBufferForRedoIfInit(XLogReaderState *record, uint8 block_id,
|
||||
Buffer buffer)
|
||||
{
|
||||
ForkNumber forknum;
|
||||
|
||||
Assert(BufferIsValid(buffer));
|
||||
|
||||
XLogRecGetBlockTag(record, block_id, NULL, &forknum, NULL);
|
||||
if (forknum == INIT_FORKNUM)
|
||||
FlushOneBuffer(buffer);
|
||||
}
|
||||
|
||||
/*
|
||||
* XLogReadBufferForRedoExtended
|
||||
* Like XLogReadBufferForRedo, but with extra options.
|
||||
|
|
@ -411,7 +433,9 @@ XLogReadBufferForRedoExtended(XLogReaderState *record,
|
|||
* At the end of crash recovery the init forks of unlogged relations
|
||||
* are copied, without going through shared buffers. So we need to
|
||||
* force the on-disk state of init forks to always be in sync with the
|
||||
* state in shared buffers.
|
||||
* state in shared buffers. Use XLogFlushBufferForRedoIfInit() for
|
||||
* redo routines that dirty init-fork buffers without restoring a
|
||||
* full-page image.
|
||||
*/
|
||||
if (forknum == INIT_FORKNUM)
|
||||
FlushOneBuffer(*buf);
|
||||
|
|
|
|||
|
|
@ -1908,6 +1908,7 @@ seq_redo(XLogReaderState *record)
|
|||
|
||||
memcpy(page, localpage, BufferGetPageSize(buffer));
|
||||
MarkBufferDirty(buffer);
|
||||
XLogFlushBufferForRedoIfInit(record, 0, buffer);
|
||||
UnlockReleaseBuffer(buffer);
|
||||
|
||||
pfree(localpage);
|
||||
|
|
|
|||
|
|
@ -84,6 +84,8 @@ typedef struct ReadLocalXLogPageNoWaitPrivate
|
|||
extern XLogRedoAction XLogReadBufferForRedo(XLogReaderState *record,
|
||||
uint8 buffer_id, Buffer *buf);
|
||||
extern Buffer XLogInitBufferForRedo(XLogReaderState *record, uint8 block_id);
|
||||
extern void XLogFlushBufferForRedoIfInit(XLogReaderState *record,
|
||||
uint8 block_id, Buffer buffer);
|
||||
extern XLogRedoAction XLogReadBufferForRedoExtended(XLogReaderState *record,
|
||||
uint8 buffer_id,
|
||||
ReadBufferMode mode, bool get_cleanup_lock,
|
||||
|
|
|
|||
36
src/test/recovery/t/054_unlogged_sequence_promotion.pl
Normal file
36
src/test/recovery/t/054_unlogged_sequence_promotion.pl
Normal file
|
|
@ -0,0 +1,36 @@
|
|||
# Copyright (c) 2026, PostgreSQL Global Development Group
|
||||
|
||||
# Test that unlogged sequences created on a primary can be read after
|
||||
# promotion of a standby that replayed their init fork.
|
||||
|
||||
use strict;
|
||||
use warnings FATAL => 'all';
|
||||
use PostgreSQL::Test::Cluster;
|
||||
use PostgreSQL::Test::Utils;
|
||||
use Test::More;
|
||||
|
||||
# Bring up a primary that accepts streaming replication connections.
my $primary = PostgreSQL::Test::Cluster->new('primary');
$primary->init(allows_streaming => 1);
$primary->start;

# Take a base backup and start a streaming standby from it.
my $backup = 'my_backup';
$primary->backup($backup);

my $standby = PostgreSQL::Test::Cluster->new('standby');
$standby->init_from_backup($primary, $backup, has_streaming => 1);
$standby->start;

# The sequence is created only once the standby is running, so that the
# standby produces the sequence's init fork through WAL replay.
$primary->safe_psql('postgres', "CREATE UNLOGGED SEQUENCE ulseq");
$primary->wait_for_catchup($standby);

# Promote the standby and wait until it reports that recovery is over.
$standby->promote;
die "Timed out waiting for promotion"
  unless $standby->poll_query_until('postgres',
	"SELECT NOT pg_is_in_recovery()");

# Reading the sequence on the promoted node must work and return 1.
my $first_value = $standby->safe_psql('postgres', "SELECT nextval('ulseq')");
is($first_value, 1, 'unlogged sequence can be read after standby promotion');

done_testing();
|
||||
Loading…
Reference in a new issue