diff --git a/doc/src/sgml/ref/wait_for.sgml b/doc/src/sgml/ref/wait_for.sgml index 9ba785ea321..8819973c774 100644 --- a/doc/src/sgml/ref/wait_for.sgml +++ b/doc/src/sgml/ref/wait_for.sgml @@ -105,30 +105,25 @@ WAIT FOR LSN 'lsn' standby_write: Wait for the WAL containing the - LSN to be received from the primary and written to disk on a - standby server, but not yet flushed. This is faster than + LSN to be written to disk on a standby server, but not yet + necessarily flushed. This is faster than standby_flush but provides weaker durability guarantees since the data may still be in operating system - buffers. After successful completion, the - written_lsn column in - - pg_stat_wal_receiver will show - a value greater than or equal to the target LSN. This mode can - only be used during recovery. + buffers. This is satisfied by WAL already present on the + standby from a base backup, archive restore, or prior + streaming, as well as WAL newly received from the primary. + This mode can only be used during recovery. standby_flush: Wait for the WAL containing the - LSN to be received from the primary and flushed to disk on a - standby server. This provides a durability guarantee without - waiting for the WAL to be applied. After successful completion, - pg_last_wal_receive_lsn() will return a - value greater than or equal to the target LSN. This value is - also available as the flushed_lsn - column in - pg_stat_wal_receiver. This mode - can only be used during recovery. + LSN to be flushed to disk on a standby server. This provides + a durability guarantee without waiting for the WAL to be + applied. This is satisfied by WAL already present on the + standby from a base backup, archive restore, or prior + streaming, as well as WAL newly received from the primary. + This mode can only be used during recovery. @@ -238,10 +233,11 @@ WAIT FOR LSN 'lsn' useful to achieve read-your-writes consistency while using an async replica for reads and the primary for writes. 
The standby_flush mode waits for the WAL to be flushed - to durable storage on the replica, providing a durability guarantee - without waiting for replay. The standby_write mode - waits for the WAL to be written to the operating system, which is - faster than flush but provides weaker durability guarantees. The + to durable storage on the replica, or to have already been replayed + from WAL present on the standby. The standby_write mode + waits for the WAL to be written to the operating system, or to have + already been replayed, which is faster than flush for newly received + WAL but provides weaker durability guarantees. The primary_flush mode waits for WAL to be flushed on a primary server. In all cases, the LSN of the last modification should be stored on the client application side or the diff --git a/src/backend/access/transam/xlogwait.c b/src/backend/access/transam/xlogwait.c index 6a27183c207..18f78338330 100644 --- a/src/backend/access/transam/xlogwait.c +++ b/src/backend/access/transam/xlogwait.c @@ -111,10 +111,27 @@ GetCurrentLSNForWaitType(WaitLSNType lsnType) return GetXLogReplayRecPtr(NULL); case WAIT_LSN_TYPE_STANDBY_WRITE: - return GetWalRcvWriteRecPtr(); + { + XLogRecPtr recptr = GetWalRcvWriteRecPtr(); + XLogRecPtr replay = GetXLogReplayRecPtr(NULL); + + /* + * Use the replay position as a floor. WAL up to the replay + * point is already on disk from a base backup, archive + * restore, or prior streaming, so there is no reason to wait + * for the walreceiver to re-receive it. + */ + return Max(recptr, replay); + } case WAIT_LSN_TYPE_STANDBY_FLUSH: - return GetWalRcvFlushRecPtr(NULL, NULL); + { + XLogRecPtr recptr = GetWalRcvFlushRecPtr(NULL, NULL); + XLogRecPtr replay = GetXLogReplayRecPtr(NULL); + + /* Same floor as standby_write; see comment above. 
*/ + return Max(recptr, replay); + } case WAIT_LSN_TYPE_PRIMARY_FLUSH: return GetFlushRecPtr(NULL); diff --git a/src/test/recovery/t/049_wait_for_lsn.pl b/src/test/recovery/t/049_wait_for_lsn.pl index 0e74175f9eb..26790fda5be 100644 --- a/src/test/recovery/t/049_wait_for_lsn.pl +++ b/src/test/recovery/t/049_wait_for_lsn.pl @@ -674,4 +674,77 @@ for (my $i = 0; $i < 3; $i++) $wait_sessions[$i]->{run}->finish; } +# 9. Archive-only standby tests: verify standby_write/standby_flush work +# without a walreceiver. These exercise the replay-position floor in +# GetCurrentLSNForWaitType(). +# +# We set up a separate primary with archiving and an archive-only standby +# (has_restoring, no has_streaming), so no walreceiver ever starts and the +# shared walreceiver positions (writtenUpto, flushedUpto) stay at their +# zero-initialized values. + +my $arc_primary = PostgreSQL::Test::Cluster->new('arc_primary'); +$arc_primary->init(has_archiving => 1, allows_streaming => 1); +$arc_primary->start; + +$arc_primary->safe_psql('postgres', + "CREATE TABLE arc_test AS SELECT generate_series(1,10) AS a"); + +my $arc_backup_name = 'arc_backup'; +$arc_primary->backup($arc_backup_name); + +# Generate WAL that will be archived and replayed on the standby. +$arc_primary->safe_psql('postgres', + "INSERT INTO arc_test VALUES (generate_series(11, 20))"); +my $arc_target_lsn = + $arc_primary->safe_psql('postgres', "SELECT pg_current_wal_insert_lsn()"); + +# Force WAL to be archived by switching segments, then wait for archiving. +my $arc_segment = $arc_primary->safe_psql('postgres', + "SELECT pg_walfile_name(pg_current_wal_lsn())"); +$arc_primary->safe_psql('postgres', "SELECT pg_switch_wal()"); +$arc_primary->poll_query_until('postgres', + qq{SELECT last_archived_wal >= '$arc_segment' FROM pg_stat_archiver}, 't') + or die "Timed out waiting for WAL archiving on arc_primary"; + +# Create an archive-only standby: has_restoring but NOT has_streaming. 
+# No primary_conninfo means no walreceiver will start. +my $arc_standby = PostgreSQL::Test::Cluster->new('arc_standby'); +$arc_standby->init_from_backup($arc_primary, $arc_backup_name, + has_restoring => 1); +$arc_standby->start; + +# Wait for the standby to replay past our target LSN via archive recovery. +$arc_standby->poll_query_until('postgres', + qq{SELECT pg_wal_lsn_diff(pg_last_wal_replay_lsn(), '$arc_target_lsn') >= 0} +) or die "Timed out waiting for archive replay on arc_standby"; + +# Sanity: verify no walreceiver is running. +$output = $arc_standby->safe_psql('postgres', + "SELECT count(*) FROM pg_stat_wal_receiver"); +is($output, '0', "arc_standby has no walreceiver"); + +# 9a. Getter fallback: standby_write/standby_flush succeed immediately when +# the target LSN has already been replayed, even though writtenUpto and +# flushedUpto are zero. GetCurrentLSNForWaitType() returns +# Max(walrcv_pos, replay), so replay >= target satisfies the check on the +# first loop iteration without ever sleeping. + +$output = $arc_standby->safe_psql( + 'postgres', qq[ + WAIT FOR LSN '${arc_target_lsn}' + WITH (MODE 'standby_write', timeout '3s', no_throw);]); +ok($output eq "success", + "standby_write succeeds on archive-only standby (getter fallback)"); + +$output = $arc_standby->safe_psql( + 'postgres', qq[ + WAIT FOR LSN '${arc_target_lsn}' + WITH (MODE 'standby_flush', timeout '3s', no_throw);]); +ok($output eq "success", + "standby_flush succeeds on archive-only standby (getter fallback)"); + +$arc_standby->stop; +$arc_primary->stop; + done_testing();