Fix pgstat_count_io_op_time() calls passing incorrect information

Several calls of pgstat_count_io_op_time() passed the return value of
pg_pread() or pg_pwrite() as the byte count without first checking it
for failure.  A negative result was then cast to uint64, leading to an
incorrect count being reported.

Most of the problematic calls updated here are adjusted so that we no
longer report bogus negative numbers.  In xlogrecovery.c, the updated
spot still counts short reads, but only when the read returned a
positive number of bytes.  In xlog.c, after a WAL segment
initialization, I/O numbers are aggregated only after checking that the
operation has succeeded.

Issues introduced by a051e71e28.

Reported-by: Peter Eisentraut <peter@eisentraut.org>
Author: Bertrand Drouvot <bertranddrouvot.pg@gmail.com>
Reviewed-by: Michael Paquier <michael@paquier.xyz>
Reviewed-by: Ayush Tiwari <ayushtiwari.slg01@gmail.com>
Discussion: https://postgr.es/m/0db864e6-4477-4eba-b2be-d3523cc86564@eisentraut.org
Backpatch-through: 18
This commit is contained in:
Michael Paquier 2026-06-17 16:05:11 +09:00
parent f29299c42b
commit 3048e81308
4 changed files with 23 additions and 19 deletions

View file

@ -2455,9 +2455,6 @@ XLogWrite(XLogwrtRqst WriteRqst, TimeLineID tli, bool flexible)
written = pg_pwrite(openLogFile, from, nleft, startoffset);
pgstat_report_wait_end();
pgstat_count_io_op_time(IOOBJECT_WAL, IOCONTEXT_NORMAL,
IOOP_WRITE, start, 1, written);
if (written <= 0)
{
char xlogfname[MAXFNAMELEN];
@ -2475,6 +2472,9 @@ XLogWrite(XLogwrtRqst WriteRqst, TimeLineID tli, bool flexible)
errmsg("could not write to log file \"%s\" at offset %u, length %zu: %m",
xlogfname, startoffset, nleft)));
}
pgstat_count_io_op_time(IOOBJECT_WAL, IOCONTEXT_NORMAL,
IOOP_WRITE, start, 1, written);
nleft -= written;
from += written;
startoffset += written;
@ -3331,14 +3331,6 @@ XLogFileInitInternal(XLogSegNo logsegno, TimeLineID logtli,
}
pgstat_report_wait_end();
/*
* A full segment worth of data is written when using wal_init_zero. One
* byte is written when not using it.
*/
pgstat_count_io_op_time(IOOBJECT_WAL, IOCONTEXT_INIT, IOOP_WRITE,
io_start, 1,
wal_init_zero ? wal_segment_size : 1);
if (save_errno)
{
/*
@ -3355,6 +3347,14 @@ XLogFileInitInternal(XLogSegNo logsegno, TimeLineID logtli,
errmsg("could not write to file \"%s\": %m", tmppath)));
}
/*
* A full segment worth of data is written when using wal_init_zero. One
* byte is written when not using it.
*/
pgstat_count_io_op_time(IOOBJECT_WAL, IOCONTEXT_INIT, IOOP_WRITE,
io_start, 1,
wal_init_zero ? wal_segment_size : 1);
/* Measure I/O timing when flushing segment */
io_start = pgstat_prepare_io_time(track_wal_io_timing);

View file

@ -1597,9 +1597,6 @@ WALRead(XLogReaderState *state,
#ifndef FRONTEND
pgstat_report_wait_end();
pgstat_count_io_op_time(IOOBJECT_WAL, IOCONTEXT_NORMAL, IOOP_READ,
io_start, 1, readbytes);
#endif
if (readbytes <= 0)
@ -1612,6 +1609,11 @@ WALRead(XLogReaderState *state,
return false;
}
#ifndef FRONTEND
pgstat_count_io_op_time(IOOBJECT_WAL, IOCONTEXT_NORMAL, IOOP_READ,
io_start, 1, readbytes);
#endif
/* Update state for read */
recptr += readbytes;
nbytes -= readbytes;

View file

@ -3390,8 +3390,10 @@ retry:
pgstat_report_wait_end();
pgstat_count_io_op_time(IOOBJECT_WAL, IOCONTEXT_NORMAL, IOOP_READ,
io_start, 1, r);
/* Count I/O stats only for successful short reads */
if (r > 0)
pgstat_count_io_op_time(IOOBJECT_WAL, IOCONTEXT_NORMAL, IOOP_READ,
io_start, 1, r);
XLogFileName(fname, curFileTLI, readSegNo, wal_segment_size);
if (r < 0)

View file

@ -954,9 +954,6 @@ XLogWalRcvWrite(char *buf, Size nbytes, XLogRecPtr recptr, TimeLineID tli)
byteswritten = pg_pwrite(recvFile, buf, segbytes, (pgoff_t) startoff);
pgstat_report_wait_end();
pgstat_count_io_op_time(IOOBJECT_WAL, IOCONTEXT_NORMAL,
IOOP_WRITE, start, 1, byteswritten);
if (byteswritten <= 0)
{
char xlogfname[MAXFNAMELEN];
@ -976,6 +973,9 @@ XLogWalRcvWrite(char *buf, Size nbytes, XLogRecPtr recptr, TimeLineID tli)
xlogfname, startoff, segbytes)));
}
pgstat_count_io_op_time(IOOBJECT_WAL, IOCONTEXT_NORMAL,
IOOP_WRITE, start, 1, byteswritten);
/* Update state for write */
recptr += byteswritten;