diff --git a/src/backend/storage/aio/read_stream.c b/src/backend/storage/aio/read_stream.c index c9595ea10c7..31f9e35dee3 100644 --- a/src/backend/storage/aio/read_stream.c +++ b/src/backend/storage/aio/read_stream.c @@ -99,6 +99,7 @@ struct ReadStream int16 forwarded_buffers; int16 pinned_buffers; int16 distance; + uint16 distance_decay_holdoff; int16 initialized_buffers; int16 resume_distance; int read_buffers_flags; @@ -364,9 +365,22 @@ read_stream_start_pending_read(ReadStream *stream) /* Remember whether we need to wait before returning this buffer. */ if (!need_wait) { - /* Look-ahead distance decays, no I/O necessary. */ - if (stream->distance > 1) - stream->distance--; + /* + * If there currently is no IO in progress, and we have not needed to + * issue IO recently, decay the look-ahead distance. We detect if we + * had to issue IO recently by having a decay holdoff that's set to + * the max look-ahead distance whenever we need to do IO. This is + * important to ensure we eventually reach a high enough distance to + * perform IO asynchronously when starting out with a small look-ahead + * distance. + */ + if (stream->distance > 1 && stream->ios_in_progress == 0) + { + if (stream->distance_decay_holdoff == 0) + stream->distance--; + else + stream->distance_decay_holdoff--; + } } else { @@ -702,6 +716,7 @@ read_stream_begin_impl(int flags, stream->seq_blocknum = InvalidBlockNumber; stream->seq_until_processed = InvalidBlockNumber; stream->temporary = SmgrIsTemp(smgr); + stream->distance_decay_holdoff = 0; /* * Skip the initial ramp-up phase if the caller says we're going to be @@ -954,6 +969,20 @@ read_stream_next_buffer(ReadStream *stream, void **per_buffer_data) distance = Min(distance, stream->max_pinned_buffers); stream->distance = distance; + /* + * As we needed IO, prevent distance from being reduced within our + * maximum look-ahead window. 
This avoids having distance collapse too + * quickly in workloads where most of the required blocks are cached, + * but where the remaining IOs are a significant enough factor to cause + * a substantial slowdown if executed synchronously. + * + * There are valid arguments for preventing decay for max_ios or for + * max_pinned_buffers. But the argument for max_pinned_buffers seems + * clearer: if we can't see any misses within the maximum look-ahead + * distance, we can't do any useful read-ahead. + */ + stream->distance_decay_holdoff = stream->max_pinned_buffers; + /* * If we've reached the first block of a sequential region we're * issuing advice for, cancel that until the next jump. The kernel @@ -1128,6 +1157,7 @@ read_stream_reset(ReadStream *stream) /* Start off assuming data is cached. */ stream->distance = 1; stream->resume_distance = stream->distance; + stream->distance_decay_holdoff = 0; } /*