aio: io_uring: Allow IO methods to check if IO completed in the background

Until now pgaio_wref_check_done() with io_method=io_uring would not detect
IOs that the kernel has already completed but whose completions have not yet
been consumed by userspace.  This can lead to inferior performance and also
makes it harder to use smarter feedback logic in read_stream, because we
cannot use knowledge about whether an IO completed to control the readahead
distance.

This commit just adds the io_uring specific infrastructure. Later commits will
return whether a wait was needed from WaitReadBuffers() and then use that
knowledge.

Reviewed-by: Melanie Plageman <melanieplageman@gmail.com>
Reviewed-by: Nazir Bilal Yavuz <byavuz81@gmail.com>
Discussion: https://postgr.es/m/f3xxfrkafjxpyqxywcxricxgyizjirfceychyxsgn7bwjp5eda@kwbduhy7tfmu
Discussion: https://postgr.es/m/CAH2-Wz%3DkMg3PNay96cHMT0LFwtxP-cQSRZTZzh1Cixxf8G%3Dzrw%40mail.gmail.com
This commit is contained in:
Andres Freund 2026-04-01 09:26:43 -04:00
parent edee563456
commit 6e648e353f
3 changed files with 73 additions and 5 deletions

View file

@ -1019,6 +1019,21 @@ pgaio_wref_check_done(PgAioWaitRef *iow)
am_owner = ioh->owner_procno == MyProcNumber;
/*
* If the IO is not executing synchronously, allow the IO method to check
* if the IO already has completed.
*/
if (pgaio_method_ops->check_one && !(ioh->flags & PGAIO_HF_SYNCHRONOUS))
{
pgaio_method_ops->check_one(ioh, ref_generation);
if (pgaio_io_was_recycled(ioh, ref_generation, &state))
return true;
if (state == PGAIO_HS_IDLE)
return true;
}
if (state == PGAIO_HS_COMPLETED_SHARED ||
state == PGAIO_HS_COMPLETED_LOCAL)
{
@ -1032,11 +1047,6 @@ pgaio_wref_check_done(PgAioWaitRef *iow)
return true;
}
/*
* XXX: It likely would be worth checking in with the io method, to give
* the IO method a chance to check if there are completion events queued.
*/
return false;
}

View file

@ -54,6 +54,7 @@ static void pgaio_uring_shmem_init(bool first_time);
static void pgaio_uring_init_backend(void);
static int pgaio_uring_submit(uint16 num_staged_ios, PgAioHandle **staged_ios);
static void pgaio_uring_wait_one(PgAioHandle *ioh, uint64 ref_generation);
static void pgaio_uring_check_one(PgAioHandle *ioh, uint64 ref_generation);
/* helper functions */
static void pgaio_uring_sq_from_io(PgAioHandle *ioh, struct io_uring_sqe *sqe);
@ -75,6 +76,7 @@ const IoMethodOps pgaio_uring_ops = {
.submit = pgaio_uring_submit,
.wait_one = pgaio_uring_wait_one,
.check_one = pgaio_uring_check_one,
};
/*
@ -658,6 +660,47 @@ pgaio_uring_wait_one(PgAioHandle *ioh, uint64 ref_generation)
waited);
}
/*
 * Non-blocking attempt to consume completions that are already queued in the
 * io_uring instance belonging to the process recorded as ioh's owner.
 *
 * Never waits: if there is nothing queued, or the completion lock cannot be
 * taken immediately, the function returns without doing anything.
 */
static void
pgaio_uring_check_one(PgAioHandle *ioh, uint64 ref_generation)
{
	PgAioUringContext *ring_ctx = &pgaio_uring_contexts[ioh->owner_procno];

	/*
	 * Unlocked peek at the completion queue. Without the completion lock the
	 * answer may be stale, but it is cheap and lets us skip the lock attempt
	 * entirely in the common "nothing to do" case.
	 */
	if (io_uring_cq_ready(&ring_ctx->io_uring_ring) == 0)
		return;

	/*
	 * Only try the lock, never block on it. Whoever holds it right now will
	 * most likely reap the pending completions anyway.
	 */
	if (!LWLockConditionalAcquire(&ring_ctx->completion_lock, LW_EXCLUSIVE))
		return;

	pgaio_debug_io(DEBUG3, ioh,
				   "check_one io_gen: %" PRIu64 ", ref_gen: %" PRIu64,
				   ioh->generation,
				   ref_generation);

	/*
	 * Look again now that we hold the lock: the earlier peek may have seen
	 * outdated values, or another backend may have drained the queue in the
	 * meantime.
	 *
	 * The handle might have been reused since this function was entered;
	 * that is harmless here, because with the lock held we simply drain
	 * every available completion rather than looking for one specific IO.
	 */
	if (io_uring_cq_ready(&ring_ctx->io_uring_ring) != 0)
		pgaio_uring_drain_locked(ring_ctx);

	LWLockRelease(&ring_ctx->completion_lock);
}
static void
pgaio_uring_sq_from_io(PgAioHandle *ioh, struct io_uring_sqe *sqe)
{

View file

@ -328,6 +328,21 @@ typedef struct IoMethodOps
*/
void (*wait_one) (PgAioHandle *ioh,
uint64 ref_generation);
/* ---
* Check if IO has already completed. Optional.
*
* Some IO methods need to poll a kernel object to see if IO has already
* completed in the background. This callback allows them to do so.
*
* This callback must not wait for IO to complete; however, it is allowed,
* although not desirable, to wait for short-lived locks. It is ok from a
* correctness perspective to not process any/all available completions;
* it just can lead to inferior performance.
* ---
*/
void (*check_one) (PgAioHandle *ioh,
uint64 ref_generation);
} IoMethodOps;