diff --git a/src/backend/storage/aio/aio.c b/src/backend/storage/aio/aio.c index e4ae3031fef..8f7e26607b9 100644 --- a/src/backend/storage/aio/aio.c +++ b/src/backend/storage/aio/aio.c @@ -1019,6 +1019,21 @@ pgaio_wref_check_done(PgAioWaitRef *iow) am_owner = ioh->owner_procno == MyProcNumber; + /* + * If the IO is not executing synchronously, give the IO method a chance to + * check whether the IO has already completed, then look at the handle again. + */ + if (pgaio_method_ops->check_one && !(ioh->flags & PGAIO_HF_SYNCHRONOUS)) + { + pgaio_method_ops->check_one(ioh, ref_generation); + + if (pgaio_io_was_recycled(ioh, ref_generation, &state)) + return true; + + if (state == PGAIO_HS_IDLE) + return true; + } + if (state == PGAIO_HS_COMPLETED_SHARED || state == PGAIO_HS_COMPLETED_LOCAL) { @@ -1032,11 +1047,6 @@ pgaio_wref_check_done(PgAioWaitRef *iow) return true; } - /* - * XXX: It likely would be worth checking in with the io method, to give - * the IO method a chance to check if there are completion events queued. - */ - return false; } diff --git a/src/backend/storage/aio/method_io_uring.c b/src/backend/storage/aio/method_io_uring.c index 4867ded35ea..39984df31b4 100644 --- a/src/backend/storage/aio/method_io_uring.c +++ b/src/backend/storage/aio/method_io_uring.c @@ -54,6 +54,7 @@ static void pgaio_uring_shmem_init(bool first_time); static void pgaio_uring_init_backend(void); static int pgaio_uring_submit(uint16 num_staged_ios, PgAioHandle **staged_ios); static void pgaio_uring_wait_one(PgAioHandle *ioh, uint64 ref_generation); +static void pgaio_uring_check_one(PgAioHandle *ioh, uint64 ref_generation); /* helper functions */ static void pgaio_uring_sq_from_io(PgAioHandle *ioh, struct io_uring_sqe *sqe); @@ -75,6 +76,7 @@ const IoMethodOps pgaio_uring_ops = { .submit = pgaio_uring_submit, .wait_one = pgaio_uring_wait_one, + .check_one = pgaio_uring_check_one, }; /* @@ -658,6 +660,47 @@ pgaio_uring_wait_one(PgAioHandle *ioh, uint64 ref_generation) waited); } +static void +pgaio_uring_check_one(PgAioHandle 
*ioh, uint64 ref_generation) +{ + ProcNumber owner_procno = ioh->owner_procno; + PgAioUringContext *owner_context = &pgaio_uring_contexts[owner_procno]; + + /* + * Without the completion lock this check is not reliable, but it is a + * cheap pre-check telling us whether it is worth trying to acquire the + * completion lock at all. + */ + if (!io_uring_cq_ready(&owner_context->io_uring_ring)) + return; + + /* + * If the completion lock is currently held, the holder will likely + * process any pending completions, so just give up. + */ + if (!LWLockConditionalAcquire(&owner_context->completion_lock, LW_EXCLUSIVE)) + return; + + pgaio_debug_io(DEBUG3, ioh, + "check_one io_gen: %" PRIu64 ", ref_gen: %" PRIu64, + ioh->generation, + ref_generation); + + /* + * Recheck if there are any completions: another backend could have + * processed them since we checked above, or our unlocked pre-check could + * have been reading outdated values. + * + * It is possible that the IO handle has been reused since the start of + * the call, but now that we have the lock, we can just as well drain all + * completions. + */ + if (io_uring_cq_ready(&owner_context->io_uring_ring)) + pgaio_uring_drain_locked(owner_context); + + LWLockRelease(&owner_context->completion_lock); +} + static void pgaio_uring_sq_from_io(PgAioHandle *ioh, struct io_uring_sqe *sqe) { diff --git a/src/include/storage/aio_internal.h b/src/include/storage/aio_internal.h index 5feea15be9e..33e1e2dc048 100644 --- a/src/include/storage/aio_internal.h +++ b/src/include/storage/aio_internal.h @@ -328,6 +328,21 @@ typedef struct IoMethodOps */ void (*wait_one) (PgAioHandle *ioh, uint64 ref_generation); + + /* --- + * Check if IO has already completed. Optional. + * + * Some IO methods need to poll a kernel object to see if IO has already + * completed in the background. This callback allows them to do so. + * + * This callback must not wait for IO to complete; it is however allowed, + * although not desirable, to wait for short-lived locks. 
It is ok from a + * correctness perspective to not process any/all available completions; + * that can merely lead to inferior performance. + * --- + */ + void (*check_one) (PgAioHandle *ioh, + uint64 ref_generation); } IoMethodOps;