From 8b25373c586806a8ff1de5c4e8c04e2f2ad6f6c6 Mon Sep 17 00:00:00 2001 From: 1seal Date: Thu, 26 Feb 2026 20:34:25 +0100 Subject: [PATCH] raft: scope join retry context to core shutdown lifecycle The raft join HTTP handler passes context.Background() into JoinRaftCluster. When retry=true, this can spawn a background retry goroutine that cannot be canceled, because that context is never canceled. Add a shutdown-scoped context to Core that is canceled in Core.Shutdown(), and use it in the raft join HTTP handler so that join retry goroutines are automatically stopped when the core shuts down. The config-based retry_join path (InitiateRetryJoin) already receives a properly scoped context from the server command, so it is not affected. --- http/sys_raft.go | 2 +- vault/core.go | 22 ++++++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/http/sys_raft.go b/http/sys_raft.go index b2055d674c..f05af7f490 100644 --- a/http/sys_raft.go +++ b/http/sys_raft.go @@ -87,7 +87,7 @@ func handleSysRaftJoinPost(core *vault.Core, w http.ResponseWriter, r *http.Requ }, } - joined, err := core.JoinRaftCluster(context.Background(), leaderInfos, req.NonVoter) + joined, err := core.JoinRaftCluster(core.ShutdownContext(), leaderInfos, req.NonVoter) if err != nil { respondError(w, http.StatusInternalServerError, err) return diff --git a/vault/core.go b/vault/core.go index 5e71829661..16630b7bd8 100644 --- a/vault/core.go +++ b/vault/core.go @@ -309,6 +309,12 @@ type Core struct { // that the join is complete raftJoinDoneCh chan struct{} + // shutdownCtx is a context that is canceled when the Core is shut down. + // It is used to scope background operations (such as raft join retries) + // that must not outlive the Core. 
+ shutdownCtx context.Context + shutdownCtxCancel context.CancelFunc + // postUnsealStarted informs the raft retry join routine that unseal key // validation is completed and post unseal has started so that it can complete // the join process when Shamir seal is in use @@ -1067,6 +1073,8 @@ func CreateCore(conf *CoreConfig) (*Core, error) { mountsLock := locking.CreateConfigurableRWMutex(detectDeadlocks, "mountsLock") authLock := locking.CreateConfigurableRWMutex(detectDeadlocks, "authLock") + shutdownCtx, shutdownCtxCancel := context.WithCancel(context.Background()) + // Setup the core c := &Core{ entCore: entCore{}, @@ -1125,6 +1133,8 @@ func CreateCore(conf *CoreConfig) (*Core, error) { postUnsealStarted: new(uint32), raftInfo: new(atomic.Value), raftJoinDoneCh: make(chan struct{}), + shutdownCtx: shutdownCtx, + shutdownCtxCancel: shutdownCtxCancel, clusterHeartbeatInterval: clusterHeartbeatInterval, activityLogConfig: conf.ActivityLogConfig, billingConfig: conf.BillingConfig, @@ -1651,6 +1661,9 @@ func (c *Core) ShutdownCoreError(err error) { // happens as quickly as possible. func (c *Core) Shutdown() error { c.logger.Debug("shutdown called") + if c.shutdownCtxCancel != nil { + c.shutdownCtxCancel() + } err := c.sealInternal() c.stateLock.Lock() @@ -1679,6 +1692,15 @@ func (c *Core) ShutdownDone() <-chan struct{} { return c.shutdownDoneCh.Load().(chan struct{}) } +// ShutdownContext returns a context that is canceled when the Core shuts down. +// Use this for background operations that must not outlive the Core. +func (c *Core) ShutdownContext() context.Context { + if c.shutdownCtx == nil { + return context.Background() + } + return c.shutdownCtx +} + // CORSConfig returns the current CORS configuration func (c *Core) CORSConfig() *CORSConfig { return c.corsConfig