From be2f109275a10b7f2ed2591ea46cf4f76bd25733 Mon Sep 17 00:00:00 2001 From: Alexander Scheel Date: Tue, 8 Aug 2023 10:42:02 -0500 Subject: [PATCH] Ignore errors from rollback manager invocations (#22235) * Ignore errors from rollback manager invocations During reload and mount move operations, we want to ensure that errors created by the final Rollback are not fatal (which risk failing replication in Enterprise when the core/mounts table gets invalidated). This mirrors the behavior of the periodic rollback manager, which only logs the error. This updates the noop backend to allow failing just rollback operations, which we can use in tests to verify this behavior and ensure the core operations (plugin reload, plugin move, and seal/unseal) are not broken by this. Note that most of these operations were asynchronous from the client's PoV and thus did not fail anyways prior to this change. Signed-off-by: Alexander Scheel * Add changelog entry Signed-off-by: Alexander Scheel * Update vault/external_tests/router/router_ext_test.go Co-authored-by: Nick Cabatoff --------- Signed-off-by: Alexander Scheel Co-authored-by: Nick Cabatoff --- changelog/22235.txt | 3 ++ .../external_tests/router/router_ext_test.go | 33 +++++++++++++++++++ vault/mount.go | 16 ++++++--- vault/router_testing.go | 10 ++++++ 4 files changed, 58 insertions(+), 4 deletions(-) create mode 100644 changelog/22235.txt diff --git a/changelog/22235.txt b/changelog/22235.txt new file mode 100644 index 0000000000..3d62e70cb1 --- /dev/null +++ b/changelog/22235.txt @@ -0,0 +1,3 @@ +```release-note:improvement +core: Log rollback manager failures during unmount, remount to prevent replication failures on secondary clusters. +``` diff --git a/vault/external_tests/router/router_ext_test.go b/vault/external_tests/router/router_ext_test.go index 86c282e94d..4faef03eae 100644 --- a/vault/external_tests/router/router_ext_test.go +++ b/vault/external_tests/router/router_ext_test.go @@ -8,6 +8,8 @@ import ( "github.com/hashicorp/vault/api" "github.com/hashicorp/vault/helper/testhelpers/minimal" + "github.com/hashicorp/vault/sdk/logical" + "github.com/hashicorp/vault/vault" ) func TestRouter_MountSubpath_Checks(t *testing.T) { @@ -50,3 +52,34 @@ func testRouter_MountSubpath(t *testing.T, mountPoints []string) { cluster.UnsealCores(t) t.Logf("Done: %#v", mountPoints) } + +func TestRouter_UnmountRollbackIsntFatal(t *testing.T) { + cluster := minimal.NewTestSoloCluster(t, &vault.CoreConfig{ + LogicalBackends: map[string]logical.Factory{ + "noop": vault.NoopBackendRollbackErrFactory, + }, + }) + client := cluster.Cores[0].Client + + if err := client.Sys().Mount("noop", &api.MountInput{ + Type: "noop", + }); err != nil { + t.Fatalf("failed to mount PKI: %v", err) + } + + if _, err := client.Logical().Write("sys/plugins/reload/backend", map[string]interface{}{ + "mounts": "noop", + }); err != nil { + t.Fatalf("expected reload of noop with broken periodic func to succeed; got err=%v", err) + } + + if _, err := client.Logical().Write("sys/remount", map[string]interface{}{ + "from": "noop", + "to": "noop-to", + }); err != nil { + t.Fatalf("expected remount of noop with broken periodic func to succeed; got err=%v", err) + } + + cluster.EnsureCoresSealed(t) + cluster.UnsealCores(t) +} diff --git a/vault/mount.go b/vault/mount.go index a485f2acf4..cd5a3295d4 100644 --- a/vault/mount.go +++ b/vault/mount.go @@ -874,9 +874,13 @@ func (c *Core) unmountInternal(ctx context.Context, path string, updateStorage b rCtx := namespace.ContextWithNamespace(c.activeContext, ns) if backend != nil && c.rollback != nil { - // Invoke the rollback manager a final time + // Invoke the rollback manager a final time. This is not fatal as + // various periodic funcs (e.g., PKI) can legitimately error; the + // periodic rollback manager logs these errors rather than failing + // replication like returning this error would do. if err := c.rollback.Rollback(rCtx, path); err != nil { - return err + c.logger.Error("ignoring rollback error during unmount", "error", err, "path", path) + err = nil } } if backend != nil && c.expiration != nil && updateStorage { @@ -1142,11 +1146,15 @@ func (c *Core) remountSecretsEngine(ctx context.Context, src, dst namespace.Moun } if !c.IsDRSecondary() { - // Invoke the rollback manager a final time + // Invoke the rollback manager a final time. This is not fatal as + // various periodic funcs (e.g., PKI) can legitimately error; the + // periodic rollback manager logs these errors rather than failing + // replication like returning this error would do. rCtx := namespace.ContextWithNamespace(c.activeContext, ns) if c.rollback != nil && c.router.MatchingBackend(ctx, srcRelativePath) != nil { if err := c.rollback.Rollback(rCtx, srcRelativePath); err != nil { - return err + c.logger.Error("ignoring rollback error during remount", "error", err, "path", src.Namespace.Path+src.MountPath) + err = nil } } diff --git a/vault/router_testing.go b/vault/router_testing.go index 78f84a6e65..56e1863097 100644 --- a/vault/router_testing.go +++ b/vault/router_testing.go @@ -29,17 +29,27 @@ type NoopBackend struct { DefaultLeaseTTL time.Duration MaxLeaseTTL time.Duration BackendType logical.BackendType + + RollbackErrs bool } func NoopBackendFactory(_ context.Context, _ *logical.BackendConfig) (logical.Backend, error) { return &NoopBackend{}, nil } +func NoopBackendRollbackErrFactory(_ context.Context, _ *logical.BackendConfig) (logical.Backend, error) { + return &NoopBackend{RollbackErrs: true}, nil +} + func (n *NoopBackend) HandleRequest(ctx context.Context, req *logical.Request) (*logical.Response, error) { if req.TokenEntry() != nil { panic("got a non-nil TokenEntry") } + if n.RollbackErrs && req.Operation == "rollback" { + return nil, fmt.Errorf("no-op backend rollback has erred out") + } + var err error resp := n.Response if n.RequestHandler != nil {