From b3962bd05745387e5d4bcf2b0f7418d1046a9610 Mon Sep 17 00:00:00 2001 From: Brad Davidson Date: Fri, 30 Jan 2026 20:57:49 +0000 Subject: [PATCH] Fix restart of control-plane-only nodes attempting to reconcile from local datastore Signed-off-by: Brad Davidson --- pkg/cluster/bootstrap.go | 4 ++-- pkg/etcd/etcd.go | 17 ++++++++++++++--- pkg/etcd/store/store.go | 14 +++++++++++--- 3 files changed, 27 insertions(+), 8 deletions(-) diff --git a/pkg/cluster/bootstrap.go b/pkg/cluster/bootstrap.go index c5c2d76e7ad..ce4f0502476 100644 --- a/pkg/cluster/bootstrap.go +++ b/pkg/cluster/bootstrap.go @@ -287,7 +287,7 @@ func (c *Cluster) ReconcileBootstrapData(ctx context.Context, buf io.ReadSeeker, storageClient, err := store.NewTemporaryStore(filepath.Join(c.config.DataDir, "db", "etcd")) if err != nil { - return err + return pkgerrors.WithMessage(err, "failed to create temporary datastore client") } defer storageClient.Close() @@ -465,7 +465,7 @@ func (c *Cluster) bootstrap(ctx context.Context) error { logrus.Debugf("Failed to get bootstrap data from etcd proxy: %v", err) } else { if err := c.ReconcileBootstrapData(ctx, bytes.NewReader(data), &c.config.Runtime.ControlRuntimeBootstrap, false); err != nil { - logrus.Debugf("Failed to reconcile bootstrap data from etcd proxy: %v", err) + logrus.Debugf("Failed to reconcile with local datastore: %v", err) } else { return nil } diff --git a/pkg/etcd/etcd.go b/pkg/etcd/etcd.go index 8ddb879e62a..9164d426c69 100644 --- a/pkg/etcd/etcd.go +++ b/pkg/etcd/etcd.go @@ -718,6 +718,11 @@ func (e *ETCD) Register(handler http.Handler) (http.Handler, error) { // or if force is set to true, a new name will be generated and written to disk. The persistent // name is used on subsequent calls. func (e *ETCD) setName(force bool) error { + // don't create the name file if etcd is disabled + if e.config.DisableETCD { + return nil + } + fileName := nameFile(e.config) data, err := os.ReadFile(fileName) if os.IsNotExist(err) || force { @@ -1085,6 +1090,11 @@ func addPort(address string, offset int) (string, error) { // RemovePeer removes a peer from the cluster. The peer name and IP address must both match. func (e *ETCD) RemovePeer(ctx context.Context, name, address string, allowSelfRemoval bool) error { + // do not remove self if we have never started etcd on this node + if name == "" { + return nil + } + ctx, cancel := context.WithTimeout(ctx, memberRemovalTimeout) defer cancel() members, err := e.client.MemberList(ctx) @@ -1676,9 +1686,10 @@ func (e *ETCD) RemoveSelf(ctx context.Context) error { // backup the data dir to avoid issues when re-enabling etcd oldDataDir := dbDir(e.config) + "-old-" + strconv.Itoa(int(time.Now().Unix())) - - // move the data directory to a temp path - return os.Rename(dbDir(e.config), oldDataDir) + if err := os.Rename(dbDir(e.config), oldDataDir); err != nil && !os.IsNotExist(err) { + return err + } + return nil } // DefaultEndpointConfig returns default kine endpoint config, with k3s default diff --git a/pkg/etcd/store/store.go b/pkg/etcd/store/store.go index 87ce09c6b1d..a53a5a77440 100644 --- a/pkg/etcd/store/store.go +++ b/pkg/etcd/store/store.go @@ -9,6 +9,7 @@ import ( "github.com/k3s-io/kine/pkg/endpoint" "github.com/otiai10/copy" + pkgerrors "github.com/pkg/errors" "github.com/rancher/wrangler/v3/pkg/merr" "github.com/sirupsen/logrus" "go.etcd.io/etcd/api/v3/mvccpb" @@ -61,7 +62,6 @@ func NewRemoteStore(config endpoint.ETCDConfig) (*RemoteStore, error) { if err != nil { return nil, err } - c, err := clientv3.New(clientv3.Config{ Endpoints: config.Endpoints, DialTimeout: 5 * time.Second, @@ -205,11 +205,19 @@ func NewStore(dataDir string) (*Store, error) { } cfg := config.ServerConfig{Logger: logger, DataDir: dataDir} - logrus.Infof("Opening etcd MVCC KV store at %s", cfg.BackendPath()) + path := cfg.BackendPath() + + // need to check for backend path ourselves, as backend.New just logs a panic + // via zap if it doesn't exist, which isn't fatal. + if _, err := os.Stat(path); err != nil { + return nil, pkgerrors.WithMessage(err, "failed to stat MVCC KV store backend path") + } + + logrus.Infof("Opening etcd MVCC KV store at %s", path) // open backend database bcfg := backend.DefaultBackendConfig(logger) - bcfg.Path = cfg.BackendPath() + bcfg.Path = path bcfg.UnsafeNoFsync = true bcfg.BatchInterval = 0 bcfg.BatchLimit = 0