Fix restart of control-plane-only nodes attempting to reconcile from local datastore

Signed-off-by: Brad Davidson <brad.davidson@rancher.com>
This commit is contained in:
Brad Davidson 2026-01-30 20:57:49 +00:00 committed by Brad Davidson
parent f061615cb4
commit b3962bd057
3 changed files with 27 additions and 8 deletions

View file

@ -287,7 +287,7 @@ func (c *Cluster) ReconcileBootstrapData(ctx context.Context, buf io.ReadSeeker,
storageClient, err := store.NewTemporaryStore(filepath.Join(c.config.DataDir, "db", "etcd"))
if err != nil {
return err
return pkgerrors.WithMessage(err, "failed to create temporary datastore client")
}
defer storageClient.Close()
@ -465,7 +465,7 @@ func (c *Cluster) bootstrap(ctx context.Context) error {
logrus.Debugf("Failed to get bootstrap data from etcd proxy: %v", err)
} else {
if err := c.ReconcileBootstrapData(ctx, bytes.NewReader(data), &c.config.Runtime.ControlRuntimeBootstrap, false); err != nil {
logrus.Debugf("Failed to reconcile bootstrap data from etcd proxy: %v", err)
logrus.Debugf("Failed to reconcile with local datastore: %v", err)
} else {
return nil
}

View file

@ -718,6 +718,11 @@ func (e *ETCD) Register(handler http.Handler) (http.Handler, error) {
// or if force is set to true, a new name will be generated and written to disk. The persistent
// name is used on subsequent calls.
func (e *ETCD) setName(force bool) error {
// don't create the name file if etcd is disabled
if e.config.DisableETCD {
return nil
}
fileName := nameFile(e.config)
data, err := os.ReadFile(fileName)
if os.IsNotExist(err) || force {
@ -1085,6 +1090,11 @@ func addPort(address string, offset int) (string, error) {
// RemovePeer removes a peer from the cluster. The peer name and IP address must both match.
func (e *ETCD) RemovePeer(ctx context.Context, name, address string, allowSelfRemoval bool) error {
// an empty name means etcd has never been started on this node, so there is no member to remove
if name == "" {
return nil
}
ctx, cancel := context.WithTimeout(ctx, memberRemovalTimeout)
defer cancel()
members, err := e.client.MemberList(ctx)
@ -1676,9 +1686,10 @@ func (e *ETCD) RemoveSelf(ctx context.Context) error {
// backup the data dir to avoid issues when re-enabling etcd
oldDataDir := dbDir(e.config) + "-old-" + strconv.Itoa(int(time.Now().Unix()))
// move the data directory to a temp path
return os.Rename(dbDir(e.config), oldDataDir)
if err := os.Rename(dbDir(e.config), oldDataDir); err != nil && !os.IsNotExist(err) {
return err
}
return nil
}
// DefaultEndpointConfig returns default kine endpoint config, with k3s default

View file

@ -9,6 +9,7 @@ import (
"github.com/k3s-io/kine/pkg/endpoint"
"github.com/otiai10/copy"
pkgerrors "github.com/pkg/errors"
"github.com/rancher/wrangler/v3/pkg/merr"
"github.com/sirupsen/logrus"
"go.etcd.io/etcd/api/v3/mvccpb"
@ -61,7 +62,6 @@ func NewRemoteStore(config endpoint.ETCDConfig) (*RemoteStore, error) {
if err != nil {
return nil, err
}
c, err := clientv3.New(clientv3.Config{
Endpoints: config.Endpoints,
DialTimeout: 5 * time.Second,
@ -205,11 +205,19 @@ func NewStore(dataDir string) (*Store, error) {
}
cfg := config.ServerConfig{Logger: logger, DataDir: dataDir}
logrus.Infof("Opening etcd MVCC KV store at %s", cfg.BackendPath())
path := cfg.BackendPath()
// Check for the backend path ourselves: if it doesn't exist, backend.New just logs a
// panic via zap, and that log is not fatal.
if _, err := os.Stat(path); err != nil {
return nil, pkgerrors.WithMessage(err, "failed to stat MVCC KV store backend path")
}
logrus.Infof("Opening etcd MVCC KV store at %s", path)
// open backend database
bcfg := backend.DefaultBackendConfig(logger)
bcfg.Path = cfg.BackendPath()
bcfg.Path = path
bcfg.UnsafeNoFsync = true
bcfg.BatchInterval = 0
bcfg.BatchLimit = 0