mirror of
https://github.com/hashicorp/vault.git
synced 2026-02-03 20:40:45 -05:00
Fix race with dead server cleanup when adding new raft nodes (#20986)
* Don't call AddPeer for a new raft cluster member until it's in raftFollowerStates, or it might get cleaned up by dead server cleanup.
This commit is contained in:
parent
be4979dfbb
commit
da5d0ca498
3 changed files with 29 additions and 22 deletions
3
changelog/20986.txt
Normal file
3
changelog/20986.txt
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
```release-note:bug
|
||||
storage/raft: Fix race where new follower joining can get pruned by dead server cleanup.
|
||||
```
|
||||
|
|
@ -215,13 +215,15 @@ func NewFollowerStates() *FollowerStates {
|
|||
}
|
||||
}
|
||||
|
||||
// Update the peer information in the follower states. Note that this function runs on the active node.
|
||||
func (s *FollowerStates) Update(req *EchoRequestUpdate) {
|
||||
// Update the peer information in the follower states. Note that this function
|
||||
// runs on the active node. Returns true if a new entry was added, as opposed
|
||||
// to modifying one already present.
|
||||
func (s *FollowerStates) Update(req *EchoRequestUpdate) bool {
|
||||
s.l.Lock()
|
||||
defer s.l.Unlock()
|
||||
|
||||
state, ok := s.followers[req.NodeID]
|
||||
if !ok {
|
||||
state, present := s.followers[req.NodeID]
|
||||
if !present {
|
||||
state = &FollowerState{
|
||||
IsDead: atomic.NewBool(false),
|
||||
}
|
||||
|
|
@ -236,6 +238,8 @@ func (s *FollowerStates) Update(req *EchoRequestUpdate) {
|
|||
state.Version = req.SDKVersion
|
||||
state.UpgradeVersion = req.UpgradeVersion
|
||||
state.RedundancyZone = req.RedundancyZone
|
||||
|
||||
return !present
|
||||
}
|
||||
|
||||
// Clear wipes all the information regarding peers in the follower states.
|
||||
|
|
|
|||
|
|
@ -248,9 +248,8 @@ func (b *SystemBackend) handleRaftRemovePeerUpdate() framework.OperationFunc {
|
|||
if err := raftBackend.RemovePeer(ctx, serverID); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if b.Core.raftFollowerStates != nil {
|
||||
b.Core.raftFollowerStates.Delete(serverID)
|
||||
}
|
||||
|
||||
b.Core.raftFollowerStates.Delete(serverID)
|
||||
|
||||
return nil, nil
|
||||
}
|
||||
|
|
@ -351,16 +350,6 @@ func (b *SystemBackend) handleRaftBootstrapAnswerWrite() framework.OperationFunc
|
|||
return nil, errors.New("could not decode raft TLS configuration")
|
||||
}
|
||||
|
||||
switch nonVoter {
|
||||
case true:
|
||||
err = raftBackend.AddNonVotingPeer(ctx, serverID, clusterAddr)
|
||||
default:
|
||||
err = raftBackend.AddPeer(ctx, serverID, clusterAddr)
|
||||
}
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var desiredSuffrage string
|
||||
switch nonVoter {
|
||||
case true:
|
||||
|
|
@ -369,11 +358,22 @@ func (b *SystemBackend) handleRaftBootstrapAnswerWrite() framework.OperationFunc
|
|||
desiredSuffrage = "voter"
|
||||
}
|
||||
|
||||
if b.Core.raftFollowerStates != nil {
|
||||
b.Core.raftFollowerStates.Update(&raft.EchoRequestUpdate{
|
||||
NodeID: serverID,
|
||||
DesiredSuffrage: desiredSuffrage,
|
||||
})
|
||||
added := b.Core.raftFollowerStates.Update(&raft.EchoRequestUpdate{
|
||||
NodeID: serverID,
|
||||
DesiredSuffrage: desiredSuffrage,
|
||||
})
|
||||
|
||||
switch nonVoter {
|
||||
case true:
|
||||
err = raftBackend.AddNonVotingPeer(ctx, serverID, clusterAddr)
|
||||
default:
|
||||
err = raftBackend.AddPeer(ctx, serverID, clusterAddr)
|
||||
}
|
||||
if err != nil {
|
||||
if added {
|
||||
b.Core.raftFollowerStates.Delete(serverID)
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
|
||||
peers, err := raftBackend.Peers(ctx)
|
||||
|
|
|
|||
Loading…
Reference in a new issue