Merge pull request #8525 from shuaili87/pre-vote-compatible

raft: fix deadlock during PreVote migration process
release-3.4
Xiang Li 2018-01-26 16:34:59 -08:00 committed by GitHub
commit d54f281b26
2 changed files with 153 additions and 1 deletion

raft/raft.go

@@ -803,7 +803,7 @@ func (r *raft) Step(m pb.Message) error {
}
case m.Term < r.Term:
if r.checkQuorum && (m.Type == pb.MsgHeartbeat || m.Type == pb.MsgApp) {
if (r.checkQuorum || r.preVote) && (m.Type == pb.MsgHeartbeat || m.Type == pb.MsgApp) {
// We have received messages from a leader at a lower term. It is possible
// that these messages were simply delayed in the network, but this could
// also mean that this node has advanced its term number during a network
@@ -817,7 +817,15 @@ func (r *raft) Step(m pb.Message) error {
// removed node will send MsgVotes (or MsgPreVotes) which will be ignored,
// but it will not receive MsgApp or MsgHeartbeat, so it will not create
// disruptive term increases
// The above comments also apply to Pre-Vote
r.send(pb.Message{To: m.From, Type: pb.MsgAppResp})
} else if m.Type == pb.MsgPreVote {
// Before Pre-Vote is enabled, there may be a candidate with a higher term
// but a shorter log. After updating to Pre-Vote, the cluster may deadlock if
// we drop messages with a lower term.
r.logger.Infof("%x [logterm: %d, index: %d, vote: %x] rejected %s from %x [logterm: %d, index: %d] at term %d",
r.id, r.raftLog.lastTerm(), r.raftLog.lastIndex(), r.Vote, m.Type, m.From, m.LogTerm, m.Index, r.Term)
r.send(pb.Message{To: m.From, Term: r.Term, Type: pb.MsgPreVoteResp, Reject: true})
} else {
// ignore other cases
r.logger.Infof("%x [term: %d] ignored a %s message with lower term from %x [term: %d]",

raft/raft_test.go

@@ -3544,6 +3544,150 @@ func TestPreVoteWithSplitVote(t *testing.T) {
}
}
// simulate a rolling update of a cluster to enable Pre-Vote. The cluster has 3 nodes [n1, n2, n3].
// n1 is leader with term 2
// n2 is follower with term 2
// n3 is partitioned, with term 4 and a shorter log; its state is candidate
func newPreVoteMigrationCluster(t *testing.T) *network {
n1 := newTestRaft(1, []uint64{1, 2, 3}, 10, 1, NewMemoryStorage())
n2 := newTestRaft(2, []uint64{1, 2, 3}, 10, 1, NewMemoryStorage())
n3 := newTestRaft(3, []uint64{1, 2, 3}, 10, 1, NewMemoryStorage())
n1.becomeFollower(1, None)
n2.becomeFollower(1, None)
n3.becomeFollower(1, None)
n1.preVote = true
n2.preVote = true
// We intentionally do not enable PreVote for n3; this simulates a rolling
// restart process where it's possible to have a mixed-version cluster with
// replicas that have PreVote enabled and replicas that do not.
nt := newNetwork(n1, n2, n3)
nt.send(pb.Message{From: 1, To: 1, Type: pb.MsgHup})
// Cause a network partition to isolate n3.
nt.isolate(3)
nt.send(pb.Message{From: 1, To: 1, Type: pb.MsgProp, Entries: []pb.Entry{{Data: []byte("some data")}}})
nt.send(pb.Message{From: 3, To: 3, Type: pb.MsgHup})
nt.send(pb.Message{From: 3, To: 3, Type: pb.MsgHup})
// check state
// n1.state == StateLeader
// n2.state == StateFollower
// n3.state == StateCandidate
if n1.state != StateLeader {
t.Fatalf("node 1 state: %s, want %s", n1.state, StateLeader)
}
if n2.state != StateFollower {
t.Fatalf("node 2 state: %s, want %s", n2.state, StateFollower)
}
if n3.state != StateCandidate {
t.Fatalf("node 3 state: %s, want %s", n3.state, StateCandidate)
}
// check term
// n1.Term == 2
// n2.Term == 2
// n3.Term == 4
if n1.Term != 2 {
t.Fatalf("node 1 term: %d, want %d", n1.Term, 2)
}
if n2.Term != 2 {
t.Fatalf("node 2 term: %d, want %d", n2.Term, 2)
}
if n3.Term != 4 {
t.Fatalf("node 3 term: %d, want %d", n3.Term, 4)
}
// Enable prevote on n3, then recover the network
n3.preVote = true
nt.recover()
return nt
}
func TestPreVoteMigrationCanCompleteElection(t *testing.T) {
nt := newPreVoteMigrationCluster(t)
// n1 is leader with term 2
// n2 is follower with term 2
// n3 is pre-candidate with term 4, and a shorter log
n2 := nt.peers[2].(*raft)
n3 := nt.peers[3].(*raft)
// simulate leader down
nt.isolate(1)
// Call for elections from both n2 and n3.
nt.send(pb.Message{From: 3, To: 3, Type: pb.MsgHup})
nt.send(pb.Message{From: 2, To: 2, Type: pb.MsgHup})
// check state
// n2.state == Follower
// n3.state == PreCandidate
if n2.state != StateFollower {
t.Errorf("node 2 state: %s, want %s", n2.state, StateFollower)
}
if n3.state != StatePreCandidate {
t.Errorf("node 3 state: %s, want %s", n3.state, StatePreCandidate)
}
nt.send(pb.Message{From: 3, To: 3, Type: pb.MsgHup})
nt.send(pb.Message{From: 2, To: 2, Type: pb.MsgHup})
// Do we have a leader?
if n2.state != StateLeader && n3.state != StateFollower {
t.Errorf("no leader")
}
}
func TestPreVoteMigrationWithFreeStuckPreCandidate(t *testing.T) {
nt := newPreVoteMigrationCluster(t)
// n1 is leader with term 2
// n2 is follower with term 2
// n3 is pre-candidate with term 4, and a shorter log
n1 := nt.peers[1].(*raft)
n2 := nt.peers[2].(*raft)
n3 := nt.peers[3].(*raft)
nt.send(pb.Message{From: 3, To: 3, Type: pb.MsgHup})
if n1.state != StateLeader {
t.Errorf("node 1 state: %s, want %s", n1.state, StateLeader)
}
if n2.state != StateFollower {
t.Errorf("node 2 state: %s, want %s", n2.state, StateFollower)
}
if n3.state != StatePreCandidate {
t.Errorf("node 3 state: %s, want %s", n3.state, StatePreCandidate)
}
// Pre-Vote again for safety
nt.send(pb.Message{From: 3, To: 3, Type: pb.MsgHup})
if n1.state != StateLeader {
t.Errorf("node 1 state: %s, want %s", n1.state, StateLeader)
}
if n2.state != StateFollower {
t.Errorf("node 2 state: %s, want %s", n2.state, StateFollower)
}
if n3.state != StatePreCandidate {
t.Errorf("node 3 state: %s, want %s", n3.state, StatePreCandidate)
}
// Disrupt the leader so that the stuck peer is freed
nt.send(pb.Message{From: 1, To: 3, Type: pb.MsgHeartbeat, Term: n1.Term})
if n1.state != StateFollower {
t.Errorf("state = %s, want %s", n1.state, StateFollower)
}
if n3.Term != n1.Term {
t.Errorf("term = %d, want %d", n3.Term, n1.Term)
}
}
func entsWithConfig(configFunc func(*Config), terms ...uint64) *raft {
storage := NewMemoryStorage()
for i, term := range terms {