From c8445260028cec41fe7b74b5d0a385a7d29083df Mon Sep 17 00:00:00 2001 From: Tobias Schottdorf Date: Mon, 17 Jun 2019 09:20:45 +0200 Subject: [PATCH] raft: prevent learners from becoming leader We were already taking some precautions against learners campaigning, but there was no safeguard against an explicit call to `Campaign()`. The newly added test also verifies that leadership transfers to learners are ignored. --- raft/raft.go | 11 +++++++++++ raft/raft_test.go | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+) diff --git a/raft/raft.go b/raft/raft.go index 4619a53d0..61df549b9 100644 --- a/raft/raft.go +++ b/raft/raft.go @@ -724,7 +724,14 @@ func (r *raft) becomeLeader() { r.logger.Infof("%x became leader at term %d", r.id, r.Term) } +// campaign transitions the raft instance to candidate state. This must only be +// called after verifying that this is a legitimate transition. func (r *raft) campaign(t CampaignType) { + if !r.promotable() { + // This path should not be hit (callers are supposed to check), but + // better safe than sorry. + r.logger.Warningf("%x is unpromotable; campaign() should have been called", r.id) + } var term uint64 var voteMsg pb.MessageType if t == campaignPreElection { @@ -858,6 +865,10 @@ func (r *raft) Step(m pb.Message) error { switch m.Type { case pb.MsgHup: if r.state != StateLeader { + if !r.promotable() { + r.logger.Warningf("%x is unpromotable and can not campaign; ignoring MsgHup", r.id) + return nil + } ents, err := r.raftLog.slice(r.raftLog.applied+1, r.raftLog.committed+1, noLimit) if err != nil { r.logger.Panicf("unexpected error getting unapplied entries (%v)", err) diff --git a/raft/raft_test.go b/raft/raft_test.go index 13a59f728..d4f0fa268 100644 --- a/raft/raft_test.go +++ b/raft/raft_test.go @@ -4082,6 +4082,40 @@ func TestPreVoteWithCheckQuorum(t *testing.T) { } } +// TestLearnerCampaign verifies that a learner won't campaign even if it receives +// a MsgHup or MsgTimeoutNow. +func TestLearnerCampaign(t *testing.T) { + n1 := newTestRaft(1, []uint64{1}, 10, 1, NewMemoryStorage()) + n1.addLearner(2) + n2 := newTestRaft(2, []uint64{1}, 10, 1, NewMemoryStorage()) + n2.addLearner(2) + nt := newNetwork(n1, n2) + nt.send(pb.Message{From: 2, To: 2, Type: pb.MsgHup}) + + if !n2.isLearner { + t.Fatalf("failed to make n2 a learner") + } + + if n2.state != StateFollower { + t.Fatalf("n2 campaigned despite being learner") + } + + nt.send(pb.Message{From: 1, To: 1, Type: pb.MsgHup}) + if n1.state != StateLeader || n1.lead != 1 { + t.Fatalf("n1 did not become leader") + } + + // NB: TransferLeader already checks that the recipient is not a learner, but + // the check could have happened by the time the recipient becomes a learner, + // in which case it will receive MsgTimeoutNow as in this test case and we + // verify that it's ignored. + nt.send(pb.Message{From: 1, To: 2, Type: pb.MsgTimeoutNow}) + + if n2.state != StateFollower { + t.Fatalf("n2 accepted leadership transfer despite being learner") + } +} + // simulate rolling update a cluster for Pre-Vote. cluster has 3 nodes [n1, n2, n3]. // n1 is leader with term 2 // n2 is follower with term 2