// Copyright 2015 The etcd Authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package raft import ( "bytes" "context" "fmt" "math" "reflect" "testing" "go.etcd.io/etcd/v3/raft/quorum" pb "go.etcd.io/etcd/v3/raft/raftpb" "go.etcd.io/etcd/v3/raft/tracker" ) // rawNodeAdapter is essentially a lint that makes sure that RawNode implements // "most of" Node. The exceptions (some of which are easy to fix) are listed // below. type rawNodeAdapter struct { *RawNode } var _ Node = (*rawNodeAdapter)(nil) // Node specifies lead, which is pointless, can just be filled in. func (a *rawNodeAdapter) TransferLeadership(ctx context.Context, lead, transferee uint64) { a.RawNode.TransferLeader(transferee) } // Node has a goroutine, RawNode doesn't need this. func (a *rawNodeAdapter) Stop() {} // RawNode returns a *Status. func (a *rawNodeAdapter) Status() Status { return a.RawNode.Status() } // RawNode takes a Ready. It doesn't really have to do that I think? It can hold on // to it internally. But maybe that approach is frail. func (a *rawNodeAdapter) Advance() { a.RawNode.Advance(Ready{}) } // RawNode returns a Ready, not a chan of one. func (a *rawNodeAdapter) Ready() <-chan Ready { return nil } // Node takes more contexts. Easy enough to fix. func (a *rawNodeAdapter) Campaign(context.Context) error { return a.RawNode.Campaign() } func (a *rawNodeAdapter) ReadIndex(_ context.Context, rctx []byte) error { a.RawNode.ReadIndex(rctx) // RawNode swallowed the error in ReadIndex, it probably should not do that. return nil } func (a *rawNodeAdapter) Step(_ context.Context, m pb.Message) error { return a.RawNode.Step(m) } func (a *rawNodeAdapter) Propose(_ context.Context, data []byte) error { return a.RawNode.Propose(data) } func (a *rawNodeAdapter) ProposeConfChange(_ context.Context, cc pb.ConfChangeI) error { return a.RawNode.ProposeConfChange(cc) } // TestRawNodeStep ensures that RawNode.Step ignore local message. func TestRawNodeStep(t *testing.T) { for i, msgn := range pb.MessageType_name { t.Run(msgn, func(t *testing.T) { s := NewMemoryStorage() s.SetHardState(pb.HardState{Term: 1, Commit: 1}) s.Append([]pb.Entry{{Term: 1, Index: 1}}) if err := s.ApplySnapshot(pb.Snapshot{Metadata: pb.SnapshotMetadata{ ConfState: pb.ConfState{ Voters: []uint64{1}, }, Index: 1, Term: 1, }}); err != nil { t.Fatal(err) } // Append an empty entry to make sure the non-local messages (like // vote requests) are ignored and don't trigger assertions. rawNode, err := NewRawNode(newTestConfig(1, nil, 10, 1, s)) if err != nil { t.Fatal(err) } msgt := pb.MessageType(i) err = rawNode.Step(pb.Message{Type: msgt}) // LocalMsg should be ignored. if IsLocalMsg(msgt) { if err != ErrStepLocalMsg { t.Errorf("%d: step should ignore %s", msgt, msgn) } } }) } } // TestNodeStepUnblock from node_test.go has no equivalent in rawNode because there is // no goroutine in RawNode. // TestRawNodeProposeAndConfChange tests the configuration change mechanism. Each // test case sends a configuration change which is either simple or joint, verifies // that it applies and that the resulting ConfState matches expectations, and for // joint configurations makes sure that they are exited successfully. func TestRawNodeProposeAndConfChange(t *testing.T) { testCases := []struct { cc pb.ConfChangeI exp pb.ConfState exp2 *pb.ConfState }{ // V1 config change. { pb.ConfChange{Type: pb.ConfChangeAddNode, NodeID: 2}, pb.ConfState{Voters: []uint64{1, 2}}, nil, }, // Proposing the same as a V2 change works just the same, without entering // a joint config. { pb.ConfChangeV2{Changes: []pb.ConfChangeSingle{ {Type: pb.ConfChangeAddNode, NodeID: 2}, }, }, pb.ConfState{Voters: []uint64{1, 2}}, nil, }, // Ditto if we add it as a learner instead. { pb.ConfChangeV2{Changes: []pb.ConfChangeSingle{ {Type: pb.ConfChangeAddLearnerNode, NodeID: 2}, }, }, pb.ConfState{Voters: []uint64{1}, Learners: []uint64{2}}, nil, }, // We can ask explicitly for joint consensus if we want it. { pb.ConfChangeV2{Changes: []pb.ConfChangeSingle{ {Type: pb.ConfChangeAddLearnerNode, NodeID: 2}, }, Transition: pb.ConfChangeTransitionJointExplicit, }, pb.ConfState{Voters: []uint64{1}, VotersOutgoing: []uint64{1}, Learners: []uint64{2}}, &pb.ConfState{Voters: []uint64{1}, Learners: []uint64{2}}, }, // Ditto, but with implicit transition (the harness checks this). { pb.ConfChangeV2{Changes: []pb.ConfChangeSingle{ {Type: pb.ConfChangeAddLearnerNode, NodeID: 2}, }, Transition: pb.ConfChangeTransitionJointImplicit, }, pb.ConfState{ Voters: []uint64{1}, VotersOutgoing: []uint64{1}, Learners: []uint64{2}, AutoLeave: true, }, &pb.ConfState{Voters: []uint64{1}, Learners: []uint64{2}}, }, // Add a new node and demote n1. This exercises the interesting case in // which we really need joint config changes and also need LearnersNext. { pb.ConfChangeV2{Changes: []pb.ConfChangeSingle{ {NodeID: 2, Type: pb.ConfChangeAddNode}, {NodeID: 1, Type: pb.ConfChangeAddLearnerNode}, {NodeID: 3, Type: pb.ConfChangeAddLearnerNode}, }, }, pb.ConfState{ Voters: []uint64{2}, VotersOutgoing: []uint64{1}, Learners: []uint64{3}, LearnersNext: []uint64{1}, AutoLeave: true, }, &pb.ConfState{Voters: []uint64{2}, Learners: []uint64{1, 3}}, }, // Ditto explicit. { pb.ConfChangeV2{Changes: []pb.ConfChangeSingle{ {NodeID: 2, Type: pb.ConfChangeAddNode}, {NodeID: 1, Type: pb.ConfChangeAddLearnerNode}, {NodeID: 3, Type: pb.ConfChangeAddLearnerNode}, }, Transition: pb.ConfChangeTransitionJointExplicit, }, pb.ConfState{ Voters: []uint64{2}, VotersOutgoing: []uint64{1}, Learners: []uint64{3}, LearnersNext: []uint64{1}, }, &pb.ConfState{Voters: []uint64{2}, Learners: []uint64{1, 3}}, }, // Ditto implicit. { pb.ConfChangeV2{ Changes: []pb.ConfChangeSingle{ {NodeID: 2, Type: pb.ConfChangeAddNode}, {NodeID: 1, Type: pb.ConfChangeAddLearnerNode}, {NodeID: 3, Type: pb.ConfChangeAddLearnerNode}, }, Transition: pb.ConfChangeTransitionJointImplicit, }, pb.ConfState{ Voters: []uint64{2}, VotersOutgoing: []uint64{1}, Learners: []uint64{3}, LearnersNext: []uint64{1}, AutoLeave: true, }, &pb.ConfState{Voters: []uint64{2}, Learners: []uint64{1, 3}}, }, } for _, tc := range testCases { t.Run("", func(t *testing.T) { s := NewMemoryStorage() rawNode, err := NewRawNode(newTestConfig(1, []uint64{1}, 10, 1, s)) if err != nil { t.Fatal(err) } rawNode.Campaign() proposed := false var ( lastIndex uint64 ccdata []byte ) // Propose the ConfChange, wait until it applies, save the resulting // ConfState. var cs *pb.ConfState for cs == nil { rd := rawNode.Ready() s.Append(rd.Entries) for _, ent := range rd.CommittedEntries { var cc pb.ConfChangeI if ent.Type == pb.EntryConfChange { var ccc pb.ConfChange if err = ccc.Unmarshal(ent.Data); err != nil { t.Fatal(err) } cc = ccc } else if ent.Type == pb.EntryConfChangeV2 { var ccc pb.ConfChangeV2 if err = ccc.Unmarshal(ent.Data); err != nil { t.Fatal(err) } cc = ccc } if cc != nil { cs = rawNode.ApplyConfChange(cc) } } rawNode.Advance(rd) // Once we are the leader, propose a command and a ConfChange. if !proposed && rd.SoftState.Lead == rawNode.raft.id { if err = rawNode.Propose([]byte("somedata")); err != nil { t.Fatal(err) } if ccv1, ok := tc.cc.AsV1(); ok { ccdata, err = ccv1.Marshal() if err != nil { t.Fatal(err) } rawNode.ProposeConfChange(ccv1) } else { ccv2 := tc.cc.AsV2() ccdata, err = ccv2.Marshal() if err != nil { t.Fatal(err) } rawNode.ProposeConfChange(ccv2) } proposed = true } } // Check that the last index is exactly the conf change we put in, // down to the bits. Note that this comes from the Storage, which // will not reflect any unstable entries that we'll only be presented // with in the next Ready. lastIndex, err = s.LastIndex() if err != nil { t.Fatal(err) } entries, err := s.Entries(lastIndex-1, lastIndex+1, noLimit) if err != nil { t.Fatal(err) } if len(entries) != 2 { t.Fatalf("len(entries) = %d, want %d", len(entries), 2) } if !bytes.Equal(entries[0].Data, []byte("somedata")) { t.Errorf("entries[0].Data = %v, want %v", entries[0].Data, []byte("somedata")) } typ := pb.EntryConfChange if _, ok := tc.cc.AsV1(); !ok { typ = pb.EntryConfChangeV2 } if entries[1].Type != typ { t.Fatalf("type = %v, want %v", entries[1].Type, typ) } if !bytes.Equal(entries[1].Data, ccdata) { t.Errorf("data = %v, want %v", entries[1].Data, ccdata) } if exp := &tc.exp; !reflect.DeepEqual(exp, cs) { t.Fatalf("exp:\n%+v\nact:\n%+v", exp, cs) } var maybePlusOne uint64 if autoLeave, ok := tc.cc.AsV2().EnterJoint(); ok && autoLeave { // If this is an auto-leaving joint conf change, it will have // appended the entry that auto-leaves, so add one to the last // index that forms the basis of our expectations on // pendingConfIndex. (Recall that lastIndex was taken from stable // storage, but this auto-leaving entry isn't on stable storage // yet). maybePlusOne = 1 } if exp, act := lastIndex+maybePlusOne, rawNode.raft.pendingConfIndex; exp != act { t.Fatalf("pendingConfIndex: expected %d, got %d", exp, act) } // Move the RawNode along. If the ConfChange was simple, nothing else // should happen. Otherwise, we're in a joint state, which is either // left automatically or not. If not, we add the proposal that leaves // it manually. rd := rawNode.Ready() var context []byte if !tc.exp.AutoLeave { if len(rd.Entries) > 0 { t.Fatal("expected no more entries") } if tc.exp2 == nil { return } context = []byte("manual") t.Log("leaving joint state manually") if err := rawNode.ProposeConfChange(pb.ConfChangeV2{Context: context}); err != nil { t.Fatal(err) } rd = rawNode.Ready() } // Check that the right ConfChange comes out. if len(rd.Entries) != 1 || rd.Entries[0].Type != pb.EntryConfChangeV2 { t.Fatalf("expected exactly one more entry, got %+v", rd) } var cc pb.ConfChangeV2 if err := cc.Unmarshal(rd.Entries[0].Data); err != nil { t.Fatal(err) } if !reflect.DeepEqual(cc, pb.ConfChangeV2{Context: context}) { t.Fatalf("expected zero ConfChangeV2, got %+v", cc) } // Lie and pretend the ConfChange applied. It won't do so because now // we require the joint quorum and we're only running one node. cs = rawNode.ApplyConfChange(cc) if exp := tc.exp2; !reflect.DeepEqual(exp, cs) { t.Fatalf("exp:\n%+v\nact:\n%+v", exp, cs) } }) } } // TestRawNodeJointAutoLeave tests the configuration change auto leave even leader // lost leadership. func TestRawNodeJointAutoLeave(t *testing.T) { testCc := pb.ConfChangeV2{Changes: []pb.ConfChangeSingle{ {Type: pb.ConfChangeAddLearnerNode, NodeID: 2}, }, Transition: pb.ConfChangeTransitionJointImplicit, } expCs := pb.ConfState{ Voters: []uint64{1}, VotersOutgoing: []uint64{1}, Learners: []uint64{2}, AutoLeave: true, } exp2Cs := pb.ConfState{Voters: []uint64{1}, Learners: []uint64{2}} t.Run("", func(t *testing.T) { s := NewMemoryStorage() rawNode, err := NewRawNode(newTestConfig(1, []uint64{1}, 10, 1, s)) if err != nil { t.Fatal(err) } rawNode.Campaign() proposed := false var ( lastIndex uint64 ccdata []byte ) // Propose the ConfChange, wait until it applies, save the resulting // ConfState. var cs *pb.ConfState for cs == nil { rd := rawNode.Ready() s.Append(rd.Entries) for _, ent := range rd.CommittedEntries { var cc pb.ConfChangeI if ent.Type == pb.EntryConfChangeV2 { var ccc pb.ConfChangeV2 if err = ccc.Unmarshal(ent.Data); err != nil { t.Fatal(err) } cc = &ccc } if cc != nil { // Force it step down. rawNode.Step(pb.Message{Type: pb.MsgHeartbeatResp, From: 1, Term: rawNode.raft.Term + 1}) cs = rawNode.ApplyConfChange(cc) } } rawNode.Advance(rd) // Once we are the leader, propose a command and a ConfChange. if !proposed && rd.SoftState.Lead == rawNode.raft.id { if err = rawNode.Propose([]byte("somedata")); err != nil { t.Fatal(err) } ccdata, err = testCc.Marshal() if err != nil { t.Fatal(err) } rawNode.ProposeConfChange(testCc) proposed = true } } // Check that the last index is exactly the conf change we put in, // down to the bits. Note that this comes from the Storage, which // will not reflect any unstable entries that we'll only be presented // with in the next Ready. lastIndex, err = s.LastIndex() if err != nil { t.Fatal(err) } entries, err := s.Entries(lastIndex-1, lastIndex+1, noLimit) if err != nil { t.Fatal(err) } if len(entries) != 2 { t.Fatalf("len(entries) = %d, want %d", len(entries), 2) } if !bytes.Equal(entries[0].Data, []byte("somedata")) { t.Errorf("entries[0].Data = %v, want %v", entries[0].Data, []byte("somedata")) } if entries[1].Type != pb.EntryConfChangeV2 { t.Fatalf("type = %v, want %v", entries[1].Type, pb.EntryConfChangeV2) } if !bytes.Equal(entries[1].Data, ccdata) { t.Errorf("data = %v, want %v", entries[1].Data, ccdata) } if !reflect.DeepEqual(&expCs, cs) { t.Fatalf("exp:\n%+v\nact:\n%+v", expCs, cs) } if 0 != rawNode.raft.pendingConfIndex { t.Fatalf("pendingConfIndex: expected %d, got %d", 0, rawNode.raft.pendingConfIndex) } // Move the RawNode along. It should not leave joint because it's follower. rd := rawNode.readyWithoutAccept() // Check that the right ConfChange comes out. if len(rd.Entries) != 0 { t.Fatalf("expected zero entry, got %+v", rd) } // Make it leader again. It should leave joint automatically after moving apply index. rawNode.Campaign() rd = rawNode.Ready() s.Append(rd.Entries) rawNode.Advance(rd) rd = rawNode.Ready() s.Append(rd.Entries) // Check that the right ConfChange comes out. if len(rd.Entries) != 1 || rd.Entries[0].Type != pb.EntryConfChangeV2 { t.Fatalf("expected exactly one more entry, got %+v", rd) } var cc pb.ConfChangeV2 if err := cc.Unmarshal(rd.Entries[0].Data); err != nil { t.Fatal(err) } if !reflect.DeepEqual(cc, pb.ConfChangeV2{Context: nil}) { t.Fatalf("expected zero ConfChangeV2, got %+v", cc) } // Lie and pretend the ConfChange applied. It won't do so because now // we require the joint quorum and we're only running one node. cs = rawNode.ApplyConfChange(cc) if exp := exp2Cs; !reflect.DeepEqual(&exp, cs) { t.Fatalf("exp:\n%+v\nact:\n%+v", exp, cs) } }) } // TestRawNodeProposeAddDuplicateNode ensures that two proposes to add the same node should // not affect the later propose to add new node. func TestRawNodeProposeAddDuplicateNode(t *testing.T) { s := NewMemoryStorage() rawNode, err := NewRawNode(newTestConfig(1, []uint64{1}, 10, 1, s)) if err != nil { t.Fatal(err) } rd := rawNode.Ready() s.Append(rd.Entries) rawNode.Advance(rd) rawNode.Campaign() for { rd = rawNode.Ready() s.Append(rd.Entries) if rd.SoftState.Lead == rawNode.raft.id { rawNode.Advance(rd) break } rawNode.Advance(rd) } proposeConfChangeAndApply := func(cc pb.ConfChange) { rawNode.ProposeConfChange(cc) rd = rawNode.Ready() s.Append(rd.Entries) for _, entry := range rd.CommittedEntries { if entry.Type == pb.EntryConfChange { var cc pb.ConfChange cc.Unmarshal(entry.Data) rawNode.ApplyConfChange(cc) } } rawNode.Advance(rd) } cc1 := pb.ConfChange{Type: pb.ConfChangeAddNode, NodeID: 1} ccdata1, err := cc1.Marshal() if err != nil { t.Fatal(err) } proposeConfChangeAndApply(cc1) // try to add the same node again proposeConfChangeAndApply(cc1) // the new node join should be ok cc2 := pb.ConfChange{Type: pb.ConfChangeAddNode, NodeID: 2} ccdata2, err := cc2.Marshal() if err != nil { t.Fatal(err) } proposeConfChangeAndApply(cc2) lastIndex, err := s.LastIndex() if err != nil { t.Fatal(err) } // the last three entries should be: ConfChange cc1, cc1, cc2 entries, err := s.Entries(lastIndex-2, lastIndex+1, noLimit) if err != nil { t.Fatal(err) } if len(entries) != 3 { t.Fatalf("len(entries) = %d, want %d", len(entries), 3) } if !bytes.Equal(entries[0].Data, ccdata1) { t.Errorf("entries[0].Data = %v, want %v", entries[0].Data, ccdata1) } if !bytes.Equal(entries[2].Data, ccdata2) { t.Errorf("entries[2].Data = %v, want %v", entries[2].Data, ccdata2) } } // TestRawNodeReadIndex ensures that Rawnode.ReadIndex sends the MsgReadIndex message // to the underlying raft. It also ensures that ReadState can be read out. func TestRawNodeReadIndex(t *testing.T) { msgs := []pb.Message{} appendStep := func(r *raft, m pb.Message) error { msgs = append(msgs, m) return nil } wrs := []ReadState{{Index: uint64(1), RequestCtx: []byte("somedata")}} s := NewMemoryStorage() c := newTestConfig(1, []uint64{1}, 10, 1, s) rawNode, err := NewRawNode(c) if err != nil { t.Fatal(err) } rawNode.raft.readStates = wrs // ensure the ReadStates can be read out hasReady := rawNode.HasReady() if !hasReady { t.Errorf("HasReady() returns %t, want %t", hasReady, true) } rd := rawNode.Ready() if !reflect.DeepEqual(rd.ReadStates, wrs) { t.Errorf("ReadStates = %d, want %d", rd.ReadStates, wrs) } s.Append(rd.Entries) rawNode.Advance(rd) // ensure raft.readStates is reset after advance if rawNode.raft.readStates != nil { t.Errorf("readStates = %v, want %v", rawNode.raft.readStates, nil) } wrequestCtx := []byte("somedata2") rawNode.Campaign() for { rd = rawNode.Ready() s.Append(rd.Entries) if rd.SoftState.Lead == rawNode.raft.id { rawNode.Advance(rd) // Once we are the leader, issue a ReadIndex request rawNode.raft.step = appendStep rawNode.ReadIndex(wrequestCtx) break } rawNode.Advance(rd) } // ensure that MsgReadIndex message is sent to the underlying raft if len(msgs) != 1 { t.Fatalf("len(msgs) = %d, want %d", len(msgs), 1) } if msgs[0].Type != pb.MsgReadIndex { t.Errorf("msg type = %d, want %d", msgs[0].Type, pb.MsgReadIndex) } if !bytes.Equal(msgs[0].Entries[0].Data, wrequestCtx) { t.Errorf("data = %v, want %v", msgs[0].Entries[0].Data, wrequestCtx) } } // TestBlockProposal from node_test.go has no equivalent in rawNode because there is // no leader check in RawNode. // TestNodeTick from node_test.go has no equivalent in rawNode because // it reaches into the raft object which is not exposed. // TestNodeStop from node_test.go has no equivalent in rawNode because there is // no goroutine in RawNode. // TestRawNodeStart ensures that a node can be started correctly. Note that RawNode // requires the application to bootstrap the state, i.e. it does not accept peers // and will not create faux configuration change entries. func TestRawNodeStart(t *testing.T) { want := Ready{ SoftState: &SoftState{Lead: 1, RaftState: StateLeader}, HardState: pb.HardState{Term: 1, Commit: 3, Vote: 1}, Entries: []pb.Entry{ {Term: 1, Index: 2, Data: nil}, // empty entry {Term: 1, Index: 3, Data: []byte("foo")}, // empty entry }, CommittedEntries: []pb.Entry{ {Term: 1, Index: 2, Data: nil}, // empty entry {Term: 1, Index: 3, Data: []byte("foo")}, // empty entry }, MustSync: true, } storage := NewMemoryStorage() storage.ents[0].Index = 1 // TODO(tbg): this is a first prototype of what bootstrapping could look // like (without the annoying faux ConfChanges). We want to persist a // ConfState at some index and make sure that this index can't be reached // from log position 1, so that followers are forced to pick up the // ConfState in order to move away from log position 1 (unless they got // bootstrapped in the same way already). Failing to do so would mean that // followers diverge from the bootstrapped nodes and don't learn about the // initial config. // // NB: this is exactly what CockroachDB does. The Raft log really begins at // index 10, so empty followers (at index 1) always need a snapshot first. type appenderStorage interface { Storage ApplySnapshot(pb.Snapshot) error } bootstrap := func(storage appenderStorage, cs pb.ConfState) error { if len(cs.Voters) == 0 { return fmt.Errorf("no voters specified") } fi, err := storage.FirstIndex() if err != nil { return err } if fi < 2 { return fmt.Errorf("FirstIndex >= 2 is prerequisite for bootstrap") } if _, err = storage.Entries(fi, fi, math.MaxUint64); err == nil { // TODO(tbg): match exact error return fmt.Errorf("should not have been able to load first index") } li, err := storage.LastIndex() if err != nil { return err } if _, err = storage.Entries(li, li, math.MaxUint64); err == nil { return fmt.Errorf("should not have been able to load last index") } hs, ics, err := storage.InitialState() if err != nil { return err } if !IsEmptyHardState(hs) { return fmt.Errorf("HardState not empty") } if len(ics.Voters) != 0 { return fmt.Errorf("ConfState not empty") } meta := pb.SnapshotMetadata{ Index: 1, Term: 0, ConfState: cs, } snap := pb.Snapshot{Metadata: meta} return storage.ApplySnapshot(snap) } if err := bootstrap(storage, pb.ConfState{Voters: []uint64{1}}); err != nil { t.Fatal(err) } rawNode, err := NewRawNode(newTestConfig(1, nil, 10, 1, storage)) if err != nil { t.Fatal(err) } if rawNode.HasReady() { t.Fatalf("unexpected ready: %+v", rawNode.Ready()) } rawNode.Campaign() rawNode.Propose([]byte("foo")) if !rawNode.HasReady() { t.Fatal("expected a Ready") } rd := rawNode.Ready() storage.Append(rd.Entries) rawNode.Advance(rd) rd.SoftState, want.SoftState = nil, nil if !reflect.DeepEqual(rd, want) { t.Fatalf("unexpected Ready:\n%+v\nvs\n%+v", rd, want) } if rawNode.HasReady() { t.Errorf("unexpected Ready: %+v", rawNode.Ready()) } } func TestRawNodeRestart(t *testing.T) { entries := []pb.Entry{ {Term: 1, Index: 1}, {Term: 1, Index: 2, Data: []byte("foo")}, } st := pb.HardState{Term: 1, Commit: 1} want := Ready{ HardState: emptyState, // commit up to commit index in st CommittedEntries: entries[:st.Commit], MustSync: false, } storage := NewMemoryStorage() storage.SetHardState(st) storage.Append(entries) rawNode, err := NewRawNode(newTestConfig(1, []uint64{1}, 10, 1, storage)) if err != nil { t.Fatal(err) } rd := rawNode.Ready() if !reflect.DeepEqual(rd, want) { t.Errorf("g = %+v,\n w %+v", rd, want) } rawNode.Advance(rd) if rawNode.HasReady() { t.Errorf("unexpected Ready: %+v", rawNode.Ready()) } } func TestRawNodeRestartFromSnapshot(t *testing.T) { snap := pb.Snapshot{ Metadata: pb.SnapshotMetadata{ ConfState: pb.ConfState{Voters: []uint64{1, 2}}, Index: 2, Term: 1, }, } entries := []pb.Entry{ {Term: 1, Index: 3, Data: []byte("foo")}, } st := pb.HardState{Term: 1, Commit: 3} want := Ready{ HardState: emptyState, // commit up to commit index in st CommittedEntries: entries, MustSync: false, } s := NewMemoryStorage() s.SetHardState(st) s.ApplySnapshot(snap) s.Append(entries) rawNode, err := NewRawNode(newTestConfig(1, nil, 10, 1, s)) if err != nil { t.Fatal(err) } if rd := rawNode.Ready(); !reflect.DeepEqual(rd, want) { t.Errorf("g = %+v,\n w %+v", rd, want) } else { rawNode.Advance(rd) } if rawNode.HasReady() { t.Errorf("unexpected Ready: %+v", rawNode.HasReady()) } } // TestNodeAdvance from node_test.go has no equivalent in rawNode because there is // no dependency check between Ready() and Advance() func TestRawNodeStatus(t *testing.T) { s := NewMemoryStorage() rn, err := NewRawNode(newTestConfig(1, []uint64{1}, 10, 1, s)) if err != nil { t.Fatal(err) } if status := rn.Status(); status.Progress != nil { t.Fatalf("expected no Progress because not leader: %+v", status.Progress) } if err := rn.Campaign(); err != nil { t.Fatal(err) } status := rn.Status() if status.Lead != 1 { t.Fatal("not lead") } if status.RaftState != StateLeader { t.Fatal("not leader") } if exp, act := *rn.raft.prs.Progress[1], status.Progress[1]; !reflect.DeepEqual(exp, act) { t.Fatalf("want: %+v\ngot: %+v", exp, act) } expCfg := tracker.Config{Voters: quorum.JointConfig{ quorum.MajorityConfig{1: {}}, nil, }} if !reflect.DeepEqual(expCfg, status.Config) { t.Fatalf("want: %+v\ngot: %+v", expCfg, status.Config) } } // TestRawNodeCommitPaginationAfterRestart is the RawNode version of // TestNodeCommitPaginationAfterRestart. The anomaly here was even worse as the // Raft group would forget to apply entries: // // - node learns that index 11 is committed // - nextEnts returns index 1..10 in CommittedEntries (but index 10 already // exceeds maxBytes), which isn't noticed internally by Raft // - Commit index gets bumped to 10 // - the node persists the HardState, but crashes before applying the entries // - upon restart, the storage returns the same entries, but `slice` takes a // different code path and removes the last entry. // - Raft does not emit a HardState, but when the app calls Advance(), it bumps // its internal applied index cursor to 10 (when it should be 9) // - the next Ready asks the app to apply index 11 (omitting index 10), losing a // write. func TestRawNodeCommitPaginationAfterRestart(t *testing.T) { s := &ignoreSizeHintMemStorage{ MemoryStorage: NewMemoryStorage(), } persistedHardState := pb.HardState{ Term: 1, Vote: 1, Commit: 10, } s.hardState = persistedHardState s.ents = make([]pb.Entry, 10) var size uint64 for i := range s.ents { ent := pb.Entry{ Term: 1, Index: uint64(i + 1), Type: pb.EntryNormal, Data: []byte("a"), } s.ents[i] = ent size += uint64(ent.Size()) } cfg := newTestConfig(1, []uint64{1}, 10, 1, s) // Set a MaxSizePerMsg that would suggest to Raft that the last committed entry should // not be included in the initial rd.CommittedEntries. However, our storage will ignore // this and *will* return it (which is how the Commit index ended up being 10 initially). cfg.MaxSizePerMsg = size - uint64(s.ents[len(s.ents)-1].Size()) - 1 s.ents = append(s.ents, pb.Entry{ Term: 1, Index: uint64(11), Type: pb.EntryNormal, Data: []byte("boom"), }) rawNode, err := NewRawNode(cfg) if err != nil { t.Fatal(err) } for highestApplied := uint64(0); highestApplied != 11; { rd := rawNode.Ready() n := len(rd.CommittedEntries) if n == 0 { t.Fatalf("stopped applying entries at index %d", highestApplied) } if next := rd.CommittedEntries[0].Index; highestApplied != 0 && highestApplied+1 != next { t.Fatalf("attempting to apply index %d after index %d, leaving a gap", next, highestApplied) } highestApplied = rd.CommittedEntries[n-1].Index rawNode.Advance(rd) rawNode.Step(pb.Message{ Type: pb.MsgHeartbeat, To: 1, From: 1, // illegal, but we get away with it Term: 1, Commit: 11, }) } } // TestRawNodeBoundedLogGrowthWithPartition tests a scenario where a leader is // partitioned from a quorum of nodes. It verifies that the leader's log is // protected from unbounded growth even as new entries continue to be proposed. // This protection is provided by the MaxUncommittedEntriesSize configuration. func TestRawNodeBoundedLogGrowthWithPartition(t *testing.T) { const maxEntries = 16 data := []byte("testdata") testEntry := pb.Entry{Data: data} maxEntrySize := uint64(maxEntries * PayloadSize(testEntry)) s := NewMemoryStorage() cfg := newTestConfig(1, []uint64{1}, 10, 1, s) cfg.MaxUncommittedEntriesSize = maxEntrySize rawNode, err := NewRawNode(cfg) if err != nil { t.Fatal(err) } rd := rawNode.Ready() s.Append(rd.Entries) rawNode.Advance(rd) // Become the leader. rawNode.Campaign() for { rd = rawNode.Ready() s.Append(rd.Entries) if rd.SoftState.Lead == rawNode.raft.id { rawNode.Advance(rd) break } rawNode.Advance(rd) } // Simulate a network partition while we make our proposals by never // committing anything. These proposals should not cause the leader's // log to grow indefinitely. for i := 0; i < 1024; i++ { rawNode.Propose(data) } // Check the size of leader's uncommitted log tail. It should not exceed the // MaxUncommittedEntriesSize limit. checkUncommitted := func(exp uint64) { t.Helper() if a := rawNode.raft.uncommittedSize; exp != a { t.Fatalf("expected %d uncommitted entry bytes, found %d", exp, a) } } checkUncommitted(maxEntrySize) // Recover from the partition. The uncommitted tail of the Raft log should // disappear as entries are committed. rd = rawNode.Ready() if len(rd.CommittedEntries) != maxEntries { t.Fatalf("expected %d entries, got %d", maxEntries, len(rd.CommittedEntries)) } s.Append(rd.Entries) rawNode.Advance(rd) checkUncommitted(0) } func BenchmarkStatus(b *testing.B) { setup := func(members int) *RawNode { peers := make([]uint64, members) for i := range peers { peers[i] = uint64(i + 1) } cfg := newTestConfig(1, peers, 3, 1, NewMemoryStorage()) cfg.Logger = discardLogger r := newRaft(cfg) r.becomeFollower(1, 1) r.becomeCandidate() r.becomeLeader() return &RawNode{raft: r} } for _, members := range []int{1, 3, 5, 100} { b.Run(fmt.Sprintf("members=%d", members), func(b *testing.B) { rn := setup(members) b.Run("Status", func(b *testing.B) { b.ReportAllocs() for i := 0; i < b.N; i++ { _ = rn.Status() } }) b.Run("Status-example", func(b *testing.B) { b.ReportAllocs() for i := 0; i < b.N; i++ { s := rn.Status() var n uint64 for _, pr := range s.Progress { n += pr.Match } _ = n } }) b.Run("BasicStatus", func(b *testing.B) { b.ReportAllocs() for i := 0; i < b.N; i++ { _ = rn.BasicStatus() } }) b.Run("WithProgress", func(b *testing.B) { b.ReportAllocs() visit := func(uint64, ProgressType, tracker.Progress) {} for i := 0; i < b.N; i++ { rn.WithProgress(visit) } }) b.Run("WithProgress-example", func(b *testing.B) { b.ReportAllocs() for i := 0; i < b.N; i++ { var n uint64 visit := func(_ uint64, _ ProgressType, pr tracker.Progress) { n += pr.Match } rn.WithProgress(visit) _ = n } }) }) } } func TestRawNodeConsumeReady(t *testing.T) { // Check that readyWithoutAccept() does not call acceptReady (which resets // the messages) but Ready() does. s := NewMemoryStorage() rn := newTestRawNode(1, []uint64{1}, 3, 1, s) m1 := pb.Message{Context: []byte("foo")} m2 := pb.Message{Context: []byte("bar")} // Inject first message, make sure it's visible via readyWithoutAccept. rn.raft.msgs = append(rn.raft.msgs, m1) rd := rn.readyWithoutAccept() if len(rd.Messages) != 1 || !reflect.DeepEqual(rd.Messages[0], m1) { t.Fatalf("expected only m1 sent, got %+v", rd.Messages) } if len(rn.raft.msgs) != 1 || !reflect.DeepEqual(rn.raft.msgs[0], m1) { t.Fatalf("expected only m1 in raft.msgs, got %+v", rn.raft.msgs) } // Now call Ready() which should move the message into the Ready (as opposed // to leaving it in both places). rd = rn.Ready() if len(rn.raft.msgs) > 0 { t.Fatalf("messages not reset: %+v", rn.raft.msgs) } if len(rd.Messages) != 1 || !reflect.DeepEqual(rd.Messages[0], m1) { t.Fatalf("expected only m1 sent, got %+v", rd.Messages) } // Add a message to raft to make sure that Advance() doesn't drop it. rn.raft.msgs = append(rn.raft.msgs, m2) rn.Advance(rd) if len(rn.raft.msgs) != 1 || !reflect.DeepEqual(rn.raft.msgs[0], m2) { t.Fatalf("expected only m2 in raft.msgs, got %+v", rn.raft.msgs) } }