etcd/raft/node.go

// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package raft
import (
"context"
"errors"
pb "go.etcd.io/etcd/v3/raft/raftpb"
)
type SnapshotStatus int
const (
SnapshotFinish SnapshotStatus = 1
SnapshotFailure SnapshotStatus = 2
)
var (
emptyState = pb.HardState{}
// ErrStopped is returned by methods on Nodes that have been stopped.
ErrStopped = errors.New("raft: stopped")
)
// SoftState provides state that is useful for logging and debugging.
// The state is volatile and does not need to be persisted to the WAL.
type SoftState struct {
Lead uint64 // must use atomic operations to access; keep 64-bit aligned.
RaftState StateType
}
func (a *SoftState) equal(b *SoftState) bool {
return a.Lead == b.Lead && a.RaftState == b.RaftState
}
// Ready encapsulates the entries and messages that are ready to read,
// be saved to stable storage, committed or sent to other peers.
// All fields in Ready are read-only.
type Ready struct {
// The current volatile state of a Node.
// SoftState will be nil if there is no update.
// It is not required to consume or store SoftState.
*SoftState
// The current state of a Node to be saved to stable storage BEFORE
// Messages are sent.
// HardState will be equal to empty state if there is no update.
pb.HardState
// ReadStates can be used by the node to serve linearizable read requests
// locally when its applied index is greater than the index in ReadState.
// Note that the ReadState will be returned when raft receives MsgReadIndex.
// The returned ReadState is only valid for the request that requested to read.
ReadStates []ReadState
// Entries specifies entries to be saved to stable storage BEFORE
// Messages are sent.
Entries []pb.Entry
// Snapshot specifies the snapshot to be saved to stable storage.
Snapshot pb.Snapshot
// CommittedEntries specifies entries to be committed to a
// store/state-machine. These have previously been committed to stable
// store.
CommittedEntries []pb.Entry
// Messages specifies outbound messages to be sent AFTER Entries are
// committed to stable storage.
// If it contains a MsgSnap message, the application MUST report back to raft
// when the snapshot has been received or has failed by calling ReportSnapshot.
Messages []pb.Message
// MustSync indicates whether the HardState and Entries must be synchronously
// written to disk or if an asynchronous write is permissible.
MustSync bool
}
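// A minimal sketch of consuming one Ready under the ordering rules above
// (persist HardState, Entries and Snapshot before sending Messages, then
// apply CommittedEntries); storage, transport and apply are
// application-supplied stand-ins:
//
//	rd := <-n.Ready()
//	storage.Save(rd.HardState, rd.Entries, rd.Snapshot) // fsync here if rd.MustSync
//	transport.Send(rd.Messages)
//	for _, ent := range rd.CommittedEntries {
//		apply(ent)
//	}
//	n.Advance()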
func isHardStateEqual(a, b pb.HardState) bool {
return a.Term == b.Term && a.Vote == b.Vote && a.Commit == b.Commit
}
// IsEmptyHardState returns true if the given HardState is empty.
func IsEmptyHardState(st pb.HardState) bool {
return isHardStateEqual(st, emptyState)
}
// IsEmptySnap returns true if the given Snapshot is empty.
func IsEmptySnap(sp pb.Snapshot) bool {
return sp.Metadata.Index == 0
}
func (rd Ready) containsUpdates() bool {
return rd.SoftState != nil || !IsEmptyHardState(rd.HardState) ||
!IsEmptySnap(rd.Snapshot) || len(rd.Entries) > 0 ||
len(rd.CommittedEntries) > 0 || len(rd.Messages) > 0 || len(rd.ReadStates) != 0
}
// appliedCursor extracts from the Ready the highest index the client has
// applied (once the Ready is confirmed via Advance). If no information is
// contained in the Ready, returns zero.
func (rd Ready) appliedCursor() uint64 {
if n := len(rd.CommittedEntries); n > 0 {
return rd.CommittedEntries[n-1].Index
}
if index := rd.Snapshot.Metadata.Index; index > 0 {
return index
}
return 0
}
// Node represents a node in a raft cluster.
type Node interface {
// Tick increments the internal logical clock for the Node by a single tick. Election
// timeouts and heartbeat timeouts are in units of ticks.
Tick()
// Campaign causes the Node to transition to candidate state and start campaigning to become leader.
Campaign(ctx context.Context) error
// Propose proposes that data be appended to the log. Note that proposals can be lost without
// notice, therefore it is the user's job to ensure proposal retries.
Propose(ctx context.Context, data []byte) error
// ProposeConfChange proposes a configuration change. Like any proposal, the
// configuration change may be dropped with or without an error being
// returned. In particular, configuration changes are dropped unless the
// leader has certainty that there is no prior unapplied configuration
// change in its log.
//
// The method accepts either a pb.ConfChange (deprecated) or pb.ConfChangeV2
// message. The latter allows arbitrary configuration changes via joint
// consensus, notably including replacing a voter. Passing a ConfChangeV2
// message is only allowed if all Nodes participating in the cluster run a
// version of this library aware of the V2 API. See pb.ConfChangeV2 for
// usage details and semantics.
ProposeConfChange(ctx context.Context, cc pb.ConfChangeI) error
// Step advances the state machine using the given message. ctx.Err() will be returned, if any.
Step(ctx context.Context, msg pb.Message) error
// Ready returns a channel that returns the current point-in-time state.
// Users of the Node must call Advance after retrieving the state returned by Ready.
//
// NOTE: No committed entries from the next Ready may be applied until all committed entries
// and snapshots from the previous one have finished.
Ready() <-chan Ready
// Advance notifies the Node that the application has saved progress up to the last Ready.
// It prepares the node to return the next available Ready.
//
// The application should generally call Advance after it applies the entries in the last Ready.
//
// However, as an optimization, the application may call Advance while it is applying the
// commands. For example, when the last Ready contains a snapshot, the application might take
// a long time to apply the snapshot data. To continue receiving Ready without blocking raft
// progress, it can call Advance before finishing applying the last Ready.
Advance()
// ApplyConfChange applies a config change (previously passed to
// ProposeConfChange) to the node. This must be called whenever a config
// change is observed in Ready.CommittedEntries, except when the app decides
// to reject the configuration change (i.e. treats it as a noop instead), in
// which case it must not be called.
//
// Returns an opaque non-nil ConfState protobuf which must be recorded in
// snapshots.
ApplyConfChange(cc pb.ConfChangeI) *pb.ConfState
// TransferLeadership attempts to transfer leadership to the given transferee.
TransferLeadership(ctx context.Context, lead, transferee uint64)
// ReadIndex requests a read state. The read state will be set in the ready.
// The read state has a read index. Once the application advances further than the read
// index, any linearizable read requests issued before the read request can be
// processed safely. The read state will have the same rctx attached.
ReadIndex(ctx context.Context, rctx []byte) error
// Status returns the current status of the raft state machine.
Status() Status
// ReportUnreachable reports that the given node is not reachable for the last send.
ReportUnreachable(id uint64)
// ReportSnapshot reports the status of the sent snapshot. The id is the raft ID of the follower
// who is meant to receive the snapshot, and the status is SnapshotFinish or SnapshotFailure.
// Calling ReportSnapshot with SnapshotFinish is a no-op. But any failure in applying a
// snapshot (e.g., while streaming it from leader to follower) should be reported to the
// leader with SnapshotFailure. When the leader sends a snapshot to a follower, it pauses any
// raft log probes until the follower can apply the snapshot and advance its state. If the
// follower can't do that (e.g., due to a crash), it could end up in limbo, never getting any
// updates from the leader. Therefore, it is crucial that the application ensures that any
// failure in snapshot sending is caught and reported back to the leader, so it can resume raft
// log probing in the follower.
ReportSnapshot(id uint64, status SnapshotStatus)
// Stop performs any necessary termination of the Node.
Stop()
}
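// The canonical state-machine loop driving a Node, adapted from the package
// documentation; ticker, done and the helpers saveToStorage, send,
// processSnapshot and process are application-supplied:
//
//	for {
//		select {
//		case <-ticker.C:
//			n.Tick()
//		case rd := <-n.Ready():
//			saveToStorage(rd.HardState, rd.Entries, rd.Snapshot)
//			send(rd.Messages)
//			if !raft.IsEmptySnap(rd.Snapshot) {
//				processSnapshot(rd.Snapshot)
//			}
//			for _, entry := range rd.CommittedEntries {
//				process(entry)
//				if entry.Type == raftpb.EntryConfChange {
//					var cc raftpb.ConfChange
//					cc.Unmarshal(entry.Data)
//					n.ApplyConfChange(cc)
//				}
//			}
//			n.Advance()
//		case <-done:
//			return
//		}
//	}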
type Peer struct {
ID uint64
Context []byte
}
// StartNode returns a new Node given configuration and a list of raft peers.
// It appends a ConfChangeAddNode entry for each given peer to the initial log.
//
// Peers must not be zero length; call RestartNode in that case.
func StartNode(c *Config, peers []Peer) Node {
if len(peers) == 0 {
panic("no peers given; use RestartNode instead")
}
rn, err := NewRawNode(c)
if err != nil {
panic(err)
}
rn.Bootstrap(peers)
n := newNode(rn)
go n.run()
return &n
}
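// A typical bootstrap, adapted from the package documentation (the node ID
// and peer IDs are illustrative):
//
//	storage := raft.NewMemoryStorage()
//	c := &raft.Config{
//		ID:              0x01,
//		ElectionTick:    10,
//		HeartbeatTick:   1,
//		Storage:         storage,
//		MaxSizePerMsg:   4096,
//		MaxInflightMsgs: 256,
//	}
//	n := raft.StartNode(c, []raft.Peer{{ID: 0x02}, {ID: 0x03}})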
// RestartNode is similar to StartNode but does not take a list of peers.
// The current membership of the cluster will be restored from the Storage.
// If the caller has an existing state machine, pass in the last log index that
// has been applied to it (via Config.Applied); otherwise use zero.
func RestartNode(c *Config) Node {
rn, err := NewRawNode(c)
if err != nil {
panic(err)
}
n := newNode(rn)
go n.run()
return &n
}
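// A typical restart first recovers the in-memory storage from the persisted
// snapshot, state and entries (here read back from the application's WAL):
//
//	storage := raft.NewMemoryStorage()
//	storage.ApplySnapshot(snapshot)
//	storage.SetHardState(state)
//	storage.Append(entries)
//	c := &raft.Config{
//		ID:              0x01,
//		ElectionTick:    10,
//		HeartbeatTick:   1,
//		Storage:         storage,
//		MaxSizePerMsg:   4096,
//		MaxInflightMsgs: 256,
//	}
//	n := raft.RestartNode(c)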
type msgWithResult struct {
m pb.Message
result chan error
}
// node is the canonical implementation of the Node interface
type node struct {
propc chan msgWithResult
recvc chan pb.Message
confc chan pb.ConfChangeV2
confstatec chan pb.ConfState
readyc chan Ready
advancec chan struct{}
tickc chan struct{}
done chan struct{}
stop chan struct{}
status chan chan Status
rn *RawNode
}
func newNode(rn *RawNode) node {
return node{
propc: make(chan msgWithResult),
recvc: make(chan pb.Message),
confc: make(chan pb.ConfChangeV2),
confstatec: make(chan pb.ConfState),
readyc: make(chan Ready),
advancec: make(chan struct{}),
// make tickc a buffered chan, so the raft node can buffer some ticks when it
// is busy processing raft messages. The raft node will resume processing
// buffered ticks when it becomes idle.
tickc: make(chan struct{}, 128),
done: make(chan struct{}),
stop: make(chan struct{}),
status: make(chan chan Status),
rn: rn,
}
}
func (n *node) Stop() {
select {
case n.stop <- struct{}{}:
// Not already stopped, so trigger it
case <-n.done:
// Node has already been stopped - no need to do anything
return
}
// Block until the stop has been acknowledged by run()
<-n.done
}
func (n *node) run() {
var propc chan msgWithResult
var readyc chan Ready
var advancec chan struct{}
var rd Ready
r := n.rn.raft
lead := None
for {
if advancec != nil {
readyc = nil
} else if n.rn.HasReady() {
// Populate a Ready. Note that this Ready is not guaranteed to
// actually be handled. We will arm readyc, but there's no guarantee
// that we will actually send on it. It's possible that we will
// service another channel instead, loop around, and then populate
// the Ready again. We could instead force the previous Ready to be
// handled first, but it's generally good to emit larger Readys plus
// it simplifies testing (by emitting less frequently and more
// predictably).
rd = n.rn.readyWithoutAccept()
readyc = n.readyc
}
if lead != r.lead {
if r.hasLeader() {
if lead == None {
r.logger.Infof("raft.node: %x elected leader %x at term %d", r.id, r.lead, r.Term)
} else {
r.logger.Infof("raft.node: %x changed leader from %x to %x at term %d", r.id, lead, r.lead, r.Term)
}
propc = n.propc
} else {
r.logger.Infof("raft.node: %x lost leader %x at term %d", r.id, lead, r.Term)
propc = nil
}
lead = r.lead
}
select {
// TODO: maybe buffer the config proposal if one already exists (the way
// described in the raft dissertation).
// Currently it is dropped silently in Step.
case pm := <-propc:
m := pm.m
m.From = r.id
err := r.Step(m)
if pm.result != nil {
pm.result <- err
close(pm.result)
}
case m := <-n.recvc:
// filter out response messages from an unknown From.
if pr := r.prs.Progress[m.From]; pr != nil || !IsResponseMsg(m.Type) {
r.Step(m)
}
case cc := <-n.confc:
_, okBefore := r.prs.Progress[r.id]
cs := r.applyConfChange(cc)
// If the node was removed, block incoming proposals. Note that we
// only do this if the node was in the config before. Nodes may be
// a member of the group without knowing this (when they're catching
// up on the log and don't have the latest config) and we don't want
// to block the proposal channel in that case.
//
// NB: propc is reset when the leader changes, which, if we learn
// about it, sort of implies that we got readded, maybe? This isn't
// very sound and likely has bugs.
if _, okAfter := r.prs.Progress[r.id]; okBefore && !okAfter {
var found bool
outer:
for _, sl := range [][]uint64{cs.Voters, cs.VotersOutgoing} {
for _, id := range sl {
if id == r.id {
found = true
break outer
}
}
}
if !found {
propc = nil
}
}
select {
case n.confstatec <- cs:
case <-n.done:
}
case <-n.tickc:
n.rn.Tick()
case readyc <- rd:
n.rn.acceptReady(rd)
advancec = n.advancec
case <-advancec:
n.rn.Advance(rd)
rd = Ready{}
advancec = nil
case c := <-n.status:
c <- getStatus(r)
case <-n.stop:
close(n.done)
return
}
}
}
// Tick increments the internal logical clock for this Node. Election timeouts
// and heartbeat timeouts are in units of ticks.
func (n *node) Tick() {
select {
case n.tickc <- struct{}{}:
case <-n.done:
default:
n.rn.raft.logger.Warningf("%x missed a tick; the node is blocking too long", n.rn.raft.id)
}
}
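// Ticks are typically driven by a time.Ticker owned by the application; the
// interval (100ms here) is illustrative and must match the tick-based
// timeouts in the Config:
//
//	ticker := time.NewTicker(100 * time.Millisecond)
//	defer ticker.Stop()
//	for range ticker.C {
//		n.Tick()
//	}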
func (n *node) Campaign(ctx context.Context) error { return n.step(ctx, pb.Message{Type: pb.MsgHup}) }
func (n *node) Propose(ctx context.Context, data []byte) error {
return n.stepWait(ctx, pb.Message{Type: pb.MsgProp, Entries: []pb.Entry{{Data: data}}})
}
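// Because proposals can be dropped silently, callers typically bound each
// attempt with a context and retry (ctx handling and data are the caller's):
//
//	ctx, cancel := context.WithTimeout(context.Background(), time.Second)
//	defer cancel()
//	if err := n.Propose(ctx, data); err != nil {
//		// retry, or surface the error to the client
//	}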
func (n *node) Step(ctx context.Context, m pb.Message) error {
// ignore unexpected local messages received over the network
if IsLocalMsg(m.Type) {
// TODO: return an error?
return nil
}
return n.step(ctx, m)
}
func confChangeToMsg(c pb.ConfChangeI) (pb.Message, error) {
typ, data, err := pb.MarshalConfChange(c)
if err != nil {
return pb.Message{}, err
}
return pb.Message{Type: pb.MsgProp, Entries: []pb.Entry{{Type: typ, Data: data}}}, nil
}
func (n *node) ProposeConfChange(ctx context.Context, cc pb.ConfChangeI) error {
msg, err := confChangeToMsg(cc)
if err != nil {
return err
}
return n.Step(ctx, msg)
}
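// Proposing a configuration change, per the package documentation; once it is
// committed and appears in Ready.CommittedEntries, the application must pass
// it to ApplyConfChange:
//
//	cc := raftpb.ConfChange{Type: raftpb.ConfChangeAddNode, NodeID: id}
//	n.ProposeConfChange(ctx, cc)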
func (n *node) step(ctx context.Context, m pb.Message) error {
return n.stepWithWaitOption(ctx, m, false)
}
func (n *node) stepWait(ctx context.Context, m pb.Message) error {
return n.stepWithWaitOption(ctx, m, true)
}
// stepWithWaitOption advances the state machine using m. ctx.Err() will be
// returned, if any.
func (n *node) stepWithWaitOption(ctx context.Context, m pb.Message, wait bool) error {
if m.Type != pb.MsgProp {
select {
case n.recvc <- m:
return nil
case <-ctx.Done():
return ctx.Err()
case <-n.done:
return ErrStopped
}
}
ch := n.propc
pm := msgWithResult{m: m}
if wait {
pm.result = make(chan error, 1)
}
select {
case ch <- pm:
if !wait {
return nil
}
case <-ctx.Done():
return ctx.Err()
case <-n.done:
return ErrStopped
}
select {
case err := <-pm.result:
if err != nil {
return err
}
case <-ctx.Done():
return ctx.Err()
case <-n.done:
return ErrStopped
}
return nil
}
func (n *node) Ready() <-chan Ready { return n.readyc }
func (n *node) Advance() {
select {
case n.advancec <- struct{}{}:
case <-n.done:
}
}
func (n *node) ApplyConfChange(cc pb.ConfChangeI) *pb.ConfState {
var cs pb.ConfState
select {
case n.confc <- cc.AsV2():
case <-n.done:
}
select {
case cs = <-n.confstatec:
case <-n.done:
}
return &cs
}
func (n *node) Status() Status {
c := make(chan Status)
select {
case n.status <- c:
return <-c
case <-n.done:
return Status{}
}
}
func (n *node) ReportUnreachable(id uint64) {
select {
case n.recvc <- pb.Message{Type: pb.MsgUnreachable, From: id}:
case <-n.done:
}
}
func (n *node) ReportSnapshot(id uint64, status SnapshotStatus) {
rej := status == SnapshotFailure
select {
case n.recvc <- pb.Message{Type: pb.MsgSnapStatus, From: id, Reject: rej}:
case <-n.done:
}
}
func (n *node) TransferLeadership(ctx context.Context, lead, transferee uint64) {
select {
// manually set 'from' and 'to', so that the leader can voluntarily transfer its leadership
case n.recvc <- pb.Message{Type: pb.MsgTransferLeader, From: transferee, To: lead}:
case <-n.done:
case <-ctx.Done():
}
}
func (n *node) ReadIndex(ctx context.Context, rctx []byte) error {
return n.step(ctx, pb.Message{Type: pb.MsgReadIndex, Entries: []pb.Entry{{Data: rctx}}})
}
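// A sketch of the linearizable read flow described on the Node interface:
// attach a unique rctx, wait for the matching ReadState in a Ready, and serve
// the read once the applied index reaches ReadState.Index (reqID and
// appliedIndex are application-maintained):
//
//	rctx := []byte(reqID)
//	n.ReadIndex(ctx, rctx)
//	// ... later, while handling a Ready:
//	for _, rs := range rd.ReadStates {
//		if bytes.Equal(rs.RequestCtx, rctx) && appliedIndex >= rs.Index {
//			// safe to serve the pending read locally
//		}
//	}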
func newReady(r *raft, prevSoftSt *SoftState, prevHardSt pb.HardState) Ready {
rd := Ready{
Entries: r.raftLog.unstableEntries(),
CommittedEntries: r.raftLog.nextEnts(),
Messages: r.msgs,
}
if softSt := r.softState(); !softSt.equal(prevSoftSt) {
rd.SoftState = softSt
}
if hardSt := r.hardState(); !isHardStateEqual(hardSt, prevHardSt) {
rd.HardState = hardSt
}
if r.raftLog.unstable.snapshot != nil {
rd.Snapshot = *r.raftLog.unstable.snapshot
}
if len(r.readStates) != 0 {
rd.ReadStates = r.readStates
}
rd.MustSync = MustSync(r.hardState(), prevHardSt, len(rd.Entries))
return rd
}
// MustSync returns true if the hard state and count of Raft entries indicate
// that a synchronous write to persistent storage is required.
func MustSync(st, prevst pb.HardState, entsnum int) bool {
// Persistent state on all servers:
// (Updated on stable storage before responding to RPCs)
// currentTerm
// votedFor
// log entries[]
return entsnum != 0 || st.Vote != prevst.Vote || st.Term != prevst.Term
}
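// A write loop would consult MustSync (or the precomputed rd.MustSync) to
// decide whether the write must be fsynced before Messages are sent (w is an
// application-supplied WAL):
//
//	w.Save(rd.HardState, rd.Entries)
//	if rd.MustSync {
//		w.Sync()
//	}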