etcd/raft/node.go

234 lines
5.3 KiB
Go
Raw Normal View History

package raft
2014-06-05 21:49:34 +04:00
import (
	"encoding/binary"
	"encoding/json"
	"fmt"
	"log"
	"math/rand"
	"time"
)
type Interface interface {
2014-06-14 03:07:27 +04:00
Step(m Message) bool
Msgs() []Message
}
2014-07-11 09:51:37 +04:00
// tick is a count of logical clock ticks. Node's heartbeat and election
// timeouts, and its elapsed counter advanced by Tick, are expressed in it.
type tick int64
2014-05-29 00:53:26 +04:00
2014-06-27 00:48:59 +04:00
// Config describes the node targeted by a membership change. UpdateConf
// JSON-encodes it into a proposal and Next decodes it again when an AddNode
// or RemoveNode entry is ready to be applied.
type Config struct {
	// NodeId is the id of the node being added or removed.
	NodeId int64
	// Addr is carried along with the change; it is not interpreted in this file.
	Addr string
	// Context is an opaque payload carried along with the change; it is not
	// interpreted in this file.
	Context []byte
}
type Node struct {
2014-06-14 02:23:08 +04:00
sm *stateMachine
2014-07-15 10:41:19 +04:00
elapsed tick
electionRand tick
election tick
heartbeat tick
// TODO: it needs garbage collection later
rmNodes map[int64]struct{}
removed bool
}
2014-07-09 22:53:27 +04:00
func New(id int64, heartbeat, election tick) *Node {
2014-05-29 00:53:26 +04:00
if election < heartbeat*3 {
panic("election is least three times as heartbeat [election: %d, heartbeat: %d]")
}
2014-07-15 09:59:16 +04:00
rand.Seed(time.Now().UnixNano())
n := &Node{
2014-07-15 10:41:19 +04:00
heartbeat: heartbeat,
election: election,
electionRand: election + tick(rand.Int31())%election,
sm: newStateMachine(id, []int64{id}),
rmNodes: make(map[int64]struct{}),
}
2014-05-29 00:53:26 +04:00
return n
}
2014-07-28 07:51:16 +04:00
// Recover builds a Node through New and then loads the supplied entries and
// state, in that order, into its state machine. It shares New's validation of
// heartbeat and election.
func Recover(id int64, ents []Entry, state State, heartbeat, election tick) *Node {
	node := New(id, heartbeat, election)

	sm := node.sm
	sm.loadEnts(ents)
	sm.loadState(state)

	return node
}
// Id returns the id stored on the node's state machine.
func (n *Node) Id() int64 {
	sm := n.sm
	return sm.id
}
2014-06-14 01:50:12 +04:00
2014-07-20 21:51:15 +04:00
// ClusterId returns the cluster id stored on the node's state machine.
func (n *Node) ClusterId() int64 {
	sm := n.sm
	return sm.clusterId
}
2014-07-11 10:07:22 +04:00
// Index returns the current value of the state machine's index.
func (n *Node) Index() int64 {
	sm := n.sm
	return sm.index.Get()
}
2014-07-06 21:19:23 +04:00
2014-07-11 09:55:57 +04:00
// Term returns the current value of the state machine's term.
func (n *Node) Term() int64 {
	sm := n.sm
	return sm.term.Get()
}
2014-07-06 21:19:23 +04:00
2014-07-24 03:15:25 +04:00
// Applied returns the applied field of the state machine's raft log.
func (n *Node) Applied() int64 {
	sm := n.sm
	return sm.raftLog.applied
}
2014-07-06 21:19:23 +04:00
2014-07-11 07:54:16 +04:00
// HasLeader reports whether Leader returns something other than the none
// sentinel.
func (n *Node) HasLeader() bool {
	lead := n.Leader()
	return lead != none
}
2014-06-20 01:39:17 +04:00
2014-07-11 07:54:16 +04:00
// IsLeader reports whether the id returned by Leader equals this node's own id.
func (n *Node) IsLeader() bool {
	lead, self := n.Leader(), n.Id()
	return lead == self
}
2014-07-06 21:19:23 +04:00
2014-07-11 09:12:55 +04:00
// Leader returns the current value of the state machine's lead field.
func (n *Node) Leader() int64 {
	sm := n.sm
	return sm.lead.Get()
}
2014-07-06 21:19:23 +04:00
// IsRemoved reports whether the node's removed flag has been set, which Step
// does on a msgDenied message and Next does when a RemoveNode entry names
// this node.
func (n *Node) IsRemoved() bool {
	return n.removed
}
2014-06-14 01:41:22 +04:00
// Propose asynchronously proposes data be applied to the underlying state machine.
2014-06-20 01:08:35 +04:00
func (n *Node) Propose(data []byte) { n.propose(Normal, data) }
2014-06-14 01:55:49 +04:00
2014-07-11 09:51:37 +04:00
func (n *Node) propose(t int64, data []byte) {
2014-07-20 21:51:15 +04:00
n.Step(Message{From: n.sm.id, ClusterId: n.ClusterId(), Type: msgProp, Entries: []Entry{{Type: t, Data: data}}})
2014-06-10 06:37:41 +04:00
}
2014-07-20 21:51:15 +04:00
// Campaign steps the node with a msgHup message that originates from the node
// itself. The result of Step is discarded.
func (n *Node) Campaign() {
	hup := Message{
		From:      n.sm.id,
		ClusterId: n.ClusterId(),
		Type:      msgHup,
	}
	n.Step(hup)
}
func (n *Node) InitCluster(clusterId int64) {
d := make([]byte, 10)
2014-07-20 21:51:15 +04:00
wn := binary.PutVarint(d, clusterId)
n.propose(ClusterInit, d[:wn])
}
2014-06-05 21:49:34 +04:00
2014-07-08 09:27:40 +04:00
func (n *Node) Add(id int64, addr string, context []byte) {
n.UpdateConf(AddNode, &Config{NodeId: id, Addr: addr, Context: context})
2014-07-08 09:27:40 +04:00
}
2014-06-27 00:48:59 +04:00
2014-07-20 21:51:15 +04:00
// Remove proposes a RemoveNode configuration change for the node with the
// given id.
func (n *Node) Remove(id int64) {
	conf := &Config{NodeId: id}
	n.UpdateConf(RemoveNode, conf)
}
2014-06-05 21:49:34 +04:00
2014-06-14 02:03:30 +04:00
// Msgs returns whatever the state machine's Msgs method returns.
func (n *Node) Msgs() []Message {
	sm := n.sm
	return sm.Msgs()
}
2014-06-14 03:07:27 +04:00
func (n *Node) Step(m Message) bool {
if m.Type == msgDenied {
n.removed = true
return false
}
2014-07-20 21:51:15 +04:00
if n.ClusterId() != none && m.ClusterId != none && m.ClusterId != n.ClusterId() {
2014-07-24 03:15:25 +04:00
log.Printf("denied a message from node %d, cluster %d. accept cluster: %d\n", m.From, m.ClusterId, n.ClusterId())
2014-07-20 21:51:15 +04:00
n.sm.send(Message{To: m.From, ClusterId: n.ClusterId(), Type: msgDenied})
return true
}
if _, ok := n.rmNodes[m.From]; ok {
if m.From != n.sm.id {
2014-07-20 21:51:15 +04:00
n.sm.send(Message{To: m.From, ClusterId: n.ClusterId(), Type: msgDenied})
}
return true
}
l := len(n.sm.msgs)
2014-07-20 21:51:15 +04:00
2014-06-14 03:07:27 +04:00
if !n.sm.Step(m) {
return false
}
2014-07-20 21:51:15 +04:00
for _, m := range n.sm.msgs[l:] {
2014-05-29 00:53:26 +04:00
switch m.Type {
case msgAppResp:
2014-06-14 03:31:40 +04:00
// We just heard from the leader of the same term.
2014-05-29 00:53:26 +04:00
n.elapsed = 0
case msgVoteResp:
2014-06-14 03:31:40 +04:00
// We just heard from the candidate the node voted for.
2014-05-29 00:53:26 +04:00
if m.Index >= 0 {
n.elapsed = 0
}
}
}
2014-06-14 03:07:27 +04:00
return true
}
2014-05-19 11:24:02 +04:00
2014-06-14 02:15:52 +04:00
// Next returns all the appliable entries
2014-06-11 00:59:05 +04:00
func (n *Node) Next() []Entry {
2014-06-05 21:49:34 +04:00
ents := n.sm.nextEnts()
for i := range ents {
switch ents[i].Type {
2014-06-20 01:08:35 +04:00
case Normal:
2014-07-20 21:51:15 +04:00
case ClusterInit:
cid, nr := binary.Varint(ents[i].Data)
if nr <= 0 {
panic("init cluster failed: cannot read clusterId")
}
if n.ClusterId() != -1 {
panic("cannot init a started cluster")
}
n.sm.clusterId = cid
case AddNode:
2014-06-27 00:48:59 +04:00
c := new(Config)
if err := json.Unmarshal(ents[i].Data, c); err != nil {
2014-07-24 03:15:25 +04:00
log.Println(err)
2014-06-05 21:49:34 +04:00
continue
}
2014-06-14 03:08:59 +04:00
n.sm.addNode(c.NodeId)
delete(n.rmNodes, c.NodeId)
case RemoveNode:
2014-06-27 00:48:59 +04:00
c := new(Config)
if err := json.Unmarshal(ents[i].Data, c); err != nil {
2014-07-24 03:15:25 +04:00
log.Println(err)
continue
}
2014-06-14 03:08:59 +04:00
n.sm.removeNode(c.NodeId)
n.rmNodes[c.NodeId] = struct{}{}
if c.NodeId == n.sm.id {
n.removed = true
}
2014-06-05 21:49:34 +04:00
default:
panic("unexpected entry type")
}
}
2014-06-14 02:14:44 +04:00
return ents
2014-05-19 11:24:02 +04:00
}
2014-05-29 00:53:26 +04:00
// Tick triggers the node to do a tick.
// If the current elapsed is greater or equal than the timeout,
// node will send corresponding message to the statemachine.
func (n *Node) Tick() {
if !n.sm.promotable() {
return
}
2014-07-15 10:41:19 +04:00
timeout, msgType := n.electionRand, msgHup
2014-05-29 00:53:26 +04:00
if n.sm.state == stateLeader {
timeout, msgType = n.heartbeat, msgBeat
}
if n.elapsed >= timeout {
2014-07-20 21:51:15 +04:00
n.Step(Message{From: n.sm.id, ClusterId: n.ClusterId(), Type: msgType})
2014-05-29 00:53:26 +04:00
n.elapsed = 0
2014-07-15 10:41:19 +04:00
if n.sm.state != stateLeader {
n.electionRand = n.election + tick(rand.Int31())%n.election
}
2014-05-29 00:53:26 +04:00
} else {
n.elapsed++
}
}
2014-06-05 21:49:34 +04:00
2014-07-29 00:04:04 +04:00
// IsEmpty returns true if the log of the node is empty, as reported by the
// raft log's isEmpty method.
func (n *Node) IsEmpty() bool {
	sm := n.sm
	return sm.raftLog.isEmpty()
}
func (n *Node) UpdateConf(t int64, c *Config) {
2014-06-08 13:50:39 +04:00
data, err := json.Marshal(c)
if err != nil {
panic(err)
}
2014-06-14 01:55:49 +04:00
n.propose(t, data)
2014-06-05 21:49:34 +04:00
}
// UnstableEnts retuens all the entries that need to be persistent.
2014-07-25 23:03:03 +04:00
// The first return value is offset, and the second one is unstable entries.
2014-08-02 08:43:08 +04:00
func (n *Node) UnstableEnts() []Entry {
return n.sm.raftLog.unstableEnts()
}
2014-07-25 01:11:53 +04:00
func (n *Node) UnstableState() State {
2014-07-29 00:04:04 +04:00
if n.sm.unstableState == EmptyState {
return EmptyState
2014-07-25 01:11:53 +04:00
}
s := n.sm.unstableState
n.sm.clearState()
return s
}