etcd/raft/raft.go

368 lines
6.9 KiB
Go
Raw Normal View History

2014-05-06 10:28:14 +04:00
package raft
import (
"errors"
"sort"
)
// none is the sentinel address meaning "no node". reset assigns it to both
// sm.lead (no known leader) and sm.vote (no vote cast yet).
const none = -1
// messageType identifies the kind of a Message and selects how Step handles it.
type messageType int

// Message kinds, numbered from zero in declaration order. The order must stay
// in sync with mtmap, which is indexed by these values.
const (
	msgHup messageType = iota
	msgBeat
	msgProp
	msgApp
	msgAppResp
	msgVote
	msgVoteResp
)

// mtmap holds the printable name of every messageType at the position equal
// to the type's numeric value.
var mtmap = [...]string{
	"msgHup",
	"msgBeat",
	"msgProp",
	"msgApp",
	"msgAppResp",
	"msgVote",
	"msgVoteResp",
}

// String returns the name of mt as listed in mtmap. A value outside the
// declared message kinds indexes past the table and panics.
func (mt messageType) String() string {
	return mtmap[mt]
}
// errNoLeader is a sentinel error for the "no leader" condition. It is not
// referenced in the visible part of this file; presumably callers return it
// when canStep rejects a proposal — verify against the users of this package.
var errNoLeader = errors.New("no leader")
// stateType is the role a stateMachine currently plays.
type stateType int

// Roles, numbered from zero in declaration order. The order must stay in sync
// with stmap, which is indexed by these values.
const (
	stateFollower stateType = iota
	stateCandidate
	stateLeader
)

// stmap holds the printable name of every stateType at the position equal to
// the state's numeric value.
var stmap = [...]string{
	"stateFollower",
	"stateCandidate",
	"stateLeader",
}

// String returns the name of st as listed in stmap. A value outside the
// declared states indexes past the table and panics.
func (st stateType) String() string {
	return stmap[st]
}
// Message is the unit of communication consumed by stateMachine.Step and
// queued for delivery by stateMachine.send.
type Message struct {
	// Type selects how Step handles the message.
	Type messageType
	// To is the address of the destination node.
	To int
	// From is the sender's address; send overwrites it with the local addr.
	From int
	// Term is the sender's term; send overwrites it with the local term.
	Term int
	// LogTerm is the log term of the entry at Index (set on msgApp and msgVote).
	LogTerm int
	// Index is a log index whose meaning depends on Type: the entry preceding
	// Entries for msgApp, the sender's last index for msgVote, and for
	// msgAppResp/msgVoteResp the responder's last index on success or -1 on
	// rejection.
	Index int
	// PrevTerm is neither read nor written in the visible part of this file.
	PrevTerm int
	// Entries carries log entries: the entries to append for msgApp, or the
	// single proposed entry for msgProp.
	Entries []Entry
	// Commit is the sender's committed index (set on msgApp).
	Commit int
}
// index tracks replication progress for a single node.
type index struct {
	// match is the log index last recorded through update.
	match int
	// next is the index of the first entry sendAppend ships to the node.
	next int
}

// update records n as the matched index and moves next to the entry right
// after it.
func (in *index) update(n int) {
	in.match, in.next = n, n+1
}

// decr steps next back by one entry, never letting it fall below 1.
func (in *index) decr() {
	in.next--
	if in.next < 1 {
		in.next = 1
	}
}
type stateMachine struct {
addr int
// the term we are participating in at any time
term int
// who we voted for in term
vote int
// the log
2014-05-27 23:24:49 +04:00
log *log
2014-05-06 10:28:14 +04:00
2014-06-10 03:45:42 +04:00
ins map[int]*index
2014-05-06 10:28:14 +04:00
state stateType
votes map[int]bool
2014-05-24 00:30:04 +04:00
msgs []Message
2014-05-06 10:28:14 +04:00
// the leader addr
lead int
// pending reconfiguration
pendingConf bool
2014-05-06 10:28:14 +04:00
}
2014-06-10 04:17:38 +04:00
func newStateMachine(addr int, peers []int) *stateMachine {
2014-06-10 03:45:42 +04:00
sm := &stateMachine{addr: addr, log: newLog(), ins: make(map[int]*index)}
2014-06-10 04:17:38 +04:00
for p := range peers {
2014-06-10 03:45:42 +04:00
sm.ins[p] = &index{}
}
2014-05-06 10:28:14 +04:00
sm.reset()
return sm
}
// canStep reports whether m may be handed to Step right now. Only proposals
// are ever refused, and only while no leader is known.
func (sm *stateMachine) canStep(m Message) bool {
	return m.Type != msgProp || sm.lead != none
}
// poll records v as the vote of addr, unless addr already has a vote on
// record, in which case the earlier vote stands. It returns the number of
// granting votes recorded so far.
func (sm *stateMachine) poll(addr int, v bool) (granted int) {
	if _, recorded := sm.votes[addr]; !recorded {
		sm.votes[addr] = v
	}
	for voter := range sm.votes {
		if !sm.votes[voter] {
			continue
		}
		granted++
	}
	return granted
}
// send persists state to stable storage and then sends to its mailbox.
2014-05-06 10:28:14 +04:00
func (sm *stateMachine) send(m Message) {
m.From = sm.addr
m.Term = sm.term
2014-05-24 00:30:04 +04:00
sm.msgs = append(sm.msgs, m)
2014-05-06 10:28:14 +04:00
}
// sendAppend queues a msgApp for the given peer. The message carries the log
// entries starting at the peer's next index, the index and term of the entry
// just before them, and the local committed index.
func (sm *stateMachine) sendAppend(to int) {
	in := sm.ins[to]
	prev := in.next - 1
	sm.send(Message{
		Type:    msgApp,
		To:      to,
		Index:   prev,
		LogTerm: sm.log.term(prev),
		Entries: sm.log.entries(in.next),
		Commit:  sm.log.committed,
	})
}
// bcastAppend sends RRPC, with entries to all peers that are not up-to-date according to sm.mis.
func (sm *stateMachine) bcastAppend() {
2014-06-10 03:45:42 +04:00
for i := range sm.ins {
2014-05-06 10:28:14 +04:00
if i == sm.addr {
continue
}
sm.sendAppend(i)
2014-05-06 10:28:14 +04:00
}
}
func (sm *stateMachine) maybeCommit() bool {
2014-05-06 10:28:14 +04:00
// TODO(bmizerany): optimize.. Currently naive
mis := make([]int, len(sm.ins))
for i := range mis {
mis[i] = sm.ins[i].match
}
sort.Sort(sort.Reverse(sort.IntSlice(mis)))
mci := mis[sm.q()-1]
2014-05-28 19:24:09 +04:00
return sm.log.maybeCommit(mci, sm.term)
2014-05-06 10:28:14 +04:00
}
// nextEnts returns the appliable entries and updates the applied index
2014-05-19 11:24:02 +04:00
func (sm *stateMachine) nextEnts() (ents []Entry) {
2014-05-27 23:24:49 +04:00
return sm.log.nextEnts()
2014-05-06 10:28:14 +04:00
}
func (sm *stateMachine) reset() {
sm.lead = none
sm.vote = none
sm.votes = make(map[int]bool)
for i := range sm.ins {
2014-06-10 03:45:42 +04:00
sm.ins[i] = &index{next: sm.log.lastIndex() + 1}
2014-05-25 10:03:13 +04:00
if i == sm.addr {
sm.ins[i].match = sm.log.lastIndex()
}
2014-05-06 10:28:14 +04:00
}
}
func (sm *stateMachine) q() int {
2014-06-10 03:45:42 +04:00
return len(sm.ins)/2 + 1
2014-05-06 10:28:14 +04:00
}
func (sm *stateMachine) becomeFollower(term, lead int) {
sm.reset()
sm.term = term
sm.lead = lead
sm.state = stateFollower
2014-06-10 03:56:04 +04:00
sm.pendingConf = false
2014-05-06 10:28:14 +04:00
}
2014-05-24 00:30:04 +04:00
// becomeCandidate resets the election and progress state, moves to the next
// term, votes for itself and switches to the candidate role. A leader must
// not become a candidate directly; attempting it panics.
func (sm *stateMachine) becomeCandidate() {
	// TODO(xiangli) remove the panic when the raft implementation is stable
	switch sm.state {
	case stateLeader:
		panic("invalid transition [leader -> candidate]")
	}
	sm.reset()
	sm.state = stateCandidate
	sm.vote = sm.addr
	sm.term++
}
func (sm *stateMachine) becomeLeader() {
// TODO(xiangli) remove the panic when the raft implementation is stable
if sm.state == stateFollower {
panic("invalid transition [follower -> leader]")
}
sm.reset()
sm.lead = sm.addr
sm.state = stateLeader
2014-06-10 03:56:04 +04:00
for _, e := range sm.log.ents[sm.log.committed:] {
if e.Type == config {
sm.pendingConf = true
}
}
2014-05-24 00:30:04 +04:00
}
// Msgs returns the messages queued since the previous call and leaves the
// state machine with a new, empty outbox.
func (sm *stateMachine) Msgs() []Message {
	queued := sm.msgs
	sm.msgs = []Message{}
	return queued
}
func (sm *stateMachine) Step(m Message) {
2014-05-06 10:28:14 +04:00
switch m.Type {
case msgHup:
2014-05-24 00:30:04 +04:00
sm.becomeCandidate()
if sm.q() == sm.poll(sm.addr, true) {
sm.becomeLeader()
return
}
2014-06-10 03:45:42 +04:00
for i := range sm.ins {
2014-05-06 10:28:14 +04:00
if i == sm.addr {
continue
}
2014-05-28 09:50:47 +04:00
lasti := sm.log.lastIndex()
2014-05-27 23:24:49 +04:00
sm.send(Message{To: i, Type: msgVote, Index: lasti, LogTerm: sm.log.term(lasti)})
2014-05-06 10:28:14 +04:00
}
return
2014-05-29 00:53:26 +04:00
case msgBeat:
if sm.state != stateLeader {
return
}
2014-06-02 21:37:32 +04:00
sm.bcastAppend()
return
2014-05-06 10:28:14 +04:00
case msgProp:
if len(m.Entries) != 1 {
panic("unexpected length(entries) of a msgProp")
}
2014-05-06 10:28:14 +04:00
switch sm.lead {
case sm.addr:
e := m.Entries[0]
if e.Type == config {
if sm.pendingConf {
// todo: deny
return
}
sm.pendingConf = true
}
e.Term = sm.term
sm.log.append(sm.log.lastIndex(), e)
sm.ins[sm.addr].update(sm.log.lastIndex())
sm.maybeCommit()
sm.bcastAppend()
2014-05-06 10:28:14 +04:00
case none:
panic("msgProp given without leader")
default:
m.To = sm.lead
sm.send(m)
}
return
}
switch {
case m.Term > sm.term:
sm.becomeFollower(m.Term, m.From)
case m.Term < sm.term:
// ignore
return
}
handleAppendEntries := func() {
2014-05-28 19:41:52 +04:00
if sm.log.maybeAppend(m.Index, m.LogTerm, m.Commit, m.Entries...) {
2014-05-28 09:50:47 +04:00
sm.send(Message{To: m.From, Type: msgAppResp, Index: sm.log.lastIndex()})
2014-05-06 10:28:14 +04:00
} else {
sm.send(Message{To: m.From, Type: msgAppResp, Index: -1})
}
}
switch sm.state {
case stateLeader:
switch m.Type {
case msgAppResp:
if m.Index < 0 {
sm.ins[m.From].decr()
sm.sendAppend(m.From)
2014-05-06 10:28:14 +04:00
} else {
sm.ins[m.From].update(m.Index)
if sm.maybeCommit() {
sm.bcastAppend()
}
2014-05-06 10:28:14 +04:00
}
case msgVote:
sm.send(Message{To: m.From, Type: msgVoteResp, Index: -1})
2014-05-06 10:28:14 +04:00
}
case stateCandidate:
switch m.Type {
case msgApp:
sm.becomeFollower(sm.term, m.From)
handleAppendEntries()
2014-05-25 08:08:06 +04:00
case msgVote:
sm.send(Message{To: m.From, Type: msgVoteResp, Index: -1})
2014-05-06 10:28:14 +04:00
case msgVoteResp:
gr := sm.poll(m.From, m.Index >= 0)
switch sm.q() {
case gr:
2014-05-24 00:30:04 +04:00
sm.becomeLeader()
sm.bcastAppend()
2014-05-06 10:28:14 +04:00
case len(sm.votes) - gr:
sm.becomeFollower(sm.term, none)
2014-05-06 10:28:14 +04:00
}
}
case stateFollower:
switch m.Type {
case msgApp:
handleAppendEntries()
case msgVote:
if (sm.vote == none || sm.vote == m.From) && sm.log.isUpToDate(m.Index, m.LogTerm) {
sm.vote = m.From
2014-05-28 09:50:47 +04:00
sm.send(Message{To: m.From, Type: msgVoteResp, Index: sm.log.lastIndex()})
} else {
sm.send(Message{To: m.From, Type: msgVoteResp, Index: -1})
2014-05-06 10:28:14 +04:00
}
}
}
}
2014-06-10 03:59:43 +04:00
// Add registers addr as a node, replacing any existing progress record for it
// with one whose next index points just past the end of the local log, and
// clears the pending-configuration marker.
func (sm *stateMachine) Add(addr int) {
	in := new(index)
	in.next = sm.log.lastIndex() + 1
	sm.ins[addr] = in
	sm.pendingConf = false
}
// Remove drops the progress record of addr, if there is one, and clears the
// pending-configuration marker.
func (sm *stateMachine) Remove(addr int) {
	sm.pendingConf = false
	delete(sm.ins, addr)
}