2015-03-19 20:04:48 +03:00
|
|
|
|
// Copyright 2015 CoreOS, Inc.
|
|
|
|
|
//
|
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
|
// you may not use this file except in compliance with the License.
|
|
|
|
|
// You may obtain a copy of the License at
|
|
|
|
|
//
|
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
|
//
|
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
|
// limitations under the License.
|
|
|
|
|
|
|
|
|
|
package raft
|
|
|
|
|
|
|
|
|
|
import "fmt"
|
|
|
|
|
|
|
|
|
|
const (
|
|
|
|
|
ProgressStateProbe ProgressStateType = iota
|
|
|
|
|
ProgressStateReplicate
|
|
|
|
|
ProgressStateSnapshot
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
type ProgressStateType uint64
|
|
|
|
|
|
|
|
|
|
var prstmap = [...]string{
|
|
|
|
|
"ProgressStateProbe",
|
|
|
|
|
"ProgressStateReplicate",
|
|
|
|
|
"ProgressStateSnapshot",
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (st ProgressStateType) String() string { return prstmap[uint64(st)] }
|
|
|
|
|
|
|
|
|
|
// Progress represents a follower’s progress in the view of the leader. Leader maintains
|
|
|
|
|
// progresses of all followers, and sends entries to the follower based on its progress.
|
|
|
|
|
type Progress struct {
|
|
|
|
|
Match, Next uint64
|
|
|
|
|
// When in ProgressStateProbe, leader sends at most one replication message
|
|
|
|
|
// per heartbeat interval. It also probes actual progress of the follower.
|
|
|
|
|
//
|
|
|
|
|
// When in ProgressStateReplicate, leader optimistically increases next
|
|
|
|
|
// to the latest entry sent after sending replication message. This is
|
|
|
|
|
// an optimized state for fast replicating log entries to the follower.
|
|
|
|
|
//
|
|
|
|
|
// When in ProgressStateSnapshot, leader should have sent out snapshot
|
|
|
|
|
// before and stops sending any replication message.
|
|
|
|
|
State ProgressStateType
|
|
|
|
|
// Paused is used in ProgressStateProbe.
|
|
|
|
|
// When Paused is true, raft should pause sending replication message to this peer.
|
|
|
|
|
Paused bool
|
|
|
|
|
// PendingSnapshot is used in ProgressStateSnapshot.
|
|
|
|
|
// If there is a pending snapshot, the pendingSnapshot will be set to the
|
|
|
|
|
// index of the snapshot. If pendingSnapshot is set, the replication process of
|
|
|
|
|
// this Progress will be paused. raft will not resend snapshot until the pending one
|
|
|
|
|
// is reported to be failed.
|
|
|
|
|
PendingSnapshot uint64
|
2015-03-20 04:06:51 +03:00
|
|
|
|
|
|
|
|
|
// inflights is a sliding window for the inflight messages.
|
|
|
|
|
// When inflights is full, no more message should be sent.
|
|
|
|
|
// When sends out a message, the index of the last entry should
|
|
|
|
|
// be add to inflights. The index MUST be added into inflights
|
|
|
|
|
// in order.
|
|
|
|
|
// When receives a reply, the previous inflights should be freed
|
|
|
|
|
// by calling inflights.freeTo.
|
|
|
|
|
ins *inflights
|
2015-03-19 20:04:48 +03:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (pr *Progress) resetState(state ProgressStateType) {
|
|
|
|
|
pr.Paused = false
|
|
|
|
|
pr.PendingSnapshot = 0
|
|
|
|
|
pr.State = state
|
2015-03-20 04:06:51 +03:00
|
|
|
|
pr.ins.reset()
|
2015-03-19 20:04:48 +03:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (pr *Progress) becomeProbe() {
|
|
|
|
|
// If the original state is ProgressStateSnapshot, progress knows that
|
|
|
|
|
// the pending snapshot has been sent to this peer successfully, then
|
|
|
|
|
// probes from pendingSnapshot + 1.
|
|
|
|
|
if pr.State == ProgressStateSnapshot {
|
|
|
|
|
pendingSnapshot := pr.PendingSnapshot
|
|
|
|
|
pr.resetState(ProgressStateProbe)
|
|
|
|
|
pr.Next = max(pr.Match+1, pendingSnapshot+1)
|
|
|
|
|
} else {
|
|
|
|
|
pr.resetState(ProgressStateProbe)
|
|
|
|
|
pr.Next = pr.Match + 1
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (pr *Progress) becomeReplicate() {
|
|
|
|
|
pr.resetState(ProgressStateReplicate)
|
|
|
|
|
pr.Next = pr.Match + 1
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (pr *Progress) becomeSnapshot(snapshoti uint64) {
|
|
|
|
|
pr.resetState(ProgressStateSnapshot)
|
|
|
|
|
pr.PendingSnapshot = snapshoti
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// maybeUpdate returns false if the given n index comes from an outdated message.
|
|
|
|
|
// Otherwise it updates the progress and returns true.
|
|
|
|
|
func (pr *Progress) maybeUpdate(n uint64) bool {
|
|
|
|
|
var updated bool
|
|
|
|
|
if pr.Match < n {
|
|
|
|
|
pr.Match = n
|
|
|
|
|
updated = true
|
|
|
|
|
pr.resume()
|
|
|
|
|
}
|
|
|
|
|
if pr.Next < n+1 {
|
|
|
|
|
pr.Next = n + 1
|
|
|
|
|
}
|
|
|
|
|
return updated
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (pr *Progress) optimisticUpdate(n uint64) { pr.Next = n + 1 }
|
|
|
|
|
|
|
|
|
|
// maybeDecrTo returns false if the given to index comes from an out of order message.
|
|
|
|
|
// Otherwise it decreases the progress next index to min(rejected, last) and returns true.
|
|
|
|
|
func (pr *Progress) maybeDecrTo(rejected, last uint64) bool {
|
|
|
|
|
if pr.State == ProgressStateReplicate {
|
|
|
|
|
// the rejection must be stale if the progress has matched and "rejected"
|
|
|
|
|
// is smaller than "match".
|
|
|
|
|
if rejected <= pr.Match {
|
|
|
|
|
return false
|
|
|
|
|
}
|
|
|
|
|
// directly decrease next to match + 1
|
|
|
|
|
pr.Next = pr.Match + 1
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// the rejection must be stale if "rejected" does not match next - 1
|
|
|
|
|
if pr.Next-1 != rejected {
|
|
|
|
|
return false
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if pr.Next = min(rejected, last+1); pr.Next < 1 {
|
|
|
|
|
pr.Next = 1
|
|
|
|
|
}
|
|
|
|
|
pr.resume()
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (pr *Progress) pause() { pr.Paused = true }
|
|
|
|
|
func (pr *Progress) resume() { pr.Paused = false }
|
|
|
|
|
|
|
|
|
|
// isPaused returns whether progress stops sending message.
|
|
|
|
|
func (pr *Progress) isPaused() bool {
|
2015-03-20 04:06:51 +03:00
|
|
|
|
switch pr.State {
|
|
|
|
|
case ProgressStateProbe:
|
|
|
|
|
return pr.Paused
|
|
|
|
|
case ProgressStateReplicate:
|
|
|
|
|
return pr.ins.full()
|
|
|
|
|
case ProgressStateSnapshot:
|
|
|
|
|
return true
|
|
|
|
|
default:
|
|
|
|
|
panic("unexpected state")
|
|
|
|
|
}
|
2015-03-19 20:04:48 +03:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (pr *Progress) snapshotFailure() { pr.PendingSnapshot = 0 }
|
|
|
|
|
|
|
|
|
|
// maybeSnapshotAbort unsets pendingSnapshot if Match is equal or higher than
|
|
|
|
|
// the pendingSnapshot
|
|
|
|
|
func (pr *Progress) maybeSnapshotAbort() bool {
|
|
|
|
|
return pr.State == ProgressStateSnapshot && pr.Match >= pr.PendingSnapshot
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (pr *Progress) String() string {
|
|
|
|
|
return fmt.Sprintf("next = %d, match = %d, state = %s, waiting = %v, pendingSnapshot = %d", pr.Next, pr.Match, pr.State, pr.isPaused(), pr.PendingSnapshot)
|
|
|
|
|
}
|
2015-03-20 04:06:51 +03:00
|
|
|
|
|
|
|
|
|
type inflights struct {
|
|
|
|
|
// the starting index in the buffer
|
|
|
|
|
start int
|
|
|
|
|
// number of inflights in the buffer
|
|
|
|
|
count int
|
|
|
|
|
|
|
|
|
|
// the size of the buffer
|
|
|
|
|
size int
|
|
|
|
|
buffer []uint64
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func newInflights(size int) *inflights {
|
|
|
|
|
return &inflights{
|
|
|
|
|
size: size,
|
|
|
|
|
buffer: make([]uint64, size),
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// add adds an inflight into inflights
|
|
|
|
|
func (in *inflights) add(inflight uint64) {
|
|
|
|
|
if in.full() {
|
|
|
|
|
panic("cannot add into a full inflights")
|
|
|
|
|
}
|
|
|
|
|
next := in.start + in.count
|
|
|
|
|
if next >= in.size {
|
|
|
|
|
next -= in.size
|
|
|
|
|
}
|
|
|
|
|
in.buffer[next] = inflight
|
|
|
|
|
in.count++
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// freeTo frees the inflights smaller or equal to the given `to` flight.
|
|
|
|
|
func (in *inflights) freeTo(to uint64) {
|
2015-04-03 10:17:40 +03:00
|
|
|
|
if in.count == 0 || to < in.buffer[in.start] {
|
|
|
|
|
// out of the left side of the window
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
i, idx := 0, in.start
|
|
|
|
|
for i = 0; i < in.count; i++ {
|
|
|
|
|
if to < in.buffer[idx] { // found the first large inflight
|
2015-03-20 04:06:51 +03:00
|
|
|
|
break
|
|
|
|
|
}
|
2015-04-03 10:17:40 +03:00
|
|
|
|
|
|
|
|
|
// increase index and maybe rotate
|
|
|
|
|
if idx += 1; idx >= in.size {
|
|
|
|
|
idx -= in.size
|
|
|
|
|
}
|
2015-03-20 04:06:51 +03:00
|
|
|
|
}
|
2015-04-03 10:17:40 +03:00
|
|
|
|
// free i inflights and set new start index
|
|
|
|
|
in.count -= i
|
|
|
|
|
in.start = idx
|
2015-03-20 04:06:51 +03:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (in *inflights) freeFirstOne() { in.freeTo(in.buffer[in.start]) }
|
|
|
|
|
|
|
|
|
|
// full returns true if the inflights is full.
|
|
|
|
|
func (in *inflights) full() bool {
|
|
|
|
|
return in.count == in.size
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// resets frees all inflights.
|
|
|
|
|
func (in *inflights) reset() {
|
|
|
|
|
in.count = 0
|
|
|
|
|
in.start = 0
|
|
|
|
|
}
|