Compare commits

..

1 Commits

Author SHA1 Message Date
Vitaliy Filippov a0b20be22c Pre-vote protocol (experimental) 2023-06-28 02:06:52 +03:00
7 changed files with 185 additions and 645 deletions

View File

@ -1,49 +0,0 @@
module.exports = {
"env": {
"es6": true,
"node": true
},
"extends": [
"eslint:recommended",
"plugin:node/recommended"
],
"parserOptions": {
"ecmaVersion": 2020
},
"plugins": [
],
"rules": {
"indent": [
"error",
4
],
"brace-style": [
"error",
"allman",
{ "allowSingleLine": true }
],
"linebreak-style": [
"error",
"unix"
],
"semi": [
"error",
"always"
],
"no-useless-escape": [
"off"
],
"no-control-regex": [
"off"
],
"no-empty": [
"off"
],
"no-process-exit": [
"off"
],
"node/shebang": [
"off"
]
}
};

110
README.md
View File

@ -1,110 +0,0 @@
# TinyRaft
Raft leader election isolated from the rest of the algorithm.
TinyRaft doesn't know anything about replication and doesn't require you to
implement it.
Actual network communication is also abstracted away and hidden behind a simple
callback interface.
The only task of TinyRaft is to elect the leader and guarantee that there is
only one leader at each moment.
TinyRaft can be used:
- As a simple leader election algorithm without replication at all
- As a building block for the standard Raft algorithm if you add log replication
- For other variations of "weaker consensus" if you add another method of replication
Some replication ideas for you:
- Log-less replication: Add a version number for each key in the database
and make the leader synchronize follower databases by simply dumping all
followers' databases with the newest term (followers with older terms should
be ignored), comparing version numbers and making the newest version of each
key win.
- Erasure coding: Suppose you store large values. You can split each value into
N parts, add K parity parts to it using Reed-Solomon codes (ISA-L/jerasure)
and store them on different nodes in the form of Raft-like logs or similar
to the log-less replication with version numbers, and make master synchronize
followers by reconstructing every original value.
## Example Application
[AntiEtcd](https://git.yourcmc.ru/vitalif/antietcd/)
## Usage
```js
const node = new TinyRaft({
nodes: [ 1, 2, 3 ],
nodeId: 1,
electionTimeout: 5000,
heartbeatTimeout: 1000,
leadershipTimeout: 10000,
initialTerm: 0,
leaderPriority: 0,
send: function(to, msg)
{
// Function to send message <msg> to node with ID <to>
// msg.type is one of TinyRaft.VOTE_REQUEST, TinyRaft.VOTE, TinyRaft.PING, TinyRaft.PONG
// msg.leader is the leader ID or null
// msg.term is the term number
// msg.priority is the optional leadership priority if set in config
},
});
// Watch for election state
node.on('change', (st) =>
{
console.log(
'node '+node.nodeId+': '+(st.state == TinyRaft.FOLLOWER ? 'following '+st.leader : st.state)+
', term '+st.term+(st.state == TinyRaft.LEADER ? ', followers: '+st.followers.join(', ') : '')
);
});
// Start Raft node or start a new election round
node.start();
// Optional; may be called for a follower when it receives a message from a live leader,
// for example, a replication message, and causes the follower to move its round expiration forward
node.markAlive();
// Update cluster node list
node.setNodes([ 1, 2, 3 ]);
// Incoming messages should be fed to TinyRaft like this (from = ID of the sender):
node.onReceive(from, msg);
// Stop Raft node
node.stop();
```
## Additional features
### Leader expiration
Supports leader expiration like in NuRaft:
https://github.com/eBay/NuRaft/blob/master/docs/leadership_expiration.md
When leader expiration is enabled, followers respond to leader heartbeats
(pings) with "pong" messages and if the leader doesn't receive a quorum of
replies in leadershipTimeout - it starts a new round of voting.
### Leadership priorities
Also supports leader priorities, similar to NuRaft but even simpler:
if a node receives a VoteRequest message with larger term but with smaller
priority than its own, it immediately starts a new voting round.
It guarantees that a node with non-maximum priority can't become leader
without being re-elected.
If all priorities are equal (or just zero), the election algorithm
becomes identical to the basic algorithm without priorities.
# Author and License
Author: Vitaliy Filippov, 2024
License: [Mozilla Public License 2.0](https://www.mozilla.org/media/MPL/2.0/index.f75d2927d3c1.txt)
or [Vitastor Network Public License 1.1](https://git.yourcmc.ru/vitalif/vitastor/src/branch/master/VNPL-1.1.txt)

View File

@ -1,44 +0,0 @@
// Fake timer for node.js which _guarantees_ that all asynchronous timer
// callbacks are called during a tested period
function FakeTimer(startTime)
{
this.time = Number(startTime)||0;
this.timers = [];
this.id = 0;
this.setTimeout = (func, ms) =>
{
this.timers.push({ id: ++this.id, func, at: this.time+ms });
return this.id;
};
this.setInterval = (func, ms) =>
{
this.timers.push({ id: ++this.id, func, at: this.time+ms, repeat: ms });
return this.id;
};
this.clearInterval = this.clearTimeout = (id) =>
{
this.timers = this.timers.filter(t => t.id != id);
};
this.setImmediate = (func) => this.setTimeout(func, 0);
this.runFor = (ms) =>
{
const end = this.time+ms;
this.timers.sort((a, b) => a.at - b.at);
while (this.timers.length && this.timers[0].at <= end)
{
const { func, at, repeat } = this.timers[0];
if (!repeat)
this.timers.shift();
else
this.timers[0].at += repeat;
if (at > this.time)
this.time = at;
func();
this.timers.sort((a, b) => a.at - b.at);
}
this.time = end;
};
}
module.exports = FakeTimer;

View File

@ -1,35 +0,0 @@
{
"name": "tinyraft",
"version": "1.0.1",
"description": "Tiny & abstract Raft leader election algorithm",
"main": "tinyraft.js",
"scripts": {
"lint": "eslint tinyraft.js tinyraft.spec.js faketimer.js",
"test": "node tinyraft.spec.js"
},
"repository": {
"type": "git",
"url": "https://git.yourcmc.ru/vitalif/tinyraft"
},
"homepage": "https://git.yourcmc.ru/vitalif/tinyraft",
"bugs": {
"url": "https://git.yourcmc.ru/vitalif/tinyraft/issues"
},
"files": [
"tinyraft.js",
"tinyraft.d.ts",
"README.md"
],
"keywords": [
"raft"
],
"author": "Vitaliy Filippov",
"license": "MPL-2.0",
"engines": {
"node": ">=12.0.0"
},
"devDependencies": {
"eslint": "^8.0.0",
"eslint-plugin-node": "^11.1.0"
}
}

36
tinyraft.d.ts vendored
View File

@ -1,36 +0,0 @@
import type { EventEmitter } from 'events';
export type TinyRaftEvents = {
change: {
state: 'LEADER'|'FOLLOWER'|'CANDIDATE',
term: number,
leader: string|number,
followers: (string|number)[],
}[],
};
export type RaftMessage = {
type: 'VOTE_REQUEST'|'VOTE'|'PING'|'PONG',
term: number,
leader?: string|number,
priority?: number,
};
export class TinyRaft extends EventEmitter<TinyRaftEvents>
{
constructor(cfg: {
nodes: (number|string)[],
nodeId: number|string,
electionTimeout?: number,
heartbeatTimeout?: number,
leadershipTimeout?: number,
initialTerm?: number,
leaderPriority?: number,
send: (to: number|string, msg: RaftMessage) => void,
});
start();
stop();
onReceive(from: number|string, msg: RaftMessage);
markAlive();
setNodes(nodes: (number|string)[]);
}

View File

@ -9,20 +9,9 @@
// The only requirement is to guarantee preservation of entries confirmed by
// all hosts participating in consensus.
//
// Supports leader expiration like in NuRaft:
// Supports pre-vote protocol and leader expiration like in NuRaft:
// https://github.com/eBay/NuRaft/blob/master/docs/prevote_protocol.md
// https://github.com/eBay/NuRaft/blob/master/docs/leadership_expiration.md
//
// Also supports leader priorities, similar to NuRaft but even simpler:
// If a node receives a VoteRequest message with larger term but with smaller
// priority than its own, it immediately starts a new voting round.
// It guarantees that a node with non-maximum priority can't become leader
// without being re-elected.
// If all priorities are equal (or just zero), the election algorithm
// becomes identical to the basic algorithm without priorities.
//
// (c) Vitaliy Filippov, 2024
// License: Mozilla Public License 2.0 or Vitastor Network Public License 1.1
// (https://git.yourcmc.ru/vitalif/vitastor/src/branch/master/VNPL-1.1.txt)
const EventEmitter = require('events');
@ -30,6 +19,10 @@ const VOTE_REQUEST = 'vote_request';
const VOTE = 'vote';
const PING = 'ping';
const PONG = 'pong';
// Following 3 are required only for the "pre-vote" feature
const PRE_VOTE_REQUEST = 'pre_vote_request';
const PRE_VOTE = 'pre_vote';
const FOLLOW = 'follow';
const CANDIDATE = 'candidate';
const LEADER = 'leader';
@ -43,11 +36,9 @@ class TinyRaft extends EventEmitter
// electionTimeout?: number,
// heartbeatTimeout?: number,
// leadershipTimeout?: number,
// enablePrevote?: bool,
// initialTerm?: number,
// leaderPriority?: number,
// }
// NOTE: leadershipTimeout, if enabled, should be > heartbeatTimeout, and ideally < electionTimeout
// In that case a leader with dead network will be expired before being re-elected
constructor(config)
{
super();
@ -58,7 +49,7 @@ class TinyRaft extends EventEmitter
this.randomTimeout = config.randomTimeout > 0 ? Number(config.randomTimeout) : this.electionTimeout;
this.heartbeatTimeout = Number(config.heartbeatTimeout) || 1000;
this.leadershipTimeout = Number(config.leadershipTimeout) || 0;
this.leaderPriority = Number(config.leaderPriority) || undefined;
this.enablePrevote = config.enablePrevote ? true : false;
if (!this.nodeId || this.nodeId instanceof Object ||
!(this.nodes instanceof Array) || this.nodes.filter(n => !n || n instanceof Object).length > 0 ||
!(this.send instanceof Function))
@ -68,42 +59,63 @@ class TinyRaft extends EventEmitter
this.term = 0;
this.state = null;
this.leader = null;
this.confirmed = {};
this.preVoting = false;
}
_nextTerm(after)
{
if (this.electionTimer)
{
TinyRaft.clearTimeout(this.electionTimer);
clearTimeout(this.electionTimer);
this.electionTimer = null;
}
if (after >= 0)
{
this.electionTimer = TinyRaft.setTimeout(() => this.start(), after + this.randomTimeout * Math.random());
this.electionTimer = setTimeout(() => this.start(), after + this.randomTimeout * Math.random());
}
}
_prevote()
{
this.leaderTimedOut = true;
this.preVoting = true;
this.preVoteOk = this.preVoteFail = 0;
this.votes = { [this.nodeId]: [ this.nodeId ] };
for (const node of this.nodes)
{
if (node != this.nodeId)
{
this.send(node, { type: PRE_VOTE_REQUEST, term: this.term });
}
}
this._nextTerm(this.electionTimeout);
}
start()
{
this._nextTerm(-1);
this.electionTimer = null;
if (this.enablePrevote && !this.preVoting)
{
this._prevote();
return;
}
this.preVoting = false;
this.term++;
this.voted = 1;
this.votes = { [this.nodeId]: [ this.nodeId ] };
this.state = CANDIDATE;
this.followers = null;
this.confirmed = {};
this.leader = this.nodeId;
if (!this.heartbeatTimer)
{
this.heartbeatTimer = TinyRaft.setInterval(() => this._heartbeat(), this.heartbeatTimeout);
this.heartbeatTimer = setInterval(() => this._heartbeat(), this.heartbeatTimeout);
}
for (const node of this.nodes)
{
if (node != this.nodeId)
{
this.send(node, { type: VOTE_REQUEST, term: this.term, leader: this.leader, priority: this.leaderPriority });
this.send(node, { type: VOTE_REQUEST, term: this.term, leader: this.leader });
}
}
// Next term will start right after this one times out
@ -115,12 +127,12 @@ class TinyRaft extends EventEmitter
{
if (this.electionTimer)
{
TinyRaft.clearTimeout(this.electionTimer);
clearTimeout(this.electionTimer);
this.electionTimer = null;
}
if (this.heartbeatTimer)
{
TinyRaft.clearInterval(this.heartbeatTimer);
clearTimeout(this.heartbeatTimer);
this.heartbeatTimer = null;
}
}
@ -129,12 +141,11 @@ class TinyRaft extends EventEmitter
{
if (this.state == LEADER)
{
this.confirmed = {};
for (const node of (this.leadershipTimeout ? this.followers : this.nodes))
for (const node of this.votes[this.nodeId])
{
if (node != this.nodeId)
{
this.send(node, { type: PING, term: this.term, priority: this.leaderPriority });
this.send(node, { type: PING, term: this.term });
}
}
}
@ -142,155 +153,117 @@ class TinyRaft extends EventEmitter
onReceive(from, msg)
{
if (msg.type == VOTE_REQUEST)
if (msg.type == PRE_VOTE_REQUEST)
{
this._onReceiveVoteRequest(from, msg);
this.send(from, { type: PRE_VOTE, term: this.term, leader: this.leaderTimedOut ? null : this.leader });
}
else if (msg.type == VOTE)
else if (msg.type == PRE_VOTE && this.preVoting)
{
this._onReceiveVote(from, msg);
}
else if (msg.type == PING)
{
this._onReceivePing(from, msg);
}
else if (msg.type == PONG)
{
this._onReceivePong(from, msg);
}
}
_onReceiveVoteRequest(from, msg)
{
if (msg.term > this.term && msg.leader)
{
if (this.leaderPriority && (msg.priority||0) < this.leaderPriority)
if (msg.term == this.term && msg.leader)
{
this.term = msg.term;
this.start();
this.preVoteOk++;
}
else
{
this.preVoteFail++;
}
if (this.preVoteOk > this.nodes.length/2)
{
this.preVoting = false;
this.preVoteOk = 0;
this.preVoteFail = 0;
this._nextTerm(this.electionTimeout);
}
if (this.preVoteFail > this.nodes.length/2)
{
this._nextTerm(-1);
this.preVoteOk = 0;
this.preVoteFail = 0;
this.start();
}
}
else if (msg.type == VOTE_REQUEST)
{
if (msg.term > this.term && msg.leader)
{
this.leader = msg.leader;
this.term = msg.term;
this.state = CANDIDATE;
this.followers = null;
this.confirmed = {};
this._nextTerm(this.heartbeatTimeout*2 + this.electionTimeout);
this.emit('change', { state: this.state, term: this.term, leader: this.leader });
}
this.send(from, { type: VOTE, term: this.term, leader: this.leader });
}
const prio = (this.leader == this.nodeId ? this.leaderPriority : undefined);
this.send(from, { type: VOTE, term: this.term, leader: this.leader, priority: prio });
}
_onReceiveVote(from, msg)
{
if (!msg.leader || msg.term < this.term)
{
return;
}
if (msg.term > this.term)
{
this.term = msg.term;
this.state = FOLLOWER;
this.followers = null;
this.confirmed = {};
this.leader = msg.leader;
this.votes = {};
this.emit('change', { state: this.state, term: this.term, leader: this.leader });
// Repeat VOTE to the leader to join it
this.send(this.leader, { type: VOTE, term: this.term, leader: this.leader, priority: msg.priority });
return;
}
// add <from> as voter for <msg.leader>
this.votes[msg.leader] = this.votes[msg.leader] || [];
let found = false;
for (const voter of this.votes[msg.leader])
{
if (voter == from)
{
found = true;
break;
}
}
if (!found)
else if (msg.type == VOTE && msg.term == this.term)
{
this.voted++;
this.votes[msg.leader] = this.votes[msg.leader] || [];
this.votes[msg.leader].push(from);
}
const n = this.votes[msg.leader].length;
if (n == 1 + (0 | this.nodes.length/2))
{
if (msg.leader == this.nodeId)
const n = this.votes[msg.leader].length;
if (n == 1 + (0 | this.nodes.length/2))
{
if (msg.leader == this.nodeId)
{
this.leader = msg.leader;
this.state = LEADER;
this._nextTerm(this.leadershipTimeout > 0 ? this.leadershipTimeout : -1);
this.followers = this.votes[this.nodeId];
for (const follower of this.followers)
{
if (follower != this.nodeId)
{
// Send a heartbeat to confirm leadership
this.send(follower, { type: PING, term: this.term });
}
}
this.emit('change', { state: this.state, term: this.term, leader: this.nodeId, followers: this.votes[this.nodeId] });
}
else
{
this._nextTerm(0);
}
}
else if (n > this.nodes.length/2 && this.state == LEADER && msg.leader == this.nodeId)
{
this.leader = msg.leader;
this.state = LEADER;
this.confirmed = {};
this._nextTerm(this.leadershipTimeout > 0 ? this.leadershipTimeout : -1);
this.followers = this.votes[this.nodeId];
// Send a heartbeat to confirm leadership
this._heartbeat();
this.send(from, { type: PING, term: this.term });
this.emit('change', { state: this.state, term: this.term, leader: this.nodeId, followers: this.votes[this.nodeId] });
}
else
else if (this._isVotingFailed())
{
this._nextTerm(0);
}
}
else if (n > this.nodes.length/2 && this.state == LEADER && msg.leader == this.nodeId)
else if (msg.type == PING)
{
this.followers = this.votes[this.nodeId];
// Send a heartbeat to confirm leadership
this.send(from, { type: PING, term: this.term, priority: this.leaderPriority });
this.emit('change', { state: this.state, term: this.term, leader: this.nodeId, followers: this.votes[this.nodeId] });
}
else if (this._isVotingFailed())
{
this._nextTerm(0);
}
}
_onReceivePing(from, msg)
{
if (this.state == CANDIDATE && this.term == msg.term && from == this.leader)
{
this.state = FOLLOWER;
this.emit('change', { state: this.state, term: this.term, leader: this.leader });
}
if (this.state == FOLLOWER && from == this.leader)
{
this.markAlive();
}
if (this.leadershipTimeout > 0 || this.state == FOLLOWER && from != this.leader)
{
this.send(from, { type: PONG, term: this.term, leader: this.leader });
}
}
_onReceivePong(from, msg)
{
if (this.state != LEADER)
{
return;
}
if (msg.leader != this.nodeId || msg.term != this.term)
{
this.start();
}
else if (this.leadershipTimeout > 0)
{
this.confirmed[from] = true;
if (Object.keys(this.confirmed).length >= (1 + (0 | this.nodes.length/2)))
if (this.state == CANDIDATE && this.term == msg.term && from == this.leader)
{
// We have quorum
this._nextTerm(this.leadershipTimeout);
this.state = FOLLOWER;
this.emit('change', { state: this.state, term: this.term, leader: this.nodeId });
}
if (this.state == FOLLOWER && from == this.leader)
{
this.markAlive();
}
if (this.leadershipTimeout > 0)
{
this.send(from, { type: PONG, term: this.term, leader: this.leader });
}
}
else if (msg.type == PONG && this.state == LEADER)
{
this._nextTerm(-1);
if (msg.leader != this.nodeId)
{
this.start();
}
}
}
markAlive()
{
this.leaderTimedOut = false;
this._nextTerm(this.heartbeatTimeout*2 + this.electionTimeout);
}
@ -316,22 +289,18 @@ class TinyRaft extends EventEmitter
if (this.state == LEADER)
{
this.votes[this.nodeId] = this.votes[this.nodeId].filter(n => nodes.indexOf(n) >= 0);
if (this.votes[this.nodeId].length < (1 + (0 | this.nodes.length/2)))
this.start();
else
this.emit('change', { state: this.state, term: this.term, leader: this.nodeId, followers: this.votes[this.nodeId] });
this.emit('change', { state: this.state, term: this.term, leader: this.nodeId, followers: this.votes[this.nodeId] });
}
else if (this.state == FOLLOWER && nodes.indexOf(this.leader) < 0 || this.state == CANDIDATE)
{
this.start();
r.nextTerm(-1);
r.start();
}
}
}
TinyRaft.setTimeout = setTimeout;
TinyRaft.clearTimeout = clearTimeout;
TinyRaft.setInterval = setInterval;
TinyRaft.clearInterval = clearInterval;
TinyRaft.PRE_VOTE_REQUEST = PRE_VOTE_REQUEST;
TinyRaft.PRE_VOTE = PRE_VOTE;
TinyRaft.VOTE_REQUEST = VOTE_REQUEST;
TinyRaft.VOTE = VOTE;
TinyRaft.PING = PING;

View File

@ -1,51 +1,39 @@
const TinyRaft = require('./tinyraft.js');
const FakeTimer = require('./faketimer.js');
const fake = new FakeTimer();
TinyRaft.setTimeout = fake.setTimeout;
TinyRaft.clearTimeout = fake.clearTimeout;
TinyRaft.setInterval = fake.setInterval;
TinyRaft.clearInterval = fake.clearInterval;
function newNode(id, nodes, partitions, mod)
function newNode(id, nodes, partitions)
{
partitions = partitions || {};
let cfg = {
let n = new TinyRaft({
nodes: [ 1, 2, 3, 4, 5 ],
nodeId: id,
heartbeatTimeout: 100,
electionTimeout: 500,
send: function(to, msg)
{
if (!partitions[n.nodeId+'-'+to] && !partitions[to+'-'+n.nodeId] && nodes[to])
if (!partitions[n.nodeId+'-'+to] && nodes[to])
{
console.log('['+fake.time+'] received from '+n.nodeId+' to '+to+': '+JSON.stringify(msg));
fake.setImmediate(function() { nodes[to].onReceive(n.nodeId, msg); });
console.log('received from '+n.nodeId+' to '+to+': '+JSON.stringify(msg));
setImmediate(function() { nodes[to].onReceive(n.nodeId, msg); });
}
else
console.log('['+fake.time+'] failed to send from '+n.nodeId+' to '+to+': '+JSON.stringify(msg));
},
};
if (mod)
mod(cfg);
let n = new TinyRaft(cfg);
});
n.on('change', (st) =>
{
console.log(
'['+fake.time+'] node '+n.nodeId+': '+(st.state == TinyRaft.FOLLOWER ? 'following '+st.leader : st.state)+
'node '+n.nodeId+': '+(st.state == TinyRaft.FOLLOWER ? 'following '+st.leader : st.state)+
', term '+st.term+(st.state == TinyRaft.LEADER ? ', followers: '+st.followers.join(', ') : '')
);
});
nodes[id] = n;
}
function newNodes(count, partitions, mod)
function newNodes(count, partitions)
{
partitions = partitions || {};
const nodes = {};
for (let i = 1; i <= count; i++)
{
newNode(i, nodes, partitions, mod);
newNode(i, nodes, partitions);
}
for (let i = 1; i <= count; i++)
{
@ -57,7 +45,6 @@ function newNodes(count, partitions, mod)
function checkQuorum(nodes, count)
{
let leaders = 0;
let out = 0;
for (const i in nodes)
{
if (nodes[i].state == TinyRaft.LEADER)
@ -68,16 +55,14 @@ function checkQuorum(nodes, count)
}
else if (nodes[i].state != TinyRaft.FOLLOWER)
{
out++;
if (out > nodes.length-count)
throw new Error('node '+i+' is not in quorum: state '+nodes[i].state);
throw new Error('node '+i+' is not in quorum: state '+nodes[i].state);
}
}
if (leaders != 1)
{
throw new Error('we have '+leaders+' leaders instead of exactly 1');
}
console.log('['+fake.time+'] OK: '+count+' nodes in quorum');
console.log('OK: '+count+' nodes in quorum');
}
function checkNoQuorum(nodes)
@ -87,12 +72,11 @@ function checkNoQuorum(nodes)
{
throw new Error('we have non-candidates ('+nc.map(n => n.nodeId).join(', ')+'), but we should not');
}
console.log('['+fake.time+'] OK: '+Object.keys(nodes).length+' candidates, no quorum');
console.log('OK: '+Object.keys(nodes).length+' candidates, no quorum');
}
async function testStartThenRemoveNode()
{
console.log('--------------------------------------------------------------------------------');
console.log('testStartThenRemoveNode');
const nodes = newNodes(5);
let leaderChanges = 0, prevLeader = null;
@ -104,7 +88,7 @@ async function testStartThenRemoveNode()
leaderChanges++;
}
});
fake.runFor(2000);
await new Promise(ok => setTimeout(ok, 2000));
checkQuorum(nodes, 5);
if (leaderChanges >= 3)
{
@ -116,7 +100,7 @@ async function testStartThenRemoveNode()
nodes[leader].stop();
delete nodes[leader];
// Check quorum after 2000ms
fake.runFor(2000);
await new Promise(ok => setTimeout(ok, 2000));
checkQuorum(nodes, 4);
// Stop the leader again
leader = nodes[Object.keys(nodes)[0]].leader;
@ -124,7 +108,7 @@ async function testStartThenRemoveNode()
nodes[leader].stop();
delete nodes[leader];
// Check quorum after 2000ms
fake.runFor(2000);
await new Promise(ok => setTimeout(ok, 2000));
checkQuorum(nodes, 3);
// Stop the leader again
leader = nodes[Object.keys(nodes)[0]].leader;
@ -132,7 +116,7 @@ async function testStartThenRemoveNode()
nodes[leader].stop();
delete nodes[leader];
// Check that no quorum exists
fake.runFor(2000);
await new Promise(ok => setTimeout(ok, 2000));
checkNoQuorum(nodes);
// Clean up
for (const id in nodes)
@ -144,10 +128,9 @@ async function testStartThenRemoveNode()
async function testAddNode()
{
console.log('--------------------------------------------------------------------------------');
console.log('testAddNode');
const nodes = newNodes(5, {}, cfg => cfg.initialTerm = 1000);
fake.runFor(2000);
const nodes = newNodes(5);
await new Promise(ok => setTimeout(ok, 2000));
checkQuorum(nodes, 5);
// Add node
newNode(6, nodes);
@ -155,7 +138,7 @@ async function testAddNode()
nodes[i].setNodes([ 1, 2, 3, 4, 5, 6 ]);
nodes[6].start();
// Check quorum after 2000ms
fake.runFor(2000);
await new Promise(ok => setTimeout(ok, 2000));
checkQuorum(nodes, 6);
// Clean up
for (const id in nodes)
@ -165,200 +148,62 @@ async function testAddNode()
console.log('testAddNode: OK');
}
async function testLeadershipExpiration()
async function testPreVote()
{
console.log('--------------------------------------------------------------------------------');
console.log('testLeadershipExpiration');
const partitions = {};
const nodes = newNodes(5, partitions, cfg => cfg.leadershipTimeout = 1500);
// Check that 5 nodes are in quorum after 2000ms
fake.runFor(2000);
checkQuorum(nodes, 5);
// Break network on the leader
let leader = nodes[1].leader;
console.log("["+fake.time+"] --> stopping the leader's ("+leader+") network");
for (let i = 1; i <= 5; i++)
// The original example for pre-vote protocol from here:
// https://github.com/eBay/NuRaft/blob/master/docs/prevote_protocol.md
// only has partitions between nodes 1-5, 2-4, 3-4.
// But that setup has a problem of leader jumping between nodes 3 and 4
// which can't be prevented by prevotes because 4 only has connections
// to 2 nodes so they can't form a majority to reject the vote.
const partitions = {
'1-5': true,
'2-4': true,
'3-4': true,
'2-3': true,
'5-1': true,
'4-2': true,
'4-3': true,
'3-2': true,
};
const nodes = newNodes(5, partitions);
let leader = await new Promise((ok, no) =>
{
partitions[i+'-'+leader] = true;
partitions[leader+'-'+i] = true;
}
// Check that the leader loses leadership after 2 * leadershipTimeout
fake.runFor(3000);
if (nodes[leader].state != TinyRaft.CANDIDATE)
{
throw new Error("leadership expiration doesn't work");
}
// Clean up
for (const id in nodes)
{
nodes[id].stop();
}
console.log('testLeadershipExpiration: OK');
}
async function testRestart()
{
console.log('--------------------------------------------------------------------------------');
console.log('testRestart');
const nodes = newNodes(5, {}, cfg => cfg.initialTerm = 1000);
let leaderChanges = 0, prevLeader = null;
nodes[2].on('change', (st) =>
{
const leader = st.state == TinyRaft.CANDIDATE ? null : st.leader;
if (leader != prevLeader)
setTimeout(() => no(new Error('no leader in 500 ms')), 500);
const chg = (st) =>
{
prevLeader = leader;
leaderChanges++;
}
if (st.leader)
{
nodes[1].off('change', chg);
if (st.leader != 1 && st.leader != 5)
no('leader is not 1 or 5');
ok(st.leader);
}
};
nodes[1].on('change', chg);
});
// Check that 5 nodes are in quorum after 2000ms
fake.runFor(2000);
checkQuorum(nodes, 5);
if (leaderChanges >= 3)
console.log('OK: Got leader in 500 ms');
await new Promise((ok, no) =>
{
throw new Error("leaderChanges = "+leaderChanges+" (expected < 3)");
}
// Stop a follower
let restarted = 1 + (prevLeader % 5);
if (restarted == 2)
{
restarted = 1 + (prevLeader + 1) % 5;
}
console.log("["+fake.time+"] --> stopping a follower (node "+restarted+")");
nodes[restarted].stop();
delete nodes[restarted];
// Wait 2000ms
fake.runFor(2000);
// Restart a follower
console.log("["+fake.time+"] --> restarting a follower (node "+restarted+")");
leaderChanges = 0;
newNode(restarted, nodes, {}, null);
nodes[restarted].start();
// Check quorum and the fact that the leader didn't change after 2000ms
fake.runFor(2000);
checkQuorum(nodes, 5);
if (leaderChanges > 0)
{
throw new Error("leader changed after restart of a follower");
}
// Clean up
for (const id in nodes)
{
nodes[id].stop();
}
console.log('testRestart: OK');
}
async function testChangeNodes()
{
console.log('--------------------------------------------------------------------------------');
console.log('testChangeNodes');
console.log('['+fake.time+'] --> starting nodes 1-5');
const nodes = newNodes(5, {}, cfg => cfg.initialTerm = 1000);
// Check that 5 nodes are in quorum after 2000ms
fake.runFor(2000);
checkQuorum(nodes, 5);
// Stop node 4
console.log('['+fake.time+'] --> stopping node 4');
nodes[4].stop();
delete nodes[4];
// Wait 1000ms
fake.runFor(1000);
// Change nodes from 1 2 3 4 5 to 1 2 3 5 6
console.log('['+fake.time+'] --> starting node 6');
newNode(6, nodes);
nodes[6].start();
nodes[1].setNodes([ 1, 2, 3, 5, 6 ]);
nodes[2].setNodes([ 1, 2, 3, 5, 6 ]);
nodes[3].setNodes([ 1, 2, 3, 5, 6 ]);
nodes[5].setNodes([ 1, 2, 3, 5, 6 ]);
nodes[6].setNodes([ 1, 2, 3, 5, 6 ]);
// Check that 5 nodes are in quorum after 2000ms
fake.runFor(2000);
checkQuorum(nodes, 5);
// Clean up
for (const id in nodes)
{
if (nodes[id])
nodes[id].stop();
}
console.log('testChangeNodes: OK');
}
async function testLeaderPriority()
{
console.log('--------------------------------------------------------------------------------');
console.log('testLeaderPriority');
console.log('['+fake.time+'] --> starting nodes 1-5');
const nodes = newNodes(5, {}, cfg => cfg.leaderPriority = cfg.nodeId+1);
// Check that 5 nodes are in quorum after 2000ms
fake.runFor(2000);
checkQuorum(nodes, 5);
if (nodes[1].leader != 5)
{
throw new Error('leader is not 5');
}
// Stop node 5
console.log('['+fake.time+'] --> stopping node 5');
nodes[5].stop();
delete nodes[5];
// Wait 2000ms and check that the leader is now 4
fake.runFor(2000);
checkQuorum(nodes, 4);
if (nodes[1].leader != 4)
{
throw new Error('leader is not 4');
}
// Stop node 4
console.log('['+fake.time+'] --> stopping node 4');
nodes[4].stop();
delete nodes[4];
// Wait 2000ms and check that the leader is now 3
fake.runFor(2000);
checkQuorum(nodes, 3);
if (nodes[1].leader != 3)
{
throw new Error('leader is not 3');
}
// Clean up
for (const id in nodes)
{
if (nodes[id])
nodes[id].stop();
}
console.log('testLeaderPriority: OK');
}
async function testPartition1_3()
{
console.log('--------------------------------------------------------------------------------');
console.log('testPartition1_3');
const partitions = { '1-3': true, '3-1': true };
const nodes = newNodes(3, partitions, cfg => cfg.nodes = [ 1, 2, 3 ]);
// Check that 2 or 3 nodes are in quorum after 5000ms
// This situation should be fixed by "prevote protocol", but it breaks other things
fake.runFor(5000);
if (nodes[2].leader == 2)
checkQuorum(nodes, 3);
else
checkQuorum(nodes, 2);
// Clean up
for (const id in nodes)
{
nodes[id].stop();
}
console.log('testPartition1_3: OK');
setTimeout(ok, 5000);
const chg = (st) =>
{
if (st.state != TinyRaft.FOLLOWER || st.leader != leader)
{
nodes[5].off('change', chg);
no(new Error('leader changed in 5000 ms'));
}
};
nodes[5].on('change', chg);
});
console.log('OK: No leader change in 5000 ms');
}
async function run()
{
await testStartThenRemoveNode();
await testAddNode();
await testLeadershipExpiration();
await testRestart();
await testChangeNodes();
await testLeaderPriority();
await testPartition1_3();
await testPreVote();
process.exit(0);
}