Compare commits

...

4 Commits

3 changed files with 126 additions and 54 deletions

44
faketimer.js Normal file
View File

@ -0,0 +1,44 @@
// Fake timer for node.js which _guarantees_ that all asynchronous timer
// callbacks are called during a tested period
function FakeTimer(startTime)
{
this.time = Number(startTime)||0;
this.timers = [];
this.id = 0;
this.setTimeout = (func, ms) =>
{
this.timers.push({ id: ++this.id, func, at: this.time+ms });
return this.id;
};
this.setInterval = (func, ms) =>
{
this.timers.push({ id: ++this.id, func, at: this.time+ms, repeat: ms });
return this.id;
};
this.clearInterval = this.clearTimeout = (id) =>
{
this.timers = this.timers.filter(t => t.id != id);
};
this.setImmediate = (func) => this.setTimeout(func, 0);
this.runFor = async (ms) =>
{
const end = this.time+ms;
this.timers.sort((a, b) => a.at - b.at);
while (this.timers.length && this.timers[0].at <= end)
{
const { func, at, repeat } = this.timers[0];
if (!repeat)
this.timers.shift();
else
this.timers[0].at += repeat;
if (at > this.time)
this.time = at;
func();
this.timers.sort((a, b) => a.at - b.at);
}
this.time = end;
};
}
module.exports = FakeTimer;

View File

@ -62,18 +62,19 @@ class TinyRaft extends EventEmitter
this.term = 0;
this.state = null;
this.leader = null;
this.confirmed = {};
}
_nextTerm(after)
{
if (this.electionTimer)
{
clearTimeout(this.electionTimer);
TinyRaft.clearTimeout(this.electionTimer);
this.electionTimer = null;
}
if (after >= 0)
{
this.electionTimer = setTimeout(() => this.start(), after + this.randomTimeout * Math.random());
this.electionTimer = TinyRaft.setTimeout(() => this.start(), after + this.randomTimeout * Math.random());
}
}
@ -86,10 +87,11 @@ class TinyRaft extends EventEmitter
this.votes = { [this.nodeId]: [ this.nodeId ] };
this.state = CANDIDATE;
this.followers = null;
this.confirmed = {};
this.leader = this.nodeId;
if (!this.heartbeatTimer)
{
this.heartbeatTimer = setInterval(() => this._heartbeat(), this.heartbeatTimeout);
this.heartbeatTimer = TinyRaft.setInterval(() => this._heartbeat(), this.heartbeatTimeout);
}
for (const node of this.nodes)
{
@ -107,12 +109,12 @@ class TinyRaft extends EventEmitter
{
if (this.electionTimer)
{
clearTimeout(this.electionTimer);
TinyRaft.clearTimeout(this.electionTimer);
this.electionTimer = null;
}
if (this.heartbeatTimer)
{
clearTimeout(this.heartbeatTimer);
TinyRaft.clearInterval(this.heartbeatTimer);
this.heartbeatTimer = null;
}
}
@ -121,6 +123,7 @@ class TinyRaft extends EventEmitter
{
if (this.state == LEADER)
{
this.confirmed = {};
for (const node of (this.leadershipTimeout ? this.followers : this.nodes))
{
if (node != this.nodeId)
@ -165,6 +168,8 @@ class TinyRaft extends EventEmitter
this.leader = msg.leader;
this.term = msg.term;
this.state = CANDIDATE;
this.followers = null;
this.confirmed = {};
this._nextTerm(this.heartbeatTimeout*2 + this.electionTimeout);
this.emit('change', { state: this.state, term: this.term, leader: this.leader });
}
@ -184,6 +189,7 @@ class TinyRaft extends EventEmitter
this.term = msg.term;
this.state = FOLLOWER;
this.followers = null;
this.confirmed = {};
this.leader = msg.leader;
this.votes = {};
this.emit('change', { state: this.state, term: this.term, leader: this.leader });
@ -214,6 +220,7 @@ class TinyRaft extends EventEmitter
{
this.leader = msg.leader;
this.state = LEADER;
this.confirmed = {};
this._nextTerm(this.leadershipTimeout > 0 ? this.leadershipTimeout : -1);
this.followers = this.votes[this.nodeId];
// Send a heartbeat to confirm leadership
@ -249,7 +256,7 @@ class TinyRaft extends EventEmitter
{
this.markAlive();
}
if (this.leadershipTimeout > 0)
if (this.leadershipTimeout > 0 || this.state == FOLLOWER && from != this.leader)
{
this.send(from, { type: PONG, term: this.term, leader: this.leader });
}
@ -261,13 +268,18 @@ class TinyRaft extends EventEmitter
{
return;
}
if (msg.leader != this.nodeId)
if (msg.leader != this.nodeId || msg.term != this.term)
{
this.start();
}
else
else if (this.leadershipTimeout > 0)
{
this._nextTerm(this.leadershipTimeout > 0 ? this.leadershipTimeout : -1);
this.confirmed[from] = true;
if (Object.keys(this.confirmed).length >= (1 + (0 | this.nodes.length/2)))
{
// We have quorum
this._nextTerm(this.leadershipTimeout);
}
}
}
@ -311,6 +323,10 @@ class TinyRaft extends EventEmitter
}
}
TinyRaft.setTimeout = setTimeout;
TinyRaft.clearTimeout = clearTimeout;
TinyRaft.setInterval = setInterval;
TinyRaft.clearInterval = clearInterval;
TinyRaft.VOTE_REQUEST = VOTE_REQUEST;
TinyRaft.VOTE = VOTE;
TinyRaft.PING = PING;

View File

@ -1,4 +1,11 @@
const TinyRaft = require('./tinyraft.js');
const FakeTimer = require('./faketimer.js');
const fake = new FakeTimer();
TinyRaft.setTimeout = fake.setTimeout;
TinyRaft.clearTimeout = fake.clearTimeout;
TinyRaft.setInterval = fake.setInterval;
TinyRaft.clearInterval = fake.clearInterval;
function newNode(id, nodes, partitions, mod)
{
@ -12,8 +19,8 @@ function newNode(id, nodes, partitions, mod)
{
if (!partitions[n.nodeId+'-'+to] && !partitions[to+'-'+n.nodeId] && nodes[to])
{
console.log('received from '+n.nodeId+' to '+to+': '+JSON.stringify(msg));
setImmediate(function() { nodes[to].onReceive(n.nodeId, msg); });
console.log('['+fake.time+'] received from '+n.nodeId+' to '+to+': '+JSON.stringify(msg));
fake.setImmediate(function() { nodes[to].onReceive(n.nodeId, msg); });
}
},
};
@ -23,7 +30,7 @@ function newNode(id, nodes, partitions, mod)
n.on('change', (st) =>
{
console.log(
'node '+n.nodeId+': '+(st.state == TinyRaft.FOLLOWER ? 'following '+st.leader : st.state)+
'['+fake.time+'] node '+n.nodeId+': '+(st.state == TinyRaft.FOLLOWER ? 'following '+st.leader : st.state)+
', term '+st.term+(st.state == TinyRaft.LEADER ? ', followers: '+st.followers.join(', ') : '')
);
});
@ -48,6 +55,7 @@ function newNodes(count, partitions, mod)
function checkQuorum(nodes, count)
{
let leaders = 0;
let out = 0;
for (const i in nodes)
{
if (nodes[i].state == TinyRaft.LEADER)
@ -58,14 +66,16 @@ function checkQuorum(nodes, count)
}
else if (nodes[i].state != TinyRaft.FOLLOWER)
{
throw new Error('node '+i+' is not in quorum: state '+nodes[i].state);
out++;
if (out > nodes.length-count)
throw new Error('node '+i+' is not in quorum: state '+nodes[i].state);
}
}
if (leaders != 1)
{
throw new Error('we have '+leaders+' leaders instead of exactly 1');
}
console.log('OK: '+count+' nodes in quorum');
console.log('['+fake.time+'] OK: '+count+' nodes in quorum');
}
function checkNoQuorum(nodes)
@ -75,11 +85,12 @@ function checkNoQuorum(nodes)
{
throw new Error('we have non-candidates ('+nc.map(n => n.nodeId).join(', ')+'), but we should not');
}
console.log('OK: '+Object.keys(nodes).length+' candidates, no quorum');
console.log('['+fake.time+'] OK: '+Object.keys(nodes).length+' candidates, no quorum');
}
async function testStartThenRemoveNode()
{
console.log('--------------------------------------------------------------------------------');
console.log('testStartThenRemoveNode');
const nodes = newNodes(5);
let leaderChanges = 0, prevLeader = null;
@ -91,7 +102,7 @@ async function testStartThenRemoveNode()
leaderChanges++;
}
});
await new Promise(ok => setTimeout(ok, 2000));
await fake.runFor(2000);
checkQuorum(nodes, 5);
if (leaderChanges >= 3)
{
@ -103,7 +114,7 @@ async function testStartThenRemoveNode()
nodes[leader].stop();
delete nodes[leader];
// Check quorum after 2000ms
await new Promise(ok => setTimeout(ok, 2000));
await fake.runFor(2000);
checkQuorum(nodes, 4);
// Stop the leader again
leader = nodes[Object.keys(nodes)[0]].leader;
@ -111,7 +122,7 @@ async function testStartThenRemoveNode()
nodes[leader].stop();
delete nodes[leader];
// Check quorum after 2000ms
await new Promise(ok => setTimeout(ok, 2000));
await fake.runFor(2000);
checkQuorum(nodes, 3);
// Stop the leader again
leader = nodes[Object.keys(nodes)[0]].leader;
@ -119,7 +130,7 @@ async function testStartThenRemoveNode()
nodes[leader].stop();
delete nodes[leader];
// Check that no quorum exists
await new Promise(ok => setTimeout(ok, 2000));
await fake.runFor(2000);
checkNoQuorum(nodes);
// Clean up
for (const id in nodes)
@ -131,9 +142,10 @@ async function testStartThenRemoveNode()
async function testAddNode()
{
console.log('--------------------------------------------------------------------------------');
console.log('testAddNode');
const nodes = newNodes(5, {}, cfg => cfg.initialTerm = 1000);
await new Promise(ok => setTimeout(ok, 2000));
await fake.runFor(2000);
checkQuorum(nodes, 5);
// Add node
newNode(6, nodes);
@ -141,7 +153,7 @@ async function testAddNode()
nodes[i].setNodes([ 1, 2, 3, 4, 5, 6 ]);
nodes[6].start();
// Check quorum after 2000ms
await new Promise(ok => setTimeout(ok, 2000));
await fake.runFor(2000);
checkQuorum(nodes, 6);
// Clean up
for (const id in nodes)
@ -153,22 +165,23 @@ async function testAddNode()
async function testLeadershipExpiration()
{
console.log('--------------------------------------------------------------------------------');
console.log('testLeadershipExpiration');
const partitions = {};
const nodes = newNodes(5, partitions, cfg => cfg.leadershipTimeout = 1500);
// Check that 5 nodes are in quorum after 2000ms
await new Promise(ok => setTimeout(ok, 2000));
await fake.runFor(2000);
checkQuorum(nodes, 5);
// Break network on the leader
let leader = nodes[1].leader;
console.log("stopping the leader's ("+leader+") network");
console.log("["+fake.time+"] --> stopping the leader's ("+leader+") network");
for (let i = 1; i <= 5; i++)
{
partitions[i+'-'+leader] = true;
partitions[leader+'-'+i] = true;
}
// Check that the leader loses leadership after 2 * leadershipTimeout
await new Promise(ok => setTimeout(ok, 3000));
await fake.runFor(3000);
if (nodes[leader].state != TinyRaft.CANDIDATE)
{
throw new Error("leadership expiration doesn't work");
@ -183,6 +196,7 @@ async function testLeadershipExpiration()
async function testRestart()
{
console.log('--------------------------------------------------------------------------------');
console.log('testRestart');
const nodes = newNodes(5, {}, cfg => cfg.initialTerm = 1000);
let leaderChanges = 0, prevLeader = null;
@ -196,7 +210,7 @@ async function testRestart()
}
});
// Check that 5 nodes are in quorum after 2000ms
await new Promise(ok => setTimeout(ok, 2000));
await fake.runFor(2000);
checkQuorum(nodes, 5);
if (leaderChanges >= 3)
{
@ -208,18 +222,18 @@ async function testRestart()
{
restarted = 1 + (prevLeader + 1) % 5;
}
console.log("stopping a follower (node "+restarted+")");
console.log("["+fake.time+"] --> stopping a follower (node "+restarted+")");
nodes[restarted].stop();
delete nodes[restarted];
// Wait 2000ms
await new Promise(ok => setTimeout(ok, 2000));
await fake.runFor(2000);
// Restart a follower
console.log("restarting a follower (node "+restarted+")");
console.log("["+fake.time+"] --> restarting a follower (node "+restarted+")");
leaderChanges = 0;
newNode(restarted, nodes, {}, null);
nodes[restarted].start();
// Check quorum and the fact that the leader didn't change after 2000ms
await new Promise(ok => setTimeout(ok, 2000));
await fake.runFor(2000);
checkQuorum(nodes, 5);
if (leaderChanges > 0)
{
@ -235,20 +249,21 @@ async function testRestart()
async function testChangeNodes()
{
console.log('--------------------------------------------------------------------------------');
console.log('testChangeNodes');
console.log('starting nodes 1-5');
console.log('['+fake.time+'] --> starting nodes 1-5');
const nodes = newNodes(5, {}, cfg => cfg.initialTerm = 1000);
// Check that 5 nodes are in quorum after 2000ms
await new Promise(ok => setTimeout(ok, 2000));
await fake.runFor(2000);
checkQuorum(nodes, 5);
// Stop node 4
console.log('stopping node 4');
console.log('['+fake.time+'] --> stopping node 4');
nodes[4].stop();
delete nodes[4];
// Wait 1000ms
await new Promise(ok => setTimeout(ok, 1000));
await fake.runFor(1000);
// Change nodes from 1 2 3 4 5 to 1 2 3 5 6
console.log('starting node 6');
console.log('['+fake.time+'] --> starting node 6');
newNode(6, nodes);
nodes[6].start();
nodes[1].setNodes([ 1, 2, 3, 5, 6 ]);
@ -257,7 +272,7 @@ async function testChangeNodes()
nodes[5].setNodes([ 1, 2, 3, 5, 6 ]);
nodes[6].setNodes([ 1, 2, 3, 5, 6 ]);
// Check that 5 nodes are in quorum after 2000ms
await new Promise(ok => setTimeout(ok, 2000));
await fake.runFor(2000);
checkQuorum(nodes, 5);
// Clean up
for (const id in nodes)
@ -270,33 +285,34 @@ async function testChangeNodes()
async function testLeaderPriority()
{
console.log('--------------------------------------------------------------------------------');
console.log('testLeaderPriority');
console.log('starting nodes 1-5');
console.log('['+fake.time+'] --> starting nodes 1-5');
const nodes = newNodes(5, {}, cfg => cfg.leaderPriority = cfg.nodeId+1);
// Check that 5 nodes are in quorum after 2000ms
await new Promise(ok => setTimeout(ok, 2000));
await fake.runFor(2000);
checkQuorum(nodes, 5);
if (nodes[1].leader != 5)
{
throw new Error('leader is not 5');
}
// Stop node 5
console.log('stopping node 5');
console.log('['+fake.time+'] --> stopping node 5');
nodes[5].stop();
delete nodes[5];
// Wait 2000ms and check that the leader is now 4
await new Promise(ok => setTimeout(ok, 2000));
await fake.runFor(2000);
checkQuorum(nodes, 4);
if (nodes[1].leader != 4)
{
throw new Error('leader is not 4');
}
// Stop node 4
console.log('stopping node 4');
console.log('['+fake.time+'] --> stopping node 4');
nodes[4].stop();
delete nodes[4];
// Wait 2000ms and check that the leader is now 3
await new Promise(ok => setTimeout(ok, 2000));
await fake.runFor(2000);
checkQuorum(nodes, 3);
if (nodes[1].leader != 3)
{
@ -313,6 +329,7 @@ async function testLeaderPriority()
async function testPartition1_3()
{
console.log('--------------------------------------------------------------------------------');
console.log('testPartition1_3');
const partitions = { '1-3': true, '3-1': true };
const nodes = newNodes(3, partitions, cfg => cfg.nodes = [ 1, 2, 3 ]);
@ -325,24 +342,19 @@ async function testPartition1_3()
leaderChanges++;
}
});
// Check that 3 nodes are in quorum after 2000ms
// 2 is always elected as leader because it's the first node that can determine
// that the first voting round has failed: it receives a vote for 1 from 1 and
// a vote for 3 from 3. So it knows that no node can get 2 votes. 1 and 3, at
// the same time, don't know it for sure because they don't receive failures
// from each other due to the network partition.
await new Promise(ok => setTimeout(ok, 2000));
checkQuorum(nodes, 3);
if (leaderChanges >= 3)
{
throw new Error('More than 3 leader changes in 2000ms: '+leaderChanges);
}
// Check that 2 or 3 nodes are in quorum after 5000ms
// This situation should be fixed by "prevote protocol", but it breaks other things
await fake.runFor(5000);
if (nodes[2].leader == 2)
checkQuorum(nodes, 3);
else
checkQuorum(nodes, 2);
// Clean up
for (const id in nodes)
{
nodes[id].stop();
}
console.log('testLeadershipExpiration: OK');
console.log('testPartition1_3: OK');
}
async function run()