
498 lines
17 KiB

const ws = require('ws');
const TinyRaft = require('./tinyraft.js');
const { runCallbacks, RequestError } = require('./common.js');
// Error text placed in the `error` field of replies sent by the load / replicate / compact
// message handlers of this file.
const LEADER_MISMATCH = 'raft leader/term mismatch';
/**
 * Cluster layer of an antietcd node: keeps websocket connections to peer nodes,
 * drives a TinyRaft instance and exchanges request/reply messages with peers.
 *
 * NOTE(review): block delimiters and some lines (including what is presumably the
 * `constructor(antietcd)` header) are absent from this listing - confirm the exact
 * structure against the complete file.
 */
class AntiCluster
// References to the owning antietcd instance and its configuration
this.antietcd = antietcd;
this.cfg = antietcd.cfg;
// node_id => client id of the websocket connection to that node
this.cluster_connections = {};
// Counter used to number outgoing peer requests
this.last_request_id = 1;
// request_id => state of a peer request that has not been completed yet
this.subrequests = {};
// Whether this node is synchronized, and callbacks waiting for that to happen
this.synced = false;
this.wait_sync = [];
// Both the local node ID and the shared cluster key are mandatory
if (!this.cfg.node_id || !this.cfg.cluster_key)
throw new Error('node_id and cluster_key are required in configuration if cluster is set');
// A non-object cluster setting is parsed as a comma-separated list of `node_id=address`
// pairs and converted to a { node_id: address } map
if (!(this.cfg.cluster instanceof Object))
this.cfg.cluster = (''+this.cfg.cluster).trim().split(/[\s,]*,[\s,]*/)
.reduce((a, c) => { c = c.split(/\s*=\s*/); a[c[0]] = c[1]; return a; }, {});
// Raft instance over all configured nodes; its outgoing messages go through _sendRaftMessage()
this.raft = new TinyRaft({
nodes: Object.keys(this.cfg.cluster),
nodeId: this.cfg.node_id,
heartbeatTimeout: this.cfg.heartbeat_timeout,
electionTimeout: this.cfg.election_timeout,
leaderPriority: this.cfg.leader_priority||undefined,
initialTerm: this.antietcd.stored_term,
send: (to, msg) => this._sendRaftMessage(to, msg),
// Raft state changes are handled by _handleRaftChange()
this.raft.on('change', (event) => this._handleRaftChange(event));
// Connect to all nodes and reconnect forever
// NOTE(review): the loop body and the header of the per-node connect method (presumably
// `connectToNode(node_id)`, which is referenced by the reconnect timer below) are not
// present in this listing - confirm against the complete file.
for (const node_id in this.cfg.cluster)
// Only connect to other nodes which have an address and no live client connection yet
if (node_id != this.cfg.node_id && this.cfg.cluster[node_id] &&
(!this.cluster_connections[node_id] || !this.antietcd.clients[this.cluster_connections[node_id]]))
// The configured address is used with its leading "http" replaced by "ws" (so https becomes wss)
const socket = new ws.WebSocket(this.cfg.cluster[node_id].replace(/^http/, 'ws'));
// The callback retries the connection after reconnect_interval (default 1000 ms);
// presumably startWebsocket() invokes it when the socket goes away - TODO confirm
const client_id = this.antietcd.startWebsocket(socket, () => setTimeout(() => this.connectToNode(node_id), this.cfg.reconnect_interval||1000));
this.cluster_connections[node_id] = client_id;
// Once the socket is open: mark the client as ready, tag it with the peer node ID
// and remote address, and identify ourselves with the cluster key
socket.on('open', () =>
if (this.antietcd.clients[client_id])
this.antietcd.clients[client_id].ready = true;
this.antietcd.clients[client_id].raft_node_id = node_id;
this.antietcd.clients[client_id].addr = socket._socket.remoteAddress+':'+socket._socket.remotePort;
socket.send(JSON.stringify({ identify: { key: this.cfg.cluster_key, node_id: this.cfg.node_id } }));
/**
 * Register a request to a peer and return a promise for its outcome.
 *
 * @param client  peer client object; its `id` is remembered with the request
 * @param request message object; it is modified in place (`request_id` is assigned)
 * @param timeout number of milliseconds after which the request is completed with
 *                `{ error: 'timeout' }`
 * @returns promise fulfilled through the `cb` stored in this.subrequests[request_id]
 *          (presumably called by _completeRequest() - TODO confirm)
 *
 * NOTE(review): no statement that actually sends `request` over the client socket is
 * visible in this listing - confirm against the complete file.
 */
_peerRequest(client, request, timeout)
// Allocate the next request ID and stamp it onto the message
const request_id = this.last_request_id++;
request.request_id = request_id;
// Remember which client the reply is expected from
const req = this.subrequests[request_id] = { client_id: client.id };
const promise = new Promise(ok => req.cb = ok);
// Timeouts are completed with a null client ID
req.timer_id = setTimeout(() => this._completeRequest(null, request_id, { error: 'timeout' }), timeout);
return promise;
/**
 * Send a change to followers and request a compaction from them when enough revisions
 * have accumulated.
 *
 * @param msg change description, sent to followers as `{ replicate: msg }`
 *
 * NOTE(review): block delimiters are absent from this listing, and the statement guarded
 * by the leader check below is not visible - confirm against the complete file.
 */
async replicateChange(msg)
if (this.raft.state !== TinyRaft.LEADER)
// Remember the revision of the local database before waiting for followers
const mod_revision = this.antietcd.etctree.mod_revision;
await this._requestFollowers({ replicate: msg }, this.cfg.replication_timeout||1000);
// We have a guarantee that all revisions before mod_revision are applied by followers,
// because replication messages are either processed synchronously or serialized in
// AntiPersistence against <wait_persist>
this.sync_revision = mod_revision;
// When more than 2*compact_revisions (default 1000) revisions separate the synced
// revision from the compacted one, ask followers to compact up to
// sync_revision - compact_revisions
if (this.sync_revision - this.antietcd.etctree.compact_revision > (this.cfg.compact_revisions||1000)*2)
const revision = this.sync_revision - (this.cfg.compact_revisions||1000);
await this._requestFollowers({ compact: { revision } }, this.cfg.compact_timeout||1000);
/**
 * Send the same message to all current Raft followers and wait for all of the replies.
 *
 * @param msg     message object; it is modified in place (`term` is set to the current Raft term)
 * @param timeout per-request timeout in milliseconds, passed to _peerRequest()
 * @throws RequestError(503) on the "peer connection is lost" paths below
 *
 * NOTE(review): block delimiters are absent from this listing, so the nesting of the
 * statements below is inferred. No statement adding `promise` to `promises` is visible
 * either - confirm against the complete file.
 */
async _requestFollowers(msg, timeout)
msg.term = this.raft.term;
// First pass: look up the connection of every follower other than this node
for (const follower of this.raft.followers)
if (follower != this.cfg.node_id)
const client = this._getPeer(follower);
if (!client)
// One of peers is unavailable - immediate failure, request should be retried
console.log('Lost peer connection during replication - restarting election');
throw new RequestError(503, 'Peer connection is lost, please retry request');
// Second pass: issue the request to every follower other than this node
const promises = [];
for (const follower of this.raft.followers)
if (follower != this.cfg.node_id)
const client = this._getPeer(follower);
const promise = this._peerRequest(client, msg, timeout);
const results = await Promise.all(promises);
// A falsy result is handled as a lost peer
for (const result of results)
if (!result)
// One of peers is unavailable - immediate failure, request should be retried
console.log('Lost peer connection during replication - restarting election');
throw new RequestError(503, 'Peer connection is lost, please retry request');
/**
 * Complete a pending peer request registered in this.subrequests.
 *
 * @param client_id  ID of the client the result came from; null is passed for timeouts
 * @param request_id ID assigned to the request by _peerRequest()
 * @param result     reply object (or `{ error: 'timeout' }`)
 *
 * NOTE(review): the end of this method (presumably clearing the timer and calling the
 * stored callback) and the statements guarded by the conditions below are not visible
 * in this listing - confirm against the complete file.
 */
_completeRequest(client_id, request_id, result)
const req = this.subrequests[request_id];
// Unknown request, or a non-null client ID that differs from the one the request was registered for
if (!req || client_id && req.client_id != client_id)
// The request is not pending anymore
delete this.subrequests[request_id];
if (req.timer_id && result !== null)
// Raft state change handling and leader-side resynchronization with followers.
// NOTE(review): method headers and block delimiters are absent from this part of the
// listing. It looks like several methods (the 'change' event handler followed by the
// resync steps) - the phase comments below are inferred, confirm against the complete file.

// Status line describing the new Raft state: role (or the leader being followed), term
// and, for a leader, the list of followers
'Raft '+this.cfg.node_id+': '+(event.state == TinyRaft.FOLLOWER ? 'following '+event.leader : event.state)+
', term '+event.term+(event.state == TinyRaft.LEADER ? ', followers: '+event.followers.join(', ') : '')
if (event.state == TinyRaft.LEADER)
// (Re)sync with the new set of followers
this.synced = false;
// Resync start: the node is not synced until the resync is finished
this.synced = false;
// Resync state tracks database dumps received from nodes and loads sent to them
if (!this.resync_state)
this.resync_state = {
dumps: {},
loads: {},
// Request a dump from every connected follower which has no dump entry yet
const seen = {};
for (const f of followers)
seen[f] = true;
if (f != this.cfg.node_id && !(f in this.resync_state.dumps))
const client = this._getPeer(f);
if (client)
// A null entry is treated as a pending dump by the checks further below
this.resync_state.dumps[f] = null;
this._peerRequest(client, { request: {}, handler: 'dump' }, this.cfg.dump_timeout||5000).then(res =>
// Only record the reply if the resync state still has an entry for this node
if (this.resync_state && client.raft_node_id &&
(client.raft_node_id in this.resync_state.dumps))
if (res.error)
console.error(client.raft_node_id+' dump failed with error: '+res.error);
console.log('Got dump from '+client.raft_node_id+' with stored term '+res.term);
// A failed dump is stored as null, i.e. it stays pending
this.resync_state.dumps[client.raft_node_id] = res.error ? null : res;
// Forget dumps of nodes which are not in the current follower list
for (const f in this.resync_state.dumps)
if (!seen[f])
delete this.resync_state.dumps[f];
if (Object.values(this.resync_state.dumps).filter(d => !d).length > 0)
// Some dump(s) are still pending
// The local database takes part in the comparison as this node's own dump
this.resync_state.dumps[this.cfg.node_id] = { ...this.antietcd.etctree.dump(), term: this.antietcd.stored_term };
// Find the maximum stored term among the dumps and the nodes which have it
let max_term = -1, with_max = [];
for (const follower in this.resync_state.dumps)
const dump = this.resync_state.dumps[follower];
if (dump.term > max_term)
max_term = dump.term;
with_max = [ follower ];
else if (dump.term == max_term)
if (max_term < 0 || with_max.length == 0)
throw new Error('BUG: no max term during resync');
console.log('Local term '+this.antietcd.stored_term+', max follower term '+max_term+' at nodes '+with_max.join(', '));
// The local node is excluded from the list of dumps to load
with_max = with_max.filter(w => w != this.cfg.node_id);
// Merge databases of all nodes with maximum term
// Force other nodes to replicate the merged DB, throwing away their own states
for (let i = 0; i < with_max.length; i++)
// update_only is false only for the first max-term dump and only when the local stored
// term differs from the maximum term
const update_only = !(i == 0 && this.antietcd.stored_term != max_term);
console.log(update_only ? 'Updating database from node '+with_max[i]+' state' : 'Copying node '+with_max[i]+' state');
this.antietcd.etctree.load(this.resync_state.dumps[with_max[i]], update_only);
// Send the merged database to the other nodes
// (`wait` presumably counts the issued load requests; no increment is visible here - TODO confirm)
let wait = 0;
const load_request = { term: this.raft.term, load: this.antietcd.etctree.dump() };
for (const follower in this.resync_state.dumps)
if (follower != this.cfg.node_id)
const dump = this.resync_state.dumps[follower];
if (dump.term <= max_term)
const client = this._getPeer(follower);
if (!client)
console.log('Lost peer connection during resync - restarting election');
console.log('Copying state to '+follower);
// The load of this follower counts as pending until `result` is set
const loadstate = this.resync_state.loads[follower] = {};
this._peerRequest(client, load_request, this.cfg.load_timeout||5000).then(res =>
loadstate.result = res;
if (!wait)
// Resync finish: checks for dumps or loads which are still pending
if (Object.values(this.resync_state.dumps).filter(d => !d).length > 0 ||
Object.values(this.resync_state.loads).filter(d => !d.result).length > 0)
// All current peers have copied the database, we can proceed
this.antietcd.stored_term = this.raft.term;
this.synced = true;
// Run the callbacks queued in this.wait_sync
runCallbacks(this, 'wait_sync', []);
console.log('Synchronized with followers, new term is '+this.raft.term);
_isWrite(path, data)
if (path == 'kv_txn')
return ((!data.compare || !data.compare.length) &&
(!data.success || !data.success.filter(f => f.request_put || f.requestPut || f.request_delete_range || f.requestDeleteRange).length) &&
(!data.failure || !data.failure.filter(f => f.request_put || f.requestPut || f.request_delete_range || f.requestDeleteRange).length));
return path != 'kv_range';
/**
 * Decide, based on the current Raft state, whether a client request may be handled
 * locally.
 *
 * @param path       request handler name (passed to _isWrite() and to the leader when forwarding)
 * @param requestUrl URL object of the request; its `leaderonly` query parameter is checked
 * @param data       request body
 * @returns the leader's reply when the request was forwarded, otherwise null
 * @throws RequestError(503) when `leaderonly` is requested on a non-leader, or when
 *         this node is a candidate
 *
 * NOTE(review): block delimiters are absent from this listing, so the nesting of the
 * statements below is inferred - confirm against the complete file.
 */
async checkRaftState(path, requestUrl, data)
// No Raft instance: nothing to check
if (!this.raft)
return null;
if (requestUrl.searchParams.get('leaderonly') &&
this.raft.state != TinyRaft.LEADER)
throw new RequestError(503, 'Not leader');
if (this.raft.state == TinyRaft.CANDIDATE)
throw new RequestError(503, 'Quorum not available');
// A follower forwards the request unless stale reads are enabled and _isWrite() reports a non-write
else if (this.raft.state == TinyRaft.FOLLOWER &&
(!this.cfg.stale_read || this._isWrite(path, data)))
// Forward to leader
return await this._forwardToLeader(path, data);
else if (!this.synced)
// Wait for initial sync for read-only requests
await new Promise(ok => this.wait_sync.push(ok));
return null;
/**
 * Forward a request to the current Raft leader and return its reply.
 *
 * @param handler handler name, sent to the leader as `handler`
 * @param data    request body, sent to the leader as `request`
 * @returns result of the peer request (forward_timeout, default 1000 ms)
 * @throws RequestError(503) when there is no ready connection to the leader
 */
async _forwardToLeader(handler, data)
const client = this._getPeer(this.raft.leader);
if (!client)
throw new RequestError(503, 'Leader is unavailable');
return await this._peerRequest(client, { handler, request: data }, this.cfg.forward_timeout||1000);
/**
 * Dispatch a cluster message received from a websocket client, based on which field
 * of the message is set.
 *
 * @param client client object the message was received from
 * @param msg    parsed message object
 *
 * NOTE(review): block delimiters are absent from this listing, so the nesting of the
 * statements below is inferred - confirm against the complete file.
 */
handleWsMsg(client, msg)
// Raft protocol message: only passed to Raft when the client has a known node ID
if (msg.raft)
if (client.raft_node_id)
this.raft.onReceive(client.raft_node_id, msg.raft);
// Peer identification: requires the shared cluster key and a node ID different from our own
else if (msg.identify)
if (msg.identify.key === this.cfg.cluster_key &&
msg.identify.node_id != this.cfg.node_id)
client.raft_node_id = msg.identify.node_id;
console.log('Got a connection from '+client.raft_node_id);
// Database load message
else if (msg.load)
this._handleLoadMsg(client, msg);
// Replication of a change; rejections of the async handler are logged
else if (msg.replicate)
this._handleReplicateMsg(client, msg).catch(console.error);
// Request to be executed by a handler of this node; rejections are logged
else if (msg.request)
this._handleRequestMsg(client, msg).catch(console.error);
// Reply to a request previously registered by _peerRequest()
else if (msg.reply)
this._completeRequest(client.id, msg.request_id, msg.reply);
// Compaction message
else if (msg.compact)
this._handleCompactMsg(client, msg);
/**
 * Execute a request received from another client through the antietcd handler named
 * `handle_<msg.handler>` and send the outcome back with the same request_id.
 *
 * @param client client object the request came from; replies go to client.socket
 * @param msg    message with `handler`, `request` and `request_id`
 *
 * NOTE(review): block delimiters and the `try` / `else` lines are absent from this
 * listing, so the nesting of the statements below is inferred - presumably the last
 * reply is only sent when no handler is found. Confirm against the complete file.
 */
async _handleRequestMsg(client, msg)
const cb = this.antietcd['handle_'+msg.handler];
if (cb)
// Handlers may return either a plain value or a promise
let res = cb.call(this.antietcd, msg.request);
if (res instanceof Promise)
res = await res;
client.socket.send(JSON.stringify({ request_id: msg.request_id, reply: res }));
// A handler failure is reported to the requester as an error reply
catch (e)
client.socket.send(JSON.stringify({ request_id: msg.request_id, reply: { error: e.message } }));
client.socket.send(JSON.stringify({ request_id: msg.request_id, reply: { error: 'unknown handler' } }));
/**
 * Handle a database load message.
 *
 * @param client client object the message came from; the reply goes to client.socket
 * @param msg    message with `term` and `request_id` (and the `load` payload)
 *
 * NOTE(review): block delimiters and the `else` line are absent from this listing -
 * presumably the LEADER_MISMATCH reply is only sent when the check below fails. No
 * statement applying `msg.load` to the local database is visible either. Confirm
 * against the complete file.
 */
_handleLoadMsg(client, msg)
// Accepted only from the node we currently follow, and only for the current Raft term
if (client.raft_node_id && this.raft.state == TinyRaft.FOLLOWER &&
this.raft.leader === client.raft_node_id && this.raft.term == msg.term)
// Remember the term, mark this node as synced and run the callbacks queued in this.wait_sync
this.antietcd.stored_term = msg.term;
this.synced = true;
runCallbacks(this, 'wait_sync', []);
console.log('Synchronized with leader, new term is '+msg.term);
client.socket.send(JSON.stringify({ request_id: msg.request_id, reply: {} }));
client.socket.send(JSON.stringify({ request_id: msg.request_id, reply: { error: LEADER_MISMATCH } }));
/**
 * Handle a replication message by applying the replicated change to the local database.
 *
 * @param client client object the message came from; the reply goes to client.socket
 * @param msg    message with `term`, `replicate` and `request_id`
 *
 * NOTE(review): block delimiters and the `else` line are absent from this listing -
 * presumably the LEADER_MISMATCH reply is only sent when the check below fails.
 * Confirm against the complete file.
 */
async _handleReplicateMsg(client, msg)
// Accepted only from the node we currently follow, and only for the current Raft term
if (client.raft_node_id && this.raft.state == TinyRaft.FOLLOWER &&
this.raft.leader === client.raft_node_id && this.raft.term == msg.term)
// The reply is sent after the change has been applied
await this.antietcd.etctree.apply_replication(msg.replicate);
client.socket.send(JSON.stringify({ request_id: msg.request_id, reply: {} }));
client.socket.send(JSON.stringify({ request_id: msg.request_id, reply: { error: LEADER_MISMATCH } }));
/**
 * Handle a compaction message.
 *
 * @param client client object the message came from; the reply goes to client.socket
 * @param msg    message with `term`, `compact.revision` and `request_id`
 *
 * NOTE(review): block delimiters and the `else` line are absent from this listing -
 * presumably the LEADER_MISMATCH reply is only sent when the check below fails. No
 * statement performing the compaction itself is visible either. Confirm against the
 * complete file.
 */
_handleCompactMsg(client, msg)
// Accepted only from the node we currently follow, and only for the current Raft term
if (client.raft_node_id && this.raft.state == TinyRaft.FOLLOWER &&
this.raft.leader === client.raft_node_id && this.raft.term == msg.term)
console.log('Compacted deletions up to '+msg.compact.revision);
client.socket.send(JSON.stringify({ request_id: msg.request_id, reply: {} }));
client.socket.send(JSON.stringify({ request_id: msg.request_id, reply: { error: LEADER_MISMATCH } }));
// Look up the ready client connection of peer node `to`; returns the client object,
// or null when there is no connection to that node or it is not ready.
// NOTE(review): the method header is absent from this listing - presumably
// `_getPeer(to)`, as called elsewhere in this file. Confirm against the complete file.
// Asking for a connection to the local node is a programming error
if (to == this.cfg.node_id)
throw new Error('BUG: attempt to get connection to self');
// No connection was ever registered for this node
const client_id = this.cluster_connections[to];
if (!client_id)
return null;
// The client is gone, or it is not marked as ready
const client = this.antietcd.clients[client_id];
if (!client || !client.ready)
return null;
return client;
/**
 * Deliver a Raft protocol message to a peer node as `{ raft: msg }`.
 * Nothing is sent when there is no ready connection to that node.
 *
 * @param to  ID of the destination node
 * @param msg Raft message to send
 */
_sendRaftMessage(to, msg)
const client = this._getPeer(to);
if (client)
client.socket.send(JSON.stringify({ raft: msg }));
module.exports = AntiCluster;