499 lines
18 KiB
JavaScript
499 lines
18 KiB
JavaScript
const ws = require('ws');
|
|
|
|
const TinyRaft = require('./tinyraft.js');
|
|
const { runCallbacks, RequestError } = require('./common.js');
|
|
|
|
// Error text sent back in the reply to a load/replicate/compact message when the sender
// is not the leader this node currently follows or the message term differs from the local raft term.
const LEADER_MISMATCH = 'raft leader/term mismatch';
|
|
|
|
/**
 * Cluster (replication) layer of an AntiEtcd node.
 *
 * Keeps one outgoing websocket connection per configured peer, runs a TinyRaft
 * instance over these connections, and uses the elected leader to
 * (re)synchronize the database between nodes, replicate changes to followers
 * and serve requests forwarded by followers.
 *
 * Node identifiers are deliberately compared with loose (in)equality
 * throughout the class, because they may come either from configuration
 * values or from object keys (which are always strings).
 */
class AntiCluster
{
    /**
     * @param {object} antietcd - owning AntiEtcd instance; this class reads its
     *   `cfg`, `clients`, `etctree`, `stored_term`, `startWebsocket()` and `handle_*` members
     * @throws {Error} if `node_id` or `cluster_key` is missing in the configuration
     */
    constructor(antietcd)
    {
        this.antietcd = antietcd;
        this.cfg = antietcd.cfg;
        // node_id => client id of the outgoing connection to that node
        this.cluster_connections = {};
        this.last_request_id = 1;
        // request_id => { client_id, cb, timer_id } for requests awaiting a peer reply
        this.subrequests = {};
        this.synced = false;
        // Resolve callbacks of requests waiting for the initial synchronization
        this.wait_sync = [];
        if (!this.cfg.node_id || !this.cfg.cluster_key)
        {
            throw new Error('node_id and cluster_key are required in configuration if cluster is set');
        }
        if (!(this.cfg.cluster instanceof Object))
        {
            // Parse "id1=url1,id2=url2,..." into { id1: url1, id2: url2, ... }
            this.cfg.cluster = (''+this.cfg.cluster).trim().split(/[\s,]*,[\s,]*/)
                .reduce((a, c) => { c = c.split(/\s*=\s*/); a[c[0]] = c[1]; return a; }, {});
        }
        this.raft = new TinyRaft({
            nodes: Object.keys(this.cfg.cluster),
            nodeId: this.cfg.node_id,
            heartbeatTimeout: this.cfg.heartbeat_timeout,
            electionTimeout: this.cfg.election_timeout,
            leaderPriority: this.cfg.leader_priority||undefined,
            initialTerm: this.antietcd.stored_term,
            send: (to, msg) => this._sendRaftMessage(to, msg),
        });
        this.raft.on('change', (event) => this._handleRaftChange(event));
        this.raft.start();
        // Connect to all nodes and reconnect forever
        for (const node_id in this.cfg.cluster)
        {
            this.connectToNode(node_id);
        }
    }

    /**
     * Open an outgoing websocket connection to a peer unless a live one is already registered.
     * Does nothing for the local node or for an unknown node id.
     *
     * @param {string} node_id - identifier of the peer (a key of `cfg.cluster`)
     */
    connectToNode(node_id)
    {
        if (node_id != this.cfg.node_id && this.cfg.cluster[node_id] &&
            (!this.cluster_connections[node_id] || !this.antietcd.clients[this.cluster_connections[node_id]]))
        {
            const socket = new ws.WebSocket(this.cfg.cluster[node_id].replace(/^http/, 'ws'));
            // The callback schedules another connection attempt after reconnect_interval
            // (presumably startWebsocket() invokes it when the connection is closed - confirm in AntiEtcd)
            const client_id = this.antietcd.startWebsocket(
                socket, () => setTimeout(() => this.connectToNode(node_id), this.cfg.reconnect_interval||1000)
            );
            this.cluster_connections[node_id] = client_id;
            socket.on('open', () =>
            {
                const client = this.antietcd.clients[client_id];
                if (client)
                {
                    client.ready = true;
                    client.raft_node_id = node_id;
                    client.addr = socket._socket.remoteAddress+':'+socket._socket.remotePort;
                    // Introduce ourselves so the peer can map this connection to our node id
                    socket.send(JSON.stringify({ identify: { key: this.cfg.cluster_key, node_id: this.cfg.node_id } }));
                    this.raft.start();
                }
            });
        }
    }

    /**
     * Send a request to a peer and wait for its reply.
     *
     * @param {object} client - peer client object (must have `id` and `socket`)
     * @param {object} request - message to send; `request_id` is assigned in place
     * @param {number} timeout - reply timeout in milliseconds
     * @returns {Promise<*>} never rejects; resolves with the value passed to
     *   _completeRequest() for this request, or `{ error: 'timeout' }` on timeout
     */
    _peerRequest(client, request, timeout)
    {
        const request_id = this.last_request_id++;
        request.request_id = request_id;
        client.socket.send(JSON.stringify(request));
        const req = this.subrequests[request_id] = { client_id: client.id };
        const promise = new Promise(ok => req.cb = ok);
        req.timer_id = setTimeout(() => this._completeRequest(null, request_id, { error: 'timeout' }), timeout);
        return promise;
    }

    /**
     * Replicate a change to all followers and trigger compaction when enough
     * revisions have accumulated. Does nothing if this node is not the leader.
     *
     * @param {object} msg - replication message passed to followers as `replicate`
     * @returns {Promise<void>}
     * @throws {RequestError} 503 if replication or compaction fails on any follower
     */
    async replicateChange(msg)
    {
        if (this.raft.state !== TinyRaft.LEADER)
        {
            return;
        }
        const mod_revision = this.antietcd.etctree.mod_revision;
        await this._requestFollowers({ replicate: msg }, this.cfg.replication_timeout||1000);
        // We have a guarantee that all revisions before mod_revision are applied by followers,
        // because replication messages are either processed synchronously or serialized in
        // AntiPersistence against <wait_persist>
        this.sync_revision = mod_revision;
        if (this.sync_revision - this.antietcd.etctree.compact_revision > (this.cfg.compact_revisions||1000)*2)
        {
            const revision = this.sync_revision - (this.cfg.compact_revisions||1000);
            await this._requestFollowers({ compact: { revision } }, this.cfg.compact_timeout||1000);
            this.antietcd.etctree.compact(revision);
        }
    }

    /**
     * Send a message to every current follower and wait for all replies.
     *
     * @param {object} msg - message to send; `term` and `request_id` are assigned in place
     * @param {number} timeout - per-follower reply timeout in milliseconds
     * @returns {Promise<void>}
     * @throws {RequestError} 503 if a follower is not connected, its connection
     *   is lost, or it replies with an error (timeout, leader/term mismatch).
     *   A new election is started in all these cases.
     */
    async _requestFollowers(msg, timeout)
    {
        msg.term = this.raft.term;
        // Resolve all peers first so nothing is sent if any of them is unavailable
        const peers = [];
        for (const follower of this.raft.followers)
        {
            if (follower != this.cfg.node_id)
            {
                const client = this._getPeer(follower);
                if (!client)
                {
                    // One of peers is unavailable - immediate failure, request should be retried
                    console.log('Lost peer connection during replication - restarting election');
                    this.raft.start();
                    throw new RequestError(503, 'Peer connection is lost, please retry request');
                }
                peers.push(client);
            }
        }
        const results = await Promise.all(peers.map(client => this._peerRequest(client, msg, timeout)));
        for (const result of results)
        {
            if (!result)
            {
                // One of peers is unavailable - immediate failure, request should be retried
                console.log('Lost peer connection during replication - restarting election');
                this.raft.start();
                throw new RequestError(503, 'Peer connection is lost, please retry request');
            }
            if (result.error)
            {
                // Timeouts and leader/term mismatches mean the follower did NOT apply
                // the message, so they must not be reported as a success
                console.log('Peer request failed during replication ('+result.error+') - restarting election');
                this.raft.start();
                throw new RequestError(503, 'Replication failed: '+result.error+', please retry request');
            }
        }
    }

    /**
     * Finish a pending peer request and pass the result to its waiter.
     * Unknown request ids and replies from a connection other than the one
     * the request was sent to are ignored.
     *
     * @param {*} client_id - id of the replying connection, or a falsy value to skip the sender check
     * @param {number} request_id - id assigned by _peerRequest()
     * @param {*} result - value to resolve the request promise with
     */
    _completeRequest(client_id, request_id, result)
    {
        const req = this.subrequests[request_id];
        if (!req || client_id && req.client_id != client_id)
        {
            return;
        }
        delete this.subrequests[request_id];
        if (req.timer_id)
        {
            // Always disarm the timer - the request is finished regardless of the result
            clearTimeout(req.timer_id);
        }
        req.cb(result);
    }

    /**
     * React to a raft state change: log it, then either start resynchronization
     * (when this node is the leader) or mark the node as not synchronized.
     *
     * @param {object} event - TinyRaft 'change' event (`state`, `term`, `leader`, `followers` are read)
     */
    _handleRaftChange(event)
    {
        console.log(
            'Raft '+this.cfg.node_id+': '+(event.state == TinyRaft.FOLLOWER ? 'following '+event.leader : event.state)+
            ', term '+event.term+(event.state == TinyRaft.LEADER ? ', followers: '+event.followers.join(', ') : '')
        );
        if (event.state == TinyRaft.LEADER)
        {
            // (Re)sync with the new set of followers
            this._resync(event.followers);
            this.antietcd.etctree.resume_leases();
        }
        else
        {
            this.synced = false;
            this.antietcd.etctree.pause_leases();
        }
    }

    /**
     * Start (or continue) resynchronization with the given set of followers:
     * request a database dump from every follower that has no dump entry yet
     * and forget dumps of nodes that are not followers anymore.
     *
     * @param {Array} followers - node ids from the raft 'change' event (may include the local node)
     */
    _resync(followers)
    {
        this.synced = false;
        if (!this.resync_state)
        {
            this.resync_state = {
                // node_id => dump object, or null while the dump is pending/failed
                dumps: {},
                // node_id => { result } of the 'load' request sent to that node
                loads: {},
            };
        }
        const seen = {};
        for (const f of followers)
        {
            seen[f] = true;
            if (f != this.cfg.node_id && !(f in this.resync_state.dumps))
            {
                const client = this._getPeer(f);
                if (client)
                {
                    this.resync_state.dumps[f] = null;
                    this._peerRequest(client, { request: {}, handler: 'dump' }, this.cfg.dump_timeout||5000).then(res =>
                    {
                        if (this.resync_state && client.raft_node_id &&
                            (client.raft_node_id in this.resync_state.dumps))
                        {
                            // A request may also be completed with an empty result (lost connection),
                            // which must be treated as a failure instead of being dereferenced
                            const failed = !res || res.error;
                            if (failed)
                            {
                                console.error(client.raft_node_id+' dump failed with error: '+(res ? res.error : 'connection lost'));
                            }
                            else
                            {
                                console.log('Got dump from '+client.raft_node_id+' with stored term '+res.term);
                            }
                            this.resync_state.dumps[client.raft_node_id] = failed ? null : res;
                            this._continueResync();
                        }
                    });
                }
            }
        }
        for (const f in this.resync_state.dumps)
        {
            if (!seen[f])
            {
                delete this.resync_state.dumps[f];
            }
        }
        this._continueResync();
    }

    /**
     * Second resync stage. Once all requested dumps have arrived: merge the
     * databases of the nodes with the maximum stored term into the local one
     * and send the merged state to every follower as a 'load' request.
     *
     * @throws {Error} if no maximum term can be determined (internal invariant violation)
     */
    _continueResync()
    {
        if (Object.values(this.resync_state.dumps).filter(d => !d).length > 0)
        {
            // Some dump(s) are still pending
            return;
        }
        this.resync_state.dumps[this.cfg.node_id] = { ...this.antietcd.etctree.dump(), term: this.antietcd.stored_term };
        let max_term = -1, with_max = [];
        for (const follower in this.resync_state.dumps)
        {
            const dump = this.resync_state.dumps[follower];
            if (dump.term > max_term)
            {
                max_term = dump.term;
                with_max = [ follower ];
            }
            else if (dump.term == max_term)
            {
                with_max.push(follower);
            }
        }
        if (max_term < 0 || with_max.length == 0)
        {
            throw new Error('BUG: no max term during resync');
        }
        console.log('Local term '+this.antietcd.stored_term+', max follower term '+max_term+' at nodes '+with_max.join(', '));
        with_max = with_max.filter(w => w != this.cfg.node_id);
        // Merge databases of all nodes with maximum term
        // Force other nodes to replicate the merged DB, throwing away their own states
        for (let i = 0; i < with_max.length; i++)
        {
            // Only the first remote dump is loaded with update_only=false,
            // and only when the local stored term is not the maximum one
            const update_only = !(i == 0 && this.antietcd.stored_term != max_term);
            console.log(update_only ? 'Updating database from node '+with_max[i]+' state' : 'Copying node '+with_max[i]+' state');
            this.antietcd.etctree.load(this.resync_state.dumps[with_max[i]], update_only);
        }
        let wait = 0;
        const load_request = { term: this.raft.term, load: this.antietcd.etctree.dump() };
        for (const follower in this.resync_state.dumps)
        {
            if (follower != this.cfg.node_id)
            {
                const dump = this.resync_state.dumps[follower];
                if (dump.term <= max_term)
                {
                    const client = this._getPeer(follower);
                    if (!client)
                    {
                        console.log('Lost peer connection during resync - restarting election');
                        this.raft.start();
                        return;
                    }
                    console.log('Copying state to '+follower);
                    const loadstate = this.resync_state.loads[follower] = {};
                    wait++;
                    this._peerRequest(client, load_request, this.cfg.load_timeout||5000).then(res =>
                    {
                        loadstate.result = res;
                        this._finishResync();
                    });
                }
            }
        }
        if (!wait)
        {
            this._finishResync();
        }
    }

    /**
     * Final resync stage. When no dump is missing and every 'load' request has
     * a result: store the current raft term, mark the node as synchronized and
     * run the callbacks collected in `wait_sync`.
     */
    _finishResync()
    {
        if (Object.values(this.resync_state.dumps).filter(d => !d).length > 0 ||
            Object.values(this.resync_state.loads).filter(d => !d.result).length > 0)
        {
            return;
        }
        // All current peers have copied the database, we can proceed
        this.antietcd.stored_term = this.raft.term;
        this.synced = true;
        runCallbacks(this, 'wait_sync', []);
        console.log('Synchronized with followers, new term is '+this.raft.term);
    }

    /**
     * Tell whether a request must be handled by the leader even when stale
     * reads are allowed on followers.
     *
     * Everything except 'kv_range' counts as a write. A 'kv_txn' is treated as
     * read-only only when it has no compares and no put / delete_range
     * operations in either of its branches.
     *
     * @param {string} path - handler name, e.g. 'kv_range', 'kv_txn'
     * @param {object} data - request body
     * @returns {boolean} true if the request is not safe to serve as a stale read
     */
    _isWrite(path, data)
    {
        if (path == 'kv_txn')
        {
            const modifies = op => op.request_put || op.requestPut || op.request_delete_range || op.requestDeleteRange;
            const read_only = (!data.compare || !data.compare.length) &&
                (!data.success || !data.success.filter(modifies).length) &&
                (!data.failure || !data.failure.filter(modifies).length);
            return !read_only;
        }
        return path != 'kv_range';
    }

    /**
     * Decide how an incoming API request must be handled given the raft state.
     *
     * @param {string} path - handler name of the request
     * @param {URL} requestUrl - request URL; the `leaderonly` query parameter is honoured
     * @param {object} data - request body
     * @returns {Promise<*>} the leader's reply if the request was forwarded,
     *   or null if the request should be handled locally
     * @throws {RequestError} 503 if the node is not a leader but `leaderonly` is set,
     *   if there is no quorum, or if the leader is unavailable for forwarding
     */
    async checkRaftState(path, requestUrl, data)
    {
        if (!this.raft)
        {
            return null;
        }
        if (requestUrl.searchParams.get('leaderonly') &&
            this.raft.state != TinyRaft.LEADER)
        {
            throw new RequestError(503, 'Not leader');
        }
        if (this.raft.state == TinyRaft.CANDIDATE)
        {
            throw new RequestError(503, 'Quorum not available');
        }
        else if (this.raft.state == TinyRaft.FOLLOWER &&
            (!this.cfg.stale_read || this._isWrite(path, data)))
        {
            // Forward to leader
            return await this._forwardToLeader(path, data);
        }
        else if (!this.synced)
        {
            // Wait for initial sync for read-only requests
            await new Promise(ok => this.wait_sync.push(ok));
        }
        return null;
    }

    /**
     * Forward a request to the current leader and return its reply.
     *
     * @param {string} handler - handler name to invoke on the leader
     * @param {object} data - request body
     * @returns {Promise<*>} leader's reply (or `{ error: 'timeout' }`)
     * @throws {RequestError} 503 if there is no ready connection to the leader
     */
    async _forwardToLeader(handler, data)
    {
        const client = this._getPeer(this.raft.leader);
        if (!client)
        {
            throw new RequestError(503, 'Leader is unavailable');
        }
        return await this._peerRequest(client, { handler, request: data }, this.cfg.forward_timeout||1000);
    }

    /**
     * Dispatch a cluster message received over a websocket connection.
     * The message kind is determined by the first recognized key.
     *
     * @param {object} client - client object of the connection the message arrived on
     * @param {object} msg - parsed message
     */
    handleWsMsg(client, msg)
    {
        if (msg.raft)
        {
            // Raft messages are only accepted from identified peers
            if (client.raft_node_id)
            {
                this.raft.onReceive(client.raft_node_id, msg.raft);
            }
        }
        else if (msg.identify)
        {
            if (msg.identify.key === this.cfg.cluster_key &&
                msg.identify.node_id != this.cfg.node_id)
            {
                client.raft_node_id = msg.identify.node_id;
                console.log('Got a connection from '+client.raft_node_id);
            }
        }
        else if (msg.load)
        {
            this._handleLoadMsg(client, msg);
        }
        else if (msg.replicate)
        {
            this._handleReplicateMsg(client, msg).catch(console.error);
        }
        else if (msg.request)
        {
            this._handleRequestMsg(client, msg).catch(console.error);
        }
        else if (msg.reply)
        {
            this._completeRequest(client.id, msg.request_id, msg.reply);
        }
        else if (msg.compact)
        {
            this._handleCompactMsg(client, msg);
        }
    }

    /**
     * Execute a request sent by a peer by calling the `handle_<handler>` method
     * of the AntiEtcd instance and send the result (or an error) back.
     *
     * @param {object} client - connection to reply on
     * @param {object} msg - `{ handler, request, request_id }`
     * @returns {Promise<void>}
     */
    async _handleRequestMsg(client, msg)
    {
        const cb = this.antietcd['handle_'+msg.handler];
        if (cb)
        {
            try
            {
                let res = cb.call(this.antietcd, msg.request);
                if (res instanceof Promise)
                {
                    res = await res;
                }
                client.socket.send(JSON.stringify({ request_id: msg.request_id, reply: res }));
            }
            catch (e)
            {
                console.error(e);
                client.socket.send(JSON.stringify({ request_id: msg.request_id, reply: { error: e.message } }));
            }
        }
        else
        {
            client.socket.send(JSON.stringify({ request_id: msg.request_id, reply: { error: 'unknown handler' } }));
        }
    }

    /**
     * Check that a leader-originated message comes from the leader this node
     * currently follows and carries the current raft term.
     *
     * @param {object} client - connection the message arrived on
     * @param {object} msg - message with a `term` field
     * @returns {boolean}
     */
    _isFromCurrentLeader(client, msg)
    {
        return !!(client.raft_node_id && this.raft.state == TinyRaft.FOLLOWER &&
            this.raft.leader === client.raft_node_id && this.raft.term == msg.term);
    }

    /**
     * Handle a 'load' message: replace the local database with the leader's
     * state, mark the node as synchronized and acknowledge.
     * Replies with LEADER_MISMATCH if the sender is not the current leader.
     *
     * @param {object} client - connection the message arrived on
     * @param {object} msg - `{ load, term, request_id }`
     */
    _handleLoadMsg(client, msg)
    {
        if (this._isFromCurrentLeader(client, msg))
        {
            this.antietcd.etctree.load(msg.load);
            this.antietcd.stored_term = msg.term;
            this.synced = true;
            runCallbacks(this, 'wait_sync', []);
            console.log('Synchronized with leader, new term is '+msg.term);
            client.socket.send(JSON.stringify({ request_id: msg.request_id, reply: {} }));
        }
        else
        {
            client.socket.send(JSON.stringify({ request_id: msg.request_id, reply: { error: LEADER_MISMATCH } }));
        }
    }

    /**
     * Handle a 'replicate' message: apply the replicated change and acknowledge.
     * Replies with LEADER_MISMATCH if the sender is not the current leader.
     *
     * @param {object} client - connection the message arrived on
     * @param {object} msg - `{ replicate, term, request_id }`
     * @returns {Promise<void>}
     */
    async _handleReplicateMsg(client, msg)
    {
        if (this._isFromCurrentLeader(client, msg))
        {
            await this.antietcd.etctree.apply_replication(msg.replicate);
            client.socket.send(JSON.stringify({ request_id: msg.request_id, reply: {} }));
        }
        else
        {
            client.socket.send(JSON.stringify({ request_id: msg.request_id, reply: { error: LEADER_MISMATCH } }));
        }
    }

    /**
     * Handle a 'compact' message: compact the local database up to the given
     * revision and acknowledge.
     * Replies with LEADER_MISMATCH if the sender is not the current leader.
     *
     * @param {object} client - connection the message arrived on
     * @param {object} msg - `{ compact: { revision }, term, request_id }`
     */
    _handleCompactMsg(client, msg)
    {
        if (this._isFromCurrentLeader(client, msg))
        {
            this.antietcd.etctree.compact(msg.compact.revision);
            console.log('Compacted deletions up to '+msg.compact.revision);
            client.socket.send(JSON.stringify({ request_id: msg.request_id, reply: {} }));
        }
        else
        {
            client.socket.send(JSON.stringify({ request_id: msg.request_id, reply: { error: LEADER_MISMATCH } }));
        }
    }

    /**
     * Look up the ready outgoing connection to a peer.
     *
     * @param {string} to - peer node id
     * @returns {object|null} client object, or null if the peer has no registered or ready connection
     * @throws {Error} if `to` is the local node id
     */
    _getPeer(to)
    {
        if (to == this.cfg.node_id)
        {
            throw new Error('BUG: attempt to get connection to self');
        }
        const client_id = this.cluster_connections[to];
        if (!client_id)
        {
            return null;
        }
        const client = this.antietcd.clients[client_id];
        if (!client || !client.ready)
        {
            return null;
        }
        return client;
    }

    /**
     * Transport callback for TinyRaft: deliver a raft message to a peer.
     * The message is silently dropped if the peer is not connected.
     *
     * @param {string} to - destination node id
     * @param {object} msg - raft message
     */
    _sendRaftMessage(to, msg)
    {
        const client = this._getPeer(to);
        if (client)
        {
            client.socket.send(JSON.stringify({ raft: msg }));
        }
    }
}
|
|
|
|
// The AntiCluster class is the only export of this module.
module.exports = AntiCluster;
|