Implement simple clustering

master
Vitaliy Filippov 2024-05-07 15:34:37 +03:00
parent 3596ecd92c
commit 1a77faa510
2 changed files with 498 additions and 3 deletions

460
anticluster.js Normal file
View File

@ -0,0 +1,460 @@
const ws = require('ws');
const TinyRaft = require('./tinyraft.js');
const { runCallbacks, RequestError } = require('./common.js');
const LEADER_MISMATCH = 'raft leader/term mismatch';
class AntiCluster
{
constructor(antietcd)
{
this.antietcd = antietcd;
this.cfg = antietcd.cfg;
this.cluster_connections = {};
this.last_request_id = 1;
this.subrequests = {};
this.synced = false;
this.wait_sync = [];
if (!this.cfg.node_id || !this.cfg.cluster_key)
{
throw new Error('node_id and cluster_key are required in configuration if cluster is set');
}
if (!(this.cfg.cluster instanceof Object))
{
this.cfg.cluster = (''+this.cfg.cluster).trim().split(/[\s,]*,[\s,]*/)
.reduce((a, c) => { c = c.split(/\s*=\s*/); a[c[0]] = c[1]; return a; }, {});
}
this.raft = new TinyRaft({
nodes: Object.keys(this.cfg.cluster),
nodeId: this.cfg.node_id,
heartbeatTimeout: this.cfg.heartbeat_timeout,
electionTimeout: this.cfg.election_timeout,
initialTerm: this.antietcd.stored_term,
send: (to, msg) => this._sendRaftMessage(to, msg),
});
this.raft.on('change', (event) => this._handleRaftChange(event));
this.raft.start();
// Connect to all nodes and reconnect forever
for (const node_id in this.cfg.cluster)
{
this.connectToNode(node_id);
}
}
connectToNode(node_id)
{
if (node_id != this.cfg.node_id && this.cfg.cluster[node_id] &&
(!this.cluster_connections[node_id] || !this.antietcd.clients[this.cluster_connections[node_id]]))
{
const socket = new ws.WebSocket(this.cfg.cluster[node_id].replace(/^http/, 'ws'));
const client_id = this.antietcd.startWebsocket(socket, () => setTimeout(() => this.connectToNode(node_id), this.cfg.reconnect_interval||1000));
this.cluster_connections[node_id] = client_id;
socket.on('open', () =>
{
if (this.antietcd.clients[client_id])
{
this.antietcd.clients[client_id].ready = true;
this.antietcd.clients[client_id].raft_node_id = node_id;
this.antietcd.clients[client_id].addr = socket._socket.remoteAddress+':'+socket._socket.remotePort;
socket.send(JSON.stringify({ identify: { key: this.cfg.cluster_key, node_id: this.cfg.node_id } }));
}
});
}
}
_peerRequest(client, request, timeout)
{
const request_id = this.last_request_id++;
request.request_id = request_id;
client.socket.send(JSON.stringify(request));
const req = this.subrequests[request_id] = { client_id: client.id };
const promise = new Promise(ok => req.cb = ok);
req.timer_id = setTimeout(() => this._completeRequest(null, request_id, { error: 'timeout' }), timeout);
return promise;
}
async replicateChange(msg)
{
if (this.raft.state !== TinyRaft.LEADER)
{
return;
}
for (const follower of this.raft.followers)
{
if (follower != this.cfg.node_id)
{
const client = this._getPeer(follower);
if (!client)
{
// One of peers is unavailable - immediate failure, request should be retried
console.log('Lost peer connection during replication - restarting election');
this.raft.start();
throw new RequestError(503, 'Peer connection is lost, please retry request');
}
}
}
const promises = [];
for (const follower of this.raft.followers)
{
if (follower != this.cfg.node_id)
{
const client = this._getPeer(follower);
const promise = this._peerRequest(client, { replicate: msg, term: this.raft.term }, this.cfg.replication_timeout||1000);
promises.push(promise);
}
}
const results = await Promise.all(promises);
for (const result of results)
{
if (!result)
{
// One of peers is unavailable - immediate failure, request should be retried
console.log('Lost peer connection during replication - restarting election');
this.raft.start();
throw new RequestError(503, 'Peer connection is lost, please retry request');
}
}
}
_completeRequest(client_id, request_id, result)
{
const req = this.subrequests[request_id];
if (!req || client_id && req.client_id != client_id)
{
return;
}
delete this.subrequests[request_id];
if (req.timer_id && result !== null)
{
clearTimeout(req.timer_id);
}
req.cb(result);
}
_handleRaftChange(event)
{
console.log(
'Raft '+this.cfg.node_id+': '+(event.state == TinyRaft.FOLLOWER ? 'following '+event.leader : event.state)+
', term '+event.term+(event.state == TinyRaft.LEADER ? ', followers: '+event.followers.join(', ') : '')
);
if (event.state == TinyRaft.LEADER)
{
// (Re)sync with the new set of followers
this._resync(event.followers);
this.antietcd.etctree.resume_leases();
}
else
{
this.synced = false;
this.antietcd.etctree.pause_leases();
}
}
_resync(followers)
{
this.synced = false;
if (!this.resync_state)
{
this.resync_state = {
dumps: {},
loads: {},
};
}
const seen = {};
for (const f of followers)
{
seen[f] = true;
if (f != this.cfg.node_id && !(f in this.resync_state.dumps))
{
const client = this._getPeer(f);
if (client)
{
this.resync_state.dumps[f] = null;
this._peerRequest(client, { request: {}, handler: 'dump' }, this.cfg.dump_timeout||5000).then(res =>
{
if (this.resync_state && client.raft_node_id &&
(client.raft_node_id in this.resync_state.dumps))
{
if (res.error)
{
console.error(client.raft_node_id+' dump failed with error: '+res.error);
}
else
{
console.log('Got dump from '+client.raft_node_id+' with stored term '+res.term);
}
this.resync_state.dumps[client.raft_node_id] = res.error ? null : res;
this._continueResync();
}
});
}
}
}
for (const f in this.resync_state.dumps)
{
if (!seen[f])
{
delete this.resync_state.dumps[f];
}
}
this._continueResync();
}
_continueResync()
{
if (Object.values(this.resync_state.dumps).filter(d => !d).length > 0)
{
// Some dump(s) are still pending
return;
}
this.resync_state.dumps[this.cfg.node_id] = { ...this.antietcd.etctree.dump(), term: this.antietcd.stored_term };
let max_term = -1, with_max = [];
for (const follower in this.resync_state.dumps)
{
const dump = this.resync_state.dumps[follower];
if (dump.term > max_term)
{
max_term = dump.term;
with_max = [ follower ];
}
else if (dump.term == max_term)
{
with_max.push(follower);
}
}
if (max_term < 0 || with_max.length == 0)
{
throw new Error('BUG: no max term during resync');
}
console.log('Local term '+this.antietcd.stored_term+', max follower term '+max_term+' at nodes '+with_max.join(', '));
with_max = with_max.filter(w => w != this.cfg.node_id);
// Merge databases of all nodes with maximum term
// Force other nodes to replicate the merged DB, throwing away their own states
for (let i = 0; i < with_max.length; i++)
{
const update_only = !(i == 0 && this.antietcd.stored_term != max_term);
console.log(update_only ? 'Updating database from node '+with_max[i]+' state' : 'Copying node '+with_max[i]+' state');
this.antietcd.etctree.load(this.resync_state.dumps[with_max[i]], update_only);
}
let wait = 0;
const load_request = { term: this.raft.term, load: this.antietcd.etctree.dump() };
for (const follower in this.resync_state.dumps)
{
if (follower != this.cfg.node_id)
{
const dump = this.resync_state.dumps[follower];
if (dump.term <= max_term)
{
const client = this._getPeer(follower);
if (!client)
{
console.log('Lost peer connection during resync - restarting election');
this.raft.start();
return;
}
console.log('Copying state to '+follower);
const loadstate = this.resync_state.loads[follower] = {};
wait++;
this._peerRequest(client, load_request, this.cfg.load_timeout||5000).then(res =>
{
loadstate.result = res;
this._finishResync();
});
}
}
}
if (!wait)
{
this._finishResync();
}
}
_finishResync()
{
if (Object.values(this.resync_state.dumps).filter(d => !d).length > 0 ||
Object.values(this.resync_state.loads).filter(d => !d.result).length > 0)
{
return;
}
// All current peers have copied the database, we can proceed
this.antietcd.stored_term = this.raft.term;
this.synced = true;
runCallbacks(this, 'wait_sync', []);
console.log('Synchronized with followers, new term is '+this.raft.term);
}
_isWrite(path, data)
{
if (path == 'kv_txn')
{
return ((!data.compare || !data.compare.length) &&
(!data.success || !data.success.filter(f => f.request_put || f.requestPut || f.request_delete_range || f.requestDeleteRange).length) &&
(!data.failure || !data.failure.filter(f => f.request_put || f.requestPut || f.request_delete_range || f.requestDeleteRange).length));
}
return path != 'kv_range';
}
async checkRaftState(path, requestUrl, data)
{
if (!this.raft)
{
return null;
}
if (requestUrl.searchParams.get('leaderonly') &&
this.raft.state != TinyRaft.LEADER)
{
throw new RequestError(503, 'Not leader');
}
if (this.raft.state == TinyRaft.CANDIDATE)
{
throw new RequestError(503, 'Quorum not available');
}
else if (this.raft.state == TinyRaft.FOLLOWER &&
(!this.cfg.stale_read || this._isWrite(path, data)))
{
// Forward to leader
return await this._forwardToLeader(path, data);
}
else if (!this.synced)
{
// Wait for initial sync for read-only requests
await new Promise(ok => this.wait_sync.push(ok));
}
return null;
}
async _forwardToLeader(handler, data)
{
const client = this._getPeer(this.raft.leader);
if (!client)
{
throw new RequestError(503, 'Leader is unavailable');
}
return await this._peerRequest(client, { handler, request: data }, this.cfg.forward_timeout||1000);
}
handleWsMsg(client, msg)
{
if (msg.raft)
{
if (client.raft_node_id)
{
this.raft.onReceive(client.raft_node_id, msg.raft);
}
}
else if (msg.identify)
{
if (msg.identify.key === this.cfg.cluster_key &&
msg.identify.node_id != this.cfg.node_id)
{
client.raft_node_id = msg.identify.node_id;
console.log('Got a connection from '+client.raft_node_id);
}
}
else if (msg.load)
{
this._handleLoadMsg(client, msg);
}
else if (msg.replicate)
{
this._handleReplicateMsg(client, msg).catch(console.error);
}
else if (msg.request)
{
this._handleRequestMsg(client, msg).catch(console.error);
}
else if (msg.reply)
{
this._completeRequest(client.id, msg.request_id, msg.reply);
}
}
async _handleRequestMsg(client, msg)
{
const cb = this.antietcd['handle_'+msg.handler];
if (cb)
{
try
{
let res = cb.call(this.antietcd, msg.request);
if (res instanceof Promise)
{
res = await res;
}
client.socket.send(JSON.stringify({ request_id: msg.request_id, reply: res }));
}
catch (e)
{
console.error(e);
client.socket.send(JSON.stringify({ request_id: msg.request_id, reply: { error: e.message } }));
}
}
else
{
client.socket.send(JSON.stringify({ request_id: msg.request_id, reply: { error: 'unknown handler' } }));
}
}
_handleLoadMsg(client, msg)
{
if (client.raft_node_id && this.raft.state == TinyRaft.FOLLOWER &&
this.raft.leader === client.raft_node_id && this.raft.term == msg.term)
{
this.antietcd.etctree.load(msg.load);
this.antietcd.stored_term = msg.term;
this.synced = true;
runCallbacks(this, 'wait_sync', []);
console.log('Synchronized with leader, new term is '+msg.term);
client.socket.send(JSON.stringify({ request_id: msg.request_id, reply: {} }));
}
else
{
client.socket.send(JSON.stringify({ request_id: msg.request_id, reply: { error: LEADER_MISMATCH } }));
}
}
async _handleReplicateMsg(client, msg)
{
if (client.raft_node_id && this.raft.state == TinyRaft.FOLLOWER &&
this.raft.leader === client.raft_node_id && this.raft.term == msg.term)
{
await this.antietcd.etctree.apply_replication(msg.replicate);
client.socket.send(JSON.stringify({ request_id: msg.request_id, reply: {} }));
}
else
{
client.socket.send(JSON.stringify({ request_id: msg.request_id, reply: { error: LEADER_MISMATCH } }));
}
}
_getPeer(to)
{
if (to == this.cfg.node_id)
{
throw new Error('BUG: attempt to get connection to self');
}
const client_id = this.cluster_connections[to];
if (!client_id)
{
return null;
}
const client = this.antietcd.clients[client_id];
if (!client || !client.ready)
{
return null;
}
return client;
}
_sendRaftMessage(to, msg)
{
const client = this._getPeer(to);
if (client)
{
client.socket.send(JSON.stringify({ raft: msg }));
}
}
}
module.exports = AntiCluster;

View File

@ -9,6 +9,7 @@ const ws = require('ws');
const EtcTree = require('./etctree.js');
const AntiPersistence = require('./antipersistence.js');
const AntiCluster = require('./anticluster.js');
const { runCallbacks, RequestError } = require('./common.js');
class AntiEtcd
@ -19,6 +20,7 @@ class AntiEtcd
this.client_id = 1;
this.etctree = new EtcTree(true);
this.persistence = null;
this.cluster = null;
this.stored_term = 0;
this.cfg = cfg;
this.loading = false;
@ -29,9 +31,9 @@ class AntiEtcd
async run()
{
if (this.cfg.data)
if (this.cfg.data || this.cfg.cluster)
{
this.etctree.set_replicate_watcher(msg => this.persistence.persistChange(msg));
this.etctree.set_replicate_watcher(msg => this.persistAndReplicate(msg));
}
if (this.cfg.data)
{
@ -44,6 +46,10 @@ class AntiEtcd
process.on('SIGTERM', on_stop_cb);
process.on('SIGQUIT', on_stop_cb);
}
if (this.cfg.cluster)
{
this.cluster = new AntiCluster(this);
}
if (this.cfg.cert)
{
this.tls = { key: await fsp.readFile(this.cfg.key), cert: await fsp.readFile(this.cfg.cert) };
@ -78,6 +84,18 @@ class AntiEtcd
process.exit(0);
}
async persistAndReplicate(msg)
{
if (this.persistence)
{
await this.persistence.persistChange(msg);
}
if (this.cluster)
{
await this.cluster.replicateChange(msg);
}
}
handleRequest(req, res)
{
let data = [];
@ -232,6 +250,14 @@ class AntiEtcd
if (requestUrl.pathname.substr(0, 4) == '/v3/')
{
const path = requestUrl.pathname.substr(4).replace(/\/+$/, '').replace(/\/+/g, '_');
if (this.cluster)
{
const res = await this.cluster.checkRaftState(path, requestUrl, data);
if (res)
{
return res;
}
}
const cb = this['handle_'+path];
if (cb)
{
@ -342,6 +368,14 @@ class AntiEtcd
{
socket.send(JSON.stringify({ result: { header: { revision: this.etctree.mod_revision } } }));
}
else
{
if (!this.cluster)
{
return;
}
this.cluster.handleWsMsg(client, msg);
}
}
unsubscribeClient(client_id)
@ -414,7 +448,8 @@ function parse()
'USAGE:\n '+process.argv[0]+' '+process.argv[1]+' [OPTIONS]\n'+
'OPTIONS:\n'+
' [--cert ssl.crt] [--key ssl.key] [--port 12379]\n'+
' [--data data.gz] [--vitastor-persist-filter /vitastor] [--no-persist-filter] [--persist_interval 500]\n'
' [--data data.gz] [--vitastor-persist-filter /vitastor] [--no-persist-filter] [--persist_interval 500]\n'+
' [--node_id node1 --cluster_key abcdef --cluster node1=http://localhost:12379,node2=http://localhost:12380,node3=http://localhost:12381]'
);
process.exit();
}