Compare commits

...

12 Commits
master ... de64

8 changed files with 1927 additions and 66 deletions

443
antietcd.js Normal file
View File

@ -0,0 +1,443 @@
const fs = require('fs');
const fsp = require('fs').promises;
const { URL } = require('url');
const http = require('http');
const https = require('https');
const crypto = require('crypto');
const zlib = require('zlib');
const ws = require('ws');
const EtcTree = require('./etctree.js');
/**
 * Error that carries the HTTP status code to answer a request with.
 *
 * Extends Error so that instances get a stack trace and are recognised by
 * generic error handling, while keeping the `code` and `message` fields.
 */
class RequestError extends Error
{
    /**
     * @param {number} code - HTTP status code to send to the client
     * @param {string} text - response text, also used as the error message
     */
    constructor(code, text)
    {
        super(text);
        this.name = 'RequestError';
        this.code = code;
    }
}
/**
 * HTTP + WebSocket front-end for an EtcTree store, exposing a subset of the
 * etcd v3 JSON API (/v3/kv/*, /v3/lease/*) plus a /dump endpoint.
 *
 * Keys and values are base64-encoded on the wire (use_base64 is forced on
 * in the constructor) and stored decoded in the tree.
 */
class AntiEtcd
{
    /**
     * @param {object} cfg - configuration. Fields read by this class:
     *   filename (snapshot file), cert and key (enable HTTPS), port.
     */
    constructor(cfg)
    {
        this.clients = {};
        this.client_id = 1;
        this.etctree = new EtcTree(true);
        this.cfg = cfg;
        this.stopped = false;
        // Number of HTTP requests being processed and callbacks waiting for it to reach zero
        this.inflight = 0;
        this.waitInflight = [];
        this.cfg.use_base64 = true;
    }

    /**
     * Load the snapshot (if cfg.filename is set), install exit hooks and start listening.
     */
    async run()
    {
        if (this.cfg.filename)
        {
            // Load data from disk; a missing file just means an empty database
            let packed = null;
            try
            {
                packed = await fsp.readFile(this.cfg.filename);
            }
            catch (e)
            {
                if (e.code !== 'ENOENT')
                {
                    throw e;
                }
            }
            if (packed !== null)
            {
                this.etctree.load(JSON.parse(zlib.gunzipSync(packed).toString()));
            }
            // Set exit hook: persist the data, then actually terminate.
            // Installing a signal handler disables the default exit, so we must exit ourselves.
            const on_stop_cb = () => this.on_stop().then(
                () => process.exit(0),
                (e) =>
                {
                    console.error(e);
                    process.exit(1);
                }
            );
            process.on('SIGINT', on_stop_cb);
            process.on('SIGTERM', on_stop_cb);
            process.on('SIGQUIT', on_stop_cb);
        }
        if (this.cfg.cert)
        {
            this.tls = { key: await fsp.readFile(this.cfg.key), cert: await fsp.readFile(this.cfg.cert) };
            this.server = https.createServer(this.tls, (req, res) => this.handleRequest(req, res));
        }
        else
        {
            this.server = http.createServer((req, res) => this.handleRequest(req, res));
        }
        this.wss = new ws.WebSocketServer({ server: this.server });
        this.wss.on('connection', (conn, req) => this.startWebsocket(conn, req));
        this.server.listen(this.cfg.port || 2379);
    }

    /**
     * Wait for in-flight HTTP requests and write the snapshot. Only the first call does anything.
     */
    async on_stop()
    {
        if (this.stopped)
        {
            return;
        }
        this.stopped = true;
        // Wait until all requests complete
        while (this.inflight > 0)
        {
            await new Promise(ok => this.waitInflight.push(ok));
        }
        await this.persist();
    }

    /**
     * Write a gzipped JSON dump of the persistent part of the tree to cfg.filename.
     * The data goes to a temporary file first and is renamed into place after fsync.
     */
    async persist()
    {
        if (!this.cfg.filename)
        {
            return;
        }
        const packed = zlib.gzipSync(JSON.stringify(this.etctree.dump(true)));
        const tmp_name = this.cfg.filename+'.tmp';
        const fh = await fsp.open(tmp_name, 'w');
        try
        {
            await fh.writeFile(packed);
            await fh.sync();
        }
        finally
        {
            await fh.close();
        }
        await fsp.rename(tmp_name, this.cfg.filename);
    }

    /**
     * Send a plain response with the given status code and text.
     */
    fail(res, code, text)
    {
        res.writeHead(code);
        res.write(text);
        res.end();
    }

    /**
     * HTTP request entry point: collect the body, parse it as a JSON object,
     * run the matching handler and send the reply with the resulting status code.
     */
    handleRequest(req, res)
    {
        const chunks = [];
        req.on('error', console.error);
        req.on('data', (chunk) => chunks.push(chunk));
        req.on('end', async () =>
        {
            this.inflight++;
            let body = '';
            let code = 200;
            let reply;
            try
            {
                try
                {
                    // Ignore parameters like "; charset=utf-8"
                    const ctype = String(req.headers['content-type'] || '').split(';')[0].trim().toLowerCase();
                    if (ctype != 'application/json')
                    {
                        throw new RequestError(400, 'content-type should be application/json');
                    }
                    const raw = Buffer.concat(chunks);
                    body = raw.toString();
                    let data;
                    try
                    {
                        data = raw.length ? JSON.parse(body) : {};
                    }
                    catch (e)
                    {
                        throw new RequestError(400, 'body should be valid JSON');
                    }
                    if (!(data instanceof Object) || data instanceof Array)
                    {
                        throw new RequestError(400, 'body should be JSON object');
                    }
                    reply = await this.runHandler(req, data, res);
                    // JSON.stringify(undefined) is not a string, so map it to null
                    reply = JSON.stringify(reply === undefined ? null : reply);
                }
                catch (e)
                {
                    if (e instanceof RequestError)
                    {
                        code = e.code;
                        reply = e.message;
                    }
                    else
                    {
                        console.error(e);
                        code = 500;
                        reply = 'Internal error: '+e.message;
                    }
                }
                try
                {
                    // Access log
                    console.log(
                        new Date().toISOString()+
                        ' '+(req.headers['x-forwarded-for'] || req.socket.remoteAddress)+
                        ' '+req.method+' '+req.url+' '+code+'\n '+body.replace(/\n/g, '\\n')+
                        '\n '+reply.replace(/\n/g, '\\n')
                    );
                    // FIXME: Access log :req[X-Forwarded-For] [:date[clf]] pid=:pid ":method :url HTTP/:http-version" :status :res[content-length] ":referrer" ":user-agent" - :response-time ms
                    reply = Buffer.from(reply);
                    res.writeHead(code, {
                        // Error replies are plain text messages, not JSON
                        'Content-Type': code == 200 ? 'application/json' : 'text/plain',
                        'Content-Length': reply.length,
                    });
                    res.write(reply);
                    res.end();
                }
                catch (e)
                {
                    console.error(e);
                }
            }
            finally
            {
                this.inflight--;
                if (this.inflight <= 0 && this.waitInflight.length)
                {
                    // Wake up on_stop()
                    const waiters = this.waitInflight;
                    this.waitInflight = [];
                    for (const ok of waiters)
                    {
                        ok();
                    }
                }
            }
        });
    }

    /**
     * Register a new websocket client, keep it alive with pings every 30 seconds
     * and route its JSON messages to handleMessage().
     */
    startWebsocket(socket, req)
    {
        const client_id = this.client_id++;
        const client = {
            socket,
            alive: true,
            watches: {},
        };
        this.clients[client_id] = client;
        // Use the captured object: the registry entry may already be deleted when a late pong arrives
        socket.on('pong', () => { client.alive = true; });
        socket.on('error', console.error);
        const pinger = setInterval(() =>
        {
            if (!this.clients[client_id])
            {
                return;
            }
            if (!client.alive)
            {
                // No pong since the previous ping
                return socket.terminate();
            }
            client.alive = false;
            socket.ping(() => {});
        }, 30000);
        socket.on('message', (msg) =>
        {
            try
            {
                msg = JSON.parse(msg);
            }
            catch (e)
            {
                socket.send(JSON.stringify({ error: 'bad-json' }));
                return;
            }
            if (!msg)
            {
                socket.send(JSON.stringify({ error: 'empty-message' }));
                return;
            }
            try
            {
                this.handleMessage(client_id, msg, socket);
            }
            catch (e)
            {
                // A bad request (for example an unsupported range) must not crash the server
                console.error(e);
                socket.send(JSON.stringify({ error: 'request-failed', message: e.message }));
            }
        });
        socket.on('close', () =>
        {
            this.unsubscribeClient(client_id);
            clearInterval(pinger);
            delete this.clients[client_id];
            socket.terminate();
        });
    }

    /**
     * Dispatch a parsed request to handle_<path> for /v3/<path> URLs (POST only) or to handle_dump for /dump.
     * @throws {RequestError} 405 for a non-POST /v3/ call, 404 for an unknown URL
     */
    async runHandler(req, data, res)
    {
        // v3/kv/txn
        // v3/kv/range
        // v3/kv/put
        // v3/kv/deleterange
        // v3/lease/grant
        // v3/lease/keepalive
        // v3/lease/revoke O_o
        // v3/kv/lease/revoke O_o
        const requestUrl = new URL(req.url, 'http://'+(req.headers.host || 'localhost'));
        const pathname = requestUrl.pathname;
        if (pathname.startsWith('/v3/'))
        {
            // "/v3/kv/txn/" -> "kv_txn"
            const path = pathname.slice(4).replace(/\/+$/, '').replace(/\/+/g, '_');
            const cb = this['handle_'+path];
            if (typeof cb === 'function')
            {
                if (req.method != 'POST')
                {
                    throw new RequestError(405, 'Please use POST method');
                }
                return cb.call(this, data);
            }
        }
        else if (pathname == '/dump')
        {
            return this.handle_dump(data);
        }
        throw new RequestError(404, 'Supported APIs: /v3/kv/txn, /v3/kv/range, /v3/kv/put, /v3/kv/deleterange, '+
            '/v3/lease/grant, /v3/lease/revoke, /v3/kv/lease/revoke, /v3/lease/keepalive');
    }

    /**
     * Run a transaction. With use_base64 the keys/values of the request are decoded
     * in place before the call and the keys/values of range responses are encoded back.
     */
    handle_kv_txn(data)
    {
        if (this.cfg.use_base64)
        {
            for (const item of data.compare||[])
            {
                if (item.key != null)
                {
                    item.key = this.de64(item.key);
                }
            }
            for (const items of [ data.success, data.failure ])
            {
                for (const item of items||[])
                {
                    const req = item.request_range || item.requestRange ||
                        item.request_put || item.requestPut ||
                        item.request_delete_range || item.requestDeleteRange;
                    if (!req)
                    {
                        // Unknown operation type: nothing to decode
                        continue;
                    }
                    for (const field of [ 'key', 'range_end', 'value' ])
                    {
                        if (req[field] != null)
                        {
                            req[field] = this.de64(req[field]);
                        }
                    }
                }
            }
        }
        const result = this.etctree.api_txn(data);
        if (this.cfg.use_base64)
        {
            for (const item of result.responses||[])
            {
                if (item.response_range)
                {
                    for (const kv of item.response_range.kvs)
                    {
                        if (kv.key != null)
                        {
                            kv.key = this.b64(kv.key);
                        }
                        if (kv.value != null)
                        {
                            kv.value = this.b64(kv.value);
                        }
                    }
                }
            }
        }
        return result;
    }

    /** Single range query, implemented as a one-operation transaction. */
    handle_kv_range(data)
    {
        const r = this.handle_kv_txn({ success: [ { request_range: data } ] });
        return { header: r.header, ...r.responses[0].response_range };
    }

    /** Single put, implemented as a one-operation transaction. */
    handle_kv_put(data)
    {
        const r = this.handle_kv_txn({ success: [ { request_put: data } ] });
        return { header: r.header, ...r.responses[0].response_put };
    }

    /** Single delete, implemented as a one-operation transaction. */
    handle_kv_deleterange(data)
    {
        const r = this.handle_kv_txn({ success: [ { request_delete_range: data } ] });
        return { header: r.header, ...r.responses[0].response_delete_range };
    }

    handle_lease_grant(data)
    {
        return this.etctree.api_grant_lease(data);
    }

    handle_lease_revoke(data)
    {
        return this.etctree.api_revoke_lease(data);
    }

    // Same operation under the alternative URL /v3/kv/lease/revoke
    handle_kv_lease_revoke(data)
    {
        return this.etctree.api_revoke_lease(data);
    }

    handle_lease_keepalive(data)
    {
        return this.etctree.api_keepalive_lease(data);
    }

    /** Return a full dump of the tree (request body is ignored). */
    handle_dump(data)
    {
        return this.etctree.dump();
    }

    /**
     * Handle one websocket message: create_request, cancel_request or progress_request.
     * Watch ids chosen by the client are mapped to the ids allocated by the tree
     * in clients[client_id].watches.
     */
    handleMessage(client_id, msg, socket)
    {
        const client = this.clients[client_id];
        if (!client)
        {
            return;
        }
        if (msg.create_request)
        {
            // FIXME progress_notify, filters, prev_kv
            const create_request = msg.create_request;
            if (!create_request.watch_id || !client.watches[create_request.watch_id])
            {
                const req = { ...create_request, watch_id: null };
                if (this.cfg.use_base64)
                {
                    if (req.key != null)
                    {
                        req.key = this.de64(req.key);
                    }
                    if (req.range_end != null)
                    {
                        req.range_end = this.de64(req.range_end);
                    }
                }
                const watch = this.etctree.api_create_watch(req, (msg) => this.sendToSocket(socket, msg));
                if (!watch.created)
                {
                    socket.send(JSON.stringify({ result: { header: { revision: this.etctree.mod_revision }, watch_id: create_request.watch_id, ...watch } }));
                }
                else
                {
                    create_request.watch_id ||= watch.watch_id;
                    client.watches[create_request.watch_id] = watch.watch_id;
                    socket.send(JSON.stringify({ result: { header: { revision: this.etctree.mod_revision }, watch_id: create_request.watch_id, created: true } }));
                }
            }
        }
        else if (msg.cancel_request)
        {
            const mapped_id = client.watches[msg.cancel_request.watch_id];
            if (mapped_id)
            {
                // The tree looks watchers up by plain id
                this.etctree.api_cancel_watch(mapped_id);
                delete client.watches[msg.cancel_request.watch_id];
                socket.send(JSON.stringify({ result: { header: { revision: this.etctree.mod_revision }, watch_id: msg.cancel_request.watch_id, canceled: true } }));
            }
        }
        else if (msg.progress_request)
        {
            socket.send(JSON.stringify({ result: { header: { revision: this.etctree.mod_revision } } }));
        }
    }

    /**
     * Send a watch notification, base64-encoding event keys and values on a copy
     * so that the message object shared with other watchers is not modified.
     */
    sendToSocket(socket, msg)
    {
        if (this.cfg.use_base64 && msg.result && msg.result.events)
        {
            const events = msg.result.events.map(ev =>
            {
                if (!ev.kv)
                {
                    return ev;
                }
                const kv = { ...ev.kv };
                if (kv.key != null)
                {
                    kv.key = this.b64(kv.key);
                }
                if (kv.value != null)
                {
                    kv.value = this.b64(kv.value);
                }
                return { ...ev, kv };
            });
            msg = { ...msg, result: { ...msg.result, events } };
        }
        socket.send(JSON.stringify(msg));
    }

    /** Cancel every watch registered by the given client. */
    unsubscribeClient(client_id)
    {
        const client = this.clients[client_id];
        if (!client)
        {
            return;
        }
        for (const watch_id in client.watches)
        {
            this.etctree.api_cancel_watch(client.watches[watch_id]);
        }
    }

    /** Decode a base64 string into a text string. */
    de64(k)
    {
        return Buffer.from(k, 'base64').toString();
    }

    /** Encode a string (or anything Buffer.from accepts) as base64. */
    b64(k)
    {
        return Buffer.from(k).toString('base64');
    }
}
// Entry point: start a server on port 12379 with no snapshot file and no TLS;
// a failure during startup is only printed to the console.
new AntiEtcd({ port: 12379 }).run().catch(console.error);

667
etctree.js Normal file
View File

@ -0,0 +1,667 @@
const crypto = require('crypto');
const stableStringify = require('./stable-stringify.js');
/*type TreeNode = {
value?: any,
create_revision?: number,
mod_revision?: number,
version?: number,
lease?: string,
children: { [string]: TreeNode },
watchers?: number[],
key_watchers?: number[],
};*/
/**
 * In-memory tree-structured key-value store with etcd-like transactions,
 * leases and watchers.
 *
 * Keys are split on '/' and stored as a tree of nodes (see TreeNode above).
 * Deleted keys stay in the tree as nodes with a null value and the revision of deletion.
 */
class EtcTree
{
    constructor()
    {
        this.state = {};
        this.leases = {};
        this.watchers = {};
        this.watcher_id = 0;
        this.mod_revision = 0;
        this.paused = false;
        this.on_expire_lease = null;
    }

    /**
     * Evaluate one transaction comparison.
     * @param {object} chk - { key, target: MOD|CREATE|VERSION|LEASE, result?: EQUAL|NOT_EQUAL|LESS|GREATER, ...reference value }
     *   A missing result means EQUAL.
     * @returns {boolean}
     * @throws {Error} on an unsupported target or result
     */
    check(chk)
    {
        const parts = this.key_parts(chk.key);
        const { cur } = this.get_subtree(parts, false, false);
        let check_value, ref_value;
        if (chk.target === 'MOD')
        {
            check_value = cur && cur.mod_revision || 0;
            ref_value = chk.mod_revision || 0;
        }
        else if (chk.target === 'CREATE')
        {
            check_value = cur && cur.create_revision || 0;
            ref_value = chk.create_revision || 0;
        }
        else if (chk.target === 'VERSION')
        {
            check_value = cur && cur.version || 0;
            ref_value = chk.version || 0;
        }
        else if (chk.target === 'LEASE')
        {
            check_value = cur && cur.lease;
            ref_value = chk.lease;
        }
        else
        {
            throw new Error('Unsupported comparison target: '+chk.target);
        }
        switch (chk.result || 'EQUAL')
        {
            case 'EQUAL':
                return check_value == ref_value;
            case 'NOT_EQUAL':
                return check_value != ref_value;
            case 'LESS':
                return check_value < ref_value;
            case 'GREATER':
                return check_value > ref_value;
            default:
                throw new Error('Unsupported comparison result: '+chk.result);
        }
    }

    /**
     * Split a key into path components, collapsing repeated slashes and dropping one trailing slash.
     * A leading slash produces an empty first component.
     */
    key_parts(key)
    {
        if (typeof key !== 'string')
        {
            throw new Error('key should be a string');
        }
        const parts = key.replace(/\/\/+/g, '/').replace(/\/$/g, ''); // trim beginning?
        return parts === '' ? [] : parts.split('/');
    }

    /**
     * Convert a { key, range_end } request into { parts, all }.
     * Only single keys and directory ranges ("dir/" .. "dir0") are supported.
     */
    get_range(req)
    {
        const key = req.key;
        const end = req.range_end;
        if (typeof key !== 'string')
        {
            throw new Error('key should be a string');
        }
        if (end != null && (typeof end !== 'string' || key[key.length-1] != '/' || end[end.length-1] != '0' ||
            end.substr(0, end.length-1) !== key.substr(0, key.length-1)))
        {
            throw new Error('Non-directory range queries are unsupported');
        }
        const parts = this.key_parts(key);
        return { parts, all: end != null };
    }

    /**
     * Walk down the tree along `parts`.
     * @param {string[]} parts - path components
     * @param {boolean} create - create missing nodes instead of returning {}
     * @param {boolean} notify - collect subtree watchers of all nodes on the path
     * @returns {{ cur?: object, watchers?: Array|null }} empty object if the path does not exist
     */
    get_subtree(parts, create, notify)
    {
        let cur = this.state;
        let watchers = notify ? [] : null;
        for (let k of parts)
        {
            if (notify && cur.watchers)
            {
                watchers.push.apply(watchers, cur.watchers);
            }
            if (!cur.children)
            {
                if (!create)
                {
                    return {};
                }
                cur.children = {};
            }
            if (!cur.children[k])
            {
                if (!create)
                {
                    return {};
                }
                cur.children[k] = {};
            }
            cur = cur.children[k];
        }
        if (notify && cur.watchers)
        {
            watchers.push.apply(watchers, cur.watchers);
        }
        return { watchers, cur };
    }

    /**
     * Create a snapshot of the data.
     * @param {boolean} [persistent_only] - when true, keys attached to leases and the leases themselves are left out
     */
    dump(persistent_only)
    {
        const snapshot = {
            state: this._copy_tree(this.state, persistent_only) || {},
            mod_revision: this.mod_revision,
        };
        if (!persistent_only)
        {
            snapshot.leases = {};
            for (const id in this.leases)
            {
                const lease = this.leases[id];
                snapshot.leases[id] = { ttl: lease.ttl, expires: lease.expires };
            }
        }
        return snapshot;
    }

    /**
     * Copy a subtree without watchers, dropping nodes that hold neither a value nor children.
     * @returns {object|null} the copy or null if nothing is left
     */
    _copy_tree(cur, no_lease)
    {
        const nonempty = cur.value != null && (!no_lease || !cur.lease);
        const copy = (nonempty ? { ...cur } : {});
        copy.children = {};
        delete copy.watchers;
        delete copy.key_watchers;
        let has_children = false;
        for (const k in cur.children)
        {
            // no_lease has to be applied to the whole subtree
            const child = this._copy_tree(cur.children[k], no_lease);
            if (child)
            {
                copy.children[k] = child;
                has_children = true;
            }
        }
        if (!nonempty && !has_children)
        {
            return null;
        }
        if (!has_children)
        {
            delete copy.children;
        }
        return copy;
    }

    /**
     * Load a snapshot produced by dump(): leases first, then the difference in data.
     * Watchers are notified about every key that changes.
     */
    load(snapshot)
    {
        // Persistent-only snapshots have no leases
        const new_leases = snapshot.leases || {};
        // First apply leases
        for (const id in this.leases)
        {
            if (!new_leases[id])
            {
                this.api_revoke_lease({ ID: id });
            }
        }
        // Revocation above may bump the revision, so the snapshot revision is applied after it
        this.mod_revision = snapshot.mod_revision;
        for (const id in new_leases)
        {
            if (!this.leases[id])
            {
                this.leases[id] = { ...new_leases[id], timer_id: null, keys: {} };
            }
            else if (this.leases[id].ttl != new_leases[id].ttl ||
                this.leases[id].expires != new_leases[id].expires)
            {
                this.leases[id].ttl = new_leases[id].ttl;
                this.leases[id].expires = new_leases[id].expires;
            }
            else
            {
                continue;
            }
            if (this.leases[id].timer_id)
            {
                clearTimeout(this.leases[id].timer_id);
                this.leases[id].timer_id = null;
            }
            if (!this.paused)
            {
                this.leases[id].timer_id = setTimeout(() => this.expire_lease(id), this.leases[id].expires - Date.now());
            }
        }
        // Then find and apply the difference in data
        const notifications = [];
        this._restore_diff(this.state, snapshot.state || {}, null, this.state.watchers || [], notifications);
        this.notify(notifications);
    }

    /**
     * Make node cur_old equal to snapshot node cur_new, recursively.
     * Snapshot nodes are never linked into the tree, new nodes are created instead.
     * @param {object} cur_old - live tree node (modified)
     * @param {object} cur_new - snapshot node (read only)
     * @param {string|null} prefix - key of the node, null for the root
     * @param {Array} watchers - subtree watchers collected on the path including cur_old
     * @param {Array} notifications - output list of change notifications
     */
    _restore_diff(cur_old, cur_new, prefix, watchers, notifications)
    {
        const key = prefix === null ? '' : prefix;
        if (!eq(cur_old.lease, cur_new.lease))
        {
            if (cur_old.lease && this.leases[cur_old.lease])
            {
                delete this.leases[cur_old.lease].keys[key];
            }
            cur_old.lease = cur_new.lease;
        }
        // Always (re)register: the lease object may have been re-created with an empty key list
        if (cur_new.lease && this.leases[cur_new.lease])
        {
            this.leases[cur_new.lease].keys[key] = true;
        }
        cur_old.mod_revision = cur_new.mod_revision;
        cur_old.create_revision = cur_new.create_revision;
        cur_old.version = cur_new.version;
        if (!eq(cur_old.value, cur_new.value))
        {
            cur_old.value = cur_new.value;
            // One notification per changed key, addressed to subtree and exact-key watchers
            const all_watchers = cur_old.key_watchers ? [ ...watchers, ...cur_old.key_watchers ] : watchers;
            let notify;
            if (cur_old.value != null)
            {
                notify = { watchers: all_watchers, key, value: cur_old.value, mod_revision: cur_old.mod_revision };
                if (cur_old.lease)
                {
                    notify.lease = cur_old.lease;
                }
            }
            else
            {
                // No "value" field means deletion, see notify()
                notify = { watchers: all_watchers, key, mod_revision: cur_old.mod_revision };
            }
            notifications.push(notify);
        }
        const new_children = cur_new.children || {};
        for (const k in new_children)
        {
            if (!cur_old.children)
            {
                cur_old.children = {};
            }
            if (!cur_old.children[k])
            {
                cur_old.children[k] = {};
            }
            this._restore_diff(
                cur_old.children[k], new_children[k],
                prefix === null ? k : prefix+'/'+k,
                cur_old.children[k].watchers ? [ ...watchers, ...cur_old.children[k].watchers ] : watchers,
                notifications
            );
        }
        for (const k in cur_old.children)
        {
            if (!new_children[k])
            {
                // Delete subtree
                this.delete_all(
                    notifications,
                    cur_old.children[k].watchers ? [ ...watchers, ...cur_old.children[k].watchers ] : watchers,
                    cur_old.children[k], true,
                    prefix === null ? k : prefix+'/'+k,
                    this.mod_revision
                );
            }
        }
    }

    // slave/follower nodes don't expire leases themselves, they listen for the leader instead
    pause_leases()
    {
        this.paused = true;
        for (const id in this.leases)
        {
            const lease = this.leases[id];
            if (lease.timer_id)
            {
                clearTimeout(lease.timer_id);
                lease.timer_id = null;
            }
        }
    }

    /** Re-arm expiration timers of all leases after pause_leases(). */
    resume_leases()
    {
        this.paused = false;
        for (const id in this.leases)
        {
            const lease = this.leases[id];
            if (!lease.timer_id)
            {
                lease.timer_id = setTimeout(() => this.expire_lease(id), lease.expires - Date.now());
            }
        }
    }

    /** Set a callback invoked with the lease id after a lease expires by timeout. */
    set_on_expire_lease(cb)
    {
        this.on_expire_lease = cb;
    }

    /**
     * Create a lease with a random id.
     * @param {{ TTL: number }} req - time to live in seconds
     */
    api_grant_lease(req)
    {
        let id;
        while (!id || this.leases[id])
        {
            id = crypto.randomBytes(8).toString('hex');
        }
        const expires = Date.now() + req.TTL*1000;
        const timer_id = this.paused ? null : setTimeout(() => this.expire_lease(id), req.TTL*1000);
        this.leases[id] = { ttl: req.TTL, expires, timer_id, keys: {} };
        return { header: { revision: this.mod_revision }, ID: id, TTL: req.TTL };
    }

    /**
     * Prolong a lease by its original TTL.
     * @throws {Error} 'unknown lease'
     */
    api_keepalive_lease(req)
    {
        const id = req.ID;
        if (!this.leases[id])
        {
            throw new Error('unknown lease');
        }
        const lease = this.leases[id];
        if (lease.timer_id)
        {
            clearTimeout(lease.timer_id);
            lease.timer_id = null;
        }
        const ttl = this.leases[id].ttl;
        lease.expires = Date.now() + ttl*1000;
        if (!this.paused)
        {
            lease.timer_id = setTimeout(() => this.expire_lease(id), ttl*1000);
        }
        // extra wrapping in { result: ... }
        return { result: { header: { revision: this.mod_revision }, ID: id, TTL: ''+ttl } };
    }

    /** Timer callback: revoke the lease and report it to on_expire_lease. */
    expire_lease(id)
    {
        if (!this.leases[id])
        {
            // Already revoked, do not throw from a timer callback
            return;
        }
        this.api_revoke_lease({ ID: id });
        if (this.on_expire_lease)
        {
            this.on_expire_lease(id);
        }
    }

    /**
     * Delete all keys attached to a lease and forget the lease itself.
     * @param {{ ID: string }} req
     * @throws {Error} 'unknown lease'
     */
    api_revoke_lease(req)
    {
        const lease = this.leases[req.ID];
        if (!lease)
        {
            throw new Error('unknown lease');
        }
        const next_revision = this.mod_revision + 1;
        const notifications = [];
        // Iterate over a copy: deletion unregisters keys from the lease
        for (const key of Object.keys(lease.keys))
        {
            this.txn_action({ request_delete_range: { key } }, next_revision, notifications);
        }
        if (lease.timer_id)
        {
            clearTimeout(lease.timer_id);
            lease.timer_id = null;
        }
        delete this.leases[req.ID];
        this.notify(notifications);
        return { header: { revision: this.mod_revision } };
    }

    /**
     * Register a watcher for a key or a directory range.
     * @param {object} req - { key, range_end?, watch_id?, start_revision? }
     * @param {function} send - called with { result: { header, events } } on changes
     * @returns {object} { watch_id, created: true } or { compact_revision }
     */
    api_create_watch(req, send)
    {
        const { parts, all } = this.get_range(req);
        // compact_revision is not set anywhere in this class, so this branch only works if it is assigned externally
        if (req.start_revision && this.compact_revision && this.compact_revision > req.start_revision)
        {
            // Deletions up to this.compact_revision are forgotten
            return { compact_revision: this.compact_revision };
        }
        let watch_id = req.watch_id;
        if (watch_id instanceof Object)
        {
            throw new Error('invalid watch_id');
        }
        if (!watch_id)
        {
            watch_id = ++this.watcher_id;
        }
        if (!this.watchers[watch_id])
        {
            this.watchers[watch_id] = {
                paths: [],
                send,
            };
        }
        this.watchers[watch_id].paths.push(parts);
        const { cur } = this.get_subtree(parts, true, false);
        if (all)
        {
            cur.watchers = cur.watchers || [];
            cur.watchers.push(watch_id);
        }
        else
        {
            cur.key_watchers = cur.key_watchers || [];
            cur.key_watchers.push(watch_id);
        }
        // start_revision is inclusive, same as the check in get_modified()
        if (req.start_revision && req.start_revision <= this.mod_revision)
        {
            // Send initial changes, limited to the watched key or subtree
            setImmediate(() =>
            {
                const events = [];
                const { cur } = this.get_subtree(parts, false, false);
                if (cur)
                {
                    this.get_modified(events, cur, parts.join('/') || null, req.start_revision, all);
                }
                send({ result: { header: { revision: this.mod_revision }, events } });
            });
        }
        return { watch_id, created: true };
    }

    /**
     * Collect PUT/DELETE events for nodes modified at or after min_rev.
     * @param {Array} events - output list
     * @param {object} cur - node to start from
     * @param {string|null} prefix - key of cur, null for the root
     * @param {number} min_rev - minimum modification revision (inclusive)
     * @param {boolean} [all=true] - descend into children
     */
    get_modified(events, cur, prefix, min_rev, all = true)
    {
        if (cur.mod_revision >= min_rev)
        {
            const key = prefix === null ? '' : prefix;
            if (cur.value == null)
            {
                events.push({ type: 'DELETE', kv: { key, mod_revision: cur.mod_revision } });
            }
            else
            {
                const kv = { key, value: cur.value, mod_revision: cur.mod_revision };
                if (cur.lease)
                {
                    kv.lease = cur.lease;
                }
                events.push({ type: 'PUT', kv });
            }
        }
        if (all && cur.children)
        {
            for (const k in cur.children)
            {
                this.get_modified(events, cur.children[k], prefix === null ? k : prefix+'/'+k, min_rev, true);
            }
        }
    }

    /**
     * Remove a watcher.
     * @param {number|string|{watch_id: number|string}} watch_id - watcher id or a request object containing it
     */
    api_cancel_watch(watch_id)
    {
        const wid = watch_id instanceof Object ? watch_id.watch_id : watch_id;
        if (this.watchers[wid])
        {
            for (const parts of this.watchers[wid].paths)
            {
                const { cur } = this.get_subtree(parts, false, false);
                if (cur)
                {
                    if (cur.watchers)
                    {
                        cur.watchers = cur.watchers.filter(id => id != wid);
                        if (!cur.watchers.length)
                        {
                            cur.watchers = null;
                        }
                    }
                    if (cur.key_watchers)
                    {
                        cur.key_watchers = cur.key_watchers.filter(id => id != wid);
                        if (!cur.key_watchers.length)
                        {
                            cur.key_watchers = null;
                        }
                    }
                    // FIXME: cleanup deleted tree paths
                }
            }
            delete this.watchers[wid];
        }
        return { canceled: true };
    }

    /**
     * Run a transaction: evaluate all comparisons, then run `success` or `failure` operations.
     * All modifications made by one transaction get the same new revision.
     */
    api_txn({ compare, success, failure })
    {
        const failed = (compare || []).filter(chk => !this.check(chk)).length > 0;
        const responses = [];
        const notifications = [];
        const next_revision = this.mod_revision + 1;
        try
        {
            for (const req of (failed ? failure : success) || [])
            {
                responses.push(this.txn_action(req, next_revision, notifications));
            }
        }
        finally
        {
            // Changes applied before a failing operation still have to reach watchers
            this.notify(notifications);
        }
        return { header: { revision: this.mod_revision }, succeeded: !failed, responses };
    }

    /**
     * Deliver notifications to watchers, one message per watcher.
     * Several notifications for the same key within one call are merged, the last one wins.
     * A notification without a "value" field is a deletion.
     */
    notify(notifications)
    {
        if (!notifications.length)
        {
            return;
        }
        const by_watcher = {};
        for (const notif of notifications)
        {
            const watchers = notif.watchers;
            delete notif.watchers;
            const conv = { type: ('value' in notif) ? 'PUT' : 'DELETE', kv: notif };
            for (const wid of watchers)
            {
                if (this.watchers[wid])
                {
                    by_watcher[wid] = by_watcher[wid] || { header: { revision: this.mod_revision }, events: {} };
                    by_watcher[wid].events[notif.key] = conv;
                }
            }
        }
        for (const wid in by_watcher)
        {
            by_watcher[wid].events = Object.values(by_watcher[wid].events);
            this.watchers[wid].send({ result: by_watcher[wid] });
        }
    }

    /**
     * Execute one transaction operation (range, put or delete_range; snake_case or camelCase names).
     * @param {object} req - operation
     * @param {number} cur_revision - revision to assign to modifications
     * @param {Array} notifications - output list of change notifications
     * @returns {object} response object, {} for an unknown operation
     */
    txn_action(req, cur_revision, notifications)
    {
        if (req.request_range || req.requestRange)
        {
            const request_range = req.request_range || req.requestRange;
            // FIXME: limit, revision(-), sort_order, sort_target, serializable(-),
            // count_only, min_mod_revision, max_mod_revision, min_create_revision, max_create_revision
            const { parts, all } = this.get_range(request_range);
            const { cur } = this.get_subtree(parts, false, false);
            const kvs = [];
            if (cur)
            {
                this.get_all(kvs, cur, all, parts.join('/') || null, request_range);
            }
            return { response_range: { kvs } };
        }
        else if (req.request_put || req.requestPut)
        {
            const request_put = req.request_put || req.requestPut;
            // FIXME: prev_kv, ignore_value(?), ignore_lease(?)
            const parts = this.key_parts(request_put.key);
            const key = parts.join('/');
            const value = request_put.value;
            // Validate before touching the tree so a failed put leaves no traces
            if (request_put.lease && !this.leases[request_put.lease])
            {
                throw new Error('unknown lease: '+request_put.lease);
            }
            const { cur, watchers } = this.get_subtree(parts, true, true);
            if (cur.key_watchers)
            {
                watchers.push.apply(watchers, cur.key_watchers);
            }
            if (!eq(cur.value, value) || cur.lease != request_put.lease)
            {
                if (cur.lease && this.leases[cur.lease])
                {
                    delete this.leases[cur.lease].keys[key];
                }
                if (request_put.lease)
                {
                    cur.lease = request_put.lease;
                    this.leases[request_put.lease].keys[key] = true;
                }
                else if (cur.lease)
                {
                    cur.lease = null;
                }
                this.mod_revision = cur_revision;
                cur.version = (cur.version||0) + 1;
                cur.mod_revision = cur_revision;
                if (cur.value == null)
                {
                    cur.create_revision = cur_revision;
                }
                cur.value = value;
                const notify = { watchers, key, value, mod_revision: cur.mod_revision };
                if (cur.lease)
                {
                    notify.lease = cur.lease;
                }
                notifications.push(notify);
            }
            return { response_put: {} };
        }
        else if (req.request_delete_range || req.requestDeleteRange)
        {
            const request_delete_range = req.request_delete_range || req.requestDeleteRange;
            // FIXME: prev_kv
            const { parts, all } = this.get_range(request_delete_range);
            const { cur, watchers } = this.get_subtree(parts, false, true);
            const prevcount = notifications.length;
            if (cur)
            {
                this.delete_all(notifications, watchers, cur, all, parts.join('/') || null, cur_revision);
            }
            return { response_delete_range: { deleted: notifications.length-prevcount } };
        }
        return {};
    }

    /**
     * Collect existing keys of a node (and of its subtree when `all` is set) into kvs.
     * Stops as soon as req.limit items are collected.
     */
    get_all(kvs, cur, all, prefix, req)
    {
        if (req.limit && kvs.length >= req.limit)
        {
            return;
        }
        if (cur.value != null)
        {
            const item = { key: (prefix === null ? '' : prefix) };
            if (!req.keys_only)
            {
                item.value = cur.value;
                item.mod_revision = cur.mod_revision;
                //item.create_revision = cur.create_revision;
                //item.version = cur.version;
                if (cur.lease)
                {
                    item.lease = cur.lease;
                }
            }
            kvs.push(item);
        }
        if (all && cur.children)
        {
            for (let k in cur.children)
            {
                this.get_all(kvs, cur.children[k], true, prefix === null ? k : prefix+'/'+k, req);
            }
        }
    }

    /**
     * Delete the key of a node (and of its subtree when `all` is set), pushing one
     * notification per deleted key.
     */
    delete_all(notifications, watchers, cur, all, prefix, cur_revision)
    {
        if (cur.value != null)
        {
            // Do not actually forget the key until the deletion is confirmed by all replicas
            // ...and until it's not required by watchers
            if (cur.lease && this.leases[cur.lease])
            {
                delete this.leases[cur.lease].keys[prefix === null ? '' : prefix];
            }
            // A deleted key is not attached to any lease anymore
            delete cur.lease;
            cur.value = null;
            cur.version = 0;
            cur.create_revision = null;
            cur.mod_revision = cur_revision;
            this.mod_revision = cur_revision;
            notifications.push({
                watchers: cur.key_watchers ? [ ...watchers, ...cur.key_watchers ] : watchers,
                key: (prefix === null ? '' : prefix),
                mod_revision: cur_revision,
            });
        }
        if (all && cur.children)
        {
            for (let k in cur.children)
            {
                const subw = cur.children[k].watchers ? [ ...watchers, ...cur.children[k].watchers ] : watchers;
                this.delete_all(notifications, subw, cur.children[k], true, prefix === null ? k : prefix+'/'+k, cur_revision);
            }
        }
    }
}
/**
 * Loose equality for stored values: if at least one side is an object,
 * both sides are compared by their stable JSON serialization,
 * otherwise the non-strict == comparison is used.
 */
function eq(a, b)
{
    const structured = a instanceof Object || b instanceof Object;
    return structured
        ? stableStringify(a) === stableStringify(b)
        : a == b;
}
// Attach the comparison helper to the class as EtcTree.eq and export the class as the module.
EtcTree.eq = eq;
module.exports = EtcTree;

126
etctree.spec.js Normal file
View File

@ -0,0 +1,126 @@
const EtcTree = require('./etctree.js');
// Test cases keyed by test name
const tests = {};
// Name of the test being executed, printed in failure reports
let cur_test = '';
/**
 * Compare the real value `a` with the expected value `b` using EtcTree.eq.
 * On mismatch print both values with a stack trace (without the frame of
 * expect itself) to stderr and terminate the process with status 1.
 */
const expect = (a, b) =>
{
    if (EtcTree.eq(a, b))
    {
        return;
    }
    const trace = new Error().stack.replace(/^.*\n.*\n/, '');
    const report = cur_test+' test:\nexpected: '+JSON.stringify(b)+'\nreal: '+JSON.stringify(a)+'\n'+trace+'\n';
    process.stderr.write(report);
    process.exit(1);
};
// Puts, single-key and directory range reads, compare-and-set transactions and dump on one tree.
tests['read/write'] = async () =>
{
const t = new EtcTree();
// First write creates revision 1; the double slash in the key is collapsed
expect(
t.api_txn({ success: [ { request_put: { key: '/vitastor//config/global', value: { hello: 'world' } } } ] }),
{ header: { revision: 1 }, succeeded: true, responses: [ { response_put: {} } ] }
);
// Read by exact key
expect(
t.api_txn({ success: [ { request_range: { key: '/vitastor/config/global' } } ] }),
{ header: { revision: 1 }, succeeded: true, responses: [ { response_range: {
kvs: [ { key: '/vitastor/config/global', mod_revision: 1, value: { hello: 'world' } } ],
} } ] }
);
// Read by directory range
expect(
t.api_txn({ success: [ { request_range: { key: '/vitastor/config/', range_end: '/vitastor/config0' } } ] }),
{ header: { revision: 1 }, succeeded: true, responses: [ { response_range: {
kvs: [ { key: '/vitastor/config/global', mod_revision: 1, value: { hello: 'world' } } ],
} } ] }
);
// A directory that does not exist yields an empty result
expect(
t.api_txn({ success: [ { request_range: { key: '/vitasto/', range_end: '/vitasto0' } } ] }),
{ header: { revision: 1 }, succeeded: true, responses: [ { response_range: { kvs: [] } } ] }
);
// mod_revision (1) is not LESS than 1, so the failure branch runs
expect(
t.api_txn({
compare: [ { key: '/vitastor/config/global', target: 'MOD', mod_revision: 1, result: 'LESS' } ],
success: [ { request_put: { key: '/vitastor//config/global', value: { hello: 'world' } } } ],
failure: [ { request_range: { key: '/vitastor/config/global' } } ],
}),
{ header: { revision: 1 }, succeeded: false, responses: [ { response_range: {
kvs: [ { key: '/vitastor/config/global', mod_revision: 1, value: { hello: 'world' } } ],
} } ] }
);
// mod_revision (1) is LESS than 2, so the put runs and creates revision 2
expect(
t.api_txn({
compare: [ { key: '/vitastor/config/global', target: 'MOD', mod_revision: 2, result: 'LESS' } ],
success: [ { request_put: { key: '/vitastor//config/global', value: { hello: 'world2' } } } ]
}),
{ header: { revision: 2 }, succeeded: true, responses: [ { response_put: {} } ] }
);
expect(
t.api_txn({ success: [ { request_range: { key: '/vitastor/config/', range_end: '/vitastor/config0' } } ] }),
{ header: { revision: 2 }, succeeded: true, responses: [ { response_range: {
kvs: [ { key: '/vitastor/config/global', mod_revision: 2, value: { hello: 'world2' } } ],
} } ] }
);
// Full dump; the "" child of the root comes from the leading slash of the key
expect(
t.dump(false),
{"state":{"children":{"":{"children":{"vitastor":{"children":{"config":{"children":{"global":{"version":2,"mod_revision":2,"create_revision":1,"value":{"hello":"world2"}}}}}}}}}},"mod_revision":2,"leases":{}}
);
};
// A directory watcher receives a PUT event for a key written under the watched directory.
tests['watch'] = async () =>
{
const t = new EtcTree();
// Events delivered to the watcher are collected here
const sent = [];
const send = (event) => sent.push(event);
expect(
t.api_txn({ success: [ { request_put: { key: '/vitastor//config/global', value: { hello: 'world' } } } ] }),
{ header: { revision: 1 }, succeeded: true, responses: [ { response_put: {} } ] }
);
expect(
t.api_create_watch({ watch_id: 1, key: '/vitastor/', range_end: '/vitastor0' }, send),
{ watch_id: 1, created: true }
);
// Creating a watch without start_revision sends nothing by itself
expect(sent, []);
expect(
t.api_txn({ success: [ { request_put: { key: '/vitastor/osd/state/1', value: { ip: '1.2.3.4' } } } ] }),
{ header: { revision: 2 }, succeeded: true, responses: [ { response_put: {} } ] }
);
expect(sent, [ { result: { header: { revision: 2 }, events: [ { type: 'PUT', kv: { key: '/vitastor/osd/state/1', value: { ip: '1.2.3.4' }, mod_revision: 2 } } ] } } ]);
};
// A key attached to a 0.5 second lease is deleted on expiration (with a DELETE event),
// and a dump taken before the expiration can be loaded back into the same or a fresh tree.
tests['lease'] = async () =>
{
const t = new EtcTree();
const sent = [];
const send = (event) => sent.push(event);
const leaseID = t.api_grant_lease({ TTL: 0.5 }).ID;
expect(leaseID != null, true);
expect(
t.api_txn({ success: [ { request_put: { key: '/vitastor/osd/state/1', lease: leaseID, value: { ip: '1.2.3.4' } } } ] }),
{ header: { revision: 1 }, succeeded: true, responses: [ { response_put: {} } ] }
);
expect(
t.api_create_watch({ watch_id: 1, key: '/vitastor/', range_end: '/vitastor0' }, send),
{ watch_id: 1, created: true }
);
expect(sent, []);
const dump = t.dump(false);
// The expiration time is not known in advance, so it is taken from the dump itself
const expires = dump.leases[leaseID].expires;
expect(dump, {"state":{"children":{"":{"children":{"vitastor":{"children":{"osd":{"children":{"state":{"children":{"1":{"lease":leaseID,"version":1,"mod_revision":1,"create_revision":1,"value":{"ip":"1.2.3.4"}}}}}}}}}}}},"mod_revision":1,"leases":{[leaseID]:{"ttl":0.5,"expires":expires}}});
// Wait longer than the lease TTL
await new Promise(ok => setTimeout(ok, 600));
expect(sent, [ { result: { header: { revision: 2 }, events: [ { type: 'DELETE', kv: { key: '/vitastor/osd/state/1', mod_revision: 2 } } ] } } ]);
// Leases are paused before loading so that load() does not arm expiration timers
t.pause_leases();
t.load(dump);
expect(t.dump(false), dump);
const t2 = new EtcTree();
t2.pause_leases();
t2.load(dump);
expect(t2.dump(false), dump);
};
// Run all registered tests sequentially in definition order.
// An exception thrown by a test is printed and turned into a non-zero exit
// status, otherwise a crashed test run would still be reported as successful.
(async function()
{
    for (cur_test in tests)
    {
        await tests[cur_test]();
        console.log(cur_test+' test: OK');
    }
})().catch((e) =>
{
    console.error(e);
    process.exit(1);
});

140
model_simple.js Normal file
View File

@ -0,0 +1,140 @@
#!/usr/bin/nodejs
// "Stupid" gossip algorithm simulation tool
/**
 * Simulate a naive gossip protocol and print how many ticks and messages it
 * takes until every node is in sync. Prints progress to the console, returns nothing.
 *
 * Two scenarios are supported:
 * - join (options.update is 0): nodes 1..initial are known to everybody and
 *   additionally know all other nodes; the run ends when every node knows all `total` nodes;
 * - update (options.update > 0): everybody knows everybody at metadata version 1,
 *   nodes 1..update bump their own metadata to version 2; the run ends when
 *   every node knows version 2 of all `update` nodes.
 *
 * @param {object} options - total, gossip, msgcap, update, initial; missing or
 *   zero values are replaced with the defaults assigned below (the object is modified)
 */
function test_simple(options)
{
options.total ||= 100;
options.gossip ||= 4;
options.msgcap ||= 5;
options.update ||= 0;
options.initial ||= 5;
let messages_sent = 0;
let tick = 1;
// known[i][j] - metadata version of node j as known by node i
const known = {};
// lists[i] - ids of nodes known by node i, used to pick random peers
const lists = {};
// listsv2[i] - ids of nodes whose version 2 metadata is known by node i (update scenario)
const listsv2 = {};
for (let i = 1; i <= options.total; i++)
{
known[i] = {};
lists[i] = [];
for (let j = 1; j <= (options.update ? options.total : options.initial); j++)
{
known[i][j] = 1; // meta version 1
lists[i].push(j);
}
listsv2[i] = [];
}
// The list and its target length that define "in sync" for the chosen scenario
let cmp_lists;
let cmp_n;
if (options.update)
{
// We want to update <options.update> nodes metadata to version 2
for (let i = 1; i <= options.update; i++)
{
known[i][i] = 2;
listsv2[i].push(i);
}
cmp_lists = listsv2;
cmp_n = options.update;
}
else
{
// We want <options.total-options.initial> to join <options.initial>
for (let i = 1; i <= options.initial; i++)
{
// NOTE(review): known[i][i] is already set by the initialization loop above for i <= initial,
// so this branch looks unreachable
if (!known[i][i])
{
known[i][i] = 1;
lists[i].push(i);
}
for (let alive = options.initial+1; alive <= options.total; alive++)
{
if (!known[i][alive])
{
known[i][alive] = true;
lists[i].push(alive);
}
}
}
cmp_lists = lists;
cmp_n = options.total;
}
// Count nodes that are in sync from the very beginning
let in_sync = 0;
for (let i = 1; i <= options.total; i++)
{
if (cmp_lists[i].length == cmp_n)
{
in_sync++;
}
}
let avg_known = 0;
while (in_sync < options.total)
{
console.log('tick '+tick+': '+in_sync+' in sync, avg '+avg_known);
for (let i = 1; i <= options.total; i++)
{
const known_i = lists[i];
// Random recipients, picked with possible repetitions
const send_to = [];
for (let j = 0; j < options.gossip; j++)
{
send_to.push(known_i[0|(Math.random()*known_i.length)]);
}
// Random nodes to tell about, picked with possible repetitions
const send_what = [];
for (let j = 0; j < options.msgcap; j++)
{
// FIXME: Exclude duplicates, exclude <send_to>
send_what.push(known_i[0|(Math.random()*known_i.length)]);
}
for (const alive of send_what)
{
for (const to of send_to)
{
// Deliver only if the recipient does not know the node or knows an older version
if (!known[to][alive] || known[i][alive] > known[to][alive])
{
known[to][alive] = known[i][alive];
cmp_lists[to].push(alive);
if (cmp_lists[to].length == cmp_n)
{
console.log('node '+to+': tick '+tick);
in_sync++;
}
}
}
}
// Every (item, recipient) pair counts as a message, delivered or not
messages_sent += send_what.length*send_to.length;
}
avg_known = 0;
for (let i = 1; i <= options.total; i++)
{
avg_known += cmp_lists[i].length;
}
avg_known /= options.total;
tick++;
}
console.log('tick '+tick+': '+in_sync+' in sync, avg '+avg_known);
console.log(messages_sent+' messages sent');
}
// Command-line entry point: every "--name value" pair becomes options[name]
// (the value is converted to an integer), -h/--help prints the usage and exits.
const options = {};
for (let i = 2; i < process.argv.length; i++)
{
    const arg = process.argv[i];
    if (arg === '-h' || arg === '--help')
    {
        // The defaults listed here have to match the ones assigned in test_simple()
        console.error('USAGE: '+process.argv[0]+' '+process.argv[1]+` [OPTIONS]
--gossip 4 how many nodes to gossip with every tick
--msgcap 5 how many nodes to gossip about every tick
--total 100 total nodes
--update 0 total nodes to update if testing update. if 0 then test joining
--initial 5 initial nodes in sync to test joining (when --update is 0)`);
        process.exit();
    }
    else if (arg.startsWith('--'))
    {
        // The next argument is the value; a missing or non-numeric value becomes 0
        options[arg.slice(2)] = 0|process.argv[i+1];
        i++;
    }
}
test_simple(options);

177
model_update.js Normal file
View File

@ -0,0 +1,177 @@
#!/usr/bin/nodejs
// https://github.com/hashicorp/memberlist simulation tool
// Size-limited retransmission queue.
//
// Items live in <retransmit> buckets. A new item enters the last bucket and
// moves one bucket down every time it is returned by shift(); after being
// returned from bucket 0 it leaves the queue. So every item is returned
// by shift() at most <retransmit> times.
class LimQ
{
    // retransmit: number of buckets, i.e. how many times each item is handed out
    // maxlen: maximum number of items kept in the queue; when undefined
    //   the queue is unlimited (the length comparison is then always false)
    constructor(retransmit, maxlen)
    {
        this.buckets = [];
        for (let i = 0; i < retransmit; i++)
        {
            this.buckets.push([]);
        }
        // Number of items currently stored in all buckets
        this.len = 0;
        this.maxlen = maxlen;
    }

    // Add <item> to the queue. The item is silently dropped when the queue
    // is full or when there are no buckets at all (retransmit is 0).
    push(item)
    {
        if (this.buckets.length == 0 || this.len >= this.maxlen)
            return;
        this.buckets[this.buckets.length-1].push(item);
        // Account for the stored item so that <maxlen> is actually enforced
        // and stays consistent with the decrement in shift()
        this.len++;
    }

    // Take up to <n> items, starting from the bucket with the most remaining
    // transmissions. Returns the array of taken items.
    shift(n)
    {
        const items = [];
        // Moves are deferred until the end so the same item
        // can't be returned twice by a single call
        const move = [];
        for (let i = this.buckets.length-1; i >= 0 && items.length < n; i--)
        {
            const rm = this.buckets[i].splice(0, n-items.length);
            items.push(...rm);
            if (i > 0)
            {
                for (const e of rm)
                    move.push([ e, i-1 ]);
            }
            else
            {
                // Items taken from bucket 0 have used all their transmissions
                this.len -= rm.length;
            }
        }
        for (const [ item, bucket ] of move)
        {
            this.buckets[bucket].push(item);
        }
        return items;
    }
}
// Simulate gossip dissemination of "alive" messages through per-node
// retransmission queues and print the progress of every tick.
//
// options (missing or zero values are replaced with defaults in place):
//   gossip      how many peers every node sends a packet to per tick (4)
//   msgcap      maximum number of queue items per packet (5)
//   max_ticks   stop after this number of ticks even if not in sync (100000)
//   max_queue   per-node queue size limit (1024)
//   total       total number of nodes (100)
//   retransmit  how many times a queued item is sent by a node (12)
//   update      number of nodes whose metadata is updated to version 2;
//               when 0, joining is simulated instead (0)
//   initial     number of nodes initially known to everyone when simulating joining (5)
//
// Prints per-tick statistics and the total number of sent messages to stdout.
function test_memberlist(options)
{
    options.gossip ||= 4;
    options.msgcap ||= 5;
    options.max_ticks ||= 100000;
    // Default documented in the usage text; without it the queue limit is undefined
    options.max_queue ||= 1024;
    options.total ||= 100;
    options.retransmit ||= 12;
    options.update ||= 0;
    options.initial ||= 5;
    let tick = 0;
    let messages_sent = 0;
    const queue = {}; // { node: LimQ of node ids to gossip about }
    const known = {}; // { node: { other_node: meta_version } }
    const lists = {}; // { node: [ known node ids ] }
    const listsv2 = {}; // { node: [ node ids known with meta version 2 ] }
    for (let i = 1; i <= options.total; i++)
    {
        known[i] = {};
        lists[i] = [];
        // In update mode everyone knows everyone, in join mode only the initial nodes
        for (let j = 1; j <= (options.update ? options.total : options.initial); j++)
        {
            known[i][j] = 1; // meta version 1
            lists[i].push(j);
        }
        listsv2[i] = [];
        queue[i] = new LimQ(options.retransmit, options.max_queue);
    }
    // A node is "in sync" when cmp_lists[node] contains cmp_n entries
    let cmp_lists;
    let cmp_n;
    if (options.update)
    {
        // We want to update <options.update> nodes metadata to version 2
        for (let i = 1; i <= options.update; i++)
        {
            known[i][i] = 2;
            listsv2[i].push(i);
            queue[i].push(i);
        }
        cmp_lists = listsv2;
        cmp_n = options.update;
    }
    else
    {
        // We want <options.total-options.initial> to join <options.initial>
        for (let i = 1; i <= options.initial; i++)
        {
            for (let alive = options.initial+1; alive <= options.total; alive++)
            {
                known[i][alive] = 1;
                lists[i].push(alive);
                queue[i].push(alive);
            }
        }
        cmp_lists = lists;
        cmp_n = options.total;
    }
    let in_sync = 0;
    for (let i = 1; i <= options.total; i++)
    {
        if (cmp_lists[i].length == cmp_n)
        {
            in_sync++;
        }
    }
    let avg_known = 0;
    while (in_sync < options.total && tick < options.max_ticks)
    {
        console.log('tick '+tick+': '+in_sync+' in sync, avg '+avg_known);
        for (let i = 1; i <= options.total; i++)
        {
            const known_i = lists[i];
            for (let g = 0; g < options.gossip; g++)
            {
                // Pick a random peer from the sender's own list
                const to = known_i[0|(Math.random()*known_i.length)];
                const send_what = queue[i].shift(options.msgcap);
                messages_sent += send_what.length;
                for (const alive of send_what)
                {
                    // The receiver accepts only unknown nodes or newer metadata versions
                    if (!known[to][alive] || known[i][alive] > known[to][alive])
                    {
                        known[to][alive] = known[i][alive];
                        cmp_lists[to].push(alive);
                        // ...and queues the news for its own retransmission
                        queue[to].push(alive);
                        if (cmp_lists[to].length == cmp_n)
                        {
                            console.log('node '+to+': synced at tick '+tick);
                            in_sync++;
                        }
                    }
                }
            }
        }
        avg_known = 0;
        for (let i = 1; i <= options.total; i++)
        {
            avg_known += cmp_lists[i].length;
        }
        avg_known /= options.total;
        tick++;
    }
    console.log('tick '+tick+': '+in_sync+' in sync, avg '+avg_known);
    console.log(messages_sent+' messages sent');
}
// Command-line entry point: collect "--name value" pairs into <options>
// (values are coerced to integers) and run the simulation.
const options = {};
let argIdx = 2;
while (argIdx < process.argv.length)
{
    const arg = process.argv[argIdx];
    if (arg === '-h' || arg === '--help')
    {
        // Print usage to stderr and terminate
        console.error(`USAGE: ${process.argv[0]} ${process.argv[1]} [OPTIONS]
--gossip 4 how many nodes to gossip with every tick
--msgcap 5 how many "alive" messages fits in a single packet (meta size/UDP packet size in memberlist)
--max_ticks 100000 execution limit
--max_queue 1024 queue size limit
--total 100 total nodes
--retransmit 12 retransmission count. by default log(total)*4 in memberlist
--update 0 total nodes to update if testing update. if 0 then test joining
--initial 5 initial nodes in sync to test joining (when --update is 0)`);
        process.exit();
    }
    else if (arg.startsWith('--'))
    {
        // "0|x" converts the following argument to an integer
        // (a missing or non-numeric value becomes 0)
        const name = arg.slice(2);
        options[name] = 0|process.argv[argIdx+1];
        // Skip the consumed value
        argIdx++;
    }
    argIdx++;
}
test_memberlist(options);

78
stable-stringify.js Normal file
View File

@ -0,0 +1,78 @@
// Copyright (c) Vitaliy Filippov, 2019+
// License: MIT
// Serialize <obj> to JSON with object keys in a deterministic (sorted) order.
//
// opts may be a comparator function or an object with optional fields:
//   cmp     function(a, b) comparing { key, value } pairs, used to order keys
//           instead of the default string sort
//   space   indentation: a string, or a number of spaces; empty means compact output
//   cycles  if true, circular references are replaced with "__cycle__"
//           instead of throwing
//
// Returns the JSON string, or undefined when <obj> itself is not serializable
// (undefined or a value for which JSON.stringify() returns undefined).
// Throws TypeError on circular structures unless opts.cycles is true.
function stableStringify(obj, opts)
{
    if (!opts)
        opts = {};
    if (typeof opts === 'function')
        opts = { cmp: opts };
    let space = opts.space || '';
    if (typeof space === 'number')
        space = Array(space+1).join(' ');
    const cycles = (typeof opts.cycles === 'boolean') ? opts.cycles : false;
    const userCmp = opts.cmp;
    // Adapt the user comparator to Array.prototype.sort() over the keys of <node>
    const cmp = userCmp && (node => (a, b) => userCmp(
        { key: a, value: node[a] },
        { key: b, value: node[b] }
    ));
    // Objects and arrays on the current recursion path
    const seen = new Set();
    const stringify = (node, level) =>
    {
        const indent = space ? ('\n' + new Array(level + 1).join(space)) : '';
        const colonSeparator = space ? ': ' : ':';
        if (node === undefined)
        {
            return undefined;
        }
        if (typeof node !== 'object' || node === null)
        {
            return JSON.stringify(node);
        }
        // Cycle detection covers arrays too: a self-referencing array
        // would otherwise recurse until the stack overflows
        if (seen.has(node))
        {
            if (cycles)
                return JSON.stringify('__cycle__');
            throw new TypeError('Converting circular structure to JSON');
        }
        seen.add(node);
        const out = [];
        let result;
        if (node instanceof Array)
        {
            for (let i = 0; i < node.length; i++)
            {
                // Unserializable array items become null, like in JSON.stringify()
                const item = stringify(node[i], level+1) || JSON.stringify(null);
                out.push(indent + space + item);
            }
            result = '[' + out.join(',') + indent + ']';
        }
        else
        {
            const keys = Object.keys(node).sort(cmp ? cmp(node) : undefined);
            for (const key of keys)
            {
                const value = stringify(node[key], level+1);
                // Unserializable properties are omitted, like in JSON.stringify()
                if (!value)
                    continue;
                out.push(indent + space + JSON.stringify(key) + colonSeparator + value);
            }
            result = '{' + out.join(',') + indent + '}';
        }
        // The same object may legitimately appear again in a sibling branch
        seen.delete(node);
        return result;
    };
    return stringify(obj, 0);
}
module.exports = stableStringify;

View File

@ -11,6 +11,14 @@
//
// Supports leader expiration like in NuRaft:
// https://github.com/eBay/NuRaft/blob/master/docs/leadership_expiration.md
//
// Also supports leader priorities, similar to NuRaft but even simpler:
// If a node receives a VoteRequest message with larger term but with smaller
// priority than its own, it immediately starts a new voting round.
// It guarantees that a node with non-maximum priority can't become leader
// without being re-elected.
// If all priorities are equal (or just zero), the election algorithm
// becomes identical to the basic algorithm without priorities.
const EventEmitter = require('events');
@ -32,6 +40,7 @@ class TinyRaft extends EventEmitter
// heartbeatTimeout?: number,
// leadershipTimeout?: number,
// initialTerm?: number,
// leaderPriority?: number,
// }
constructor(config)
{
@ -43,6 +52,7 @@ class TinyRaft extends EventEmitter
this.randomTimeout = config.randomTimeout > 0 ? Number(config.randomTimeout) : this.electionTimeout;
this.heartbeatTimeout = Number(config.heartbeatTimeout) || 1000;
this.leadershipTimeout = Number(config.leadershipTimeout) || 0;
this.leaderPriority = Number(config.leaderPriority) || undefined;
if (!this.nodeId || this.nodeId instanceof Object ||
!(this.nodes instanceof Array) || this.nodes.filter(n => !n || n instanceof Object).length > 0 ||
!(this.send instanceof Function))
@ -85,7 +95,7 @@ class TinyRaft extends EventEmitter
{
if (node != this.nodeId)
{
this.send(node, { type: VOTE_REQUEST, term: this.term, leader: this.leader });
this.send(node, { type: VOTE_REQUEST, term: this.term, leader: this.leader, priority: this.leaderPriority });
}
}
// Next term will start right after this one times out
@ -111,11 +121,11 @@ class TinyRaft extends EventEmitter
{
if (this.state == LEADER)
{
for (const node of this.votes[this.nodeId])
for (const node of this.followers)
{
if (node != this.nodeId)
{
this.send(node, { type: PING, term: this.term });
this.send(node, { type: PING, term: this.term, priority: this.leaderPriority });
}
}
}
@ -125,7 +135,32 @@ class TinyRaft extends EventEmitter
{
if (msg.type == VOTE_REQUEST)
{
if (msg.term > this.term && msg.leader)
this._onReceiveVoteRequest(from, msg);
}
else if (msg.type == VOTE)
{
this._onReceiveVote(from, msg);
}
else if (msg.type == PING)
{
this._onReceivePing(from, msg);
}
else if (msg.type == PONG)
{
this._onReceivePong(from, msg);
}
}
_onReceiveVoteRequest(from, msg)
{
if (msg.term > this.term && msg.leader)
{
if (this.leaderPriority && (msg.priority||0) < this.leaderPriority)
{
this.term = msg.term;
this.start();
}
else
{
this.leader = msg.leader;
this.term = msg.term;
@ -133,75 +168,101 @@ class TinyRaft extends EventEmitter
this._nextTerm(this.heartbeatTimeout*2 + this.electionTimeout);
this.emit('change', { state: this.state, term: this.term, leader: this.leader });
}
this.send(from, { type: VOTE, term: this.term, leader: this.leader });
}
else if (msg.type == VOTE && msg.term == this.term)
const prio = (this.leader == this.nodeId ? this.leaderPriority : undefined);
this.send(from, { type: VOTE, term: this.term, leader: this.leader, leaderPriority: prio });
}
_onReceiveVote(from, msg)
{
if (!msg.leader || msg.term < this.term)
{
return;
}
if (msg.term > this.term)
{
this.term = msg.term;
this.leader = msg.leader;
// Repeat VOTE to the leader to join it
this.send(this.leader, { type: VOTE, term: this.term, leader: this.leader, priority: msg.priority });
}
// add <from> as voter for <msg.leader>
this.votes[msg.leader] = this.votes[msg.leader] || [];
let found = false;
for (const voter of this.votes[msg.leader])
{
if (voter == from)
{
found = true;
break;
}
}
if (!found)
{
this.voted++;
this.votes[msg.leader] = this.votes[msg.leader] || [];
this.votes[msg.leader].push(from);
const n = this.votes[msg.leader].length;
if (n == 1 + (0 | this.nodes.length/2))
{
if (msg.leader == this.nodeId)
{
this.leader = msg.leader;
this.state = LEADER;
this._nextTerm(this.leadershipTimeout > 0 ? this.leadershipTimeout : -1);
this.followers = this.votes[this.nodeId];
for (const follower of this.followers)
{
if (follower != this.nodeId)
{
// Send a heartbeat to confirm leadership
this.send(follower, { type: PING, term: this.term });
}
}
this.emit('change', { state: this.state, term: this.term, leader: this.nodeId, followers: this.votes[this.nodeId] });
}
else
{
this._nextTerm(0);
}
}
else if (n > this.nodes.length/2 && this.state == LEADER && msg.leader == this.nodeId)
}
const n = this.votes[msg.leader].length;
if (n == 1 + (0 | this.nodes.length/2))
{
if (msg.leader == this.nodeId)
{
this.leader = msg.leader;
this.state = LEADER;
this._nextTerm(this.leadershipTimeout > 0 ? this.leadershipTimeout : -1);
this.followers = this.votes[this.nodeId];
// Send a heartbeat to confirm leadership
this.send(from, { type: PING, term: this.term });
this._heartbeat();
this.emit('change', { state: this.state, term: this.term, leader: this.nodeId, followers: this.votes[this.nodeId] });
}
else if (this._isVotingFailed())
else
{
this._nextTerm(0);
}
}
else if (msg.type == PING)
else if (n > this.nodes.length/2 && this.state == LEADER && msg.leader == this.nodeId)
{
if (this.state == CANDIDATE && this.term == msg.term && from == this.leader)
{
this.state = FOLLOWER;
this.emit('change', { state: this.state, term: this.term, leader: this.nodeId });
}
if (this.state == FOLLOWER && from == this.leader)
{
this.markAlive();
}
if (this.leadershipTimeout > 0)
{
this.send(from, { type: PONG, term: this.term, leader: this.leader });
}
this.followers = this.votes[this.nodeId];
// Send a heartbeat to confirm leadership
this.send(from, { type: PING, term: this.term, priority: this.leaderPriority });
this.emit('change', { state: this.state, term: this.term, leader: this.nodeId, followers: this.votes[this.nodeId] });
}
else if (msg.type == PONG && this.state == LEADER)
else if (this._isVotingFailed())
{
if (msg.leader != this.nodeId)
{
this.start();
}
else
{
this._nextTerm(this.leadershipTimeout > 0 ? this.leadershipTimeout : -1);
}
this._nextTerm(0);
}
}
// Handle a PING (heartbeat) message received from node <from>.
// - A candidate that gets a ping for the current term from the node it
//   considers the leader becomes a follower and emits a 'change' event.
// - A follower pinged by its leader calls markAlive().
// - When leadershipTimeout is enabled, a PONG carrying our current
//   term and leader is sent back to the sender.
_onReceivePing(from, msg)
{
    if (this.state == CANDIDATE && this.term == msg.term && from == this.leader)
    {
        this.state = FOLLOWER;
        // Report the leader we follow (== from), not our own id:
        // a follower is by definition not the leader
        this.emit('change', { state: this.state, term: this.term, leader: this.leader });
    }
    if (this.state == FOLLOWER && from == this.leader)
    {
        this.markAlive();
    }
    if (this.leadershipTimeout > 0)
    {
        this.send(from, { type: PONG, term: this.term, leader: this.leader });
    }
}
// Handle a PONG (heartbeat reply) message. Only meaningful for a leader:
// a reply naming another leader makes us start over via start(), a reply
// naming us calls _nextTerm() with the leadership timeout
// (presumably re-arming the term timer - confirm against _nextTerm()).
_onReceivePong(from, msg)
{
    if (this.state == LEADER)
    {
        if (msg.leader == this.nodeId)
        {
            // -1 is passed when leadership expiration is disabled
            const timeout = this.leadershipTimeout > 0 ? this.leadershipTimeout : -1;
            this._nextTerm(timeout);
        }
        else
        {
            this.start();
        }
    }
}
@ -233,12 +294,14 @@ class TinyRaft extends EventEmitter
if (this.state == LEADER)
{
this.votes[this.nodeId] = this.votes[this.nodeId].filter(n => nodes.indexOf(n) >= 0);
this.emit('change', { state: this.state, term: this.term, leader: this.nodeId, followers: this.votes[this.nodeId] });
if (this.votes[this.nodeId].length < (1 + (0 | this.nodes.length/2)))
this.start();
else
this.emit('change', { state: this.state, term: this.term, leader: this.nodeId, followers: this.votes[this.nodeId] });
}
else if (this.state == FOLLOWER && nodes.indexOf(this.leader) < 0 || this.state == CANDIDATE)
{
r.nextTerm(-1);
r.start();
this.start();
}
}
}

View File

@ -1,9 +1,9 @@
const TinyRaft = require('./tinyraft.js');
function newNode(id, nodes, partitions)
function newNode(id, nodes, partitions, mod)
{
partitions = partitions || {};
let n = new TinyRaft({
let cfg = {
nodes: [ 1, 2, 3, 4, 5 ],
nodeId: id,
heartbeatTimeout: 100,
@ -16,7 +16,10 @@ function newNode(id, nodes, partitions)
setImmediate(function() { nodes[to].onReceive(n.nodeId, msg); });
}
},
});
};
if (mod)
mod(cfg);
let n = new TinyRaft(cfg);
n.on('change', (st) =>
{
console.log(
@ -27,13 +30,13 @@ function newNode(id, nodes, partitions)
nodes[id] = n;
}
function newNodes(count, partitions)
function newNodes(count, partitions, mod)
{
partitions = partitions || {};
const nodes = {};
for (let i = 1; i <= count; i++)
{
newNode(i, nodes, partitions);
newNode(i, nodes, partitions, mod);
}
for (let i = 1; i <= count; i++)
{
@ -129,7 +132,7 @@ async function testStartThenRemoveNode()
async function testAddNode()
{
console.log('testAddNode');
const nodes = newNodes(5);
const nodes = newNodes(5, {}, cfg => cfg.initialTerm = 1000);
await new Promise(ok => setTimeout(ok, 2000));
checkQuorum(nodes, 5);
// Add node
@ -148,10 +151,174 @@ async function testAddNode()
console.log('testAddNode: OK');
}
// Test that a leader cut off from the network stops being the leader
// when leadershipTimeout is configured.
async function testLeadershipExpiration()
{
    console.log('testLeadershipExpiration');
    const sleep = (ms) => new Promise(ok => setTimeout(ok, ms));
    const partitions = {};
    const nodes = newNodes(5, partitions, cfg => cfg.leadershipTimeout = 1500);
    // All 5 nodes must form a quorum after 2000ms
    await sleep(2000);
    checkQuorum(nodes, 5);
    // Cut all links to and from the leader
    const leader = nodes[1].leader;
    console.log(`stopping the leader's (${leader}) network`);
    for (let peer = 1; peer <= 5; peer++)
    {
        partitions[`${peer}-${leader}`] = true;
        partitions[`${leader}-${peer}`] = true;
    }
    // After 2 * leadershipTimeout the isolated leader must be a candidate again
    await sleep(3000);
    if (nodes[leader].state != TinyRaft.CANDIDATE)
    {
        throw new Error("leadership expiration doesn't work");
    }
    // Clean up
    for (const node of Object.values(nodes))
    {
        node.stop();
    }
    console.log('testLeadershipExpiration: OK');
}
// Test that restarting a follower does not make the cluster change its leader.
// Leader changes are observed through 'change' events of node 2.
async function testRestart()
{
    console.log('testRestart');
    const sleep = (ms) => new Promise(ok => setTimeout(ok, ms));
    const nodes = newNodes(5, {}, cfg => cfg.initialTerm = 1000);
    let leaderChanges = 0;
    let prevLeader = null;
    nodes[2].on('change', (st) =>
    {
        // A candidate has no leader
        const leader = st.state == TinyRaft.CANDIDATE ? null : st.leader;
        if (leader == prevLeader)
        {
            return;
        }
        prevLeader = leader;
        leaderChanges++;
    });
    // All 5 nodes must form a quorum after 2000ms
    await sleep(2000);
    checkQuorum(nodes, 5);
    if (leaderChanges >= 3)
    {
        throw new Error(`leaderChanges = ${leaderChanges} (expected < 3)`);
    }
    // Pick the node following the leader, but never node 2 (the observer)
    let restarted = 1 + (prevLeader % 5);
    if (restarted == 2)
    {
        restarted = 1 + (prevLeader + 1) % 5;
    }
    console.log(`stopping a follower (node ${restarted})`);
    nodes[restarted].stop();
    delete nodes[restarted];
    // Keep it down for 2000ms
    await sleep(2000);
    // Bring the follower back (created without the config modifier)
    console.log(`restarting a follower (node ${restarted})`);
    leaderChanges = 0;
    newNode(restarted, nodes, {}, null);
    nodes[restarted].start();
    // After 2000ms there must be a quorum of 5 again and the same leader
    await sleep(2000);
    checkQuorum(nodes, 5);
    if (leaderChanges > 0)
    {
        throw new Error("leader changed after restart of a follower");
    }
    // Clean up
    for (const node of Object.values(nodes))
    {
        node.stop();
    }
    console.log('testRestart: OK');
}
// Test replacing a cluster member: node 4 is stopped, node 6 is started and
// every node is switched from the node list 1 2 3 4 5 to 1 2 3 5 6.
async function testChangeNodes()
{
    console.log('testChangeNodes');
    console.log('starting nodes 1-5');
    const sleep = (ms) => new Promise(ok => setTimeout(ok, ms));
    const nodes = newNodes(5, {}, cfg => cfg.initialTerm = 1000);
    // All 5 nodes must form a quorum after 2000ms
    await sleep(2000);
    checkQuorum(nodes, 5);
    // Remove node 4
    console.log('stopping node 4');
    nodes[4].stop();
    delete nodes[4];
    await sleep(1000);
    // Add node 6 and tell every member about the new node list
    console.log('starting node 6');
    newNode(6, nodes);
    nodes[6].start();
    for (const id of [ 1, 2, 3, 5, 6 ])
    {
        // Every node receives its own copy of the list
        nodes[id].setNodes([ 1, 2, 3, 5, 6 ]);
    }
    // The new set of 5 nodes must form a quorum after 2000ms
    await sleep(2000);
    checkQuorum(nodes, 5);
    // Clean up
    for (const node of Object.values(nodes))
    {
        if (node)
        {
            node.stop();
        }
    }
    console.log('testChangeNodes: OK');
}
// Test leader priorities: every node gets priority nodeId+1, so the running
// node with the largest id is expected to be the leader at every step.
async function testLeaderPriority()
{
    console.log('testLeaderPriority');
    console.log('starting nodes 1-5');
    const sleep = (ms) => new Promise(ok => setTimeout(ok, ms));
    const nodes = newNodes(5, {}, cfg => cfg.leaderPriority = cfg.nodeId+1);
    // Wait 2000ms, then require a quorum of <count> nodes led by node <leader> (as seen by node 1)
    const expectLeader = async (count, leader) =>
    {
        await sleep(2000);
        checkQuorum(nodes, count);
        if (nodes[1].leader != leader)
        {
            throw new Error('leader is not '+leader);
        }
    };
    const stopNode = (id) =>
    {
        console.log('stopping node '+id);
        nodes[id].stop();
        delete nodes[id];
    };
    // All 5 nodes are up: the leader must be 5
    await expectLeader(5, 5);
    // Without node 5 the leader must be 4
    stopNode(5);
    await expectLeader(4, 4);
    // Without nodes 5 and 4 the leader must be 3
    stopNode(4);
    await expectLeader(3, 3);
    // Clean up
    for (const node of Object.values(nodes))
    {
        if (node)
        {
            node.stop();
        }
    }
    console.log('testLeaderPriority: OK');
}
// Run all tests one after another and exit with code 0 when they all pass.
async function run()
{
    const tests = [
        testStartThenRemoveNode,
        testAddNode,
        testLeadershipExpiration,
        testRestart,
        testChangeNodes,
        testLeaderPriority,
    ];
    for (const test of tests)
    {
        await test();
    }
    process.exit(0);
}