Compare commits

..

No commits in common. "d95e4683feb8f0864c9f4668368a47b4220eed54" and "2071dacf93990e2621d1420aaa6dc2cbc5f98b89" have entirely different histories.

6 changed files with 55 additions and 550 deletions

View File

@ -1,439 +0,0 @@
# AntiEtcd
Simplistic miniature etcd replacement based on [TinyRaft](https://git.yourcmc.ru/vitalif/tinyraft/)
- Embeddable
- REST API only, gRPC is shit and will never be supported
- [TinyRaft](https://git.yourcmc.ru/vitalif/tinyraft/)-based leader election
- Websocket-based cluster communication
- Supports a limited subset of etcd REST APIs
- Optional persistence
(c) Vitaliy Filippov, 2024
License: Mozilla Public License 2.0 or [VNPL-1.1](https://git.yourcmc.ru/vitalif/vitastor/src/branch/master/VNPL-1.1.txt)
## CLI Usage
```
npm install antietcd
node_modules/.bin/antietcd \
[--cert ssl.crt] [--key ssl.key] [--port 12379] \
[--data data.gz] [--persist_interval 500] \
[--node_id node1 --cluster_key abcdef --cluster node1=http://localhost:12379,node2=http://localhost:12380,node3=http://localhost:12381] \
[other options]
```
Antietcd doesn't background itself, so use systemd or start-stop-daemon to run it as a background service.
## Options
### HTTP
<dl>
<dt>--port 2379</dt>
<dd>Listen port</dd>
<dt>--cert \<cert\></dt>
<dd>Use TLS with this certificate file (PEM format)</dd>
<dt>--key \<key\></dt>
<dd>Use TLS with this key file (PEM format)</dd>
<dt>--ca \<ca\></dt>
<dd>Use trusted root certificates from this file.
Specify \<ca\> = \<cert\> if your certificate is self-signed.</dd>
<dt>--client_cert_auth 1</dt>
<dd>Require TLS client certificates signed by \<ca\> or by the default CA to connect.</dd>
<dt>--ws_keepalive_interval 30000</dt>
<dd>Client websocket ping (keepalive) interval in milliseconds</dd>
</dl>
### Persistence
<dl>
<dt>--data \<filename\></dt>
<dd>Store persistent data in \<filename\></dd>
<dt>--persist_interval \<milliseconds\></dt>
<dd>Persist data on disk after this interval, not immediately after change</dd>
<dt>--persist_filter ./filter.js</dt>
<dd>Use persistence filter from ./filter.js (or a module). <br />
Persistence filter is a function(cfg) returning a function(key, value) that is run
for every change and returns a new value, or undefined to skip persistence.</dd>
</dl>
### Clustering
<dl>
<dt>--node_id \<id\></dt>
<dd>ID of this cluster node</dd>
<dt>--cluster \<id1\>=\<url1\>,\<id2\>=\<url2\>,...</dt>
<dd>All other cluster nodes</dd>
<dt>--cluster_key \<key\></dt>
<dd>Shared cluster key for identification</dd>
<dt>--election_timeout 5000</dt>
<dd>Raft election timeout</dd>
<dt>--heartbeat_timeout 1000</dt>
<dd>Raft leader heartbeat timeout</dd>
<dt>--wait_quorum_timeout 30000</dt>
<dd>Timeout for requests to wait for quorum to come up</dd>
<dt>--leader_priority \<number\></dt>
<dd>Raft leader priority for this node (optional)</dd>
<dt>--stale_read 1</dt>
<dd>Allow serving reads from followers. Specify 0 to disallow</dd>
<dt>--reconnect_interval 1000</dt>
<dd>Unavailable peer connection retry interval</dd>
<dt>--dump_timeout 5000</dt>
<dd>Timeout for dump command in milliseconds</dd>
<dt>--load_timeout 5000</dt>
<dd>Timeout for load command in milliseconds</dd>
<dt>--forward_timeout 1000</dt>
<dd>Timeout for forwarding requests from follower to leader in milliseconds</dd>
<dt>--replication_timeout 1000</dt>
<dd>Timeout for replicating requests from leader to follower in milliseconds</dd>
<dt>--compact_revisions 1000</dt>
<dd>Number of previous revisions to keep deletion information in memory</dd>
<dt>--compact_timeout 1000</dt>
<dd>Timeout for compaction requests from leader to follower in milliseconds</dd>
</dl>
## Embedded Usage
```js
const AntiEtcd = require('antietcd');
const srv = new AntiEtcd({ ...configuration });
// Start server
srv.start();
// Make a local API call in generic style:
let res = await srv.api('kv_txn'|'kv_range'|'kv_put'|'kv_deleterange'|'lease_grant'|'lease_revoke'|'lease_keepalive', { ...params });
// Or function-style:
res = await srv.txn(params);
res = await srv.range(params);
res = await srv.put(params);
res = await srv.deleterange(params);
res = await srv.lease_grant(params);
res = await srv.lease_revoke(params);
res = await srv.lease_keepalive(params);
// Error handling:
try
{
res = await srv.txn(params);
}
catch (e)
{
if (e instanceof AntiEtcd.RequestError)
{
// e.code is HTTP code
// e.message is error message
}
}
// Stop server
srv.stop();
```
## Persistence
Persistence is very simple: the full database is dumped into JSON, gzipped and saved as a file.
By default, it is written and fsynced to disk on every change, but it can be configured
to dump the DB to disk at fixed intervals, for example, at most every 500 ms - of course,
at the expense of slightly reduced crash resiliency (example: `--persist_interval 500`).
You can also specify a filter to exclude some data from persistence by using the option
`--persist_filter ./filter.js`. Persistence filter code example:
```js
// Example persistence filter factory.
// Receives <cfg> (all command-line options) and returns either null, when
// cfg.exclude_keys is not set, or a function(key, value) that yields the value
// to persist for each change, or undefined to skip persisting that key.
function example_filter(cfg)
{
    const { exclude_keys: prefix } = cfg;
    if (!prefix)
    {
        return null;
    }
    return (key, value) =>
    {
        // Keys starting with the configured prefix are never persisted
        const is_excluded = key.substring(0, prefix.length) == prefix;
        if (is_excluded)
        {
            return undefined;
        }
        // Everything except '/statistics' is persisted unchanged
        if (key !== '/statistics')
        {
            return value;
        }
        // Remove <unneeded_key> from inside the JSON-encoded value:
        // a property set to undefined is omitted by JSON.stringify
        const stats = JSON.parse(value);
        const trimmed = { ...stats, unneeded_key: undefined };
        return JSON.stringify(trimmed);
    };
}
module.exports = example_filter;
```
## Supported etcd APIs
NOTE: `key`, `value` and `range_end` are always encoded in base64, like in original etcd.
Range requests are only supported across "directories" separated by `/`.
It means that in range requests `key` must always end with `/` and `range_end` must always
end with `0`, and that such a request will return a whole subtree of keys.
### /v3/kv/txn
Request:
```
interface TxnRequest {
compare?: (
{ key: string, target: "MOD", mod_revision: number, result?: "LESS" }
| { key: string, target: "CREATE", create_revision: number, result?: "LESS" }
| { key: string, target: "VERSION", version: number, result?: "LESS" }
| { key: string, target: "LEASE", lease: string, result?: "LESS" }
| { key: string, target: "VALUE", value: string }
)[],
success?: (
{ request_put: PutRequest }
| { request_range: RangeRequest }
| { request_delete_range: DeleteRangeRequest }
)[],
failure?: (
{ request_put: PutRequest }
| { request_range: RangeRequest }
| { request_delete_range: DeleteRangeRequest }
)[],
serializable?: boolean,
}
```
`serializable` allows serving read-only requests from a follower even if `stale_read` is not enabled.
Response:
```
interface TxnResponse {
header: { revision: number },
succeeded: boolean,
responses: (
{ response_put: PutResponse }
| { response_range: RangeResponse }
| { response_delete_range: DeleteRangeResponse }
)[],
}
```
### /v3/kv/put
Request:
```
interface PutRequest {
key: string,
value: string,
lease?: string,
}
```
Other parameters are not supported: prev_kv, ignore_value, ignore_lease.
Response:
```
interface PutResponse {
header: { revision: number },
}
```
### /v3/kv/range
Request:
```
interface RangeRequest {
key: string,
range_end?: string,
keys_only?: boolean,
serializable?: boolean,
}
```
`serializable` allows serving read-only requests from a follower even if `stale_read` is not enabled.
Other parameters are not supported: revision, limit, sort_order, sort_target,
count_only, min_mod_revision, max_mod_revision, min_create_revision, max_create_revision.
Response:
```
interface RangeResponse {
header: { revision: number },
kvs: { key: string }[] | {
key: string,
value: string,
lease?: string,
mod_revision: number,
}[],
}
```
### /v3/kv/deleterange
Request:
```
interface DeleteRangeRequest {
key: string,
range_end?: string,
}
```
Other parameters are not supported: prev_kv.
Response:
```
interface DeleteRangeResponse {
header: { revision: number },
// number of deleted keys
deleted: number,
}
```
### /v3/lease/grant
Request:
```
interface LeaseGrantRequest {
ID?: string,
TTL: number,
}
```
Response:
```
interface LeaseGrantResponse {
header: { revision: number },
ID: string,
TTL: number,
}
```
### /v3/lease/keepalive
Request:
```
interface LeaseKeepaliveRequest {
ID: string,
}
```
Response:
```
interface LeaseKeepaliveResponse {
header: { revision: number },
ID: string,
TTL: number,
}
```
### /v3/lease/revoke or /v3/kv/lease/revoke
Request:
```
interface LeaseRevokeRequest {
ID: string,
}
```
Response:
```
interface LeaseRevokeResponse {
header: { revision: number },
}
```
### Websocket-based watch APIs
Client-to-server message format:
```
type ClientMessage =
{ create_request: {
key: string,
range_end?: string,
start_revision?: number,
watch_id?: string,
} }
| { cancel_request: {
watch_id: string,
} }
| { progress_request: {} }
```
Server-to-client message format:
```
type ServerMessage = {
header: { revision: number },
watch_id: string,
created?: boolean,
canceled?: boolean,
compact_revision?: number,
events?: {
type: 'PUT'|'DELETE',
kv: {
key: string,
value: string,
lease?: string,
mod_revision: number,
},
}[],
} | { error: 'bad-json' } | { error: 'empty-message' }
```
### HTTP Error Codes
- 400 for invalid requests
- 404 for unsupported API / URL not found
- 405 for non-POST request method
- 501 for unsupported API feature - non-directory range queries and so on
- 502 for server is stopping
- 503 for quorum-related errors - quorum not available and so on

View File

@ -5,10 +5,6 @@ const { runCallbacks, RequestError } = require('./common.js');
const LEADER_MISMATCH = 'raft leader/term mismatch';
const LEADER_ONLY = 1;
const NO_WAIT_QUORUM = 2;
const READ_FROM_FOLLOWER = 4;
class AntiCluster
{
constructor(antietcd)
@ -52,7 +48,7 @@ class AntiCluster
if (node_id != this.cfg.node_id && this.cfg.cluster[node_id] &&
(!this.cluster_connections[node_id] || !this.antietcd.clients[this.cluster_connections[node_id]]))
{
const socket = new ws.WebSocket(this.cfg.cluster[node_id].replace(/^http/, 'ws'), this.antietcd.tls);
const socket = new ws.WebSocket(this.cfg.cluster[node_id].replace(/^http/, 'ws'));
const client_id = this.antietcd._startWebsocket(socket, () => setTimeout(() => this.connectToNode(node_id), this.cfg.reconnect_interval||1000));
this.cluster_connections[node_id] = client_id;
socket.on('open', () =>
@ -341,33 +337,25 @@ class AntiCluster
{
return null;
}
if (leaderonly == LEADER_ONLY && this.raft.state != TinyRaft.LEADER)
if (leaderonly && this.raft.state != TinyRaft.LEADER)
{
throw new RequestError(503, 'Not leader');
}
if (leaderonly == NO_WAIT_QUORUM && this.raft.state == TinyRaft.CANDIDATE)
if (this.raft.state == TinyRaft.CANDIDATE)
{
throw new RequestError(503, 'Quorum not available');
}
if (!this.synced)
{
// Wait for quorum / initial sync with timeout
await new Promise((ok, no) =>
{
this.wait_sync.push(ok);
setTimeout(() =>
{
this.wait_sync = this.wait_sync.filter(cb => cb != ok);
no(new RequestError(503, 'Quorum not available'));
}, this.cfg.wait_quorum_timeout||30000);
});
}
if (this.raft.state == TinyRaft.FOLLOWER &&
(this._isWrite(path, data) || !this.cfg.stale_read && !(leaderonly & READ_FROM_FOLLOWER)))
else if (this.raft.state == TinyRaft.FOLLOWER &&
(!this.cfg.stale_read || this._isWrite(path, data)))
{
// Forward to leader
return await this._forwardToLeader(path, data);
}
else if (!this.synced)
{
// Wait for initial sync for read-only requests
await new Promise(ok => this.wait_sync.push(ok));
}
return null;
}
@ -515,8 +503,4 @@ class AntiCluster
}
}
AntiCluster.LEADER_ONLY = LEADER_ONLY;
AntiCluster.NO_WAIT_QUORUM = NO_WAIT_QUORUM;
AntiCluster.READ_FROM_FOLLOWER = READ_FROM_FOLLOWER;
module.exports = AntiCluster;

View File

@ -1,16 +1,17 @@
#!/usr/bin/env node
const AntiEtcd = require('./antietcd.js');
const vitastor_persist_filter = require('./vitastor_persist_filter.js');
const help_text = `Miniature etcd replacement based on TinyRaft
(c) Vitaliy Filippov, 2024
License: Mozilla Public License 2.0 or Vitastor Network Public License 1.1
License: Mozilla Public License 2.0
Usage:
${process.argv[0]} ${process.argv[1]}
[--cert ssl.crt] [--key ssl.key] [--port 12379]
[--data data.gz] [--persist-filter ./filter.js] [--persist_interval 500]
[--data data.gz] [--vitastor-persist-filter /vitastor] [--no-persist-filter] [--persist_interval 500]
[--node_id node1 --cluster_key abcdef --cluster node1=http://localhost:12379,node2=http://localhost:12380,node3=http://localhost:12381]
[other options]
@ -26,28 +27,23 @@ HTTP:
--port 2379
Listen port
--cert <cert>
--cert <filename>
Use TLS with this certificate file (PEM format)
--key <key>
--key <filename>
Use TLS with this key file (PEM format)
--ca <ca>
Use trusted root certificates from this file.
Specify <ca> = <cert> if your certificate is self-signed.
--client_cert_auth 1
Require TLS client certificates signed by <ca> or by default CA to connect.
--ws_keepalive_interval 30000
Client websocket ping (keepalive) interval in milliseconds
Persistence:
--data <filename>
Store persistent data in <filename>
Use <filename> to store persistent data
--persist_interval <milliseconds>
Persist data on disk after this interval, not immediately after change
--persist_filter ./filter.js
Use persistence filter from ./filter.js (or a module).
Persistence filter is a function(cfg) returning function(key, value) ran
for every change and returning a new value or undefined to skip persistence.
Persist data on disk after this interval, not immediately
--no_persist_filter
Store all data
--vitastor_persist_filter <prefix>
Store only data required for Vitastor with prefix <prefix> on disk
Clustering:
@ -61,12 +57,10 @@ Clustering:
Raft election timeout
--heartbeat_timeout 1000
Raft leader heartbeat timeout
--wait_quorum_timeout 30000
Timeout for requests to wait for quorum to come up
--leader_priority <number>
Raft leader priority for this node (optional)
--stale_read 1
Allow to serve reads from followers. Specify 0 to disallow
--stale_read 0|1
Allow to serve reads from followers
--reconnect_interval 1000
Unavailable peer connection retry interval
--dump_timeout 5000
@ -85,7 +79,9 @@ Clustering:
function parse()
{
const options = { stale_read: 1 };
const options = {
persist_filter: vitastor_persist_filter('/vitastor'),
};
for (let i = 2; i < process.argv.length; i++)
{
const arg = process.argv[i].toLowerCase().replace(/^--(.+)$/, (m, m1) => '--'+m1.replace(/-/g, '_'));
@ -94,16 +90,19 @@ function parse()
console.error(help_text.trim());
process.exit();
}
else if (arg.substr(0, 2) == '--')
else if (arg == '--no_persist_filter')
{
options['persist_filter'] = null;
}
else if (arg == '--vitastor_persist_filter')
{
options['persist_filter'] = vitastor_persist_filter(process.argv[++i]||'');
}
else if (arg.substr(0, 2) == '--' && arg != '--persist_filter')
{
options[arg.substr(2)] = process.argv[++i];
}
}
options['stale_read'] = options['stale_read'] === '1' || options['stale_read'] === 'yes' || options['stale_read'] === 'true';
if (options['persist_filter'])
{
options['persist_filter'] = require(options['persist_filter'])(options);
}
return options;
}

View File

@ -49,18 +49,7 @@ class AntiEtcd extends EventEmitter
}
if (this.cfg.cert)
{
this.tls = {
key: await fsp.readFile(this.cfg.key),
cert: await fsp.readFile(this.cfg.cert),
};
if (this.cfg.ca)
{
this.tls.ca = await fsp.readFile(this.cfg.ca);
}
if (this.cfg.client_cert_auth)
{
this.tls.requestCert = true;
}
this.tls = { key: await fsp.readFile(this.cfg.key), cert: await fsp.readFile(this.cfg.cert) };
this.server = https.createServer(this.tls, (req, res) => this._handleRequest(req, res));
}
else
@ -104,25 +93,18 @@ class AntiEtcd extends EventEmitter
{
res.push(this.persistence.persistChange(msg));
}
if (res.length == 1)
if (res.length)
{
await res[0];
res = await Promise.allSettled(res);
const errors = res.filter(r => r.status == 'rejected');
if (errors.length)
{
for (const e of errors)
{
console.error(e.reason);
}
throw errors[0].reason;
}
else if (res.length > 0)
{
let done = 0;
await new Promise((allOk, allNo) =>
{
res.map(promise => promise.then(res =>
{
if ((++done) == res.length)
allOk();
}).catch(e =>
{
console.error(e);
allNo(e);
}));
});
}
}
@ -281,14 +263,6 @@ class AntiEtcd extends EventEmitter
{
data.leaderonly = true;
}
if (requestUrl.searchParams.get('serializable'))
{
data.serializable = true;
}
if (requestUrl.searchParams.get('nowaitquorum'))
{
data.nowaitquorum = true;
}
try
{
if (requestUrl.pathname.substr(0, 4) == '/v3/')
@ -332,13 +306,7 @@ class AntiEtcd extends EventEmitter
}
if (path !== 'dump' && this.cluster)
{
const res = await this.cluster.checkRaftState(
path,
(data.leaderonly ? AntiCluster.LEADER_ONLY : 0) |
(data.serializable ? AntiCluster.READ_FROM_FOLLOWER : 0) |
(data.nowaitquorum ? AntiCluster.NO_WAIT_QUORUM : 0),
data
);
const res = await this.cluster.checkRaftState(path, data.leaderonly, data);
if (res)
{
return res;

View File

@ -1,6 +1,5 @@
const crypto = require('crypto');
const stableStringify = require('./stable-stringify.js');
const { RequestError } = require('./common.js');
/*type TreeNode = {
value?: any,
@ -86,14 +85,9 @@ class EtcTree
check_value = cur && cur.lease;
ref_value = chk.lease;
}
else if (chk.target === 'VALUE')
{
check_value = cur && cur.value;
ref_value = chk.value;
}
else
{
throw new RequestError(501, 'Unsupported comparison target: '+chk.target);
throw new Error('Unsupported comparison target: '+chk.target);
}
if (chk.result === 'LESS')
{
@ -101,7 +95,7 @@ class EtcTree
}
else if (chk.result)
{
throw new RequestError(501, 'Unsupported comparison result: '+chk.result);
throw new Error('Unsupported comparison result: '+chk.result);
}
return check_value == ref_value;
}
@ -119,7 +113,7 @@ class EtcTree
if (end != null && (key[key.length-1] != '/' || end[end.length-1] != '0' ||
end.substr(0, end.length-1) !== key.substr(0, key.length-1)))
{
throw new RequestError(501, 'Non-directory range queries are unsupported');
throw new Error('Non-directory range queries are unsupported');
}
const parts = this._key_parts(key);
return { parts, all: end != null };
@ -386,7 +380,7 @@ class EtcTree
const id = req.ID;
if (!this.leases[id])
{
throw new RequestError(400, 'unknown lease');
throw new Error('unknown lease');
}
const lease = this.leases[id];
if (lease.timer_id)
@ -436,7 +430,7 @@ class EtcTree
{
if (!this.leases[id])
{
throw new RequestError(400, 'unknown lease');
throw new Error('unknown lease');
}
for (const key in this.leases[id].keys)
{
@ -452,7 +446,7 @@ class EtcTree
{
if (no_throw)
return null;
throw new RequestError(400, 'unknown lease');
throw new Error('unknown lease');
}
this.mod_revision++;
this._sync_revoke_lease(req.ID, notifications, this.mod_revision);
@ -546,7 +540,7 @@ class EtcTree
let watch_id = req.watch_id;
if (watch_id instanceof Object)
{
throw new RequestError(400, 'invalid watch_id');
throw new Error('invalid watch_id');
}
if (!watch_id)
{
@ -741,7 +735,7 @@ class EtcTree
{
if (!this.leases[request_put.lease])
{
throw new RequestError(400, 'unknown lease: '+request_put.lease);
throw new Error('unknown lease: '+request_put.lease);
}
cur.lease = request_put.lease;
this.leases[request_put.lease].keys[key] = true;

View File

@ -1,6 +1,5 @@
function vitastor_persist_filter(cfg)
function vitastor_persist_filter(prefix)
{
const prefix = cfg.vitastor_prefix || '/vitastor';
return (key, value) =>
{
if (key.substr(0, prefix.length+'/osd/stats/'.length) == prefix+'/osd/stats/')