Compare commits

...

10 Commits

Author SHA1 Message Date
Vitaliy Filippov 60a300621e Support scattered write in node.js binding
Test / test_rebalance_verify (push) Successful in 1m27s Details
Test / test_rebalance_verify_imm (push) Successful in 1m26s Details
Test / test_root_node (push) Successful in 6s Details
Test / test_rebalance_verify_ec (push) Successful in 1m34s Details
Test / test_rebalance_verify_ec_imm (push) Successful in 1m35s Details
Test / test_write_no_same (push) Successful in 7s Details
Test / test_switch_primary (push) Successful in 31s Details
Test / test_write (push) Successful in 30s Details
Test / test_write_xor (push) Successful in 33s Details
Test / test_heal_pg_size_2 (push) Successful in 2m15s Details
Test / test_heal_antietcd (push) Successful in 2m16s Details
Test / test_heal_csum_32k_dmj (push) Successful in 2m18s Details
Test / test_heal_csum_32k_dj (push) Successful in 2m18s Details
Test / test_heal_csum_32k (push) Successful in 2m19s Details
Test / test_heal_csum_4k_dmj (push) Successful in 2m13s Details
Test / test_osd_tags (push) Successful in 9s Details
Test / test_enospc (push) Successful in 10s Details
Test / test_enospc_xor (push) Successful in 11s Details
Test / test_enospc_imm (push) Successful in 10s Details
Test / test_enospc_imm_xor (push) Successful in 13s Details
Test / test_scrub (push) Successful in 13s Details
Test / test_scrub_zero_osd_2 (push) Successful in 12s Details
Test / test_scrub_xor (push) Successful in 12s Details
Test / test_scrub_pg_size_3 (push) Successful in 14s Details
Test / test_heal_csum_4k_dj (push) Successful in 2m14s Details
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 17s Details
Test / test_heal_csum_4k (push) Successful in 2m13s Details
Test / test_nfs (push) Successful in 11s Details
Test / test_scrub_ec (push) Successful in 16s Details
Test / test_heal_ec (push) Failing after 10m7s Details
2024-07-31 01:15:38 +03:00
Vitaliy Filippov 731a902cd0 Fix a rare client hang on PG state changes 2024-07-31 01:15:38 +03:00
Vitaliy Filippov 94a13d1d2e Always continue operations to not miss resuming after the lack of PG primary
Should fix spurious client hangs during PG primary switchover
2024-07-31 01:15:37 +03:00
Vitaliy Filippov 8e0f242d30 Add downgrade docs 2024-07-31 01:15:37 +03:00
Vitaliy Filippov 0daa8ea39b Support seamless upgrade to new PG config and stats etcd key names 2024-07-31 01:15:37 +03:00
Vitaliy Filippov b263d311ef Use separate watch revisions for different watchers 2024-07-31 01:15:37 +03:00
Vitaliy Filippov 8720185780 Run tests in CI in memory (in tmpfs) 2024-07-31 01:15:37 +03:00
Vitaliy Filippov 20584414d8 Report OSD version in /osd/state/ and /osd/stats/ (for the future) 2024-07-31 01:15:37 +03:00
Vitaliy Filippov 306a3db7f3 Rename VERSION define to VITASTOR_VERSION 2024-07-31 01:15:37 +03:00
Vitaliy Filippov 5b0aebada4 Rename /config/pgs to /pg/config and /pg/stats/* to /pgstats/* 2024-07-31 01:15:37 +03:00
54 changed files with 520 additions and 324 deletions

View File

@ -16,6 +16,7 @@ env:
BUILDENV_IMAGE: git.yourcmc.ru/vitalif/vitastor/buildenv
TEST_IMAGE: git.yourcmc.ru/vitalif/vitastor/test
OSD_ARGS: '--etcd_quick_timeout 2000'
USE_RAMDISK: 1
concurrency:
group: ci-${{ github.ref }}

View File

@ -2,6 +2,6 @@ cmake_minimum_required(VERSION 2.8.12)
project(vitastor)
set(VERSION "1.7.1")
set(VITASTOR_VERSION "1.7.1")
add_subdirectory(src)

View File

@ -1,9 +1,9 @@
VERSION ?= v1.7.1
VITASTOR_VERSION ?= v1.7.1
all: build push
build:
@docker build --rm -t vitalif/vitastor-csi:$(VERSION) .
@docker build --rm -t vitalif/vitastor-csi:$(VITASTOR_VERSION) .
push:
@docker push vitalif/vitastor-csi:$(VERSION)
@docker push vitalif/vitastor-csi:$(VITASTOR_VERSION)

View File

@ -42,7 +42,7 @@ PG state always includes exactly 1 of the following base states:
- **offline** — PG isn't activated by any OSD at all. Either primary OSD isn't set for
this PG at all (if the pool is just created), or an unavailable OSD is set as primary,
or the primary OSD refuses to start this PG (for example, because of wrong block_size),
or the PG is stopped by the monitor using `pause: true` flag in `/vitastor/config/pgs` in etcd.
or the PG is stopped by the monitor using `pause: true` flag in `/vitastor/pg/config` in etcd.
- **starting** — primary OSD has acquired PG lock in etcd, PG is starting.
- **peering** — primary OSD requests PG object listings from secondary OSDs and calculates
the PG state.
@ -150,7 +150,7 @@ POOL_ID=1
ALL_OSDS=$(etcdctl --endpoints=your_etcd_address:2379 get --keys-only --prefix /vitastor/osd/stats/ | \
perl -e '$/ = undef; $a = <>; $a =~ s/\s*$//; $a =~ s!/vitastor/osd/stats/!!g; $a =~ s/\s+/,/g; print $a')
for i in $(seq 1 $PG_COUNT); do
etcdctl --endpoints=your_etcd_address:2379 put /vitastor/pg/history/$POOL_ID/$i '{"all_peers":['$ALL_OSDS']}'; done
etcdctl --endpoints=your_etcd_address:2379 put /vitastor/pg/history/$POOL_ID/$i '{"all_peers":['$ALL_OSDS']}'
done
```
@ -169,21 +169,51 @@ Upgrading is performed without stopping clients (VMs/containers), you just need
upgrade and restart servers one by one. However, ideally you should restart VMs too
to make them use the new version of the client library.
Exceptions (specific upgrade instructions):
- Upgrading <= 1.1.x to 1.2.0 or later, if you use EC n+k with k>=2, is recommended
### 1.1.x to 1.2.0
Upgrading version <= 1.1.x to version >= 1.2.0, if you use EC n+k with k>=2, is recommended
to be performed with full downtime: first you should stop all clients, then all OSDs,
then upgrade and start everything back — because versions before 1.2.0 have several
bugs leading to invalid data being read in EC n+k, k>=2 configurations in degraded pools.
- Versions <= 0.8.7 are incompatible with versions >= 0.9.0, so you should first
### 0.8.7 to 0.9.0
Versions <= 0.8.7 are incompatible with versions >= 0.9.0, so you should first
upgrade from <= 0.8.7 to 0.8.8 or 0.8.9, and only then to >= 0.9.x. If you upgrade
without this intermediate step, client I/O will hang until the end of upgrade process.
- Upgrading from <= 0.5.x to >= 0.6.x is not supported.
Rollback:
- Version 1.0.0 has a new disk format, so OSDs initiaziled on 1.0.0 can't be rolled
back to 0.9.x or previous versions.
- Versions before 0.8.0 don't have vitastor-disk, so OSDs, initialized by it, won't
start with 0.7.x or 0.6.x. :-)
### 0.5.x to 0.6.x
Upgrading from <= 0.5.x to >= 0.6.x is not supported.
## Downgrade
Downgrade are also allowed freely, except the following specific instructions:
### 1.8.0 to 1.7.1
Before downgrading from version >= 1.8.0 to version <= 1.7.1
you have to copy /vitastor/pg/config etcd key to /vitastor/config/pgs:
```
etcdctl --endpoints=http://... get --print-value-only /vitastor/pg/config | \
etcdctl --endpoints=http://... put /vitastor/config/pgs
```
Then you can just install older packages and restart all services.
If you performed downgrade without first copying that key, run "add all OSDs into the
history records of all PGs" from [Restoring from lost pool configuration](#restoring-from-lost-pool-configuration).
### 1.0.0 to 0.9.x
Version 1.0.0 has a new disk format, so OSDs initialized on 1.0.0 or later can't
be rolled back to 0.9.x or previous versions.
### 0.8.0 to 0.7.x
Versions before 0.8.0 don't have vitastor-disk, so OSDs, initialized by it, won't
start with older versions (0.4.x - 0.7.x). :-)
## OSD memory usage

View File

@ -42,7 +42,7 @@
- **offline** — PG вообще не активирована ни одним OSD. Либо первичный OSD не назначен вообще
(если пул только создан), либо в качестве первичного назначен недоступный OSD, либо
назначенный OSD отказывается запускать эту PG (например, из-за несовпадения block_size),
либо PG остановлена монитором через флаг `pause: true` в `/vitastor/config/pgs` в etcd.
либо PG остановлена монитором через флаг `pause: true` в `/vitastor/pg/config` в etcd.
- **starting** — первичный OSD захватил блокировку PG в etcd, PG запускается.
- **peering** — первичный OSD опрашивает вторичные OSD на предмет списков объектов данной PG и рассчитывает её состояние.
- **repeering** — PG ожидает завершения текущих операций ввода-вывода, после чего перейдёт в состояние **peering**.
@ -147,7 +147,7 @@ POOL_ID=1
ALL_OSDS=$(etcdctl --endpoints=your_etcd_address:2379 get --keys-only --prefix /vitastor/osd/stats/ | \
perl -e '$/ = undef; $a = <>; $a =~ s/\s*$//; $a =~ s!/vitastor/osd/stats/!!g; $a =~ s/\s+/,/g; print $a')
for i in $(seq 1 $PG_COUNT); do
etcdctl --endpoints=your_etcd_address:2379 put /vitastor/pg/history/$POOL_ID/$i '{"all_peers":['$ALL_OSDS']}'; done
etcdctl --endpoints=your_etcd_address:2379 put /vitastor/pg/history/$POOL_ID/$i '{"all_peers":['$ALL_OSDS']}'
done
```
@ -166,21 +166,51 @@ done
достаточно обновлять серверы по одному. Однако, конечно, чтобы запущенные виртуальные машины
начали использовать новую версию клиентской библиотеки, их тоже нужно перезапустить.
Исключения (особые указания при обновлении):
- Обновляться с версий <= 1.1.x до версий >= 1.2.0, если вы используете EC n+k и k>=2,
### 1.1.x -> 1.2.0
Обновляться с версий <= 1.1.x до версий >= 1.2.0, если вы используете EC n+k и k>=2,
рекомендуется с временной остановкой кластера — сначала нужно остановить всех клиентов,
потом все OSD, потом обновить и запустить всё обратно — из-за нескольких багов, которые
могли приводить к некорректному чтению данных в деградированных EC-пулах.
- Версии <= 0.8.7 несовместимы с версиями >= 0.9.0, поэтому при обновлении с <= 0.8.7
### 0.8.7 -> 0.9.0
Версии <= 0.8.7 несовместимы с версиями >= 0.9.0, поэтому при обновлении с <= 0.8.7
нужно сначала обновиться до 0.8.8 или 0.8.9, а уже потом до любых версий >= 0.9.x.
Иначе клиентский ввод-вывод зависнет до завершения обновления.
- Обновление с версий 0.5.x и более ранних до 0.6.x и более поздних не поддерживается.
Откат:
- В версии 1.0.0 поменялся дисковый формат, поэтому OSD, созданные на версии >= 1.0.0,
### 0.5.x -> 0.6.x
Обновление с версий 0.5.x и более ранних до 0.6.x и более поздних не поддерживается.
## Откат версии
Откат (понижение версии) тоже свободно разрешён, кроме указанных ниже случаев:
### 1.8.0 -> 1.7.1
Перед понижением версии с >= 1.8.0 до <= 1.7.1 вы должны скопировать ключ
etcd `/vitastor/pg/config` в `/vitastor/config/pgs`:
```
etcdctl --endpoints=http://... get --print-value-only /vitastor/pg/config | \
etcdctl --endpoints=http://... put /vitastor/config/pgs
```
После этого можно просто установить более старые пакеты и перезапустить все сервисы.
Если вы откатили версию, не скопировав предварительно этот ключ - выполните "добавление всех
OSD в исторические записи всех PG" из раздела [Восстановление потерянной конфигурации пулов](#восстановление-потерянной-конфигурации-пулов).
### 1.0.0 -> 0.9.x
В версии 1.0.0 поменялся дисковый формат, поэтому OSD, созданные на версии >= 1.0.0,
нельзя откатить до версии 0.9.x и более ранних.
- В версиях ранее 0.8.0 нет vitastor-disk, значит, созданные им OSD нельзя откатить
до 0.7.x или 0.6.x. :-)
### 0.8.0 -> 0.7.x
В версиях ранее 0.8.0 нет vitastor-disk, значит, созданные им OSD не запустятся на
более ранних версиях (0.4.x - 0.7.x). :-)
## Потребление памяти OSD

View File

@ -6,7 +6,7 @@ const etcd_nonempty_keys = {
'config/global': 1,
'config/node_placement': 1,
'config/pools': 1,
'config/pgs': 1,
'pg/config': 1,
'history/last_clean_pgs': 1,
'stats': 1,
};
@ -15,7 +15,8 @@ const etcd_allow = new RegExp('^'+[
'config/node_placement',
'config/pools',
'config/osd/[1-9]\\d*',
'config/pgs',
'config/pgs', // old name
'pg/config',
'config/inode/[1-9]\\d*/[1-9]\\d*',
'osd/state/[1-9]\\d*',
'osd/stats/[1-9]\\d*',
@ -24,7 +25,8 @@ const etcd_allow = new RegExp('^'+[
'mon/master',
'mon/member/[a-f0-9]+',
'pg/state/[1-9]\\d*/[1-9]\\d*',
'pg/stats/[1-9]\\d*/[1-9]\\d*',
'pg/stats/[1-9]\\d*/[1-9]\\d*', // old name
'pgstats/[1-9]\\d*/[1-9]\\d*',
'pg/history/[1-9]\\d*/[1-9]\\d*',
'history/last_clean_pgs',
'inode/stats/[1-9]\\d*/\\d+',
@ -205,19 +207,6 @@ const etcd_tree = {
osd: {
/* <id>: { reweight?: 1, tags?: [ 'nvme', ... ], noout?: true }, ... */
},
/* pgs: {
hash: string,
items: {
<pool_id>: {
<pg_id>: {
osd_set: [ 1, 2, 3 ],
primary: 1,
pause: false,
}
}
}
}, */
pgs: {},
/* inode: {
<pool_id>: {
<inode_t>: {
@ -290,6 +279,19 @@ const etcd_tree = {
},
},
pg: {
/* config: {
hash: string,
items: {
<pool_id>: {
<pg_id>: {
osd_set: [ 1, 2, 3 ],
primary: 1,
pause: false,
}
}
}
}, */
config: {},
state: {
/* <pool_id>: {
<pg_id>: {
@ -300,18 +302,6 @@ const etcd_tree = {
}
}, */
},
stats: {
/* <pool_id>: {
<pg_id>: {
object_count: uint64_t,
clean_count: uint64_t,
misplaced_count: uint64_t,
degraded_count: uint64_t,
incomplete_count: uint64_t,
write_osd_set: osd_num_t[],
},
}, */
},
history: {
/* <pool_id>: {
<pg_id>: {
@ -323,6 +313,18 @@ const etcd_tree = {
}, */
},
},
pgstats: {
/* <pool_id>: {
<pg_id>: {
object_count: uint64_t,
clean_count: uint64_t,
misplaced_count: uint64_t,
degraded_count: uint64_t,
incomplete_count: uint64_t,
write_osd_set: osd_num_t[],
},
}, */
},
inode: {
stats: {
/* <pool_id>: {

View File

@ -75,6 +75,8 @@ class Mon
this.prev_stats = { osd_stats: {}, osd_diff: {} };
this.recheck_pgs_active = false;
this.watcher_active = false;
this.old_pg_config = false;
this.old_pg_stats_seen = false;
}
async start()
@ -122,7 +124,7 @@ class Mon
!Number(this.state.pool.stats[pool_id].pg_real_size))
{
// Generate missing data in etcd
this.state.config.pgs.hash = null;
this.state.pg.config.hash = null;
break;
}
}
@ -201,10 +203,15 @@ class Mon
stats_changed = true;
changed = true;
}
else if (key.substr(0, 11) == '/osd/stats/' || key.substr(0, 10) == '/pg/stats/' || key.substr(0, 16) == '/osd/inodestats/')
else if (key.substr(0, 11) == '/osd/stats/' || key.substr(0, 9) == '/pgstats/' || key.substr(0, 16) == '/osd/inodestats/')
{
stats_changed = true;
}
else if (key.substr(0, 10) == '/pg/stats/')
{
this.old_pg_stats_seen = true;
stats_changed = true;
}
else if (key.substr(0, 10) == '/pg/state/')
{
pg_states_changed = true;
@ -285,7 +292,7 @@ class Mon
continue next_pool;
}
}
new_clean_pgs.items[pool_id] = this.state.config.pgs.items[pool_id];
new_clean_pgs.items[pool_id] = this.state.pg.config.items[pool_id];
}
this.state.history.last_clean_pgs = new_clean_pgs;
await this.etcd.etcd_call('/kv/txn', {
@ -396,6 +403,50 @@ class Mon
this.parse_kv(kv);
}
}
if (Object.keys((this.state.config.pgs||{}).items||{}).length)
{
// Support seamless upgrade to new OSDs
if (!Object.keys((this.state.pg.config||{}).items||{}).length)
{
const pgs = JSON.stringify(this.state.config.pgs);
this.state.pg.config = JSON.parse(pgs);
const res = await this.etcd.etcd_call('/kv/txn', {
success: [
{ requestPut: { key: b64(this.config.etcd_prefix+'/pg/config'), value: b64(pgs) } },
],
compare: [
{ key: b64(this.config.etcd_prefix+'/pg/config'), target: 'MOD', mod_revision: ''+this.etcd_watch_revision, result: 'LESS' },
],
}, this.config.etcd_mon_timeout, this.config.etcd_mon_retries);
if (!res.succeeded)
throw new Error('Failed to duplicate old PG config to new PG config');
}
this.old_pg_config = true;
this.old_pg_config_timer = setInterval(() => this.check_clear_old_config().catch(console.error),
this.config.old_pg_config_clear_interval||3600000);
}
}
async check_clear_old_config()
{
if (this.old_pg_config && this.old_pg_stats_seen)
{
this.old_pg_stats_seen = false;
return;
}
if (this.old_pg_config)
{
await this.etcd.etcd_call('/kv/txn', { success: [
{ requestDeleteRange: { key: b64(this.config.etcd_prefix+'/config/pgs') } },
{ requestDeleteRange: { key: b64(this.config.etcd_prefix+'/pg/stats/'), range_end: b64(this.config.etcd_prefix+'/pg/stats0') } },
] }, this.config.etcd_mon_timeout, this.config.etcd_mon_retries);
this.old_pg_config = false;
}
if (this.old_pg_config_timer)
{
clearInterval(this.old_pg_config_timer);
this.old_pg_config_timer = null;
}
}
all_osds()
@ -406,7 +457,7 @@ class Mon
async stop_all_pgs(pool_id)
{
let has_online = false, paused = true;
for (const pg in this.state.config.pgs.items[pool_id]||{})
for (const pg in this.state.pg.config.items[pool_id]||{})
{
// FIXME: Change all (||{}) to ?. (optional chaining) at some point
const cur_state = (((this.state.pg.state[pool_id]||{})[pg]||{}).state||[]).join(',');
@ -414,7 +465,7 @@ class Mon
{
has_online = true;
}
if (!this.state.config.pgs.items[pool_id][pg].pause)
if (!this.state.pg.config.items[pool_id][pg].pause)
{
paused = false;
}
@ -422,7 +473,7 @@ class Mon
if (!paused)
{
console.log('Stopping all PGs for pool '+pool_id+' before changing PG count');
const new_cfg = JSON.parse(JSON.stringify(this.state.config.pgs));
const new_cfg = JSON.parse(JSON.stringify(this.state.pg.config));
for (const pg in new_cfg.items[pool_id])
{
new_cfg.items[pool_id][pg].pause = true;
@ -430,22 +481,26 @@ class Mon
// Check that no OSDs change their state before we pause PGs
// Doing this we make sure that OSDs don't wake up in the middle of our "transaction"
// and can't see the old PG configuration
const checks = [];
const checks = [
{ key: b64(this.config.etcd_prefix+'/mon/master'), target: 'LEASE', lease: ''+this.etcd_lease_id },
{ key: b64(this.config.etcd_prefix+'/pg/config'), target: 'MOD', mod_revision: ''+this.etcd_watch_revision, result: 'LESS' },
];
for (const osd_num of this.all_osds())
{
const key = b64(this.config.etcd_prefix+'/osd/state/'+osd_num);
checks.push({ key, target: 'MOD', result: 'LESS', mod_revision: ''+this.etcd_watch_revision });
}
await this.etcd.etcd_call('/kv/txn', {
compare: [
{ key: b64(this.config.etcd_prefix+'/mon/master'), target: 'LEASE', lease: ''+this.etcd_lease_id },
{ key: b64(this.config.etcd_prefix+'/config/pgs'), target: 'MOD', mod_revision: ''+this.etcd_watch_revision, result: 'LESS' },
...checks,
],
const txn = {
compare: checks,
success: [
{ requestPut: { key: b64(this.config.etcd_prefix+'/config/pgs'), value: b64(JSON.stringify(new_cfg)) } },
{ requestPut: { key: b64(this.config.etcd_prefix+'/pg/config'), value: b64(JSON.stringify(new_cfg)) } },
],
}, this.config.etcd_mon_timeout, 0);
};
if (this.old_pg_config)
{
txn.success.push({ requestPut: { key: b64(this.config.etcd_prefix+'/config/pgs'), value: b64(JSON.stringify(new_cfg)) } });
}
await this.etcd.etcd_call('/kv/txn', txn, this.config.etcd_mon_timeout, 0);
return false;
}
return !has_online;
@ -473,7 +528,7 @@ class Mon
pools: this.state.config.pools,
};
const tree_hash = sha1hex(stableStringify(tree_cfg));
if (this.state.config.pgs.hash != tree_hash)
if (this.state.pg.config.hash != tree_hash)
{
// Something has changed
console.log('Pool configuration or OSD tree changed, re-optimizing');
@ -514,10 +569,10 @@ class Mon
else
{
// Nothing changed, but we still want to recheck the distribution of primaries
let new_config_pgs = recheck_primary(this.state, this.config, up_osds, osd_tree);
if (new_config_pgs)
let new_pg_config = recheck_primary(this.state, this.config, up_osds, osd_tree);
if (new_pg_config)
{
const ok = await this.save_pg_config(new_config_pgs);
const ok = await this.save_pg_config(new_pg_config);
if (ok)
console.log('PG configuration successfully changed');
else
@ -532,12 +587,12 @@ class Mon
async apply_pool_pgs(results, up_osds, osd_tree, tree_hash)
{
for (const pool_id in (this.state.config.pgs||{}).items||{})
for (const pool_id in (this.state.pg.config||{}).items||{})
{
// We should stop all PGs when deleting a pool or changing its PG count
if (!this.state.config.pools[pool_id] ||
this.state.config.pgs.items[pool_id] && this.state.config.pools[pool_id].pg_count !=
Object.keys(this.state.config.pgs.items[pool_id]).reduce((a, c) => (a < (0|c) ? (0|c) : a), 0))
this.state.pg.config.items[pool_id] && this.state.config.pools[pool_id].pg_count !=
Object.keys(this.state.pg.config.items[pool_id]).reduce((a, c) => (a < (0|c) ? (0|c) : a), 0))
{
if (!await this.stop_all_pgs(pool_id))
{
@ -545,22 +600,22 @@ class Mon
}
}
}
const new_config_pgs = JSON.parse(JSON.stringify(this.state.config.pgs));
const new_pg_config = JSON.parse(JSON.stringify(this.state.pg.config));
const etcd_request = { compare: [], success: [] };
for (const pool_id in (new_config_pgs||{}).items||{})
for (const pool_id in (new_pg_config||{}).items||{})
{
if (!this.state.config.pools[pool_id])
{
const prev_pgs = [];
for (const pg in new_config_pgs.items[pool_id]||{})
for (const pg in new_pg_config.items[pool_id]||{})
{
prev_pgs[pg-1] = new_config_pgs.items[pool_id][pg].osd_set;
prev_pgs[pg-1] = new_pg_config.items[pool_id][pg].osd_set;
}
// Also delete pool statistics
etcd_request.success.push({ requestDeleteRange: {
key: b64(this.config.etcd_prefix+'/pool/stats/'+pool_id),
} });
save_new_pgs_txn(new_config_pgs, etcd_request, this.state, this.config.etcd_prefix,
save_new_pgs_txn(new_pg_config, etcd_request, this.state, this.config.etcd_prefix,
this.etcd_watch_revision, pool_id, up_osds, osd_tree, prev_pgs, [], []);
}
}
@ -569,7 +624,7 @@ class Mon
const pool_id = pool_res.pool_id;
const pool_cfg = this.state.config.pools[pool_id];
let pg_history = [];
for (const pg in ((this.state.config.pgs.items||{})[pool_id]||{}))
for (const pg in ((this.state.pg.config.items||{})[pool_id]||{}))
{
if (this.state.pg.history[pool_id] &&
this.state.pg.history[pool_id][pg])
@ -578,9 +633,9 @@ class Mon
}
}
const real_prev_pgs = [];
for (const pg in ((this.state.config.pgs.items||{})[pool_id]||{}))
for (const pg in ((this.state.pg.config.items||{})[pool_id]||{}))
{
real_prev_pgs[pg-1] = [ ...this.state.config.pgs.items[pool_id][pg].osd_set ];
real_prev_pgs[pg-1] = [ ...this.state.pg.config.items[pool_id][pg].osd_set ];
}
if (real_prev_pgs.length > 0 && real_prev_pgs.length != pool_res.pgs.length)
{
@ -591,8 +646,8 @@ class Mon
pg_history = scale_pg_history(pg_history, real_prev_pgs, pool_res.pgs);
// Drop stats
etcd_request.success.push({ requestDeleteRange: {
key: b64(this.config.etcd_prefix+'/pg/stats/'+pool_id+'/'),
range_end: b64(this.config.etcd_prefix+'/pg/stats/'+pool_id+'0'),
key: b64(this.config.etcd_prefix+'/pgstats/'+pool_id+'/'),
range_end: b64(this.config.etcd_prefix+'/pgstats/'+pool_id+'0'),
} });
}
const stats = {
@ -603,22 +658,26 @@ class Mon
key: b64(this.config.etcd_prefix+'/pool/stats/'+pool_id),
value: b64(JSON.stringify(stats)),
} });
save_new_pgs_txn(new_config_pgs, etcd_request, this.state, this.config.etcd_prefix,
save_new_pgs_txn(new_pg_config, etcd_request, this.state, this.config.etcd_prefix,
this.etcd_watch_revision, pool_id, up_osds, osd_tree, real_prev_pgs, pool_res.pgs, pg_history);
}
new_config_pgs.hash = tree_hash;
return await this.save_pg_config(new_config_pgs, etcd_request);
new_pg_config.hash = tree_hash;
return await this.save_pg_config(new_pg_config, etcd_request);
}
async save_pg_config(new_config_pgs, etcd_request = { compare: [], success: [] })
async save_pg_config(new_pg_config, etcd_request = { compare: [], success: [] })
{
etcd_request.compare.push(
{ key: b64(this.config.etcd_prefix+'/mon/master'), target: 'LEASE', lease: ''+this.etcd_lease_id },
{ key: b64(this.config.etcd_prefix+'/config/pgs'), target: 'MOD', mod_revision: ''+this.etcd_watch_revision, result: 'LESS' },
{ key: b64(this.config.etcd_prefix+'/pg/config'), target: 'MOD', mod_revision: ''+this.etcd_watch_revision, result: 'LESS' },
);
etcd_request.success.push(
{ requestPut: { key: b64(this.config.etcd_prefix+'/config/pgs'), value: b64(JSON.stringify(new_config_pgs)) } },
{ requestPut: { key: b64(this.config.etcd_prefix+'/pg/config'), value: b64(JSON.stringify(new_pg_config)) } },
);
if (this.old_pg_config)
{
etcd_request.success.push({ requestPut: { key: b64(this.config.etcd_prefix+'/config/pgs'), value: b64(JSON.stringify(new_pg_config)) } });
}
const txn_res = await this.etcd.etcd_call('/kv/txn', etcd_request, this.config.etcd_mon_timeout, 0);
return txn_res.succeeded;
}

View File

@ -57,7 +57,7 @@ function pick_primary(pool_config, osd_set, up_osds, aff_osds)
function recheck_primary(state, global_config, up_osds, osd_tree)
{
let new_config_pgs;
let new_pg_config;
for (const pool_id in state.config.pools)
{
const pool_cfg = state.config.pools[pool_id];
@ -69,30 +69,30 @@ function recheck_primary(state, global_config, up_osds, osd_tree)
reset_rng();
for (let pg_num = 1; pg_num <= pool_cfg.pg_count; pg_num++)
{
if (!state.config.pgs.items[pool_id])
if (!state.pg.config.items[pool_id])
{
continue;
}
const pg_cfg = state.config.pgs.items[pool_id][pg_num];
const pg_cfg = state.pg.config.items[pool_id][pg_num];
if (pg_cfg)
{
const new_primary = pick_primary(state.config.pools[pool_id], pg_cfg.osd_set, up_osds, aff_osds);
if (pg_cfg.primary != new_primary)
{
if (!new_config_pgs)
if (!new_pg_config)
{
new_config_pgs = JSON.parse(JSON.stringify(state.config.pgs));
new_pg_config = JSON.parse(JSON.stringify(state.pg.config));
}
console.log(
`Moving pool ${pool_id} (${pool_cfg.name || 'unnamed'}) PG ${pg_num}`+
` primary OSD from ${pg_cfg.primary} to ${new_primary}`
);
new_config_pgs.items[pool_id][pg_num].primary = new_primary;
new_pg_config.items[pool_id][pg_num].primary = new_primary;
}
}
}
}
return new_config_pgs;
return new_pg_config;
}
function save_new_pgs_txn(save_to, request, state, etcd_prefix, etcd_watch_revision, pool_id, up_osds, osd_tree, prev_pgs, new_pgs, pg_history)
@ -185,10 +185,10 @@ async function generate_pool_pgs(state, global_config, pool_id, osd_tree, levels
}
if (!prev_pgs.length)
{
// Fall back to config/pgs if it's empty
for (const pg in ((state.config.pgs.items||{})[pool_id]||{}))
// Fall back to pg/config if it's empty
for (const pg in ((state.pg.config.items||{})[pool_id]||{}))
{
prev_pgs[pg-1] = [ ...state.config.pgs.items[pool_id][pg].osd_set ];
prev_pgs[pg-1] = [ ...state.pg.config.items[pool_id][pg].osd_set ];
}
}
const old_pg_count = prev_pgs.length;
@ -205,8 +205,8 @@ async function generate_pool_pgs(state, global_config, pool_id, osd_tree, levels
ordered: pool_cfg.scheme != 'replicated',
};
let optimize_result;
// Re-shuffle PGs if config/pgs.hash is empty
if (old_pg_count > 0 && state.config.pgs.hash)
// Re-shuffle PGs if pg/config.hash is empty
if (old_pg_count > 0 && state.pg.config.hash)
{
if (prev_pgs.length != pool_cfg.pg_count)
{

View File

@ -166,7 +166,7 @@ function export_prometheus_metrics(st)
res += `vitastor_pool_used_raw_tb{${pool_label}} ${pool_stat.used_raw_tb||0}\n`;
// PG states and pool up/down status
const real_pg_count = (Object.keys(((st.config.pgs||{}).items||{})[pool_id]||{}).length) || (0|pool_cfg.pg_count);
const real_pg_count = (Object.keys(((st.pg.config||{}).items||{})[pool_id]||{}).length) || (0|pool_cfg.pg_count);
const per_state = {
active: 0,
starting: 0,

View File

@ -100,10 +100,19 @@ function sum_object_counts(state, global_config)
{
const object_counts = { object: 0n, clean: 0n, misplaced: 0n, degraded: 0n, incomplete: 0n };
const object_bytes = { object: 0n, clean: 0n, misplaced: 0n, degraded: 0n, incomplete: 0n };
let pgstats = state.pgstats;
if (state.pg.stats)
{
// Merge with old stats for seamless transition to new stats
for (const pool_id in state.pg.stats)
{
pgstats[pool_id] = { ...(state.pg.stats[pool_id] || {}), ...(pgstats[pool_id] || {}) };
}
}
for (const pool_id in pgstats)
{
let object_size = 0;
for (const osd_num of state.pg.stats[pool_id].write_osd_set||[])
for (const osd_num of pgstats[pool_id].write_osd_set||[])
{
if (osd_num && state.osd.stats[osd_num] && state.osd.stats[osd_num].block_size)
{
@ -121,9 +130,9 @@ function sum_object_counts(state, global_config)
object_size *= ((pool_cfg.pg_size||0) - (pool_cfg.parity_chunks||0));
}
object_size = BigInt(object_size);
for (const pg_num in state.pg.stats[pool_id])
for (const pg_num in pgstats[pool_id])
{
const st = state.pg.stats[pool_id][pg_num];
const st = pgstats[pool_id][pg_num];
if (st)
{
for (const k in object_counts)

View File

@ -35,7 +35,8 @@ function vitastor_persist_filter(cfg)
}
else if (key.substr(0, prefix.length+'/osd/'.length) == prefix+'/osd/' ||
key.substr(0, prefix.length+'/inode/stats/'.length) == prefix+'/inode/stats/' ||
key.substr(0, prefix.length+'/pg/stats/'.length) == prefix+'/pg/stats/' ||
key.substr(0, prefix.length+'/pg/stats/'.length) == prefix+'/pg/stats/' || // old name
key.substr(0, prefix.length+'/pgstats/'.length) == prefix+'/pgstats/' ||
key.substr(0, prefix.length+'/pool/stats/'.length) == prefix+'/pool/stats/' ||
key == prefix+'/stats')
{

View File

@ -24,6 +24,7 @@ public:
}
iovec iov;
std::vector<iovec> iov_list;
NodeVitastorImage *img = NULL;
int op = 0;
uint64_t offset = 0, len = 0, version = 0;
@ -141,10 +142,8 @@ NAN_METHOD(NodeVitastor::Read)
static NodeVitastorRequest* getWriteRequest(const Nan::FunctionCallbackInfo<v8::Value> & info, int argpos)
{
uint64_t offset = Nan::To<int64_t>(info[argpos+0]).FromJust();
char *buf = node::Buffer::Data(info[argpos+1]);
uint64_t len = node::Buffer::Length(info[argpos+1]);
const auto & bufarg = info[argpos+1];
uint64_t version = 0;
if (!info[argpos+2].IsEmpty() && info[argpos+2]->IsObject())
{
auto key = Nan::New<v8::String>("version").ToLocalChecked();
@ -159,14 +158,33 @@ static NodeVitastorRequest* getWriteRequest(const Nan::FunctionCallbackInfo<v8::
auto req = new NodeVitastorRequest(callback);
req->offset = offset;
req->len = len;
req->version = version;
if (bufarg->IsArray())
{
auto buffers = bufarg.As<v8::Array>();
req->len = 0;
for (uint32_t i = 0; i < buffers->Length(); i++)
{
auto buffer_obj = Nan::Get(buffers, i).ToLocalChecked();
char *buf = node::Buffer::Data(buffer_obj);
uint64_t len = node::Buffer::Length(buffer_obj);
req->iov_list.push_back({ .iov_base = buf, .iov_len = len });
req->len += len;
}
}
else
{
char *buf = node::Buffer::Data(bufarg);
uint64_t len = node::Buffer::Length(bufarg);
req->iov = { .iov_base = buf, .iov_len = req->len };
req->len = len;
}
return req;
}
// write(pool, inode, offset, buffer, { version }?, callback(err))
// write(pool, inode, offset, buf: Buffer | Buffer[], { version }?, callback(err))
NAN_METHOD(NodeVitastor::Write)
{
TRACE("NodeVitastor::Write");
@ -179,7 +197,10 @@ NAN_METHOD(NodeVitastor::Write)
auto req = getWriteRequest(info, 2);
std::unique_lock<std::mutex> lock(self->mu);
vitastor_c_write(self->c, ((pool << (64-POOL_ID_BITS)) | inode), req->offset, req->len, req->version, &req->iov, 1, on_write_finish, req);
vitastor_c_write(self->c, ((pool << (64-POOL_ID_BITS)) | inode), req->offset, req->len, req->version,
req->iov_list.size() ? req->iov_list.data() : &req->iov,
req->iov_list.size() ? req->iov_list.size() : 1,
on_write_finish, req);
}
// sync(callback(err))
@ -421,7 +442,10 @@ void NodeVitastorImage::exec_request(NodeVitastorRequest *req)
else if (req->op == NODE_VITASTOR_WRITE)
{
uint64_t ino = vitastor_c_inode_get_num(watch);
vitastor_c_write(cli->c, ino, req->offset, req->len, req->version, &req->iov, 1, NodeVitastor::on_write_finish, req);
vitastor_c_write(cli->c, ino, req->offset, req->len, req->version,
req->iov_list.size() ? req->iov_list.data() : &req->iov,
req->iov_list.size() ? req->iov_list.size() : 1,
NodeVitastor::on_write_finish, req);
}
else if (req->op == NODE_VITASTOR_SYNC)
{

View File

@ -19,7 +19,7 @@ public:
static NAN_METHOD(Create);
// read(pool, inode, offset, len, callback(err, buffer, version))
static NAN_METHOD(Read);
// write(pool, inode, offset, buffer, { version }?, callback(err))
// write(pool, inode, offset, buf: Buffer | Buffer[], { version }?, callback(err))
static NAN_METHOD(Write);
// sync(callback(err))
static NAN_METHOD(Sync);
@ -56,7 +56,7 @@ public:
static NAN_METHOD(Create);
// read(offset, len, callback(err, buffer, version))
static NAN_METHOD(Read);
// write(offset, buffer, { version }?, callback(err))
// write(offset, buf: Buffer | Buffer[], { version }?, callback(err))
static NAN_METHOD(Write);
// sync(callback(err))
static NAN_METHOD(Sync);

View File

@ -50,7 +50,7 @@ from cinder.volume import configuration
from cinder.volume import driver
from cinder.volume import volume_utils
VERSION = '1.7.1'
VITASTOR_VERSION = '1.7.1'
LOG = logging.getLogger(__name__)
@ -238,7 +238,7 @@ class VitastorDriver(driver.CloneableImageVD,
stats = {
'vendor_name': 'Vitastor',
'driver_version': self.VERSION,
'driver_version': VITASTOR_VERSION,
'storage_protocol': 'vitastor',
'total_capacity_gb': 'unknown',
'free_capacity_gb': 'unknown',

View File

@ -19,7 +19,7 @@ if("${CMAKE_INSTALL_PREFIX}" MATCHES "^/usr/local/?$")
set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}")
endif()
add_definitions(-DVERSION="1.7.1")
add_definitions(-DVITASTOR_VERSION="1.7.1")
add_definitions(-D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -Wall -Wno-sign-compare -Wno-comment -Wno-parentheses -Wno-pointer-arith -fdiagnostics-color=always -fno-omit-frame-pointer -I ${CMAKE_SOURCE_DIR}/src)
add_link_options(-fno-omit-frame-pointer)
if (${WITH_ASAN})

View File

@ -13,7 +13,7 @@ target_link_libraries(vitastor_blk
# for timerfd_manager
vitastor_common
)
set_target_properties(vitastor_blk PROPERTIES VERSION ${VERSION} SOVERSION 0)
set_target_properties(vitastor_blk PROPERTIES VERSION ${VITASTOR_VERSION} SOVERSION 0)
if (${WITH_FIO})
# libfio_vitastor_blk.so

View File

@ -29,7 +29,7 @@ target_link_libraries(vitastor_client
${LIBURING_LIBRARIES}
${IBVERBS_LIBRARIES}
)
set_target_properties(vitastor_client PROPERTIES VERSION ${VERSION} SOVERSION 0)
set_target_properties(vitastor_client PROPERTIES VERSION ${VITASTOR_VERSION} SOVERSION 0)
configure_file(vitastor.pc.in vitastor.pc @ONLY)
if (${WITH_FIO})

View File

@ -452,12 +452,11 @@ void cluster_client_t::on_change_pg_state_hook(pool_id_t pool_id, pg_num_t pg_nu
if (pg_cfg.cur_primary != prev_primary)
{
// Repeat this PG operations because an OSD which stopped being primary may not fsync operations
if (wb->repeat_ops_for(this, 0, pool_id, pg_num) > 0)
{
wb->repeat_ops_for(this, 0, pool_id, pg_num);
}
// Always continue to resume operations hung because of lack of the primary OSD
continue_ops();
}
}
}
bool cluster_client_t::get_immediate_commit(uint64_t inode)
{
@ -1066,11 +1065,11 @@ bool cluster_client_t::try_send(cluster_op_t *op, int i)
!pg_it->second.pause && pg_it->second.cur_primary)
{
osd_num_t primary_osd = pg_it->second.cur_primary;
part->osd_num = primary_osd;
auto peer_it = msgr.osd_peer_fds.find(primary_osd);
if (peer_it != msgr.osd_peer_fds.end())
{
int peer_fd = peer_it->second;
part->osd_num = primary_osd;
part->flags |= PART_SENT;
op->inflight_count++;
uint64_t pg_bitmap_size = (pool_cfg.data_block_size / pool_cfg.bitmap_granularity / 8) * (

View File

@ -333,7 +333,10 @@ void etcd_state_client_t::start_etcd_watcher()
etcd_watch_ws = NULL;
}
if (this->log_level > 1)
fprintf(stderr, "Trying to connect to etcd websocket at %s, watch from revision %ju\n", etcd_address.c_str(), etcd_watch_revision);
{
fprintf(stderr, "Trying to connect to etcd websocket at %s, watch from revision %ju/%ju/%ju\n", etcd_address.c_str(),
etcd_watch_revision_config, etcd_watch_revision_osd, etcd_watch_revision_pg);
}
etcd_watch_ws = open_websocket(tfd, etcd_address, etcd_api_path+"/watch", etcd_slow_timeout,
[this, cur_addr = selected_etcd_address](const http_response_t *msg)
{
@ -348,16 +351,20 @@ void etcd_state_client_t::start_etcd_watcher()
}
else
{
uint64_t watch_id = data["result"]["watch_id"].uint64_value();
if (data["result"]["created"].bool_value())
{
uint64_t watch_id = data["result"]["watch_id"].uint64_value();
if (watch_id == ETCD_CONFIG_WATCH_ID ||
watch_id == ETCD_PG_STATE_WATCH_ID ||
watch_id == ETCD_PG_HISTORY_WATCH_ID ||
watch_id == ETCD_OSD_STATE_WATCH_ID)
{
etcd_watches_initialised++;
}
if (etcd_watches_initialised == ETCD_TOTAL_WATCHES && this->log_level > 0)
fprintf(stderr, "Successfully subscribed to etcd at %s, revision %ju\n", cur_addr.c_str(), etcd_watch_revision);
{
fprintf(stderr, "Successfully subscribed to etcd at %s, revision %ju/%ju/%ju\n", cur_addr.c_str(),
etcd_watch_revision_config, etcd_watch_revision_osd, etcd_watch_revision_pg);
}
}
if (data["result"]["canceled"].bool_value())
{
@ -375,7 +382,7 @@ void etcd_state_client_t::start_etcd_watcher()
data["result"]["compact_revision"].uint64_value());
http_close(etcd_watch_ws);
etcd_watch_ws = NULL;
etcd_watch_revision = 0;
etcd_watch_revision_config = etcd_watch_revision_osd = etcd_watch_revision_pg = 0;
on_reload_hook();
}
return;
@ -393,13 +400,29 @@ void etcd_state_client_t::start_etcd_watcher()
exit(1);
}
}
// Save revision only if it's present in the message - because sometimes etcd sends something without a header, like:
// {"error": {"grpc_code": 14, "http_code": 503, "http_status": "Service Unavailable", "message": "error reading from server: EOF"}}
if (etcd_watches_initialised == ETCD_TOTAL_WATCHES && !data["result"]["header"]["revision"].is_null())
{
// Protect against a revision beign split into multiple messages and some
// of them being lost. Even though I'm not sure if etcd actually splits them
// Also sometimes etcd sends something without a header, like:
// {"error": {"grpc_code": 14, "http_code": 503, "http_status": "Service Unavailable", "message": "error reading from server: EOF"}}
etcd_watch_revision = data["result"]["header"]["revision"].uint64_value();
// Restart watchers from the same revision number as in the last received message,
// not from the next one to protect against revision being split into multiple messages,
// even though etcd guarantees not to do that **within a single watcher** without fragment=true:
// https://etcd.io/docs/v3.5/learning/api_guarantees/#watch-apis
// Revision contents are ALWAYS split into separate messages for different watchers though!
// So generally we have to resume each watcher from its own revision...
// Progress messages may have watch_id=-1 if sent on behalf of multiple watchers though.
// And antietcd has an advanced semantic which merges the same revision for all watchers
// into one message and just omits watch_id.
// So we also have to handle the case where watch_id is -1 or not present (0).
auto watch_rev = data["result"]["header"]["revision"].uint64_value();
if (!watch_id || watch_id == UINT64_MAX)
etcd_watch_revision_config = etcd_watch_revision_osd = etcd_watch_revision_pg = watch_rev;
else if (watch_id == ETCD_CONFIG_WATCH_ID)
etcd_watch_revision_config = watch_rev;
else if (watch_id == ETCD_PG_STATE_WATCH_ID)
etcd_watch_revision_pg = watch_rev;
else if (watch_id == ETCD_OSD_STATE_WATCH_ID)
etcd_watch_revision_osd = watch_rev;
addresses_to_try.clear();
}
// First gather all changes into a hash to remove multiple overwrites
@ -457,7 +480,7 @@ void etcd_state_client_t::start_etcd_watcher()
{ "create_request", json11::Json::object {
{ "key", base64_encode(etcd_prefix+"/config/") },
{ "range_end", base64_encode(etcd_prefix+"/config0") },
{ "start_revision", etcd_watch_revision },
{ "start_revision", etcd_watch_revision_config },
{ "watch_id", ETCD_CONFIG_WATCH_ID },
{ "progress_notify", true },
} }
@ -466,29 +489,21 @@ void etcd_state_client_t::start_etcd_watcher()
{ "create_request", json11::Json::object {
{ "key", base64_encode(etcd_prefix+"/osd/state/") },
{ "range_end", base64_encode(etcd_prefix+"/osd/state0") },
{ "start_revision", etcd_watch_revision },
{ "start_revision", etcd_watch_revision_osd },
{ "watch_id", ETCD_OSD_STATE_WATCH_ID },
{ "progress_notify", true },
} }
}).dump());
http_post_message(etcd_watch_ws, WS_TEXT, json11::Json(json11::Json::object {
{ "create_request", json11::Json::object {
{ "key", base64_encode(etcd_prefix+"/pg/state/") },
{ "range_end", base64_encode(etcd_prefix+"/pg/state0") },
{ "start_revision", etcd_watch_revision },
{ "key", base64_encode(etcd_prefix+"/pg/") },
{ "range_end", base64_encode(etcd_prefix+"/pg0") },
{ "start_revision", etcd_watch_revision_pg },
{ "watch_id", ETCD_PG_STATE_WATCH_ID },
{ "progress_notify", true },
} }
}).dump());
http_post_message(etcd_watch_ws, WS_TEXT, json11::Json(json11::Json::object {
{ "create_request", json11::Json::object {
{ "key", base64_encode(etcd_prefix+"/pg/history/") },
{ "range_end", base64_encode(etcd_prefix+"/pg/history0") },
{ "start_revision", etcd_watch_revision },
{ "watch_id", ETCD_PG_HISTORY_WATCH_ID },
{ "progress_notify", true },
} }
}).dump());
// FIXME: Do not watch /pg/history/ at all in client code (not in OSD)
if (on_start_watcher_hook)
{
on_start_watcher_hook(etcd_watch_ws);
@ -591,6 +606,11 @@ void etcd_state_client_t::load_pgs()
{ "key", base64_encode(etcd_prefix+"/config/pgs") },
} }
},
json11::Json::object {
{ "request_range", json11::Json::object {
{ "key", base64_encode(etcd_prefix+"/pg/config") },
} }
},
json11::Json::object {
{ "request_range", json11::Json::object {
{ "key", base64_encode(etcd_prefix+"/config/inode/") },
@ -640,13 +660,10 @@ void etcd_state_client_t::load_pgs()
return;
}
reset_pg_exists();
if (!etcd_watch_revision)
{
etcd_watch_revision = data["header"]["revision"].uint64_value()+1;
etcd_watch_revision_config = etcd_watch_revision_osd = etcd_watch_revision_pg = data["header"]["revision"].uint64_value()+1;
if (this->log_level > 3)
{
fprintf(stderr, "Loaded revision %ju of PG configuration\n", etcd_watch_revision-1);
}
fprintf(stderr, "Loaded revision %ju of PG configuration\n", etcd_watch_revision_pg-1);
}
for (auto & res: data["responses"].array_items())
{
@ -895,8 +912,17 @@ void etcd_state_client_t::parse_state(const etcd_kv_t & kv)
on_change_pool_config_hook();
}
}
else if (key == etcd_prefix+"/config/pgs")
else if (key == etcd_prefix+"/pg/config" || key == etcd_prefix+"/config/pgs")
{
if (key == etcd_prefix+"/pg/config")
{
new_pg_config = !value.is_null();
}
else if (new_pg_config)
{
// Ignore old key if the new one is present
return;
}
for (auto & pool_item: this->pool_config)
{
for (auto & pg_item: pool_item.second.pg_config)

View File

@ -10,10 +10,9 @@
#include "timerfd_manager.h"
#define ETCD_CONFIG_WATCH_ID 1
#define ETCD_PG_STATE_WATCH_ID 2
#define ETCD_PG_HISTORY_WATCH_ID 3
#define ETCD_OSD_STATE_WATCH_ID 4
#define ETCD_TOTAL_WATCHES 4
#define ETCD_OSD_STATE_WATCH_ID 2
#define ETCD_PG_STATE_WATCH_ID 3
#define ETCD_TOTAL_WATCHES 3
#define DEFAULT_BLOCK_SIZE 128*1024
#define MIN_DATA_BLOCK_SIZE 4*1024
@ -95,7 +94,7 @@ protected:
std::string selected_etcd_address;
std::vector<std::string> addresses_to_try;
std::vector<inode_watch_t*> watches;
http_co_t *etcd_watch_ws = NULL, *keepalive_client = NULL;
bool new_pg_config = false;
int ws_keepalive_timer = -1;
int ws_alive = 0;
bool rand_initialized = false;
@ -115,8 +114,11 @@ public:
int log_level = 0;
timerfd_manager_t *tfd = NULL;
http_co_t *etcd_watch_ws = NULL, *keepalive_client = NULL;
int etcd_watches_initialised = 0;
uint64_t etcd_watch_revision = 0;
uint64_t etcd_watch_revision_config = 0;
uint64_t etcd_watch_revision_osd = 0;
uint64_t etcd_watch_revision_pg = 0;
std::map<pool_id_t, pool_config_t> pool_config;
std::map<osd_num_t, json11::Json> peer_states;
std::set<osd_num_t> seen_peers;

View File

@ -253,7 +253,7 @@ nla_put_failure:
const char *exe_name = NULL;
const char *help_text =
"Vitastor NBD proxy " VERSION "\n"
"Vitastor NBD proxy " VITASTOR_VERSION "\n"
"(c) Vitaliy Filippov, 2020+ (VNPL-1.1)\n"
"\n"
"COMMANDS:\n"

View File

@ -17,7 +17,7 @@
static const char *exe_name = NULL;
static const char* help_text =
"Vitastor command-line tool " VERSION "\n"
"Vitastor command-line tool " VITASTOR_VERSION "\n"
"(c) Vitaliy Filippov, 2019+ (VNPL-1.1)\n"
"\n"
"COMMANDS:\n"

View File

@ -49,8 +49,8 @@ struct pg_lister_t
{ "success", json11::Json::array {
json11::Json::object {
{ "request_range", json11::Json::object {
{ "key", base64_encode(parent->cli->st_cli.etcd_prefix+"/pg/stats"+(pool_id ? "/"+std::to_string(pool_id)+"/" : "/")) },
{ "range_end", base64_encode(parent->cli->st_cli.etcd_prefix+"/pg/stats"+(pool_id ? "/"+std::to_string(pool_id)+"0" : "0")) },
{ "key", base64_encode(parent->cli->st_cli.etcd_prefix+"/pgstats"+(pool_id ? "/"+std::to_string(pool_id)+"/" : "/")) },
{ "range_end", base64_encode(parent->cli->st_cli.etcd_prefix+"/pgstats"+(pool_id ? "/"+std::to_string(pool_id)+"0" : "0")) },
} },
},
} },
@ -65,7 +65,7 @@ resume_1:
state = 100;
return;
}
parent->iterate_kvs_2(parent->etcd_result["responses"][0]["response_range"]["kvs"], "/pg/stats/", [&](pool_id_t pool_id, uint64_t pg_num, json11::Json value)
parent->iterate_kvs_2(parent->etcd_result["responses"][0]["response_range"]["kvs"], "/pgstats/", [&](pool_id_t pool_id, uint64_t pg_num, json11::Json value)
{
pg_stats[(pool_pg_num_t){ .pool_id = pool_id, .pg_num = (pg_num_t)pg_num }] = value;
});

View File

@ -214,10 +214,10 @@ resume_1:
json11::Json::object {
{ "request_range", json11::Json::object {
{ "key", base64_encode(
parent->cli->st_cli.etcd_prefix+"/pg/stats/"
parent->cli->st_cli.etcd_prefix+"/pgstats/"
) },
{ "range_end", base64_encode(
parent->cli->st_cli.etcd_prefix+"/pg/stats0"
parent->cli->st_cli.etcd_prefix+"/pgstats0"
) },
} },
},
@ -235,7 +235,7 @@ resume_1:
}
// Calculate recovery percent
std::map<pool_id_t, object_counts_t> counts;
parent->iterate_kvs_2(parent->etcd_result["responses"][0]["response_range"]["kvs"], "/pg/stats/",
parent->iterate_kvs_2(parent->etcd_result["responses"][0]["response_range"]["kvs"], "/pgstats/",
[&](pool_id_t pool_id, uint64_t pg_num, json11::Json value)
{
auto & cnt = counts[pool_id];

View File

@ -176,7 +176,7 @@ struct rm_osd_t
json11::Json::object {
{ "request_range", json11::Json::object {
{ "key", base64_encode(
parent->cli->st_cli.etcd_prefix+"/config/pgs"
parent->cli->st_cli.etcd_prefix+"/pg/config"
) },
} },
},
@ -229,7 +229,7 @@ struct rm_osd_t
}
if (!new_pgs.is_null())
{
auto pgs_key = base64_encode(parent->cli->st_cli.etcd_prefix+"/config/pgs");
auto pgs_key = base64_encode(parent->cli->st_cli.etcd_prefix+"/pg/config");
rm_items.push_back(json11::Json::object {
{ "request_put", json11::Json::object {
{ "key", pgs_key },
@ -427,7 +427,7 @@ struct rm_osd_t
{ "target", "MOD" },
{ "key", history_key },
{ "result", "LESS" },
{ "mod_revision", parent->cli->st_cli.etcd_watch_revision+1 },
{ "mod_revision", parent->cli->st_cli.etcd_watch_revision_pg+1 },
});
}
}

View File

@ -5,7 +5,7 @@
#include "str_util.h"
static const char *help_text =
"Vitastor disk management tool " VERSION "\n"
"Vitastor disk management tool " VITASTOR_VERSION "\n"
"(c) Vitaliy Filippov, 2022+ (VNPL-1.1)\n"
"\n"
"COMMANDS:\n"

View File

@ -10,7 +10,7 @@ set_target_properties(vitastor_kv PROPERTIES PUBLIC_HEADER "kv/vitastor_kv.h")
target_link_libraries(vitastor_kv
vitastor_client
)
set_target_properties(vitastor_kv PROPERTIES VERSION ${VERSION} SOVERSION 0)
set_target_properties(vitastor_kv PROPERTIES VERSION ${VITASTOR_VERSION} SOVERSION 0)
# vitastor-kv
add_executable(vitastor-kv

View File

@ -53,7 +53,7 @@ nfs_proxy_t::~nfs_proxy_t()
}
static const char* help_text =
"Vitastor NFS 3.0 proxy " VERSION "\n"
"Vitastor NFS 3.0 proxy " VITASTOR_VERSION "\n"
"(c) Vitaliy Filippov, 2021+ (VNPL-1.1)\n"
"\n"
"vitastor-nfs (--fs <NAME> | --block) [-o <OPT>] mount <MOUNTPOINT>\n"
@ -372,24 +372,6 @@ void nfs_proxy_t::watch_stats()
assert(cli->st_cli.on_start_watcher_hook == NULL);
cli->st_cli.on_start_watcher_hook = [this](http_co_t *etcd_watch_ws)
{
http_post_message(etcd_watch_ws, WS_TEXT, json11::Json(json11::Json::object {
{ "create_request", json11::Json::object {
{ "key", base64_encode(cli->st_cli.etcd_prefix+"/inode/stats/") },
{ "range_end", base64_encode(cli->st_cli.etcd_prefix+"/inode/stats0") },
{ "start_revision", cli->st_cli.etcd_watch_revision },
{ "watch_id", ETCD_INODE_STATS_WATCH_ID },
{ "progress_notify", true },
} }
}).dump());
http_post_message(etcd_watch_ws, WS_TEXT, json11::Json(json11::Json::object {
{ "create_request", json11::Json::object {
{ "key", base64_encode(cli->st_cli.etcd_prefix+"/pool/stats/") },
{ "range_end", base64_encode(cli->st_cli.etcd_prefix+"/pool/stats0") },
{ "start_revision", cli->st_cli.etcd_watch_revision },
{ "watch_id", ETCD_POOL_STATS_WATCH_ID },
{ "progress_notify", true },
} }
}).dump());
cli->st_cli.etcd_txn_slow(json11::Json::object {
{ "success", json11::Json::array {
json11::Json::object {
@ -415,6 +397,28 @@ void nfs_proxy_t::watch_stats()
parse_stats(kv);
}
}
if (cli->st_cli.etcd_watch_ws)
{
auto watch_rev = res["header"]["revision"].uint64_value()+1;
http_post_message(cli->st_cli.etcd_watch_ws, WS_TEXT, json11::Json(json11::Json::object {
{ "create_request", json11::Json::object {
{ "key", base64_encode(cli->st_cli.etcd_prefix+"/inode/stats/") },
{ "range_end", base64_encode(cli->st_cli.etcd_prefix+"/inode/stats0") },
{ "start_revision", watch_rev },
{ "watch_id", ETCD_INODE_STATS_WATCH_ID },
{ "progress_notify", true },
} }
}).dump());
http_post_message(cli->st_cli.etcd_watch_ws, WS_TEXT, json11::Json(json11::Json::object {
{ "create_request", json11::Json::object {
{ "key", base64_encode(cli->st_cli.etcd_prefix+"/pool/stats/") },
{ "range_end", base64_encode(cli->st_cli.etcd_prefix+"/pool/stats0") },
{ "start_revision", watch_rev },
{ "watch_id", ETCD_POOL_STATS_WATCH_ID },
{ "progress_notify", true },
} }
}).dump());
}
});
};
cli->st_cli.on_change_hook = [this, old_hook = cli->st_cli.on_change_hook](std::map<std::string, etcd_kv_t> & changes)

View File

@ -169,6 +169,7 @@ json11::Json osd_t::get_osd_state()
else
st["addresses"] = getifaddr_list();
st["host"] = std::string(hostname.data(), hostname.size());
st["version"] = VITASTOR_VERSION;
st["port"] = listening_port;
st["primary_enabled"] = run_primary;
st["blockstore_enabled"] = bs ? true : false;
@ -199,6 +200,7 @@ json11::Json osd_t::get_statistics()
st["bitmap_granularity"] = (uint64_t)bs_bitmap_granularity;
st["immediate_commit"] = immediate_commit == IMMEDIATE_ALL ? "all" : (immediate_commit == IMMEDIATE_SMALL ? "small" : "none");
st["host"] = self_state["host"];
st["version"] = VITASTOR_VERSION;
json11::Json::object op_stats, subop_stats;
for (int i = OSD_OP_MIN; i <= OSD_OP_MAX; i++)
{
@ -371,7 +373,7 @@ void osd_t::report_statistics()
pg_stats["write_osd_set"] = pg.cur_set;
txn.push_back(json11::Json::object {
{ "request_put", json11::Json::object {
{ "key", base64_encode(st_cli.etcd_prefix+"/pg/stats/"+std::to_string(pg.pool_id)+"/"+std::to_string(pg.pg_num)) },
{ "key", base64_encode(st_cli.etcd_prefix+"/pgstats/"+std::to_string(pg.pool_id)+"/"+std::to_string(pg.pg_num)) },
{ "value", base64_encode(json11::Json(pg_stats).dump()) },
} }
});
@ -418,7 +420,7 @@ void osd_t::on_change_etcd_state_hook(std::map<std::string, etcd_kv_t> & changes
}
if (run_primary)
{
bool pgs = changes.find(st_cli.etcd_prefix+"/config/pgs") != changes.end();
bool pgs = changes.find(st_cli.etcd_prefix+"/pg/config") != changes.end();
if (pools || pgs)
{
apply_pg_count();
@ -903,7 +905,7 @@ void osd_t::report_pg_states()
{ "target", "MOD" },
{ "key", state_key_base64 },
{ "result", "LESS" },
{ "mod_revision", st_cli.etcd_watch_revision+1 },
{ "mod_revision", st_cli.etcd_watch_revision_pg+1 },
});
continue;
}
@ -974,7 +976,7 @@ void osd_t::report_pg_states()
{ "target", "MOD" },
{ "key", history_key },
{ "result", "LESS" },
{ "mod_revision", st_cli.etcd_watch_revision+1 },
{ "mod_revision", st_cli.etcd_watch_revision_pg+1 },
});
success.push_back(json11::Json::object {
{ "request_put", json11::Json::object {

View File

@ -20,7 +20,7 @@ static void handle_sigint(int sig)
}
static const char* help_text =
"Vitastor OSD (block object storage daemon) " VERSION "\n"
"Vitastor OSD (block object storage daemon) " VITASTOR_VERSION "\n"
"(c) Vitaliy Filippov, 2019+ (VNPL-1.1)\n"
"\n"
"OSDs are usually started by vitastor-disk.\n"

View File

@ -22,7 +22,7 @@ void configure_single_pg_pool(cluster_client_t *cli)
},
});
cli->st_cli.parse_state((etcd_kv_t){
.key = "/config/pgs",
.key = "/pg/config",
.value = json11::Json::object {
{ "items", json11::Json::object {
{ "1", json11::Json::object {

View File

@ -25,11 +25,22 @@ ETCD_IP=${ETCD_IP:-127.0.0.1}
ETCD_PORT=${ETCD_PORT:-12379}
ETCD_COUNT=${ETCD_COUNT:-1}
ANTIETCD=${ANTIETCD}
USE_RAMDISK=${USE_RAMDISK}
if [ "$KEEP_DATA" = "" ]; then
RAMDISK=/run/user/$(id -u)
findmnt $RAMDISK >/dev/null || (sudo mkdir -p $RAMDISK && sudo mount -t tmpfs tmpfs $RAMDISK)
if [[ -z "$KEEP_DATA" ]]; then
rm -rf ./testdata
rm -rf /run/user/$(id -u)/testdata_etcd*
rm -rf /run/user/$(id -u)/testdata_etcd* /run/user/$(id -u)/testdata_bin
mkdir -p ./testdata
if [[ -n "$USE_RAMDISK" ]]; then
OSD_ARGS="$OSD_ARGS --data_io cached"
mkdir -p /run/user/$(id -u)/testdata_bin
ln -s /run/user/$(id -u)/testdata_bin ./testdata/bin
else
mkdir -p ./testdata/bin
fi
fi
ETCD_URL="http://$ETCD_IP:$ETCD_PORT"
@ -41,9 +52,7 @@ start_etcd()
{
local i=$1
if [[ -z "$ANTIETCD" ]]; then
local t=/run/user/$(id -u)
findmnt $t >/dev/null || (sudo mkdir -p $t && sudo mount -t tmpfs tmpfs $t)
ionice -c2 -n0 $ETCD -name etcd$i --data-dir /run/user/$(id -u)/testdata_etcd$i \
ionice -c2 -n0 $ETCD -name etcd$i --data-dir $RAMDISK/testdata_etcd$i \
--advertise-client-urls http://$ETCD_IP:$((ETCD_PORT+2*i-2)) --listen-client-urls http://$ETCD_IP:$((ETCD_PORT+2*i-2)) \
--initial-advertise-peer-urls http://$ETCD_IP:$((ETCD_PORT+2*i-1)) --listen-peer-urls http://$ETCD_IP:$((ETCD_PORT+2*i-1)) \
--initial-cluster-token vitastor-tests-etcd --initial-cluster-state new \

View File

@ -50,8 +50,8 @@ if ! type -t osd_dev; then
osd_dev()
{
local i=$1
[[ -f ./testdata/test_osd$i.bin ]] || dd if=/dev/zero of=./testdata/test_osd$i.bin bs=1024 count=1 seek=$((OSD_SIZE*1024-1))
echo ./testdata/test_osd$i.bin
[[ -f ./testdata/bin/test_osd$i.bin ]] || dd if=/dev/zero of=./testdata/bin/test_osd$i.bin bs=1024 count=1 seek=$((OSD_SIZE*1024-1))
echo ./testdata/bin/test_osd$i.bin
}
fi
@ -89,7 +89,7 @@ wait_up()
local i=0
local configured=0
while [[ $i -lt $sec ]]; do
if $ETCDCTL get /vitastor/config/pgs --print-value-only | jq -s -e '(. | length) != 0 and ([ .[0].items["1"][] |
if $ETCDCTL get /vitastor/pg/config --print-value-only | jq -s -e '(. | length) != 0 and ([ .[0].items["1"][] |
select(((.osd_set | select(. != 0) | sort | unique) | length) == '$PG_SIZE') ] | length) == '$PG_COUNT; then
configured=1
if $ETCDCTL get /vitastor/pg/state/1/ --prefix --print-value-only | jq -s -e '[ .[] | select(.state == ["active"]) ] | length == '$PG_COUNT; then

View File

@ -13,14 +13,14 @@ start_osd 4
sleep 2
for i in {1..30}; do
($ETCDCTL get /vitastor/config/pgs --print-value-only |\
($ETCDCTL get /vitastor/pg/config --print-value-only |\
jq -s -e '([ .[0].items["1"] | map(.osd_set)[][] ] | sort | unique == ["1","2","3","4"])') && \
($ETCDCTL get --prefix /vitastor/pg/state/ --print-value-only | jq -s -e '([ .[] | select(.state == ["active"]) ] | length) == '$PG_COUNT) && \
break
sleep 1
done
if ! ($ETCDCTL get /vitastor/config/pgs --print-value-only |\
if ! ($ETCDCTL get /vitastor/pg/config --print-value-only |\
jq -s -e '([ .[0].items["1"] | map(.osd_set)[][] ] | sort | unique == ["1","2","3","4"])'); then
format_error "FAILED: OSD NOT ADDED INTO DISTRIBUTION"
fi
@ -35,14 +35,14 @@ build/src/cmd/vitastor-cli --etcd_address $ETCD_URL rm-osd --force 4
sleep 2
for i in {1..30}; do
($ETCDCTL get /vitastor/config/pgs --print-value-only |\
($ETCDCTL get /vitastor/pg/config --print-value-only |\
jq -s -e '([ .[0].items["1"] | map(.osd_set)[][] ] | sort | unique == ["1","2","3"])') && \
($ETCDCTL get --prefix /vitastor/pg/state/ --print-value-only | jq -s -e '([ .[] | select(.state == ["active"] or .state == ["active", "left_on_dead"]) ] | length) == '$PG_COUNT'') && \
break
sleep 1
done
if ! ($ETCDCTL get /vitastor/config/pgs --print-value-only |\
if ! ($ETCDCTL get /vitastor/pg/config --print-value-only |\
jq -s -e '([ .[0].items["1"] | map(.osd_set)[][] ] | sort | unique == ["1","2","3"])'); then
format_error "FAILED: OSD NOT REMOVED FROM DISTRIBUTION"
fi

View File

@ -23,7 +23,7 @@ try_change()
$ETCDCTL put /vitastor/config/pools '{"1":{'$POOLCFG',"pg_size":'$PG_SIZE',"pg_minsize":'$PG_MINSIZE',"pg_count":'$n'}}'
for i in {1..60}; do
($ETCDCTL get /vitastor/config/pgs --print-value-only | jq -s -e '(.[0].items["1"] | map((.osd_set | select(. > 0)) | length == 2) | length) == '$n) && \
($ETCDCTL get /vitastor/pg/config --print-value-only | jq -s -e '(.[0].items["1"] | map((.osd_set | select(. > 0)) | length == 2) | length) == '$n) && \
($ETCDCTL get --prefix /vitastor/pg/state/ --print-value-only | jq -s -e '([ .[] | select(.state == ["active"] or .state == ["active", "has_misplaced"]) ] | length) == '$n'') && \
break
sleep 1
@ -36,14 +36,14 @@ try_change()
sleep 1
done
if ! ($ETCDCTL get /vitastor/config/pgs --print-value-only | jq -s -e '(.[0].items["1"] | map((.osd_set | select(. > 0)) | length == 2) | length) == '$n); then
$ETCDCTL get /vitastor/config/pgs
if ! ($ETCDCTL get /vitastor/pg/config --print-value-only | jq -s -e '(.[0].items["1"] | map((.osd_set | select(. > 0)) | length == 2) | length) == '$n); then
$ETCDCTL get /vitastor/pg/config
$ETCDCTL get --prefix /vitastor/pg/state/
format_error "FAILED: $n PGS NOT CONFIGURED"
fi
if ! ($ETCDCTL get --prefix /vitastor/pg/state/ --print-value-only | jq -s -e '([ .[] | select(.state == ["active"]) ] | length) == '$n); then
$ETCDCTL get /vitastor/config/pgs
$ETCDCTL get /vitastor/pg/config
$ETCDCTL get --prefix /vitastor/pg/state/
format_error "FAILED: $n PGS NOT UP"
fi
@ -53,7 +53,7 @@ try_change()
nobj=0
waittime=0
while [[ $nobj -ne $NOBJ && $waittime -lt 7 ]]; do
nobj=`$ETCDCTL get --prefix '/vitastor/pg/stats' --print-value-only | jq -s '[ .[].object_count ] | reduce .[] as $num (0; .+$num)'`
nobj=`$ETCDCTL get --prefix '/vitastor/pgstats' --print-value-only | jq -s '[ .[].object_count ] | reduce .[] as $num (0; .+$num)'`
if [[ $nobj -ne $NOBJ ]]; then
waittime=$((waittime+1))
sleep 1

View File

@ -13,7 +13,7 @@ try_change()
$ETCDCTL put /vitastor/config/pools '{"1":{'$POOLCFG',"pg_size":'$s',"pg_minsize":'$PG_MINSIZE',"pg_count":'$n'}}'
for i in {1..10}; do
($ETCDCTL get /vitastor/config/pgs --print-value-only |\
($ETCDCTL get /vitastor/pg/config --print-value-only |\
jq -s -e '(.[0].items["1"] | map( ([ .osd_set[] | select(. != 0) ] | length) == '$s' ) | length == '$n')
and ([ .[0].items["1"] | map(.osd_set)[][] ] | sort | unique == ["1","2","3"])') && \
($ETCDCTL get --prefix /vitastor/pg/state/ --print-value-only | jq -s -e '([ .[] | select(.state == ["active"]) ] | length) == '$n'') && \
@ -21,16 +21,16 @@ try_change()
sleep 1
done
if ! ($ETCDCTL get /vitastor/config/pgs --print-value-only |\
if ! ($ETCDCTL get /vitastor/pg/config --print-value-only |\
jq -s -e '(.[0].items["1"] | map( ([ .osd_set[] | select(. != 0) ] | length) == '$s' ) | length == '$n')
and ([ .[0].items["1"] | map(.osd_set)[][] ] | sort | unique == ["1","2","3"])'); then
$ETCDCTL get /vitastor/config/pgs
$ETCDCTL get /vitastor/pg/config
$ETCDCTL get --prefix /vitastor/pg/state/
format_error "FAILED: PG SIZE NOT CHANGED OR SOME OSDS DO NOT HAVE PGS"
fi
if ! ($ETCDCTL get --prefix /vitastor/pg/state/ --print-value-only | jq -s -e '([ .[] | select(.state == ["active"]) ] | length) == '$n); then
$ETCDCTL get /vitastor/config/pgs
$ETCDCTL get /vitastor/pg/config
$ETCDCTL get --prefix /vitastor/pg/state/
format_error "FAILED: PGS NOT UP AFTER PG SIZE CHANGE"
fi

View File

@ -13,13 +13,13 @@ $ETCDCTL put /vitastor/config/inode/1/1 '{"name":"testimg","size":'$((IMG_SIZE*1
# Write
LD_PRELOAD="build/src/client/libfio_vitastor.so" \
fio -thread -name=test -ioengine=build/src/client/libfio_vitastor.so -bs=1M -direct=1 -iodepth=4 \
-mirror_file=./testdata/mirror.bin -end_fsync=1 -rw=write -etcd=$ETCD_URL -image=testimg -runtime=10
-mirror_file=./testdata/bin/mirror.bin -end_fsync=1 -rw=write -etcd=$ETCD_URL -image=testimg -runtime=10
# Intentionally corrupt OSD data and restart it
kill $OSD1_PID
data_offset=$(build/src/disk_tool/vitastor-disk simple-offsets ./testdata/test_osd1.bin $OFFSET_ARGS | grep data_offset | awk '{print $2}')
truncate -s $data_offset ./testdata/test_osd1.bin
dd if=/dev/zero of=./testdata/test_osd1.bin bs=1024 count=1 seek=$((OSD_SIZE*1024-1))
data_offset=$(build/src/disk_tool/vitastor-disk simple-offsets ./testdata/bin/test_osd1.bin $OFFSET_ARGS | grep data_offset | awk '{print $2}')
truncate -s $data_offset ./testdata/bin/test_osd1.bin
dd if=/dev/zero of=./testdata/bin/test_osd1.bin bs=1024 count=1 seek=$((OSD_SIZE*1024-1))
start_osd 1
# FIXME: corrupt the journal WHEN OSD IS RUNNING and check reads too
@ -30,8 +30,8 @@ wait_up 10
# Read everything back
qemu-img convert -S 4096 -p \
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testimg" \
-O raw ./testdata/read.bin
-O raw ./testdata/bin/read.bin
diff ./testdata/read.bin ./testdata/mirror.bin
diff ./testdata/bin/read.bin ./testdata/bin/mirror.bin
format_green OK

View File

@ -28,7 +28,7 @@ $ETCDCTL get --print-value-only /vitastor/config/pools | jq -s -e '. == [{"1":{"
sleep 2
$ETCDCTL get --prefix /vitastor/config/pgs --print-value-only | \
$ETCDCTL get --prefix /vitastor/pg/config --print-value-only | \
jq -s -e '([ .[0].items["1"] | .[].osd_set | map_values(. | tonumber) | select((.[0] <= 4) != (.[1] <= 4)) ] | length) == 4'
format_green OK

View File

@ -20,7 +20,7 @@ $ETCDCTL put /vitastor/config/inode/1/1 '{"name":"testimg","size":'$((IMG_SIZE*1
LD_PRELOAD="build/src/client/libfio_vitastor.so" \
fio -thread -name=test -ioengine=build/src/client/libfio_vitastor.so -bs=4M -direct=1 -iodepth=1 -fsync=1 -rw=write \
-mirror_file=./testdata/mirror.bin -etcd=$ETCD_URL -image=testimg -cluster_log_level=10
-mirror_file=./testdata/bin/mirror.bin -etcd=$ETCD_URL -image=testimg -cluster_log_level=10
kill_osds()
{
@ -53,13 +53,13 @@ kill_osds &
LD_PRELOAD="build/src/client/libfio_vitastor.so" \
fio -thread -name=test -ioengine=build/src/client/libfio_vitastor.so -bsrange=4k-128k -blockalign=4k -direct=1 -iodepth=32 -fsync=256 -rw=randrw \
-randrepeat=0 -refill_buffers=1 -mirror_file=./testdata/mirror.bin -etcd=$ETCD_URL -image=testimg -loops=10 -runtime=120
-randrepeat=0 -refill_buffers=1 -mirror_file=./testdata/bin/mirror.bin -etcd=$ETCD_URL -image=testimg -loops=10 -runtime=120
qemu-img convert -S 4096 -p \
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testimg" \
-O raw ./testdata/read.bin
-O raw ./testdata/bin/read.bin
if ! diff -q ./testdata/read.bin ./testdata/mirror.bin; then
if ! diff -q ./testdata/bin/read.bin ./testdata/bin/mirror.bin; then
format_error Data lost during self-heal
fi

View File

@ -43,7 +43,7 @@ wait_finish_rebalance 300
#fi
# Check that no objects are lost !
nobj=`$ETCDCTL get --prefix '/vitastor/pg/stats' --print-value-only | jq -s '[ .[].object_count ] | reduce .[] as $num (0; .+$num)'`
nobj=`$ETCDCTL get --prefix '/vitastor/pgstats' --print-value-only | jq -s '[ .[].object_count ] | reduce .[] as $num (0; .+$num)'`
if [ "$nobj" -ne $((IMG_SIZE*8/PG_DATA_SIZE)) ]; then
format_error "Data lost after multiple interrupted rebalancings"
fi

View File

@ -8,14 +8,14 @@ OSD_SIZE=1024
OSD_COUNT=5
OSD_ARGS="$OSD_ARGS"
for i in $(seq 1 $OSD_COUNT); do
dd if=/dev/zero of=./testdata/test_osd$i.bin bs=1024 count=1 seek=$((OSD_SIZE*1024-1))
build/src/osd/vitastor-osd --log_level 10 --osd_num $i --bind_address 127.0.0.1 --etcd_stats_interval 5 $OSD_ARGS --etcd_address $ETCD_URL $(build/src/disk_tool/vitastor-disk simple-offsets --format options ./testdata/test_osd$i.bin 2>/dev/null) >>./testdata/osd$i.log 2>&1 &
dd if=/dev/zero of=./testdata/bin/test_osd$i.bin bs=1024 count=1 seek=$((OSD_SIZE*1024-1))
build/src/osd/vitastor-osd --log_level 10 --osd_num $i --bind_address 127.0.0.1 --etcd_stats_interval 5 $OSD_ARGS --etcd_address $ETCD_URL $(build/src/disk_tool/vitastor-disk simple-offsets --format options ./testdata/bin/test_osd$i.bin 2>/dev/null) >>./testdata/osd$i.log 2>&1 &
eval OSD${i}_PID=$!
done
$ETCDCTL put /vitastor/config/pools '{"1":{"name":"testpool","scheme":"replicated","pg_size":2,"pg_minsize":1,"pg_count":1,"failure_domain":"osd","immediate_commit":"none"}}'
$ETCDCTL put /vitastor/config/pgs '{"items":{"1":{"1":{"osd_set":[1,0],"primary":1}}}}'
$ETCDCTL put /vitastor/pg/config '{"items":{"1":{"1":{"osd_set":[1,0],"primary":1}}}}'
for i in {1..30}; do
sleep 1
@ -30,7 +30,7 @@ LD_PRELOAD="build/src/client/libfio_vitastor.so" \
fio -thread -name=test -ioengine=build/src/client/libfio_vitastor.so -bs=4M -direct=1 -iodepth=1 -fsync=1 -rw=write \
-etcd=$ETCD_URL -pool=1 -inode=2 -size=32M -cluster_log_level=10
$ETCDCTL put /vitastor/config/pgs '{"items":{"1":{"1":{"osd_set":[1,0],"primary":0}}}}'
$ETCDCTL put /vitastor/pg/config '{"items":{"1":{"1":{"osd_set":[1,0],"primary":0}}}}'
for i in {1..30}; do
sleep 1
@ -43,7 +43,7 @@ done
$ETCDCTL put /vitastor/pg/history/1/1 '{"all_peers":[1,2,3]}'
$ETCDCTL put /vitastor/config/pgs '{"items":{"1":{"1":{"osd_set":[4,5],"primary":4}}}}'
$ETCDCTL put /vitastor/pg/config '{"items":{"1":{"1":{"osd_set":[4,5],"primary":4}}}}'
sleep 5
for i in {1..30}; do
@ -60,7 +60,7 @@ LD_PRELOAD="build/src/client/libfio_vitastor.so" \
fio -thread -name=test -ioengine=build/src/client/libfio_vitastor.so -bs=4k -direct=1 -iodepth=1 -fsync=1 -number_ios=2 -rw=write \
-etcd=$ETCD_URL -pool=1 -inode=2 -size=32M -cluster_log_level=10
$ETCDCTL put /vitastor/config/pgs '{"items":{"1":{"1":{"osd_set":[4,5],"primary":0}}}}'
$ETCDCTL put /vitastor/pg/config '{"items":{"1":{"1":{"osd_set":[4,5],"primary":0}}}}'
$ETCDCTL put /vitastor/pg/history/1/1 '{"all_peers":[1,2,3]}'
@ -76,7 +76,7 @@ done
cp testdata/osd4.log testdata/osd4_pre.log
>testdata/osd4.log
$ETCDCTL put /vitastor/config/pgs '{"items":{"1":{"1":{"osd_set":[4,5],"primary":4}}}}'
$ETCDCTL put /vitastor/pg/config '{"items":{"1":{"1":{"osd_set":[4,5],"primary":4}}}}'
for i in {1..30}; do
sleep 1

View File

@ -27,9 +27,9 @@ $ETCDCTL put /vitastor/config/pools '{"1":{"name":"testpool","scheme":"replicate
sleep 2
$ETCDCTL get --prefix /vitastor/config/pgs --print-value-only
$ETCDCTL get --prefix /vitastor/pg/config --print-value-only
if ! ($ETCDCTL get --prefix /vitastor/config/pgs --print-value-only | \
if ! ($ETCDCTL get --prefix /vitastor/pg/config --print-value-only | \
jq -s -e '[ [ .[] | select(has("items")) | .items["1"] | .[].osd_set | map(. | select(. != "" and (.|tonumber) < 5)) ][] | select((. | length) == 2) ] | length == 16'); then
format_error "Some PGs missing replicas"
fi

View File

@ -16,7 +16,7 @@ try_change()
s=$2
for i in {1..10}; do
($ETCDCTL get /vitastor/config/pgs --print-value-only |\
($ETCDCTL get /vitastor/pg/config --print-value-only |\
jq -s -e '(.[0].items["1"] | map( ([ .osd_set[] | select(. != 0) ] | length) == '$s' ) | length == '$n')
and ([ .[0].items["1"] | map(.osd_set)[][] ] | sort | unique == ["1","2","3","4","5","6","7"])') && \
($ETCDCTL get --prefix /vitastor/pg/state/ --print-value-only | jq -s -e '([ .[] | select(.state == ["active"]) ] | length) == '$n'') && \
@ -24,16 +24,16 @@ try_change()
sleep 1
done
if ! ($ETCDCTL get /vitastor/config/pgs --print-value-only |\
if ! ($ETCDCTL get /vitastor/pg/config --print-value-only |\
jq -s -e '(.[0].items["1"] | map( ([ .osd_set[] | select(. != 0) ] | length) == '$s' ) | length == '$n')
and ([ .[0].items["1"] | map(.osd_set)[][] ] | sort | unique == ["1","2","3","4","5","6","7"])'); then
$ETCDCTL get /vitastor/config/pgs
$ETCDCTL get /vitastor/pg/config
$ETCDCTL get --prefix /vitastor/pg/state/
format_error "FAILED: PG SIZE NOT CHANGED OR SOME OSDS DO NOT HAVE PGS"
fi
if ! ($ETCDCTL get --prefix /vitastor/pg/state/ --print-value-only | jq -s -e '([ .[] | select(.state == ["active"]) ] | length) == '$n); then
$ETCDCTL get /vitastor/config/pgs
$ETCDCTL get /vitastor/pg/config
$ETCDCTL get --prefix /vitastor/pg/state/
format_error "FAILED: PGS NOT UP AFTER PG SIZE CHANGE"
fi

View File

@ -16,14 +16,14 @@ trap "sudo build/src/client/vitastor-nbd unmap $NBD_DEV"'; kill -9 $(jobs -p)' E
sudo chown $(id -u) $NBD_DEV
dd if=/dev/urandom of=./testdata/img1.bin bs=1M count=$IMG_SIZE
dd if=/dev/urandom of=./testdata/bin/img1.bin bs=1M count=$IMG_SIZE
dd if=./testdata/img1.bin of=$NBD_DEV bs=1M count=$IMG_SIZE oflag=direct
dd if=./testdata/bin/img1.bin of=$NBD_DEV bs=1M count=$IMG_SIZE oflag=direct
verify() {
echo "Verifying before rebalance"
dd if=$NBD_DEV of=./testdata/img2.bin bs=1M count=$IMG_SIZE iflag=direct
diff ./testdata/img1.bin ./testdata/img2.bin
dd if=$NBD_DEV of=./testdata/bin/img2.bin bs=1M count=$IMG_SIZE iflag=direct
diff ./testdata/bin/img1.bin ./testdata/bin/img2.bin
$ETCDCTL put /vitastor/config/osd/1 '{"reweight":'$1'}'
$ETCDCTL put /vitastor/config/osd/2 '{"reweight":'$1'}'
@ -31,18 +31,18 @@ verify() {
for i in {1..10000}; do
O=$(((RANDOM*RANDOM) % (IMG_SIZE*128)))
dd if=$NBD_DEV of=./testdata/img2.bin bs=4k seek=$O skip=$O count=1 iflag=direct conv=notrunc
dd if=$NBD_DEV of=./testdata/bin/img2.bin bs=4k seek=$O skip=$O count=1 iflag=direct conv=notrunc
done
echo "Verifying during rebalance"
diff ./testdata/img1.bin ./testdata/img2.bin
diff ./testdata/bin/img1.bin ./testdata/bin/img2.bin
# Wait for the rebalance to finish
wait_finish_rebalance 300
echo "Verifying after rebalance"
dd if=$NBD_DEV of=./testdata/img2.bin bs=1M count=$IMG_SIZE iflag=direct
diff ./testdata/img1.bin ./testdata/img2.bin
dd if=$NBD_DEV of=./testdata/bin/img2.bin bs=1M count=$IMG_SIZE iflag=direct
diff ./testdata/bin/img1.bin ./testdata/bin/img2.bin
}
# Verify with regular reads

View File

@ -14,7 +14,7 @@ LD_PRELOAD="build/src/client/libfio_vitastor.so" \
qemu-img convert -S 4096 -p \
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:pool=1:inode=1:size=$((256*1024*1024))" \
-O raw ./testdata/before.bin
-O raw ./testdata/bin/before.bin
for i in $(seq 1 $OSD_COUNT); do
pid=OSD${i}_PID
@ -23,19 +23,19 @@ for i in $(seq 1 $OSD_COUNT); do
done
for i in $(seq 1 $OSD_COUNT); do
offsets=$(build/src/disk_tool/vitastor-disk simple-offsets --format json ./testdata/test_osd$i.bin)
offsets=$(build/src/disk_tool/vitastor-disk simple-offsets --format json ./testdata/bin/test_osd$i.bin)
meta_offset=$(echo $offsets | jq -r .meta_offset)
data_offset=$(echo $offsets | jq -r .data_offset)
build/src/disk_tool/vitastor-disk dump-journal --json ./testdata/test_osd$i.bin 4096 0 $meta_offset >./testdata/journal_before_resize.json
build/src/disk_tool/vitastor-disk dump-meta ./testdata/test_osd$i.bin 4096 $meta_offset $((data_offset-meta_offset)) >./testdata/meta_before_resize.json
build/src/disk_tool/vitastor-disk dump-journal --json ./testdata/bin/test_osd$i.bin 4096 0 $meta_offset >./testdata/journal_before_resize.json
build/src/disk_tool/vitastor-disk dump-meta ./testdata/bin/test_osd$i.bin 4096 $meta_offset $((data_offset-meta_offset)) >./testdata/meta_before_resize.json
build/src/disk_tool/vitastor-disk resize \
$(build/src/disk_tool/vitastor-disk simple-offsets --format options ./testdata/test_osd$i.bin 2>/dev/null) \
$(build/src/disk_tool/vitastor-disk simple-offsets --format options ./testdata/bin/test_osd$i.bin 2>/dev/null) \
--new_meta_offset 0 \
--new_meta_len $((1024*1024)) \
--new_journal_offset $((1024*1024)) \
--new_data_offset $((128*1024*1024))
build/src/disk_tool/vitastor-disk dump-journal --json ./testdata/test_osd$i.bin 4096 $((1024*1024)) $((127*1024*1024)) >./testdata/journal_after_resize.json
build/src/disk_tool/vitastor-disk dump-meta ./testdata/test_osd$i.bin 4096 0 $((1024*1024)) >./testdata/meta_after_resize.json
build/src/disk_tool/vitastor-disk dump-journal --json ./testdata/bin/test_osd$i.bin 4096 $((1024*1024)) $((127*1024*1024)) >./testdata/journal_after_resize.json
build/src/disk_tool/vitastor-disk dump-meta ./testdata/bin/test_osd$i.bin 4096 0 $((1024*1024)) >./testdata/meta_after_resize.json
if ! (cat ./testdata/meta_before_resize.json ./testdata/meta_after_resize.json | \
jq -e -s 'map([ .entries[] | del(.block) ] | sort_by(.pool, .inode, .stripe)) | .[0] == .[1] and (.[0] | length) > 1000'); then
format_error "OSD $i metadata corrupted after resizing"
@ -50,7 +50,7 @@ $ETCDCTL del --prefix /vitastor/osd/state/
for i in $(seq 1 $OSD_COUNT); do
build/src/osd/vitastor-osd --osd_num $i --bind_address 127.0.0.1 $NO_SAME $OSD_ARGS --etcd_address $ETCD_URL \
--data_device ./testdata/test_osd$i.bin \
--data_device ./testdata/bin/test_osd$i.bin \
--meta_offset 0 \
--journal_offset $((1024*1024)) \
--data_offset $((128*1024*1024)) >>./testdata/osd$i.log 2>&1 &
@ -59,9 +59,9 @@ done
qemu-img convert -S 4096 -p \
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:pool=1:inode=1:size=$((256*1024*1024))" \
-O raw ./testdata/after.bin
-O raw ./testdata/bin/after.bin
if ! cmp ./testdata/before.bin ./testdata/after.bin; then
if ! cmp ./testdata/bin/before.bin ./testdata/bin/after.bin; then
format_error "Data differs after resizing"
fi

View File

@ -21,9 +21,7 @@ $ETCDCTL put /vitastor/config/pools '{"1":{"name":"testpool","scheme":"replicate
sleep 2
$ETCDCTL get --prefix /vitastor/config/pgs --print-value-only
if ! ($ETCDCTL get --prefix /vitastor/config/pgs --print-value-only | \
if ! ($ETCDCTL get --prefix /vitastor/pg/config --print-value-only | \
jq -s -e '[ [ .[0].items["1"] | .[].osd_set | map(. | select(. != "" and (.|tonumber) < 5)) ][] | select((. | length) == 2) ] | length == 16'); then
format_error "Some PGs missing replicas"
fi

View File

@ -18,19 +18,19 @@ $ETCDCTL put /vitastor/config/inode/1/1 '{"name":"testimg","size":'$((IMG_SIZE*1
# Write
LD_PRELOAD="build/src/client/libfio_vitastor.so" \
fio -thread -name=test -ioengine=build/src/client/libfio_vitastor.so -bs=1M -direct=1 -iodepth=4 \
-mirror_file=./testdata/mirror.bin -end_fsync=1 -rw=write -etcd=$ETCD_URL -image=testimg
-mirror_file=./testdata/bin/mirror.bin -end_fsync=1 -rw=write -etcd=$ETCD_URL -image=testimg
# Save PG primary
primary=$($ETCDCTL get --print-value-only /vitastor/config/pgs | jq -r '.items["1"]["1"].primary')
primary=$($ETCDCTL get --print-value-only /vitastor/pg/config | jq -r '.items["1"]["1"].primary')
# Intentionally corrupt OSD data and restart it
zero_osd_pid=OSD${ZERO_OSD}_PID
kill ${!zero_osd_pid}
sleep 1
kill -9 ${!zero_osd_pid} || true
data_offset=$(build/src/disk_tool/vitastor-disk simple-offsets ./testdata/test_osd$ZERO_OSD.bin $OFFSET_ARGS | grep data_offset | awk '{print $2}')
truncate -s $data_offset ./testdata/test_osd$ZERO_OSD.bin
dd if=/dev/zero of=./testdata/test_osd$ZERO_OSD.bin bs=1024 count=1 seek=$((OSD_SIZE*1024-1))
data_offset=$(build/src/disk_tool/vitastor-disk simple-offsets ./testdata/bin/test_osd$ZERO_OSD.bin $OFFSET_ARGS | grep data_offset | awk '{print $2}')
truncate -s $data_offset ./testdata/bin/test_osd$ZERO_OSD.bin
dd if=/dev/zero of=./testdata/bin/test_osd$ZERO_OSD.bin bs=1024 count=1 seek=$((OSD_SIZE*1024-1))
$ETCDCTL del /vitastor/osd/state/$ZERO_OSD
start_osd $ZERO_OSD
@ -38,7 +38,7 @@ start_osd $ZERO_OSD
wait_up 10
# Wait until PG is back on the same primary
wait_condition 10 "$ETCDCTL"$' get --print-value-only /vitastor/config/pgs | jq -s -e \'.[0].items["1"]["1"].primary == "'$primary'"'"'"
wait_condition 10 "$ETCDCTL"$' get --print-value-only /vitastor/pg/config | jq -s -e \'.[0].items["1"]["1"].primary == "'$primary'"'"'"
# Trigger scrub
$ETCDCTL put /vitastor/pg/history/1/1 `$ETCDCTL get --print-value-only /vitastor/pg/history/1/1 | jq -s -c '(.[0] // {}) + {"next_scrub":1}'`
@ -64,8 +64,8 @@ fi
# Read everything back
qemu-img convert -S 4096 -p \
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testimg" \
-O raw ./testdata/read.bin
-O raw ./testdata/bin/read.bin
diff ./testdata/read.bin ./testdata/mirror.bin
diff ./testdata/bin/read.bin ./testdata/bin/mirror.bin
format_green OK

View File

@ -34,21 +34,21 @@ qemu-img convert -p \
qemu-img convert -S 4096 -p \
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:pool=1:inode=3:size=$((32*1024*1024))" \
-O raw ./testdata/merged.bin
-O raw ./testdata/bin/merged.bin
qemu-img convert -S 4096 -p \
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testimg@0" \
-O raw ./testdata/layer0.bin
-O raw ./testdata/bin/layer0.bin
$ETCDCTL put /vitastor/config/inode/1/3 '{"name":"testimg","size":'$((32*1024*1024))'}'
qemu-img convert -S 4096 -p \
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testimg" \
-O raw ./testdata/layer1.bin
-O raw ./testdata/bin/layer1.bin
node tests/merge.js ./testdata/layer0.bin ./testdata/layer1.bin ./testdata/check.bin
node tests/merge.js ./testdata/bin/layer0.bin ./testdata/bin/layer1.bin ./testdata/bin/check.bin
cmp ./testdata/merged.bin ./testdata/check.bin
cmp ./testdata/bin/merged.bin ./testdata/bin/check.bin
# Test merge
@ -58,22 +58,22 @@ build/src/cmd/vitastor-cli rm --etcd_address $ETCD_URL testimg@0
qemu-img convert -S 4096 -p \
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testimg" \
-O raw ./testdata/merged-by-tool.bin
-O raw ./testdata/bin/merged-by-tool.bin
cmp ./testdata/merged.bin ./testdata/merged-by-tool.bin
cmp ./testdata/bin/merged.bin ./testdata/bin/merged-by-tool.bin
# Test merge by qemu-img
qemu-img rebase -u -b layer0.qcow2 -F qcow2 ./testdata/layer1.qcow2
qemu-img convert -S 4096 -f qcow2 ./testdata/layer1.qcow2 -O raw ./testdata/rebased.bin
qemu-img convert -S 4096 -f qcow2 ./testdata/layer1.qcow2 -O raw ./testdata/bin/rebased.bin
cmp ./testdata/merged.bin ./testdata/rebased.bin
cmp ./testdata/bin/merged.bin ./testdata/bin/rebased.bin
qemu-img rebase -u -b '' ./testdata/layer1.qcow2
qemu-img convert -S 4096 -f qcow2 ./testdata/layer1.qcow2 -O raw ./testdata/rebased.bin
qemu-img convert -S 4096 -f qcow2 ./testdata/layer1.qcow2 -O raw ./testdata/bin/rebased.bin
cmp ./testdata/layer1.bin ./testdata/rebased.bin
cmp ./testdata/bin/layer1.bin ./testdata/bin/rebased.bin
format_green OK

View File

@ -9,7 +9,7 @@ build/src/cmd/vitastor-cli --etcd_address $ETCD_URL create -s 32M testchain
LD_PRELOAD="build/src/client/libfio_vitastor.so" \
fio -thread -name=test -ioengine=build/src/client/libfio_vitastor.so -bs=4M -direct=1 -iodepth=1 -fsync=1 -rw=write \
-etcd=$ETCD_URL -image=testchain -mirror_file=./testdata/mirror.bin
-etcd=$ETCD_URL -image=testchain -mirror_file=./testdata/bin/mirror.bin
for i in {1..10}; do
# Create a snapshot
@ -17,18 +17,18 @@ for i in {1..10}; do
# Check that the new snapshot is see-through
qemu-img convert -p \
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testchain" \
-O raw ./testdata/check.bin
cmp ./testdata/check.bin ./testdata/mirror.bin
-O raw ./testdata/bin/check.bin
cmp ./testdata/bin/check.bin ./testdata/bin/mirror.bin
# Write something to it
LD_PRELOAD="build/src/client/libfio_vitastor.so" \
fio -thread -name=test -ioengine=build/src/client/libfio_vitastor.so -bs=4k -direct=1 -iodepth=1 -fsync=32 -rw=randwrite \
-randrepeat=$((i <= 2)) -buffer_pattern=0x$((10+i))$((10+i))$((10+i))$((10+i)) \
-etcd=$ETCD_URL -image=testchain -number_ios=1024 -mirror_file=./testdata/mirror.bin
-etcd=$ETCD_URL -image=testchain -number_ios=1024 -mirror_file=./testdata/bin/mirror.bin
# Check the new content
qemu-img convert -p \
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testchain" \
-O raw ./testdata/layer1.bin
cmp ./testdata/layer1.bin ./testdata/mirror.bin
-O raw ./testdata/bin/layer1.bin
cmp ./testdata/bin/layer1.bin ./testdata/bin/mirror.bin
done
build/src/cmd/vitastor-cli --etcd_address $ETCD_URL rm testchain@1 testchain@9
@ -36,13 +36,13 @@ build/src/cmd/vitastor-cli --etcd_address $ETCD_URL rm testchain@1 testchain@9
# Check the final image
qemu-img convert -p \
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testchain" \
-O raw ./testdata/layer1.bin
cmp ./testdata/layer1.bin ./testdata/mirror.bin
-O raw ./testdata/bin/layer1.bin
cmp ./testdata/bin/layer1.bin ./testdata/bin/mirror.bin
# Check the last remaining snapshot
qemu-img convert -p \
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testchain@10" \
-O raw ./testdata/layer0.bin
cmp ./testdata/layer0.bin ./testdata/check.bin
-O raw ./testdata/bin/layer0.bin
cmp ./testdata/bin/layer0.bin ./testdata/bin/check.bin
format_green OK

View File

@ -9,7 +9,7 @@ build/src/cmd/vitastor-cli --etcd_address $ETCD_URL create -s 128M testchain
LD_PRELOAD="build/src/client/libfio_vitastor.so" \
fio -thread -name=test -ioengine=build/src/client/libfio_vitastor.so -bs=4M -direct=1 -iodepth=1 -fsync=1 -rw=write \
-etcd=$ETCD_URL -image=testchain -mirror_file=./testdata/mirror.bin
-etcd=$ETCD_URL -image=testchain -mirror_file=./testdata/bin/mirror.bin
# Create a snapshot
build/src/cmd/vitastor-cli --etcd_address $ETCD_URL snap-create testchain@0
@ -17,13 +17,13 @@ build/src/cmd/vitastor-cli --etcd_address $ETCD_URL snap-create testchain@0
# Write something to it
LD_PRELOAD="build/src/client/libfio_vitastor.so" \
fio -thread -name=test -ioengine=build/src/client/libfio_vitastor.so -bs=1M -direct=1 -iodepth=4 -rw=randwrite \
-randrepeat=0 -etcd=$ETCD_URL -image=testchain -number_ios=8 -mirror_file=./testdata/mirror.bin
-randrepeat=0 -etcd=$ETCD_URL -image=testchain -number_ios=8 -mirror_file=./testdata/bin/mirror.bin
# Check the new content
qemu-img convert -p \
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testchain" \
-O raw ./testdata/layer1.bin
cmp ./testdata/layer1.bin ./testdata/mirror.bin
-O raw ./testdata/bin/layer1.bin
cmp ./testdata/bin/layer1.bin ./testdata/bin/mirror.bin
# Merge
build/src/cmd/vitastor-cli --etcd_address $ETCD_URL rm testchain@0
@ -31,7 +31,7 @@ build/src/cmd/vitastor-cli --etcd_address $ETCD_URL rm testchain@0
# Check the final image
qemu-img convert -p \
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testchain" \
-O raw ./testdata/layer1.bin
cmp ./testdata/layer1.bin ./testdata/mirror.bin
-O raw ./testdata/bin/layer1.bin
cmp ./testdata/bin/layer1.bin ./testdata/bin/mirror.bin
format_green OK

View File

@ -23,7 +23,7 @@ LD_PRELOAD="build/src/client/libfio_vitastor.so" \
kill $OSD2_PID
build/src/osd/vitastor-osd --osd_num 1 --bind_address 127.0.0.1 $NO_SAME $OSD_ARGS --etcd_address $ETCD_URL \
$(build/src/disk_tool/vitastor-disk simple-offsets --format options --device ./testdata/test_osd2.bin 2>/dev/null) >>./testdata/osd2.log 2>&1 &
$(build/src/disk_tool/vitastor-disk simple-offsets --format options --device ./testdata/bin/test_osd2.bin 2>/dev/null) >>./testdata/osd2.log 2>&1 &
sleep 2
# Check PG state - it should NOT become active

View File

@ -2,14 +2,14 @@
. `dirname $0`/run_3osds.sh
primary=$($ETCDCTL get --print-value-only /vitastor/config/pgs | jq -r '.items["1"]["1"].primary')
primary=$($ETCDCTL get --print-value-only /vitastor/pg/config | jq -r '.items["1"]["1"].primary')
primary_pid=OSD${primary}_PID
kill -9 ${!primary_pid}
sleep 15
wait_condition 10 "$ETCDCTL get --print-value-only /vitastor/config/pgs | jq -s -e '.[0].items[\"1\"][\"1\"].primary != \"$primary\"'"
wait_condition 10 "$ETCDCTL get --print-value-only /vitastor/pg/config | jq -s -e '.[0].items[\"1\"][\"1\"].primary != \"$primary\"'"
newprim=$($ETCDCTL get --print-value-only /vitastor/config/pgs | jq -r '.items["1"]["1"].primary')
newprim=$($ETCDCTL get --print-value-only /vitastor/pg/config | jq -r '.items["1"]["1"].primary')
if [ "$newprim" = "$primary" ]; then
format_error Primary not switched

View File

@ -15,7 +15,7 @@ OSD_COUNT=3
OSD_ARGS="$OSD_ARGS"
OFFSET_ARGS="$OFFSET_ARGS"
for i in $(seq 1 $OSD_COUNT); do
build/src/osd/vitastor-osd --osd_num $i --bind_address 127.0.0.1 $OSD_ARGS --etcd_address $ETCD_URL $(build/src/disk_tool/vitastor-disk simple-offsets --format options ./testdata/test_osd$i.bin 2>/dev/null) >>./testdata/osd$i.log 2>&1 &
build/src/osd/vitastor-osd --osd_num $i --bind_address 127.0.0.1 $OSD_ARGS --etcd_address $ETCD_URL $(build/src/disk_tool/vitastor-disk simple-offsets --format options ./testdata/bin/test_osd$i.bin 2>/dev/null) >>./testdata/osd$i.log 2>&1 &
eval OSD${i}_PID=$!
done

View File

@ -43,10 +43,10 @@ LD_PRELOAD="build/src/client/libfio_vitastor.so" \
qemu-img convert -S 4096 -p \
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:pool=1:inode=1:size=$((128*1024*1024))" \
-O raw ./testdata/read.bin
-O raw ./testdata/bin/read.bin
qemu-img convert -S 4096 -p \
-f raw ./testdata/read.bin \
-f raw ./testdata/bin/read.bin \
-O raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:pool=1:inode=1:size=$((128*1024*1024))"
format_green OK