Compare commits

..

No commits in common. "60a300621ee79e8793c73e49938a3dc747e4168f" and "d6f0b480c8c5db219b680a493db3721fd407352b" have entirely different histories.

54 changed files with 325 additions and 521 deletions

View File

@ -16,7 +16,6 @@ env:
BUILDENV_IMAGE: git.yourcmc.ru/vitalif/vitastor/buildenv
TEST_IMAGE: git.yourcmc.ru/vitalif/vitastor/test
OSD_ARGS: '--etcd_quick_timeout 2000'
USE_RAMDISK: 1
concurrency:
group: ci-${{ github.ref }}

View File

@ -2,6 +2,6 @@ cmake_minimum_required(VERSION 2.8.12)
project(vitastor)
set(VITASTOR_VERSION "1.7.1")
set(VERSION "1.7.1")
add_subdirectory(src)

View File

@ -1,9 +1,9 @@
VITASTOR_VERSION ?= v1.7.1
VERSION ?= v1.7.1
all: build push
build:
@docker build --rm -t vitalif/vitastor-csi:$(VITASTOR_VERSION) .
@docker build --rm -t vitalif/vitastor-csi:$(VERSION) .
push:
@docker push vitalif/vitastor-csi:$(VITASTOR_VERSION)
@docker push vitalif/vitastor-csi:$(VERSION)

View File

@ -42,7 +42,7 @@ PG state always includes exactly 1 of the following base states:
- **offline** — PG isn't activated by any OSD at all. Either primary OSD isn't set for
this PG at all (if the pool is just created), or an unavailable OSD is set as primary,
or the primary OSD refuses to start this PG (for example, because of wrong block_size),
or the PG is stopped by the monitor using `pause: true` flag in `/vitastor/pg/config` in etcd.
or the PG is stopped by the monitor using `pause: true` flag in `/vitastor/config/pgs` in etcd.
- **starting** — primary OSD has acquired PG lock in etcd, PG is starting.
- **peering** — primary OSD requests PG object listings from secondary OSDs and calculates
the PG state.
@ -150,7 +150,7 @@ POOL_ID=1
ALL_OSDS=$(etcdctl --endpoints=your_etcd_address:2379 get --keys-only --prefix /vitastor/osd/stats/ | \
perl -e '$/ = undef; $a = <>; $a =~ s/\s*$//; $a =~ s!/vitastor/osd/stats/!!g; $a =~ s/\s+/,/g; print $a')
for i in $(seq 1 $PG_COUNT); do
etcdctl --endpoints=your_etcd_address:2379 put /vitastor/pg/history/$POOL_ID/$i '{"all_peers":['$ALL_OSDS']}'
etcdctl --endpoints=your_etcd_address:2379 put /vitastor/pg/history/$POOL_ID/$i '{"all_peers":['$ALL_OSDS']}'; done
done
```
@ -169,51 +169,21 @@ Upgrading is performed without stopping clients (VMs/containers), you just need
upgrade and restart servers one by one. However, ideally you should restart VMs too
to make them use the new version of the client library.
### 1.1.x to 1.2.0
Exceptions (specific upgrade instructions):
- Upgrading <= 1.1.x to 1.2.0 or later, if you use EC n+k with k>=2, is recommended
to be performed with full downtime: first you should stop all clients, then all OSDs,
then upgrade and start everything back — because versions before 1.2.0 have several
bugs leading to invalid data being read in EC n+k, k>=2 configurations in degraded pools.
- Versions <= 0.8.7 are incompatible with versions >= 0.9.0, so you should first
upgrade from <= 0.8.7 to 0.8.8 or 0.8.9, and only then to >= 0.9.x. If you upgrade
without this intermediate step, client I/O will hang until the end of the upgrade process.
- Upgrading from <= 0.5.x to >= 0.6.x is not supported.
Upgrading version <= 1.1.x to version >= 1.2.0, if you use EC n+k with k>=2, is recommended
to be performed with full downtime: first you should stop all clients, then all OSDs,
then upgrade and start everything back — because versions before 1.2.0 have several
bugs leading to invalid data being read in EC n+k, k>=2 configurations in degraded pools.
### 0.8.7 to 0.9.0
Versions <= 0.8.7 are incompatible with versions >= 0.9.0, so you should first
upgrade from <= 0.8.7 to 0.8.8 or 0.8.9, and only then to >= 0.9.x. If you upgrade
without this intermediate step, client I/O will hang until the end of the upgrade process.
### 0.5.x to 0.6.x
Upgrading from <= 0.5.x to >= 0.6.x is not supported.
## Downgrade
Downgrades are also allowed freely, except for the following specific cases:
### 1.8.0 to 1.7.1
Before downgrading from version >= 1.8.0 to version <= 1.7.1
you have to copy /vitastor/pg/config etcd key to /vitastor/config/pgs:
```
etcdctl --endpoints=http://... get --print-value-only /vitastor/pg/config | \
etcdctl --endpoints=http://... put /vitastor/config/pgs
```
Then you can just install older packages and restart all services.
If you performed downgrade without first copying that key, run "add all OSDs into the
history records of all PGs" from [Restoring from lost pool configuration](#restoring-from-lost-pool-configuration).
### 1.0.0 to 0.9.x
Version 1.0.0 has a new disk format, so OSDs initialized on 1.0.0 or later can't
be rolled back to 0.9.x or previous versions.
### 0.8.0 to 0.7.x
Versions before 0.8.0 don't have vitastor-disk, so OSDs, initialized by it, won't
start with older versions (0.4.x - 0.7.x). :-)
Rollback:
- Version 1.0.0 has a new disk format, so OSDs initialized on 1.0.0 can't be rolled
back to 0.9.x or previous versions.
- Versions before 0.8.0 don't have vitastor-disk, so OSDs, initialized by it, won't
start with 0.7.x or 0.6.x. :-)
## OSD memory usage

View File

@ -42,7 +42,7 @@
- **offline** — PG вообще не активирована ни одним OSD. Либо первичный OSD не назначен вообще
(если пул только создан), либо в качестве первичного назначен недоступный OSD, либо
назначенный OSD отказывается запускать эту PG (например, из-за несовпадения block_size),
либо PG остановлена монитором через флаг `pause: true` в `/vitastor/pg/config` в etcd.
либо PG остановлена монитором через флаг `pause: true` в `/vitastor/config/pgs` в etcd.
- **starting** — первичный OSD захватил блокировку PG в etcd, PG запускается.
- **peering** — первичный OSD опрашивает вторичные OSD на предмет списков объектов данной PG и рассчитывает её состояние.
- **repeering** — PG ожидает завершения текущих операций ввода-вывода, после чего перейдёт в состояние **peering**.
@ -147,7 +147,7 @@ POOL_ID=1
ALL_OSDS=$(etcdctl --endpoints=your_etcd_address:2379 get --keys-only --prefix /vitastor/osd/stats/ | \
perl -e '$/ = undef; $a = <>; $a =~ s/\s*$//; $a =~ s!/vitastor/osd/stats/!!g; $a =~ s/\s+/,/g; print $a')
for i in $(seq 1 $PG_COUNT); do
etcdctl --endpoints=your_etcd_address:2379 put /vitastor/pg/history/$POOL_ID/$i '{"all_peers":['$ALL_OSDS']}'
etcdctl --endpoints=your_etcd_address:2379 put /vitastor/pg/history/$POOL_ID/$i '{"all_peers":['$ALL_OSDS']}'; done
done
```
@ -166,51 +166,21 @@ done
достаточно обновлять серверы по одному. Однако, конечно, чтобы запущенные виртуальные машины
начали использовать новую версию клиентской библиотеки, их тоже нужно перезапустить.
### 1.1.x -> 1.2.0
Исключения (особые указания при обновлении):
- Обновляться с версий <= 1.1.x до версий >= 1.2.0, если вы используете EC n+k и k>=2,
рекомендуется с временной остановкой кластера — сначала нужно остановить всех клиентов,
потом все OSD, потом обновить и запустить всё обратно — из-за нескольких багов, которые
могли приводить к некорректному чтению данных в деградированных EC-пулах.
- Версии <= 0.8.7 несовместимы с версиями >= 0.9.0, поэтому при обновлении с <= 0.8.7
нужно сначала обновиться до 0.8.8 или 0.8.9, а уже потом до любых версий >= 0.9.x.
Иначе клиентский ввод-вывод зависнет до завершения обновления.
- Обновление с версий 0.5.x и более ранних до 0.6.x и более поздних не поддерживается.
Обновляться с версий <= 1.1.x до версий >= 1.2.0, если вы используете EC n+k и k>=2,
рекомендуется с временной остановкой кластера — сначала нужно остановить всех клиентов,
потом все OSD, потом обновить и запустить всё обратно — из-за нескольких багов, которые
могли приводить к некорректному чтению данных в деградированных EC-пулах.
### 0.8.7 -> 0.9.0
Версии <= 0.8.7 несовместимы с версиями >= 0.9.0, поэтому при обновлении с <= 0.8.7
нужно сначала обновиться до 0.8.8 или 0.8.9, а уже потом до любых версий >= 0.9.x.
Иначе клиентский ввод-вывод зависнет до завершения обновления.
### 0.5.x -> 0.6.x
Обновление с версий 0.5.x и более ранних до 0.6.x и более поздних не поддерживается.
## Откат версии
Откат (понижение версии) тоже свободно разрешён, кроме указанных ниже случаев:
### 1.8.0 -> 1.7.1
Перед понижением версии с >= 1.8.0 до <= 1.7.1 вы должны скопировать ключ
etcd `/vitastor/pg/config` в `/vitastor/config/pgs`:
```
etcdctl --endpoints=http://... get --print-value-only /vitastor/pg/config | \
etcdctl --endpoints=http://... put /vitastor/config/pgs
```
После этого можно просто установить более старые пакеты и перезапустить все сервисы.
Если вы откатили версию, не скопировав предварительно этот ключ - выполните "добавление всех
OSD в исторические записи всех PG" из раздела [Восстановление потерянной конфигурации пулов](#восстановление-потерянной-конфигурации-пулов).
### 1.0.0 -> 0.9.x
В версии 1.0.0 поменялся дисковый формат, поэтому OSD, созданные на версии >= 1.0.0,
нельзя откатить до версии 0.9.x и более ранних.
### 0.8.0 -> 0.7.x
В версиях ранее 0.8.0 нет vitastor-disk, значит, созданные им OSD не запустятся на
более ранних версиях (0.4.x - 0.7.x). :-)
Откат:
- В версии 1.0.0 поменялся дисковый формат, поэтому OSD, созданные на версии >= 1.0.0,
нельзя откатить до версии 0.9.x и более ранних.
- В версиях ранее 0.8.0 нет vitastor-disk, значит, созданные им OSD нельзя откатить
до 0.7.x или 0.6.x. :-)
## Потребление памяти OSD

View File

@ -6,7 +6,7 @@ const etcd_nonempty_keys = {
'config/global': 1,
'config/node_placement': 1,
'config/pools': 1,
'pg/config': 1,
'config/pgs': 1,
'history/last_clean_pgs': 1,
'stats': 1,
};
@ -15,8 +15,7 @@ const etcd_allow = new RegExp('^'+[
'config/node_placement',
'config/pools',
'config/osd/[1-9]\\d*',
'config/pgs', // old name
'pg/config',
'config/pgs',
'config/inode/[1-9]\\d*/[1-9]\\d*',
'osd/state/[1-9]\\d*',
'osd/stats/[1-9]\\d*',
@ -25,8 +24,7 @@ const etcd_allow = new RegExp('^'+[
'mon/master',
'mon/member/[a-f0-9]+',
'pg/state/[1-9]\\d*/[1-9]\\d*',
'pg/stats/[1-9]\\d*/[1-9]\\d*', // old name
'pgstats/[1-9]\\d*/[1-9]\\d*',
'pg/stats/[1-9]\\d*/[1-9]\\d*',
'pg/history/[1-9]\\d*/[1-9]\\d*',
'history/last_clean_pgs',
'inode/stats/[1-9]\\d*/\\d+',
@ -207,6 +205,19 @@ const etcd_tree = {
osd: {
/* <id>: { reweight?: 1, tags?: [ 'nvme', ... ], noout?: true }, ... */
},
/* pgs: {
hash: string,
items: {
<pool_id>: {
<pg_id>: {
osd_set: [ 1, 2, 3 ],
primary: 1,
pause: false,
}
}
}
}, */
pgs: {},
/* inode: {
<pool_id>: {
<inode_t>: {
@ -279,19 +290,6 @@ const etcd_tree = {
},
},
pg: {
/* config: {
hash: string,
items: {
<pool_id>: {
<pg_id>: {
osd_set: [ 1, 2, 3 ],
primary: 1,
pause: false,
}
}
}
}, */
config: {},
state: {
/* <pool_id>: {
<pg_id>: {
@ -302,6 +300,18 @@ const etcd_tree = {
}
}, */
},
stats: {
/* <pool_id>: {
<pg_id>: {
object_count: uint64_t,
clean_count: uint64_t,
misplaced_count: uint64_t,
degraded_count: uint64_t,
incomplete_count: uint64_t,
write_osd_set: osd_num_t[],
},
}, */
},
history: {
/* <pool_id>: {
<pg_id>: {
@ -313,18 +323,6 @@ const etcd_tree = {
}, */
},
},
pgstats: {
/* <pool_id>: {
<pg_id>: {
object_count: uint64_t,
clean_count: uint64_t,
misplaced_count: uint64_t,
degraded_count: uint64_t,
incomplete_count: uint64_t,
write_osd_set: osd_num_t[],
},
}, */
},
inode: {
stats: {
/* <pool_id>: {

View File

@ -75,8 +75,6 @@ class Mon
this.prev_stats = { osd_stats: {}, osd_diff: {} };
this.recheck_pgs_active = false;
this.watcher_active = false;
this.old_pg_config = false;
this.old_pg_stats_seen = false;
}
async start()
@ -124,7 +122,7 @@ class Mon
!Number(this.state.pool.stats[pool_id].pg_real_size))
{
// Generate missing data in etcd
this.state.pg.config.hash = null;
this.state.config.pgs.hash = null;
break;
}
}
@ -203,15 +201,10 @@ class Mon
stats_changed = true;
changed = true;
}
else if (key.substr(0, 11) == '/osd/stats/' || key.substr(0, 9) == '/pgstats/' || key.substr(0, 16) == '/osd/inodestats/')
else if (key.substr(0, 11) == '/osd/stats/' || key.substr(0, 10) == '/pg/stats/' || key.substr(0, 16) == '/osd/inodestats/')
{
stats_changed = true;
}
else if (key.substr(0, 10) == '/pg/stats/')
{
this.old_pg_stats_seen = true;
stats_changed = true;
}
else if (key.substr(0, 10) == '/pg/state/')
{
pg_states_changed = true;
@ -292,7 +285,7 @@ class Mon
continue next_pool;
}
}
new_clean_pgs.items[pool_id] = this.state.pg.config.items[pool_id];
new_clean_pgs.items[pool_id] = this.state.config.pgs.items[pool_id];
}
this.state.history.last_clean_pgs = new_clean_pgs;
await this.etcd.etcd_call('/kv/txn', {
@ -403,50 +396,6 @@ class Mon
this.parse_kv(kv);
}
}
if (Object.keys((this.state.config.pgs||{}).items||{}).length)
{
// Support seamless upgrade to new OSDs
if (!Object.keys((this.state.pg.config||{}).items||{}).length)
{
const pgs = JSON.stringify(this.state.config.pgs);
this.state.pg.config = JSON.parse(pgs);
const res = await this.etcd.etcd_call('/kv/txn', {
success: [
{ requestPut: { key: b64(this.config.etcd_prefix+'/pg/config'), value: b64(pgs) } },
],
compare: [
{ key: b64(this.config.etcd_prefix+'/pg/config'), target: 'MOD', mod_revision: ''+this.etcd_watch_revision, result: 'LESS' },
],
}, this.config.etcd_mon_timeout, this.config.etcd_mon_retries);
if (!res.succeeded)
throw new Error('Failed to duplicate old PG config to new PG config');
}
this.old_pg_config = true;
this.old_pg_config_timer = setInterval(() => this.check_clear_old_config().catch(console.error),
this.config.old_pg_config_clear_interval||3600000);
}
}
// Timer callback that removes the legacy etcd keys (<etcd_prefix>/config/pgs and
// everything under <etcd_prefix>/pg/stats/) once nothing touches the old PG stats anymore.
// Returns a promise; resolves with no value. Errors from etcd_call propagate to the caller.
async check_clear_old_config()
{
if (this.old_pg_config && this.old_pg_stats_seen)
{
// A key under /pg/stats/ was observed since the previous check (presumably written by
// a not-yet-upgraded OSD - TODO confirm). Reset the flag and try again on the next tick.
this.old_pg_stats_seen = false;
return;
}
if (this.old_pg_config)
{
// No old-style stats seen during the last interval: drop the legacy keys in one transaction.
await this.etcd.etcd_call('/kv/txn', { success: [
{ requestDeleteRange: { key: b64(this.config.etcd_prefix+'/config/pgs') } },
// '0' is the character right after '/' in ASCII, so [.../pg/stats/, .../pg/stats0)
// covers every key that has the '/pg/stats/' prefix
{ requestDeleteRange: { key: b64(this.config.etcd_prefix+'/pg/stats/'), range_end: b64(this.config.etcd_prefix+'/pg/stats0') } },
] }, this.config.etcd_mon_timeout, this.config.etcd_mon_retries);
this.old_pg_config = false;
}
// Reached only when no further cleanup is pending: stop the periodic check.
if (this.old_pg_config_timer)
{
clearInterval(this.old_pg_config_timer);
this.old_pg_config_timer = null;
}
}
all_osds()
@ -457,7 +406,7 @@ class Mon
async stop_all_pgs(pool_id)
{
let has_online = false, paused = true;
for (const pg in this.state.pg.config.items[pool_id]||{})
for (const pg in this.state.config.pgs.items[pool_id]||{})
{
// FIXME: Change all (||{}) to ?. (optional chaining) at some point
const cur_state = (((this.state.pg.state[pool_id]||{})[pg]||{}).state||[]).join(',');
@ -465,7 +414,7 @@ class Mon
{
has_online = true;
}
if (!this.state.pg.config.items[pool_id][pg].pause)
if (!this.state.config.pgs.items[pool_id][pg].pause)
{
paused = false;
}
@ -473,7 +422,7 @@ class Mon
if (!paused)
{
console.log('Stopping all PGs for pool '+pool_id+' before changing PG count');
const new_cfg = JSON.parse(JSON.stringify(this.state.pg.config));
const new_cfg = JSON.parse(JSON.stringify(this.state.config.pgs));
for (const pg in new_cfg.items[pool_id])
{
new_cfg.items[pool_id][pg].pause = true;
@ -481,26 +430,22 @@ class Mon
// Check that no OSDs change their state before we pause PGs
// Doing this we make sure that OSDs don't wake up in the middle of our "transaction"
// and can't see the old PG configuration
const checks = [
{ key: b64(this.config.etcd_prefix+'/mon/master'), target: 'LEASE', lease: ''+this.etcd_lease_id },
{ key: b64(this.config.etcd_prefix+'/pg/config'), target: 'MOD', mod_revision: ''+this.etcd_watch_revision, result: 'LESS' },
];
const checks = [];
for (const osd_num of this.all_osds())
{
const key = b64(this.config.etcd_prefix+'/osd/state/'+osd_num);
checks.push({ key, target: 'MOD', result: 'LESS', mod_revision: ''+this.etcd_watch_revision });
}
const txn = {
compare: checks,
success: [
{ requestPut: { key: b64(this.config.etcd_prefix+'/pg/config'), value: b64(JSON.stringify(new_cfg)) } },
await this.etcd.etcd_call('/kv/txn', {
compare: [
{ key: b64(this.config.etcd_prefix+'/mon/master'), target: 'LEASE', lease: ''+this.etcd_lease_id },
{ key: b64(this.config.etcd_prefix+'/config/pgs'), target: 'MOD', mod_revision: ''+this.etcd_watch_revision, result: 'LESS' },
...checks,
],
};
if (this.old_pg_config)
{
txn.success.push({ requestPut: { key: b64(this.config.etcd_prefix+'/config/pgs'), value: b64(JSON.stringify(new_cfg)) } });
}
await this.etcd.etcd_call('/kv/txn', txn, this.config.etcd_mon_timeout, 0);
success: [
{ requestPut: { key: b64(this.config.etcd_prefix+'/config/pgs'), value: b64(JSON.stringify(new_cfg)) } },
],
}, this.config.etcd_mon_timeout, 0);
return false;
}
return !has_online;
@ -528,7 +473,7 @@ class Mon
pools: this.state.config.pools,
};
const tree_hash = sha1hex(stableStringify(tree_cfg));
if (this.state.pg.config.hash != tree_hash)
if (this.state.config.pgs.hash != tree_hash)
{
// Something has changed
console.log('Pool configuration or OSD tree changed, re-optimizing');
@ -569,10 +514,10 @@ class Mon
else
{
// Nothing changed, but we still want to recheck the distribution of primaries
let new_pg_config = recheck_primary(this.state, this.config, up_osds, osd_tree);
if (new_pg_config)
let new_config_pgs = recheck_primary(this.state, this.config, up_osds, osd_tree);
if (new_config_pgs)
{
const ok = await this.save_pg_config(new_pg_config);
const ok = await this.save_pg_config(new_config_pgs);
if (ok)
console.log('PG configuration successfully changed');
else
@ -587,12 +532,12 @@ class Mon
async apply_pool_pgs(results, up_osds, osd_tree, tree_hash)
{
for (const pool_id in (this.state.pg.config||{}).items||{})
for (const pool_id in (this.state.config.pgs||{}).items||{})
{
// We should stop all PGs when deleting a pool or changing its PG count
if (!this.state.config.pools[pool_id] ||
this.state.pg.config.items[pool_id] && this.state.config.pools[pool_id].pg_count !=
Object.keys(this.state.pg.config.items[pool_id]).reduce((a, c) => (a < (0|c) ? (0|c) : a), 0))
this.state.config.pgs.items[pool_id] && this.state.config.pools[pool_id].pg_count !=
Object.keys(this.state.config.pgs.items[pool_id]).reduce((a, c) => (a < (0|c) ? (0|c) : a), 0))
{
if (!await this.stop_all_pgs(pool_id))
{
@ -600,22 +545,22 @@ class Mon
}
}
}
const new_pg_config = JSON.parse(JSON.stringify(this.state.pg.config));
const new_config_pgs = JSON.parse(JSON.stringify(this.state.config.pgs));
const etcd_request = { compare: [], success: [] };
for (const pool_id in (new_pg_config||{}).items||{})
for (const pool_id in (new_config_pgs||{}).items||{})
{
if (!this.state.config.pools[pool_id])
{
const prev_pgs = [];
for (const pg in new_pg_config.items[pool_id]||{})
for (const pg in new_config_pgs.items[pool_id]||{})
{
prev_pgs[pg-1] = new_pg_config.items[pool_id][pg].osd_set;
prev_pgs[pg-1] = new_config_pgs.items[pool_id][pg].osd_set;
}
// Also delete pool statistics
etcd_request.success.push({ requestDeleteRange: {
key: b64(this.config.etcd_prefix+'/pool/stats/'+pool_id),
} });
save_new_pgs_txn(new_pg_config, etcd_request, this.state, this.config.etcd_prefix,
save_new_pgs_txn(new_config_pgs, etcd_request, this.state, this.config.etcd_prefix,
this.etcd_watch_revision, pool_id, up_osds, osd_tree, prev_pgs, [], []);
}
}
@ -624,7 +569,7 @@ class Mon
const pool_id = pool_res.pool_id;
const pool_cfg = this.state.config.pools[pool_id];
let pg_history = [];
for (const pg in ((this.state.pg.config.items||{})[pool_id]||{}))
for (const pg in ((this.state.config.pgs.items||{})[pool_id]||{}))
{
if (this.state.pg.history[pool_id] &&
this.state.pg.history[pool_id][pg])
@ -633,9 +578,9 @@ class Mon
}
}
const real_prev_pgs = [];
for (const pg in ((this.state.pg.config.items||{})[pool_id]||{}))
for (const pg in ((this.state.config.pgs.items||{})[pool_id]||{}))
{
real_prev_pgs[pg-1] = [ ...this.state.pg.config.items[pool_id][pg].osd_set ];
real_prev_pgs[pg-1] = [ ...this.state.config.pgs.items[pool_id][pg].osd_set ];
}
if (real_prev_pgs.length > 0 && real_prev_pgs.length != pool_res.pgs.length)
{
@ -646,8 +591,8 @@ class Mon
pg_history = scale_pg_history(pg_history, real_prev_pgs, pool_res.pgs);
// Drop stats
etcd_request.success.push({ requestDeleteRange: {
key: b64(this.config.etcd_prefix+'/pgstats/'+pool_id+'/'),
range_end: b64(this.config.etcd_prefix+'/pgstats/'+pool_id+'0'),
key: b64(this.config.etcd_prefix+'/pg/stats/'+pool_id+'/'),
range_end: b64(this.config.etcd_prefix+'/pg/stats/'+pool_id+'0'),
} });
}
const stats = {
@ -658,26 +603,22 @@ class Mon
key: b64(this.config.etcd_prefix+'/pool/stats/'+pool_id),
value: b64(JSON.stringify(stats)),
} });
save_new_pgs_txn(new_pg_config, etcd_request, this.state, this.config.etcd_prefix,
save_new_pgs_txn(new_config_pgs, etcd_request, this.state, this.config.etcd_prefix,
this.etcd_watch_revision, pool_id, up_osds, osd_tree, real_prev_pgs, pool_res.pgs, pg_history);
}
new_pg_config.hash = tree_hash;
return await this.save_pg_config(new_pg_config, etcd_request);
new_config_pgs.hash = tree_hash;
return await this.save_pg_config(new_config_pgs, etcd_request);
}
async save_pg_config(new_pg_config, etcd_request = { compare: [], success: [] })
async save_pg_config(new_config_pgs, etcd_request = { compare: [], success: [] })
{
etcd_request.compare.push(
{ key: b64(this.config.etcd_prefix+'/mon/master'), target: 'LEASE', lease: ''+this.etcd_lease_id },
{ key: b64(this.config.etcd_prefix+'/pg/config'), target: 'MOD', mod_revision: ''+this.etcd_watch_revision, result: 'LESS' },
{ key: b64(this.config.etcd_prefix+'/config/pgs'), target: 'MOD', mod_revision: ''+this.etcd_watch_revision, result: 'LESS' },
);
etcd_request.success.push(
{ requestPut: { key: b64(this.config.etcd_prefix+'/pg/config'), value: b64(JSON.stringify(new_pg_config)) } },
{ requestPut: { key: b64(this.config.etcd_prefix+'/config/pgs'), value: b64(JSON.stringify(new_config_pgs)) } },
);
if (this.old_pg_config)
{
etcd_request.success.push({ requestPut: { key: b64(this.config.etcd_prefix+'/config/pgs'), value: b64(JSON.stringify(new_pg_config)) } });
}
const txn_res = await this.etcd.etcd_call('/kv/txn', etcd_request, this.config.etcd_mon_timeout, 0);
return txn_res.succeeded;
}

View File

@ -57,7 +57,7 @@ function pick_primary(pool_config, osd_set, up_osds, aff_osds)
function recheck_primary(state, global_config, up_osds, osd_tree)
{
let new_pg_config;
let new_config_pgs;
for (const pool_id in state.config.pools)
{
const pool_cfg = state.config.pools[pool_id];
@ -69,30 +69,30 @@ function recheck_primary(state, global_config, up_osds, osd_tree)
reset_rng();
for (let pg_num = 1; pg_num <= pool_cfg.pg_count; pg_num++)
{
if (!state.pg.config.items[pool_id])
if (!state.config.pgs.items[pool_id])
{
continue;
}
const pg_cfg = state.pg.config.items[pool_id][pg_num];
const pg_cfg = state.config.pgs.items[pool_id][pg_num];
if (pg_cfg)
{
const new_primary = pick_primary(state.config.pools[pool_id], pg_cfg.osd_set, up_osds, aff_osds);
if (pg_cfg.primary != new_primary)
{
if (!new_pg_config)
if (!new_config_pgs)
{
new_pg_config = JSON.parse(JSON.stringify(state.pg.config));
new_config_pgs = JSON.parse(JSON.stringify(state.config.pgs));
}
console.log(
`Moving pool ${pool_id} (${pool_cfg.name || 'unnamed'}) PG ${pg_num}`+
` primary OSD from ${pg_cfg.primary} to ${new_primary}`
);
new_pg_config.items[pool_id][pg_num].primary = new_primary;
new_config_pgs.items[pool_id][pg_num].primary = new_primary;
}
}
}
}
return new_pg_config;
return new_config_pgs;
}
function save_new_pgs_txn(save_to, request, state, etcd_prefix, etcd_watch_revision, pool_id, up_osds, osd_tree, prev_pgs, new_pgs, pg_history)
@ -185,10 +185,10 @@ async function generate_pool_pgs(state, global_config, pool_id, osd_tree, levels
}
if (!prev_pgs.length)
{
// Fall back to pg/config if it's empty
for (const pg in ((state.pg.config.items||{})[pool_id]||{}))
// Fall back to config/pgs if it's empty
for (const pg in ((state.config.pgs.items||{})[pool_id]||{}))
{
prev_pgs[pg-1] = [ ...state.pg.config.items[pool_id][pg].osd_set ];
prev_pgs[pg-1] = [ ...state.config.pgs.items[pool_id][pg].osd_set ];
}
}
const old_pg_count = prev_pgs.length;
@ -205,8 +205,8 @@ async function generate_pool_pgs(state, global_config, pool_id, osd_tree, levels
ordered: pool_cfg.scheme != 'replicated',
};
let optimize_result;
// Re-shuffle PGs if pg/config.hash is empty
if (old_pg_count > 0 && state.pg.config.hash)
// Re-shuffle PGs if config/pgs.hash is empty
if (old_pg_count > 0 && state.config.pgs.hash)
{
if (prev_pgs.length != pool_cfg.pg_count)
{

View File

@ -166,7 +166,7 @@ function export_prometheus_metrics(st)
res += `vitastor_pool_used_raw_tb{${pool_label}} ${pool_stat.used_raw_tb||0}\n`;
// PG states and pool up/down status
const real_pg_count = (Object.keys(((st.pg.config||{}).items||{})[pool_id]||{}).length) || (0|pool_cfg.pg_count);
const real_pg_count = (Object.keys(((st.config.pgs||{}).items||{})[pool_id]||{}).length) || (0|pool_cfg.pg_count);
const per_state = {
active: 0,
starting: 0,

View File

@ -100,19 +100,10 @@ function sum_object_counts(state, global_config)
{
const object_counts = { object: 0n, clean: 0n, misplaced: 0n, degraded: 0n, incomplete: 0n };
const object_bytes = { object: 0n, clean: 0n, misplaced: 0n, degraded: 0n, incomplete: 0n };
let pgstats = state.pgstats;
if (state.pg.stats)
{
// Merge with old stats for seamless transition to new stats
for (const pool_id in state.pg.stats)
{
pgstats[pool_id] = { ...(state.pg.stats[pool_id] || {}), ...(pgstats[pool_id] || {}) };
}
}
for (const pool_id in pgstats)
for (const pool_id in state.pg.stats)
{
let object_size = 0;
for (const osd_num of pgstats[pool_id].write_osd_set||[])
for (const osd_num of state.pg.stats[pool_id].write_osd_set||[])
{
if (osd_num && state.osd.stats[osd_num] && state.osd.stats[osd_num].block_size)
{
@ -130,9 +121,9 @@ function sum_object_counts(state, global_config)
object_size *= ((pool_cfg.pg_size||0) - (pool_cfg.parity_chunks||0));
}
object_size = BigInt(object_size);
for (const pg_num in pgstats[pool_id])
for (const pg_num in state.pg.stats[pool_id])
{
const st = pgstats[pool_id][pg_num];
const st = state.pg.stats[pool_id][pg_num];
if (st)
{
for (const k in object_counts)

View File

@ -35,8 +35,7 @@ function vitastor_persist_filter(cfg)
}
else if (key.substr(0, prefix.length+'/osd/'.length) == prefix+'/osd/' ||
key.substr(0, prefix.length+'/inode/stats/'.length) == prefix+'/inode/stats/' ||
key.substr(0, prefix.length+'/pg/stats/'.length) == prefix+'/pg/stats/' || // old name
key.substr(0, prefix.length+'/pgstats/'.length) == prefix+'/pgstats/' ||
key.substr(0, prefix.length+'/pg/stats/'.length) == prefix+'/pg/stats/' ||
key.substr(0, prefix.length+'/pool/stats/'.length) == prefix+'/pool/stats/' ||
key == prefix+'/stats')
{

View File

@ -24,7 +24,6 @@ public:
}
iovec iov;
std::vector<iovec> iov_list;
NodeVitastorImage *img = NULL;
int op = 0;
uint64_t offset = 0, len = 0, version = 0;
@ -142,8 +141,10 @@ NAN_METHOD(NodeVitastor::Read)
static NodeVitastorRequest* getWriteRequest(const Nan::FunctionCallbackInfo<v8::Value> & info, int argpos)
{
uint64_t offset = Nan::To<int64_t>(info[argpos+0]).FromJust();
const auto & bufarg = info[argpos+1];
char *buf = node::Buffer::Data(info[argpos+1]);
uint64_t len = node::Buffer::Length(info[argpos+1]);
uint64_t version = 0;
if (!info[argpos+2].IsEmpty() && info[argpos+2]->IsObject())
{
auto key = Nan::New<v8::String>("version").ToLocalChecked();
@ -158,33 +159,14 @@ static NodeVitastorRequest* getWriteRequest(const Nan::FunctionCallbackInfo<v8::
auto req = new NodeVitastorRequest(callback);
req->offset = offset;
req->len = len;
req->version = version;
if (bufarg->IsArray())
{
auto buffers = bufarg.As<v8::Array>();
req->len = 0;
for (uint32_t i = 0; i < buffers->Length(); i++)
{
auto buffer_obj = Nan::Get(buffers, i).ToLocalChecked();
char *buf = node::Buffer::Data(buffer_obj);
uint64_t len = node::Buffer::Length(buffer_obj);
req->iov_list.push_back({ .iov_base = buf, .iov_len = len });
req->len += len;
}
}
else
{
char *buf = node::Buffer::Data(bufarg);
uint64_t len = node::Buffer::Length(bufarg);
req->iov = { .iov_base = buf, .iov_len = req->len };
req->len = len;
}
req->iov = { .iov_base = buf, .iov_len = req->len };
return req;
}
// write(pool, inode, offset, buf: Buffer | Buffer[], { version }?, callback(err))
// write(pool, inode, offset, buffer, { version }?, callback(err))
NAN_METHOD(NodeVitastor::Write)
{
TRACE("NodeVitastor::Write");
@ -197,10 +179,7 @@ NAN_METHOD(NodeVitastor::Write)
auto req = getWriteRequest(info, 2);
std::unique_lock<std::mutex> lock(self->mu);
vitastor_c_write(self->c, ((pool << (64-POOL_ID_BITS)) | inode), req->offset, req->len, req->version,
req->iov_list.size() ? req->iov_list.data() : &req->iov,
req->iov_list.size() ? req->iov_list.size() : 1,
on_write_finish, req);
vitastor_c_write(self->c, ((pool << (64-POOL_ID_BITS)) | inode), req->offset, req->len, req->version, &req->iov, 1, on_write_finish, req);
}
// sync(callback(err))
@ -442,10 +421,7 @@ void NodeVitastorImage::exec_request(NodeVitastorRequest *req)
else if (req->op == NODE_VITASTOR_WRITE)
{
uint64_t ino = vitastor_c_inode_get_num(watch);
vitastor_c_write(cli->c, ino, req->offset, req->len, req->version,
req->iov_list.size() ? req->iov_list.data() : &req->iov,
req->iov_list.size() ? req->iov_list.size() : 1,
NodeVitastor::on_write_finish, req);
vitastor_c_write(cli->c, ino, req->offset, req->len, req->version, &req->iov, 1, NodeVitastor::on_write_finish, req);
}
else if (req->op == NODE_VITASTOR_SYNC)
{

View File

@ -19,7 +19,7 @@ public:
static NAN_METHOD(Create);
// read(pool, inode, offset, len, callback(err, buffer, version))
static NAN_METHOD(Read);
// write(pool, inode, offset, buf: Buffer | Buffer[], { version }?, callback(err))
// write(pool, inode, offset, buffer, { version }?, callback(err))
static NAN_METHOD(Write);
// sync(callback(err))
static NAN_METHOD(Sync);
@ -56,7 +56,7 @@ public:
static NAN_METHOD(Create);
// read(offset, len, callback(err, buffer, version))
static NAN_METHOD(Read);
// write(offset, buf: Buffer | Buffer[], { version }?, callback(err))
// write(offset, buffer, { version }?, callback(err))
static NAN_METHOD(Write);
// sync(callback(err))
static NAN_METHOD(Sync);

View File

@ -50,7 +50,7 @@ from cinder.volume import configuration
from cinder.volume import driver
from cinder.volume import volume_utils
VITASTOR_VERSION = '1.7.1'
VERSION = '1.7.1'
LOG = logging.getLogger(__name__)
@ -238,7 +238,7 @@ class VitastorDriver(driver.CloneableImageVD,
stats = {
'vendor_name': 'Vitastor',
'driver_version': VITASTOR_VERSION,
'driver_version': self.VERSION,
'storage_protocol': 'vitastor',
'total_capacity_gb': 'unknown',
'free_capacity_gb': 'unknown',

View File

@ -19,7 +19,7 @@ if("${CMAKE_INSTALL_PREFIX}" MATCHES "^/usr/local/?$")
set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}")
endif()
add_definitions(-DVITASTOR_VERSION="1.7.1")
add_definitions(-DVERSION="1.7.1")
add_definitions(-D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -Wall -Wno-sign-compare -Wno-comment -Wno-parentheses -Wno-pointer-arith -fdiagnostics-color=always -fno-omit-frame-pointer -I ${CMAKE_SOURCE_DIR}/src)
add_link_options(-fno-omit-frame-pointer)
if (${WITH_ASAN})

View File

@ -13,7 +13,7 @@ target_link_libraries(vitastor_blk
# for timerfd_manager
vitastor_common
)
set_target_properties(vitastor_blk PROPERTIES VERSION ${VITASTOR_VERSION} SOVERSION 0)
set_target_properties(vitastor_blk PROPERTIES VERSION ${VERSION} SOVERSION 0)
if (${WITH_FIO})
# libfio_vitastor_blk.so

View File

@ -29,7 +29,7 @@ target_link_libraries(vitastor_client
${LIBURING_LIBRARIES}
${IBVERBS_LIBRARIES}
)
set_target_properties(vitastor_client PROPERTIES VERSION ${VITASTOR_VERSION} SOVERSION 0)
set_target_properties(vitastor_client PROPERTIES VERSION ${VERSION} SOVERSION 0)
configure_file(vitastor.pc.in vitastor.pc @ONLY)
if (${WITH_FIO})

View File

@ -452,10 +452,11 @@ void cluster_client_t::on_change_pg_state_hook(pool_id_t pool_id, pg_num_t pg_nu
if (pg_cfg.cur_primary != prev_primary)
{
// Repeat this PG operations because an OSD which stopped being primary may not fsync operations
wb->repeat_ops_for(this, 0, pool_id, pg_num);
if (wb->repeat_ops_for(this, 0, pool_id, pg_num) > 0)
{
continue_ops();
}
}
// Always continue to resume operations hung because of lack of the primary OSD
continue_ops();
}
bool cluster_client_t::get_immediate_commit(uint64_t inode)
@ -1065,11 +1066,11 @@ bool cluster_client_t::try_send(cluster_op_t *op, int i)
!pg_it->second.pause && pg_it->second.cur_primary)
{
osd_num_t primary_osd = pg_it->second.cur_primary;
part->osd_num = primary_osd;
auto peer_it = msgr.osd_peer_fds.find(primary_osd);
if (peer_it != msgr.osd_peer_fds.end())
{
int peer_fd = peer_it->second;
part->osd_num = primary_osd;
part->flags |= PART_SENT;
op->inflight_count++;
uint64_t pg_bitmap_size = (pool_cfg.data_block_size / pool_cfg.bitmap_granularity / 8) * (

View File

@ -333,10 +333,7 @@ void etcd_state_client_t::start_etcd_watcher()
etcd_watch_ws = NULL;
}
if (this->log_level > 1)
{
fprintf(stderr, "Trying to connect to etcd websocket at %s, watch from revision %ju/%ju/%ju\n", etcd_address.c_str(),
etcd_watch_revision_config, etcd_watch_revision_osd, etcd_watch_revision_pg);
}
fprintf(stderr, "Trying to connect to etcd websocket at %s, watch from revision %ju\n", etcd_address.c_str(), etcd_watch_revision);
etcd_watch_ws = open_websocket(tfd, etcd_address, etcd_api_path+"/watch", etcd_slow_timeout,
[this, cur_addr = selected_etcd_address](const http_response_t *msg)
{
@ -351,20 +348,16 @@ void etcd_state_client_t::start_etcd_watcher()
}
else
{
uint64_t watch_id = data["result"]["watch_id"].uint64_value();
if (data["result"]["created"].bool_value())
{
uint64_t watch_id = data["result"]["watch_id"].uint64_value();
if (watch_id == ETCD_CONFIG_WATCH_ID ||
watch_id == ETCD_PG_STATE_WATCH_ID ||
watch_id == ETCD_PG_HISTORY_WATCH_ID ||
watch_id == ETCD_OSD_STATE_WATCH_ID)
{
etcd_watches_initialised++;
}
if (etcd_watches_initialised == ETCD_TOTAL_WATCHES && this->log_level > 0)
{
fprintf(stderr, "Successfully subscribed to etcd at %s, revision %ju/%ju/%ju\n", cur_addr.c_str(),
etcd_watch_revision_config, etcd_watch_revision_osd, etcd_watch_revision_pg);
}
fprintf(stderr, "Successfully subscribed to etcd at %s, revision %ju\n", cur_addr.c_str(), etcd_watch_revision);
}
if (data["result"]["canceled"].bool_value())
{
@ -382,7 +375,7 @@ void etcd_state_client_t::start_etcd_watcher()
data["result"]["compact_revision"].uint64_value());
http_close(etcd_watch_ws);
etcd_watch_ws = NULL;
etcd_watch_revision_config = etcd_watch_revision_osd = etcd_watch_revision_pg = 0;
etcd_watch_revision = 0;
on_reload_hook();
}
return;
@ -400,29 +393,13 @@ void etcd_state_client_t::start_etcd_watcher()
exit(1);
}
}
// Save revision only if it's present in the message - because sometimes etcd sends something without a header, like:
// {"error": {"grpc_code": 14, "http_code": 503, "http_status": "Service Unavailable", "message": "error reading from server: EOF"}}
if (etcd_watches_initialised == ETCD_TOTAL_WATCHES && !data["result"]["header"]["revision"].is_null())
{
// Restart watchers from the same revision number as in the last received message,
// not from the next one to protect against revision being split into multiple messages,
// even though etcd guarantees not to do that **within a single watcher** without fragment=true:
// https://etcd.io/docs/v3.5/learning/api_guarantees/#watch-apis
// Revision contents are ALWAYS split into separate messages for different watchers though!
// So generally we have to resume each watcher from its own revision...
// Progress messages may have watch_id=-1 if sent on behalf of multiple watchers though.
// And antietcd has an advanced semantic which merges the same revision for all watchers
// into one message and just omits watch_id.
// So we also have to handle the case where watch_id is -1 or not present (0).
auto watch_rev = data["result"]["header"]["revision"].uint64_value();
if (!watch_id || watch_id == UINT64_MAX)
etcd_watch_revision_config = etcd_watch_revision_osd = etcd_watch_revision_pg = watch_rev;
else if (watch_id == ETCD_CONFIG_WATCH_ID)
etcd_watch_revision_config = watch_rev;
else if (watch_id == ETCD_PG_STATE_WATCH_ID)
etcd_watch_revision_pg = watch_rev;
else if (watch_id == ETCD_OSD_STATE_WATCH_ID)
etcd_watch_revision_osd = watch_rev;
// Protect against a revision being split into multiple messages and some
// of them being lost, even though I'm not sure if etcd actually splits them.
// Also sometimes etcd sends something without a header, like:
// {"error": {"grpc_code": 14, "http_code": 503, "http_status": "Service Unavailable", "message": "error reading from server: EOF"}}
etcd_watch_revision = data["result"]["header"]["revision"].uint64_value();
addresses_to_try.clear();
}
// First gather all changes into a hash to remove multiple overwrites
@ -480,7 +457,7 @@ void etcd_state_client_t::start_etcd_watcher()
{ "create_request", json11::Json::object {
{ "key", base64_encode(etcd_prefix+"/config/") },
{ "range_end", base64_encode(etcd_prefix+"/config0") },
{ "start_revision", etcd_watch_revision_config },
{ "start_revision", etcd_watch_revision },
{ "watch_id", ETCD_CONFIG_WATCH_ID },
{ "progress_notify", true },
} }
@ -489,21 +466,29 @@ void etcd_state_client_t::start_etcd_watcher()
{ "create_request", json11::Json::object {
{ "key", base64_encode(etcd_prefix+"/osd/state/") },
{ "range_end", base64_encode(etcd_prefix+"/osd/state0") },
{ "start_revision", etcd_watch_revision_osd },
{ "start_revision", etcd_watch_revision },
{ "watch_id", ETCD_OSD_STATE_WATCH_ID },
{ "progress_notify", true },
} }
}).dump());
http_post_message(etcd_watch_ws, WS_TEXT, json11::Json(json11::Json::object {
{ "create_request", json11::Json::object {
{ "key", base64_encode(etcd_prefix+"/pg/") },
{ "range_end", base64_encode(etcd_prefix+"/pg0") },
{ "start_revision", etcd_watch_revision_pg },
{ "key", base64_encode(etcd_prefix+"/pg/state/") },
{ "range_end", base64_encode(etcd_prefix+"/pg/state0") },
{ "start_revision", etcd_watch_revision },
{ "watch_id", ETCD_PG_STATE_WATCH_ID },
{ "progress_notify", true },
} }
}).dump());
// FIXME: Do not watch /pg/history/ at all in client code (not in OSD)
http_post_message(etcd_watch_ws, WS_TEXT, json11::Json(json11::Json::object {
{ "create_request", json11::Json::object {
{ "key", base64_encode(etcd_prefix+"/pg/history/") },
{ "range_end", base64_encode(etcd_prefix+"/pg/history0") },
{ "start_revision", etcd_watch_revision },
{ "watch_id", ETCD_PG_HISTORY_WATCH_ID },
{ "progress_notify", true },
} }
}).dump());
if (on_start_watcher_hook)
{
on_start_watcher_hook(etcd_watch_ws);
@ -606,11 +591,6 @@ void etcd_state_client_t::load_pgs()
{ "key", base64_encode(etcd_prefix+"/config/pgs") },
} }
},
json11::Json::object {
{ "request_range", json11::Json::object {
{ "key", base64_encode(etcd_prefix+"/pg/config") },
} }
},
json11::Json::object {
{ "request_range", json11::Json::object {
{ "key", base64_encode(etcd_prefix+"/config/inode/") },
@ -660,10 +640,13 @@ void etcd_state_client_t::load_pgs()
return;
}
reset_pg_exists();
etcd_watch_revision_config = etcd_watch_revision_osd = etcd_watch_revision_pg = data["header"]["revision"].uint64_value()+1;
if (this->log_level > 3)
if (!etcd_watch_revision)
{
fprintf(stderr, "Loaded revision %ju of PG configuration\n", etcd_watch_revision_pg-1);
etcd_watch_revision = data["header"]["revision"].uint64_value()+1;
if (this->log_level > 3)
{
fprintf(stderr, "Loaded revision %ju of PG configuration\n", etcd_watch_revision-1);
}
}
for (auto & res: data["responses"].array_items())
{
@ -912,17 +895,8 @@ void etcd_state_client_t::parse_state(const etcd_kv_t & kv)
on_change_pool_config_hook();
}
}
else if (key == etcd_prefix+"/pg/config" || key == etcd_prefix+"/config/pgs")
else if (key == etcd_prefix+"/config/pgs")
{
if (key == etcd_prefix+"/pg/config")
{
new_pg_config = !value.is_null();
}
else if (new_pg_config)
{
// Ignore old key if the new one is present
return;
}
for (auto & pool_item: this->pool_config)
{
for (auto & pg_item: pool_item.second.pg_config)

View File

@ -10,9 +10,10 @@
#include "timerfd_manager.h"
#define ETCD_CONFIG_WATCH_ID 1
#define ETCD_OSD_STATE_WATCH_ID 2
#define ETCD_PG_STATE_WATCH_ID 3
#define ETCD_TOTAL_WATCHES 3
#define ETCD_PG_STATE_WATCH_ID 2
#define ETCD_PG_HISTORY_WATCH_ID 3
#define ETCD_OSD_STATE_WATCH_ID 4
#define ETCD_TOTAL_WATCHES 4
#define DEFAULT_BLOCK_SIZE 128*1024
#define MIN_DATA_BLOCK_SIZE 4*1024
@ -94,7 +95,7 @@ protected:
std::string selected_etcd_address;
std::vector<std::string> addresses_to_try;
std::vector<inode_watch_t*> watches;
bool new_pg_config = false;
http_co_t *etcd_watch_ws = NULL, *keepalive_client = NULL;
int ws_keepalive_timer = -1;
int ws_alive = 0;
bool rand_initialized = false;
@ -114,11 +115,8 @@ public:
int log_level = 0;
timerfd_manager_t *tfd = NULL;
http_co_t *etcd_watch_ws = NULL, *keepalive_client = NULL;
int etcd_watches_initialised = 0;
uint64_t etcd_watch_revision_config = 0;
uint64_t etcd_watch_revision_osd = 0;
uint64_t etcd_watch_revision_pg = 0;
uint64_t etcd_watch_revision = 0;
std::map<pool_id_t, pool_config_t> pool_config;
std::map<osd_num_t, json11::Json> peer_states;
std::set<osd_num_t> seen_peers;

View File

@ -253,7 +253,7 @@ nla_put_failure:
const char *exe_name = NULL;
const char *help_text =
"Vitastor NBD proxy " VITASTOR_VERSION "\n"
"Vitastor NBD proxy " VERSION "\n"
"(c) Vitaliy Filippov, 2020+ (VNPL-1.1)\n"
"\n"
"COMMANDS:\n"

View File

@ -17,7 +17,7 @@
static const char *exe_name = NULL;
static const char* help_text =
"Vitastor command-line tool " VITASTOR_VERSION "\n"
"Vitastor command-line tool " VERSION "\n"
"(c) Vitaliy Filippov, 2019+ (VNPL-1.1)\n"
"\n"
"COMMANDS:\n"

View File

@ -49,8 +49,8 @@ struct pg_lister_t
{ "success", json11::Json::array {
json11::Json::object {
{ "request_range", json11::Json::object {
{ "key", base64_encode(parent->cli->st_cli.etcd_prefix+"/pgstats"+(pool_id ? "/"+std::to_string(pool_id)+"/" : "/")) },
{ "range_end", base64_encode(parent->cli->st_cli.etcd_prefix+"/pgstats"+(pool_id ? "/"+std::to_string(pool_id)+"0" : "0")) },
{ "key", base64_encode(parent->cli->st_cli.etcd_prefix+"/pg/stats"+(pool_id ? "/"+std::to_string(pool_id)+"/" : "/")) },
{ "range_end", base64_encode(parent->cli->st_cli.etcd_prefix+"/pg/stats"+(pool_id ? "/"+std::to_string(pool_id)+"0" : "0")) },
} },
},
} },
@ -65,7 +65,7 @@ resume_1:
state = 100;
return;
}
parent->iterate_kvs_2(parent->etcd_result["responses"][0]["response_range"]["kvs"], "/pgstats/", [&](pool_id_t pool_id, uint64_t pg_num, json11::Json value)
parent->iterate_kvs_2(parent->etcd_result["responses"][0]["response_range"]["kvs"], "/pg/stats/", [&](pool_id_t pool_id, uint64_t pg_num, json11::Json value)
{
pg_stats[(pool_pg_num_t){ .pool_id = pool_id, .pg_num = (pg_num_t)pg_num }] = value;
});

View File

@ -214,10 +214,10 @@ resume_1:
json11::Json::object {
{ "request_range", json11::Json::object {
{ "key", base64_encode(
parent->cli->st_cli.etcd_prefix+"/pgstats/"
parent->cli->st_cli.etcd_prefix+"/pg/stats/"
) },
{ "range_end", base64_encode(
parent->cli->st_cli.etcd_prefix+"/pgstats0"
parent->cli->st_cli.etcd_prefix+"/pg/stats0"
) },
} },
},
@ -235,7 +235,7 @@ resume_1:
}
// Calculate recovery percent
std::map<pool_id_t, object_counts_t> counts;
parent->iterate_kvs_2(parent->etcd_result["responses"][0]["response_range"]["kvs"], "/pgstats/",
parent->iterate_kvs_2(parent->etcd_result["responses"][0]["response_range"]["kvs"], "/pg/stats/",
[&](pool_id_t pool_id, uint64_t pg_num, json11::Json value)
{
auto & cnt = counts[pool_id];

View File

@ -176,7 +176,7 @@ struct rm_osd_t
json11::Json::object {
{ "request_range", json11::Json::object {
{ "key", base64_encode(
parent->cli->st_cli.etcd_prefix+"/pg/config"
parent->cli->st_cli.etcd_prefix+"/config/pgs"
) },
} },
},
@ -229,7 +229,7 @@ struct rm_osd_t
}
if (!new_pgs.is_null())
{
auto pgs_key = base64_encode(parent->cli->st_cli.etcd_prefix+"/pg/config");
auto pgs_key = base64_encode(parent->cli->st_cli.etcd_prefix+"/config/pgs");
rm_items.push_back(json11::Json::object {
{ "request_put", json11::Json::object {
{ "key", pgs_key },
@ -427,7 +427,7 @@ struct rm_osd_t
{ "target", "MOD" },
{ "key", history_key },
{ "result", "LESS" },
{ "mod_revision", parent->cli->st_cli.etcd_watch_revision_pg+1 },
{ "mod_revision", parent->cli->st_cli.etcd_watch_revision+1 },
});
}
}

View File

@ -5,7 +5,7 @@
#include "str_util.h"
static const char *help_text =
"Vitastor disk management tool " VITASTOR_VERSION "\n"
"Vitastor disk management tool " VERSION "\n"
"(c) Vitaliy Filippov, 2022+ (VNPL-1.1)\n"
"\n"
"COMMANDS:\n"

View File

@ -10,7 +10,7 @@ set_target_properties(vitastor_kv PROPERTIES PUBLIC_HEADER "kv/vitastor_kv.h")
target_link_libraries(vitastor_kv
vitastor_client
)
set_target_properties(vitastor_kv PROPERTIES VERSION ${VITASTOR_VERSION} SOVERSION 0)
set_target_properties(vitastor_kv PROPERTIES VERSION ${VERSION} SOVERSION 0)
# vitastor-kv
add_executable(vitastor-kv

View File

@ -53,7 +53,7 @@ nfs_proxy_t::~nfs_proxy_t()
}
static const char* help_text =
"Vitastor NFS 3.0 proxy " VITASTOR_VERSION "\n"
"Vitastor NFS 3.0 proxy " VERSION "\n"
"(c) Vitaliy Filippov, 2021+ (VNPL-1.1)\n"
"\n"
"vitastor-nfs (--fs <NAME> | --block) [-o <OPT>] mount <MOUNTPOINT>\n"
@ -372,6 +372,24 @@ void nfs_proxy_t::watch_stats()
assert(cli->st_cli.on_start_watcher_hook == NULL);
cli->st_cli.on_start_watcher_hook = [this](http_co_t *etcd_watch_ws)
{
http_post_message(etcd_watch_ws, WS_TEXT, json11::Json(json11::Json::object {
{ "create_request", json11::Json::object {
{ "key", base64_encode(cli->st_cli.etcd_prefix+"/inode/stats/") },
{ "range_end", base64_encode(cli->st_cli.etcd_prefix+"/inode/stats0") },
{ "start_revision", cli->st_cli.etcd_watch_revision },
{ "watch_id", ETCD_INODE_STATS_WATCH_ID },
{ "progress_notify", true },
} }
}).dump());
http_post_message(etcd_watch_ws, WS_TEXT, json11::Json(json11::Json::object {
{ "create_request", json11::Json::object {
{ "key", base64_encode(cli->st_cli.etcd_prefix+"/pool/stats/") },
{ "range_end", base64_encode(cli->st_cli.etcd_prefix+"/pool/stats0") },
{ "start_revision", cli->st_cli.etcd_watch_revision },
{ "watch_id", ETCD_POOL_STATS_WATCH_ID },
{ "progress_notify", true },
} }
}).dump());
cli->st_cli.etcd_txn_slow(json11::Json::object {
{ "success", json11::Json::array {
json11::Json::object {
@ -397,28 +415,6 @@ void nfs_proxy_t::watch_stats()
parse_stats(kv);
}
}
if (cli->st_cli.etcd_watch_ws)
{
auto watch_rev = res["header"]["revision"].uint64_value()+1;
http_post_message(cli->st_cli.etcd_watch_ws, WS_TEXT, json11::Json(json11::Json::object {
{ "create_request", json11::Json::object {
{ "key", base64_encode(cli->st_cli.etcd_prefix+"/inode/stats/") },
{ "range_end", base64_encode(cli->st_cli.etcd_prefix+"/inode/stats0") },
{ "start_revision", watch_rev },
{ "watch_id", ETCD_INODE_STATS_WATCH_ID },
{ "progress_notify", true },
} }
}).dump());
http_post_message(cli->st_cli.etcd_watch_ws, WS_TEXT, json11::Json(json11::Json::object {
{ "create_request", json11::Json::object {
{ "key", base64_encode(cli->st_cli.etcd_prefix+"/pool/stats/") },
{ "range_end", base64_encode(cli->st_cli.etcd_prefix+"/pool/stats0") },
{ "start_revision", watch_rev },
{ "watch_id", ETCD_POOL_STATS_WATCH_ID },
{ "progress_notify", true },
} }
}).dump());
}
});
};
cli->st_cli.on_change_hook = [this, old_hook = cli->st_cli.on_change_hook](std::map<std::string, etcd_kv_t> & changes)

View File

@ -169,7 +169,6 @@ json11::Json osd_t::get_osd_state()
else
st["addresses"] = getifaddr_list();
st["host"] = std::string(hostname.data(), hostname.size());
st["version"] = VITASTOR_VERSION;
st["port"] = listening_port;
st["primary_enabled"] = run_primary;
st["blockstore_enabled"] = bs ? true : false;
@ -200,7 +199,6 @@ json11::Json osd_t::get_statistics()
st["bitmap_granularity"] = (uint64_t)bs_bitmap_granularity;
st["immediate_commit"] = immediate_commit == IMMEDIATE_ALL ? "all" : (immediate_commit == IMMEDIATE_SMALL ? "small" : "none");
st["host"] = self_state["host"];
st["version"] = VITASTOR_VERSION;
json11::Json::object op_stats, subop_stats;
for (int i = OSD_OP_MIN; i <= OSD_OP_MAX; i++)
{
@ -373,7 +371,7 @@ void osd_t::report_statistics()
pg_stats["write_osd_set"] = pg.cur_set;
txn.push_back(json11::Json::object {
{ "request_put", json11::Json::object {
{ "key", base64_encode(st_cli.etcd_prefix+"/pgstats/"+std::to_string(pg.pool_id)+"/"+std::to_string(pg.pg_num)) },
{ "key", base64_encode(st_cli.etcd_prefix+"/pg/stats/"+std::to_string(pg.pool_id)+"/"+std::to_string(pg.pg_num)) },
{ "value", base64_encode(json11::Json(pg_stats).dump()) },
} }
});
@ -420,7 +418,7 @@ void osd_t::on_change_etcd_state_hook(std::map<std::string, etcd_kv_t> & changes
}
if (run_primary)
{
bool pgs = changes.find(st_cli.etcd_prefix+"/pg/config") != changes.end();
bool pgs = changes.find(st_cli.etcd_prefix+"/config/pgs") != changes.end();
if (pools || pgs)
{
apply_pg_count();
@ -905,7 +903,7 @@ void osd_t::report_pg_states()
{ "target", "MOD" },
{ "key", state_key_base64 },
{ "result", "LESS" },
{ "mod_revision", st_cli.etcd_watch_revision_pg+1 },
{ "mod_revision", st_cli.etcd_watch_revision+1 },
});
continue;
}
@ -976,7 +974,7 @@ void osd_t::report_pg_states()
{ "target", "MOD" },
{ "key", history_key },
{ "result", "LESS" },
{ "mod_revision", st_cli.etcd_watch_revision_pg+1 },
{ "mod_revision", st_cli.etcd_watch_revision+1 },
});
success.push_back(json11::Json::object {
{ "request_put", json11::Json::object {

View File

@ -20,7 +20,7 @@ static void handle_sigint(int sig)
}
static const char* help_text =
"Vitastor OSD (block object storage daemon) " VITASTOR_VERSION "\n"
"Vitastor OSD (block object storage daemon) " VERSION "\n"
"(c) Vitaliy Filippov, 2019+ (VNPL-1.1)\n"
"\n"
"OSDs are usually started by vitastor-disk.\n"

View File

@ -22,7 +22,7 @@ void configure_single_pg_pool(cluster_client_t *cli)
},
});
cli->st_cli.parse_state((etcd_kv_t){
.key = "/pg/config",
.key = "/config/pgs",
.value = json11::Json::object {
{ "items", json11::Json::object {
{ "1", json11::Json::object {

View File

@ -25,22 +25,11 @@ ETCD_IP=${ETCD_IP:-127.0.0.1}
ETCD_PORT=${ETCD_PORT:-12379}
ETCD_COUNT=${ETCD_COUNT:-1}
ANTIETCD=${ANTIETCD}
USE_RAMDISK=${USE_RAMDISK}
RAMDISK=/run/user/$(id -u)
findmnt $RAMDISK >/dev/null || (sudo mkdir -p $RAMDISK && sudo mount -t tmpfs tmpfs $RAMDISK)
if [[ -z "$KEEP_DATA" ]]; then
if [ "$KEEP_DATA" = "" ]; then
rm -rf ./testdata
rm -rf /run/user/$(id -u)/testdata_etcd* /run/user/$(id -u)/testdata_bin
rm -rf /run/user/$(id -u)/testdata_etcd*
mkdir -p ./testdata
if [[ -n "$USE_RAMDISK" ]]; then
OSD_ARGS="$OSD_ARGS --data_io cached"
mkdir -p /run/user/$(id -u)/testdata_bin
ln -s /run/user/$(id -u)/testdata_bin ./testdata/bin
else
mkdir -p ./testdata/bin
fi
fi
ETCD_URL="http://$ETCD_IP:$ETCD_PORT"
@ -52,7 +41,9 @@ start_etcd()
{
local i=$1
if [[ -z "$ANTIETCD" ]]; then
ionice -c2 -n0 $ETCD -name etcd$i --data-dir $RAMDISK/testdata_etcd$i \
local t=/run/user/$(id -u)
findmnt $t >/dev/null || (sudo mkdir -p $t && sudo mount -t tmpfs tmpfs $t)
ionice -c2 -n0 $ETCD -name etcd$i --data-dir /run/user/$(id -u)/testdata_etcd$i \
--advertise-client-urls http://$ETCD_IP:$((ETCD_PORT+2*i-2)) --listen-client-urls http://$ETCD_IP:$((ETCD_PORT+2*i-2)) \
--initial-advertise-peer-urls http://$ETCD_IP:$((ETCD_PORT+2*i-1)) --listen-peer-urls http://$ETCD_IP:$((ETCD_PORT+2*i-1)) \
--initial-cluster-token vitastor-tests-etcd --initial-cluster-state new \

View File

@ -50,8 +50,8 @@ if ! type -t osd_dev; then
osd_dev()
{
local i=$1
[[ -f ./testdata/bin/test_osd$i.bin ]] || dd if=/dev/zero of=./testdata/bin/test_osd$i.bin bs=1024 count=1 seek=$((OSD_SIZE*1024-1))
echo ./testdata/bin/test_osd$i.bin
[[ -f ./testdata/test_osd$i.bin ]] || dd if=/dev/zero of=./testdata/test_osd$i.bin bs=1024 count=1 seek=$((OSD_SIZE*1024-1))
echo ./testdata/test_osd$i.bin
}
fi
@ -89,7 +89,7 @@ wait_up()
local i=0
local configured=0
while [[ $i -lt $sec ]]; do
if $ETCDCTL get /vitastor/pg/config --print-value-only | jq -s -e '(. | length) != 0 and ([ .[0].items["1"][] |
if $ETCDCTL get /vitastor/config/pgs --print-value-only | jq -s -e '(. | length) != 0 and ([ .[0].items["1"][] |
select(((.osd_set | select(. != 0) | sort | unique) | length) == '$PG_SIZE') ] | length) == '$PG_COUNT; then
configured=1
if $ETCDCTL get /vitastor/pg/state/1/ --prefix --print-value-only | jq -s -e '[ .[] | select(.state == ["active"]) ] | length == '$PG_COUNT; then

View File

@ -13,14 +13,14 @@ start_osd 4
sleep 2
for i in {1..30}; do
($ETCDCTL get /vitastor/pg/config --print-value-only |\
($ETCDCTL get /vitastor/config/pgs --print-value-only |\
jq -s -e '([ .[0].items["1"] | map(.osd_set)[][] ] | sort | unique == ["1","2","3","4"])') && \
($ETCDCTL get --prefix /vitastor/pg/state/ --print-value-only | jq -s -e '([ .[] | select(.state == ["active"]) ] | length) == '$PG_COUNT) && \
break
sleep 1
done
if ! ($ETCDCTL get /vitastor/pg/config --print-value-only |\
if ! ($ETCDCTL get /vitastor/config/pgs --print-value-only |\
jq -s -e '([ .[0].items["1"] | map(.osd_set)[][] ] | sort | unique == ["1","2","3","4"])'); then
format_error "FAILED: OSD NOT ADDED INTO DISTRIBUTION"
fi
@ -35,14 +35,14 @@ build/src/cmd/vitastor-cli --etcd_address $ETCD_URL rm-osd --force 4
sleep 2
for i in {1..30}; do
($ETCDCTL get /vitastor/pg/config --print-value-only |\
($ETCDCTL get /vitastor/config/pgs --print-value-only |\
jq -s -e '([ .[0].items["1"] | map(.osd_set)[][] ] | sort | unique == ["1","2","3"])') && \
($ETCDCTL get --prefix /vitastor/pg/state/ --print-value-only | jq -s -e '([ .[] | select(.state == ["active"] or .state == ["active", "left_on_dead"]) ] | length) == '$PG_COUNT'') && \
break
sleep 1
done
if ! ($ETCDCTL get /vitastor/pg/config --print-value-only |\
if ! ($ETCDCTL get /vitastor/config/pgs --print-value-only |\
jq -s -e '([ .[0].items["1"] | map(.osd_set)[][] ] | sort | unique == ["1","2","3"])'); then
format_error "FAILED: OSD NOT REMOVED FROM DISTRIBUTION"
fi

View File

@ -23,7 +23,7 @@ try_change()
$ETCDCTL put /vitastor/config/pools '{"1":{'$POOLCFG',"pg_size":'$PG_SIZE',"pg_minsize":'$PG_MINSIZE',"pg_count":'$n'}}'
for i in {1..60}; do
($ETCDCTL get /vitastor/pg/config --print-value-only | jq -s -e '(.[0].items["1"] | map((.osd_set | select(. > 0)) | length == 2) | length) == '$n) && \
($ETCDCTL get /vitastor/config/pgs --print-value-only | jq -s -e '(.[0].items["1"] | map((.osd_set | select(. > 0)) | length == 2) | length) == '$n) && \
($ETCDCTL get --prefix /vitastor/pg/state/ --print-value-only | jq -s -e '([ .[] | select(.state == ["active"] or .state == ["active", "has_misplaced"]) ] | length) == '$n'') && \
break
sleep 1
@ -36,14 +36,14 @@ try_change()
sleep 1
done
if ! ($ETCDCTL get /vitastor/pg/config --print-value-only | jq -s -e '(.[0].items["1"] | map((.osd_set | select(. > 0)) | length == 2) | length) == '$n); then
$ETCDCTL get /vitastor/pg/config
if ! ($ETCDCTL get /vitastor/config/pgs --print-value-only | jq -s -e '(.[0].items["1"] | map((.osd_set | select(. > 0)) | length == 2) | length) == '$n); then
$ETCDCTL get /vitastor/config/pgs
$ETCDCTL get --prefix /vitastor/pg/state/
format_error "FAILED: $n PGS NOT CONFIGURED"
fi
if ! ($ETCDCTL get --prefix /vitastor/pg/state/ --print-value-only | jq -s -e '([ .[] | select(.state == ["active"]) ] | length) == '$n); then
$ETCDCTL get /vitastor/pg/config
$ETCDCTL get /vitastor/config/pgs
$ETCDCTL get --prefix /vitastor/pg/state/
format_error "FAILED: $n PGS NOT UP"
fi
@ -53,7 +53,7 @@ try_change()
nobj=0
waittime=0
while [[ $nobj -ne $NOBJ && $waittime -lt 7 ]]; do
nobj=`$ETCDCTL get --prefix '/vitastor/pgstats' --print-value-only | jq -s '[ .[].object_count ] | reduce .[] as $num (0; .+$num)'`
nobj=`$ETCDCTL get --prefix '/vitastor/pg/stats' --print-value-only | jq -s '[ .[].object_count ] | reduce .[] as $num (0; .+$num)'`
if [[ $nobj -ne $NOBJ ]]; then
waittime=$((waittime+1))
sleep 1

View File

@ -13,7 +13,7 @@ try_change()
$ETCDCTL put /vitastor/config/pools '{"1":{'$POOLCFG',"pg_size":'$s',"pg_minsize":'$PG_MINSIZE',"pg_count":'$n'}}'
for i in {1..10}; do
($ETCDCTL get /vitastor/pg/config --print-value-only |\
($ETCDCTL get /vitastor/config/pgs --print-value-only |\
jq -s -e '(.[0].items["1"] | map( ([ .osd_set[] | select(. != 0) ] | length) == '$s' ) | length == '$n')
and ([ .[0].items["1"] | map(.osd_set)[][] ] | sort | unique == ["1","2","3"])') && \
($ETCDCTL get --prefix /vitastor/pg/state/ --print-value-only | jq -s -e '([ .[] | select(.state == ["active"]) ] | length) == '$n'') && \
@ -21,16 +21,16 @@ try_change()
sleep 1
done
if ! ($ETCDCTL get /vitastor/pg/config --print-value-only |\
if ! ($ETCDCTL get /vitastor/config/pgs --print-value-only |\
jq -s -e '(.[0].items["1"] | map( ([ .osd_set[] | select(. != 0) ] | length) == '$s' ) | length == '$n')
and ([ .[0].items["1"] | map(.osd_set)[][] ] | sort | unique == ["1","2","3"])'); then
$ETCDCTL get /vitastor/pg/config
$ETCDCTL get /vitastor/config/pgs
$ETCDCTL get --prefix /vitastor/pg/state/
format_error "FAILED: PG SIZE NOT CHANGED OR SOME OSDS DO NOT HAVE PGS"
fi
if ! ($ETCDCTL get --prefix /vitastor/pg/state/ --print-value-only | jq -s -e '([ .[] | select(.state == ["active"]) ] | length) == '$n); then
$ETCDCTL get /vitastor/pg/config
$ETCDCTL get /vitastor/config/pgs
$ETCDCTL get --prefix /vitastor/pg/state/
format_error "FAILED: PGS NOT UP AFTER PG SIZE CHANGE"
fi

View File

@ -13,13 +13,13 @@ $ETCDCTL put /vitastor/config/inode/1/1 '{"name":"testimg","size":'$((IMG_SIZE*1
# Write
LD_PRELOAD="build/src/client/libfio_vitastor.so" \
fio -thread -name=test -ioengine=build/src/client/libfio_vitastor.so -bs=1M -direct=1 -iodepth=4 \
-mirror_file=./testdata/bin/mirror.bin -end_fsync=1 -rw=write -etcd=$ETCD_URL -image=testimg -runtime=10
-mirror_file=./testdata/mirror.bin -end_fsync=1 -rw=write -etcd=$ETCD_URL -image=testimg -runtime=10
# Intentionally corrupt OSD data and restart it
kill $OSD1_PID
data_offset=$(build/src/disk_tool/vitastor-disk simple-offsets ./testdata/bin/test_osd1.bin $OFFSET_ARGS | grep data_offset | awk '{print $2}')
truncate -s $data_offset ./testdata/bin/test_osd1.bin
dd if=/dev/zero of=./testdata/bin/test_osd1.bin bs=1024 count=1 seek=$((OSD_SIZE*1024-1))
data_offset=$(build/src/disk_tool/vitastor-disk simple-offsets ./testdata/test_osd1.bin $OFFSET_ARGS | grep data_offset | awk '{print $2}')
truncate -s $data_offset ./testdata/test_osd1.bin
dd if=/dev/zero of=./testdata/test_osd1.bin bs=1024 count=1 seek=$((OSD_SIZE*1024-1))
start_osd 1
# FIXME: corrupt the journal WHEN OSD IS RUNNING and check reads too
@ -30,8 +30,8 @@ wait_up 10
# Read everything back
qemu-img convert -S 4096 -p \
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testimg" \
-O raw ./testdata/bin/read.bin
-O raw ./testdata/read.bin
diff ./testdata/bin/read.bin ./testdata/bin/mirror.bin
diff ./testdata/read.bin ./testdata/mirror.bin
format_green OK

View File

@ -28,7 +28,7 @@ $ETCDCTL get --print-value-only /vitastor/config/pools | jq -s -e '. == [{"1":{"
sleep 2
$ETCDCTL get --prefix /vitastor/pg/config --print-value-only | \
$ETCDCTL get --prefix /vitastor/config/pgs --print-value-only | \
jq -s -e '([ .[0].items["1"] | .[].osd_set | map_values(. | tonumber) | select((.[0] <= 4) != (.[1] <= 4)) ] | length) == 4'
format_green OK

View File

@ -20,7 +20,7 @@ $ETCDCTL put /vitastor/config/inode/1/1 '{"name":"testimg","size":'$((IMG_SIZE*1
LD_PRELOAD="build/src/client/libfio_vitastor.so" \
fio -thread -name=test -ioengine=build/src/client/libfio_vitastor.so -bs=4M -direct=1 -iodepth=1 -fsync=1 -rw=write \
-mirror_file=./testdata/bin/mirror.bin -etcd=$ETCD_URL -image=testimg -cluster_log_level=10
-mirror_file=./testdata/mirror.bin -etcd=$ETCD_URL -image=testimg -cluster_log_level=10
kill_osds()
{
@ -53,13 +53,13 @@ kill_osds &
LD_PRELOAD="build/src/client/libfio_vitastor.so" \
fio -thread -name=test -ioengine=build/src/client/libfio_vitastor.so -bsrange=4k-128k -blockalign=4k -direct=1 -iodepth=32 -fsync=256 -rw=randrw \
-randrepeat=0 -refill_buffers=1 -mirror_file=./testdata/bin/mirror.bin -etcd=$ETCD_URL -image=testimg -loops=10 -runtime=120
-randrepeat=0 -refill_buffers=1 -mirror_file=./testdata/mirror.bin -etcd=$ETCD_URL -image=testimg -loops=10 -runtime=120
qemu-img convert -S 4096 -p \
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testimg" \
-O raw ./testdata/bin/read.bin
-O raw ./testdata/read.bin
if ! diff -q ./testdata/bin/read.bin ./testdata/bin/mirror.bin; then
if ! diff -q ./testdata/read.bin ./testdata/mirror.bin; then
format_error Data lost during self-heal
fi

View File

@ -43,7 +43,7 @@ wait_finish_rebalance 300
#fi
# Check that no objects are lost !
nobj=`$ETCDCTL get --prefix '/vitastor/pgstats' --print-value-only | jq -s '[ .[].object_count ] | reduce .[] as $num (0; .+$num)'`
nobj=`$ETCDCTL get --prefix '/vitastor/pg/stats' --print-value-only | jq -s '[ .[].object_count ] | reduce .[] as $num (0; .+$num)'`
if [ "$nobj" -ne $((IMG_SIZE*8/PG_DATA_SIZE)) ]; then
format_error "Data lost after multiple interrupted rebalancings"
fi

View File

@ -8,14 +8,14 @@ OSD_SIZE=1024
OSD_COUNT=5
OSD_ARGS="$OSD_ARGS"
for i in $(seq 1 $OSD_COUNT); do
dd if=/dev/zero of=./testdata/bin/test_osd$i.bin bs=1024 count=1 seek=$((OSD_SIZE*1024-1))
build/src/osd/vitastor-osd --log_level 10 --osd_num $i --bind_address 127.0.0.1 --etcd_stats_interval 5 $OSD_ARGS --etcd_address $ETCD_URL $(build/src/disk_tool/vitastor-disk simple-offsets --format options ./testdata/bin/test_osd$i.bin 2>/dev/null) >>./testdata/osd$i.log 2>&1 &
dd if=/dev/zero of=./testdata/test_osd$i.bin bs=1024 count=1 seek=$((OSD_SIZE*1024-1))
build/src/osd/vitastor-osd --log_level 10 --osd_num $i --bind_address 127.0.0.1 --etcd_stats_interval 5 $OSD_ARGS --etcd_address $ETCD_URL $(build/src/disk_tool/vitastor-disk simple-offsets --format options ./testdata/test_osd$i.bin 2>/dev/null) >>./testdata/osd$i.log 2>&1 &
eval OSD${i}_PID=$!
done
$ETCDCTL put /vitastor/config/pools '{"1":{"name":"testpool","scheme":"replicated","pg_size":2,"pg_minsize":1,"pg_count":1,"failure_domain":"osd","immediate_commit":"none"}}'
$ETCDCTL put /vitastor/pg/config '{"items":{"1":{"1":{"osd_set":[1,0],"primary":1}}}}'
$ETCDCTL put /vitastor/config/pgs '{"items":{"1":{"1":{"osd_set":[1,0],"primary":1}}}}'
for i in {1..30}; do
sleep 1
@ -30,7 +30,7 @@ LD_PRELOAD="build/src/client/libfio_vitastor.so" \
fio -thread -name=test -ioengine=build/src/client/libfio_vitastor.so -bs=4M -direct=1 -iodepth=1 -fsync=1 -rw=write \
-etcd=$ETCD_URL -pool=1 -inode=2 -size=32M -cluster_log_level=10
$ETCDCTL put /vitastor/pg/config '{"items":{"1":{"1":{"osd_set":[1,0],"primary":0}}}}'
$ETCDCTL put /vitastor/config/pgs '{"items":{"1":{"1":{"osd_set":[1,0],"primary":0}}}}'
for i in {1..30}; do
sleep 1
@ -43,7 +43,7 @@ done
$ETCDCTL put /vitastor/pg/history/1/1 '{"all_peers":[1,2,3]}'
$ETCDCTL put /vitastor/pg/config '{"items":{"1":{"1":{"osd_set":[4,5],"primary":4}}}}'
$ETCDCTL put /vitastor/config/pgs '{"items":{"1":{"1":{"osd_set":[4,5],"primary":4}}}}'
sleep 5
for i in {1..30}; do
@ -60,7 +60,7 @@ LD_PRELOAD="build/src/client/libfio_vitastor.so" \
fio -thread -name=test -ioengine=build/src/client/libfio_vitastor.so -bs=4k -direct=1 -iodepth=1 -fsync=1 -number_ios=2 -rw=write \
-etcd=$ETCD_URL -pool=1 -inode=2 -size=32M -cluster_log_level=10
$ETCDCTL put /vitastor/pg/config '{"items":{"1":{"1":{"osd_set":[4,5],"primary":0}}}}'
$ETCDCTL put /vitastor/config/pgs '{"items":{"1":{"1":{"osd_set":[4,5],"primary":0}}}}'
$ETCDCTL put /vitastor/pg/history/1/1 '{"all_peers":[1,2,3]}'
@ -76,7 +76,7 @@ done
cp testdata/osd4.log testdata/osd4_pre.log
>testdata/osd4.log
$ETCDCTL put /vitastor/pg/config '{"items":{"1":{"1":{"osd_set":[4,5],"primary":4}}}}'
$ETCDCTL put /vitastor/config/pgs '{"items":{"1":{"1":{"osd_set":[4,5],"primary":4}}}}'
for i in {1..30}; do
sleep 1

View File

@ -27,9 +27,9 @@ $ETCDCTL put /vitastor/config/pools '{"1":{"name":"testpool","scheme":"replicate
sleep 2
$ETCDCTL get --prefix /vitastor/pg/config --print-value-only
$ETCDCTL get --prefix /vitastor/config/pgs --print-value-only
if ! ($ETCDCTL get --prefix /vitastor/pg/config --print-value-only | \
if ! ($ETCDCTL get --prefix /vitastor/config/pgs --print-value-only | \
jq -s -e '[ [ .[] | select(has("items")) | .items["1"] | .[].osd_set | map(. | select(. != "" and (.|tonumber) < 5)) ][] | select((. | length) == 2) ] | length == 16'); then
format_error "Some PGs missing replicas"
fi

View File

@ -16,7 +16,7 @@ try_change()
s=$2
for i in {1..10}; do
($ETCDCTL get /vitastor/pg/config --print-value-only |\
($ETCDCTL get /vitastor/config/pgs --print-value-only |\
jq -s -e '(.[0].items["1"] | map( ([ .osd_set[] | select(. != 0) ] | length) == '$s' ) | length == '$n')
and ([ .[0].items["1"] | map(.osd_set)[][] ] | sort | unique == ["1","2","3","4","5","6","7"])') && \
($ETCDCTL get --prefix /vitastor/pg/state/ --print-value-only | jq -s -e '([ .[] | select(.state == ["active"]) ] | length) == '$n'') && \
@ -24,16 +24,16 @@ try_change()
sleep 1
done
if ! ($ETCDCTL get /vitastor/pg/config --print-value-only |\
if ! ($ETCDCTL get /vitastor/config/pgs --print-value-only |\
jq -s -e '(.[0].items["1"] | map( ([ .osd_set[] | select(. != 0) ] | length) == '$s' ) | length == '$n')
and ([ .[0].items["1"] | map(.osd_set)[][] ] | sort | unique == ["1","2","3","4","5","6","7"])'); then
$ETCDCTL get /vitastor/pg/config
$ETCDCTL get /vitastor/config/pgs
$ETCDCTL get --prefix /vitastor/pg/state/
format_error "FAILED: PG SIZE NOT CHANGED OR SOME OSDS DO NOT HAVE PGS"
fi
if ! ($ETCDCTL get --prefix /vitastor/pg/state/ --print-value-only | jq -s -e '([ .[] | select(.state == ["active"]) ] | length) == '$n); then
$ETCDCTL get /vitastor/pg/config
$ETCDCTL get /vitastor/config/pgs
$ETCDCTL get --prefix /vitastor/pg/state/
format_error "FAILED: PGS NOT UP AFTER PG SIZE CHANGE"
fi

View File

@ -16,14 +16,14 @@ trap "sudo build/src/client/vitastor-nbd unmap $NBD_DEV"'; kill -9 $(jobs -p)' E
sudo chown $(id -u) $NBD_DEV
dd if=/dev/urandom of=./testdata/bin/img1.bin bs=1M count=$IMG_SIZE
dd if=/dev/urandom of=./testdata/img1.bin bs=1M count=$IMG_SIZE
dd if=./testdata/bin/img1.bin of=$NBD_DEV bs=1M count=$IMG_SIZE oflag=direct
dd if=./testdata/img1.bin of=$NBD_DEV bs=1M count=$IMG_SIZE oflag=direct
verify() {
echo "Verifying before rebalance"
dd if=$NBD_DEV of=./testdata/bin/img2.bin bs=1M count=$IMG_SIZE iflag=direct
diff ./testdata/bin/img1.bin ./testdata/bin/img2.bin
dd if=$NBD_DEV of=./testdata/img2.bin bs=1M count=$IMG_SIZE iflag=direct
diff ./testdata/img1.bin ./testdata/img2.bin
$ETCDCTL put /vitastor/config/osd/1 '{"reweight":'$1'}'
$ETCDCTL put /vitastor/config/osd/2 '{"reweight":'$1'}'
@ -31,18 +31,18 @@ verify() {
for i in {1..10000}; do
O=$(((RANDOM*RANDOM) % (IMG_SIZE*128)))
dd if=$NBD_DEV of=./testdata/bin/img2.bin bs=4k seek=$O skip=$O count=1 iflag=direct conv=notrunc
dd if=$NBD_DEV of=./testdata/img2.bin bs=4k seek=$O skip=$O count=1 iflag=direct conv=notrunc
done
echo "Verifying during rebalance"
diff ./testdata/bin/img1.bin ./testdata/bin/img2.bin
diff ./testdata/img1.bin ./testdata/img2.bin
# Wait for the rebalance to finish
wait_finish_rebalance 300
echo "Verifying after rebalance"
dd if=$NBD_DEV of=./testdata/bin/img2.bin bs=1M count=$IMG_SIZE iflag=direct
diff ./testdata/bin/img1.bin ./testdata/bin/img2.bin
dd if=$NBD_DEV of=./testdata/img2.bin bs=1M count=$IMG_SIZE iflag=direct
diff ./testdata/img1.bin ./testdata/img2.bin
}
# Verify with regular reads

View File

@ -14,7 +14,7 @@ LD_PRELOAD="build/src/client/libfio_vitastor.so" \
qemu-img convert -S 4096 -p \
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:pool=1:inode=1:size=$((256*1024*1024))" \
-O raw ./testdata/bin/before.bin
-O raw ./testdata/before.bin
for i in $(seq 1 $OSD_COUNT); do
pid=OSD${i}_PID
@ -23,19 +23,19 @@ for i in $(seq 1 $OSD_COUNT); do
done
for i in $(seq 1 $OSD_COUNT); do
offsets=$(build/src/disk_tool/vitastor-disk simple-offsets --format json ./testdata/bin/test_osd$i.bin)
offsets=$(build/src/disk_tool/vitastor-disk simple-offsets --format json ./testdata/test_osd$i.bin)
meta_offset=$(echo $offsets | jq -r .meta_offset)
data_offset=$(echo $offsets | jq -r .data_offset)
build/src/disk_tool/vitastor-disk dump-journal --json ./testdata/bin/test_osd$i.bin 4096 0 $meta_offset >./testdata/journal_before_resize.json
build/src/disk_tool/vitastor-disk dump-meta ./testdata/bin/test_osd$i.bin 4096 $meta_offset $((data_offset-meta_offset)) >./testdata/meta_before_resize.json
build/src/disk_tool/vitastor-disk dump-journal --json ./testdata/test_osd$i.bin 4096 0 $meta_offset >./testdata/journal_before_resize.json
build/src/disk_tool/vitastor-disk dump-meta ./testdata/test_osd$i.bin 4096 $meta_offset $((data_offset-meta_offset)) >./testdata/meta_before_resize.json
build/src/disk_tool/vitastor-disk resize \
$(build/src/disk_tool/vitastor-disk simple-offsets --format options ./testdata/bin/test_osd$i.bin 2>/dev/null) \
$(build/src/disk_tool/vitastor-disk simple-offsets --format options ./testdata/test_osd$i.bin 2>/dev/null) \
--new_meta_offset 0 \
--new_meta_len $((1024*1024)) \
--new_journal_offset $((1024*1024)) \
--new_data_offset $((128*1024*1024))
build/src/disk_tool/vitastor-disk dump-journal --json ./testdata/bin/test_osd$i.bin 4096 $((1024*1024)) $((127*1024*1024)) >./testdata/journal_after_resize.json
build/src/disk_tool/vitastor-disk dump-meta ./testdata/bin/test_osd$i.bin 4096 0 $((1024*1024)) >./testdata/meta_after_resize.json
build/src/disk_tool/vitastor-disk dump-journal --json ./testdata/test_osd$i.bin 4096 $((1024*1024)) $((127*1024*1024)) >./testdata/journal_after_resize.json
build/src/disk_tool/vitastor-disk dump-meta ./testdata/test_osd$i.bin 4096 0 $((1024*1024)) >./testdata/meta_after_resize.json
if ! (cat ./testdata/meta_before_resize.json ./testdata/meta_after_resize.json | \
jq -e -s 'map([ .entries[] | del(.block) ] | sort_by(.pool, .inode, .stripe)) | .[0] == .[1] and (.[0] | length) > 1000'); then
format_error "OSD $i metadata corrupted after resizing"
@ -50,7 +50,7 @@ $ETCDCTL del --prefix /vitastor/osd/state/
for i in $(seq 1 $OSD_COUNT); do
build/src/osd/vitastor-osd --osd_num $i --bind_address 127.0.0.1 $NO_SAME $OSD_ARGS --etcd_address $ETCD_URL \
--data_device ./testdata/bin/test_osd$i.bin \
--data_device ./testdata/test_osd$i.bin \
--meta_offset 0 \
--journal_offset $((1024*1024)) \
--data_offset $((128*1024*1024)) >>./testdata/osd$i.log 2>&1 &
@ -59,9 +59,9 @@ done
qemu-img convert -S 4096 -p \
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:pool=1:inode=1:size=$((256*1024*1024))" \
-O raw ./testdata/bin/after.bin
-O raw ./testdata/after.bin
if ! cmp ./testdata/bin/before.bin ./testdata/bin/after.bin; then
if ! cmp ./testdata/before.bin ./testdata/after.bin; then
format_error "Data differs after resizing"
fi

View File

@ -21,7 +21,9 @@ $ETCDCTL put /vitastor/config/pools '{"1":{"name":"testpool","scheme":"replicate
sleep 2
if ! ($ETCDCTL get --prefix /vitastor/pg/config --print-value-only | \
$ETCDCTL get --prefix /vitastor/config/pgs --print-value-only
if ! ($ETCDCTL get --prefix /vitastor/config/pgs --print-value-only | \
jq -s -e '[ [ .[0].items["1"] | .[].osd_set | map(. | select(. != "" and (.|tonumber) < 5)) ][] | select((. | length) == 2) ] | length == 16'); then
format_error "Some PGs missing replicas"
fi

View File

@ -18,19 +18,19 @@ $ETCDCTL put /vitastor/config/inode/1/1 '{"name":"testimg","size":'$((IMG_SIZE*1
# Write
LD_PRELOAD="build/src/client/libfio_vitastor.so" \
fio -thread -name=test -ioengine=build/src/client/libfio_vitastor.so -bs=1M -direct=1 -iodepth=4 \
-mirror_file=./testdata/bin/mirror.bin -end_fsync=1 -rw=write -etcd=$ETCD_URL -image=testimg
-mirror_file=./testdata/mirror.bin -end_fsync=1 -rw=write -etcd=$ETCD_URL -image=testimg
# Save PG primary
primary=$($ETCDCTL get --print-value-only /vitastor/pg/config | jq -r '.items["1"]["1"].primary')
primary=$($ETCDCTL get --print-value-only /vitastor/config/pgs | jq -r '.items["1"]["1"].primary')
# Intentionally corrupt OSD data and restart it
zero_osd_pid=OSD${ZERO_OSD}_PID
kill ${!zero_osd_pid}
sleep 1
kill -9 ${!zero_osd_pid} || true
data_offset=$(build/src/disk_tool/vitastor-disk simple-offsets ./testdata/bin/test_osd$ZERO_OSD.bin $OFFSET_ARGS | grep data_offset | awk '{print $2}')
truncate -s $data_offset ./testdata/bin/test_osd$ZERO_OSD.bin
dd if=/dev/zero of=./testdata/bin/test_osd$ZERO_OSD.bin bs=1024 count=1 seek=$((OSD_SIZE*1024-1))
data_offset=$(build/src/disk_tool/vitastor-disk simple-offsets ./testdata/test_osd$ZERO_OSD.bin $OFFSET_ARGS | grep data_offset | awk '{print $2}')
truncate -s $data_offset ./testdata/test_osd$ZERO_OSD.bin
dd if=/dev/zero of=./testdata/test_osd$ZERO_OSD.bin bs=1024 count=1 seek=$((OSD_SIZE*1024-1))
$ETCDCTL del /vitastor/osd/state/$ZERO_OSD
start_osd $ZERO_OSD
@ -38,7 +38,7 @@ start_osd $ZERO_OSD
wait_up 10
# Wait until PG is back on the same primary
wait_condition 10 "$ETCDCTL"$' get --print-value-only /vitastor/pg/config | jq -s -e \'.[0].items["1"]["1"].primary == "'$primary'"'"'"
wait_condition 10 "$ETCDCTL"$' get --print-value-only /vitastor/config/pgs | jq -s -e \'.[0].items["1"]["1"].primary == "'$primary'"'"'"
# Trigger scrub
$ETCDCTL put /vitastor/pg/history/1/1 `$ETCDCTL get --print-value-only /vitastor/pg/history/1/1 | jq -s -c '(.[0] // {}) + {"next_scrub":1}'`
@ -64,8 +64,8 @@ fi
# Read everything back
qemu-img convert -S 4096 -p \
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testimg" \
-O raw ./testdata/bin/read.bin
-O raw ./testdata/read.bin
diff ./testdata/bin/read.bin ./testdata/bin/mirror.bin
diff ./testdata/read.bin ./testdata/mirror.bin
format_green OK

View File

@ -34,21 +34,21 @@ qemu-img convert -p \
qemu-img convert -S 4096 -p \
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:pool=1:inode=3:size=$((32*1024*1024))" \
-O raw ./testdata/bin/merged.bin
-O raw ./testdata/merged.bin
qemu-img convert -S 4096 -p \
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testimg@0" \
-O raw ./testdata/bin/layer0.bin
-O raw ./testdata/layer0.bin
$ETCDCTL put /vitastor/config/inode/1/3 '{"name":"testimg","size":'$((32*1024*1024))'}'
qemu-img convert -S 4096 -p \
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testimg" \
-O raw ./testdata/bin/layer1.bin
-O raw ./testdata/layer1.bin
node tests/merge.js ./testdata/bin/layer0.bin ./testdata/bin/layer1.bin ./testdata/bin/check.bin
node tests/merge.js ./testdata/layer0.bin ./testdata/layer1.bin ./testdata/check.bin
cmp ./testdata/bin/merged.bin ./testdata/bin/check.bin
cmp ./testdata/merged.bin ./testdata/check.bin
# Test merge
@ -58,22 +58,22 @@ build/src/cmd/vitastor-cli rm --etcd_address $ETCD_URL testimg@0
qemu-img convert -S 4096 -p \
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testimg" \
-O raw ./testdata/bin/merged-by-tool.bin
-O raw ./testdata/merged-by-tool.bin
cmp ./testdata/bin/merged.bin ./testdata/bin/merged-by-tool.bin
cmp ./testdata/merged.bin ./testdata/merged-by-tool.bin
# Test merge by qemu-img
qemu-img rebase -u -b layer0.qcow2 -F qcow2 ./testdata/layer1.qcow2
qemu-img convert -S 4096 -f qcow2 ./testdata/layer1.qcow2 -O raw ./testdata/bin/rebased.bin
qemu-img convert -S 4096 -f qcow2 ./testdata/layer1.qcow2 -O raw ./testdata/rebased.bin
cmp ./testdata/bin/merged.bin ./testdata/bin/rebased.bin
cmp ./testdata/merged.bin ./testdata/rebased.bin
qemu-img rebase -u -b '' ./testdata/layer1.qcow2
qemu-img convert -S 4096 -f qcow2 ./testdata/layer1.qcow2 -O raw ./testdata/bin/rebased.bin
qemu-img convert -S 4096 -f qcow2 ./testdata/layer1.qcow2 -O raw ./testdata/rebased.bin
cmp ./testdata/bin/layer1.bin ./testdata/bin/rebased.bin
cmp ./testdata/layer1.bin ./testdata/rebased.bin
format_green OK

View File

@ -9,7 +9,7 @@ build/src/cmd/vitastor-cli --etcd_address $ETCD_URL create -s 32M testchain
LD_PRELOAD="build/src/client/libfio_vitastor.so" \
fio -thread -name=test -ioengine=build/src/client/libfio_vitastor.so -bs=4M -direct=1 -iodepth=1 -fsync=1 -rw=write \
-etcd=$ETCD_URL -image=testchain -mirror_file=./testdata/bin/mirror.bin
-etcd=$ETCD_URL -image=testchain -mirror_file=./testdata/mirror.bin
for i in {1..10}; do
# Create a snapshot
@ -17,18 +17,18 @@ for i in {1..10}; do
# Check that the new snapshot is see-through
qemu-img convert -p \
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testchain" \
-O raw ./testdata/bin/check.bin
cmp ./testdata/bin/check.bin ./testdata/bin/mirror.bin
-O raw ./testdata/check.bin
cmp ./testdata/check.bin ./testdata/mirror.bin
# Write something to it
LD_PRELOAD="build/src/client/libfio_vitastor.so" \
fio -thread -name=test -ioengine=build/src/client/libfio_vitastor.so -bs=4k -direct=1 -iodepth=1 -fsync=32 -rw=randwrite \
-randrepeat=$((i <= 2)) -buffer_pattern=0x$((10+i))$((10+i))$((10+i))$((10+i)) \
-etcd=$ETCD_URL -image=testchain -number_ios=1024 -mirror_file=./testdata/bin/mirror.bin
-etcd=$ETCD_URL -image=testchain -number_ios=1024 -mirror_file=./testdata/mirror.bin
# Check the new content
qemu-img convert -p \
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testchain" \
-O raw ./testdata/bin/layer1.bin
cmp ./testdata/bin/layer1.bin ./testdata/bin/mirror.bin
-O raw ./testdata/layer1.bin
cmp ./testdata/layer1.bin ./testdata/mirror.bin
done
build/src/cmd/vitastor-cli --etcd_address $ETCD_URL rm testchain@1 testchain@9
@ -36,13 +36,13 @@ build/src/cmd/vitastor-cli --etcd_address $ETCD_URL rm testchain@1 testchain@9
# Check the final image
qemu-img convert -p \
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testchain" \
-O raw ./testdata/bin/layer1.bin
cmp ./testdata/bin/layer1.bin ./testdata/bin/mirror.bin
-O raw ./testdata/layer1.bin
cmp ./testdata/layer1.bin ./testdata/mirror.bin
# Check the last remaining snapshot
qemu-img convert -p \
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testchain@10" \
-O raw ./testdata/bin/layer0.bin
cmp ./testdata/bin/layer0.bin ./testdata/bin/check.bin
-O raw ./testdata/layer0.bin
cmp ./testdata/layer0.bin ./testdata/check.bin
format_green OK

View File

@ -9,7 +9,7 @@ build/src/cmd/vitastor-cli --etcd_address $ETCD_URL create -s 128M testchain
LD_PRELOAD="build/src/client/libfio_vitastor.so" \
fio -thread -name=test -ioengine=build/src/client/libfio_vitastor.so -bs=4M -direct=1 -iodepth=1 -fsync=1 -rw=write \
-etcd=$ETCD_URL -image=testchain -mirror_file=./testdata/bin/mirror.bin
-etcd=$ETCD_URL -image=testchain -mirror_file=./testdata/mirror.bin
# Create a snapshot
build/src/cmd/vitastor-cli --etcd_address $ETCD_URL snap-create testchain@0
@ -17,13 +17,13 @@ build/src/cmd/vitastor-cli --etcd_address $ETCD_URL snap-create testchain@0
# Write something to it
LD_PRELOAD="build/src/client/libfio_vitastor.so" \
fio -thread -name=test -ioengine=build/src/client/libfio_vitastor.so -bs=1M -direct=1 -iodepth=4 -rw=randwrite \
-randrepeat=0 -etcd=$ETCD_URL -image=testchain -number_ios=8 -mirror_file=./testdata/bin/mirror.bin
-randrepeat=0 -etcd=$ETCD_URL -image=testchain -number_ios=8 -mirror_file=./testdata/mirror.bin
# Check the new content
qemu-img convert -p \
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testchain" \
-O raw ./testdata/bin/layer1.bin
cmp ./testdata/bin/layer1.bin ./testdata/bin/mirror.bin
-O raw ./testdata/layer1.bin
cmp ./testdata/layer1.bin ./testdata/mirror.bin
# Merge
build/src/cmd/vitastor-cli --etcd_address $ETCD_URL rm testchain@0
@ -31,7 +31,7 @@ build/src/cmd/vitastor-cli --etcd_address $ETCD_URL rm testchain@0
# Check the final image
qemu-img convert -p \
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testchain" \
-O raw ./testdata/bin/layer1.bin
cmp ./testdata/bin/layer1.bin ./testdata/bin/mirror.bin
-O raw ./testdata/layer1.bin
cmp ./testdata/layer1.bin ./testdata/mirror.bin
format_green OK

View File

@ -23,7 +23,7 @@ LD_PRELOAD="build/src/client/libfio_vitastor.so" \
kill $OSD2_PID
build/src/osd/vitastor-osd --osd_num 1 --bind_address 127.0.0.1 $NO_SAME $OSD_ARGS --etcd_address $ETCD_URL \
$(build/src/disk_tool/vitastor-disk simple-offsets --format options --device ./testdata/bin/test_osd2.bin 2>/dev/null) >>./testdata/osd2.log 2>&1 &
$(build/src/disk_tool/vitastor-disk simple-offsets --format options --device ./testdata/test_osd2.bin 2>/dev/null) >>./testdata/osd2.log 2>&1 &
sleep 2
# Check PG state - it should NOT become active

View File

@ -2,14 +2,14 @@
. `dirname $0`/run_3osds.sh
primary=$($ETCDCTL get --print-value-only /vitastor/pg/config | jq -r '.items["1"]["1"].primary')
primary=$($ETCDCTL get --print-value-only /vitastor/config/pgs | jq -r '.items["1"]["1"].primary')
primary_pid=OSD${primary}_PID
kill -9 ${!primary_pid}
sleep 15
wait_condition 10 "$ETCDCTL get --print-value-only /vitastor/pg/config | jq -s -e '.[0].items[\"1\"][\"1\"].primary != \"$primary\"'"
wait_condition 10 "$ETCDCTL get --print-value-only /vitastor/config/pgs | jq -s -e '.[0].items[\"1\"][\"1\"].primary != \"$primary\"'"
newprim=$($ETCDCTL get --print-value-only /vitastor/pg/config | jq -r '.items["1"]["1"].primary')
newprim=$($ETCDCTL get --print-value-only /vitastor/config/pgs | jq -r '.items["1"]["1"].primary')
if [ "$newprim" = "$primary" ]; then
format_error Primary not switched

View File

@ -15,7 +15,7 @@ OSD_COUNT=3
OSD_ARGS="$OSD_ARGS"
OFFSET_ARGS="$OFFSET_ARGS"
for i in $(seq 1 $OSD_COUNT); do
build/src/osd/vitastor-osd --osd_num $i --bind_address 127.0.0.1 $OSD_ARGS --etcd_address $ETCD_URL $(build/src/disk_tool/vitastor-disk simple-offsets --format options ./testdata/bin/test_osd$i.bin 2>/dev/null) >>./testdata/osd$i.log 2>&1 &
build/src/osd/vitastor-osd --osd_num $i --bind_address 127.0.0.1 $OSD_ARGS --etcd_address $ETCD_URL $(build/src/disk_tool/vitastor-disk simple-offsets --format options ./testdata/test_osd$i.bin 2>/dev/null) >>./testdata/osd$i.log 2>&1 &
eval OSD${i}_PID=$!
done

View File

@ -43,10 +43,10 @@ LD_PRELOAD="build/src/client/libfio_vitastor.so" \
qemu-img convert -S 4096 -p \
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:pool=1:inode=1:size=$((128*1024*1024))" \
-O raw ./testdata/bin/read.bin
-O raw ./testdata/read.bin
qemu-img convert -S 4096 -p \
-f raw ./testdata/bin/read.bin \
-f raw ./testdata/read.bin \
-O raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:pool=1:inode=1:size=$((128*1024*1024))"
format_green OK