Compare commits
7 Commits
f1f8531fd4
...
da73d5f45a
Author | SHA1 | Date |
---|---|---|
Vitaliy Filippov | da73d5f45a | |
Vitaliy Filippov | 88bbe16ac3 | |
Vitaliy Filippov | 85ba14319a | |
Vitaliy Filippov | 5a56912d5e | |
Vitaliy Filippov | edc2f4eb97 | |
Vitaliy Filippov | ba806ff1ba | |
Vitaliy Filippov | 3d925c37cd |
|
@ -16,6 +16,7 @@ env:
|
|||
BUILDENV_IMAGE: git.yourcmc.ru/vitalif/vitastor/buildenv
|
||||
TEST_IMAGE: git.yourcmc.ru/vitalif/vitastor/test
|
||||
OSD_ARGS: '--etcd_quick_timeout 2000'
|
||||
USE_RAMDISK: 1
|
||||
|
||||
concurrency:
|
||||
group: ci-${{ github.ref }}
|
||||
|
|
|
@ -2,6 +2,6 @@ cmake_minimum_required(VERSION 2.8.12)
|
|||
|
||||
project(vitastor)
|
||||
|
||||
set(VERSION "1.7.1")
|
||||
set(VITASTOR_VERSION "1.7.1")
|
||||
|
||||
add_subdirectory(src)
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
VERSION ?= v1.7.1
|
||||
VITASTOR_VERSION ?= v1.7.1
|
||||
|
||||
all: build push
|
||||
|
||||
build:
|
||||
@docker build --rm -t vitalif/vitastor-csi:$(VERSION) .
|
||||
@docker build --rm -t vitalif/vitastor-csi:$(VITASTOR_VERSION) .
|
||||
|
||||
push:
|
||||
@docker push vitalif/vitastor-csi:$(VERSION)
|
||||
@docker push vitalif/vitastor-csi:$(VITASTOR_VERSION)
|
||||
|
|
|
@ -42,7 +42,7 @@ PG state always includes exactly 1 of the following base states:
|
|||
- **offline** — PG isn't activated by any OSD at all. Either primary OSD isn't set for
|
||||
this PG at all (if the pool is just created), or an unavailable OSD is set as primary,
|
||||
or the primary OSD refuses to start this PG (for example, because of wrong block_size),
|
||||
or the PG is stopped by the monitor using `pause: true` flag in `/vitastor/config/pgs` in etcd.
|
||||
or the PG is stopped by the monitor using `pause: true` flag in `/vitastor/pg/config` in etcd.
|
||||
- **starting** — primary OSD has acquired PG lock in etcd, PG is starting.
|
||||
- **peering** — primary OSD requests PG object listings from secondary OSDs and calculates
|
||||
the PG state.
|
||||
|
@ -150,7 +150,7 @@ POOL_ID=1
|
|||
ALL_OSDS=$(etcdctl --endpoints=your_etcd_address:2379 get --keys-only --prefix /vitastor/osd/stats/ | \
|
||||
perl -e '$/ = undef; $a = <>; $a =~ s/\s*$//; $a =~ s!/vitastor/osd/stats/!!g; $a =~ s/\s+/,/g; print $a')
|
||||
for i in $(seq 1 $PG_COUNT); do
|
||||
etcdctl --endpoints=your_etcd_address:2379 put /vitastor/pg/history/$POOL_ID/$i '{"all_peers":['$ALL_OSDS']}'; done
|
||||
etcdctl --endpoints=your_etcd_address:2379 put /vitastor/pg/history/$POOL_ID/$i '{"all_peers":['$ALL_OSDS']}'
|
||||
done
|
||||
```
|
||||
|
||||
|
@ -169,21 +169,51 @@ Upgrading is performed without stopping clients (VMs/containers), you just need
|
|||
upgrade and restart servers one by one. However, ideally you should restart VMs too
|
||||
to make them use the new version of the client library.
|
||||
|
||||
Exceptions (specific upgrade instructions):
|
||||
- Upgrading <= 1.1.x to 1.2.0 or later, if you use EC n+k with k>=2, is recommended
|
||||
to be performed with full downtime: first you should stop all clients, then all OSDs,
|
||||
then upgrade and start everything back — because versions before 1.2.0 have several
|
||||
bugs leading to invalid data being read in EC n+k, k>=2 configurations in degraded pools.
|
||||
- Versions <= 0.8.7 are incompatible with versions >= 0.9.0, so you should first
|
||||
upgrade from <= 0.8.7 to 0.8.8 or 0.8.9, and only then to >= 0.9.x. If you upgrade
|
||||
without this intermediate step, client I/O will hang until the end of upgrade process.
|
||||
- Upgrading from <= 0.5.x to >= 0.6.x is not supported.
|
||||
### 1.1.x to 1.2.0
|
||||
|
||||
Rollback:
|
||||
- Version 1.0.0 has a new disk format, so OSDs initiaziled on 1.0.0 can't be rolled
|
||||
back to 0.9.x or previous versions.
|
||||
- Versions before 0.8.0 don't have vitastor-disk, so OSDs, initialized by it, won't
|
||||
start with 0.7.x or 0.6.x. :-)
|
||||
Upgrading from version <= 1.1.x to version >= 1.2.0, if you use EC n+k with k>=2, is recommended
|
||||
to be performed with full downtime: first you should stop all clients, then all OSDs,
|
||||
then upgrade and start everything back — because versions before 1.2.0 have several
|
||||
bugs leading to invalid data being read in EC n+k, k>=2 configurations in degraded pools.
|
||||
|
||||
### 0.8.7 to 0.9.0
|
||||
|
||||
Versions <= 0.8.7 are incompatible with versions >= 0.9.0, so you should first
|
||||
upgrade from <= 0.8.7 to 0.8.8 or 0.8.9, and only then to >= 0.9.x. If you upgrade
|
||||
without this intermediate step, client I/O will hang until the end of the upgrade process.
|
||||
|
||||
### 0.5.x to 0.6.x
|
||||
|
||||
Upgrading from <= 0.5.x to >= 0.6.x is not supported.
|
||||
|
||||
## Downgrade
|
||||
|
||||
Downgrades are also allowed freely, except for the following specific cases:
|
||||
|
||||
### 1.8.0 to 1.7.1
|
||||
|
||||
Before downgrading from version >= 1.8.0 to version <= 1.7.1
|
||||
you have to copy the `/vitastor/pg/config` etcd key to `/vitastor/config/pgs`:
|
||||
|
||||
```
|
||||
etcdctl --endpoints=http://... get --print-value-only /vitastor/pg/config | \
|
||||
etcdctl --endpoints=http://... put /vitastor/config/pgs
|
||||
```
|
||||
|
||||
Then you can just install older packages and restart all services.
|
||||
|
||||
If you performed the downgrade without first copying that key, run "add all OSDs into the
|
||||
history records of all PGs" from [Restoring from lost pool configuration](#restoring-from-lost-pool-configuration).
|
||||
|
||||
### 1.0.0 to 0.9.x
|
||||
|
||||
Version 1.0.0 has a new disk format, so OSDs initialized on 1.0.0 or later can't
|
||||
be rolled back to 0.9.x or previous versions.
|
||||
|
||||
### 0.8.0 to 0.7.x
|
||||
|
||||
Versions before 0.8.0 don't have vitastor-disk, so OSDs initialized by it won't
|
||||
start with older versions (0.4.x - 0.7.x). :-)
|
||||
|
||||
## OSD memory usage
|
||||
|
||||
|
|
|
@ -42,7 +42,7 @@
|
|||
- **offline** — PG вообще не активирована ни одним OSD. Либо первичный OSD не назначен вообще
|
||||
(если пул только создан), либо в качестве первичного назначен недоступный OSD, либо
|
||||
назначенный OSD отказывается запускать эту PG (например, из-за несовпадения block_size),
|
||||
либо PG остановлена монитором через флаг `pause: true` в `/vitastor/config/pgs` в etcd.
|
||||
либо PG остановлена монитором через флаг `pause: true` в `/vitastor/pg/config` в etcd.
|
||||
- **starting** — первичный OSD захватил блокировку PG в etcd, PG запускается.
|
||||
- **peering** — первичный OSD опрашивает вторичные OSD на предмет списков объектов данной PG и рассчитывает её состояние.
|
||||
- **repeering** — PG ожидает завершения текущих операций ввода-вывода, после чего перейдёт в состояние **peering**.
|
||||
|
@ -147,7 +147,7 @@ POOL_ID=1
|
|||
ALL_OSDS=$(etcdctl --endpoints=your_etcd_address:2379 get --keys-only --prefix /vitastor/osd/stats/ | \
|
||||
perl -e '$/ = undef; $a = <>; $a =~ s/\s*$//; $a =~ s!/vitastor/osd/stats/!!g; $a =~ s/\s+/,/g; print $a')
|
||||
for i in $(seq 1 $PG_COUNT); do
|
||||
etcdctl --endpoints=your_etcd_address:2379 put /vitastor/pg/history/$POOL_ID/$i '{"all_peers":['$ALL_OSDS']}'; done
|
||||
etcdctl --endpoints=your_etcd_address:2379 put /vitastor/pg/history/$POOL_ID/$i '{"all_peers":['$ALL_OSDS']}'
|
||||
done
|
||||
```
|
||||
|
||||
|
@ -166,21 +166,51 @@ done
|
|||
достаточно обновлять серверы по одному. Однако, конечно, чтобы запущенные виртуальные машины
|
||||
начали использовать новую версию клиентской библиотеки, их тоже нужно перезапустить.
|
||||
|
||||
Исключения (особые указания при обновлении):
|
||||
- Обновляться с версий <= 1.1.x до версий >= 1.2.0, если вы используете EC n+k и k>=2,
|
||||
рекомендуется с временной остановкой кластера — сначала нужно остановить всех клиентов,
|
||||
потом все OSD, потом обновить и запустить всё обратно — из-за нескольких багов, которые
|
||||
могли приводить к некорректному чтению данных в деградированных EC-пулах.
|
||||
- Версии <= 0.8.7 несовместимы с версиями >= 0.9.0, поэтому при обновлении с <= 0.8.7
|
||||
нужно сначала обновиться до 0.8.8 или 0.8.9, а уже потом до любых версий >= 0.9.x.
|
||||
Иначе клиентский ввод-вывод зависнет до завершения обновления.
|
||||
- Обновление с версий 0.5.x и более ранних до 0.6.x и более поздних не поддерживается.
|
||||
### 1.1.x -> 1.2.0
|
||||
|
||||
Откат:
|
||||
- В версии 1.0.0 поменялся дисковый формат, поэтому OSD, созданные на версии >= 1.0.0,
|
||||
нельзя откатить до версии 0.9.x и более ранних.
|
||||
- В версиях ранее 0.8.0 нет vitastor-disk, значит, созданные им OSD нельзя откатить
|
||||
до 0.7.x или 0.6.x. :-)
|
||||
Обновляться с версий <= 1.1.x до версий >= 1.2.0, если вы используете EC n+k и k>=2,
|
||||
рекомендуется с временной остановкой кластера — сначала нужно остановить всех клиентов,
|
||||
потом все OSD, потом обновить и запустить всё обратно — из-за нескольких багов, которые
|
||||
могли приводить к некорректному чтению данных в деградированных EC-пулах.
|
||||
|
||||
### 0.8.7 -> 0.9.0
|
||||
|
||||
Версии <= 0.8.7 несовместимы с версиями >= 0.9.0, поэтому при обновлении с <= 0.8.7
|
||||
нужно сначала обновиться до 0.8.8 или 0.8.9, а уже потом до любых версий >= 0.9.x.
|
||||
Иначе клиентский ввод-вывод зависнет до завершения обновления.
|
||||
|
||||
### 0.5.x -> 0.6.x
|
||||
|
||||
Обновление с версий 0.5.x и более ранних до 0.6.x и более поздних не поддерживается.
|
||||
|
||||
## Откат версии
|
||||
|
||||
Откат (понижение версии) тоже свободно разрешён, кроме указанных ниже случаев:
|
||||
|
||||
### 1.8.0 -> 1.7.1
|
||||
|
||||
Перед понижением версии с >= 1.8.0 до <= 1.7.1 вы должны скопировать ключ
|
||||
etcd `/vitastor/pg/config` в `/vitastor/config/pgs`:
|
||||
|
||||
```
|
||||
etcdctl --endpoints=http://... get --print-value-only /vitastor/pg/config | \
|
||||
etcdctl --endpoints=http://... put /vitastor/config/pgs
|
||||
```
|
||||
|
||||
После этого можно просто установить более старые пакеты и перезапустить все сервисы.
|
||||
|
||||
Если вы откатили версию, не скопировав предварительно этот ключ — выполните "добавление всех
|
||||
OSD в исторические записи всех PG" из раздела [Восстановление потерянной конфигурации пулов](#восстановление-потерянной-конфигурации-пулов).
|
||||
|
||||
### 1.0.0 -> 0.9.x
|
||||
|
||||
В версии 1.0.0 поменялся дисковый формат, поэтому OSD, созданные на версии >= 1.0.0,
|
||||
нельзя откатить до версии 0.9.x и более ранних.
|
||||
|
||||
### 0.8.0 -> 0.7.x
|
||||
|
||||
В версиях ранее 0.8.0 нет vitastor-disk, значит, созданные им OSD не запустятся на
|
||||
более ранних версиях (0.4.x - 0.7.x). :-)
|
||||
|
||||
## Потребление памяти OSD
|
||||
|
||||
|
|
|
@ -6,7 +6,7 @@ const etcd_nonempty_keys = {
|
|||
'config/global': 1,
|
||||
'config/node_placement': 1,
|
||||
'config/pools': 1,
|
||||
'config/pgs': 1,
|
||||
'pg/config': 1,
|
||||
'history/last_clean_pgs': 1,
|
||||
'stats': 1,
|
||||
};
|
||||
|
@ -15,7 +15,8 @@ const etcd_allow = new RegExp('^'+[
|
|||
'config/node_placement',
|
||||
'config/pools',
|
||||
'config/osd/[1-9]\\d*',
|
||||
'config/pgs',
|
||||
'config/pgs', // old name
|
||||
'pg/config',
|
||||
'config/inode/[1-9]\\d*/[1-9]\\d*',
|
||||
'osd/state/[1-9]\\d*',
|
||||
'osd/stats/[1-9]\\d*',
|
||||
|
@ -24,7 +25,8 @@ const etcd_allow = new RegExp('^'+[
|
|||
'mon/master',
|
||||
'mon/member/[a-f0-9]+',
|
||||
'pg/state/[1-9]\\d*/[1-9]\\d*',
|
||||
'pg/stats/[1-9]\\d*/[1-9]\\d*',
|
||||
'pg/stats/[1-9]\\d*/[1-9]\\d*', // old name
|
||||
'pgstats/[1-9]\\d*/[1-9]\\d*',
|
||||
'pg/history/[1-9]\\d*/[1-9]\\d*',
|
||||
'history/last_clean_pgs',
|
||||
'inode/stats/[1-9]\\d*/\\d+',
|
||||
|
@ -205,19 +207,6 @@ const etcd_tree = {
|
|||
osd: {
|
||||
/* <id>: { reweight?: 1, tags?: [ 'nvme', ... ], noout?: true }, ... */
|
||||
},
|
||||
/* pgs: {
|
||||
hash: string,
|
||||
items: {
|
||||
<pool_id>: {
|
||||
<pg_id>: {
|
||||
osd_set: [ 1, 2, 3 ],
|
||||
primary: 1,
|
||||
pause: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
}, */
|
||||
pgs: {},
|
||||
/* inode: {
|
||||
<pool_id>: {
|
||||
<inode_t>: {
|
||||
|
@ -290,6 +279,19 @@ const etcd_tree = {
|
|||
},
|
||||
},
|
||||
pg: {
|
||||
/* config: {
|
||||
hash: string,
|
||||
items: {
|
||||
<pool_id>: {
|
||||
<pg_id>: {
|
||||
osd_set: [ 1, 2, 3 ],
|
||||
primary: 1,
|
||||
pause: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
}, */
|
||||
config: {},
|
||||
state: {
|
||||
/* <pool_id>: {
|
||||
<pg_id>: {
|
||||
|
@ -300,18 +302,6 @@ const etcd_tree = {
|
|||
}
|
||||
}, */
|
||||
},
|
||||
stats: {
|
||||
/* <pool_id>: {
|
||||
<pg_id>: {
|
||||
object_count: uint64_t,
|
||||
clean_count: uint64_t,
|
||||
misplaced_count: uint64_t,
|
||||
degraded_count: uint64_t,
|
||||
incomplete_count: uint64_t,
|
||||
write_osd_set: osd_num_t[],
|
||||
},
|
||||
}, */
|
||||
},
|
||||
history: {
|
||||
/* <pool_id>: {
|
||||
<pg_id>: {
|
||||
|
@ -323,6 +313,18 @@ const etcd_tree = {
|
|||
}, */
|
||||
},
|
||||
},
|
||||
pgstats: {
|
||||
/* <pool_id>: {
|
||||
<pg_id>: {
|
||||
object_count: uint64_t,
|
||||
clean_count: uint64_t,
|
||||
misplaced_count: uint64_t,
|
||||
degraded_count: uint64_t,
|
||||
incomplete_count: uint64_t,
|
||||
write_osd_set: osd_num_t[],
|
||||
},
|
||||
}, */
|
||||
},
|
||||
inode: {
|
||||
stats: {
|
||||
/* <pool_id>: {
|
||||
|
|
135
mon/mon.js
135
mon/mon.js
|
@ -75,6 +75,8 @@ class Mon
|
|||
this.prev_stats = { osd_stats: {}, osd_diff: {} };
|
||||
this.recheck_pgs_active = false;
|
||||
this.watcher_active = false;
|
||||
this.old_pg_config = false;
|
||||
this.old_pg_stats_seen = false;
|
||||
}
|
||||
|
||||
async start()
|
||||
|
@ -122,7 +124,7 @@ class Mon
|
|||
!Number(this.state.pool.stats[pool_id].pg_real_size))
|
||||
{
|
||||
// Generate missing data in etcd
|
||||
this.state.config.pgs.hash = null;
|
||||
this.state.pg.config.hash = null;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -201,10 +203,15 @@ class Mon
|
|||
stats_changed = true;
|
||||
changed = true;
|
||||
}
|
||||
else if (key.substr(0, 11) == '/osd/stats/' || key.substr(0, 10) == '/pg/stats/' || key.substr(0, 16) == '/osd/inodestats/')
|
||||
else if (key.substr(0, 11) == '/osd/stats/' || key.substr(0, 9) == '/pgstats/' || key.substr(0, 16) == '/osd/inodestats/')
|
||||
{
|
||||
stats_changed = true;
|
||||
}
|
||||
else if (key.substr(0, 10) == '/pg/stats/')
|
||||
{
|
||||
this.old_pg_stats_seen = true;
|
||||
stats_changed = true;
|
||||
}
|
||||
else if (key.substr(0, 10) == '/pg/state/')
|
||||
{
|
||||
pg_states_changed = true;
|
||||
|
@ -285,7 +292,7 @@ class Mon
|
|||
continue next_pool;
|
||||
}
|
||||
}
|
||||
new_clean_pgs.items[pool_id] = this.state.config.pgs.items[pool_id];
|
||||
new_clean_pgs.items[pool_id] = this.state.pg.config.items[pool_id];
|
||||
}
|
||||
this.state.history.last_clean_pgs = new_clean_pgs;
|
||||
await this.etcd.etcd_call('/kv/txn', {
|
||||
|
@ -396,6 +403,50 @@ class Mon
|
|||
this.parse_kv(kv);
|
||||
}
|
||||
}
|
||||
if (Object.keys((this.state.config.pgs||{}).items||{}).length)
|
||||
{
|
||||
// Support seamless upgrade to new OSDs
|
||||
if (!Object.keys((this.state.pg.config||{}).items||{}).length)
|
||||
{
|
||||
const pgs = JSON.stringify(this.state.config.pgs);
|
||||
this.state.pg.config = JSON.parse(pgs);
|
||||
const res = await this.etcd.etcd_call('/kv/txn', {
|
||||
success: [
|
||||
{ requestPut: { key: b64(this.config.etcd_prefix+'/pg/config'), value: b64(pgs) } },
|
||||
],
|
||||
compare: [
|
||||
{ key: b64(this.config.etcd_prefix+'/pg/config'), target: 'MOD', mod_revision: ''+this.etcd_watch_revision, result: 'LESS' },
|
||||
],
|
||||
}, this.config.etcd_mon_timeout, this.config.etcd_mon_retries);
|
||||
if (!res.succeeded)
|
||||
throw new Error('Failed to duplicate old PG config to new PG config');
|
||||
}
|
||||
this.old_pg_config = true;
|
||||
this.old_pg_config_timer = setInterval(() => this.check_clear_old_config().catch(console.error),
|
||||
this.config.old_pg_config_clear_interval||3600000);
|
||||
}
|
||||
}
|
||||
|
||||
async check_clear_old_config()
|
||||
{
|
||||
if (this.old_pg_config && this.old_pg_stats_seen)
|
||||
{
|
||||
this.old_pg_stats_seen = false;
|
||||
return;
|
||||
}
|
||||
if (this.old_pg_config)
|
||||
{
|
||||
await this.etcd.etcd_call('/kv/txn', { success: [
|
||||
{ requestDeleteRange: { key: b64(this.config.etcd_prefix+'/config/pgs') } },
|
||||
{ requestDeleteRange: { key: b64(this.config.etcd_prefix+'/pg/stats/'), range_end: b64(this.config.etcd_prefix+'/pg/stats0') } },
|
||||
] }, this.config.etcd_mon_timeout, this.config.etcd_mon_retries);
|
||||
this.old_pg_config = false;
|
||||
}
|
||||
if (this.old_pg_config_timer)
|
||||
{
|
||||
clearInterval(this.old_pg_config_timer);
|
||||
this.old_pg_config_timer = null;
|
||||
}
|
||||
}
|
||||
|
||||
all_osds()
|
||||
|
@ -406,7 +457,7 @@ class Mon
|
|||
async stop_all_pgs(pool_id)
|
||||
{
|
||||
let has_online = false, paused = true;
|
||||
for (const pg in this.state.config.pgs.items[pool_id]||{})
|
||||
for (const pg in this.state.pg.config.items[pool_id]||{})
|
||||
{
|
||||
// FIXME: Change all (||{}) to ?. (optional chaining) at some point
|
||||
const cur_state = (((this.state.pg.state[pool_id]||{})[pg]||{}).state||[]).join(',');
|
||||
|
@ -414,7 +465,7 @@ class Mon
|
|||
{
|
||||
has_online = true;
|
||||
}
|
||||
if (!this.state.config.pgs.items[pool_id][pg].pause)
|
||||
if (!this.state.pg.config.items[pool_id][pg].pause)
|
||||
{
|
||||
paused = false;
|
||||
}
|
||||
|
@ -422,7 +473,7 @@ class Mon
|
|||
if (!paused)
|
||||
{
|
||||
console.log('Stopping all PGs for pool '+pool_id+' before changing PG count');
|
||||
const new_cfg = JSON.parse(JSON.stringify(this.state.config.pgs));
|
||||
const new_cfg = JSON.parse(JSON.stringify(this.state.pg.config));
|
||||
for (const pg in new_cfg.items[pool_id])
|
||||
{
|
||||
new_cfg.items[pool_id][pg].pause = true;
|
||||
|
@ -430,22 +481,26 @@ class Mon
|
|||
// Check that no OSDs change their state before we pause PGs
|
||||
// Doing this we make sure that OSDs don't wake up in the middle of our "transaction"
|
||||
// and can't see the old PG configuration
|
||||
const checks = [];
|
||||
const checks = [
|
||||
{ key: b64(this.config.etcd_prefix+'/mon/master'), target: 'LEASE', lease: ''+this.etcd_lease_id },
|
||||
{ key: b64(this.config.etcd_prefix+'/pg/config'), target: 'MOD', mod_revision: ''+this.etcd_watch_revision, result: 'LESS' },
|
||||
];
|
||||
for (const osd_num of this.all_osds())
|
||||
{
|
||||
const key = b64(this.config.etcd_prefix+'/osd/state/'+osd_num);
|
||||
checks.push({ key, target: 'MOD', result: 'LESS', mod_revision: ''+this.etcd_watch_revision });
|
||||
}
|
||||
await this.etcd.etcd_call('/kv/txn', {
|
||||
compare: [
|
||||
{ key: b64(this.config.etcd_prefix+'/mon/master'), target: 'LEASE', lease: ''+this.etcd_lease_id },
|
||||
{ key: b64(this.config.etcd_prefix+'/config/pgs'), target: 'MOD', mod_revision: ''+this.etcd_watch_revision, result: 'LESS' },
|
||||
...checks,
|
||||
],
|
||||
const txn = {
|
||||
compare: checks,
|
||||
success: [
|
||||
{ requestPut: { key: b64(this.config.etcd_prefix+'/config/pgs'), value: b64(JSON.stringify(new_cfg)) } },
|
||||
{ requestPut: { key: b64(this.config.etcd_prefix+'/pg/config'), value: b64(JSON.stringify(new_cfg)) } },
|
||||
],
|
||||
}, this.config.etcd_mon_timeout, 0);
|
||||
};
|
||||
if (this.old_pg_config)
|
||||
{
|
||||
txn.success.push({ requestPut: { key: b64(this.config.etcd_prefix+'/config/pgs'), value: b64(JSON.stringify(new_cfg)) } });
|
||||
}
|
||||
await this.etcd.etcd_call('/kv/txn', txn, this.config.etcd_mon_timeout, 0);
|
||||
return false;
|
||||
}
|
||||
return !has_online;
|
||||
|
@ -473,7 +528,7 @@ class Mon
|
|||
pools: this.state.config.pools,
|
||||
};
|
||||
const tree_hash = sha1hex(stableStringify(tree_cfg));
|
||||
if (this.state.config.pgs.hash != tree_hash)
|
||||
if (this.state.pg.config.hash != tree_hash)
|
||||
{
|
||||
// Something has changed
|
||||
console.log('Pool configuration or OSD tree changed, re-optimizing');
|
||||
|
@ -514,10 +569,10 @@ class Mon
|
|||
else
|
||||
{
|
||||
// Nothing changed, but we still want to recheck the distribution of primaries
|
||||
let new_config_pgs = recheck_primary(this.state, this.config, up_osds, osd_tree);
|
||||
if (new_config_pgs)
|
||||
let new_pg_config = recheck_primary(this.state, this.config, up_osds, osd_tree);
|
||||
if (new_pg_config)
|
||||
{
|
||||
const ok = await this.save_pg_config(new_config_pgs);
|
||||
const ok = await this.save_pg_config(new_pg_config);
|
||||
if (ok)
|
||||
console.log('PG configuration successfully changed');
|
||||
else
|
||||
|
@ -532,12 +587,12 @@ class Mon
|
|||
|
||||
async apply_pool_pgs(results, up_osds, osd_tree, tree_hash)
|
||||
{
|
||||
for (const pool_id in (this.state.config.pgs||{}).items||{})
|
||||
for (const pool_id in (this.state.pg.config||{}).items||{})
|
||||
{
|
||||
// We should stop all PGs when deleting a pool or changing its PG count
|
||||
if (!this.state.config.pools[pool_id] ||
|
||||
this.state.config.pgs.items[pool_id] && this.state.config.pools[pool_id].pg_count !=
|
||||
Object.keys(this.state.config.pgs.items[pool_id]).reduce((a, c) => (a < (0|c) ? (0|c) : a), 0))
|
||||
this.state.pg.config.items[pool_id] && this.state.config.pools[pool_id].pg_count !=
|
||||
Object.keys(this.state.pg.config.items[pool_id]).reduce((a, c) => (a < (0|c) ? (0|c) : a), 0))
|
||||
{
|
||||
if (!await this.stop_all_pgs(pool_id))
|
||||
{
|
||||
|
@ -545,22 +600,22 @@ class Mon
|
|||
}
|
||||
}
|
||||
}
|
||||
const new_config_pgs = JSON.parse(JSON.stringify(this.state.config.pgs));
|
||||
const new_pg_config = JSON.parse(JSON.stringify(this.state.pg.config));
|
||||
const etcd_request = { compare: [], success: [] };
|
||||
for (const pool_id in (new_config_pgs||{}).items||{})
|
||||
for (const pool_id in (new_pg_config||{}).items||{})
|
||||
{
|
||||
if (!this.state.config.pools[pool_id])
|
||||
{
|
||||
const prev_pgs = [];
|
||||
for (const pg in new_config_pgs.items[pool_id]||{})
|
||||
for (const pg in new_pg_config.items[pool_id]||{})
|
||||
{
|
||||
prev_pgs[pg-1] = new_config_pgs.items[pool_id][pg].osd_set;
|
||||
prev_pgs[pg-1] = new_pg_config.items[pool_id][pg].osd_set;
|
||||
}
|
||||
// Also delete pool statistics
|
||||
etcd_request.success.push({ requestDeleteRange: {
|
||||
key: b64(this.config.etcd_prefix+'/pool/stats/'+pool_id),
|
||||
} });
|
||||
save_new_pgs_txn(new_config_pgs, etcd_request, this.state, this.config.etcd_prefix,
|
||||
save_new_pgs_txn(new_pg_config, etcd_request, this.state, this.config.etcd_prefix,
|
||||
this.etcd_watch_revision, pool_id, up_osds, osd_tree, prev_pgs, [], []);
|
||||
}
|
||||
}
|
||||
|
@ -569,7 +624,7 @@ class Mon
|
|||
const pool_id = pool_res.pool_id;
|
||||
const pool_cfg = this.state.config.pools[pool_id];
|
||||
let pg_history = [];
|
||||
for (const pg in ((this.state.config.pgs.items||{})[pool_id]||{}))
|
||||
for (const pg in ((this.state.pg.config.items||{})[pool_id]||{}))
|
||||
{
|
||||
if (this.state.pg.history[pool_id] &&
|
||||
this.state.pg.history[pool_id][pg])
|
||||
|
@ -578,9 +633,9 @@ class Mon
|
|||
}
|
||||
}
|
||||
const real_prev_pgs = [];
|
||||
for (const pg in ((this.state.config.pgs.items||{})[pool_id]||{}))
|
||||
for (const pg in ((this.state.pg.config.items||{})[pool_id]||{}))
|
||||
{
|
||||
real_prev_pgs[pg-1] = [ ...this.state.config.pgs.items[pool_id][pg].osd_set ];
|
||||
real_prev_pgs[pg-1] = [ ...this.state.pg.config.items[pool_id][pg].osd_set ];
|
||||
}
|
||||
if (real_prev_pgs.length > 0 && real_prev_pgs.length != pool_res.pgs.length)
|
||||
{
|
||||
|
@ -591,8 +646,8 @@ class Mon
|
|||
pg_history = scale_pg_history(pg_history, real_prev_pgs, pool_res.pgs);
|
||||
// Drop stats
|
||||
etcd_request.success.push({ requestDeleteRange: {
|
||||
key: b64(this.config.etcd_prefix+'/pg/stats/'+pool_id+'/'),
|
||||
range_end: b64(this.config.etcd_prefix+'/pg/stats/'+pool_id+'0'),
|
||||
key: b64(this.config.etcd_prefix+'/pgstats/'+pool_id+'/'),
|
||||
range_end: b64(this.config.etcd_prefix+'/pgstats/'+pool_id+'0'),
|
||||
} });
|
||||
}
|
||||
const stats = {
|
||||
|
@ -603,22 +658,26 @@ class Mon
|
|||
key: b64(this.config.etcd_prefix+'/pool/stats/'+pool_id),
|
||||
value: b64(JSON.stringify(stats)),
|
||||
} });
|
||||
save_new_pgs_txn(new_config_pgs, etcd_request, this.state, this.config.etcd_prefix,
|
||||
save_new_pgs_txn(new_pg_config, etcd_request, this.state, this.config.etcd_prefix,
|
||||
this.etcd_watch_revision, pool_id, up_osds, osd_tree, real_prev_pgs, pool_res.pgs, pg_history);
|
||||
}
|
||||
new_config_pgs.hash = tree_hash;
|
||||
return await this.save_pg_config(new_config_pgs, etcd_request);
|
||||
new_pg_config.hash = tree_hash;
|
||||
return await this.save_pg_config(new_pg_config, etcd_request);
|
||||
}
|
||||
|
||||
async save_pg_config(new_config_pgs, etcd_request = { compare: [], success: [] })
|
||||
async save_pg_config(new_pg_config, etcd_request = { compare: [], success: [] })
|
||||
{
|
||||
etcd_request.compare.push(
|
||||
{ key: b64(this.config.etcd_prefix+'/mon/master'), target: 'LEASE', lease: ''+this.etcd_lease_id },
|
||||
{ key: b64(this.config.etcd_prefix+'/config/pgs'), target: 'MOD', mod_revision: ''+this.etcd_watch_revision, result: 'LESS' },
|
||||
{ key: b64(this.config.etcd_prefix+'/pg/config'), target: 'MOD', mod_revision: ''+this.etcd_watch_revision, result: 'LESS' },
|
||||
);
|
||||
etcd_request.success.push(
|
||||
{ requestPut: { key: b64(this.config.etcd_prefix+'/config/pgs'), value: b64(JSON.stringify(new_config_pgs)) } },
|
||||
{ requestPut: { key: b64(this.config.etcd_prefix+'/pg/config'), value: b64(JSON.stringify(new_pg_config)) } },
|
||||
);
|
||||
if (this.old_pg_config)
|
||||
{
|
||||
etcd_request.success.push({ requestPut: { key: b64(this.config.etcd_prefix+'/config/pgs'), value: b64(JSON.stringify(new_pg_config)) } });
|
||||
}
|
||||
const txn_res = await this.etcd.etcd_call('/kv/txn', etcd_request, this.config.etcd_mon_timeout, 0);
|
||||
return txn_res.succeeded;
|
||||
}
|
||||
|
|
|
@ -57,7 +57,7 @@ function pick_primary(pool_config, osd_set, up_osds, aff_osds)
|
|||
|
||||
function recheck_primary(state, global_config, up_osds, osd_tree)
|
||||
{
|
||||
let new_config_pgs;
|
||||
let new_pg_config;
|
||||
for (const pool_id in state.config.pools)
|
||||
{
|
||||
const pool_cfg = state.config.pools[pool_id];
|
||||
|
@ -69,30 +69,30 @@ function recheck_primary(state, global_config, up_osds, osd_tree)
|
|||
reset_rng();
|
||||
for (let pg_num = 1; pg_num <= pool_cfg.pg_count; pg_num++)
|
||||
{
|
||||
if (!state.config.pgs.items[pool_id])
|
||||
if (!state.pg.config.items[pool_id])
|
||||
{
|
||||
continue;
|
||||
}
|
||||
const pg_cfg = state.config.pgs.items[pool_id][pg_num];
|
||||
const pg_cfg = state.pg.config.items[pool_id][pg_num];
|
||||
if (pg_cfg)
|
||||
{
|
||||
const new_primary = pick_primary(state.config.pools[pool_id], pg_cfg.osd_set, up_osds, aff_osds);
|
||||
if (pg_cfg.primary != new_primary)
|
||||
{
|
||||
if (!new_config_pgs)
|
||||
if (!new_pg_config)
|
||||
{
|
||||
new_config_pgs = JSON.parse(JSON.stringify(state.config.pgs));
|
||||
new_pg_config = JSON.parse(JSON.stringify(state.pg.config));
|
||||
}
|
||||
console.log(
|
||||
`Moving pool ${pool_id} (${pool_cfg.name || 'unnamed'}) PG ${pg_num}`+
|
||||
` primary OSD from ${pg_cfg.primary} to ${new_primary}`
|
||||
);
|
||||
new_config_pgs.items[pool_id][pg_num].primary = new_primary;
|
||||
new_pg_config.items[pool_id][pg_num].primary = new_primary;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return new_config_pgs;
|
||||
return new_pg_config;
|
||||
}
|
||||
|
||||
function save_new_pgs_txn(save_to, request, state, etcd_prefix, etcd_watch_revision, pool_id, up_osds, osd_tree, prev_pgs, new_pgs, pg_history)
|
||||
|
@ -185,10 +185,10 @@ async function generate_pool_pgs(state, global_config, pool_id, osd_tree, levels
|
|||
}
|
||||
if (!prev_pgs.length)
|
||||
{
|
||||
// Fall back to config/pgs if it's empty
|
||||
for (const pg in ((state.config.pgs.items||{})[pool_id]||{}))
|
||||
// Fall back to pg/config if it's empty
|
||||
for (const pg in ((state.pg.config.items||{})[pool_id]||{}))
|
||||
{
|
||||
prev_pgs[pg-1] = [ ...state.config.pgs.items[pool_id][pg].osd_set ];
|
||||
prev_pgs[pg-1] = [ ...state.pg.config.items[pool_id][pg].osd_set ];
|
||||
}
|
||||
}
|
||||
const old_pg_count = prev_pgs.length;
|
||||
|
@ -205,8 +205,8 @@ async function generate_pool_pgs(state, global_config, pool_id, osd_tree, levels
|
|||
ordered: pool_cfg.scheme != 'replicated',
|
||||
};
|
||||
let optimize_result;
|
||||
// Re-shuffle PGs if config/pgs.hash is empty
|
||||
if (old_pg_count > 0 && state.config.pgs.hash)
|
||||
// Re-shuffle PGs if pg/config.hash is empty
|
||||
if (old_pg_count > 0 && state.pg.config.hash)
|
||||
{
|
||||
if (prev_pgs.length != pool_cfg.pg_count)
|
||||
{
|
||||
|
|
|
@ -166,7 +166,7 @@ function export_prometheus_metrics(st)
|
|||
res += `vitastor_pool_used_raw_tb{${pool_label}} ${pool_stat.used_raw_tb||0}\n`;
|
||||
|
||||
// PG states and pool up/down status
|
||||
const real_pg_count = (Object.keys(((st.config.pgs||{}).items||{})[pool_id]||{}).length) || (0|pool_cfg.pg_count);
|
||||
const real_pg_count = (Object.keys(((st.pg.config||{}).items||{})[pool_id]||{}).length) || (0|pool_cfg.pg_count);
|
||||
const per_state = {
|
||||
active: 0,
|
||||
starting: 0,
|
||||
|
|
17
mon/stats.js
17
mon/stats.js
|
@ -100,10 +100,19 @@ function sum_object_counts(state, global_config)
|
|||
{
|
||||
const object_counts = { object: 0n, clean: 0n, misplaced: 0n, degraded: 0n, incomplete: 0n };
|
||||
const object_bytes = { object: 0n, clean: 0n, misplaced: 0n, degraded: 0n, incomplete: 0n };
|
||||
for (const pool_id in state.pg.stats)
|
||||
let pgstats = state.pgstats;
|
||||
if (state.pg.stats)
|
||||
{
|
||||
// Merge with old stats for seamless transition to new stats
|
||||
for (const pool_id in state.pg.stats)
|
||||
{
|
||||
pgstats[pool_id] = { ...(state.pg.stats[pool_id] || {}), ...(pgstats[pool_id] || {}) };
|
||||
}
|
||||
}
|
||||
for (const pool_id in pgstats)
|
||||
{
|
||||
let object_size = 0;
|
||||
for (const osd_num of state.pg.stats[pool_id].write_osd_set||[])
|
||||
for (const osd_num of pgstats[pool_id].write_osd_set||[])
|
||||
{
|
||||
if (osd_num && state.osd.stats[osd_num] && state.osd.stats[osd_num].block_size)
|
||||
{
|
||||
|
@ -121,9 +130,9 @@ function sum_object_counts(state, global_config)
|
|||
object_size *= ((pool_cfg.pg_size||0) - (pool_cfg.parity_chunks||0));
|
||||
}
|
||||
object_size = BigInt(object_size);
|
||||
for (const pg_num in state.pg.stats[pool_id])
|
||||
for (const pg_num in pgstats[pool_id])
|
||||
{
|
||||
const st = state.pg.stats[pool_id][pg_num];
|
||||
const st = pgstats[pool_id][pg_num];
|
||||
if (st)
|
||||
{
|
||||
for (const k in object_counts)
|
||||
|
|
|
@ -35,7 +35,8 @@ function vitastor_persist_filter(cfg)
|
|||
}
|
||||
else if (key.substr(0, prefix.length+'/osd/'.length) == prefix+'/osd/' ||
|
||||
key.substr(0, prefix.length+'/inode/stats/'.length) == prefix+'/inode/stats/' ||
|
||||
key.substr(0, prefix.length+'/pg/stats/'.length) == prefix+'/pg/stats/' ||
|
||||
key.substr(0, prefix.length+'/pg/stats/'.length) == prefix+'/pg/stats/' || // old name
|
||||
key.substr(0, prefix.length+'/pgstats/'.length) == prefix+'/pgstats/' ||
|
||||
key.substr(0, prefix.length+'/pool/stats/'.length) == prefix+'/pool/stats/' ||
|
||||
key == prefix+'/stats')
|
||||
{
|
||||
|
|
|
@ -50,7 +50,7 @@ from cinder.volume import configuration
|
|||
from cinder.volume import driver
|
||||
from cinder.volume import volume_utils
|
||||
|
||||
VERSION = '1.7.1'
|
||||
VITASTOR_VERSION = '1.7.1'
|
||||
|
||||
LOG = logging.getLogger(__name__)
|
||||
|
||||
|
@ -238,7 +238,7 @@ class VitastorDriver(driver.CloneableImageVD,
|
|||
|
||||
stats = {
|
||||
'vendor_name': 'Vitastor',
|
||||
'driver_version': self.VERSION,
|
||||
'driver_version': VITASTOR_VERSION,
|
||||
'storage_protocol': 'vitastor',
|
||||
'total_capacity_gb': 'unknown',
|
||||
'free_capacity_gb': 'unknown',
|
||||
|
|
|
@ -19,7 +19,7 @@ if("${CMAKE_INSTALL_PREFIX}" MATCHES "^/usr/local/?$")
|
|||
set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}")
|
||||
endif()
|
||||
|
||||
add_definitions(-DVERSION="1.7.1")
|
||||
add_definitions(-DVITASTOR_VERSION="1.7.1")
|
||||
add_definitions(-D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -Wall -Wno-sign-compare -Wno-comment -Wno-parentheses -Wno-pointer-arith -fdiagnostics-color=always -fno-omit-frame-pointer -I ${CMAKE_SOURCE_DIR}/src)
|
||||
add_link_options(-fno-omit-frame-pointer)
|
||||
if (${WITH_ASAN})
|
||||
|
|
|
@ -13,7 +13,7 @@ target_link_libraries(vitastor_blk
|
|||
# for timerfd_manager
|
||||
vitastor_common
|
||||
)
|
||||
set_target_properties(vitastor_blk PROPERTIES VERSION ${VERSION} SOVERSION 0)
|
||||
set_target_properties(vitastor_blk PROPERTIES VERSION ${VITASTOR_VERSION} SOVERSION 0)
|
||||
|
||||
if (${WITH_FIO})
|
||||
# libfio_vitastor_blk.so
|
||||
|
|
|
@ -29,7 +29,7 @@ target_link_libraries(vitastor_client
|
|||
${LIBURING_LIBRARIES}
|
||||
${IBVERBS_LIBRARIES}
|
||||
)
|
||||
set_target_properties(vitastor_client PROPERTIES VERSION ${VERSION} SOVERSION 0)
|
||||
set_target_properties(vitastor_client PROPERTIES VERSION ${VITASTOR_VERSION} SOVERSION 0)
|
||||
configure_file(vitastor.pc.in vitastor.pc @ONLY)
|
||||
|
||||
if (${WITH_FIO})
|
||||
|
|
|
@ -333,7 +333,10 @@ void etcd_state_client_t::start_etcd_watcher()
|
|||
etcd_watch_ws = NULL;
|
||||
}
|
||||
if (this->log_level > 1)
|
||||
fprintf(stderr, "Trying to connect to etcd websocket at %s, watch from revision %ju\n", etcd_address.c_str(), etcd_watch_revision);
|
||||
{
|
||||
fprintf(stderr, "Trying to connect to etcd websocket at %s, watch from revision %ju/%ju/%ju\n", etcd_address.c_str(),
|
||||
etcd_watch_revision_config, etcd_watch_revision_osd, etcd_watch_revision_pg);
|
||||
}
|
||||
etcd_watch_ws = open_websocket(tfd, etcd_address, etcd_api_path+"/watch", etcd_slow_timeout,
|
||||
[this, cur_addr = selected_etcd_address](const http_response_t *msg)
|
||||
{
|
||||
|
@ -348,16 +351,20 @@ void etcd_state_client_t::start_etcd_watcher()
|
|||
}
|
||||
else
|
||||
{
|
||||
uint64_t watch_id = data["result"]["watch_id"].uint64_value();
|
||||
if (data["result"]["created"].bool_value())
|
||||
{
|
||||
uint64_t watch_id = data["result"]["watch_id"].uint64_value();
|
||||
if (watch_id == ETCD_CONFIG_WATCH_ID ||
|
||||
watch_id == ETCD_PG_STATE_WATCH_ID ||
|
||||
watch_id == ETCD_PG_HISTORY_WATCH_ID ||
|
||||
watch_id == ETCD_OSD_STATE_WATCH_ID)
|
||||
{
|
||||
etcd_watches_initialised++;
|
||||
}
|
||||
if (etcd_watches_initialised == ETCD_TOTAL_WATCHES && this->log_level > 0)
|
||||
fprintf(stderr, "Successfully subscribed to etcd at %s, revision %ju\n", cur_addr.c_str(), etcd_watch_revision);
|
||||
{
|
||||
fprintf(stderr, "Successfully subscribed to etcd at %s, revision %ju/%ju/%ju\n", cur_addr.c_str(),
|
||||
etcd_watch_revision_config, etcd_watch_revision_osd, etcd_watch_revision_pg);
|
||||
}
|
||||
}
|
||||
if (data["result"]["canceled"].bool_value())
|
||||
{
|
||||
|
@ -375,7 +382,7 @@ void etcd_state_client_t::start_etcd_watcher()
|
|||
data["result"]["compact_revision"].uint64_value());
|
||||
http_close(etcd_watch_ws);
|
||||
etcd_watch_ws = NULL;
|
||||
etcd_watch_revision = 0;
|
||||
etcd_watch_revision_config = etcd_watch_revision_osd = etcd_watch_revision_pg = 0;
|
||||
on_reload_hook();
|
||||
}
|
||||
return;
|
||||
|
@ -393,13 +400,29 @@ void etcd_state_client_t::start_etcd_watcher()
|
|||
exit(1);
|
||||
}
|
||||
}
|
||||
// Save revision only if it's present in the message - because sometimes etcd sends something without a header, like:
|
||||
// {"error": {"grpc_code": 14, "http_code": 503, "http_status": "Service Unavailable", "message": "error reading from server: EOF"}}
|
||||
if (etcd_watches_initialised == ETCD_TOTAL_WATCHES && !data["result"]["header"]["revision"].is_null())
|
||||
{
|
||||
// Protect against a revision being split into multiple messages and some
|
||||
// of them being lost. Even though I'm not sure if etcd actually splits them
|
||||
// Also sometimes etcd sends something without a header, like:
|
||||
// {"error": {"grpc_code": 14, "http_code": 503, "http_status": "Service Unavailable", "message": "error reading from server: EOF"}}
|
||||
etcd_watch_revision = data["result"]["header"]["revision"].uint64_value();
|
||||
// Restart watchers from the same revision number as in the last received message,
|
||||
// not from the next one to protect against revision being split into multiple messages,
|
||||
// even though etcd guarantees not to do that **within a single watcher** without fragment=true:
|
||||
// https://etcd.io/docs/v3.5/learning/api_guarantees/#watch-apis
|
||||
// Revision contents are ALWAYS split into separate messages for different watchers though!
|
||||
// So generally we have to resume each watcher from its own revision...
|
||||
// Progress messages may have watch_id=-1 if sent on behalf of multiple watchers though.
|
||||
// And antietcd has an advanced semantic which merges the same revision for all watchers
|
||||
// into one message and just omits watch_id.
|
||||
// So we also have to handle the case where watch_id is -1 or not present (0).
|
||||
auto watch_rev = data["result"]["header"]["revision"].uint64_value();
|
||||
if (!watch_id || watch_id == UINT64_MAX)
|
||||
etcd_watch_revision_config = etcd_watch_revision_osd = etcd_watch_revision_pg = watch_rev;
|
||||
else if (watch_id == ETCD_CONFIG_WATCH_ID)
|
||||
etcd_watch_revision_config = watch_rev;
|
||||
else if (watch_id == ETCD_PG_STATE_WATCH_ID)
|
||||
etcd_watch_revision_pg = watch_rev;
|
||||
else if (watch_id == ETCD_OSD_STATE_WATCH_ID)
|
||||
etcd_watch_revision_osd = watch_rev;
|
||||
addresses_to_try.clear();
|
||||
}
|
||||
// First gather all changes into a hash to remove multiple overwrites
|
||||
|
@ -457,7 +480,7 @@ void etcd_state_client_t::start_etcd_watcher()
|
|||
{ "create_request", json11::Json::object {
|
||||
{ "key", base64_encode(etcd_prefix+"/config/") },
|
||||
{ "range_end", base64_encode(etcd_prefix+"/config0") },
|
||||
{ "start_revision", etcd_watch_revision },
|
||||
{ "start_revision", etcd_watch_revision_config },
|
||||
{ "watch_id", ETCD_CONFIG_WATCH_ID },
|
||||
{ "progress_notify", true },
|
||||
} }
|
||||
|
@ -466,29 +489,21 @@ void etcd_state_client_t::start_etcd_watcher()
|
|||
{ "create_request", json11::Json::object {
|
||||
{ "key", base64_encode(etcd_prefix+"/osd/state/") },
|
||||
{ "range_end", base64_encode(etcd_prefix+"/osd/state0") },
|
||||
{ "start_revision", etcd_watch_revision },
|
||||
{ "start_revision", etcd_watch_revision_osd },
|
||||
{ "watch_id", ETCD_OSD_STATE_WATCH_ID },
|
||||
{ "progress_notify", true },
|
||||
} }
|
||||
}).dump());
|
||||
http_post_message(etcd_watch_ws, WS_TEXT, json11::Json(json11::Json::object {
|
||||
{ "create_request", json11::Json::object {
|
||||
{ "key", base64_encode(etcd_prefix+"/pg/state/") },
|
||||
{ "range_end", base64_encode(etcd_prefix+"/pg/state0") },
|
||||
{ "start_revision", etcd_watch_revision },
|
||||
{ "key", base64_encode(etcd_prefix+"/pg/") },
|
||||
{ "range_end", base64_encode(etcd_prefix+"/pg0") },
|
||||
{ "start_revision", etcd_watch_revision_pg },
|
||||
{ "watch_id", ETCD_PG_STATE_WATCH_ID },
|
||||
{ "progress_notify", true },
|
||||
} }
|
||||
}).dump());
|
||||
http_post_message(etcd_watch_ws, WS_TEXT, json11::Json(json11::Json::object {
|
||||
{ "create_request", json11::Json::object {
|
||||
{ "key", base64_encode(etcd_prefix+"/pg/history/") },
|
||||
{ "range_end", base64_encode(etcd_prefix+"/pg/history0") },
|
||||
{ "start_revision", etcd_watch_revision },
|
||||
{ "watch_id", ETCD_PG_HISTORY_WATCH_ID },
|
||||
{ "progress_notify", true },
|
||||
} }
|
||||
}).dump());
|
||||
// FIXME: Do not watch /pg/history/ at all in client code (not in OSD)
|
||||
if (on_start_watcher_hook)
|
||||
{
|
||||
on_start_watcher_hook(etcd_watch_ws);
|
||||
|
@ -591,6 +606,11 @@ void etcd_state_client_t::load_pgs()
|
|||
{ "key", base64_encode(etcd_prefix+"/config/pgs") },
|
||||
} }
|
||||
},
|
||||
json11::Json::object {
|
||||
{ "request_range", json11::Json::object {
|
||||
{ "key", base64_encode(etcd_prefix+"/pg/config") },
|
||||
} }
|
||||
},
|
||||
json11::Json::object {
|
||||
{ "request_range", json11::Json::object {
|
||||
{ "key", base64_encode(etcd_prefix+"/config/inode/") },
|
||||
|
@ -640,13 +660,10 @@ void etcd_state_client_t::load_pgs()
|
|||
return;
|
||||
}
|
||||
reset_pg_exists();
|
||||
if (!etcd_watch_revision)
|
||||
etcd_watch_revision_config = etcd_watch_revision_osd = etcd_watch_revision_pg = data["header"]["revision"].uint64_value()+1;
|
||||
if (this->log_level > 3)
|
||||
{
|
||||
etcd_watch_revision = data["header"]["revision"].uint64_value()+1;
|
||||
if (this->log_level > 3)
|
||||
{
|
||||
fprintf(stderr, "Loaded revision %ju of PG configuration\n", etcd_watch_revision-1);
|
||||
}
|
||||
fprintf(stderr, "Loaded revision %ju of PG configuration\n", etcd_watch_revision_pg-1);
|
||||
}
|
||||
for (auto & res: data["responses"].array_items())
|
||||
{
|
||||
|
@ -895,8 +912,17 @@ void etcd_state_client_t::parse_state(const etcd_kv_t & kv)
|
|||
on_change_pool_config_hook();
|
||||
}
|
||||
}
|
||||
else if (key == etcd_prefix+"/config/pgs")
|
||||
else if (key == etcd_prefix+"/pg/config" || key == etcd_prefix+"/config/pgs")
|
||||
{
|
||||
if (key == etcd_prefix+"/pg/config")
|
||||
{
|
||||
new_pg_config = !value.is_null();
|
||||
}
|
||||
else if (new_pg_config)
|
||||
{
|
||||
// Ignore old key if the new one is present
|
||||
return;
|
||||
}
|
||||
for (auto & pool_item: this->pool_config)
|
||||
{
|
||||
for (auto & pg_item: pool_item.second.pg_config)
|
||||
|
|
|
@ -10,10 +10,9 @@
|
|||
#include "timerfd_manager.h"
|
||||
|
||||
#define ETCD_CONFIG_WATCH_ID 1
|
||||
#define ETCD_PG_STATE_WATCH_ID 2
|
||||
#define ETCD_PG_HISTORY_WATCH_ID 3
|
||||
#define ETCD_OSD_STATE_WATCH_ID 4
|
||||
#define ETCD_TOTAL_WATCHES 4
|
||||
#define ETCD_OSD_STATE_WATCH_ID 2
|
||||
#define ETCD_PG_STATE_WATCH_ID 3
|
||||
#define ETCD_TOTAL_WATCHES 3
|
||||
|
||||
#define DEFAULT_BLOCK_SIZE 128*1024
|
||||
#define MIN_DATA_BLOCK_SIZE 4*1024
|
||||
|
@ -95,7 +94,7 @@ protected:
|
|||
std::string selected_etcd_address;
|
||||
std::vector<std::string> addresses_to_try;
|
||||
std::vector<inode_watch_t*> watches;
|
||||
http_co_t *etcd_watch_ws = NULL, *keepalive_client = NULL;
|
||||
bool new_pg_config = false;
|
||||
int ws_keepalive_timer = -1;
|
||||
int ws_alive = 0;
|
||||
bool rand_initialized = false;
|
||||
|
@ -115,8 +114,11 @@ public:
|
|||
int log_level = 0;
|
||||
timerfd_manager_t *tfd = NULL;
|
||||
|
||||
http_co_t *etcd_watch_ws = NULL, *keepalive_client = NULL;
|
||||
int etcd_watches_initialised = 0;
|
||||
uint64_t etcd_watch_revision = 0;
|
||||
uint64_t etcd_watch_revision_config = 0;
|
||||
uint64_t etcd_watch_revision_osd = 0;
|
||||
uint64_t etcd_watch_revision_pg = 0;
|
||||
std::map<pool_id_t, pool_config_t> pool_config;
|
||||
std::map<osd_num_t, json11::Json> peer_states;
|
||||
std::set<osd_num_t> seen_peers;
|
||||
|
|
|
@ -253,7 +253,7 @@ nla_put_failure:
|
|||
const char *exe_name = NULL;
|
||||
|
||||
const char *help_text =
|
||||
"Vitastor NBD proxy " VERSION "\n"
|
||||
"Vitastor NBD proxy " VITASTOR_VERSION "\n"
|
||||
"(c) Vitaliy Filippov, 2020+ (VNPL-1.1)\n"
|
||||
"\n"
|
||||
"COMMANDS:\n"
|
||||
|
|
|
@ -17,7 +17,7 @@
|
|||
static const char *exe_name = NULL;
|
||||
|
||||
static const char* help_text =
|
||||
"Vitastor command-line tool " VERSION "\n"
|
||||
"Vitastor command-line tool " VITASTOR_VERSION "\n"
|
||||
"(c) Vitaliy Filippov, 2019+ (VNPL-1.1)\n"
|
||||
"\n"
|
||||
"COMMANDS:\n"
|
||||
|
|
|
@ -49,8 +49,8 @@ struct pg_lister_t
|
|||
{ "success", json11::Json::array {
|
||||
json11::Json::object {
|
||||
{ "request_range", json11::Json::object {
|
||||
{ "key", base64_encode(parent->cli->st_cli.etcd_prefix+"/pg/stats"+(pool_id ? "/"+std::to_string(pool_id)+"/" : "/")) },
|
||||
{ "range_end", base64_encode(parent->cli->st_cli.etcd_prefix+"/pg/stats"+(pool_id ? "/"+std::to_string(pool_id)+"0" : "0")) },
|
||||
{ "key", base64_encode(parent->cli->st_cli.etcd_prefix+"/pgstats"+(pool_id ? "/"+std::to_string(pool_id)+"/" : "/")) },
|
||||
{ "range_end", base64_encode(parent->cli->st_cli.etcd_prefix+"/pgstats"+(pool_id ? "/"+std::to_string(pool_id)+"0" : "0")) },
|
||||
} },
|
||||
},
|
||||
} },
|
||||
|
@ -65,7 +65,7 @@ resume_1:
|
|||
state = 100;
|
||||
return;
|
||||
}
|
||||
parent->iterate_kvs_2(parent->etcd_result["responses"][0]["response_range"]["kvs"], "/pg/stats/", [&](pool_id_t pool_id, uint64_t pg_num, json11::Json value)
|
||||
parent->iterate_kvs_2(parent->etcd_result["responses"][0]["response_range"]["kvs"], "/pgstats/", [&](pool_id_t pool_id, uint64_t pg_num, json11::Json value)
|
||||
{
|
||||
pg_stats[(pool_pg_num_t){ .pool_id = pool_id, .pg_num = (pg_num_t)pg_num }] = value;
|
||||
});
|
||||
|
|
|
@ -214,10 +214,10 @@ resume_1:
|
|||
json11::Json::object {
|
||||
{ "request_range", json11::Json::object {
|
||||
{ "key", base64_encode(
|
||||
parent->cli->st_cli.etcd_prefix+"/pg/stats/"
|
||||
parent->cli->st_cli.etcd_prefix+"/pgstats/"
|
||||
) },
|
||||
{ "range_end", base64_encode(
|
||||
parent->cli->st_cli.etcd_prefix+"/pg/stats0"
|
||||
parent->cli->st_cli.etcd_prefix+"/pgstats0"
|
||||
) },
|
||||
} },
|
||||
},
|
||||
|
@ -235,7 +235,7 @@ resume_1:
|
|||
}
|
||||
// Calculate recovery percent
|
||||
std::map<pool_id_t, object_counts_t> counts;
|
||||
parent->iterate_kvs_2(parent->etcd_result["responses"][0]["response_range"]["kvs"], "/pg/stats/",
|
||||
parent->iterate_kvs_2(parent->etcd_result["responses"][0]["response_range"]["kvs"], "/pgstats/",
|
||||
[&](pool_id_t pool_id, uint64_t pg_num, json11::Json value)
|
||||
{
|
||||
auto & cnt = counts[pool_id];
|
||||
|
|
|
@ -176,7 +176,7 @@ struct rm_osd_t
|
|||
json11::Json::object {
|
||||
{ "request_range", json11::Json::object {
|
||||
{ "key", base64_encode(
|
||||
parent->cli->st_cli.etcd_prefix+"/config/pgs"
|
||||
parent->cli->st_cli.etcd_prefix+"/pg/config"
|
||||
) },
|
||||
} },
|
||||
},
|
||||
|
@ -229,7 +229,7 @@ struct rm_osd_t
|
|||
}
|
||||
if (!new_pgs.is_null())
|
||||
{
|
||||
auto pgs_key = base64_encode(parent->cli->st_cli.etcd_prefix+"/config/pgs");
|
||||
auto pgs_key = base64_encode(parent->cli->st_cli.etcd_prefix+"/pg/config");
|
||||
rm_items.push_back(json11::Json::object {
|
||||
{ "request_put", json11::Json::object {
|
||||
{ "key", pgs_key },
|
||||
|
@ -427,7 +427,7 @@ struct rm_osd_t
|
|||
{ "target", "MOD" },
|
||||
{ "key", history_key },
|
||||
{ "result", "LESS" },
|
||||
{ "mod_revision", parent->cli->st_cli.etcd_watch_revision+1 },
|
||||
{ "mod_revision", parent->cli->st_cli.etcd_watch_revision_pg+1 },
|
||||
});
|
||||
}
|
||||
}
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
#include "str_util.h"
|
||||
|
||||
static const char *help_text =
|
||||
"Vitastor disk management tool " VERSION "\n"
|
||||
"Vitastor disk management tool " VITASTOR_VERSION "\n"
|
||||
"(c) Vitaliy Filippov, 2022+ (VNPL-1.1)\n"
|
||||
"\n"
|
||||
"COMMANDS:\n"
|
||||
|
|
|
@ -10,7 +10,7 @@ set_target_properties(vitastor_kv PROPERTIES PUBLIC_HEADER "kv/vitastor_kv.h")
|
|||
target_link_libraries(vitastor_kv
|
||||
vitastor_client
|
||||
)
|
||||
set_target_properties(vitastor_kv PROPERTIES VERSION ${VERSION} SOVERSION 0)
|
||||
set_target_properties(vitastor_kv PROPERTIES VERSION ${VITASTOR_VERSION} SOVERSION 0)
|
||||
|
||||
# vitastor-kv
|
||||
add_executable(vitastor-kv
|
||||
|
|
|
@ -53,7 +53,7 @@ nfs_proxy_t::~nfs_proxy_t()
|
|||
}
|
||||
|
||||
static const char* help_text =
|
||||
"Vitastor NFS 3.0 proxy " VERSION "\n"
|
||||
"Vitastor NFS 3.0 proxy " VITASTOR_VERSION "\n"
|
||||
"(c) Vitaliy Filippov, 2021+ (VNPL-1.1)\n"
|
||||
"\n"
|
||||
"vitastor-nfs (--fs <NAME> | --block) [-o <OPT>] mount <MOUNTPOINT>\n"
|
||||
|
@ -372,24 +372,6 @@ void nfs_proxy_t::watch_stats()
|
|||
assert(cli->st_cli.on_start_watcher_hook == NULL);
|
||||
cli->st_cli.on_start_watcher_hook = [this](http_co_t *etcd_watch_ws)
|
||||
{
|
||||
http_post_message(etcd_watch_ws, WS_TEXT, json11::Json(json11::Json::object {
|
||||
{ "create_request", json11::Json::object {
|
||||
{ "key", base64_encode(cli->st_cli.etcd_prefix+"/inode/stats/") },
|
||||
{ "range_end", base64_encode(cli->st_cli.etcd_prefix+"/inode/stats0") },
|
||||
{ "start_revision", cli->st_cli.etcd_watch_revision },
|
||||
{ "watch_id", ETCD_INODE_STATS_WATCH_ID },
|
||||
{ "progress_notify", true },
|
||||
} }
|
||||
}).dump());
|
||||
http_post_message(etcd_watch_ws, WS_TEXT, json11::Json(json11::Json::object {
|
||||
{ "create_request", json11::Json::object {
|
||||
{ "key", base64_encode(cli->st_cli.etcd_prefix+"/pool/stats/") },
|
||||
{ "range_end", base64_encode(cli->st_cli.etcd_prefix+"/pool/stats0") },
|
||||
{ "start_revision", cli->st_cli.etcd_watch_revision },
|
||||
{ "watch_id", ETCD_POOL_STATS_WATCH_ID },
|
||||
{ "progress_notify", true },
|
||||
} }
|
||||
}).dump());
|
||||
cli->st_cli.etcd_txn_slow(json11::Json::object {
|
||||
{ "success", json11::Json::array {
|
||||
json11::Json::object {
|
||||
|
@ -415,6 +397,28 @@ void nfs_proxy_t::watch_stats()
|
|||
parse_stats(kv);
|
||||
}
|
||||
}
|
||||
if (cli->st_cli.etcd_watch_ws)
|
||||
{
|
||||
auto watch_rev = res["header"]["revision"].uint64_value()+1;
|
||||
http_post_message(cli->st_cli.etcd_watch_ws, WS_TEXT, json11::Json(json11::Json::object {
|
||||
{ "create_request", json11::Json::object {
|
||||
{ "key", base64_encode(cli->st_cli.etcd_prefix+"/inode/stats/") },
|
||||
{ "range_end", base64_encode(cli->st_cli.etcd_prefix+"/inode/stats0") },
|
||||
{ "start_revision", watch_rev },
|
||||
{ "watch_id", ETCD_INODE_STATS_WATCH_ID },
|
||||
{ "progress_notify", true },
|
||||
} }
|
||||
}).dump());
|
||||
http_post_message(cli->st_cli.etcd_watch_ws, WS_TEXT, json11::Json(json11::Json::object {
|
||||
{ "create_request", json11::Json::object {
|
||||
{ "key", base64_encode(cli->st_cli.etcd_prefix+"/pool/stats/") },
|
||||
{ "range_end", base64_encode(cli->st_cli.etcd_prefix+"/pool/stats0") },
|
||||
{ "start_revision", watch_rev },
|
||||
{ "watch_id", ETCD_POOL_STATS_WATCH_ID },
|
||||
{ "progress_notify", true },
|
||||
} }
|
||||
}).dump());
|
||||
}
|
||||
});
|
||||
};
|
||||
cli->st_cli.on_change_hook = [this, old_hook = cli->st_cli.on_change_hook](std::map<std::string, etcd_kv_t> & changes)
|
||||
|
|
|
@ -169,6 +169,7 @@ json11::Json osd_t::get_osd_state()
|
|||
else
|
||||
st["addresses"] = getifaddr_list();
|
||||
st["host"] = std::string(hostname.data(), hostname.size());
|
||||
st["version"] = VITASTOR_VERSION;
|
||||
st["port"] = listening_port;
|
||||
st["primary_enabled"] = run_primary;
|
||||
st["blockstore_enabled"] = bs ? true : false;
|
||||
|
@ -199,6 +200,7 @@ json11::Json osd_t::get_statistics()
|
|||
st["bitmap_granularity"] = (uint64_t)bs_bitmap_granularity;
|
||||
st["immediate_commit"] = immediate_commit == IMMEDIATE_ALL ? "all" : (immediate_commit == IMMEDIATE_SMALL ? "small" : "none");
|
||||
st["host"] = self_state["host"];
|
||||
st["version"] = VITASTOR_VERSION;
|
||||
json11::Json::object op_stats, subop_stats;
|
||||
for (int i = OSD_OP_MIN; i <= OSD_OP_MAX; i++)
|
||||
{
|
||||
|
@ -371,7 +373,7 @@ void osd_t::report_statistics()
|
|||
pg_stats["write_osd_set"] = pg.cur_set;
|
||||
txn.push_back(json11::Json::object {
|
||||
{ "request_put", json11::Json::object {
|
||||
{ "key", base64_encode(st_cli.etcd_prefix+"/pg/stats/"+std::to_string(pg.pool_id)+"/"+std::to_string(pg.pg_num)) },
|
||||
{ "key", base64_encode(st_cli.etcd_prefix+"/pgstats/"+std::to_string(pg.pool_id)+"/"+std::to_string(pg.pg_num)) },
|
||||
{ "value", base64_encode(json11::Json(pg_stats).dump()) },
|
||||
} }
|
||||
});
|
||||
|
@ -418,7 +420,7 @@ void osd_t::on_change_etcd_state_hook(std::map<std::string, etcd_kv_t> & changes
|
|||
}
|
||||
if (run_primary)
|
||||
{
|
||||
bool pgs = changes.find(st_cli.etcd_prefix+"/config/pgs") != changes.end();
|
||||
bool pgs = changes.find(st_cli.etcd_prefix+"/pg/config") != changes.end();
|
||||
if (pools || pgs)
|
||||
{
|
||||
apply_pg_count();
|
||||
|
@ -903,7 +905,7 @@ void osd_t::report_pg_states()
|
|||
{ "target", "MOD" },
|
||||
{ "key", state_key_base64 },
|
||||
{ "result", "LESS" },
|
||||
{ "mod_revision", st_cli.etcd_watch_revision+1 },
|
||||
{ "mod_revision", st_cli.etcd_watch_revision_pg+1 },
|
||||
});
|
||||
continue;
|
||||
}
|
||||
|
@ -974,7 +976,7 @@ void osd_t::report_pg_states()
|
|||
{ "target", "MOD" },
|
||||
{ "key", history_key },
|
||||
{ "result", "LESS" },
|
||||
{ "mod_revision", st_cli.etcd_watch_revision+1 },
|
||||
{ "mod_revision", st_cli.etcd_watch_revision_pg+1 },
|
||||
});
|
||||
success.push_back(json11::Json::object {
|
||||
{ "request_put", json11::Json::object {
|
||||
|
|
|
@ -20,7 +20,7 @@ static void handle_sigint(int sig)
|
|||
}
|
||||
|
||||
static const char* help_text =
|
||||
"Vitastor OSD (block object storage daemon) " VERSION "\n"
|
||||
"Vitastor OSD (block object storage daemon) " VITASTOR_VERSION "\n"
|
||||
"(c) Vitaliy Filippov, 2019+ (VNPL-1.1)\n"
|
||||
"\n"
|
||||
"OSDs are usually started by vitastor-disk.\n"
|
||||
|
|
|
@ -22,7 +22,7 @@ void configure_single_pg_pool(cluster_client_t *cli)
|
|||
},
|
||||
});
|
||||
cli->st_cli.parse_state((etcd_kv_t){
|
||||
.key = "/config/pgs",
|
||||
.key = "/pg/config",
|
||||
.value = json11::Json::object {
|
||||
{ "items", json11::Json::object {
|
||||
{ "1", json11::Json::object {
|
||||
|
|
|
@ -25,11 +25,22 @@ ETCD_IP=${ETCD_IP:-127.0.0.1}
|
|||
ETCD_PORT=${ETCD_PORT:-12379}
|
||||
ETCD_COUNT=${ETCD_COUNT:-1}
|
||||
ANTIETCD=${ANTIETCD}
|
||||
USE_RAMDISK=${USE_RAMDISK}
|
||||
|
||||
if [ "$KEEP_DATA" = "" ]; then
|
||||
RAMDISK=/run/user/$(id -u)
|
||||
findmnt $RAMDISK >/dev/null || (sudo mkdir -p $RAMDISK && sudo mount -t tmpfs tmpfs $RAMDISK)
|
||||
|
||||
if [[ -z "$KEEP_DATA" ]]; then
|
||||
rm -rf ./testdata
|
||||
rm -rf /run/user/$(id -u)/testdata_etcd*
|
||||
rm -rf /run/user/$(id -u)/testdata_etcd* /run/user/$(id -u)/testdata_bin
|
||||
mkdir -p ./testdata
|
||||
if [[ -n "$USE_RAMDISK" ]]; then
|
||||
OSD_ARGS="$OSD_ARGS --data_io cached"
|
||||
mkdir -p /run/user/$(id -u)/testdata_bin
|
||||
ln -s /run/user/$(id -u)/testdata_bin ./testdata/bin
|
||||
else
|
||||
mkdir -p ./testdata/bin
|
||||
fi
|
||||
fi
|
||||
|
||||
ETCD_URL="http://$ETCD_IP:$ETCD_PORT"
|
||||
|
@ -41,9 +52,7 @@ start_etcd()
|
|||
{
|
||||
local i=$1
|
||||
if [[ -z "$ANTIETCD" ]]; then
|
||||
local t=/run/user/$(id -u)
|
||||
findmnt $t >/dev/null || (sudo mkdir -p $t && sudo mount -t tmpfs tmpfs $t)
|
||||
ionice -c2 -n0 $ETCD -name etcd$i --data-dir /run/user/$(id -u)/testdata_etcd$i \
|
||||
ionice -c2 -n0 $ETCD -name etcd$i --data-dir $RAMDISK/testdata_etcd$i \
|
||||
--advertise-client-urls http://$ETCD_IP:$((ETCD_PORT+2*i-2)) --listen-client-urls http://$ETCD_IP:$((ETCD_PORT+2*i-2)) \
|
||||
--initial-advertise-peer-urls http://$ETCD_IP:$((ETCD_PORT+2*i-1)) --listen-peer-urls http://$ETCD_IP:$((ETCD_PORT+2*i-1)) \
|
||||
--initial-cluster-token vitastor-tests-etcd --initial-cluster-state new \
|
||||
|
|
|
@ -50,8 +50,8 @@ if ! type -t osd_dev; then
|
|||
osd_dev()
|
||||
{
|
||||
local i=$1
|
||||
[[ -f ./testdata/test_osd$i.bin ]] || dd if=/dev/zero of=./testdata/test_osd$i.bin bs=1024 count=1 seek=$((OSD_SIZE*1024-1))
|
||||
echo ./testdata/test_osd$i.bin
|
||||
[[ -f ./testdata/bin/test_osd$i.bin ]] || dd if=/dev/zero of=./testdata/bin/test_osd$i.bin bs=1024 count=1 seek=$((OSD_SIZE*1024-1))
|
||||
echo ./testdata/bin/test_osd$i.bin
|
||||
}
|
||||
fi
|
||||
|
||||
|
@ -89,7 +89,7 @@ wait_up()
|
|||
local i=0
|
||||
local configured=0
|
||||
while [[ $i -lt $sec ]]; do
|
||||
if $ETCDCTL get /vitastor/config/pgs --print-value-only | jq -s -e '(. | length) != 0 and ([ .[0].items["1"][] |
|
||||
if $ETCDCTL get /vitastor/pg/config --print-value-only | jq -s -e '(. | length) != 0 and ([ .[0].items["1"][] |
|
||||
select(((.osd_set | select(. != 0) | sort | unique) | length) == '$PG_SIZE') ] | length) == '$PG_COUNT; then
|
||||
configured=1
|
||||
if $ETCDCTL get /vitastor/pg/state/1/ --prefix --print-value-only | jq -s -e '[ .[] | select(.state == ["active"]) ] | length == '$PG_COUNT; then
|
||||
|
|
|
@ -13,14 +13,14 @@ start_osd 4
|
|||
sleep 2
|
||||
|
||||
for i in {1..30}; do
|
||||
($ETCDCTL get /vitastor/config/pgs --print-value-only |\
|
||||
($ETCDCTL get /vitastor/pg/config --print-value-only |\
|
||||
jq -s -e '([ .[0].items["1"] | map(.osd_set)[][] ] | sort | unique == ["1","2","3","4"])') && \
|
||||
($ETCDCTL get --prefix /vitastor/pg/state/ --print-value-only | jq -s -e '([ .[] | select(.state == ["active"]) ] | length) == '$PG_COUNT) && \
|
||||
break
|
||||
sleep 1
|
||||
done
|
||||
|
||||
if ! ($ETCDCTL get /vitastor/config/pgs --print-value-only |\
|
||||
if ! ($ETCDCTL get /vitastor/pg/config --print-value-only |\
|
||||
jq -s -e '([ .[0].items["1"] | map(.osd_set)[][] ] | sort | unique == ["1","2","3","4"])'); then
|
||||
format_error "FAILED: OSD NOT ADDED INTO DISTRIBUTION"
|
||||
fi
|
||||
|
@ -35,14 +35,14 @@ build/src/cmd/vitastor-cli --etcd_address $ETCD_URL rm-osd --force 4
|
|||
sleep 2
|
||||
|
||||
for i in {1..30}; do
|
||||
($ETCDCTL get /vitastor/config/pgs --print-value-only |\
|
||||
($ETCDCTL get /vitastor/pg/config --print-value-only |\
|
||||
jq -s -e '([ .[0].items["1"] | map(.osd_set)[][] ] | sort | unique == ["1","2","3"])') && \
|
||||
($ETCDCTL get --prefix /vitastor/pg/state/ --print-value-only | jq -s -e '([ .[] | select(.state == ["active"] or .state == ["active", "left_on_dead"]) ] | length) == '$PG_COUNT'') && \
|
||||
break
|
||||
sleep 1
|
||||
done
|
||||
|
||||
if ! ($ETCDCTL get /vitastor/config/pgs --print-value-only |\
|
||||
if ! ($ETCDCTL get /vitastor/pg/config --print-value-only |\
|
||||
jq -s -e '([ .[0].items["1"] | map(.osd_set)[][] ] | sort | unique == ["1","2","3"])'); then
|
||||
format_error "FAILED: OSD NOT REMOVED FROM DISTRIBUTION"
|
||||
fi
|
||||
|
|
|
@ -23,7 +23,7 @@ try_change()
|
|||
$ETCDCTL put /vitastor/config/pools '{"1":{'$POOLCFG',"pg_size":'$PG_SIZE',"pg_minsize":'$PG_MINSIZE',"pg_count":'$n'}}'
|
||||
|
||||
for i in {1..60}; do
|
||||
($ETCDCTL get /vitastor/config/pgs --print-value-only | jq -s -e '(.[0].items["1"] | map((.osd_set | select(. > 0)) | length == 2) | length) == '$n) && \
|
||||
($ETCDCTL get /vitastor/pg/config --print-value-only | jq -s -e '(.[0].items["1"] | map((.osd_set | select(. > 0)) | length == 2) | length) == '$n) && \
|
||||
($ETCDCTL get --prefix /vitastor/pg/state/ --print-value-only | jq -s -e '([ .[] | select(.state == ["active"] or .state == ["active", "has_misplaced"]) ] | length) == '$n'') && \
|
||||
break
|
||||
sleep 1
|
||||
|
@ -36,14 +36,14 @@ try_change()
|
|||
sleep 1
|
||||
done
|
||||
|
||||
if ! ($ETCDCTL get /vitastor/config/pgs --print-value-only | jq -s -e '(.[0].items["1"] | map((.osd_set | select(. > 0)) | length == 2) | length) == '$n); then
|
||||
$ETCDCTL get /vitastor/config/pgs
|
||||
if ! ($ETCDCTL get /vitastor/pg/config --print-value-only | jq -s -e '(.[0].items["1"] | map((.osd_set | select(. > 0)) | length == 2) | length) == '$n); then
|
||||
$ETCDCTL get /vitastor/pg/config
|
||||
$ETCDCTL get --prefix /vitastor/pg/state/
|
||||
format_error "FAILED: $n PGS NOT CONFIGURED"
|
||||
fi
|
||||
|
||||
if ! ($ETCDCTL get --prefix /vitastor/pg/state/ --print-value-only | jq -s -e '([ .[] | select(.state == ["active"]) ] | length) == '$n); then
|
||||
$ETCDCTL get /vitastor/config/pgs
|
||||
$ETCDCTL get /vitastor/pg/config
|
||||
$ETCDCTL get --prefix /vitastor/pg/state/
|
||||
format_error "FAILED: $n PGS NOT UP"
|
||||
fi
|
||||
|
@ -53,7 +53,7 @@ try_change()
|
|||
nobj=0
|
||||
waittime=0
|
||||
while [[ $nobj -ne $NOBJ && $waittime -lt 7 ]]; do
|
||||
nobj=`$ETCDCTL get --prefix '/vitastor/pg/stats' --print-value-only | jq -s '[ .[].object_count ] | reduce .[] as $num (0; .+$num)'`
|
||||
nobj=`$ETCDCTL get --prefix '/vitastor/pgstats' --print-value-only | jq -s '[ .[].object_count ] | reduce .[] as $num (0; .+$num)'`
|
||||
if [[ $nobj -ne $NOBJ ]]; then
|
||||
waittime=$((waittime+1))
|
||||
sleep 1
|
||||
|
|
|
@ -13,7 +13,7 @@ try_change()
|
|||
$ETCDCTL put /vitastor/config/pools '{"1":{'$POOLCFG',"pg_size":'$s',"pg_minsize":'$PG_MINSIZE',"pg_count":'$n'}}'
|
||||
|
||||
for i in {1..10}; do
|
||||
($ETCDCTL get /vitastor/config/pgs --print-value-only |\
|
||||
($ETCDCTL get /vitastor/pg/config --print-value-only |\
|
||||
jq -s -e '(.[0].items["1"] | map( ([ .osd_set[] | select(. != 0) ] | length) == '$s' ) | length == '$n')
|
||||
and ([ .[0].items["1"] | map(.osd_set)[][] ] | sort | unique == ["1","2","3"])') && \
|
||||
($ETCDCTL get --prefix /vitastor/pg/state/ --print-value-only | jq -s -e '([ .[] | select(.state == ["active"]) ] | length) == '$n'') && \
|
||||
|
@ -21,16 +21,16 @@ try_change()
|
|||
sleep 1
|
||||
done
|
||||
|
||||
if ! ($ETCDCTL get /vitastor/config/pgs --print-value-only |\
|
||||
if ! ($ETCDCTL get /vitastor/pg/config --print-value-only |\
|
||||
jq -s -e '(.[0].items["1"] | map( ([ .osd_set[] | select(. != 0) ] | length) == '$s' ) | length == '$n')
|
||||
and ([ .[0].items["1"] | map(.osd_set)[][] ] | sort | unique == ["1","2","3"])'); then
|
||||
$ETCDCTL get /vitastor/config/pgs
|
||||
$ETCDCTL get /vitastor/pg/config
|
||||
$ETCDCTL get --prefix /vitastor/pg/state/
|
||||
format_error "FAILED: PG SIZE NOT CHANGED OR SOME OSDS DO NOT HAVE PGS"
|
||||
fi
|
||||
|
||||
if ! ($ETCDCTL get --prefix /vitastor/pg/state/ --print-value-only | jq -s -e '([ .[] | select(.state == ["active"]) ] | length) == '$n); then
|
||||
$ETCDCTL get /vitastor/config/pgs
|
||||
$ETCDCTL get /vitastor/pg/config
|
||||
$ETCDCTL get --prefix /vitastor/pg/state/
|
||||
format_error "FAILED: PGS NOT UP AFTER PG SIZE CHANGE"
|
||||
fi
|
||||
|
|
|
@ -13,13 +13,13 @@ $ETCDCTL put /vitastor/config/inode/1/1 '{"name":"testimg","size":'$((IMG_SIZE*1
|
|||
# Write
|
||||
LD_PRELOAD="build/src/client/libfio_vitastor.so" \
|
||||
fio -thread -name=test -ioengine=build/src/client/libfio_vitastor.so -bs=1M -direct=1 -iodepth=4 \
|
||||
-mirror_file=./testdata/mirror.bin -end_fsync=1 -rw=write -etcd=$ETCD_URL -image=testimg -runtime=10
|
||||
-mirror_file=./testdata/bin/mirror.bin -end_fsync=1 -rw=write -etcd=$ETCD_URL -image=testimg -runtime=10
|
||||
|
||||
# Intentionally corrupt OSD data and restart it
|
||||
kill $OSD1_PID
|
||||
data_offset=$(build/src/disk_tool/vitastor-disk simple-offsets ./testdata/test_osd1.bin $OFFSET_ARGS | grep data_offset | awk '{print $2}')
|
||||
truncate -s $data_offset ./testdata/test_osd1.bin
|
||||
dd if=/dev/zero of=./testdata/test_osd1.bin bs=1024 count=1 seek=$((OSD_SIZE*1024-1))
|
||||
data_offset=$(build/src/disk_tool/vitastor-disk simple-offsets ./testdata/bin/test_osd1.bin $OFFSET_ARGS | grep data_offset | awk '{print $2}')
|
||||
truncate -s $data_offset ./testdata/bin/test_osd1.bin
|
||||
dd if=/dev/zero of=./testdata/bin/test_osd1.bin bs=1024 count=1 seek=$((OSD_SIZE*1024-1))
|
||||
start_osd 1
|
||||
|
||||
# FIXME: corrupt the journal WHEN OSD IS RUNNING and check reads too
|
||||
|
@ -30,8 +30,8 @@ wait_up 10
|
|||
# Read everything back
|
||||
qemu-img convert -S 4096 -p \
|
||||
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testimg" \
|
||||
-O raw ./testdata/read.bin
|
||||
-O raw ./testdata/bin/read.bin
|
||||
|
||||
diff ./testdata/read.bin ./testdata/mirror.bin
|
||||
diff ./testdata/bin/read.bin ./testdata/bin/mirror.bin
|
||||
|
||||
format_green OK
|
||||
|
|
|
@ -28,7 +28,7 @@ $ETCDCTL get --print-value-only /vitastor/config/pools | jq -s -e '. == [{"1":{"
|
|||
|
||||
sleep 2
|
||||
|
||||
$ETCDCTL get --prefix /vitastor/config/pgs --print-value-only | \
|
||||
$ETCDCTL get --prefix /vitastor/pg/config --print-value-only | \
|
||||
jq -s -e '([ .[0].items["1"] | .[].osd_set | map_values(. | tonumber) | select((.[0] <= 4) != (.[1] <= 4)) ] | length) == 4'
|
||||
|
||||
format_green OK
|
||||
|
|
|
@ -20,7 +20,7 @@ $ETCDCTL put /vitastor/config/inode/1/1 '{"name":"testimg","size":'$((IMG_SIZE*1
|
|||
|
||||
LD_PRELOAD="build/src/client/libfio_vitastor.so" \
|
||||
fio -thread -name=test -ioengine=build/src/client/libfio_vitastor.so -bs=4M -direct=1 -iodepth=1 -fsync=1 -rw=write \
|
||||
-mirror_file=./testdata/mirror.bin -etcd=$ETCD_URL -image=testimg -cluster_log_level=10
|
||||
-mirror_file=./testdata/bin/mirror.bin -etcd=$ETCD_URL -image=testimg -cluster_log_level=10
|
||||
|
||||
kill_osds()
|
||||
{
|
||||
|
@ -53,13 +53,13 @@ kill_osds &
|
|||
|
||||
LD_PRELOAD="build/src/client/libfio_vitastor.so" \
|
||||
fio -thread -name=test -ioengine=build/src/client/libfio_vitastor.so -bsrange=4k-128k -blockalign=4k -direct=1 -iodepth=32 -fsync=256 -rw=randrw \
|
||||
-randrepeat=0 -refill_buffers=1 -mirror_file=./testdata/mirror.bin -etcd=$ETCD_URL -image=testimg -loops=10 -runtime=120
|
||||
-randrepeat=0 -refill_buffers=1 -mirror_file=./testdata/bin/mirror.bin -etcd=$ETCD_URL -image=testimg -loops=10 -runtime=120
|
||||
|
||||
qemu-img convert -S 4096 -p \
|
||||
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testimg" \
|
||||
-O raw ./testdata/read.bin
|
||||
-O raw ./testdata/bin/read.bin
|
||||
|
||||
if ! diff -q ./testdata/read.bin ./testdata/mirror.bin; then
|
||||
if ! diff -q ./testdata/bin/read.bin ./testdata/bin/mirror.bin; then
|
||||
format_error Data lost during self-heal
|
||||
fi
|
||||
|
||||
|
|
|
@ -43,7 +43,7 @@ wait_finish_rebalance 300
|
|||
#fi
|
||||
|
||||
# Check that no objects are lost !
|
||||
nobj=`$ETCDCTL get --prefix '/vitastor/pg/stats' --print-value-only | jq -s '[ .[].object_count ] | reduce .[] as $num (0; .+$num)'`
|
||||
nobj=`$ETCDCTL get --prefix '/vitastor/pgstats' --print-value-only | jq -s '[ .[].object_count ] | reduce .[] as $num (0; .+$num)'`
|
||||
if [ "$nobj" -ne $((IMG_SIZE*8/PG_DATA_SIZE)) ]; then
|
||||
format_error "Data lost after multiple interrupted rebalancings"
|
||||
fi
|
||||
|
|
|
@ -8,14 +8,14 @@ OSD_SIZE=1024
|
|||
OSD_COUNT=5
|
||||
OSD_ARGS="$OSD_ARGS"
|
||||
for i in $(seq 1 $OSD_COUNT); do
|
||||
dd if=/dev/zero of=./testdata/test_osd$i.bin bs=1024 count=1 seek=$((OSD_SIZE*1024-1))
|
||||
build/src/osd/vitastor-osd --log_level 10 --osd_num $i --bind_address 127.0.0.1 --etcd_stats_interval 5 $OSD_ARGS --etcd_address $ETCD_URL $(build/src/disk_tool/vitastor-disk simple-offsets --format options ./testdata/test_osd$i.bin 2>/dev/null) >>./testdata/osd$i.log 2>&1 &
|
||||
dd if=/dev/zero of=./testdata/bin/test_osd$i.bin bs=1024 count=1 seek=$((OSD_SIZE*1024-1))
|
||||
build/src/osd/vitastor-osd --log_level 10 --osd_num $i --bind_address 127.0.0.1 --etcd_stats_interval 5 $OSD_ARGS --etcd_address $ETCD_URL $(build/src/disk_tool/vitastor-disk simple-offsets --format options ./testdata/bin/test_osd$i.bin 2>/dev/null) >>./testdata/osd$i.log 2>&1 &
|
||||
eval OSD${i}_PID=$!
|
||||
done
|
||||
|
||||
$ETCDCTL put /vitastor/config/pools '{"1":{"name":"testpool","scheme":"replicated","pg_size":2,"pg_minsize":1,"pg_count":1,"failure_domain":"osd","immediate_commit":"none"}}'
|
||||
|
||||
$ETCDCTL put /vitastor/config/pgs '{"items":{"1":{"1":{"osd_set":[1,0],"primary":1}}}}'
|
||||
$ETCDCTL put /vitastor/pg/config '{"items":{"1":{"1":{"osd_set":[1,0],"primary":1}}}}'
|
||||
|
||||
for i in {1..30}; do
|
||||
sleep 1
|
||||
|
@ -30,7 +30,7 @@ LD_PRELOAD="build/src/client/libfio_vitastor.so" \
|
|||
fio -thread -name=test -ioengine=build/src/client/libfio_vitastor.so -bs=4M -direct=1 -iodepth=1 -fsync=1 -rw=write \
|
||||
-etcd=$ETCD_URL -pool=1 -inode=2 -size=32M -cluster_log_level=10
|
||||
|
||||
$ETCDCTL put /vitastor/config/pgs '{"items":{"1":{"1":{"osd_set":[1,0],"primary":0}}}}'
|
||||
$ETCDCTL put /vitastor/pg/config '{"items":{"1":{"1":{"osd_set":[1,0],"primary":0}}}}'
|
||||
|
||||
for i in {1..30}; do
|
||||
sleep 1
|
||||
|
@ -43,7 +43,7 @@ done
|
|||
|
||||
$ETCDCTL put /vitastor/pg/history/1/1 '{"all_peers":[1,2,3]}'
|
||||
|
||||
$ETCDCTL put /vitastor/config/pgs '{"items":{"1":{"1":{"osd_set":[4,5],"primary":4}}}}'
|
||||
$ETCDCTL put /vitastor/pg/config '{"items":{"1":{"1":{"osd_set":[4,5],"primary":4}}}}'
|
||||
|
||||
sleep 5
|
||||
for i in {1..30}; do
|
||||
|
@ -60,7 +60,7 @@ LD_PRELOAD="build/src/client/libfio_vitastor.so" \
|
|||
fio -thread -name=test -ioengine=build/src/client/libfio_vitastor.so -bs=4k -direct=1 -iodepth=1 -fsync=1 -number_ios=2 -rw=write \
|
||||
-etcd=$ETCD_URL -pool=1 -inode=2 -size=32M -cluster_log_level=10
|
||||
|
||||
$ETCDCTL put /vitastor/config/pgs '{"items":{"1":{"1":{"osd_set":[4,5],"primary":0}}}}'
|
||||
$ETCDCTL put /vitastor/pg/config '{"items":{"1":{"1":{"osd_set":[4,5],"primary":0}}}}'
|
||||
|
||||
$ETCDCTL put /vitastor/pg/history/1/1 '{"all_peers":[1,2,3]}'
|
||||
|
||||
|
@ -76,7 +76,7 @@ done
|
|||
cp testdata/osd4.log testdata/osd4_pre.log
|
||||
>testdata/osd4.log
|
||||
|
||||
$ETCDCTL put /vitastor/config/pgs '{"items":{"1":{"1":{"osd_set":[4,5],"primary":4}}}}'
|
||||
$ETCDCTL put /vitastor/pg/config '{"items":{"1":{"1":{"osd_set":[4,5],"primary":4}}}}'
|
||||
|
||||
for i in {1..30}; do
|
||||
sleep 1
|
||||
|
|
|
@ -27,9 +27,9 @@ $ETCDCTL put /vitastor/config/pools '{"1":{"name":"testpool","scheme":"replicate
|
|||
|
||||
sleep 2
|
||||
|
||||
$ETCDCTL get --prefix /vitastor/config/pgs --print-value-only
|
||||
$ETCDCTL get --prefix /vitastor/pg/config --print-value-only
|
||||
|
||||
if ! ($ETCDCTL get --prefix /vitastor/config/pgs --print-value-only | \
|
||||
if ! ($ETCDCTL get --prefix /vitastor/pg/config --print-value-only | \
|
||||
jq -s -e '[ [ .[] | select(has("items")) | .items["1"] | .[].osd_set | map(. | select(. != "" and (.|tonumber) < 5)) ][] | select((. | length) == 2) ] | length == 16'); then
|
||||
format_error "Some PGs missing replicas"
|
||||
fi
|
||||
|
|
|
@ -16,7 +16,7 @@ try_change()
|
|||
s=$2
|
||||
|
||||
for i in {1..10}; do
|
||||
($ETCDCTL get /vitastor/config/pgs --print-value-only |\
|
||||
($ETCDCTL get /vitastor/pg/config --print-value-only |\
|
||||
jq -s -e '(.[0].items["1"] | map( ([ .osd_set[] | select(. != 0) ] | length) == '$s' ) | length == '$n')
|
||||
and ([ .[0].items["1"] | map(.osd_set)[][] ] | sort | unique == ["1","2","3","4","5","6","7"])') && \
|
||||
($ETCDCTL get --prefix /vitastor/pg/state/ --print-value-only | jq -s -e '([ .[] | select(.state == ["active"]) ] | length) == '$n'') && \
|
||||
|
@ -24,16 +24,16 @@ try_change()
|
|||
sleep 1
|
||||
done
|
||||
|
||||
if ! ($ETCDCTL get /vitastor/config/pgs --print-value-only |\
|
||||
if ! ($ETCDCTL get /vitastor/pg/config --print-value-only |\
|
||||
jq -s -e '(.[0].items["1"] | map( ([ .osd_set[] | select(. != 0) ] | length) == '$s' ) | length == '$n')
|
||||
and ([ .[0].items["1"] | map(.osd_set)[][] ] | sort | unique == ["1","2","3","4","5","6","7"])'); then
|
||||
$ETCDCTL get /vitastor/config/pgs
|
||||
$ETCDCTL get /vitastor/pg/config
|
||||
$ETCDCTL get --prefix /vitastor/pg/state/
|
||||
format_error "FAILED: PG SIZE NOT CHANGED OR SOME OSDS DO NOT HAVE PGS"
|
||||
fi
|
||||
|
||||
if ! ($ETCDCTL get --prefix /vitastor/pg/state/ --print-value-only | jq -s -e '([ .[] | select(.state == ["active"]) ] | length) == '$n); then
|
||||
$ETCDCTL get /vitastor/config/pgs
|
||||
$ETCDCTL get /vitastor/pg/config
|
||||
$ETCDCTL get --prefix /vitastor/pg/state/
|
||||
format_error "FAILED: PGS NOT UP AFTER PG SIZE CHANGE"
|
||||
fi
|
||||
|
|
|
@ -16,14 +16,14 @@ trap "sudo build/src/client/vitastor-nbd unmap $NBD_DEV"'; kill -9 $(jobs -p)' E
|
|||
|
||||
sudo chown $(id -u) $NBD_DEV
|
||||
|
||||
dd if=/dev/urandom of=./testdata/img1.bin bs=1M count=$IMG_SIZE
|
||||
dd if=/dev/urandom of=./testdata/bin/img1.bin bs=1M count=$IMG_SIZE
|
||||
|
||||
dd if=./testdata/img1.bin of=$NBD_DEV bs=1M count=$IMG_SIZE oflag=direct
|
||||
dd if=./testdata/bin/img1.bin of=$NBD_DEV bs=1M count=$IMG_SIZE oflag=direct
|
||||
|
||||
verify() {
|
||||
echo "Verifying before rebalance"
|
||||
dd if=$NBD_DEV of=./testdata/img2.bin bs=1M count=$IMG_SIZE iflag=direct
|
||||
diff ./testdata/img1.bin ./testdata/img2.bin
|
||||
dd if=$NBD_DEV of=./testdata/bin/img2.bin bs=1M count=$IMG_SIZE iflag=direct
|
||||
diff ./testdata/bin/img1.bin ./testdata/bin/img2.bin
|
||||
|
||||
$ETCDCTL put /vitastor/config/osd/1 '{"reweight":'$1'}'
|
||||
$ETCDCTL put /vitastor/config/osd/2 '{"reweight":'$1'}'
|
||||
|
@ -31,18 +31,18 @@ verify() {
|
|||
|
||||
for i in {1..10000}; do
|
||||
O=$(((RANDOM*RANDOM) % (IMG_SIZE*128)))
|
||||
dd if=$NBD_DEV of=./testdata/img2.bin bs=4k seek=$O skip=$O count=1 iflag=direct conv=notrunc
|
||||
dd if=$NBD_DEV of=./testdata/bin/img2.bin bs=4k seek=$O skip=$O count=1 iflag=direct conv=notrunc
|
||||
done
|
||||
|
||||
echo "Verifying during rebalance"
|
||||
diff ./testdata/img1.bin ./testdata/img2.bin
|
||||
diff ./testdata/bin/img1.bin ./testdata/bin/img2.bin
|
||||
|
||||
# Wait for the rebalance to finish
|
||||
wait_finish_rebalance 300
|
||||
|
||||
echo "Verifying after rebalance"
|
||||
dd if=$NBD_DEV of=./testdata/img2.bin bs=1M count=$IMG_SIZE iflag=direct
|
||||
diff ./testdata/img1.bin ./testdata/img2.bin
|
||||
dd if=$NBD_DEV of=./testdata/bin/img2.bin bs=1M count=$IMG_SIZE iflag=direct
|
||||
diff ./testdata/bin/img1.bin ./testdata/bin/img2.bin
|
||||
}
|
||||
|
||||
# Verify with regular reads
|
||||
|
|
|
@ -14,7 +14,7 @@ LD_PRELOAD="build/src/client/libfio_vitastor.so" \
|
|||
|
||||
qemu-img convert -S 4096 -p \
|
||||
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:pool=1:inode=1:size=$((256*1024*1024))" \
|
||||
-O raw ./testdata/before.bin
|
||||
-O raw ./testdata/bin/before.bin
|
||||
|
||||
for i in $(seq 1 $OSD_COUNT); do
|
||||
pid=OSD${i}_PID
|
||||
|
@ -23,19 +23,19 @@ for i in $(seq 1 $OSD_COUNT); do
|
|||
done
|
||||
|
||||
for i in $(seq 1 $OSD_COUNT); do
|
||||
offsets=$(build/src/disk_tool/vitastor-disk simple-offsets --format json ./testdata/test_osd$i.bin)
|
||||
offsets=$(build/src/disk_tool/vitastor-disk simple-offsets --format json ./testdata/bin/test_osd$i.bin)
|
||||
meta_offset=$(echo $offsets | jq -r .meta_offset)
|
||||
data_offset=$(echo $offsets | jq -r .data_offset)
|
||||
build/src/disk_tool/vitastor-disk dump-journal --json ./testdata/test_osd$i.bin 4096 0 $meta_offset >./testdata/journal_before_resize.json
|
||||
build/src/disk_tool/vitastor-disk dump-meta ./testdata/test_osd$i.bin 4096 $meta_offset $((data_offset-meta_offset)) >./testdata/meta_before_resize.json
|
||||
build/src/disk_tool/vitastor-disk dump-journal --json ./testdata/bin/test_osd$i.bin 4096 0 $meta_offset >./testdata/journal_before_resize.json
|
||||
build/src/disk_tool/vitastor-disk dump-meta ./testdata/bin/test_osd$i.bin 4096 $meta_offset $((data_offset-meta_offset)) >./testdata/meta_before_resize.json
|
||||
build/src/disk_tool/vitastor-disk resize \
|
||||
$(build/src/disk_tool/vitastor-disk simple-offsets --format options ./testdata/test_osd$i.bin 2>/dev/null) \
|
||||
$(build/src/disk_tool/vitastor-disk simple-offsets --format options ./testdata/bin/test_osd$i.bin 2>/dev/null) \
|
||||
--new_meta_offset 0 \
|
||||
--new_meta_len $((1024*1024)) \
|
||||
--new_journal_offset $((1024*1024)) \
|
||||
--new_data_offset $((128*1024*1024))
|
||||
build/src/disk_tool/vitastor-disk dump-journal --json ./testdata/test_osd$i.bin 4096 $((1024*1024)) $((127*1024*1024)) >./testdata/journal_after_resize.json
|
||||
build/src/disk_tool/vitastor-disk dump-meta ./testdata/test_osd$i.bin 4096 0 $((1024*1024)) >./testdata/meta_after_resize.json
|
||||
build/src/disk_tool/vitastor-disk dump-journal --json ./testdata/bin/test_osd$i.bin 4096 $((1024*1024)) $((127*1024*1024)) >./testdata/journal_after_resize.json
|
||||
build/src/disk_tool/vitastor-disk dump-meta ./testdata/bin/test_osd$i.bin 4096 0 $((1024*1024)) >./testdata/meta_after_resize.json
|
||||
if ! (cat ./testdata/meta_before_resize.json ./testdata/meta_after_resize.json | \
|
||||
jq -e -s 'map([ .entries[] | del(.block) ] | sort_by(.pool, .inode, .stripe)) | .[0] == .[1] and (.[0] | length) > 1000'); then
|
||||
format_error "OSD $i metadata corrupted after resizing"
|
||||
|
@ -50,7 +50,7 @@ $ETCDCTL del --prefix /vitastor/osd/state/
|
|||
|
||||
for i in $(seq 1 $OSD_COUNT); do
|
||||
build/src/osd/vitastor-osd --osd_num $i --bind_address 127.0.0.1 $NO_SAME $OSD_ARGS --etcd_address $ETCD_URL \
|
||||
--data_device ./testdata/test_osd$i.bin \
|
||||
--data_device ./testdata/bin/test_osd$i.bin \
|
||||
--meta_offset 0 \
|
||||
--journal_offset $((1024*1024)) \
|
||||
--data_offset $((128*1024*1024)) >>./testdata/osd$i.log 2>&1 &
|
||||
|
@ -59,9 +59,9 @@ done
|
|||
|
||||
qemu-img convert -S 4096 -p \
|
||||
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:pool=1:inode=1:size=$((256*1024*1024))" \
|
||||
-O raw ./testdata/after.bin
|
||||
-O raw ./testdata/bin/after.bin
|
||||
|
||||
if ! cmp ./testdata/before.bin ./testdata/after.bin; then
|
||||
if ! cmp ./testdata/bin/before.bin ./testdata/bin/after.bin; then
|
||||
format_error "Data differs after resizing"
|
||||
fi
|
||||
|
||||
|
|
|
@ -21,9 +21,7 @@ $ETCDCTL put /vitastor/config/pools '{"1":{"name":"testpool","scheme":"replicate
|
|||
|
||||
sleep 2
|
||||
|
||||
$ETCDCTL get --prefix /vitastor/config/pgs --print-value-only
|
||||
|
||||
if ! ($ETCDCTL get --prefix /vitastor/config/pgs --print-value-only | \
|
||||
if ! ($ETCDCTL get --prefix /vitastor/pg/config --print-value-only | \
|
||||
jq -s -e '[ [ .[0].items["1"] | .[].osd_set | map(. | select(. != "" and (.|tonumber) < 5)) ][] | select((. | length) == 2) ] | length == 16'); then
|
||||
format_error "Some PGs missing replicas"
|
||||
fi
|
||||
|
|
|
@ -18,19 +18,19 @@ $ETCDCTL put /vitastor/config/inode/1/1 '{"name":"testimg","size":'$((IMG_SIZE*1
|
|||
# Write
|
||||
LD_PRELOAD="build/src/client/libfio_vitastor.so" \
|
||||
fio -thread -name=test -ioengine=build/src/client/libfio_vitastor.so -bs=1M -direct=1 -iodepth=4 \
|
||||
-mirror_file=./testdata/mirror.bin -end_fsync=1 -rw=write -etcd=$ETCD_URL -image=testimg
|
||||
-mirror_file=./testdata/bin/mirror.bin -end_fsync=1 -rw=write -etcd=$ETCD_URL -image=testimg
|
||||
|
||||
# Save PG primary
|
||||
primary=$($ETCDCTL get --print-value-only /vitastor/config/pgs | jq -r '.items["1"]["1"].primary')
|
||||
primary=$($ETCDCTL get --print-value-only /vitastor/pg/config | jq -r '.items["1"]["1"].primary')
|
||||
|
||||
# Intentionally corrupt OSD data and restart it
|
||||
zero_osd_pid=OSD${ZERO_OSD}_PID
|
||||
kill ${!zero_osd_pid}
|
||||
sleep 1
|
||||
kill -9 ${!zero_osd_pid} || true
|
||||
data_offset=$(build/src/disk_tool/vitastor-disk simple-offsets ./testdata/test_osd$ZERO_OSD.bin $OFFSET_ARGS | grep data_offset | awk '{print $2}')
|
||||
truncate -s $data_offset ./testdata/test_osd$ZERO_OSD.bin
|
||||
dd if=/dev/zero of=./testdata/test_osd$ZERO_OSD.bin bs=1024 count=1 seek=$((OSD_SIZE*1024-1))
|
||||
data_offset=$(build/src/disk_tool/vitastor-disk simple-offsets ./testdata/bin/test_osd$ZERO_OSD.bin $OFFSET_ARGS | grep data_offset | awk '{print $2}')
|
||||
truncate -s $data_offset ./testdata/bin/test_osd$ZERO_OSD.bin
|
||||
dd if=/dev/zero of=./testdata/bin/test_osd$ZERO_OSD.bin bs=1024 count=1 seek=$((OSD_SIZE*1024-1))
|
||||
$ETCDCTL del /vitastor/osd/state/$ZERO_OSD
|
||||
start_osd $ZERO_OSD
|
||||
|
||||
|
@ -38,7 +38,7 @@ start_osd $ZERO_OSD
|
|||
wait_up 10
|
||||
|
||||
# Wait until PG is back on the same primary
|
||||
wait_condition 10 "$ETCDCTL"$' get --print-value-only /vitastor/config/pgs | jq -s -e \'.[0].items["1"]["1"].primary == "'$primary'"'"'"
|
||||
wait_condition 10 "$ETCDCTL"$' get --print-value-only /vitastor/pg/config | jq -s -e \'.[0].items["1"]["1"].primary == "'$primary'"'"'"
|
||||
|
||||
# Trigger scrub
|
||||
$ETCDCTL put /vitastor/pg/history/1/1 `$ETCDCTL get --print-value-only /vitastor/pg/history/1/1 | jq -s -c '(.[0] // {}) + {"next_scrub":1}'`
|
||||
|
@ -64,8 +64,8 @@ fi
|
|||
# Read everything back
|
||||
qemu-img convert -S 4096 -p \
|
||||
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testimg" \
|
||||
-O raw ./testdata/read.bin
|
||||
-O raw ./testdata/bin/read.bin
|
||||
|
||||
diff ./testdata/read.bin ./testdata/mirror.bin
|
||||
diff ./testdata/bin/read.bin ./testdata/bin/mirror.bin
|
||||
|
||||
format_green OK
|
||||
|
|
|
@ -34,21 +34,21 @@ qemu-img convert -p \
|
|||
|
||||
qemu-img convert -S 4096 -p \
|
||||
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:pool=1:inode=3:size=$((32*1024*1024))" \
|
||||
-O raw ./testdata/merged.bin
|
||||
-O raw ./testdata/bin/merged.bin
|
||||
|
||||
qemu-img convert -S 4096 -p \
|
||||
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testimg@0" \
|
||||
-O raw ./testdata/layer0.bin
|
||||
-O raw ./testdata/bin/layer0.bin
|
||||
|
||||
$ETCDCTL put /vitastor/config/inode/1/3 '{"name":"testimg","size":'$((32*1024*1024))'}'
|
||||
|
||||
qemu-img convert -S 4096 -p \
|
||||
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testimg" \
|
||||
-O raw ./testdata/layer1.bin
|
||||
-O raw ./testdata/bin/layer1.bin
|
||||
|
||||
node tests/merge.js ./testdata/layer0.bin ./testdata/layer1.bin ./testdata/check.bin
|
||||
node tests/merge.js ./testdata/bin/layer0.bin ./testdata/bin/layer1.bin ./testdata/bin/check.bin
|
||||
|
||||
cmp ./testdata/merged.bin ./testdata/check.bin
|
||||
cmp ./testdata/bin/merged.bin ./testdata/bin/check.bin
|
||||
|
||||
# Test merge
|
||||
|
||||
|
@ -58,22 +58,22 @@ build/src/cmd/vitastor-cli rm --etcd_address $ETCD_URL testimg@0
|
|||
|
||||
qemu-img convert -S 4096 -p \
|
||||
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testimg" \
|
||||
-O raw ./testdata/merged-by-tool.bin
|
||||
-O raw ./testdata/bin/merged-by-tool.bin
|
||||
|
||||
cmp ./testdata/merged.bin ./testdata/merged-by-tool.bin
|
||||
cmp ./testdata/bin/merged.bin ./testdata/bin/merged-by-tool.bin
|
||||
|
||||
# Test merge by qemu-img
|
||||
|
||||
qemu-img rebase -u -b layer0.qcow2 -F qcow2 ./testdata/layer1.qcow2
|
||||
|
||||
qemu-img convert -S 4096 -f qcow2 ./testdata/layer1.qcow2 -O raw ./testdata/rebased.bin
|
||||
qemu-img convert -S 4096 -f qcow2 ./testdata/layer1.qcow2 -O raw ./testdata/bin/rebased.bin
|
||||
|
||||
cmp ./testdata/merged.bin ./testdata/rebased.bin
|
||||
cmp ./testdata/bin/merged.bin ./testdata/bin/rebased.bin
|
||||
|
||||
qemu-img rebase -u -b '' ./testdata/layer1.qcow2
|
||||
|
||||
qemu-img convert -S 4096 -f qcow2 ./testdata/layer1.qcow2 -O raw ./testdata/rebased.bin
|
||||
qemu-img convert -S 4096 -f qcow2 ./testdata/layer1.qcow2 -O raw ./testdata/bin/rebased.bin
|
||||
|
||||
cmp ./testdata/layer1.bin ./testdata/rebased.bin
|
||||
cmp ./testdata/bin/layer1.bin ./testdata/bin/rebased.bin
|
||||
|
||||
format_green OK
|
||||
|
|
|
@ -9,7 +9,7 @@ build/src/cmd/vitastor-cli --etcd_address $ETCD_URL create -s 32M testchain
|
|||
|
||||
LD_PRELOAD="build/src/client/libfio_vitastor.so" \
|
||||
fio -thread -name=test -ioengine=build/src/client/libfio_vitastor.so -bs=4M -direct=1 -iodepth=1 -fsync=1 -rw=write \
|
||||
-etcd=$ETCD_URL -image=testchain -mirror_file=./testdata/mirror.bin
|
||||
-etcd=$ETCD_URL -image=testchain -mirror_file=./testdata/bin/mirror.bin
|
||||
|
||||
for i in {1..10}; do
|
||||
# Create a snapshot
|
||||
|
@ -17,18 +17,18 @@ for i in {1..10}; do
|
|||
# Check that the new snapshot is see-through
|
||||
qemu-img convert -p \
|
||||
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testchain" \
|
||||
-O raw ./testdata/check.bin
|
||||
cmp ./testdata/check.bin ./testdata/mirror.bin
|
||||
-O raw ./testdata/bin/check.bin
|
||||
cmp ./testdata/bin/check.bin ./testdata/bin/mirror.bin
|
||||
# Write something to it
|
||||
LD_PRELOAD="build/src/client/libfio_vitastor.so" \
|
||||
fio -thread -name=test -ioengine=build/src/client/libfio_vitastor.so -bs=4k -direct=1 -iodepth=1 -fsync=32 -rw=randwrite \
|
||||
-randrepeat=$((i <= 2)) -buffer_pattern=0x$((10+i))$((10+i))$((10+i))$((10+i)) \
|
||||
-etcd=$ETCD_URL -image=testchain -number_ios=1024 -mirror_file=./testdata/mirror.bin
|
||||
-etcd=$ETCD_URL -image=testchain -number_ios=1024 -mirror_file=./testdata/bin/mirror.bin
|
||||
# Check the new content
|
||||
qemu-img convert -p \
|
||||
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testchain" \
|
||||
-O raw ./testdata/layer1.bin
|
||||
cmp ./testdata/layer1.bin ./testdata/mirror.bin
|
||||
-O raw ./testdata/bin/layer1.bin
|
||||
cmp ./testdata/bin/layer1.bin ./testdata/bin/mirror.bin
|
||||
done
|
||||
|
||||
build/src/cmd/vitastor-cli --etcd_address $ETCD_URL rm testchain@1 testchain@9
|
||||
|
@ -36,13 +36,13 @@ build/src/cmd/vitastor-cli --etcd_address $ETCD_URL rm testchain@1 testchain@9
|
|||
# Check the final image
|
||||
qemu-img convert -p \
|
||||
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testchain" \
|
||||
-O raw ./testdata/layer1.bin
|
||||
cmp ./testdata/layer1.bin ./testdata/mirror.bin
|
||||
-O raw ./testdata/bin/layer1.bin
|
||||
cmp ./testdata/bin/layer1.bin ./testdata/bin/mirror.bin
|
||||
|
||||
# Check the last remaining snapshot
|
||||
qemu-img convert -p \
|
||||
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testchain@10" \
|
||||
-O raw ./testdata/layer0.bin
|
||||
cmp ./testdata/layer0.bin ./testdata/check.bin
|
||||
-O raw ./testdata/bin/layer0.bin
|
||||
cmp ./testdata/bin/layer0.bin ./testdata/bin/check.bin
|
||||
|
||||
format_green OK
|
||||
|
|
|
@ -9,7 +9,7 @@ build/src/cmd/vitastor-cli --etcd_address $ETCD_URL create -s 128M testchain
|
|||
|
||||
LD_PRELOAD="build/src/client/libfio_vitastor.so" \
|
||||
fio -thread -name=test -ioengine=build/src/client/libfio_vitastor.so -bs=4M -direct=1 -iodepth=1 -fsync=1 -rw=write \
|
||||
-etcd=$ETCD_URL -image=testchain -mirror_file=./testdata/mirror.bin
|
||||
-etcd=$ETCD_URL -image=testchain -mirror_file=./testdata/bin/mirror.bin
|
||||
|
||||
# Create a snapshot
|
||||
build/src/cmd/vitastor-cli --etcd_address $ETCD_URL snap-create testchain@0
|
||||
|
@ -17,13 +17,13 @@ build/src/cmd/vitastor-cli --etcd_address $ETCD_URL snap-create testchain@0
|
|||
# Write something to it
|
||||
LD_PRELOAD="build/src/client/libfio_vitastor.so" \
|
||||
fio -thread -name=test -ioengine=build/src/client/libfio_vitastor.so -bs=1M -direct=1 -iodepth=4 -rw=randwrite \
|
||||
-randrepeat=0 -etcd=$ETCD_URL -image=testchain -number_ios=8 -mirror_file=./testdata/mirror.bin
|
||||
-randrepeat=0 -etcd=$ETCD_URL -image=testchain -number_ios=8 -mirror_file=./testdata/bin/mirror.bin
|
||||
|
||||
# Check the new content
|
||||
qemu-img convert -p \
|
||||
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testchain" \
|
||||
-O raw ./testdata/layer1.bin
|
||||
cmp ./testdata/layer1.bin ./testdata/mirror.bin
|
||||
-O raw ./testdata/bin/layer1.bin
|
||||
cmp ./testdata/bin/layer1.bin ./testdata/bin/mirror.bin
|
||||
|
||||
# Merge
|
||||
build/src/cmd/vitastor-cli --etcd_address $ETCD_URL rm testchain@0
|
||||
|
@ -31,7 +31,7 @@ build/src/cmd/vitastor-cli --etcd_address $ETCD_URL rm testchain@0
|
|||
# Check the final image
|
||||
qemu-img convert -p \
|
||||
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testchain" \
|
||||
-O raw ./testdata/layer1.bin
|
||||
cmp ./testdata/layer1.bin ./testdata/mirror.bin
|
||||
-O raw ./testdata/bin/layer1.bin
|
||||
cmp ./testdata/bin/layer1.bin ./testdata/bin/mirror.bin
|
||||
|
||||
format_green OK
|
||||
|
|
|
@ -23,7 +23,7 @@ LD_PRELOAD="build/src/client/libfio_vitastor.so" \
|
|||
|
||||
kill $OSD2_PID
|
||||
build/src/osd/vitastor-osd --osd_num 1 --bind_address 127.0.0.1 $NO_SAME $OSD_ARGS --etcd_address $ETCD_URL \
|
||||
$(build/src/disk_tool/vitastor-disk simple-offsets --format options --device ./testdata/test_osd2.bin 2>/dev/null) >>./testdata/osd2.log 2>&1 &
|
||||
$(build/src/disk_tool/vitastor-disk simple-offsets --format options --device ./testdata/bin/test_osd2.bin 2>/dev/null) >>./testdata/osd2.log 2>&1 &
|
||||
sleep 2
|
||||
|
||||
# Check PG state - it should NOT become active
|
||||
|
|
|
@ -2,14 +2,14 @@
|
|||
|
||||
. `dirname $0`/run_3osds.sh
|
||||
|
||||
primary=$($ETCDCTL get --print-value-only /vitastor/config/pgs | jq -r '.items["1"]["1"].primary')
|
||||
primary=$($ETCDCTL get --print-value-only /vitastor/pg/config | jq -r '.items["1"]["1"].primary')
|
||||
primary_pid=OSD${primary}_PID
|
||||
kill -9 ${!primary_pid}
|
||||
|
||||
sleep 15
|
||||
wait_condition 10 "$ETCDCTL get --print-value-only /vitastor/config/pgs | jq -s -e '.[0].items[\"1\"][\"1\"].primary != \"$primary\"'"
|
||||
wait_condition 10 "$ETCDCTL get --print-value-only /vitastor/pg/config | jq -s -e '.[0].items[\"1\"][\"1\"].primary != \"$primary\"'"
|
||||
|
||||
newprim=$($ETCDCTL get --print-value-only /vitastor/config/pgs | jq -r '.items["1"]["1"].primary')
|
||||
newprim=$($ETCDCTL get --print-value-only /vitastor/pg/config | jq -r '.items["1"]["1"].primary')
|
||||
|
||||
if [ "$newprim" = "$primary" ]; then
|
||||
format_error Primary not switched
|
||||
|
|
|
@ -15,7 +15,7 @@ OSD_COUNT=3
|
|||
OSD_ARGS="$OSD_ARGS"
|
||||
OFFSET_ARGS="$OFFSET_ARGS"
|
||||
for i in $(seq 1 $OSD_COUNT); do
|
||||
build/src/osd/vitastor-osd --osd_num $i --bind_address 127.0.0.1 $OSD_ARGS --etcd_address $ETCD_URL $(build/src/disk_tool/vitastor-disk simple-offsets --format options ./testdata/test_osd$i.bin 2>/dev/null) >>./testdata/osd$i.log 2>&1 &
|
||||
build/src/osd/vitastor-osd --osd_num $i --bind_address 127.0.0.1 $OSD_ARGS --etcd_address $ETCD_URL $(build/src/disk_tool/vitastor-disk simple-offsets --format options ./testdata/bin/test_osd$i.bin 2>/dev/null) >>./testdata/osd$i.log 2>&1 &
|
||||
eval OSD${i}_PID=$!
|
||||
done
|
||||
|
||||
|
|
|
@ -43,10 +43,10 @@ LD_PRELOAD="build/src/client/libfio_vitastor.so" \
|
|||
|
||||
qemu-img convert -S 4096 -p \
|
||||
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:pool=1:inode=1:size=$((128*1024*1024))" \
|
||||
-O raw ./testdata/read.bin
|
||||
-O raw ./testdata/bin/read.bin
|
||||
|
||||
qemu-img convert -S 4096 -p \
|
||||
-f raw ./testdata/read.bin \
|
||||
-f raw ./testdata/bin/read.bin \
|
||||
-O raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:pool=1:inode=1:size=$((128*1024*1024))"
|
||||
|
||||
format_green OK
|
||||
|
|
Loading…
Reference in New Issue