Compare commits
10 Commits
master
...
check-writ
Author | SHA1 | Date |
---|---|---|
Vitaliy Filippov | a1f8ac4ecf | |
Vitaliy Filippov | 3acf3a867d | |
Vitaliy Filippov | 8635e0af24 | |
Vitaliy Filippov | da73d5f45a | |
Vitaliy Filippov | 88bbe16ac3 | |
Vitaliy Filippov | 85ba14319a | |
Vitaliy Filippov | 5a56912d5e | |
Vitaliy Filippov | edc2f4eb97 | |
Vitaliy Filippov | ba806ff1ba | |
Vitaliy Filippov | 3d925c37cd |
|
@ -16,6 +16,7 @@ env:
|
||||||
BUILDENV_IMAGE: git.yourcmc.ru/vitalif/vitastor/buildenv
|
BUILDENV_IMAGE: git.yourcmc.ru/vitalif/vitastor/buildenv
|
||||||
TEST_IMAGE: git.yourcmc.ru/vitalif/vitastor/test
|
TEST_IMAGE: git.yourcmc.ru/vitalif/vitastor/test
|
||||||
OSD_ARGS: '--etcd_quick_timeout 2000'
|
OSD_ARGS: '--etcd_quick_timeout 2000'
|
||||||
|
USE_RAMDISK: 1
|
||||||
|
|
||||||
concurrency:
|
concurrency:
|
||||||
group: ci-${{ github.ref }}
|
group: ci-${{ github.ref }}
|
||||||
|
|
|
@ -2,6 +2,6 @@ cmake_minimum_required(VERSION 2.8.12)
|
||||||
|
|
||||||
project(vitastor)
|
project(vitastor)
|
||||||
|
|
||||||
set(VERSION "1.7.1")
|
set(VITASTOR_VERSION "1.7.1")
|
||||||
|
|
||||||
add_subdirectory(src)
|
add_subdirectory(src)
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
VERSION ?= v1.7.1
|
VITASTOR_VERSION ?= v1.7.1
|
||||||
|
|
||||||
all: build push
|
all: build push
|
||||||
|
|
||||||
build:
|
build:
|
||||||
@docker build --rm -t vitalif/vitastor-csi:$(VERSION) .
|
@docker build --rm -t vitalif/vitastor-csi:$(VITASTOR_VERSION) .
|
||||||
|
|
||||||
push:
|
push:
|
||||||
@docker push vitalif/vitastor-csi:$(VERSION)
|
@docker push vitalif/vitastor-csi:$(VITASTOR_VERSION)
|
||||||
|
|
|
@ -42,7 +42,7 @@ PG state always includes exactly 1 of the following base states:
|
||||||
- **offline** — PG isn't activated by any OSD at all. Either primary OSD isn't set for
|
- **offline** — PG isn't activated by any OSD at all. Either primary OSD isn't set for
|
||||||
this PG at all (if the pool is just created), or an unavailable OSD is set as primary,
|
this PG at all (if the pool is just created), or an unavailable OSD is set as primary,
|
||||||
or the primary OSD refuses to start this PG (for example, because of wrong block_size),
|
or the primary OSD refuses to start this PG (for example, because of wrong block_size),
|
||||||
or the PG is stopped by the monitor using `pause: true` flag in `/vitastor/config/pgs` in etcd.
|
or the PG is stopped by the monitor using `pause: true` flag in `/vitastor/pg/config` in etcd.
|
||||||
- **starting** — primary OSD has acquired PG lock in etcd, PG is starting.
|
- **starting** — primary OSD has acquired PG lock in etcd, PG is starting.
|
||||||
- **peering** — primary OSD requests PG object listings from secondary OSDs and calculates
|
- **peering** — primary OSD requests PG object listings from secondary OSDs and calculates
|
||||||
the PG state.
|
the PG state.
|
||||||
|
@ -150,7 +150,7 @@ POOL_ID=1
|
||||||
ALL_OSDS=$(etcdctl --endpoints=your_etcd_address:2379 get --keys-only --prefix /vitastor/osd/stats/ | \
|
ALL_OSDS=$(etcdctl --endpoints=your_etcd_address:2379 get --keys-only --prefix /vitastor/osd/stats/ | \
|
||||||
perl -e '$/ = undef; $a = <>; $a =~ s/\s*$//; $a =~ s!/vitastor/osd/stats/!!g; $a =~ s/\s+/,/g; print $a')
|
perl -e '$/ = undef; $a = <>; $a =~ s/\s*$//; $a =~ s!/vitastor/osd/stats/!!g; $a =~ s/\s+/,/g; print $a')
|
||||||
for i in $(seq 1 $PG_COUNT); do
|
for i in $(seq 1 $PG_COUNT); do
|
||||||
etcdctl --endpoints=your_etcd_address:2379 put /vitastor/pg/history/$POOL_ID/$i '{"all_peers":['$ALL_OSDS']}'; done
|
etcdctl --endpoints=your_etcd_address:2379 put /vitastor/pg/history/$POOL_ID/$i '{"all_peers":['$ALL_OSDS']}'
|
||||||
done
|
done
|
||||||
```
|
```
|
||||||
|
|
||||||
|
@ -169,21 +169,51 @@ Upgrading is performed without stopping clients (VMs/containers), you just need
|
||||||
upgrade and restart servers one by one. However, ideally you should restart VMs too
|
upgrade and restart servers one by one. However, ideally you should restart VMs too
|
||||||
to make them use the new version of the client library.
|
to make them use the new version of the client library.
|
||||||
|
|
||||||
Exceptions (specific upgrade instructions):
|
### 1.1.x to 1.2.0
|
||||||
- Upgrading <= 1.1.x to 1.2.0 or later, if you use EC n+k with k>=2, is recommended
|
|
||||||
to be performed with full downtime: first you should stop all clients, then all OSDs,
|
|
||||||
then upgrade and start everything back — because versions before 1.2.0 have several
|
|
||||||
bugs leading to invalid data being read in EC n+k, k>=2 configurations in degraded pools.
|
|
||||||
- Versions <= 0.8.7 are incompatible with versions >= 0.9.0, so you should first
|
|
||||||
upgrade from <= 0.8.7 to 0.8.8 or 0.8.9, and only then to >= 0.9.x. If you upgrade
|
|
||||||
without this intermediate step, client I/O will hang until the end of upgrade process.
|
|
||||||
- Upgrading from <= 0.5.x to >= 0.6.x is not supported.
|
|
||||||
|
|
||||||
Rollback:
|
Upgrading version <= 1.1.x to version >= 1.2.0, if you use EC n+k with k>=2, is recommended
|
||||||
- Version 1.0.0 has a new disk format, so OSDs initiaziled on 1.0.0 can't be rolled
|
to be performed with full downtime: first you should stop all clients, then all OSDs,
|
||||||
back to 0.9.x or previous versions.
|
then upgrade and start everything back — because versions before 1.2.0 have several
|
||||||
- Versions before 0.8.0 don't have vitastor-disk, so OSDs, initialized by it, won't
|
bugs leading to invalid data being read in EC n+k, k>=2 configurations in degraded pools.
|
||||||
start with 0.7.x or 0.6.x. :-)
|
|
||||||
|
### 0.8.7 to 0.9.0
|
||||||
|
|
||||||
|
Versions <= 0.8.7 are incompatible with versions >= 0.9.0, so you should first
|
||||||
|
upgrade from <= 0.8.7 to 0.8.8 or 0.8.9, and only then to >= 0.9.x. If you upgrade
|
||||||
|
without this intermediate step, client I/O will hang until the end of upgrade process.
|
||||||
|
|
||||||
|
### 0.5.x to 0.6.x
|
||||||
|
|
||||||
|
Upgrading from <= 0.5.x to >= 0.6.x is not supported.
|
||||||
|
|
||||||
|
## Downgrade
|
||||||
|
|
||||||
|
Downgrades are also allowed freely, except for the following specific cases:
|
||||||
|
|
||||||
|
### 1.8.0 to 1.7.1
|
||||||
|
|
||||||
|
Before downgrading from version >= 1.8.0 to version <= 1.7.1
|
||||||
|
you have to copy the `/vitastor/pg/config` etcd key to `/vitastor/config/pgs`:
|
||||||
|
|
||||||
|
```
|
||||||
|
etcdctl --endpoints=http://... get --print-value-only /vitastor/pg/config | \
|
||||||
|
etcdctl --endpoints=http://... put /vitastor/config/pgs
|
||||||
|
```
|
||||||
|
|
||||||
|
Then you can just install older packages and restart all services.
|
||||||
|
|
||||||
|
If you performed the downgrade without first copying that key, run "add all OSDs into the
|
||||||
|
history records of all PGs" from [Restoring from lost pool configuration](#restoring-from-lost-pool-configuration).
|
||||||
|
|
||||||
|
### 1.0.0 to 0.9.x
|
||||||
|
|
||||||
|
Version 1.0.0 has a new disk format, so OSDs initialized on 1.0.0 or later can't
|
||||||
|
be rolled back to 0.9.x or previous versions.
|
||||||
|
|
||||||
|
### 0.8.0 to 0.7.x
|
||||||
|
|
||||||
|
Versions before 0.8.0 don't have vitastor-disk, so OSDs initialized by it won't
|
||||||
|
start with older versions (0.4.x - 0.7.x). :-)
|
||||||
|
|
||||||
## OSD memory usage
|
## OSD memory usage
|
||||||
|
|
||||||
|
|
|
@ -42,7 +42,7 @@
|
||||||
- **offline** — PG вообще не активирована ни одним OSD. Либо первичный OSD не назначен вообще
|
- **offline** — PG вообще не активирована ни одним OSD. Либо первичный OSD не назначен вообще
|
||||||
(если пул только создан), либо в качестве первичного назначен недоступный OSD, либо
|
(если пул только создан), либо в качестве первичного назначен недоступный OSD, либо
|
||||||
назначенный OSD отказывается запускать эту PG (например, из-за несовпадения block_size),
|
назначенный OSD отказывается запускать эту PG (например, из-за несовпадения block_size),
|
||||||
либо PG остановлена монитором через флаг `pause: true` в `/vitastor/config/pgs` в etcd.
|
либо PG остановлена монитором через флаг `pause: true` в `/vitastor/pg/config` в etcd.
|
||||||
- **starting** — первичный OSD захватил блокировку PG в etcd, PG запускается.
|
- **starting** — первичный OSD захватил блокировку PG в etcd, PG запускается.
|
||||||
- **peering** — первичный OSD опрашивает вторичные OSD на предмет списков объектов данной PG и рассчитывает её состояние.
|
- **peering** — первичный OSD опрашивает вторичные OSD на предмет списков объектов данной PG и рассчитывает её состояние.
|
||||||
- **repeering** — PG ожидает завершения текущих операций ввода-вывода, после чего перейдёт в состояние **peering**.
|
- **repeering** — PG ожидает завершения текущих операций ввода-вывода, после чего перейдёт в состояние **peering**.
|
||||||
|
@ -147,7 +147,7 @@ POOL_ID=1
|
||||||
ALL_OSDS=$(etcdctl --endpoints=your_etcd_address:2379 get --keys-only --prefix /vitastor/osd/stats/ | \
|
ALL_OSDS=$(etcdctl --endpoints=your_etcd_address:2379 get --keys-only --prefix /vitastor/osd/stats/ | \
|
||||||
perl -e '$/ = undef; $a = <>; $a =~ s/\s*$//; $a =~ s!/vitastor/osd/stats/!!g; $a =~ s/\s+/,/g; print $a')
|
perl -e '$/ = undef; $a = <>; $a =~ s/\s*$//; $a =~ s!/vitastor/osd/stats/!!g; $a =~ s/\s+/,/g; print $a')
|
||||||
for i in $(seq 1 $PG_COUNT); do
|
for i in $(seq 1 $PG_COUNT); do
|
||||||
etcdctl --endpoints=your_etcd_address:2379 put /vitastor/pg/history/$POOL_ID/$i '{"all_peers":['$ALL_OSDS']}'; done
|
etcdctl --endpoints=your_etcd_address:2379 put /vitastor/pg/history/$POOL_ID/$i '{"all_peers":['$ALL_OSDS']}'
|
||||||
done
|
done
|
||||||
```
|
```
|
||||||
|
|
||||||
|
@ -166,21 +166,51 @@ done
|
||||||
достаточно обновлять серверы по одному. Однако, конечно, чтобы запущенные виртуальные машины
|
достаточно обновлять серверы по одному. Однако, конечно, чтобы запущенные виртуальные машины
|
||||||
начали использовать новую версию клиентской библиотеки, их тоже нужно перезапустить.
|
начали использовать новую версию клиентской библиотеки, их тоже нужно перезапустить.
|
||||||
|
|
||||||
Исключения (особые указания при обновлении):
|
### 1.1.x -> 1.2.0
|
||||||
- Обновляться с версий <= 1.1.x до версий >= 1.2.0, если вы используете EC n+k и k>=2,
|
|
||||||
рекомендуется с временной остановкой кластера — сначала нужно остановить всех клиентов,
|
|
||||||
потом все OSD, потом обновить и запустить всё обратно — из-за нескольких багов, которые
|
|
||||||
могли приводить к некорректному чтению данных в деградированных EC-пулах.
|
|
||||||
- Версии <= 0.8.7 несовместимы с версиями >= 0.9.0, поэтому при обновлении с <= 0.8.7
|
|
||||||
нужно сначала обновиться до 0.8.8 или 0.8.9, а уже потом до любых версий >= 0.9.x.
|
|
||||||
Иначе клиентский ввод-вывод зависнет до завершения обновления.
|
|
||||||
- Обновление с версий 0.5.x и более ранних до 0.6.x и более поздних не поддерживается.
|
|
||||||
|
|
||||||
Откат:
|
Обновляться с версий <= 1.1.x до версий >= 1.2.0, если вы используете EC n+k и k>=2,
|
||||||
- В версии 1.0.0 поменялся дисковый формат, поэтому OSD, созданные на версии >= 1.0.0,
|
рекомендуется с временной остановкой кластера — сначала нужно остановить всех клиентов,
|
||||||
нельзя откатить до версии 0.9.x и более ранних.
|
потом все OSD, потом обновить и запустить всё обратно — из-за нескольких багов, которые
|
||||||
- В версиях ранее 0.8.0 нет vitastor-disk, значит, созданные им OSD нельзя откатить
|
могли приводить к некорректному чтению данных в деградированных EC-пулах.
|
||||||
до 0.7.x или 0.6.x. :-)
|
|
||||||
|
### 0.8.7 -> 0.9.0
|
||||||
|
|
||||||
|
Версии <= 0.8.7 несовместимы с версиями >= 0.9.0, поэтому при обновлении с <= 0.8.7
|
||||||
|
нужно сначала обновиться до 0.8.8 или 0.8.9, а уже потом до любых версий >= 0.9.x.
|
||||||
|
Иначе клиентский ввод-вывод зависнет до завершения обновления.
|
||||||
|
|
||||||
|
### 0.5.x -> 0.6.x
|
||||||
|
|
||||||
|
Обновление с версий 0.5.x и более ранних до 0.6.x и более поздних не поддерживается.
|
||||||
|
|
||||||
|
## Откат версии
|
||||||
|
|
||||||
|
Откат (понижение версии) тоже свободно разрешён, кроме указанных ниже случаев:
|
||||||
|
|
||||||
|
### 1.8.0 -> 1.7.1
|
||||||
|
|
||||||
|
Перед понижением версии с >= 1.8.0 до <= 1.7.1 вы должны скопировать ключ
|
||||||
|
etcd `/vitastor/pg/config` в `/vitastor/config/pgs`:
|
||||||
|
|
||||||
|
```
|
||||||
|
etcdctl --endpoints=http://... get --print-value-only /vitastor/pg/config | \
|
||||||
|
etcdctl --endpoints=http://... put /vitastor/config/pgs
|
||||||
|
```
|
||||||
|
|
||||||
|
После этого можно просто установить более старые пакеты и перезапустить все сервисы.
|
||||||
|
|
||||||
|
Если вы откатили версию, не скопировав предварительно этот ключ — выполните "добавление всех
|
||||||
|
OSD в исторические записи всех PG" из раздела [Восстановление потерянной конфигурации пулов](#восстановление-потерянной-конфигурации-пулов).
|
||||||
|
|
||||||
|
### 1.0.0 -> 0.9.x
|
||||||
|
|
||||||
|
В версии 1.0.0 поменялся дисковый формат, поэтому OSD, созданные на версии >= 1.0.0,
|
||||||
|
нельзя откатить до версии 0.9.x и более ранних.
|
||||||
|
|
||||||
|
### 0.8.0 -> 0.7.x
|
||||||
|
|
||||||
|
В версиях ранее 0.8.0 нет vitastor-disk, значит, созданные им OSD не запустятся на
|
||||||
|
более ранних версиях (0.4.x - 0.7.x). :-)
|
||||||
|
|
||||||
## Потребление памяти OSD
|
## Потребление памяти OSD
|
||||||
|
|
||||||
|
|
|
@ -6,7 +6,7 @@ const etcd_nonempty_keys = {
|
||||||
'config/global': 1,
|
'config/global': 1,
|
||||||
'config/node_placement': 1,
|
'config/node_placement': 1,
|
||||||
'config/pools': 1,
|
'config/pools': 1,
|
||||||
'config/pgs': 1,
|
'pg/config': 1,
|
||||||
'history/last_clean_pgs': 1,
|
'history/last_clean_pgs': 1,
|
||||||
'stats': 1,
|
'stats': 1,
|
||||||
};
|
};
|
||||||
|
@ -15,7 +15,8 @@ const etcd_allow = new RegExp('^'+[
|
||||||
'config/node_placement',
|
'config/node_placement',
|
||||||
'config/pools',
|
'config/pools',
|
||||||
'config/osd/[1-9]\\d*',
|
'config/osd/[1-9]\\d*',
|
||||||
'config/pgs',
|
'config/pgs', // old name
|
||||||
|
'pg/config',
|
||||||
'config/inode/[1-9]\\d*/[1-9]\\d*',
|
'config/inode/[1-9]\\d*/[1-9]\\d*',
|
||||||
'osd/state/[1-9]\\d*',
|
'osd/state/[1-9]\\d*',
|
||||||
'osd/stats/[1-9]\\d*',
|
'osd/stats/[1-9]\\d*',
|
||||||
|
@ -24,7 +25,8 @@ const etcd_allow = new RegExp('^'+[
|
||||||
'mon/master',
|
'mon/master',
|
||||||
'mon/member/[a-f0-9]+',
|
'mon/member/[a-f0-9]+',
|
||||||
'pg/state/[1-9]\\d*/[1-9]\\d*',
|
'pg/state/[1-9]\\d*/[1-9]\\d*',
|
||||||
'pg/stats/[1-9]\\d*/[1-9]\\d*',
|
'pg/stats/[1-9]\\d*/[1-9]\\d*', // old name
|
||||||
|
'pgstats/[1-9]\\d*/[1-9]\\d*',
|
||||||
'pg/history/[1-9]\\d*/[1-9]\\d*',
|
'pg/history/[1-9]\\d*/[1-9]\\d*',
|
||||||
'history/last_clean_pgs',
|
'history/last_clean_pgs',
|
||||||
'inode/stats/[1-9]\\d*/\\d+',
|
'inode/stats/[1-9]\\d*/\\d+',
|
||||||
|
@ -205,19 +207,6 @@ const etcd_tree = {
|
||||||
osd: {
|
osd: {
|
||||||
/* <id>: { reweight?: 1, tags?: [ 'nvme', ... ], noout?: true }, ... */
|
/* <id>: { reweight?: 1, tags?: [ 'nvme', ... ], noout?: true }, ... */
|
||||||
},
|
},
|
||||||
/* pgs: {
|
|
||||||
hash: string,
|
|
||||||
items: {
|
|
||||||
<pool_id>: {
|
|
||||||
<pg_id>: {
|
|
||||||
osd_set: [ 1, 2, 3 ],
|
|
||||||
primary: 1,
|
|
||||||
pause: false,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}, */
|
|
||||||
pgs: {},
|
|
||||||
/* inode: {
|
/* inode: {
|
||||||
<pool_id>: {
|
<pool_id>: {
|
||||||
<inode_t>: {
|
<inode_t>: {
|
||||||
|
@ -290,6 +279,19 @@ const etcd_tree = {
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
pg: {
|
pg: {
|
||||||
|
/* config: {
|
||||||
|
hash: string,
|
||||||
|
items: {
|
||||||
|
<pool_id>: {
|
||||||
|
<pg_id>: {
|
||||||
|
osd_set: [ 1, 2, 3 ],
|
||||||
|
primary: 1,
|
||||||
|
pause: false,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}, */
|
||||||
|
config: {},
|
||||||
state: {
|
state: {
|
||||||
/* <pool_id>: {
|
/* <pool_id>: {
|
||||||
<pg_id>: {
|
<pg_id>: {
|
||||||
|
@ -300,18 +302,6 @@ const etcd_tree = {
|
||||||
}
|
}
|
||||||
}, */
|
}, */
|
||||||
},
|
},
|
||||||
stats: {
|
|
||||||
/* <pool_id>: {
|
|
||||||
<pg_id>: {
|
|
||||||
object_count: uint64_t,
|
|
||||||
clean_count: uint64_t,
|
|
||||||
misplaced_count: uint64_t,
|
|
||||||
degraded_count: uint64_t,
|
|
||||||
incomplete_count: uint64_t,
|
|
||||||
write_osd_set: osd_num_t[],
|
|
||||||
},
|
|
||||||
}, */
|
|
||||||
},
|
|
||||||
history: {
|
history: {
|
||||||
/* <pool_id>: {
|
/* <pool_id>: {
|
||||||
<pg_id>: {
|
<pg_id>: {
|
||||||
|
@ -323,6 +313,18 @@ const etcd_tree = {
|
||||||
}, */
|
}, */
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
pgstats: {
|
||||||
|
/* <pool_id>: {
|
||||||
|
<pg_id>: {
|
||||||
|
object_count: uint64_t,
|
||||||
|
clean_count: uint64_t,
|
||||||
|
misplaced_count: uint64_t,
|
||||||
|
degraded_count: uint64_t,
|
||||||
|
incomplete_count: uint64_t,
|
||||||
|
write_osd_set: osd_num_t[],
|
||||||
|
},
|
||||||
|
}, */
|
||||||
|
},
|
||||||
inode: {
|
inode: {
|
||||||
stats: {
|
stats: {
|
||||||
/* <pool_id>: {
|
/* <pool_id>: {
|
||||||
|
|
135
mon/mon.js
135
mon/mon.js
|
@ -75,6 +75,8 @@ class Mon
|
||||||
this.prev_stats = { osd_stats: {}, osd_diff: {} };
|
this.prev_stats = { osd_stats: {}, osd_diff: {} };
|
||||||
this.recheck_pgs_active = false;
|
this.recheck_pgs_active = false;
|
||||||
this.watcher_active = false;
|
this.watcher_active = false;
|
||||||
|
this.old_pg_config = false;
|
||||||
|
this.old_pg_stats_seen = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
async start()
|
async start()
|
||||||
|
@ -122,7 +124,7 @@ class Mon
|
||||||
!Number(this.state.pool.stats[pool_id].pg_real_size))
|
!Number(this.state.pool.stats[pool_id].pg_real_size))
|
||||||
{
|
{
|
||||||
// Generate missing data in etcd
|
// Generate missing data in etcd
|
||||||
this.state.config.pgs.hash = null;
|
this.state.pg.config.hash = null;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -201,10 +203,15 @@ class Mon
|
||||||
stats_changed = true;
|
stats_changed = true;
|
||||||
changed = true;
|
changed = true;
|
||||||
}
|
}
|
||||||
else if (key.substr(0, 11) == '/osd/stats/' || key.substr(0, 10) == '/pg/stats/' || key.substr(0, 16) == '/osd/inodestats/')
|
else if (key.substr(0, 11) == '/osd/stats/' || key.substr(0, 9) == '/pgstats/' || key.substr(0, 16) == '/osd/inodestats/')
|
||||||
{
|
{
|
||||||
stats_changed = true;
|
stats_changed = true;
|
||||||
}
|
}
|
||||||
|
else if (key.substr(0, 10) == '/pg/stats/')
|
||||||
|
{
|
||||||
|
this.old_pg_stats_seen = true;
|
||||||
|
stats_changed = true;
|
||||||
|
}
|
||||||
else if (key.substr(0, 10) == '/pg/state/')
|
else if (key.substr(0, 10) == '/pg/state/')
|
||||||
{
|
{
|
||||||
pg_states_changed = true;
|
pg_states_changed = true;
|
||||||
|
@ -285,7 +292,7 @@ class Mon
|
||||||
continue next_pool;
|
continue next_pool;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
new_clean_pgs.items[pool_id] = this.state.config.pgs.items[pool_id];
|
new_clean_pgs.items[pool_id] = this.state.pg.config.items[pool_id];
|
||||||
}
|
}
|
||||||
this.state.history.last_clean_pgs = new_clean_pgs;
|
this.state.history.last_clean_pgs = new_clean_pgs;
|
||||||
await this.etcd.etcd_call('/kv/txn', {
|
await this.etcd.etcd_call('/kv/txn', {
|
||||||
|
@ -396,6 +403,50 @@ class Mon
|
||||||
this.parse_kv(kv);
|
this.parse_kv(kv);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (Object.keys((this.state.config.pgs||{}).items||{}).length)
|
||||||
|
{
|
||||||
|
// Support seamless upgrade to new OSDs
|
||||||
|
if (!Object.keys((this.state.pg.config||{}).items||{}).length)
|
||||||
|
{
|
||||||
|
const pgs = JSON.stringify(this.state.config.pgs);
|
||||||
|
this.state.pg.config = JSON.parse(pgs);
|
||||||
|
const res = await this.etcd.etcd_call('/kv/txn', {
|
||||||
|
success: [
|
||||||
|
{ requestPut: { key: b64(this.config.etcd_prefix+'/pg/config'), value: b64(pgs) } },
|
||||||
|
],
|
||||||
|
compare: [
|
||||||
|
{ key: b64(this.config.etcd_prefix+'/pg/config'), target: 'MOD', mod_revision: ''+this.etcd_watch_revision, result: 'LESS' },
|
||||||
|
],
|
||||||
|
}, this.config.etcd_mon_timeout, this.config.etcd_mon_retries);
|
||||||
|
if (!res.succeeded)
|
||||||
|
throw new Error('Failed to duplicate old PG config to new PG config');
|
||||||
|
}
|
||||||
|
this.old_pg_config = true;
|
||||||
|
this.old_pg_config_timer = setInterval(() => this.check_clear_old_config().catch(console.error),
|
||||||
|
this.config.old_pg_config_clear_interval||3600000);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async check_clear_old_config()
|
||||||
|
{
|
||||||
|
if (this.old_pg_config && this.old_pg_stats_seen)
|
||||||
|
{
|
||||||
|
this.old_pg_stats_seen = false;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (this.old_pg_config)
|
||||||
|
{
|
||||||
|
await this.etcd.etcd_call('/kv/txn', { success: [
|
||||||
|
{ requestDeleteRange: { key: b64(this.config.etcd_prefix+'/config/pgs') } },
|
||||||
|
{ requestDeleteRange: { key: b64(this.config.etcd_prefix+'/pg/stats/'), range_end: b64(this.config.etcd_prefix+'/pg/stats0') } },
|
||||||
|
] }, this.config.etcd_mon_timeout, this.config.etcd_mon_retries);
|
||||||
|
this.old_pg_config = false;
|
||||||
|
}
|
||||||
|
if (this.old_pg_config_timer)
|
||||||
|
{
|
||||||
|
clearInterval(this.old_pg_config_timer);
|
||||||
|
this.old_pg_config_timer = null;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
all_osds()
|
all_osds()
|
||||||
|
@ -406,7 +457,7 @@ class Mon
|
||||||
async stop_all_pgs(pool_id)
|
async stop_all_pgs(pool_id)
|
||||||
{
|
{
|
||||||
let has_online = false, paused = true;
|
let has_online = false, paused = true;
|
||||||
for (const pg in this.state.config.pgs.items[pool_id]||{})
|
for (const pg in this.state.pg.config.items[pool_id]||{})
|
||||||
{
|
{
|
||||||
// FIXME: Change all (||{}) to ?. (optional chaining) at some point
|
// FIXME: Change all (||{}) to ?. (optional chaining) at some point
|
||||||
const cur_state = (((this.state.pg.state[pool_id]||{})[pg]||{}).state||[]).join(',');
|
const cur_state = (((this.state.pg.state[pool_id]||{})[pg]||{}).state||[]).join(',');
|
||||||
|
@ -414,7 +465,7 @@ class Mon
|
||||||
{
|
{
|
||||||
has_online = true;
|
has_online = true;
|
||||||
}
|
}
|
||||||
if (!this.state.config.pgs.items[pool_id][pg].pause)
|
if (!this.state.pg.config.items[pool_id][pg].pause)
|
||||||
{
|
{
|
||||||
paused = false;
|
paused = false;
|
||||||
}
|
}
|
||||||
|
@ -422,7 +473,7 @@ class Mon
|
||||||
if (!paused)
|
if (!paused)
|
||||||
{
|
{
|
||||||
console.log('Stopping all PGs for pool '+pool_id+' before changing PG count');
|
console.log('Stopping all PGs for pool '+pool_id+' before changing PG count');
|
||||||
const new_cfg = JSON.parse(JSON.stringify(this.state.config.pgs));
|
const new_cfg = JSON.parse(JSON.stringify(this.state.pg.config));
|
||||||
for (const pg in new_cfg.items[pool_id])
|
for (const pg in new_cfg.items[pool_id])
|
||||||
{
|
{
|
||||||
new_cfg.items[pool_id][pg].pause = true;
|
new_cfg.items[pool_id][pg].pause = true;
|
||||||
|
@ -430,22 +481,26 @@ class Mon
|
||||||
// Check that no OSDs change their state before we pause PGs
|
// Check that no OSDs change their state before we pause PGs
|
||||||
// Doing this we make sure that OSDs don't wake up in the middle of our "transaction"
|
// Doing this we make sure that OSDs don't wake up in the middle of our "transaction"
|
||||||
// and can't see the old PG configuration
|
// and can't see the old PG configuration
|
||||||
const checks = [];
|
const checks = [
|
||||||
|
{ key: b64(this.config.etcd_prefix+'/mon/master'), target: 'LEASE', lease: ''+this.etcd_lease_id },
|
||||||
|
{ key: b64(this.config.etcd_prefix+'/pg/config'), target: 'MOD', mod_revision: ''+this.etcd_watch_revision, result: 'LESS' },
|
||||||
|
];
|
||||||
for (const osd_num of this.all_osds())
|
for (const osd_num of this.all_osds())
|
||||||
{
|
{
|
||||||
const key = b64(this.config.etcd_prefix+'/osd/state/'+osd_num);
|
const key = b64(this.config.etcd_prefix+'/osd/state/'+osd_num);
|
||||||
checks.push({ key, target: 'MOD', result: 'LESS', mod_revision: ''+this.etcd_watch_revision });
|
checks.push({ key, target: 'MOD', result: 'LESS', mod_revision: ''+this.etcd_watch_revision });
|
||||||
}
|
}
|
||||||
await this.etcd.etcd_call('/kv/txn', {
|
const txn = {
|
||||||
compare: [
|
compare: checks,
|
||||||
{ key: b64(this.config.etcd_prefix+'/mon/master'), target: 'LEASE', lease: ''+this.etcd_lease_id },
|
|
||||||
{ key: b64(this.config.etcd_prefix+'/config/pgs'), target: 'MOD', mod_revision: ''+this.etcd_watch_revision, result: 'LESS' },
|
|
||||||
...checks,
|
|
||||||
],
|
|
||||||
success: [
|
success: [
|
||||||
{ requestPut: { key: b64(this.config.etcd_prefix+'/config/pgs'), value: b64(JSON.stringify(new_cfg)) } },
|
{ requestPut: { key: b64(this.config.etcd_prefix+'/pg/config'), value: b64(JSON.stringify(new_cfg)) } },
|
||||||
],
|
],
|
||||||
}, this.config.etcd_mon_timeout, 0);
|
};
|
||||||
|
if (this.old_pg_config)
|
||||||
|
{
|
||||||
|
txn.success.push({ requestPut: { key: b64(this.config.etcd_prefix+'/config/pgs'), value: b64(JSON.stringify(new_cfg)) } });
|
||||||
|
}
|
||||||
|
await this.etcd.etcd_call('/kv/txn', txn, this.config.etcd_mon_timeout, 0);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
return !has_online;
|
return !has_online;
|
||||||
|
@ -473,7 +528,7 @@ class Mon
|
||||||
pools: this.state.config.pools,
|
pools: this.state.config.pools,
|
||||||
};
|
};
|
||||||
const tree_hash = sha1hex(stableStringify(tree_cfg));
|
const tree_hash = sha1hex(stableStringify(tree_cfg));
|
||||||
if (this.state.config.pgs.hash != tree_hash)
|
if (this.state.pg.config.hash != tree_hash)
|
||||||
{
|
{
|
||||||
// Something has changed
|
// Something has changed
|
||||||
console.log('Pool configuration or OSD tree changed, re-optimizing');
|
console.log('Pool configuration or OSD tree changed, re-optimizing');
|
||||||
|
@ -514,10 +569,10 @@ class Mon
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
// Nothing changed, but we still want to recheck the distribution of primaries
|
// Nothing changed, but we still want to recheck the distribution of primaries
|
||||||
let new_config_pgs = recheck_primary(this.state, this.config, up_osds, osd_tree);
|
let new_pg_config = recheck_primary(this.state, this.config, up_osds, osd_tree);
|
||||||
if (new_config_pgs)
|
if (new_pg_config)
|
||||||
{
|
{
|
||||||
const ok = await this.save_pg_config(new_config_pgs);
|
const ok = await this.save_pg_config(new_pg_config);
|
||||||
if (ok)
|
if (ok)
|
||||||
console.log('PG configuration successfully changed');
|
console.log('PG configuration successfully changed');
|
||||||
else
|
else
|
||||||
|
@ -532,12 +587,12 @@ class Mon
|
||||||
|
|
||||||
async apply_pool_pgs(results, up_osds, osd_tree, tree_hash)
|
async apply_pool_pgs(results, up_osds, osd_tree, tree_hash)
|
||||||
{
|
{
|
||||||
for (const pool_id in (this.state.config.pgs||{}).items||{})
|
for (const pool_id in (this.state.pg.config||{}).items||{})
|
||||||
{
|
{
|
||||||
// We should stop all PGs when deleting a pool or changing its PG count
|
// We should stop all PGs when deleting a pool or changing its PG count
|
||||||
if (!this.state.config.pools[pool_id] ||
|
if (!this.state.config.pools[pool_id] ||
|
||||||
this.state.config.pgs.items[pool_id] && this.state.config.pools[pool_id].pg_count !=
|
this.state.pg.config.items[pool_id] && this.state.config.pools[pool_id].pg_count !=
|
||||||
Object.keys(this.state.config.pgs.items[pool_id]).reduce((a, c) => (a < (0|c) ? (0|c) : a), 0))
|
Object.keys(this.state.pg.config.items[pool_id]).reduce((a, c) => (a < (0|c) ? (0|c) : a), 0))
|
||||||
{
|
{
|
||||||
if (!await this.stop_all_pgs(pool_id))
|
if (!await this.stop_all_pgs(pool_id))
|
||||||
{
|
{
|
||||||
|
@ -545,22 +600,22 @@ class Mon
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
const new_config_pgs = JSON.parse(JSON.stringify(this.state.config.pgs));
|
const new_pg_config = JSON.parse(JSON.stringify(this.state.pg.config));
|
||||||
const etcd_request = { compare: [], success: [] };
|
const etcd_request = { compare: [], success: [] };
|
||||||
for (const pool_id in (new_config_pgs||{}).items||{})
|
for (const pool_id in (new_pg_config||{}).items||{})
|
||||||
{
|
{
|
||||||
if (!this.state.config.pools[pool_id])
|
if (!this.state.config.pools[pool_id])
|
||||||
{
|
{
|
||||||
const prev_pgs = [];
|
const prev_pgs = [];
|
||||||
for (const pg in new_config_pgs.items[pool_id]||{})
|
for (const pg in new_pg_config.items[pool_id]||{})
|
||||||
{
|
{
|
||||||
prev_pgs[pg-1] = new_config_pgs.items[pool_id][pg].osd_set;
|
prev_pgs[pg-1] = new_pg_config.items[pool_id][pg].osd_set;
|
||||||
}
|
}
|
||||||
// Also delete pool statistics
|
// Also delete pool statistics
|
||||||
etcd_request.success.push({ requestDeleteRange: {
|
etcd_request.success.push({ requestDeleteRange: {
|
||||||
key: b64(this.config.etcd_prefix+'/pool/stats/'+pool_id),
|
key: b64(this.config.etcd_prefix+'/pool/stats/'+pool_id),
|
||||||
} });
|
} });
|
||||||
save_new_pgs_txn(new_config_pgs, etcd_request, this.state, this.config.etcd_prefix,
|
save_new_pgs_txn(new_pg_config, etcd_request, this.state, this.config.etcd_prefix,
|
||||||
this.etcd_watch_revision, pool_id, up_osds, osd_tree, prev_pgs, [], []);
|
this.etcd_watch_revision, pool_id, up_osds, osd_tree, prev_pgs, [], []);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -569,7 +624,7 @@ class Mon
|
||||||
const pool_id = pool_res.pool_id;
|
const pool_id = pool_res.pool_id;
|
||||||
const pool_cfg = this.state.config.pools[pool_id];
|
const pool_cfg = this.state.config.pools[pool_id];
|
||||||
let pg_history = [];
|
let pg_history = [];
|
||||||
for (const pg in ((this.state.config.pgs.items||{})[pool_id]||{}))
|
for (const pg in ((this.state.pg.config.items||{})[pool_id]||{}))
|
||||||
{
|
{
|
||||||
if (this.state.pg.history[pool_id] &&
|
if (this.state.pg.history[pool_id] &&
|
||||||
this.state.pg.history[pool_id][pg])
|
this.state.pg.history[pool_id][pg])
|
||||||
|
@ -578,9 +633,9 @@ class Mon
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
const real_prev_pgs = [];
|
const real_prev_pgs = [];
|
||||||
for (const pg in ((this.state.config.pgs.items||{})[pool_id]||{}))
|
for (const pg in ((this.state.pg.config.items||{})[pool_id]||{}))
|
||||||
{
|
{
|
||||||
real_prev_pgs[pg-1] = [ ...this.state.config.pgs.items[pool_id][pg].osd_set ];
|
real_prev_pgs[pg-1] = [ ...this.state.pg.config.items[pool_id][pg].osd_set ];
|
||||||
}
|
}
|
||||||
if (real_prev_pgs.length > 0 && real_prev_pgs.length != pool_res.pgs.length)
|
if (real_prev_pgs.length > 0 && real_prev_pgs.length != pool_res.pgs.length)
|
||||||
{
|
{
|
||||||
|
@ -591,8 +646,8 @@ class Mon
|
||||||
pg_history = scale_pg_history(pg_history, real_prev_pgs, pool_res.pgs);
|
pg_history = scale_pg_history(pg_history, real_prev_pgs, pool_res.pgs);
|
||||||
// Drop stats
|
// Drop stats
|
||||||
etcd_request.success.push({ requestDeleteRange: {
|
etcd_request.success.push({ requestDeleteRange: {
|
||||||
key: b64(this.config.etcd_prefix+'/pg/stats/'+pool_id+'/'),
|
key: b64(this.config.etcd_prefix+'/pgstats/'+pool_id+'/'),
|
||||||
range_end: b64(this.config.etcd_prefix+'/pg/stats/'+pool_id+'0'),
|
range_end: b64(this.config.etcd_prefix+'/pgstats/'+pool_id+'0'),
|
||||||
} });
|
} });
|
||||||
}
|
}
|
||||||
const stats = {
|
const stats = {
|
||||||
|
@ -603,22 +658,26 @@ class Mon
|
||||||
key: b64(this.config.etcd_prefix+'/pool/stats/'+pool_id),
|
key: b64(this.config.etcd_prefix+'/pool/stats/'+pool_id),
|
||||||
value: b64(JSON.stringify(stats)),
|
value: b64(JSON.stringify(stats)),
|
||||||
} });
|
} });
|
||||||
save_new_pgs_txn(new_config_pgs, etcd_request, this.state, this.config.etcd_prefix,
|
save_new_pgs_txn(new_pg_config, etcd_request, this.state, this.config.etcd_prefix,
|
||||||
this.etcd_watch_revision, pool_id, up_osds, osd_tree, real_prev_pgs, pool_res.pgs, pg_history);
|
this.etcd_watch_revision, pool_id, up_osds, osd_tree, real_prev_pgs, pool_res.pgs, pg_history);
|
||||||
}
|
}
|
||||||
new_config_pgs.hash = tree_hash;
|
new_pg_config.hash = tree_hash;
|
||||||
return await this.save_pg_config(new_config_pgs, etcd_request);
|
return await this.save_pg_config(new_pg_config, etcd_request);
|
||||||
}
|
}
|
||||||
|
|
||||||
async save_pg_config(new_config_pgs, etcd_request = { compare: [], success: [] })
|
async save_pg_config(new_pg_config, etcd_request = { compare: [], success: [] })
|
||||||
{
|
{
|
||||||
etcd_request.compare.push(
|
etcd_request.compare.push(
|
||||||
{ key: b64(this.config.etcd_prefix+'/mon/master'), target: 'LEASE', lease: ''+this.etcd_lease_id },
|
{ key: b64(this.config.etcd_prefix+'/mon/master'), target: 'LEASE', lease: ''+this.etcd_lease_id },
|
||||||
{ key: b64(this.config.etcd_prefix+'/config/pgs'), target: 'MOD', mod_revision: ''+this.etcd_watch_revision, result: 'LESS' },
|
{ key: b64(this.config.etcd_prefix+'/pg/config'), target: 'MOD', mod_revision: ''+this.etcd_watch_revision, result: 'LESS' },
|
||||||
);
|
);
|
||||||
etcd_request.success.push(
|
etcd_request.success.push(
|
||||||
{ requestPut: { key: b64(this.config.etcd_prefix+'/config/pgs'), value: b64(JSON.stringify(new_config_pgs)) } },
|
{ requestPut: { key: b64(this.config.etcd_prefix+'/pg/config'), value: b64(JSON.stringify(new_pg_config)) } },
|
||||||
);
|
);
|
||||||
|
if (this.old_pg_config)
|
||||||
|
{
|
||||||
|
etcd_request.success.push({ requestPut: { key: b64(this.config.etcd_prefix+'/config/pgs'), value: b64(JSON.stringify(new_pg_config)) } });
|
||||||
|
}
|
||||||
const txn_res = await this.etcd.etcd_call('/kv/txn', etcd_request, this.config.etcd_mon_timeout, 0);
|
const txn_res = await this.etcd.etcd_call('/kv/txn', etcd_request, this.config.etcd_mon_timeout, 0);
|
||||||
return txn_res.succeeded;
|
return txn_res.succeeded;
|
||||||
}
|
}
|
||||||
|
|
|
@ -57,7 +57,7 @@ function pick_primary(pool_config, osd_set, up_osds, aff_osds)
|
||||||
|
|
||||||
function recheck_primary(state, global_config, up_osds, osd_tree)
|
function recheck_primary(state, global_config, up_osds, osd_tree)
|
||||||
{
|
{
|
||||||
let new_config_pgs;
|
let new_pg_config;
|
||||||
for (const pool_id in state.config.pools)
|
for (const pool_id in state.config.pools)
|
||||||
{
|
{
|
||||||
const pool_cfg = state.config.pools[pool_id];
|
const pool_cfg = state.config.pools[pool_id];
|
||||||
|
@ -69,30 +69,30 @@ function recheck_primary(state, global_config, up_osds, osd_tree)
|
||||||
reset_rng();
|
reset_rng();
|
||||||
for (let pg_num = 1; pg_num <= pool_cfg.pg_count; pg_num++)
|
for (let pg_num = 1; pg_num <= pool_cfg.pg_count; pg_num++)
|
||||||
{
|
{
|
||||||
if (!state.config.pgs.items[pool_id])
|
if (!state.pg.config.items[pool_id])
|
||||||
{
|
{
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
const pg_cfg = state.config.pgs.items[pool_id][pg_num];
|
const pg_cfg = state.pg.config.items[pool_id][pg_num];
|
||||||
if (pg_cfg)
|
if (pg_cfg)
|
||||||
{
|
{
|
||||||
const new_primary = pick_primary(state.config.pools[pool_id], pg_cfg.osd_set, up_osds, aff_osds);
|
const new_primary = pick_primary(state.config.pools[pool_id], pg_cfg.osd_set, up_osds, aff_osds);
|
||||||
if (pg_cfg.primary != new_primary)
|
if (pg_cfg.primary != new_primary)
|
||||||
{
|
{
|
||||||
if (!new_config_pgs)
|
if (!new_pg_config)
|
||||||
{
|
{
|
||||||
new_config_pgs = JSON.parse(JSON.stringify(state.config.pgs));
|
new_pg_config = JSON.parse(JSON.stringify(state.pg.config));
|
||||||
}
|
}
|
||||||
console.log(
|
console.log(
|
||||||
`Moving pool ${pool_id} (${pool_cfg.name || 'unnamed'}) PG ${pg_num}`+
|
`Moving pool ${pool_id} (${pool_cfg.name || 'unnamed'}) PG ${pg_num}`+
|
||||||
` primary OSD from ${pg_cfg.primary} to ${new_primary}`
|
` primary OSD from ${pg_cfg.primary} to ${new_primary}`
|
||||||
);
|
);
|
||||||
new_config_pgs.items[pool_id][pg_num].primary = new_primary;
|
new_pg_config.items[pool_id][pg_num].primary = new_primary;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return new_config_pgs;
|
return new_pg_config;
|
||||||
}
|
}
|
||||||
|
|
||||||
function save_new_pgs_txn(save_to, request, state, etcd_prefix, etcd_watch_revision, pool_id, up_osds, osd_tree, prev_pgs, new_pgs, pg_history)
|
function save_new_pgs_txn(save_to, request, state, etcd_prefix, etcd_watch_revision, pool_id, up_osds, osd_tree, prev_pgs, new_pgs, pg_history)
|
||||||
|
@ -185,10 +185,10 @@ async function generate_pool_pgs(state, global_config, pool_id, osd_tree, levels
|
||||||
}
|
}
|
||||||
if (!prev_pgs.length)
|
if (!prev_pgs.length)
|
||||||
{
|
{
|
||||||
// Fall back to config/pgs if it's empty
|
// Fall back to pg/config if it's empty
|
||||||
for (const pg in ((state.config.pgs.items||{})[pool_id]||{}))
|
for (const pg in ((state.pg.config.items||{})[pool_id]||{}))
|
||||||
{
|
{
|
||||||
prev_pgs[pg-1] = [ ...state.config.pgs.items[pool_id][pg].osd_set ];
|
prev_pgs[pg-1] = [ ...state.pg.config.items[pool_id][pg].osd_set ];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
const old_pg_count = prev_pgs.length;
|
const old_pg_count = prev_pgs.length;
|
||||||
|
@ -205,8 +205,8 @@ async function generate_pool_pgs(state, global_config, pool_id, osd_tree, levels
|
||||||
ordered: pool_cfg.scheme != 'replicated',
|
ordered: pool_cfg.scheme != 'replicated',
|
||||||
};
|
};
|
||||||
let optimize_result;
|
let optimize_result;
|
||||||
// Re-shuffle PGs if config/pgs.hash is empty
|
// Re-shuffle PGs if pg/config.hash is empty
|
||||||
if (old_pg_count > 0 && state.config.pgs.hash)
|
if (old_pg_count > 0 && state.pg.config.hash)
|
||||||
{
|
{
|
||||||
if (prev_pgs.length != pool_cfg.pg_count)
|
if (prev_pgs.length != pool_cfg.pg_count)
|
||||||
{
|
{
|
||||||
|
|
|
@ -166,7 +166,7 @@ function export_prometheus_metrics(st)
|
||||||
res += `vitastor_pool_used_raw_tb{${pool_label}} ${pool_stat.used_raw_tb||0}\n`;
|
res += `vitastor_pool_used_raw_tb{${pool_label}} ${pool_stat.used_raw_tb||0}\n`;
|
||||||
|
|
||||||
// PG states and pool up/down status
|
// PG states and pool up/down status
|
||||||
const real_pg_count = (Object.keys(((st.config.pgs||{}).items||{})[pool_id]||{}).length) || (0|pool_cfg.pg_count);
|
const real_pg_count = (Object.keys(((st.pg.config||{}).items||{})[pool_id]||{}).length) || (0|pool_cfg.pg_count);
|
||||||
const per_state = {
|
const per_state = {
|
||||||
active: 0,
|
active: 0,
|
||||||
starting: 0,
|
starting: 0,
|
||||||
|
|
15
mon/stats.js
15
mon/stats.js
|
@ -100,10 +100,19 @@ function sum_object_counts(state, global_config)
|
||||||
{
|
{
|
||||||
const object_counts = { object: 0n, clean: 0n, misplaced: 0n, degraded: 0n, incomplete: 0n };
|
const object_counts = { object: 0n, clean: 0n, misplaced: 0n, degraded: 0n, incomplete: 0n };
|
||||||
const object_bytes = { object: 0n, clean: 0n, misplaced: 0n, degraded: 0n, incomplete: 0n };
|
const object_bytes = { object: 0n, clean: 0n, misplaced: 0n, degraded: 0n, incomplete: 0n };
|
||||||
|
let pgstats = state.pgstats;
|
||||||
|
if (state.pg.stats)
|
||||||
|
{
|
||||||
|
// Merge with old stats for seamless transition to new stats
|
||||||
for (const pool_id in state.pg.stats)
|
for (const pool_id in state.pg.stats)
|
||||||
|
{
|
||||||
|
pgstats[pool_id] = { ...(state.pg.stats[pool_id] || {}), ...(pgstats[pool_id] || {}) };
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (const pool_id in pgstats)
|
||||||
{
|
{
|
||||||
let object_size = 0;
|
let object_size = 0;
|
||||||
for (const osd_num of state.pg.stats[pool_id].write_osd_set||[])
|
for (const osd_num of pgstats[pool_id].write_osd_set||[])
|
||||||
{
|
{
|
||||||
if (osd_num && state.osd.stats[osd_num] && state.osd.stats[osd_num].block_size)
|
if (osd_num && state.osd.stats[osd_num] && state.osd.stats[osd_num].block_size)
|
||||||
{
|
{
|
||||||
|
@ -121,9 +130,9 @@ function sum_object_counts(state, global_config)
|
||||||
object_size *= ((pool_cfg.pg_size||0) - (pool_cfg.parity_chunks||0));
|
object_size *= ((pool_cfg.pg_size||0) - (pool_cfg.parity_chunks||0));
|
||||||
}
|
}
|
||||||
object_size = BigInt(object_size);
|
object_size = BigInt(object_size);
|
||||||
for (const pg_num in state.pg.stats[pool_id])
|
for (const pg_num in pgstats[pool_id])
|
||||||
{
|
{
|
||||||
const st = state.pg.stats[pool_id][pg_num];
|
const st = pgstats[pool_id][pg_num];
|
||||||
if (st)
|
if (st)
|
||||||
{
|
{
|
||||||
for (const k in object_counts)
|
for (const k in object_counts)
|
||||||
|
|
|
@ -35,7 +35,8 @@ function vitastor_persist_filter(cfg)
|
||||||
}
|
}
|
||||||
else if (key.substr(0, prefix.length+'/osd/'.length) == prefix+'/osd/' ||
|
else if (key.substr(0, prefix.length+'/osd/'.length) == prefix+'/osd/' ||
|
||||||
key.substr(0, prefix.length+'/inode/stats/'.length) == prefix+'/inode/stats/' ||
|
key.substr(0, prefix.length+'/inode/stats/'.length) == prefix+'/inode/stats/' ||
|
||||||
key.substr(0, prefix.length+'/pg/stats/'.length) == prefix+'/pg/stats/' ||
|
key.substr(0, prefix.length+'/pg/stats/'.length) == prefix+'/pg/stats/' || // old name
|
||||||
|
key.substr(0, prefix.length+'/pgstats/'.length) == prefix+'/pgstats/' ||
|
||||||
key.substr(0, prefix.length+'/pool/stats/'.length) == prefix+'/pool/stats/' ||
|
key.substr(0, prefix.length+'/pool/stats/'.length) == prefix+'/pool/stats/' ||
|
||||||
key == prefix+'/stats')
|
key == prefix+'/stats')
|
||||||
{
|
{
|
||||||
|
|
|
@ -50,7 +50,7 @@ from cinder.volume import configuration
|
||||||
from cinder.volume import driver
|
from cinder.volume import driver
|
||||||
from cinder.volume import volume_utils
|
from cinder.volume import volume_utils
|
||||||
|
|
||||||
VERSION = '1.7.1'
|
VITASTOR_VERSION = '1.7.1'
|
||||||
|
|
||||||
LOG = logging.getLogger(__name__)
|
LOG = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
@ -238,7 +238,7 @@ class VitastorDriver(driver.CloneableImageVD,
|
||||||
|
|
||||||
stats = {
|
stats = {
|
||||||
'vendor_name': 'Vitastor',
|
'vendor_name': 'Vitastor',
|
||||||
'driver_version': self.VERSION,
|
'driver_version': VITASTOR_VERSION,
|
||||||
'storage_protocol': 'vitastor',
|
'storage_protocol': 'vitastor',
|
||||||
'total_capacity_gb': 'unknown',
|
'total_capacity_gb': 'unknown',
|
||||||
'free_capacity_gb': 'unknown',
|
'free_capacity_gb': 'unknown',
|
||||||
|
|
|
@ -19,7 +19,7 @@ if("${CMAKE_INSTALL_PREFIX}" MATCHES "^/usr/local/?$")
|
||||||
set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}")
|
set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
add_definitions(-DVERSION="1.7.1")
|
add_definitions(-DVITASTOR_VERSION="1.7.1")
|
||||||
add_definitions(-D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -Wall -Wno-sign-compare -Wno-comment -Wno-parentheses -Wno-pointer-arith -fdiagnostics-color=always -fno-omit-frame-pointer -I ${CMAKE_SOURCE_DIR}/src)
|
add_definitions(-D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -Wall -Wno-sign-compare -Wno-comment -Wno-parentheses -Wno-pointer-arith -fdiagnostics-color=always -fno-omit-frame-pointer -I ${CMAKE_SOURCE_DIR}/src)
|
||||||
add_link_options(-fno-omit-frame-pointer)
|
add_link_options(-fno-omit-frame-pointer)
|
||||||
if (${WITH_ASAN})
|
if (${WITH_ASAN})
|
||||||
|
|
|
@ -13,7 +13,7 @@ target_link_libraries(vitastor_blk
|
||||||
# for timerfd_manager
|
# for timerfd_manager
|
||||||
vitastor_common
|
vitastor_common
|
||||||
)
|
)
|
||||||
set_target_properties(vitastor_blk PROPERTIES VERSION ${VERSION} SOVERSION 0)
|
set_target_properties(vitastor_blk PROPERTIES VERSION ${VITASTOR_VERSION} SOVERSION 0)
|
||||||
|
|
||||||
if (${WITH_FIO})
|
if (${WITH_FIO})
|
||||||
# libfio_vitastor_blk.so
|
# libfio_vitastor_blk.so
|
||||||
|
|
|
@ -29,7 +29,7 @@ target_link_libraries(vitastor_client
|
||||||
${LIBURING_LIBRARIES}
|
${LIBURING_LIBRARIES}
|
||||||
${IBVERBS_LIBRARIES}
|
${IBVERBS_LIBRARIES}
|
||||||
)
|
)
|
||||||
set_target_properties(vitastor_client PROPERTIES VERSION ${VERSION} SOVERSION 0)
|
set_target_properties(vitastor_client PROPERTIES VERSION ${VITASTOR_VERSION} SOVERSION 0)
|
||||||
configure_file(vitastor.pc.in vitastor.pc @ONLY)
|
configure_file(vitastor.pc.in vitastor.pc @ONLY)
|
||||||
|
|
||||||
if (${WITH_FIO})
|
if (${WITH_FIO})
|
||||||
|
|
|
@ -452,11 +452,10 @@ void cluster_client_t::on_change_pg_state_hook(pool_id_t pool_id, pg_num_t pg_nu
|
||||||
if (pg_cfg.cur_primary != prev_primary)
|
if (pg_cfg.cur_primary != prev_primary)
|
||||||
{
|
{
|
||||||
// Repeat this PG operations because an OSD which stopped being primary may not fsync operations
|
// Repeat this PG operations because an OSD which stopped being primary may not fsync operations
|
||||||
if (wb->repeat_ops_for(this, 0, pool_id, pg_num) > 0)
|
wb->repeat_ops_for(this, 0, pool_id, pg_num);
|
||||||
{
|
}
|
||||||
|
// Always continue to resume operations hung because of lack of the primary OSD
|
||||||
continue_ops();
|
continue_ops();
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool cluster_client_t::get_immediate_commit(uint64_t inode)
|
bool cluster_client_t::get_immediate_commit(uint64_t inode)
|
||||||
|
@ -1066,11 +1065,11 @@ bool cluster_client_t::try_send(cluster_op_t *op, int i)
|
||||||
!pg_it->second.pause && pg_it->second.cur_primary)
|
!pg_it->second.pause && pg_it->second.cur_primary)
|
||||||
{
|
{
|
||||||
osd_num_t primary_osd = pg_it->second.cur_primary;
|
osd_num_t primary_osd = pg_it->second.cur_primary;
|
||||||
|
part->osd_num = primary_osd;
|
||||||
auto peer_it = msgr.osd_peer_fds.find(primary_osd);
|
auto peer_it = msgr.osd_peer_fds.find(primary_osd);
|
||||||
if (peer_it != msgr.osd_peer_fds.end())
|
if (peer_it != msgr.osd_peer_fds.end())
|
||||||
{
|
{
|
||||||
int peer_fd = peer_it->second;
|
int peer_fd = peer_it->second;
|
||||||
part->osd_num = primary_osd;
|
|
||||||
part->flags |= PART_SENT;
|
part->flags |= PART_SENT;
|
||||||
op->inflight_count++;
|
op->inflight_count++;
|
||||||
uint64_t pg_bitmap_size = (pool_cfg.data_block_size / pool_cfg.bitmap_granularity / 8) * (
|
uint64_t pg_bitmap_size = (pool_cfg.data_block_size / pool_cfg.bitmap_granularity / 8) * (
|
||||||
|
|
|
@ -333,7 +333,10 @@ void etcd_state_client_t::start_etcd_watcher()
|
||||||
etcd_watch_ws = NULL;
|
etcd_watch_ws = NULL;
|
||||||
}
|
}
|
||||||
if (this->log_level > 1)
|
if (this->log_level > 1)
|
||||||
fprintf(stderr, "Trying to connect to etcd websocket at %s, watch from revision %ju\n", etcd_address.c_str(), etcd_watch_revision);
|
{
|
||||||
|
fprintf(stderr, "Trying to connect to etcd websocket at %s, watch from revision %ju/%ju/%ju\n", etcd_address.c_str(),
|
||||||
|
etcd_watch_revision_config, etcd_watch_revision_osd, etcd_watch_revision_pg);
|
||||||
|
}
|
||||||
etcd_watch_ws = open_websocket(tfd, etcd_address, etcd_api_path+"/watch", etcd_slow_timeout,
|
etcd_watch_ws = open_websocket(tfd, etcd_address, etcd_api_path+"/watch", etcd_slow_timeout,
|
||||||
[this, cur_addr = selected_etcd_address](const http_response_t *msg)
|
[this, cur_addr = selected_etcd_address](const http_response_t *msg)
|
||||||
{
|
{
|
||||||
|
@ -348,16 +351,20 @@ void etcd_state_client_t::start_etcd_watcher()
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
uint64_t watch_id = data["result"]["watch_id"].uint64_value();
|
||||||
if (data["result"]["created"].bool_value())
|
if (data["result"]["created"].bool_value())
|
||||||
{
|
{
|
||||||
uint64_t watch_id = data["result"]["watch_id"].uint64_value();
|
|
||||||
if (watch_id == ETCD_CONFIG_WATCH_ID ||
|
if (watch_id == ETCD_CONFIG_WATCH_ID ||
|
||||||
watch_id == ETCD_PG_STATE_WATCH_ID ||
|
watch_id == ETCD_PG_STATE_WATCH_ID ||
|
||||||
watch_id == ETCD_PG_HISTORY_WATCH_ID ||
|
|
||||||
watch_id == ETCD_OSD_STATE_WATCH_ID)
|
watch_id == ETCD_OSD_STATE_WATCH_ID)
|
||||||
|
{
|
||||||
etcd_watches_initialised++;
|
etcd_watches_initialised++;
|
||||||
|
}
|
||||||
if (etcd_watches_initialised == ETCD_TOTAL_WATCHES && this->log_level > 0)
|
if (etcd_watches_initialised == ETCD_TOTAL_WATCHES && this->log_level > 0)
|
||||||
fprintf(stderr, "Successfully subscribed to etcd at %s, revision %ju\n", cur_addr.c_str(), etcd_watch_revision);
|
{
|
||||||
|
fprintf(stderr, "Successfully subscribed to etcd at %s, revision %ju/%ju/%ju\n", cur_addr.c_str(),
|
||||||
|
etcd_watch_revision_config, etcd_watch_revision_osd, etcd_watch_revision_pg);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (data["result"]["canceled"].bool_value())
|
if (data["result"]["canceled"].bool_value())
|
||||||
{
|
{
|
||||||
|
@ -375,7 +382,7 @@ void etcd_state_client_t::start_etcd_watcher()
|
||||||
data["result"]["compact_revision"].uint64_value());
|
data["result"]["compact_revision"].uint64_value());
|
||||||
http_close(etcd_watch_ws);
|
http_close(etcd_watch_ws);
|
||||||
etcd_watch_ws = NULL;
|
etcd_watch_ws = NULL;
|
||||||
etcd_watch_revision = 0;
|
etcd_watch_revision_config = etcd_watch_revision_osd = etcd_watch_revision_pg = 0;
|
||||||
on_reload_hook();
|
on_reload_hook();
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
|
@ -393,13 +400,29 @@ void etcd_state_client_t::start_etcd_watcher()
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// Save revision only if it's present in the message - because sometimes etcd sends something without a header, like:
|
||||||
|
// {"error": {"grpc_code": 14, "http_code": 503, "http_status": "Service Unavailable", "message": "error reading from server: EOF"}}
|
||||||
if (etcd_watches_initialised == ETCD_TOTAL_WATCHES && !data["result"]["header"]["revision"].is_null())
|
if (etcd_watches_initialised == ETCD_TOTAL_WATCHES && !data["result"]["header"]["revision"].is_null())
|
||||||
{
|
{
|
||||||
// Protect against a revision beign split into multiple messages and some
|
// Restart watchers from the same revision number as in the last received message,
|
||||||
// of them being lost. Even though I'm not sure if etcd actually splits them
|
// not from the next one to protect against revision being split into multiple messages,
|
||||||
// Also sometimes etcd sends something without a header, like:
|
// even though etcd guarantees not to do that **within a single watcher** without fragment=true:
|
||||||
// {"error": {"grpc_code": 14, "http_code": 503, "http_status": "Service Unavailable", "message": "error reading from server: EOF"}}
|
// https://etcd.io/docs/v3.5/learning/api_guarantees/#watch-apis
|
||||||
etcd_watch_revision = data["result"]["header"]["revision"].uint64_value();
|
// Revision contents are ALWAYS split into separate messages for different watchers though!
|
||||||
|
// So generally we have to resume each watcher from its own revision...
|
||||||
|
// Progress messages may have watch_id=-1 if sent on behalf of multiple watchers though.
|
||||||
|
// And antietcd has an advanced semantic which merges the same revision for all watchers
|
||||||
|
// into one message and just omits watch_id.
|
||||||
|
// So we also have to handle the case where watch_id is -1 or not present (0).
|
||||||
|
auto watch_rev = data["result"]["header"]["revision"].uint64_value();
|
||||||
|
if (!watch_id || watch_id == UINT64_MAX)
|
||||||
|
etcd_watch_revision_config = etcd_watch_revision_osd = etcd_watch_revision_pg = watch_rev;
|
||||||
|
else if (watch_id == ETCD_CONFIG_WATCH_ID)
|
||||||
|
etcd_watch_revision_config = watch_rev;
|
||||||
|
else if (watch_id == ETCD_PG_STATE_WATCH_ID)
|
||||||
|
etcd_watch_revision_pg = watch_rev;
|
||||||
|
else if (watch_id == ETCD_OSD_STATE_WATCH_ID)
|
||||||
|
etcd_watch_revision_osd = watch_rev;
|
||||||
addresses_to_try.clear();
|
addresses_to_try.clear();
|
||||||
}
|
}
|
||||||
// First gather all changes into a hash to remove multiple overwrites
|
// First gather all changes into a hash to remove multiple overwrites
|
||||||
|
@ -457,7 +480,7 @@ void etcd_state_client_t::start_etcd_watcher()
|
||||||
{ "create_request", json11::Json::object {
|
{ "create_request", json11::Json::object {
|
||||||
{ "key", base64_encode(etcd_prefix+"/config/") },
|
{ "key", base64_encode(etcd_prefix+"/config/") },
|
||||||
{ "range_end", base64_encode(etcd_prefix+"/config0") },
|
{ "range_end", base64_encode(etcd_prefix+"/config0") },
|
||||||
{ "start_revision", etcd_watch_revision },
|
{ "start_revision", etcd_watch_revision_config },
|
||||||
{ "watch_id", ETCD_CONFIG_WATCH_ID },
|
{ "watch_id", ETCD_CONFIG_WATCH_ID },
|
||||||
{ "progress_notify", true },
|
{ "progress_notify", true },
|
||||||
} }
|
} }
|
||||||
|
@ -466,29 +489,21 @@ void etcd_state_client_t::start_etcd_watcher()
|
||||||
{ "create_request", json11::Json::object {
|
{ "create_request", json11::Json::object {
|
||||||
{ "key", base64_encode(etcd_prefix+"/osd/state/") },
|
{ "key", base64_encode(etcd_prefix+"/osd/state/") },
|
||||||
{ "range_end", base64_encode(etcd_prefix+"/osd/state0") },
|
{ "range_end", base64_encode(etcd_prefix+"/osd/state0") },
|
||||||
{ "start_revision", etcd_watch_revision },
|
{ "start_revision", etcd_watch_revision_osd },
|
||||||
{ "watch_id", ETCD_OSD_STATE_WATCH_ID },
|
{ "watch_id", ETCD_OSD_STATE_WATCH_ID },
|
||||||
{ "progress_notify", true },
|
{ "progress_notify", true },
|
||||||
} }
|
} }
|
||||||
}).dump());
|
}).dump());
|
||||||
http_post_message(etcd_watch_ws, WS_TEXT, json11::Json(json11::Json::object {
|
http_post_message(etcd_watch_ws, WS_TEXT, json11::Json(json11::Json::object {
|
||||||
{ "create_request", json11::Json::object {
|
{ "create_request", json11::Json::object {
|
||||||
{ "key", base64_encode(etcd_prefix+"/pg/state/") },
|
{ "key", base64_encode(etcd_prefix+"/pg/") },
|
||||||
{ "range_end", base64_encode(etcd_prefix+"/pg/state0") },
|
{ "range_end", base64_encode(etcd_prefix+"/pg0") },
|
||||||
{ "start_revision", etcd_watch_revision },
|
{ "start_revision", etcd_watch_revision_pg },
|
||||||
{ "watch_id", ETCD_PG_STATE_WATCH_ID },
|
{ "watch_id", ETCD_PG_STATE_WATCH_ID },
|
||||||
{ "progress_notify", true },
|
{ "progress_notify", true },
|
||||||
} }
|
} }
|
||||||
}).dump());
|
}).dump());
|
||||||
http_post_message(etcd_watch_ws, WS_TEXT, json11::Json(json11::Json::object {
|
// FIXME: Do not watch /pg/history/ at all in client code (not in OSD)
|
||||||
{ "create_request", json11::Json::object {
|
|
||||||
{ "key", base64_encode(etcd_prefix+"/pg/history/") },
|
|
||||||
{ "range_end", base64_encode(etcd_prefix+"/pg/history0") },
|
|
||||||
{ "start_revision", etcd_watch_revision },
|
|
||||||
{ "watch_id", ETCD_PG_HISTORY_WATCH_ID },
|
|
||||||
{ "progress_notify", true },
|
|
||||||
} }
|
|
||||||
}).dump());
|
|
||||||
if (on_start_watcher_hook)
|
if (on_start_watcher_hook)
|
||||||
{
|
{
|
||||||
on_start_watcher_hook(etcd_watch_ws);
|
on_start_watcher_hook(etcd_watch_ws);
|
||||||
|
@ -591,6 +606,11 @@ void etcd_state_client_t::load_pgs()
|
||||||
{ "key", base64_encode(etcd_prefix+"/config/pgs") },
|
{ "key", base64_encode(etcd_prefix+"/config/pgs") },
|
||||||
} }
|
} }
|
||||||
},
|
},
|
||||||
|
json11::Json::object {
|
||||||
|
{ "request_range", json11::Json::object {
|
||||||
|
{ "key", base64_encode(etcd_prefix+"/pg/config") },
|
||||||
|
} }
|
||||||
|
},
|
||||||
json11::Json::object {
|
json11::Json::object {
|
||||||
{ "request_range", json11::Json::object {
|
{ "request_range", json11::Json::object {
|
||||||
{ "key", base64_encode(etcd_prefix+"/config/inode/") },
|
{ "key", base64_encode(etcd_prefix+"/config/inode/") },
|
||||||
|
@ -640,13 +660,10 @@ void etcd_state_client_t::load_pgs()
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
reset_pg_exists();
|
reset_pg_exists();
|
||||||
if (!etcd_watch_revision)
|
etcd_watch_revision_config = etcd_watch_revision_osd = etcd_watch_revision_pg = data["header"]["revision"].uint64_value()+1;
|
||||||
{
|
|
||||||
etcd_watch_revision = data["header"]["revision"].uint64_value()+1;
|
|
||||||
if (this->log_level > 3)
|
if (this->log_level > 3)
|
||||||
{
|
{
|
||||||
fprintf(stderr, "Loaded revision %ju of PG configuration\n", etcd_watch_revision-1);
|
fprintf(stderr, "Loaded revision %ju of PG configuration\n", etcd_watch_revision_pg-1);
|
||||||
}
|
|
||||||
}
|
}
|
||||||
for (auto & res: data["responses"].array_items())
|
for (auto & res: data["responses"].array_items())
|
||||||
{
|
{
|
||||||
|
@ -895,8 +912,17 @@ void etcd_state_client_t::parse_state(const etcd_kv_t & kv)
|
||||||
on_change_pool_config_hook();
|
on_change_pool_config_hook();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (key == etcd_prefix+"/config/pgs")
|
else if (key == etcd_prefix+"/pg/config" || key == etcd_prefix+"/config/pgs")
|
||||||
{
|
{
|
||||||
|
if (key == etcd_prefix+"/pg/config")
|
||||||
|
{
|
||||||
|
new_pg_config = !value.is_null();
|
||||||
|
}
|
||||||
|
else if (new_pg_config)
|
||||||
|
{
|
||||||
|
// Ignore old key if the new one is present
|
||||||
|
return;
|
||||||
|
}
|
||||||
for (auto & pool_item: this->pool_config)
|
for (auto & pool_item: this->pool_config)
|
||||||
{
|
{
|
||||||
for (auto & pg_item: pool_item.second.pg_config)
|
for (auto & pg_item: pool_item.second.pg_config)
|
||||||
|
|
|
@ -10,10 +10,9 @@
|
||||||
#include "timerfd_manager.h"
|
#include "timerfd_manager.h"
|
||||||
|
|
||||||
#define ETCD_CONFIG_WATCH_ID 1
|
#define ETCD_CONFIG_WATCH_ID 1
|
||||||
#define ETCD_PG_STATE_WATCH_ID 2
|
#define ETCD_OSD_STATE_WATCH_ID 2
|
||||||
#define ETCD_PG_HISTORY_WATCH_ID 3
|
#define ETCD_PG_STATE_WATCH_ID 3
|
||||||
#define ETCD_OSD_STATE_WATCH_ID 4
|
#define ETCD_TOTAL_WATCHES 3
|
||||||
#define ETCD_TOTAL_WATCHES 4
|
|
||||||
|
|
||||||
#define DEFAULT_BLOCK_SIZE 128*1024
|
#define DEFAULT_BLOCK_SIZE 128*1024
|
||||||
#define MIN_DATA_BLOCK_SIZE 4*1024
|
#define MIN_DATA_BLOCK_SIZE 4*1024
|
||||||
|
@ -95,7 +94,7 @@ protected:
|
||||||
std::string selected_etcd_address;
|
std::string selected_etcd_address;
|
||||||
std::vector<std::string> addresses_to_try;
|
std::vector<std::string> addresses_to_try;
|
||||||
std::vector<inode_watch_t*> watches;
|
std::vector<inode_watch_t*> watches;
|
||||||
http_co_t *etcd_watch_ws = NULL, *keepalive_client = NULL;
|
bool new_pg_config = false;
|
||||||
int ws_keepalive_timer = -1;
|
int ws_keepalive_timer = -1;
|
||||||
int ws_alive = 0;
|
int ws_alive = 0;
|
||||||
bool rand_initialized = false;
|
bool rand_initialized = false;
|
||||||
|
@ -115,8 +114,11 @@ public:
|
||||||
int log_level = 0;
|
int log_level = 0;
|
||||||
timerfd_manager_t *tfd = NULL;
|
timerfd_manager_t *tfd = NULL;
|
||||||
|
|
||||||
|
http_co_t *etcd_watch_ws = NULL, *keepalive_client = NULL;
|
||||||
int etcd_watches_initialised = 0;
|
int etcd_watches_initialised = 0;
|
||||||
uint64_t etcd_watch_revision = 0;
|
uint64_t etcd_watch_revision_config = 0;
|
||||||
|
uint64_t etcd_watch_revision_osd = 0;
|
||||||
|
uint64_t etcd_watch_revision_pg = 0;
|
||||||
std::map<pool_id_t, pool_config_t> pool_config;
|
std::map<pool_id_t, pool_config_t> pool_config;
|
||||||
std::map<osd_num_t, json11::Json> peer_states;
|
std::map<osd_num_t, json11::Json> peer_states;
|
||||||
std::set<osd_num_t> seen_peers;
|
std::set<osd_num_t> seen_peers;
|
||||||
|
|
|
@ -253,7 +253,7 @@ nla_put_failure:
|
||||||
const char *exe_name = NULL;
|
const char *exe_name = NULL;
|
||||||
|
|
||||||
const char *help_text =
|
const char *help_text =
|
||||||
"Vitastor NBD proxy " VERSION "\n"
|
"Vitastor NBD proxy " VITASTOR_VERSION "\n"
|
||||||
"(c) Vitaliy Filippov, 2020+ (VNPL-1.1)\n"
|
"(c) Vitaliy Filippov, 2020+ (VNPL-1.1)\n"
|
||||||
"\n"
|
"\n"
|
||||||
"COMMANDS:\n"
|
"COMMANDS:\n"
|
||||||
|
|
|
@ -17,7 +17,7 @@
|
||||||
static const char *exe_name = NULL;
|
static const char *exe_name = NULL;
|
||||||
|
|
||||||
static const char* help_text =
|
static const char* help_text =
|
||||||
"Vitastor command-line tool " VERSION "\n"
|
"Vitastor command-line tool " VITASTOR_VERSION "\n"
|
||||||
"(c) Vitaliy Filippov, 2019+ (VNPL-1.1)\n"
|
"(c) Vitaliy Filippov, 2019+ (VNPL-1.1)\n"
|
||||||
"\n"
|
"\n"
|
||||||
"COMMANDS:\n"
|
"COMMANDS:\n"
|
||||||
|
|
|
@ -49,8 +49,8 @@ struct pg_lister_t
|
||||||
{ "success", json11::Json::array {
|
{ "success", json11::Json::array {
|
||||||
json11::Json::object {
|
json11::Json::object {
|
||||||
{ "request_range", json11::Json::object {
|
{ "request_range", json11::Json::object {
|
||||||
{ "key", base64_encode(parent->cli->st_cli.etcd_prefix+"/pg/stats"+(pool_id ? "/"+std::to_string(pool_id)+"/" : "/")) },
|
{ "key", base64_encode(parent->cli->st_cli.etcd_prefix+"/pgstats"+(pool_id ? "/"+std::to_string(pool_id)+"/" : "/")) },
|
||||||
{ "range_end", base64_encode(parent->cli->st_cli.etcd_prefix+"/pg/stats"+(pool_id ? "/"+std::to_string(pool_id)+"0" : "0")) },
|
{ "range_end", base64_encode(parent->cli->st_cli.etcd_prefix+"/pgstats"+(pool_id ? "/"+std::to_string(pool_id)+"0" : "0")) },
|
||||||
} },
|
} },
|
||||||
},
|
},
|
||||||
} },
|
} },
|
||||||
|
@ -65,7 +65,7 @@ resume_1:
|
||||||
state = 100;
|
state = 100;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
parent->iterate_kvs_2(parent->etcd_result["responses"][0]["response_range"]["kvs"], "/pg/stats/", [&](pool_id_t pool_id, uint64_t pg_num, json11::Json value)
|
parent->iterate_kvs_2(parent->etcd_result["responses"][0]["response_range"]["kvs"], "/pgstats/", [&](pool_id_t pool_id, uint64_t pg_num, json11::Json value)
|
||||||
{
|
{
|
||||||
pg_stats[(pool_pg_num_t){ .pool_id = pool_id, .pg_num = (pg_num_t)pg_num }] = value;
|
pg_stats[(pool_pg_num_t){ .pool_id = pool_id, .pg_num = (pg_num_t)pg_num }] = value;
|
||||||
});
|
});
|
||||||
|
|
|
@ -214,10 +214,10 @@ resume_1:
|
||||||
json11::Json::object {
|
json11::Json::object {
|
||||||
{ "request_range", json11::Json::object {
|
{ "request_range", json11::Json::object {
|
||||||
{ "key", base64_encode(
|
{ "key", base64_encode(
|
||||||
parent->cli->st_cli.etcd_prefix+"/pg/stats/"
|
parent->cli->st_cli.etcd_prefix+"/pgstats/"
|
||||||
) },
|
) },
|
||||||
{ "range_end", base64_encode(
|
{ "range_end", base64_encode(
|
||||||
parent->cli->st_cli.etcd_prefix+"/pg/stats0"
|
parent->cli->st_cli.etcd_prefix+"/pgstats0"
|
||||||
) },
|
) },
|
||||||
} },
|
} },
|
||||||
},
|
},
|
||||||
|
@ -235,7 +235,7 @@ resume_1:
|
||||||
}
|
}
|
||||||
// Calculate recovery percent
|
// Calculate recovery percent
|
||||||
std::map<pool_id_t, object_counts_t> counts;
|
std::map<pool_id_t, object_counts_t> counts;
|
||||||
parent->iterate_kvs_2(parent->etcd_result["responses"][0]["response_range"]["kvs"], "/pg/stats/",
|
parent->iterate_kvs_2(parent->etcd_result["responses"][0]["response_range"]["kvs"], "/pgstats/",
|
||||||
[&](pool_id_t pool_id, uint64_t pg_num, json11::Json value)
|
[&](pool_id_t pool_id, uint64_t pg_num, json11::Json value)
|
||||||
{
|
{
|
||||||
auto & cnt = counts[pool_id];
|
auto & cnt = counts[pool_id];
|
||||||
|
|
|
@ -176,7 +176,7 @@ struct rm_osd_t
|
||||||
json11::Json::object {
|
json11::Json::object {
|
||||||
{ "request_range", json11::Json::object {
|
{ "request_range", json11::Json::object {
|
||||||
{ "key", base64_encode(
|
{ "key", base64_encode(
|
||||||
parent->cli->st_cli.etcd_prefix+"/config/pgs"
|
parent->cli->st_cli.etcd_prefix+"/pg/config"
|
||||||
) },
|
) },
|
||||||
} },
|
} },
|
||||||
},
|
},
|
||||||
|
@ -229,7 +229,7 @@ struct rm_osd_t
|
||||||
}
|
}
|
||||||
if (!new_pgs.is_null())
|
if (!new_pgs.is_null())
|
||||||
{
|
{
|
||||||
auto pgs_key = base64_encode(parent->cli->st_cli.etcd_prefix+"/config/pgs");
|
auto pgs_key = base64_encode(parent->cli->st_cli.etcd_prefix+"/pg/config");
|
||||||
rm_items.push_back(json11::Json::object {
|
rm_items.push_back(json11::Json::object {
|
||||||
{ "request_put", json11::Json::object {
|
{ "request_put", json11::Json::object {
|
||||||
{ "key", pgs_key },
|
{ "key", pgs_key },
|
||||||
|
@ -427,7 +427,7 @@ struct rm_osd_t
|
||||||
{ "target", "MOD" },
|
{ "target", "MOD" },
|
||||||
{ "key", history_key },
|
{ "key", history_key },
|
||||||
{ "result", "LESS" },
|
{ "result", "LESS" },
|
||||||
{ "mod_revision", parent->cli->st_cli.etcd_watch_revision+1 },
|
{ "mod_revision", parent->cli->st_cli.etcd_watch_revision_pg+1 },
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -5,7 +5,7 @@
|
||||||
#include "str_util.h"
|
#include "str_util.h"
|
||||||
|
|
||||||
static const char *help_text =
|
static const char *help_text =
|
||||||
"Vitastor disk management tool " VERSION "\n"
|
"Vitastor disk management tool " VITASTOR_VERSION "\n"
|
||||||
"(c) Vitaliy Filippov, 2022+ (VNPL-1.1)\n"
|
"(c) Vitaliy Filippov, 2022+ (VNPL-1.1)\n"
|
||||||
"\n"
|
"\n"
|
||||||
"COMMANDS:\n"
|
"COMMANDS:\n"
|
||||||
|
|
|
@ -10,7 +10,7 @@ set_target_properties(vitastor_kv PROPERTIES PUBLIC_HEADER "kv/vitastor_kv.h")
|
||||||
target_link_libraries(vitastor_kv
|
target_link_libraries(vitastor_kv
|
||||||
vitastor_client
|
vitastor_client
|
||||||
)
|
)
|
||||||
set_target_properties(vitastor_kv PROPERTIES VERSION ${VERSION} SOVERSION 0)
|
set_target_properties(vitastor_kv PROPERTIES VERSION ${VITASTOR_VERSION} SOVERSION 0)
|
||||||
|
|
||||||
# vitastor-kv
|
# vitastor-kv
|
||||||
add_executable(vitastor-kv
|
add_executable(vitastor-kv
|
||||||
|
|
|
@ -53,7 +53,7 @@ nfs_proxy_t::~nfs_proxy_t()
|
||||||
}
|
}
|
||||||
|
|
||||||
static const char* help_text =
|
static const char* help_text =
|
||||||
"Vitastor NFS 3.0 proxy " VERSION "\n"
|
"Vitastor NFS 3.0 proxy " VITASTOR_VERSION "\n"
|
||||||
"(c) Vitaliy Filippov, 2021+ (VNPL-1.1)\n"
|
"(c) Vitaliy Filippov, 2021+ (VNPL-1.1)\n"
|
||||||
"\n"
|
"\n"
|
||||||
"vitastor-nfs (--fs <NAME> | --block) [-o <OPT>] mount <MOUNTPOINT>\n"
|
"vitastor-nfs (--fs <NAME> | --block) [-o <OPT>] mount <MOUNTPOINT>\n"
|
||||||
|
@ -372,24 +372,6 @@ void nfs_proxy_t::watch_stats()
|
||||||
assert(cli->st_cli.on_start_watcher_hook == NULL);
|
assert(cli->st_cli.on_start_watcher_hook == NULL);
|
||||||
cli->st_cli.on_start_watcher_hook = [this](http_co_t *etcd_watch_ws)
|
cli->st_cli.on_start_watcher_hook = [this](http_co_t *etcd_watch_ws)
|
||||||
{
|
{
|
||||||
http_post_message(etcd_watch_ws, WS_TEXT, json11::Json(json11::Json::object {
|
|
||||||
{ "create_request", json11::Json::object {
|
|
||||||
{ "key", base64_encode(cli->st_cli.etcd_prefix+"/inode/stats/") },
|
|
||||||
{ "range_end", base64_encode(cli->st_cli.etcd_prefix+"/inode/stats0") },
|
|
||||||
{ "start_revision", cli->st_cli.etcd_watch_revision },
|
|
||||||
{ "watch_id", ETCD_INODE_STATS_WATCH_ID },
|
|
||||||
{ "progress_notify", true },
|
|
||||||
} }
|
|
||||||
}).dump());
|
|
||||||
http_post_message(etcd_watch_ws, WS_TEXT, json11::Json(json11::Json::object {
|
|
||||||
{ "create_request", json11::Json::object {
|
|
||||||
{ "key", base64_encode(cli->st_cli.etcd_prefix+"/pool/stats/") },
|
|
||||||
{ "range_end", base64_encode(cli->st_cli.etcd_prefix+"/pool/stats0") },
|
|
||||||
{ "start_revision", cli->st_cli.etcd_watch_revision },
|
|
||||||
{ "watch_id", ETCD_POOL_STATS_WATCH_ID },
|
|
||||||
{ "progress_notify", true },
|
|
||||||
} }
|
|
||||||
}).dump());
|
|
||||||
cli->st_cli.etcd_txn_slow(json11::Json::object {
|
cli->st_cli.etcd_txn_slow(json11::Json::object {
|
||||||
{ "success", json11::Json::array {
|
{ "success", json11::Json::array {
|
||||||
json11::Json::object {
|
json11::Json::object {
|
||||||
|
@ -415,6 +397,28 @@ void nfs_proxy_t::watch_stats()
|
||||||
parse_stats(kv);
|
parse_stats(kv);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (cli->st_cli.etcd_watch_ws)
|
||||||
|
{
|
||||||
|
auto watch_rev = res["header"]["revision"].uint64_value()+1;
|
||||||
|
http_post_message(cli->st_cli.etcd_watch_ws, WS_TEXT, json11::Json(json11::Json::object {
|
||||||
|
{ "create_request", json11::Json::object {
|
||||||
|
{ "key", base64_encode(cli->st_cli.etcd_prefix+"/inode/stats/") },
|
||||||
|
{ "range_end", base64_encode(cli->st_cli.etcd_prefix+"/inode/stats0") },
|
||||||
|
{ "start_revision", watch_rev },
|
||||||
|
{ "watch_id", ETCD_INODE_STATS_WATCH_ID },
|
||||||
|
{ "progress_notify", true },
|
||||||
|
} }
|
||||||
|
}).dump());
|
||||||
|
http_post_message(cli->st_cli.etcd_watch_ws, WS_TEXT, json11::Json(json11::Json::object {
|
||||||
|
{ "create_request", json11::Json::object {
|
||||||
|
{ "key", base64_encode(cli->st_cli.etcd_prefix+"/pool/stats/") },
|
||||||
|
{ "range_end", base64_encode(cli->st_cli.etcd_prefix+"/pool/stats0") },
|
||||||
|
{ "start_revision", watch_rev },
|
||||||
|
{ "watch_id", ETCD_POOL_STATS_WATCH_ID },
|
||||||
|
{ "progress_notify", true },
|
||||||
|
} }
|
||||||
|
}).dump());
|
||||||
|
}
|
||||||
});
|
});
|
||||||
};
|
};
|
||||||
cli->st_cli.on_change_hook = [this, old_hook = cli->st_cli.on_change_hook](std::map<std::string, etcd_kv_t> & changes)
|
cli->st_cli.on_change_hook = [this, old_hook = cli->st_cli.on_change_hook](std::map<std::string, etcd_kv_t> & changes)
|
||||||
|
|
|
@ -169,6 +169,7 @@ json11::Json osd_t::get_osd_state()
|
||||||
else
|
else
|
||||||
st["addresses"] = getifaddr_list();
|
st["addresses"] = getifaddr_list();
|
||||||
st["host"] = std::string(hostname.data(), hostname.size());
|
st["host"] = std::string(hostname.data(), hostname.size());
|
||||||
|
st["version"] = VITASTOR_VERSION;
|
||||||
st["port"] = listening_port;
|
st["port"] = listening_port;
|
||||||
st["primary_enabled"] = run_primary;
|
st["primary_enabled"] = run_primary;
|
||||||
st["blockstore_enabled"] = bs ? true : false;
|
st["blockstore_enabled"] = bs ? true : false;
|
||||||
|
@ -199,6 +200,7 @@ json11::Json osd_t::get_statistics()
|
||||||
st["bitmap_granularity"] = (uint64_t)bs_bitmap_granularity;
|
st["bitmap_granularity"] = (uint64_t)bs_bitmap_granularity;
|
||||||
st["immediate_commit"] = immediate_commit == IMMEDIATE_ALL ? "all" : (immediate_commit == IMMEDIATE_SMALL ? "small" : "none");
|
st["immediate_commit"] = immediate_commit == IMMEDIATE_ALL ? "all" : (immediate_commit == IMMEDIATE_SMALL ? "small" : "none");
|
||||||
st["host"] = self_state["host"];
|
st["host"] = self_state["host"];
|
||||||
|
st["version"] = VITASTOR_VERSION;
|
||||||
json11::Json::object op_stats, subop_stats;
|
json11::Json::object op_stats, subop_stats;
|
||||||
for (int i = OSD_OP_MIN; i <= OSD_OP_MAX; i++)
|
for (int i = OSD_OP_MIN; i <= OSD_OP_MAX; i++)
|
||||||
{
|
{
|
||||||
|
@ -371,7 +373,7 @@ void osd_t::report_statistics()
|
||||||
pg_stats["write_osd_set"] = pg.cur_set;
|
pg_stats["write_osd_set"] = pg.cur_set;
|
||||||
txn.push_back(json11::Json::object {
|
txn.push_back(json11::Json::object {
|
||||||
{ "request_put", json11::Json::object {
|
{ "request_put", json11::Json::object {
|
||||||
{ "key", base64_encode(st_cli.etcd_prefix+"/pg/stats/"+std::to_string(pg.pool_id)+"/"+std::to_string(pg.pg_num)) },
|
{ "key", base64_encode(st_cli.etcd_prefix+"/pgstats/"+std::to_string(pg.pool_id)+"/"+std::to_string(pg.pg_num)) },
|
||||||
{ "value", base64_encode(json11::Json(pg_stats).dump()) },
|
{ "value", base64_encode(json11::Json(pg_stats).dump()) },
|
||||||
} }
|
} }
|
||||||
});
|
});
|
||||||
|
@ -418,7 +420,7 @@ void osd_t::on_change_etcd_state_hook(std::map<std::string, etcd_kv_t> & changes
|
||||||
}
|
}
|
||||||
if (run_primary)
|
if (run_primary)
|
||||||
{
|
{
|
||||||
bool pgs = changes.find(st_cli.etcd_prefix+"/config/pgs") != changes.end();
|
bool pgs = changes.find(st_cli.etcd_prefix+"/pg/config") != changes.end();
|
||||||
if (pools || pgs)
|
if (pools || pgs)
|
||||||
{
|
{
|
||||||
apply_pg_count();
|
apply_pg_count();
|
||||||
|
@ -903,7 +905,7 @@ void osd_t::report_pg_states()
|
||||||
{ "target", "MOD" },
|
{ "target", "MOD" },
|
||||||
{ "key", state_key_base64 },
|
{ "key", state_key_base64 },
|
||||||
{ "result", "LESS" },
|
{ "result", "LESS" },
|
||||||
{ "mod_revision", st_cli.etcd_watch_revision+1 },
|
{ "mod_revision", st_cli.etcd_watch_revision_pg+1 },
|
||||||
});
|
});
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
@ -974,7 +976,7 @@ void osd_t::report_pg_states()
|
||||||
{ "target", "MOD" },
|
{ "target", "MOD" },
|
||||||
{ "key", history_key },
|
{ "key", history_key },
|
||||||
{ "result", "LESS" },
|
{ "result", "LESS" },
|
||||||
{ "mod_revision", st_cli.etcd_watch_revision+1 },
|
{ "mod_revision", st_cli.etcd_watch_revision_pg+1 },
|
||||||
});
|
});
|
||||||
success.push_back(json11::Json::object {
|
success.push_back(json11::Json::object {
|
||||||
{ "request_put", json11::Json::object {
|
{ "request_put", json11::Json::object {
|
||||||
|
|
|
@ -20,7 +20,7 @@ static void handle_sigint(int sig)
|
||||||
}
|
}
|
||||||
|
|
||||||
static const char* help_text =
|
static const char* help_text =
|
||||||
"Vitastor OSD (block object storage daemon) " VERSION "\n"
|
"Vitastor OSD (block object storage daemon) " VITASTOR_VERSION "\n"
|
||||||
"(c) Vitaliy Filippov, 2019+ (VNPL-1.1)\n"
|
"(c) Vitaliy Filippov, 2019+ (VNPL-1.1)\n"
|
||||||
"\n"
|
"\n"
|
||||||
"OSDs are usually started by vitastor-disk.\n"
|
"OSDs are usually started by vitastor-disk.\n"
|
||||||
|
|
|
@ -22,7 +22,7 @@ void configure_single_pg_pool(cluster_client_t *cli)
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
cli->st_cli.parse_state((etcd_kv_t){
|
cli->st_cli.parse_state((etcd_kv_t){
|
||||||
.key = "/config/pgs",
|
.key = "/pg/config",
|
||||||
.value = json11::Json::object {
|
.value = json11::Json::object {
|
||||||
{ "items", json11::Json::object {
|
{ "items", json11::Json::object {
|
||||||
{ "1", json11::Json::object {
|
{ "1", json11::Json::object {
|
||||||
|
|
|
@ -25,11 +25,22 @@ ETCD_IP=${ETCD_IP:-127.0.0.1}
|
||||||
ETCD_PORT=${ETCD_PORT:-12379}
|
ETCD_PORT=${ETCD_PORT:-12379}
|
||||||
ETCD_COUNT=${ETCD_COUNT:-1}
|
ETCD_COUNT=${ETCD_COUNT:-1}
|
||||||
ANTIETCD=${ANTIETCD}
|
ANTIETCD=${ANTIETCD}
|
||||||
|
USE_RAMDISK=${USE_RAMDISK}
|
||||||
|
|
||||||
if [ "$KEEP_DATA" = "" ]; then
|
RAMDISK=/run/user/$(id -u)
|
||||||
|
findmnt $RAMDISK >/dev/null || (sudo mkdir -p $RAMDISK && sudo mount -t tmpfs tmpfs $RAMDISK)
|
||||||
|
|
||||||
|
if [[ -z "$KEEP_DATA" ]]; then
|
||||||
rm -rf ./testdata
|
rm -rf ./testdata
|
||||||
rm -rf /run/user/$(id -u)/testdata_etcd*
|
rm -rf /run/user/$(id -u)/testdata_etcd* /run/user/$(id -u)/testdata_bin
|
||||||
mkdir -p ./testdata
|
mkdir -p ./testdata
|
||||||
|
if [[ -n "$USE_RAMDISK" ]]; then
|
||||||
|
OSD_ARGS="$OSD_ARGS --data_io cached"
|
||||||
|
mkdir -p /run/user/$(id -u)/testdata_bin
|
||||||
|
ln -s /run/user/$(id -u)/testdata_bin ./testdata/bin
|
||||||
|
else
|
||||||
|
mkdir -p ./testdata/bin
|
||||||
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
ETCD_URL="http://$ETCD_IP:$ETCD_PORT"
|
ETCD_URL="http://$ETCD_IP:$ETCD_PORT"
|
||||||
|
@ -41,9 +52,7 @@ start_etcd()
|
||||||
{
|
{
|
||||||
local i=$1
|
local i=$1
|
||||||
if [[ -z "$ANTIETCD" ]]; then
|
if [[ -z "$ANTIETCD" ]]; then
|
||||||
local t=/run/user/$(id -u)
|
ionice -c2 -n0 $ETCD -name etcd$i --data-dir $RAMDISK/testdata_etcd$i \
|
||||||
findmnt $t >/dev/null || (sudo mkdir -p $t && sudo mount -t tmpfs tmpfs $t)
|
|
||||||
ionice -c2 -n0 $ETCD -name etcd$i --data-dir /run/user/$(id -u)/testdata_etcd$i \
|
|
||||||
--advertise-client-urls http://$ETCD_IP:$((ETCD_PORT+2*i-2)) --listen-client-urls http://$ETCD_IP:$((ETCD_PORT+2*i-2)) \
|
--advertise-client-urls http://$ETCD_IP:$((ETCD_PORT+2*i-2)) --listen-client-urls http://$ETCD_IP:$((ETCD_PORT+2*i-2)) \
|
||||||
--initial-advertise-peer-urls http://$ETCD_IP:$((ETCD_PORT+2*i-1)) --listen-peer-urls http://$ETCD_IP:$((ETCD_PORT+2*i-1)) \
|
--initial-advertise-peer-urls http://$ETCD_IP:$((ETCD_PORT+2*i-1)) --listen-peer-urls http://$ETCD_IP:$((ETCD_PORT+2*i-1)) \
|
||||||
--initial-cluster-token vitastor-tests-etcd --initial-cluster-state new \
|
--initial-cluster-token vitastor-tests-etcd --initial-cluster-state new \
|
||||||
|
|
|
@ -30,10 +30,10 @@ wait_etcd
|
||||||
|
|
||||||
if [ "$IMMEDIATE_COMMIT" != "" ]; then
|
if [ "$IMMEDIATE_COMMIT" != "" ]; then
|
||||||
NO_SAME="--journal_no_same_sector_overwrites true --journal_sector_buffer_count 1024 --disable_data_fsync 1 --immediate_commit all --log_level 10 --etcd_stats_interval 5"
|
NO_SAME="--journal_no_same_sector_overwrites true --journal_sector_buffer_count 1024 --disable_data_fsync 1 --immediate_commit all --log_level 10 --etcd_stats_interval 5"
|
||||||
$ETCDCTL put /vitastor/config/global '{"recovery_queue_depth":1,"recovery_tune_util_low":1,"immediate_commit":"all","client_enable_writeback":true,"client_max_writeback_iodepth":32'$GLOBAL_CONFIG'}'
|
$ETCDCTL put /vitastor/config/global '{"recovery_queue_depth":1,"recovery_tune_util_low":1,"immediate_commit":"all","client_writeback_allowed":true,"client_enable_writeback":true,"client_max_writeback_iodepth":32'$GLOBAL_CONFIG'}'
|
||||||
else
|
else
|
||||||
NO_SAME="--journal_sector_buffer_count 1024 --log_level 10 --etcd_stats_interval 5 --min_flusher_count 16"
|
NO_SAME="--journal_sector_buffer_count 1024 --log_level 10 --etcd_stats_interval 5 --min_flusher_count 16"
|
||||||
$ETCDCTL put /vitastor/config/global '{"recovery_queue_depth":1,"recovery_tune_util_low":1,"immediate_commit":"none","client_enable_writeback":true,"client_max_writeback_iodepth":32'$GLOBAL_CONFIG'}'
|
$ETCDCTL put /vitastor/config/global '{"recovery_queue_depth":1,"recovery_tune_util_low":1,"immediate_commit":"none","client_writeback_allowed":true,"client_enable_writeback":true,"client_max_writeback_iodepth":32'$GLOBAL_CONFIG'}'
|
||||||
fi
|
fi
|
||||||
|
|
||||||
start_osd_on()
|
start_osd_on()
|
||||||
|
@ -50,8 +50,8 @@ if ! type -t osd_dev; then
|
||||||
osd_dev()
|
osd_dev()
|
||||||
{
|
{
|
||||||
local i=$1
|
local i=$1
|
||||||
[[ -f ./testdata/test_osd$i.bin ]] || dd if=/dev/zero of=./testdata/test_osd$i.bin bs=1024 count=1 seek=$((OSD_SIZE*1024-1))
|
[[ -f ./testdata/bin/test_osd$i.bin ]] || dd if=/dev/zero of=./testdata/bin/test_osd$i.bin bs=1024 count=1 seek=$((OSD_SIZE*1024-1))
|
||||||
echo ./testdata/test_osd$i.bin
|
echo ./testdata/bin/test_osd$i.bin
|
||||||
}
|
}
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
@ -89,7 +89,7 @@ wait_up()
|
||||||
local i=0
|
local i=0
|
||||||
local configured=0
|
local configured=0
|
||||||
while [[ $i -lt $sec ]]; do
|
while [[ $i -lt $sec ]]; do
|
||||||
if $ETCDCTL get /vitastor/config/pgs --print-value-only | jq -s -e '(. | length) != 0 and ([ .[0].items["1"][] |
|
if $ETCDCTL get /vitastor/pg/config --print-value-only | jq -s -e '(. | length) != 0 and ([ .[0].items["1"][] |
|
||||||
select(((.osd_set | select(. != 0) | sort | unique) | length) == '$PG_SIZE') ] | length) == '$PG_COUNT; then
|
select(((.osd_set | select(. != 0) | sort | unique) | length) == '$PG_SIZE') ] | length) == '$PG_COUNT; then
|
||||||
configured=1
|
configured=1
|
||||||
if $ETCDCTL get /vitastor/pg/state/1/ --prefix --print-value-only | jq -s -e '[ .[] | select(.state == ["active"]) ] | length == '$PG_COUNT; then
|
if $ETCDCTL get /vitastor/pg/state/1/ --prefix --print-value-only | jq -s -e '[ .[] | select(.state == ["active"]) ] | length == '$PG_COUNT; then
|
||||||
|
|
|
@ -13,14 +13,14 @@ start_osd 4
|
||||||
sleep 2
|
sleep 2
|
||||||
|
|
||||||
for i in {1..30}; do
|
for i in {1..30}; do
|
||||||
($ETCDCTL get /vitastor/config/pgs --print-value-only |\
|
($ETCDCTL get /vitastor/pg/config --print-value-only |\
|
||||||
jq -s -e '([ .[0].items["1"] | map(.osd_set)[][] ] | sort | unique == ["1","2","3","4"])') && \
|
jq -s -e '([ .[0].items["1"] | map(.osd_set)[][] ] | sort | unique == ["1","2","3","4"])') && \
|
||||||
($ETCDCTL get --prefix /vitastor/pg/state/ --print-value-only | jq -s -e '([ .[] | select(.state == ["active"]) ] | length) == '$PG_COUNT) && \
|
($ETCDCTL get --prefix /vitastor/pg/state/ --print-value-only | jq -s -e '([ .[] | select(.state == ["active"]) ] | length) == '$PG_COUNT) && \
|
||||||
break
|
break
|
||||||
sleep 1
|
sleep 1
|
||||||
done
|
done
|
||||||
|
|
||||||
if ! ($ETCDCTL get /vitastor/config/pgs --print-value-only |\
|
if ! ($ETCDCTL get /vitastor/pg/config --print-value-only |\
|
||||||
jq -s -e '([ .[0].items["1"] | map(.osd_set)[][] ] | sort | unique == ["1","2","3","4"])'); then
|
jq -s -e '([ .[0].items["1"] | map(.osd_set)[][] ] | sort | unique == ["1","2","3","4"])'); then
|
||||||
format_error "FAILED: OSD NOT ADDED INTO DISTRIBUTION"
|
format_error "FAILED: OSD NOT ADDED INTO DISTRIBUTION"
|
||||||
fi
|
fi
|
||||||
|
@ -35,14 +35,14 @@ build/src/cmd/vitastor-cli --etcd_address $ETCD_URL rm-osd --force 4
|
||||||
sleep 2
|
sleep 2
|
||||||
|
|
||||||
for i in {1..30}; do
|
for i in {1..30}; do
|
||||||
($ETCDCTL get /vitastor/config/pgs --print-value-only |\
|
($ETCDCTL get /vitastor/pg/config --print-value-only |\
|
||||||
jq -s -e '([ .[0].items["1"] | map(.osd_set)[][] ] | sort | unique == ["1","2","3"])') && \
|
jq -s -e '([ .[0].items["1"] | map(.osd_set)[][] ] | sort | unique == ["1","2","3"])') && \
|
||||||
($ETCDCTL get --prefix /vitastor/pg/state/ --print-value-only | jq -s -e '([ .[] | select(.state == ["active"] or .state == ["active", "left_on_dead"]) ] | length) == '$PG_COUNT'') && \
|
($ETCDCTL get --prefix /vitastor/pg/state/ --print-value-only | jq -s -e '([ .[] | select(.state == ["active"] or .state == ["active", "left_on_dead"]) ] | length) == '$PG_COUNT'') && \
|
||||||
break
|
break
|
||||||
sleep 1
|
sleep 1
|
||||||
done
|
done
|
||||||
|
|
||||||
if ! ($ETCDCTL get /vitastor/config/pgs --print-value-only |\
|
if ! ($ETCDCTL get /vitastor/pg/config --print-value-only |\
|
||||||
jq -s -e '([ .[0].items["1"] | map(.osd_set)[][] ] | sort | unique == ["1","2","3"])'); then
|
jq -s -e '([ .[0].items["1"] | map(.osd_set)[][] ] | sort | unique == ["1","2","3"])'); then
|
||||||
format_error "FAILED: OSD NOT REMOVED FROM DISTRIBUTION"
|
format_error "FAILED: OSD NOT REMOVED FROM DISTRIBUTION"
|
||||||
fi
|
fi
|
||||||
|
|
|
@ -23,7 +23,7 @@ try_change()
|
||||||
$ETCDCTL put /vitastor/config/pools '{"1":{'$POOLCFG',"pg_size":'$PG_SIZE',"pg_minsize":'$PG_MINSIZE',"pg_count":'$n'}}'
|
$ETCDCTL put /vitastor/config/pools '{"1":{'$POOLCFG',"pg_size":'$PG_SIZE',"pg_minsize":'$PG_MINSIZE',"pg_count":'$n'}}'
|
||||||
|
|
||||||
for i in {1..60}; do
|
for i in {1..60}; do
|
||||||
($ETCDCTL get /vitastor/config/pgs --print-value-only | jq -s -e '(.[0].items["1"] | map((.osd_set | select(. > 0)) | length == 2) | length) == '$n) && \
|
($ETCDCTL get /vitastor/pg/config --print-value-only | jq -s -e '(.[0].items["1"] | map((.osd_set | select(. > 0)) | length == 2) | length) == '$n) && \
|
||||||
($ETCDCTL get --prefix /vitastor/pg/state/ --print-value-only | jq -s -e '([ .[] | select(.state == ["active"] or .state == ["active", "has_misplaced"]) ] | length) == '$n'') && \
|
($ETCDCTL get --prefix /vitastor/pg/state/ --print-value-only | jq -s -e '([ .[] | select(.state == ["active"] or .state == ["active", "has_misplaced"]) ] | length) == '$n'') && \
|
||||||
break
|
break
|
||||||
sleep 1
|
sleep 1
|
||||||
|
@ -36,14 +36,14 @@ try_change()
|
||||||
sleep 1
|
sleep 1
|
||||||
done
|
done
|
||||||
|
|
||||||
if ! ($ETCDCTL get /vitastor/config/pgs --print-value-only | jq -s -e '(.[0].items["1"] | map((.osd_set | select(. > 0)) | length == 2) | length) == '$n); then
|
if ! ($ETCDCTL get /vitastor/pg/config --print-value-only | jq -s -e '(.[0].items["1"] | map((.osd_set | select(. > 0)) | length == 2) | length) == '$n); then
|
||||||
$ETCDCTL get /vitastor/config/pgs
|
$ETCDCTL get /vitastor/pg/config
|
||||||
$ETCDCTL get --prefix /vitastor/pg/state/
|
$ETCDCTL get --prefix /vitastor/pg/state/
|
||||||
format_error "FAILED: $n PGS NOT CONFIGURED"
|
format_error "FAILED: $n PGS NOT CONFIGURED"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if ! ($ETCDCTL get --prefix /vitastor/pg/state/ --print-value-only | jq -s -e '([ .[] | select(.state == ["active"]) ] | length) == '$n); then
|
if ! ($ETCDCTL get --prefix /vitastor/pg/state/ --print-value-only | jq -s -e '([ .[] | select(.state == ["active"]) ] | length) == '$n); then
|
||||||
$ETCDCTL get /vitastor/config/pgs
|
$ETCDCTL get /vitastor/pg/config
|
||||||
$ETCDCTL get --prefix /vitastor/pg/state/
|
$ETCDCTL get --prefix /vitastor/pg/state/
|
||||||
format_error "FAILED: $n PGS NOT UP"
|
format_error "FAILED: $n PGS NOT UP"
|
||||||
fi
|
fi
|
||||||
|
@ -53,7 +53,7 @@ try_change()
|
||||||
nobj=0
|
nobj=0
|
||||||
waittime=0
|
waittime=0
|
||||||
while [[ $nobj -ne $NOBJ && $waittime -lt 7 ]]; do
|
while [[ $nobj -ne $NOBJ && $waittime -lt 7 ]]; do
|
||||||
nobj=`$ETCDCTL get --prefix '/vitastor/pg/stats' --print-value-only | jq -s '[ .[].object_count ] | reduce .[] as $num (0; .+$num)'`
|
nobj=`$ETCDCTL get --prefix '/vitastor/pgstats' --print-value-only | jq -s '[ .[].object_count ] | reduce .[] as $num (0; .+$num)'`
|
||||||
if [[ $nobj -ne $NOBJ ]]; then
|
if [[ $nobj -ne $NOBJ ]]; then
|
||||||
waittime=$((waittime+1))
|
waittime=$((waittime+1))
|
||||||
sleep 1
|
sleep 1
|
||||||
|
|
|
@ -13,7 +13,7 @@ try_change()
|
||||||
$ETCDCTL put /vitastor/config/pools '{"1":{'$POOLCFG',"pg_size":'$s',"pg_minsize":'$PG_MINSIZE',"pg_count":'$n'}}'
|
$ETCDCTL put /vitastor/config/pools '{"1":{'$POOLCFG',"pg_size":'$s',"pg_minsize":'$PG_MINSIZE',"pg_count":'$n'}}'
|
||||||
|
|
||||||
for i in {1..10}; do
|
for i in {1..10}; do
|
||||||
($ETCDCTL get /vitastor/config/pgs --print-value-only |\
|
($ETCDCTL get /vitastor/pg/config --print-value-only |\
|
||||||
jq -s -e '(.[0].items["1"] | map( ([ .osd_set[] | select(. != 0) ] | length) == '$s' ) | length == '$n')
|
jq -s -e '(.[0].items["1"] | map( ([ .osd_set[] | select(. != 0) ] | length) == '$s' ) | length == '$n')
|
||||||
and ([ .[0].items["1"] | map(.osd_set)[][] ] | sort | unique == ["1","2","3"])') && \
|
and ([ .[0].items["1"] | map(.osd_set)[][] ] | sort | unique == ["1","2","3"])') && \
|
||||||
($ETCDCTL get --prefix /vitastor/pg/state/ --print-value-only | jq -s -e '([ .[] | select(.state == ["active"]) ] | length) == '$n'') && \
|
($ETCDCTL get --prefix /vitastor/pg/state/ --print-value-only | jq -s -e '([ .[] | select(.state == ["active"]) ] | length) == '$n'') && \
|
||||||
|
@ -21,16 +21,16 @@ try_change()
|
||||||
sleep 1
|
sleep 1
|
||||||
done
|
done
|
||||||
|
|
||||||
if ! ($ETCDCTL get /vitastor/config/pgs --print-value-only |\
|
if ! ($ETCDCTL get /vitastor/pg/config --print-value-only |\
|
||||||
jq -s -e '(.[0].items["1"] | map( ([ .osd_set[] | select(. != 0) ] | length) == '$s' ) | length == '$n')
|
jq -s -e '(.[0].items["1"] | map( ([ .osd_set[] | select(. != 0) ] | length) == '$s' ) | length == '$n')
|
||||||
and ([ .[0].items["1"] | map(.osd_set)[][] ] | sort | unique == ["1","2","3"])'); then
|
and ([ .[0].items["1"] | map(.osd_set)[][] ] | sort | unique == ["1","2","3"])'); then
|
||||||
$ETCDCTL get /vitastor/config/pgs
|
$ETCDCTL get /vitastor/pg/config
|
||||||
$ETCDCTL get --prefix /vitastor/pg/state/
|
$ETCDCTL get --prefix /vitastor/pg/state/
|
||||||
format_error "FAILED: PG SIZE NOT CHANGED OR SOME OSDS DO NOT HAVE PGS"
|
format_error "FAILED: PG SIZE NOT CHANGED OR SOME OSDS DO NOT HAVE PGS"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if ! ($ETCDCTL get --prefix /vitastor/pg/state/ --print-value-only | jq -s -e '([ .[] | select(.state == ["active"]) ] | length) == '$n); then
|
if ! ($ETCDCTL get --prefix /vitastor/pg/state/ --print-value-only | jq -s -e '([ .[] | select(.state == ["active"]) ] | length) == '$n); then
|
||||||
$ETCDCTL get /vitastor/config/pgs
|
$ETCDCTL get /vitastor/pg/config
|
||||||
$ETCDCTL get --prefix /vitastor/pg/state/
|
$ETCDCTL get --prefix /vitastor/pg/state/
|
||||||
format_error "FAILED: PGS NOT UP AFTER PG SIZE CHANGE"
|
format_error "FAILED: PGS NOT UP AFTER PG SIZE CHANGE"
|
||||||
fi
|
fi
|
||||||
|
|
|
@ -13,13 +13,13 @@ $ETCDCTL put /vitastor/config/inode/1/1 '{"name":"testimg","size":'$((IMG_SIZE*1
|
||||||
# Write
|
# Write
|
||||||
LD_PRELOAD="build/src/client/libfio_vitastor.so" \
|
LD_PRELOAD="build/src/client/libfio_vitastor.so" \
|
||||||
fio -thread -name=test -ioengine=build/src/client/libfio_vitastor.so -bs=1M -direct=1 -iodepth=4 \
|
fio -thread -name=test -ioengine=build/src/client/libfio_vitastor.so -bs=1M -direct=1 -iodepth=4 \
|
||||||
-mirror_file=./testdata/mirror.bin -end_fsync=1 -rw=write -etcd=$ETCD_URL -image=testimg -runtime=10
|
-mirror_file=./testdata/bin/mirror.bin -end_fsync=1 -rw=write -etcd=$ETCD_URL -image=testimg -runtime=10
|
||||||
|
|
||||||
# Intentionally corrupt OSD data and restart it
|
# Intentionally corrupt OSD data and restart it
|
||||||
kill $OSD1_PID
|
kill $OSD1_PID
|
||||||
data_offset=$(build/src/disk_tool/vitastor-disk simple-offsets ./testdata/test_osd1.bin $OFFSET_ARGS | grep data_offset | awk '{print $2}')
|
data_offset=$(build/src/disk_tool/vitastor-disk simple-offsets ./testdata/bin/test_osd1.bin $OFFSET_ARGS | grep data_offset | awk '{print $2}')
|
||||||
truncate -s $data_offset ./testdata/test_osd1.bin
|
truncate -s $data_offset ./testdata/bin/test_osd1.bin
|
||||||
dd if=/dev/zero of=./testdata/test_osd1.bin bs=1024 count=1 seek=$((OSD_SIZE*1024-1))
|
dd if=/dev/zero of=./testdata/bin/test_osd1.bin bs=1024 count=1 seek=$((OSD_SIZE*1024-1))
|
||||||
start_osd 1
|
start_osd 1
|
||||||
|
|
||||||
# FIXME: corrupt the journal WHEN OSD IS RUNNING and check reads too
|
# FIXME: corrupt the journal WHEN OSD IS RUNNING and check reads too
|
||||||
|
@ -30,8 +30,8 @@ wait_up 10
|
||||||
# Read everything back
|
# Read everything back
|
||||||
qemu-img convert -S 4096 -p \
|
qemu-img convert -S 4096 -p \
|
||||||
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testimg" \
|
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testimg" \
|
||||||
-O raw ./testdata/read.bin
|
-O raw ./testdata/bin/read.bin
|
||||||
|
|
||||||
diff ./testdata/read.bin ./testdata/mirror.bin
|
diff ./testdata/bin/read.bin ./testdata/bin/mirror.bin
|
||||||
|
|
||||||
format_green OK
|
format_green OK
|
||||||
|
|
|
@ -28,7 +28,7 @@ $ETCDCTL get --print-value-only /vitastor/config/pools | jq -s -e '. == [{"1":{"
|
||||||
|
|
||||||
sleep 2
|
sleep 2
|
||||||
|
|
||||||
$ETCDCTL get --prefix /vitastor/config/pgs --print-value-only | \
|
$ETCDCTL get --prefix /vitastor/pg/config --print-value-only | \
|
||||||
jq -s -e '([ .[0].items["1"] | .[].osd_set | map_values(. | tonumber) | select((.[0] <= 4) != (.[1] <= 4)) ] | length) == 4'
|
jq -s -e '([ .[0].items["1"] | .[].osd_set | map_values(. | tonumber) | select((.[0] <= 4) != (.[1] <= 4)) ] | length) == 4'
|
||||||
|
|
||||||
format_green OK
|
format_green OK
|
||||||
|
|
|
@ -20,7 +20,7 @@ $ETCDCTL put /vitastor/config/inode/1/1 '{"name":"testimg","size":'$((IMG_SIZE*1
|
||||||
|
|
||||||
LD_PRELOAD="build/src/client/libfio_vitastor.so" \
|
LD_PRELOAD="build/src/client/libfio_vitastor.so" \
|
||||||
fio -thread -name=test -ioengine=build/src/client/libfio_vitastor.so -bs=4M -direct=1 -iodepth=1 -fsync=1 -rw=write \
|
fio -thread -name=test -ioengine=build/src/client/libfio_vitastor.so -bs=4M -direct=1 -iodepth=1 -fsync=1 -rw=write \
|
||||||
-mirror_file=./testdata/mirror.bin -etcd=$ETCD_URL -image=testimg -cluster_log_level=10
|
-mirror_file=./testdata/bin/mirror.bin -etcd=$ETCD_URL -image=testimg -cluster_log_level=10
|
||||||
|
|
||||||
kill_osds()
|
kill_osds()
|
||||||
{
|
{
|
||||||
|
@ -53,13 +53,13 @@ kill_osds &
|
||||||
|
|
||||||
LD_PRELOAD="build/src/client/libfio_vitastor.so" \
|
LD_PRELOAD="build/src/client/libfio_vitastor.so" \
|
||||||
fio -thread -name=test -ioengine=build/src/client/libfio_vitastor.so -bsrange=4k-128k -blockalign=4k -direct=1 -iodepth=32 -fsync=256 -rw=randrw \
|
fio -thread -name=test -ioengine=build/src/client/libfio_vitastor.so -bsrange=4k-128k -blockalign=4k -direct=1 -iodepth=32 -fsync=256 -rw=randrw \
|
||||||
-randrepeat=0 -refill_buffers=1 -mirror_file=./testdata/mirror.bin -etcd=$ETCD_URL -image=testimg -loops=10 -runtime=120
|
-randrepeat=0 -refill_buffers=1 -mirror_file=./testdata/bin/mirror.bin -etcd=$ETCD_URL -image=testimg -loops=10 -runtime=120
|
||||||
|
|
||||||
qemu-img convert -S 4096 -p \
|
qemu-img convert -S 4096 -p \
|
||||||
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testimg" \
|
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testimg" \
|
||||||
-O raw ./testdata/read.bin
|
-O raw ./testdata/bin/read.bin
|
||||||
|
|
||||||
if ! diff -q ./testdata/read.bin ./testdata/mirror.bin; then
|
if ! diff -q ./testdata/bin/read.bin ./testdata/bin/mirror.bin; then
|
||||||
format_error Data lost during self-heal
|
format_error Data lost during self-heal
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|
|
@ -43,7 +43,7 @@ wait_finish_rebalance 300
|
||||||
#fi
|
#fi
|
||||||
|
|
||||||
# Check that no objects are lost !
|
# Check that no objects are lost !
|
||||||
nobj=`$ETCDCTL get --prefix '/vitastor/pg/stats' --print-value-only | jq -s '[ .[].object_count ] | reduce .[] as $num (0; .+$num)'`
|
nobj=`$ETCDCTL get --prefix '/vitastor/pgstats' --print-value-only | jq -s '[ .[].object_count ] | reduce .[] as $num (0; .+$num)'`
|
||||||
if [ "$nobj" -ne $((IMG_SIZE*8/PG_DATA_SIZE)) ]; then
|
if [ "$nobj" -ne $((IMG_SIZE*8/PG_DATA_SIZE)) ]; then
|
||||||
format_error "Data lost after multiple interrupted rebalancings"
|
format_error "Data lost after multiple interrupted rebalancings"
|
||||||
fi
|
fi
|
||||||
|
|
|
@ -8,14 +8,14 @@ OSD_SIZE=1024
|
||||||
OSD_COUNT=5
|
OSD_COUNT=5
|
||||||
OSD_ARGS="$OSD_ARGS"
|
OSD_ARGS="$OSD_ARGS"
|
||||||
for i in $(seq 1 $OSD_COUNT); do
|
for i in $(seq 1 $OSD_COUNT); do
|
||||||
dd if=/dev/zero of=./testdata/test_osd$i.bin bs=1024 count=1 seek=$((OSD_SIZE*1024-1))
|
dd if=/dev/zero of=./testdata/bin/test_osd$i.bin bs=1024 count=1 seek=$((OSD_SIZE*1024-1))
|
||||||
build/src/osd/vitastor-osd --log_level 10 --osd_num $i --bind_address 127.0.0.1 --etcd_stats_interval 5 $OSD_ARGS --etcd_address $ETCD_URL $(build/src/disk_tool/vitastor-disk simple-offsets --format options ./testdata/test_osd$i.bin 2>/dev/null) >>./testdata/osd$i.log 2>&1 &
|
build/src/osd/vitastor-osd --log_level 10 --osd_num $i --bind_address 127.0.0.1 --etcd_stats_interval 5 $OSD_ARGS --etcd_address $ETCD_URL $(build/src/disk_tool/vitastor-disk simple-offsets --format options ./testdata/bin/test_osd$i.bin 2>/dev/null) >>./testdata/osd$i.log 2>&1 &
|
||||||
eval OSD${i}_PID=$!
|
eval OSD${i}_PID=$!
|
||||||
done
|
done
|
||||||
|
|
||||||
$ETCDCTL put /vitastor/config/pools '{"1":{"name":"testpool","scheme":"replicated","pg_size":2,"pg_minsize":1,"pg_count":1,"failure_domain":"osd","immediate_commit":"none"}}'
|
$ETCDCTL put /vitastor/config/pools '{"1":{"name":"testpool","scheme":"replicated","pg_size":2,"pg_minsize":1,"pg_count":1,"failure_domain":"osd","immediate_commit":"none"}}'
|
||||||
|
|
||||||
$ETCDCTL put /vitastor/config/pgs '{"items":{"1":{"1":{"osd_set":[1,0],"primary":1}}}}'
|
$ETCDCTL put /vitastor/pg/config '{"items":{"1":{"1":{"osd_set":[1,0],"primary":1}}}}'
|
||||||
|
|
||||||
for i in {1..30}; do
|
for i in {1..30}; do
|
||||||
sleep 1
|
sleep 1
|
||||||
|
@ -30,7 +30,7 @@ LD_PRELOAD="build/src/client/libfio_vitastor.so" \
|
||||||
fio -thread -name=test -ioengine=build/src/client/libfio_vitastor.so -bs=4M -direct=1 -iodepth=1 -fsync=1 -rw=write \
|
fio -thread -name=test -ioengine=build/src/client/libfio_vitastor.so -bs=4M -direct=1 -iodepth=1 -fsync=1 -rw=write \
|
||||||
-etcd=$ETCD_URL -pool=1 -inode=2 -size=32M -cluster_log_level=10
|
-etcd=$ETCD_URL -pool=1 -inode=2 -size=32M -cluster_log_level=10
|
||||||
|
|
||||||
$ETCDCTL put /vitastor/config/pgs '{"items":{"1":{"1":{"osd_set":[1,0],"primary":0}}}}'
|
$ETCDCTL put /vitastor/pg/config '{"items":{"1":{"1":{"osd_set":[1,0],"primary":0}}}}'
|
||||||
|
|
||||||
for i in {1..30}; do
|
for i in {1..30}; do
|
||||||
sleep 1
|
sleep 1
|
||||||
|
@ -43,7 +43,7 @@ done
|
||||||
|
|
||||||
$ETCDCTL put /vitastor/pg/history/1/1 '{"all_peers":[1,2,3]}'
|
$ETCDCTL put /vitastor/pg/history/1/1 '{"all_peers":[1,2,3]}'
|
||||||
|
|
||||||
$ETCDCTL put /vitastor/config/pgs '{"items":{"1":{"1":{"osd_set":[4,5],"primary":4}}}}'
|
$ETCDCTL put /vitastor/pg/config '{"items":{"1":{"1":{"osd_set":[4,5],"primary":4}}}}'
|
||||||
|
|
||||||
sleep 5
|
sleep 5
|
||||||
for i in {1..30}; do
|
for i in {1..30}; do
|
||||||
|
@ -60,7 +60,7 @@ LD_PRELOAD="build/src/client/libfio_vitastor.so" \
|
||||||
fio -thread -name=test -ioengine=build/src/client/libfio_vitastor.so -bs=4k -direct=1 -iodepth=1 -fsync=1 -number_ios=2 -rw=write \
|
fio -thread -name=test -ioengine=build/src/client/libfio_vitastor.so -bs=4k -direct=1 -iodepth=1 -fsync=1 -number_ios=2 -rw=write \
|
||||||
-etcd=$ETCD_URL -pool=1 -inode=2 -size=32M -cluster_log_level=10
|
-etcd=$ETCD_URL -pool=1 -inode=2 -size=32M -cluster_log_level=10
|
||||||
|
|
||||||
$ETCDCTL put /vitastor/config/pgs '{"items":{"1":{"1":{"osd_set":[4,5],"primary":0}}}}'
|
$ETCDCTL put /vitastor/pg/config '{"items":{"1":{"1":{"osd_set":[4,5],"primary":0}}}}'
|
||||||
|
|
||||||
$ETCDCTL put /vitastor/pg/history/1/1 '{"all_peers":[1,2,3]}'
|
$ETCDCTL put /vitastor/pg/history/1/1 '{"all_peers":[1,2,3]}'
|
||||||
|
|
||||||
|
@ -76,7 +76,7 @@ done
|
||||||
cp testdata/osd4.log testdata/osd4_pre.log
|
cp testdata/osd4.log testdata/osd4_pre.log
|
||||||
>testdata/osd4.log
|
>testdata/osd4.log
|
||||||
|
|
||||||
$ETCDCTL put /vitastor/config/pgs '{"items":{"1":{"1":{"osd_set":[4,5],"primary":4}}}}'
|
$ETCDCTL put /vitastor/pg/config '{"items":{"1":{"1":{"osd_set":[4,5],"primary":4}}}}'
|
||||||
|
|
||||||
for i in {1..30}; do
|
for i in {1..30}; do
|
||||||
sleep 1
|
sleep 1
|
||||||
|
|
|
@ -27,9 +27,9 @@ $ETCDCTL put /vitastor/config/pools '{"1":{"name":"testpool","scheme":"replicate
|
||||||
|
|
||||||
sleep 2
|
sleep 2
|
||||||
|
|
||||||
$ETCDCTL get --prefix /vitastor/config/pgs --print-value-only
|
$ETCDCTL get --prefix /vitastor/pg/config --print-value-only
|
||||||
|
|
||||||
if ! ($ETCDCTL get --prefix /vitastor/config/pgs --print-value-only | \
|
if ! ($ETCDCTL get --prefix /vitastor/pg/config --print-value-only | \
|
||||||
jq -s -e '[ [ .[] | select(has("items")) | .items["1"] | .[].osd_set | map(. | select(. != "" and (.|tonumber) < 5)) ][] | select((. | length) == 2) ] | length == 16'); then
|
jq -s -e '[ [ .[] | select(has("items")) | .items["1"] | .[].osd_set | map(. | select(. != "" and (.|tonumber) < 5)) ][] | select((. | length) == 2) ] | length == 16'); then
|
||||||
format_error "Some PGs missing replicas"
|
format_error "Some PGs missing replicas"
|
||||||
fi
|
fi
|
||||||
|
|
|
@ -16,7 +16,7 @@ try_change()
|
||||||
s=$2
|
s=$2
|
||||||
|
|
||||||
for i in {1..10}; do
|
for i in {1..10}; do
|
||||||
($ETCDCTL get /vitastor/config/pgs --print-value-only |\
|
($ETCDCTL get /vitastor/pg/config --print-value-only |\
|
||||||
jq -s -e '(.[0].items["1"] | map( ([ .osd_set[] | select(. != 0) ] | length) == '$s' ) | length == '$n')
|
jq -s -e '(.[0].items["1"] | map( ([ .osd_set[] | select(. != 0) ] | length) == '$s' ) | length == '$n')
|
||||||
and ([ .[0].items["1"] | map(.osd_set)[][] ] | sort | unique == ["1","2","3","4","5","6","7"])') && \
|
and ([ .[0].items["1"] | map(.osd_set)[][] ] | sort | unique == ["1","2","3","4","5","6","7"])') && \
|
||||||
($ETCDCTL get --prefix /vitastor/pg/state/ --print-value-only | jq -s -e '([ .[] | select(.state == ["active"]) ] | length) == '$n'') && \
|
($ETCDCTL get --prefix /vitastor/pg/state/ --print-value-only | jq -s -e '([ .[] | select(.state == ["active"]) ] | length) == '$n'') && \
|
||||||
|
@ -24,16 +24,16 @@ try_change()
|
||||||
sleep 1
|
sleep 1
|
||||||
done
|
done
|
||||||
|
|
||||||
if ! ($ETCDCTL get /vitastor/config/pgs --print-value-only |\
|
if ! ($ETCDCTL get /vitastor/pg/config --print-value-only |\
|
||||||
jq -s -e '(.[0].items["1"] | map( ([ .osd_set[] | select(. != 0) ] | length) == '$s' ) | length == '$n')
|
jq -s -e '(.[0].items["1"] | map( ([ .osd_set[] | select(. != 0) ] | length) == '$s' ) | length == '$n')
|
||||||
and ([ .[0].items["1"] | map(.osd_set)[][] ] | sort | unique == ["1","2","3","4","5","6","7"])'); then
|
and ([ .[0].items["1"] | map(.osd_set)[][] ] | sort | unique == ["1","2","3","4","5","6","7"])'); then
|
||||||
$ETCDCTL get /vitastor/config/pgs
|
$ETCDCTL get /vitastor/pg/config
|
||||||
$ETCDCTL get --prefix /vitastor/pg/state/
|
$ETCDCTL get --prefix /vitastor/pg/state/
|
||||||
format_error "FAILED: PG SIZE NOT CHANGED OR SOME OSDS DO NOT HAVE PGS"
|
format_error "FAILED: PG SIZE NOT CHANGED OR SOME OSDS DO NOT HAVE PGS"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if ! ($ETCDCTL get --prefix /vitastor/pg/state/ --print-value-only | jq -s -e '([ .[] | select(.state == ["active"]) ] | length) == '$n); then
|
if ! ($ETCDCTL get --prefix /vitastor/pg/state/ --print-value-only | jq -s -e '([ .[] | select(.state == ["active"]) ] | length) == '$n); then
|
||||||
$ETCDCTL get /vitastor/config/pgs
|
$ETCDCTL get /vitastor/pg/config
|
||||||
$ETCDCTL get --prefix /vitastor/pg/state/
|
$ETCDCTL get --prefix /vitastor/pg/state/
|
||||||
format_error "FAILED: PGS NOT UP AFTER PG SIZE CHANGE"
|
format_error "FAILED: PGS NOT UP AFTER PG SIZE CHANGE"
|
||||||
fi
|
fi
|
||||||
|
|
|
@ -16,14 +16,14 @@ trap "sudo build/src/client/vitastor-nbd unmap $NBD_DEV"'; kill -9 $(jobs -p)' E
|
||||||
|
|
||||||
sudo chown $(id -u) $NBD_DEV
|
sudo chown $(id -u) $NBD_DEV
|
||||||
|
|
||||||
dd if=/dev/urandom of=./testdata/img1.bin bs=1M count=$IMG_SIZE
|
dd if=/dev/urandom of=./testdata/bin/img1.bin bs=1M count=$IMG_SIZE
|
||||||
|
|
||||||
dd if=./testdata/img1.bin of=$NBD_DEV bs=1M count=$IMG_SIZE oflag=direct
|
dd if=./testdata/bin/img1.bin of=$NBD_DEV bs=1M count=$IMG_SIZE oflag=direct
|
||||||
|
|
||||||
verify() {
|
verify() {
|
||||||
echo "Verifying before rebalance"
|
echo "Verifying before rebalance"
|
||||||
dd if=$NBD_DEV of=./testdata/img2.bin bs=1M count=$IMG_SIZE iflag=direct
|
dd if=$NBD_DEV of=./testdata/bin/img2.bin bs=1M count=$IMG_SIZE iflag=direct
|
||||||
diff ./testdata/img1.bin ./testdata/img2.bin
|
diff ./testdata/bin/img1.bin ./testdata/bin/img2.bin
|
||||||
|
|
||||||
$ETCDCTL put /vitastor/config/osd/1 '{"reweight":'$1'}'
|
$ETCDCTL put /vitastor/config/osd/1 '{"reweight":'$1'}'
|
||||||
$ETCDCTL put /vitastor/config/osd/2 '{"reweight":'$1'}'
|
$ETCDCTL put /vitastor/config/osd/2 '{"reweight":'$1'}'
|
||||||
|
@ -31,18 +31,18 @@ verify() {
|
||||||
|
|
||||||
for i in {1..10000}; do
|
for i in {1..10000}; do
|
||||||
O=$(((RANDOM*RANDOM) % (IMG_SIZE*128)))
|
O=$(((RANDOM*RANDOM) % (IMG_SIZE*128)))
|
||||||
dd if=$NBD_DEV of=./testdata/img2.bin bs=4k seek=$O skip=$O count=1 iflag=direct conv=notrunc
|
dd if=$NBD_DEV of=./testdata/bin/img2.bin bs=4k seek=$O skip=$O count=1 iflag=direct conv=notrunc
|
||||||
done
|
done
|
||||||
|
|
||||||
echo "Verifying during rebalance"
|
echo "Verifying during rebalance"
|
||||||
diff ./testdata/img1.bin ./testdata/img2.bin
|
diff ./testdata/bin/img1.bin ./testdata/bin/img2.bin
|
||||||
|
|
||||||
# Wait for the rebalance to finish
|
# Wait for the rebalance to finish
|
||||||
wait_finish_rebalance 300
|
wait_finish_rebalance 300
|
||||||
|
|
||||||
echo "Verifying after rebalance"
|
echo "Verifying after rebalance"
|
||||||
dd if=$NBD_DEV of=./testdata/img2.bin bs=1M count=$IMG_SIZE iflag=direct
|
dd if=$NBD_DEV of=./testdata/bin/img2.bin bs=1M count=$IMG_SIZE iflag=direct
|
||||||
diff ./testdata/img1.bin ./testdata/img2.bin
|
diff ./testdata/bin/img1.bin ./testdata/bin/img2.bin
|
||||||
}
|
}
|
||||||
|
|
||||||
# Verify with regular reads
|
# Verify with regular reads
|
||||||
|
|
|
@ -14,7 +14,7 @@ LD_PRELOAD="build/src/client/libfio_vitastor.so" \
|
||||||
|
|
||||||
qemu-img convert -S 4096 -p \
|
qemu-img convert -S 4096 -p \
|
||||||
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:pool=1:inode=1:size=$((256*1024*1024))" \
|
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:pool=1:inode=1:size=$((256*1024*1024))" \
|
||||||
-O raw ./testdata/before.bin
|
-O raw ./testdata/bin/before.bin
|
||||||
|
|
||||||
for i in $(seq 1 $OSD_COUNT); do
|
for i in $(seq 1 $OSD_COUNT); do
|
||||||
pid=OSD${i}_PID
|
pid=OSD${i}_PID
|
||||||
|
@ -23,19 +23,19 @@ for i in $(seq 1 $OSD_COUNT); do
|
||||||
done
|
done
|
||||||
|
|
||||||
for i in $(seq 1 $OSD_COUNT); do
|
for i in $(seq 1 $OSD_COUNT); do
|
||||||
offsets=$(build/src/disk_tool/vitastor-disk simple-offsets --format json ./testdata/test_osd$i.bin)
|
offsets=$(build/src/disk_tool/vitastor-disk simple-offsets --format json ./testdata/bin/test_osd$i.bin)
|
||||||
meta_offset=$(echo $offsets | jq -r .meta_offset)
|
meta_offset=$(echo $offsets | jq -r .meta_offset)
|
||||||
data_offset=$(echo $offsets | jq -r .data_offset)
|
data_offset=$(echo $offsets | jq -r .data_offset)
|
||||||
build/src/disk_tool/vitastor-disk dump-journal --json ./testdata/test_osd$i.bin 4096 0 $meta_offset >./testdata/journal_before_resize.json
|
build/src/disk_tool/vitastor-disk dump-journal --json ./testdata/bin/test_osd$i.bin 4096 0 $meta_offset >./testdata/journal_before_resize.json
|
||||||
build/src/disk_tool/vitastor-disk dump-meta ./testdata/test_osd$i.bin 4096 $meta_offset $((data_offset-meta_offset)) >./testdata/meta_before_resize.json
|
build/src/disk_tool/vitastor-disk dump-meta ./testdata/bin/test_osd$i.bin 4096 $meta_offset $((data_offset-meta_offset)) >./testdata/meta_before_resize.json
|
||||||
build/src/disk_tool/vitastor-disk resize \
|
build/src/disk_tool/vitastor-disk resize \
|
||||||
$(build/src/disk_tool/vitastor-disk simple-offsets --format options ./testdata/test_osd$i.bin 2>/dev/null) \
|
$(build/src/disk_tool/vitastor-disk simple-offsets --format options ./testdata/bin/test_osd$i.bin 2>/dev/null) \
|
||||||
--new_meta_offset 0 \
|
--new_meta_offset 0 \
|
||||||
--new_meta_len $((1024*1024)) \
|
--new_meta_len $((1024*1024)) \
|
||||||
--new_journal_offset $((1024*1024)) \
|
--new_journal_offset $((1024*1024)) \
|
||||||
--new_data_offset $((128*1024*1024))
|
--new_data_offset $((128*1024*1024))
|
||||||
build/src/disk_tool/vitastor-disk dump-journal --json ./testdata/test_osd$i.bin 4096 $((1024*1024)) $((127*1024*1024)) >./testdata/journal_after_resize.json
|
build/src/disk_tool/vitastor-disk dump-journal --json ./testdata/bin/test_osd$i.bin 4096 $((1024*1024)) $((127*1024*1024)) >./testdata/journal_after_resize.json
|
||||||
build/src/disk_tool/vitastor-disk dump-meta ./testdata/test_osd$i.bin 4096 0 $((1024*1024)) >./testdata/meta_after_resize.json
|
build/src/disk_tool/vitastor-disk dump-meta ./testdata/bin/test_osd$i.bin 4096 0 $((1024*1024)) >./testdata/meta_after_resize.json
|
||||||
if ! (cat ./testdata/meta_before_resize.json ./testdata/meta_after_resize.json | \
|
if ! (cat ./testdata/meta_before_resize.json ./testdata/meta_after_resize.json | \
|
||||||
jq -e -s 'map([ .entries[] | del(.block) ] | sort_by(.pool, .inode, .stripe)) | .[0] == .[1] and (.[0] | length) > 1000'); then
|
jq -e -s 'map([ .entries[] | del(.block) ] | sort_by(.pool, .inode, .stripe)) | .[0] == .[1] and (.[0] | length) > 1000'); then
|
||||||
format_error "OSD $i metadata corrupted after resizing"
|
format_error "OSD $i metadata corrupted after resizing"
|
||||||
|
@ -50,7 +50,7 @@ $ETCDCTL del --prefix /vitastor/osd/state/
|
||||||
|
|
||||||
for i in $(seq 1 $OSD_COUNT); do
|
for i in $(seq 1 $OSD_COUNT); do
|
||||||
build/src/osd/vitastor-osd --osd_num $i --bind_address 127.0.0.1 $NO_SAME $OSD_ARGS --etcd_address $ETCD_URL \
|
build/src/osd/vitastor-osd --osd_num $i --bind_address 127.0.0.1 $NO_SAME $OSD_ARGS --etcd_address $ETCD_URL \
|
||||||
--data_device ./testdata/test_osd$i.bin \
|
--data_device ./testdata/bin/test_osd$i.bin \
|
||||||
--meta_offset 0 \
|
--meta_offset 0 \
|
||||||
--journal_offset $((1024*1024)) \
|
--journal_offset $((1024*1024)) \
|
||||||
--data_offset $((128*1024*1024)) >>./testdata/osd$i.log 2>&1 &
|
--data_offset $((128*1024*1024)) >>./testdata/osd$i.log 2>&1 &
|
||||||
|
@ -59,9 +59,9 @@ done
|
||||||
|
|
||||||
qemu-img convert -S 4096 -p \
|
qemu-img convert -S 4096 -p \
|
||||||
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:pool=1:inode=1:size=$((256*1024*1024))" \
|
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:pool=1:inode=1:size=$((256*1024*1024))" \
|
||||||
-O raw ./testdata/after.bin
|
-O raw ./testdata/bin/after.bin
|
||||||
|
|
||||||
if ! cmp ./testdata/before.bin ./testdata/after.bin; then
|
if ! cmp ./testdata/bin/before.bin ./testdata/bin/after.bin; then
|
||||||
format_error "Data differs after resizing"
|
format_error "Data differs after resizing"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|
|
@ -21,9 +21,7 @@ $ETCDCTL put /vitastor/config/pools '{"1":{"name":"testpool","scheme":"replicate
|
||||||
|
|
||||||
sleep 2
|
sleep 2
|
||||||
|
|
||||||
$ETCDCTL get --prefix /vitastor/config/pgs --print-value-only
|
if ! ($ETCDCTL get --prefix /vitastor/pg/config --print-value-only | \
|
||||||
|
|
||||||
if ! ($ETCDCTL get --prefix /vitastor/config/pgs --print-value-only | \
|
|
||||||
jq -s -e '[ [ .[0].items["1"] | .[].osd_set | map(. | select(. != "" and (.|tonumber) < 5)) ][] | select((. | length) == 2) ] | length == 16'); then
|
jq -s -e '[ [ .[0].items["1"] | .[].osd_set | map(. | select(. != "" and (.|tonumber) < 5)) ][] | select((. | length) == 2) ] | length == 16'); then
|
||||||
format_error "Some PGs missing replicas"
|
format_error "Some PGs missing replicas"
|
||||||
fi
|
fi
|
||||||
|
|
|
@ -18,19 +18,19 @@ $ETCDCTL put /vitastor/config/inode/1/1 '{"name":"testimg","size":'$((IMG_SIZE*1
|
||||||
# Write
|
# Write
|
||||||
LD_PRELOAD="build/src/client/libfio_vitastor.so" \
|
LD_PRELOAD="build/src/client/libfio_vitastor.so" \
|
||||||
fio -thread -name=test -ioengine=build/src/client/libfio_vitastor.so -bs=1M -direct=1 -iodepth=4 \
|
fio -thread -name=test -ioengine=build/src/client/libfio_vitastor.so -bs=1M -direct=1 -iodepth=4 \
|
||||||
-mirror_file=./testdata/mirror.bin -end_fsync=1 -rw=write -etcd=$ETCD_URL -image=testimg
|
-mirror_file=./testdata/bin/mirror.bin -end_fsync=1 -rw=write -etcd=$ETCD_URL -image=testimg
|
||||||
|
|
||||||
# Save PG primary
|
# Save PG primary
|
||||||
primary=$($ETCDCTL get --print-value-only /vitastor/config/pgs | jq -r '.items["1"]["1"].primary')
|
primary=$($ETCDCTL get --print-value-only /vitastor/pg/config | jq -r '.items["1"]["1"].primary')
|
||||||
|
|
||||||
# Intentionally corrupt OSD data and restart it
|
# Intentionally corrupt OSD data and restart it
|
||||||
zero_osd_pid=OSD${ZERO_OSD}_PID
|
zero_osd_pid=OSD${ZERO_OSD}_PID
|
||||||
kill ${!zero_osd_pid}
|
kill ${!zero_osd_pid}
|
||||||
sleep 1
|
sleep 1
|
||||||
kill -9 ${!zero_osd_pid} || true
|
kill -9 ${!zero_osd_pid} || true
|
||||||
data_offset=$(build/src/disk_tool/vitastor-disk simple-offsets ./testdata/test_osd$ZERO_OSD.bin $OFFSET_ARGS | grep data_offset | awk '{print $2}')
|
data_offset=$(build/src/disk_tool/vitastor-disk simple-offsets ./testdata/bin/test_osd$ZERO_OSD.bin $OFFSET_ARGS | grep data_offset | awk '{print $2}')
|
||||||
truncate -s $data_offset ./testdata/test_osd$ZERO_OSD.bin
|
truncate -s $data_offset ./testdata/bin/test_osd$ZERO_OSD.bin
|
||||||
dd if=/dev/zero of=./testdata/test_osd$ZERO_OSD.bin bs=1024 count=1 seek=$((OSD_SIZE*1024-1))
|
dd if=/dev/zero of=./testdata/bin/test_osd$ZERO_OSD.bin bs=1024 count=1 seek=$((OSD_SIZE*1024-1))
|
||||||
$ETCDCTL del /vitastor/osd/state/$ZERO_OSD
|
$ETCDCTL del /vitastor/osd/state/$ZERO_OSD
|
||||||
start_osd $ZERO_OSD
|
start_osd $ZERO_OSD
|
||||||
|
|
||||||
|
@ -38,7 +38,7 @@ start_osd $ZERO_OSD
|
||||||
wait_up 10
|
wait_up 10
|
||||||
|
|
||||||
# Wait until PG is back on the same primary
|
# Wait until PG is back on the same primary
|
||||||
wait_condition 10 "$ETCDCTL"$' get --print-value-only /vitastor/config/pgs | jq -s -e \'.[0].items["1"]["1"].primary == "'$primary'"'"'"
|
wait_condition 10 "$ETCDCTL"$' get --print-value-only /vitastor/pg/config | jq -s -e \'.[0].items["1"]["1"].primary == "'$primary'"'"'"
|
||||||
|
|
||||||
# Trigger scrub
|
# Trigger scrub
|
||||||
$ETCDCTL put /vitastor/pg/history/1/1 `$ETCDCTL get --print-value-only /vitastor/pg/history/1/1 | jq -s -c '(.[0] // {}) + {"next_scrub":1}'`
|
$ETCDCTL put /vitastor/pg/history/1/1 `$ETCDCTL get --print-value-only /vitastor/pg/history/1/1 | jq -s -c '(.[0] // {}) + {"next_scrub":1}'`
|
||||||
|
@ -64,8 +64,8 @@ fi
|
||||||
# Read everything back
|
# Read everything back
|
||||||
qemu-img convert -S 4096 -p \
|
qemu-img convert -S 4096 -p \
|
||||||
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testimg" \
|
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testimg" \
|
||||||
-O raw ./testdata/read.bin
|
-O raw ./testdata/bin/read.bin
|
||||||
|
|
||||||
diff ./testdata/read.bin ./testdata/mirror.bin
|
diff ./testdata/bin/read.bin ./testdata/bin/mirror.bin
|
||||||
|
|
||||||
format_green OK
|
format_green OK
|
||||||
|
|
|
@ -34,21 +34,21 @@ qemu-img convert -p \
|
||||||
|
|
||||||
qemu-img convert -S 4096 -p \
|
qemu-img convert -S 4096 -p \
|
||||||
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:pool=1:inode=3:size=$((32*1024*1024))" \
|
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:pool=1:inode=3:size=$((32*1024*1024))" \
|
||||||
-O raw ./testdata/merged.bin
|
-O raw ./testdata/bin/merged.bin
|
||||||
|
|
||||||
qemu-img convert -S 4096 -p \
|
qemu-img convert -S 4096 -p \
|
||||||
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testimg@0" \
|
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testimg@0" \
|
||||||
-O raw ./testdata/layer0.bin
|
-O raw ./testdata/bin/layer0.bin
|
||||||
|
|
||||||
$ETCDCTL put /vitastor/config/inode/1/3 '{"name":"testimg","size":'$((32*1024*1024))'}'
|
$ETCDCTL put /vitastor/config/inode/1/3 '{"name":"testimg","size":'$((32*1024*1024))'}'
|
||||||
|
|
||||||
qemu-img convert -S 4096 -p \
|
qemu-img convert -S 4096 -p \
|
||||||
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testimg" \
|
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testimg" \
|
||||||
-O raw ./testdata/layer1.bin
|
-O raw ./testdata/bin/layer1.bin
|
||||||
|
|
||||||
node tests/merge.js ./testdata/layer0.bin ./testdata/layer1.bin ./testdata/check.bin
|
node tests/merge.js ./testdata/bin/layer0.bin ./testdata/bin/layer1.bin ./testdata/bin/check.bin
|
||||||
|
|
||||||
cmp ./testdata/merged.bin ./testdata/check.bin
|
cmp ./testdata/bin/merged.bin ./testdata/bin/check.bin
|
||||||
|
|
||||||
# Test merge
|
# Test merge
|
||||||
|
|
||||||
|
@ -58,22 +58,22 @@ build/src/cmd/vitastor-cli rm --etcd_address $ETCD_URL testimg@0
|
||||||
|
|
||||||
qemu-img convert -S 4096 -p \
|
qemu-img convert -S 4096 -p \
|
||||||
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testimg" \
|
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testimg" \
|
||||||
-O raw ./testdata/merged-by-tool.bin
|
-O raw ./testdata/bin/merged-by-tool.bin
|
||||||
|
|
||||||
cmp ./testdata/merged.bin ./testdata/merged-by-tool.bin
|
cmp ./testdata/bin/merged.bin ./testdata/bin/merged-by-tool.bin
|
||||||
|
|
||||||
# Test merge by qemu-img
|
# Test merge by qemu-img
|
||||||
|
|
||||||
qemu-img rebase -u -b layer0.qcow2 -F qcow2 ./testdata/layer1.qcow2
|
qemu-img rebase -u -b layer0.qcow2 -F qcow2 ./testdata/layer1.qcow2
|
||||||
|
|
||||||
qemu-img convert -S 4096 -f qcow2 ./testdata/layer1.qcow2 -O raw ./testdata/rebased.bin
|
qemu-img convert -S 4096 -f qcow2 ./testdata/layer1.qcow2 -O raw ./testdata/bin/rebased.bin
|
||||||
|
|
||||||
cmp ./testdata/merged.bin ./testdata/rebased.bin
|
cmp ./testdata/bin/merged.bin ./testdata/bin/rebased.bin
|
||||||
|
|
||||||
qemu-img rebase -u -b '' ./testdata/layer1.qcow2
|
qemu-img rebase -u -b '' ./testdata/layer1.qcow2
|
||||||
|
|
||||||
qemu-img convert -S 4096 -f qcow2 ./testdata/layer1.qcow2 -O raw ./testdata/rebased.bin
|
qemu-img convert -S 4096 -f qcow2 ./testdata/layer1.qcow2 -O raw ./testdata/bin/rebased.bin
|
||||||
|
|
||||||
cmp ./testdata/layer1.bin ./testdata/rebased.bin
|
cmp ./testdata/bin/layer1.bin ./testdata/bin/rebased.bin
|
||||||
|
|
||||||
format_green OK
|
format_green OK
|
||||||
|
|
|
@ -9,7 +9,7 @@ build/src/cmd/vitastor-cli --etcd_address $ETCD_URL create -s 32M testchain
|
||||||
|
|
||||||
LD_PRELOAD="build/src/client/libfio_vitastor.so" \
|
LD_PRELOAD="build/src/client/libfio_vitastor.so" \
|
||||||
fio -thread -name=test -ioengine=build/src/client/libfio_vitastor.so -bs=4M -direct=1 -iodepth=1 -fsync=1 -rw=write \
|
fio -thread -name=test -ioengine=build/src/client/libfio_vitastor.so -bs=4M -direct=1 -iodepth=1 -fsync=1 -rw=write \
|
||||||
-etcd=$ETCD_URL -image=testchain -mirror_file=./testdata/mirror.bin
|
-etcd=$ETCD_URL -image=testchain -mirror_file=./testdata/bin/mirror.bin
|
||||||
|
|
||||||
for i in {1..10}; do
|
for i in {1..10}; do
|
||||||
# Create a snapshot
|
# Create a snapshot
|
||||||
|
@ -17,18 +17,18 @@ for i in {1..10}; do
|
||||||
# Check that the new snapshot is see-through
|
# Check that the new snapshot is see-through
|
||||||
qemu-img convert -p \
|
qemu-img convert -p \
|
||||||
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testchain" \
|
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testchain" \
|
||||||
-O raw ./testdata/check.bin
|
-O raw ./testdata/bin/check.bin
|
||||||
cmp ./testdata/check.bin ./testdata/mirror.bin
|
cmp ./testdata/bin/check.bin ./testdata/bin/mirror.bin
|
||||||
# Write something to it
|
# Write something to it
|
||||||
LD_PRELOAD="build/src/client/libfio_vitastor.so" \
|
LD_PRELOAD="build/src/client/libfio_vitastor.so" \
|
||||||
fio -thread -name=test -ioengine=build/src/client/libfio_vitastor.so -bs=4k -direct=1 -iodepth=1 -fsync=32 -rw=randwrite \
|
fio -thread -name=test -ioengine=build/src/client/libfio_vitastor.so -bs=4k -direct=1 -iodepth=1 -fsync=32 -rw=randwrite \
|
||||||
-randrepeat=$((i <= 2)) -buffer_pattern=0x$((10+i))$((10+i))$((10+i))$((10+i)) \
|
-randrepeat=$((i <= 2)) -buffer_pattern=0x$((10+i))$((10+i))$((10+i))$((10+i)) \
|
||||||
-etcd=$ETCD_URL -image=testchain -number_ios=1024 -mirror_file=./testdata/mirror.bin
|
-etcd=$ETCD_URL -image=testchain -number_ios=1024 -mirror_file=./testdata/bin/mirror.bin
|
||||||
# Check the new content
|
# Check the new content
|
||||||
qemu-img convert -p \
|
qemu-img convert -p \
|
||||||
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testchain" \
|
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testchain" \
|
||||||
-O raw ./testdata/layer1.bin
|
-O raw ./testdata/bin/layer1.bin
|
||||||
cmp ./testdata/layer1.bin ./testdata/mirror.bin
|
cmp ./testdata/bin/layer1.bin ./testdata/bin/mirror.bin
|
||||||
done
|
done
|
||||||
|
|
||||||
build/src/cmd/vitastor-cli --etcd_address $ETCD_URL rm testchain@1 testchain@9
|
build/src/cmd/vitastor-cli --etcd_address $ETCD_URL rm testchain@1 testchain@9
|
||||||
|
@ -36,13 +36,13 @@ build/src/cmd/vitastor-cli --etcd_address $ETCD_URL rm testchain@1 testchain@9
|
||||||
# Check the final image
|
# Check the final image
|
||||||
qemu-img convert -p \
|
qemu-img convert -p \
|
||||||
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testchain" \
|
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testchain" \
|
||||||
-O raw ./testdata/layer1.bin
|
-O raw ./testdata/bin/layer1.bin
|
||||||
cmp ./testdata/layer1.bin ./testdata/mirror.bin
|
cmp ./testdata/bin/layer1.bin ./testdata/bin/mirror.bin
|
||||||
|
|
||||||
# Check the last remaining snapshot
|
# Check the last remaining snapshot
|
||||||
qemu-img convert -p \
|
qemu-img convert -p \
|
||||||
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testchain@10" \
|
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testchain@10" \
|
||||||
-O raw ./testdata/layer0.bin
|
-O raw ./testdata/bin/layer0.bin
|
||||||
cmp ./testdata/layer0.bin ./testdata/check.bin
|
cmp ./testdata/bin/layer0.bin ./testdata/bin/check.bin
|
||||||
|
|
||||||
format_green OK
|
format_green OK
|
||||||
|
|
|
@ -9,7 +9,7 @@ build/src/cmd/vitastor-cli --etcd_address $ETCD_URL create -s 128M testchain
|
||||||
|
|
||||||
LD_PRELOAD="build/src/client/libfio_vitastor.so" \
|
LD_PRELOAD="build/src/client/libfio_vitastor.so" \
|
||||||
fio -thread -name=test -ioengine=build/src/client/libfio_vitastor.so -bs=4M -direct=1 -iodepth=1 -fsync=1 -rw=write \
|
fio -thread -name=test -ioengine=build/src/client/libfio_vitastor.so -bs=4M -direct=1 -iodepth=1 -fsync=1 -rw=write \
|
||||||
-etcd=$ETCD_URL -image=testchain -mirror_file=./testdata/mirror.bin
|
-etcd=$ETCD_URL -image=testchain -mirror_file=./testdata/bin/mirror.bin
|
||||||
|
|
||||||
# Create a snapshot
|
# Create a snapshot
|
||||||
build/src/cmd/vitastor-cli --etcd_address $ETCD_URL snap-create testchain@0
|
build/src/cmd/vitastor-cli --etcd_address $ETCD_URL snap-create testchain@0
|
||||||
|
@ -17,13 +17,13 @@ build/src/cmd/vitastor-cli --etcd_address $ETCD_URL snap-create testchain@0
|
||||||
# Write something to it
|
# Write something to it
|
||||||
LD_PRELOAD="build/src/client/libfio_vitastor.so" \
|
LD_PRELOAD="build/src/client/libfio_vitastor.so" \
|
||||||
fio -thread -name=test -ioengine=build/src/client/libfio_vitastor.so -bs=1M -direct=1 -iodepth=4 -rw=randwrite \
|
fio -thread -name=test -ioengine=build/src/client/libfio_vitastor.so -bs=1M -direct=1 -iodepth=4 -rw=randwrite \
|
||||||
-randrepeat=0 -etcd=$ETCD_URL -image=testchain -number_ios=8 -mirror_file=./testdata/mirror.bin
|
-randrepeat=0 -etcd=$ETCD_URL -image=testchain -number_ios=8 -mirror_file=./testdata/bin/mirror.bin
|
||||||
|
|
||||||
# Check the new content
|
# Check the new content
|
||||||
qemu-img convert -p \
|
qemu-img convert -p \
|
||||||
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testchain" \
|
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testchain" \
|
||||||
-O raw ./testdata/layer1.bin
|
-O raw ./testdata/bin/layer1.bin
|
||||||
cmp ./testdata/layer1.bin ./testdata/mirror.bin
|
cmp ./testdata/bin/layer1.bin ./testdata/bin/mirror.bin
|
||||||
|
|
||||||
# Merge
|
# Merge
|
||||||
build/src/cmd/vitastor-cli --etcd_address $ETCD_URL rm testchain@0
|
build/src/cmd/vitastor-cli --etcd_address $ETCD_URL rm testchain@0
|
||||||
|
@ -31,7 +31,7 @@ build/src/cmd/vitastor-cli --etcd_address $ETCD_URL rm testchain@0
|
||||||
# Check the final image
|
# Check the final image
|
||||||
qemu-img convert -p \
|
qemu-img convert -p \
|
||||||
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testchain" \
|
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testchain" \
|
||||||
-O raw ./testdata/layer1.bin
|
-O raw ./testdata/bin/layer1.bin
|
||||||
cmp ./testdata/layer1.bin ./testdata/mirror.bin
|
cmp ./testdata/bin/layer1.bin ./testdata/bin/mirror.bin
|
||||||
|
|
||||||
format_green OK
|
format_green OK
|
||||||
|
|
|
@ -23,7 +23,7 @@ LD_PRELOAD="build/src/client/libfio_vitastor.so" \
|
||||||
|
|
||||||
kill $OSD2_PID
|
kill $OSD2_PID
|
||||||
build/src/osd/vitastor-osd --osd_num 1 --bind_address 127.0.0.1 $NO_SAME $OSD_ARGS --etcd_address $ETCD_URL \
|
build/src/osd/vitastor-osd --osd_num 1 --bind_address 127.0.0.1 $NO_SAME $OSD_ARGS --etcd_address $ETCD_URL \
|
||||||
$(build/src/disk_tool/vitastor-disk simple-offsets --format options --device ./testdata/test_osd2.bin 2>/dev/null) >>./testdata/osd2.log 2>&1 &
|
$(build/src/disk_tool/vitastor-disk simple-offsets --format options --device ./testdata/bin/test_osd2.bin 2>/dev/null) >>./testdata/osd2.log 2>&1 &
|
||||||
sleep 2
|
sleep 2
|
||||||
|
|
||||||
# Check PG state - it should NOT become active
|
# Check PG state - it should NOT become active
|
||||||
|
|
|
@ -2,14 +2,14 @@
|
||||||
|
|
||||||
. `dirname $0`/run_3osds.sh
|
. `dirname $0`/run_3osds.sh
|
||||||
|
|
||||||
primary=$($ETCDCTL get --print-value-only /vitastor/config/pgs | jq -r '.items["1"]["1"].primary')
|
primary=$($ETCDCTL get --print-value-only /vitastor/pg/config | jq -r '.items["1"]["1"].primary')
|
||||||
primary_pid=OSD${primary}_PID
|
primary_pid=OSD${primary}_PID
|
||||||
kill -9 ${!primary_pid}
|
kill -9 ${!primary_pid}
|
||||||
|
|
||||||
sleep 15
|
sleep 15
|
||||||
wait_condition 10 "$ETCDCTL get --print-value-only /vitastor/config/pgs | jq -s -e '.[0].items[\"1\"][\"1\"].primary != \"$primary\"'"
|
wait_condition 10 "$ETCDCTL get --print-value-only /vitastor/pg/config | jq -s -e '.[0].items[\"1\"][\"1\"].primary != \"$primary\"'"
|
||||||
|
|
||||||
newprim=$($ETCDCTL get --print-value-only /vitastor/config/pgs | jq -r '.items["1"]["1"].primary')
|
newprim=$($ETCDCTL get --print-value-only /vitastor/pg/config | jq -r '.items["1"]["1"].primary')
|
||||||
|
|
||||||
if [ "$newprim" = "$primary" ]; then
|
if [ "$newprim" = "$primary" ]; then
|
||||||
format_error Primary not switched
|
format_error Primary not switched
|
||||||
|
|
|
@ -15,7 +15,7 @@ OSD_COUNT=3
|
||||||
OSD_ARGS="$OSD_ARGS"
|
OSD_ARGS="$OSD_ARGS"
|
||||||
OFFSET_ARGS="$OFFSET_ARGS"
|
OFFSET_ARGS="$OFFSET_ARGS"
|
||||||
for i in $(seq 1 $OSD_COUNT); do
|
for i in $(seq 1 $OSD_COUNT); do
|
||||||
build/src/osd/vitastor-osd --osd_num $i --bind_address 127.0.0.1 $OSD_ARGS --etcd_address $ETCD_URL $(build/src/disk_tool/vitastor-disk simple-offsets --format options ./testdata/test_osd$i.bin 2>/dev/null) >>./testdata/osd$i.log 2>&1 &
|
build/src/osd/vitastor-osd --osd_num $i --bind_address 127.0.0.1 $OSD_ARGS --etcd_address $ETCD_URL $(build/src/disk_tool/vitastor-disk simple-offsets --format options ./testdata/bin/test_osd$i.bin 2>/dev/null) >>./testdata/osd$i.log 2>&1 &
|
||||||
eval OSD${i}_PID=$!
|
eval OSD${i}_PID=$!
|
||||||
done
|
done
|
||||||
|
|
||||||
|
|
|
@ -43,10 +43,10 @@ LD_PRELOAD="build/src/client/libfio_vitastor.so" \
|
||||||
|
|
||||||
qemu-img convert -S 4096 -p \
|
qemu-img convert -S 4096 -p \
|
||||||
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:pool=1:inode=1:size=$((128*1024*1024))" \
|
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:pool=1:inode=1:size=$((128*1024*1024))" \
|
||||||
-O raw ./testdata/read.bin
|
-O raw ./testdata/bin/read.bin
|
||||||
|
|
||||||
qemu-img convert -S 4096 -p \
|
qemu-img convert -S 4096 -p \
|
||||||
-f raw ./testdata/read.bin \
|
-f raw ./testdata/bin/read.bin \
|
||||||
-O raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:pool=1:inode=1:size=$((128*1024*1024))"
|
-O raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:pool=1:inode=1:size=$((128*1024*1024))"
|
||||||
|
|
||||||
format_green OK
|
format_green OK
|
||||||
|
|
Loading…
Reference in New Issue