Compare commits
32 Commits
Author | SHA1 | Date | |
---|---|---|---|
ce359c5a69 | |||
521e867b10 | |||
333c54ebbf | |||
58d3da95c8 | |||
4e90e752eb | |||
09342d7189 | |||
eb3e8b8c19 | |||
e2ca3ad99e | |||
dd4b0aed2b | |||
42851a061c | |||
8e0f242d30 | |||
0daa8ea39b | |||
b263d311ef | |||
8720185780 | |||
20584414d8 | |||
306a3db7f3 | |||
5b0aebada4 | |||
d6f0b480c8 | |||
f1f8531fd4 | |||
8d79d59964 | |||
551a209a50 | |||
06cafd7702 | |||
3018352443 | |||
f8edfb4a71 | |||
8239ea2356 | |||
e898335b8d | |||
e7869611fa | |||
e1c2500b60 | |||
42cf3a11df | |||
4d9293f0e9 | |||
7a13f85ae2 | |||
fc219b8602 |
@@ -16,6 +16,7 @@ env:
|
||||
BUILDENV_IMAGE: git.yourcmc.ru/vitalif/vitastor/buildenv
|
||||
TEST_IMAGE: git.yourcmc.ru/vitalif/vitastor/test
|
||||
OSD_ARGS: '--etcd_quick_timeout 2000'
|
||||
USE_RAMDISK: 1
|
||||
|
||||
concurrency:
|
||||
group: ci-${{ github.ref }}
|
||||
@@ -197,6 +198,24 @@ jobs:
|
||||
echo ""
|
||||
done
|
||||
|
||||
test_etcd_fail_antietcd:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
container: ${{env.TEST_IMAGE}}:${{github.sha}}
|
||||
steps:
|
||||
- name: Run test
|
||||
id: test
|
||||
timeout-minutes: 10
|
||||
run: ANTIETCD=1 /root/vitastor/tests/test_etcd_fail.sh
|
||||
- name: Print logs
|
||||
if: always() && steps.test.outcome == 'failure'
|
||||
run: |
|
||||
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
|
||||
echo "-------- $i --------"
|
||||
cat $i
|
||||
echo ""
|
||||
done
|
||||
|
||||
test_interrupted_rebalance:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
@@ -665,6 +684,24 @@ jobs:
|
||||
echo ""
|
||||
done
|
||||
|
||||
test_heal_antietcd:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
container: ${{env.TEST_IMAGE}}:${{github.sha}}
|
||||
steps:
|
||||
- name: Run test
|
||||
id: test
|
||||
timeout-minutes: 10
|
||||
run: ANTIETCD=1 /root/vitastor/tests/test_heal.sh
|
||||
- name: Print logs
|
||||
if: always() && steps.test.outcome == 'failure'
|
||||
run: |
|
||||
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
|
||||
echo "-------- $i --------"
|
||||
cat $i
|
||||
echo ""
|
||||
done
|
||||
|
||||
test_heal_csum_32k_dmj:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
|
@@ -34,6 +34,10 @@ for my $line (<>)
|
||||
{
|
||||
$test_name .= '_imm';
|
||||
}
|
||||
elsif ($1 eq 'ANTIETCD')
|
||||
{
|
||||
$test_name .= '_antietcd';
|
||||
}
|
||||
else
|
||||
{
|
||||
$test_name .= '_'.lc($1).'_'.$2;
|
||||
|
@@ -2,6 +2,6 @@ cmake_minimum_required(VERSION 2.8.12)
|
||||
|
||||
project(vitastor)
|
||||
|
||||
set(VERSION "1.7.0")
|
||||
set(VITASTOR_VERSION "1.8.0")
|
||||
|
||||
add_subdirectory(src)
|
||||
|
@@ -1,9 +1,9 @@
|
||||
VERSION ?= v1.7.0
|
||||
VITASTOR_VERSION ?= v1.8.0
|
||||
|
||||
all: build push
|
||||
|
||||
build:
|
||||
@docker build --rm -t vitalif/vitastor-csi:$(VERSION) .
|
||||
@docker build --rm -t vitalif/vitastor-csi:$(VITASTOR_VERSION) .
|
||||
|
||||
push:
|
||||
@docker push vitalif/vitastor-csi:$(VERSION)
|
||||
@docker push vitalif/vitastor-csi:$(VITASTOR_VERSION)
|
||||
|
@@ -49,7 +49,7 @@ spec:
|
||||
capabilities:
|
||||
add: ["SYS_ADMIN"]
|
||||
allowPrivilegeEscalation: true
|
||||
image: vitalif/vitastor-csi:v1.7.0
|
||||
image: vitalif/vitastor-csi:v1.8.0
|
||||
args:
|
||||
- "--node=$(NODE_ID)"
|
||||
- "--endpoint=$(CSI_ENDPOINT)"
|
||||
|
@@ -121,7 +121,7 @@ spec:
|
||||
privileged: true
|
||||
capabilities:
|
||||
add: ["SYS_ADMIN"]
|
||||
image: vitalif/vitastor-csi:v1.7.0
|
||||
image: vitalif/vitastor-csi:v1.8.0
|
||||
args:
|
||||
- "--node=$(NODE_ID)"
|
||||
- "--endpoint=$(CSI_ENDPOINT)"
|
||||
|
@@ -5,7 +5,7 @@ package vitastor
|
||||
|
||||
const (
|
||||
vitastorCSIDriverName = "csi.vitastor.io"
|
||||
vitastorCSIDriverVersion = "1.7.0"
|
||||
vitastorCSIDriverVersion = "1.8.0"
|
||||
)
|
||||
|
||||
// Config struct fills the parameters of request or user input
|
||||
|
2
debian/changelog
vendored
2
debian/changelog
vendored
@@ -1,4 +1,4 @@
|
||||
vitastor (1.7.0-1) unstable; urgency=medium
|
||||
vitastor (1.8.0-1) unstable; urgency=medium
|
||||
|
||||
* Bugfixes
|
||||
|
||||
|
@@ -17,10 +17,10 @@ To enable Vitastor support in Proxmox Virtual Environment (6.4-8.1 are supported
|
||||
- Restart pvedaemon: `systemctl restart pvedaemon`
|
||||
|
||||
`/etc/pve/storage.cfg` example (the only required option is vitastor_pool, all others
|
||||
are listed below with their default values):
|
||||
are listed below with their default values; `vitastor_ssd` is the Proxmox storage pool ID):
|
||||
|
||||
```
|
||||
vitastor: vitastor
|
||||
vitastor: vitastor_ssd
|
||||
# pool to put new images into
|
||||
vitastor_pool testpool
|
||||
# path to the configuration file
|
||||
|
@@ -16,10 +16,10 @@
|
||||
- Перезапустите демон Proxmox: `systemctl restart pvedaemon`
|
||||
|
||||
Пример `/etc/pve/storage.cfg` (единственная обязательная опция - vitastor_pool, все остальные
|
||||
перечислены внизу для понимания значений по умолчанию):
|
||||
перечислены внизу для понимания значений по умолчанию; `vitastor_ssd` - имя хранилища в Proxmox):
|
||||
|
||||
```
|
||||
vitastor: vitastor
|
||||
vitastor: vitastor_ssd
|
||||
# Пул, в который будут помещаться образы дисков
|
||||
vitastor_pool testpool
|
||||
# Путь к файлу конфигурации
|
||||
|
@@ -42,7 +42,7 @@ PG state always includes exactly 1 of the following base states:
|
||||
- **offline** — PG isn't activated by any OSD at all. Either primary OSD isn't set for
|
||||
this PG at all (if the pool is just created), or an unavailable OSD is set as primary,
|
||||
or the primary OSD refuses to start this PG (for example, because of wrong block_size),
|
||||
or the PG is stopped by the monitor using `pause: true` flag in `/vitastor/config/pgs` in etcd.
|
||||
or the PG is stopped by the monitor using `pause: true` flag in `/vitastor/pg/config` in etcd.
|
||||
- **starting** — primary OSD has acquired PG lock in etcd, PG is starting.
|
||||
- **peering** — primary OSD requests PG object listings from secondary OSDs and calculates
|
||||
the PG state.
|
||||
@@ -107,16 +107,17 @@ If a PG is active it can also have any number of the following additional states
|
||||
|
||||
## Removing a healthy disk
|
||||
|
||||
Befor removing a healthy disk from the cluster set its OSD weight(s) to 0 to
|
||||
move data away. To do that, add `"reweight":0` to etcd key `/vitastor/config/osd/<OSD_NUMBER>`.
|
||||
For example:
|
||||
Before removing a healthy disk from the cluster set its OSD weight(s) to 0 to
|
||||
move data away. To do that, run `vitastor-cli modify-osd --reweight 0 <OSD_NUMBER>`.
|
||||
|
||||
Then wait until rebalance finishes and remove OSD by running `vitastor-disk purge /dev/vitastor/osdN-data`.
|
||||
|
||||
Zero weight can also be put manually into the etcd key `/vitastor/config/osd/<OSD_NUMBER>`, for example:
|
||||
|
||||
```
|
||||
etcdctl --endpoints=http://1.1.1.1:2379/v3 put /vitastor/config/osd/1 '{"reweight":0}'
|
||||
```
|
||||
|
||||
Then wait until rebalance finishes and remove OSD by running `vitastor-disk purge /dev/vitastor/osdN-data`.
|
||||
|
||||
## Removing a failed disk
|
||||
|
||||
If a disk is already dead, its OSD(s) are likely already stopped.
|
||||
@@ -149,7 +150,7 @@ POOL_ID=1
|
||||
ALL_OSDS=$(etcdctl --endpoints=your_etcd_address:2379 get --keys-only --prefix /vitastor/osd/stats/ | \
|
||||
perl -e '$/ = undef; $a = <>; $a =~ s/\s*$//; $a =~ s!/vitastor/osd/stats/!!g; $a =~ s/\s+/,/g; print $a')
|
||||
for i in $(seq 1 $PG_COUNT); do
|
||||
etcdctl --endpoints=your_etcd_address:2379 put /vitastor/pg/history/$POOL_ID/$i '{"all_peers":['$ALL_OSDS']}'; done
|
||||
etcdctl --endpoints=your_etcd_address:2379 put /vitastor/pg/history/$POOL_ID/$i '{"all_peers":['$ALL_OSDS']}'
|
||||
done
|
||||
```
|
||||
|
||||
@@ -168,21 +169,51 @@ Upgrading is performed without stopping clients (VMs/containers), you just need
|
||||
upgrade and restart servers one by one. However, ideally you should restart VMs too
|
||||
to make them use the new version of the client library.
|
||||
|
||||
Exceptions (specific upgrade instructions):
|
||||
- Upgrading <= 1.1.x to 1.2.0 or later, if you use EC n+k with k>=2, is recommended
|
||||
to be performed with full downtime: first you should stop all clients, then all OSDs,
|
||||
then upgrade and start everything back — because versions before 1.2.0 have several
|
||||
bugs leading to invalid data being read in EC n+k, k>=2 configurations in degraded pools.
|
||||
- Versions <= 0.8.7 are incompatible with versions >= 0.9.0, so you should first
|
||||
upgrade from <= 0.8.7 to 0.8.8 or 0.8.9, and only then to >= 0.9.x. If you upgrade
|
||||
without this intermediate step, client I/O will hang until the end of upgrade process.
|
||||
- Upgrading from <= 0.5.x to >= 0.6.x is not supported.
|
||||
### 1.1.x to 1.2.0
|
||||
|
||||
Rollback:
|
||||
- Version 1.0.0 has a new disk format, so OSDs initiaziled on 1.0.0 can't be rolled
|
||||
back to 0.9.x or previous versions.
|
||||
- Versions before 0.8.0 don't have vitastor-disk, so OSDs, initialized by it, won't
|
||||
start with 0.7.x or 0.6.x. :-)
|
||||
Upgrading version <= 1.1.x to version >= 1.2.0, if you use EC n+k with k>=2, is recommended
|
||||
to be performed with full downtime: first you should stop all clients, then all OSDs,
|
||||
then upgrade and start everything back — because versions before 1.2.0 have several
|
||||
bugs leading to invalid data being read in EC n+k, k>=2 configurations in degraded pools.
|
||||
|
||||
### 0.8.7 to 0.9.0
|
||||
|
||||
Versions <= 0.8.7 are incompatible with versions >= 0.9.0, so you should first
|
||||
upgrade from <= 0.8.7 to 0.8.8 or 0.8.9, and only then to >= 0.9.x. If you upgrade
|
||||
without this intermediate step, client I/O will hang until the end of upgrade process.
|
||||
|
||||
### 0.5.x to 0.6.x
|
||||
|
||||
Upgrading from <= 0.5.x to >= 0.6.x is not supported.
|
||||
|
||||
## Downgrade
|
||||
|
||||
Downgrades are also allowed freely, except in the following specific cases:
|
||||
|
||||
### 1.8.0 to 1.7.1
|
||||
|
||||
Before downgrading from version >= 1.8.0 to version <= 1.7.1
|
||||
you have to copy the `/vitastor/pg/config` etcd key to `/vitastor/config/pgs`:
|
||||
|
||||
```
|
||||
etcdctl --endpoints=http://... get --print-value-only /vitastor/pg/config | \
|
||||
etcdctl --endpoints=http://... put /vitastor/config/pgs
|
||||
```
|
||||
|
||||
Then you can just install older packages and restart all services.
|
||||
|
||||
If you performed downgrade without first copying that key, run "add all OSDs into the
|
||||
history records of all PGs" from [Restoring from lost pool configuration](#restoring-from-lost-pool-configuration).
|
||||
|
||||
### 1.0.0 to 0.9.x
|
||||
|
||||
Version 1.0.0 has a new disk format, so OSDs initialized on 1.0.0 or later can't
|
||||
be rolled back to 0.9.x or previous versions.
|
||||
|
||||
### 0.8.0 to 0.7.x
|
||||
|
||||
Versions before 0.8.0 don't have vitastor-disk, so OSDs initialized by it won't
|
||||
start with older versions (0.4.x - 0.7.x). :-)
|
||||
|
||||
## OSD memory usage
|
||||
|
||||
|
@@ -42,7 +42,7 @@
|
||||
- **offline** — PG вообще не активирована ни одним OSD. Либо первичный OSD не назначен вообще
|
||||
(если пул только создан), либо в качестве первичного назначен недоступный OSD, либо
|
||||
назначенный OSD отказывается запускать эту PG (например, из-за несовпадения block_size),
|
||||
либо PG остановлена монитором через флаг `pause: true` в `/vitastor/config/pgs` в etcd.
|
||||
либо PG остановлена монитором через флаг `pause: true` в `/vitastor/pg/config` в etcd.
|
||||
- **starting** — первичный OSD захватил блокировку PG в etcd, PG запускается.
|
||||
- **peering** — первичный OSD опрашивает вторичные OSD на предмет списков объектов данной PG и рассчитывает её состояние.
|
||||
- **repeering** — PG ожидает завершения текущих операций ввода-вывода, после чего перейдёт в состояние **peering**.
|
||||
@@ -105,14 +105,16 @@ PG должны очень быстро переходить из них в др
|
||||
## Удаление исправного диска
|
||||
|
||||
Перед удалением исправного диска из кластера установите его OSD вес в 0, чтобы убрать с него данные.
|
||||
Для этого добавьте в ключ `/vitastor/config/osd/<НОМЕР_OSD>` в etcd значение `"reweight":0`, например:
|
||||
Для этого выполните команду `vitastor-cli modify-osd --reweight 0 <НОМЕР_OSD>`.
|
||||
|
||||
Дождитесь завершения перебалансировки данных, после чего удалите OSD командой `vitastor-disk purge /dev/vitastor/osdN-data`.
|
||||
|
||||
Также вес 0 можно прописать вручную прямо в etcd в ключ `/vitastor/config/osd/<НОМЕР_OSD>`, например:
|
||||
|
||||
```
|
||||
etcdctl --endpoints=http://1.1.1.1:2379/v3 put /vitastor/config/osd/1 '{"reweight":0}'
|
||||
```
|
||||
|
||||
Дождитесь завершения ребаланса, после чего удалите OSD командой `vitastor-disk purge /dev/vitastor/osdN-data`.
|
||||
|
||||
## Удаление неисправного диска
|
||||
|
||||
Если диск уже умер, его OSD, скорее всего, уже будет/будут остановлен(ы).
|
||||
@@ -145,7 +147,7 @@ POOL_ID=1
|
||||
ALL_OSDS=$(etcdctl --endpoints=your_etcd_address:2379 get --keys-only --prefix /vitastor/osd/stats/ | \
|
||||
perl -e '$/ = undef; $a = <>; $a =~ s/\s*$//; $a =~ s!/vitastor/osd/stats/!!g; $a =~ s/\s+/,/g; print $a')
|
||||
for i in $(seq 1 $PG_COUNT); do
|
||||
etcdctl --endpoints=your_etcd_address:2379 put /vitastor/pg/history/$POOL_ID/$i '{"all_peers":['$ALL_OSDS']}'; done
|
||||
etcdctl --endpoints=your_etcd_address:2379 put /vitastor/pg/history/$POOL_ID/$i '{"all_peers":['$ALL_OSDS']}'
|
||||
done
|
||||
```
|
||||
|
||||
@@ -164,21 +166,51 @@ done
|
||||
достаточно обновлять серверы по одному. Однако, конечно, чтобы запущенные виртуальные машины
|
||||
начали использовать новую версию клиентской библиотеки, их тоже нужно перезапустить.
|
||||
|
||||
Исключения (особые указания при обновлении):
|
||||
- Обновляться с версий <= 1.1.x до версий >= 1.2.0, если вы используете EC n+k и k>=2,
|
||||
рекомендуется с временной остановкой кластера — сначала нужно остановить всех клиентов,
|
||||
потом все OSD, потом обновить и запустить всё обратно — из-за нескольких багов, которые
|
||||
могли приводить к некорректному чтению данных в деградированных EC-пулах.
|
||||
- Версии <= 0.8.7 несовместимы с версиями >= 0.9.0, поэтому при обновлении с <= 0.8.7
|
||||
нужно сначала обновиться до 0.8.8 или 0.8.9, а уже потом до любых версий >= 0.9.x.
|
||||
Иначе клиентский ввод-вывод зависнет до завершения обновления.
|
||||
- Обновление с версий 0.5.x и более ранних до 0.6.x и более поздних не поддерживается.
|
||||
### 1.1.x -> 1.2.0
|
||||
|
||||
Откат:
|
||||
- В версии 1.0.0 поменялся дисковый формат, поэтому OSD, созданные на версии >= 1.0.0,
|
||||
нельзя откатить до версии 0.9.x и более ранних.
|
||||
- В версиях ранее 0.8.0 нет vitastor-disk, значит, созданные им OSD нельзя откатить
|
||||
до 0.7.x или 0.6.x. :-)
|
||||
Обновляться с версий <= 1.1.x до версий >= 1.2.0, если вы используете EC n+k и k>=2,
|
||||
рекомендуется с временной остановкой кластера — сначала нужно остановить всех клиентов,
|
||||
потом все OSD, потом обновить и запустить всё обратно — из-за нескольких багов, которые
|
||||
могли приводить к некорректному чтению данных в деградированных EC-пулах.
|
||||
|
||||
### 0.8.7 -> 0.9.0
|
||||
|
||||
Версии <= 0.8.7 несовместимы с версиями >= 0.9.0, поэтому при обновлении с <= 0.8.7
|
||||
нужно сначала обновиться до 0.8.8 или 0.8.9, а уже потом до любых версий >= 0.9.x.
|
||||
Иначе клиентский ввод-вывод зависнет до завершения обновления.
|
||||
|
||||
### 0.5.x -> 0.6.x
|
||||
|
||||
Обновление с версий 0.5.x и более ранних до 0.6.x и более поздних не поддерживается.
|
||||
|
||||
## Откат версии
|
||||
|
||||
Откат (понижение версии) тоже свободно разрешён, кроме указанных ниже случаев:
|
||||
|
||||
### 1.8.0 -> 1.7.1
|
||||
|
||||
Перед понижением версии с >= 1.8.0 до <= 1.7.1 вы должны скопировать ключ
|
||||
etcd `/vitastor/pg/config` в `/vitastor/config/pgs`:
|
||||
|
||||
```
|
||||
etcdctl --endpoints=http://... get --print-value-only /vitastor/pg/config | \
|
||||
etcdctl --endpoints=http://... put /vitastor/config/pgs
|
||||
```
|
||||
|
||||
После этого можно просто установить более старые пакеты и перезапустить все сервисы.
|
||||
|
||||
Если вы откатили версию, не скопировав предварительно этот ключ - выполните "добавление всех
|
||||
OSD в исторические записи всех PG" из раздела [Восстановление потерянной конфигурации пулов](#восстановление-потерянной-конфигурации-пулов).
|
||||
|
||||
### 1.0.0 -> 0.9.x
|
||||
|
||||
В версии 1.0.0 поменялся дисковый формат, поэтому OSD, созданные на версии >= 1.0.0,
|
||||
нельзя откатить до версии 0.9.x и более ранних.
|
||||
|
||||
### 0.8.0 -> 0.7.x
|
||||
|
||||
В версиях ранее 0.8.0 нет vitastor-disk, значит, созданные им OSD не запустятся на
|
||||
более ранних версиях (0.4.x - 0.7.x). :-)
|
||||
|
||||
## Потребление памяти OSD
|
||||
|
||||
|
@@ -26,6 +26,8 @@ It supports the following commands:
|
||||
- [rm-osd](#rm-osd)
|
||||
- [osd-tree](#osd-tree)
|
||||
- [ls-osd](#ls-osd)
|
||||
- [modify-osd](#modify-osd)
|
||||
- [pg-list](#pg-list)
|
||||
- [create-pool](#create-pool)
|
||||
- [modify-pool](#modify-pool)
|
||||
- [ls-pools](#ls-pools)
|
||||
@@ -305,7 +307,25 @@ OSD PARENT UP SIZE USED% TAGS WEIGHT BLOCK BITMAP
|
||||
|
||||
`vitastor-cli modify-osd [--tags tag1,tag2,...] [--reweight <number>] [--noout true/false] <osd_number>`
|
||||
|
||||
Set OSD reweight, tags or noout flag. See detail description in [OSD config documentation](../config.pool.en.md#osd-settings).
|
||||
Set OSD reweight, tags or noout flag. See the detailed description in the [OSD config documentation](../config/pool.en.md#osd-settings).
|
||||
|
||||
## pg-list
|
||||
|
||||
`vitastor-cli pg-list|pg-ls|list-pg|ls-pg|ls-pgs [OPTIONS] [state1+state2] [^state3] [...]`
|
||||
|
||||
List PGs matching any of the listed state filters (a leading ^ or ! negates the filter). Options:
|
||||
|
||||
```
|
||||
--pool <pool name or number> Only list PGs of the given pool.
|
||||
--min <min pg number> Only list PGs with number >= min.
|
||||
--max <max pg number> Only list PGs with number <= max.
|
||||
```
|
||||
|
||||
Examples:
|
||||
|
||||
`vitastor-cli pg-list active+degraded`
|
||||
|
||||
`vitastor-cli pg-list ^active`
|
||||
|
||||
## create-pool
|
||||
|
||||
|
@@ -25,6 +25,8 @@ vitastor-cli - интерфейс командной строки для адм
|
||||
- [rm-osd](#rm-osd)
|
||||
- [osd-tree](#osd-tree)
|
||||
- [ls-osd](#ls-osd)
|
||||
- [modify-osd](#modify-osd)
|
||||
- [pg-list](#pg-list)
|
||||
- [create-pool](#create-pool)
|
||||
- [modify-pool](#modify-pool)
|
||||
- [ls-pools](#ls-pools)
|
||||
@@ -322,7 +324,26 @@ OSD PARENT UP SIZE USED% TAGS WEIGHT BLOCK BITMAP
|
||||
|
||||
`vitastor-cli modify-osd [--tags tag1,tag2,...] [--reweight <number>] [--noout true/false] <osd_number>`
|
||||
|
||||
Установить вес OSD, теги или флаг noout. Смотрите подробное описание в [документации настроек OSD](../config.pool.ru.md#настройки-osd).
|
||||
Установить вес OSD, теги или флаг noout. Смотрите подробное описание в [документации настроек OSD](../config/pool.ru.md#настройки-osd).
|
||||
|
||||
## pg-list
|
||||
|
||||
`vitastor-cli pg-list|pg-ls|list-pg|ls-pg|ls-pgs [OPTIONS] [state1+state2] [^state3] [...]`
|
||||
|
||||
Вывести список PG с состояниями, удовлетворяющими любому из переданных фильтров (^ или !
|
||||
в начале фильтра означает отрицание). Опции:
|
||||
|
||||
```
|
||||
--pool <pool name or number> Only list PGs of the given pool.
|
||||
--min <min pg number> Only list PGs with number >= min.
|
||||
--max <max pg number> Only list PGs with number <= max.
|
||||
```
|
||||
|
||||
Примеры:
|
||||
|
||||
`vitastor-cli pg-list active+degraded`
|
||||
|
||||
`vitastor-cli pg-list ^active`
|
||||
|
||||
## create-pool
|
||||
|
||||
|
@@ -1,8 +1,6 @@
|
||||
// Copyright (c) Vitaliy Filippov, 2019+
|
||||
// License: VNPL-1.1 (see README.md for details)
|
||||
|
||||
const fs = require('fs');
|
||||
|
||||
const AntiEtcd = require('antietcd');
|
||||
|
||||
const vitastor_persist_filter = require('./vitastor_persist_filter.js');
|
||||
@@ -15,21 +13,15 @@ class AntiEtcdAdapter
|
||||
let antietcd;
|
||||
if (config.use_antietcd)
|
||||
{
|
||||
let fileConfig = {};
|
||||
if (fs.existsSync(config.config_path||'/etc/vitastor/vitastor.conf'))
|
||||
{
|
||||
fileConfig = JSON.parse(fs.readFileSync(config.config_path||'/etc/vitastor/vitastor.conf', { encoding: 'utf-8' }));
|
||||
}
|
||||
let mergedConfig = { ...fileConfig, ...config };
|
||||
let cluster = mergedConfig.etcd_address;
|
||||
let cluster = config.etcd_address;
|
||||
if (!(cluster instanceof Array))
|
||||
cluster = cluster ? (''+(cluster||'')).split(/,+/) : [];
|
||||
cluster = Object.keys(cluster.reduce((a, url) =>
|
||||
{
|
||||
a[url.toLowerCase().replace(/^https?:\/\//, '').replace(/\/.*$/, '')] = true;
|
||||
a[url.toLowerCase().replace(/^(https?:\/\/)/, '').replace(/\/.*$/, '')] = true;
|
||||
return a;
|
||||
}, {}));
|
||||
const cfg_port = mergedConfig.antietcd_port;
|
||||
const cfg_port = config.antietcd_port;
|
||||
const is_local = local_ips(true).reduce((a, c) => { a[c] = true; return a; }, {});
|
||||
const selected = cluster.map(s => s.split(':', 2)).filter(ip => is_local[ip[0]] && (!cfg_port || ip[1] == cfg_port));
|
||||
if (selected.length > 1)
|
||||
@@ -42,12 +34,13 @@ class AntiEtcdAdapter
|
||||
const antietcd_config = {
|
||||
ip: selected[0][0],
|
||||
port: selected[0][1],
|
||||
data: mergedConfig.antietcd_data_file || ((mergedConfig.antietcd_data_dir || '/var/lib/vitastor') + '/mon_'+selected[0][1]+'.json.gz'),
|
||||
persist_filter: vitastor_persist_filter(mergedConfig.etcd_prefix || '/vitastor'),
|
||||
data: config.antietcd_data_file || ((config.antietcd_data_dir || '/var/lib/vitastor') + '/mon_'+selected[0][1]+'.json.gz'),
|
||||
persist_filter: vitastor_persist_filter({ vitastor_prefix: config.etcd_prefix || '/vitastor' }),
|
||||
node_id: selected[0][0]+':'+selected[0][1], // node_id = ip:port
|
||||
cluster: (cluster.length == 1 ? null : cluster),
|
||||
cluster_key: (mergedConfig.etcd_prefix || '/vitastor'),
|
||||
cluster: (cluster.length == 1 ? null : cluster.reduce((a, c) => { a[c] = "http://"+c; return a; }, {})),
|
||||
cluster_key: (config.etcd_prefix || '/vitastor'),
|
||||
stale_read: 1,
|
||||
log_level: 1,
|
||||
};
|
||||
for (const key in config)
|
||||
{
|
||||
@@ -60,6 +53,7 @@ class AntiEtcdAdapter
|
||||
}
|
||||
}
|
||||
}
|
||||
console.log('Starting Antietcd node '+antietcd_config.node_id);
|
||||
antietcd = new AntiEtcd(antietcd_config);
|
||||
await antietcd.start();
|
||||
}
|
||||
@@ -128,20 +122,23 @@ class AntiEtcdAdapter
|
||||
|
||||
async become_master()
|
||||
{
|
||||
if (!this.antietcd.raft)
|
||||
if (!this.antietcd.cluster)
|
||||
{
|
||||
console.log('Running in non-clustered mode');
|
||||
}
|
||||
else
|
||||
{
|
||||
console.log('Waiting to become master');
|
||||
await new Promise(ok => this.on_leader.push(ok));
|
||||
if (this.antietcd.cluster.raft.state !== 'leader')
|
||||
{
|
||||
await new Promise(ok => this.on_leader.push(ok));
|
||||
}
|
||||
}
|
||||
const state = { ...this.mon.get_mon_state(), id: ''+this.mon.etcd_lease_id };
|
||||
await this.etcd_call('/kv/txn', {
|
||||
success: [ { requestPut: { key: b64(this.mon.config.etcd_prefix+'/mon/master'), value: b64(JSON.stringify(state)), lease: ''+this.mon.etcd_lease_id } } ],
|
||||
}, this.mon.config.etcd_start_timeout, 0);
|
||||
if (this.antietcd.raft)
|
||||
if (this.antietcd.cluster)
|
||||
{
|
||||
console.log('Became master');
|
||||
}
|
||||
|
@@ -6,7 +6,7 @@ const etcd_nonempty_keys = {
|
||||
'config/global': 1,
|
||||
'config/node_placement': 1,
|
||||
'config/pools': 1,
|
||||
'config/pgs': 1,
|
||||
'pg/config': 1,
|
||||
'history/last_clean_pgs': 1,
|
||||
'stats': 1,
|
||||
};
|
||||
@@ -15,7 +15,8 @@ const etcd_allow = new RegExp('^'+[
|
||||
'config/node_placement',
|
||||
'config/pools',
|
||||
'config/osd/[1-9]\\d*',
|
||||
'config/pgs',
|
||||
'config/pgs', // old name
|
||||
'pg/config',
|
||||
'config/inode/[1-9]\\d*/[1-9]\\d*',
|
||||
'osd/state/[1-9]\\d*',
|
||||
'osd/stats/[1-9]\\d*',
|
||||
@@ -24,7 +25,8 @@ const etcd_allow = new RegExp('^'+[
|
||||
'mon/master',
|
||||
'mon/member/[a-f0-9]+',
|
||||
'pg/state/[1-9]\\d*/[1-9]\\d*',
|
||||
'pg/stats/[1-9]\\d*/[1-9]\\d*',
|
||||
'pg/stats/[1-9]\\d*/[1-9]\\d*', // old name
|
||||
'pgstats/[1-9]\\d*/[1-9]\\d*',
|
||||
'pg/history/[1-9]\\d*/[1-9]\\d*',
|
||||
'history/last_clean_pgs',
|
||||
'inode/stats/[1-9]\\d*/\\d+',
|
||||
@@ -205,19 +207,6 @@ const etcd_tree = {
|
||||
osd: {
|
||||
/* <id>: { reweight?: 1, tags?: [ 'nvme', ... ], noout?: true }, ... */
|
||||
},
|
||||
/* pgs: {
|
||||
hash: string,
|
||||
items: {
|
||||
<pool_id>: {
|
||||
<pg_id>: {
|
||||
osd_set: [ 1, 2, 3 ],
|
||||
primary: 1,
|
||||
pause: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
}, */
|
||||
pgs: {},
|
||||
/* inode: {
|
||||
<pool_id>: {
|
||||
<inode_t>: {
|
||||
@@ -290,6 +279,19 @@ const etcd_tree = {
|
||||
},
|
||||
},
|
||||
pg: {
|
||||
/* config: {
|
||||
hash: string,
|
||||
items: {
|
||||
<pool_id>: {
|
||||
<pg_id>: {
|
||||
osd_set: [ 1, 2, 3 ],
|
||||
primary: 1,
|
||||
pause: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
}, */
|
||||
config: {},
|
||||
state: {
|
||||
/* <pool_id>: {
|
||||
<pg_id>: {
|
||||
@@ -300,18 +302,6 @@ const etcd_tree = {
|
||||
}
|
||||
}, */
|
||||
},
|
||||
stats: {
|
||||
/* <pool_id>: {
|
||||
<pg_id>: {
|
||||
object_count: uint64_t,
|
||||
clean_count: uint64_t,
|
||||
misplaced_count: uint64_t,
|
||||
degraded_count: uint64_t,
|
||||
incomplete_count: uint64_t,
|
||||
write_osd_set: osd_num_t[],
|
||||
},
|
||||
}, */
|
||||
},
|
||||
history: {
|
||||
/* <pool_id>: {
|
||||
<pg_id>: {
|
||||
@@ -323,6 +313,18 @@ const etcd_tree = {
|
||||
}, */
|
||||
},
|
||||
},
|
||||
pgstats: {
|
||||
/* <pool_id>: {
|
||||
<pg_id>: {
|
||||
object_count: uint64_t,
|
||||
clean_count: uint64_t,
|
||||
misplaced_count: uint64_t,
|
||||
degraded_count: uint64_t,
|
||||
incomplete_count: uint64_t,
|
||||
write_osd_set: osd_num_t[],
|
||||
},
|
||||
}, */
|
||||
},
|
||||
inode: {
|
||||
stats: {
|
||||
/* <pool_id>: {
|
||||
|
@@ -30,7 +30,11 @@ async function create_http_server(cfg, handler)
|
||||
}
|
||||
try
|
||||
{
|
||||
let err;
|
||||
server.once('error', e => err = e);
|
||||
server.listen(cfg.mon_http_port || 8060, cfg.mon_http_ip || undefined);
|
||||
if (err)
|
||||
throw err;
|
||||
}
|
||||
catch (e)
|
||||
{
|
||||
|
140
mon/mon.js
140
mon/mon.js
@@ -22,12 +22,13 @@ class Mon
|
||||
{
|
||||
static async run_forever(config)
|
||||
{
|
||||
let mergedConfig = config;
|
||||
if (fs.existsSync(config.config_path||'/etc/vitastor/vitastor.conf'))
|
||||
{
|
||||
const fileConfig = JSON.parse(fs.readFileSync(config.config_path||'/etc/vitastor/vitastor.conf', { encoding: 'utf-8' }));
|
||||
config = { ...fileConfig, config };
|
||||
mergedConfig = { ...fileConfig, ...config };
|
||||
}
|
||||
let antietcd = await AntiEtcdAdapter.start_antietcd(config);
|
||||
let antietcd = await AntiEtcdAdapter.start_antietcd(mergedConfig);
|
||||
let mon;
|
||||
const run = () =>
|
||||
{
|
||||
@@ -74,6 +75,8 @@ class Mon
|
||||
this.prev_stats = { osd_stats: {}, osd_diff: {} };
|
||||
this.recheck_pgs_active = false;
|
||||
this.watcher_active = false;
|
||||
this.old_pg_config = false;
|
||||
this.old_pg_stats_seen = false;
|
||||
}
|
||||
|
||||
async start()
|
||||
@@ -121,7 +124,7 @@ class Mon
|
||||
!Number(this.state.pool.stats[pool_id].pg_real_size))
|
||||
{
|
||||
// Generate missing data in etcd
|
||||
this.state.config.pgs.hash = null;
|
||||
this.state.pg.config.hash = null;
|
||||
break;
|
||||
}
|
||||
}
|
||||
@@ -200,10 +203,15 @@ class Mon
|
||||
stats_changed = true;
|
||||
changed = true;
|
||||
}
|
||||
else if (key.substr(0, 11) == '/osd/stats/' || key.substr(0, 10) == '/pg/stats/' || key.substr(0, 16) == '/osd/inodestats/')
|
||||
else if (key.substr(0, 11) == '/osd/stats/' || key.substr(0, 9) == '/pgstats/' || key.substr(0, 16) == '/osd/inodestats/')
|
||||
{
|
||||
stats_changed = true;
|
||||
}
|
||||
else if (key.substr(0, 10) == '/pg/stats/')
|
||||
{
|
||||
this.old_pg_stats_seen = true;
|
||||
stats_changed = true;
|
||||
}
|
||||
else if (key.substr(0, 10) == '/pg/state/')
|
||||
{
|
||||
pg_states_changed = true;
|
||||
@@ -284,7 +292,7 @@ class Mon
|
||||
continue next_pool;
|
||||
}
|
||||
}
|
||||
new_clean_pgs.items[pool_id] = this.state.config.pgs.items[pool_id];
|
||||
new_clean_pgs.items[pool_id] = this.state.pg.config.items[pool_id];
|
||||
}
|
||||
this.state.history.last_clean_pgs = new_clean_pgs;
|
||||
await this.etcd.etcd_call('/kv/txn', {
|
||||
@@ -395,6 +403,50 @@ class Mon
|
||||
this.parse_kv(kv);
|
||||
}
|
||||
}
|
||||
if (Object.keys((this.state.config.pgs||{}).items||{}).length)
|
||||
{
|
||||
// Support seamless upgrade to new OSDs
|
||||
if (!Object.keys((this.state.pg.config||{}).items||{}).length)
|
||||
{
|
||||
const pgs = JSON.stringify(this.state.config.pgs);
|
||||
this.state.pg.config = JSON.parse(pgs);
|
||||
const res = await this.etcd.etcd_call('/kv/txn', {
|
||||
success: [
|
||||
{ requestPut: { key: b64(this.config.etcd_prefix+'/pg/config'), value: b64(pgs) } },
|
||||
],
|
||||
compare: [
|
||||
{ key: b64(this.config.etcd_prefix+'/pg/config'), target: 'MOD', mod_revision: ''+this.etcd_watch_revision, result: 'LESS' },
|
||||
],
|
||||
}, this.config.etcd_mon_timeout, this.config.etcd_mon_retries);
|
||||
if (!res.succeeded)
|
||||
throw new Error('Failed to duplicate old PG config to new PG config');
|
||||
}
|
||||
this.old_pg_config = true;
|
||||
this.old_pg_config_timer = setInterval(() => this.check_clear_old_config().catch(console.error),
|
||||
this.config.old_pg_config_clear_interval||3600000);
|
||||
}
|
||||
}
|
||||
|
||||
async check_clear_old_config()
|
||||
{
|
||||
if (this.old_pg_config && this.old_pg_stats_seen)
|
||||
{
|
||||
this.old_pg_stats_seen = false;
|
||||
return;
|
||||
}
|
||||
if (this.old_pg_config)
|
||||
{
|
||||
await this.etcd.etcd_call('/kv/txn', { success: [
|
||||
{ requestDeleteRange: { key: b64(this.config.etcd_prefix+'/config/pgs') } },
|
||||
{ requestDeleteRange: { key: b64(this.config.etcd_prefix+'/pg/stats/'), range_end: b64(this.config.etcd_prefix+'/pg/stats0') } },
|
||||
] }, this.config.etcd_mon_timeout, this.config.etcd_mon_retries);
|
||||
this.old_pg_config = false;
|
||||
}
|
||||
if (this.old_pg_config_timer)
|
||||
{
|
||||
clearInterval(this.old_pg_config_timer);
|
||||
this.old_pg_config_timer = null;
|
||||
}
|
||||
}
|
||||
|
||||
all_osds()
|
||||
@@ -405,7 +457,7 @@ class Mon
|
||||
async stop_all_pgs(pool_id)
|
||||
{
|
||||
let has_online = false, paused = true;
|
||||
for (const pg in this.state.config.pgs.items[pool_id]||{})
|
||||
for (const pg in this.state.pg.config.items[pool_id]||{})
|
||||
{
|
||||
// FIXME: Change all (||{}) to ?. (optional chaining) at some point
|
||||
const cur_state = (((this.state.pg.state[pool_id]||{})[pg]||{}).state||[]).join(',');
|
||||
@@ -413,7 +465,7 @@ class Mon
|
||||
{
|
||||
has_online = true;
|
||||
}
|
||||
if (!this.state.config.pgs.items[pool_id][pg].pause)
|
||||
if (!this.state.pg.config.items[pool_id][pg].pause)
|
||||
{
|
||||
paused = false;
|
||||
}
|
||||
@@ -421,7 +473,7 @@ class Mon
|
||||
if (!paused)
|
||||
{
|
||||
console.log('Stopping all PGs for pool '+pool_id+' before changing PG count');
|
||||
const new_cfg = JSON.parse(JSON.stringify(this.state.config.pgs));
|
||||
const new_cfg = JSON.parse(JSON.stringify(this.state.pg.config));
|
||||
for (const pg in new_cfg.items[pool_id])
|
||||
{
|
||||
new_cfg.items[pool_id][pg].pause = true;
|
||||
@@ -429,22 +481,26 @@ class Mon
|
||||
// Check that no OSDs change their state before we pause PGs
|
||||
// Doing this we make sure that OSDs don't wake up in the middle of our "transaction"
|
||||
// and can't see the old PG configuration
|
||||
const checks = [];
|
||||
const checks = [
|
||||
{ key: b64(this.config.etcd_prefix+'/mon/master'), target: 'LEASE', lease: ''+this.etcd_lease_id },
|
||||
{ key: b64(this.config.etcd_prefix+'/pg/config'), target: 'MOD', mod_revision: ''+this.etcd_watch_revision, result: 'LESS' },
|
||||
];
|
||||
for (const osd_num of this.all_osds())
|
||||
{
|
||||
const key = b64(this.config.etcd_prefix+'/osd/state/'+osd_num);
|
||||
checks.push({ key, target: 'MOD', result: 'LESS', mod_revision: ''+this.etcd_watch_revision });
|
||||
}
|
||||
await this.etcd.etcd_call('/kv/txn', {
|
||||
compare: [
|
||||
{ key: b64(this.config.etcd_prefix+'/mon/master'), target: 'LEASE', lease: ''+this.etcd_lease_id },
|
||||
{ key: b64(this.config.etcd_prefix+'/config/pgs'), target: 'MOD', mod_revision: ''+this.etcd_watch_revision, result: 'LESS' },
|
||||
...checks,
|
||||
],
|
||||
const txn = {
|
||||
compare: checks,
|
||||
success: [
|
||||
{ requestPut: { key: b64(this.config.etcd_prefix+'/config/pgs'), value: b64(JSON.stringify(new_cfg)) } },
|
||||
{ requestPut: { key: b64(this.config.etcd_prefix+'/pg/config'), value: b64(JSON.stringify(new_cfg)) } },
|
||||
],
|
||||
}, this.config.etcd_mon_timeout, 0);
|
||||
};
|
||||
if (this.old_pg_config)
|
||||
{
|
||||
txn.success.push({ requestPut: { key: b64(this.config.etcd_prefix+'/config/pgs'), value: b64(JSON.stringify(new_cfg)) } });
|
||||
}
|
||||
await this.etcd.etcd_call('/kv/txn', txn, this.config.etcd_mon_timeout, 0);
|
||||
return false;
|
||||
}
|
||||
return !has_online;
|
||||
@@ -472,7 +528,7 @@ class Mon
|
||||
pools: this.state.config.pools,
|
||||
};
|
||||
const tree_hash = sha1hex(stableStringify(tree_cfg));
|
||||
if (this.state.config.pgs.hash != tree_hash)
|
||||
if (this.state.pg.config.hash != tree_hash)
|
||||
{
|
||||
// Something has changed
|
||||
console.log('Pool configuration or OSD tree changed, re-optimizing');
|
||||
@@ -513,10 +569,10 @@ class Mon
|
||||
else
|
||||
{
|
||||
// Nothing changed, but we still want to recheck the distribution of primaries
|
||||
let new_config_pgs = recheck_primary(this.state, this.config, up_osds, osd_tree);
|
||||
if (new_config_pgs)
|
||||
let new_pg_config = recheck_primary(this.state, this.config, up_osds, osd_tree);
|
||||
if (new_pg_config)
|
||||
{
|
||||
const ok = await this.save_pg_config(new_config_pgs);
|
||||
const ok = await this.save_pg_config(new_pg_config);
|
||||
if (ok)
|
||||
console.log('PG configuration successfully changed');
|
||||
else
|
||||
@@ -531,12 +587,12 @@ class Mon
|
||||
|
||||
async apply_pool_pgs(results, up_osds, osd_tree, tree_hash)
|
||||
{
|
||||
for (const pool_id in (this.state.config.pgs||{}).items||{})
|
||||
for (const pool_id in (this.state.pg.config||{}).items||{})
|
||||
{
|
||||
// We should stop all PGs when deleting a pool or changing its PG count
|
||||
if (!this.state.config.pools[pool_id] ||
|
||||
this.state.config.pgs.items[pool_id] && this.state.config.pools[pool_id].pg_count !=
|
||||
Object.keys(this.state.config.pgs.items[pool_id]).reduce((a, c) => (a < (0|c) ? (0|c) : a), 0))
|
||||
this.state.pg.config.items[pool_id] && this.state.config.pools[pool_id].pg_count !=
|
||||
Object.keys(this.state.pg.config.items[pool_id]).reduce((a, c) => (a < (0|c) ? (0|c) : a), 0))
|
||||
{
|
||||
if (!await this.stop_all_pgs(pool_id))
|
||||
{
|
||||
@@ -544,22 +600,22 @@ class Mon
|
||||
}
|
||||
}
|
||||
}
|
||||
const new_config_pgs = JSON.parse(JSON.stringify(this.state.config.pgs));
|
||||
const new_pg_config = JSON.parse(JSON.stringify(this.state.pg.config));
|
||||
const etcd_request = { compare: [], success: [] };
|
||||
for (const pool_id in (new_config_pgs||{}).items||{})
|
||||
for (const pool_id in (new_pg_config||{}).items||{})
|
||||
{
|
||||
if (!this.state.config.pools[pool_id])
|
||||
{
|
||||
const prev_pgs = [];
|
||||
for (const pg in new_config_pgs.items[pool_id]||{})
|
||||
for (const pg in new_pg_config.items[pool_id]||{})
|
||||
{
|
||||
prev_pgs[pg-1] = new_config_pgs.items[pool_id][pg].osd_set;
|
||||
prev_pgs[pg-1] = new_pg_config.items[pool_id][pg].osd_set;
|
||||
}
|
||||
// Also delete pool statistics
|
||||
etcd_request.success.push({ requestDeleteRange: {
|
||||
key: b64(this.config.etcd_prefix+'/pool/stats/'+pool_id),
|
||||
} });
|
||||
save_new_pgs_txn(new_config_pgs, etcd_request, this.state, this.config.etcd_prefix,
|
||||
save_new_pgs_txn(new_pg_config, etcd_request, this.state, this.config.etcd_prefix,
|
||||
this.etcd_watch_revision, pool_id, up_osds, osd_tree, prev_pgs, [], []);
|
||||
}
|
||||
}
|
||||
@@ -568,7 +624,7 @@ class Mon
|
||||
const pool_id = pool_res.pool_id;
|
||||
const pool_cfg = this.state.config.pools[pool_id];
|
||||
let pg_history = [];
|
||||
for (const pg in ((this.state.config.pgs.items||{})[pool_id]||{}))
|
||||
for (const pg in ((this.state.pg.config.items||{})[pool_id]||{}))
|
||||
{
|
||||
if (this.state.pg.history[pool_id] &&
|
||||
this.state.pg.history[pool_id][pg])
|
||||
@@ -577,9 +633,9 @@ class Mon
|
||||
}
|
||||
}
|
||||
const real_prev_pgs = [];
|
||||
for (const pg in ((this.state.config.pgs.items||{})[pool_id]||{}))
|
||||
for (const pg in ((this.state.pg.config.items||{})[pool_id]||{}))
|
||||
{
|
||||
real_prev_pgs[pg-1] = [ ...this.state.config.pgs.items[pool_id][pg].osd_set ];
|
||||
real_prev_pgs[pg-1] = [ ...this.state.pg.config.items[pool_id][pg].osd_set ];
|
||||
}
|
||||
if (real_prev_pgs.length > 0 && real_prev_pgs.length != pool_res.pgs.length)
|
||||
{
|
||||
@@ -590,8 +646,8 @@ class Mon
|
||||
pg_history = scale_pg_history(pg_history, real_prev_pgs, pool_res.pgs);
|
||||
// Drop stats
|
||||
etcd_request.success.push({ requestDeleteRange: {
|
||||
key: b64(this.config.etcd_prefix+'/pg/stats/'+pool_id+'/'),
|
||||
range_end: b64(this.config.etcd_prefix+'/pg/stats/'+pool_id+'0'),
|
||||
key: b64(this.config.etcd_prefix+'/pgstats/'+pool_id+'/'),
|
||||
range_end: b64(this.config.etcd_prefix+'/pgstats/'+pool_id+'0'),
|
||||
} });
|
||||
}
|
||||
const stats = {
|
||||
@@ -602,22 +658,26 @@ class Mon
|
||||
key: b64(this.config.etcd_prefix+'/pool/stats/'+pool_id),
|
||||
value: b64(JSON.stringify(stats)),
|
||||
} });
|
||||
save_new_pgs_txn(new_config_pgs, etcd_request, this.state, this.config.etcd_prefix,
|
||||
save_new_pgs_txn(new_pg_config, etcd_request, this.state, this.config.etcd_prefix,
|
||||
this.etcd_watch_revision, pool_id, up_osds, osd_tree, real_prev_pgs, pool_res.pgs, pg_history);
|
||||
}
|
||||
new_config_pgs.hash = tree_hash;
|
||||
return await this.save_pg_config(new_config_pgs, etcd_request);
|
||||
new_pg_config.hash = tree_hash;
|
||||
return await this.save_pg_config(new_pg_config, etcd_request);
|
||||
}
|
||||
|
||||
async save_pg_config(new_config_pgs, etcd_request = { compare: [], success: [] })
|
||||
async save_pg_config(new_pg_config, etcd_request = { compare: [], success: [] })
|
||||
{
|
||||
etcd_request.compare.push(
|
||||
{ key: b64(this.config.etcd_prefix+'/mon/master'), target: 'LEASE', lease: ''+this.etcd_lease_id },
|
||||
{ key: b64(this.config.etcd_prefix+'/config/pgs'), target: 'MOD', mod_revision: ''+this.etcd_watch_revision, result: 'LESS' },
|
||||
{ key: b64(this.config.etcd_prefix+'/pg/config'), target: 'MOD', mod_revision: ''+this.etcd_watch_revision, result: 'LESS' },
|
||||
);
|
||||
etcd_request.success.push(
|
||||
{ requestPut: { key: b64(this.config.etcd_prefix+'/config/pgs'), value: b64(JSON.stringify(new_config_pgs)) } },
|
||||
{ requestPut: { key: b64(this.config.etcd_prefix+'/pg/config'), value: b64(JSON.stringify(new_pg_config)) } },
|
||||
);
|
||||
if (this.old_pg_config)
|
||||
{
|
||||
etcd_request.success.push({ requestPut: { key: b64(this.config.etcd_prefix+'/config/pgs'), value: b64(JSON.stringify(new_pg_config)) } });
|
||||
}
|
||||
const txn_res = await this.etcd.etcd_call('/kv/txn', etcd_request, this.config.etcd_mon_timeout, 0);
|
||||
return txn_res.succeeded;
|
||||
}
|
||||
|
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "vitastor-mon",
|
||||
"version": "1.7.0",
|
||||
"version": "1.8.0",
|
||||
"description": "Vitastor SDS monitor service",
|
||||
"main": "mon-main.js",
|
||||
"scripts": {
|
||||
@@ -9,7 +9,7 @@
|
||||
"author": "Vitaliy Filippov",
|
||||
"license": "UNLICENSED",
|
||||
"dependencies": {
|
||||
"antietcd": "^1.0.5",
|
||||
"antietcd": "^1.1.0",
|
||||
"sprintf-js": "^1.1.2",
|
||||
"ws": "^7.2.5"
|
||||
},
|
||||
|
@@ -57,7 +57,7 @@ function pick_primary(pool_config, osd_set, up_osds, aff_osds)
|
||||
|
||||
function recheck_primary(state, global_config, up_osds, osd_tree)
|
||||
{
|
||||
let new_config_pgs;
|
||||
let new_pg_config;
|
||||
for (const pool_id in state.config.pools)
|
||||
{
|
||||
const pool_cfg = state.config.pools[pool_id];
|
||||
@@ -69,30 +69,30 @@ function recheck_primary(state, global_config, up_osds, osd_tree)
|
||||
reset_rng();
|
||||
for (let pg_num = 1; pg_num <= pool_cfg.pg_count; pg_num++)
|
||||
{
|
||||
if (!state.config.pgs.items[pool_id])
|
||||
if (!state.pg.config.items[pool_id])
|
||||
{
|
||||
continue;
|
||||
}
|
||||
const pg_cfg = state.config.pgs.items[pool_id][pg_num];
|
||||
const pg_cfg = state.pg.config.items[pool_id][pg_num];
|
||||
if (pg_cfg)
|
||||
{
|
||||
const new_primary = pick_primary(state.config.pools[pool_id], pg_cfg.osd_set, up_osds, aff_osds);
|
||||
if (pg_cfg.primary != new_primary)
|
||||
{
|
||||
if (!new_config_pgs)
|
||||
if (!new_pg_config)
|
||||
{
|
||||
new_config_pgs = JSON.parse(JSON.stringify(state.config.pgs));
|
||||
new_pg_config = JSON.parse(JSON.stringify(state.pg.config));
|
||||
}
|
||||
console.log(
|
||||
`Moving pool ${pool_id} (${pool_cfg.name || 'unnamed'}) PG ${pg_num}`+
|
||||
` primary OSD from ${pg_cfg.primary} to ${new_primary}`
|
||||
);
|
||||
new_config_pgs.items[pool_id][pg_num].primary = new_primary;
|
||||
new_pg_config.items[pool_id][pg_num].primary = new_primary;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return new_config_pgs;
|
||||
return new_pg_config;
|
||||
}
|
||||
|
||||
function save_new_pgs_txn(save_to, request, state, etcd_prefix, etcd_watch_revision, pool_id, up_osds, osd_tree, prev_pgs, new_pgs, pg_history)
|
||||
@@ -185,10 +185,10 @@ async function generate_pool_pgs(state, global_config, pool_id, osd_tree, levels
|
||||
}
|
||||
if (!prev_pgs.length)
|
||||
{
|
||||
// Fall back to config/pgs if it's empty
|
||||
for (const pg in ((state.config.pgs.items||{})[pool_id]||{}))
|
||||
// Fall back to pg/config if it's empty
|
||||
for (const pg in ((state.pg.config.items||{})[pool_id]||{}))
|
||||
{
|
||||
prev_pgs[pg-1] = [ ...state.config.pgs.items[pool_id][pg].osd_set ];
|
||||
prev_pgs[pg-1] = [ ...state.pg.config.items[pool_id][pg].osd_set ];
|
||||
}
|
||||
}
|
||||
const old_pg_count = prev_pgs.length;
|
||||
@@ -205,8 +205,8 @@ async function generate_pool_pgs(state, global_config, pool_id, osd_tree, levels
|
||||
ordered: pool_cfg.scheme != 'replicated',
|
||||
};
|
||||
let optimize_result;
|
||||
// Re-shuffle PGs if config/pgs.hash is empty
|
||||
if (old_pg_count > 0 && state.config.pgs.hash)
|
||||
// Re-shuffle PGs if pg/config.hash is empty
|
||||
if (old_pg_count > 0 && state.pg.config.hash)
|
||||
{
|
||||
if (prev_pgs.length != pool_cfg.pg_count)
|
||||
{
|
||||
|
@@ -166,7 +166,7 @@ function export_prometheus_metrics(st)
|
||||
res += `vitastor_pool_used_raw_tb{${pool_label}} ${pool_stat.used_raw_tb||0}\n`;
|
||||
|
||||
// PG states and pool up/down status
|
||||
const real_pg_count = (Object.keys(((st.config.pgs||{}).items||{})[pool_id]||{}).length) || (0|pool_cfg.pg_count);
|
||||
const real_pg_count = (Object.keys(((st.pg.config||{}).items||{})[pool_id]||{}).length) || (0|pool_cfg.pg_count);
|
||||
const per_state = {
|
||||
active: 0,
|
||||
starting: 0,
|
||||
|
17
mon/stats.js
17
mon/stats.js
@@ -100,10 +100,19 @@ function sum_object_counts(state, global_config)
|
||||
{
|
||||
const object_counts = { object: 0n, clean: 0n, misplaced: 0n, degraded: 0n, incomplete: 0n };
|
||||
const object_bytes = { object: 0n, clean: 0n, misplaced: 0n, degraded: 0n, incomplete: 0n };
|
||||
for (const pool_id in state.pg.stats)
|
||||
let pgstats = state.pgstats;
|
||||
if (state.pg.stats)
|
||||
{
|
||||
// Merge with old stats for seamless transition to new stats
|
||||
for (const pool_id in state.pg.stats)
|
||||
{
|
||||
pgstats[pool_id] = { ...(state.pg.stats[pool_id] || {}), ...(pgstats[pool_id] || {}) };
|
||||
}
|
||||
}
|
||||
for (const pool_id in pgstats)
|
||||
{
|
||||
let object_size = 0;
|
||||
for (const osd_num of state.pg.stats[pool_id].write_osd_set||[])
|
||||
for (const osd_num of pgstats[pool_id].write_osd_set||[])
|
||||
{
|
||||
if (osd_num && state.osd.stats[osd_num] && state.osd.stats[osd_num].block_size)
|
||||
{
|
||||
@@ -121,9 +130,9 @@ function sum_object_counts(state, global_config)
|
||||
object_size *= ((pool_cfg.pg_size||0) - (pool_cfg.parity_chunks||0));
|
||||
}
|
||||
object_size = BigInt(object_size);
|
||||
for (const pg_num in state.pg.stats[pool_id])
|
||||
for (const pg_num in pgstats[pool_id])
|
||||
{
|
||||
const st = state.pg.stats[pool_id][pg_num];
|
||||
const st = pgstats[pool_id][pg_num];
|
||||
if (st)
|
||||
{
|
||||
for (const k in object_counts)
|
||||
|
@@ -24,7 +24,7 @@ function vitastor_persist_filter(cfg)
|
||||
catch (e)
|
||||
{
|
||||
console.error('invalid JSON in '+key+' = '+value+': '+e);
|
||||
value = {};
|
||||
value = '{}';
|
||||
}
|
||||
}
|
||||
else
|
||||
@@ -35,7 +35,8 @@ function vitastor_persist_filter(cfg)
|
||||
}
|
||||
else if (key.substr(0, prefix.length+'/osd/'.length) == prefix+'/osd/' ||
|
||||
key.substr(0, prefix.length+'/inode/stats/'.length) == prefix+'/inode/stats/' ||
|
||||
key.substr(0, prefix.length+'/pg/stats/'.length) == prefix+'/pg/stats/' ||
|
||||
key.substr(0, prefix.length+'/pg/stats/'.length) == prefix+'/pg/stats/' || // old name
|
||||
key.substr(0, prefix.length+'/pgstats/'.length) == prefix+'/pgstats/' ||
|
||||
key.substr(0, prefix.length+'/pool/stats/'.length) == prefix+'/pool/stats/' ||
|
||||
key == prefix+'/stats')
|
||||
{
|
||||
|
@@ -24,7 +24,7 @@ NAN_MODULE_INIT(InitAddon)
|
||||
|
||||
tpl = Nan::New<v8::FunctionTemplate>(NodeVitastorImage::Create);
|
||||
tpl->SetClassName(Nan::New("Image").ToLocalChecked());
|
||||
tpl->InstanceTemplate()->SetInternalFieldCount(2);
|
||||
tpl->InstanceTemplate()->SetInternalFieldCount(1);
|
||||
|
||||
Nan::SetPrototypeMethod(tpl, "read", NodeVitastorImage::Read);
|
||||
Nan::SetPrototypeMethod(tpl, "write", NodeVitastorImage::Write);
|
||||
@@ -67,7 +67,7 @@ NAN_MODULE_INIT(InitAddon)
|
||||
|
||||
tpl = Nan::New<v8::FunctionTemplate>(NodeVitastorKVListing::Create);
|
||||
tpl->SetClassName(Nan::New("KVListing").ToLocalChecked());
|
||||
tpl->InstanceTemplate()->SetInternalFieldCount(2);
|
||||
tpl->InstanceTemplate()->SetInternalFieldCount(1);
|
||||
|
||||
Nan::SetPrototypeMethod(tpl, "next", NodeVitastorKVListing::Next);
|
||||
Nan::SetPrototypeMethod(tpl, "close", NodeVitastorKVListing::Close);
|
||||
|
@@ -13,7 +13,8 @@
|
||||
'<!(pkg-config --cflags vitastor)'
|
||||
],
|
||||
'libraries': [
|
||||
'<!(pkg-config --libs vitastor)'
|
||||
'<!(pkg-config --libs vitastor)',
|
||||
'-lvitastor_kv'
|
||||
]
|
||||
}
|
||||
]
|
||||
|
@@ -18,12 +18,15 @@
|
||||
class NodeVitastorRequest: public Nan::AsyncResource
|
||||
{
|
||||
public:
|
||||
NodeVitastorRequest(v8::Local<v8::Function> cb): Nan::AsyncResource("NodeVitastorRequest")
|
||||
NodeVitastorRequest(NodeVitastor *cli, v8::Local<v8::Function> cb): Nan::AsyncResource("NodeVitastorRequest")
|
||||
{
|
||||
this->cli = cli;
|
||||
callback.Reset(cb);
|
||||
}
|
||||
|
||||
iovec iov;
|
||||
std::vector<iovec> iov_list;
|
||||
NodeVitastor *cli = NULL;
|
||||
NodeVitastorImage *img = NULL;
|
||||
int op = 0;
|
||||
uint64_t offset = 0, len = 0, version = 0;
|
||||
@@ -31,6 +34,13 @@ public:
|
||||
Nan::Persistent<v8::Function> callback;
|
||||
};
|
||||
|
||||
static uint64_t get_ui64(const v8::Local<v8::Value> & val)
|
||||
{
|
||||
if (val->IsBigInt())
|
||||
return val->ToBigInt(Nan::GetCurrentContext()).ToLocalChecked()->Uint64Value();
|
||||
return Nan::To<int64_t>(val).FromJust();
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////
|
||||
// NodeVitastor
|
||||
//////////////////////////////////////////////////
|
||||
@@ -43,6 +53,7 @@ NodeVitastor::NodeVitastor(): Nan::ObjectWrap()
|
||||
|
||||
NodeVitastor::~NodeVitastor()
|
||||
{
|
||||
TRACE("NodeVitastor: destructor");
|
||||
uv_poll_stop(&poll_watcher);
|
||||
vitastor_c_destroy(c);
|
||||
c = NULL;
|
||||
@@ -74,9 +85,10 @@ NAN_METHOD(NodeVitastor::Create)
|
||||
if (res >= 0)
|
||||
{
|
||||
cli->eventfd = res;
|
||||
res = uv_poll_init_socket(uv_default_loop(), &cli->poll_watcher, cli->eventfd);
|
||||
res = uv_poll_init(uv_default_loop(), &cli->poll_watcher, cli->eventfd);
|
||||
if (res >= 0)
|
||||
res = uv_poll_start(&cli->poll_watcher, UV_READABLE, on_io_readable);
|
||||
on_io_readable(&cli->poll_watcher, 0, UV_READABLE);
|
||||
}
|
||||
if (res < 0)
|
||||
{
|
||||
@@ -97,15 +109,14 @@ void NodeVitastor::on_io_readable(uv_poll_t* handle, int status, int revents)
|
||||
if (revents & UV_READABLE)
|
||||
{
|
||||
NodeVitastor* self = (NodeVitastor*)handle->data;
|
||||
std::unique_lock<std::mutex> lock(self->mu);
|
||||
vitastor_c_uring_handle_events(self->c);
|
||||
}
|
||||
}
|
||||
|
||||
static NodeVitastorRequest* getReadRequest(const Nan::FunctionCallbackInfo<v8::Value> & info, int argpos)
|
||||
NodeVitastorRequest* NodeVitastor::get_read_request(const Nan::FunctionCallbackInfo<v8::Value> & info, int argpos)
|
||||
{
|
||||
uint64_t offset = Nan::To<int64_t>(info[argpos+0]).FromJust();
|
||||
uint64_t len = Nan::To<int64_t>(info[argpos+1]).FromJust();
|
||||
uint64_t offset = get_ui64(info[argpos+0]);
|
||||
uint64_t len = get_ui64(info[argpos+1]);
|
||||
uint8_t *buf = (uint8_t*)malloc(len);
|
||||
if (!buf)
|
||||
{
|
||||
@@ -113,7 +124,7 @@ static NodeVitastorRequest* getReadRequest(const Nan::FunctionCallbackInfo<v8::V
|
||||
return NULL;
|
||||
}
|
||||
v8::Local<v8::Function> callback = info[argpos+2].As<v8::Function>();
|
||||
auto req = new NodeVitastorRequest(callback);
|
||||
auto req = new NodeVitastorRequest(this, callback);
|
||||
|
||||
req->offset = offset;
|
||||
req->len = len;
|
||||
@@ -126,73 +137,101 @@ static NodeVitastorRequest* getReadRequest(const Nan::FunctionCallbackInfo<v8::V
|
||||
NAN_METHOD(NodeVitastor::Read)
|
||||
{
|
||||
TRACE("NodeVitastor::Read");
|
||||
if (info.Length() < 5)
|
||||
Nan::ThrowError("Not enough arguments to read(pool, inode, offset, len, callback(err, buffer, version))");
|
||||
|
||||
NodeVitastor* self = Nan::ObjectWrap::Unwrap<NodeVitastor>(info.This());
|
||||
|
||||
uint64_t pool = Nan::To<int64_t>(info[0]).FromJust();
|
||||
uint64_t inode = Nan::To<int64_t>(info[1]).FromJust();
|
||||
uint64_t pool = get_ui64(info[0]);
|
||||
uint64_t inode = get_ui64(info[1]);
|
||||
|
||||
auto req = getReadRequest(info, 2);
|
||||
auto req = self->get_read_request(info, 2);
|
||||
|
||||
std::unique_lock<std::mutex> lock(self->mu);
|
||||
self->Ref();
|
||||
vitastor_c_read(self->c, ((pool << (64-POOL_ID_BITS)) | inode), req->offset, req->len, &req->iov, 1, on_read_finish, req);
|
||||
}
|
||||
|
||||
static NodeVitastorRequest* getWriteRequest(const Nan::FunctionCallbackInfo<v8::Value> & info, int argpos)
|
||||
NodeVitastorRequest* NodeVitastor::get_write_request(const Nan::FunctionCallbackInfo<v8::Value> & info, int argpos)
|
||||
{
|
||||
uint64_t offset = Nan::To<int64_t>(info[argpos+0]).FromJust();
|
||||
char *buf = node::Buffer::Data(info[argpos+1]);
|
||||
uint64_t len = node::Buffer::Length(info[argpos+1]);
|
||||
uint64_t offset = get_ui64(info[argpos+0]);
|
||||
const auto & bufarg = info[argpos+1];
|
||||
uint64_t version = 0;
|
||||
|
||||
if (!info[argpos+2].IsEmpty() && info[argpos+2]->IsObject())
|
||||
if (!info[argpos+2].IsEmpty() &&
|
||||
!info[argpos+2]->IsFunction() &&
|
||||
info[argpos+2]->IsObject())
|
||||
{
|
||||
auto key = Nan::New<v8::String>("version").ToLocalChecked();
|
||||
auto params = info[argpos+2].As<v8::Object>();
|
||||
auto versionObj = Nan::Get(params, key).ToLocalChecked();
|
||||
if (!versionObj.IsEmpty())
|
||||
version = Nan::To<int64_t>(versionObj).FromJust();
|
||||
version = get_ui64(versionObj);
|
||||
argpos++;
|
||||
}
|
||||
|
||||
v8::Local<v8::Function> callback = info[argpos+2].As<v8::Function>();
|
||||
auto req = new NodeVitastorRequest(callback);
|
||||
auto req = new NodeVitastorRequest(this, callback);
|
||||
|
||||
req->offset = offset;
|
||||
req->len = len;
|
||||
req->version = version;
|
||||
req->iov = { .iov_base = buf, .iov_len = req->len };
|
||||
|
||||
if (bufarg->IsArray())
|
||||
{
|
||||
auto buffers = bufarg.As<v8::Array>();
|
||||
req->len = 0;
|
||||
for (uint32_t i = 0; i < buffers->Length(); i++)
|
||||
{
|
||||
auto buffer_obj = Nan::Get(buffers, i).ToLocalChecked();
|
||||
char *buf = node::Buffer::Data(buffer_obj);
|
||||
uint64_t len = node::Buffer::Length(buffer_obj);
|
||||
req->iov_list.push_back({ .iov_base = buf, .iov_len = len });
|
||||
req->len += len;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
char *buf = node::Buffer::Data(bufarg);
|
||||
uint64_t len = node::Buffer::Length(bufarg);
|
||||
req->iov = { .iov_base = buf, .iov_len = len };
|
||||
req->len = len;
|
||||
}
|
||||
|
||||
return req;
|
||||
}
|
||||
|
||||
// write(pool, inode, offset, buffer, { version }?, callback(err))
|
||||
// write(pool, inode, offset, buf: Buffer | Buffer[], { version }?, callback(err))
|
||||
NAN_METHOD(NodeVitastor::Write)
|
||||
{
|
||||
TRACE("NodeVitastor::Write");
|
||||
if (info.Length() < 5)
|
||||
Nan::ThrowError("Not enough arguments to write(pool, inode, offset, buf: Buffer | Buffer[], { version }?, callback(err))");
|
||||
|
||||
NodeVitastor* self = Nan::ObjectWrap::Unwrap<NodeVitastor>(info.This());
|
||||
|
||||
uint64_t pool = Nan::To<int64_t>(info[0]).FromJust();
|
||||
uint64_t inode = Nan::To<int64_t>(info[1]).FromJust();
|
||||
uint64_t pool = get_ui64(info[0]);
|
||||
uint64_t inode = get_ui64(info[1]);
|
||||
|
||||
auto req = getWriteRequest(info, 2);
|
||||
auto req = self->get_write_request(info, 2);
|
||||
|
||||
std::unique_lock<std::mutex> lock(self->mu);
|
||||
vitastor_c_write(self->c, ((pool << (64-POOL_ID_BITS)) | inode), req->offset, req->len, req->version, &req->iov, 1, on_write_finish, req);
|
||||
self->Ref();
|
||||
vitastor_c_write(self->c, ((pool << (64-POOL_ID_BITS)) | inode), req->offset, req->len, req->version,
|
||||
req->iov_list.size() ? req->iov_list.data() : &req->iov,
|
||||
req->iov_list.size() ? req->iov_list.size() : 1,
|
||||
on_write_finish, req);
|
||||
}
|
||||
|
||||
// sync(callback(err))
|
||||
NAN_METHOD(NodeVitastor::Sync)
|
||||
{
|
||||
TRACE("NodeVitastor::Sync");
|
||||
if (info.Length() < 1)
|
||||
Nan::ThrowError("Not enough arguments to sync(callback(err))");
|
||||
|
||||
NodeVitastor* self = Nan::ObjectWrap::Unwrap<NodeVitastor>(info.This());
|
||||
|
||||
v8::Local<v8::Function> callback = info[0].As<v8::Function>();
|
||||
auto req = new NodeVitastorRequest(callback);
|
||||
auto req = new NodeVitastorRequest(self, callback);
|
||||
|
||||
std::unique_lock<std::mutex> lock(self->mu);
|
||||
self->Ref();
|
||||
vitastor_c_sync(self->c, on_write_finish, req);
|
||||
}
|
||||
|
||||
@@ -200,17 +239,20 @@ NAN_METHOD(NodeVitastor::Sync)
|
||||
NAN_METHOD(NodeVitastor::ReadBitmap)
|
||||
{
|
||||
TRACE("NodeVitastor::ReadBitmap");
|
||||
if (info.Length() < 6)
|
||||
Nan::ThrowError("Not enough arguments to read_bitmap(pool, inode, offset, len, with_parents, callback(err, bitmap_buffer))");
|
||||
|
||||
NodeVitastor* self = Nan::ObjectWrap::Unwrap<NodeVitastor>(info.This());
|
||||
|
||||
uint64_t pool = Nan::To<int64_t>(info[0]).FromJust();
|
||||
uint64_t inode = Nan::To<int64_t>(info[1]).FromJust();
|
||||
uint64_t offset = Nan::To<int64_t>(info[2]).FromJust();
|
||||
uint64_t len = Nan::To<int64_t>(info[3]).FromJust();
|
||||
uint64_t pool = get_ui64(info[0]);
|
||||
uint64_t inode = get_ui64(info[1]);
|
||||
uint64_t offset = get_ui64(info[2]);
|
||||
uint64_t len = get_ui64(info[3]);
|
||||
bool with_parents = Nan::To<bool>(info[4]).FromJust();
|
||||
v8::Local<v8::Function> callback = info[5].As<v8::Function>();
|
||||
|
||||
auto req = new NodeVitastorRequest(callback);
|
||||
auto req = new NodeVitastorRequest(self, callback);
|
||||
self->Ref();
|
||||
vitastor_c_read_bitmap(self->c, ((pool << (64-POOL_ID_BITS)) | inode), offset, len, with_parents, on_read_bitmap_finish, req);
|
||||
}
|
||||
|
||||
@@ -227,6 +269,7 @@ static void on_error(NodeVitastorRequest *req, Nan::Callback & nanCallback, long
|
||||
|
||||
void NodeVitastor::on_read_finish(void *opaque, long retval, uint64_t version)
|
||||
{
|
||||
TRACE("NodeVitastor::on_read_finish");
|
||||
Nan::HandleScope scope;
|
||||
NodeVitastorRequest *req = (NodeVitastorRequest *)opaque;
|
||||
Nan::Callback nanCallback(Nan::New(req->callback));
|
||||
@@ -235,7 +278,7 @@ void NodeVitastor::on_read_finish(void *opaque, long retval, uint64_t version)
|
||||
free(req->iov.iov_base);
|
||||
nanCallback.Call(0, NULL, req);
|
||||
}
|
||||
else if (retval < 0)
|
||||
else if (retval < 0 || (uint64_t)retval != req->len)
|
||||
{
|
||||
free(req->iov.iov_base);
|
||||
on_error(req, nanCallback, retval);
|
||||
@@ -248,20 +291,33 @@ void NodeVitastor::on_read_finish(void *opaque, long retval, uint64_t version)
|
||||
args[2] = v8::BigInt::NewFromUnsigned(v8::Isolate::GetCurrent(), version);
|
||||
nanCallback.Call(3, args, req);
|
||||
}
|
||||
req->cli->Unref();
|
||||
delete req;
|
||||
}
|
||||
|
||||
void NodeVitastor::on_write_finish(void *opaque, long retval)
|
||||
{
|
||||
TRACE("NodeVitastor::on_write_finish");
|
||||
Nan::HandleScope scope;
|
||||
NodeVitastorRequest *req = (NodeVitastorRequest *)opaque;
|
||||
Nan::Callback nanCallback(Nan::New(req->callback));
|
||||
on_error(req, nanCallback, retval);
|
||||
if (retval < 0 || (uint64_t)retval != req->len)
|
||||
{
|
||||
on_error(req, nanCallback, retval);
|
||||
}
|
||||
else
|
||||
{
|
||||
v8::Local<v8::Value> args[1];
|
||||
args[0] = Nan::Null();
|
||||
nanCallback.Call(1, args, req);
|
||||
}
|
||||
req->cli->Unref();
|
||||
delete req;
|
||||
}
|
||||
|
||||
void NodeVitastor::on_read_bitmap_finish(void *opaque, long retval, uint8_t *bitmap)
|
||||
{
|
||||
TRACE("NodeVitastor::on_read_bitmap_finish");
|
||||
Nan::HandleScope scope;
|
||||
NodeVitastorRequest *req = (NodeVitastorRequest *)opaque;
|
||||
Nan::Callback nanCallback(Nan::New(req->callback));
|
||||
@@ -276,6 +332,7 @@ void NodeVitastor::on_read_bitmap_finish(void *opaque, long retval, uint8_t *bit
|
||||
args[1] = Nan::NewBuffer((char*)bitmap, (retval+7)/8).ToLocalChecked();
|
||||
nanCallback.Call(2, args, req);
|
||||
}
|
||||
req->cli->Unref();
|
||||
delete req;
|
||||
}
|
||||
|
||||
@@ -291,21 +348,23 @@ void NodeVitastor::on_read_bitmap_finish(void *opaque, long retval, uint8_t *bit
|
||||
NAN_METHOD(NodeVitastorImage::Create)
|
||||
{
|
||||
TRACE("NodeVitastorImage::Create");
|
||||
if (info.Length() < 2)
|
||||
Nan::ThrowError("Not enough arguments to Image(client, name)");
|
||||
|
||||
v8::Local<v8::Object> parent = info[0].As<v8::Object>();
|
||||
std::string name = std::string(*Nan::Utf8String(info[1].As<v8::String>()));
|
||||
NodeVitastor *cli = Nan::ObjectWrap::Unwrap<NodeVitastor>(parent);
|
||||
|
||||
NodeVitastorImage *img = new NodeVitastorImage();
|
||||
img->Wrap(info.This());
|
||||
|
||||
img->cli = cli;
|
||||
img->name = name;
|
||||
|
||||
img->Ref();
|
||||
cli->Ref();
|
||||
std::unique_lock<std::mutex> lock(cli->mu);
|
||||
vitastor_c_watch_inode(cli->c, (char*)img->name.c_str(), on_watch_start, img);
|
||||
|
||||
img->Wrap(info.This());
|
||||
info.GetReturnValue().Set(info.This());
|
||||
}
|
||||
|
||||
@@ -323,10 +382,12 @@ NodeVitastorImage::~NodeVitastorImage()
|
||||
NAN_METHOD(NodeVitastorImage::Read)
|
||||
{
|
||||
TRACE("NodeVitastorImage::Read");
|
||||
if (info.Length() < 3)
|
||||
Nan::ThrowError("Not enough arguments to read(offset, len, callback(err, buffer, version))");
|
||||
|
||||
NodeVitastorImage* img = Nan::ObjectWrap::Unwrap<NodeVitastorImage>(info.This());
|
||||
|
||||
auto req = getReadRequest(info, 0);
|
||||
auto req = img->cli->get_read_request(info, 0);
|
||||
req->img = img;
|
||||
req->op = NODE_VITASTOR_READ;
|
||||
|
||||
@@ -337,24 +398,29 @@ NAN_METHOD(NodeVitastorImage::Read)
|
||||
NAN_METHOD(NodeVitastorImage::Write)
|
||||
{
|
||||
TRACE("NodeVitastorImage::Write");
|
||||
if (info.Length() < 3)
|
||||
Nan::ThrowError("Not enough arguments to write(offset, buffer, { version }?, callback(err))");
|
||||
|
||||
NodeVitastorImage* img = Nan::ObjectWrap::Unwrap<NodeVitastorImage>(info.This());
|
||||
|
||||
auto req = getWriteRequest(info, 0);
|
||||
auto req = img->cli->get_write_request(info, 0);
|
||||
req->img = img;
|
||||
req->op = NODE_VITASTOR_WRITE;
|
||||
|
||||
img->exec_or_wait(req);
|
||||
}
|
||||
|
||||
// sync(callback(err))
|
||||
NAN_METHOD(NodeVitastorImage::Sync)
|
||||
{
|
||||
TRACE("NodeVitastorImage::Sync");
|
||||
if (info.Length() < 1)
|
||||
Nan::ThrowError("Not enough arguments to sync(callback(err))");
|
||||
|
||||
NodeVitastorImage* img = Nan::ObjectWrap::Unwrap<NodeVitastorImage>(info.This());
|
||||
|
||||
v8::Local<v8::Function> callback = info[0].As<v8::Function>();
|
||||
auto req = new NodeVitastorRequest(callback);
|
||||
auto req = new NodeVitastorRequest(img->cli, callback);
|
||||
req->img = img;
|
||||
req->op = NODE_VITASTOR_SYNC;
|
||||
|
||||
@@ -365,15 +431,17 @@ NAN_METHOD(NodeVitastorImage::Sync)
|
||||
NAN_METHOD(NodeVitastorImage::ReadBitmap)
|
||||
{
|
||||
TRACE("NodeVitastorImage::ReadBitmap");
|
||||
if (info.Length() < 4)
|
||||
Nan::ThrowError("Not enough arguments to read_bitmap(offset, len, with_parents, callback(err, bitmap_buffer))");
|
||||
|
||||
NodeVitastorImage* img = Nan::ObjectWrap::Unwrap<NodeVitastorImage>(info.This());
|
||||
|
||||
uint64_t offset = Nan::To<int64_t>(info[0]).FromJust();
|
||||
uint64_t len = Nan::To<int64_t>(info[1]).FromJust();
|
||||
uint64_t offset = get_ui64(info[0]);
|
||||
uint64_t len = get_ui64(info[1]);
|
||||
bool with_parents = Nan::To<bool>(info[2]).FromJust();
|
||||
v8::Local<v8::Function> callback = info[3].As<v8::Function>();
|
||||
|
||||
auto req = new NodeVitastorRequest(callback);
|
||||
auto req = new NodeVitastorRequest(img->cli, callback);
|
||||
req->img = img;
|
||||
req->op = NODE_VITASTOR_READ_BITMAP;
|
||||
req->offset = offset;
|
||||
@@ -383,14 +451,17 @@ NAN_METHOD(NodeVitastorImage::ReadBitmap)
|
||||
img->exec_or_wait(req);
|
||||
}
|
||||
|
||||
// get_info(callback({ num, name, size, parent_id?, readonly?, meta?, mod_revision, block_size, bitmap_granularity, immediate_commit }))
|
||||
NAN_METHOD(NodeVitastorImage::GetInfo)
|
||||
{
|
||||
TRACE("NodeVitastorImage::Sync");
|
||||
TRACE("NodeVitastorImage::GetInfo");
|
||||
if (info.Length() < 1)
|
||||
Nan::ThrowError("Not enough arguments to get_info(callback({ num, name, size, parent_id?, readonly?, meta?, mod_revision, block_size, bitmap_granularity, immediate_commit }))");
|
||||
|
||||
NodeVitastorImage* img = Nan::ObjectWrap::Unwrap<NodeVitastorImage>(info.This());
|
||||
|
||||
v8::Local<v8::Function> callback = info[0].As<v8::Function>();
|
||||
auto req = new NodeVitastorRequest(callback);
|
||||
auto req = new NodeVitastorRequest(img->cli, callback);
|
||||
req->img = img;
|
||||
req->op = NODE_VITASTOR_GET_INFO;
|
||||
|
||||
@@ -412,21 +483,26 @@ void NodeVitastorImage::exec_or_wait(NodeVitastorRequest *req)
|
||||
|
||||
void NodeVitastorImage::exec_request(NodeVitastorRequest *req)
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(cli->mu);
|
||||
if (req->op == NODE_VITASTOR_READ)
|
||||
{
|
||||
uint64_t ino = vitastor_c_inode_get_num(watch);
|
||||
cli->Ref();
|
||||
vitastor_c_read(cli->c, ino, req->offset, req->len, &req->iov, 1, NodeVitastor::on_read_finish, req);
|
||||
}
|
||||
else if (req->op == NODE_VITASTOR_WRITE)
|
||||
{
|
||||
uint64_t ino = vitastor_c_inode_get_num(watch);
|
||||
vitastor_c_write(cli->c, ino, req->offset, req->len, req->version, &req->iov, 1, NodeVitastor::on_write_finish, req);
|
||||
cli->Ref();
|
||||
vitastor_c_write(cli->c, ino, req->offset, req->len, req->version,
|
||||
req->iov_list.size() ? req->iov_list.data() : &req->iov,
|
||||
req->iov_list.size() ? req->iov_list.size() : 1,
|
||||
NodeVitastor::on_write_finish, req);
|
||||
}
|
||||
else if (req->op == NODE_VITASTOR_SYNC)
|
||||
{
|
||||
uint64_t ino = vitastor_c_inode_get_num(watch);
|
||||
uint32_t imm = vitastor_c_inode_get_immediate_commit(cli->c, ino);
|
||||
cli->Ref();
|
||||
if (imm != IMMEDIATE_ALL)
|
||||
{
|
||||
vitastor_c_sync(cli->c, NodeVitastor::on_write_finish, req);
|
||||
@@ -439,6 +515,7 @@ void NodeVitastorImage::exec_request(NodeVitastorRequest *req)
|
||||
else if (req->op == NODE_VITASTOR_READ_BITMAP)
|
||||
{
|
||||
uint64_t ino = vitastor_c_inode_get_num(watch);
|
||||
cli->Ref();
|
||||
vitastor_c_read_bitmap(cli->c, ino, req->offset, req->len, req->with_parents, NodeVitastor::on_read_bitmap_finish, req);
|
||||
}
|
||||
else if (req->op == NODE_VITASTOR_GET_INFO)
|
||||
@@ -508,10 +585,14 @@ void NodeVitastorImage::on_watch_start(void *opaque, long retval)
|
||||
// NodeVitastorKV
|
||||
//////////////////////////////////////////////////
|
||||
|
||||
Nan::Persistent<v8::Function> NodeVitastorKV::listing_class;
|
||||
|
||||
// constructor(node_vitastor)
|
||||
NAN_METHOD(NodeVitastorKV::Create)
|
||||
{
|
||||
TRACE("NodeVitastorKV::Create");
|
||||
if (info.Length() < 1)
|
||||
Nan::ThrowError("Not enough arguments to new KV(client)");
|
||||
|
||||
v8::Local<v8::Object> parent = info[0].As<v8::Object>();
|
||||
NodeVitastor *cli = Nan::ObjectWrap::Unwrap<NodeVitastor>(parent);
|
||||
@@ -519,29 +600,32 @@ NAN_METHOD(NodeVitastorKV::Create)
|
||||
NodeVitastorKV *kv = new NodeVitastorKV();
|
||||
kv->cli = cli;
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(cli->mu);
|
||||
kv->dbw = new vitastorkv_dbw_t((cluster_client_t*)vitastor_c_get_internal_client(cli->c));
|
||||
}
|
||||
|
||||
kv->Wrap(info.This());
|
||||
cli->Ref();
|
||||
info.GetReturnValue().Set(info.This());
|
||||
}
|
||||
|
||||
// Destructor: releases the KV database wrapper owned by this object and then
// drops one reference on the parent NodeVitastor client.
NodeVitastorKV::~NodeVitastorKV()
|
||||
{
|
||||
// dbw is allocated with `new vitastorkv_dbw_t(...)` in Create().
delete dbw;
|
||||
// Presumably balances the cli->Ref() taken in Create() after Wrap() - confirm
// that no other path releases this reference.
cli->Unref();
|
||||
}
|
||||
|
||||
// open(inode_id, { ...config }, callback(err))
|
||||
// open(pool_id, inode_num, { ...config }, callback(err))
|
||||
NAN_METHOD(NodeVitastorKV::Open)
|
||||
{
|
||||
TRACE("NodeVitastorKV::Open");
|
||||
if (info.Length() < 4)
|
||||
Nan::ThrowError("Not enough arguments to open(pool_id, inode_num, { ...config }, callback(err))");
|
||||
|
||||
NodeVitastorKV* kv = Nan::ObjectWrap::Unwrap<NodeVitastorKV>(info.This());
|
||||
|
||||
uint64_t inode_id = Nan::To<int64_t>(info[0]).FromJust();
|
||||
uint64_t inode_id = INODE_WITH_POOL(get_ui64(info[0]), get_ui64(info[1]));
|
||||
|
||||
v8::Local<v8::Object> jsParams = info[1].As<v8::Object>();
|
||||
v8::Local<v8::Object> jsParams = info[2].As<v8::Object>();
|
||||
v8::Local<v8::Array> keys = Nan::GetOwnPropertyNames(jsParams).ToLocalChecked();
|
||||
std::map<std::string, std::string> cfg;
|
||||
for (uint32_t i = 0; i < keys->Length(); i++)
|
||||
@@ -550,8 +634,8 @@ NAN_METHOD(NodeVitastorKV::Open)
|
||||
cfg[std::string(*Nan::Utf8String(key))] = std::string(*Nan::Utf8String(Nan::Get(jsParams, key).ToLocalChecked()));
|
||||
}
|
||||
|
||||
v8::Local<v8::Function> callback = info[2].As<v8::Function>();
|
||||
auto req = new NodeVitastorRequest(callback);
|
||||
v8::Local<v8::Function> callback = info[3].As<v8::Function>();
|
||||
auto req = new NodeVitastorRequest(kv->cli, callback);
|
||||
|
||||
kv->Ref();
|
||||
kv->dbw->open(inode_id, cfg, [kv, req](int res)
|
||||
@@ -570,11 +654,13 @@ NAN_METHOD(NodeVitastorKV::Open)
|
||||
NAN_METHOD(NodeVitastorKV::Close)
|
||||
{
|
||||
TRACE("NodeVitastorKV::Close");
|
||||
if (info.Length() < 1)
|
||||
Nan::ThrowError("Not enough arguments to close(callback(err))");
|
||||
|
||||
NodeVitastorKV* kv = Nan::ObjectWrap::Unwrap<NodeVitastorKV>(info.This());
|
||||
|
||||
v8::Local<v8::Function> callback = info[0].As<v8::Function>();
|
||||
auto req = new NodeVitastorRequest(callback);
|
||||
auto req = new NodeVitastorRequest(kv->cli, callback);
|
||||
|
||||
kv->Ref();
|
||||
kv->dbw->close([kv, req]()
|
||||
@@ -591,6 +677,8 @@ NAN_METHOD(NodeVitastorKV::Close)
|
||||
NAN_METHOD(NodeVitastorKV::SetConfig)
|
||||
{
|
||||
TRACE("NodeVitastorKV::SetConfig");
|
||||
if (info.Length() < 1)
|
||||
Nan::ThrowError("Not enough arguments to set_config({ ...config })");
|
||||
|
||||
NodeVitastorKV* kv = Nan::ObjectWrap::Unwrap<NodeVitastorKV>(info.This());
|
||||
|
||||
@@ -627,7 +715,7 @@ void NodeVitastorKV::get_impl(const Nan::FunctionCallbackInfo<v8::Value> & info,
|
||||
std::string key(*Nan::Utf8String(info[0].As<v8::String>()));
|
||||
|
||||
v8::Local<v8::Function> callback = info[1].As<v8::Function>();
|
||||
auto req = new NodeVitastorRequest(callback);
|
||||
auto req = new NodeVitastorRequest(kv->cli, callback);
|
||||
|
||||
kv->Ref();
|
||||
kv->dbw->get(key, [kv, req](int res, const std::string & value)
|
||||
@@ -647,6 +735,8 @@ void NodeVitastorKV::get_impl(const Nan::FunctionCallbackInfo<v8::Value> & info,
|
||||
// JS entry point for get(key, callback(err, value)).
// Checks the argument count and delegates to get_impl() with allow_cache=false.
NAN_METHOD(NodeVitastorKV::Get)
{
    TRACE("NodeVitastorKV::Get");
    if (info.Length() < 2)
    {
        // Nan::ThrowError() only schedules a JS exception; it does not unwind the
        // C++ stack. Return explicitly so get_impl() never reads missing arguments.
        Nan::ThrowError("Not enough arguments to get(key, callback(err, value))");
        return;
    }
    get_impl(info, false);
}
|
||||
|
||||
@@ -654,6 +744,8 @@ NAN_METHOD(NodeVitastorKV::Get)
|
||||
// JS entry point for get_cached(key, callback(err, value)).
// Checks the argument count and delegates to get_impl() with allow_cache=true.
NAN_METHOD(NodeVitastorKV::GetCached)
{
    TRACE("NodeVitastorKV::GetCached");
    if (info.Length() < 2)
    {
        // Nan::ThrowError() only schedules a JS exception; it does not unwind the
        // C++ stack. Return explicitly so get_impl() never reads missing arguments.
        Nan::ThrowError("Not enough arguments to get_cached(key, callback(err, value))");
        return;
    }
    get_impl(info, true);
}
|
||||
|
||||
@@ -672,10 +764,12 @@ static std::function<bool(int, const std::string &)> make_cas_callback(NodeVitas
|
||||
};
|
||||
}
|
||||
|
||||
// set(key, value, callback(err), cas_compare(old_value))
|
||||
// set(key, value, callback(err), cas_compare(old_value)?)
|
||||
NAN_METHOD(NodeVitastorKV::Set)
|
||||
{
|
||||
TRACE("NodeVitastorKV::Set");
|
||||
if (info.Length() < 3)
|
||||
Nan::ThrowError("Not enough arguments to set(key, value, callback(err), cas_compare(old_value)?)");
|
||||
|
||||
NodeVitastorKV* kv = Nan::ObjectWrap::Unwrap<NodeVitastorKV>(info.This());
|
||||
|
||||
@@ -684,13 +778,13 @@ NAN_METHOD(NodeVitastorKV::Set)
|
||||
std::string value(*Nan::Utf8String(info[1].As<v8::String>()));
|
||||
|
||||
v8::Local<v8::Function> callback = info[2].As<v8::Function>();
|
||||
NodeVitastorRequest *req = new NodeVitastorRequest(callback), *cas_req = NULL;
|
||||
NodeVitastorRequest *req = new NodeVitastorRequest(kv->cli, callback), *cas_req = NULL;
|
||||
|
||||
std::function<bool(int, const std::string &)> cas_cb;
|
||||
if (info.Length() > 3 && info[3]->IsObject())
|
||||
{
|
||||
v8::Local<v8::Function> cas_callback = info[3].As<v8::Function>();
|
||||
cas_req = new NodeVitastorRequest(cas_callback);
|
||||
cas_req = new NodeVitastorRequest(kv->cli, cas_callback);
|
||||
cas_cb = make_cas_callback(cas_req);
|
||||
}
|
||||
|
||||
@@ -709,10 +803,12 @@ NAN_METHOD(NodeVitastorKV::Set)
|
||||
}, cas_cb);
|
||||
}
|
||||
|
||||
// del(key, callback(err), cas_compare(old_value))
|
||||
// del(key, callback(err), cas_compare(old_value)?)
|
||||
NAN_METHOD(NodeVitastorKV::Del)
|
||||
{
|
||||
TRACE("NodeVitastorKV::Del");
|
||||
if (info.Length() < 2)
|
||||
Nan::ThrowError("Not enough arguments to del(key, callback(err), cas_compare(old_value)?)");
|
||||
|
||||
NodeVitastorKV* kv = Nan::ObjectWrap::Unwrap<NodeVitastorKV>(info.This());
|
||||
|
||||
@@ -720,13 +816,13 @@ NAN_METHOD(NodeVitastorKV::Del)
|
||||
std::string key(*Nan::Utf8String(info[0].As<v8::String>()));
|
||||
|
||||
v8::Local<v8::Function> callback = info[1].As<v8::Function>();
|
||||
NodeVitastorRequest *req = new NodeVitastorRequest(callback), *cas_req = NULL;
|
||||
NodeVitastorRequest *req = new NodeVitastorRequest(kv->cli, callback), *cas_req = NULL;
|
||||
|
||||
std::function<bool(int, const std::string &)> cas_cb;
|
||||
if (info.Length() > 2 && info[2]->IsObject())
|
||||
{
|
||||
v8::Local<v8::Function> cas_callback = info[2].As<v8::Function>();
|
||||
cas_req = new NodeVitastorRequest(cas_callback);
|
||||
cas_req = new NodeVitastorRequest(kv->cli, cas_callback);
|
||||
cas_cb = make_cas_callback(cas_req);
|
||||
}
|
||||
|
||||
@@ -762,6 +858,14 @@ NAN_METHOD(NodeVitastorKV::List)
|
||||
info.GetReturnValue().Set(Nan::NewInstance(cons, narg, args).ToLocalChecked());
|
||||
}
|
||||
|
||||
/*NAN_METHOD(NodeVitastorKV::Destroy)
|
||||
{
|
||||
TRACE("NodeVitastorKV::Destroy");
|
||||
NodeVitastorKV* kv = Nan::ObjectWrap::Unwrap<NodeVitastorKV>(info.This());
|
||||
if (!kv->dead)
|
||||
kv->Unref();
|
||||
}*/
|
||||
|
||||
//////////////////////////////////////////////////
|
||||
// NodeVitastorKVListing
|
||||
//////////////////////////////////////////////////
|
||||
@@ -783,12 +887,10 @@ NAN_METHOD(NodeVitastorKVListing::Create)
|
||||
|
||||
NodeVitastorKVListing *list = new NodeVitastorKVListing();
|
||||
list->kv = kv;
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(kv->cli->mu);
|
||||
list->handle = list->kv->dbw->list_start(start_key);
|
||||
}
|
||||
list->handle = list->kv->dbw->list_start(start_key);
|
||||
|
||||
list->Wrap(info.This());
|
||||
kv->Ref();
|
||||
info.GetReturnValue().Set(info.This());
|
||||
}
|
||||
|
||||
@@ -796,35 +898,54 @@ NodeVitastorKVListing::~NodeVitastorKVListing()
|
||||
{
|
||||
if (handle)
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(kv->cli->mu);
|
||||
kv->dbw->list_close(handle);
|
||||
handle = NULL;
|
||||
}
|
||||
if (iter)
|
||||
{
|
||||
delete iter;
|
||||
iter = NULL;
|
||||
}
|
||||
kv->Unref();
|
||||
}
|
||||
|
||||
// next(callback(err, value))
|
||||
// next(callback(err, value)?)
|
||||
NAN_METHOD(NodeVitastorKVListing::Next)
|
||||
{
|
||||
TRACE("NodeVitastorKVListing::Next");
|
||||
|
||||
NodeVitastorKVListing* list = Nan::ObjectWrap::Unwrap<NodeVitastorKVListing>(info.This());
|
||||
|
||||
v8::Local<v8::Function> callback = info[0].As<v8::Function>();
|
||||
auto req = new NodeVitastorRequest(callback);
|
||||
if (info.Length() > 0)
|
||||
{
|
||||
v8::Local<v8::Function> callback = info[0].As<v8::Function>();
|
||||
if (list->iter)
|
||||
{
|
||||
delete list->iter;
|
||||
}
|
||||
list->iter = new NodeVitastorRequest(list->kv->cli, callback);
|
||||
}
|
||||
if (!list->handle)
|
||||
{
|
||||
// Already closed
|
||||
Nan::Callback nanCallback(Nan::New(req->callback));
|
||||
v8::Local<v8::Value> args[1];
|
||||
args[0] = Nan::New<v8::Int32>(-EINVAL);
|
||||
nanCallback.Call(1, args, req);
|
||||
delete req;
|
||||
if (list->iter)
|
||||
{
|
||||
auto req = list->iter;
|
||||
list->iter = NULL;
|
||||
Nan::Callback nanCallback(Nan::New(req->callback));
|
||||
v8::Local<v8::Value> args[1];
|
||||
args[0] = Nan::New<v8::Int32>(-EINVAL);
|
||||
nanCallback.Call(1, args, req);
|
||||
delete req;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
list->kv->Ref();
|
||||
list->kv->dbw->list_next(list->handle, [list, req](int res, const std::string & key, const std::string & value)
|
||||
list->kv->dbw->list_next(list->handle, [list](int res, const std::string & key, const std::string & value)
|
||||
{
|
||||
auto req = list->iter;
|
||||
list->iter = NULL;
|
||||
Nan::HandleScope scope;
|
||||
Nan::Callback nanCallback(Nan::New(req->callback));
|
||||
v8::Local<v8::Value> args[3];
|
||||
@@ -832,7 +953,10 @@ NAN_METHOD(NodeVitastorKVListing::Next)
|
||||
args[1] = !res ? v8::Local<v8::Value>(Nan::New<v8::String>(key).ToLocalChecked()) : v8::Local<v8::Value>(Nan::Null());
|
||||
args[2] = !res ? v8::Local<v8::Value>(Nan::New<v8::String>(value).ToLocalChecked()) : v8::Local<v8::Value>(Nan::Null());
|
||||
nanCallback.Call(3, args, req);
|
||||
delete req;
|
||||
if (list->iter)
|
||||
delete req;
|
||||
else
|
||||
list->iter = req;
|
||||
list->kv->Unref();
|
||||
});
|
||||
}
|
||||
@@ -846,8 +970,12 @@ NAN_METHOD(NodeVitastorKVListing::Close)
|
||||
|
||||
if (list->handle)
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(list->kv->cli->mu);
|
||||
list->kv->dbw->list_close(list->handle);
|
||||
list->handle = NULL;
|
||||
}
|
||||
if (list->iter)
|
||||
{
|
||||
delete list->iter;
|
||||
list->iter = NULL;
|
||||
}
|
||||
}
|
||||
|
@@ -4,8 +4,6 @@
|
||||
#ifndef NODE_VITASTOR_CLIENT_H
|
||||
#define NODE_VITASTOR_CLIENT_H
|
||||
|
||||
#include <mutex>
|
||||
|
||||
#include <nan.h>
|
||||
#include <vitastor_c.h>
|
||||
#include <vitastor_kv.h>
|
||||
@@ -19,7 +17,7 @@ public:
|
||||
static NAN_METHOD(Create);
|
||||
// read(pool, inode, offset, len, callback(err, buffer, version))
|
||||
static NAN_METHOD(Read);
|
||||
// write(pool, inode, offset, buffer, { version }?, callback(err))
|
||||
// write(pool, inode, offset, buf: Buffer | Buffer[], { version }?, callback(err))
|
||||
static NAN_METHOD(Write);
|
||||
// sync(callback(err))
|
||||
static NAN_METHOD(Sync);
|
||||
@@ -34,8 +32,6 @@ private:
|
||||
vitastor_c *c = NULL;
|
||||
int eventfd = -1;
|
||||
uv_poll_t poll_watcher;
|
||||
// FIXME: Is it really needed?
|
||||
std::mutex mu;
|
||||
|
||||
NodeVitastor();
|
||||
|
||||
@@ -44,6 +40,9 @@ private:
|
||||
static void on_write_finish(void *opaque, long retval);
|
||||
static void on_read_bitmap_finish(void *opaque, long retval, uint8_t *bitmap);
|
||||
|
||||
NodeVitastorRequest* get_read_request(const Nan::FunctionCallbackInfo<v8::Value> & info, int argpos);
|
||||
NodeVitastorRequest* get_write_request(const Nan::FunctionCallbackInfo<v8::Value> & info, int argpos);
|
||||
|
||||
friend class NodeVitastorImage;
|
||||
friend class NodeVitastorKV;
|
||||
friend class NodeVitastorKVListing;
|
||||
@@ -56,7 +55,7 @@ public:
|
||||
static NAN_METHOD(Create);
|
||||
// read(offset, len, callback(err, buffer, version))
|
||||
static NAN_METHOD(Read);
|
||||
// write(offset, buffer, { version }?, callback(err))
|
||||
// write(offset, buf: Buffer | Buffer[], { version }?, callback(err))
|
||||
static NAN_METHOD(Write);
|
||||
// sync(callback(err))
|
||||
static NAN_METHOD(Sync);
|
||||
@@ -74,8 +73,6 @@ private:
|
||||
std::vector<NodeVitastorRequest*> on_init;
|
||||
Nan::Persistent<v8::Object> cliObj;
|
||||
|
||||
NodeVitastorImage();
|
||||
|
||||
static void on_watch_start(void *opaque, long retval);
|
||||
void exec_request(NodeVitastorRequest *req);
|
||||
void exec_or_wait(NodeVitastorRequest *req);
|
||||
@@ -86,7 +83,7 @@ class NodeVitastorKV: public Nan::ObjectWrap
|
||||
public:
|
||||
// constructor(node_vitastor)
|
||||
static NAN_METHOD(Create);
|
||||
// open(inode_id, { ...config }, callback(err))
|
||||
// open(pool_id, inode_num, { ...config }, callback(err))
|
||||
static NAN_METHOD(Open);
|
||||
// set_config({ ...config })
|
||||
static NAN_METHOD(SetConfig);
|
||||
@@ -98,9 +95,9 @@ public:
|
||||
static NAN_METHOD(Get);
|
||||
// get_cached(key, callback(err, value))
|
||||
static NAN_METHOD(GetCached);
|
||||
// set(key, value, callback(err), cas_compare(old_value))
|
||||
// set(key, value, callback(err), cas_compare(old_value)?)
|
||||
static NAN_METHOD(Set);
|
||||
// del(key, callback(err), cas_compare(old_value))
|
||||
// del(key, callback(err), cas_compare(old_value)?)
|
||||
static NAN_METHOD(Del);
|
||||
// list(start_key?)
|
||||
static NAN_METHOD(List);
|
||||
@@ -113,8 +110,6 @@ private:
|
||||
NodeVitastor *cli = NULL;
|
||||
vitastorkv_dbw_t *dbw = NULL;
|
||||
|
||||
NodeVitastorKV();
|
||||
|
||||
static void get_impl(const Nan::FunctionCallbackInfo<v8::Value> & info, bool allow_cache);
|
||||
|
||||
friend class NodeVitastorKVListing;
|
||||
@@ -125,7 +120,7 @@ class NodeVitastorKVListing: public Nan::ObjectWrap
|
||||
public:
|
||||
// constructor(node_vitastor_kv, start_key?)
|
||||
static NAN_METHOD(Create);
|
||||
// next(callback(err, value))
|
||||
// next(callback(err, value)?)
|
||||
static NAN_METHOD(Next);
|
||||
// close()
|
||||
static NAN_METHOD(Close);
|
||||
@@ -135,8 +130,7 @@ public:
|
||||
private:
|
||||
NodeVitastorKV *kv = NULL;
|
||||
void *handle = NULL;
|
||||
|
||||
NodeVitastorKVListing();
|
||||
NodeVitastorRequest *iter = NULL;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
1
node-binding/index.js
Normal file
1
node-binding/index.js
Normal file
@@ -0,0 +1 @@
|
||||
// Locate the compiled native addon ('addon.node') through the 'bindings'
// helper and re-export it unchanged as this package's public API.
const loadBinding = require('bindings');
module.exports = loadBinding('addon.node');
|
@@ -50,7 +50,7 @@ from cinder.volume import configuration
|
||||
from cinder.volume import driver
|
||||
from cinder.volume import volume_utils
|
||||
|
||||
VERSION = '1.7.0'
|
||||
VITASTOR_VERSION = '1.8.0'
|
||||
|
||||
LOG = logging.getLogger(__name__)
|
||||
|
||||
@@ -238,7 +238,7 @@ class VitastorDriver(driver.CloneableImageVD,
|
||||
|
||||
stats = {
|
||||
'vendor_name': 'Vitastor',
|
||||
'driver_version': self.VERSION,
|
||||
'driver_version': VITASTOR_VERSION,
|
||||
'storage_protocol': 'vitastor',
|
||||
'total_capacity_gb': 'unknown',
|
||||
'free_capacity_gb': 'unknown',
|
||||
|
@@ -71,7 +71,7 @@ index c9baeda639..85e1df5a56 100644
|
||||
description: 'OpenGL support')
|
||||
option('rdma', type : 'feature', value : 'auto',
|
||||
diff --git a/qapi/block-core.json b/qapi/block-core.json
|
||||
index ca390c5700..8f11ae9fa5 100644
|
||||
index ca390c5700..d2dbaeb279 100644
|
||||
--- a/qapi/block-core.json
|
||||
+++ b/qapi/block-core.json
|
||||
@@ -3201,7 +3201,7 @@
|
||||
@@ -120,7 +120,7 @@ index ca390c5700..8f11ae9fa5 100644
|
||||
'virtio-blk-vfio-pci':
|
||||
{ 'type': 'BlockdevOptionsVirtioBlkVfioPci',
|
||||
'if': 'CONFIG_BLKIO' },
|
||||
@@ -5148,6 +5171,17 @@
|
||||
@@ -5148,6 +5171,20 @@
|
||||
'*cluster-size' : 'size',
|
||||
'*encrypt' : 'RbdEncryptionCreateOptions' } }
|
||||
|
||||
@@ -129,6 +129,9 @@ index ca390c5700..8f11ae9fa5 100644
|
||||
+#
|
||||
+# Driver specific image creation options for Vitastor.
|
||||
+#
|
||||
+# @location: Where to store the new image file. This location cannot
|
||||
+# point to a snapshot.
|
||||
+#
|
||||
+# @size: Size of the virtual disk in bytes
|
||||
+##
|
||||
+{ 'struct': 'BlockdevCreateOptionsVitastor',
|
||||
@@ -138,7 +141,7 @@ index ca390c5700..8f11ae9fa5 100644
|
||||
##
|
||||
# @BlockdevVmdkSubformat:
|
||||
#
|
||||
@@ -5370,6 +5404,7 @@
|
||||
@@ -5370,6 +5407,7 @@
|
||||
'ssh': 'BlockdevCreateOptionsSsh',
|
||||
'vdi': 'BlockdevCreateOptionsVdi',
|
||||
'vhdx': 'BlockdevCreateOptionsVhdx',
|
||||
|
@@ -71,7 +71,7 @@ index 0a99a059ec..16dc440118 100644
|
||||
description: 'OpenGL support')
|
||||
option('rdma', type : 'feature', value : 'auto',
|
||||
diff --git a/qapi/block-core.json b/qapi/block-core.json
|
||||
index 746d1694c2..fb7aa4423b 100644
|
||||
index 746d1694c2..199a146a0b 100644
|
||||
--- a/qapi/block-core.json
|
||||
+++ b/qapi/block-core.json
|
||||
@@ -3203,7 +3203,7 @@
|
||||
@@ -120,7 +120,7 @@ index 746d1694c2..fb7aa4423b 100644
|
||||
'virtio-blk-vfio-pci':
|
||||
{ 'type': 'BlockdevOptionsVirtioBlkVfioPci',
|
||||
'if': 'CONFIG_BLKIO' },
|
||||
@@ -5180,6 +5203,17 @@
|
||||
@@ -5180,6 +5203,20 @@
|
||||
'*cluster-size' : 'size',
|
||||
'*encrypt' : 'RbdEncryptionCreateOptions' } }
|
||||
|
||||
@@ -129,6 +129,9 @@ index 746d1694c2..fb7aa4423b 100644
|
||||
+#
|
||||
+# Driver specific image creation options for Vitastor.
|
||||
+#
|
||||
+# @location: Where to store the new image file. This location cannot
|
||||
+# point to a snapshot.
|
||||
+#
|
||||
+# @size: Size of the virtual disk in bytes
|
||||
+##
|
||||
+{ 'struct': 'BlockdevCreateOptionsVitastor',
|
||||
@@ -138,7 +141,7 @@ index 746d1694c2..fb7aa4423b 100644
|
||||
##
|
||||
# @BlockdevVmdkSubformat:
|
||||
#
|
||||
@@ -5402,6 +5436,7 @@
|
||||
@@ -5402,6 +5439,7 @@
|
||||
'ssh': 'BlockdevCreateOptionsSsh',
|
||||
'vdi': 'BlockdevCreateOptionsVdi',
|
||||
'vhdx': 'BlockdevCreateOptionsVhdx',
|
||||
|
@@ -1,11 +1,11 @@
|
||||
Name: vitastor
|
||||
Version: 1.7.0
|
||||
Version: 1.8.0
|
||||
Release: 1%{?dist}
|
||||
Summary: Vitastor, a fast software-defined clustered block storage
|
||||
|
||||
License: Vitastor Network Public License 1.1
|
||||
URL: https://vitastor.io/
|
||||
Source0: vitastor-1.7.0.el7.tar.gz
|
||||
Source0: vitastor-1.8.0.el7.tar.gz
|
||||
|
||||
BuildRequires: liburing-devel >= 0.6
|
||||
BuildRequires: gperftools-devel
|
||||
|
@@ -1,11 +1,11 @@
|
||||
Name: vitastor
|
||||
Version: 1.7.0
|
||||
Version: 1.8.0
|
||||
Release: 1%{?dist}
|
||||
Summary: Vitastor, a fast software-defined clustered block storage
|
||||
|
||||
License: Vitastor Network Public License 1.1
|
||||
URL: https://vitastor.io/
|
||||
Source0: vitastor-1.7.0.el8.tar.gz
|
||||
Source0: vitastor-1.8.0.el8.tar.gz
|
||||
|
||||
BuildRequires: liburing-devel >= 0.6
|
||||
BuildRequires: gperftools-devel
|
||||
|
@@ -1,11 +1,11 @@
|
||||
Name: vitastor
|
||||
Version: 1.7.0
|
||||
Version: 1.8.0
|
||||
Release: 1%{?dist}
|
||||
Summary: Vitastor, a fast software-defined clustered block storage
|
||||
|
||||
License: Vitastor Network Public License 1.1
|
||||
URL: https://vitastor.io/
|
||||
Source0: vitastor-1.7.0.el9.tar.gz
|
||||
Source0: vitastor-1.8.0.el9.tar.gz
|
||||
|
||||
BuildRequires: liburing-devel >= 0.6
|
||||
BuildRequires: gperftools-devel
|
||||
|
@@ -19,7 +19,7 @@ if("${CMAKE_INSTALL_PREFIX}" MATCHES "^/usr/local/?$")
|
||||
set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}")
|
||||
endif()
|
||||
|
||||
add_definitions(-DVERSION="1.7.0")
|
||||
add_definitions(-DVITASTOR_VERSION="1.8.0")
|
||||
add_definitions(-D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -Wall -Wno-sign-compare -Wno-comment -Wno-parentheses -Wno-pointer-arith -fdiagnostics-color=always -fno-omit-frame-pointer -I ${CMAKE_SOURCE_DIR}/src)
|
||||
add_link_options(-fno-omit-frame-pointer)
|
||||
if (${WITH_ASAN})
|
||||
|
@@ -13,7 +13,7 @@ target_link_libraries(vitastor_blk
|
||||
# for timerfd_manager
|
||||
vitastor_common
|
||||
)
|
||||
set_target_properties(vitastor_blk PROPERTIES VERSION ${VERSION} SOVERSION 0)
|
||||
set_target_properties(vitastor_blk PROPERTIES VERSION ${VITASTOR_VERSION} SOVERSION 0)
|
||||
|
||||
if (${WITH_FIO})
|
||||
# libfio_vitastor_blk.so
|
||||
|
@@ -29,7 +29,7 @@ target_link_libraries(vitastor_client
|
||||
${LIBURING_LIBRARIES}
|
||||
${IBVERBS_LIBRARIES}
|
||||
)
|
||||
set_target_properties(vitastor_client PROPERTIES VERSION ${VERSION} SOVERSION 0)
|
||||
set_target_properties(vitastor_client PROPERTIES VERSION ${VITASTOR_VERSION} SOVERSION 0)
|
||||
configure_file(vitastor.pc.in vitastor.pc @ONLY)
|
||||
|
||||
if (${WITH_FIO})
|
||||
|
@@ -452,11 +452,10 @@ void cluster_client_t::on_change_pg_state_hook(pool_id_t pool_id, pg_num_t pg_nu
|
||||
if (pg_cfg.cur_primary != prev_primary)
|
||||
{
|
||||
// Repeat this PG operations because an OSD which stopped being primary may not fsync operations
|
||||
if (wb->repeat_ops_for(this, 0, pool_id, pg_num) > 0)
|
||||
{
|
||||
continue_ops();
|
||||
}
|
||||
wb->repeat_ops_for(this, 0, pool_id, pg_num);
|
||||
}
|
||||
// Always continue to resume operations hung because of lack of the primary OSD
|
||||
continue_ops();
|
||||
}
|
||||
|
||||
bool cluster_client_t::get_immediate_commit(uint64_t inode)
|
||||
@@ -1066,11 +1065,11 @@ bool cluster_client_t::try_send(cluster_op_t *op, int i)
|
||||
!pg_it->second.pause && pg_it->second.cur_primary)
|
||||
{
|
||||
osd_num_t primary_osd = pg_it->second.cur_primary;
|
||||
part->osd_num = primary_osd;
|
||||
auto peer_it = msgr.osd_peer_fds.find(primary_osd);
|
||||
if (peer_it != msgr.osd_peer_fds.end())
|
||||
{
|
||||
int peer_fd = peer_it->second;
|
||||
part->osd_num = primary_osd;
|
||||
part->flags |= PART_SENT;
|
||||
op->inflight_count++;
|
||||
uint64_t pg_bitmap_size = (pool_cfg.data_block_size / pool_cfg.bitmap_granularity / 8) * (
|
||||
@@ -1287,7 +1286,11 @@ void cluster_client_t::handle_op_part(cluster_op_part_t *part)
|
||||
if (op->opcode == OSD_OP_READ || op->opcode == OSD_OP_READ_BITMAP || op->opcode == OSD_OP_READ_CHAIN_BITMAP)
|
||||
{
|
||||
copy_part_bitmap(op, part);
|
||||
op->version = op->parts.size() == 1 ? part->op.reply.rw.version : 0;
|
||||
if (op->inode == op->cur_inode)
|
||||
{
|
||||
// Read only returns the version of the uppermost layer
|
||||
op->version = op->parts.size() == 1 ? part->op.reply.rw.version : 0;
|
||||
}
|
||||
}
|
||||
else if (op->opcode == OSD_OP_WRITE)
|
||||
{
|
||||
|
@@ -333,7 +333,10 @@ void etcd_state_client_t::start_etcd_watcher()
|
||||
etcd_watch_ws = NULL;
|
||||
}
|
||||
if (this->log_level > 1)
|
||||
fprintf(stderr, "Trying to connect to etcd websocket at %s, watch from revision %ju\n", etcd_address.c_str(), etcd_watch_revision);
|
||||
{
|
||||
fprintf(stderr, "Trying to connect to etcd websocket at %s, watch from revision %ju/%ju/%ju\n", etcd_address.c_str(),
|
||||
etcd_watch_revision_config, etcd_watch_revision_osd, etcd_watch_revision_pg);
|
||||
}
|
||||
etcd_watch_ws = open_websocket(tfd, etcd_address, etcd_api_path+"/watch", etcd_slow_timeout,
|
||||
[this, cur_addr = selected_etcd_address](const http_response_t *msg)
|
||||
{
|
||||
@@ -348,16 +351,20 @@ void etcd_state_client_t::start_etcd_watcher()
|
||||
}
|
||||
else
|
||||
{
|
||||
uint64_t watch_id = data["result"]["watch_id"].uint64_value();
|
||||
if (data["result"]["created"].bool_value())
|
||||
{
|
||||
uint64_t watch_id = data["result"]["watch_id"].uint64_value();
|
||||
if (watch_id == ETCD_CONFIG_WATCH_ID ||
|
||||
watch_id == ETCD_PG_STATE_WATCH_ID ||
|
||||
watch_id == ETCD_PG_HISTORY_WATCH_ID ||
|
||||
watch_id == ETCD_OSD_STATE_WATCH_ID)
|
||||
{
|
||||
etcd_watches_initialised++;
|
||||
}
|
||||
if (etcd_watches_initialised == ETCD_TOTAL_WATCHES && this->log_level > 0)
|
||||
fprintf(stderr, "Successfully subscribed to etcd at %s, revision %ju\n", cur_addr.c_str(), etcd_watch_revision);
|
||||
{
|
||||
fprintf(stderr, "Successfully subscribed to etcd at %s, revision %ju/%ju/%ju\n", cur_addr.c_str(),
|
||||
etcd_watch_revision_config, etcd_watch_revision_osd, etcd_watch_revision_pg);
|
||||
}
|
||||
}
|
||||
if (data["result"]["canceled"].bool_value())
|
||||
{
|
||||
@@ -375,7 +382,7 @@ void etcd_state_client_t::start_etcd_watcher()
|
||||
data["result"]["compact_revision"].uint64_value());
|
||||
http_close(etcd_watch_ws);
|
||||
etcd_watch_ws = NULL;
|
||||
etcd_watch_revision = 0;
|
||||
etcd_watch_revision_config = etcd_watch_revision_osd = etcd_watch_revision_pg = 0;
|
||||
on_reload_hook();
|
||||
}
|
||||
return;
|
||||
@@ -393,13 +400,29 @@ void etcd_state_client_t::start_etcd_watcher()
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
// Save revision only if it's present in the message - because sometimes etcd sends something without a header, like:
|
||||
// {"error": {"grpc_code": 14, "http_code": 503, "http_status": "Service Unavailable", "message": "error reading from server: EOF"}}
|
||||
if (etcd_watches_initialised == ETCD_TOTAL_WATCHES && !data["result"]["header"]["revision"].is_null())
|
||||
{
|
||||
// Protect against a revision being split into multiple messages and some
|
||||
// of them being lost. Even though I'm not sure if etcd actually splits them
|
||||
// Also sometimes etcd sends something without a header, like:
|
||||
// {"error": {"grpc_code": 14, "http_code": 503, "http_status": "Service Unavailable", "message": "error reading from server: EOF"}}
|
||||
etcd_watch_revision = data["result"]["header"]["revision"].uint64_value();
|
||||
// Restart watchers from the same revision number as in the last received message,
|
||||
// not from the next one to protect against revision being split into multiple messages,
|
||||
// even though etcd guarantees not to do that **within a single watcher** without fragment=true:
|
||||
// https://etcd.io/docs/v3.5/learning/api_guarantees/#watch-apis
|
||||
// Revision contents are ALWAYS split into separate messages for different watchers though!
|
||||
// So generally we have to resume each watcher from its own revision...
|
||||
// Progress messages may have watch_id=-1 if sent on behalf of multiple watchers though.
|
||||
// And antietcd has an advanced semantic which merges the same revision for all watchers
|
||||
// into one message and just omits watch_id.
|
||||
// So we also have to handle the case where watch_id is -1 or not present (0).
|
||||
auto watch_rev = data["result"]["header"]["revision"].uint64_value();
|
||||
if (!watch_id || watch_id == UINT64_MAX)
|
||||
etcd_watch_revision_config = etcd_watch_revision_osd = etcd_watch_revision_pg = watch_rev;
|
||||
else if (watch_id == ETCD_CONFIG_WATCH_ID)
|
||||
etcd_watch_revision_config = watch_rev;
|
||||
else if (watch_id == ETCD_PG_STATE_WATCH_ID)
|
||||
etcd_watch_revision_pg = watch_rev;
|
||||
else if (watch_id == ETCD_OSD_STATE_WATCH_ID)
|
||||
etcd_watch_revision_osd = watch_rev;
|
||||
addresses_to_try.clear();
|
||||
}
|
||||
// First gather all changes into a hash to remove multiple overwrites
|
||||
@@ -457,7 +480,7 @@ void etcd_state_client_t::start_etcd_watcher()
|
||||
{ "create_request", json11::Json::object {
|
||||
{ "key", base64_encode(etcd_prefix+"/config/") },
|
||||
{ "range_end", base64_encode(etcd_prefix+"/config0") },
|
||||
{ "start_revision", etcd_watch_revision },
|
||||
{ "start_revision", etcd_watch_revision_config },
|
||||
{ "watch_id", ETCD_CONFIG_WATCH_ID },
|
||||
{ "progress_notify", true },
|
||||
} }
|
||||
@@ -466,29 +489,21 @@ void etcd_state_client_t::start_etcd_watcher()
|
||||
{ "create_request", json11::Json::object {
|
||||
{ "key", base64_encode(etcd_prefix+"/osd/state/") },
|
||||
{ "range_end", base64_encode(etcd_prefix+"/osd/state0") },
|
||||
{ "start_revision", etcd_watch_revision },
|
||||
{ "start_revision", etcd_watch_revision_osd },
|
||||
{ "watch_id", ETCD_OSD_STATE_WATCH_ID },
|
||||
{ "progress_notify", true },
|
||||
} }
|
||||
}).dump());
|
||||
http_post_message(etcd_watch_ws, WS_TEXT, json11::Json(json11::Json::object {
|
||||
{ "create_request", json11::Json::object {
|
||||
{ "key", base64_encode(etcd_prefix+"/pg/state/") },
|
||||
{ "range_end", base64_encode(etcd_prefix+"/pg/state0") },
|
||||
{ "start_revision", etcd_watch_revision },
|
||||
{ "key", base64_encode(etcd_prefix+"/pg/") },
|
||||
{ "range_end", base64_encode(etcd_prefix+"/pg0") },
|
||||
{ "start_revision", etcd_watch_revision_pg },
|
||||
{ "watch_id", ETCD_PG_STATE_WATCH_ID },
|
||||
{ "progress_notify", true },
|
||||
} }
|
||||
}).dump());
|
||||
http_post_message(etcd_watch_ws, WS_TEXT, json11::Json(json11::Json::object {
|
||||
{ "create_request", json11::Json::object {
|
||||
{ "key", base64_encode(etcd_prefix+"/pg/history/") },
|
||||
{ "range_end", base64_encode(etcd_prefix+"/pg/history0") },
|
||||
{ "start_revision", etcd_watch_revision },
|
||||
{ "watch_id", ETCD_PG_HISTORY_WATCH_ID },
|
||||
{ "progress_notify", true },
|
||||
} }
|
||||
}).dump());
|
||||
// FIXME: Do not watch /pg/history/ at all in client code (not in OSD)
|
||||
if (on_start_watcher_hook)
|
||||
{
|
||||
on_start_watcher_hook(etcd_watch_ws);
|
||||
@@ -591,6 +606,11 @@ void etcd_state_client_t::load_pgs()
|
||||
{ "key", base64_encode(etcd_prefix+"/config/pgs") },
|
||||
} }
|
||||
},
|
||||
json11::Json::object {
|
||||
{ "request_range", json11::Json::object {
|
||||
{ "key", base64_encode(etcd_prefix+"/pg/config") },
|
||||
} }
|
||||
},
|
||||
json11::Json::object {
|
||||
{ "request_range", json11::Json::object {
|
||||
{ "key", base64_encode(etcd_prefix+"/config/inode/") },
|
||||
@@ -640,13 +660,10 @@ void etcd_state_client_t::load_pgs()
|
||||
return;
|
||||
}
|
||||
reset_pg_exists();
|
||||
if (!etcd_watch_revision)
|
||||
etcd_watch_revision_config = etcd_watch_revision_osd = etcd_watch_revision_pg = data["header"]["revision"].uint64_value()+1;
|
||||
if (this->log_level > 3)
|
||||
{
|
||||
etcd_watch_revision = data["header"]["revision"].uint64_value()+1;
|
||||
if (this->log_level > 3)
|
||||
{
|
||||
fprintf(stderr, "Loaded revision %ju of PG configuration\n", etcd_watch_revision-1);
|
||||
}
|
||||
fprintf(stderr, "Loaded revision %ju of PG configuration\n", etcd_watch_revision_pg-1);
|
||||
}
|
||||
for (auto & res: data["responses"].array_items())
|
||||
{
|
||||
@@ -713,7 +730,7 @@ void etcd_state_client_t::clean_nonexistent_pgs()
|
||||
{
|
||||
if (!pg_cfg.state_exists)
|
||||
{
|
||||
if (this->log_level > 3)
|
||||
if (this->log_level > 3 && (pg_cfg.cur_primary || pg_cfg.cur_state))
|
||||
{
|
||||
fprintf(stderr, "PG %u/%u primary OSD disappeared after reload, forgetting it\n", pool_item.first, pg_it->first);
|
||||
}
|
||||
@@ -723,7 +740,7 @@ void etcd_state_client_t::clean_nonexistent_pgs()
|
||||
}
|
||||
if (!pg_cfg.history_exists)
|
||||
{
|
||||
if (this->log_level > 3)
|
||||
if (this->log_level > 3 && (pg_cfg.target_history.size() || pg_cfg.all_peers.size() || pg_cfg.epoch || pg_cfg.next_scrub))
|
||||
{
|
||||
fprintf(stderr, "PG %u/%u history disappeared after reload, forgetting it\n", pool_item.first, pg_it->first);
|
||||
}
|
||||
@@ -895,8 +912,17 @@ void etcd_state_client_t::parse_state(const etcd_kv_t & kv)
|
||||
on_change_pool_config_hook();
|
||||
}
|
||||
}
|
||||
else if (key == etcd_prefix+"/config/pgs")
|
||||
else if (key == etcd_prefix+"/pg/config" || key == etcd_prefix+"/config/pgs")
|
||||
{
|
||||
if (key == etcd_prefix+"/pg/config")
|
||||
{
|
||||
new_pg_config = !value.is_null();
|
||||
}
|
||||
else if (new_pg_config)
|
||||
{
|
||||
// Ignore old key if the new one is present
|
||||
return;
|
||||
}
|
||||
for (auto & pool_item: this->pool_config)
|
||||
{
|
||||
for (auto & pg_item: pool_item.second.pg_config)
|
||||
|
@@ -10,10 +10,9 @@
|
||||
#include "timerfd_manager.h"
|
||||
|
||||
#define ETCD_CONFIG_WATCH_ID 1
|
||||
#define ETCD_PG_STATE_WATCH_ID 2
|
||||
#define ETCD_PG_HISTORY_WATCH_ID 3
|
||||
#define ETCD_OSD_STATE_WATCH_ID 4
|
||||
#define ETCD_TOTAL_WATCHES 4
|
||||
#define ETCD_OSD_STATE_WATCH_ID 2
|
||||
#define ETCD_PG_STATE_WATCH_ID 3
|
||||
#define ETCD_TOTAL_WATCHES 3
|
||||
|
||||
#define DEFAULT_BLOCK_SIZE 128*1024
|
||||
#define MIN_DATA_BLOCK_SIZE 4*1024
|
||||
@@ -95,7 +94,7 @@ protected:
|
||||
std::string selected_etcd_address;
|
||||
std::vector<std::string> addresses_to_try;
|
||||
std::vector<inode_watch_t*> watches;
|
||||
http_co_t *etcd_watch_ws = NULL, *keepalive_client = NULL;
|
||||
bool new_pg_config = false;
|
||||
int ws_keepalive_timer = -1;
|
||||
int ws_alive = 0;
|
||||
bool rand_initialized = false;
|
||||
@@ -115,8 +114,11 @@ public:
|
||||
int log_level = 0;
|
||||
timerfd_manager_t *tfd = NULL;
|
||||
|
||||
http_co_t *etcd_watch_ws = NULL, *keepalive_client = NULL;
|
||||
int etcd_watches_initialised = 0;
|
||||
uint64_t etcd_watch_revision = 0;
|
||||
uint64_t etcd_watch_revision_config = 0;
|
||||
uint64_t etcd_watch_revision_osd = 0;
|
||||
uint64_t etcd_watch_revision_pg = 0;
|
||||
std::map<pool_id_t, pool_config_t> pool_config;
|
||||
std::map<osd_num_t, json11::Json> peer_states;
|
||||
std::set<osd_num_t> seen_peers;
|
||||
|
@@ -253,7 +253,7 @@ nla_put_failure:
|
||||
const char *exe_name = NULL;
|
||||
|
||||
const char *help_text =
|
||||
"Vitastor NBD proxy " VERSION "\n"
|
||||
"Vitastor NBD proxy " VITASTOR_VERSION "\n"
|
||||
"(c) Vitaliy Filippov, 2020+ (VNPL-1.1)\n"
|
||||
"\n"
|
||||
"COMMANDS:\n"
|
||||
|
@@ -6,7 +6,7 @@ includedir=${prefix}/@CMAKE_INSTALL_INCLUDEDIR@
|
||||
|
||||
Name: Vitastor
|
||||
Description: Vitastor client library
|
||||
Version: 1.7.0
|
||||
Version: 1.8.0
|
||||
Libs: -L${libdir} -lvitastor_client
|
||||
Cflags: -I${includedir}
|
||||
|
||||
|
@@ -17,7 +17,7 @@
|
||||
static const char *exe_name = NULL;
|
||||
|
||||
static const char* help_text =
|
||||
"Vitastor command-line tool " VERSION "\n"
|
||||
"Vitastor command-line tool " VITASTOR_VERSION "\n"
|
||||
"(c) Vitaliy Filippov, 2019+ (VNPL-1.1)\n"
|
||||
"\n"
|
||||
"COMMANDS:\n"
|
||||
|
@@ -119,7 +119,7 @@ resume_1:
|
||||
{ "result", "LESS" },
|
||||
{ "mod_revision", osd_cfg_mod_rev+1 },
|
||||
});
|
||||
if (osd_cfg.size())
|
||||
if (!osd_cfg.size())
|
||||
{
|
||||
success.push_back(json11::Json::object {
|
||||
{ "request_delete_range", json11::Json::object {
|
||||
|
@@ -49,8 +49,8 @@ struct pg_lister_t
|
||||
{ "success", json11::Json::array {
|
||||
json11::Json::object {
|
||||
{ "request_range", json11::Json::object {
|
||||
{ "key", base64_encode(parent->cli->st_cli.etcd_prefix+"/pg/stats"+(pool_id ? "/"+std::to_string(pool_id)+"/" : "/")) },
|
||||
{ "range_end", base64_encode(parent->cli->st_cli.etcd_prefix+"/pg/stats"+(pool_id ? "/"+std::to_string(pool_id)+"0" : "0")) },
|
||||
{ "key", base64_encode(parent->cli->st_cli.etcd_prefix+"/pgstats"+(pool_id ? "/"+std::to_string(pool_id)+"/" : "/")) },
|
||||
{ "range_end", base64_encode(parent->cli->st_cli.etcd_prefix+"/pgstats"+(pool_id ? "/"+std::to_string(pool_id)+"0" : "0")) },
|
||||
} },
|
||||
},
|
||||
} },
|
||||
@@ -65,7 +65,7 @@ resume_1:
|
||||
state = 100;
|
||||
return;
|
||||
}
|
||||
parent->iterate_kvs_2(parent->etcd_result["responses"][0]["response_range"]["kvs"], "/pg/stats/", [&](pool_id_t pool_id, uint64_t pg_num, json11::Json value)
|
||||
parent->iterate_kvs_2(parent->etcd_result["responses"][0]["response_range"]["kvs"], "/pgstats/", [&](pool_id_t pool_id, uint64_t pg_num, json11::Json value)
|
||||
{
|
||||
pg_stats[(pool_pg_num_t){ .pool_id = pool_id, .pg_num = (pg_num_t)pg_num }] = value;
|
||||
});
|
||||
|
@@ -214,10 +214,10 @@ resume_1:
|
||||
json11::Json::object {
|
||||
{ "request_range", json11::Json::object {
|
||||
{ "key", base64_encode(
|
||||
parent->cli->st_cli.etcd_prefix+"/pg/stats/"
|
||||
parent->cli->st_cli.etcd_prefix+"/pgstats/"
|
||||
) },
|
||||
{ "range_end", base64_encode(
|
||||
parent->cli->st_cli.etcd_prefix+"/pg/stats0"
|
||||
parent->cli->st_cli.etcd_prefix+"/pgstats0"
|
||||
) },
|
||||
} },
|
||||
},
|
||||
@@ -235,7 +235,7 @@ resume_1:
|
||||
}
|
||||
// Calculate recovery percent
|
||||
std::map<pool_id_t, object_counts_t> counts;
|
||||
parent->iterate_kvs_2(parent->etcd_result["responses"][0]["response_range"]["kvs"], "/pg/stats/",
|
||||
parent->iterate_kvs_2(parent->etcd_result["responses"][0]["response_range"]["kvs"], "/pgstats/",
|
||||
[&](pool_id_t pool_id, uint64_t pg_num, json11::Json value)
|
||||
{
|
||||
auto & cnt = counts[pool_id];
|
||||
|
@@ -176,7 +176,7 @@ struct rm_osd_t
|
||||
json11::Json::object {
|
||||
{ "request_range", json11::Json::object {
|
||||
{ "key", base64_encode(
|
||||
parent->cli->st_cli.etcd_prefix+"/config/pgs"
|
||||
parent->cli->st_cli.etcd_prefix+"/pg/config"
|
||||
) },
|
||||
} },
|
||||
},
|
||||
@@ -229,7 +229,7 @@ struct rm_osd_t
|
||||
}
|
||||
if (!new_pgs.is_null())
|
||||
{
|
||||
auto pgs_key = base64_encode(parent->cli->st_cli.etcd_prefix+"/config/pgs");
|
||||
auto pgs_key = base64_encode(parent->cli->st_cli.etcd_prefix+"/pg/config");
|
||||
rm_items.push_back(json11::Json::object {
|
||||
{ "request_put", json11::Json::object {
|
||||
{ "key", pgs_key },
|
||||
@@ -427,7 +427,7 @@ struct rm_osd_t
|
||||
{ "target", "MOD" },
|
||||
{ "key", history_key },
|
||||
{ "result", "LESS" },
|
||||
{ "mod_revision", parent->cli->st_cli.etcd_watch_revision+1 },
|
||||
{ "mod_revision", parent->cli->st_cli.etcd_watch_revision_pg+1 },
|
||||
});
|
||||
}
|
||||
}
|
||||
|
@@ -5,7 +5,7 @@
|
||||
#include "str_util.h"
|
||||
|
||||
static const char *help_text =
|
||||
"Vitastor disk management tool " VERSION "\n"
|
||||
"Vitastor disk management tool " VITASTOR_VERSION "\n"
|
||||
"(c) Vitaliy Filippov, 2022+ (VNPL-1.1)\n"
|
||||
"\n"
|
||||
"COMMANDS:\n"
|
||||
|
@@ -383,7 +383,7 @@ int disk_tool_t::pre_exec_osd(std::string device)
|
||||
|
||||
int disk_tool_t::purge_devices(const std::vector<std::string> & devices)
|
||||
{
|
||||
std::vector<uint64_t> osd_numbers;
|
||||
std::set<uint64_t> osd_numbers;
|
||||
json11::Json::array superblocks;
|
||||
for (auto & device: devices)
|
||||
{
|
||||
@@ -391,8 +391,11 @@ int disk_tool_t::purge_devices(const std::vector<std::string> & devices)
|
||||
if (!sb.is_null())
|
||||
{
|
||||
uint64_t osd_num = sb["params"]["osd_num"].uint64_value();
|
||||
osd_numbers.push_back(osd_num);
|
||||
superblocks.push_back(sb);
|
||||
if (osd_numbers.find(osd_num) == osd_numbers.end())
|
||||
{
|
||||
osd_numbers.insert(osd_num);
|
||||
superblocks.push_back(sb);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!osd_numbers.size())
|
||||
|
@@ -10,7 +10,7 @@ set_target_properties(vitastor_kv PROPERTIES PUBLIC_HEADER "kv/vitastor_kv.h")
|
||||
target_link_libraries(vitastor_kv
|
||||
vitastor_client
|
||||
)
|
||||
set_target_properties(vitastor_kv PROPERTIES VERSION ${VERSION} SOVERSION 0)
|
||||
set_target_properties(vitastor_kv PROPERTIES VERSION ${VITASTOR_VERSION} SOVERSION 0)
|
||||
|
||||
# vitastor-kv
|
||||
add_executable(vitastor-kv
|
||||
|
@@ -501,7 +501,7 @@ void kv_block_t::dump(int base_level)
|
||||
|
||||
void kv_db_t::open(inode_t inode_id, json11::Json cfg, std::function<void(int)> cb)
|
||||
{
|
||||
if (block_cache.size() > 0 || inode_id)
|
||||
if (block_cache.size() > 0 || this->inode_id)
|
||||
{
|
||||
cb(-EINVAL);
|
||||
return;
|
||||
|
@@ -53,7 +53,7 @@ nfs_proxy_t::~nfs_proxy_t()
|
||||
}
|
||||
|
||||
static const char* help_text =
|
||||
"Vitastor NFS 3.0 proxy " VERSION "\n"
|
||||
"Vitastor NFS 3.0 proxy " VITASTOR_VERSION "\n"
|
||||
"(c) Vitaliy Filippov, 2021+ (VNPL-1.1)\n"
|
||||
"\n"
|
||||
"vitastor-nfs (--fs <NAME> | --block) [-o <OPT>] mount <MOUNTPOINT>\n"
|
||||
@@ -372,24 +372,6 @@ void nfs_proxy_t::watch_stats()
|
||||
assert(cli->st_cli.on_start_watcher_hook == NULL);
|
||||
cli->st_cli.on_start_watcher_hook = [this](http_co_t *etcd_watch_ws)
|
||||
{
|
||||
http_post_message(etcd_watch_ws, WS_TEXT, json11::Json(json11::Json::object {
|
||||
{ "create_request", json11::Json::object {
|
||||
{ "key", base64_encode(cli->st_cli.etcd_prefix+"/inode/stats/") },
|
||||
{ "range_end", base64_encode(cli->st_cli.etcd_prefix+"/inode/stats0") },
|
||||
{ "start_revision", cli->st_cli.etcd_watch_revision },
|
||||
{ "watch_id", ETCD_INODE_STATS_WATCH_ID },
|
||||
{ "progress_notify", true },
|
||||
} }
|
||||
}).dump());
|
||||
http_post_message(etcd_watch_ws, WS_TEXT, json11::Json(json11::Json::object {
|
||||
{ "create_request", json11::Json::object {
|
||||
{ "key", base64_encode(cli->st_cli.etcd_prefix+"/pool/stats/") },
|
||||
{ "range_end", base64_encode(cli->st_cli.etcd_prefix+"/pool/stats0") },
|
||||
{ "start_revision", cli->st_cli.etcd_watch_revision },
|
||||
{ "watch_id", ETCD_POOL_STATS_WATCH_ID },
|
||||
{ "progress_notify", true },
|
||||
} }
|
||||
}).dump());
|
||||
cli->st_cli.etcd_txn_slow(json11::Json::object {
|
||||
{ "success", json11::Json::array {
|
||||
json11::Json::object {
|
||||
@@ -415,6 +397,28 @@ void nfs_proxy_t::watch_stats()
|
||||
parse_stats(kv);
|
||||
}
|
||||
}
|
||||
if (cli->st_cli.etcd_watch_ws)
|
||||
{
|
||||
auto watch_rev = res["header"]["revision"].uint64_value()+1;
|
||||
http_post_message(cli->st_cli.etcd_watch_ws, WS_TEXT, json11::Json(json11::Json::object {
|
||||
{ "create_request", json11::Json::object {
|
||||
{ "key", base64_encode(cli->st_cli.etcd_prefix+"/inode/stats/") },
|
||||
{ "range_end", base64_encode(cli->st_cli.etcd_prefix+"/inode/stats0") },
|
||||
{ "start_revision", watch_rev },
|
||||
{ "watch_id", ETCD_INODE_STATS_WATCH_ID },
|
||||
{ "progress_notify", true },
|
||||
} }
|
||||
}).dump());
|
||||
http_post_message(cli->st_cli.etcd_watch_ws, WS_TEXT, json11::Json(json11::Json::object {
|
||||
{ "create_request", json11::Json::object {
|
||||
{ "key", base64_encode(cli->st_cli.etcd_prefix+"/pool/stats/") },
|
||||
{ "range_end", base64_encode(cli->st_cli.etcd_prefix+"/pool/stats0") },
|
||||
{ "start_revision", watch_rev },
|
||||
{ "watch_id", ETCD_POOL_STATS_WATCH_ID },
|
||||
{ "progress_notify", true },
|
||||
} }
|
||||
}).dump());
|
||||
}
|
||||
});
|
||||
};
|
||||
cli->st_cli.on_change_hook = [this, old_hook = cli->st_cli.on_change_hook](std::map<std::string, etcd_kv_t> & changes)
|
||||
@@ -518,17 +522,15 @@ void nfs_proxy_t::do_accept(int listen_fd)
|
||||
{
|
||||
cli->proc_table.insert(fn);
|
||||
}
|
||||
rpc_clients[nfs_fd] = cli;
|
||||
epmgr->tfd->set_fd_handler(nfs_fd, true, [cli](int nfs_fd, int epoll_events)
|
||||
{
|
||||
// Handle incoming event
|
||||
if (epoll_events & EPOLLRDHUP)
|
||||
{
|
||||
auto parent = cli->parent;
|
||||
if (parent->trace)
|
||||
if (cli->parent->trace)
|
||||
fprintf(stderr, "Client %d disconnected\n", nfs_fd);
|
||||
cli->stop();
|
||||
parent->active_connections--;
|
||||
parent->check_exit();
|
||||
return;
|
||||
}
|
||||
cli->epoll_events |= epoll_events;
|
||||
@@ -691,6 +693,8 @@ void nfs_client_t::handle_read(int result)
|
||||
frag_num++;
|
||||
}
|
||||
}
|
||||
// Increase client refcount while the RPC call is being processed
|
||||
refs++;
|
||||
// Handle full message
|
||||
int referenced = handle_rpc_message(cur_buffer.buf, data+4, wanted-4*fragments);
|
||||
cur_buffer.refs += referenced ? 1 : 0;
|
||||
@@ -775,9 +779,13 @@ void nfs_client_t::stop()
|
||||
stopped = true;
|
||||
if (refs <= 0)
|
||||
{
|
||||
auto parent = this->parent;
|
||||
parent->rpc_clients.erase(nfs_fd);
|
||||
parent->active_connections--;
|
||||
parent->epmgr->tfd->set_fd_handler(nfs_fd, true, NULL);
|
||||
close(nfs_fd);
|
||||
delete this;
|
||||
parent->check_exit();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -830,6 +838,10 @@ void nfs_client_t::handle_send(int result)
|
||||
}
|
||||
}
|
||||
free(rop);
|
||||
if (deref())
|
||||
{
|
||||
return;
|
||||
}
|
||||
}
|
||||
result -= iov.iov_len;
|
||||
done++;
|
||||
@@ -1052,6 +1064,11 @@ int nfs_client_t::handle_rpc_message(void *base_buf, void *msg_buf, uint32_t msg
|
||||
|
||||
void nfs_proxy_t::daemonize()
|
||||
{
|
||||
// Stop all clients because client I/O sometimes breaks during daemonize
|
||||
// I.e. the new process stops receiving events on the old FD
|
||||
// It doesn't happen if we call sleep(1) here, but we don't want to call sleep(1)...
|
||||
for (auto & clp: rpc_clients)
|
||||
clp.second->stop();
|
||||
if (fork())
|
||||
exit(0);
|
||||
setsid();
|
||||
|
@@ -21,6 +21,7 @@ class cli_tool_t;
|
||||
|
||||
struct kv_fs_state_t;
|
||||
struct block_fs_state_t;
|
||||
class nfs_client_t;
|
||||
|
||||
class nfs_proxy_t
|
||||
{
|
||||
@@ -54,6 +55,7 @@ public:
|
||||
vitastorkv_dbw_t *db = NULL;
|
||||
kv_fs_state_t *kvfs = NULL;
|
||||
block_fs_state_t *blockfs = NULL;
|
||||
std::map<int, nfs_client_t*> rpc_clients;
|
||||
|
||||
std::vector<XDR*> xdr_pool;
|
||||
|
||||
|
@@ -169,6 +169,7 @@ json11::Json osd_t::get_osd_state()
|
||||
else
|
||||
st["addresses"] = getifaddr_list();
|
||||
st["host"] = std::string(hostname.data(), hostname.size());
|
||||
st["version"] = VITASTOR_VERSION;
|
||||
st["port"] = listening_port;
|
||||
st["primary_enabled"] = run_primary;
|
||||
st["blockstore_enabled"] = bs ? true : false;
|
||||
@@ -199,6 +200,7 @@ json11::Json osd_t::get_statistics()
|
||||
st["bitmap_granularity"] = (uint64_t)bs_bitmap_granularity;
|
||||
st["immediate_commit"] = immediate_commit == IMMEDIATE_ALL ? "all" : (immediate_commit == IMMEDIATE_SMALL ? "small" : "none");
|
||||
st["host"] = self_state["host"];
|
||||
st["version"] = VITASTOR_VERSION;
|
||||
json11::Json::object op_stats, subop_stats;
|
||||
for (int i = OSD_OP_MIN; i <= OSD_OP_MAX; i++)
|
||||
{
|
||||
@@ -371,7 +373,7 @@ void osd_t::report_statistics()
|
||||
pg_stats["write_osd_set"] = pg.cur_set;
|
||||
txn.push_back(json11::Json::object {
|
||||
{ "request_put", json11::Json::object {
|
||||
{ "key", base64_encode(st_cli.etcd_prefix+"/pg/stats/"+std::to_string(pg.pool_id)+"/"+std::to_string(pg.pg_num)) },
|
||||
{ "key", base64_encode(st_cli.etcd_prefix+"/pgstats/"+std::to_string(pg.pool_id)+"/"+std::to_string(pg.pg_num)) },
|
||||
{ "value", base64_encode(json11::Json(pg_stats).dump()) },
|
||||
} }
|
||||
});
|
||||
@@ -418,7 +420,7 @@ void osd_t::on_change_etcd_state_hook(std::map<std::string, etcd_kv_t> & changes
|
||||
}
|
||||
if (run_primary)
|
||||
{
|
||||
bool pgs = changes.find(st_cli.etcd_prefix+"/config/pgs") != changes.end();
|
||||
bool pgs = changes.find(st_cli.etcd_prefix+"/pg/config") != changes.end();
|
||||
if (pools || pgs)
|
||||
{
|
||||
apply_pg_count();
|
||||
@@ -903,7 +905,7 @@ void osd_t::report_pg_states()
|
||||
{ "target", "MOD" },
|
||||
{ "key", state_key_base64 },
|
||||
{ "result", "LESS" },
|
||||
{ "mod_revision", st_cli.etcd_watch_revision+1 },
|
||||
{ "mod_revision", st_cli.etcd_watch_revision_pg+1 },
|
||||
});
|
||||
continue;
|
||||
}
|
||||
@@ -974,7 +976,7 @@ void osd_t::report_pg_states()
|
||||
{ "target", "MOD" },
|
||||
{ "key", history_key },
|
||||
{ "result", "LESS" },
|
||||
{ "mod_revision", st_cli.etcd_watch_revision+1 },
|
||||
{ "mod_revision", st_cli.etcd_watch_revision_pg+1 },
|
||||
});
|
||||
success.push_back(json11::Json::object {
|
||||
{ "request_put", json11::Json::object {
|
||||
|
@@ -20,7 +20,7 @@ static void handle_sigint(int sig)
|
||||
}
|
||||
|
||||
static const char* help_text =
|
||||
"Vitastor OSD (block object storage daemon) " VERSION "\n"
|
||||
"Vitastor OSD (block object storage daemon) " VITASTOR_VERSION "\n"
|
||||
"(c) Vitaliy Filippov, 2019+ (VNPL-1.1)\n"
|
||||
"\n"
|
||||
"OSDs are usually started by vitastor-disk.\n"
|
||||
|
@@ -261,7 +261,7 @@ void osd_t::start_pg_peering(pg_t & pg)
|
||||
}
|
||||
}
|
||||
}
|
||||
if (found < (nonzero >= pg.pg_data_size ? pg.pg_data_size : 1))
|
||||
if (nonzero >= pg.pg_data_size && found < pg.pg_data_size)
|
||||
{
|
||||
pg.state = PG_INCOMPLETE;
|
||||
report_pg_state(pg);
|
||||
|
@@ -22,7 +22,7 @@ void configure_single_pg_pool(cluster_client_t *cli)
|
||||
},
|
||||
});
|
||||
cli->st_cli.parse_state((etcd_kv_t){
|
||||
.key = "/config/pgs",
|
||||
.key = "/pg/config",
|
||||
.value = json11::Json::object {
|
||||
{ "items", json11::Json::object {
|
||||
{ "1", json11::Json::object {
|
||||
|
@@ -24,44 +24,77 @@ ETCD=${ETCD:-etcd}
|
||||
ETCD_IP=${ETCD_IP:-127.0.0.1}
|
||||
ETCD_PORT=${ETCD_PORT:-12379}
|
||||
ETCD_COUNT=${ETCD_COUNT:-1}
|
||||
ANTIETCD=${ANTIETCD}
|
||||
USE_RAMDISK=${USE_RAMDISK}
|
||||
|
||||
if [ "$KEEP_DATA" = "" ]; then
|
||||
RAMDISK=/run/user/$(id -u)
|
||||
findmnt $RAMDISK >/dev/null || (sudo mkdir -p $RAMDISK && sudo mount -t tmpfs tmpfs $RAMDISK)
|
||||
|
||||
if [[ -z "$KEEP_DATA" ]]; then
|
||||
rm -rf ./testdata
|
||||
rm -rf /run/user/$(id -u)/testdata_etcd*
|
||||
rm -rf /run/user/$(id -u)/testdata_etcd* /run/user/$(id -u)/testdata_bin
|
||||
mkdir -p ./testdata
|
||||
if [[ -n "$USE_RAMDISK" ]]; then
|
||||
OSD_ARGS="$OSD_ARGS --data_io cached"
|
||||
mkdir -p /run/user/$(id -u)/testdata_bin
|
||||
ln -s /run/user/$(id -u)/testdata_bin ./testdata/bin
|
||||
else
|
||||
mkdir -p ./testdata/bin
|
||||
fi
|
||||
fi
|
||||
|
||||
ETCD_URL="http://$ETCD_IP:$ETCD_PORT"
|
||||
ETCD_CLUSTER="etcd1=http://$ETCD_IP:$((ETCD_PORT+1))"
|
||||
for i in $(seq 2 $ETCD_COUNT); do
|
||||
ETCD_URL="$ETCD_URL,http://$ETCD_IP:$((ETCD_PORT+2*i-2))"
|
||||
ETCD_CLUSTER="$ETCD_CLUSTER,etcd$i=http://$ETCD_IP:$((ETCD_PORT+2*i-1))"
|
||||
done
|
||||
ETCDCTL="${ETCD}ctl --endpoints=$ETCD_URL --dial-timeout=5s --command-timeout=10s"
|
||||
|
||||
start_etcd()
|
||||
{
|
||||
local i=$1
|
||||
local t=/run/user/$(id -u)
|
||||
findmnt $t >/dev/null || (sudo mkdir -p $t && sudo mount -t tmpfs tmpfs $t)
|
||||
ionice -c2 -n0 $ETCD -name etcd$i --data-dir /run/user/$(id -u)/testdata_etcd$i \
|
||||
--advertise-client-urls http://$ETCD_IP:$((ETCD_PORT+2*i-2)) --listen-client-urls http://$ETCD_IP:$((ETCD_PORT+2*i-2)) \
|
||||
--initial-advertise-peer-urls http://$ETCD_IP:$((ETCD_PORT+2*i-1)) --listen-peer-urls http://$ETCD_IP:$((ETCD_PORT+2*i-1)) \
|
||||
--initial-cluster-token vitastor-tests-etcd --initial-cluster-state new \
|
||||
--initial-cluster "$ETCD_CLUSTER" --max-request-bytes=104857600 \
|
||||
--max-txn-ops=100000 --auto-compaction-retention=10 --auto-compaction-mode=revision &>./testdata/etcd$i.log &
|
||||
eval ETCD${i}_PID=$!
|
||||
if [[ -z "$ANTIETCD" ]]; then
|
||||
ionice -c2 -n0 $ETCD -name etcd$i --data-dir $RAMDISK/testdata_etcd$i \
|
||||
--advertise-client-urls http://$ETCD_IP:$((ETCD_PORT+2*i-2)) --listen-client-urls http://$ETCD_IP:$((ETCD_PORT+2*i-2)) \
|
||||
--initial-advertise-peer-urls http://$ETCD_IP:$((ETCD_PORT+2*i-1)) --listen-peer-urls http://$ETCD_IP:$((ETCD_PORT+2*i-1)) \
|
||||
--initial-cluster-token vitastor-tests-etcd --initial-cluster-state new \
|
||||
--initial-cluster "$ETCD_CLUSTER" --max-request-bytes=104857600 \
|
||||
--max-txn-ops=100000 --auto-compaction-retention=10 --auto-compaction-mode=revision &>./testdata/etcd$i.log &
|
||||
eval ETCD${i}_PID=$!
|
||||
else
|
||||
node mon/mon-main.js $MON_PARAMS --antietcd_port $((ETCD_PORT+2*i-2)) --etcd_address $ETCD_URL --etcd_prefix "/vitastor" --verbose 1 >>./testdata/mon$i.log 2>&1 &
|
||||
eval ETCD${i}_PID=$!
|
||||
fi
|
||||
}
|
||||
|
||||
for i in $(seq 1 $ETCD_COUNT); do
|
||||
start_etcd $i
|
||||
done
|
||||
for i in {1..30}; do
|
||||
${ETCD}ctl --endpoints=$ETCD_URL --dial-timeout=1s --command-timeout=1s member list >/dev/null && break
|
||||
if [[ $i = 30 ]]; then
|
||||
format_error "Failed to start etcd"
|
||||
fi
|
||||
done
|
||||
start_etcd_cluster()
|
||||
{
|
||||
ETCD_CLUSTER="etcd1=http://$ETCD_IP:$((ETCD_PORT+1))"
|
||||
for i in $(seq 2 $ETCD_COUNT); do
|
||||
ETCD_CLUSTER="$ETCD_CLUSTER,etcd$i=http://$ETCD_IP:$((ETCD_PORT+2*i-1))"
|
||||
done
|
||||
for i in $(seq 1 $ETCD_COUNT); do
|
||||
start_etcd $i
|
||||
done
|
||||
}
|
||||
|
||||
wait_etcd()
|
||||
{
|
||||
for i in {1..30}; do
|
||||
$ETCDCTL --dial-timeout=1s --command-timeout=1s get --prefix / && break
|
||||
if [[ $i = 30 ]]; then
|
||||
format_error "Failed to start etcd"
|
||||
fi
|
||||
sleep 1
|
||||
done
|
||||
}
|
||||
|
||||
if [[ -n "$ANTIETCD" ]]; then
|
||||
ETCDCTL="node mon/node_modules/.bin/anticli -e $ETCD_URL"
|
||||
MON_PARAMS="--use_antietcd 1 --antietcd_data_dir ./testdata --antietcd_persist_interval 500 $MON_PARAMS"
|
||||
else
|
||||
ETCDCTL="${ETCD}ctl --endpoints=$ETCD_URL --dial-timeout=5s --command-timeout=10s"
|
||||
MON_PARAMS="$MON_PARAMS"
|
||||
start_etcd_cluster
|
||||
fi
|
||||
|
||||
echo leak:fio >> testdata/lsan-suppress.txt
|
||||
echo leak:tcmalloc >> testdata/lsan-suppress.txt
|
||||
|
@@ -18,6 +18,16 @@ else
|
||||
OSD_COUNT=${OSD_COUNT:-3}
|
||||
fi
|
||||
|
||||
if [[ -n "$ANTIETCD" ]]; then
|
||||
for i in $(seq 1 $ETCD_COUNT); do
|
||||
start_etcd $i
|
||||
done
|
||||
else
|
||||
node mon/mon-main.js $MON_PARAMS --etcd_address $ETCD_URL --etcd_prefix "/vitastor" --verbose 1 >>./testdata/mon.log 2>&1 &
|
||||
MON_PID=$!
|
||||
fi
|
||||
wait_etcd
|
||||
|
||||
if [ "$IMMEDIATE_COMMIT" != "" ]; then
|
||||
NO_SAME="--journal_no_same_sector_overwrites true --journal_sector_buffer_count 1024 --disable_data_fsync 1 --immediate_commit all --log_level 10 --etcd_stats_interval 5"
|
||||
$ETCDCTL put /vitastor/config/global '{"recovery_queue_depth":1,"recovery_tune_util_low":1,"immediate_commit":"all","client_enable_writeback":true,"client_max_writeback_iodepth":32'$GLOBAL_CONFIG'}'
|
||||
@@ -40,8 +50,8 @@ if ! type -t osd_dev; then
|
||||
osd_dev()
|
||||
{
|
||||
local i=$1
|
||||
[[ -f ./testdata/test_osd$i.bin ]] || dd if=/dev/zero of=./testdata/test_osd$i.bin bs=1024 count=1 seek=$((OSD_SIZE*1024-1))
|
||||
echo ./testdata/test_osd$i.bin
|
||||
[[ -f ./testdata/bin/test_osd$i.bin ]] || dd if=/dev/zero of=./testdata/bin/test_osd$i.bin bs=1024 count=1 seek=$((OSD_SIZE*1024-1))
|
||||
echo ./testdata/bin/test_osd$i.bin
|
||||
}
|
||||
fi
|
||||
|
||||
@@ -54,9 +64,6 @@ for i in $(seq 1 $OSD_COUNT); do
|
||||
start_osd $i
|
||||
done
|
||||
|
||||
node mon/mon-main.js --etcd_address $ETCD_URL --etcd_prefix "/vitastor" --verbose 1 >>./testdata/mon.log 2>&1 &
|
||||
MON_PID=$!
|
||||
|
||||
if [ "$SCHEME" = "ec" ]; then
|
||||
PG_SIZE=${PG_SIZE:-5}
|
||||
PG_MINSIZE=${PG_MINSIZE:-4}
|
||||
@@ -82,7 +89,7 @@ wait_up()
|
||||
local i=0
|
||||
local configured=0
|
||||
while [[ $i -lt $sec ]]; do
|
||||
if $ETCDCTL get /vitastor/config/pgs --print-value-only | jq -s -e '(. | length) != 0 and ([ .[0].items["1"][] |
|
||||
if $ETCDCTL get /vitastor/pg/config --print-value-only | jq -s -e '(. | length) != 0 and ([ .[0].items["1"][] |
|
||||
select(((.osd_set | select(. != 0) | sort | unique) | length) == '$PG_SIZE') ] | length) == '$PG_COUNT; then
|
||||
configured=1
|
||||
if $ETCDCTL get /vitastor/pg/state/1/ --prefix --print-value-only | jq -s -e '[ .[] | select(.state == ["active"]) ] | length == '$PG_COUNT; then
|
||||
|
@@ -15,6 +15,7 @@ SCHEME=ec ./test_change_pg_count.sh
|
||||
./test_create_nomaxid.sh
|
||||
|
||||
./test_etcd_fail.sh
|
||||
ANTIETCD=1 ./test_etcd_fail.sh
|
||||
|
||||
./test_interrupted_rebalance.sh
|
||||
IMMEDIATE_COMMIT=1 ./test_interrupted_rebalance.sh
|
||||
@@ -56,6 +57,7 @@ SCHEME=xor ./test_write.sh
|
||||
|
||||
PG_SIZE=2 ./test_heal.sh
|
||||
SCHEME=ec ./test_heal.sh
|
||||
ANTIETCD=1 ./test_heal.sh
|
||||
|
||||
TEST_NAME=csum_32k_dmj OSD_ARGS="--data_csum_type crc32c --csum_block_size 32k --inmemory_metadata false --inmemory_journal false" OFFSET_ARGS=$OSD_ARGS ./test_heal.sh
|
||||
TEST_NAME=csum_32k_dj OSD_ARGS="--data_csum_type crc32c --csum_block_size 32k --inmemory_journal false" OFFSET_ARGS=$OSD_ARGS ./test_heal.sh
|
||||
|
@@ -13,14 +13,14 @@ start_osd 4
|
||||
sleep 2
|
||||
|
||||
for i in {1..30}; do
|
||||
($ETCDCTL get /vitastor/config/pgs --print-value-only |\
|
||||
($ETCDCTL get /vitastor/pg/config --print-value-only |\
|
||||
jq -s -e '([ .[0].items["1"] | map(.osd_set)[][] ] | sort | unique == ["1","2","3","4"])') && \
|
||||
($ETCDCTL get --prefix /vitastor/pg/state/ --print-value-only | jq -s -e '([ .[] | select(.state == ["active"]) ] | length) == '$PG_COUNT) && \
|
||||
break
|
||||
sleep 1
|
||||
done
|
||||
|
||||
if ! ($ETCDCTL get /vitastor/config/pgs --print-value-only |\
|
||||
if ! ($ETCDCTL get /vitastor/pg/config --print-value-only |\
|
||||
jq -s -e '([ .[0].items["1"] | map(.osd_set)[][] ] | sort | unique == ["1","2","3","4"])'); then
|
||||
format_error "FAILED: OSD NOT ADDED INTO DISTRIBUTION"
|
||||
fi
|
||||
@@ -35,14 +35,14 @@ build/src/cmd/vitastor-cli --etcd_address $ETCD_URL rm-osd --force 4
|
||||
sleep 2
|
||||
|
||||
for i in {1..30}; do
|
||||
($ETCDCTL get /vitastor/config/pgs --print-value-only |\
|
||||
($ETCDCTL get /vitastor/pg/config --print-value-only |\
|
||||
jq -s -e '([ .[0].items["1"] | map(.osd_set)[][] ] | sort | unique == ["1","2","3"])') && \
|
||||
($ETCDCTL get --prefix /vitastor/pg/state/ --print-value-only | jq -s -e '([ .[] | select(.state == ["active"] or .state == ["active", "left_on_dead"]) ] | length) == '$PG_COUNT'') && \
|
||||
break
|
||||
sleep 1
|
||||
done
|
||||
|
||||
if ! ($ETCDCTL get /vitastor/config/pgs --print-value-only |\
|
||||
if ! ($ETCDCTL get /vitastor/pg/config --print-value-only |\
|
||||
jq -s -e '([ .[0].items["1"] | map(.osd_set)[][] ] | sort | unique == ["1","2","3"])'); then
|
||||
format_error "FAILED: OSD NOT REMOVED FROM DISTRIBUTION"
|
||||
fi
|
||||
|
@@ -23,7 +23,7 @@ try_change()
|
||||
$ETCDCTL put /vitastor/config/pools '{"1":{'$POOLCFG',"pg_size":'$PG_SIZE',"pg_minsize":'$PG_MINSIZE',"pg_count":'$n'}}'
|
||||
|
||||
for i in {1..60}; do
|
||||
($ETCDCTL get /vitastor/config/pgs --print-value-only | jq -s -e '(.[0].items["1"] | map((.osd_set | select(. > 0)) | length == 2) | length) == '$n) && \
|
||||
($ETCDCTL get /vitastor/pg/config --print-value-only | jq -s -e '(.[0].items["1"] | map((.osd_set | select(. > 0)) | length == 2) | length) == '$n) && \
|
||||
($ETCDCTL get --prefix /vitastor/pg/state/ --print-value-only | jq -s -e '([ .[] | select(.state == ["active"] or .state == ["active", "has_misplaced"]) ] | length) == '$n'') && \
|
||||
break
|
||||
sleep 1
|
||||
@@ -36,14 +36,14 @@ try_change()
|
||||
sleep 1
|
||||
done
|
||||
|
||||
if ! ($ETCDCTL get /vitastor/config/pgs --print-value-only | jq -s -e '(.[0].items["1"] | map((.osd_set | select(. > 0)) | length == 2) | length) == '$n); then
|
||||
$ETCDCTL get /vitastor/config/pgs
|
||||
if ! ($ETCDCTL get /vitastor/pg/config --print-value-only | jq -s -e '(.[0].items["1"] | map((.osd_set | select(. > 0)) | length == 2) | length) == '$n); then
|
||||
$ETCDCTL get /vitastor/pg/config
|
||||
$ETCDCTL get --prefix /vitastor/pg/state/
|
||||
format_error "FAILED: $n PGS NOT CONFIGURED"
|
||||
fi
|
||||
|
||||
if ! ($ETCDCTL get --prefix /vitastor/pg/state/ --print-value-only | jq -s -e '([ .[] | select(.state == ["active"]) ] | length) == '$n); then
|
||||
$ETCDCTL get /vitastor/config/pgs
|
||||
$ETCDCTL get /vitastor/pg/config
|
||||
$ETCDCTL get --prefix /vitastor/pg/state/
|
||||
format_error "FAILED: $n PGS NOT UP"
|
||||
fi
|
||||
@@ -53,7 +53,7 @@ try_change()
|
||||
nobj=0
|
||||
waittime=0
|
||||
while [[ $nobj -ne $NOBJ && $waittime -lt 7 ]]; do
|
||||
nobj=`$ETCDCTL get --prefix '/vitastor/pg/stats' --print-value-only | jq -s '[ .[].object_count ] | reduce .[] as $num (0; .+$num)'`
|
||||
nobj=`$ETCDCTL get --prefix '/vitastor/pgstats' --print-value-only | jq -s '[ .[].object_count ] | reduce .[] as $num (0; .+$num)'`
|
||||
if [[ $nobj -ne $NOBJ ]]; then
|
||||
waittime=$((waittime+1))
|
||||
sleep 1
|
||||
|
@@ -13,7 +13,7 @@ try_change()
|
||||
$ETCDCTL put /vitastor/config/pools '{"1":{'$POOLCFG',"pg_size":'$s',"pg_minsize":'$PG_MINSIZE',"pg_count":'$n'}}'
|
||||
|
||||
for i in {1..10}; do
|
||||
($ETCDCTL get /vitastor/config/pgs --print-value-only |\
|
||||
($ETCDCTL get /vitastor/pg/config --print-value-only |\
|
||||
jq -s -e '(.[0].items["1"] | map( ([ .osd_set[] | select(. != 0) ] | length) == '$s' ) | length == '$n')
|
||||
and ([ .[0].items["1"] | map(.osd_set)[][] ] | sort | unique == ["1","2","3"])') && \
|
||||
($ETCDCTL get --prefix /vitastor/pg/state/ --print-value-only | jq -s -e '([ .[] | select(.state == ["active"]) ] | length) == '$n'') && \
|
||||
@@ -21,16 +21,16 @@ try_change()
|
||||
sleep 1
|
||||
done
|
||||
|
||||
if ! ($ETCDCTL get /vitastor/config/pgs --print-value-only |\
|
||||
if ! ($ETCDCTL get /vitastor/pg/config --print-value-only |\
|
||||
jq -s -e '(.[0].items["1"] | map( ([ .osd_set[] | select(. != 0) ] | length) == '$s' ) | length == '$n')
|
||||
and ([ .[0].items["1"] | map(.osd_set)[][] ] | sort | unique == ["1","2","3"])'); then
|
||||
$ETCDCTL get /vitastor/config/pgs
|
||||
$ETCDCTL get /vitastor/pg/config
|
||||
$ETCDCTL get --prefix /vitastor/pg/state/
|
||||
format_error "FAILED: PG SIZE NOT CHANGED OR SOME OSDS DO NOT HAVE PGS"
|
||||
fi
|
||||
|
||||
if ! ($ETCDCTL get --prefix /vitastor/pg/state/ --print-value-only | jq -s -e '([ .[] | select(.state == ["active"]) ] | length) == '$n); then
|
||||
$ETCDCTL get /vitastor/config/pgs
|
||||
$ETCDCTL get /vitastor/pg/config
|
||||
$ETCDCTL get --prefix /vitastor/pg/state/
|
||||
format_error "FAILED: PGS NOT UP AFTER PG SIZE CHANGE"
|
||||
fi
|
||||
|
@@ -13,13 +13,13 @@ $ETCDCTL put /vitastor/config/inode/1/1 '{"name":"testimg","size":'$((IMG_SIZE*1
|
||||
# Write
|
||||
LD_PRELOAD="build/src/client/libfio_vitastor.so" \
|
||||
fio -thread -name=test -ioengine=build/src/client/libfio_vitastor.so -bs=1M -direct=1 -iodepth=4 \
|
||||
-mirror_file=./testdata/mirror.bin -end_fsync=1 -rw=write -etcd=$ETCD_URL -image=testimg -runtime=10
|
||||
-mirror_file=./testdata/bin/mirror.bin -end_fsync=1 -rw=write -etcd=$ETCD_URL -image=testimg -runtime=10
|
||||
|
||||
# Intentionally corrupt OSD data and restart it
|
||||
kill $OSD1_PID
|
||||
data_offset=$(build/src/disk_tool/vitastor-disk simple-offsets ./testdata/test_osd1.bin $OFFSET_ARGS | grep data_offset | awk '{print $2}')
|
||||
truncate -s $data_offset ./testdata/test_osd1.bin
|
||||
dd if=/dev/zero of=./testdata/test_osd1.bin bs=1024 count=1 seek=$((OSD_SIZE*1024-1))
|
||||
data_offset=$(build/src/disk_tool/vitastor-disk simple-offsets ./testdata/bin/test_osd1.bin $OFFSET_ARGS | grep data_offset | awk '{print $2}')
|
||||
truncate -s $data_offset ./testdata/bin/test_osd1.bin
|
||||
dd if=/dev/zero of=./testdata/bin/test_osd1.bin bs=1024 count=1 seek=$((OSD_SIZE*1024-1))
|
||||
start_osd 1
|
||||
|
||||
# FIXME: corrupt the journal WHEN OSD IS RUNNING and check reads too
|
||||
@@ -30,8 +30,8 @@ wait_up 10
|
||||
# Read everything back
|
||||
qemu-img convert -S 4096 -p \
|
||||
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testimg" \
|
||||
-O raw ./testdata/read.bin
|
||||
-O raw ./testdata/bin/read.bin
|
||||
|
||||
diff ./testdata/read.bin ./testdata/mirror.bin
|
||||
diff ./testdata/bin/read.bin ./testdata/bin/mirror.bin
|
||||
|
||||
format_green OK
|
||||
|
@@ -2,6 +2,10 @@
|
||||
|
||||
. `dirname $0`/common.sh
|
||||
|
||||
node mon/mon-main.js $MON_PARAMS --etcd_address $ETCD_URL --etcd_prefix "/vitastor" >>./testdata/mon.log 2>&1 &
|
||||
MON_PID=$!
|
||||
wait_etcd
|
||||
|
||||
TIME=$(date '+%s')
|
||||
$ETCDCTL put /vitastor/config/global '{"placement_levels":{"rack":1,"host":2,"osd":3},"immediate_commit":"none"}'
|
||||
$ETCDCTL put /vitastor/config/node_placement '{"rack1":{"level":"rack"},"rack2":{"level":"rack"},"host1":{"level":"host","parent":"rack1"},"host2":{"level":"host","parent":"rack1"},"host3":{"level":"host","parent":"rack2"},"host4":{"level":"host","parent":"rack2"}}'
|
||||
@@ -22,12 +26,9 @@ $ETCDCTL get --print-value-only /vitastor/config/pools | jq -s -e '. == [{}]'
|
||||
build/src/cmd/vitastor-cli --etcd_address $ETCD_URL create-pool testpool -s 2 -n 4 --failure_domain rack --force
|
||||
$ETCDCTL get --print-value-only /vitastor/config/pools | jq -s -e '. == [{"1":{"name":"testpool","scheme":"replicated","pg_size":2,"pg_minsize":1,"pg_count":4,"failure_domain":"rack"}}]'
|
||||
|
||||
node mon/mon-main.js --etcd_address $ETCD_URL --etcd_prefix "/vitastor" >>./testdata/mon.log 2>&1 &
|
||||
MON_PID=$!
|
||||
|
||||
sleep 2
|
||||
|
||||
etcdctl --endpoints=http://localhost:12379 get --prefix /vitastor/config/pgs --print-value-only | \
|
||||
$ETCDCTL get --prefix /vitastor/pg/config --print-value-only | \
|
||||
jq -s -e '([ .[0].items["1"] | .[].osd_set | map_values(. | tonumber) | select((.[0] <= 4) != (.[1] <= 4)) ] | length) == 4'
|
||||
|
||||
format_green OK
|
||||
|
@@ -20,7 +20,7 @@ $ETCDCTL put /vitastor/config/inode/1/1 '{"name":"testimg","size":'$((IMG_SIZE*1
|
||||
|
||||
LD_PRELOAD="build/src/client/libfio_vitastor.so" \
|
||||
fio -thread -name=test -ioengine=build/src/client/libfio_vitastor.so -bs=4M -direct=1 -iodepth=1 -fsync=1 -rw=write \
|
||||
-mirror_file=./testdata/mirror.bin -etcd=$ETCD_URL -image=testimg -cluster_log_level=10
|
||||
-mirror_file=./testdata/bin/mirror.bin -etcd=$ETCD_URL -image=testimg -cluster_log_level=10
|
||||
|
||||
kill_osds()
|
||||
{
|
||||
@@ -53,13 +53,13 @@ kill_osds &
|
||||
|
||||
LD_PRELOAD="build/src/client/libfio_vitastor.so" \
|
||||
fio -thread -name=test -ioengine=build/src/client/libfio_vitastor.so -bsrange=4k-128k -blockalign=4k -direct=1 -iodepth=32 -fsync=256 -rw=randrw \
|
||||
-randrepeat=0 -refill_buffers=1 -mirror_file=./testdata/mirror.bin -etcd=$ETCD_URL -image=testimg -loops=10 -runtime=120
|
||||
-randrepeat=0 -refill_buffers=1 -mirror_file=./testdata/bin/mirror.bin -etcd=$ETCD_URL -image=testimg -loops=10 -runtime=120
|
||||
|
||||
qemu-img convert -S 4096 -p \
|
||||
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testimg" \
|
||||
-O raw ./testdata/read.bin
|
||||
-O raw ./testdata/bin/read.bin
|
||||
|
||||
if ! diff -q ./testdata/read.bin ./testdata/mirror.bin; then
|
||||
if ! diff -q ./testdata/bin/read.bin ./testdata/bin/mirror.bin; then
|
||||
format_error Data lost during self-heal
|
||||
fi
|
||||
|
||||
|
@@ -43,7 +43,7 @@ wait_finish_rebalance 300
|
||||
#fi
|
||||
|
||||
# Check that no objects are lost !
|
||||
nobj=`$ETCDCTL get --prefix '/vitastor/pg/stats' --print-value-only | jq -s '[ .[].object_count ] | reduce .[] as $num (0; .+$num)'`
|
||||
nobj=`$ETCDCTL get --prefix '/vitastor/pgstats' --print-value-only | jq -s '[ .[].object_count ] | reduce .[] as $num (0; .+$num)'`
|
||||
if [ "$nobj" -ne $((IMG_SIZE*8/PG_DATA_SIZE)) ]; then
|
||||
format_error "Data lost after multiple interrupted rebalancings"
|
||||
fi
|
||||
|
@@ -1,19 +1,21 @@
|
||||
#!/bin/bash -ex
|
||||
|
||||
USE_ANTIETCD=""
|
||||
|
||||
. `dirname $0`/common.sh
|
||||
|
||||
OSD_SIZE=1024
|
||||
OSD_COUNT=5
|
||||
OSD_ARGS="$OSD_ARGS"
|
||||
for i in $(seq 1 $OSD_COUNT); do
|
||||
dd if=/dev/zero of=./testdata/test_osd$i.bin bs=1024 count=1 seek=$((OSD_SIZE*1024-1))
|
||||
build/src/osd/vitastor-osd --log_level 10 --osd_num $i --bind_address 127.0.0.1 --etcd_stats_interval 5 $OSD_ARGS --etcd_address $ETCD_URL $(build/src/disk_tool/vitastor-disk simple-offsets --format options ./testdata/test_osd$i.bin 2>/dev/null) >>./testdata/osd$i.log 2>&1 &
|
||||
dd if=/dev/zero of=./testdata/bin/test_osd$i.bin bs=1024 count=1 seek=$((OSD_SIZE*1024-1))
|
||||
build/src/osd/vitastor-osd --log_level 10 --osd_num $i --bind_address 127.0.0.1 --etcd_stats_interval 5 $OSD_ARGS --etcd_address $ETCD_URL $(build/src/disk_tool/vitastor-disk simple-offsets --format options ./testdata/bin/test_osd$i.bin 2>/dev/null) >>./testdata/osd$i.log 2>&1 &
|
||||
eval OSD${i}_PID=$!
|
||||
done
|
||||
|
||||
$ETCDCTL put /vitastor/config/pools '{"1":{"name":"testpool","scheme":"replicated","pg_size":2,"pg_minsize":1,"pg_count":1,"failure_domain":"osd","immediate_commit":"none"}}'
|
||||
|
||||
$ETCDCTL put /vitastor/config/pgs '{"items":{"1":{"1":{"osd_set":[1,0],"primary":1}}}}'
|
||||
$ETCDCTL put /vitastor/pg/config '{"items":{"1":{"1":{"osd_set":[1,0],"primary":1}}}}'
|
||||
|
||||
for i in {1..30}; do
|
||||
sleep 1
|
||||
@@ -28,7 +30,7 @@ LD_PRELOAD="build/src/client/libfio_vitastor.so" \
|
||||
fio -thread -name=test -ioengine=build/src/client/libfio_vitastor.so -bs=4M -direct=1 -iodepth=1 -fsync=1 -rw=write \
|
||||
-etcd=$ETCD_URL -pool=1 -inode=2 -size=32M -cluster_log_level=10
|
||||
|
||||
$ETCDCTL put /vitastor/config/pgs '{"items":{"1":{"1":{"osd_set":[1,0],"primary":0}}}}'
|
||||
$ETCDCTL put /vitastor/pg/config '{"items":{"1":{"1":{"osd_set":[1,0],"primary":0}}}}'
|
||||
|
||||
for i in {1..30}; do
|
||||
sleep 1
|
||||
@@ -41,7 +43,7 @@ done
|
||||
|
||||
$ETCDCTL put /vitastor/pg/history/1/1 '{"all_peers":[1,2,3]}'
|
||||
|
||||
$ETCDCTL put /vitastor/config/pgs '{"items":{"1":{"1":{"osd_set":[4,5],"primary":4}}}}'
|
||||
$ETCDCTL put /vitastor/pg/config '{"items":{"1":{"1":{"osd_set":[4,5],"primary":4}}}}'
|
||||
|
||||
sleep 5
|
||||
for i in {1..30}; do
|
||||
@@ -58,7 +60,7 @@ LD_PRELOAD="build/src/client/libfio_vitastor.so" \
|
||||
fio -thread -name=test -ioengine=build/src/client/libfio_vitastor.so -bs=4k -direct=1 -iodepth=1 -fsync=1 -number_ios=2 -rw=write \
|
||||
-etcd=$ETCD_URL -pool=1 -inode=2 -size=32M -cluster_log_level=10
|
||||
|
||||
$ETCDCTL put /vitastor/config/pgs '{"items":{"1":{"1":{"osd_set":[4,5],"primary":0}}}}'
|
||||
$ETCDCTL put /vitastor/pg/config '{"items":{"1":{"1":{"osd_set":[4,5],"primary":0}}}}'
|
||||
|
||||
$ETCDCTL put /vitastor/pg/history/1/1 '{"all_peers":[1,2,3]}'
|
||||
|
||||
@@ -74,7 +76,7 @@ done
|
||||
cp testdata/osd4.log testdata/osd4_pre.log
|
||||
>testdata/osd4.log
|
||||
|
||||
$ETCDCTL put /vitastor/config/pgs '{"items":{"1":{"1":{"osd_set":[4,5],"primary":4}}}}'
|
||||
$ETCDCTL put /vitastor/pg/config '{"items":{"1":{"1":{"osd_set":[4,5],"primary":4}}}}'
|
||||
|
||||
for i in {1..30}; do
|
||||
sleep 1
|
||||
|
@@ -2,6 +2,10 @@
|
||||
|
||||
. `dirname $0`/common.sh
|
||||
|
||||
node mon/mon-main.js $MON_PARAMS --etcd_address $ETCD_URL --etcd_prefix "/vitastor" >>./testdata/mon.log 2>&1 &
|
||||
MON_PID=$!
|
||||
wait_etcd
|
||||
|
||||
TIME=$(date '+%s')
|
||||
$ETCDCTL put /vitastor/config/osd/1 '{"tags":["a"]}'
|
||||
$ETCDCTL put /vitastor/config/osd/2 '{"tags":["a"]}'
|
||||
@@ -21,15 +25,12 @@ $ETCDCTL put /vitastor/osd/stats/7 '{"host":"stor4","size":1073741824,"time":"'$
|
||||
$ETCDCTL put /vitastor/osd/stats/8 '{"host":"stor4","size":1073741824,"time":"'$TIME'"}'
|
||||
$ETCDCTL put /vitastor/config/pools '{"1":{"name":"testpool","scheme":"replicated","pg_size":2,"pg_minsize":2,"pg_count":16,"failure_domain":"host","osd_tags":["a"],"immediate_commit":"none"}}'
|
||||
|
||||
node mon/mon-main.js --etcd_address $ETCD_URL --etcd_prefix "/vitastor" >>./testdata/mon.log 2>&1 &
|
||||
MON_PID=$!
|
||||
|
||||
sleep 2
|
||||
|
||||
etcdctl --endpoints=http://localhost:12379 get --prefix /vitastor/config/pgs --print-value-only
|
||||
$ETCDCTL get --prefix /vitastor/pg/config --print-value-only
|
||||
|
||||
if ! (etcdctl --endpoints=http://localhost:12379 get --prefix /vitastor/config/pgs --print-value-only | \
|
||||
jq -s -e '[ [ .[0].items["1"] | .[].osd_set | map(. | select(. != "" and (.|tonumber) < 5)) ][] | select((. | length) == 2) ] | length == 16'); then
|
||||
if ! ($ETCDCTL get --prefix /vitastor/pg/config --print-value-only | \
|
||||
jq -s -e '[ [ .[] | select(has("items")) | .items["1"] | .[].osd_set | map(. | select(. != "" and (.|tonumber) < 5)) ][] | select((. | length) == 2) ] | length == 16'); then
|
||||
format_error "Some PGs missing replicas"
|
||||
fi
|
||||
|
||||
|
@@ -16,7 +16,7 @@ try_change()
|
||||
s=$2
|
||||
|
||||
for i in {1..10}; do
|
||||
($ETCDCTL get /vitastor/config/pgs --print-value-only |\
|
||||
($ETCDCTL get /vitastor/pg/config --print-value-only |\
|
||||
jq -s -e '(.[0].items["1"] | map( ([ .osd_set[] | select(. != 0) ] | length) == '$s' ) | length == '$n')
|
||||
and ([ .[0].items["1"] | map(.osd_set)[][] ] | sort | unique == ["1","2","3","4","5","6","7"])') && \
|
||||
($ETCDCTL get --prefix /vitastor/pg/state/ --print-value-only | jq -s -e '([ .[] | select(.state == ["active"]) ] | length) == '$n'') && \
|
||||
@@ -24,16 +24,16 @@ try_change()
|
||||
sleep 1
|
||||
done
|
||||
|
||||
if ! ($ETCDCTL get /vitastor/config/pgs --print-value-only |\
|
||||
if ! ($ETCDCTL get /vitastor/pg/config --print-value-only |\
|
||||
jq -s -e '(.[0].items["1"] | map( ([ .osd_set[] | select(. != 0) ] | length) == '$s' ) | length == '$n')
|
||||
and ([ .[0].items["1"] | map(.osd_set)[][] ] | sort | unique == ["1","2","3","4","5","6","7"])'); then
|
||||
$ETCDCTL get /vitastor/config/pgs
|
||||
$ETCDCTL get /vitastor/pg/config
|
||||
$ETCDCTL get --prefix /vitastor/pg/state/
|
||||
format_error "FAILED: PG SIZE NOT CHANGED OR SOME OSDS DO NOT HAVE PGS"
|
||||
fi
|
||||
|
||||
if ! ($ETCDCTL get --prefix /vitastor/pg/state/ --print-value-only | jq -s -e '([ .[] | select(.state == ["active"]) ] | length) == '$n); then
|
||||
$ETCDCTL get /vitastor/config/pgs
|
||||
$ETCDCTL get /vitastor/pg/config
|
||||
$ETCDCTL get --prefix /vitastor/pg/state/
|
||||
format_error "FAILED: PGS NOT UP AFTER PG SIZE CHANGE"
|
||||
fi
|
||||
|
@@ -16,14 +16,14 @@ trap "sudo build/src/client/vitastor-nbd unmap $NBD_DEV"'; kill -9 $(jobs -p)' E
|
||||
|
||||
sudo chown $(id -u) $NBD_DEV
|
||||
|
||||
dd if=/dev/urandom of=./testdata/img1.bin bs=1M count=$IMG_SIZE
|
||||
dd if=/dev/urandom of=./testdata/bin/img1.bin bs=1M count=$IMG_SIZE
|
||||
|
||||
dd if=./testdata/img1.bin of=$NBD_DEV bs=1M count=$IMG_SIZE oflag=direct
|
||||
dd if=./testdata/bin/img1.bin of=$NBD_DEV bs=1M count=$IMG_SIZE oflag=direct
|
||||
|
||||
verify() {
|
||||
echo "Verifying before rebalance"
|
||||
dd if=$NBD_DEV of=./testdata/img2.bin bs=1M count=$IMG_SIZE iflag=direct
|
||||
diff ./testdata/img1.bin ./testdata/img2.bin
|
||||
dd if=$NBD_DEV of=./testdata/bin/img2.bin bs=1M count=$IMG_SIZE iflag=direct
|
||||
diff ./testdata/bin/img1.bin ./testdata/bin/img2.bin
|
||||
|
||||
$ETCDCTL put /vitastor/config/osd/1 '{"reweight":'$1'}'
|
||||
$ETCDCTL put /vitastor/config/osd/2 '{"reweight":'$1'}'
|
||||
@@ -31,18 +31,18 @@ verify() {
|
||||
|
||||
for i in {1..10000}; do
|
||||
O=$(((RANDOM*RANDOM) % (IMG_SIZE*128)))
|
||||
dd if=$NBD_DEV of=./testdata/img2.bin bs=4k seek=$O skip=$O count=1 iflag=direct conv=notrunc
|
||||
dd if=$NBD_DEV of=./testdata/bin/img2.bin bs=4k seek=$O skip=$O count=1 iflag=direct conv=notrunc
|
||||
done
|
||||
|
||||
echo "Verifying during rebalance"
|
||||
diff ./testdata/img1.bin ./testdata/img2.bin
|
||||
diff ./testdata/bin/img1.bin ./testdata/bin/img2.bin
|
||||
|
||||
# Wait for the rebalance to finish
|
||||
wait_finish_rebalance 300
|
||||
|
||||
echo "Verifying after rebalance"
|
||||
dd if=$NBD_DEV of=./testdata/img2.bin bs=1M count=$IMG_SIZE iflag=direct
|
||||
diff ./testdata/img1.bin ./testdata/img2.bin
|
||||
dd if=$NBD_DEV of=./testdata/bin/img2.bin bs=1M count=$IMG_SIZE iflag=direct
|
||||
diff ./testdata/bin/img1.bin ./testdata/bin/img2.bin
|
||||
}
|
||||
|
||||
# Verify with regular reads
|
||||
|
@@ -14,7 +14,7 @@ LD_PRELOAD="build/src/client/libfio_vitastor.so" \
|
||||
|
||||
qemu-img convert -S 4096 -p \
|
||||
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:pool=1:inode=1:size=$((256*1024*1024))" \
|
||||
-O raw ./testdata/before.bin
|
||||
-O raw ./testdata/bin/before.bin
|
||||
|
||||
for i in $(seq 1 $OSD_COUNT); do
|
||||
pid=OSD${i}_PID
|
||||
@@ -23,19 +23,19 @@ for i in $(seq 1 $OSD_COUNT); do
|
||||
done
|
||||
|
||||
for i in $(seq 1 $OSD_COUNT); do
|
||||
offsets=$(build/src/disk_tool/vitastor-disk simple-offsets --format json ./testdata/test_osd$i.bin)
|
||||
offsets=$(build/src/disk_tool/vitastor-disk simple-offsets --format json ./testdata/bin/test_osd$i.bin)
|
||||
meta_offset=$(echo $offsets | jq -r .meta_offset)
|
||||
data_offset=$(echo $offsets | jq -r .data_offset)
|
||||
build/src/disk_tool/vitastor-disk dump-journal --json ./testdata/test_osd$i.bin 4096 0 $meta_offset >./testdata/journal_before_resize.json
|
||||
build/src/disk_tool/vitastor-disk dump-meta ./testdata/test_osd$i.bin 4096 $meta_offset $((data_offset-meta_offset)) >./testdata/meta_before_resize.json
|
||||
build/src/disk_tool/vitastor-disk dump-journal --json ./testdata/bin/test_osd$i.bin 4096 0 $meta_offset >./testdata/journal_before_resize.json
|
||||
build/src/disk_tool/vitastor-disk dump-meta ./testdata/bin/test_osd$i.bin 4096 $meta_offset $((data_offset-meta_offset)) >./testdata/meta_before_resize.json
|
||||
build/src/disk_tool/vitastor-disk resize \
|
||||
$(build/src/disk_tool/vitastor-disk simple-offsets --format options ./testdata/test_osd$i.bin 2>/dev/null) \
|
||||
$(build/src/disk_tool/vitastor-disk simple-offsets --format options ./testdata/bin/test_osd$i.bin 2>/dev/null) \
|
||||
--new_meta_offset 0 \
|
||||
--new_meta_len $((1024*1024)) \
|
||||
--new_journal_offset $((1024*1024)) \
|
||||
--new_data_offset $((128*1024*1024))
|
||||
build/src/disk_tool/vitastor-disk dump-journal --json ./testdata/test_osd$i.bin 4096 $((1024*1024)) $((127*1024*1024)) >./testdata/journal_after_resize.json
|
||||
build/src/disk_tool/vitastor-disk dump-meta ./testdata/test_osd$i.bin 4096 0 $((1024*1024)) >./testdata/meta_after_resize.json
|
||||
build/src/disk_tool/vitastor-disk dump-journal --json ./testdata/bin/test_osd$i.bin 4096 $((1024*1024)) $((127*1024*1024)) >./testdata/journal_after_resize.json
|
||||
build/src/disk_tool/vitastor-disk dump-meta ./testdata/bin/test_osd$i.bin 4096 0 $((1024*1024)) >./testdata/meta_after_resize.json
|
||||
if ! (cat ./testdata/meta_before_resize.json ./testdata/meta_after_resize.json | \
|
||||
jq -e -s 'map([ .entries[] | del(.block) ] | sort_by(.pool, .inode, .stripe)) | .[0] == .[1] and (.[0] | length) > 1000'); then
|
||||
format_error "OSD $i metadata corrupted after resizing"
|
||||
@@ -50,7 +50,7 @@ $ETCDCTL del --prefix /vitastor/osd/state/
|
||||
|
||||
for i in $(seq 1 $OSD_COUNT); do
|
||||
build/src/osd/vitastor-osd --osd_num $i --bind_address 127.0.0.1 $NO_SAME $OSD_ARGS --etcd_address $ETCD_URL \
|
||||
--data_device ./testdata/test_osd$i.bin \
|
||||
--data_device ./testdata/bin/test_osd$i.bin \
|
||||
--meta_offset 0 \
|
||||
--journal_offset $((1024*1024)) \
|
||||
--data_offset $((128*1024*1024)) >>./testdata/osd$i.log 2>&1 &
|
||||
@@ -59,9 +59,9 @@ done
|
||||
|
||||
qemu-img convert -S 4096 -p \
|
||||
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:pool=1:inode=1:size=$((256*1024*1024))" \
|
||||
-O raw ./testdata/after.bin
|
||||
-O raw ./testdata/bin/after.bin
|
||||
|
||||
if ! cmp ./testdata/before.bin ./testdata/after.bin; then
|
||||
if ! cmp ./testdata/bin/before.bin ./testdata/bin/after.bin; then
|
||||
format_error "Data differs after resizing"
|
||||
fi
|
||||
|
||||
|
@@ -2,6 +2,10 @@
|
||||
|
||||
. `dirname $0`/common.sh
|
||||
|
||||
node mon/mon-main.js $MON_PARAMS --etcd_address $ETCD_URL --etcd_prefix "/vitastor" >>./testdata/mon.log 2>&1 &
|
||||
MON_PID=$!
|
||||
wait_etcd
|
||||
|
||||
TIME=$(date '+%s')
|
||||
$ETCDCTL put /vitastor/config/global '{"placement_levels":{"rack":100,"host":101,"osd":102},"immediate_commit":"none"}'
|
||||
$ETCDCTL put /vitastor/config/node_placement '{"rack1":{"level":"rack"},"rack2":{"level":"rack"},"stor1":{"level":"host","parent":"rack1"},"stor2":{"level":"host","parent":"rack1"},"stor3":{"level":"host","parent":"rack2"},"stor4":{"level":"host","parent":"rack2"}}'
|
||||
@@ -15,14 +19,9 @@ $ETCDCTL put /vitastor/osd/stats/7 '{"host":"stor4","size":1073741824,"time":"'$
|
||||
$ETCDCTL put /vitastor/osd/stats/8 '{"host":"stor4","size":1073741824,"time":"'$TIME'"}'
|
||||
$ETCDCTL put /vitastor/config/pools '{"1":{"name":"testpool","scheme":"replicated","pg_size":2,"pg_minsize":2,"pg_count":16,"failure_domain":"host","root_node":"rack1"}}'
|
||||
|
||||
node mon/mon-main.js --etcd_address $ETCD_URL --etcd_prefix "/vitastor" >>./testdata/mon.log 2>&1 &
|
||||
MON_PID=$!
|
||||
|
||||
sleep 2
|
||||
|
||||
etcdctl --endpoints=http://localhost:12379 get --prefix /vitastor/config/pgs --print-value-only
|
||||
|
||||
if ! (etcdctl --endpoints=http://localhost:12379 get --prefix /vitastor/config/pgs --print-value-only | \
|
||||
if ! ($ETCDCTL get --prefix /vitastor/pg/config --print-value-only | \
|
||||
jq -s -e '[ [ .[0].items["1"] | .[].osd_set | map(. | select(. != "" and (.|tonumber) < 5)) ][] | select((. | length) == 2) ] | length == 16'); then
|
||||
format_error "Some PGs missing replicas"
|
||||
fi
|
||||
|
@@ -18,19 +18,19 @@ $ETCDCTL put /vitastor/config/inode/1/1 '{"name":"testimg","size":'$((IMG_SIZE*1
|
||||
# Write
|
||||
LD_PRELOAD="build/src/client/libfio_vitastor.so" \
|
||||
fio -thread -name=test -ioengine=build/src/client/libfio_vitastor.so -bs=1M -direct=1 -iodepth=4 \
|
||||
-mirror_file=./testdata/mirror.bin -end_fsync=1 -rw=write -etcd=$ETCD_URL -image=testimg
|
||||
-mirror_file=./testdata/bin/mirror.bin -end_fsync=1 -rw=write -etcd=$ETCD_URL -image=testimg
|
||||
|
||||
# Save PG primary
|
||||
primary=$($ETCDCTL get --print-value-only /vitastor/config/pgs | jq -r '.items["1"]["1"].primary')
|
||||
primary=$($ETCDCTL get --print-value-only /vitastor/pg/config | jq -r '.items["1"]["1"].primary')
|
||||
|
||||
# Intentionally corrupt OSD data and restart it
|
||||
zero_osd_pid=OSD${ZERO_OSD}_PID
|
||||
kill ${!zero_osd_pid}
|
||||
sleep 1
|
||||
kill -9 ${!zero_osd_pid} || true
|
||||
data_offset=$(build/src/disk_tool/vitastor-disk simple-offsets ./testdata/test_osd$ZERO_OSD.bin $OFFSET_ARGS | grep data_offset | awk '{print $2}')
|
||||
truncate -s $data_offset ./testdata/test_osd$ZERO_OSD.bin
|
||||
dd if=/dev/zero of=./testdata/test_osd$ZERO_OSD.bin bs=1024 count=1 seek=$((OSD_SIZE*1024-1))
|
||||
data_offset=$(build/src/disk_tool/vitastor-disk simple-offsets ./testdata/bin/test_osd$ZERO_OSD.bin $OFFSET_ARGS | grep data_offset | awk '{print $2}')
|
||||
truncate -s $data_offset ./testdata/bin/test_osd$ZERO_OSD.bin
|
||||
dd if=/dev/zero of=./testdata/bin/test_osd$ZERO_OSD.bin bs=1024 count=1 seek=$((OSD_SIZE*1024-1))
|
||||
$ETCDCTL del /vitastor/osd/state/$ZERO_OSD
|
||||
start_osd $ZERO_OSD
|
||||
|
||||
@@ -38,7 +38,7 @@ start_osd $ZERO_OSD
|
||||
wait_up 10
|
||||
|
||||
# Wait until PG is back on the same primary
|
||||
wait_condition 10 "$ETCDCTL"$' get --print-value-only /vitastor/config/pgs | jq -s -e \'.[0].items["1"]["1"].primary == "'$primary'"'"'"
|
||||
wait_condition 10 "$ETCDCTL"$' get --print-value-only /vitastor/pg/config | jq -s -e \'.[0].items["1"]["1"].primary == "'$primary'"'"'"
|
||||
|
||||
# Trigger scrub
|
||||
$ETCDCTL put /vitastor/pg/history/1/1 `$ETCDCTL get --print-value-only /vitastor/pg/history/1/1 | jq -s -c '(.[0] // {}) + {"next_scrub":1}'`
|
||||
@@ -64,8 +64,8 @@ fi
|
||||
# Read everything back
|
||||
qemu-img convert -S 4096 -p \
|
||||
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testimg" \
|
||||
-O raw ./testdata/read.bin
|
||||
-O raw ./testdata/bin/read.bin
|
||||
|
||||
diff ./testdata/read.bin ./testdata/mirror.bin
|
||||
diff ./testdata/bin/read.bin ./testdata/bin/mirror.bin
|
||||
|
||||
format_green OK
|
||||
|
@@ -34,21 +34,21 @@ qemu-img convert -p \
|
||||
|
||||
qemu-img convert -S 4096 -p \
|
||||
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:pool=1:inode=3:size=$((32*1024*1024))" \
|
||||
-O raw ./testdata/merged.bin
|
||||
-O raw ./testdata/bin/merged.bin
|
||||
|
||||
qemu-img convert -S 4096 -p \
|
||||
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testimg@0" \
|
||||
-O raw ./testdata/layer0.bin
|
||||
-O raw ./testdata/bin/layer0.bin
|
||||
|
||||
$ETCDCTL put /vitastor/config/inode/1/3 '{"name":"testimg","size":'$((32*1024*1024))'}'
|
||||
|
||||
qemu-img convert -S 4096 -p \
|
||||
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testimg" \
|
||||
-O raw ./testdata/layer1.bin
|
||||
-O raw ./testdata/bin/layer1.bin
|
||||
|
||||
node tests/merge.js ./testdata/layer0.bin ./testdata/layer1.bin ./testdata/check.bin
|
||||
node tests/merge.js ./testdata/bin/layer0.bin ./testdata/bin/layer1.bin ./testdata/bin/check.bin
|
||||
|
||||
cmp ./testdata/merged.bin ./testdata/check.bin
|
||||
cmp ./testdata/bin/merged.bin ./testdata/bin/check.bin
|
||||
|
||||
# Test merge
|
||||
|
||||
@@ -58,22 +58,22 @@ build/src/cmd/vitastor-cli rm --etcd_address $ETCD_URL testimg@0
|
||||
|
||||
qemu-img convert -S 4096 -p \
|
||||
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testimg" \
|
||||
-O raw ./testdata/merged-by-tool.bin
|
||||
-O raw ./testdata/bin/merged-by-tool.bin
|
||||
|
||||
cmp ./testdata/merged.bin ./testdata/merged-by-tool.bin
|
||||
cmp ./testdata/bin/merged.bin ./testdata/bin/merged-by-tool.bin
|
||||
|
||||
# Test merge by qemu-img
|
||||
|
||||
qemu-img rebase -u -b layer0.qcow2 -F qcow2 ./testdata/layer1.qcow2
|
||||
|
||||
qemu-img convert -S 4096 -f qcow2 ./testdata/layer1.qcow2 -O raw ./testdata/rebased.bin
|
||||
qemu-img convert -S 4096 -f qcow2 ./testdata/layer1.qcow2 -O raw ./testdata/bin/rebased.bin
|
||||
|
||||
cmp ./testdata/merged.bin ./testdata/rebased.bin
|
||||
cmp ./testdata/bin/merged.bin ./testdata/bin/rebased.bin
|
||||
|
||||
qemu-img rebase -u -b '' ./testdata/layer1.qcow2
|
||||
|
||||
qemu-img convert -S 4096 -f qcow2 ./testdata/layer1.qcow2 -O raw ./testdata/rebased.bin
|
||||
qemu-img convert -S 4096 -f qcow2 ./testdata/layer1.qcow2 -O raw ./testdata/bin/rebased.bin
|
||||
|
||||
cmp ./testdata/layer1.bin ./testdata/rebased.bin
|
||||
cmp ./testdata/bin/layer1.bin ./testdata/bin/rebased.bin
|
||||
|
||||
format_green OK
|
||||
|
@@ -9,7 +9,7 @@ build/src/cmd/vitastor-cli --etcd_address $ETCD_URL create -s 32M testchain
|
||||
|
||||
LD_PRELOAD="build/src/client/libfio_vitastor.so" \
|
||||
fio -thread -name=test -ioengine=build/src/client/libfio_vitastor.so -bs=4M -direct=1 -iodepth=1 -fsync=1 -rw=write \
|
||||
-etcd=$ETCD_URL -image=testchain -mirror_file=./testdata/mirror.bin
|
||||
-etcd=$ETCD_URL -image=testchain -mirror_file=./testdata/bin/mirror.bin
|
||||
|
||||
for i in {1..10}; do
|
||||
# Create a snapshot
|
||||
@@ -17,18 +17,18 @@ for i in {1..10}; do
|
||||
# Check that the new snapshot is see-through
|
||||
qemu-img convert -p \
|
||||
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testchain" \
|
||||
-O raw ./testdata/check.bin
|
||||
cmp ./testdata/check.bin ./testdata/mirror.bin
|
||||
-O raw ./testdata/bin/check.bin
|
||||
cmp ./testdata/bin/check.bin ./testdata/bin/mirror.bin
|
||||
# Write something to it
|
||||
LD_PRELOAD="build/src/client/libfio_vitastor.so" \
|
||||
fio -thread -name=test -ioengine=build/src/client/libfio_vitastor.so -bs=4k -direct=1 -iodepth=1 -fsync=32 -rw=randwrite \
|
||||
-randrepeat=$((i <= 2)) -buffer_pattern=0x$((10+i))$((10+i))$((10+i))$((10+i)) \
|
||||
-etcd=$ETCD_URL -image=testchain -number_ios=1024 -mirror_file=./testdata/mirror.bin
|
||||
-etcd=$ETCD_URL -image=testchain -number_ios=1024 -mirror_file=./testdata/bin/mirror.bin
|
||||
# Check the new content
|
||||
qemu-img convert -p \
|
||||
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testchain" \
|
||||
-O raw ./testdata/layer1.bin
|
||||
cmp ./testdata/layer1.bin ./testdata/mirror.bin
|
||||
-O raw ./testdata/bin/layer1.bin
|
||||
cmp ./testdata/bin/layer1.bin ./testdata/bin/mirror.bin
|
||||
done
|
||||
|
||||
build/src/cmd/vitastor-cli --etcd_address $ETCD_URL rm testchain@1 testchain@9
|
||||
@@ -36,13 +36,13 @@ build/src/cmd/vitastor-cli --etcd_address $ETCD_URL rm testchain@1 testchain@9
|
||||
# Check the final image
|
||||
qemu-img convert -p \
|
||||
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testchain" \
|
||||
-O raw ./testdata/layer1.bin
|
||||
cmp ./testdata/layer1.bin ./testdata/mirror.bin
|
||||
-O raw ./testdata/bin/layer1.bin
|
||||
cmp ./testdata/bin/layer1.bin ./testdata/bin/mirror.bin
|
||||
|
||||
# Check the last remaining snapshot
|
||||
qemu-img convert -p \
|
||||
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testchain@10" \
|
||||
-O raw ./testdata/layer0.bin
|
||||
cmp ./testdata/layer0.bin ./testdata/check.bin
|
||||
-O raw ./testdata/bin/layer0.bin
|
||||
cmp ./testdata/bin/layer0.bin ./testdata/bin/check.bin
|
||||
|
||||
format_green OK
|
||||
|
@@ -9,7 +9,7 @@ build/src/cmd/vitastor-cli --etcd_address $ETCD_URL create -s 128M testchain
|
||||
|
||||
LD_PRELOAD="build/src/client/libfio_vitastor.so" \
|
||||
fio -thread -name=test -ioengine=build/src/client/libfio_vitastor.so -bs=4M -direct=1 -iodepth=1 -fsync=1 -rw=write \
|
||||
-etcd=$ETCD_URL -image=testchain -mirror_file=./testdata/mirror.bin
|
||||
-etcd=$ETCD_URL -image=testchain -mirror_file=./testdata/bin/mirror.bin
|
||||
|
||||
# Create a snapshot
|
||||
build/src/cmd/vitastor-cli --etcd_address $ETCD_URL snap-create testchain@0
|
||||
@@ -17,13 +17,13 @@ build/src/cmd/vitastor-cli --etcd_address $ETCD_URL snap-create testchain@0
|
||||
# Write something to it
|
||||
LD_PRELOAD="build/src/client/libfio_vitastor.so" \
|
||||
fio -thread -name=test -ioengine=build/src/client/libfio_vitastor.so -bs=1M -direct=1 -iodepth=4 -rw=randwrite \
|
||||
-randrepeat=0 -etcd=$ETCD_URL -image=testchain -number_ios=8 -mirror_file=./testdata/mirror.bin
|
||||
-randrepeat=0 -etcd=$ETCD_URL -image=testchain -number_ios=8 -mirror_file=./testdata/bin/mirror.bin
|
||||
|
||||
# Check the new content
|
||||
qemu-img convert -p \
|
||||
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testchain" \
|
||||
-O raw ./testdata/layer1.bin
|
||||
cmp ./testdata/layer1.bin ./testdata/mirror.bin
|
||||
-O raw ./testdata/bin/layer1.bin
|
||||
cmp ./testdata/bin/layer1.bin ./testdata/bin/mirror.bin
|
||||
|
||||
# Merge
|
||||
build/src/cmd/vitastor-cli --etcd_address $ETCD_URL rm testchain@0
|
||||
@@ -31,7 +31,7 @@ build/src/cmd/vitastor-cli --etcd_address $ETCD_URL rm testchain@0
|
||||
# Check the final image
|
||||
qemu-img convert -p \
|
||||
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testchain" \
|
||||
-O raw ./testdata/layer1.bin
|
||||
cmp ./testdata/layer1.bin ./testdata/mirror.bin
|
||||
-O raw ./testdata/bin/layer1.bin
|
||||
cmp ./testdata/bin/layer1.bin ./testdata/bin/mirror.bin
|
||||
|
||||
format_green OK
|
||||
|
@@ -23,7 +23,7 @@ LD_PRELOAD="build/src/client/libfio_vitastor.so" \
|
||||
|
||||
kill $OSD2_PID
|
||||
build/src/osd/vitastor-osd --osd_num 1 --bind_address 127.0.0.1 $NO_SAME $OSD_ARGS --etcd_address $ETCD_URL \
|
||||
$(build/src/disk_tool/vitastor-disk simple-offsets --format options --device ./testdata/test_osd2.bin 2>/dev/null) >>./testdata/osd2.log 2>&1 &
|
||||
$(build/src/disk_tool/vitastor-disk simple-offsets --format options --device ./testdata/bin/test_osd2.bin 2>/dev/null) >>./testdata/osd2.log 2>&1 &
|
||||
sleep 2
|
||||
|
||||
# Check PG state - it should NOT become active
|
||||
|
@@ -2,14 +2,14 @@
|
||||
|
||||
. `dirname $0`/run_3osds.sh
|
||||
|
||||
primary=$($ETCDCTL get --print-value-only /vitastor/config/pgs | jq -r '.items["1"]["1"].primary')
|
||||
primary=$($ETCDCTL get --print-value-only /vitastor/pg/config | jq -r '.items["1"]["1"].primary')
|
||||
primary_pid=OSD${primary}_PID
|
||||
kill -9 ${!primary_pid}
|
||||
|
||||
sleep 15
|
||||
wait_condition 10 "$ETCDCTL get --print-value-only /vitastor/config/pgs | jq -s -e '.[0].items[\"1\"][\"1\"].primary != \"$primary\"'"
|
||||
wait_condition 10 "$ETCDCTL get --print-value-only /vitastor/pg/config | jq -s -e '.[0].items[\"1\"][\"1\"].primary != \"$primary\"'"
|
||||
|
||||
newprim=$($ETCDCTL get --print-value-only /vitastor/config/pgs | jq -r '.items["1"]["1"].primary')
|
||||
newprim=$($ETCDCTL get --print-value-only /vitastor/pg/config | jq -r '.items["1"]["1"].primary')
|
||||
|
||||
if [ "$newprim" = "$primary" ]; then
|
||||
format_error Primary not switched
|
||||
|
@@ -3,21 +3,22 @@
|
||||
export KEEP_DATA=1
|
||||
. `dirname $0`/common.sh
|
||||
|
||||
etcdctl --endpoints=http://127.0.0.1:12379/v3 del --prefix /vitastor/mon/master
|
||||
etcdctl --endpoints=http://127.0.0.1:12379/v3 del --prefix /vitastor/pg/state
|
||||
etcdctl --endpoints=http://127.0.0.1:12379/v3 del --prefix /vitastor/osd/state
|
||||
node mon/mon-main.js $MON_PARAMS --etcd_address $ETCD_URL --etcd_prefix "/vitastor" --verbose 1 >>./testdata/mon.log 2>&1 &
|
||||
MON_PID=$!
|
||||
wait_etcd
|
||||
|
||||
$ETCDCTL del --prefix /vitastor/mon/master
|
||||
$ETCDCTL del --prefix /vitastor/pg/state
|
||||
$ETCDCTL del --prefix /vitastor/osd/state
|
||||
|
||||
OSD_COUNT=3
|
||||
OSD_ARGS="$OSD_ARGS"
|
||||
OFFSET_ARGS="$OFFSET_ARGS"
|
||||
for i in $(seq 1 $OSD_COUNT); do
|
||||
build/src/osd/vitastor-osd --osd_num $i --bind_address 127.0.0.1 $OSD_ARGS --etcd_address $ETCD_URL $(build/src/disk_tool/vitastor-disk simple-offsets --format options ./testdata/test_osd$i.bin 2>/dev/null) >>./testdata/osd$i.log 2>&1 &
|
||||
build/src/osd/vitastor-osd --osd_num $i --bind_address 127.0.0.1 $OSD_ARGS --etcd_address $ETCD_URL $(build/src/disk_tool/vitastor-disk simple-offsets --format options ./testdata/bin/test_osd$i.bin 2>/dev/null) >>./testdata/osd$i.log 2>&1 &
|
||||
eval OSD${i}_PID=$!
|
||||
done
|
||||
|
||||
node mon/mon-main.js --etcd_address $ETCD_URL --etcd_prefix "/vitastor" --verbose 1 >>./testdata/mon.log 2>&1 &
|
||||
MON_PID=$!
|
||||
|
||||
sleep 3
|
||||
|
||||
if ! ($ETCDCTL get /vitastor/pg/state/1/1 --print-value-only | jq -s -e '(. | length) != 0 and .[0].state == ["active"]'); then
|
||||
|
@@ -43,10 +43,10 @@ LD_PRELOAD="build/src/client/libfio_vitastor.so" \
|
||||
|
||||
qemu-img convert -S 4096 -p \
|
||||
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:pool=1:inode=1:size=$((128*1024*1024))" \
|
||||
-O raw ./testdata/read.bin
|
||||
-O raw ./testdata/bin/read.bin
|
||||
|
||||
qemu-img convert -S 4096 -p \
|
||||
-f raw ./testdata/read.bin \
|
||||
-f raw ./testdata/bin/read.bin \
|
||||
-O raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:pool=1:inode=1:size=$((128*1024*1024))"
|
||||
|
||||
format_green OK
|
||||
|
Reference in New Issue
Block a user