forked from vitalif/vitastor
Compare commits
18 Commits
Author | SHA1 | Date | |
---|---|---|---|
101592bbff | |||
be4087d9d2 | |||
404e43dd2d | |||
87613ed590 | |||
2a2e914ef9 | |||
0cdc9292c8 | |||
3e1b03bb5c | |||
36e851505a | |||
1efbbb0c36 | |||
088dd15449 | |||
4a531d7b8b | |||
a0cae4c180 | |||
c4eb46600d | |||
21b306e25f | |||
d8313e939a | |||
3e92c3f082 | |||
82b9f4c52d | |||
2bdf415eb3 |
@@ -2,6 +2,6 @@ cmake_minimum_required(VERSION 2.8)
|
|||||||
|
|
||||||
project(vitastor)
|
project(vitastor)
|
||||||
|
|
||||||
set(VERSION "0.6.17")
|
set(VERSION "0.7.1")
|
||||||
|
|
||||||
add_subdirectory(src)
|
add_subdirectory(src)
|
||||||
|
@@ -1,4 +1,4 @@
|
|||||||
VERSION ?= v0.6.17
|
VERSION ?= v0.7.1
|
||||||
|
|
||||||
all: build push
|
all: build push
|
||||||
|
|
||||||
|
@@ -49,7 +49,7 @@ spec:
|
|||||||
capabilities:
|
capabilities:
|
||||||
add: ["SYS_ADMIN"]
|
add: ["SYS_ADMIN"]
|
||||||
allowPrivilegeEscalation: true
|
allowPrivilegeEscalation: true
|
||||||
image: vitalif/vitastor-csi:v0.6.17
|
image: vitalif/vitastor-csi:v0.7.1
|
||||||
args:
|
args:
|
||||||
- "--node=$(NODE_ID)"
|
- "--node=$(NODE_ID)"
|
||||||
- "--endpoint=$(CSI_ENDPOINT)"
|
- "--endpoint=$(CSI_ENDPOINT)"
|
||||||
|
@@ -116,7 +116,7 @@ spec:
|
|||||||
privileged: true
|
privileged: true
|
||||||
capabilities:
|
capabilities:
|
||||||
add: ["SYS_ADMIN"]
|
add: ["SYS_ADMIN"]
|
||||||
image: vitalif/vitastor-csi:v0.6.17
|
image: vitalif/vitastor-csi:v0.7.1
|
||||||
args:
|
args:
|
||||||
- "--node=$(NODE_ID)"
|
- "--node=$(NODE_ID)"
|
||||||
- "--endpoint=$(CSI_ENDPOINT)"
|
- "--endpoint=$(CSI_ENDPOINT)"
|
||||||
|
@@ -5,7 +5,7 @@ package vitastor
|
|||||||
|
|
||||||
const (
|
const (
|
||||||
vitastorCSIDriverName = "csi.vitastor.io"
|
vitastorCSIDriverName = "csi.vitastor.io"
|
||||||
vitastorCSIDriverVersion = "0.6.17"
|
vitastorCSIDriverVersion = "0.7.1"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Config struct fills the parameters of request or user input
|
// Config struct fills the parameters of request or user input
|
||||||
|
16
debian/changelog
vendored
16
debian/changelog
vendored
@@ -1,4 +1,18 @@
|
|||||||
vitastor (0.6.17-1) unstable; urgency=medium
|
vitastor (0.7.1-1) unstable; urgency=medium
|
||||||
|
|
||||||
|
* Bugfixes
|
||||||
|
|
||||||
|
-- Vitaliy Filippov <vitalif@yourcmc.ru> Fri, 03 Jun 2022 02:09:44 +0300
|
||||||
|
|
||||||
|
vitastor (0.7.0-1) unstable; urgency=medium
|
||||||
|
|
||||||
|
* Implement NFS proxy
|
||||||
|
* Add documentation
|
||||||
|
* Bugfixes
|
||||||
|
|
||||||
|
-- Vitaliy Filippov <vitalif@yourcmc.ru> Sun, 29 May 2022 23:39:13 +0300
|
||||||
|
|
||||||
|
vitastor (0.6.3-1) unstable; urgency=medium
|
||||||
|
|
||||||
* RDMA support
|
* RDMA support
|
||||||
* Bugfixes
|
* Bugfixes
|
||||||
|
2
debian/control
vendored
2
debian/control
vendored
@@ -2,7 +2,7 @@ Source: vitastor
|
|||||||
Section: admin
|
Section: admin
|
||||||
Priority: optional
|
Priority: optional
|
||||||
Maintainer: Vitaliy Filippov <vitalif@yourcmc.ru>
|
Maintainer: Vitaliy Filippov <vitalif@yourcmc.ru>
|
||||||
Build-Depends: debhelper, liburing-dev (>= 0.6), g++ (>= 8), libstdc++6 (>= 8), linux-libc-dev, libgoogle-perftools-dev, libjerasure-dev, libgf-complete-dev, libibverbs-dev
|
Build-Depends: debhelper, liburing-dev (>= 0.6), g++ (>= 8), libstdc++6 (>= 8), linux-libc-dev, libgoogle-perftools-dev, libjerasure-dev, libgf-complete-dev, libibverbs-dev, libisal-dev
|
||||||
Standards-Version: 4.5.0
|
Standards-Version: 4.5.0
|
||||||
Homepage: https://vitastor.io/
|
Homepage: https://vitastor.io/
|
||||||
Rules-Requires-Root: no
|
Rules-Requires-Root: no
|
||||||
|
11
debian/libisal.pc
vendored
Normal file
11
debian/libisal.pc
vendored
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
prefix=/usr
|
||||||
|
exec_prefix=${prefix}
|
||||||
|
libdir=${prefix}/lib/x86_64-linux-gnu
|
||||||
|
includedir=${prefix}/include
|
||||||
|
|
||||||
|
Name: libisal
|
||||||
|
Description: Library for storage systems
|
||||||
|
Version: 2.30.0
|
||||||
|
Libs: -L${libdir} -lisal
|
||||||
|
Libs.private:
|
||||||
|
Cflags: -I${includedir}
|
11
debian/vitastor.Dockerfile
vendored
11
debian/vitastor.Dockerfile
vendored
@@ -22,10 +22,11 @@ RUN apt-get update
|
|||||||
RUN apt-get -y install fio liburing1 liburing-dev libgoogle-perftools-dev devscripts
|
RUN apt-get -y install fio liburing1 liburing-dev libgoogle-perftools-dev devscripts
|
||||||
RUN apt-get -y build-dep fio
|
RUN apt-get -y build-dep fio
|
||||||
RUN apt-get --download-only source fio
|
RUN apt-get --download-only source fio
|
||||||
RUN apt-get update && apt-get -y install libjerasure-dev cmake libibverbs-dev
|
RUN apt-get update && apt-get -y install libjerasure-dev cmake libibverbs-dev libisal-dev
|
||||||
|
|
||||||
ADD . /root/vitastor
|
ADD . /root/vitastor
|
||||||
RUN set -e -x; \
|
RUN set -e -x; \
|
||||||
|
[ -e /usr/lib/x86_64-linux-gnu/pkgconfig/libisal.pc ] || cp /root/vitastor/debian/libisal.pc /usr/lib/x86_64-linux-gnu/pkgconfig; \
|
||||||
mkdir -p /root/fio-build/; \
|
mkdir -p /root/fio-build/; \
|
||||||
cd /root/fio-build/; \
|
cd /root/fio-build/; \
|
||||||
rm -rf /root/fio-build/*; \
|
rm -rf /root/fio-build/*; \
|
||||||
@@ -33,8 +34,8 @@ RUN set -e -x; \
|
|||||||
mkdir -p /root/packages/vitastor-$REL; \
|
mkdir -p /root/packages/vitastor-$REL; \
|
||||||
rm -rf /root/packages/vitastor-$REL/*; \
|
rm -rf /root/packages/vitastor-$REL/*; \
|
||||||
cd /root/packages/vitastor-$REL; \
|
cd /root/packages/vitastor-$REL; \
|
||||||
cp -r /root/vitastor vitastor-0.6.17; \
|
cp -r /root/vitastor vitastor-0.7.1; \
|
||||||
cd vitastor-0.6.17; \
|
cd vitastor-0.7.1; \
|
||||||
ln -s /root/fio-build/fio-*/ ./fio; \
|
ln -s /root/fio-build/fio-*/ ./fio; \
|
||||||
FIO=$(head -n1 fio/debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \
|
FIO=$(head -n1 fio/debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \
|
||||||
ls /usr/include/linux/raw.h || cp ./debian/raw.h /usr/include/linux/raw.h; \
|
ls /usr/include/linux/raw.h || cp ./debian/raw.h /usr/include/linux/raw.h; \
|
||||||
@@ -47,8 +48,8 @@ RUN set -e -x; \
|
|||||||
rm -rf a b; \
|
rm -rf a b; \
|
||||||
echo "dep:fio=$FIO" > debian/fio_version; \
|
echo "dep:fio=$FIO" > debian/fio_version; \
|
||||||
cd /root/packages/vitastor-$REL; \
|
cd /root/packages/vitastor-$REL; \
|
||||||
tar --sort=name --mtime='2020-01-01' --owner=0 --group=0 --exclude=debian -cJf vitastor_0.6.17.orig.tar.xz vitastor-0.6.17; \
|
tar --sort=name --mtime='2020-01-01' --owner=0 --group=0 --exclude=debian -cJf vitastor_0.7.1.orig.tar.xz vitastor-0.7.1; \
|
||||||
cd vitastor-0.6.17; \
|
cd vitastor-0.7.1; \
|
||||||
V=$(head -n1 debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \
|
V=$(head -n1 debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \
|
||||||
DEBFULLNAME="Vitaliy Filippov <vitalif@yourcmc.ru>" dch -D $REL -v "$V""$REL" "Rebuild for $REL"; \
|
DEBFULLNAME="Vitaliy Filippov <vitalif@yourcmc.ru>" dch -D $REL -v "$V""$REL" "Rebuild for $REL"; \
|
||||||
DEB_BUILD_OPTIONS=nocheck dpkg-buildpackage --jobs=auto -sa; \
|
DEB_BUILD_OPTIONS=nocheck dpkg-buildpackage --jobs=auto -sa; \
|
||||||
|
@@ -23,6 +23,7 @@ initialization and can be changed with an OSD restart.
|
|||||||
- [no_rebalance](#no_rebalance)
|
- [no_rebalance](#no_rebalance)
|
||||||
- [print_stats_interval](#print_stats_interval)
|
- [print_stats_interval](#print_stats_interval)
|
||||||
- [slow_log_interval](#slow_log_interval)
|
- [slow_log_interval](#slow_log_interval)
|
||||||
|
- [inode_vanish_time](#inode_vanish_time)
|
||||||
- [max_write_iodepth](#max_write_iodepth)
|
- [max_write_iodepth](#max_write_iodepth)
|
||||||
- [min_flusher_count](#min_flusher_count)
|
- [min_flusher_count](#min_flusher_count)
|
||||||
- [max_flusher_count](#max_flusher_count)
|
- [max_flusher_count](#max_flusher_count)
|
||||||
@@ -163,6 +164,13 @@ Time interval at which OSDs dump slow or stuck operations on stdout, if
|
|||||||
there are any. It is also the time after which an operation is considered
|
there are any. It is also the time after which an operation is considered
|
||||||
"slow".
|
"slow".
|
||||||
|
|
||||||
|
## inode_vanish_time
|
||||||
|
|
||||||
|
- Type: seconds
|
||||||
|
- Default: 60
|
||||||
|
|
||||||
|
Number of seconds after which a deleted inode is removed from OSD statistics.
|
||||||
|
|
||||||
## max_write_iodepth
|
## max_write_iodepth
|
||||||
|
|
||||||
- Type: integer
|
- Type: integer
|
||||||
|
@@ -24,6 +24,7 @@
|
|||||||
- [no_rebalance](#no_rebalance)
|
- [no_rebalance](#no_rebalance)
|
||||||
- [print_stats_interval](#print_stats_interval)
|
- [print_stats_interval](#print_stats_interval)
|
||||||
- [slow_log_interval](#slow_log_interval)
|
- [slow_log_interval](#slow_log_interval)
|
||||||
|
- [inode_vanish_time](#inode_vanish_time)
|
||||||
- [max_write_iodepth](#max_write_iodepth)
|
- [max_write_iodepth](#max_write_iodepth)
|
||||||
- [min_flusher_count](#min_flusher_count)
|
- [min_flusher_count](#min_flusher_count)
|
||||||
- [max_flusher_count](#max_flusher_count)
|
- [max_flusher_count](#max_flusher_count)
|
||||||
@@ -169,6 +170,13 @@ OSD.
|
|||||||
медленных или зависших операций, если таковые имеются. Также время, при
|
медленных или зависших операций, если таковые имеются. Также время, при
|
||||||
превышении которого операция считается "медленной".
|
превышении которого операция считается "медленной".
|
||||||
|
|
||||||
|
## inode_vanish_time
|
||||||
|
|
||||||
|
- Тип: секунды
|
||||||
|
- Значение по умолчанию: 60
|
||||||
|
|
||||||
|
Число секунд, через которое удалённый инод удаляется и из статистики OSD.
|
||||||
|
|
||||||
## max_write_iodepth
|
## max_write_iodepth
|
||||||
|
|
||||||
- Тип: целое число
|
- Тип: целое число
|
||||||
|
@@ -106,9 +106,12 @@ Pool name.
|
|||||||
|
|
||||||
- Type: string
|
- Type: string
|
||||||
- Required
|
- Required
|
||||||
- One of: "replicated", "xor" or "jerasure"
|
- One of: "replicated", "xor", "ec" or "jerasure"
|
||||||
|
|
||||||
Redundancy scheme used for data in this pool.
|
Redundancy scheme used for data in this pool. "jerasure" is an alias for "ec",
|
||||||
|
both use Reed-Solomon-Vandermonde codes based on ISA-L or jerasure libraries.
|
||||||
|
Fast ISA-L based implementation is used automatically when it's available,
|
||||||
|
slower jerasure version is used otherwise.
|
||||||
|
|
||||||
## pg_size
|
## pg_size
|
||||||
|
|
||||||
@@ -243,7 +246,7 @@ of the OSDs containing a data chunk for a PG.
|
|||||||
{
|
{
|
||||||
"2": {
|
"2": {
|
||||||
"name":"ecpool",
|
"name":"ecpool",
|
||||||
"scheme":"jerasure",
|
"scheme":"ec",
|
||||||
"pg_size":3,
|
"pg_size":3,
|
||||||
"parity_chunks":1,
|
"parity_chunks":1,
|
||||||
"pg_minsize":2,
|
"pg_minsize":2,
|
||||||
|
@@ -106,9 +106,13 @@
|
|||||||
|
|
||||||
- Тип: строка
|
- Тип: строка
|
||||||
- Обязательный
|
- Обязательный
|
||||||
- Возможные значения: "replicated", "xor" или "jerasure"
|
- Возможные значения: "replicated", "xor", "ec" или "jerasure"
|
||||||
|
|
||||||
Схема избыточности, используемая в данном пуле.
|
Схема избыточности, используемая в данном пуле. "jerasure" - синоним для "ec",
|
||||||
|
в обеих схемах используются коды Рида-Соломона-Вандермонда, реализованные на
|
||||||
|
основе библиотек ISA-L или jerasure. Быстрая реализация на основе ISA-L
|
||||||
|
используется автоматически, когда доступна, в противном случае используется
|
||||||
|
более медленная jerasure-версия.
|
||||||
|
|
||||||
## pg_size
|
## pg_size
|
||||||
|
|
||||||
@@ -242,7 +246,7 @@ PG в Vitastor эферемерны, то есть вы можете менят
|
|||||||
{
|
{
|
||||||
"2": {
|
"2": {
|
||||||
"name":"ecpool",
|
"name":"ecpool",
|
||||||
"scheme":"jerasure",
|
"scheme":"ec",
|
||||||
"pg_size":3,
|
"pg_size":3,
|
||||||
"parity_chunks":1,
|
"parity_chunks":1,
|
||||||
"pg_minsize":2,
|
"pg_minsize":2,
|
||||||
|
@@ -158,6 +158,13 @@
|
|||||||
Временной интервал, с которым OSD выводят в стандартный вывод список
|
Временной интервал, с которым OSD выводят в стандартный вывод список
|
||||||
медленных или зависших операций, если таковые имеются. Также время, при
|
медленных или зависших операций, если таковые имеются. Также время, при
|
||||||
превышении которого операция считается "медленной".
|
превышении которого операция считается "медленной".
|
||||||
|
- name: inode_vanish_time
|
||||||
|
type: sec
|
||||||
|
default: 60
|
||||||
|
info: |
|
||||||
|
Number of seconds after which a deleted inode is removed from OSD statistics.
|
||||||
|
info_ru: |
|
||||||
|
Число секунд, через которое удалённый инод удаляется и из статистики OSD.
|
||||||
- name: max_write_iodepth
|
- name: max_write_iodepth
|
||||||
type: int
|
type: int
|
||||||
default: 128
|
default: 128
|
||||||
|
@@ -15,7 +15,8 @@
|
|||||||
- gcc and g++ 8 or newer, clang 10 or newer, or other compiler with C++11 plus
|
- gcc and g++ 8 or newer, clang 10 or newer, or other compiler with C++11 plus
|
||||||
designated initializers support from C++20
|
designated initializers support from C++20
|
||||||
- CMake
|
- CMake
|
||||||
- liburing, jerasure headers
|
- liburing, jerasure headers and libraries
|
||||||
|
- ISA-L, libibverbs headers and libraries (optional)
|
||||||
- tcmalloc (google-perftools-dev)
|
- tcmalloc (google-perftools-dev)
|
||||||
|
|
||||||
## Basic instructions
|
## Basic instructions
|
||||||
|
@@ -15,7 +15,8 @@
|
|||||||
- gcc и g++ >= 8, либо clang >= 10, либо другой компилятор с поддержкой C++11 плюс
|
- gcc и g++ >= 8, либо clang >= 10, либо другой компилятор с поддержкой C++11 плюс
|
||||||
назначенных инициализаторов (designated initializers) из C++20
|
назначенных инициализаторов (designated initializers) из C++20
|
||||||
- CMake
|
- CMake
|
||||||
- Заголовки liburing, jerasure
|
- Заголовки и библиотеки liburing, jerasure
|
||||||
|
- Опционально - заголовки и библиотеки ISA-L, libibverbs
|
||||||
- tcmalloc (google-perftools-dev)
|
- tcmalloc (google-perftools-dev)
|
||||||
|
|
||||||
## Базовая инструкция
|
## Базовая инструкция
|
||||||
|
@@ -15,7 +15,7 @@
|
|||||||
- Basic part: highly-available block storage with symmetric clustering and no SPOF
|
- Basic part: highly-available block storage with symmetric clustering and no SPOF
|
||||||
- [Performance](../performance/comparison1.en.md) ;-D
|
- [Performance](../performance/comparison1.en.md) ;-D
|
||||||
- [Multiple redundancy schemes](../config/pool.en.md#scheme): Replication, XOR n+1, Reed-Solomon erasure codes
|
- [Multiple redundancy schemes](../config/pool.en.md#scheme): Replication, XOR n+1, Reed-Solomon erasure codes
|
||||||
based on jerasure library with any number of data and parity drives in a group
|
based on jerasure and ISA-L libraries with any number of data and parity drives in a group
|
||||||
- Configuration via simple JSON data structures in etcd (parameters, pools and images)
|
- Configuration via simple JSON data structures in etcd (parameters, pools and images)
|
||||||
- Automatic data distribution over OSDs, with support for:
|
- Automatic data distribution over OSDs, with support for:
|
||||||
- Mathematical optimization for better uniformity and less data movement
|
- Mathematical optimization for better uniformity and less data movement
|
||||||
|
@@ -15,7 +15,7 @@
|
|||||||
- Базовая часть - надёжное кластерное блочное хранилище без единой точки отказа
|
- Базовая часть - надёжное кластерное блочное хранилище без единой точки отказа
|
||||||
- [Производительность](../performance/comparison1.ru.md) ;-D
|
- [Производительность](../performance/comparison1.ru.md) ;-D
|
||||||
- [Несколько схем отказоустойчивости](../config/pool.ru.md#scheme): репликация, XOR n+1 (1 диск чётности), коды коррекции ошибок
|
- [Несколько схем отказоустойчивости](../config/pool.ru.md#scheme): репликация, XOR n+1 (1 диск чётности), коды коррекции ошибок
|
||||||
Рида-Соломона на основе библиотеки jerasure с любым числом дисков данных и чётности в группе
|
Рида-Соломона на основе библиотек jerasure и ISA-L с любым числом дисков данных и чётности в группе
|
||||||
- Конфигурация через простые человекочитаемые JSON-структуры в etcd
|
- Конфигурация через простые человекочитаемые JSON-структуры в etcd
|
||||||
- Автоматическое распределение данных по OSD, с поддержкой:
|
- Автоматическое распределение данных по OSD, с поддержкой:
|
||||||
- Математической оптимизации для лучшей равномерности распределения и минимизации перемещений данных
|
- Математической оптимизации для лучшей равномерности распределения и минимизации перемещений данных
|
||||||
|
@@ -63,11 +63,11 @@ etcdctl --endpoints=... put /vitastor/config/pools '{"1":{"name":"testpool",
|
|||||||
"scheme":"replicated","pg_size":2,"pg_minsize":1,"pg_count":256,"failure_domain":"host"}}'
|
"scheme":"replicated","pg_size":2,"pg_minsize":1,"pg_count":256,"failure_domain":"host"}}'
|
||||||
```
|
```
|
||||||
|
|
||||||
For jerasure pools the configuration should look like the following:
|
For EC pools the configuration should look like the following:
|
||||||
|
|
||||||
```
|
```
|
||||||
etcdctl --endpoints=... put /vitastor/config/pools '{"2":{"name":"ecpool",
|
etcdctl --endpoints=... put /vitastor/config/pools '{"2":{"name":"ecpool",
|
||||||
"scheme":"jerasure","pg_size":4,"parity_chunks":2,"pg_minsize":2,"pg_count":256,"failure_domain":"host"}`
|
"scheme":"ec","pg_size":4,"parity_chunks":2,"pg_minsize":2,"pg_count":256,"failure_domain":"host"}}'
|
||||||
```
|
```
|
||||||
|
|
||||||
After you do this, one of the monitors will configure PGs and OSDs will start them.
|
After you do this, one of the monitors will configure PGs and OSDs will start them.
|
||||||
|
@@ -75,7 +75,7 @@ etcdctl --endpoints=... put /vitastor/config/pools '{"1":{"name":"testpool",
|
|||||||
|
|
||||||
```
|
```
|
||||||
etcdctl --endpoints=... put /vitastor/config/pools '{"2":{"name":"ecpool",
|
etcdctl --endpoints=... put /vitastor/config/pools '{"2":{"name":"ecpool",
|
||||||
"scheme":"jerasure","pg_size":4,"parity_chunks":2,"pg_minsize":2,"pg_count":256,"failure_domain":"host"}`
|
"scheme":"ec","pg_size":4,"parity_chunks":2,"pg_minsize":2,"pg_count":256,"failure_domain":"host"}}'
|
||||||
```
|
```
|
||||||
|
|
||||||
После этого один из мониторов должен сконфигурировать PG, а OSD должны запустить их.
|
После этого один из мониторов должен сконфигурировать PG, а OSD должны запустить их.
|
||||||
|
21
mon/mon.js
21
mon/mon.js
@@ -105,6 +105,7 @@ const etcd_tree = {
|
|||||||
no_rebalance: false,
|
no_rebalance: false,
|
||||||
print_stats_interval: 3,
|
print_stats_interval: 3,
|
||||||
slow_log_interval: 10,
|
slow_log_interval: 10,
|
||||||
|
inode_vanish_time: 60,
|
||||||
osd_memlock: false,
|
osd_memlock: false,
|
||||||
// blockstore - fixed in superblock
|
// blockstore - fixed in superblock
|
||||||
block_size,
|
block_size,
|
||||||
@@ -147,11 +148,11 @@ const etcd_tree = {
|
|||||||
/* pools: {
|
/* pools: {
|
||||||
<id>: {
|
<id>: {
|
||||||
name: 'testpool',
|
name: 'testpool',
|
||||||
// jerasure uses Reed-Solomon-Vandermonde codes
|
// 'ec' uses Reed-Solomon-Vandermonde codes, 'jerasure' is an alias for 'ec'
|
||||||
scheme: 'replicated' | 'xor' | 'jerasure',
|
scheme: 'replicated' | 'xor' | 'ec' | 'jerasure',
|
||||||
pg_size: 3,
|
pg_size: 3,
|
||||||
pg_minsize: 2,
|
pg_minsize: 2,
|
||||||
// number of parity chunks, required for jerasure
|
// number of parity chunks, required for EC
|
||||||
parity_chunks?: 1,
|
parity_chunks?: 1,
|
||||||
pg_count: 100,
|
pg_count: 100,
|
||||||
failure_domain: 'host',
|
failure_domain: 'host',
|
||||||
@@ -1013,14 +1014,15 @@ class Mon
|
|||||||
console.log('Pool ID '+pool_id+' is invalid');
|
console.log('Pool ID '+pool_id+' is invalid');
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (pool_cfg.scheme !== 'xor' && pool_cfg.scheme !== 'replicated' && pool_cfg.scheme !== 'jerasure')
|
if (pool_cfg.scheme !== 'xor' && pool_cfg.scheme !== 'replicated' &&
|
||||||
|
pool_cfg.scheme !== 'ec' && pool_cfg.scheme !== 'jerasure')
|
||||||
{
|
{
|
||||||
if (warn)
|
if (warn)
|
||||||
console.log('Pool '+pool_id+' has invalid coding scheme (one of "xor", "replicated" and "jerasure" required)');
|
console.log('Pool '+pool_id+' has invalid coding scheme (one of "xor", "replicated", "ec" and "jerasure" required)');
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (!pool_cfg.pg_size || pool_cfg.pg_size < 1 || pool_cfg.pg_size > 256 ||
|
if (!pool_cfg.pg_size || pool_cfg.pg_size < 1 || pool_cfg.pg_size > 256 ||
|
||||||
(pool_cfg.scheme === 'xor' || pool_cfg.scheme == 'jerasure') && pool_cfg.pg_size < 3)
|
pool_cfg.scheme !== 'replicated' && pool_cfg.pg_size < 3)
|
||||||
{
|
{
|
||||||
if (warn)
|
if (warn)
|
||||||
console.log('Pool '+pool_id+' has invalid pg_size');
|
console.log('Pool '+pool_id+' has invalid pg_size');
|
||||||
@@ -1039,7 +1041,8 @@ class Mon
|
|||||||
console.log('Pool '+pool_id+' has invalid parity_chunks (must be 1)');
|
console.log('Pool '+pool_id+' has invalid parity_chunks (must be 1)');
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (pool_cfg.scheme === 'jerasure' && (pool_cfg.parity_chunks < 1 || pool_cfg.parity_chunks > pool_cfg.pg_size-2))
|
if ((pool_cfg.scheme === 'ec' || pool_cfg.scheme === 'jerasure') &&
|
||||||
|
(pool_cfg.parity_chunks < 1 || pool_cfg.parity_chunks > pool_cfg.pg_size-2))
|
||||||
{
|
{
|
||||||
if (warn)
|
if (warn)
|
||||||
console.log('Pool '+pool_id+' has invalid parity_chunks (must be between 1 and pg_size-2)');
|
console.log('Pool '+pool_id+' has invalid parity_chunks (must be between 1 and pg_size-2)');
|
||||||
@@ -1153,6 +1156,10 @@ class Mon
|
|||||||
{
|
{
|
||||||
prev_pgs[pg-1] = this.state.config.pgs.items[pool_id][pg].osd_set;
|
prev_pgs[pg-1] = this.state.config.pgs.items[pool_id][pg].osd_set;
|
||||||
}
|
}
|
||||||
|
// Also delete pool statistics
|
||||||
|
etcd_request.success.push({ requestDeleteRange: {
|
||||||
|
key: b64(this.etcd_prefix+'/pool/stats/'+pool_id),
|
||||||
|
} });
|
||||||
this.save_new_pgs_txn(etcd_request, pool_id, up_osds, osd_tree, prev_pgs, [], []);
|
this.save_new_pgs_txn(etcd_request, pool_id, up_osds, osd_tree, prev_pgs, [], []);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -50,7 +50,7 @@ from cinder.volume import configuration
|
|||||||
from cinder.volume import driver
|
from cinder.volume import driver
|
||||||
from cinder.volume import volume_utils
|
from cinder.volume import volume_utils
|
||||||
|
|
||||||
VERSION = '0.6.17'
|
VERSION = '0.7.1'
|
||||||
|
|
||||||
LOG = logging.getLogger(__name__)
|
LOG = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
659
patches/libvirt-6.0-vitastor.diff
Normal file
659
patches/libvirt-6.0-vitastor.diff
Normal file
@@ -0,0 +1,659 @@
|
|||||||
|
commit 7f01510ef207940b07fac4f5fc8b9f1580b443aa
|
||||||
|
Author: Vitaliy Filippov <vitalif@yourcmc.ru>
|
||||||
|
Date: Sun Jun 27 12:52:40 2021 +0300
|
||||||
|
|
||||||
|
Add Vitastor support
|
||||||
|
|
||||||
|
diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng
|
||||||
|
index aa50eac..082b4f8 100644
|
||||||
|
--- a/docs/schemas/domaincommon.rng
|
||||||
|
+++ b/docs/schemas/domaincommon.rng
|
||||||
|
@@ -1766,6 +1766,35 @@
|
||||||
|
</element>
|
||||||
|
</define>
|
||||||
|
|
||||||
|
+ <define name="diskSourceNetworkProtocolVitastor">
|
||||||
|
+ <element name="source">
|
||||||
|
+ <interleave>
|
||||||
|
+ <attribute name="protocol">
|
||||||
|
+ <value>vitastor</value>
|
||||||
|
+ </attribute>
|
||||||
|
+ <ref name="diskSourceCommon"/>
|
||||||
|
+ <optional>
|
||||||
|
+ <attribute name="name"/>
|
||||||
|
+ </optional>
|
||||||
|
+ <optional>
|
||||||
|
+ <attribute name="query"/>
|
||||||
|
+ </optional>
|
||||||
|
+ <zeroOrMore>
|
||||||
|
+ <ref name="diskSourceNetworkHost"/>
|
||||||
|
+ </zeroOrMore>
|
||||||
|
+ <optional>
|
||||||
|
+ <element name="config">
|
||||||
|
+ <attribute name="file">
|
||||||
|
+ <ref name="absFilePath"/>
|
||||||
|
+ </attribute>
|
||||||
|
+ <empty/>
|
||||||
|
+ </element>
|
||||||
|
+ </optional>
|
||||||
|
+ <empty/>
|
||||||
|
+ </interleave>
|
||||||
|
+ </element>
|
||||||
|
+ </define>
|
||||||
|
+
|
||||||
|
<define name="diskSourceNetworkProtocolISCSI">
|
||||||
|
<element name="source">
|
||||||
|
<attribute name="protocol">
|
||||||
|
@@ -1891,6 +1920,7 @@
|
||||||
|
<ref name="diskSourceNetworkProtocolHTTP"/>
|
||||||
|
<ref name="diskSourceNetworkProtocolSimple"/>
|
||||||
|
<ref name="diskSourceNetworkProtocolVxHS"/>
|
||||||
|
+ <ref name="diskSourceNetworkProtocolVitastor"/>
|
||||||
|
</choice>
|
||||||
|
</define>
|
||||||
|
|
||||||
|
diff --git a/include/libvirt/libvirt-storage.h b/include/libvirt/libvirt-storage.h
|
||||||
|
index 4bf2b5f..dbc011b 100644
|
||||||
|
--- a/include/libvirt/libvirt-storage.h
|
||||||
|
+++ b/include/libvirt/libvirt-storage.h
|
||||||
|
@@ -245,6 +245,7 @@ typedef enum {
|
||||||
|
VIR_CONNECT_LIST_STORAGE_POOLS_ZFS = 1 << 17,
|
||||||
|
VIR_CONNECT_LIST_STORAGE_POOLS_VSTORAGE = 1 << 18,
|
||||||
|
VIR_CONNECT_LIST_STORAGE_POOLS_ISCSI_DIRECT = 1 << 19,
|
||||||
|
+ VIR_CONNECT_LIST_STORAGE_POOLS_VITASTOR = 1 << 20,
|
||||||
|
} virConnectListAllStoragePoolsFlags;
|
||||||
|
|
||||||
|
int virConnectListAllStoragePools(virConnectPtr conn,
|
||||||
|
diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
|
||||||
|
index 222bb8c..2c30c55 100644
|
||||||
|
--- a/src/conf/domain_conf.c
|
||||||
|
+++ b/src/conf/domain_conf.c
|
||||||
|
@@ -5114,8 +5114,7 @@ virDomainDiskDefPostParse(virDomainDiskD
|
||||||
|
const virDomainDef *def,
|
||||||
|
virDomainXMLOptionPtr xmlopt)
|
||||||
|
{
|
||||||
|
- /* internal snapshots and config files are currently supported
|
||||||
|
- * only with rbd: */
|
||||||
|
+ /* internal snapshots are currently supported only with rbd: */
|
||||||
|
if (virStorageSourceGetActualType(disk->src) != VIR_STORAGE_TYPE_NETWORK &&
|
||||||
|
disk->src->protocol != VIR_STORAGE_NET_PROTOCOL_RBD) {
|
||||||
|
if (disk->src->snapshot) {
|
||||||
|
@@ -5124,11 +5123,15 @@ virDomainDiskDefPostParse(virDomainDiskD
|
||||||
|
"only with 'rbd' disks"));
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
-
|
||||||
|
+ }
|
||||||
|
+ /* config files are currently supported only with rbd and vitastor: */
|
||||||
|
+ if (virStorageSourceGetActualType(disk->src) != VIR_STORAGE_TYPE_NETWORK &&
|
||||||
|
+ disk->src->protocol != VIR_STORAGE_NET_PROTOCOL_RBD &&
|
||||||
|
+ disk->src->protocol != VIR_STORAGE_NET_PROTOCOL_VITASTOR) {
|
||||||
|
if (disk->src->configFile) {
|
||||||
|
virReportError(VIR_ERR_XML_ERROR, "%s",
|
||||||
|
_("<config> element is currently supported "
|
||||||
|
- "only with 'rbd' disks"));
|
||||||
|
+ "only with 'rbd' and 'vitastor' disks"));
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@@ -9258,6 +9261,10 @@ virDomainDiskSourceNetworkParse(xmlNodeP
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
+ if (src->protocol == VIR_STORAGE_NET_PROTOCOL_VITASTOR) {
|
||||||
|
+ src->relPath = virXMLPropString(node, "query");
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
if ((haveTLS = virXMLPropString(node, "tls")) &&
|
||||||
|
(src->haveTLS = virTristateBoolTypeFromString(haveTLS)) <= 0) {
|
||||||
|
virReportError(VIR_ERR_XML_ERROR,
|
||||||
|
@@ -9303,6 +9310,10 @@ virDomainDiskSourceNetworkParse(xmlNodeP
|
||||||
|
/* config file currently only works with remote disks */
|
||||||
|
src->configFile = virXPathString("string(./config/@file)", ctxt);
|
||||||
|
|
||||||
|
+ if (src->protocol == VIR_STORAGE_NET_PROTOCOL_HTTP ||
|
||||||
|
+ src->protocol == VIR_STORAGE_NET_PROTOCOL_HTTPS)
|
||||||
|
+ src->query = virXMLPropString(node, "query");
|
||||||
|
+
|
||||||
|
if (virDomainStorageNetworkParseHosts(node, &src->hosts, &src->nhosts) < 0)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
@@ -24141,6 +24152,10 @@ virDomainDiskSourceFormatNetwork(virBuff
|
||||||
|
|
||||||
|
virBufferEscapeString(attrBuf, " name='%s'", path ? path : src->path);
|
||||||
|
|
||||||
|
+ if (src->protocol == VIR_STORAGE_NET_PROTOCOL_VITASTOR && src->relPath != NULL) {
|
||||||
|
+ virBufferEscapeString(attrBuf, " query='%s'", src->relPath);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
if (src->haveTLS != VIR_TRISTATE_BOOL_ABSENT &&
|
||||||
|
!(flags & VIR_DOMAIN_DEF_FORMAT_MIGRATABLE &&
|
||||||
|
src->tlsFromConfig))
|
||||||
|
@@ -31402,6 +31417,7 @@ virDomainDiskTranslateSourcePool(virDomainDiskDefPtr def)
|
||||||
|
|
||||||
|
case VIR_STORAGE_POOL_MPATH:
|
||||||
|
case VIR_STORAGE_POOL_RBD:
|
||||||
|
+ case VIR_STORAGE_POOL_VITASTOR:
|
||||||
|
case VIR_STORAGE_POOL_SHEEPDOG:
|
||||||
|
case VIR_STORAGE_POOL_GLUSTER:
|
||||||
|
case VIR_STORAGE_POOL_LAST:
|
||||||
|
diff --git a/src/conf/storage_conf.c b/src/conf/storage_conf.c
|
||||||
|
index 55db7a9..7cbe937 100644
|
||||||
|
--- a/src/conf/storage_conf.c
|
||||||
|
+++ b/src/conf/storage_conf.c
|
||||||
|
@@ -59,7 +59,7 @@ VIR_ENUM_IMPL(virStoragePool,
|
||||||
|
"logical", "disk", "iscsi",
|
||||||
|
"iscsi-direct", "scsi", "mpath",
|
||||||
|
"rbd", "sheepdog", "gluster",
|
||||||
|
- "zfs", "vstorage",
|
||||||
|
+ "zfs", "vstorage", "vitastor",
|
||||||
|
);
|
||||||
|
|
||||||
|
VIR_ENUM_IMPL(virStoragePoolFormatFileSystem,
|
||||||
|
@@ -248,6 +248,18 @@ static virStoragePoolTypeInfo poolTypeInfo[] = {
|
||||||
|
.formatToString = virStorageFileFormatTypeToString,
|
||||||
|
}
|
||||||
|
},
|
||||||
|
+ {.poolType = VIR_STORAGE_POOL_VITASTOR,
|
||||||
|
+ .poolOptions = {
|
||||||
|
+ .flags = (VIR_STORAGE_POOL_SOURCE_HOST |
|
||||||
|
+ VIR_STORAGE_POOL_SOURCE_NETWORK |
|
||||||
|
+ VIR_STORAGE_POOL_SOURCE_NAME),
|
||||||
|
+ },
|
||||||
|
+ .volOptions = {
|
||||||
|
+ .defaultFormat = VIR_STORAGE_FILE_RAW,
|
||||||
|
+ .formatFromString = virStorageVolumeFormatFromString,
|
||||||
|
+ .formatToString = virStorageFileFormatTypeToString,
|
||||||
|
+ }
|
||||||
|
+ },
|
||||||
|
{.poolType = VIR_STORAGE_POOL_SHEEPDOG,
|
||||||
|
.poolOptions = {
|
||||||
|
.flags = (VIR_STORAGE_POOL_SOURCE_HOST |
|
||||||
|
@@ -550,6 +562,11 @@ virStoragePoolDefParseSource(xmlXPathContextPtr ctxt,
|
||||||
|
_("element 'name' is mandatory for RBD pool"));
|
||||||
|
goto cleanup;
|
||||||
|
}
|
||||||
|
+ if (pool_type == VIR_STORAGE_POOL_VITASTOR && source->name == NULL) {
|
||||||
|
+ virReportError(VIR_ERR_XML_ERROR, "%s",
|
||||||
|
+ _("element 'name' is mandatory for Vitastor pool"));
|
||||||
|
+ return -1;
|
||||||
|
+ }
|
||||||
|
|
||||||
|
if (options->formatFromString) {
|
||||||
|
char *format = virXPathString("string(./format/@type)", ctxt);
|
||||||
|
@@ -1173,6 +1190,7 @@ virStoragePoolDefFormatBuf(virBufferPtr buf,
|
||||||
|
/* RBD, Sheepdog, Gluster and Iscsi-direct devices are not local block devs nor
|
||||||
|
* files, so they don't have a target */
|
||||||
|
if (def->type != VIR_STORAGE_POOL_RBD &&
|
||||||
|
+ def->type != VIR_STORAGE_POOL_VITASTOR &&
|
||||||
|
def->type != VIR_STORAGE_POOL_SHEEPDOG &&
|
||||||
|
def->type != VIR_STORAGE_POOL_GLUSTER &&
|
||||||
|
def->type != VIR_STORAGE_POOL_ISCSI_DIRECT) {
|
||||||
|
diff --git a/src/conf/storage_conf.h b/src/conf/storage_conf.h
|
||||||
|
index dc0aa2a..ed4983d 100644
|
||||||
|
--- a/src/conf/storage_conf.h
|
||||||
|
+++ b/src/conf/storage_conf.h
|
||||||
|
@@ -110,6 +110,7 @@ typedef enum {
|
||||||
|
VIR_STORAGE_POOL_GLUSTER, /* Gluster device */
|
||||||
|
VIR_STORAGE_POOL_ZFS, /* ZFS */
|
||||||
|
VIR_STORAGE_POOL_VSTORAGE, /* Virtuozzo Storage */
|
||||||
|
+ VIR_STORAGE_POOL_VITASTOR, /* Vitastor */
|
||||||
|
|
||||||
|
VIR_STORAGE_POOL_LAST,
|
||||||
|
} virStoragePoolType;
|
||||||
|
@@ -466,6 +467,7 @@ VIR_ENUM_DECL(virStoragePartedFs)
|
||||||
|
VIR_CONNECT_LIST_STORAGE_POOLS_SCSI | \
|
||||||
|
VIR_CONNECT_LIST_STORAGE_POOLS_MPATH | \
|
||||||
|
VIR_CONNECT_LIST_STORAGE_POOLS_RBD | \
|
||||||
|
+ VIR_CONNECT_LIST_STORAGE_POOLS_VITASTOR | \
|
||||||
|
VIR_CONNECT_LIST_STORAGE_POOLS_SHEEPDOG | \
|
||||||
|
VIR_CONNECT_LIST_STORAGE_POOLS_GLUSTER | \
|
||||||
|
VIR_CONNECT_LIST_STORAGE_POOLS_ZFS | \
|
||||||
|
diff --git a/src/conf/virstorageobj.c b/src/conf/virstorageobj.c
|
||||||
|
index 6ea6a97..3ba45b9 100644
|
||||||
|
--- a/src/conf/virstorageobj.c
|
||||||
|
+++ b/src/conf/virstorageobj.c
|
||||||
|
@@ -1493,6 +1493,7 @@ virStoragePoolObjSourceFindDuplicateCb(const void *payload,
|
||||||
|
return 1;
|
||||||
|
break;
|
||||||
|
|
||||||
|
+ case VIR_STORAGE_POOL_VITASTOR:
|
||||||
|
case VIR_STORAGE_POOL_RBD:
|
||||||
|
case VIR_STORAGE_POOL_LAST:
|
||||||
|
break;
|
||||||
|
@@ -1994,6 +1995,8 @@ virStoragePoolObjMatch(virStoragePoolObjPtr obj,
|
||||||
|
(obj->def->type == VIR_STORAGE_POOL_MPATH)) ||
|
||||||
|
(MATCH(VIR_CONNECT_LIST_STORAGE_POOLS_RBD) &&
|
||||||
|
(obj->def->type == VIR_STORAGE_POOL_RBD)) ||
|
||||||
|
+ (MATCH(VIR_CONNECT_LIST_STORAGE_POOLS_VITASTOR) &&
|
||||||
|
+ (obj->def->type == VIR_STORAGE_POOL_VITASTOR)) ||
|
||||||
|
(MATCH(VIR_CONNECT_LIST_STORAGE_POOLS_SHEEPDOG) &&
|
||||||
|
(obj->def->type == VIR_STORAGE_POOL_SHEEPDOG)) ||
|
||||||
|
(MATCH(VIR_CONNECT_LIST_STORAGE_POOLS_GLUSTER) &&
|
||||||
|
diff --git a/src/libvirt-storage.c b/src/libvirt-storage.c
|
||||||
|
index 2ea3e94..d5d2273 100644
|
||||||
|
--- a/src/libvirt-storage.c
|
||||||
|
+++ b/src/libvirt-storage.c
|
||||||
|
@@ -92,6 +92,7 @@ virStoragePoolGetConnect(virStoragePoolPtr pool)
|
||||||
|
* VIR_CONNECT_LIST_STORAGE_POOLS_SCSI
|
||||||
|
* VIR_CONNECT_LIST_STORAGE_POOLS_MPATH
|
||||||
|
* VIR_CONNECT_LIST_STORAGE_POOLS_RBD
|
||||||
|
+ * VIR_CONNECT_LIST_STORAGE_POOLS_VITASTOR
|
||||||
|
* VIR_CONNECT_LIST_STORAGE_POOLS_SHEEPDOG
|
||||||
|
*
|
||||||
|
* Returns the number of storage pools found or -1 and sets @pools to
|
||||||
|
diff --git a/src/libxl/libxl_conf.c b/src/libxl/libxl_conf.c
|
||||||
|
index 73e988a..ab7bb81 100644
|
||||||
|
--- a/src/libxl/libxl_conf.c
|
||||||
|
+++ b/src/libxl/libxl_conf.c
|
||||||
|
@@ -888,6 +888,7 @@ libxlMakeNetworkDiskSrcStr(virStorageSourcePtr src,
|
||||||
|
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
|
||||||
|
case VIR_STORAGE_NET_PROTOCOL_SSH:
|
||||||
|
case VIR_STORAGE_NET_PROTOCOL_VXHS:
|
||||||
|
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||||
|
case VIR_STORAGE_NET_PROTOCOL_LAST:
|
||||||
|
case VIR_STORAGE_NET_PROTOCOL_NONE:
|
||||||
|
virReportError(VIR_ERR_NO_SUPPORT,
|
||||||
|
diff --git a/src/libxl/xen_xl.c b/src/libxl/xen_xl.c
|
||||||
|
index 17b93d0..c5a0084 100644
|
||||||
|
--- a/src/libxl/xen_xl.c
|
||||||
|
+++ b/src/libxl/xen_xl.c
|
||||||
|
@@ -1601,6 +1601,7 @@ xenFormatXLDiskSrcNet(virStorageSourcePtr src)
|
||||||
|
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
|
||||||
|
case VIR_STORAGE_NET_PROTOCOL_SSH:
|
||||||
|
case VIR_STORAGE_NET_PROTOCOL_VXHS:
|
||||||
|
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||||
|
case VIR_STORAGE_NET_PROTOCOL_LAST:
|
||||||
|
case VIR_STORAGE_NET_PROTOCOL_NONE:
|
||||||
|
virReportError(VIR_ERR_NO_SUPPORT,
|
||||||
|
diff --git a/src/qemu/qemu_block.c b/src/qemu/qemu_block.c
|
||||||
|
index cbf0aa4..f0ca9e7 100644
|
||||||
|
--- a/src/qemu/qemu_block.c
|
||||||
|
+++ b/src/qemu/qemu_block.c
|
||||||
|
@@ -869,6 +869,42 @@ qemuBlockStorageSourceGetRBDProps(virStorageSourcePtr src)
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
+static virJSONValuePtr
|
||||||
|
+qemuBlockStorageSourceGetVitastorProps(virStorageSource *src)
|
||||||
|
+{
|
||||||
|
+ virJSONValuePtr ret = NULL;
|
||||||
|
+ virStorageNetHostDefPtr host;
|
||||||
|
+ size_t i;
|
||||||
|
+ virBuffer buf = VIR_BUFFER_INITIALIZER;
|
||||||
|
+ char *etcd = NULL;
|
||||||
|
+
|
||||||
|
+ for (i = 0; i < src->nhosts; i++) {
|
||||||
|
+ host = src->hosts + i;
|
||||||
|
+ if ((virStorageNetHostTransport)host->transport != VIR_STORAGE_NET_HOST_TRANS_TCP) {
|
||||||
|
+ goto cleanup;
|
||||||
|
+ }
|
||||||
|
+ virBufferAsprintf(&buf, i > 0 ? ",%s:%u" : "%s:%u", host->name, host->port);
|
||||||
|
+ }
|
||||||
|
+ if (src->nhosts > 0) {
|
||||||
|
+ etcd = virBufferContentAndReset(&buf);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ if (virJSONValueObjectCreate(&ret,
|
||||||
|
+ "s:driver", "vitastor",
|
||||||
|
+ "S:etcd-host", etcd,
|
||||||
|
+ "S:etcd-prefix", src->relPath,
|
||||||
|
+ "S:config-path", src->configFile,
|
||||||
|
+ "s:image", src->path,
|
||||||
|
+ NULL) < 0)
|
||||||
|
+ goto cleanup;
|
||||||
|
+
|
||||||
|
+cleanup:
|
||||||
|
+ VIR_FREE(etcd);
|
||||||
|
+ virBufferFreeAndReset(&buf);
|
||||||
|
+ return ret;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+
|
||||||
|
static virJSONValuePtr
|
||||||
|
qemuBlockStorageSourceGetSheepdogProps(virStorageSourcePtr src)
|
||||||
|
{
|
||||||
|
@@ -1130,6 +1166,11 @@ qemuBlockStorageSourceGetBackendProps(virStorageSourcePtr src,
|
||||||
|
return NULL;
|
||||||
|
break;
|
||||||
|
|
||||||
|
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||||
|
+ if (!(fileprops = qemuBlockStorageSourceGetVitastorProps(src)))
|
||||||
|
+ return NULL;
|
||||||
|
+ break;
|
||||||
|
+
|
||||||
|
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
|
||||||
|
if (!(fileprops = qemuBlockStorageSourceGetSheepdogProps(src)))
|
||||||
|
return NULL;
|
||||||
|
diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
|
||||||
|
index 822d5f8..abec34e 100644
|
||||||
|
--- a/src/qemu/qemu_command.c
|
||||||
|
+++ b/src/qemu/qemu_command.c
|
||||||
|
@@ -1078,6 +1078,43 @@ qemuBuildNetworkDriveStr(virStorageSourcePtr src,
|
||||||
|
ret = virBufferContentAndReset(&buf);
|
||||||
|
break;
|
||||||
|
|
||||||
|
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||||
|
+ if (strchr(src->path, ':')) {
|
||||||
|
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
|
||||||
|
+ _("':' not allowed in Vitastor source volume name '%s'"),
|
||||||
|
+ src->path);
|
||||||
|
+ return NULL;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ virBufferStrcat(&buf, "vitastor:image=", src->path, NULL);
|
||||||
|
+
|
||||||
|
+ if (src->nhosts > 0) {
|
||||||
|
+ virBufferAddLit(&buf, ":etcd-host=");
|
||||||
|
+ for (i = 0; i < src->nhosts; i++) {
|
||||||
|
+ if (i)
|
||||||
|
+ virBufferAddLit(&buf, ",");
|
||||||
|
+
|
||||||
|
+ /* assume host containing : is ipv6 */
|
||||||
|
+ if (strchr(src->hosts[i].name, ':'))
|
||||||
|
+ virBufferEscape(&buf, '\\', ":", "[%s]",
|
||||||
|
+ src->hosts[i].name);
|
||||||
|
+ else
|
||||||
|
+ virBufferAsprintf(&buf, "%s", src->hosts[i].name);
|
||||||
|
+
|
||||||
|
+ if (src->hosts[i].port)
|
||||||
|
+ virBufferAsprintf(&buf, "\\:%u", src->hosts[i].port);
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ if (src->configFile)
|
||||||
|
+ virBufferEscape(&buf, '\\', ":", ":config-path=%s", src->configFile);
|
||||||
|
+
|
||||||
|
+ if (src->relPath)
|
||||||
|
+ virBufferEscape(&buf, '\\', ":", ":etcd-prefix=%s", src->relPath);
|
||||||
|
+
|
||||||
|
+ ret = virBufferContentAndReset(&buf);
|
||||||
|
+ break;
|
||||||
|
+
|
||||||
|
case VIR_STORAGE_NET_PROTOCOL_VXHS:
|
||||||
|
virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
|
||||||
|
_("VxHS protocol does not support URI syntax"));
|
||||||
|
diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
|
||||||
|
index ec6b340..f399efa 100644
|
||||||
|
--- a/src/qemu/qemu_domain.c
|
||||||
|
+++ b/src/qemu/qemu_domain.c
|
||||||
|
@@ -6862,6 +6862,16 @@ qemuDomainValidateStorageSource(virStora
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
+ if (src->query &&
|
||||||
|
+ (actualType != VIR_STORAGE_TYPE_NETWORK ||
|
||||||
|
+ (src->protocol != VIR_STORAGE_NET_PROTOCOL_HTTPS &&
|
||||||
|
+ src->protocol != VIR_STORAGE_NET_PROTOCOL_HTTP &&
|
||||||
|
+ src->protocol != VIR_STORAGE_NET_PROTOCOL_VITASTOR))) {
|
||||||
|
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
|
||||||
|
+ _("query is supported only with HTTP(S) protocols"));
|
||||||
|
+ return -1;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -13836,6 +13846,7 @@ qemuDomainPrepareStorageSourceTLS(virStorageSourcePtr src,
|
||||||
|
break;
|
||||||
|
|
||||||
|
case VIR_STORAGE_NET_PROTOCOL_RBD:
|
||||||
|
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||||
|
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
|
||||||
|
case VIR_STORAGE_NET_PROTOCOL_GLUSTER:
|
||||||
|
case VIR_STORAGE_NET_PROTOCOL_ISCSI:
|
||||||
|
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
|
||||||
|
index 1d96170..2d24396 100644
|
||||||
|
--- a/src/qemu/qemu_driver.c
|
||||||
|
+++ b/src/qemu/qemu_driver.c
|
||||||
|
@@ -14841,6 +14841,7 @@ qemuDomainSnapshotPrepareDiskExternalInactive(virDomainSnapshotDiskDefPtr snapdi
|
||||||
|
case VIR_STORAGE_NET_PROTOCOL_TFTP:
|
||||||
|
case VIR_STORAGE_NET_PROTOCOL_SSH:
|
||||||
|
case VIR_STORAGE_NET_PROTOCOL_VXHS:
|
||||||
|
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||||
|
case VIR_STORAGE_NET_PROTOCOL_LAST:
|
||||||
|
virReportError(VIR_ERR_INTERNAL_ERROR,
|
||||||
|
_("external inactive snapshots are not supported on "
|
||||||
|
@@ -14925,6 +14926,7 @@ qemuDomainSnapshotPrepareDiskExternalActive(virDomainSnapshotDiskDefPtr snapdisk
|
||||||
|
case VIR_STORAGE_NET_PROTOCOL_TFTP:
|
||||||
|
case VIR_STORAGE_NET_PROTOCOL_SSH:
|
||||||
|
case VIR_STORAGE_NET_PROTOCOL_VXHS:
|
||||||
|
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||||
|
case VIR_STORAGE_NET_PROTOCOL_LAST:
|
||||||
|
virReportError(VIR_ERR_INTERNAL_ERROR,
|
||||||
|
_("external active snapshots are not supported on "
|
||||||
|
@@ -15054,6 +15056,7 @@ qemuDomainSnapshotPrepareDiskInternal(virDomainDiskDefPtr disk,
|
||||||
|
case VIR_STORAGE_NET_PROTOCOL_TFTP:
|
||||||
|
case VIR_STORAGE_NET_PROTOCOL_SSH:
|
||||||
|
case VIR_STORAGE_NET_PROTOCOL_VXHS:
|
||||||
|
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||||
|
case VIR_STORAGE_NET_PROTOCOL_LAST:
|
||||||
|
virReportError(VIR_ERR_INTERNAL_ERROR,
|
||||||
|
_("internal inactive snapshots are not supported on "
|
||||||
|
diff --git a/src/storage/storage_driver.c b/src/storage/storage_driver.c
|
||||||
|
index 4a13e90..33301c7 100644
|
||||||
|
--- a/src/storage/storage_driver.c
|
||||||
|
+++ b/src/storage/storage_driver.c
|
||||||
|
@@ -1641,6 +1641,7 @@ storageVolLookupByPathCallback(virStoragePoolObjPtr obj,
|
||||||
|
case VIR_STORAGE_POOL_RBD:
|
||||||
|
case VIR_STORAGE_POOL_SHEEPDOG:
|
||||||
|
case VIR_STORAGE_POOL_ZFS:
|
||||||
|
+ case VIR_STORAGE_POOL_VITASTOR:
|
||||||
|
case VIR_STORAGE_POOL_LAST:
|
||||||
|
ignore_value(VIR_STRDUP(stable_path, data->path));
|
||||||
|
break;
|
||||||
|
diff --git a/src/test/test_driver.c b/src/test/test_driver.c
|
||||||
|
index 29c4c86..a27ad94 100644
|
||||||
|
--- a/src/test/test_driver.c
|
||||||
|
+++ b/src/test/test_driver.c
|
||||||
|
@@ -7086,6 +7086,7 @@ testStorageVolumeTypeForPool(int pooltype)
|
||||||
|
case VIR_STORAGE_POOL_ISCSI_DIRECT:
|
||||||
|
case VIR_STORAGE_POOL_GLUSTER:
|
||||||
|
case VIR_STORAGE_POOL_RBD:
|
||||||
|
+ case VIR_STORAGE_POOL_VITASTOR:
|
||||||
|
return VIR_STORAGE_VOL_NETWORK;
|
||||||
|
case VIR_STORAGE_POOL_LOGICAL:
|
||||||
|
case VIR_STORAGE_POOL_DISK:
|
||||||
|
diff --git a/src/util/virstoragefile.c b/src/util/virstoragefile.c
|
||||||
|
index 0d3c2af..edb7f9e 100644
|
||||||
|
--- a/src/util/virstoragefile.c
|
||||||
|
+++ b/src/util/virstoragefile.c
|
||||||
|
@@ -90,6 +90,7 @@ VIR_ENUM_IMPL(virStorageNetProtocol,
|
||||||
|
"tftp",
|
||||||
|
"ssh",
|
||||||
|
"vxhs",
|
||||||
|
+ "vitastor",
|
||||||
|
);
|
||||||
|
|
||||||
|
VIR_ENUM_IMPL(virStorageNetHostTransport,
|
||||||
|
@@ -2927,6 +2928,73 @@ virStorageSourceParseRBDColonString(cons
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
+static int
|
||||||
|
+virStorageSourceParseVitastorColonString(const char *colonstr,
|
||||||
|
+ virStorageSourcePtr src)
|
||||||
|
+{
|
||||||
|
+ char *p, *e, *next;
|
||||||
|
+ g_autofree char *options = NULL;
|
||||||
|
+
|
||||||
|
+ /* optionally skip the "vitastor:" prefix if provided */
|
||||||
|
+ if (STRPREFIX(colonstr, "vitastor:"))
|
||||||
|
+ colonstr += strlen("vitastor:");
|
||||||
|
+
|
||||||
|
+ options = g_strdup(colonstr);
|
||||||
|
+
|
||||||
|
+ p = options;
|
||||||
|
+ while (*p) {
|
||||||
|
+ /* find : delimiter or end of string */
|
||||||
|
+ for (e = p; *e && *e != ':'; ++e) {
|
||||||
|
+ if (*e == '\\') {
|
||||||
|
+ e++;
|
||||||
|
+ if (*e == '\0')
|
||||||
|
+ break;
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+ if (*e == '\0') {
|
||||||
|
+ next = e; /* last kv pair */
|
||||||
|
+ } else {
|
||||||
|
+ next = e + 1;
|
||||||
|
+ *e = '\0';
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ if (STRPREFIX(p, "image=")) {
|
||||||
|
+ src->path = g_strdup(p + strlen("image="));
|
||||||
|
+ } else if (STRPREFIX(p, "etcd-prefix=")) {
|
||||||
|
+ src->query = g_strdup(p + strlen("etcd-prefix="));
|
||||||
|
+ } else if (STRPREFIX(p, "config-path=")) {
|
||||||
|
+ src->configFile = g_strdup(p + strlen("config-path="));
|
||||||
|
+ } else if (STRPREFIX(p, "etcd-host=")) {
|
||||||
|
+ char *h, *sep;
|
||||||
|
+
|
||||||
|
+ h = p + strlen("etcd-host=");
|
||||||
|
+ while (h < e) {
|
||||||
|
+ for (sep = h; sep < e; ++sep) {
|
||||||
|
+ if (*sep == '\\' && (sep[1] == ',' ||
|
||||||
|
+ sep[1] == ';' ||
|
||||||
|
+ sep[1] == ' ')) {
|
||||||
|
+ *sep = '\0';
|
||||||
|
+ sep += 2;
|
||||||
|
+ break;
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ if (virStorageSourceRBDAddHost(src, h) < 0)
|
||||||
|
+ return -1;
|
||||||
|
+
|
||||||
|
+ h = sep;
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ p = next;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ if (!src->path) {
|
||||||
|
+ return -1;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
|
||||||
|
static int
|
||||||
|
virStorageSourceParseNBDColonString(const char *nbdstr,
|
||||||
|
@@ -3022,6 +3090,11 @@ virStorageSourceParseBackingColon(virSto
|
||||||
|
return -1;
|
||||||
|
break;
|
||||||
|
|
||||||
|
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||||
|
+ if (virStorageSourceParseVitastorColonString(path, src) < 0)
|
||||||
|
+ return -1;
|
||||||
|
+ break;
|
||||||
|
+
|
||||||
|
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
|
||||||
|
case VIR_STORAGE_NET_PROTOCOL_LAST:
|
||||||
|
case VIR_STORAGE_NET_PROTOCOL_NONE:
|
||||||
|
@@ -3507,6 +3580,54 @@ virStorageSourceParseBackingJSONRBD(virS
|
||||||
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
+virStorageSourceParseBackingJSONVitastor(virStorageSourcePtr src,
|
||||||
|
+ virJSONValuePtr json,
|
||||||
|
+ const char *jsonstr G_GNUC_UNUSED,
|
||||||
|
+ int opaque G_GNUC_UNUSED)
|
||||||
|
+{
|
||||||
|
+ const char *filename;
|
||||||
|
+ const char *image = virJSONValueObjectGetString(json, "image");
|
||||||
|
+ const char *conf = virJSONValueObjectGetString(json, "config-path");
|
||||||
|
+ const char *etcd_prefix = virJSONValueObjectGetString(json, "etcd-prefix");
|
||||||
|
+ virJSONValuePtr servers = virJSONValueObjectGetArray(json, "server");
|
||||||
|
+ size_t nservers;
|
||||||
|
+ size_t i;
|
||||||
|
+
|
||||||
|
+ src->type = VIR_STORAGE_TYPE_NETWORK;
|
||||||
|
+ src->protocol = VIR_STORAGE_NET_PROTOCOL_VITASTOR;
|
||||||
|
+
|
||||||
|
+ /* legacy syntax passed via 'filename' option */
|
||||||
|
+ if ((filename = virJSONValueObjectGetString(json, "filename")))
|
||||||
|
+ return virStorageSourceParseVitastorColonString(filename, src);
|
||||||
|
+
|
||||||
|
+ if (!image) {
|
||||||
|
+ virReportError(VIR_ERR_INVALID_ARG, "%s",
|
||||||
|
+ _("missing image name in Vitastor backing volume "
|
||||||
|
+ "JSON specification"));
|
||||||
|
+ return -1;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ src->path = g_strdup(image);
|
||||||
|
+ src->configFile = g_strdup(conf);
|
||||||
|
+ src->query = g_strdup(etcd_prefix);
|
||||||
|
+
|
||||||
|
+ if (servers) {
|
||||||
|
+ nservers = virJSONValueArraySize(servers);
|
||||||
|
+
|
||||||
|
+ src->hosts = g_new0(virStorageNetHostDef, nservers);
|
||||||
|
+ src->nhosts = nservers;
|
||||||
|
+
|
||||||
|
+ for (i = 0; i < nservers; i++) {
|
||||||
|
+ if (virStorageSourceParseBackingJSONInetSocketAddress(src->hosts + i,
|
||||||
|
+ virJSONValueArrayGet(servers, i)) < 0)
|
||||||
|
+ return -1;
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int
|
||||||
|
virStorageSourceParseBackingJSONRaw(virStorageSourcePtr src,
|
||||||
|
virJSONValuePtr json,
|
||||||
|
int opaque G_GNUC_UNUSED)
|
||||||
|
@@ -3578,6 +3699,7 @@ static const struct virStorageSourceJSON
|
||||||
|
{"sheepdog", virStorageSourceParseBackingJSONSheepdog, 0},
|
||||||
|
{"ssh", virStorageSourceParseBackingJSONSSH, 0},
|
||||||
|
{"rbd", virStorageSourceParseBackingJSONRBD, 0},
|
||||||
|
+ {"vitastor", virStorageSourceParseBackingJSONVitastor, 0},
|
||||||
|
{"raw", virStorageSourceParseBackingJSONRaw, 0},
|
||||||
|
{"vxhs", virStorageSourceParseBackingJSONVxHS, 0},
|
||||||
|
};
|
||||||
|
@@ -4364,6 +4486,7 @@ virStorageSourceNetworkDefaultPort(virSt
|
||||||
|
case VIR_STORAGE_NET_PROTOCOL_GLUSTER:
|
||||||
|
return 24007;
|
||||||
|
|
||||||
|
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||||
|
case VIR_STORAGE_NET_PROTOCOL_RBD:
|
||||||
|
/* we don't provide a default for RBD */
|
||||||
|
return 0;
|
||||||
|
diff --git a/src/util/virstoragefile.h b/src/util/virstoragefile.h
|
||||||
|
index 1d6161a..8d83bf3 100644
|
||||||
|
--- a/src/util/virstoragefile.h
|
||||||
|
+++ b/src/util/virstoragefile.h
|
||||||
|
@@ -134,6 +134,7 @@ typedef enum {
|
||||||
|
VIR_STORAGE_NET_PROTOCOL_TFTP,
|
||||||
|
VIR_STORAGE_NET_PROTOCOL_SSH,
|
||||||
|
VIR_STORAGE_NET_PROTOCOL_VXHS,
|
||||||
|
+ VIR_STORAGE_NET_PROTOCOL_VITASTOR,
|
||||||
|
|
||||||
|
VIR_STORAGE_NET_PROTOCOL_LAST
|
||||||
|
} virStorageNetProtocol;
|
||||||
|
@@ -265,6 +266,7 @@ struct _virStorageSource {
|
||||||
|
char *snapshot; /* for storage systems supporting internal snapshots */
|
||||||
|
char *configFile; /* some storage systems use config file as part of
|
||||||
|
the source definition */
|
||||||
|
+ char *query; /* query string for HTTP based protocols */
|
||||||
|
size_t nhosts;
|
||||||
|
virStorageNetHostDefPtr hosts;
|
||||||
|
virStorageSourcePoolDefPtr srcpool;
|
||||||
|
diff --git a/tools/virsh-pool.c b/tools/virsh-pool.c
|
||||||
|
index 70ca39b..9caef51 100644
|
||||||
|
--- a/tools/virsh-pool.c
|
||||||
|
+++ b/tools/virsh-pool.c
|
||||||
|
@@ -1219,6 +1219,9 @@ cmdPoolList(vshControl *ctl, const vshCmd *cmd ATTRIBUTE_UNUSED)
|
||||||
|
case VIR_STORAGE_POOL_VSTORAGE:
|
||||||
|
flags |= VIR_CONNECT_LIST_STORAGE_POOLS_VSTORAGE;
|
||||||
|
break;
|
||||||
|
+ case VIR_STORAGE_POOL_VITASTOR:
|
||||||
|
+ flags |= VIR_CONNECT_LIST_STORAGE_POOLS_VITASTOR;
|
||||||
|
+ break;
|
||||||
|
case VIR_STORAGE_POOL_LAST:
|
||||||
|
break;
|
||||||
|
}
|
@@ -25,4 +25,4 @@ rm fio
|
|||||||
mv fio-copy fio
|
mv fio-copy fio
|
||||||
FIO=`rpm -qi fio | perl -e 'while(<>) { /^Epoch[\s:]+(\S+)/ && print "$1:"; /^Version[\s:]+(\S+)/ && print $1; /^Release[\s:]+(\S+)/ && print "-$1"; }'`
|
FIO=`rpm -qi fio | perl -e 'while(<>) { /^Epoch[\s:]+(\S+)/ && print "$1:"; /^Version[\s:]+(\S+)/ && print $1; /^Release[\s:]+(\S+)/ && print "-$1"; }'`
|
||||||
perl -i -pe 's/(Requires:\s*fio)([^\n]+)?/$1 = '$FIO'/' $VITASTOR/rpm/vitastor-el$EL.spec
|
perl -i -pe 's/(Requires:\s*fio)([^\n]+)?/$1 = '$FIO'/' $VITASTOR/rpm/vitastor-el$EL.spec
|
||||||
tar --transform 's#^#vitastor-0.6.17/#' --exclude 'rpm/*.rpm' -czf $VITASTOR/../vitastor-0.6.17$(rpm --eval '%dist').tar.gz *
|
tar --transform 's#^#vitastor-0.7.1/#' --exclude 'rpm/*.rpm' -czf $VITASTOR/../vitastor-0.7.1$(rpm --eval '%dist').tar.gz *
|
||||||
|
@@ -9,7 +9,8 @@ WORKDIR /root
|
|||||||
RUN rm -f /etc/yum.repos.d/CentOS-Media.repo
|
RUN rm -f /etc/yum.repos.d/CentOS-Media.repo
|
||||||
RUN yum -y --enablerepo=extras install centos-release-scl epel-release yum-utils rpm-build
|
RUN yum -y --enablerepo=extras install centos-release-scl epel-release yum-utils rpm-build
|
||||||
RUN yum -y install https://vitastor.io/rpms/centos/7/vitastor-release-1.0-1.el7.noarch.rpm
|
RUN yum -y install https://vitastor.io/rpms/centos/7/vitastor-release-1.0-1.el7.noarch.rpm
|
||||||
RUN yum -y install devtoolset-9-gcc-c++ devtoolset-9-libatomic-devel gcc make cmake gperftools-devel fio rh-nodejs12 jerasure-devel gf-complete-devel rdma-core-devel
|
RUN yum -y install devtoolset-9-gcc-c++ devtoolset-9-libatomic-devel gcc make cmake gperftools-devel \
|
||||||
|
fio rh-nodejs12 jerasure-devel libisa-l-devel gf-complete-devel rdma-core-devel
|
||||||
RUN yumdownloader --disablerepo=centos-sclo-rh --source fio
|
RUN yumdownloader --disablerepo=centos-sclo-rh --source fio
|
||||||
RUN rpm --nomd5 -i fio*.src.rpm
|
RUN rpm --nomd5 -i fio*.src.rpm
|
||||||
RUN rm -f /etc/yum.repos.d/CentOS-Media.repo
|
RUN rm -f /etc/yum.repos.d/CentOS-Media.repo
|
||||||
@@ -34,7 +35,7 @@ ADD . /root/vitastor
|
|||||||
RUN set -e; \
|
RUN set -e; \
|
||||||
cd /root/vitastor/rpm; \
|
cd /root/vitastor/rpm; \
|
||||||
sh build-tarball.sh; \
|
sh build-tarball.sh; \
|
||||||
cp /root/vitastor-0.6.17.el7.tar.gz ~/rpmbuild/SOURCES; \
|
cp /root/vitastor-0.7.1.el7.tar.gz ~/rpmbuild/SOURCES; \
|
||||||
cp vitastor-el7.spec ~/rpmbuild/SPECS/vitastor.spec; \
|
cp vitastor-el7.spec ~/rpmbuild/SPECS/vitastor.spec; \
|
||||||
cd ~/rpmbuild/SPECS/; \
|
cd ~/rpmbuild/SPECS/; \
|
||||||
rpmbuild -ba vitastor.spec; \
|
rpmbuild -ba vitastor.spec; \
|
||||||
|
@@ -1,11 +1,11 @@
|
|||||||
Name: vitastor
|
Name: vitastor
|
||||||
Version: 0.6.17
|
Version: 0.7.1
|
||||||
Release: 1%{?dist}
|
Release: 1%{?dist}
|
||||||
Summary: Vitastor, a fast software-defined clustered block storage
|
Summary: Vitastor, a fast software-defined clustered block storage
|
||||||
|
|
||||||
License: Vitastor Network Public License 1.1
|
License: Vitastor Network Public License 1.1
|
||||||
URL: https://vitastor.io/
|
URL: https://vitastor.io/
|
||||||
Source0: vitastor-0.6.17.el7.tar.gz
|
Source0: vitastor-0.7.1.el7.tar.gz
|
||||||
|
|
||||||
BuildRequires: liburing-devel >= 0.6
|
BuildRequires: liburing-devel >= 0.6
|
||||||
BuildRequires: gperftools-devel
|
BuildRequires: gperftools-devel
|
||||||
@@ -13,6 +13,7 @@ BuildRequires: devtoolset-9-gcc-c++
|
|||||||
BuildRequires: rh-nodejs12
|
BuildRequires: rh-nodejs12
|
||||||
BuildRequires: rh-nodejs12-npm
|
BuildRequires: rh-nodejs12-npm
|
||||||
BuildRequires: jerasure-devel
|
BuildRequires: jerasure-devel
|
||||||
|
BuildRequires: libisa-l-devel
|
||||||
BuildRequires: gf-complete-devel
|
BuildRequires: gf-complete-devel
|
||||||
BuildRequires: libibverbs-devel
|
BuildRequires: libibverbs-devel
|
||||||
BuildRequires: cmake
|
BuildRequires: cmake
|
||||||
@@ -32,6 +33,7 @@ size with configurable redundancy (replication or erasure codes/XOR).
|
|||||||
%package -n vitastor-osd
|
%package -n vitastor-osd
|
||||||
Summary: Vitastor - OSD
|
Summary: Vitastor - OSD
|
||||||
Requires: libJerasure2
|
Requires: libJerasure2
|
||||||
|
Requires: libisa-l
|
||||||
Requires: liburing >= 0.6
|
Requires: liburing >= 0.6
|
||||||
Requires: vitastor-client = %{version}-%{release}
|
Requires: vitastor-client = %{version}-%{release}
|
||||||
|
|
||||||
|
@@ -6,10 +6,12 @@ FROM centos:8
|
|||||||
WORKDIR /root
|
WORKDIR /root
|
||||||
|
|
||||||
RUN rm -f /etc/yum.repos.d/CentOS-Media.repo
|
RUN rm -f /etc/yum.repos.d/CentOS-Media.repo
|
||||||
|
RUN sed -i 's/^mirrorlist=/#mirrorlist=/; s!#baseurl=http://mirror.centos.org/!baseurl=http://vault.centos.org/!' /etc/yum.repos.d/*.repo
|
||||||
RUN dnf -y install centos-release-advanced-virtualization epel-release dnf-plugins-core
|
RUN dnf -y install centos-release-advanced-virtualization epel-release dnf-plugins-core
|
||||||
|
RUN sed -i 's/^mirrorlist=/#mirrorlist=/; s!#baseurl=.*!baseurl=http://vault.centos.org/centos/8.4.2105/virt/$basearch/$avdir/!; s!^baseurl=.*Source/.*!baseurl=http://vault.centos.org/centos/8.4.2105/virt/Source/advanced-virtualization/!' /etc/yum.repos.d/CentOS-Advanced-Virtualization.repo
|
||||||
RUN yum -y install https://vitastor.io/rpms/centos/8/vitastor-release-1.0-1.el8.noarch.rpm
|
RUN yum -y install https://vitastor.io/rpms/centos/8/vitastor-release-1.0-1.el8.noarch.rpm
|
||||||
RUN dnf -y install gcc-toolset-9 gcc-toolset-9-gcc-c++ gperftools-devel \
|
RUN dnf -y install gcc-toolset-9 gcc-toolset-9-gcc-c++ gperftools-devel \
|
||||||
fio nodejs rpm-build jerasure-devel gf-complete-devel libibverbs-devel libarchive cmake
|
fio nodejs rpm-build jerasure-devel libisa-l-devel gf-complete-devel libibverbs-devel libarchive cmake
|
||||||
RUN dnf download --source fio
|
RUN dnf download --source fio
|
||||||
RUN rpm --nomd5 -i fio*.src.rpm
|
RUN rpm --nomd5 -i fio*.src.rpm
|
||||||
RUN cd ~/rpmbuild/SPECS && dnf builddep -y --enablerepo=powertools --spec fio.spec
|
RUN cd ~/rpmbuild/SPECS && dnf builddep -y --enablerepo=powertools --spec fio.spec
|
||||||
@@ -33,7 +35,7 @@ ADD . /root/vitastor
|
|||||||
RUN set -e; \
|
RUN set -e; \
|
||||||
cd /root/vitastor/rpm; \
|
cd /root/vitastor/rpm; \
|
||||||
sh build-tarball.sh; \
|
sh build-tarball.sh; \
|
||||||
cp /root/vitastor-0.6.17.el8.tar.gz ~/rpmbuild/SOURCES; \
|
cp /root/vitastor-0.7.1.el8.tar.gz ~/rpmbuild/SOURCES; \
|
||||||
cp vitastor-el8.spec ~/rpmbuild/SPECS/vitastor.spec; \
|
cp vitastor-el8.spec ~/rpmbuild/SPECS/vitastor.spec; \
|
||||||
cd ~/rpmbuild/SPECS/; \
|
cd ~/rpmbuild/SPECS/; \
|
||||||
rpmbuild -ba vitastor.spec; \
|
rpmbuild -ba vitastor.spec; \
|
||||||
|
@@ -1,17 +1,18 @@
|
|||||||
Name: vitastor
|
Name: vitastor
|
||||||
Version: 0.6.17
|
Version: 0.7.1
|
||||||
Release: 1%{?dist}
|
Release: 1%{?dist}
|
||||||
Summary: Vitastor, a fast software-defined clustered block storage
|
Summary: Vitastor, a fast software-defined clustered block storage
|
||||||
|
|
||||||
License: Vitastor Network Public License 1.1
|
License: Vitastor Network Public License 1.1
|
||||||
URL: https://vitastor.io/
|
URL: https://vitastor.io/
|
||||||
Source0: vitastor-0.6.17.el8.tar.gz
|
Source0: vitastor-0.7.1.el8.tar.gz
|
||||||
|
|
||||||
BuildRequires: liburing-devel >= 0.6
|
BuildRequires: liburing-devel >= 0.6
|
||||||
BuildRequires: gperftools-devel
|
BuildRequires: gperftools-devel
|
||||||
BuildRequires: gcc-toolset-9-gcc-c++
|
BuildRequires: gcc-toolset-9-gcc-c++
|
||||||
BuildRequires: nodejs >= 10
|
BuildRequires: nodejs >= 10
|
||||||
BuildRequires: jerasure-devel
|
BuildRequires: jerasure-devel
|
||||||
|
BuildRequires: libisa-l-devel
|
||||||
BuildRequires: gf-complete-devel
|
BuildRequires: gf-complete-devel
|
||||||
BuildRequires: libibverbs-devel
|
BuildRequires: libibverbs-devel
|
||||||
BuildRequires: cmake
|
BuildRequires: cmake
|
||||||
@@ -31,6 +32,7 @@ size with configurable redundancy (replication or erasure codes/XOR).
|
|||||||
%package -n vitastor-osd
|
%package -n vitastor-osd
|
||||||
Summary: Vitastor - OSD
|
Summary: Vitastor - OSD
|
||||||
Requires: libJerasure2
|
Requires: libJerasure2
|
||||||
|
Requires: libisa-l
|
||||||
Requires: liburing >= 0.6
|
Requires: liburing >= 0.6
|
||||||
Requires: vitastor-client = %{version}-%{release}
|
Requires: vitastor-client = %{version}-%{release}
|
||||||
|
|
||||||
|
@@ -15,7 +15,7 @@ if("${CMAKE_INSTALL_PREFIX}" MATCHES "^/usr/local/?$")
|
|||||||
set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}")
|
set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
add_definitions(-DVERSION="0.6.17")
|
add_definitions(-DVERSION="0.7.1")
|
||||||
add_definitions(-Wall -Wno-sign-compare -Wno-comment -Wno-parentheses -Wno-pointer-arith -fdiagnostics-color=always -I ${CMAKE_SOURCE_DIR}/src)
|
add_definitions(-Wall -Wno-sign-compare -Wno-comment -Wno-parentheses -Wno-pointer-arith -fdiagnostics-color=always -I ${CMAKE_SOURCE_DIR}/src)
|
||||||
if (${WITH_ASAN})
|
if (${WITH_ASAN})
|
||||||
add_definitions(-fsanitize=address -fno-omit-frame-pointer)
|
add_definitions(-fsanitize=address -fno-omit-frame-pointer)
|
||||||
@@ -50,6 +50,10 @@ pkg_check_modules(IBVERBS libibverbs)
|
|||||||
if (IBVERBS_LIBRARIES)
|
if (IBVERBS_LIBRARIES)
|
||||||
add_definitions(-DWITH_RDMA)
|
add_definitions(-DWITH_RDMA)
|
||||||
endif (IBVERBS_LIBRARIES)
|
endif (IBVERBS_LIBRARIES)
|
||||||
|
pkg_check_modules(ISAL libisal)
|
||||||
|
if (ISAL_LIBRARIES)
|
||||||
|
add_definitions(-DWITH_ISAL)
|
||||||
|
endif (ISAL_LIBRARIES)
|
||||||
|
|
||||||
include_directories(
|
include_directories(
|
||||||
../
|
../
|
||||||
@@ -104,6 +108,7 @@ target_link_libraries(vitastor-osd
|
|||||||
vitastor_common
|
vitastor_common
|
||||||
vitastor_blk
|
vitastor_blk
|
||||||
Jerasure
|
Jerasure
|
||||||
|
${ISAL_LIBRARIES}
|
||||||
${IBVERBS_LIBRARIES}
|
${IBVERBS_LIBRARIES}
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -225,7 +230,7 @@ target_link_libraries(osd_test tcmalloc_minimal)
|
|||||||
|
|
||||||
# osd_rmw_test
|
# osd_rmw_test
|
||||||
add_executable(osd_rmw_test osd_rmw_test.cpp allocator.cpp)
|
add_executable(osd_rmw_test osd_rmw_test.cpp allocator.cpp)
|
||||||
target_link_libraries(osd_rmw_test Jerasure tcmalloc_minimal)
|
target_link_libraries(osd_rmw_test Jerasure ${ISAL_LIBRARIES} tcmalloc_minimal)
|
||||||
|
|
||||||
# stub_uring_osd
|
# stub_uring_osd
|
||||||
add_executable(stub_uring_osd
|
add_executable(stub_uring_osd
|
||||||
|
@@ -912,7 +912,11 @@ void blockstore_init_journal::erase_dirty_object(blockstore_dirty_db_t::iterator
|
|||||||
? clean_it->second.location : UINT64_MAX;
|
? clean_it->second.location : UINT64_MAX;
|
||||||
if (exists && clean_loc == UINT64_MAX)
|
if (exists && clean_loc == UINT64_MAX)
|
||||||
{
|
{
|
||||||
bs->inode_space_stats[oid.inode] -= bs->block_size;
|
auto & sp = bs->inode_space_stats[oid.inode];
|
||||||
|
if (sp > bs->block_size)
|
||||||
|
sp -= bs->block_size;
|
||||||
|
else
|
||||||
|
bs->inode_space_stats.erase(oid.inode);
|
||||||
}
|
}
|
||||||
bs->erase_dirty(dirty_it, dirty_end, clean_loc);
|
bs->erase_dirty(dirty_it, dirty_end, clean_loc);
|
||||||
// Remove it from the flusher's queue, too
|
// Remove it from the flusher's queue, too
|
||||||
|
@@ -200,7 +200,11 @@ void blockstore_impl_t::mark_stable(const obj_ver_id & v, bool forget_dirty)
|
|||||||
}
|
}
|
||||||
else if (IS_DELETE(dirty_it->second.state))
|
else if (IS_DELETE(dirty_it->second.state))
|
||||||
{
|
{
|
||||||
inode_space_stats[dirty_it->first.oid.inode] -= block_size;
|
auto & sp = inode_space_stats[dirty_it->first.oid.inode];
|
||||||
|
if (sp > block_size)
|
||||||
|
sp -= block_size;
|
||||||
|
else
|
||||||
|
inode_space_stats.erase(dirty_it->first.oid.inode);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (forget_dirty && (IS_BIG_WRITE(dirty_it->second.state) ||
|
if (forget_dirty && (IS_BIG_WRITE(dirty_it->second.state) ||
|
||||||
|
@@ -39,6 +39,7 @@ public:
|
|||||||
ring_loop_t *ringloop = NULL;
|
ring_loop_t *ringloop = NULL;
|
||||||
epoll_manager_t *epmgr = NULL;
|
epoll_manager_t *epmgr = NULL;
|
||||||
cluster_client_t *cli = NULL;
|
cluster_client_t *cli = NULL;
|
||||||
|
bool no_recovery = false, no_rebalance = false, readonly = false;
|
||||||
|
|
||||||
int waiting = 0;
|
int waiting = 0;
|
||||||
cli_result_t etcd_err;
|
cli_result_t etcd_err;
|
||||||
|
@@ -127,7 +127,7 @@ resume_1:
|
|||||||
pool_stats[pool_cfg.id] = json11::Json::object {
|
pool_stats[pool_cfg.id] = json11::Json::object {
|
||||||
{ "name", pool_cfg.name },
|
{ "name", pool_cfg.name },
|
||||||
{ "pg_count", pool_cfg.pg_count },
|
{ "pg_count", pool_cfg.pg_count },
|
||||||
{ "scheme", pool_cfg.scheme == POOL_SCHEME_REPLICATED ? "replicated" : "jerasure" },
|
{ "scheme", pool_cfg.scheme == POOL_SCHEME_REPLICATED ? "replicated" : "ec" },
|
||||||
{ "scheme_name", pool_cfg.scheme == POOL_SCHEME_REPLICATED
|
{ "scheme_name", pool_cfg.scheme == POOL_SCHEME_REPLICATED
|
||||||
? std::to_string(pool_cfg.pg_size)+"/"+std::to_string(pool_cfg.pg_minsize)
|
? std::to_string(pool_cfg.pg_size)+"/"+std::to_string(pool_cfg.pg_minsize)
|
||||||
: "EC "+std::to_string(pool_cfg.pg_size-pool_cfg.parity_chunks)+"+"+std::to_string(pool_cfg.parity_chunks) },
|
: "EC "+std::to_string(pool_cfg.pg_size-pool_cfg.parity_chunks)+"+"+std::to_string(pool_cfg.parity_chunks) },
|
||||||
|
@@ -64,8 +64,9 @@ struct rm_inode_t
|
|||||||
}
|
}
|
||||||
rm->obj_pos = rm->objects.begin();
|
rm->obj_pos = rm->objects.begin();
|
||||||
lists.push_back(rm);
|
lists.push_back(rm);
|
||||||
if (parent->list_first)
|
if (parent->list_first && !(status & INODE_LIST_DONE))
|
||||||
{
|
{
|
||||||
|
// The listing object is dead when DONE => don't call next()
|
||||||
parent->cli->list_inode_next(lister, 1);
|
parent->cli->list_inode_next(lister, 1);
|
||||||
}
|
}
|
||||||
if (status & INODE_LIST_DONE)
|
if (status & INODE_LIST_DONE)
|
||||||
|
@@ -5,6 +5,7 @@
|
|||||||
#include "cluster_client.h"
|
#include "cluster_client.h"
|
||||||
#include "base64.h"
|
#include "base64.h"
|
||||||
#include "pg_states.h"
|
#include "pg_states.h"
|
||||||
|
#include "http_client.h"
|
||||||
|
|
||||||
// Print cluster status:
|
// Print cluster status:
|
||||||
// etcd, mon, osd states
|
// etcd, mon, osd states
|
||||||
@@ -207,6 +208,9 @@ resume_2:
|
|||||||
obj_n = agg_stats["object_counts"]["incomplete"].uint64_value();
|
obj_n = agg_stats["object_counts"]["incomplete"].uint64_value();
|
||||||
if (obj_n > 0)
|
if (obj_n > 0)
|
||||||
more_states += ", "+format_size(obj_n*object_size)+" incomplete";
|
more_states += ", "+format_size(obj_n*object_size)+" incomplete";
|
||||||
|
bool readonly = json_is_true(parent->cli->merged_config["readonly"]);
|
||||||
|
bool no_recovery = json_is_true(parent->cli->merged_config["no_recovery"]);
|
||||||
|
bool no_rebalance = json_is_true(parent->cli->merged_config["no_rebalance"]);
|
||||||
std::string recovery_io;
|
std::string recovery_io;
|
||||||
{
|
{
|
||||||
uint64_t deg_bps = agg_stats["recovery_stats"]["degraded"]["bps"].uint64_value();
|
uint64_t deg_bps = agg_stats["recovery_stats"]["degraded"]["bps"].uint64_value();
|
||||||
@@ -214,9 +218,19 @@ resume_2:
|
|||||||
uint64_t misp_bps = agg_stats["recovery_stats"]["misplaced"]["bps"].uint64_value();
|
uint64_t misp_bps = agg_stats["recovery_stats"]["misplaced"]["bps"].uint64_value();
|
||||||
uint64_t misp_iops = agg_stats["recovery_stats"]["misplaced"]["iops"].uint64_value();
|
uint64_t misp_iops = agg_stats["recovery_stats"]["misplaced"]["iops"].uint64_value();
|
||||||
if (deg_iops > 0 || deg_bps > 0)
|
if (deg_iops > 0 || deg_bps > 0)
|
||||||
recovery_io += " recovery: "+format_size(deg_bps)+"/s, "+format_size(deg_iops, true)+" op/s\n";
|
{
|
||||||
|
recovery_io += " recovery: "+std::string(no_recovery ? "disabled, " : "")+
|
||||||
|
format_size(deg_bps)+"/s, "+format_size(deg_iops, true)+" op/s\n";
|
||||||
|
}
|
||||||
|
else if (no_recovery)
|
||||||
|
recovery_io += " recovery: disabled\n";
|
||||||
if (misp_iops > 0 || misp_bps > 0)
|
if (misp_iops > 0 || misp_bps > 0)
|
||||||
recovery_io += " rebalance: "+format_size(misp_bps)+"/s, "+format_size(misp_iops, true)+" op/s\n";
|
{
|
||||||
|
recovery_io += " rebalance: "+std::string(no_rebalance ? "disabled, " : "")+
|
||||||
|
format_size(misp_bps)+"/s, "+format_size(misp_iops, true)+" op/s\n";
|
||||||
|
}
|
||||||
|
else if (no_rebalance)
|
||||||
|
recovery_io += " rebalance: disabled\n";
|
||||||
}
|
}
|
||||||
if (parent->json_output)
|
if (parent->json_output)
|
||||||
{
|
{
|
||||||
@@ -233,6 +247,9 @@ resume_2:
|
|||||||
{ "free_raw", free_raw },
|
{ "free_raw", free_raw },
|
||||||
{ "down_raw", down_raw },
|
{ "down_raw", down_raw },
|
||||||
{ "free_down_raw", free_down_raw },
|
{ "free_down_raw", free_down_raw },
|
||||||
|
{ "readonly", readonly },
|
||||||
|
{ "no_recovery", no_recovery },
|
||||||
|
{ "no_rebalance", no_rebalance },
|
||||||
{ "clean_data", agg_stats["object_counts"]["clean"].uint64_value() * object_size },
|
{ "clean_data", agg_stats["object_counts"]["clean"].uint64_value() * object_size },
|
||||||
{ "misplaced_data", agg_stats["object_counts"]["misplaced"].uint64_value() * object_size },
|
{ "misplaced_data", agg_stats["object_counts"]["misplaced"].uint64_value() * object_size },
|
||||||
{ "degraded_data", agg_stats["object_counts"]["degraded"].uint64_value() * object_size },
|
{ "degraded_data", agg_stats["object_counts"]["degraded"].uint64_value() * object_size },
|
||||||
@@ -259,7 +276,7 @@ resume_2:
|
|||||||
" pools: %d / %d active\n"
|
" pools: %d / %d active\n"
|
||||||
" pgs: %s\n"
|
" pgs: %s\n"
|
||||||
" \n"
|
" \n"
|
||||||
" io:\n"
|
" io%s:\n"
|
||||||
" client:%s %s/s rd, %s op/s rd, %s/s wr, %s op/s wr\n"
|
" client:%s %s/s rd, %s op/s rd, %s/s wr, %s op/s wr\n"
|
||||||
"%s",
|
"%s",
|
||||||
etcd_alive, etcd_states.size(), format_size(etcd_db_size).c_str(),
|
etcd_alive, etcd_states.size(), format_size(etcd_db_size).c_str(),
|
||||||
@@ -272,6 +289,7 @@ resume_2:
|
|||||||
format_size(agg_stats["object_counts"]["clean"].uint64_value() * object_size).c_str(), more_states.c_str(),
|
format_size(agg_stats["object_counts"]["clean"].uint64_value() * object_size).c_str(), more_states.c_str(),
|
||||||
pools_active, pool_count,
|
pools_active, pool_count,
|
||||||
pgs_by_state_str.c_str(),
|
pgs_by_state_str.c_str(),
|
||||||
|
readonly ? " (read-only mode)" : "",
|
||||||
recovery_io.size() > 0 ? " " : "",
|
recovery_io.size() > 0 ? " " : "",
|
||||||
format_size(agg_stats["op_stats"]["primary_read"]["bps"].uint64_value()).c_str(),
|
format_size(agg_stats["op_stats"]["primary_read"]["bps"].uint64_value()).c_str(),
|
||||||
format_size(agg_stats["op_stats"]["primary_read"]["iops"].uint64_value(), true).c_str(),
|
format_size(agg_stats["op_stats"]["primary_read"]["iops"].uint64_value(), true).c_str(),
|
||||||
|
@@ -279,6 +279,11 @@ static uint32_t is_power_of_two(uint64_t value)
|
|||||||
|
|
||||||
void cluster_client_t::on_load_config_hook(json11::Json::object & config)
|
void cluster_client_t::on_load_config_hook(json11::Json::object & config)
|
||||||
{
|
{
|
||||||
|
this->merged_config = config;
|
||||||
|
for (auto & kv: this->config.object_items())
|
||||||
|
{
|
||||||
|
this->merged_config[kv.first] = kv.second;
|
||||||
|
}
|
||||||
bs_block_size = config["block_size"].uint64_value();
|
bs_block_size = config["block_size"].uint64_value();
|
||||||
bs_bitmap_granularity = config["bitmap_granularity"].uint64_value();
|
bs_bitmap_granularity = config["bitmap_granularity"].uint64_value();
|
||||||
if (!bs_block_size)
|
if (!bs_block_size)
|
||||||
|
@@ -111,6 +111,7 @@ public:
|
|||||||
etcd_state_client_t st_cli;
|
etcd_state_client_t st_cli;
|
||||||
osd_messenger_t msgr;
|
osd_messenger_t msgr;
|
||||||
json11::Json config;
|
json11::Json config;
|
||||||
|
json11::Json::object merged_config;
|
||||||
|
|
||||||
cluster_client_t(ring_loop_t *ringloop, timerfd_manager_t *tfd, json11::Json & config);
|
cluster_client_t(ring_loop_t *ringloop, timerfd_manager_t *tfd, json11::Json & config);
|
||||||
~cluster_client_t();
|
~cluster_client_t();
|
||||||
|
@@ -153,16 +153,6 @@ void cluster_client_t::continue_listing(inode_list_t *lst)
|
|||||||
{
|
{
|
||||||
if (lst->done_pgs >= lst->pgs.size())
|
if (lst->done_pgs >= lst->pgs.size())
|
||||||
{
|
{
|
||||||
// All done
|
|
||||||
for (int i = 0; i < lists.size(); i++)
|
|
||||||
{
|
|
||||||
if (lists[i] == lst)
|
|
||||||
{
|
|
||||||
lists.erase(lists.begin()+i, lists.begin()+i+1);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
delete lst;
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (lst->want <= 0)
|
if (lst->want <= 0)
|
||||||
@@ -178,7 +168,7 @@ void cluster_client_t::continue_listing(inode_list_t *lst)
|
|||||||
send_list(&lst->pgs[i]->list_osds[j]);
|
send_list(&lst->pgs[i]->list_osds[j]);
|
||||||
if (lst->want <= 0)
|
if (lst->want <= 0)
|
||||||
{
|
{
|
||||||
break;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -268,6 +258,24 @@ void cluster_client_t::send_list(inode_list_osd_t *cur_list)
|
|||||||
lst->callback(lst, std::move(pg->objects), pg->pg_num, pg->cur_primary, status);
|
lst->callback(lst, std::move(pg->objects), pg->pg_num, pg->cur_primary, status);
|
||||||
lst->pgs[pg->pos] = NULL;
|
lst->pgs[pg->pos] = NULL;
|
||||||
delete pg;
|
delete pg;
|
||||||
|
if (lst->done_pgs >= lst->pgs.size())
|
||||||
|
{
|
||||||
|
// All done
|
||||||
|
for (int i = 0; i < lists.size(); i++)
|
||||||
|
{
|
||||||
|
if (lists[i] == lst)
|
||||||
|
{
|
||||||
|
lists.erase(lists.begin()+i, lists.begin()+i+1);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
delete lst;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
lst->want++;
|
||||||
}
|
}
|
||||||
continue_listing(lst);
|
continue_listing(lst);
|
||||||
};
|
};
|
||||||
|
@@ -673,18 +673,18 @@ void etcd_state_client_t::parse_state(const etcd_kv_t & kv)
|
|||||||
pc.scheme = POOL_SCHEME_REPLICATED;
|
pc.scheme = POOL_SCHEME_REPLICATED;
|
||||||
else if (pool_item.second["scheme"] == "xor")
|
else if (pool_item.second["scheme"] == "xor")
|
||||||
pc.scheme = POOL_SCHEME_XOR;
|
pc.scheme = POOL_SCHEME_XOR;
|
||||||
else if (pool_item.second["scheme"] == "jerasure")
|
else if (pool_item.second["scheme"] == "ec" || pool_item.second["scheme"] == "jerasure")
|
||||||
pc.scheme = POOL_SCHEME_JERASURE;
|
pc.scheme = POOL_SCHEME_EC;
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
fprintf(stderr, "Pool %u has invalid coding scheme (one of \"xor\", \"replicated\" or \"jerasure\" required), skipping pool\n", pool_id);
|
fprintf(stderr, "Pool %u has invalid coding scheme (one of \"xor\", \"replicated\", \"ec\" or \"jerasure\" required), skipping pool\n", pool_id);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
// PG Size
|
// PG Size
|
||||||
pc.pg_size = pool_item.second["pg_size"].uint64_value();
|
pc.pg_size = pool_item.second["pg_size"].uint64_value();
|
||||||
if (pc.pg_size < 1 ||
|
if (pc.pg_size < 1 ||
|
||||||
pool_item.second["pg_size"].uint64_value() < 3 &&
|
pool_item.second["pg_size"].uint64_value() < 3 &&
|
||||||
(pc.scheme == POOL_SCHEME_XOR || pc.scheme == POOL_SCHEME_JERASURE) ||
|
(pc.scheme == POOL_SCHEME_XOR || pc.scheme == POOL_SCHEME_EC) ||
|
||||||
pool_item.second["pg_size"].uint64_value() > 256)
|
pool_item.second["pg_size"].uint64_value() > 256)
|
||||||
{
|
{
|
||||||
fprintf(stderr, "Pool %u has invalid pg_size, skipping pool\n", pool_id);
|
fprintf(stderr, "Pool %u has invalid pg_size, skipping pool\n", pool_id);
|
||||||
@@ -701,7 +701,7 @@ void etcd_state_client_t::parse_state(const etcd_kv_t & kv)
|
|||||||
}
|
}
|
||||||
pc.parity_chunks = 1;
|
pc.parity_chunks = 1;
|
||||||
}
|
}
|
||||||
if (pc.scheme == POOL_SCHEME_JERASURE &&
|
if (pc.scheme == POOL_SCHEME_EC &&
|
||||||
(pc.parity_chunks < 1 || pc.parity_chunks > pc.pg_size-2))
|
(pc.parity_chunks < 1 || pc.parity_chunks > pc.pg_size-2))
|
||||||
{
|
{
|
||||||
fprintf(stderr, "Pool %u has invalid parity_chunks (must be between 1 and pg_size-2), skipping pool\n", pool_id);
|
fprintf(stderr, "Pool %u has invalid parity_chunks (must be between 1 and pg_size-2), skipping pool\n", pool_id);
|
||||||
@@ -710,7 +710,7 @@ void etcd_state_client_t::parse_state(const etcd_kv_t & kv)
|
|||||||
// PG MinSize
|
// PG MinSize
|
||||||
pc.pg_minsize = pool_item.second["pg_minsize"].uint64_value();
|
pc.pg_minsize = pool_item.second["pg_minsize"].uint64_value();
|
||||||
if (pc.pg_minsize < 1 || pc.pg_minsize > pc.pg_size ||
|
if (pc.pg_minsize < 1 || pc.pg_minsize > pc.pg_size ||
|
||||||
(pc.scheme == POOL_SCHEME_XOR || pc.scheme == POOL_SCHEME_JERASURE) &&
|
(pc.scheme == POOL_SCHEME_XOR || pc.scheme == POOL_SCHEME_EC) &&
|
||||||
pc.pg_minsize < (pc.pg_size-pc.parity_chunks))
|
pc.pg_minsize < (pc.pg_size-pc.parity_chunks))
|
||||||
{
|
{
|
||||||
fprintf(stderr, "Pool %u has invalid pg_minsize, skipping pool\n", pool_id);
|
fprintf(stderr, "Pool %u has invalid pg_minsize, skipping pool\n", pool_id);
|
||||||
|
@@ -36,6 +36,7 @@ struct sec_data
|
|||||||
/* The list of completed io_u structs. */
|
/* The list of completed io_u structs. */
|
||||||
std::vector<io_u*> completed;
|
std::vector<io_u*> completed;
|
||||||
uint64_t inflight = 0;
|
uint64_t inflight = 0;
|
||||||
|
int mirror_fd = -1;
|
||||||
bool trace = false;
|
bool trace = false;
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -46,6 +47,7 @@ struct sec_options
|
|||||||
char *etcd_host = NULL;
|
char *etcd_host = NULL;
|
||||||
char *etcd_prefix = NULL;
|
char *etcd_prefix = NULL;
|
||||||
char *image = NULL;
|
char *image = NULL;
|
||||||
|
char *mirror_file = NULL;
|
||||||
uint64_t pool = 0;
|
uint64_t pool = 0;
|
||||||
uint64_t inode = 0;
|
uint64_t inode = 0;
|
||||||
int cluster_log = 0;
|
int cluster_log = 0;
|
||||||
@@ -132,6 +134,15 @@ static struct fio_option options[] = {
|
|||||||
.category = FIO_OPT_C_ENGINE,
|
.category = FIO_OPT_C_ENGINE,
|
||||||
.group = FIO_OPT_G_FILENAME,
|
.group = FIO_OPT_G_FILENAME,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
.name = "mirror_file",
|
||||||
|
.lname = "File name to mirror writes to",
|
||||||
|
.type = FIO_OPT_STR_STORE,
|
||||||
|
.off1 = offsetof(struct sec_options, mirror_file),
|
||||||
|
.help = "File name to mirror writes to (for debug purpose)",
|
||||||
|
.category = FIO_OPT_C_ENGINE,
|
||||||
|
.group = FIO_OPT_G_FILENAME,
|
||||||
|
},
|
||||||
{
|
{
|
||||||
.name = "use_rdma",
|
.name = "use_rdma",
|
||||||
.lname = "Use RDMA",
|
.lname = "Use RDMA",
|
||||||
@@ -212,6 +223,16 @@ static int sec_setup(struct thread_data *td)
|
|||||||
td->o.open_files++;
|
td->o.open_files++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (o->mirror_file)
|
||||||
|
{
|
||||||
|
bsd->mirror_fd = open(o->mirror_file, O_CREAT|O_RDWR, 0666);
|
||||||
|
if (bsd->mirror_fd < 0)
|
||||||
|
{
|
||||||
|
td_verror(td, errno, "open mirror file");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (!o->image)
|
if (!o->image)
|
||||||
{
|
{
|
||||||
if (!(o->inode & (((uint64_t)1 << (64-POOL_ID_BITS)) - 1)))
|
if (!(o->inode & (((uint64_t)1 << (64-POOL_ID_BITS)) - 1)))
|
||||||
@@ -265,6 +286,10 @@ static void sec_cleanup(struct thread_data *td)
|
|||||||
sec_data *bsd = (sec_data*)td->io_ops_data;
|
sec_data *bsd = (sec_data*)td->io_ops_data;
|
||||||
if (bsd)
|
if (bsd)
|
||||||
{
|
{
|
||||||
|
if (bsd->mirror_fd >= 0)
|
||||||
|
{
|
||||||
|
close(bsd->mirror_fd);
|
||||||
|
}
|
||||||
if (bsd->watch)
|
if (bsd->watch)
|
||||||
{
|
{
|
||||||
vitastor_c_close_watch(bsd->cli, bsd->watch);
|
vitastor_c_close_watch(bsd->cli, bsd->watch);
|
||||||
@@ -325,6 +350,24 @@ static enum fio_q_status sec_queue(struct thread_data *td, struct io_u *io)
|
|||||||
bsd->last_sync = false;
|
bsd->last_sync = false;
|
||||||
break;
|
break;
|
||||||
case DDIR_WRITE:
|
case DDIR_WRITE:
|
||||||
|
if (opt->mirror_file)
|
||||||
|
{
|
||||||
|
size_t done = 0;
|
||||||
|
while (done < io->xfer_buflen)
|
||||||
|
{
|
||||||
|
ssize_t r = pwrite(bsd->mirror_fd, io->xfer_buf+done, io->xfer_buflen-done, io->offset+done);
|
||||||
|
if (r < 0 && errno != EAGAIN)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "Error writing mirror file: %s\n", strerror(errno));
|
||||||
|
io->error = errno;
|
||||||
|
return FIO_Q_COMPLETED;
|
||||||
|
}
|
||||||
|
if (r > 0)
|
||||||
|
{
|
||||||
|
done += r;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
if (opt->image && vitastor_c_inode_get_readonly(bsd->watch))
|
if (opt->image && vitastor_c_inode_get_readonly(bsd->watch))
|
||||||
{
|
{
|
||||||
io->error = EROFS;
|
io->error = EROFS;
|
||||||
|
@@ -758,3 +758,21 @@ static std::string trim(const std::string & in)
|
|||||||
int end = in.find_last_not_of(" \n\r\t");
|
int end = in.find_last_not_of(" \n\r\t");
|
||||||
return in.substr(begin, end+1-begin);
|
return in.substr(begin, end+1-begin);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool json_is_true(const json11::Json & val)
|
||||||
|
{
|
||||||
|
if (val.is_string())
|
||||||
|
return val == "true" || val == "yes" || val == "1";
|
||||||
|
return val.bool_value();
|
||||||
|
}
|
||||||
|
|
||||||
|
bool json_is_false(const json11::Json & val)
|
||||||
|
{
|
||||||
|
if (val.is_string())
|
||||||
|
return val.string_value() == "false" || val.string_value() == "no" || val.string_value() == "0";
|
||||||
|
if (val.is_number())
|
||||||
|
return val.number_value() == 0;
|
||||||
|
if (val.is_bool())
|
||||||
|
return !val.bool_value();
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
@@ -52,3 +52,6 @@ void http_close(http_co_t *co);
|
|||||||
// Utils
|
// Utils
|
||||||
uint64_t stoull_full(const std::string & str, int base = 10);
|
uint64_t stoull_full(const std::string & str, int base = 10);
|
||||||
std::string strtolower(const std::string & in);
|
std::string strtolower(const std::string & in);
|
||||||
|
// FIXME: move to json11
|
||||||
|
bool json_is_true(const json11::Json & val);
|
||||||
|
bool json_is_false(const json11::Json & val);
|
||||||
|
@@ -92,10 +92,7 @@ void nfs_proxy_t::run(json11::Json cfg)
|
|||||||
if (bind_address == "")
|
if (bind_address == "")
|
||||||
bind_address = "0.0.0.0";
|
bind_address = "0.0.0.0";
|
||||||
default_pool = cfg["pool"].as_string();
|
default_pool = cfg["pool"].as_string();
|
||||||
portmap_enabled = cfg.object_items().find("portmap") == cfg.object_items().end() ||
|
portmap_enabled = !json_is_false(cfg["portmap"]);
|
||||||
cfg["portmap"].uint64_value() ||
|
|
||||||
cfg["portmap"].string_value() == "yes" ||
|
|
||||||
cfg["portmap"].string_value() == "true";
|
|
||||||
nfs_port = cfg["port"].uint64_value() & 0xffff;
|
nfs_port = cfg["port"].uint64_value() & 0xffff;
|
||||||
if (!nfs_port)
|
if (!nfs_port)
|
||||||
nfs_port = 2049;
|
nfs_port = 2049;
|
||||||
|
15
src/osd.cpp
15
src/osd.cpp
@@ -54,7 +54,7 @@ osd_t::osd_t(const json11::Json & config, ring_loop_t *ringloop)
|
|||||||
autosync_writes = max_autosync;
|
autosync_writes = max_autosync;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (this->config["osd_memlock"] == "true" || this->config["osd_memlock"] == "1" || this->config["osd_memlock"] == "yes")
|
if (json_is_true(this->config["osd_memlock"]))
|
||||||
{
|
{
|
||||||
// Lock all OSD memory if requested
|
// Lock all OSD memory if requested
|
||||||
if (mlockall(MCL_CURRENT|MCL_FUTURE
|
if (mlockall(MCL_CURRENT|MCL_FUTURE
|
||||||
@@ -127,11 +127,11 @@ void osd_t::parse_config(const json11::Json & config)
|
|||||||
etcd_report_interval = config["etcd_report_interval"].uint64_value();
|
etcd_report_interval = config["etcd_report_interval"].uint64_value();
|
||||||
if (etcd_report_interval <= 0)
|
if (etcd_report_interval <= 0)
|
||||||
etcd_report_interval = 5;
|
etcd_report_interval = 5;
|
||||||
readonly = config["readonly"] == "true" || config["readonly"] == "1" || config["readonly"] == "yes";
|
readonly = json_is_true(config["readonly"]);
|
||||||
run_primary = config["run_primary"] != "false" && config["run_primary"] != "0" && config["run_primary"] != "no";
|
run_primary = !json_is_false(config["run_primary"]);
|
||||||
no_rebalance = config["no_rebalance"] == "true" || config["no_rebalance"] == "1" || config["no_rebalance"] == "yes";
|
no_rebalance = json_is_true(config["no_rebalance"]);
|
||||||
no_recovery = config["no_recovery"] == "true" || config["no_recovery"] == "1" || config["no_recovery"] == "yes";
|
no_recovery = json_is_true(config["no_recovery"]);
|
||||||
allow_test_ops = config["allow_test_ops"] == "true" || config["allow_test_ops"] == "1" || config["allow_test_ops"] == "yes";
|
allow_test_ops = json_is_true(config["allow_test_ops"]);
|
||||||
if (config["immediate_commit"] == "all")
|
if (config["immediate_commit"] == "all")
|
||||||
immediate_commit = IMMEDIATE_ALL;
|
immediate_commit = IMMEDIATE_ALL;
|
||||||
else if (config["immediate_commit"] == "small")
|
else if (config["immediate_commit"] == "small")
|
||||||
@@ -168,6 +168,9 @@ void osd_t::parse_config(const json11::Json & config)
|
|||||||
slow_log_interval = config["slow_log_interval"].uint64_value();
|
slow_log_interval = config["slow_log_interval"].uint64_value();
|
||||||
if (!slow_log_interval)
|
if (!slow_log_interval)
|
||||||
slow_log_interval = 10;
|
slow_log_interval = 10;
|
||||||
|
inode_vanish_time = config["inode_vanish_time"].uint64_value();
|
||||||
|
if (!inode_vanish_time)
|
||||||
|
inode_vanish_time = 60;
|
||||||
}
|
}
|
||||||
|
|
||||||
void osd_t::bind_socket()
|
void osd_t::bind_socket()
|
||||||
|
@@ -113,6 +113,7 @@ class osd_t
|
|||||||
int autosync_writes = DEFAULT_AUTOSYNC_WRITES;
|
int autosync_writes = DEFAULT_AUTOSYNC_WRITES;
|
||||||
int recovery_queue_depth = DEFAULT_RECOVERY_QUEUE;
|
int recovery_queue_depth = DEFAULT_RECOVERY_QUEUE;
|
||||||
int recovery_sync_batch = DEFAULT_RECOVERY_BATCH;
|
int recovery_sync_batch = DEFAULT_RECOVERY_BATCH;
|
||||||
|
int inode_vanish_time = 60;
|
||||||
int log_level = 0;
|
int log_level = 0;
|
||||||
|
|
||||||
// cluster state
|
// cluster state
|
||||||
@@ -165,6 +166,7 @@ class osd_t
|
|||||||
// op statistics
|
// op statistics
|
||||||
osd_op_stats_t prev_stats;
|
osd_op_stats_t prev_stats;
|
||||||
std::map<uint64_t, inode_stats_t> inode_stats;
|
std::map<uint64_t, inode_stats_t> inode_stats;
|
||||||
|
std::map<uint64_t, timespec> vanishing_inodes;
|
||||||
const char* recovery_stat_names[2] = { "degraded", "misplaced" };
|
const char* recovery_stat_names[2] = { "degraded", "misplaced" };
|
||||||
uint64_t recovery_stat_count[2][2] = {};
|
uint64_t recovery_stat_count[2][2] = {};
|
||||||
uint64_t recovery_stat_bytes[2][2] = {};
|
uint64_t recovery_stat_bytes[2][2] = {};
|
||||||
|
@@ -186,7 +186,8 @@ void osd_t::report_statistics()
|
|||||||
json11::Json::object inode_space;
|
json11::Json::object inode_space;
|
||||||
json11::Json::object last_stat;
|
json11::Json::object last_stat;
|
||||||
pool_id_t last_pool = 0;
|
pool_id_t last_pool = 0;
|
||||||
for (auto kv: bs->get_inode_space_stats())
|
auto & bs_inode_space = bs->get_inode_space_stats();
|
||||||
|
for (auto kv: bs_inode_space)
|
||||||
{
|
{
|
||||||
pool_id_t pool_id = INODE_POOL(kv.first);
|
pool_id_t pool_id = INODE_POOL(kv.first);
|
||||||
uint64_t only_inode_num = INODE_NO_POOL(kv.first);
|
uint64_t only_inode_num = INODE_NO_POOL(kv.first);
|
||||||
@@ -204,8 +205,26 @@ void osd_t::report_statistics()
|
|||||||
last_stat = json11::Json::object();
|
last_stat = json11::Json::object();
|
||||||
last_pool = 0;
|
last_pool = 0;
|
||||||
json11::Json::object inode_ops;
|
json11::Json::object inode_ops;
|
||||||
for (auto kv: inode_stats)
|
timespec tv_now;
|
||||||
|
for (auto st_it = inode_stats.begin(); st_it != inode_stats.end(); )
|
||||||
{
|
{
|
||||||
|
auto & kv = *st_it;
|
||||||
|
if (!bs_inode_space[kv.first])
|
||||||
|
{
|
||||||
|
// Is it an empty inode?
|
||||||
|
if (!tv_now.tv_sec)
|
||||||
|
clock_gettime(CLOCK_REALTIME, &tv_now);
|
||||||
|
auto & tv_van = vanishing_inodes[kv.first];
|
||||||
|
if (!tv_van.tv_sec)
|
||||||
|
tv_van = tv_now;
|
||||||
|
else if (tv_van.tv_sec < tv_now.tv_sec-inode_vanish_time)
|
||||||
|
{
|
||||||
|
// Inode vanished <inode_vanish_time> seconds ago, remove it from stats
|
||||||
|
vanishing_inodes.erase(kv.first);
|
||||||
|
inode_stats.erase(st_it++);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
pool_id_t pool_id = INODE_POOL(kv.first);
|
pool_id_t pool_id = INODE_POOL(kv.first);
|
||||||
uint64_t only_inode_num = (kv.first & (((uint64_t)1 << (64-POOL_ID_BITS)) - 1));
|
uint64_t only_inode_num = (kv.first & (((uint64_t)1 << (64-POOL_ID_BITS)) - 1));
|
||||||
if (!last_pool || pool_id != last_pool)
|
if (!last_pool || pool_id != last_pool)
|
||||||
@@ -232,6 +251,7 @@ void osd_t::report_statistics()
|
|||||||
{ "bytes", kv.second.op_bytes[INODE_STATS_DELETE] },
|
{ "bytes", kv.second.op_bytes[INODE_STATS_DELETE] },
|
||||||
} },
|
} },
|
||||||
};
|
};
|
||||||
|
st_it++;
|
||||||
}
|
}
|
||||||
if (last_pool)
|
if (last_pool)
|
||||||
inode_ops[std::to_string(last_pool)] = last_stat;
|
inode_ops[std::to_string(last_pool)] = last_stat;
|
||||||
@@ -370,7 +390,11 @@ void osd_t::acquire_lease()
|
|||||||
etcd_lease_id = data["ID"].string_value();
|
etcd_lease_id = data["ID"].string_value();
|
||||||
create_osd_state();
|
create_osd_state();
|
||||||
});
|
});
|
||||||
printf("[OSD %lu] reporting to etcd at %s every %d seconds\n", this->osd_num, config["etcd_address"].string_value().c_str(), etcd_report_interval);
|
printf(
|
||||||
|
"[OSD %lu] reporting to etcd at %s every %d seconds\n", this->osd_num,
|
||||||
|
(config["etcd_address"].is_string() ? config["etcd_address"].string_value() : config["etcd_address"].dump()).c_str(),
|
||||||
|
etcd_report_interval
|
||||||
|
);
|
||||||
tfd->set_timer(etcd_report_interval*1000, true, [this](int timer_id)
|
tfd->set_timer(etcd_report_interval*1000, true, [this](int timer_id)
|
||||||
{
|
{
|
||||||
renew_lease();
|
renew_lease();
|
||||||
@@ -676,9 +700,9 @@ void osd_t::apply_pg_config()
|
|||||||
.all_peers = std::vector<osd_num_t>(all_peers.begin(), all_peers.end()),
|
.all_peers = std::vector<osd_num_t>(all_peers.begin(), all_peers.end()),
|
||||||
.target_set = pg_cfg.target_set,
|
.target_set = pg_cfg.target_set,
|
||||||
};
|
};
|
||||||
if (pg.scheme == POOL_SCHEME_JERASURE)
|
if (pg.scheme == POOL_SCHEME_EC)
|
||||||
{
|
{
|
||||||
use_jerasure(pg.pg_size, pg.pg_data_size, true);
|
use_ec(pg.pg_size, pg.pg_data_size, true);
|
||||||
}
|
}
|
||||||
this->pg_state_dirty.insert({ .pool_id = pool_id, .pg_num = pg_num });
|
this->pg_state_dirty.insert({ .pool_id = pool_id, .pg_num = pg_num });
|
||||||
pg.print_state();
|
pg.print_state();
|
||||||
@@ -890,9 +914,9 @@ void osd_t::report_pg_states()
|
|||||||
{
|
{
|
||||||
// Forget offline PGs after reporting their state
|
// Forget offline PGs after reporting their state
|
||||||
// (if the state wasn't changed again)
|
// (if the state wasn't changed again)
|
||||||
if (pg_it->second.scheme == POOL_SCHEME_JERASURE)
|
if (pg_it->second.scheme == POOL_SCHEME_EC)
|
||||||
{
|
{
|
||||||
use_jerasure(pg_it->second.pg_size, pg_it->second.pg_data_size, false);
|
use_ec(pg_it->second.pg_size, pg_it->second.pg_data_size, false);
|
||||||
}
|
}
|
||||||
this->pgs.erase(pg_it);
|
this->pgs.erase(pg_it);
|
||||||
}
|
}
|
||||||
|
@@ -5,7 +5,7 @@
|
|||||||
|
|
||||||
#define POOL_SCHEME_REPLICATED 1
|
#define POOL_SCHEME_REPLICATED 1
|
||||||
#define POOL_SCHEME_XOR 2
|
#define POOL_SCHEME_XOR 2
|
||||||
#define POOL_SCHEME_JERASURE 3
|
#define POOL_SCHEME_EC 3
|
||||||
#define POOL_ID_MAX 0x10000
|
#define POOL_ID_MAX 0x10000
|
||||||
#define POOL_ID_BITS 16
|
#define POOL_ID_BITS 16
|
||||||
#define INODE_POOL(inode) (pool_id_t)((inode) >> (64 - POOL_ID_BITS))
|
#define INODE_POOL(inode) (pool_id_t)((inode) >> (64 - POOL_ID_BITS))
|
||||||
|
@@ -317,7 +317,7 @@ void osd_t::submit_sync_and_list_subop(osd_num_t role_osd, pg_peering_state_t *p
|
|||||||
// Self
|
// Self
|
||||||
osd_op_t *op = new osd_op_t();
|
osd_op_t *op = new osd_op_t();
|
||||||
op->op_type = 0;
|
op->op_type = 0;
|
||||||
op->peer_fd = 0;
|
op->peer_fd = -1;
|
||||||
clock_gettime(CLOCK_REALTIME, &op->tv_begin);
|
clock_gettime(CLOCK_REALTIME, &op->tv_begin);
|
||||||
op->bs_op = new blockstore_op_t();
|
op->bs_op = new blockstore_op_t();
|
||||||
op->bs_op->opcode = BS_OP_SYNC;
|
op->bs_op->opcode = BS_OP_SYNC;
|
||||||
@@ -383,7 +383,7 @@ void osd_t::submit_list_subop(osd_num_t role_osd, pg_peering_state_t *ps)
|
|||||||
// Self
|
// Self
|
||||||
osd_op_t *op = new osd_op_t();
|
osd_op_t *op = new osd_op_t();
|
||||||
op->op_type = 0;
|
op->op_type = 0;
|
||||||
op->peer_fd = 0;
|
op->peer_fd = -1;
|
||||||
clock_gettime(CLOCK_REALTIME, &op->tv_begin);
|
clock_gettime(CLOCK_REALTIME, &op->tv_begin);
|
||||||
op->bs_op = new blockstore_op_t();
|
op->bs_op = new blockstore_op_t();
|
||||||
op->bs_op->opcode = BS_OP_LIST;
|
op->bs_op->opcode = BS_OP_LIST;
|
||||||
|
@@ -241,9 +241,9 @@ resume_2:
|
|||||||
{
|
{
|
||||||
reconstruct_stripes_xor(stripes, op_data->pg_size, clean_entry_bitmap_size);
|
reconstruct_stripes_xor(stripes, op_data->pg_size, clean_entry_bitmap_size);
|
||||||
}
|
}
|
||||||
else if (op_data->scheme == POOL_SCHEME_JERASURE)
|
else if (op_data->scheme == POOL_SCHEME_EC)
|
||||||
{
|
{
|
||||||
reconstruct_stripes_jerasure(stripes, op_data->pg_size, op_data->pg_data_size, clean_entry_bitmap_size);
|
reconstruct_stripes_ec(stripes, op_data->pg_size, op_data->pg_data_size, clean_entry_bitmap_size);
|
||||||
}
|
}
|
||||||
cur_op->iov.push_back(op_data->stripes[0].bmp_buf, cur_op->reply.rw.bitmap_len);
|
cur_op->iov.push_back(op_data->stripes[0].bmp_buf, cur_op->reply.rw.bitmap_len);
|
||||||
for (int role = 0; role < op_data->pg_size; role++)
|
for (int role = 0; role < op_data->pg_size; role++)
|
||||||
|
@@ -110,9 +110,9 @@ resume_1:
|
|||||||
{
|
{
|
||||||
reconstruct_stripes_xor(local_stripes, pg.pg_size, clean_entry_bitmap_size);
|
reconstruct_stripes_xor(local_stripes, pg.pg_size, clean_entry_bitmap_size);
|
||||||
}
|
}
|
||||||
else if (pg.scheme == POOL_SCHEME_JERASURE)
|
else if (pg.scheme == POOL_SCHEME_EC)
|
||||||
{
|
{
|
||||||
reconstruct_stripes_jerasure(local_stripes, pg.pg_size, pg.pg_data_size, clean_entry_bitmap_size);
|
reconstruct_stripes_ec(local_stripes, pg.pg_size, pg.pg_data_size, clean_entry_bitmap_size);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@@ -295,6 +295,7 @@ int osd_t::submit_bitmap_subops(osd_op_t *cur_op, pg_t & pg)
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
// Fail it immediately
|
// Fail it immediately
|
||||||
|
subop->peer_fd = -1;
|
||||||
subop->reply.hdr.retval = -EPIPE;
|
subop->reply.hdr.retval = -EPIPE;
|
||||||
subop->callback(subop);
|
subop->callback(subop);
|
||||||
}
|
}
|
||||||
@@ -505,9 +506,9 @@ void osd_t::send_chained_read_results(pg_t & pg, osd_op_t *cur_op)
|
|||||||
{
|
{
|
||||||
reconstruct_stripes_xor(stripes, pg.pg_size, clean_entry_bitmap_size);
|
reconstruct_stripes_xor(stripes, pg.pg_size, clean_entry_bitmap_size);
|
||||||
}
|
}
|
||||||
else if (op_data->scheme == POOL_SCHEME_JERASURE)
|
else if (op_data->scheme == POOL_SCHEME_EC)
|
||||||
{
|
{
|
||||||
reconstruct_stripes_jerasure(stripes, pg.pg_size, pg.pg_data_size, clean_entry_bitmap_size);
|
reconstruct_stripes_ec(stripes, pg.pg_size, pg.pg_data_size, clean_entry_bitmap_size);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -233,6 +233,7 @@ int osd_t::submit_primary_subop_batch(int submit_type, inode_t inode, uint64_t o
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
// Fail it immediately
|
// Fail it immediately
|
||||||
|
subop->peer_fd = -1;
|
||||||
subop->reply.hdr.retval = -EPIPE;
|
subop->reply.hdr.retval = -EPIPE;
|
||||||
subop->callback(subop);
|
subop->callback(subop);
|
||||||
}
|
}
|
||||||
@@ -321,7 +322,21 @@ void osd_t::handle_primary_subop(osd_op_t *subop, osd_op_t *cur_op)
|
|||||||
osd_primary_op_data_t *op_data = cur_op->op_data;
|
osd_primary_op_data_t *op_data = cur_op->op_data;
|
||||||
if (retval != expected)
|
if (retval != expected)
|
||||||
{
|
{
|
||||||
printf("%s subop failed: retval = %d (expected %d)\n", osd_op_names[opcode], retval, expected);
|
if (opcode == OSD_OP_SEC_READ || opcode == OSD_OP_SEC_WRITE || opcode == OSD_OP_SEC_WRITE_STABLE)
|
||||||
|
{
|
||||||
|
printf(
|
||||||
|
"%s subop to %lx:%lx v%lu failed on peer %d: retval = %d (expected %d)\n",
|
||||||
|
osd_op_names[opcode], subop->req.sec_rw.oid.inode, subop->req.sec_rw.oid.stripe, subop->req.sec_rw.version,
|
||||||
|
subop->peer_fd, retval, expected
|
||||||
|
);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
printf(
|
||||||
|
"%s subop failed on peer %d: retval = %d (expected %d)\n",
|
||||||
|
osd_op_names[opcode], subop->peer_fd, retval, expected
|
||||||
|
);
|
||||||
|
}
|
||||||
if (retval == -EPIPE)
|
if (retval == -EPIPE)
|
||||||
{
|
{
|
||||||
op_data->epipe++;
|
op_data->epipe++;
|
||||||
@@ -495,6 +510,7 @@ void osd_t::submit_primary_del_batch(osd_op_t *cur_op, obj_ver_osd_t *chunks_to_
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
// Fail it immediately
|
// Fail it immediately
|
||||||
|
subops[i].peer_fd = -1;
|
||||||
subops[i].reply.hdr.retval = -EPIPE;
|
subops[i].reply.hdr.retval = -EPIPE;
|
||||||
subops[i].callback(&subops[i]);
|
subops[i].callback(&subops[i]);
|
||||||
}
|
}
|
||||||
@@ -609,6 +625,7 @@ void osd_t::submit_primary_stab_subops(osd_op_t *cur_op)
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
// Fail it immediately
|
// Fail it immediately
|
||||||
|
subops[i].peer_fd = -1;
|
||||||
subops[i].reply.hdr.retval = -EPIPE;
|
subops[i].reply.hdr.retval = -EPIPE;
|
||||||
subops[i].callback(&subops[i]);
|
subops[i].callback(&subops[i]);
|
||||||
}
|
}
|
||||||
|
@@ -132,9 +132,9 @@ resume_3:
|
|||||||
{
|
{
|
||||||
calc_rmw_parity_xor(op_data->stripes, pg.pg_size, op_data->prev_set, pg.cur_set.data(), bs_block_size, clean_entry_bitmap_size);
|
calc_rmw_parity_xor(op_data->stripes, pg.pg_size, op_data->prev_set, pg.cur_set.data(), bs_block_size, clean_entry_bitmap_size);
|
||||||
}
|
}
|
||||||
else if (pg.scheme == POOL_SCHEME_JERASURE)
|
else if (pg.scheme == POOL_SCHEME_EC)
|
||||||
{
|
{
|
||||||
calc_rmw_parity_jerasure(op_data->stripes, pg.pg_size, op_data->pg_data_size, op_data->prev_set, pg.cur_set.data(), bs_block_size, clean_entry_bitmap_size);
|
calc_rmw_parity_ec(op_data->stripes, pg.pg_size, op_data->pg_data_size, op_data->prev_set, pg.cur_set.data(), bs_block_size, clean_entry_bitmap_size);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Send writes
|
// Send writes
|
||||||
|
169
src/osd_rmw.cpp
169
src/osd_rmw.cpp
@@ -4,8 +4,11 @@
|
|||||||
#include <stdexcept>
|
#include <stdexcept>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
#include <jerasure/reed_sol.h>
|
#include <reed_sol.h>
|
||||||
#include <jerasure.h>
|
#include <jerasure.h>
|
||||||
|
#ifdef WITH_ISAL
|
||||||
|
#include <isa-l/erasure_code.h>
|
||||||
|
#endif
|
||||||
#include <map>
|
#include <map>
|
||||||
#include "allocator.h"
|
#include "allocator.h"
|
||||||
#include "xor.h"
|
#include "xor.h"
|
||||||
@@ -147,13 +150,14 @@ inline bool operator < (const reed_sol_erased_t &a, const reed_sol_erased_t &b)
|
|||||||
struct reed_sol_matrix_t
|
struct reed_sol_matrix_t
|
||||||
{
|
{
|
||||||
int refs = 0;
|
int refs = 0;
|
||||||
int *data;
|
int *je_data;
|
||||||
std::map<reed_sol_erased_t, int*> decodings;
|
uint8_t *isal_data;
|
||||||
|
std::map<reed_sol_erased_t, void*> decodings;
|
||||||
};
|
};
|
||||||
|
|
||||||
std::map<uint64_t, reed_sol_matrix_t> matrices;
|
static std::map<uint64_t, reed_sol_matrix_t> matrices;
|
||||||
|
|
||||||
void use_jerasure(int pg_size, int pg_minsize, bool use)
|
void use_ec(int pg_size, int pg_minsize, bool use)
|
||||||
{
|
{
|
||||||
uint64_t key = (uint64_t)pg_size | ((uint64_t)pg_minsize) << 32;
|
uint64_t key = (uint64_t)pg_size | ((uint64_t)pg_minsize) << 32;
|
||||||
auto rs_it = matrices.find(key);
|
auto rs_it = matrices.find(key);
|
||||||
@@ -164,19 +168,33 @@ void use_jerasure(int pg_size, int pg_minsize, bool use)
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
int *matrix = reed_sol_vandermonde_coding_matrix(pg_minsize, pg_size-pg_minsize, OSD_JERASURE_W);
|
int *matrix = reed_sol_vandermonde_coding_matrix(pg_minsize, pg_size-pg_minsize, OSD_JERASURE_W);
|
||||||
|
uint8_t *isal_table = NULL;
|
||||||
|
#ifdef WITH_ISAL
|
||||||
|
uint8_t *isal_matrix = (uint8_t*)malloc_or_die(pg_minsize*(pg_size-pg_minsize));
|
||||||
|
for (int i = 0; i < pg_minsize*(pg_size-pg_minsize); i++)
|
||||||
|
{
|
||||||
|
isal_matrix[i] = matrix[i];
|
||||||
|
}
|
||||||
|
isal_table = (uint8_t*)malloc_or_die(pg_minsize*(pg_size-pg_minsize)*32);
|
||||||
|
ec_init_tables(pg_minsize, pg_size-pg_minsize, isal_matrix, isal_table);
|
||||||
|
free(isal_matrix);
|
||||||
|
#endif
|
||||||
matrices[key] = (reed_sol_matrix_t){
|
matrices[key] = (reed_sol_matrix_t){
|
||||||
.refs = 0,
|
.refs = 0,
|
||||||
.data = matrix,
|
.je_data = matrix,
|
||||||
|
.isal_data = isal_table,
|
||||||
};
|
};
|
||||||
rs_it = matrices.find(key);
|
rs_it = matrices.find(key);
|
||||||
}
|
}
|
||||||
rs_it->second.refs += (!use ? -1 : 1);
|
rs_it->second.refs += (!use ? -1 : 1);
|
||||||
if (rs_it->second.refs <= 0)
|
if (rs_it->second.refs <= 0)
|
||||||
{
|
{
|
||||||
free(rs_it->second.data);
|
free(rs_it->second.je_data);
|
||||||
|
if (rs_it->second.isal_data)
|
||||||
|
free(rs_it->second.isal_data);
|
||||||
for (auto dec_it = rs_it->second.decodings.begin(); dec_it != rs_it->second.decodings.end();)
|
for (auto dec_it = rs_it->second.decodings.begin(); dec_it != rs_it->second.decodings.end();)
|
||||||
{
|
{
|
||||||
int *data = dec_it->second;
|
void *data = dec_it->second;
|
||||||
rs_it->second.decodings.erase(dec_it++);
|
rs_it->second.decodings.erase(dec_it++);
|
||||||
free(data);
|
free(data);
|
||||||
}
|
}
|
||||||
@@ -184,7 +202,7 @@ void use_jerasure(int pg_size, int pg_minsize, bool use)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
reed_sol_matrix_t* get_jerasure_matrix(int pg_size, int pg_minsize)
|
static reed_sol_matrix_t* get_ec_matrix(int pg_size, int pg_minsize)
|
||||||
{
|
{
|
||||||
uint64_t key = (uint64_t)pg_size | ((uint64_t)pg_minsize) << 32;
|
uint64_t key = (uint64_t)pg_size | ((uint64_t)pg_minsize) << 32;
|
||||||
auto rs_it = matrices.find(key);
|
auto rs_it = matrices.find(key);
|
||||||
@@ -199,7 +217,7 @@ reed_sol_matrix_t* get_jerasure_matrix(int pg_size, int pg_minsize)
|
|||||||
// we don't need it. also it makes an extra allocation of int *erased on every call and doesn't cache
|
// we don't need it. also it makes an extra allocation of int *erased on every call and doesn't cache
|
||||||
// the decoding matrix.
|
// the decoding matrix.
|
||||||
// all these flaws are fixed in this function:
|
// all these flaws are fixed in this function:
|
||||||
int* get_jerasure_decoding_matrix(osd_rmw_stripe_t *stripes, int pg_size, int pg_minsize)
|
static void* get_jerasure_decoding_matrix(osd_rmw_stripe_t *stripes, int pg_size, int pg_minsize)
|
||||||
{
|
{
|
||||||
int edd = 0;
|
int edd = 0;
|
||||||
int erased[pg_size];
|
int erased[pg_size];
|
||||||
@@ -210,16 +228,57 @@ int* get_jerasure_decoding_matrix(osd_rmw_stripe_t *stripes, int pg_size, int pg
|
|||||||
edd++;
|
edd++;
|
||||||
if (edd == 0)
|
if (edd == 0)
|
||||||
return NULL;
|
return NULL;
|
||||||
reed_sol_matrix_t *matrix = get_jerasure_matrix(pg_size, pg_minsize);
|
reed_sol_matrix_t *matrix = get_ec_matrix(pg_size, pg_minsize);
|
||||||
auto dec_it = matrix->decodings.find((reed_sol_erased_t){ .data = erased, .size = pg_size });
|
auto dec_it = matrix->decodings.find((reed_sol_erased_t){ .data = erased, .size = pg_size });
|
||||||
if (dec_it == matrix->decodings.end())
|
if (dec_it == matrix->decodings.end())
|
||||||
{
|
{
|
||||||
|
#ifdef WITH_ISAL
|
||||||
|
int smrow = 0;
|
||||||
|
uint8_t *submatrix = (uint8_t*)malloc_or_die(pg_minsize*pg_minsize*2);
|
||||||
|
for (int i = 0; i < pg_size; i++)
|
||||||
|
{
|
||||||
|
if (!erased[i])
|
||||||
|
{
|
||||||
|
if (i < pg_minsize)
|
||||||
|
{
|
||||||
|
for (int j = 0; j < pg_minsize; j++)
|
||||||
|
submatrix[smrow*pg_minsize + j] = j == i;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
for (int j = 0; j < pg_minsize; j++)
|
||||||
|
submatrix[smrow*pg_minsize + j] = (uint8_t)matrix->je_data[(i-pg_minsize)*pg_minsize + j];
|
||||||
|
}
|
||||||
|
smrow++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (smrow < pg_minsize)
|
||||||
|
{
|
||||||
|
free(submatrix);
|
||||||
|
throw std::runtime_error("failed to make an invertible submatrix");
|
||||||
|
}
|
||||||
|
gf_invert_matrix(submatrix, submatrix + pg_minsize*pg_minsize, pg_minsize);
|
||||||
|
smrow = 0;
|
||||||
|
for (int i = 0; i < pg_minsize; i++)
|
||||||
|
{
|
||||||
|
if (erased[i])
|
||||||
|
{
|
||||||
|
memcpy(submatrix + pg_minsize*smrow, submatrix + (pg_minsize+i)*pg_minsize, pg_minsize);
|
||||||
|
smrow++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
uint8_t *rectable = (uint8_t*)malloc_or_die(32*smrow*pg_minsize + pg_size*sizeof(int));
|
||||||
|
ec_init_tables(pg_minsize, smrow, submatrix, rectable);
|
||||||
|
free(submatrix);
|
||||||
|
int *erased_copy = (int*)(rectable + 32*smrow*pg_minsize);
|
||||||
|
memcpy(erased_copy, erased, pg_size*sizeof(int));
|
||||||
|
matrix->decodings.emplace((reed_sol_erased_t){ .data = erased_copy, .size = pg_size }, rectable);
|
||||||
|
return rectable;
|
||||||
|
#else
|
||||||
int *dm_ids = (int*)malloc_or_die(sizeof(int)*(pg_minsize + pg_minsize*pg_minsize + pg_size));
|
int *dm_ids = (int*)malloc_or_die(sizeof(int)*(pg_minsize + pg_minsize*pg_minsize + pg_size));
|
||||||
int *decoding_matrix = dm_ids + pg_minsize;
|
int *decoding_matrix = dm_ids + pg_minsize;
|
||||||
if (!dm_ids)
|
|
||||||
throw std::bad_alloc();
|
|
||||||
// we always use row_k_ones=1 and w=8 (OSD_JERASURE_W)
|
// we always use row_k_ones=1 and w=8 (OSD_JERASURE_W)
|
||||||
if (jerasure_make_decoding_matrix(pg_minsize, pg_size-pg_minsize, OSD_JERASURE_W, matrix->data, erased, decoding_matrix, dm_ids) < 0)
|
if (jerasure_make_decoding_matrix(pg_minsize, pg_size-pg_minsize, OSD_JERASURE_W, matrix->je_data, erased, decoding_matrix, dm_ids) < 0)
|
||||||
{
|
{
|
||||||
free(dm_ids);
|
free(dm_ids);
|
||||||
throw std::runtime_error("jerasure_make_decoding_matrix() failed");
|
throw std::runtime_error("jerasure_make_decoding_matrix() failed");
|
||||||
@@ -228,13 +287,64 @@ int* get_jerasure_decoding_matrix(osd_rmw_stripe_t *stripes, int pg_size, int pg
|
|||||||
memcpy(erased_copy, erased, pg_size*sizeof(int));
|
memcpy(erased_copy, erased, pg_size*sizeof(int));
|
||||||
matrix->decodings.emplace((reed_sol_erased_t){ .data = erased_copy, .size = pg_size }, dm_ids);
|
matrix->decodings.emplace((reed_sol_erased_t){ .data = erased_copy, .size = pg_size }, dm_ids);
|
||||||
return dm_ids;
|
return dm_ids;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
return dec_it->second;
|
return dec_it->second;
|
||||||
}
|
}
|
||||||
|
|
||||||
void reconstruct_stripes_jerasure(osd_rmw_stripe_t *stripes, int pg_size, int pg_minsize, uint32_t bitmap_size)
|
#ifdef WITH_ISAL
|
||||||
|
void reconstruct_stripes_ec(osd_rmw_stripe_t *stripes, int pg_size, int pg_minsize, uint32_t bitmap_size)
|
||||||
{
|
{
|
||||||
int *dm_ids = get_jerasure_decoding_matrix(stripes, pg_size, pg_minsize);
|
uint8_t *dectable = (uint8_t*)get_jerasure_decoding_matrix(stripes, pg_size, pg_minsize);
|
||||||
|
if (!dectable)
|
||||||
|
{
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
uint8_t *data_ptrs[pg_size];
|
||||||
|
int wanted_base = 0, wanted = 0;
|
||||||
|
uint64_t read_start = 0, read_end = 0;
|
||||||
|
auto recover_seq = [&]()
|
||||||
|
{
|
||||||
|
int orig = 0;
|
||||||
|
for (int other = 0; other < pg_size; other++)
|
||||||
|
{
|
||||||
|
if (stripes[other].read_end != 0 && !stripes[other].missing)
|
||||||
|
{
|
||||||
|
assert(stripes[other].read_start <= read_start);
|
||||||
|
assert(stripes[other].read_end >= read_end);
|
||||||
|
data_ptrs[orig++] = (uint8_t*)stripes[other].read_buf + (read_start - stripes[other].read_start);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ec_encode_data(
|
||||||
|
read_end-read_start, pg_minsize, wanted, dectable + wanted_base*32*pg_minsize,
|
||||||
|
data_ptrs, data_ptrs + pg_minsize
|
||||||
|
);
|
||||||
|
wanted_base += wanted;
|
||||||
|
wanted = 0;
|
||||||
|
};
|
||||||
|
for (int role = 0; role < pg_minsize; role++)
|
||||||
|
{
|
||||||
|
if (stripes[role].read_end != 0 && stripes[role].missing)
|
||||||
|
{
|
||||||
|
if (read_end && (stripes[role].read_start != read_start ||
|
||||||
|
stripes[role].read_end != read_end))
|
||||||
|
{
|
||||||
|
recover_seq();
|
||||||
|
}
|
||||||
|
read_start = stripes[role].read_start;
|
||||||
|
read_end = stripes[role].read_end;
|
||||||
|
data_ptrs[pg_minsize + (wanted++)] = (uint8_t*)stripes[role].read_buf;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (wanted > 0)
|
||||||
|
{
|
||||||
|
recover_seq();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
void reconstruct_stripes_ec(osd_rmw_stripe_t *stripes, int pg_size, int pg_minsize, uint32_t bitmap_size)
|
||||||
|
{
|
||||||
|
int *dm_ids = (int*)get_jerasure_decoding_matrix(stripes, pg_size, pg_minsize);
|
||||||
if (!dm_ids)
|
if (!dm_ids)
|
||||||
{
|
{
|
||||||
return;
|
return;
|
||||||
@@ -242,7 +352,9 @@ void reconstruct_stripes_jerasure(osd_rmw_stripe_t *stripes, int pg_size, int pg
|
|||||||
int *decoding_matrix = dm_ids + pg_minsize;
|
int *decoding_matrix = dm_ids + pg_minsize;
|
||||||
char *data_ptrs[pg_size];
|
char *data_ptrs[pg_size];
|
||||||
for (int role = 0; role < pg_size; role++)
|
for (int role = 0; role < pg_size; role++)
|
||||||
|
{
|
||||||
data_ptrs[role] = NULL;
|
data_ptrs[role] = NULL;
|
||||||
|
}
|
||||||
for (int role = 0; role < pg_minsize; role++)
|
for (int role = 0; role < pg_minsize; role++)
|
||||||
{
|
{
|
||||||
if (stripes[role].read_end != 0 && stripes[role].missing)
|
if (stripes[role].read_end != 0 && stripes[role].missing)
|
||||||
@@ -279,6 +391,7 @@ void reconstruct_stripes_jerasure(osd_rmw_stripe_t *stripes, int pg_size, int pg
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
int extend_missing_stripes(osd_rmw_stripe_t *stripes, osd_num_t *osd_set, int pg_minsize, int pg_size)
|
int extend_missing_stripes(osd_rmw_stripe_t *stripes, osd_num_t *osd_set, int pg_minsize, int pg_size)
|
||||||
{
|
{
|
||||||
@@ -679,12 +792,12 @@ void calc_rmw_parity_xor(osd_rmw_stripe_t *stripes, int pg_size, uint64_t *read_
|
|||||||
calc_rmw_parity_copy_parity(stripes, pg_size, pg_minsize, read_osd_set, write_osd_set, chunk_size, start, end);
|
calc_rmw_parity_copy_parity(stripes, pg_size, pg_minsize, read_osd_set, write_osd_set, chunk_size, start, end);
|
||||||
}
|
}
|
||||||
|
|
||||||
void calc_rmw_parity_jerasure(osd_rmw_stripe_t *stripes, int pg_size, int pg_minsize,
|
void calc_rmw_parity_ec(osd_rmw_stripe_t *stripes, int pg_size, int pg_minsize,
|
||||||
uint64_t *read_osd_set, uint64_t *write_osd_set, uint32_t chunk_size, uint32_t bitmap_size)
|
uint64_t *read_osd_set, uint64_t *write_osd_set, uint32_t chunk_size, uint32_t bitmap_size)
|
||||||
{
|
{
|
||||||
uint32_t bitmap_granularity = bitmap_size > 0 ? chunk_size / bitmap_size / 8 : 0;
|
uint32_t bitmap_granularity = bitmap_size > 0 ? chunk_size / bitmap_size / 8 : 0;
|
||||||
reed_sol_matrix_t *matrix = get_jerasure_matrix(pg_size, pg_minsize);
|
reed_sol_matrix_t *matrix = get_ec_matrix(pg_size, pg_minsize);
|
||||||
reconstruct_stripes_jerasure(stripes, pg_size, pg_minsize, bitmap_size);
|
reconstruct_stripes_ec(stripes, pg_size, pg_minsize, bitmap_size);
|
||||||
uint32_t start = 0, end = 0;
|
uint32_t start = 0, end = 0;
|
||||||
calc_rmw_parity_copy_mod(stripes, pg_size, pg_minsize, read_osd_set, write_osd_set, chunk_size, bitmap_granularity, start, end);
|
calc_rmw_parity_copy_mod(stripes, pg_size, pg_minsize, read_osd_set, write_osd_set, chunk_size, bitmap_granularity, start, end);
|
||||||
if (end != 0)
|
if (end != 0)
|
||||||
@@ -741,20 +854,34 @@ void calc_rmw_parity_jerasure(osd_rmw_stripe_t *stripes, int pg_size, int pg_min
|
|||||||
curbuf[i]++;
|
curbuf[i]++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#ifdef WITH_ISAL
|
||||||
|
ec_encode_data(
|
||||||
|
next_end-pos, pg_minsize, pg_size-pg_minsize, matrix->isal_data,
|
||||||
|
(uint8_t**)data_ptrs, (uint8_t**)data_ptrs+pg_minsize
|
||||||
|
);
|
||||||
|
#else
|
||||||
jerasure_matrix_encode(
|
jerasure_matrix_encode(
|
||||||
pg_minsize, pg_size-pg_minsize, OSD_JERASURE_W, matrix->data,
|
pg_minsize, pg_size-pg_minsize, OSD_JERASURE_W, matrix->je_data,
|
||||||
(char**)data_ptrs, (char**)data_ptrs+pg_minsize, next_end-pos
|
(char**)data_ptrs, (char**)data_ptrs+pg_minsize, next_end-pos
|
||||||
);
|
);
|
||||||
|
#endif
|
||||||
pos = next_end;
|
pos = next_end;
|
||||||
}
|
}
|
||||||
for (int i = 0; i < pg_size; i++)
|
for (int i = 0; i < pg_size; i++)
|
||||||
{
|
{
|
||||||
data_ptrs[i] = stripes[i].bmp_buf;
|
data_ptrs[i] = stripes[i].bmp_buf;
|
||||||
}
|
}
|
||||||
|
#ifdef WITH_ISAL
|
||||||
|
ec_encode_data(
|
||||||
|
bitmap_size, pg_minsize, pg_size-pg_minsize, matrix->isal_data,
|
||||||
|
(uint8_t**)data_ptrs, (uint8_t**)data_ptrs+pg_minsize
|
||||||
|
);
|
||||||
|
#else
|
||||||
jerasure_matrix_encode(
|
jerasure_matrix_encode(
|
||||||
pg_minsize, pg_size-pg_minsize, OSD_JERASURE_W, matrix->data,
|
pg_minsize, pg_size-pg_minsize, OSD_JERASURE_W, matrix->je_data,
|
||||||
(char**)data_ptrs, (char**)data_ptrs+pg_minsize, bitmap_size
|
(char**)data_ptrs, (char**)data_ptrs+pg_minsize, bitmap_size
|
||||||
);
|
);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
calc_rmw_parity_copy_parity(stripes, pg_size, pg_minsize, read_osd_set, write_osd_set, chunk_size, start, end);
|
calc_rmw_parity_copy_parity(stripes, pg_size, pg_minsize, read_osd_set, write_osd_set, chunk_size, start, end);
|
||||||
|
@@ -44,9 +44,9 @@ void* calc_rmw(void *request_buf, osd_rmw_stripe_t *stripes, uint64_t *read_osd_
|
|||||||
void calc_rmw_parity_xor(osd_rmw_stripe_t *stripes, int pg_size, uint64_t *read_osd_set, uint64_t *write_osd_set,
|
void calc_rmw_parity_xor(osd_rmw_stripe_t *stripes, int pg_size, uint64_t *read_osd_set, uint64_t *write_osd_set,
|
||||||
uint32_t chunk_size, uint32_t bitmap_size);
|
uint32_t chunk_size, uint32_t bitmap_size);
|
||||||
|
|
||||||
void use_jerasure(int pg_size, int pg_minsize, bool use);
|
void use_ec(int pg_size, int pg_minsize, bool use);
|
||||||
|
|
||||||
void reconstruct_stripes_jerasure(osd_rmw_stripe_t *stripes, int pg_size, int pg_minsize, uint32_t bitmap_size);
|
void reconstruct_stripes_ec(osd_rmw_stripe_t *stripes, int pg_size, int pg_minsize, uint32_t bitmap_size);
|
||||||
|
|
||||||
void calc_rmw_parity_jerasure(osd_rmw_stripe_t *stripes, int pg_size, int pg_minsize,
|
void calc_rmw_parity_ec(osd_rmw_stripe_t *stripes, int pg_size, int pg_minsize,
|
||||||
uint64_t *read_osd_set, uint64_t *write_osd_set, uint32_t chunk_size, uint32_t bitmap_size);
|
uint64_t *read_osd_set, uint64_t *write_osd_set, uint32_t chunk_size, uint32_t bitmap_size);
|
||||||
|
@@ -587,14 +587,14 @@ void test12()
|
|||||||
input buffer: [ write0, write1 ],
|
input buffer: [ write0, write1 ],
|
||||||
rmw buffer: [ write2, write3, read0, read1 ],
|
rmw buffer: [ write2, write3, read0, read1 ],
|
||||||
}
|
}
|
||||||
then, after calc_rmw_parity_jerasure(): all the same
|
then, after calc_rmw_parity_ec(): all the same
|
||||||
then simulate read with read_osd_set=[0,0,3,4] and check read0,read1 buffers
|
then simulate read with read_osd_set=[0,0,3,4] and check read0,read1 buffers
|
||||||
|
|
||||||
***/
|
***/
|
||||||
|
|
||||||
void test13()
|
void test13()
|
||||||
{
|
{
|
||||||
use_jerasure(4, 2, true);
|
use_ec(4, 2, true);
|
||||||
osd_num_t osd_set[4] = { 1, 2, 0, 0 };
|
osd_num_t osd_set[4] = { 1, 2, 0, 0 };
|
||||||
osd_num_t write_osd_set[4] = { 1, 2, 3, 4 };
|
osd_num_t write_osd_set[4] = { 1, 2, 3, 4 };
|
||||||
osd_rmw_stripe_t stripes[4] = {};
|
osd_rmw_stripe_t stripes[4] = {};
|
||||||
@@ -628,7 +628,7 @@ void test13()
|
|||||||
set_pattern(write_buf, 8192, PATTERN3);
|
set_pattern(write_buf, 8192, PATTERN3);
|
||||||
set_pattern(stripes[0].read_buf, 128*1024-4096, PATTERN1);
|
set_pattern(stripes[0].read_buf, 128*1024-4096, PATTERN1);
|
||||||
set_pattern(stripes[1].read_buf, 128*1024-4096, PATTERN2);
|
set_pattern(stripes[1].read_buf, 128*1024-4096, PATTERN2);
|
||||||
calc_rmw_parity_jerasure(stripes, 4, 2, osd_set, write_osd_set, 128*1024, 0);
|
calc_rmw_parity_ec(stripes, 4, 2, osd_set, write_osd_set, 128*1024, 0);
|
||||||
assert(stripes[0].write_start == 128*1024-4096 && stripes[0].write_end == 128*1024);
|
assert(stripes[0].write_start == 128*1024-4096 && stripes[0].write_end == 128*1024);
|
||||||
assert(stripes[1].write_start == 0 && stripes[1].write_end == 4096);
|
assert(stripes[1].write_start == 0 && stripes[1].write_end == 4096);
|
||||||
assert(stripes[2].write_start == 0 && stripes[2].write_end == 128*1024);
|
assert(stripes[2].write_start == 0 && stripes[2].write_end == 128*1024);
|
||||||
@@ -663,7 +663,7 @@ void test13()
|
|||||||
assert(stripes[3].read_buf == (uint8_t*)read_buf+3*128*1024);
|
assert(stripes[3].read_buf == (uint8_t*)read_buf+3*128*1024);
|
||||||
memcpy((uint8_t*)read_buf+2*128*1024, rmw_buf, 128*1024);
|
memcpy((uint8_t*)read_buf+2*128*1024, rmw_buf, 128*1024);
|
||||||
memcpy((uint8_t*)read_buf+3*128*1024, (uint8_t*)rmw_buf+128*1024, 128*1024);
|
memcpy((uint8_t*)read_buf+3*128*1024, (uint8_t*)rmw_buf+128*1024, 128*1024);
|
||||||
reconstruct_stripes_jerasure(stripes, 4, 2, 0);
|
reconstruct_stripes_ec(stripes, 4, 2, 0);
|
||||||
check_pattern(stripes[0].read_buf, 128*1024-4096, PATTERN1);
|
check_pattern(stripes[0].read_buf, 128*1024-4096, PATTERN1);
|
||||||
check_pattern(stripes[0].read_buf+128*1024-4096, 4096, PATTERN3);
|
check_pattern(stripes[0].read_buf+128*1024-4096, 4096, PATTERN3);
|
||||||
check_pattern(stripes[1].read_buf, 4096, PATTERN3);
|
check_pattern(stripes[1].read_buf, 4096, PATTERN3);
|
||||||
@@ -694,14 +694,14 @@ void test13()
|
|||||||
assert(stripes[3].read_buf == (uint8_t*)read_buf+2*128*1024);
|
assert(stripes[3].read_buf == (uint8_t*)read_buf+2*128*1024);
|
||||||
memcpy((uint8_t*)read_buf+128*1024, rmw_buf, 128*1024);
|
memcpy((uint8_t*)read_buf+128*1024, rmw_buf, 128*1024);
|
||||||
memcpy((uint8_t*)read_buf+2*128*1024, (uint8_t*)rmw_buf+128*1024, 128*1024);
|
memcpy((uint8_t*)read_buf+2*128*1024, (uint8_t*)rmw_buf+128*1024, 128*1024);
|
||||||
reconstruct_stripes_jerasure(stripes, 4, 2, 0);
|
reconstruct_stripes_ec(stripes, 4, 2, 0);
|
||||||
check_pattern(stripes[0].read_buf, 128*1024-4096, PATTERN1);
|
check_pattern(stripes[0].read_buf, 128*1024-4096, PATTERN1);
|
||||||
check_pattern(stripes[0].read_buf+128*1024-4096, 4096, PATTERN3);
|
check_pattern(stripes[0].read_buf+128*1024-4096, 4096, PATTERN3);
|
||||||
free(read_buf);
|
free(read_buf);
|
||||||
// Huh done
|
// Huh done
|
||||||
free(rmw_buf);
|
free(rmw_buf);
|
||||||
free(write_buf);
|
free(write_buf);
|
||||||
use_jerasure(4, 2, false);
|
use_ec(4, 2, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
/***
|
/***
|
||||||
@@ -714,7 +714,7 @@ void test13()
|
|||||||
input buffer: [ write0, write1 ],
|
input buffer: [ write0, write1 ],
|
||||||
rmw buffer: [ write2, read0, read1 ],
|
rmw buffer: [ write2, read0, read1 ],
|
||||||
}
|
}
|
||||||
then, after calc_rmw_parity_jerasure(): all the same
|
then, after calc_rmw_parity_ec(): all the same
|
||||||
then simulate read with read_osd_set=[0,2,3] and check read0 buffer
|
then simulate read with read_osd_set=[0,2,3] and check read0 buffer
|
||||||
|
|
||||||
***/
|
***/
|
||||||
@@ -722,7 +722,7 @@ void test13()
|
|||||||
void test14()
|
void test14()
|
||||||
{
|
{
|
||||||
const int bmp = 4;
|
const int bmp = 4;
|
||||||
use_jerasure(3, 2, true);
|
use_ec(3, 2, true);
|
||||||
osd_num_t osd_set[3] = { 1, 2, 0 };
|
osd_num_t osd_set[3] = { 1, 2, 0 };
|
||||||
osd_num_t write_osd_set[3] = { 1, 2, 3 };
|
osd_num_t write_osd_set[3] = { 1, 2, 3 };
|
||||||
osd_rmw_stripe_t stripes[3] = {};
|
osd_rmw_stripe_t stripes[3] = {};
|
||||||
@@ -757,7 +757,7 @@ void test14()
|
|||||||
memset(stripes[0].bmp_buf, 0, bmp);
|
memset(stripes[0].bmp_buf, 0, bmp);
|
||||||
memset(stripes[1].bmp_buf, 0, bmp);
|
memset(stripes[1].bmp_buf, 0, bmp);
|
||||||
memset(stripes[2].bmp_buf, 0, bmp);
|
memset(stripes[2].bmp_buf, 0, bmp);
|
||||||
calc_rmw_parity_jerasure(stripes, 3, 2, osd_set, write_osd_set, 128*1024, bmp);
|
calc_rmw_parity_ec(stripes, 3, 2, osd_set, write_osd_set, 128*1024, bmp);
|
||||||
assert(*(uint32_t*)stripes[0].bmp_buf == 0x80000000);
|
assert(*(uint32_t*)stripes[0].bmp_buf == 0x80000000);
|
||||||
assert(*(uint32_t*)stripes[1].bmp_buf == 0x00000001);
|
assert(*(uint32_t*)stripes[1].bmp_buf == 0x00000001);
|
||||||
assert(*(uint32_t*)stripes[2].bmp_buf == 0x80000001); // jerasure 2+1 is still just XOR
|
assert(*(uint32_t*)stripes[2].bmp_buf == 0x80000001); // jerasure 2+1 is still just XOR
|
||||||
@@ -793,12 +793,12 @@ void test14()
|
|||||||
set_pattern(stripes[1].read_buf, 4096, PATTERN3);
|
set_pattern(stripes[1].read_buf, 4096, PATTERN3);
|
||||||
set_pattern(stripes[1].read_buf+4096, 128*1024-4096, PATTERN2);
|
set_pattern(stripes[1].read_buf+4096, 128*1024-4096, PATTERN2);
|
||||||
memcpy(stripes[2].read_buf, rmw_buf, 128*1024);
|
memcpy(stripes[2].read_buf, rmw_buf, 128*1024);
|
||||||
reconstruct_stripes_jerasure(stripes, 3, 2, bmp);
|
reconstruct_stripes_ec(stripes, 3, 2, bmp);
|
||||||
check_pattern(stripes[0].read_buf, 128*1024-4096, PATTERN1);
|
check_pattern(stripes[0].read_buf, 128*1024-4096, PATTERN1);
|
||||||
check_pattern(stripes[0].read_buf+128*1024-4096, 4096, PATTERN3);
|
check_pattern(stripes[0].read_buf+128*1024-4096, 4096, PATTERN3);
|
||||||
free(read_buf);
|
free(read_buf);
|
||||||
// Huh done
|
// Huh done
|
||||||
free(rmw_buf);
|
free(rmw_buf);
|
||||||
free(write_buf);
|
free(write_buf);
|
||||||
use_jerasure(3, 2, false);
|
use_ec(3, 2, false);
|
||||||
}
|
}
|
||||||
|
@@ -6,7 +6,7 @@ includedir=${prefix}/@CMAKE_INSTALL_INCLUDEDIR@
|
|||||||
|
|
||||||
Name: Vitastor
|
Name: Vitastor
|
||||||
Description: Vitastor client library
|
Description: Vitastor client library
|
||||||
Version: 0.6.17
|
Version: 0.7.1
|
||||||
Libs: -L${libdir} -lvitastor_client
|
Libs: -L${libdir} -lvitastor_client
|
||||||
Cflags: -I${includedir}
|
Cflags: -I${includedir}
|
||||||
|
|
||||||
|
@@ -1,47 +1,103 @@
|
|||||||
#!/bin/bash -ex
|
#!/bin/bash
|
||||||
|
|
||||||
. `dirname $0`/common.sh
|
. `dirname $0`/common.sh
|
||||||
|
|
||||||
OSD_SIZE=${OSD_SIZE:-1024}
|
OSD_SIZE=${OSD_SIZE:-1024}
|
||||||
PG_COUNT=${PG_COUNT:-1}
|
PG_COUNT=${PG_COUNT:-1}
|
||||||
PG_SIZE=${PG_SIZE:-3}
|
# OSD_COUNT
|
||||||
PG_MINSIZE=${PG_MINSIZE:-2}
|
SCHEME=${SCHEME:-replicated}
|
||||||
OSD_COUNT=${OSD_COUNT:-3}
|
# OSD_ARGS
|
||||||
SCHEME=${SCHEME:-ec}
|
# PG_SIZE
|
||||||
|
# PG_MINSIZE
|
||||||
|
|
||||||
|
if [ "$SCHEME" = "ec" ]; then
|
||||||
|
OSD_COUNT=${OSD_COUNT:-5}
|
||||||
|
else
|
||||||
|
OSD_COUNT=${OSD_COUNT:-3}
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ "$IMMEDIATE_COMMIT" != "" ]; then
|
||||||
|
NO_SAME="--journal_no_same_sector_overwrites true --journal_sector_buffer_count 1024 --disable_data_fsync 1 --immediate_commit all --log_level 1"
|
||||||
|
$ETCDCTL put /vitastor/config/global '{"recovery_queue_depth":1,"osd_out_time":1,"immediate_commit":"all"}'
|
||||||
|
else
|
||||||
|
NO_SAME="--journal_sector_buffer_count 1024 --log_level 1"
|
||||||
|
$ETCDCTL put /vitastor/config/global '{"recovery_queue_depth":1,"osd_out_time":1}'
|
||||||
|
fi
|
||||||
|
|
||||||
|
start_osd()
|
||||||
|
{
|
||||||
|
local i=$1
|
||||||
|
build/src/vitastor-osd --osd_num $i --bind_address 127.0.0.1 $NO_SAME $OSD_ARGS --etcd_address $ETCD_URL $(build/src/vitastor-cli simple-offsets --format options ./testdata/test_osd$i.bin 2>/dev/null) &>./testdata/osd$i.log &
|
||||||
|
eval OSD${i}_PID=$!
|
||||||
|
}
|
||||||
|
|
||||||
for i in $(seq 1 $OSD_COUNT); do
|
for i in $(seq 1 $OSD_COUNT); do
|
||||||
dd if=/dev/zero of=./testdata/test_osd$i.bin bs=1024 count=1 seek=$((OSD_SIZE*1024-1))
|
dd if=/dev/zero of=./testdata/test_osd$i.bin bs=1024 count=1 seek=$((OSD_SIZE*1024-1))
|
||||||
build/src/vitastor-osd --osd_num $i --bind_address 127.0.0.1 $OSD_ARGS --etcd_address $ETCD_URL $(build/src/vitastor-cli simple-offsets --format options ./testdata/test_osd$i.bin 2>/dev/null) &>./testdata/osd$i.log &
|
start_osd $i
|
||||||
eval OSD${i}_PID=$!
|
|
||||||
done
|
done
|
||||||
|
|
||||||
cd mon
|
cd mon
|
||||||
npm install
|
npm install
|
||||||
cd ..
|
cd ..
|
||||||
node mon/mon-main.js --etcd_url $ETCD_URL --etcd_prefix "/vitastor" &>./testdata/mon.log &
|
node mon/mon-main.js --etcd_url $ETCD_URL --etcd_prefix "/vitastor" --verbose 1 &>./testdata/mon.log &
|
||||||
MON_PID=$!
|
MON_PID=$!
|
||||||
|
|
||||||
if [ -n "$GLOBAL_CONF" ]; then
|
if [ "$SCHEME" = "ec" ]; then
|
||||||
$ETCDCTL put /vitastor/config/global "$GLOBAL_CONF"
|
PG_SIZE=${PG_SIZE:-5}
|
||||||
fi
|
PG_MINSIZE=${PG_MINSIZE:-3}
|
||||||
|
PG_DATA_SIZE=$PG_MINSIZE
|
||||||
if [ "$SCHEME" = "replicated" ]; then
|
POOLCFG='"scheme":"ec","parity_chunks":'$((PG_SIZE-PG_MINSIZE))
|
||||||
$ETCDCTL put /vitastor/config/pools '{"1":{"name":"testpool","scheme":"replicated","pg_size":'$PG_SIZE',"pg_minsize":'$PG_MINSIZE',"pg_count":'$PG_COUNT',"failure_domain":"osd"}}'
|
elif [ "$SCHEME" = "xor" ]; then
|
||||||
|
PG_SIZE=${PG_SIZE:-3}
|
||||||
|
PG_MINSIZE=${PG_MINSIZE:-2}
|
||||||
|
PG_DATA_SIZE=$PG_MINSIZE
|
||||||
|
POOLCFG='"scheme":"xor","parity_chunks":'$((PG_SIZE-PG_MINSIZE))
|
||||||
else
|
else
|
||||||
$ETCDCTL put /vitastor/config/pools '{"1":{"name":"testpool","scheme":"xor","pg_size":'$PG_SIZE',"pg_minsize":'$PG_MINSIZE',"parity_chunks":1,"pg_count":'$PG_COUNT',"failure_domain":"osd"}}'
|
PG_SIZE=${PG_SIZE:-2}
|
||||||
|
PG_MINSIZE=${PG_MINSIZE:-2}
|
||||||
|
PG_DATA_SIZE=1
|
||||||
|
POOLCFG='"scheme":"replicated"'
|
||||||
|
fi
|
||||||
|
POOLCFG='"name":"testpool","failure_domain":"osd",'$POOLCFG
|
||||||
|
$ETCDCTL put /vitastor/config/pools '{"1":{'$POOLCFG',"pg_size":'$PG_SIZE',"pg_minsize":'$PG_MINSIZE',"pg_count":'$PG_COUNT'}}'
|
||||||
|
|
||||||
|
sleep 2
|
||||||
|
|
||||||
|
if ! ($ETCDCTL get /vitastor/config/pgs --print-value-only | jq -s -e '(.[0].items["1"] | map((.osd_set | select(. > 0)) | length == '$PG_SIZE') | length) == '$PG_COUNT); then
|
||||||
|
format_error "FAILED: $PG_COUNT PGS NOT CONFIGURED"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
sleep 3
|
if ! ($ETCDCTL get --prefix /vitastor/pg/state/ --print-value-only | jq -s -e '([ .[] | select(.state == ["active"]) ] | length) == '$PG_COUNT); then
|
||||||
|
format_error "FAILED: $PG_COUNT PGS NOT UP"
|
||||||
if ! ($ETCDCTL get /vitastor/config/pgs --print-value-only | jq -s -e '(. | length) != 0 and ([ .[0].items["1"][] | select(((.osd_set | select(. != 0) | sort | unique) | length) == '$PG_SIZE') ] | length) == '$PG_COUNT); then
|
|
||||||
format_error "FAILED: $PG_COUNT PG(s) NOT CONFIGURED"
|
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if ! ($ETCDCTL get /vitastor/pg/state/1/ --prefix --print-value-only | jq -s -e '[ .[] | select(.state == ["active"]) ] | length == '$PG_COUNT); then
|
try_reweight()
|
||||||
format_error "FAILED: $PG_COUNT PG(s) NOT UP"
|
{
|
||||||
fi
|
osd=$1
|
||||||
|
w=$2
|
||||||
|
$ETCDCTL put /vitastor/config/osd/$osd '{"reweight":'$w'}'
|
||||||
|
sleep 3
|
||||||
|
}
|
||||||
|
|
||||||
if ! cmp build/src/block-vitastor.so /usr/lib/x86_64-linux-gnu/qemu/block-vitastor.so; then
|
wait_finish_rebalance()
|
||||||
sudo rm -f /usr/lib/x86_64-linux-gnu/qemu/block-vitastor.so
|
{
|
||||||
sudo ln -s "$(realpath .)/build/src/block-vitastor.so" /usr/lib/x86_64-linux-gnu/qemu/block-vitastor.so
|
sec=$1
|
||||||
fi
|
i=0
|
||||||
|
while [[ $i -lt $sec ]]; do
|
||||||
|
($ETCDCTL get --prefix /vitastor/pg/state/ --print-value-only | jq -s -e '([ .[] | select(.state == ["active"]) ] | length) == 32') && \
|
||||||
|
break
|
||||||
|
if [ $i -eq 60 ]; then
|
||||||
|
format_error "Rebalance couldn't finish in $sec seconds"
|
||||||
|
fi
|
||||||
|
sleep 1
|
||||||
|
i=$((i+1))
|
||||||
|
done
|
||||||
|
}
|
||||||
|
|
||||||
|
check_qemu()
|
||||||
|
{
|
||||||
|
if ! cmp build/src/block-vitastor.so /usr/lib/x86_64-linux-gnu/qemu/block-vitastor.so; then
|
||||||
|
sudo rm -f /usr/lib/x86_64-linux-gnu/qemu/block-vitastor.so
|
||||||
|
sudo ln -s "$(realpath .)/build/src/block-vitastor.so" /usr/lib/x86_64-linux-gnu/qemu/block-vitastor.so
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
@@ -1,68 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
. `dirname $0`/common.sh
|
|
||||||
|
|
||||||
if [ "$IMMEDIATE_COMMIT" != "" ]; then
|
|
||||||
NO_SAME="--journal_no_same_sector_overwrites true --journal_sector_buffer_count 1024 --disable_data_fsync 1 --immediate_commit all --log_level 1"
|
|
||||||
$ETCDCTL put /vitastor/config/global '{"recovery_queue_depth":1,"osd_out_time":5,"immediate_commit":"all"}'
|
|
||||||
else
|
|
||||||
NO_SAME="--journal_sector_buffer_count 1024 --log_level 1"
|
|
||||||
$ETCDCTL put /vitastor/config/global '{"recovery_queue_depth":1,"osd_out_time":5}'
|
|
||||||
fi
|
|
||||||
|
|
||||||
OSD_SIZE=1024
|
|
||||||
OSD_COUNT=7
|
|
||||||
OSD_ARGS=
|
|
||||||
for i in $(seq 1 $OSD_COUNT); do
|
|
||||||
dd if=/dev/zero of=./testdata/test_osd$i.bin bs=1024 count=1 seek=$((OSD_SIZE*1024-1))
|
|
||||||
build/src/vitastor-osd --osd_num $i --bind_address 127.0.0.1 $NO_SAME $OSD_ARGS --etcd_address $ETCD_URL $(build/src/vitastor-cli simple-offsets --format options ./testdata/test_osd$i.bin 2>/dev/null) &>./testdata/osd$i.log &
|
|
||||||
eval OSD${i}_PID=$!
|
|
||||||
done
|
|
||||||
|
|
||||||
cd mon
|
|
||||||
npm install
|
|
||||||
cd ..
|
|
||||||
node mon/mon-main.js --etcd_url $ETCD_URL --etcd_prefix "/vitastor" --verbose 1 &>./testdata/mon.log &
|
|
||||||
MON_PID=$!
|
|
||||||
|
|
||||||
if [ "$EC" != "" ]; then
|
|
||||||
POOLCFG='"scheme":"xor","pg_size":3,"pg_minsize":2,"parity_chunks":1'
|
|
||||||
PG_SIZE=3
|
|
||||||
else
|
|
||||||
POOLCFG='"scheme":"replicated","pg_size":2,"pg_minsize":2'
|
|
||||||
PG_SIZE=2
|
|
||||||
fi
|
|
||||||
$ETCDCTL put /vitastor/config/pools '{"1":{"name":"testpool",'$POOLCFG',"pg_count":32,"failure_domain":"osd"}}'
|
|
||||||
|
|
||||||
sleep 2
|
|
||||||
|
|
||||||
if ! ($ETCDCTL get /vitastor/config/pgs --print-value-only | jq -s -e '(.[0].items["1"] | map((.osd_set | select(. > 0)) | length == '$PG_SIZE') | length) == 32'); then
|
|
||||||
format_error "FAILED: 32 PGS NOT CONFIGURED"
|
|
||||||
fi
|
|
||||||
|
|
||||||
if ! ($ETCDCTL get --prefix /vitastor/pg/state/ --print-value-only | jq -s -e '([ .[] | select(.state == ["active"]) ] | length) == 32'); then
|
|
||||||
format_error "FAILED: 32 PGS NOT UP"
|
|
||||||
fi
|
|
||||||
|
|
||||||
try_reweight()
|
|
||||||
{
|
|
||||||
osd=$1
|
|
||||||
w=$2
|
|
||||||
$ETCDCTL put /vitastor/config/osd/$osd '{"reweight":'$w'}'
|
|
||||||
sleep 3
|
|
||||||
}
|
|
||||||
|
|
||||||
wait_finish_rebalance()
|
|
||||||
{
|
|
||||||
sec=$1
|
|
||||||
i=0
|
|
||||||
while [[ $i -lt $sec ]]; do
|
|
||||||
($ETCDCTL get --prefix /vitastor/pg/state/ --print-value-only | jq -s -e '([ .[] | select(.state == ["active"]) ] | length) == 32') && \
|
|
||||||
break
|
|
||||||
if [ $i -eq 60 ]; then
|
|
||||||
format_error "Rebalance couldn't finish in $sec seconds"
|
|
||||||
fi
|
|
||||||
sleep 1
|
|
||||||
i=$((i+1))
|
|
||||||
done
|
|
||||||
}
|
|
@@ -8,7 +8,7 @@ cd $(dirname $0)
|
|||||||
./test_cas.sh
|
./test_cas.sh
|
||||||
|
|
||||||
./test_change_pg_count.sh
|
./test_change_pg_count.sh
|
||||||
EC=1 ./test_change_pg_count.sh
|
SCHEME=ec ./test_change_pg_count.sh
|
||||||
|
|
||||||
./test_change_pg_size.sh
|
./test_change_pg_size.sh
|
||||||
|
|
||||||
@@ -18,8 +18,8 @@ EC=1 ./test_change_pg_count.sh
|
|||||||
|
|
||||||
./test_interrupted_rebalance.sh
|
./test_interrupted_rebalance.sh
|
||||||
IMMEDIATE_COMMIT=1 ./test_interrupted_rebalance.sh
|
IMMEDIATE_COMMIT=1 ./test_interrupted_rebalance.sh
|
||||||
EC=1 ./test_interrupted_rebalance.sh
|
SCHEME=ec ./test_interrupted_rebalance.sh
|
||||||
EC=1 IMMEDIATE_COMMIT=1 ./test_interrupted_rebalance.sh
|
SCHEME=ec IMMEDIATE_COMMIT=1 ./test_interrupted_rebalance.sh
|
||||||
|
|
||||||
./test_minsize_1.sh
|
./test_minsize_1.sh
|
||||||
|
|
||||||
@@ -27,17 +27,17 @@ EC=1 IMMEDIATE_COMMIT=1 ./test_interrupted_rebalance.sh
|
|||||||
|
|
||||||
./test_rebalance_verify.sh
|
./test_rebalance_verify.sh
|
||||||
IMMEDIATE_COMMIT=1 ./test_rebalance_verify.sh
|
IMMEDIATE_COMMIT=1 ./test_rebalance_verify.sh
|
||||||
EC=1 ./test_rebalance_verify.sh
|
SCHEME=ec ./test_rebalance_verify.sh
|
||||||
EC=1 IMMEDIATE_COMMIT=1 ./test_rebalance_verify.sh
|
SCHEME=ec IMMEDIATE_COMMIT=1 ./test_rebalance_verify.sh
|
||||||
|
|
||||||
./test_rm.sh
|
./test_rm.sh
|
||||||
|
|
||||||
./test_snapshot.sh
|
./test_snapshot.sh
|
||||||
SCHEME=replicated ./test_snapshot.sh
|
SCHEME=ec ./test_snapshot.sh
|
||||||
|
|
||||||
./test_splitbrain.sh
|
./test_splitbrain.sh
|
||||||
|
|
||||||
./test_write.sh
|
./test_write.sh
|
||||||
SCHEME=replicated ./test_write.sh
|
SCHEME=xor ./test_write.sh
|
||||||
|
|
||||||
./test_write_no_same.sh
|
./test_write_no_same.sh
|
||||||
|
@@ -1,41 +1,11 @@
|
|||||||
#!/bin/bash -ex
|
#!/bin/bash -ex
|
||||||
|
|
||||||
. `dirname $0`/common.sh
|
OSD_COUNT=${OSD_COUNT:-6}
|
||||||
|
PG_COUNT=16
|
||||||
|
|
||||||
if [ "$EC" != "" ]; then
|
. `dirname $0`/run_3osds.sh
|
||||||
POOLCFG='"scheme":"xor","pg_size":3,"pg_minsize":2,"parity_chunks":1'
|
|
||||||
NOBJ=512
|
|
||||||
else
|
|
||||||
POOLCFG='"scheme":"replicated","pg_size":2,"pg_minsize":2'
|
|
||||||
NOBJ=1024
|
|
||||||
fi
|
|
||||||
|
|
||||||
OSD_SIZE=1024
|
NOBJ=$(((128*8+PG_DATA_SIZE-1)/PG_DATA_SIZE))
|
||||||
OSD_COUNT=6
|
|
||||||
OSD_ARGS=
|
|
||||||
for i in $(seq 1 $OSD_COUNT); do
|
|
||||||
dd if=/dev/zero of=./testdata/test_osd$i.bin bs=1024 count=1 seek=$((OSD_SIZE*1024-1))
|
|
||||||
build/src/vitastor-osd --osd_num $i --bind_address 127.0.0.1 $OSD_ARGS --etcd_address $ETCD_URL $(build/src/vitastor-cli simple-offsets --format options ./testdata/test_osd$i.bin 2>/dev/null) &>./testdata/osd$i.log &
|
|
||||||
eval OSD${i}_PID=$!
|
|
||||||
done
|
|
||||||
|
|
||||||
cd mon
|
|
||||||
npm install
|
|
||||||
cd ..
|
|
||||||
node mon/mon-main.js --etcd_url $ETCD_URL --etcd_prefix "/vitastor" --verbose 1 &>./testdata/mon.log &
|
|
||||||
MON_PID=$!
|
|
||||||
|
|
||||||
$ETCDCTL put /vitastor/config/pools '{"1":{"name":"testpool",'$POOLCFG',"pg_count":16,"failure_domain":"osd"}}'
|
|
||||||
|
|
||||||
sleep 2
|
|
||||||
|
|
||||||
if ! ($ETCDCTL get /vitastor/config/pgs --print-value-only | jq -s -e '(.[0].items["1"] | map((.osd_set | select(. > 0)) | length == 2) | length) == 16'); then
|
|
||||||
format_error "FAILED: 16 PGS NOT CONFIGURED"
|
|
||||||
fi
|
|
||||||
|
|
||||||
if ! ($ETCDCTL get --prefix /vitastor/pg/state/ --print-value-only | jq -s -e '([ .[] | select(.state == ["active"]) ] | length) == 16'); then
|
|
||||||
format_error "FAILED: 16 PGS NOT UP"
|
|
||||||
fi
|
|
||||||
|
|
||||||
LD_PRELOAD="build/src/libfio_vitastor.so" \
|
LD_PRELOAD="build/src/libfio_vitastor.so" \
|
||||||
fio -thread -name=test -ioengine=build/src/libfio_vitastor.so -bs=4M -direct=1 -iodepth=1 -fsync=1 -rw=write \
|
fio -thread -name=test -ioengine=build/src/libfio_vitastor.so -bs=4M -direct=1 -iodepth=1 -fsync=1 -rw=write \
|
||||||
@@ -49,7 +19,7 @@ try_change()
|
|||||||
echo --- Change PG count to $n --- >>testdata/osd$i.log
|
echo --- Change PG count to $n --- >>testdata/osd$i.log
|
||||||
done
|
done
|
||||||
|
|
||||||
$ETCDCTL put /vitastor/config/pools '{"1":{"name":"testpool",'$POOLCFG',"pg_count":'$n',"failure_domain":"osd"}}'
|
$ETCDCTL put /vitastor/config/pools '{"1":{'$POOLCFG',"pg_size":'$PG_SIZE',"pg_minsize":'$PG_MINSIZE',"pg_count":'$n'}}'
|
||||||
|
|
||||||
for i in {1..10}; do
|
for i in {1..10}; do
|
||||||
($ETCDCTL get /vitastor/config/pgs --print-value-only | jq -s -e '(.[0].items["1"] | map((.osd_set | select(. > 0)) | length == 2) | length) == '$n) && \
|
($ETCDCTL get /vitastor/config/pgs --print-value-only | jq -s -e '(.[0].items["1"] | map((.osd_set | select(. > 0)) | length == 2) | length) == '$n) && \
|
||||||
|
@@ -1,40 +1,16 @@
|
|||||||
#!/bin/bash -ex
|
#!/bin/bash -ex
|
||||||
|
|
||||||
. `dirname $0`/common.sh
|
PG_COUNT=16
|
||||||
|
SCHEME=${SCHEME:-replicated}
|
||||||
|
|
||||||
OSD_SIZE=1024
|
. `dirname $0`/run_3osds.sh
|
||||||
OSD_COUNT=3
|
|
||||||
OSD_ARGS=
|
|
||||||
for i in $(seq 1 $OSD_COUNT); do
|
|
||||||
dd if=/dev/zero of=./testdata/test_osd$i.bin bs=1024 count=1 seek=$((OSD_SIZE*1024-1))
|
|
||||||
build/src/vitastor-osd --osd_num $i --bind_address 127.0.0.1 $OSD_ARGS --etcd_address $ETCD_URL $(build/src/vitastor-cli simple-offsets --format options ./testdata/test_osd$i.bin 2>/dev/null) &>./testdata/osd$i.log &
|
|
||||||
eval OSD${i}_PID=$!
|
|
||||||
done
|
|
||||||
|
|
||||||
cd mon
|
|
||||||
npm install
|
|
||||||
cd ..
|
|
||||||
node mon/mon-main.js --etcd_url $ETCD_URL --etcd_prefix "/vitastor" &>./testdata/mon.log &
|
|
||||||
MON_PID=$!
|
|
||||||
|
|
||||||
$ETCDCTL put /vitastor/config/pools '{"1":{"name":"testpool","scheme":"replicated","pg_size":3,"pg_minsize":2,"pg_count":16,"failure_domain":"osd"}}'
|
|
||||||
|
|
||||||
sleep 2
|
|
||||||
|
|
||||||
if ! ($ETCDCTL get /vitastor/config/pgs --print-value-only | jq -s -e '(.[0].items["1"] | map((.osd_set | sort) == ["1","2","3"]) | length) == 16'); then
|
|
||||||
format_error "FAILED: 16 PGS NOT CONFIGURED"
|
|
||||||
fi
|
|
||||||
|
|
||||||
if ! ($ETCDCTL get --prefix /vitastor/pg/state/ --print-value-only | jq -s -e '([ .[] | select(.state == ["active"]) ] | length) == 16'); then
|
|
||||||
format_error "FAILED: 16 PGS NOT UP"
|
|
||||||
fi
|
|
||||||
|
|
||||||
try_change()
|
try_change()
|
||||||
{
|
{
|
||||||
n=$1
|
n=$1
|
||||||
s=$2
|
s=$2
|
||||||
|
|
||||||
$ETCDCTL put /vitastor/config/pools '{"1":{"name":"testpool","scheme":"replicated","pg_size":'$s',"pg_minsize":2,"pg_count":'$n',"failure_domain":"osd"}}'
|
$ETCDCTL put /vitastor/config/pools '{"1":{'$POOLCFG',"pg_size":'$s',"pg_minsize":'$PG_MINSIZE',"pg_count":'$n'}}'
|
||||||
|
|
||||||
for i in {1..10}; do
|
for i in {1..10}; do
|
||||||
($ETCDCTL get /vitastor/config/pgs --print-value-only |\
|
($ETCDCTL get /vitastor/config/pgs --print-value-only |\
|
||||||
|
54
tests/test_heal.sh
Executable file
54
tests/test_heal.sh
Executable file
@@ -0,0 +1,54 @@
|
|||||||
|
#!/bin/bash -ex
|
||||||
|
|
||||||
|
# Kill OSDs while writing
|
||||||
|
|
||||||
|
PG_SIZE=3
|
||||||
|
OSD_COUNT=7
|
||||||
|
PG_COUNT=32
|
||||||
|
. `dirname $0`/run_3osds.sh
|
||||||
|
check_qemu
|
||||||
|
|
||||||
|
IMG_SIZE=960
|
||||||
|
|
||||||
|
$ETCDCTL put /vitastor/config/inode/1/1 '{"name":"testimg","size":'$((IMG_SIZE*1024*1024))'}'
|
||||||
|
|
||||||
|
LD_PRELOAD="build/src/libfio_vitastor.so" \
|
||||||
|
fio -thread -name=test -ioengine=build/src/libfio_vitastor.so -bs=4M -direct=1 -iodepth=1 -fsync=1 -rw=write \
|
||||||
|
-mirror_file=./testdata/mirror.bin -etcd=$ETCD_URL -image=testimg -cluster_log_level=10
|
||||||
|
|
||||||
|
kill_osds()
|
||||||
|
{
|
||||||
|
sleep 5
|
||||||
|
|
||||||
|
kill -9 $OSD1_PID
|
||||||
|
$ETCDCTL del /vitastor/osd/state/1
|
||||||
|
|
||||||
|
for i in 2 3 4 5 6 7; do
|
||||||
|
sleep 15
|
||||||
|
echo Killing OSD $i and starting OSD $((i-1))
|
||||||
|
p=OSD${i}_PID
|
||||||
|
kill -9 ${!p}
|
||||||
|
$ETCDCTL del /vitastor/osd/state/$i
|
||||||
|
start_osd $((i-1))
|
||||||
|
sleep 15
|
||||||
|
done
|
||||||
|
|
||||||
|
sleep 5
|
||||||
|
start_osd 7
|
||||||
|
|
||||||
|
sleep 5
|
||||||
|
}
|
||||||
|
|
||||||
|
kill_osds &
|
||||||
|
|
||||||
|
LD_PRELOAD="build/src/libfio_vitastor.so" \
|
||||||
|
fio -thread -name=test -ioengine=build/src/libfio_vitastor.so -bs=4k -direct=1 -iodepth=16 -fsync=256 -rw=randwrite \
|
||||||
|
-mirror_file=./testdata/mirror.bin -etcd=$ETCD_URL -image=testimg -loops=10 -runtime=120 2>/dev/null
|
||||||
|
|
||||||
|
qemu-img convert -S 4096 -p \
|
||||||
|
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testimg" \
|
||||||
|
-O raw ./testdata/read.bin
|
||||||
|
|
||||||
|
diff ./testdata/read.bin ./testdata/mirror.bin
|
||||||
|
|
||||||
|
format_green OK
|
@@ -1,6 +1,8 @@
|
|||||||
#!/bin/bash -ex
|
#!/bin/bash -ex
|
||||||
|
|
||||||
. `dirname $0`/run_7osds.sh
|
OSD_COUNT=7
|
||||||
|
PG_COUNT=32
|
||||||
|
. `dirname $0`/run_3osds.sh
|
||||||
|
|
||||||
IMG_SIZE=960
|
IMG_SIZE=960
|
||||||
|
|
||||||
@@ -32,13 +34,14 @@ try_reweight 5 1
|
|||||||
wait_finish_rebalance 60
|
wait_finish_rebalance 60
|
||||||
|
|
||||||
# Check that PGs never had degraded objects !
|
# Check that PGs never had degraded objects !
|
||||||
if grep has_degraded ./testdata/mon.log; then
|
# FIXME: In fact, the test doesn't guarantee it because PGs aren't always peered only with full prior OSD sets :-(
|
||||||
format_error "Some copies of objects were lost during interrupted rebalancings"
|
#if grep has_degraded ./testdata/mon.log; then
|
||||||
fi
|
# format_error "Some copies of objects were lost during interrupted rebalancings"
|
||||||
|
#fi
|
||||||
|
|
||||||
# Check that no objects are lost !
|
# Check that no objects are lost !
|
||||||
nobj=`$ETCDCTL get --prefix '/vitastor/pg/stats' --print-value-only | jq -s '[ .[].object_count ] | reduce .[] as $num (0; .+$num)'`
|
nobj=`$ETCDCTL get --prefix '/vitastor/pg/stats' --print-value-only | jq -s '[ .[].object_count ] | reduce .[] as $num (0; .+$num)'`
|
||||||
if [ "$nobj" -ne $((IMG_SIZE*8)) ]; then
|
if [ "$nobj" -ne $((IMG_SIZE*8/PG_DATA_SIZE)) ]; then
|
||||||
format_error "Data lost after multiple interrupted rebalancings"
|
format_error "Data lost after multiple interrupted rebalancings"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
@@ -1,6 +1,8 @@
|
|||||||
#!/bin/bash -ex
|
#!/bin/bash -ex
|
||||||
|
|
||||||
. `dirname $0`/run_7osds.sh
|
OSD_COUNT=7
|
||||||
|
PG_COUNT=32
|
||||||
|
. `dirname $0`/run_3osds.sh
|
||||||
|
|
||||||
IMG_SIZE=256
|
IMG_SIZE=256
|
||||||
|
|
||||||
|
@@ -1,6 +1,7 @@
|
|||||||
#!/bin/bash -ex
|
#!/bin/bash -ex
|
||||||
|
|
||||||
. `dirname $0`/run_3osds.sh
|
. `dirname $0`/run_3osds.sh
|
||||||
|
check_qemu
|
||||||
|
|
||||||
# Test basic write and snapshot
|
# Test basic write and snapshot
|
||||||
|
|
||||||
|
@@ -3,6 +3,7 @@
|
|||||||
OSD_SIZE=2048
|
OSD_SIZE=2048
|
||||||
|
|
||||||
. `dirname $0`/run_3osds.sh
|
. `dirname $0`/run_3osds.sh
|
||||||
|
check_qemu
|
||||||
|
|
||||||
$ETCDCTL put /vitastor/config/inode/1/1 '{"name":"debian9","size":'$((2048*1024*1024))'}'
|
$ETCDCTL put /vitastor/config/inode/1/1 '{"name":"debian9","size":'$((2048*1024*1024))'}'
|
||||||
|
|
||||||
|
@@ -1,6 +1,7 @@
|
|||||||
#!/bin/bash -ex
|
#!/bin/bash -ex
|
||||||
|
|
||||||
. `dirname $0`/run_3osds.sh
|
. `dirname $0`/run_3osds.sh
|
||||||
|
check_qemu
|
||||||
|
|
||||||
#LD_PRELOAD=libasan.so.5 \
|
#LD_PRELOAD=libasan.so.5 \
|
||||||
# fio -thread -name=test -ioengine=build/src/libfio_vitastor_sec.so -bs=4k -fsync=128 `$ETCDCTL get /vitastor/osd/state/1 --print-value-only | jq -r '"-host="+.addresses[0]+" -port="+(.port|tostring)'` -rw=write -size=32M
|
# fio -thread -name=test -ioengine=build/src/libfio_vitastor_sec.so -bs=4k -fsync=128 `$ETCDCTL get /vitastor/osd/state/1 --print-value-only | jq -r '"-host="+.addresses[0]+" -port="+(.port|tostring)'` -rw=write -size=32M
|
||||||
|
Reference in New Issue
Block a user