Compare commits
73 Commits
Author | SHA1 | Date | |
---|---|---|---|
e898335b8d | |||
e7869611fa | |||
e1c2500b60 | |||
42cf3a11df | |||
4d9293f0e9 | |||
7a13f85ae2 | |||
fc219b8602 | |||
989d73f874 | |||
f0630722ce | |||
93b0947720 | |||
9c628646fa | |||
cf476a3b95 | |||
23f9273ba3 | |||
74b88bf8ba | |||
1254d5a0de | |||
f87bece253 | |||
ba85d0ef16 | |||
17a909ea3a | |||
a4dfc220ab | |||
26426dd95e | |||
9f38b7e5c1 | |||
20057defbe | |||
b4e9140755 | |||
413959e75a | |||
8973982570 | |||
990c3ba7eb | |||
1771d2ef36 | |||
d88ab76636 | |||
c010a0aa54 | |||
0d42712d29 | |||
66b438106a | |||
3aef6682fb | |||
8535bccf4c | |||
0487b3b239 | |||
a54ef97f5d | |||
10434a9b2b | |||
c6be194508 | |||
df668286fb | |||
667c5999c9 | |||
8ad63465cd | |||
976290e6a9 | |||
79f1d1969b | |||
918e1f83b0 | |||
abbba6ade4 | |||
21d1171ba4 | |||
![]() |
8f83086889 | ||
ceb18f25db | |||
ed51a89f70 | |||
f59456f22d | |||
ca63cd507d | |||
ea0d72289c | |||
e400a851f4 | |||
0fec7a9fea | |||
b9de2a92a9 | |||
5360a70853 | |||
4c2328eb13 | |||
313daef12d | |||
ad9c12e1b9 | |||
4473eb5512 | |||
6501abc060 | |||
1228403e74 | |||
4eabebd245 | |||
cf60b6818c | |||
1a4a7cdc37 | |||
1b48085e21 | |||
a71847244e | |||
848c2d2722 | |||
86832dc43f | |||
1f6da79463 | |||
9bf57c3760 | |||
a0305b5b4a | |||
1546f8e447 | |||
8ce962b312 |
@@ -2,6 +2,6 @@ cmake_minimum_required(VERSION 2.8.12)
|
||||
|
||||
project(vitastor)
|
||||
|
||||
set(VERSION "1.6.1")
|
||||
set(VERSION "1.7.1")
|
||||
|
||||
add_subdirectory(src)
|
||||
|
@@ -1,4 +1,4 @@
|
||||
VERSION ?= v1.6.1
|
||||
VERSION ?= v1.7.1
|
||||
|
||||
all: build push
|
||||
|
||||
|
@@ -49,7 +49,7 @@ spec:
|
||||
capabilities:
|
||||
add: ["SYS_ADMIN"]
|
||||
allowPrivilegeEscalation: true
|
||||
image: vitalif/vitastor-csi:v1.6.1
|
||||
image: vitalif/vitastor-csi:v1.7.1
|
||||
args:
|
||||
- "--node=$(NODE_ID)"
|
||||
- "--endpoint=$(CSI_ENDPOINT)"
|
||||
|
@@ -121,7 +121,7 @@ spec:
|
||||
privileged: true
|
||||
capabilities:
|
||||
add: ["SYS_ADMIN"]
|
||||
image: vitalif/vitastor-csi:v1.6.1
|
||||
image: vitalif/vitastor-csi:v1.7.1
|
||||
args:
|
||||
- "--node=$(NODE_ID)"
|
||||
- "--endpoint=$(CSI_ENDPOINT)"
|
||||
|
@@ -5,7 +5,7 @@ package vitastor
|
||||
|
||||
const (
|
||||
vitastorCSIDriverName = "csi.vitastor.io"
|
||||
vitastorCSIDriverVersion = "1.6.1"
|
||||
vitastorCSIDriverVersion = "1.7.1"
|
||||
)
|
||||
|
||||
// Config struct fills the parameters of request or user input
|
||||
|
2
debian/changelog
vendored
2
debian/changelog
vendored
@@ -1,4 +1,4 @@
|
||||
vitastor (1.6.1-1) unstable; urgency=medium
|
||||
vitastor (1.7.1-1) unstable; urgency=medium
|
||||
|
||||
* Bugfixes
|
||||
|
||||
|
5
debian/vitastor-mon.install
vendored
5
debian/vitastor-mon.install
vendored
@@ -1,2 +1,3 @@
|
||||
mon usr/lib/vitastor
|
||||
mon/vitastor-mon.service /lib/systemd/system
|
||||
mon usr/lib/vitastor/
|
||||
mon/scripts/make-etcd usr/lib/vitastor/mon
|
||||
mon/scripts/vitastor-mon.service /lib/systemd/system
|
||||
|
2
debian/vitastor-mon.postinst
vendored
2
debian/vitastor-mon.postinst
vendored
@@ -6,4 +6,6 @@ if [ "$1" = "configure" ]; then
|
||||
addgroup --system --quiet vitastor
|
||||
adduser --system --quiet --ingroup vitastor --no-create-home --home /nonexistent vitastor
|
||||
mkdir -p /etc/vitastor
|
||||
mkdir -p /var/lib/vitastor
|
||||
chown vitastor:vitastor /var/lib/vitastor
|
||||
fi
|
||||
|
6
debian/vitastor-osd.install
vendored
6
debian/vitastor-osd.install
vendored
@@ -1,6 +1,6 @@
|
||||
usr/bin/vitastor-osd
|
||||
usr/bin/vitastor-disk
|
||||
usr/bin/vitastor-dump-journal
|
||||
mon/vitastor-osd@.service /lib/systemd/system
|
||||
mon/vitastor.target /lib/systemd/system
|
||||
mon/90-vitastor.rules /lib/udev/rules.d
|
||||
mon/scripts/vitastor-osd@.service /lib/systemd/system
|
||||
mon/scripts/vitastor.target /lib/systemd/system
|
||||
mon/scripts/90-vitastor.rules /lib/udev/rules.d
|
||||
|
33
debian/vitastor.Dockerfile
vendored
33
debian/vitastor.Dockerfile
vendored
@@ -9,12 +9,12 @@ ARG REL=
|
||||
|
||||
WORKDIR /root
|
||||
|
||||
RUN if [ "$REL" = "buster" -o "$REL" = "bullseye" ]; then \
|
||||
echo "deb http://deb.debian.org/debian $REL-backports main" >> /etc/apt/sources.list; \
|
||||
echo >> /etc/apt/preferences; \
|
||||
echo 'Package: *' >> /etc/apt/preferences; \
|
||||
echo "Pin: release a=$REL-backports" >> /etc/apt/preferences; \
|
||||
echo 'Pin-Priority: 500' >> /etc/apt/preferences; \
|
||||
RUN set -e -x; \
|
||||
if [ "$REL" = "buster" ]; then \
|
||||
apt-get update; \
|
||||
apt-get -y install wget; \
|
||||
wget https://vitastor.io/debian/pubkey.gpg -O /etc/apt/trusted.gpg.d/vitastor.gpg; \
|
||||
echo "deb https://vitastor.io/debian $REL main" >> /etc/apt/sources.list; \
|
||||
fi; \
|
||||
grep '^deb ' /etc/apt/sources.list | perl -pe 's/^deb/deb-src/' >> /etc/apt/sources.list; \
|
||||
perl -i -pe 's/Types: deb$/Types: deb deb-src/' /etc/apt/sources.list.d/debian.sources || true; \
|
||||
@@ -22,10 +22,9 @@ RUN if [ "$REL" = "buster" -o "$REL" = "bullseye" ]; then \
|
||||
echo 'APT::Install-Suggests false;' >> /etc/apt/apt.conf
|
||||
|
||||
RUN apt-get update
|
||||
RUN apt-get -y install fio liburing-dev libgoogle-perftools-dev devscripts
|
||||
RUN apt-get -y install fio liburing-dev libgoogle-perftools-dev devscripts libjerasure-dev cmake libibverbs-dev libisal-dev libnl-3-dev libnl-genl-3-dev curl
|
||||
RUN apt-get -y build-dep fio
|
||||
RUN apt-get --download-only source fio
|
||||
RUN apt-get update && apt-get -y install libjerasure-dev cmake libibverbs-dev libisal-dev libnl-3-dev libnl-genl-3-dev
|
||||
|
||||
ADD . /root/vitastor
|
||||
RUN set -e -x; \
|
||||
@@ -37,8 +36,10 @@ RUN set -e -x; \
|
||||
mkdir -p /root/packages/vitastor-$REL; \
|
||||
rm -rf /root/packages/vitastor-$REL/*; \
|
||||
cd /root/packages/vitastor-$REL; \
|
||||
cp -r /root/vitastor vitastor-1.6.1; \
|
||||
cd vitastor-1.6.1; \
|
||||
FULLVER=$(head -n1 /root/vitastor/debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \
|
||||
VER=${FULLVER%%-*}; \
|
||||
cp -r /root/vitastor vitastor-$VER; \
|
||||
cd vitastor-$VER; \
|
||||
ln -s /root/fio-build/fio-*/ ./fio; \
|
||||
FIO=$(head -n1 fio/debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \
|
||||
ls /usr/include/linux/raw.h || cp ./debian/raw.h /usr/include/linux/raw.h; \
|
||||
@@ -50,10 +51,14 @@ RUN set -e -x; \
|
||||
echo fio-headers.patch >> debian/patches/series; \
|
||||
rm -rf a b; \
|
||||
echo "dep:fio=$FIO" > debian/fio_version; \
|
||||
cd /root/packages/vitastor-$REL/vitastor-$VER; \
|
||||
mkdir mon/node_modules; \
|
||||
cd mon/node_modules; \
|
||||
curl -s https://git.yourcmc.ru/vitalif/antietcd/archive/master.tar.gz | tar -zx; \
|
||||
curl -s https://git.yourcmc.ru/vitalif/tinyraft/archive/master.tar.gz | tar -zx; \
|
||||
cd /root/packages/vitastor-$REL; \
|
||||
tar --sort=name --mtime='2020-01-01' --owner=0 --group=0 --exclude=debian -cJf vitastor_1.6.1.orig.tar.xz vitastor-1.6.1; \
|
||||
cd vitastor-1.6.1; \
|
||||
V=$(head -n1 debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \
|
||||
DEBFULLNAME="Vitaliy Filippov <vitalif@yourcmc.ru>" dch -D $REL -v "$V""$REL" "Rebuild for $REL"; \
|
||||
tar --sort=name --mtime='2020-01-01' --owner=0 --group=0 --exclude=debian -cJf vitastor_$VER.orig.tar.xz vitastor-$VER; \
|
||||
cd vitastor-$VER; \
|
||||
DEBFULLNAME="Vitaliy Filippov <vitalif@yourcmc.ru>" dch -D $REL -v "$FULLVER""$REL" "Rebuild for $REL"; \
|
||||
DEB_BUILD_OPTIONS=nocheck dpkg-buildpackage --jobs=auto -sa; \
|
||||
rm -rf /root/packages/vitastor-$REL/vitastor-*/
|
||||
|
@@ -9,6 +9,7 @@
|
||||
These parameters apply only to Vitastor clients (QEMU, fio, NBD and so on) and
|
||||
affect their interaction with the cluster.
|
||||
|
||||
- [client_iothread_count](#client_iothread_count)
|
||||
- [client_retry_interval](#client_retry_interval)
|
||||
- [client_eio_retry_interval](#client_eio_retry_interval)
|
||||
- [client_retry_enospc](#client_retry_enospc)
|
||||
@@ -23,6 +24,23 @@ affect their interaction with the cluster.
|
||||
- [nbd_max_part](#nbd_max_part)
|
||||
- [osd_nearfull_ratio](#osd_nearfull_ratio)
|
||||
|
||||
## client_iothread_count
|
||||
|
||||
- Type: integer
|
||||
- Default: 0
|
||||
|
||||
Number of separate threads for handling TCP network I/O at client library
|
||||
side. Enabling 4 threads usually allows increasing peak performance of each
|
||||
client from approx. 2-3 to 7-8 GByte/s linear read/write and from approx.
|
||||
100-150 to 400 thousand iops, but at the same time it increases latency.
|
||||
Latency increase depends on CPU: with CPU power saving disabled latency
|
||||
only increases by ~10 us (equivalent to Q=1 iops decrease from 10500 to 9500),
|
||||
with CPU power saving enabled it may be as high as 500 us (equivalent to Q=1
|
||||
iops decrease from 2000 to 1000). RDMA isn't affected by this option.
|
||||
|
||||
It's recommended to enable client I/O threads if you don't use RDMA and want
|
||||
to increase peak client performance.
|
||||
|
||||
## client_retry_interval
|
||||
|
||||
- Type: milliseconds
|
||||
|
@@ -9,6 +9,7 @@
|
||||
Данные параметры применяются только к клиентам Vitastor (QEMU, fio, NBD и т.п.) и
|
||||
затрагивают логику их работы с кластером.
|
||||
|
||||
- [client_iothread_count](#client_iothread_count)
|
||||
- [client_retry_interval](#client_retry_interval)
|
||||
- [client_eio_retry_interval](#client_eio_retry_interval)
|
||||
- [client_retry_enospc](#client_retry_enospc)
|
||||
@@ -23,6 +24,24 @@
|
||||
- [nbd_max_part](#nbd_max_part)
|
||||
- [osd_nearfull_ratio](#osd_nearfull_ratio)
|
||||
|
||||
## client_iothread_count
|
||||
|
||||
- Тип: целое число
|
||||
- Значение по умолчанию: 0
|
||||
|
||||
Число отдельных потоков для обработки ввода-вывода через TCP сеть на стороне
|
||||
клиентской библиотеки. Включение 4 потоков обычно позволяет поднять пиковую
|
||||
производительность каждого клиента примерно с 2-3 до 7-8 Гбайт/с линейного
|
||||
чтения/записи и примерно с 100-150 до 400 тысяч операций ввода-вывода в
|
||||
секунду, но ухудшает задержку. Увеличение задержки зависит от процессора:
|
||||
при отключённом энергосбережении CPU это всего ~10 микросекунд (равносильно
|
||||
падению iops с Q=1 с 10500 до 9500), а при включённом это может быть
|
||||
и 500 микросекунд (равносильно падению iops с Q=1 с 2000 до 1000). На работу
|
||||
RDMA данная опция не влияет.
|
||||
|
||||
Рекомендуется включать клиентские потоки ввода-вывода, если вы не используете
|
||||
RDMA и хотите повысить пиковую производительность клиентов.
|
||||
|
||||
## client_retry_interval
|
||||
|
||||
- Тип: миллисекунды
|
||||
|
@@ -56,14 +56,24 @@ Can't be smaller than the OSD data device sector.
|
||||
## immediate_commit
|
||||
|
||||
- Type: string
|
||||
- Default: false
|
||||
- Default: all
|
||||
|
||||
Another parameter which is really important for performance.
|
||||
One of "none", "all" or "small". Global value, may be overridden [at pool level](pool.en.md#immediate_commit).
|
||||
|
||||
This parameter is also really important for performance.
|
||||
|
||||
TLDR: default "all" is optimal for server-grade SSDs with supercapacitor-based
|
||||
power loss protection (nonvolatile write-through cache) and also for most HDDs.
|
||||
"none" or "small" should be only selected if you use desktop SSDs without
|
||||
capacitors or drives with slow write-back cache that can't be disabled. Check
|
||||
immediate_commit of your OSDs in [ls-osd](../usage/cli.en.md#ls-osd).
|
||||
|
||||
Detailed explanation:
|
||||
|
||||
Desktop SSDs are very fast (100000+ iops) for simple random writes
|
||||
without cache flush. However, they are really slow (only around 1000 iops)
|
||||
if you try to fsync() each write, that is, when you want to guarantee that
|
||||
each change gets immediately persisted to the physical media.
|
||||
if you try to fsync() each write, that is, if you want to guarantee that
|
||||
each change gets actually persisted to the physical media.
|
||||
|
||||
Server-grade SSDs with "Advanced/Enhanced Power Loss Protection" or with
|
||||
"Supercapacitor-based Power Loss Protection", on the other hand, are equally
|
||||
@@ -75,8 +85,8 @@ really slow when used with desktop SSDs. Vitastor, however, can also
|
||||
efficiently utilize desktop SSDs by postponing fsync until the client calls
|
||||
it explicitly.
|
||||
|
||||
This is what this parameter regulates. When it's set to "all" the whole
|
||||
Vitastor cluster commits each change to disks immediately and clients just
|
||||
This is what this parameter regulates. When it's set to "all" Vitastor
|
||||
cluster commits each change to disks immediately and clients just
|
||||
ignore fsyncs because they know for sure that they're unneeded. This reduces
|
||||
the amount of network roundtrips performed by clients and improves
|
||||
performance. So it's always better to use server grade SSDs with
|
||||
@@ -99,9 +109,5 @@ Setting this parameter to "all" or "small" in OSD parameters requires enabling
|
||||
[disable_journal_fsync](layout-osd.en.md#disable_journal_fsync) and
|
||||
[disable_meta_fsync](layout-osd.en.md#disable_meta_fsync), setting it to
|
||||
"all" also requires enabling [disable_data_fsync](layout-osd.en.md#disable_data_fsync).
|
||||
|
||||
TLDR: For optimal performance, set immediate_commit to "all" if you only use
|
||||
SSDs with supercapacitor-based power loss protection (nonvolatile
|
||||
write-through cache) for both data and journals in the whole Vitastor
|
||||
cluster. Set it to "small" if you only use such SSDs for journals. Leave
|
||||
empty if your drives have write-back cache.
|
||||
vitastor-disk tries to do that by default, first checking/disabling drive cache.
|
||||
If it can't disable drive cache, OSDs get initialized with "none".
|
||||
|
@@ -57,9 +57,18 @@ amplification) и эффективность распределения нагр
|
||||
## immediate_commit
|
||||
|
||||
- Тип: строка
|
||||
- Значение по умолчанию: false
|
||||
- Значение по умолчанию: all
|
||||
|
||||
Ещё один важный для производительности параметр.
|
||||
Одно из значений "none", "small" или "all". Глобальное значение, может быть
|
||||
переопределено [на уровне пула](pool.ru.md#immediate_commit).
|
||||
|
||||
Данный параметр тоже важен для производительности.
|
||||
|
||||
Вкратце: значение по умолчанию "all" оптимально для всех серверных SSD с
|
||||
суперконденсаторами и также для большинства HDD. "none" и "small" имеет смысл
|
||||
устанавливать только при использовании SSD настольного класса без
|
||||
суперконденсаторов или дисков с медленным неотключаемым кэшем записи.
|
||||
Проверьте настройку immediate_commit своих OSD в выводе команды [ls-osd](../usage/cli.ru.md#ls-osd).
|
||||
|
||||
Модели SSD для настольных компьютеров очень быстрые (100000+ операций в
|
||||
секунду) при простой случайной записи без сбросов кэша. Однако они очень
|
||||
@@ -80,7 +89,7 @@ Power Loss Protection" - одинаково быстрые и со сбросо
|
||||
эффективно утилизировать настольные SSD.
|
||||
|
||||
Данный параметр влияет как раз на это. Когда он установлен в значение "all",
|
||||
весь кластер Vitastor мгновенно фиксирует каждое изменение на физические
|
||||
кластер Vitastor мгновенно фиксирует каждое изменение на физические
|
||||
носители и клиенты могут просто игнорировать запросы fsync, т.к. они точно
|
||||
знают, что fsync-и не нужны. Это уменьшает число необходимых обращений к OSD
|
||||
по сети и улучшает производительность. Поэтому даже с Vitastor лучше всегда
|
||||
@@ -106,10 +115,3 @@ HDD-дисках с внутренним SSD или "медиа" кэшем - н
|
||||
включения [disable_journal_fsync](layout-osd.ru.md#disable_journal_fsync) и
|
||||
[disable_meta_fsync](layout-osd.ru.md#disable_meta_fsync), значение "all"
|
||||
также требует включения [disable_data_fsync](layout-osd.ru.md#disable_data_fsync).
|
||||
|
||||
Итого, вкратце: для оптимальной производительности установите
|
||||
immediate_commit в значение "all", если вы используете в кластере только SSD
|
||||
с суперконденсаторами и для данных, и для журналов. Если вы используете
|
||||
такие SSD для всех журналов, но не для данных - можете установить параметр
|
||||
в "small". Если и какие-то из дисков журналов имеют волатильный кэш записи -
|
||||
оставьте параметр пустым.
|
||||
|
@@ -8,6 +8,14 @@
|
||||
|
||||
These parameters only apply to Monitors.
|
||||
|
||||
- [use_antietcd](#use_antietcd)
|
||||
- [enable_prometheus](#enable_prometheus)
|
||||
- [mon_http_port](#mon_http_port)
|
||||
- [mon_http_ip](#mon_http_ip)
|
||||
- [mon_https_cert](#mon_https_cert)
|
||||
- [mon_https_key](#mon_https_key)
|
||||
- [mon_https_client_auth](#mon_https_client_auth)
|
||||
- [mon_https_ca](#mon_https_ca)
|
||||
- [etcd_mon_ttl](#etcd_mon_ttl)
|
||||
- [etcd_mon_timeout](#etcd_mon_timeout)
|
||||
- [etcd_mon_retries](#etcd_mon_retries)
|
||||
@@ -17,6 +25,87 @@ These parameters only apply to Monitors.
|
||||
- [placement_levels](#placement_levels)
|
||||
- [use_old_pg_combinator](#use_old_pg_combinator)
|
||||
|
||||
## use_antietcd
|
||||
|
||||
- Type: boolean
|
||||
- Default: false
|
||||
|
||||
Enable experimental built-in etcd replacement (clustered key-value database):
|
||||
[antietcd](https://git.yourcmc.ru/vitalif/antietcd/).
|
||||
|
||||
When set to true, monitor runs internal antietcd automatically if it finds
|
||||
a network interface with an IP address matching one of addresses in the
|
||||
`etcd_address` configuration option (in `/etc/vitastor/vitastor.conf` or in
|
||||
the monitor command line). If there are multiple matching addresses, it also
|
||||
checks `antietcd_port` and antietcd is started for the address with the matching port.
|
||||
By default, antietcd accepts connections on the selected IP address, but it
|
||||
can also be overridden manually in the `antietcd_ip` option.
|
||||
|
||||
When antietcd is started, monitor stores cluster metadata itself and exposes
|
||||
an etcd-compatible REST API. On disk, this metadata is stored in
|
||||
`/var/lib/vitastor/mon_2379.json.gz` (can be overridden in antietcd_data_file
|
||||
or antietcd_data_dir options). All other antietcd parameters
|
||||
(see [here](https://git.yourcmc.ru/vitalif/antietcd/)) except node_id,
|
||||
cluster, cluster_key, persist_filter, stale_read can also be set in
|
||||
Vitastor configuration with `antietcd_` prefix.
|
||||
|
||||
You can dump/load data to or from antietcd using the antietcd `anticli` tool:
|
||||
|
||||
```
|
||||
npm exec anticli -e http://etcd:2379/v3 get --prefix '' --no-temp > dump.json
|
||||
npm exec anticli -e http://antietcd:2379/v3 load < dump.json
|
||||
```
|
||||
|
||||
## enable_prometheus
|
||||
|
||||
- Type: boolean
|
||||
- Default: true
|
||||
|
||||
Enable built-in Prometheus metrics exporter at mon_http_port (8060 by default).
|
||||
|
||||
Note that only the active (master) monitor exposes metrics, others return
|
||||
HTTP 503. So you should add all monitor URLs to your Prometheus job configuration.
|
||||
|
||||
Grafana dashboard suitable for this exporter is here: [Vitastor-Grafana-6+.json](../../mon/scripts/Vitastor-Grafana-6+.json).
|
||||
|
||||
## mon_http_port
|
||||
|
||||
- Type: integer
|
||||
- Default: 8060
|
||||
|
||||
HTTP port for monitors to listen on (including metrics exporter)
|
||||
|
||||
## mon_http_ip
|
||||
|
||||
- Type: string
|
||||
|
||||
IP address for monitors to listen on (all addresses by default)
|
||||
|
||||
## mon_https_cert
|
||||
|
||||
- Type: string
|
||||
|
||||
Path to PEM SSL certificate file for monitor to listen using HTTPS
|
||||
|
||||
## mon_https_key
|
||||
|
||||
- Type: string
|
||||
|
||||
Path to PEM SSL private key file for monitor to listen using HTTPS
|
||||
|
||||
## mon_https_client_auth
|
||||
|
||||
- Type: boolean
|
||||
- Default: false
|
||||
|
||||
Enable HTTPS client certificate-based authorization for monitor connections
|
||||
|
||||
## mon_https_ca
|
||||
|
||||
- Type: string
|
||||
|
||||
Path to CA certificate for client HTTPS authorization
|
||||
|
||||
## etcd_mon_ttl
|
||||
|
||||
- Type: seconds
|
||||
|
@@ -8,6 +8,14 @@
|
||||
|
||||
Данные параметры используются только мониторами Vitastor.
|
||||
|
||||
- [use_antietcd](#use_antietcd)
|
||||
- [enable_prometheus](#enable_prometheus)
|
||||
- [mon_http_port](#mon_http_port)
|
||||
- [mon_http_ip](#mon_http_ip)
|
||||
- [mon_https_cert](#mon_https_cert)
|
||||
- [mon_https_key](#mon_https_key)
|
||||
- [mon_https_client_auth](#mon_https_client_auth)
|
||||
- [mon_https_ca](#mon_https_ca)
|
||||
- [etcd_mon_ttl](#etcd_mon_ttl)
|
||||
- [etcd_mon_timeout](#etcd_mon_timeout)
|
||||
- [etcd_mon_retries](#etcd_mon_retries)
|
||||
@@ -17,6 +25,89 @@
|
||||
- [placement_levels](#placement_levels)
|
||||
- [use_old_pg_combinator](#use_old_pg_combinator)
|
||||
|
||||
## use_antietcd
|
||||
|
||||
- Тип: булево (да/нет)
|
||||
- Значение по умолчанию: false
|
||||
|
||||
Включить экспериментальный встроенный заменитель etcd (кластерную БД ключ-значение):
|
||||
[antietcd](https://git.yourcmc.ru/vitalif/antietcd/).
|
||||
|
||||
Если параметр установлен в true, монитор запускает antietcd автоматически,
|
||||
если обнаруживает сетевой интерфейс с одним из адресов, указанных в опции
|
||||
конфигурации `etcd_address` (в `/etc/vitastor/vitastor.conf` или в опциях
|
||||
командной строки монитора). Если таких адресов несколько, также проверяется
|
||||
опция `antietcd_port` и antietcd запускается для адреса с соответствующим
|
||||
портом. По умолчанию antietcd принимает подключения по выбранному совпадающему
|
||||
IP, но его также можно определить вручную опцией `antietcd_ip`.
|
||||
|
||||
При запуске antietcd монитор сам хранит центральные метаданные кластера и
|
||||
выставляет etcd-совместимое REST API. На диске эти метаданные хранятся в файле
|
||||
`/var/lib/vitastor/mon_2379.json.gz` (можно переопределить параметрами
|
||||
antietcd_data_file или antietcd_data_dir). Все остальные параметры antietcd
|
||||
(смотрите [по ссылке](https://git.yourcmc.ru/vitalif/antietcd/)), за исключением
|
||||
node_id, cluster, cluster_key, persist_filter, stale_read также можно задавать
|
||||
в конфигурации Vitastor с префиксом `antietcd_`.
|
||||
|
||||
Вы можете выгружать/загружать данные в или из antietcd с помощью его инструмента
|
||||
`anticli`:
|
||||
|
||||
```
|
||||
npm exec anticli -e http://etcd:2379/v3 get --prefix '' --no-temp > dump.json
|
||||
npm exec anticli -e http://antietcd:2379/v3 load < dump.json
|
||||
```
|
||||
|
||||
## enable_prometheus
|
||||
|
||||
- Тип: булево (да/нет)
|
||||
- Значение по умолчанию: true
|
||||
|
||||
Включить встроенный Prometheus-экспортер метрик на порту mon_http_port (по умолчанию 8060).
|
||||
|
||||
Обратите внимание, что метрики выставляет только активный (главный) монитор, остальные
|
||||
возвращают статус HTTP 503, поэтому вам следует добавлять адреса всех мониторов
|
||||
в задание по сбору метрик Prometheus.
|
||||
|
||||
Дашборд для Grafana, подходящий для этого экспортера: [Vitastor-Grafana-6+.json](../../mon/scripts/Vitastor-Grafana-6+.json).
|
||||
|
||||
## mon_http_port
|
||||
|
||||
- Тип: целое число
|
||||
- Значение по умолчанию: 8060
|
||||
|
||||
Порт, на котором мониторы принимают HTTP-соединения (в том числе для отдачи метрик)
|
||||
|
||||
## mon_http_ip
|
||||
|
||||
- Тип: строка
|
||||
|
||||
IP-адрес, на котором мониторы принимают HTTP-соединения (по умолчанию все адреса)
|
||||
|
||||
## mon_https_cert
|
||||
|
||||
- Тип: строка
|
||||
|
||||
Путь к PEM-файлу SSL-сертификата для монитора, чтобы принимать соединения через HTTPS
|
||||
|
||||
## mon_https_key
|
||||
|
||||
- Тип: строка
|
||||
|
||||
Путь к PEM-файлу секретного SSL-ключа для монитора, чтобы принимать соединения через HTTPS
|
||||
|
||||
## mon_https_client_auth
|
||||
|
||||
- Тип: булево (да/нет)
|
||||
- Значение по умолчанию: false
|
||||
|
||||
Включить в HTTPS-сервере монитора авторизацию по клиентским сертификатам
|
||||
|
||||
## mon_https_ca
|
||||
|
||||
- Тип: строка
|
||||
|
||||
Путь к удостоверяющему сертификату для авторизации клиентских HTTPS соединений
|
||||
|
||||
## etcd_mon_ttl
|
||||
|
||||
- Тип: секунды
|
||||
|
@@ -248,7 +248,7 @@ etcd_report_interval to guarantee that keepalive actually works.
|
||||
## etcd_ws_keepalive_interval
|
||||
|
||||
- Type: seconds
|
||||
- Default: 30
|
||||
- Default: 5
|
||||
- Can be changed online: yes
|
||||
|
||||
etcd websocket ping interval required to keep the connection alive and
|
||||
|
@@ -259,7 +259,7 @@ etcd_report_interval, чтобы keepalive гарантированно рабо
|
||||
## etcd_ws_keepalive_interval
|
||||
|
||||
- Тип: секунды
|
||||
- Значение по умолчанию: 30
|
||||
- Значение по умолчанию: 5
|
||||
- Можно менять на лету: да
|
||||
|
||||
Интервал проверки живости вебсокет-подключений к etcd.
|
||||
|
@@ -10,6 +10,7 @@ These parameters only apply to OSDs, are not fixed at the moment of OSD drive
|
||||
initialization and can be changed - either with an OSD restart or, for some of
|
||||
them, even without restarting by updating configuration in etcd.
|
||||
|
||||
- [osd_iothread_count](#osd_iothread_count)
|
||||
- [etcd_report_interval](#etcd_report_interval)
|
||||
- [etcd_stats_interval](#etcd_stats_interval)
|
||||
- [run_primary](#run_primary)
|
||||
@@ -61,6 +62,18 @@ them, even without restarting by updating configuration in etcd.
|
||||
- [recovery_tune_sleep_min_us](#recovery_tune_sleep_min_us)
|
||||
- [recovery_tune_sleep_cutoff_us](#recovery_tune_sleep_cutoff_us)
|
||||
|
||||
## osd_iothread_count
|
||||
|
||||
- Type: integer
|
||||
- Default: 0
|
||||
|
||||
TCP network I/O thread count for OSD. When non-zero, a single OSD process
|
||||
may handle more TCP I/O, but at a cost of increased latency because thread
|
||||
switching overhead occurs. RDMA isn't affected by this option.
|
||||
|
||||
Because of latency, instead of enabling OSD I/O threads it's recommended to
|
||||
just create multiple OSDs per disk, or use RDMA.
|
||||
|
||||
## etcd_report_interval
|
||||
|
||||
- Type: seconds
|
||||
|
@@ -11,6 +11,7 @@
|
||||
момент с помощью перезапуска OSD, а некоторые и без перезапуска, с помощью
|
||||
изменения конфигурации в etcd.
|
||||
|
||||
- [osd_iothread_count](#osd_iothread_count)
|
||||
- [etcd_report_interval](#etcd_report_interval)
|
||||
- [etcd_stats_interval](#etcd_stats_interval)
|
||||
- [run_primary](#run_primary)
|
||||
@@ -62,6 +63,19 @@
|
||||
- [recovery_tune_sleep_min_us](#recovery_tune_sleep_min_us)
|
||||
- [recovery_tune_sleep_cutoff_us](#recovery_tune_sleep_cutoff_us)
|
||||
|
||||
## osd_iothread_count
|
||||
|
||||
- Тип: целое число
|
||||
- Значение по умолчанию: 0
|
||||
|
||||
Число отдельных потоков для обработки ввода-вывода через TCP-сеть на
|
||||
стороне OSD. Включение опции позволяет каждому отдельному OSD передавать
|
||||
по сети больше данных, но ухудшает задержку из-за накладных расходов
|
||||
переключения потоков. На работу RDMA опция не влияет.
|
||||
|
||||
Из-за задержек вместо включения потоков ввода-вывода OSD рекомендуется
|
||||
просто создавать по несколько OSD на каждом диске, или использовать RDMA.
|
||||
|
||||
## etcd_report_interval
|
||||
|
||||
- Тип: секунды
|
||||
|
@@ -1,3 +1,32 @@
|
||||
- name: client_iothread_count
|
||||
type: int
|
||||
default: 0
|
||||
online: false
|
||||
info: |
|
||||
Number of separate threads for handling TCP network I/O at client library
|
||||
side. Enabling 4 threads usually allows increasing peak performance of each
|
||||
client from approx. 2-3 to 7-8 GByte/s linear read/write and from approx.
|
||||
100-150 to 400 thousand iops, but at the same time it increases latency.
|
||||
Latency increase depends on CPU: with CPU power saving disabled latency
|
||||
only increases by ~10 us (equivalent to Q=1 iops decrease from 10500 to 9500),
|
||||
with CPU power saving enabled it may be as high as 500 us (equivalent to Q=1
|
||||
iops decrease from 2000 to 1000). RDMA isn't affected by this option.
|
||||
|
||||
It's recommended to enable client I/O threads if you don't use RDMA and want
|
||||
to increase peak client performance.
|
||||
info_ru: |
|
||||
Число отдельных потоков для обработки ввода-вывода через TCP сеть на стороне
|
||||
клиентской библиотеки. Включение 4 потоков обычно позволяет поднять пиковую
|
||||
производительность каждого клиента примерно с 2-3 до 7-8 Гбайт/с линейного
|
||||
чтения/записи и примерно с 100-150 до 400 тысяч операций ввода-вывода в
|
||||
секунду, но ухудшает задержку. Увеличение задержки зависит от процессора:
|
||||
при отключённом энергосбережении CPU это всего ~10 микросекунд (равносильно
|
||||
падению iops с Q=1 с 10500 до 9500), а при включённом это может быть
|
||||
и 500 микросекунд (равносильно падению iops с Q=1 с 2000 до 1000). На работу
|
||||
RDMA данная опция не влияет.
|
||||
|
||||
Рекомендуется включать клиентские потоки ввода-вывода, если вы не используете
|
||||
RDMA и хотите повысить пиковую производительность клиентов.
|
||||
- name: client_retry_interval
|
||||
type: ms
|
||||
min: 10
|
||||
|
@@ -47,14 +47,24 @@
|
||||
Не может быть меньше размера сектора дисков данных OSD.
|
||||
- name: immediate_commit
|
||||
type: string
|
||||
default: false
|
||||
default: all
|
||||
info: |
|
||||
Another parameter which is really important for performance.
|
||||
One of "none", "all" or "small". Global value, may be overridden [at pool level](pool.en.md#immediate_commit).
|
||||
|
||||
This parameter is also really important for performance.
|
||||
|
||||
TLDR: default "all" is optimal for server-grade SSDs with supercapacitor-based
|
||||
power loss protection (nonvolatile write-through cache) and also for most HDDs.
|
||||
"none" or "small" should be only selected if you use desktop SSDs without
|
||||
capacitors or drives with slow write-back cache that can't be disabled. Check
|
||||
immediate_commit of your OSDs in [ls-osd](../usage/cli.en.md#ls-osd).
|
||||
|
||||
Detailed explanation:
|
||||
|
||||
Desktop SSDs are very fast (100000+ iops) for simple random writes
|
||||
without cache flush. However, they are really slow (only around 1000 iops)
|
||||
if you try to fsync() each write, that is, when you want to guarantee that
|
||||
each change gets immediately persisted to the physical media.
|
||||
if you try to fsync() each write, that is, if you want to guarantee that
|
||||
each change gets actually persisted to the physical media.
|
||||
|
||||
Server-grade SSDs with "Advanced/Enhanced Power Loss Protection" or with
|
||||
"Supercapacitor-based Power Loss Protection", on the other hand, are equally
|
||||
@@ -66,8 +76,8 @@
|
||||
efficiently utilize desktop SSDs by postponing fsync until the client calls
|
||||
it explicitly.
|
||||
|
||||
This is what this parameter regulates. When it's set to "all" the whole
|
||||
Vitastor cluster commits each change to disks immediately and clients just
|
||||
This is what this parameter regulates. When it's set to "all" Vitastor
|
||||
cluster commits each change to disks immediately and clients just
|
||||
ignore fsyncs because they know for sure that they're unneeded. This reduces
|
||||
the amount of network roundtrips performed by clients and improves
|
||||
performance. So it's always better to use server grade SSDs with
|
||||
@@ -90,14 +100,19 @@
|
||||
[disable_journal_fsync](layout-osd.en.yml#disable_journal_fsync) and
|
||||
[disable_meta_fsync](layout-osd.en.yml#disable_meta_fsync), setting it to
|
||||
"all" also requires enabling [disable_data_fsync](layout-osd.en.yml#disable_data_fsync).
|
||||
|
||||
TLDR: For optimal performance, set immediate_commit to "all" if you only use
|
||||
SSDs with supercapacitor-based power loss protection (nonvolatile
|
||||
write-through cache) for both data and journals in the whole Vitastor
|
||||
cluster. Set it to "small" if you only use such SSDs for journals. Leave
|
||||
empty if your drives have write-back cache.
|
||||
vitastor-disk tries to do that by default, first checking/disabling drive cache.
|
||||
If it can't disable drive cache, OSDs get initialized with "none".
|
||||
info_ru: |
|
||||
Ещё один важный для производительности параметр.
|
||||
Одно из значений "none", "small" или "all". Глобальное значение, может быть
|
||||
переопределено [на уровне пула](pool.ru.md#immediate_commit).
|
||||
|
||||
Данный параметр тоже важен для производительности.
|
||||
|
||||
Вкратце: значение по умолчанию "all" оптимально для всех серверных SSD с
|
||||
суперконденсаторами и также для большинства HDD. "none" и "small" имеет смысл
|
||||
устанавливать только при использовании SSD настольного класса без
|
||||
суперконденсаторов или дисков с медленным неотключаемым кэшем записи.
|
||||
Проверьте настройку immediate_commit своих OSD в выводе команды [ls-osd](../usage/cli.ru.md#ls-osd).
|
||||
|
||||
Модели SSD для настольных компьютеров очень быстрые (100000+ операций в
|
||||
секунду) при простой случайной записи без сбросов кэша. Однако они очень
|
||||
@@ -118,7 +133,7 @@
|
||||
эффективно утилизировать настольные SSD.
|
||||
|
||||
Данный параметр влияет как раз на это. Когда он установлен в значение "all",
|
||||
весь кластер Vitastor мгновенно фиксирует каждое изменение на физические
|
||||
кластер Vitastor мгновенно фиксирует каждое изменение на физические
|
||||
носители и клиенты могут просто игнорировать запросы fsync, т.к. они точно
|
||||
знают, что fsync-и не нужны. Это уменьшает число необходимых обращений к OSD
|
||||
по сети и улучшает производительность. Поэтому даже с Vitastor лучше всегда
|
||||
@@ -144,10 +159,3 @@
|
||||
включения [disable_journal_fsync](layout-osd.ru.yml#disable_journal_fsync) и
|
||||
[disable_meta_fsync](layout-osd.ru.yml#disable_meta_fsync), значение "all"
|
||||
также требует включения [disable_data_fsync](layout-osd.ru.yml#disable_data_fsync).
|
||||
|
||||
Итого, вкратце: для оптимальной производительности установите
|
||||
immediate_commit в значение "all", если вы используете в кластере только SSD
|
||||
с суперконденсаторами и для данных, и для журналов. Если вы используете
|
||||
такие SSD для всех журналов, но не для данных - можете установить параметр
|
||||
в "small". Если и какие-то из дисков журналов имеют волатильный кэш записи -
|
||||
оставьте параметр пустым.
|
||||
|
@@ -1,3 +1,103 @@
|
||||
- name: use_antietcd
|
||||
type: bool
|
||||
default: false
|
||||
info: |
|
||||
Enable experimental built-in etcd replacement (clustered key-value database):
|
||||
[antietcd](https://git.yourcmc.ru/vitalif/antietcd/).
|
||||
|
||||
When set to true, monitor runs internal antietcd automatically if it finds
|
||||
a network interface with an IP address matching one of addresses in the
|
||||
`etcd_address` configuration option (in `/etc/vitastor/vitastor.conf` or in
|
||||
the monitor command line). If there are multiple matching addresses, it also
|
||||
checks `antietcd_port` and antietcd is started for address with matching port.
|
||||
By default, antietcd accepts connections on the selected IP address, but it
|
||||
can also be overridden manually in the `antietcd_ip` option.
|
||||
|
||||
When antietcd is started, monitor stores cluster metadata itself and exposes
|
||||
an etcd-compatible REST API. On disk, these metadata are stored in
|
||||
`/var/lib/vitastor/mon_2379.json.gz` (can be overridden in antietcd_data_file
|
||||
or antietcd_data_dir options). All other antietcd parameters
|
||||
(see [here](https://git.yourcmc.ru/vitalif/antietcd/)) except node_id,
|
||||
cluster, cluster_key, persist_filter, stale_read can also be set in
|
||||
Vitastor configuration with `antietcd_` prefix.
|
||||
|
||||
You can dump/load data to or from antietcd using Antietcd `anticli` tool:
|
||||
|
||||
```
|
||||
npm exec anticli -e http://etcd:2379/v3 get --prefix '' --no-temp > dump.json
|
||||
npm exec anticli -e http://antietcd:2379/v3 load < dump.json
|
||||
```
|
||||
info_ru: |
|
||||
Включить экспериментальный встроенный заменитель etcd (кластерную БД ключ-значение):
|
||||
[antietcd](https://git.yourcmc.ru/vitalif/antietcd/).
|
||||
|
||||
Если параметр установлен в true, монитор запускает antietcd автоматически,
|
||||
если обнаруживает сетевой интерфейс с одним из адресов, указанных в опции
|
||||
конфигурации `etcd_address` (в `/etc/vitastor/vitastor.conf` или в опциях
|
||||
командной строки монитора). Если таких адресов несколько, также проверяется
|
||||
опция `antietcd_port` и antietcd запускается для адреса с соответствующим
|
||||
портом. По умолчанию antietcd принимает подключения по выбранному совпадающему
|
||||
IP, но его также можно определить вручную опцией `antietcd_ip`.
|
||||
|
||||
При запуске antietcd монитор сам хранит центральные метаданные кластера и
|
||||
выставляет etcd-совместимое REST API. На диске эти метаданные хранятся в файле
|
||||
`/var/lib/vitastor/mon_2379.json.gz` (можно переопределить параметрами
|
||||
antietcd_data_file или antietcd_data_dir). Все остальные параметры antietcd
|
||||
(смотрите [по ссылке](https://git.yourcmc.ru/vitalif/antietcd/)), за исключением
|
||||
node_id, cluster, cluster_key, persist_filter, stale_read также можно задавать
|
||||
в конфигурации Vitastor с префиксом `antietcd_`.
|
||||
|
||||
Вы можете выгружать/загружать данные в или из antietcd с помощью его инструмента
|
||||
`anticli`:
|
||||
|
||||
```
|
||||
npm exec anticli -e http://etcd:2379/v3 get --prefix '' --no-temp > dump.json
|
||||
npm exec anticli -e http://antietcd:2379/v3 load < dump.json
|
||||
```
|
||||
- name: enable_prometheus
|
||||
type: bool
|
||||
default: true
|
||||
info: |
|
||||
Enable built-in Prometheus metrics exporter at mon_http_port (8060 by default).
|
||||
|
||||
Note that only the active (master) monitor exposes metrics, others return
|
||||
HTTP 503. So you should add all monitor URLs to your Prometheus job configuration.
|
||||
|
||||
Grafana dashboard suitable for this exporter is here: [Vitastor-Grafana-6+.json](../../mon/scripts/Vitastor-Grafana-6+.json).
|
||||
info_ru: |
|
||||
Включить встроенный Prometheus-экспортер метрик на порту mon_http_port (по умолчанию 8060).
|
||||
|
||||
Обратите внимание, что метрики выставляет только активный (главный) монитор, остальные
|
||||
возвращают статус HTTP 503, поэтому вам следует добавлять адреса всех мониторов
|
||||
в задание по сбору метрик Prometheus.
|
||||
|
||||
Дашборд для Grafana, подходящий для этого экспортера: [Vitastor-Grafana-6+.json](../../mon/scripts/Vitastor-Grafana-6+.json).
|
||||
- name: mon_http_port
|
||||
type: int
|
||||
default: 8060
|
||||
info: HTTP port for monitors to listen on (including metrics exporter)
|
||||
info_ru: Порт, на котором мониторы принимают HTTP-соединения (в том числе для отдачи метрик)
|
||||
- name: mon_http_ip
|
||||
type: string
|
||||
info: IP address for monitors to listen on (all addresses by default)
|
||||
info_ru: IP-адрес, на котором мониторы принимают HTTP-соединения (по умолчанию все адреса)
|
||||
- name: mon_https_cert
|
||||
type: string
|
||||
info: Path to PEM SSL certificate file for monitor to listen using HTTPS
|
||||
info_ru: Путь к PEM-файлу SSL-сертификата для монитора, чтобы принимать соединения через HTTPS
|
||||
- name: mon_https_key
|
||||
type: string
|
||||
info: Path to PEM SSL private key file for monitor to listen using HTTPS
|
||||
info_ru: Путь к PEM-файлу секретного SSL-ключа для монитора, чтобы принимать соединения через HTTPS
|
||||
- name: mon_https_client_auth
|
||||
type: bool
|
||||
default: false
|
||||
info: Enable HTTPS client certificate-based authorization for monitor connections
|
||||
info_ru: Включить в HTTPS-сервере монитора авторизацию по клиентским сертификатам
|
||||
- name: mon_https_ca
|
||||
type: string
|
||||
info: Path to CA certificate for client HTTPS authorization
|
||||
info_ru: Путь к удостоверяющему сертификату для авторизации клиентских HTTPS соединений
|
||||
- name: etcd_mon_ttl
|
||||
type: sec
|
||||
min: 5
|
||||
|
@@ -282,7 +282,7 @@
|
||||
etcd_report_interval, чтобы keepalive гарантированно работал.
|
||||
- name: etcd_ws_keepalive_interval
|
||||
type: sec
|
||||
default: 30
|
||||
default: 5
|
||||
online: true
|
||||
info: |
|
||||
etcd websocket ping interval required to keep the connection alive and
|
||||
|
@@ -1,3 +1,21 @@
|
||||
- name: osd_iothread_count
|
||||
type: int
|
||||
default: 0
|
||||
info: |
|
||||
TCP network I/O thread count for OSD. When non-zero, a single OSD process
|
||||
may handle more TCP I/O, but at a cost of increased latency because thread
|
||||
switching overhead occurs. RDMA isn't affected by this option.
|
||||
|
||||
Because of latency, instead of enabling OSD I/O threads it's recommended to
|
||||
just create multiple OSDs per disk, or use RDMA.
|
||||
info_ru: |
|
||||
Число отдельных потоков для обработки ввода-вывода через TCP-сеть на
|
||||
стороне OSD. Включение опции позволяет каждому отдельному OSD передавать
|
||||
по сети больше данных, но ухудшает задержку из-за накладных расходов
|
||||
переключения потоков. На работу RDMA опция не влияет.
|
||||
|
||||
Из-за задержек вместо включения потоков ввода-вывода OSD рекомендуется
|
||||
просто создавать по несколько OSD на каждом диске, или использовать RDMA.
|
||||
- name: etcd_report_interval
|
||||
type: sec
|
||||
default: 5
|
||||
|
@@ -16,8 +16,6 @@
|
||||
- Debian 10 (Buster): `deb https://vitastor.io/debian buster main`
|
||||
- Add `-oldstable` to bookworm/bullseye/buster in this line to install the last
|
||||
stable version from 0.9.x branch instead of 1.x
|
||||
- For Debian 10 (Buster) also enable backports repository:
|
||||
`deb http://deb.debian.org/debian buster-backports main`
|
||||
- Install packages: `apt update; apt install vitastor lp-solve etcd linux-image-amd64 qemu-system-x86`
|
||||
|
||||
## CentOS
|
||||
|
@@ -16,8 +16,6 @@
|
||||
- Debian 10 (Buster): `deb https://vitastor.io/debian buster main`
|
||||
- Добавьте `-oldstable` к слову bookworm/bullseye/buster в этой строке, чтобы
|
||||
установить последнюю стабильную версию из ветки 0.9.x вместо 1.x
|
||||
- Для Debian 10 (Buster) также включите репозиторий backports:
|
||||
`deb http://deb.debian.org/debian buster-backports main`
|
||||
- Установите пакеты: `apt update; apt install vitastor lp-solve etcd linux-image-amd64 qemu-system-x86`
|
||||
|
||||
## CentOS
|
||||
|
@@ -34,6 +34,8 @@
|
||||
- [Client write-back cache](../config/client.en.md#client_enable_writeback)
|
||||
- [Intelligent recovery auto-tuning](../config/osd.en.md#recovery_tune_interval)
|
||||
- [Clustered file system](../usage/nfs.en.md#vitastorfs)
|
||||
- [Experimental internal etcd replacement - antietcd](../config/monitor.en.md#use_antietcd)
|
||||
- [Built-in Prometheus metric exporter](../config/monitor.en.md#enable_prometheus)
|
||||
|
||||
## Plugins and tools
|
||||
|
||||
|
@@ -36,6 +36,8 @@
|
||||
- [Буферизация записи на стороне клиента](../config/client.ru.md#client_enable_writeback)
|
||||
- [Интеллектуальная автоподстройка скорости восстановления](../config/osd.ru.md#recovery_tune_interval)
|
||||
- [Кластерная файловая система](../usage/nfs.ru.md#vitastorfs)
|
||||
- [Экспериментальная встроенная замена etcd - antietcd](../config/monitor.ru.md#use_antietcd)
|
||||
- [Встроенный Prometheus-экспортер метрик](../config/monitor.ru.md#enable_prometheus)
|
||||
|
||||
## Драйверы и инструменты
|
||||
|
||||
|
@@ -22,7 +22,7 @@
|
||||
with lazy fsync, but prepare for inferior single-thread latency. Read more about capacitors
|
||||
[here](../config/layout-cluster.en.md#immediate_commit).
|
||||
- If you want to use HDDs, get modern HDDs with Media Cache or SSD Cache: HGST Ultrastar,
|
||||
Toshiba MG08, Seagate EXOS or something similar. If your drives don't have such cache then
|
||||
Toshiba MG, Seagate EXOS or something similar. If your drives don't have such cache then
|
||||
you also need small SSDs for journal and metadata (even 2 GB per 1 TB of HDD space is enough).
|
||||
- Get a fast network (at least 10 Gbit/s). Something like Mellanox ConnectX-4 with RoCEv2 is ideal.
|
||||
- Disable CPU powersaving: `cpupower idle-set -D 0 && cpupower frequency-set -g performance`.
|
||||
@@ -33,7 +33,7 @@
|
||||
- SATA SSD: Micron 5100/5200/5300/5400, Samsung PM863/PM883/PM893, Intel D3-S4510/4520/4610/4620, Kingston DC500M
|
||||
- NVMe: Micron 9100/9200/9300/9400, Micron 7300/7450, Samsung PM983/PM9A3, Samsung PM1723/1735/1743,
|
||||
Intel DC-P3700/P4500/P4600, Intel D7-P5500/P5600, Intel Optane, Kingston DC1000B/DC1500M
|
||||
- HDD: HGST Ultrastar, Toshiba MG06/MG07/MG08, Seagate EXOS
|
||||
- HDD: HGST Ultrastar, Toshiba MG, Seagate EXOS
|
||||
|
||||
## Configure monitors
|
||||
|
||||
@@ -68,10 +68,6 @@ On the monitor hosts:
|
||||
but some free unpartitioned space must be available because the script creates new partitions for journals.
|
||||
- You can change OSD configuration in units or in `vitastor.conf`.
|
||||
Check [Configuration Reference](../config.en.md) for parameter descriptions.
|
||||
- If all your drives have capacitors, and even if not, but if you ran `vitastor-disk`
|
||||
without `--disable_data_fsync off` at the first step, then put the following
|
||||
setting into etcd: \
|
||||
`etcdctl --endpoints=... put /vitastor/config/global '{"immediate_commit":"all"}'`
|
||||
- Start all OSDs: `systemctl start vitastor.target`
|
||||
|
||||
## Create a pool
|
||||
@@ -88,6 +84,10 @@ For EC pools the configuration should look like the following:
|
||||
vitastor-cli create-pool testpool --ec 2+2 --pg_count 256
|
||||
```
|
||||
|
||||
Add `--immediate_commit none` if you added `--disable_data_fsync off` at the OSD
|
||||
initialization step, or if `vitastor-disk` complained about being unable to
|
||||
disable drive cache.
|
||||
|
||||
After you do this, one of the monitors will configure PGs and OSDs will start them.
|
||||
|
||||
If you use HDDs you should also add `"block_size": 1048576` to pool configuration.
|
||||
|
@@ -69,11 +69,6 @@
|
||||
для журналов, на SSD должно быть доступно свободное нераспределённое место.
|
||||
- Вы можете менять параметры OSD в юнитах systemd или в `vitastor.conf`. Описания параметров
|
||||
смотрите в [справке по конфигурации](../config.ru.md).
|
||||
- Если все ваши диски - серверные с конденсаторами, и даже если нет, но при этом
|
||||
вы не добавляли опцию `--disable_data_fsync off` на первом шаге, а `vitastor-disk`
|
||||
не ругался на невозможность отключения кэша дисков, пропишите следующую настройку
|
||||
в глобальную конфигурацию в etcd: \
|
||||
`etcdctl --endpoints=... put /vitastor/config/global '{"immediate_commit":"all"}'`.
|
||||
- Запустите все OSD: `systemctl start vitastor.target`
|
||||
|
||||
## Создайте пул
|
||||
@@ -90,6 +85,10 @@ vitastor-cli create-pool testpool --pg_size 2 --pg_count 256
|
||||
vitastor-cli create-pool testpool --ec 2+2 --pg_count 256
|
||||
```
|
||||
|
||||
Добавьте также опцию `--immediate_commit none`, если вы добавляли `--disable_data_fsync off`
|
||||
на этапе инициализации OSD, либо если `vitastor-disk` ругался на невозможность отключения
|
||||
кэша дисков.
|
||||
|
||||
После этого один из мониторов должен сконфигурировать PG, а OSD должны запустить их.
|
||||
|
||||
Если вы используете HDD-диски, то добавьте в конфигурацию пулов опцию `"block_size": 1048576`.
|
||||
@@ -123,4 +122,4 @@ vitastor-cli create -s 10G testimg
|
||||
Если вы хотите использовать не только блочные образы виртуальных машин или контейнеров,
|
||||
а также кластерную файловую систему, то:
|
||||
|
||||
- [Следуйте инструкциям](../usage/nfs.en.md#vitastorfs)
|
||||
- [Следуйте инструкциям](../usage/nfs.ru.md#vitastorfs)
|
||||
|
@@ -107,16 +107,17 @@ If a PG is active it can also have any number of the following additional states
|
||||
|
||||
## Removing a healthy disk
|
||||
|
||||
Befor removing a healthy disk from the cluster set its OSD weight(s) to 0 to
|
||||
move data away. To do that, add `"reweight":0` to etcd key `/vitastor/config/osd/<OSD_NUMBER>`.
|
||||
For example:
|
||||
Before removing a healthy disk from the cluster set its OSD weight(s) to 0 to
|
||||
move data away. To do that, run `vitastor-cli modify-osd --reweight 0 <OSD_NUMBER>`.
|
||||
|
||||
Then wait until rebalance finishes and remove OSD by running `vitastor-disk purge /dev/vitastor/osdN-data`.
|
||||
|
||||
Zero weight can also be put manually into etcd key `/vitastor/config/osd/<OSD_NUMBER>`, for example:
|
||||
|
||||
```
|
||||
etcdctl --endpoints=http://1.1.1.1:2379/v3 put /vitastor/config/osd/1 '{"reweight":0}'
|
||||
```
|
||||
|
||||
Then wait until rebalance finishes and remove OSD by running `vitastor-disk purge /dev/vitastor/osdN-data`.
|
||||
|
||||
## Removing a failed disk
|
||||
|
||||
If a disk is already dead, its OSD(s) are likely already stopped.
|
||||
|
@@ -105,14 +105,16 @@ PG должны очень быстро переходить из них в др
|
||||
## Удаление исправного диска
|
||||
|
||||
Перед удалением исправного диска из кластера установите его OSD вес в 0, чтобы убрать с него данные.
|
||||
Для этого добавьте в ключ `/vitastor/config/osd/<НОМЕР_OSD>` в etcd значение `"reweight":0`, например:
|
||||
Для этого выполните команду `vitastor-cli modify-osd --reweight 0 <НОМЕР_OSD>`.
|
||||
|
||||
Дождитесь завершения перебалансировки данных, после чего удалите OSD командой `vitastor-disk purge /dev/vitastor/osdN-data`.
|
||||
|
||||
Также вес 0 можно прописать вручную прямо в etcd в ключ `/vitastor/config/osd/<НОМЕР_OSD>`, например:
|
||||
|
||||
```
|
||||
etcdctl --endpoints=http://1.1.1.1:2379/v3 put /vitastor/config/osd/1 '{"reweight":0}'
|
||||
```
|
||||
|
||||
Дождитесь завершения ребаланса, после чего удалите OSD командой `vitastor-disk purge /dev/vitastor/osdN-data`.
|
||||
|
||||
## Удаление неисправного диска
|
||||
|
||||
Если диск уже умер, его OSD, скорее всего, уже будет/будут остановлен(ы).
|
||||
|
@@ -24,6 +24,10 @@ It supports the following commands:
|
||||
- [fix](#fix)
|
||||
- [alloc-osd](#alloc-osd)
|
||||
- [rm-osd](#rm-osd)
|
||||
- [osd-tree](#osd-tree)
|
||||
- [ls-osd](#ls-osd)
|
||||
- [modify-osd](#modify-osd)
|
||||
- [pg-list](#pg-list)
|
||||
- [create-pool](#create-pool)
|
||||
- [modify-pool](#modify-pool)
|
||||
- [ls-pools](#ls-pools)
|
||||
@@ -174,6 +178,7 @@ Remove inode data without changing metadata.
|
||||
--wait-list Retrieve full objects listings before starting to remove objects.
|
||||
Requires more memory, but allows to show correct removal progress.
|
||||
--min-offset Purge only data starting with specified offset.
|
||||
--max-offset Purge only data before specified offset.
|
||||
```
|
||||
|
||||
## merge-data
|
||||
@@ -246,6 +251,82 @@ Refuses to remove OSDs with data without `--force` and `--allow-data-loss`.
|
||||
With `--dry-run` only checks if deletion is possible without data loss and
|
||||
redundancy degradation.
|
||||
|
||||
## osd-tree
|
||||
|
||||
`vitastor-cli osd-tree [-l|--long]`
|
||||
|
||||
Show current OSD tree, optionally with I/O statistics if -l is specified.
|
||||
|
||||
Example output:
|
||||
|
||||
```
|
||||
TYPE NAME UP SIZE USED% TAGS WEIGHT BLOCK BITMAP IMM NOOUT
|
||||
host kaveri
|
||||
disk nvme0n1p1
|
||||
osd 3 down 100G 0 % abc,kaveri 1 128k 4k none -
|
||||
osd 4 down 100G 0 % 1 128k 4k none -
|
||||
disk nvme1n1p1
|
||||
osd 5 down 100G 0 % abc,kaveri 1 128k 4k none -
|
||||
osd 6 down 100G 0 % 1 128k 4k none -
|
||||
host stump
|
||||
osd 1 up 100G 37.29 % osdone 1 128k 4k all -
|
||||
osd 2 up 100G 26.8 % abc 1 128k 4k all -
|
||||
osd 7 up 100G 21.84 % 1 128k 4k all -
|
||||
osd 8 up 100G 21.63 % 1 128k 4k all -
|
||||
osd 9 up 100G 20.69 % 1 128k 4k all -
|
||||
osd 10 up 100G 21.61 % 1 128k 4k all -
|
||||
osd 11 up 100G 21.53 % 1 128k 4k all -
|
||||
osd 12 up 100G 22.4 % 1 128k 4k all -
|
||||
```
|
||||
|
||||
## ls-osd
|
||||
|
||||
`vitastor-cli osds|ls-osd|osd-ls [-l|--long]`
|
||||
|
||||
Show current OSDs as list, optionally with I/O statistics if -l is specified.
|
||||
|
||||
Example output:
|
||||
|
||||
```
|
||||
OSD PARENT UP SIZE USED% TAGS WEIGHT BLOCK BITMAP IMM NOOUT
|
||||
3 kaveri/nvme0n1p1 down 100G 0 % globl,kaveri 1 128k 4k none -
|
||||
4 kaveri/nvme0n1p1 down 100G 0 % 1 128k 4k none -
|
||||
5 kaveri/nvme1n1p1 down 100G 0 % globl,kaveri 1 128k 4k none -
|
||||
6 kaveri/nvme1n1p1 down 100G 0 % 1 128k 4k none -
|
||||
1 stump up 100G 37.29 % osdone 1 128k 4k all -
|
||||
2 stump up 100G 26.8 % globl 1 128k 4k all -
|
||||
7 stump up 100G 21.84 % 1 128k 4k all -
|
||||
8 stump up 100G 21.63 % 1 128k 4k all -
|
||||
9 stump up 100G 20.69 % 1 128k 4k all -
|
||||
10 stump up 100G 21.61 % 1 128k 4k all -
|
||||
11 stump up 100G 21.53 % 1 128k 4k all -
|
||||
12 stump up 100G 22.4 % 1 128k 4k all -
|
||||
```
|
||||
|
||||
## modify-osd
|
||||
|
||||
`vitastor-cli modify-osd [--tags tag1,tag2,...] [--reweight <number>] [--noout true/false] <osd_number>`
|
||||
|
||||
Set OSD reweight, tags or noout flag. See the detailed description in [OSD config documentation](../config/pool.en.md#osd-settings).
|
||||
|
||||
## pg-list
|
||||
|
||||
`vitastor-cli pg-list|pg-ls|list-pg|ls-pg|ls-pgs [OPTIONS] [state1+state2] [^state3] [...]`
|
||||
|
||||
List PGs with any of listed state filters (^ or ! in the beginning is negation). Options:
|
||||
|
||||
```
|
||||
--pool <pool name or number> Only list PGs of the given pool.
|
||||
--min <min pg number> Only list PGs with number >= min.
|
||||
--max <max pg number> Only list PGs with number <= max.
|
||||
```
|
||||
|
||||
Examples:
|
||||
|
||||
`vitastor-cli pg-list active+degraded`
|
||||
|
||||
`vitastor-cli pg-list ^active`
|
||||
|
||||
## create-pool
|
||||
|
||||
`vitastor-cli create-pool|pool-create <name> (-s <pg_size>|--ec <N>+<K>) -n <pg_count> [OPTIONS]`
|
||||
|
@@ -23,6 +23,10 @@ vitastor-cli - интерфейс командной строки для адм
|
||||
- [merge-data](#merge-data)
|
||||
- [alloc-osd](#alloc-osd)
|
||||
- [rm-osd](#rm-osd)
|
||||
- [osd-tree](#osd-tree)
|
||||
- [ls-osd](#ls-osd)
|
||||
- [modify-osd](#modify-osd)
|
||||
- [pg-list](#pg-list)
|
||||
- [create-pool](#create-pool)
|
||||
- [modify-pool](#modify-pool)
|
||||
- [ls-pools](#ls-pools)
|
||||
@@ -182,6 +186,7 @@ vitastor-cli snap-create [-p|--pool <id|name>] <image>@<snapshot>
|
||||
--wait-list Сначала запросить полный листинг объектов, а потом начать удалять.
|
||||
Требует больше памяти, но позволяет правильно печатать прогресс удаления.
|
||||
--min-offset Удалять только данные, начиная с заданного смещения.
|
||||
--max-offset Удалять только данные до (исключительно) заданного смещения.
|
||||
```
|
||||
|
||||
## merge-data
|
||||
@@ -263,6 +268,83 @@ vitastor-cli snap-create [-p|--pool <id|name>] <image>@<snapshot>
|
||||
С опцией `--dry-run` только проверяет, возможно ли удаление без потери данных и деградации
|
||||
избыточности.
|
||||
|
||||
## osd-tree
|
||||
|
||||
`vitastor-cli osd-tree [-l|--long]`
|
||||
|
||||
Показать дерево OSD, со статистикой ввода-вывода, если установлено -l.
|
||||
|
||||
Пример вывода:
|
||||
|
||||
```
|
||||
TYPE NAME UP SIZE USED% TAGS WEIGHT BLOCK BITMAP IMM NOOUT
|
||||
host kaveri
|
||||
disk nvme0n1p1
|
||||
osd 3 down 100G 0 % globl,kaveri 1 128k 4k none -
|
||||
osd 4 down 100G 0 % 1 128k 4k none -
|
||||
disk nvme1n1p1
|
||||
osd 5 down 100G 0 % globl,kaveri 1 128k 4k none -
|
||||
osd 6 down 100G 0 % 1 128k 4k none -
|
||||
host stump
|
||||
osd 1 up 100G 37.29 % osdone 1 128k 4k all -
|
||||
osd 2 up 100G 26.8 % globl 1 128k 4k all -
|
||||
osd 7 up 100G 21.84 % 1 128k 4k all -
|
||||
osd 8 up 100G 21.63 % 1 128k 4k all -
|
||||
osd 9 up 100G 20.69 % 1 128k 4k all -
|
||||
osd 10 up 100G 21.61 % 1 128k 4k all -
|
||||
osd 11 up 100G 21.53 % 1 128k 4k all -
|
||||
osd 12 up 100G 22.4 % 1 128k 4k all -
|
||||
```
|
||||
|
||||
## ls-osd
|
||||
|
||||
`vitastor-cli osds|ls-osd|osd-ls [-l|--long]`
|
||||
|
||||
Показать список OSD, со статистикой ввода-вывода, если установлено -l.
|
||||
|
||||
Пример вывода:
|
||||
|
||||
```
|
||||
OSD PARENT UP SIZE USED% TAGS WEIGHT BLOCK BITMAP IMM NOOUT
|
||||
3 kaveri/nvme0n1p1 down 100G 0 % globl,kaveri 1 128k 4k none -
|
||||
4 kaveri/nvme0n1p1 down 100G 0 % 1 128k 4k none -
|
||||
5 kaveri/nvme1n1p1 down 100G 0 % globl,kaveri 1 128k 4k none -
|
||||
6 kaveri/nvme1n1p1 down 100G 0 % 1 128k 4k none -
|
||||
1 stump up 100G 37.29 % osdone 1 128k 4k all -
|
||||
2 stump up 100G 26.8 % globl 1 128k 4k all -
|
||||
7 stump up 100G 21.84 % 1 128k 4k all -
|
||||
8 stump up 100G 21.63 % 1 128k 4k all -
|
||||
9 stump up 100G 20.69 % 1 128k 4k all -
|
||||
10 stump up 100G 21.61 % 1 128k 4k all -
|
||||
11 stump up 100G 21.53 % 1 128k 4k all -
|
||||
12 stump up 100G 22.4 % 1 128k 4k all -
|
||||
```
|
||||
|
||||
## modify-osd
|
||||
|
||||
`vitastor-cli modify-osd [--tags tag1,tag2,...] [--reweight <number>] [--noout true/false] <osd_number>`
|
||||
|
||||
Установить вес OSD, теги или флаг noout. Смотрите подробное описание в [документации настроек OSD](../config/pool.ru.md#настройки-osd).
|
||||
|
||||
## pg-list
|
||||
|
||||
`vitastor-cli pg-list|pg-ls|list-pg|ls-pg|ls-pgs [OPTIONS] [state1+state2] [^state3] [...]`
|
||||
|
||||
Вывести список PG с состояниями, удовлетворяющими любому из переданных фильтров (^ или !
|
||||
в начале фильтра означает отрицание). Опции:
|
||||
|
||||
```
|
||||
--pool <pool name or number> Only list PGs of the given pool.
|
||||
--min <min pg number> Only list PGs with number >= min.
|
||||
--max <max pg number> Only list PGs with number <= max.
|
||||
```
|
||||
|
||||
Примеры:
|
||||
|
||||
`vitastor-cli pg-list active+degraded`
|
||||
|
||||
`vitastor-cli pg-list ^active`
|
||||
|
||||
## create-pool
|
||||
|
||||
`vitastor-cli create-pool|pool-create <name> (-s <pg_size>|--ec <N>+<K>) -n <pg_count> [OPTIONS]`
|
||||
|
@@ -11,6 +11,8 @@ Vitastor has two file system implementations. Both can be used via `vitastor-nfs
|
||||
Commands:
|
||||
- [mount](#mount)
|
||||
- [start](#start)
|
||||
- [upgrade](#upgrade)
|
||||
- [defrag](#defrag)
|
||||
|
||||
## Pseudo-FS
|
||||
|
||||
@@ -86,10 +88,6 @@ POSIX features currently not implemented in VitastorFS:
|
||||
- Modification time (`mtime`) is updated lazily every second (like `-o lazytime`)
|
||||
|
||||
Other notable missing features which should be addressed in the future:
|
||||
- Defragmentation of "shared" inodes. Files smaller than pool object size (block_size
|
||||
multiplied by data part count if pool is EC) are internally stored in large block
|
||||
volumes sequentially, one after another, and leave garbage after deleting or resizing.
|
||||
Defragmentator will be implemented to collect this garbage.
|
||||
- Inode ID reuse. Currently inode IDs always grow, the limit is 2^48 inodes, so
|
||||
in theory you may hit it if you create and delete a very large number of files
|
||||
- Compaction of the key-value B-Tree. Current implementation never merges or deletes
|
||||
@@ -139,6 +137,37 @@ Start network NFS server. Options:
|
||||
| `--port <PORT>` | use port \<PORT> for NFS services (default is 2049) |
|
||||
| `--portmap 0` | do not listen on port 111 (portmap/rpcbind, requires root) |
|
||||
|
||||
### upgrade
|
||||
|
||||
`vitastor-nfs --fs <NAME> upgrade`
|
||||
|
||||
Upgrade FS metadata. Can be run online, but server(s) should be restarted after upgrade.
|
||||
|
||||
### defrag
|
||||
|
||||
`vitastor-nfs --fs <NAME> defrag [OPTIONS] [--dry-run]`
|
||||
|
||||
Defragment volumes used for small file storage having more than \<defrag_percent> %
|
||||
of data removed. Can be run online.
|
||||
|
||||
In VitastorFS, small files are stored in large "volumes" / "shared inodes" one
|
||||
after another. When you delete or extend such files, they are moved and garbage is left
|
||||
behind. Defragmentation removes garbage and moves data still in use to new volumes.
|
||||
|
||||
Options:
|
||||
|
||||
| <!-- --> | <!-- --> |
|
||||
|--------------------------|------------------------------------------------------------------------ |
|
||||
| --volume_untouched 86400 | Defragment volumes last appended to at least this number of seconds ago |
|
||||
| --defrag_percent 50 | Defragment volumes with at least this % of removed data |
|
||||
| --defrag_block_count 16 | Read this number of pool blocks at once during defrag |
|
||||
| --defrag_iodepth 16 | Move up to this number of files in parallel during defrag |
|
||||
| --trace | Print verbose defragmentation status |
|
||||
| --dry-run | Skip modifications, only print status |
|
||||
| --recalc-stats | Recalculate all volume statistics |
|
||||
| --include-empty | Include old and empty volumes; make sure to restart NFS servers before using it |
|
||||
| --no-rm | Move, but do not delete data |
|
||||
|
||||
## Common options
|
||||
|
||||
| <!-- --> | <!-- --> |
|
||||
|
@@ -11,6 +11,8 @@
|
||||
Команды:
|
||||
- [mount](#mount)
|
||||
- [start](#start)
|
||||
- [upgrade](#upgrade)
|
||||
- [defrag](#defrag)
|
||||
|
||||
## Псевдо-ФС
|
||||
|
||||
@@ -88,11 +90,6 @@ JSON-формате :-). Для инспекции содержимого БД
|
||||
- Времена модификации (`mtime`) отслеживаются асинхронно (как будто ФС смонтирована с `-o lazytime`)
|
||||
|
||||
Другие недостающие функции, которые нужно добавить в будущем:
|
||||
- Дефрагментация "общих инодов". На уровне реализации ФС файлы, меньшие, чем размер
|
||||
объекта пула (block_size умножить на число частей данных, если пул EC),
|
||||
упаковываются друг за другом в большие "общие" иноды/тома. Если такие файлы удалять
|
||||
или увеличивать, они перемещаются и оставляют за собой "мусор", вот тут-то и нужен
|
||||
дефрагментатор.
|
||||
- Переиспользование номеров инодов. В текущей реализации номера инодов всё время
|
||||
увеличиваются, так что в теории вы можете упереться в лимит, если насоздаёте
|
||||
и наудаляете больше, чем 2^48 файлов.
|
||||
@@ -145,6 +142,40 @@ JSON-формате :-). Для инспекции содержимого БД
|
||||
| `--port <PORT>` | использовать порт \<PORT> для NFS-сервисов (по умолчанию 2049) |
|
||||
| `--portmap 0` | отключить сервис portmap/rpcbind на порту 111 (по умолчанию включён и требует root привилегий) |
|
||||
|
||||
### upgrade
|
||||
|
||||
`vitastor-nfs --fs <NAME> upgrade`
|
||||
|
||||
Обновить метаданные ФС. Можно запускать онлайн (при запущенных серверах NFS), но после выполнения их всё
|
||||
же желательно перезапустить.
|
||||
|
||||
### defrag
|
||||
|
||||
`vitastor-nfs --fs <NAME> defrag [OPTIONS] [--dry-run]`
|
||||
|
||||
Дефрагментировать тома, используемые для хранения мелких файлов, в которых более, чем
|
||||
\<defrag_percent> процентов данных удалено. Можно запускать онлайн.
|
||||
|
||||
На уровне реализации ФС файлы, меньшие, чем размер объекта пула (block_size умножить на число
|
||||
частей данных, если пул EC), упаковываются друг за другом в большие "тома" / "общие иноды".
|
||||
Когда такие файлы удаляются или увеличиваются, они перемещаются и оставляют за собой "мусор".
|
||||
|
||||
При дефрагментации мусор удаляется, а всё ещё используемые данные перемещаются в новые тома.
|
||||
|
||||
Опции:
|
||||
|
||||
| <!-- --> | <!-- --> |
|
||||
|--------------------------|------------------------------------------------------------------------ |
|
||||
| --volume_untouched 86400 | Дефрагментировать только тома, в которые уже не писали это число секунд |
|
||||
| --defrag_percent 50 | Дефрагментировать только тома, в которых этот % данных удалён |
|
||||
| --defrag_block_count 16 | Читать это количество блоков пула за один раз |
|
||||
| --defrag_iodepth 16 | Перемещать одновременно до этого числа файлов |
|
||||
| --trace | Печатать детальную статистику дефрагментации |
|
||||
| --dry-run | Не производить никаких изменений, только описать выполняемые действия |
|
||||
| --recalc-stats | Пересчитать и сохранить статистику всех томов |
|
||||
| --include-empty | Дефрагментировать старые и пустые тома; обязательно перезапустите NFS-сервера после использования этой опции |
|
||||
| --no-rm | Перемещать, но не удалять данные |
|
||||
|
||||
## Общие опции
|
||||
|
||||
| <!-- --> | <!-- --> |
|
||||
|
@@ -11,6 +11,7 @@ module.exports = {
|
||||
"ecmaVersion": 2020
|
||||
},
|
||||
"plugins": [
|
||||
"import"
|
||||
],
|
||||
"rules": {
|
||||
"indent": [
|
||||
@@ -44,6 +45,10 @@ module.exports = {
|
||||
],
|
||||
"node/shebang": [
|
||||
"off"
|
||||
],
|
||||
"import/no-unresolved": [
|
||||
2,
|
||||
{ "commonjs": true }
|
||||
]
|
||||
}
|
||||
};
|
||||
|
191
mon/antietcd_adapter.js
Normal file
191
mon/antietcd_adapter.js
Normal file
@@ -0,0 +1,191 @@
|
||||
// Copyright (c) Vitaliy Filippov, 2019+
|
||||
// License: VNPL-1.1 (see README.md for details)
|
||||
|
||||
const fs = require('fs');
|
||||
|
||||
const AntiEtcd = require('antietcd');
|
||||
|
||||
const vitastor_persist_filter = require('./vitastor_persist_filter.js');
|
||||
const { b64, local_ips } = require('./utils.js');
|
||||
|
||||
/**
 * Adapter exposing an embedded AntiEtcd instance to the monitor through the
 * same methods as the etcd adapter: parse_config(), start_watcher(),
 * stop_watcher(), become_master() and etcd_call().
 */
class AntiEtcdAdapter
{
    /**
     * Start an embedded AntiEtcd instance if `config.use_antietcd` is set and
     * exactly one of the configured etcd addresses refers to a local IP.
     *
     * Options from the JSON config file (config.config_path or
     * /etc/vitastor/vitastor.conf) are merged with `config`; values from
     * `config` win.
     *
     * @param {object} config monitor options
     * @returns {Promise<object|undefined>} the started AntiEtcd instance, or
     *   undefined when antietcd is disabled or no etcd address is local.
     *   Exits the process when more than one address matches a local IP.
     */
    static async start_antietcd(config)
    {
        let antietcd;
        if (config.use_antietcd)
        {
            const config_path = config.config_path||'/etc/vitastor/vitastor.conf';
            let fileConfig = {};
            if (fs.existsSync(config_path))
            {
                fileConfig = JSON.parse(fs.readFileSync(config_path, { encoding: 'utf-8' }));
            }
            let mergedConfig = { ...fileConfig, ...config };
            let cluster = mergedConfig.etcd_address;
            if (!(cluster instanceof Array))
                cluster = cluster ? (''+(cluster||'')).split(/,+/) : [];
            // Normalize every address to lowercase host:port and deduplicate.
            // Entries are trimmed and empty ones are dropped so that lists like
            // "a:1, b:2" or "a:1," are handled the same way as in EtcdAdapter.
            cluster = Object.keys(cluster.reduce((a, url) =>
            {
                const addr = (''+url).trim().toLowerCase().replace(/^https?:\/\//, '').replace(/\/.*$/, '');
                if (addr)
                {
                    a[addr] = true;
                }
                return a;
            }, {}));
            const cfg_port = mergedConfig.antietcd_port;
            const is_local = local_ips(true).reduce((a, c) => { a[c] = true; return a; }, {});
            // Keep only [ip, port] pairs that point to this host (and to antietcd_port, if set)
            const selected = cluster.map(s => s.split(':', 2)).filter(ip => is_local[ip[0]] && (!cfg_port || ip[1] == cfg_port));
            if (selected.length > 1)
            {
                console.error('More than 1 etcd_address matches local IPs, please specify port');
                process.exit(1);
            }
            else if (selected.length == 1)
            {
                const antietcd_config = {
                    ip: selected[0][0],
                    port: selected[0][1],
                    data: mergedConfig.antietcd_data_file || ((mergedConfig.antietcd_data_dir || '/var/lib/vitastor') + '/mon_'+selected[0][1]+'.json.gz'),
                    persist_filter: vitastor_persist_filter(mergedConfig.etcd_prefix || '/vitastor'),
                    node_id: selected[0][0]+':'+selected[0][1], // node_id = ip:port
                    cluster: (cluster.length == 1 ? null : cluster),
                    cluster_key: (mergedConfig.etcd_prefix || '/vitastor'),
                    stale_read: 1,
                };
                // Pass through antietcd_* options with the prefix removed. Values computed
                // above are not overridden, except for `ip` and `cluster_key`.
                for (const key in config)
                {
                    if (key.startsWith('antietcd_'))
                    {
                        const noprefix = key.slice(9);
                        if (!(noprefix in antietcd_config) || noprefix == 'ip' || noprefix == 'cluster_key')
                        {
                            antietcd_config[noprefix] = config[key];
                        }
                    }
                }
                antietcd = new AntiEtcd(antietcd_config);
                await antietcd.start();
            }
            else
            {
                console.log('Antietcd is enabled, but etcd_address does not contain local IPs, proceeding without it');
            }
        }
        return antietcd;
    }

    /**
     * @param {object} mon monitor instance (its config, etcd_watch_revision,
     *   etcd_lease_id, stopped, get_mon_state() and on_message() are used)
     * @param {object} antietcd started AntiEtcd instance
     */
    constructor(mon, antietcd)
    {
        this.mon = mon;
        this.antietcd = antietcd;
        // Callbacks waiting for this node to become the leader
        this.on_leader = [];
        this.on_change = (st) =>
        {
            if (st.state === 'leader')
            {
                for (const cb of this.on_leader)
                {
                    cb();
                }
                this.on_leader = [];
            }
        };
        this.antietcd.on('raftchange', this.on_change);
    }

    /** No-op: nothing is parsed here, the method exists for adapter interface parity. */
    parse_config(/*config*/)
    {
    }

    /** Unsubscribe from 'raftchange' events and cancel the active watch, if any. */
    stop_watcher()
    {
        this.antietcd.off('raftchange', this.on_change);
        const watch_id = this.watch_id;
        if (watch_id)
        {
            this.watch_id = null;
            this.antietcd.cancel_watch(watch_id).catch(console.error);
        }
    }

    /**
     * (Re)create the watch over all keys under etcd_prefix, starting from
     * mon.etcd_watch_revision. Watch messages are delivered to
     * mon.on_message() via setImmediate().
     */
    async start_watcher()
    {
        if (this.watch_id)
        {
            await this.antietcd.cancel_watch(this.watch_id);
            this.watch_id = null;
        }
        const watch_id = await this.antietcd.create_watch({
            key: b64(this.mon.config.etcd_prefix+'/'),
            range_end: b64(this.mon.config.etcd_prefix+'0'),
            start_revision: ''+this.mon.etcd_watch_revision,
            watch_id: 1,
            progress_notify: true,
        }, (message) =>
        {
            setImmediate(() => this.mon.on_message(message.result));
        });
        console.log('Successfully subscribed to antietcd revision '+this.antietcd.etctree.mod_revision);
        this.watch_id = watch_id;
    }

    /**
     * In clustered mode (antietcd.raft is set) wait until a 'raftchange' event
     * reports the 'leader' state, then write the monitor state to
     * <etcd_prefix>/mon/master bound to the monitor's lease.
     */
    async become_master()
    {
        if (!this.antietcd.raft)
        {
            console.log('Running in non-clustered mode');
        }
        else
        {
            console.log('Waiting to become master');
            await new Promise(ok => this.on_leader.push(ok));
        }
        const state = { ...this.mon.get_mon_state(), id: ''+this.mon.etcd_lease_id };
        await this.etcd_call('/kv/txn', {
            success: [ { requestPut: { key: b64(this.mon.config.etcd_prefix+'/mon/master'), value: b64(JSON.stringify(state)), lease: ''+this.mon.etcd_lease_id } } ],
        }, this.mon.config.etcd_start_timeout, 0);
        if (this.antietcd.raft)
        {
            console.log('Became master');
        }
    }

    /**
     * Call an AntiEtcd API method using an etcd-style path.
     *
     * @param {string} path etcd API path, e.g. '/kv/txn' (mapped to 'kv_txn')
     * @param {object} body request body
     * @param {number} timeout minimum delay between attempts, in ms
     * @param {number} retries maximum attempts; a negative value retries
     *   forever, values in [0, 1) are treated as 1
     * @returns {Promise<object>} the first response without an `error` field
     * @throws {Error} when the monitor is stopped or all attempts fail
     */
    async etcd_call(path, body, timeout, retries)
    {
        let retry = 0;
        if (retries >= 0 && retries < 1)
        {
            retries = 1;
        }
        // Time of the previous attempt; 0 means "no attempt yet", so the first one never waits
        let prev = 0;
        while (retries < 0 || retry < retries)
        {
            retry++;
            if (this.mon.stopped)
            {
                throw new Error('Monitor instance is stopped');
            }
            try
            {
                if (Date.now()-prev < timeout)
                {
                    await new Promise(ok => setTimeout(ok, timeout-(Date.now()-prev)));
                }
                prev = Date.now();
                const res = await this.antietcd.api(path.replace(/^\/+/, '').replace(/\/+$/, '').replace(/\/+/g, '_'), body);
                if (res.error)
                {
                    console.error('Failed to query antietcd '+path+' (retry '+retry+'/'+retries+'): '+res.error);
                }
                else
                {
                    return res;
                }
            }
            catch (e)
            {
                console.error('Failed to query antietcd '+path+' (retry '+retry+'/'+retries+'): '+e.stack);
            }
        }
        throw new Error('Failed to query antietcd ('+retries+' retries)');
    }
}
|
||||
|
||||
module.exports = AntiEtcdAdapter;
|
352
mon/etcd_adapter.js
Normal file
352
mon/etcd_adapter.js
Normal file
@@ -0,0 +1,352 @@
|
||||
// Copyright (c) Vitaliy Filippov, 2019+
|
||||
// License: VNPL-1.1 (see README.md for details)
|
||||
|
||||
const http = require('http');
|
||||
const WebSocket = require('ws');
|
||||
const { b64, local_ips } = require('./utils.js');
|
||||
|
||||
const MON_STOPPED = 'Monitor instance is stopped';
|
||||
|
||||
/**
 * Monitor adapter for a real etcd cluster: selects an etcd endpoint, keeps
 * a watch websocket open and performs HTTP JSON API calls with retries.
 */
class EtcdAdapter
{
    /**
     * @param {object} mon monitor instance (its config, stopped flag,
     *   etcd_watch_revision, etcd_lease_id, get_mon_state(), on_message()
     *   and die() are used)
     */
    constructor(mon)
    {
        this.mon = mon;
        this.ws = null;
        this.ws_alive = false;
        this.ws_keepalive_timer = null;
    }

    /** Read etcd endpoints from config.etcd_address (or config.etcd_url). */
    parse_config(config)
    {
        this.parse_etcd_addresses(config.etcd_address||config.etcd_url);
    }

    /**
     * Split the address list into local (this.etcd_local) and remote
     * (this.etcd_urls) endpoint URLs. The scheme defaults to http and '/v3'
     * is appended to addresses without a path. Exits the process when the
     * list is empty.
     *
     * @param {string|string[]} addrs comma-separated string or array
     */
    parse_etcd_addresses(addrs)
    {
        const is_local_ip = local_ips(true).reduce((a, c) => { a[c] = true; return a; }, {});
        this.etcd_local = [];
        this.etcd_urls = [];
        this.selected_etcd_url = null;
        this.etcd_urls_to_try = [];
        if (!(addrs instanceof Array))
            addrs = addrs ? (''+(addrs||'')).split(/,/) : [];
        if (!addrs.length)
        {
            console.error('Vitastor etcd address(es) not specified. Please set on the command line or in the config file');
            process.exit(1);
        }
        for (let url of addrs)
        {
            let scheme = 'http';
            // Cut the scheme off and remember it
            url = url.trim().replace(/^(https?):\/\//, (m, m1) => { scheme = m1; return ''; });
            const slash = url.indexOf('/');
            const colon = url.indexOf(':');
            // The host is everything before the first ':' or, without a port, before the first '/'
            const is_local = is_local_ip[colon >= 0 ? url.substr(0, colon) : (slash >= 0 ? url.substr(0, slash) : url)];
            url = scheme+'://'+(slash >= 0 ? url : url+'/v3');
            if (is_local)
                this.etcd_local.push(url);
            else
                this.etcd_urls.push(url);
        }
    }

    /**
     * Return the currently selected etcd URL or select the next one.
     * Local endpoints are tried first, remote ones follow in random order;
     * the queue is refilled when exhausted.
     */
    pick_next_etcd()
    {
        if (this.selected_etcd_url)
            return this.selected_etcd_url;
        if (!this.etcd_urls_to_try || !this.etcd_urls_to_try.length)
        {
            this.etcd_urls_to_try = [ ...this.etcd_local ];
            const others = [ ...this.etcd_urls ];
            while (others.length)
            {
                const url = others.splice(0|(others.length*Math.random()), 1);
                this.etcd_urls_to_try.push(url[0]);
            }
        }
        this.selected_etcd_url = this.etcd_urls_to_try.shift();
        return this.selected_etcd_url;
    }

    /**
     * Close the watch websocket, stop the keepalive timer and deselect the
     * endpoint if it is still `cur_addr` (defaults to the selected one).
     */
    stop_watcher(cur_addr)
    {
        cur_addr = cur_addr || this.selected_etcd_url;
        if (this.ws)
        {
            console.log('Disconnected from etcd at '+this.ws_used_url);
            this.ws.close();
            this.ws = null;
        }
        if (this.ws_keepalive_timer)
        {
            clearInterval(this.ws_keepalive_timer);
            this.ws_keepalive_timer = null;
        }
        if (this.selected_etcd_url == cur_addr)
        {
            this.selected_etcd_url = null;
        }
    }

    /** Stop the watcher and start it again; failures are passed to mon.die. */
    restart_watcher(cur_addr)
    {
        this.stop_watcher(cur_addr);
        this.start_watcher(this.mon.config.etcd_mon_retries).catch(this.mon.die);
    }

    /**
     * Open the watch websocket (trying endpoints one by one), subscribe to
     * all keys under etcd_prefix starting from mon.etcd_watch_revision and
     * forward events to mon.on_message(). Calls mon.die() when no endpoint
     * could be connected.
     *
     * @param {number} retries maximum connection attempts; falsy values and
     *   values below 1 are treated as 1
     */
    async start_watcher(retries)
    {
        let retry = 0;
        if (!retries || retries < 1)
        {
            retries = 1;
        }
        // Last attempt time per endpoint, used to wait etcd_start_timeout between attempts
        const tried = {};
        while (retries < 0 || retry < retries)
        {
            const cur_addr = this.pick_next_etcd();
            // http://... -> ws://..., https://... -> wss://...
            const base = 'ws'+cur_addr.substr(4);
            let now = Date.now();
            if (tried[base] && now-tried[base] < this.mon.config.etcd_start_timeout)
            {
                await new Promise(ok => setTimeout(ok, this.mon.config.etcd_start_timeout-(now-tried[base])));
                now = Date.now();
            }
            tried[base] = now;
            if (this.mon.stopped)
            {
                return;
            }
            const ok = await new Promise(ok =>
            {
                const ws = new WebSocket(base+'/watch');
                this.ws = ws;
                this.ws_used_url = cur_addr;
                const timer_id = setTimeout(() =>
                {
                    // Only touch the socket this timer was created for
                    if (this.ws === ws)
                    {
                        console.log('Disconnected from etcd at '+this.ws_used_url);
                        ws.close();
                        this.ws = null;
                    }
                    ok(false);
                }, this.mon.config.etcd_mon_timeout);
                const fail = () =>
                {
                    // The attempt is over: without this the stale timer would later
                    // close whatever websocket is current at that moment
                    clearTimeout(timer_id);
                    ok(false);
                };
                ws.on('error', fail);
                ws.on('open', () =>
                {
                    ws.removeListener('error', fail);
                    clearTimeout(timer_id);
                    ok(true);
                });
            });
            if (ok)
                break;
            if (this.selected_etcd_url == cur_addr)
                this.selected_etcd_url = null;
            this.ws = null;
            retry++;
        }
        if (!this.ws)
        {
            this.mon.die('Failed to open etcd watch websocket');
            return;
        }
        if (this.mon.stopped)
        {
            this.stop_watcher();
            return;
        }
        const cur_addr = this.selected_etcd_url;
        this.ws_alive = true;
        // Keepalive: send a progress request every interval; if nothing was received
        // since the previous tick, consider the connection dead and restart it
        this.ws_keepalive_timer = setInterval(() =>
        {
            if (this.ws_alive && this.ws)
            {
                this.ws_alive = false;
                this.ws.send(JSON.stringify({ progress_request: {} }));
            }
            else
            {
                console.log('etcd websocket timed out, restarting it');
                this.restart_watcher(cur_addr);
            }
        }, (Number(this.mon.config.etcd_ws_keepalive_interval) || 5)*1000);
        this.ws.on('error', () => this.restart_watcher(cur_addr));
        this.ws.send(JSON.stringify({
            create_request: {
                key: b64(this.mon.config.etcd_prefix+'/'),
                range_end: b64(this.mon.config.etcd_prefix+'0'),
                start_revision: ''+this.mon.etcd_watch_revision,
                watch_id: 1,
                progress_notify: true,
            },
        }));
        this.ws.on('message', (msg) =>
        {
            if (this.mon.stopped)
            {
                this.stop_watcher();
                return;
            }
            this.ws_alive = true;
            let data;
            try
            {
                data = JSON.parse(msg);
            }
            catch (e)
            {
                // Unparseable messages are reported below as unknown
            }
            if (!data || !data.result)
            {
                console.error('Unknown message received from watch websocket: '+msg);
            }
            else if (data.result.canceled)
            {
                // etcd watch canceled
                if (data.result.compact_revision)
                {
                    // we may miss events if we proceed
                    this.mon.die('Revisions before '+data.result.compact_revision+' were compacted by etcd, exiting');
                    // NOTE(review): return so that die() is not called a second time
                    // with a generic reason if die() does not terminate the process
                    return;
                }
                this.mon.die('Watch canceled by etcd, reason: '+data.result.cancel_reason+', exiting');
            }
            else if (data.result.created)
            {
                // etcd watch created
                console.log('Successfully subscribed to etcd at '+this.selected_etcd_url+', revision '+data.result.header.revision);
            }
            else
            {
                this.mon.on_message(data.result);
            }
        });
    }

    /**
     * Loop until the <etcd_prefix>/mon/master key is created by this monitor
     * (a transaction that succeeds only if the key does not exist yet),
     * sleeping etcd_start_timeout ms between attempts.
     */
    async become_master()
    {
        const state = { ...this.mon.get_mon_state(), id: ''+this.mon.etcd_lease_id };
        // eslint-disable-next-line no-constant-condition
        while (1)
        {
            const res = await this.etcd_call('/kv/txn', {
                compare: [ { target: 'CREATE', create_revision: 0, key: b64(this.mon.config.etcd_prefix+'/mon/master') } ],
                success: [ { requestPut: { key: b64(this.mon.config.etcd_prefix+'/mon/master'), value: b64(JSON.stringify(state)), lease: ''+this.mon.etcd_lease_id } } ],
            }, this.mon.config.etcd_start_timeout, 0);
            if (res.succeeded)
            {
                break;
            }
            console.log('Waiting to become master');
            await new Promise(ok => setTimeout(ok, this.mon.config.etcd_start_timeout));
        }
        console.log('Became master');
    }

    /**
     * POST a JSON request to the selected etcd endpoint, switching to the
     * next endpoint on transport errors.
     *
     * @param {string} path API path appended to the endpoint URL, e.g. '/kv/txn'
     * @param {object} body request body
     * @param {number} timeout request timeout and minimum delay between
     *   attempts to the same endpoint, in ms
     * @param {number} retries maximum attempts; a negative value retries
     *   forever, values in [0, 1) are treated as 1
     * @returns {Promise<object>} parsed JSON response
     * @throws {Error} when the monitor is stopped, etcd returns an error
     *   in the response body, or all attempts fail
     */
    async etcd_call(path, body, timeout, retries)
    {
        let retry = 0;
        if (retries >= 0 && retries < 1)
        {
            retries = 1;
        }
        const tried = {};
        while (retries < 0 || retry < retries)
        {
            retry++;
            const base = this.pick_next_etcd();
            let now = Date.now();
            if (tried[base] && now-tried[base] < timeout)
            {
                await new Promise(ok => setTimeout(ok, timeout-(now-tried[base])));
                now = Date.now();
            }
            tried[base] = now;
            if (this.mon.stopped)
            {
                throw new Error(MON_STOPPED);
            }
            const res = await POST(base+path, body, timeout);
            if (this.mon.stopped)
            {
                throw new Error(MON_STOPPED);
            }
            if (res.error)
            {
                // Transport-level failure: deselect this endpoint and try the next one
                if (this.selected_etcd_url == base)
                    this.selected_etcd_url = null;
                console.error('Failed to query etcd '+path+' (retry '+retry+'/'+retries+'): '+res.error);
                continue;
            }
            if (res.json)
            {
                if (res.json.error)
                {
                    // An error reported by etcd itself is not retried
                    console.error(path+': etcd returned error: '+res.json.error);
                    break;
                }
                return res.json;
            }
        }
        throw new Error('Failed to query etcd ('+retries+' retries)');
    }
}
|
||||
|
||||
/**
 * Send a JSON POST request and resolve with the outcome; never rejects.
 *
 * @param {string} url full request URL
 * @param {object} body value serialized with JSON.stringify()
 * @param {number} timeout time to wait for the response headers, in ms;
 *   no timeout is applied when it is not positive
 * @returns {Promise<object>} `{ response, json }` on HTTP 200 with a valid
 *   JSON body; otherwise an object with an `error` field: 'timeout',
 *   an Error, or the raw response text (with `code`) for non-200 statuses
 */
function POST(url, body, timeout)
{
    return new Promise(ok =>
    {
        const body_text = Buffer.from(JSON.stringify(body));
        let timer_id = null;
        // Resolve the promise and cancel the timeout timer so that it never
        // outlives the request (only the first resolution takes effect)
        const finish = (result) =>
        {
            if (timer_id)
            {
                clearTimeout(timer_id);
                timer_id = null;
            }
            ok(result);
        };
        let req = http.request(url, { method: 'POST', headers: {
            'Content-Type': 'application/json',
            'Content-Length': body_text.length,
        } }, (res) =>
        {
            if (!req)
            {
                // Already timed out
                return;
            }
            // The timeout only covers the time until response headers arrive
            clearTimeout(timer_id);
            timer_id = null;
            let res_body = '';
            res.setEncoding('utf8');
            res.on('error', (error) => finish({ error }));
            res.on('data', chunk => { res_body += chunk; });
            res.on('end', () =>
            {
                if (res.statusCode != 200)
                {
                    finish({ error: res_body, code: res.statusCode });
                    return;
                }
                try
                {
                    res_body = JSON.parse(res_body);
                    finish({ response: res, json: res_body });
                }
                catch (e)
                {
                    finish({ error: e, response: res, body: res_body });
                }
            });
        });
        if (timeout > 0)
        {
            timer_id = setTimeout(() =>
            {
                timer_id = null;
                if (req)
                    req.abort();
                req = null;
                ok({ error: 'timeout' });
            }, timeout);
        }
        req.on('error', (error) => finish({ error }));
        req.on('close', () => finish({ error: new Error('Connection closed prematurely') }));
        req.write(body_text);
        req.end();
    });
}
|
||||
|
||||
module.exports = EtcdAdapter;
|
394
mon/etcd_schema.js
Normal file
394
mon/etcd_schema.js
Normal file
@@ -0,0 +1,394 @@
|
||||
// Copyright (c) Vitaliy Filippov, 2019+
|
||||
// License: VNPL-1.1 (see README.md for details)
|
||||
|
||||
// FIXME document all etcd keys and config variables in the form of JSON schema or similar
|
||||
// Set of etcd keys (relative to the etcd prefix) flagged as "non-empty";
// the value 1 only marks membership
const etcd_nonempty_keys = {
    'config/global': 1,
    'config/node_placement': 1,
    'config/pools': 1,
    'config/pgs': 1,
    'history/last_clean_pgs': 1,
    'stats': 1,
};
|
||||
// Whitelist of etcd keys (relative to the etcd prefix). Every pattern is
// anchored on both sides, so a key must match one alternative as a whole.
const etcd_allow = new RegExp('^'+[
    'config/global',
    'config/node_placement',
    'config/pools',
    'config/osd/[1-9]\\d*',
    'config/pgs',
    'config/inode/[1-9]\\d*/[1-9]\\d*',
    'osd/state/[1-9]\\d*',
    'osd/stats/[1-9]\\d*',
    'osd/inodestats/[1-9]\\d*',
    'osd/space/[1-9]\\d*',
    'mon/master',
    'mon/member/[a-f0-9]+',
    'pg/state/[1-9]\\d*/[1-9]\\d*',
    'pg/stats/[1-9]\\d*/[1-9]\\d*',
    'pg/history/[1-9]\\d*/[1-9]\\d*',
    'history/last_clean_pgs',
    'inode/stats/[1-9]\\d*/\\d+',
    'pool/stats/[1-9]\\d*',
    'stats',
    'index/image/.*',
    'index/maxid/[1-9]\\d*',
].join('$|^')+'$');
|
||||
|
||||
// Skeleton of the etcd key tree. Only the structure (nested empty objects)
// is defined in code; the block comments describe the expected contents.
const etcd_tree = {
    config: {
        /* global: {
            // WARNING: NOT ALL OF THESE ARE ACTUALLY CONFIGURABLE HERE
            // THIS IS JUST A POOR MAN'S CONFIG DOCUMENTATION
            // etcd connection
            config_path: "/etc/vitastor/vitastor.conf",
            etcd_prefix: "/vitastor",
            // etcd connection - configurable online
            etcd_address: "10.0.115.10:2379/v3",
            // mon
            etcd_mon_ttl: 5, // min: 1
            etcd_mon_timeout: 1000, // ms. min: 0
            etcd_mon_retries: 5, // min: 0
            mon_change_timeout: 1000, // ms. min: 100
            mon_retry_change_timeout: 50, // ms. min: 10
            mon_stats_timeout: 1000, // ms. min: 100
            osd_out_time: 600, // seconds. min: 0
            placement_levels: { datacenter: 1, rack: 2, host: 3, osd: 4, ... },
            use_old_pg_combinator: false,
            // client and osd
            tcp_header_buffer_size: 65536,
            use_sync_send_recv: false,
            use_rdma: true,
            rdma_device: null, // for example, "rocep5s0f0"
            rdma_port_num: 1,
            rdma_gid_index: 0,
            rdma_mtu: 4096,
            rdma_max_sge: 128,
            rdma_max_send: 8,
            rdma_max_recv: 16,
            rdma_max_msg: 132096,
            block_size: 131072,
            disk_alignment: 4096,
            bitmap_granularity: 4096,
            immediate_commit: 'all', // 'none', 'all' or 'small'
            // client - configurable online
            client_max_dirty_bytes: 33554432,
            client_max_dirty_ops: 1024,
            client_enable_writeback: false,
            client_max_buffered_bytes: 33554432,
            client_max_buffered_ops: 1024,
            client_max_writeback_iodepth: 256,
            client_retry_interval: 50, // ms. min: 10
            client_eio_retry_interval: 1000, // ms
            client_retry_enospc: true,
            osd_nearfull_ratio: 0.95,
            // client and osd - configurable online
            log_level: 0,
            peer_connect_interval: 5, // seconds. min: 1
            peer_connect_timeout: 5, // seconds. min: 1
            osd_idle_timeout: 5, // seconds. min: 1
            osd_ping_timeout: 5, // seconds. min: 1
            max_etcd_attempts: 5,
            etcd_quick_timeout: 1000, // ms
            etcd_slow_timeout: 5000, // ms
            etcd_keepalive_timeout: 30, // seconds, default is max(30, etcd_report_interval*2)
            etcd_ws_keepalive_interval: 5, // seconds
            // osd
            etcd_report_interval: 5, // seconds
            etcd_stats_interval: 30, // seconds
            run_primary: true,
            osd_network: null, // "192.168.7.0/24" or an array of masks
            bind_address: "0.0.0.0",
            bind_port: 0,
            readonly: false,
            osd_memlock: false,
            // osd - configurable online
            autosync_interval: 5,
            autosync_writes: 128,
            client_queue_depth: 128, // unused
            recovery_queue_depth: 1,
            recovery_sleep_us: 0,
            recovery_tune_util_low: 0.1,
            recovery_tune_client_util_low: 0,
            recovery_tune_util_high: 1.0,
            recovery_tune_client_util_high: 0.5,
            recovery_tune_interval: 1,
            recovery_tune_agg_interval: 10, // 10 times recovery_tune_interval
            recovery_tune_sleep_min_us: 10, // 10 microseconds
            recovery_pg_switch: 128,
            recovery_sync_batch: 16,
            no_recovery: false,
            no_rebalance: false,
            print_stats_interval: 3,
            slow_log_interval: 10,
            inode_vanish_time: 60,
            auto_scrub: false,
            no_scrub: false,
            scrub_interval: '30d', // 1s/1m/1h/1d
            scrub_queue_depth: 1,
            scrub_sleep: 0, // milliseconds
            scrub_list_limit: 1000, // objects to list on one scrub iteration
            scrub_find_best: true,
            scrub_ec_max_bruteforce: 100, // maximum EC error locator brute-force iterators
            // blockstore - fixed in superblock
            block_size,
            disk_alignment,
            journal_block_size,
            meta_block_size,
            bitmap_granularity,
            journal_device,
            journal_offset,
            journal_size,
            disable_journal_fsync,
            data_device,
            data_offset,
            data_size,
            disable_data_fsync,
            meta_device,
            meta_offset,
            disable_meta_fsync,
            disable_device_lock,
            // blockstore - configurable offline
            inmemory_metadata,
            inmemory_journal,
            journal_sector_buffer_count,
            journal_no_same_sector_overwrites,
            // blockstore - configurable online
            max_write_iodepth,
            min_flusher_count: 1,
            max_flusher_count: 256,
            throttle_small_writes: false,
            throttle_target_iops: 100,
            throttle_target_mbs: 100,
            throttle_target_parallelism: 1,
            throttle_threshold_us: 50,
        }, */
        global: {},
        /* node_placement: {
            host1: { level: 'host', parent: 'rack1' },
            ...
        }, */
        node_placement: {},
        /* pools: {
            <id>: {
                name: 'testpool',
                // 'ec' uses Reed-Solomon-Vandermonde codes, 'jerasure' is an alias for 'ec'
                scheme: 'replicated' | 'xor' | 'ec' | 'jerasure',
                pg_size: 3,
                pg_minsize: 2,
                // number of parity chunks, required for EC
                parity_chunks?: 1,
                pg_count: 100,
                // default is failure_domain=host
                failure_domain?: 'host',
                // additional failure domain rules; failure_domain=x is equivalent to x=123..N
                level_placement?: 'dc=112233 host=123456',
                raw_placement?: 'any, dc=1 host!=1, dc=1 host!=(1,2)',
                old_combinator: false,
                max_osd_combinations: 10000,
                // block_size, bitmap_granularity, immediate_commit must match all OSDs used in that pool
                block_size: 131072,
                bitmap_granularity: 4096,
                // 'all'/'small'/'none', same as in OSD options
                immediate_commit: 'all',
                pg_stripe_size: 0,
                root_node?: 'rack1',
                // restrict pool to OSDs having all of these tags
                osd_tags?: 'nvme' | [ 'nvme', ... ],
                // prefer to put primary on OSD with these tags
                primary_affinity_tags?: 'nvme' | [ 'nvme', ... ],
                // scrub interval
                scrub_interval?: '30d',
            },
            ...
        }, */
        pools: {},
        osd: {
            /* <id>: { reweight?: 1, tags?: [ 'nvme', ... ], noout?: true }, ... */
        },
        /* pgs: {
            hash: string,
            items: {
                <pool_id>: {
                    <pg_id>: {
                        osd_set: [ 1, 2, 3 ],
                        primary: 1,
                        pause: false,
                    }
                }
            }
        }, */
        pgs: {},
        /* inode: {
            <pool_id>: {
                <inode_t>: {
                    name: string,
                    size?: uint64_t, // bytes
                    parent_pool?: <pool_id>,
                    parent_id?: <inode_t>,
                    readonly?: boolean,
                }
            }
        }, */
        inode: {},
    },
    osd: {
        state: {
            /* <osd_num_t>: {
                state: "up",
                addresses: string[],
                host: string,
                port: uint16_t,
                primary_enabled: boolean,
                blockstore_enabled: boolean,
            }, */
        },
        stats: {
            /* <osd_num_t>: {
                time: number, // unix time
                data_block_size: uint64_t, // bytes
                bitmap_granularity: uint64_t, // bytes
                immediate_commit: "all"|"small"|"none",
                blockstore_ready: boolean,
                size: uint64_t, // bytes
                free: uint64_t, // bytes
                host: string,
                op_stats: {
                    <string>: { count: uint64_t, usec: uint64_t, bytes: uint64_t },
                },
                subop_stats: {
                    <string>: { count: uint64_t, usec: uint64_t },
                },
                recovery_stats: {
                    degraded: { count: uint64_t, bytes: uint64_t },
                    misplaced: { count: uint64_t, bytes: uint64_t },
                },
            }, */
        },
        inodestats: {
            /* <pool_id>: {
                <inode_t>: {
                    read: { count: uint64_t, usec: uint64_t, bytes: uint64_t },
                    write: { count: uint64_t, usec: uint64_t, bytes: uint64_t },
                    delete: { count: uint64_t, usec: uint64_t, bytes: uint64_t },
                },
            }, */
        },
        space: {
            /* <osd_num_t>: {
                <pool_id>: {
                    <inode_t>: uint64_t, // bytes
                },
            }, */
        },
    },
    mon: {
        master: {
            /* ip: [ string ], id: uint64_t */
        },
        member: {
            /* <uint64_t>: { ip: [ string ] }, */
        },
    },
    pg: {
        state: {
            /* <pool_id>: {
                <pg_id>: {
                    primary: osd_num_t,
                    state: ("starting"|"peering"|"incomplete"|"active"|"repeering"|"stopping"|"offline"|
                        "degraded"|"has_incomplete"|"has_degraded"|"has_misplaced"|"has_unclean"|
                        "has_invalid"|"has_inconsistent"|"has_corrupted"|"left_on_dead"|"scrubbing")[],
                }
            }, */
        },
        stats: {
            /* <pool_id>: {
                <pg_id>: {
                    object_count: uint64_t,
                    clean_count: uint64_t,
                    misplaced_count: uint64_t,
                    degraded_count: uint64_t,
                    incomplete_count: uint64_t,
                    write_osd_set: osd_num_t[],
                },
            }, */
        },
        history: {
            /* <pool_id>: {
                <pg_id>: {
                    osd_sets: osd_num_t[][],
                    all_peers: osd_num_t[],
                    epoch: uint64_t,
                    next_scrub: uint64_t,
                },
            }, */
        },
    },
    inode: {
        stats: {
            /* <pool_id>: {
                <inode_t>: {
                    raw_used: uint64_t, // raw used bytes on OSDs
                    read: { count: uint64_t, usec: uint64_t, bytes: uint64_t, bps: uint64_t, iops: uint64_t, lat: uint64_t },
                    write: { count: uint64_t, usec: uint64_t, bytes: uint64_t, bps: uint64_t, iops: uint64_t, lat: uint64_t },
                    delete: { count: uint64_t, usec: uint64_t, bytes: uint64_t, bps: uint64_t, iops: uint64_t, lat: uint64_t },
                },
            }, */
        },
    },
    pool: {
        stats: {
            /* <pool_id>: {
                used_raw_tb: float, // used raw space in the pool
                total_raw_tb: float, // maximum amount of space in the pool
                raw_to_usable: float, // raw to usable ratio
                space_efficiency: float, // 0..1
            } */
        },
    },
    stats: {
        /* op_stats: {
            <string>: { count: uint64_t, usec: uint64_t, bytes: uint64_t, bps: uint64_t, iops: uint64_t, lat: uint64_t },
        },
        subop_stats: {
            <string>: { count: uint64_t, usec: uint64_t, iops: uint64_t, lat: uint64_t },
        },
        recovery_stats: {
            degraded: { count: uint64_t, bytes: uint64_t, bps: uint64_t, iops: uint64_t },
            misplaced: { count: uint64_t, bytes: uint64_t, bps: uint64_t, iops: uint64_t },
        },
        object_counts: {
            object: uint64_t,
            clean: uint64_t,
            misplaced: uint64_t,
            degraded: uint64_t,
            incomplete: uint64_t,
        },
        object_bytes: {
            total: uint64_t,
            clean: uint64_t,
            misplaced: uint64_t,
            degraded: uint64_t,
            incomplete: uint64_t,
        }, */
    },
    history: {
        last_clean_pgs: {},
    },
    index: {
        image: {
            /* <name>: {
                id: uint64_t,
                pool_id: uint64_t,
            }, */
        },
        maxid: {
            /* <pool_id>: uint64_t, */
        },
    },
};
|
||||
|
||||
module.exports = {
|
||||
etcd_nonempty_keys,
|
||||
etcd_allow,
|
||||
etcd_tree,
|
||||
};
|
46
mon/http_server.js
Normal file
46
mon/http_server.js
Normal file
@@ -0,0 +1,46 @@
|
||||
// Copyright (c) Vitaliy Filippov, 2019+
|
||||
// License: VNPL-1.1 (see README.md for details)
|
||||
|
||||
const fsp = require('fs').promises;
|
||||
const http = require('http');
|
||||
const https = require('https');
|
||||
|
||||
/**
 * Create the monitor HTTP(S) server and start listening.
 *
 * HTTPS is used when cfg.mon_https_cert is set: the key, certificate and
 * optional CA bundle are read from the files named by cfg.mon_https_key,
 * cfg.mon_https_cert and cfg.mon_https_ca; cfg.mon_https_client_auth
 * makes the server request client certificates.
 *
 * @param {object} cfg monitor configuration; mon_http_port defaults to 8060,
 *   mon_http_ip to all interfaces
 * @param {function} handler request listener passed to createServer()
 * @returns {Promise<object|null>} the listening server, or null (after
 *   logging the error) when listening failed
 */
async function create_http_server(cfg, handler)
{
    let server;
    if (cfg.mon_https_cert)
    {
        const tls = {
            key: await fsp.readFile(cfg.mon_https_key),
            cert: await fsp.readFile(cfg.mon_https_cert),
        };
        if (cfg.mon_https_ca)
        {
            // The TLS option is named `ca`; any other property name is ignored by Node.js
            tls.ca = await fsp.readFile(cfg.mon_https_ca);
        }
        if (cfg.mon_https_client_auth)
        {
            tls.requestCert = true;
        }
        server = https.createServer(tls, handler);
    }
    else
    {
        server = http.createServer(handler);
    }
    const port = cfg.mon_http_port || 8060;
    try
    {
        // listen() reports bind failures (EADDRINUSE etc.) through an asynchronous
        // 'error' event, so wait for either 'listening' or 'error' here
        await new Promise((resolve, reject) =>
        {
            server.once('error', reject);
            server.listen(port, cfg.mon_http_ip || undefined, () =>
            {
                server.removeListener('error', reject);
                resolve();
            });
        });
    }
    catch (e)
    {
        console.error(
            'HTTP server disabled because listen at address: '+
            (cfg.mon_http_ip || '')+':'+port+' failed with error: '+e
        );
        return null;
    }
    return server;
}
|
||||
|
||||
module.exports = { create_http_server };
|
@@ -8,7 +8,7 @@
|
||||
// But we support this case with the "parity_space" parameter in optimize_initial()/optimize_change().
|
||||
|
||||
const { SimpleCombinator } = require('./simple_pgs.js');
|
||||
const LPOptimizer = require('./lp-optimizer.js');
|
||||
const LPOptimizer = require('./lp_optimizer.js');
|
||||
|
||||
const osd_tree = {
|
||||
ripper5: {
|
@@ -2,7 +2,7 @@
|
||||
// License: VNPL-1.1 (see README.md for details)
|
||||
|
||||
const { compat } = require('./simple_pgs.js');
|
||||
const LPOptimizer = require('./lp-optimizer.js');
|
||||
const LPOptimizer = require('./lp_optimizer.js');
|
||||
|
||||
async function run()
|
||||
{
|
@@ -2,7 +2,7 @@
|
||||
// License: VNPL-1.1 (see README.md for details)
|
||||
|
||||
const { compat, flatten_tree } = require('./simple_pgs.js');
|
||||
const LPOptimizer = require('./lp-optimizer.js');
|
||||
const LPOptimizer = require('./lp_optimizer.js');
|
||||
|
||||
const crush_tree = [
|
||||
{ level: 1, children: [
|
@@ -2,7 +2,7 @@
|
||||
// License: VNPL-1.1 (see README.md for details)
|
||||
|
||||
const { compat } = require('./simple_pgs.js');
|
||||
const LPOptimizer = require('./lp-optimizer.js');
|
||||
const LPOptimizer = require('./lp_optimizer.js');
|
||||
|
||||
const osd_tree = {
|
||||
100: {
|
@@ -2,7 +2,7 @@
|
||||
// License: VNPL-1.1 (see README.md for details)
|
||||
|
||||
const { compat, flatten_tree } = require('./simple_pgs.js');
|
||||
const LPOptimizer = require('./lp-optimizer.js');
|
||||
const LPOptimizer = require('./lp_optimizer.js');
|
||||
|
||||
const osd_tree = {
|
||||
100: {
|
@@ -23,4 +23,4 @@ for (let i = 2; i < process.argv.length; i++)
|
||||
}
|
||||
}
|
||||
|
||||
new Mon(options).start().catch(e => { console.error(e); process.exit(1); });
|
||||
Mon.run_forever(options).catch(console.error);
|
||||
|
1987
mon/mon.js
1987
mon/mon.js
File diff suppressed because it is too large
Load Diff
215
mon/osd_tree.js
Normal file
215
mon/osd_tree.js
Normal file
@@ -0,0 +1,215 @@
|
||||
// Copyright (c) Vitaliy Filippov, 2019+
|
||||
// License: VNPL-1.1 (see README.md for details)
|
||||
|
||||
// Build the flat placement tree (a map of node id => node) from OSD statistics
// and the user-defined node placement configuration.
//
// global_config: reads placement_levels and osd_out_time (seconds)
// state: reads state.osd.stats, state.osd.state, state.config.osd and state.config.node_placement
//
// Returns { up_osds, levels, osd_tree } where up_osds is a set ({ [osd_num]: true }) of
// OSDs having a live state entry, levels is the level name => number map with the
// "host" and "osd" defaults filled in, and osd_tree is the flat node map.
//
// Side effects: fills in default "host"/"osd" levels in global_config.placement_levels (when set)
// and forces level='osd' on node_placement entries with purely numeric IDs.
function get_osd_tree(global_config, state)
{
    const levels = global_config.placement_levels||{};
    levels.host = levels.host || 100;
    levels.osd = levels.osd || 101;
    const tree = {};
    const up_osds = {};
    // OSDs whose statistics are older than this timestamp are left out of the tree.
    // This requires monitor system time to be in sync with OSD system times (at least to some extent)
    const down_time = Date.now()/1000 - global_config.osd_out_time;
    const osd_nums = Object.keys(state.osd.stats).sort((a, b) => a - b);
    for (const osd_num of osd_nums)
    {
        const stat = state.osd.stats[osd_num];
        const osd_cfg = state.config.osd[osd_num];
        // Missing, negative or non-numeric reweight means "full weight"
        let reweight = 1;
        if (osd_cfg != null)
        {
            reweight = Number(osd_cfg.reweight);
            if (reweight < 0 || isNaN(reweight))
            {
                reweight = 1;
            }
        }
        if (!stat || !stat.size || !reweight)
        {
            continue;
        }
        const osd_state = state.osd.state[osd_num];
        const is_noout = osd_cfg && osd_cfg.noout;
        if (!osd_state && !(Number(stat.time) >= down_time) && !is_noout)
        {
            continue;
        }
        // Numeric IDs are reserved for OSDs
        if (osd_state && reweight > 0)
        {
            // React to down OSDs immediately
            up_osds[osd_num] = true;
        }
        const osd_node = tree[osd_num] || {};
        tree[osd_num] = osd_node;
        osd_node.id = osd_num;
        osd_node.parent = osd_node.parent || stat.host;
        osd_node.level = 'osd';
        osd_node.size = reweight * stat.size / 1024 / 1024 / 1024 / 1024; // terabytes
        if (osd_cfg && osd_cfg.tags)
        {
            // Convert a single tag or a list of tags into a { [tag]: true } set
            const tag_list = osd_cfg.tags instanceof Array ? osd_cfg.tags : [ osd_cfg.tags ];
            const tag_set = {};
            for (const tag of tag_list)
            {
                tag_set[tag] = true;
            }
            osd_node.tags = tag_set;
        }
        delete osd_node.children;
        if (!tree[stat.host])
        {
            tree[stat.host] = {
                id: stat.host,
                level: 'host',
                parent: null,
                children: [],
            };
        }
    }
    const placement = state.config.node_placement||{};
    for (const node_id in placement)
    {
        const node_cfg = placement[node_id];
        if (/^\d+$/.test(node_id))
        {
            node_cfg.level = 'osd';
        }
        // All nodes must have non-empty IDs and valid levels
        // OSDs have to actually exist
        const usable = node_id && node_cfg.level && levels[node_cfg.level] &&
            (node_cfg.level !== 'osd' || tree[node_id]);
        if (!usable)
        {
            continue;
        }
        const node = tree[node_id] || {};
        tree[node_id] = node;
        node.id = node_id;
        node.level = node_cfg.level;
        node.parent = node_cfg.parent;
        if (node_cfg.level !== 'osd')
        {
            node.children = [];
        }
    }
    return { up_osds, levels, osd_tree: tree };
}
|
||||
|
||||
// Convert a flat node map (as built by get_osd_tree()) into a hierarchical tree.
//
// global_config: reads placement_levels (default "host"/"osd" levels are filled in)
// tree: flat map of node id => { id, level, parent, size?, ... }; it is not modified,
//   every node is shallow-copied and gets a fresh `children` array
//
// Returns a new map of node id => node copy, plus a synthetic root under the '' key.
// A node is attached to its configured parent only when that parent exists and has
// a lower level number; otherwise it is attached to the root. OSDs with no positive
// size are not attached anywhere. Non-OSD nodes without children are then removed
// from the map repeatedly until nothing more can be removed (this includes the root
// itself when nothing was attached to it).
function make_hier_tree(global_config, tree)
{
    const levels = global_config.placement_levels||{};
    levels.host = levels.host || 100;
    levels.osd = levels.osd || 101;
    tree = { ...tree };
    for (const node_id in tree)
    {
        tree[node_id] = { ...tree[node_id], children: [] };
    }
    tree[''] = { children: [] };
    for (const node_id in tree)
    {
        if (node_id === '' || tree[node_id].level === 'osd' && (!tree[node_id].size || tree[node_id].size <= 0))
        {
            continue;
        }
        const node_cfg = tree[node_id];
        const node_level = levels[node_cfg.level] || node_cfg.level;
        let parent_level = node_cfg.parent && tree[node_cfg.parent] && tree[node_cfg.parent].children
            && tree[node_cfg.parent].level;
        parent_level = parent_level ? (levels[parent_level] || parent_level) : null;
        // Parent's level must be less than child's; OSDs must be leaves
        const parent = parent_level && parent_level < node_level ? node_cfg.parent : '';
        tree[parent].children.push(tree[node_id]);
    }
    // Delete empty nodes
    let deleted = 0;
    do
    {
        deleted = 0;
        for (const node_id in tree)
        {
            if (tree[node_id].level !== 'osd' && (!tree[node_id].children || !tree[node_id].children.length))
            {
                const parent = tree[node_id].parent;
                // The configured parent may be missing from the tree (never defined, filtered out
                // by the caller, or already removed as empty during this pruning), so it
                // must be checked before it is dereferenced
                if (parent && tree[parent])
                {
                    tree[parent].children = tree[parent].children.filter(c => c != tree[node_id]);
                }
                deleted++;
                delete tree[node_id];
            }
        }
    } while (deleted > 0);
    return tree;
}
|
||||
|
||||
// Restrict pool_tree (flat node map, modified in place) to the subtree of root_node.
//
// Kept entries are: everything reachable from root_node through the `children` links of
// the hierarchical tree built by make_hier_tree(), plus root_node itself and its chain
// of ancestors as found through `parent` links in pool_tree. All other entries are deleted.
// Does nothing when root_node is empty.
function filter_osds_by_root_node(global_config, pool_tree, root_node)
{
    if (!root_node)
    {
        return;
    }
    const hier_tree = make_hier_tree(global_config, pool_tree);
    const keep = {};
    // Collect all descendants of the root node
    const pending = [ ...((hier_tree[root_node] || {}).children || []) ];
    while (pending.length > 0)
    {
        const node = pending.pop();
        keep[node.id||''] = true;
        if (node.children)
        {
            pending.push(...node.children);
        }
    }
    // Collect the root node itself and all of its ancestors
    for (let cur = pool_tree[root_node] || {}; cur && cur.id; cur = pool_tree[cur.parent||''])
    {
        keep[cur.id||''] = true;
    }
    // Drop everything else
    for (const item in pool_tree)
    {
        if (!keep[item])
        {
            delete pool_tree[item];
        }
    }
}
|
||||
|
||||
// Delete from orig_tree (modified in place) every node with level 'osd' that lacks
// at least one of the requested tags. Nodes of other levels are never touched.
//
// tags: a single tag or an array of tags; a falsy value disables filtering
function filter_osds_by_tags(orig_tree, tags)
{
    if (!tags)
    {
        return;
    }
    const required = tags instanceof Array ? tags : [ tags ];
    if (!required.length)
    {
        return;
    }
    for (const node_id in orig_tree)
    {
        const node = orig_tree[node_id];
        if (node.level !== 'osd')
        {
            continue;
        }
        let has_all = true;
        for (const tag of required)
        {
            if (!node.tags || !node.tags[tag])
            {
                has_all = false;
                break;
            }
        }
        if (!has_all)
        {
            delete orig_tree[node_id];
        }
    }
}
|
||||
|
||||
// Delete from orig_tree (modified in place) every 'osd' node whose reported statistics
// are incompatible with the requested block layout:
// - the OSD reports a bs_block_size different from block_size
// - the OSD reports a bitmap_granularity different from bitmap_granularity
// - the OSD reports immediate_commit 'small' while 'all' is requested
// - the OSD reports immediate_commit 'none' while anything but 'none' is requested
// OSDs without an entry in osd_stats are kept.
function filter_osds_by_block_layout(orig_tree, osd_stats, block_size, bitmap_granularity, immediate_commit)
{
    for (const node_id in orig_tree)
    {
        if (orig_tree[node_id].level !== 'osd')
        {
            continue;
        }
        const osd_stat = osd_stats[node_id];
        if (!osd_stat)
        {
            continue;
        }
        const block_mismatch = osd_stat.bs_block_size && osd_stat.bs_block_size != block_size;
        const bitmap_mismatch = osd_stat.bitmap_granularity && osd_stat.bitmap_granularity != bitmap_granularity;
        const commit_mismatch =
            osd_stat.immediate_commit == 'small' && immediate_commit == 'all' ||
            osd_stat.immediate_commit == 'none' && immediate_commit != 'none';
        if (block_mismatch || bitmap_mismatch || commit_mismatch)
        {
            delete orig_tree[node_id];
        }
    }
}
|
||||
|
||||
// Return the set ({ [osd_num]: true }) of OSDs preferred as PG primaries for a pool.
//
// Without pool_cfg.primary_affinity_tags the up_osds object itself is returned.
// Otherwise a new set is built from the up OSDs that carry all of the affinity tags
// (tags are looked up in osd_tree through filter_osds_by_tags()).
function get_affinity_osds(pool_cfg, up_osds, osd_tree)
{
    if (!pool_cfg.primary_affinity_tags)
    {
        return up_osds;
    }
    const candidates = {};
    for (const osd_num of Object.keys(up_osds))
    {
        candidates[osd_num] = osd_tree[osd_num];
    }
    filter_osds_by_tags(candidates, pool_cfg.primary_affinity_tags);
    // Replace the tree nodes of the survivors with plain "true" flags
    for (const osd_num in candidates)
    {
        candidates[osd_num] = true;
    }
    return candidates;
}
|
||||
|
||||
module.exports = {
|
||||
get_osd_tree,
|
||||
make_hier_tree,
|
||||
filter_osds_by_root_node,
|
||||
filter_osds_by_tags,
|
||||
filter_osds_by_block_layout,
|
||||
get_affinity_osds,
|
||||
};
|
@@ -1,25 +1,24 @@
|
||||
{
|
||||
"name": "vitastor-mon",
|
||||
"version": "1.6.1",
|
||||
"version": "1.7.1",
|
||||
"description": "Vitastor SDS monitor service",
|
||||
"main": "mon-main.js",
|
||||
"scripts": {
|
||||
"test": "echo \"Error: no test specified\" && exit 1"
|
||||
"lint": "eslint *.js lp_optimizer/*.js scripts/*.js"
|
||||
},
|
||||
"author": "Vitaliy Filippov",
|
||||
"license": "UNLICENSED",
|
||||
"dependencies": {
|
||||
"antietcd": "^1.0.5",
|
||||
"sprintf-js": "^1.1.2",
|
||||
"ws": "^7.2.5"
|
||||
},
|
||||
"devDependencies": {
|
||||
"eslint": "^8.0.0",
|
||||
"eslint-plugin-import": "^2.29.1",
|
||||
"eslint-plugin-node": "^11.1.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=12.0.0"
|
||||
},
|
||||
"scripts": {
|
||||
"lint": "eslint *.js"
|
||||
}
|
||||
}
|
||||
|
267
mon/pg_gen.js
Normal file
267
mon/pg_gen.js
Normal file
@@ -0,0 +1,267 @@
|
||||
// Copyright (c) Vitaliy Filippov, 2019+
|
||||
// License: VNPL-1.1 (see README.md for details)
|
||||
|
||||
const { RuleCombinator } = require('./lp_optimizer/dsl_pgs.js');
|
||||
const { SimpleCombinator, flatten_tree } = require('./lp_optimizer/simple_pgs.js');
|
||||
const { validate_pool_cfg, get_pg_rules } = require('./pool_config.js');
|
||||
const LPOptimizer = require('./lp_optimizer/lp_optimizer.js');
|
||||
const { scale_pg_count } = require('./pg_utils.js');
|
||||
const { make_hier_tree, filter_osds_by_root_node,
|
||||
filter_osds_by_tags, filter_osds_by_block_layout, get_affinity_osds } = require('./osd_tree.js');
|
||||
|
||||
// State of the deterministic pseudo-random generator used for primary selection
let seed;

// Restart the generator from its fixed initial state
function reset_rng()
{
    seed = 0x5f020e43;
}

// Advance the generator (three shift-xor steps on a signed 32-bit integer)
// and return the new state shifted into the non-negative range [0, 2^32)
function rng()
{
    let x = seed;
    x = x ^ (x << 13);
    x = x ^ (x >> 17);
    x = x ^ (x << 5);
    seed = x;
    return x + 2147483648;
}
|
||||
|
||||
// Choose the primary OSD for a PG.
//
// pool_config: reads scheme, pg_size and parity_chunks
// osd_set: OSD numbers of the PG (falsy entries mean "no OSD")
// up_osds / aff_osds: sets ({ [osd_num]: truthy }) of alive and preferred OSDs
//
// Candidate groups are tried in order and the first non-empty one is used:
// - replicated pools: affinity OSDs, then up OSDs
// - other schemes: affinity OSDs among the first (pg_size - parity_chunks) positions,
//   affinity OSDs anywhere, up OSDs among those first positions, up OSDs anywhere
// Returns 0 when no candidate exists, otherwise an element picked with rng().
function pick_primary(pool_config, osd_set, up_osds, aff_osds)
{
    const select = (candidates, allowed) => candidates.filter(osd_num => osd_num && allowed[osd_num]);
    let alive_set;
    if (pool_config.scheme === 'replicated')
    {
        // Prefer "affinity" OSDs
        alive_set = select(osd_set, aff_osds);
        if (!alive_set.length)
        {
            alive_set = select(osd_set, up_osds);
        }
    }
    else
    {
        // Prefer data OSDs for EC because they can actually read something without an additional network hop
        const pg_data_size = (pool_config.pg_size||0) - (pool_config.parity_chunks||0);
        const data_osds = osd_set.slice(0, pg_data_size);
        alive_set = select(data_osds, aff_osds);
        if (!alive_set.length)
        {
            alive_set = select(osd_set, aff_osds);
        }
        if (!alive_set.length)
        {
            alive_set = select(data_osds, up_osds);
        }
        if (!alive_set.length)
        {
            alive_set = select(osd_set, up_osds);
        }
    }
    return alive_set.length ? alive_set[rng() % alive_set.length] : 0;
}
|
||||
|
||||
// Re-pick primaries of all configured PGs of all valid pools.
//
// Returns undefined when no primary changes. Otherwise returns a deep (JSON) copy of
// state.config.pgs with the `primary` fields updated; state itself is not modified
// by this function directly. Every change is logged to the console.
function recheck_primary(state, global_config, up_osds, osd_tree)
{
    let new_config_pgs;
    for (const pool_id in state.config.pools)
    {
        const pool_cfg = state.config.pools[pool_id];
        if (!validate_pool_cfg(pool_id, pool_cfg, global_config.placement_levels, false))
        {
            continue;
        }
        const aff_osds = get_affinity_osds(pool_cfg, up_osds, osd_tree);
        // The generator is restarted for every pool so the choice is reproducible
        reset_rng();
        const pool_pgs = state.config.pgs.items[pool_id];
        if (!pool_pgs)
        {
            continue;
        }
        for (let pg_num = 1; pg_num <= pool_cfg.pg_count; pg_num++)
        {
            const pg_cfg = pool_pgs[pg_num];
            if (!pg_cfg)
            {
                continue;
            }
            const new_primary = pick_primary(pool_cfg, pg_cfg.osd_set, up_osds, aff_osds);
            if (pg_cfg.primary == new_primary)
            {
                continue;
            }
            if (!new_config_pgs)
            {
                // Copy the PG configuration lazily, on the first detected change
                new_config_pgs = JSON.parse(JSON.stringify(state.config.pgs));
            }
            console.log(
                `Moving pool ${pool_id} (${pool_cfg.name || 'unnamed'}) PG ${pg_num}`+
                ` primary OSD from ${pg_cfg.primary} to ${new_primary}`
            );
            new_config_pgs.items[pool_id][pg_num].primary = new_primary;
        }
    }
    return new_config_pgs;
}
|
||||
|
||||
// Fill an etcd transaction with the changes required to save a new PG layout of a pool.
//
// save_to: PG configuration object; save_to.items[pool_id] is replaced (or removed when new_pgs is empty)
// request: etcd transaction; entries are appended to request.compare and request.success
// prev_pgs / new_pgs: arrays of OSD sets, index i corresponds to PG number i+1
// pg_history: per-PG history objects indexed like prev_pgs; modified in place
//
// Primaries are picked with pick_primary() after restarting the generator.
function save_new_pgs_txn(save_to, request, state, etcd_prefix, etcd_watch_revision, pool_id, up_osds, osd_tree, prev_pgs, new_pgs, pg_history)
{
    const pool_cfg = state.config.pools[pool_id];
    const aff_osds = get_affinity_osds(pool_cfg || {}, up_osds, osd_tree);
    const pg_items = {};
    reset_rng();
    new_pgs.forEach((raw_set, idx) =>
    {
        const osd_set = raw_set.map(osd_num => osd_num === LPOptimizer.NO_OSD ? 0 : osd_num);
        pg_items[idx+1] = {
            osd_set,
            primary: pick_primary(pool_cfg, osd_set, up_osds, aff_osds),
        };
        // Remember the previous OSD set in the history if it differs and isn't empty
        const old_set = prev_pgs[idx];
        if (old_set && old_set.join(' ') != osd_set.join(' ') &&
            old_set.filter(osd_num => osd_num).length > 0)
        {
            pg_history[idx] = pg_history[idx] || {};
            pg_history[idx].osd_sets = pg_history[idx].osd_sets || [];
            pg_history[idx].osd_sets.push(old_set);
        }
        // Remove duplicate OSD sets from the history
        const hist = pg_history[idx];
        if (hist && hist.osd_sets)
        {
            const uniq = {};
            for (const set of hist.osd_sets)
            {
                uniq[set.join(' ')] = set;
            }
            hist.osd_sets = Object.values(uniq);
        }
    });
    const pg_total = Math.max(new_pgs.length, prev_pgs.length);
    for (let idx = 0; idx < pg_total; idx++)
    {
        // FIXME: etcd has max_txn_ops limit, and it's 128 by default
        // Sooo we probably want to change our storage scheme for PG histories...
        const hist_key = b64(etcd_prefix+'/pg/history/'+pool_id+'/'+(idx+1));
        request.compare.push({
            key: hist_key,
            target: 'MOD',
            mod_revision: ''+etcd_watch_revision,
            result: 'LESS',
        });
        const op = pg_history[idx]
            ? { requestPut: { key: hist_key, value: b64(JSON.stringify(pg_history[idx])) } }
            : { requestDeleteRange: { key: hist_key } };
        request.success.push(op);
    }
    save_to.items = save_to.items || {};
    if (new_pgs.length)
    {
        save_to.items[pool_id] = pg_items;
    }
    else
    {
        delete save_to.items[pool_id];
    }
}
|
||||
|
||||
// Generate (or rebalance) the PG layout of one pool.
//
// Returns null when the pool configuration is invalid, otherwise
// { pool_id, pgs, stats } where pgs is the optimizer's int_pgs and stats contains
// total_raw_tb, pg_real_size, raw_to_usable and space_efficiency.
async function generate_pool_pgs(state, global_config, pool_id, osd_tree, levels)
{
    const pool_cfg = state.config.pools[pool_id];
    if (!validate_pool_cfg(pool_id, pool_cfg, global_config.placement_levels, false))
    {
        return null;
    }
    // Leave only the OSDs suitable for this pool and build the hierarchy
    let pool_tree = { ...osd_tree };
    filter_osds_by_root_node(global_config, pool_tree, pool_cfg.root_node);
    filter_osds_by_tags(pool_tree, pool_cfg.osd_tags);
    const block_size = pool_cfg.block_size || global_config.block_size || 131072;
    const bitmap_granularity = pool_cfg.bitmap_granularity || global_config.bitmap_granularity || 4096;
    const immediate_commit = pool_cfg.immediate_commit || global_config.immediate_commit || 'all';
    filter_osds_by_block_layout(pool_tree, state.osd.stats, block_size, bitmap_granularity, immediate_commit);
    pool_tree = make_hier_tree(global_config, pool_tree);
    // Copy OSD sets of { [pg_num]: { osd_set } } into a 0-based array
    const load_osd_sets = (target, pool_pgs) =>
    {
        for (const pg in pool_pgs)
        {
            target[pg-1] = [ ...pool_pgs[pg].osd_set ];
        }
    };
    // First try last_clean_pgs to minimize data movement
    const prev_pgs = [];
    load_osd_sets(prev_pgs, (state.history.last_clean_pgs.items||{})[pool_id]||{});
    if (!prev_pgs.length)
    {
        // Fall back to config/pgs if it's empty
        load_osd_sets(prev_pgs, (state.config.pgs.items||{})[pool_id]||{});
    }
    const old_pg_count = prev_pgs.length;
    const osd_weights = {};
    for (const item of Object.values(pool_tree))
    {
        if (item.level === 'osd')
        {
            osd_weights[item.id] = item.size;
        }
    }
    let combinator;
    if (!global_config.use_old_pg_combinator || pool_cfg.level_placement || pool_cfg.raw_placement)
    {
        // new algorithm:
        combinator = new RuleCombinator(
            pool_tree, get_pg_rules(pool_id, pool_cfg, global_config.placement_levels), pool_cfg.max_osd_combinations
        );
    }
    else
    {
        // old algorithm:
        combinator = new SimpleCombinator(
            flatten_tree(pool_tree[''].children, levels, pool_cfg.failure_domain, 'osd'),
            pool_cfg.pg_size, pool_cfg.max_osd_combinations
        );
    }
    const optimize_cfg = {
        osd_weights,
        combinator,
        pg_count: pool_cfg.pg_count,
        pg_size: pool_cfg.pg_size,
        pg_minsize: pool_cfg.pg_minsize,
        ordered: pool_cfg.scheme != 'replicated',
    };
    let optimize_result;
    // Re-shuffle PGs if config/pgs.hash is empty
    if (old_pg_count > 0 && state.config.pgs.hash)
    {
        if (prev_pgs.length != pool_cfg.pg_count)
        {
            // Scale PG count
            // The number of previous PGs is compared, not the configured one,
            // because last_clean_pgs may still contain the old number of PGs
            scale_pg_count(prev_pgs, pool_cfg.pg_count);
        }
        // Pad previous OSD sets with empty slots up to the current pg_size
        for (const pg of prev_pgs)
        {
            while (pg.length < pool_cfg.pg_size)
            {
                pg.push(0);
            }
        }
        optimize_result = await LPOptimizer.optimize_change({
            prev_pgs,
            ...optimize_cfg,
        });
    }
    else
    {
        optimize_result = await LPOptimizer.optimize_initial(optimize_cfg);
    }
    console.log(`Pool ${pool_id} (${pool_cfg.name || 'unnamed'}):`);
    LPOptimizer.print_change_stats(optimize_result);
    // Effective PG size is the smallest non-zero number of real OSDs among all PGs
    let pg_effsize = pool_cfg.pg_size;
    for (const pg of optimize_result.int_pgs)
    {
        const real_osds = pg.filter(osd => osd != LPOptimizer.NO_OSD).length;
        if (real_osds && real_osds < pg_effsize)
        {
            pg_effsize = real_osds;
        }
    }
    const pg_real_size = pg_effsize || pool_cfg.pg_size;
    const data_chunks = pool_cfg.scheme === 'replicated'
        ? 1 : (pool_cfg.pg_size - (pool_cfg.parity_chunks||0));
    return {
        pool_id,
        pgs: optimize_result.int_pgs,
        stats: {
            total_raw_tb: optimize_result.space,
            pg_real_size,
            raw_to_usable: pg_real_size / data_chunks,
            space_efficiency: optimize_result.space/(optimize_result.total_space||1),
        },
    };
}
|
||||
|
||||
// Encode a string (or anything else accepted by Buffer.from) as base64
function b64(str)
{
    const raw = Buffer.from(str);
    return raw.toString('base64');
}
|
||||
|
||||
module.exports = {
|
||||
recheck_primary,
|
||||
save_new_pgs_txn,
|
||||
generate_pool_pgs,
|
||||
};
|
169
mon/pool_config.js
Normal file
169
mon/pool_config.js
Normal file
@@ -0,0 +1,169 @@
|
||||
// Copyright (c) Vitaliy Filippov, 2019+
|
||||
// License: VNPL-1.1 (see README.md for details)
|
||||
|
||||
const { parse_level_indexes, parse_pg_dsl } = require('./lp_optimizer/dsl_pgs.js');
|
||||
|
||||
// Normalize and validate a pool configuration.
//
// pool_cfg is modified in place: pg_size, pg_minsize, pg_count are rounded down,
// parity_chunks is rounded down (0/NaN become undefined) and max_osd_combinations
// is rounded down with a default of 10000.
//
// Returns true when the configuration is valid and false otherwise. When `warn` is
// truthy the reason of the failure is printed with console.log(). Placement rules
// are checked with get_pg_rules(), which is always called with warnings enabled.
function validate_pool_cfg(pool_id, pool_cfg, placement_levels, warn)
{
    // Report a problem ("Pool <id> <what>") and return the failure flag
    const invalid = (what) =>
    {
        if (warn)
            console.log('Pool '+pool_id+' '+what);
        return false;
    };
    // Valid tag lists are a string or an array consisting only of strings
    const is_tag_list = (v) => typeof(v) == 'string' ||
        v instanceof Array && v.every(t => typeof t == 'string');

    pool_cfg.pg_size = Math.floor(pool_cfg.pg_size);
    pool_cfg.pg_minsize = Math.floor(pool_cfg.pg_minsize);
    pool_cfg.parity_chunks = Math.floor(pool_cfg.parity_chunks) || undefined;
    pool_cfg.pg_count = Math.floor(pool_cfg.pg_count);
    pool_cfg.max_osd_combinations = Math.floor(pool_cfg.max_osd_combinations) || 10000;

    if (!/^[1-9]\d*$/.exec(''+pool_id))
    {
        if (warn)
            console.log('Pool ID '+pool_id+' is invalid');
        return false;
    }
    const scheme = pool_cfg.scheme;
    if (![ 'xor', 'replicated', 'ec', 'jerasure' ].includes(scheme))
    {
        return invalid('has invalid coding scheme (one of "xor", "replicated", "ec" and "jerasure" required)');
    }
    if (!pool_cfg.pg_size || pool_cfg.pg_size < 1 || pool_cfg.pg_size > 256 ||
        scheme !== 'replicated' && pool_cfg.pg_size < 3)
    {
        return invalid('has invalid pg_size');
    }
    if (!pool_cfg.pg_minsize || pool_cfg.pg_minsize < 1 || pool_cfg.pg_minsize > pool_cfg.pg_size ||
        scheme === 'xor' && pool_cfg.pg_minsize < (pool_cfg.pg_size - 1))
    {
        return invalid('has invalid pg_minsize');
    }
    if (scheme === 'xor' && pool_cfg.parity_chunks != 0 && pool_cfg.parity_chunks != 1)
    {
        return invalid('has invalid parity_chunks (must be 1)');
    }
    if ((scheme === 'ec' || scheme === 'jerasure') &&
        (pool_cfg.parity_chunks < 1 || pool_cfg.parity_chunks > pool_cfg.pg_size-2))
    {
        return invalid('has invalid parity_chunks (must be between 1 and pg_size-2)');
    }
    if (!pool_cfg.pg_count || pool_cfg.pg_count < 1)
    {
        return invalid('has invalid pg_count');
    }
    if (!pool_cfg.name)
    {
        return invalid('has empty name');
    }
    if (pool_cfg.max_osd_combinations < 100)
    {
        return invalid('has invalid max_osd_combinations (must be at least 100)');
    }
    if (pool_cfg.root_node && typeof(pool_cfg.root_node) != 'string')
    {
        return invalid('has invalid root_node (must be a string)');
    }
    if (pool_cfg.osd_tags && !is_tag_list(pool_cfg.osd_tags))
    {
        return invalid('has invalid osd_tags (must be a string or array of strings)');
    }
    if (pool_cfg.primary_affinity_tags && !is_tag_list(pool_cfg.primary_affinity_tags))
    {
        return invalid('has invalid primary_affinity_tags (must be a string or array of strings)');
    }
    if (!get_pg_rules(pool_id, pool_cfg, placement_levels, true))
    {
        return false;
    }
    return true;
}
|
||||
|
||||
function get_pg_rules(pool_id, pool_cfg, placement_levels, warn)
|
||||
{
|
||||
if (pool_cfg.level_placement)
|
||||
{
|
||||
const pg_size = (0|pool_cfg.pg_size);
|
||||
let rules = pool_cfg.level_placement;
|
||||
if (typeof rules === 'string')
|
||||
{
|
||||
rules = rules.split(/\s+/).map(s => s.split(/=/, 2)).reduce((a, c) => { a[c[0]] = c[1]; return a; }, {});
|
||||
}
|
||||
else
|
||||
{
|
||||
rules = { ...rules };
|
||||
}
|
||||
// Always add failure_domain to prevent rules from being totally incorrect
|
||||
const all_diff = [];
|
||||
for (let i = 1; i <= pg_size; i++)
|
||||
{
|
||||
all_diff.push(i);
|
||||
}
|
||||
rules[pool_cfg.failure_domain || 'host'] = all_diff;
|
||||
placement_levels = placement_levels||{};
|
||||
placement_levels.host = placement_levels.host || 100;
|
||||
placement_levels.osd = placement_levels.osd || 101;
|
||||
for (const k in rules)
|
||||
{
|
||||
if (!placement_levels[k] || typeof rules[k] !== 'string' &&
|
||||
(!(rules[k] instanceof Array) ||
|
||||
rules[k].filter(s => typeof s !== 'string' && typeof s !== 'number').length > 0))
|
||||
{
|
||||
if (warn)
|
||||
console.log('Pool '+pool_id+' configuration is invalid: level_placement should be { [level]: string | (string|number)[] }');
|
||||
return null;
|
||||
}
|
||||
else if (rules[k].length != pg_size)
|
||||
{
|
||||
if (warn)
|
||||
console.log('Pool '+pool_id+' configuration is invalid: values in level_placement should contain exactly pg_size ('+pg_size+') items');
|
||||
return null;
|
||||
}
|
||||
}
|
||||
return parse_level_indexes(rules);
|
||||
}
|
||||
else if (typeof pool_cfg.raw_placement === 'string')
|
||||
{
|
||||
try
|
||||
{
|
||||
return parse_pg_dsl(pool_cfg.raw_placement);
|
||||
}
|
||||
catch (e)
|
||||
{
|
||||
if (warn)
|
||||
console.log('Pool '+pool_id+' configuration is invalid: invalid raw_placement: '+e.message);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
let rules = [ [] ];
|
||||
let prev = [ 1 ];
|
||||
for (let i = 1; i < pool_cfg.pg_size; i++)
|
||||
{
|
||||
rules.push([ [ pool_cfg.failure_domain||'host', '!=', prev ] ]);
|
||||
prev = [ ...prev, i+1 ];
|
||||
}
|
||||
return rules;
|
||||
}
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
validate_pool_cfg,
|
||||
get_pg_rules,
|
||||
};
|
220
mon/prometheus.js
Normal file
220
mon/prometheus.js
Normal file
@@ -0,0 +1,220 @@
|
||||
// Copyright (c) Vitaliy Filippov, 2019+
|
||||
// License: VNPL-1.1 (see README.md for details)
|
||||
|
||||
// Static header of the Prometheus text exposition output: one "# HELP" and one "# TYPE"
// line for every metric family emitted by export_prometheus_metrics().
// Each family must have at most one HELP and one TYPE line.
const metric_help =
`# HELP vitastor_object_bytes Total size of objects in cluster in bytes
# TYPE vitastor_object_bytes gauge
# HELP vitastor_object_count Total number of objects in cluster
# TYPE vitastor_object_count gauge
# HELP vitastor_stat_count Total operation count
# TYPE vitastor_stat_count counter
# HELP vitastor_stat_usec Total operation latency in usec
# TYPE vitastor_stat_usec counter
# HELP vitastor_stat_bytes Total operation size in bytes
# TYPE vitastor_stat_bytes counter

# HELP vitastor_image_raw_used Image raw used size in bytes
# TYPE vitastor_image_raw_used counter
# HELP vitastor_image_stat_count Per-image total operation count
# TYPE vitastor_image_stat_count counter
# HELP vitastor_image_stat_usec Per-image total operation latency
# TYPE vitastor_image_stat_usec counter
# HELP vitastor_image_stat_bytes Per-image total operation size in bytes
# TYPE vitastor_image_stat_bytes counter

# HELP vitastor_osd_status OSD up/down status
# TYPE vitastor_osd_status gauge
# HELP vitastor_osd_size_bytes OSD total space in bytes
# TYPE vitastor_osd_size_bytes gauge
# HELP vitastor_osd_free_bytes OSD free space in bytes
# TYPE vitastor_osd_free_bytes gauge
# HELP vitastor_osd_stat_count Per-OSD total operation count
# TYPE vitastor_osd_stat_count counter
# HELP vitastor_osd_stat_usec Per-OSD total operation latency
# TYPE vitastor_osd_stat_usec counter
# HELP vitastor_osd_stat_bytes Per-OSD total operation size in bytes
# TYPE vitastor_osd_stat_bytes counter

# HELP vitastor_monitor_info Monitor info, 1 is master, 0 is standby
# TYPE vitastor_monitor_info gauge

# HELP vitastor_pool_info Pool configuration (in labels)
# TYPE vitastor_pool_info gauge
# HELP vitastor_pool_status Pool up/down status
# TYPE vitastor_pool_status gauge
# HELP vitastor_pool_raw_to_usable Raw to usable space ratio
# TYPE vitastor_pool_raw_to_usable gauge
# HELP vitastor_pool_space_efficiency Pool space usage efficiency
# TYPE vitastor_pool_space_efficiency gauge
# HELP vitastor_pool_total_raw_tb Total raw space in pool in TB
# TYPE vitastor_pool_total_raw_tb gauge
# HELP vitastor_pool_used_raw_tb Used raw space in pool in TB
# TYPE vitastor_pool_used_raw_tb gauge
# HELP vitastor_pg_count PG counts by state
# TYPE vitastor_pg_count gauge

`;
|
||||
|
||||
// Render monitor state `st` as a Prometheus text exposition document.
//
// Reads st.stats, st.inode.stats, st.config.inode, st.osd.stats, st.osd.state,
// st.mon.member, st.mon.master, st.config.pools, st.config.pgs, st.pool.stats and st.pg.state.
// Returns the metric_help header followed by one line per sample.
function export_prometheus_metrics(st)
{
    const stat_keys = [ 'count', 'usec', 'bytes' ];
    const out = [ metric_help ];

    // Global statistics

    const object_bytes = st.stats.object_bytes;
    for (const obj_type in object_bytes)
    {
        out.push(`vitastor_object_bytes{object_type="${obj_type}"} ${object_bytes[obj_type]}\n`);
    }

    const object_counts = st.stats.object_counts;
    for (const obj_type in object_counts)
    {
        out.push(`vitastor_object_count{object_type="${obj_type}"} ${object_counts[obj_type]}\n`);
    }

    for (const typ of [ 'op', 'subop', 'recovery' ])
    {
        const typ_stats = st.stats[typ+"_stats"]||{};
        for (const op in typ_stats)
        {
            const op_stat = typ_stats[op];
            for (const key of stat_keys)
            {
                out.push(`vitastor_stat_${key}{op="${op}",op_type="${typ}"} ${op_stat[key]||0}\n`);
            }
        }
    }

    // Per-image statistics

    for (const pool in st.inode.stats)
    {
        const pool_inodes = st.inode.stats[pool];
        for (const inode in pool_inodes)
        {
            const ist = pool_inodes[inode];
            const inode_cfg = (st.config.inode[pool]||{})[inode]||{};
            const inode_name = inode_cfg.name||'';
            const inode_label = `image_name="${addslashes(inode_name)}",inode_num="${inode}",pool_id="${pool}"`;
            out.push(`vitastor_image_raw_used{${inode_label}} ${ist.raw_used||0}\n`);
            for (const op of [ 'read', 'write', 'delete' ])
            {
                const op_stat = ist[op];
                if (!op_stat)
                {
                    continue;
                }
                for (const k of stat_keys)
                {
                    out.push(`vitastor_image_stat_${k}{${inode_label},op="${op}"} ${op_stat[k]||0}\n`);
                }
            }
        }
    }

    // Per-OSD statistics

    for (const osd in st.osd.stats)
    {
        const osd_stat = st.osd.stats[osd];
        const osd_state = st.osd.state[osd];
        const up = osd_state && osd_state.state == 'up' ? 1 : 0;
        out.push(`vitastor_osd_status{host="${addslashes(osd_stat.host)}",osd_num="${osd}"} ${up}\n`);
        out.push(`vitastor_osd_size_bytes{osd_num="${osd}"} ${osd_stat.size||0}\n`);
        out.push(`vitastor_osd_free_bytes{osd_num="${osd}"} ${osd_stat.free||0}\n`);
        // Primary operations first, then sub-operations
        const stat_groups = [ [ 'op', osd_stat.op_stats ], [ 'subop', osd_stat.subop_stats ] ];
        for (const [ op_type, group ] of stat_groups)
        {
            for (const op in group)
            {
                const ist = group[op];
                for (const k of stat_keys)
                {
                    out.push(`vitastor_osd_stat_${k}{osd_num="${osd}",op="${op}",op_type="${op_type}"} ${ist[k]||0}\n`);
                }
            }
        }
    }

    // Monitor statistics

    for (const mon_id in st.mon.member)
    {
        const mon = st.mon.member[mon_id];
        const master = st.mon.master && st.mon.master.id == mon_id ? 1 : 0;
        const ip = (mon.ip instanceof Array ? mon.ip[0] : mon.ip) || '';
        out.push(`vitastor_monitor_info{monitor_hostname="${addslashes(mon.hostname)}",monitor_id="${mon_id}",monitor_ip="${addslashes(ip)}"} ${master}\n`);
    }

    // Per-pool statistics

    for (const pool_id in st.config.pools)
    {
        const pool_cfg = st.config.pools[pool_id];
        const pool_label = `pool_id="${pool_id}",pool_name="${addslashes(pool_cfg.name)}"`;
        const pool_stat = st.pool.stats[pool_id];
        const info_labels = [
            pool_label,
            `pool_scheme="${addslashes(pool_cfg.scheme)}"`,
            `pg_size="${pool_cfg.pg_size||0}",pg_minsize="${pool_cfg.pg_minsize||0}"`,
            `parity_chunks="${pool_cfg.parity_chunks||0}",pg_count="${pool_cfg.pg_count||0}"`,
            `failure_domain="${addslashes(pool_cfg.failure_domain)}"`,
        ];
        out.push(`vitastor_pool_info{${info_labels.join(',')}} 1\n`);
        if (!pool_stat)
        {
            // Nothing else is reported for pools without statistics
            continue;
        }
        out.push(`vitastor_pool_raw_to_usable{${pool_label}} ${pool_stat.raw_to_usable||0}\n`);
        out.push(`vitastor_pool_space_efficiency{${pool_label}} ${pool_stat.space_efficiency||0}\n`);
        out.push(`vitastor_pool_total_raw_tb{${pool_label}} ${pool_stat.total_raw_tb||0}\n`);
        out.push(`vitastor_pool_used_raw_tb{${pool_label}} ${pool_stat.used_raw_tb||0}\n`);

        // PG states and pool up/down status
        const configured_pgs = ((st.config.pgs||{}).items||{})[pool_id]||{};
        const real_pg_count = Object.keys(configured_pgs).length || (0|pool_cfg.pg_count);
        // Known states are listed explicitly so they are reported even with zero PGs
        const per_state = {
            active: 0,
            starting: 0,
            peering: 0,
            incomplete: 0,
            repeering: 0,
            stopping: 0,
            offline: 0,
            degraded: 0,
            has_inconsistent: 0,
            has_corrupted: 0,
            has_incomplete: 0,
            has_degraded: 0,
            has_misplaced: 0,
            has_unclean: 0,
            has_invalid: 0,
            left_on_dead: 0,
            scrubbing: 0,
        };
        const pool_pg_states = st.pg.state[pool_id] || {};
        for (let pg_num = 1; pg_num <= real_pg_count; pg_num++)
        {
            const pg_state = pool_pg_states[pg_num];
            // PGs without a reported state are counted as offline
            const state_names = pg_state ? pg_state.state : [ 'offline' ];
            for (const st_name of state_names)
            {
                per_state[st_name] = 1 + (per_state[st_name]|0);
            }
        }
        for (const st_name in per_state)
        {
            out.push(`vitastor_pg_count{pg_state="${st_name}",${pool_label}} ${per_state[st_name]}\n`);
        }
        const pool_active = per_state['active'] >= real_pg_count ? 1 : 0;
        out.push(`vitastor_pool_status{${pool_label}} ${pool_active}\n`);
    }

    return out.join('');
}
|
||||
|
||||
// Escape a value so it can be embedded in a double-quoted Prometheus label.
//
// str - any value; falsy values (null, undefined, 0, '') become an empty string,
//       everything else is converted to a string first.
//
// Returns the string with `"` and `\` prefixed by a backslash and every line
// feed replaced by the two characters `\n`. A raw line feed inside a label
// value would terminate the sample line and corrupt the exposition output.
function addslashes(str)
{
    return ((str||'')+'').replace(/["\n\\]/g, (ch) => (ch === '\n' ? '\\n' : '\\'+ch));
}
|
||||
|
||||
// The only public entry point of this module is the metrics exporter; addslashes() stays private
module.exports = { export_prometheus_metrics };
|
2818
mon/scripts/Vitastor-Grafana-6+.json
Normal file
2818
mon/scripts/Vitastor-Grafana-6+.json
Normal file
File diff suppressed because it is too large
Load Diff
283
mon/stats.js
Normal file
283
mon/stats.js
Normal file
@@ -0,0 +1,283 @@
|
||||
// Copyright (c) Vitaliy Filippov, 2019+
|
||||
// License: VNPL-1.1 (see README.md for details)
|
||||
|
||||
// Derive per-second rates and average latencies for one OSD from two
// consecutive statistics snapshots.
//
// st        - current snapshot: { time, op_stats, subop_stats, recovery_stats, inode_stats }
// prev      - previous snapshot of the same shape
// prev_diff - previously derived result; it is updated in place and returned.
//             When no new rates can be derived (missing snapshot, time not
//             advanced, unusable time difference) it is returned unchanged.
//
// Returns an object with op_stats / subop_stats / recovery_stats / inode_stats
// whose values are BigInts (absolute counters plus bps / iops / lat).
function derive_osd_stats(st, prev, prev_diff)
{
    const diff = prev_diff || { op_stats: {}, subop_stats: {}, recovery_stats: {}, inode_stats: {} };
    if (!st || !st.time || !prev || !prev.time || prev.time >= st.time)
    {
        return diff;
    }
    // Timestamps may be fractional, and BigInt() throws RangeError on a non-integer
    // number, so round to whole milliseconds. A difference that rounds to zero
    // (or is not a finite number) would divide by zero below, so skip it.
    const timediff_ms = Math.round(st.time*1000 - prev.time*1000);
    if (!Number.isFinite(timediff_ms) || timediff_ms <= 0)
    {
        return diff;
    }
    const timediff = BigInt(timediff_ms);
    for (const op in st.op_stats||{})
    {
        const pr = prev.op_stats && prev.op_stats[op];
        let c = st.op_stats[op];
        c = { bytes: BigInt(c.bytes||0), usec: BigInt(c.usec||0), count: BigInt(c.count||0) };
        const b = c.bytes - BigInt(pr && pr.bytes||0);
        const us = c.usec - BigInt(pr && pr.usec||0);
        const n = c.count - BigInt(pr && pr.count||0);
        // Rates are reported only when the operation counter has grown
        diff.op_stats[op] = { ...c, bps: n > 0 ? b*1000n/timediff : 0n, iops: n > 0 ? n*1000n/timediff : 0n, lat: n > 0 ? us/n : 0n };
    }
    for (const op in st.subop_stats||{})
    {
        const pr = prev.subop_stats && prev.subop_stats[op];
        let c = st.subop_stats[op];
        c = { usec: BigInt(c.usec||0), count: BigInt(c.count||0) };
        const us = c.usec - BigInt(pr && pr.usec||0);
        const n = c.count - BigInt(pr && pr.count||0);
        diff.subop_stats[op] = { ...c, iops: n > 0 ? n*1000n/timediff : 0n, lat: n > 0 ? us/n : 0n };
    }
    for (const op in st.recovery_stats||{})
    {
        const pr = prev.recovery_stats && prev.recovery_stats[op];
        let c = st.recovery_stats[op];
        c = { bytes: BigInt(c.bytes||0), count: BigInt(c.count||0) };
        const b = c.bytes - BigInt(pr && pr.bytes||0);
        const n = c.count - BigInt(pr && pr.count||0);
        diff.recovery_stats[op] = { ...c, bps: n > 0 ? b*1000n/timediff : 0n, iops: n > 0 ? n*1000n/timediff : 0n };
    }
    for (const pool_id in st.inode_stats||{})
    {
        diff.inode_stats[pool_id] = {};
        for (const inode_num in st.inode_stats[pool_id])
        {
            const inode_diff = diff.inode_stats[pool_id][inode_num] = {};
            for (const op of [ 'read', 'write', 'delete' ])
            {
                // A snapshot without an entry for this operation counts as all zeroes
                const c = st.inode_stats[pool_id][inode_num][op] || {};
                const pr = prev.inode_stats && prev.inode_stats[pool_id] &&
                    prev.inode_stats[pool_id][inode_num] && prev.inode_stats[pool_id][inode_num][op];
                const n = BigInt(c.count||0) - BigInt(pr && pr.count||0);
                inode_diff[op] = {
                    bps: n > 0 ? (BigInt(c.bytes||0) - BigInt(pr && pr.bytes||0))*1000n/timediff : 0n,
                    iops: n > 0 ? n*1000n/timediff : 0n,
                    lat: n > 0 ? (BigInt(c.usec||0) - BigInt(pr && pr.usec||0))/n : 0n,
                };
            }
        }
    }
    return diff;
}
|
||||
|
||||
// sum_op_stats(this.state.osd, this.prev_stats)
|
||||
// Refresh the derived statistics of every OSD and add them up cluster-wide.
//
// all_osd    - object with `stats`, `inodestats` and `state` maps keyed by OSD number
// prev_stats - object with `osd_stats` (last seen snapshots) and `osd_diff`
//              (last derived values); both maps are updated in place
//
// Returns { op_stats, subop_stats, recovery_stats } holding per-operation sums
// of the derived values over all OSDs that have a truthy `state` entry.
function sum_op_stats(all_osd, prev_stats)
{
    // Pass 1: derive fresh values for each OSD and remember the new snapshot
    for (const osd_num in all_osd.stats)
    {
        const snapshot = Object.assign({}, all_osd.stats[osd_num], { inode_stats: all_osd.inodestats[osd_num]||{} });
        prev_stats.osd_diff[osd_num] = derive_osd_stats(
            snapshot, prev_stats.osd_stats[osd_num], prev_stats.osd_diff[osd_num]
        );
        prev_stats.osd_stats[osd_num] = snapshot;
    }
    // Pass 2: sum the derived values (instead of deriving from summed counters)
    const totals = { op_stats: {}, subop_stats: {}, recovery_stats: { degraded: {}, misplaced: {} } };
    for (const osd_num in all_osd.state)
    {
        const derived = prev_stats.osd_diff[osd_num];
        if (all_osd.state[osd_num] && derived)
        {
            for (const kind in totals)
            {
                const per_op = derived[kind]||{};
                for (const op in per_op)
                {
                    for (const field in per_op[op])
                    {
                        if (!totals[kind][op])
                        {
                            totals[kind][op] = {};
                        }
                        const acc = totals[kind][op];
                        acc[field] = (acc[field] || 0n) + per_op[op][field];
                    }
                }
            }
        }
    }
    return totals;
}
|
||||
|
||||
// sum_object_counts(this.state, this.config)
|
||||
// Sum object counters of all PGs and convert them to byte totals.
//
// state         - cluster state; reads state.pg.stats, state.osd.stats and state.config.pools
// global_config - global configuration; only `block_size` is read, as a fallback
//
// Returns { object_counts, object_bytes }, each with BigInt fields
// object / clean / misplaced / degraded / incomplete.
function sum_object_counts(state, global_config)
{
    const zero_counters = () => ({ object: 0n, clean: 0n, misplaced: 0n, degraded: 0n, incomplete: 0n });
    const object_counts = zero_counters();
    const object_bytes = zero_counters();
    for (const pool_id in state.pg.stats)
    {
        const pool_pgs = state.pg.stats[pool_id];
        // Prefer the block size reported by the first known OSD of write_osd_set
        let bytes_per_object = 0;
        for (const osd_num of pool_pgs.write_osd_set||[])
        {
            const osd_st = osd_num && state.osd.stats[osd_num];
            if (osd_st && osd_st.block_size)
            {
                bytes_per_object = osd_st.block_size;
                break;
            }
        }
        const pool_cfg = (state.config.pools[pool_id]||{});
        if (!bytes_per_object)
        {
            // Fall back to pool setting, then global setting, then 128 KiB
            bytes_per_object = pool_cfg.block_size || global_config.block_size || 131072;
        }
        if (pool_cfg.scheme !== 'replicated')
        {
            // Non-replicated pools: one object spans (pg_size - parity_chunks) blocks
            bytes_per_object *= ((pool_cfg.pg_size||0) - (pool_cfg.parity_chunks||0));
        }
        bytes_per_object = BigInt(bytes_per_object);
        for (const pg_num in pool_pgs)
        {
            const pg_st = pool_pgs[pg_num];
            if (!pg_st)
            {
                continue;
            }
            for (const k in object_counts)
            {
                const raw_count = pg_st[k+'_count'];
                if (raw_count)
                {
                    object_counts[k] += BigInt(raw_count);
                    object_bytes[k] += BigInt(raw_count) * bytes_per_object;
                }
            }
        }
    }
    return { object_counts, object_bytes };
}
|
||||
|
||||
// sum_inode_stats(this.state, this.prev_stats)
|
||||
// Aggregate per-inode space usage and I/O statistics over all OSDs.
//
// state      - cluster state; reads state.config.pools, state.config.inode,
//              state.osd.space, state.osd.state and state.osd.inodestats.
//              state.pool.stats[pool].used_raw_tb is overwritten (as a Number, in TiB)
//              for every pool found in the configuration or in the OSD space data.
// prev_stats - object whose `osd_diff` map holds the derived per-OSD values
//
// Returns { inode_stats, seen_pools }:
//   inode_stats[pool][inode] = { raw_used, read, write, delete } with BigInt fields,
//   seen_pools[pool] = true for every pool whose used_raw_tb was recalculated.
function sum_inode_stats(state, prev_stats)
{
    const inode_stats = {};
    const inode_stub = () => ({
        raw_used: 0n,
        read: { count: 0n, usec: 0n, bytes: 0n, bps: 0n, iops: 0n, lat: 0n },
        write: { count: 0n, usec: 0n, bytes: 0n, bps: 0n, iops: 0n, lat: 0n },
        delete: { count: 0n, usec: 0n, bytes: 0n, bps: 0n, iops: 0n, lat: 0n },
    });
    const seen_pools = {};
    for (const pool_id in state.config.pools)
    {
        seen_pools[pool_id] = true;
        state.pool.stats[pool_id] = state.pool.stats[pool_id] || {};
        state.pool.stats[pool_id].used_raw_tb = 0n;
    }
    // Raw space: sum bytes per inode and per pool
    for (const osd_num in state.osd.space)
    {
        for (const pool_id in state.osd.space[osd_num])
        {
            state.pool.stats[pool_id] = state.pool.stats[pool_id] || {};
            if (!seen_pools[pool_id])
            {
                state.pool.stats[pool_id].used_raw_tb = 0n;
                seen_pools[pool_id] = true;
            }
            inode_stats[pool_id] = inode_stats[pool_id] || {};
            for (const inode_num in state.osd.space[osd_num][pool_id])
            {
                const u = BigInt(state.osd.space[osd_num][pool_id][inode_num]||0);
                if (inode_num)
                {
                    inode_stats[pool_id][inode_num] = inode_stats[pool_id][inode_num] || inode_stub();
                    inode_stats[pool_id][inode_num].raw_used += u;
                }
                state.pool.stats[pool_id].used_raw_tb += u;
            }
        }
    }
    // Convert the per-pool byte totals to TiB
    for (const pool_id in seen_pools)
    {
        const used = state.pool.stats[pool_id].used_raw_tb;
        state.pool.stats[pool_id].used_raw_tb = Number(used)/1024/1024/1024/1024;
    }
    // Absolute counters, only from OSDs with a truthy state entry
    for (const osd_num in state.osd.state)
    {
        const ist = state.osd.inodestats[osd_num];
        if (!ist || !state.osd.state[osd_num])
        {
            continue;
        }
        for (const pool_id in ist)
        {
            inode_stats[pool_id] = inode_stats[pool_id] || {};
            for (const inode_num in ist[pool_id])
            {
                inode_stats[pool_id][inode_num] = inode_stats[pool_id][inode_num] || inode_stub();
                for (const op of [ 'read', 'write', 'delete' ])
                {
                    // A missing operation entry counts as all zeroes
                    const src = ist[pool_id][inode_num][op] || {};
                    inode_stats[pool_id][inode_num][op].count += BigInt(src.count||0);
                    inode_stats[pool_id][inode_num][op].usec += BigInt(src.usec||0);
                    inode_stats[pool_id][inode_num][op].bytes += BigInt(src.bytes||0);
                }
            }
        }
    }
    // Derived rates
    for (const osd in state.osd.state)
    {
        const osd_diff = prev_stats.osd_diff[osd];
        if (!osd_diff || !state.osd.state[osd])
        {
            continue;
        }
        for (const pool_id in osd_diff.inode_stats)
        {
            // The derived data may mention a pool that neither the space nor the
            // inodestats data contains, so the per-pool map must be created here too
            inode_stats[pool_id] = inode_stats[pool_id] || {};
            for (const inode_num in osd_diff.inode_stats[pool_id])
            {
                inode_stats[pool_id][inode_num] = inode_stats[pool_id][inode_num] || inode_stub();
                for (const op of [ 'read', 'write', 'delete' ])
                {
                    const op_diff = osd_diff.inode_stats[pool_id][inode_num][op] || {};
                    const op_st = inode_stats[pool_id][inode_num][op];
                    // `0n + undefined` throws, so absent values must default to 0n
                    op_st.bps += op_diff.bps || 0n;
                    op_st.iops += op_diff.iops || 0n;
                    op_st.lat += op_diff.lat || 0n;
                    op_st.n_osd = (op_st.n_osd || 0) + 1;
                }
            }
        }
    }
    for (const pool_id in inode_stats)
    {
        for (const inode_num in inode_stats[pool_id])
        {
            let nonzero = inode_stats[pool_id][inode_num].raw_used > 0;
            for (const op of [ 'read', 'write', 'delete' ])
            {
                const op_st = inode_stats[pool_id][inode_num][op];
                if (op_st.n_osd)
                {
                    // Latency was summed over OSDs above; turn it into an average
                    op_st.lat /= BigInt(op_st.n_osd);
                    delete op_st.n_osd;
                }
                if (op_st.bps > 0 || op_st.iops > 0)
                    nonzero = true;
            }
            if (!nonzero && (!state.config.inode[pool_id] || !state.config.inode[pool_id][inode_num]))
            {
                // Deleted inode (no data, no I/O, no config)
                delete inode_stats[pool_id][inode_num];
            }
        }
    }
    return { inode_stats, seen_pools };
}
|
||||
|
||||
// Return a deep copy of `obj` in which every BigInt is replaced by its decimal
// string, so that the result can be passed to JSON.stringify().
//
// obj - object (or array) to convert; it is not modified
//
// Nested plain objects are copied recursively. `null` values and arrays keep
// their type: typeof null is 'object', so without the explicit checks a null
// would turn into {} and an array into an index-keyed plain object.
function serialize_bigints(obj)
{
    const convert = (v) =>
    {
        if (typeof v == 'bigint')
        {
            return ''+v;
        }
        if (Array.isArray(v))
        {
            return v.map(convert);
        }
        if (v !== null && typeof v == 'object')
        {
            return serialize_bigints(v);
        }
        return v;
    };
    if (Array.isArray(obj))
    {
        return obj.map(convert);
    }
    obj = { ...obj };
    for (const k in obj)
    {
        obj[k] = convert(obj[k]);
    }
    return obj;
}
|
||||
|
||||
module.exports = {
|
||||
derive_osd_stats,
|
||||
sum_op_stats,
|
||||
sum_object_counts,
|
||||
sum_inode_stats,
|
||||
serialize_bigints,
|
||||
};
|
37
mon/utils.js
Normal file
37
mon/utils.js
Normal file
@@ -0,0 +1,37 @@
|
||||
// Copyright (c) Vitaliy Filippov, 2019+
|
||||
// License: VNPL-1.1 (see README.md for details)
|
||||
|
||||
const os = require('os');
|
||||
|
||||
// List IP addresses of the local network interfaces.
//
// all - when truthy, return every address reported by os.networkInterfaces()
//       (any family, including internal ones); otherwise return only
//       non-internal IPv4 addresses.
//
// Returns an array of address strings in the order reported by the OS module.
function local_ips(all)
{
    const ips = [];
    const ifaces = os.networkInterfaces();
    for (const ifname in ifaces)
    {
        for (const iface of ifaces[ifname]||[])
        {
            // Some Node.js 18 releases report the family as the number 4 instead of 'IPv4'
            const is_ipv4 = iface.family == 'IPv4' || iface.family == 4;
            if (all || is_ipv4 && !iface.internal)
            {
                ips.push(iface.address);
            }
        }
    }
    return ips;
}
|
||||
|
||||
// Encode a string (or anything else Buffer.from() accepts) as base64 text
function b64(str)
{
    const raw = Buffer.from(str);
    return raw.toString('base64');
}
|
||||
|
||||
// Decode base64 text back into a string (default Buffer#toString() encoding)
function de64(str)
{
    const decoded = Buffer.from(str, 'base64');
    return decoded.toString();
}
|
||||
|
||||
module.exports = {
|
||||
b64,
|
||||
de64,
|
||||
local_ips,
|
||||
};
|
48
mon/vitastor_persist_filter.js
Normal file
48
mon/vitastor_persist_filter.js
Normal file
@@ -0,0 +1,48 @@
|
||||
// AntiEtcd persistence filter for Vitastor
|
||||
// (c) Vitaliy Filippov, 2024
|
||||
// License: Mozilla Public License 2.0 or Vitastor Network Public License 1.1
|
||||
|
||||
// Build a persistence filter: a function deciding, per key, what gets persisted.
//
// cfg - configuration object; `vitastor_prefix` is the key prefix (default '/vitastor')
//
// Returns function(key, value) which returns:
//   - for <prefix>/osd/stats/*: a JSON string reduced to bitmap_granularity,
//     data_block_size, host and immediate_commit (undefined when value is empty,
//     '{}' when the value is not valid JSON);
//   - undefined for other <prefix>/osd/*, <prefix>/inode/stats/*, <prefix>/pg/stats/*,
//     <prefix>/pool/stats/* keys and for <prefix>/stats;
//   - the unchanged value for every other key.
function vitastor_persist_filter(cfg)
{
    const prefix = cfg.vitastor_prefix || '/vitastor';
    const osd_stats_prefix = prefix+'/osd/stats/';
    const dropped_prefixes = [ '/osd/', '/inode/stats/', '/pg/stats/', '/pool/stats/' ].map(p => prefix+p);
    return (key, value) =>
    {
        if (key.startsWith(osd_stats_prefix))
        {
            if (!value)
            {
                return undefined;
            }
            try
            {
                const st = JSON.parse(value);
                return JSON.stringify({
                    bitmap_granularity: st.bitmap_granularity || undefined,
                    data_block_size: st.data_block_size || undefined,
                    host: st.host || undefined,
                    immediate_commit: st.immediate_commit || undefined,
                });
            }
            catch (e)
            {
                console.error('invalid JSON in '+key+' = '+value+': '+e);
                // Keep the return type consistent with the success path:
                // a serialized (empty) object, not an object instance
                return '{}';
            }
        }
        if (key == prefix+'/stats' || dropped_prefixes.some(p => key.startsWith(p)))
        {
            return undefined;
        }
        return value;
    };
}
|
||||
|
||||
// The module itself is the filter factory: require(...)(cfg) returns the (key, value) filter
module.exports = vitastor_persist_filter;
|
80
node-binding/addon.cc
Normal file
80
node-binding/addon.cc
Normal file
@@ -0,0 +1,80 @@
|
||||
// Copyright (c) Vitaliy Filippov, 2019+
|
||||
// License: VNPL-1.1 (see README.md for details)
|
||||
|
||||
#include "addon.h"
|
||||
|
||||
// Initialize the node addon
|
||||
// Module initialiser: registers the Client, Image, KV and KVListing classes
// and a set of negated errno constants on the export object.
NAN_MODULE_INIT(InitAddon)
{
    // ---- vitastor.Client ----
    v8::Local<v8::FunctionTemplate> client_tpl = Nan::New<v8::FunctionTemplate>(NodeVitastor::Create);
    client_tpl->SetClassName(Nan::New("Client").ToLocalChecked());
    client_tpl->InstanceTemplate()->SetInternalFieldCount(1);
    Nan::SetPrototypeMethod(client_tpl, "read", NodeVitastor::Read);
    Nan::SetPrototypeMethod(client_tpl, "write", NodeVitastor::Write);
    Nan::SetPrototypeMethod(client_tpl, "sync", NodeVitastor::Sync);
    Nan::SetPrototypeMethod(client_tpl, "read_bitmap", NodeVitastor::ReadBitmap);
    //Nan::SetPrototypeMethod(client_tpl, "destroy", NodeVitastor::Destroy);
    Nan::Set(target, Nan::New("Client").ToLocalChecked(), Nan::GetFunction(client_tpl).ToLocalChecked());

    // ---- vitastor.Image (opened image) ----
    v8::Local<v8::FunctionTemplate> image_tpl = Nan::New<v8::FunctionTemplate>(NodeVitastorImage::Create);
    image_tpl->SetClassName(Nan::New("Image").ToLocalChecked());
    image_tpl->InstanceTemplate()->SetInternalFieldCount(2);
    Nan::SetPrototypeMethod(image_tpl, "read", NodeVitastorImage::Read);
    Nan::SetPrototypeMethod(image_tpl, "write", NodeVitastorImage::Write);
    Nan::SetPrototypeMethod(image_tpl, "sync", NodeVitastorImage::Sync);
    Nan::SetPrototypeMethod(image_tpl, "get_info", NodeVitastorImage::GetInfo);
    Nan::SetPrototypeMethod(image_tpl, "read_bitmap", NodeVitastorImage::ReadBitmap);
    Nan::Set(target, Nan::New("Image").ToLocalChecked(), Nan::GetFunction(image_tpl).ToLocalChecked());

    // ---- vitastor.KV ----
    v8::Local<v8::FunctionTemplate> kv_tpl = Nan::New<v8::FunctionTemplate>(NodeVitastorKV::Create);
    kv_tpl->SetClassName(Nan::New("KV").ToLocalChecked());
    kv_tpl->InstanceTemplate()->SetInternalFieldCount(1);
    Nan::SetPrototypeMethod(kv_tpl, "open", NodeVitastorKV::Open);
    Nan::SetPrototypeMethod(kv_tpl, "set_config", NodeVitastorKV::SetConfig);
    Nan::SetPrototypeMethod(kv_tpl, "close", NodeVitastorKV::Close);
    Nan::SetPrototypeMethod(kv_tpl, "get_size", NodeVitastorKV::GetSize);
    Nan::SetPrototypeMethod(kv_tpl, "get", NodeVitastorKV::Get);
    Nan::SetPrototypeMethod(kv_tpl, "get_cached", NodeVitastorKV::GetCached);
    Nan::SetPrototypeMethod(kv_tpl, "set", NodeVitastorKV::Set);
    Nan::SetPrototypeMethod(kv_tpl, "del", NodeVitastorKV::Del);
    Nan::SetPrototypeMethod(kv_tpl, "list", NodeVitastorKV::List);
    Nan::Set(target, Nan::New("KV").ToLocalChecked(), Nan::GetFunction(kv_tpl).ToLocalChecked());

    // ---- error codes, exported as negative numbers ----
    const auto export_errno = [&](const char *name, int code)
    {
        Nan::Set(target, Nan::New(name).ToLocalChecked(), Nan::New<v8::Int32>(-code));
    };
    export_errno("ENOENT", ENOENT);
    export_errno("EIO", EIO);
    export_errno("EINVAL", EINVAL);
    export_errno("EROFS", EROFS);
    export_errno("ENOSPC", ENOSPC);
    export_errno("EINTR", EINTR);
    export_errno("EILSEQ", EILSEQ);
    export_errno("ENOTBLK", ENOTBLK);
    export_errno("ENOSYS", ENOSYS);
    export_errno("EAGAIN", EAGAIN);

    // ---- listing handle ----
    v8::Local<v8::FunctionTemplate> listing_tpl = Nan::New<v8::FunctionTemplate>(NodeVitastorKVListing::Create);
    listing_tpl->SetClassName(Nan::New("KVListing").ToLocalChecked());
    listing_tpl->InstanceTemplate()->SetInternalFieldCount(2);
    Nan::SetPrototypeMethod(listing_tpl, "next", NodeVitastorKVListing::Next);
    Nan::SetPrototypeMethod(listing_tpl, "close", NodeVitastorKVListing::Close);
    Nan::Set(target, Nan::New("KVListing").ToLocalChecked(), Nan::GetFunction(listing_tpl).ToLocalChecked());

    // Keep the listing constructor reachable from NodeVitastorKV
    NodeVitastorKV::listing_class.Reset(Nan::GetFunction(listing_tpl).ToLocalChecked());
}
|
||||
|
||||
// Register InitAddon as the initialiser of the native module named "addon"
NODE_MODULE(addon, (void*)InitAddon)
|
20
node-binding/addon.h
Normal file
20
node-binding/addon.h
Normal file
@@ -0,0 +1,20 @@
|
||||
// Copyright (c) Vitaliy Filippov, 2019+
|
||||
// License: VNPL-1.1 (see README.md for details)
|
||||
|
||||
#ifndef NODE_VITASTOR_ADDON_H
#define NODE_VITASTOR_ADDON_H

// fprintf() and stderr are used by the logging macros below
#include <stdio.h>

#include <nan.h>
#include <vitastor_c.h>

#include "client.h"

// Logging helpers: print one line to stderr.
// ERRORF and TRACEF require at least one argument after the format string.
// Each macro already ends with a semicolon.
#define ERRORF(format, ...) fprintf(stderr, format "\n", __VA_ARGS__);

#define TRACEF(format, ...) fprintf(stderr, format "\n", __VA_ARGS__);
#define TRACE(msg) fprintf(stderr, "%s\n", msg);

// No-op variants that switch tracing off:
//#define TRACEF(format, arg) ;
//#define TRACE(msg) ;

#endif
|
20
node-binding/binding.gyp
Normal file
20
node-binding/binding.gyp
Normal file
@@ -0,0 +1,20 @@
|
||||
# Build description of the native addon
{
    'targets': [
        {
            'target_name': 'addon',
            'sources': [ 'client.cc', 'addon.cc' ],
            # Header search path printed by the nan package
            'include_dirs': [ '<!(node -e "require(\'nan\')")' ],
            # Compiler and linker flags of the vitastor library come from pkg-config
            'cflags': [ '<!(pkg-config --cflags vitastor)' ],
            'libraries': [ '<!(pkg-config --libs vitastor)' ]
        }
    ]
}
|
853
node-binding/client.cc
Normal file
853
node-binding/client.cc
Normal file
@@ -0,0 +1,853 @@
|
||||
// Copyright (c) Vitaliy Filippov, 2019+
|
||||
// License: VNPL-1.1 (see README.md for details)
|
||||
|
||||
#include "addon.h"
|
||||
|
||||
// Operation codes stored in NodeVitastorRequest::op for requests issued through an image
#define NODE_VITASTOR_READ        1
#define NODE_VITASTOR_WRITE       2
#define NODE_VITASTOR_SYNC        3
#define NODE_VITASTOR_READ_BITMAP 4
#define NODE_VITASTOR_GET_INFO    5

// Fallback helpers for splitting / combining the pool ID kept in the top
// POOL_ID_BITS bits of a 64-bit inode number (used only if not defined already)
#ifndef INODE_POOL
#define INODE_POOL(inode) (uint32_t)((inode) >> (64 - POOL_ID_BITS))
#define INODE_NO_POOL(inode) (uint64_t)((inode) & (((uint64_t)1 << (64-POOL_ID_BITS)) - 1))
#define INODE_WITH_POOL(pool_id, inode) (((uint64_t)(pool_id) << (64-POOL_ID_BITS)) | INODE_NO_POOL(inode))
#endif
|
||||
|
||||
class NodeVitastorRequest: public Nan::AsyncResource
|
||||
{
|
||||
public:
|
||||
NodeVitastorRequest(v8::Local<v8::Function> cb): Nan::AsyncResource("NodeVitastorRequest")
|
||||
{
|
||||
callback.Reset(cb);
|
||||
}
|
||||
|
||||
iovec iov;
|
||||
NodeVitastorImage *img = NULL;
|
||||
int op = 0;
|
||||
uint64_t offset = 0, len = 0, version = 0;
|
||||
bool with_parents = false;
|
||||
Nan::Persistent<v8::Function> callback;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////
|
||||
// NodeVitastor
|
||||
//////////////////////////////////////////////////
|
||||
|
||||
// Construct an unconnected client wrapper
NodeVitastor::NodeVitastor(): Nan::ObjectWrap()
{
    TRACE("NodeVitastor: constructor");
    // Back-pointer used by on_io_readable() to find the client from the poll handle
    poll_watcher.data = static_cast<void*>(this);
}
|
||||
|
||||
// Stop polling the eventfd and destroy the underlying client
NodeVitastor::~NodeVitastor()
{
    // Polling is stopped first so that no readiness callback is delivered
    // for a client that is being destroyed
    uv_poll_stop(&poll_watcher);
    vitastor_c_destroy(c);
    c = nullptr;
}
|
||||
|
||||
// JS constructor of the client: new Client(config).
// `config` is an object; each own property name and its value (both converted
// to strings) are passed to vitastor_c_create_uring_json() as a flat list.
// After creation the client's eventfd is registered in the default libuv loop.
// Throws a JS error when the argument is not an object or registration fails.
NAN_METHOD(NodeVitastor::Create)
{
    TRACE("NodeVitastor::Create");
    if (info.Length() < 1 || !info[0]->IsObject())
    {
        Nan::ThrowTypeError("configuration object expected");
        return;
    }
    v8::Local<v8::Object> jsParams = info[0].As<v8::Object>();
    v8::Local<v8::Array> keys = Nan::GetOwnPropertyNames(jsParams).ToLocalChecked();
    // Flat list: key, value, key, value, ...
    std::vector<std::string> cfg;
    for (uint32_t i = 0; i < keys->Length(); i++)
    {
        auto key = Nan::Get(keys, i).ToLocalChecked();
        cfg.push_back(std::string(*Nan::Utf8String(key)));
        cfg.push_back(std::string(*Nan::Utf8String(Nan::Get(jsParams, key).ToLocalChecked())));
    }

    // Pointer view of cfg for the C API; cfg owns the strings
    std::vector<const char*> c_cfg;
    c_cfg.reserve(cfg.size());
    for (const std::string & item: cfg)
    {
        c_cfg.push_back(item.c_str());
    }
    NodeVitastor* cli = new NodeVitastor();
    cli->c = vitastor_c_create_uring_json(c_cfg.data(), cfg.size());

    int res = vitastor_c_uring_register_eventfd(cli->c);
    if (res >= 0)
    {
        cli->eventfd = res;
        res = uv_poll_init_socket(uv_default_loop(), &cli->poll_watcher, cli->eventfd);
        if (res >= 0)
            res = uv_poll_start(&cli->poll_watcher, UV_READABLE, on_io_readable);
    }
    if (res < 0)
    {
        // Report the code of the step that actually failed; cli->eventfd is
        // either unset or a valid descriptor at this point
        ERRORF("NodeVitastor: failed to create and register io_uring eventfd in libuv: %s", strerror(-res));
        vitastor_c_destroy(cli->c);
        cli->c = NULL;
        // NOTE(review): cli itself is not deleted here; its destructor stops the
        // poll handle, which may not have been initialised on this path
        Nan::ThrowError("failed to create and register io_uring eventfd");
        return;
    }

    cli->Wrap(info.This());
    info.GetReturnValue().Set(info.This());
}
|
||||
|
||||
// libuv poll callback for the client's eventfd: processes completed events
// while holding the client mutex
void NodeVitastor::on_io_readable(uv_poll_t* handle, int status, int revents)
{
    TRACEF("NodeVitastor::on_io_readable status/revents %d %d", status, revents);
    if (!(revents & UV_READABLE))
    {
        return;
    }
    // handle->data was set to the owning client in the constructor
    NodeVitastor* self = static_cast<NodeVitastor*>(handle->data);
    std::unique_lock<std::mutex> lock(self->mu);
    vitastor_c_uring_handle_events(self->c);
}
|
||||
|
||||
// Build a read request from JS arguments (offset, len, callback) starting at
// position `argpos`. Allocates the destination buffer with malloc().
// Returns NULL (with a JS exception thrown) when the allocation fails.
static NodeVitastorRequest* getReadRequest(const Nan::FunctionCallbackInfo<v8::Value> & info, int argpos)
{
    const uint64_t offset = Nan::To<int64_t>(info[argpos+0]).FromJust();
    const uint64_t len = Nan::To<int64_t>(info[argpos+1]).FromJust();
    uint8_t *buf = (uint8_t*)malloc(len);
    if (buf == NULL)
    {
        Nan::ThrowError("failed to allocate memory");
        return NULL;
    }

    NodeVitastorRequest *req = new NodeVitastorRequest(info[argpos+2].As<v8::Function>());
    req->offset = offset;
    req->len = len;
    req->iov.iov_base = buf;
    req->iov.iov_len = len;
    return req;
}
|
||||
|
||||
// read(pool, inode, offset, len, callback(err, buffer, version))
|
||||
// read(pool, inode, offset, len, callback(err, buffer, version))
// Starts an asynchronous read; the result is delivered through on_read_finish().
NAN_METHOD(NodeVitastor::Read)
{
    TRACE("NodeVitastor::Read");

    NodeVitastor* self = Nan::ObjectWrap::Unwrap<NodeVitastor>(info.This());

    uint64_t pool = Nan::To<int64_t>(info[0]).FromJust();
    uint64_t inode = Nan::To<int64_t>(info[1]).FromJust();

    auto req = getReadRequest(info, 2);
    if (!req)
    {
        // Buffer allocation failed; getReadRequest() has already thrown a JS error
        return;
    }

    std::unique_lock<std::mutex> lock(self->mu);
    vitastor_c_read(self->c, ((pool << (64-POOL_ID_BITS)) | inode), req->offset, req->len, &req->iov, 1, on_read_finish, req);
}
|
||||
|
||||
// Build a write request from JS arguments (offset, buffer, { version }?, callback)
// starting at position `argpos`. The optional third argument is an options
// object whose `version` property is used as the write version.
// NOTE(review): the request points directly at the Buffer's memory and holds no
// reference to the Buffer object - confirm that callers keep it alive until completion.
static NodeVitastorRequest* getWriteRequest(const Nan::FunctionCallbackInfo<v8::Value> & info, int argpos)
{
    uint64_t offset = Nan::To<int64_t>(info[argpos+0]).FromJust();
    char *buf = node::Buffer::Data(info[argpos+1]);
    uint64_t len = node::Buffer::Length(info[argpos+1]);
    uint64_t version = 0;

    // IsObject() is also true for functions, so the callback must be excluded
    // explicitly; otherwise write(offset, buffer, callback) would treat the
    // callback as the options object and look for the callback one slot too far
    if (!info[argpos+2].IsEmpty() && info[argpos+2]->IsObject() && !info[argpos+2]->IsFunction())
    {
        auto key = Nan::New<v8::String>("version").ToLocalChecked();
        auto params = info[argpos+2].As<v8::Object>();
        auto versionObj = Nan::Get(params, key).ToLocalChecked();
        if (!versionObj.IsEmpty())
            version = Nan::To<int64_t>(versionObj).FromJust();
        // The callback follows the options object
        argpos++;
    }

    v8::Local<v8::Function> callback = info[argpos+2].As<v8::Function>();
    auto req = new NodeVitastorRequest(callback);

    req->offset = offset;
    req->len = len;
    req->version = version;
    req->iov = { .iov_base = buf, .iov_len = req->len };

    return req;
}
|
||||
|
||||
// write(pool, inode, offset, buffer, { version }?, callback(err))
|
||||
// write(pool, inode, offset, buffer, { version }?, callback(err))
// Starts an asynchronous write; completion is reported through on_write_finish().
NAN_METHOD(NodeVitastor::Write)
{
    TRACE("NodeVitastor::Write");

    NodeVitastor* self = Nan::ObjectWrap::Unwrap<NodeVitastor>(info.This());

    const uint64_t pool = Nan::To<int64_t>(info[0]).FromJust();
    const uint64_t inode = Nan::To<int64_t>(info[1]).FromJust();
    // Pool ID goes into the top POOL_ID_BITS bits of the inode number
    const uint64_t full_inode = (pool << (64-POOL_ID_BITS)) | inode;

    NodeVitastorRequest *req = getWriteRequest(info, 2);

    std::unique_lock<std::mutex> lock(self->mu);
    vitastor_c_write(self->c, full_inode, req->offset, req->len, req->version, &req->iov, 1, on_write_finish, req);
}
|
||||
|
||||
// sync(callback(err))
|
||||
// sync(callback(err))
// Starts an asynchronous sync; completion is reported through on_write_finish().
NAN_METHOD(NodeVitastor::Sync)
{
    TRACE("NodeVitastor::Sync");

    NodeVitastor* self = Nan::ObjectWrap::Unwrap<NodeVitastor>(info.This());
    NodeVitastorRequest *req = new NodeVitastorRequest(info[0].As<v8::Function>());

    std::unique_lock<std::mutex> lock(self->mu);
    vitastor_c_sync(self->c, on_write_finish, req);
}
|
||||
|
||||
// read_bitmap(pool, inode, offset, len, with_parents, callback(err, bitmap_buffer))
|
||||
// read_bitmap(pool, inode, offset, len, with_parents, callback(err, bitmap_buffer))
// Starts an asynchronous bitmap read; the result is delivered through
// on_read_bitmap_finish().
NAN_METHOD(NodeVitastor::ReadBitmap)
{
    TRACE("NodeVitastor::ReadBitmap");

    NodeVitastor* self = Nan::ObjectWrap::Unwrap<NodeVitastor>(info.This());

    uint64_t pool = Nan::To<int64_t>(info[0]).FromJust();
    uint64_t inode = Nan::To<int64_t>(info[1]).FromJust();
    uint64_t offset = Nan::To<int64_t>(info[2]).FromJust();
    uint64_t len = Nan::To<int64_t>(info[3]).FromJust();
    bool with_parents = Nan::To<bool>(info[4]).FromJust();
    v8::Local<v8::Function> callback = info[5].As<v8::Function>();

    auto req = new NodeVitastorRequest(callback);
    // Hold the client mutex while calling into the client, like Read/Write/Sync do
    std::unique_lock<std::mutex> lock(self->mu);
    vitastor_c_read_bitmap(self->c, ((pool << (64-POOL_ID_BITS)) | inode), offset, len, with_parents, on_read_bitmap_finish, req);
}
|
||||
|
||||
// Invoke the JS callback with a single argument: null when retval is 0,
// otherwise retval as a 32-bit integer.
// Legal errors: EINVAL, EIO, EROFS, ENOSPC, EINTR, ENOENT
static void on_error(NodeVitastorRequest *req, Nan::Callback & nanCallback, long retval)
{
    v8::Local<v8::Value> err;
    if (retval)
        err = Nan::New<v8::Int32>((int32_t)retval);
    else
        err = Nan::Null();
    nanCallback.Call(1, &err, req);
}
|
||||
|
||||
void NodeVitastor::on_read_finish(void *opaque, long retval, uint64_t version)
|
||||
{
|
||||
Nan::HandleScope scope;
|
||||
NodeVitastorRequest *req = (NodeVitastorRequest *)opaque;
|
||||
Nan::Callback nanCallback(Nan::New(req->callback));
|
||||
if (retval == -ENOENT)
|
||||
{
|
||||
free(req->iov.iov_base);
|
||||
nanCallback.Call(0, NULL, req);
|
||||
}
|
||||
else if (retval < 0)
|
||||
{
|
||||
free(req->iov.iov_base);
|
||||
on_error(req, nanCallback, retval);
|
||||
}
|
||||
else
|
||||
{
|
||||
v8::Local<v8::Value> args[3];
|
||||
args[0] = Nan::Null();
|
||||
args[1] = Nan::NewBuffer((char*)req->iov.iov_base, req->iov.iov_len).ToLocalChecked();
|
||||
args[2] = v8::BigInt::NewFromUnsigned(v8::Isolate::GetCurrent(), version);
|
||||
nanCallback.Call(3, args, req);
|
||||
}
|
||||
delete req;
|
||||
}
|
||||
|
||||
void NodeVitastor::on_write_finish(void *opaque, long retval)
|
||||
{
|
||||
Nan::HandleScope scope;
|
||||
NodeVitastorRequest *req = (NodeVitastorRequest *)opaque;
|
||||
Nan::Callback nanCallback(Nan::New(req->callback));
|
||||
on_error(req, nanCallback, retval);
|
||||
delete req;
|
||||
}
|
||||
|
||||
void NodeVitastor::on_read_bitmap_finish(void *opaque, long retval, uint8_t *bitmap)
|
||||
{
|
||||
Nan::HandleScope scope;
|
||||
NodeVitastorRequest *req = (NodeVitastorRequest *)opaque;
|
||||
Nan::Callback nanCallback(Nan::New(req->callback));
|
||||
if (retval == -ENOENT)
|
||||
nanCallback.Call(0, NULL, req);
|
||||
else if (retval < 0)
|
||||
on_error(req, nanCallback, retval);
|
||||
else
|
||||
{
|
||||
v8::Local<v8::Value> args[2];
|
||||
args[0] = Nan::Null();
|
||||
args[1] = Nan::NewBuffer((char*)bitmap, (retval+7)/8).ToLocalChecked();
|
||||
nanCallback.Call(2, args, req);
|
||||
}
|
||||
delete req;
|
||||
}
|
||||
|
||||
//NAN_METHOD(NodeVitastor::Destroy)
|
||||
//{
|
||||
// TRACE("NodeVitastor::Destroy");
|
||||
//}
|
||||
|
||||
//////////////////////////////////////////////////
|
||||
// NodeVitastorImage
|
||||
//////////////////////////////////////////////////
|
||||
|
||||
// JS constructor of an image: new Image(client, name).
// Starts watching the named image; on_watch_start() is called with the image
// once the watch is established.
NAN_METHOD(NodeVitastorImage::Create)
{
    TRACE("NodeVitastorImage::Create");

    v8::Local<v8::Object> parent = info[0].As<v8::Object>();
    std::string name = std::string(*Nan::Utf8String(info[1].As<v8::String>()));
    NodeVitastor *cli = Nan::ObjectWrap::Unwrap<NodeVitastor>(parent);

    NodeVitastorImage *img = new NodeVitastorImage();
    // Wrap() has to come before Ref(): Ref() operates on the persistent handle
    // that Wrap() creates, and Wrap() leaves that handle weak, so a reference
    // taken earlier would not keep the object alive
    img->Wrap(info.This());
    img->cli = cli;
    img->name = name;

    // Keep both the image and its client alive while the watch is in progress
    img->Ref();
    cli->Ref();
    std::unique_lock<std::mutex> lock(cli->mu);
    vitastor_c_watch_inode(cli->c, (char*)img->name.c_str(), on_watch_start, img);

    info.GetReturnValue().Set(info.This());
}
|
||||
|
||||
// Close the image watch (if one was established) and drop the reference
// to the parent client
NodeVitastorImage::~NodeVitastorImage()
{
    if (watch != NULL)
    {
        vitastor_c_close_watch(cli->c, watch);
        watch = NULL;
    }
    cli->Unref();
}
|
||||
|
||||
// read(offset, len, callback(err, buffer, version))
|
||||
// read(offset, len, callback(err, buffer, version))
// Queues or executes a read on the image (see exec_or_wait()).
NAN_METHOD(NodeVitastorImage::Read)
{
    TRACE("NodeVitastorImage::Read");

    NodeVitastorImage* img = Nan::ObjectWrap::Unwrap<NodeVitastorImage>(info.This());

    auto req = getReadRequest(info, 0);
    if (!req)
    {
        // Buffer allocation failed; getReadRequest() has already thrown a JS error
        return;
    }
    req->img = img;
    req->op = NODE_VITASTOR_READ;

    img->exec_or_wait(req);
}
|
||||
|
||||
// write(offset, buffer, { version }?, callback(err))
|
||||
// write(offset, buffer, { version }?, callback(err))
// Queues or executes a write on the image (see exec_or_wait()).
NAN_METHOD(NodeVitastorImage::Write)
{
    TRACE("NodeVitastorImage::Write");

    NodeVitastorImage* self = Nan::ObjectWrap::Unwrap<NodeVitastorImage>(info.This());

    NodeVitastorRequest *req = getWriteRequest(info, 0);
    req->op = NODE_VITASTOR_WRITE;
    req->img = self;

    self->exec_or_wait(req);
}
|
||||
|
||||
// sync(callback(err))
// Queues or executes a sync on the image (see exec_or_wait()).
NAN_METHOD(NodeVitastorImage::Sync)
{
    TRACE("NodeVitastorImage::Sync");

    NodeVitastorImage* self = Nan::ObjectWrap::Unwrap<NodeVitastorImage>(info.This());

    NodeVitastorRequest *req = new NodeVitastorRequest(info[0].As<v8::Function>());
    req->op = NODE_VITASTOR_SYNC;
    req->img = self;

    self->exec_or_wait(req);
}
|
||||
|
||||
// read_bitmap(offset, len, with_parents, callback(err, bitmap_buffer))
|
||||
// read_bitmap(offset, len, with_parents, callback(err, bitmap_buffer))
// Queues or executes a bitmap read on the image (see exec_or_wait()).
NAN_METHOD(NodeVitastorImage::ReadBitmap)
{
    TRACE("NodeVitastorImage::ReadBitmap");

    NodeVitastorImage* self = Nan::ObjectWrap::Unwrap<NodeVitastorImage>(info.This());

    const uint64_t offset = Nan::To<int64_t>(info[0]).FromJust();
    const uint64_t len = Nan::To<int64_t>(info[1]).FromJust();
    const bool with_parents = Nan::To<bool>(info[2]).FromJust();

    NodeVitastorRequest *req = new NodeVitastorRequest(info[3].As<v8::Function>());
    req->img = self;
    req->op = NODE_VITASTOR_READ_BITMAP;
    req->offset = offset;
    req->len = len;
    req->with_parents = with_parents;

    self->exec_or_wait(req);
}
|
||||
|
||||
// get_info(callback(info))
// Creates a NODE_VITASTOR_GET_INFO request carrying the JS callback and
// hands it to exec_or_wait(); the info object itself is built in
// exec_request().
NAN_METHOD(NodeVitastorImage::GetInfo)
{
    // Trace label fixed: it used to say "NodeVitastorImage::Sync" (copy-paste)
    TRACE("NodeVitastorImage::GetInfo");

    NodeVitastorImage* img = Nan::ObjectWrap::Unwrap<NodeVitastorImage>(info.This());

    v8::Local<v8::Function> callback = info[0].As<v8::Function>();
    auto req = new NodeVitastorRequest(callback);
    req->img = img;
    req->op = NODE_VITASTOR_GET_INFO;

    img->exec_or_wait(req);
}
|
||||
|
||||
// Executes the request right away when the watch is already set; otherwise
// appends it to on_init so that on_watch_start() can run it later.
void NodeVitastorImage::exec_or_wait(NodeVitastorRequest *req)
{
    if (watch)
    {
        exec_request(req);
        return;
    }
    // Need to wait for initialisation
    on_init.push_back(req);
}
|
||||
|
||||
// Dispatches a request to the Vitastor C client according to req->op.
//
// READ / WRITE / READ_BITMAP are submitted to the client with the matching
// NodeVitastor completion handler and `req` as the opaque pointer.
// SYNC is submitted unless the inode's immediate_commit mode is
// IMMEDIATE_ALL, in which case the request is completed on the spot.
// GET_INFO is answered synchronously: the inode properties are read from the
// watch, packed into a JS object, passed to the JS callback, and `req` is
// deleted here.
//
// The client mutex (cli->mu) is taken for the whole dispatch, except that it
// is released before the GET_INFO JS callback runs: std::mutex is not
// recursive, and a callback that calls another image method would re-enter
// this function and try to lock the same mutex again.
void NodeVitastorImage::exec_request(NodeVitastorRequest *req)
{
    std::unique_lock<std::mutex> lock(cli->mu);
    if (req->op == NODE_VITASTOR_READ)
    {
        uint64_t ino = vitastor_c_inode_get_num(watch);
        vitastor_c_read(cli->c, ino, req->offset, req->len, &req->iov, 1, NodeVitastor::on_read_finish, req);
    }
    else if (req->op == NODE_VITASTOR_WRITE)
    {
        uint64_t ino = vitastor_c_inode_get_num(watch);
        vitastor_c_write(cli->c, ino, req->offset, req->len, req->version, &req->iov, 1, NodeVitastor::on_write_finish, req);
    }
    else if (req->op == NODE_VITASTOR_SYNC)
    {
        uint64_t ino = vitastor_c_inode_get_num(watch);
        uint32_t imm = vitastor_c_inode_get_immediate_commit(cli->c, ino);
        if (imm != IMMEDIATE_ALL)
        {
            vitastor_c_sync(cli->c, NodeVitastor::on_write_finish, req);
        }
        else
        {
            // NOTE(review): on_write_finish() runs here with cli->mu still held;
            // its body is not visible from this function - confirm it cannot
            // re-enter exec_request() through a JS callback.
            NodeVitastor::on_write_finish(req, 0);
        }
    }
    else if (req->op == NODE_VITASTOR_READ_BITMAP)
    {
        uint64_t ino = vitastor_c_inode_get_num(watch);
        vitastor_c_read_bitmap(cli->c, ino, req->offset, req->len, req->with_parents, NodeVitastor::on_read_bitmap_finish, req);
    }
    else if (req->op == NODE_VITASTOR_GET_INFO)
    {
        // Read everything from the watch / client while the mutex is held
        uint64_t size = vitastor_c_inode_get_size(watch);
        uint64_t num = vitastor_c_inode_get_num(watch);
        uint32_t block_size = vitastor_c_inode_get_block_size(cli->c, num);
        uint32_t bitmap_granularity = vitastor_c_inode_get_bitmap_granularity(cli->c, num);
        int readonly = vitastor_c_inode_get_readonly(watch);
        uint32_t immediate_commit = vitastor_c_inode_get_immediate_commit(cli->c, num);
        uint64_t parent_id = vitastor_c_inode_get_parent_id(watch);
        // NOTE(review): ownership of the returned string is not visible here -
        // confirm whether it has to be freed by the caller.
        char *meta = vitastor_c_inode_get_meta(watch);
        uint64_t mod_revision = vitastor_c_inode_get_mod_revision(watch);

        // Nothing below touches the client any more; release the mutex so that
        // the JS callback may safely call back into this image.
        lock.unlock();

        Nan::HandleScope scope;

        // Property name -> JS string
        auto prop = [](const char *name)
        {
            return Nan::New<v8::String>(name).ToLocalChecked();
        };
        // 64-bit values are returned as Number below 2^53 and as BigInt otherwise
        auto number_or_bigint = [](uint64_t v) -> v8::Local<v8::Value>
        {
            if (v < ((uint64_t)1<<53))
                return Nan::New<v8::Number>(v);
            return v8::BigInt::NewFromUnsigned(v8::Isolate::GetCurrent(), v);
        };

        v8::Local<v8::Object> res = Nan::New<v8::Object>();
        Nan::Set(res, prop("pool_id"), Nan::New<v8::Number>(INODE_POOL(num)));
        Nan::Set(res, prop("inode_num"), Nan::New<v8::Number>(INODE_NO_POOL(num)));
        Nan::Set(res, prop("size"), number_or_bigint(size));
        if (parent_id)
        {
            Nan::Set(res, prop("parent_pool_id"), Nan::New<v8::Number>(INODE_POOL(parent_id)));
            Nan::Set(res, prop("parent_inode_num"), Nan::New<v8::Number>(INODE_NO_POOL(parent_id)));
        }
        Nan::Set(res, prop("readonly"), Nan::New((bool)readonly));
        if (meta)
        {
            // meta is parsed as JSON text
            Nan::JSON nanJSON;
            Nan::Set(res, prop("meta"), nanJSON.Parse(Nan::New<v8::String>(meta).ToLocalChecked()).ToLocalChecked());
        }
        Nan::Set(res, prop("mod_revision"), number_or_bigint(mod_revision));
        Nan::Set(res, prop("block_size"), Nan::New(block_size));
        Nan::Set(res, prop("bitmap_granularity"), Nan::New(bitmap_granularity));
        Nan::Set(res, prop("immediate_commit"), Nan::New(immediate_commit));

        Nan::Callback nanCallback(Nan::New(req->callback));
        v8::Local<v8::Value> args[1];
        args[0] = res;
        nanCallback.Call(1, args, req);

        delete req;
    }
}
|
||||
|
||||
void NodeVitastorImage::on_watch_start(void *opaque, long retval)
|
||||
{
|
||||
NodeVitastorImage *img = (NodeVitastorImage *)opaque;
|
||||
{
|
||||
img->watch = (void*)retval;
|
||||
auto on_init = std::move(img->on_init);
|
||||
for (auto req: on_init)
|
||||
{
|
||||
img->exec_request(req);
|
||||
}
|
||||
}
|
||||
img->Unref();
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////
|
||||
// NodeVitastorKV
|
||||
//////////////////////////////////////////////////
|
||||
|
||||
// constructor(node_vitastor)
|
||||
NAN_METHOD(NodeVitastorKV::Create)
|
||||
{
|
||||
TRACE("NodeVitastorKV::Create");
|
||||
|
||||
v8::Local<v8::Object> parent = info[0].As<v8::Object>();
|
||||
NodeVitastor *cli = Nan::ObjectWrap::Unwrap<NodeVitastor>(parent);
|
||||
|
||||
NodeVitastorKV *kv = new NodeVitastorKV();
|
||||
kv->cli = cli;
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(cli->mu);
|
||||
kv->dbw = new vitastorkv_dbw_t((cluster_client_t*)vitastor_c_get_internal_client(cli->c));
|
||||
}
|
||||
|
||||
kv->Wrap(info.This());
|
||||
info.GetReturnValue().Set(info.This());
|
||||
}
|
||||
|
||||
// Destructor: releases the database wrapper allocated in Create().
NodeVitastorKV::~NodeVitastorKV()
{
    delete this->dbw;
}
|
||||
|
||||
// open(inode_id, { ...config }, callback(err))
|
||||
NAN_METHOD(NodeVitastorKV::Open)
|
||||
{
|
||||
TRACE("NodeVitastorKV::Open");
|
||||
|
||||
NodeVitastorKV* kv = Nan::ObjectWrap::Unwrap<NodeVitastorKV>(info.This());
|
||||
|
||||
uint64_t inode_id = Nan::To<int64_t>(info[0]).FromJust();
|
||||
|
||||
v8::Local<v8::Object> jsParams = info[1].As<v8::Object>();
|
||||
v8::Local<v8::Array> keys = Nan::GetOwnPropertyNames(jsParams).ToLocalChecked();
|
||||
std::map<std::string, std::string> cfg;
|
||||
for (uint32_t i = 0; i < keys->Length(); i++)
|
||||
{
|
||||
auto key = Nan::Get(keys, i).ToLocalChecked();
|
||||
cfg[std::string(*Nan::Utf8String(key))] = std::string(*Nan::Utf8String(Nan::Get(jsParams, key).ToLocalChecked()));
|
||||
}
|
||||
|
||||
v8::Local<v8::Function> callback = info[2].As<v8::Function>();
|
||||
auto req = new NodeVitastorRequest(callback);
|
||||
|
||||
kv->Ref();
|
||||
kv->dbw->open(inode_id, cfg, [kv, req](int res)
|
||||
{
|
||||
Nan::HandleScope scope;
|
||||
Nan::Callback nanCallback(Nan::New(req->callback));
|
||||
v8::Local<v8::Value> args[1];
|
||||
args[0] = !res ? v8::Local<v8::Value>(Nan::Null()) : v8::Local<v8::Value>(Nan::New<v8::Int32>(res));
|
||||
nanCallback.Call(1, args, req);
|
||||
delete req;
|
||||
kv->Unref();
|
||||
});
|
||||
}
|
||||
|
||||
// close(callback(err))
|
||||
NAN_METHOD(NodeVitastorKV::Close)
|
||||
{
|
||||
TRACE("NodeVitastorKV::Close");
|
||||
|
||||
NodeVitastorKV* kv = Nan::ObjectWrap::Unwrap<NodeVitastorKV>(info.This());
|
||||
|
||||
v8::Local<v8::Function> callback = info[0].As<v8::Function>();
|
||||
auto req = new NodeVitastorRequest(callback);
|
||||
|
||||
kv->Ref();
|
||||
kv->dbw->close([kv, req]()
|
||||
{
|
||||
Nan::HandleScope scope;
|
||||
Nan::Callback nanCallback(Nan::New(req->callback));
|
||||
nanCallback.Call(0, NULL, req);
|
||||
delete req;
|
||||
kv->Unref();
|
||||
});
|
||||
}
|
||||
|
||||
// set_config({ ...config })
|
||||
NAN_METHOD(NodeVitastorKV::SetConfig)
|
||||
{
|
||||
TRACE("NodeVitastorKV::SetConfig");
|
||||
|
||||
NodeVitastorKV* kv = Nan::ObjectWrap::Unwrap<NodeVitastorKV>(info.This());
|
||||
|
||||
v8::Local<v8::Object> jsParams = info[0].As<v8::Object>();
|
||||
v8::Local<v8::Array> keys = Nan::GetOwnPropertyNames(jsParams).ToLocalChecked();
|
||||
std::map<std::string, std::string> cfg;
|
||||
for (uint32_t i = 0; i < keys->Length(); i++)
|
||||
{
|
||||
auto key = Nan::Get(keys, i).ToLocalChecked();
|
||||
cfg[std::string(*Nan::Utf8String(key))] = std::string(*Nan::Utf8String(Nan::Get(jsParams, key).ToLocalChecked()));
|
||||
}
|
||||
|
||||
kv->dbw->set_config(cfg);
|
||||
}
|
||||
|
||||
// get_size()
|
||||
NAN_METHOD(NodeVitastorKV::GetSize)
|
||||
{
|
||||
TRACE("NodeVitastorKV::GetSize");
|
||||
|
||||
NodeVitastorKV* kv = Nan::ObjectWrap::Unwrap<NodeVitastorKV>(info.This());
|
||||
|
||||
auto size = kv->dbw->get_size();
|
||||
info.GetReturnValue().Set((size < ((uint64_t)1<<53))
|
||||
? v8::Local<v8::Value>(Nan::New<v8::Number>(size))
|
||||
: v8::Local<v8::Value>(v8::BigInt::NewFromUnsigned(info.GetIsolate(), size)));
|
||||
}
|
||||
|
||||
// Shared implementation of get() and get_cached().
// JS arguments: (key, callback(err, value)). On success the callback gets
// (null, value); on failure it gets (result code, null).
// allow_cache is forwarded to the database wrapper's get().
void NodeVitastorKV::get_impl(const Nan::FunctionCallbackInfo<v8::Value> & info, bool allow_cache)
{
    NodeVitastorKV *self = Nan::ObjectWrap::Unwrap<NodeVitastorKV>(info.This());

    // FIXME: Handle Buffer too
    Nan::Utf8String key_utf8(info[0].As<v8::String>());
    std::string key(*key_utf8);

    v8::Local<v8::Function> js_callback = info[1].As<v8::Function>();
    NodeVitastorRequest *get_req = new NodeVitastorRequest(js_callback);

    // Ref() is paired with the Unref() at the end of the completion lambda
    self->Ref();
    self->dbw->get(key, [self, get_req](int res, const std::string & value)
    {
        Nan::HandleScope scope;
        Nan::Callback js_cb(Nan::New(get_req->callback));
        v8::Local<v8::Value> argv[2];
        if (res)
        {
            argv[0] = Nan::New<v8::Int32>(res);
            argv[1] = Nan::Null();
        }
        else
        {
            argv[0] = Nan::Null();
            argv[1] = Nan::New<v8::String>(value).ToLocalChecked();
        }
        js_cb.Call(2, argv, get_req);
        delete get_req;
        self->Unref();
    }, allow_cache);
}
|
||||
|
||||
// get(key, callback(err, value))
|
||||
NAN_METHOD(NodeVitastorKV::Get)
|
||||
{
|
||||
TRACE("NodeVitastorKV::Get");
|
||||
get_impl(info, false);
|
||||
}
|
||||
|
||||
// get_cached(key, callback(err, value))
|
||||
NAN_METHOD(NodeVitastorKV::GetCached)
|
||||
{
|
||||
TRACE("NodeVitastorKV::GetCached");
|
||||
get_impl(info, true);
|
||||
}
|
||||
|
||||
static std::function<bool(int, const std::string &)> make_cas_callback(NodeVitastorRequest *cas_req)
|
||||
{
|
||||
return [cas_req](int res, const std::string & value)
|
||||
{
|
||||
Nan::HandleScope scope;
|
||||
Nan::Callback nanCallback(Nan::New(cas_req->callback));
|
||||
v8::Local<v8::Value> args[1];
|
||||
args[0] = !res ? v8::Local<v8::Value>(Nan::New<v8::String>(value).ToLocalChecked()) : v8::Local<v8::Value>(Nan::Null());
|
||||
Nan::MaybeLocal<v8::Value> ret = nanCallback.Call(1, args, cas_req);
|
||||
if (ret.IsEmpty())
|
||||
return false;
|
||||
return Nan::To<bool>(ret.ToLocalChecked()).FromJust();
|
||||
};
|
||||
}
|
||||
|
||||
// set(key, value, callback(err), cas_compare(old_value))
|
||||
NAN_METHOD(NodeVitastorKV::Set)
|
||||
{
|
||||
TRACE("NodeVitastorKV::Set");
|
||||
|
||||
NodeVitastorKV* kv = Nan::ObjectWrap::Unwrap<NodeVitastorKV>(info.This());
|
||||
|
||||
// FIXME: Handle Buffer too
|
||||
std::string key(*Nan::Utf8String(info[0].As<v8::String>()));
|
||||
std::string value(*Nan::Utf8String(info[1].As<v8::String>()));
|
||||
|
||||
v8::Local<v8::Function> callback = info[2].As<v8::Function>();
|
||||
NodeVitastorRequest *req = new NodeVitastorRequest(callback), *cas_req = NULL;
|
||||
|
||||
std::function<bool(int, const std::string &)> cas_cb;
|
||||
if (info.Length() > 3 && info[3]->IsObject())
|
||||
{
|
||||
v8::Local<v8::Function> cas_callback = info[3].As<v8::Function>();
|
||||
cas_req = new NodeVitastorRequest(cas_callback);
|
||||
cas_cb = make_cas_callback(cas_req);
|
||||
}
|
||||
|
||||
kv->Ref();
|
||||
kv->dbw->set(key, value, [kv, req, cas_req](int res)
|
||||
{
|
||||
Nan::HandleScope scope;
|
||||
Nan::Callback nanCallback(Nan::New(req->callback));
|
||||
v8::Local<v8::Value> args[1];
|
||||
args[0] = !res ? v8::Local<v8::Value>(Nan::Null()) : v8::Local<v8::Value>(Nan::New<v8::Int32>(res));
|
||||
nanCallback.Call(1, args, req);
|
||||
delete req;
|
||||
if (cas_req)
|
||||
delete cas_req;
|
||||
kv->Unref();
|
||||
}, cas_cb);
|
||||
}
|
||||
|
||||
// del(key, callback(err), cas_compare(old_value))
|
||||
NAN_METHOD(NodeVitastorKV::Del)
|
||||
{
|
||||
TRACE("NodeVitastorKV::Del");
|
||||
|
||||
NodeVitastorKV* kv = Nan::ObjectWrap::Unwrap<NodeVitastorKV>(info.This());
|
||||
|
||||
// FIXME: Handle Buffer too
|
||||
std::string key(*Nan::Utf8String(info[0].As<v8::String>()));
|
||||
|
||||
v8::Local<v8::Function> callback = info[1].As<v8::Function>();
|
||||
NodeVitastorRequest *req = new NodeVitastorRequest(callback), *cas_req = NULL;
|
||||
|
||||
std::function<bool(int, const std::string &)> cas_cb;
|
||||
if (info.Length() > 2 && info[2]->IsObject())
|
||||
{
|
||||
v8::Local<v8::Function> cas_callback = info[2].As<v8::Function>();
|
||||
cas_req = new NodeVitastorRequest(cas_callback);
|
||||
cas_cb = make_cas_callback(cas_req);
|
||||
}
|
||||
|
||||
kv->Ref();
|
||||
kv->dbw->del(key, [kv, req, cas_req](int res)
|
||||
{
|
||||
Nan::HandleScope scope;
|
||||
Nan::Callback nanCallback(Nan::New(req->callback));
|
||||
v8::Local<v8::Value> args[1];
|
||||
args[0] = !res ? v8::Local<v8::Value>(Nan::Null()) : v8::Local<v8::Value>(Nan::New<v8::Int32>(res));
|
||||
nanCallback.Call(1, args, req);
|
||||
delete req;
|
||||
if (cas_req)
|
||||
delete cas_req;
|
||||
kv->Unref();
|
||||
}, cas_cb);
|
||||
}
|
||||
|
||||
// list(start_key?)
|
||||
NAN_METHOD(NodeVitastorKV::List)
|
||||
{
|
||||
TRACE("NodeVitastorKV::List");
|
||||
|
||||
v8::Local<v8::Function> cons = Nan::New(listing_class);
|
||||
v8::Local<v8::Value> args[2];
|
||||
args[0] = info.This();
|
||||
int narg = 1;
|
||||
if (info.Length() > 1 && info[1]->IsString())
|
||||
{
|
||||
args[1] = info[1];
|
||||
narg = 2;
|
||||
}
|
||||
info.GetReturnValue().Set(Nan::NewInstance(cons, narg, args).ToLocalChecked());
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////
|
||||
// NodeVitastorKVListing
|
||||
//////////////////////////////////////////////////
|
||||
|
||||
// constructor(node_vitastor_kv, start_key?)
|
||||
NAN_METHOD(NodeVitastorKVListing::Create)
|
||||
{
|
||||
TRACE("NodeVitastorKVListing::Create");
|
||||
|
||||
v8::Local<v8::Object> parent = info[0].As<v8::Object>();
|
||||
NodeVitastorKV *kv = Nan::ObjectWrap::Unwrap<NodeVitastorKV>(parent);
|
||||
|
||||
std::string start_key;
|
||||
// FIXME: Handle Buffer too
|
||||
if (info.Length() > 1 && info[1]->IsString())
|
||||
{
|
||||
start_key = std::string(*Nan::Utf8String(info[1].As<v8::String>()));
|
||||
}
|
||||
|
||||
NodeVitastorKVListing *list = new NodeVitastorKVListing();
|
||||
list->kv = kv;
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(kv->cli->mu);
|
||||
list->handle = list->kv->dbw->list_start(start_key);
|
||||
}
|
||||
|
||||
list->Wrap(info.This());
|
||||
info.GetReturnValue().Set(info.This());
|
||||
}
|
||||
|
||||
// Destructor: closes the listing handle (under the client mutex) unless it
// has already been closed.
NodeVitastorKVListing::~NodeVitastorKVListing()
{
    if (handle == nullptr)
    {
        return;
    }
    std::unique_lock<std::mutex> guard(kv->cli->mu);
    kv->dbw->list_close(handle);
    handle = nullptr;
}
|
||||
|
||||
// next(callback(err, value))
|
||||
NAN_METHOD(NodeVitastorKVListing::Next)
|
||||
{
|
||||
TRACE("NodeVitastorKVListing::Next");
|
||||
|
||||
NodeVitastorKVListing* list = Nan::ObjectWrap::Unwrap<NodeVitastorKVListing>(info.This());
|
||||
|
||||
v8::Local<v8::Function> callback = info[0].As<v8::Function>();
|
||||
auto req = new NodeVitastorRequest(callback);
|
||||
if (!list->handle)
|
||||
{
|
||||
// Already closed
|
||||
Nan::Callback nanCallback(Nan::New(req->callback));
|
||||
v8::Local<v8::Value> args[1];
|
||||
args[0] = Nan::New<v8::Int32>(-EINVAL);
|
||||
nanCallback.Call(1, args, req);
|
||||
delete req;
|
||||
return;
|
||||
}
|
||||
|
||||
list->kv->Ref();
|
||||
list->kv->dbw->list_next(list->handle, [list, req](int res, const std::string & key, const std::string & value)
|
||||
{
|
||||
Nan::HandleScope scope;
|
||||
Nan::Callback nanCallback(Nan::New(req->callback));
|
||||
v8::Local<v8::Value> args[3];
|
||||
args[0] = Nan::New<v8::Int32>(res);
|
||||
args[1] = !res ? v8::Local<v8::Value>(Nan::New<v8::String>(key).ToLocalChecked()) : v8::Local<v8::Value>(Nan::Null());
|
||||
args[2] = !res ? v8::Local<v8::Value>(Nan::New<v8::String>(value).ToLocalChecked()) : v8::Local<v8::Value>(Nan::Null());
|
||||
nanCallback.Call(3, args, req);
|
||||
delete req;
|
||||
list->kv->Unref();
|
||||
});
|
||||
}
|
||||
|
||||
// close()
|
||||
NAN_METHOD(NodeVitastorKVListing::Close)
|
||||
{
|
||||
TRACE("NodeVitastorKVListing::Close");
|
||||
|
||||
NodeVitastorKVListing* list = Nan::ObjectWrap::Unwrap<NodeVitastorKVListing>(info.This());
|
||||
|
||||
if (list->handle)
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(list->kv->cli->mu);
|
||||
list->kv->dbw->list_close(list->handle);
|
||||
list->handle = NULL;
|
||||
}
|
||||
}
|
142
node-binding/client.h
Normal file
142
node-binding/client.h
Normal file
@@ -0,0 +1,142 @@
|
||||
// Copyright (c) Vitaliy Filippov, 2019+
|
||||
// License: VNPL-1.1 (see README.md for details)
|
||||
|
||||
#ifndef NODE_VITASTOR_CLIENT_H
|
||||
#define NODE_VITASTOR_CLIENT_H
|
||||
|
||||
#include <mutex>
|
||||
|
||||
#include <nan.h>
|
||||
#include <vitastor_c.h>
|
||||
#include <vitastor_kv.h>
|
||||
|
||||
class NodeVitastorRequest;
|
||||
|
||||
class NodeVitastor: public Nan::ObjectWrap
|
||||
{
|
||||
public:
|
||||
// constructor({ ...config })
|
||||
static NAN_METHOD(Create);
|
||||
// read(pool, inode, offset, len, callback(err, buffer, version))
|
||||
static NAN_METHOD(Read);
|
||||
// write(pool, inode, offset, buffer, { version }?, callback(err))
|
||||
static NAN_METHOD(Write);
|
||||
// sync(callback(err))
|
||||
static NAN_METHOD(Sync);
|
||||
// read_bitmap(pool, inode, offset, len, with_parents, callback(err, bitmap_buffer))
|
||||
static NAN_METHOD(ReadBitmap);
|
||||
// // destroy()
|
||||
// static NAN_METHOD(Destroy);
|
||||
|
||||
~NodeVitastor();
|
||||
|
||||
private:
|
||||
vitastor_c *c = NULL;
|
||||
int eventfd = -1;
|
||||
uv_poll_t poll_watcher;
|
||||
// FIXME: Is it really needed?
|
||||
std::mutex mu;
|
||||
|
||||
NodeVitastor();
|
||||
|
||||
static void on_io_readable(uv_poll_t* handle, int status, int revents);
|
||||
static void on_read_finish(void *opaque, long retval, uint64_t version);
|
||||
static void on_write_finish(void *opaque, long retval);
|
||||
static void on_read_bitmap_finish(void *opaque, long retval, uint8_t *bitmap);
|
||||
|
||||
friend class NodeVitastorImage;
|
||||
friend class NodeVitastorKV;
|
||||
friend class NodeVitastorKVListing;
|
||||
};
|
||||
|
||||
class NodeVitastorImage: public Nan::ObjectWrap
|
||||
{
|
||||
public:
|
||||
// constructor(node_vitastor, name)
|
||||
static NAN_METHOD(Create);
|
||||
// read(offset, len, callback(err, buffer, version))
|
||||
static NAN_METHOD(Read);
|
||||
// write(offset, buffer, { version }?, callback(err))
|
||||
static NAN_METHOD(Write);
|
||||
// sync(callback(err))
|
||||
static NAN_METHOD(Sync);
|
||||
// read_bitmap(offset, len, with_parents, callback(err, bitmap_buffer))
|
||||
static NAN_METHOD(ReadBitmap);
|
||||
// get_info(callback({ num, name, size, parent_id?, readonly?, meta?, mod_revision, block_size, bitmap_granularity, immediate_commit }))
|
||||
static NAN_METHOD(GetInfo);
|
||||
|
||||
~NodeVitastorImage();
|
||||
|
||||
private:
|
||||
NodeVitastor *cli = NULL;
|
||||
std::string name;
|
||||
void *watch = NULL;
|
||||
std::vector<NodeVitastorRequest*> on_init;
|
||||
Nan::Persistent<v8::Object> cliObj;
|
||||
|
||||
NodeVitastorImage();
|
||||
|
||||
static void on_watch_start(void *opaque, long retval);
|
||||
void exec_request(NodeVitastorRequest *req);
|
||||
void exec_or_wait(NodeVitastorRequest *req);
|
||||
};
|
||||
|
||||
class NodeVitastorKV: public Nan::ObjectWrap
|
||||
{
|
||||
public:
|
||||
// constructor(node_vitastor)
|
||||
static NAN_METHOD(Create);
|
||||
// open(inode_id, { ...config }, callback(err))
|
||||
static NAN_METHOD(Open);
|
||||
// set_config({ ...config })
|
||||
static NAN_METHOD(SetConfig);
|
||||
// close(callback())
|
||||
static NAN_METHOD(Close);
|
||||
// get_size()
|
||||
static NAN_METHOD(GetSize);
|
||||
// get(key, callback(err, value))
|
||||
static NAN_METHOD(Get);
|
||||
// get_cached(key, callback(err, value))
|
||||
static NAN_METHOD(GetCached);
|
||||
// set(key, value, callback(err), cas_compare(old_value))
|
||||
static NAN_METHOD(Set);
|
||||
// del(key, callback(err), cas_compare(old_value))
|
||||
static NAN_METHOD(Del);
|
||||
// list(start_key?)
|
||||
static NAN_METHOD(List);
|
||||
|
||||
~NodeVitastorKV();
|
||||
|
||||
static Nan::Persistent<v8::Function> listing_class;
|
||||
|
||||
private:
|
||||
NodeVitastor *cli = NULL;
|
||||
vitastorkv_dbw_t *dbw = NULL;
|
||||
|
||||
NodeVitastorKV();
|
||||
|
||||
static void get_impl(const Nan::FunctionCallbackInfo<v8::Value> & info, bool allow_cache);
|
||||
|
||||
friend class NodeVitastorKVListing;
|
||||
};
|
||||
|
||||
class NodeVitastorKVListing: public Nan::ObjectWrap
|
||||
{
|
||||
public:
|
||||
// constructor(node_vitastor_kv, start_key?)
|
||||
static NAN_METHOD(Create);
|
||||
// next(callback(err, value))
|
||||
static NAN_METHOD(Next);
|
||||
// close()
|
||||
static NAN_METHOD(Close);
|
||||
|
||||
~NodeVitastorKVListing();
|
||||
|
||||
private:
|
||||
NodeVitastorKV *kv = NULL;
|
||||
void *handle = NULL;
|
||||
|
||||
NodeVitastorKVListing();
|
||||
};
|
||||
|
||||
#endif
|
24
node-binding/package.json
Normal file
24
node-binding/package.json
Normal file
@@ -0,0 +1,24 @@
|
||||
{
|
||||
"name": "vitastor",
|
||||
"version": "1.7.0",
|
||||
"description": "Low-level native bindings to Vitastor client library",
|
||||
"main": "index.js",
|
||||
"keywords": [
|
||||
"storage",
|
||||
"sds",
|
||||
"vitastor"
|
||||
],
|
||||
"repository": {
|
||||
"type": "git",
|
||||
"url": "git://git.yourcmc.ru/vitalif/vitastor.git"
|
||||
},
|
||||
"scripts": {
|
||||
"build": "node-gyp rebuild"
|
||||
},
|
||||
"author": "Vitaliy Filippov",
|
||||
"license": "VNPL-2.0",
|
||||
"dependencies": {
|
||||
"bindings": "1.5.0",
|
||||
"nan": "^2.19.0"
|
||||
}
|
||||
}
|
@@ -50,7 +50,7 @@ from cinder.volume import configuration
|
||||
from cinder.volume import driver
|
||||
from cinder.volume import volume_utils
|
||||
|
||||
VERSION = '1.6.1'
|
||||
VERSION = '1.7.1'
|
||||
|
||||
LOG = logging.getLogger(__name__)
|
||||
|
||||
@@ -707,10 +707,10 @@ class VitastorDriver(driver.CloneableImageVD,
|
||||
return ({}, True)
|
||||
return ({}, False)
|
||||
|
||||
def copy_image_to_encrypted_volume(self, context, volume, image_service, image_id):
|
||||
self.copy_image_to_volume(context, volume, image_service, image_id, encrypted = True)
|
||||
def copy_image_to_encrypted_volume(self, context, volume, image_service, image_id, disable_sparse=False):
|
||||
self.copy_image_to_volume(context, volume, image_service, image_id, encrypted = True, disable_sparse=False)
|
||||
|
||||
def copy_image_to_volume(self, context, volume, image_service, image_id, encrypted = False):
|
||||
def copy_image_to_volume(self, context, volume, image_service, image_id, encrypted = False, disable_sparse=False):
|
||||
tmp_dir = volume_utils.image_conversion_dir()
|
||||
with tempfile.NamedTemporaryFile(dir = tmp_dir) as tmp:
|
||||
image_utils.fetch_to_raw(
|
||||
|
670
patches/libvirt-10.0-vitastor.diff
Normal file
670
patches/libvirt-10.0-vitastor.diff
Normal file
@@ -0,0 +1,670 @@
|
||||
From 571bde71268dcca6446454bb1e895e21bcc7b2a0 Mon Sep 17 00:00:00 2001
|
||||
From: ace <ace@0xace.cc>
|
||||
Date: Sat, 18 May 2024 19:45:49 +0300
|
||||
Subject: [PATCH] Add Vitastor support
|
||||
|
||||
---
|
||||
include/libvirt/libvirt-storage.h | 1 +
|
||||
src/conf/domain_conf.c | 4 +-
|
||||
src/conf/domain_validate.c | 10 +-
|
||||
src/conf/schemas/domaincommon.rng | 30 +++++
|
||||
src/conf/storage_conf.c | 20 ++-
|
||||
src/conf/storage_conf.h | 2 +
|
||||
src/conf/storage_source_conf.c | 2 +
|
||||
src/conf/storage_source_conf.h | 1 +
|
||||
src/conf/virstorageobj.c | 3 +
|
||||
src/libvirt-storage.c | 1 +
|
||||
src/libxl/libxl_conf.c | 1 +
|
||||
src/libxl/xen_xl.c | 1 +
|
||||
src/qemu/qemu_block.c | 45 +++++++
|
||||
src/qemu/qemu_domain.c | 4 +-
|
||||
src/qemu/qemu_snapshot.c | 2 +
|
||||
src/storage/storage_driver.c | 1 +
|
||||
.../storage_source_backingstore.c | 123 ++++++++++++++++++
|
||||
src/test/test_driver.c | 1 +
|
||||
.../storagepoolcapsschemadata/poolcaps-fs.xml | 7 +
|
||||
.../poolcaps-full.xml | 7 +
|
||||
tests/storagepoolxml2argvtest.c | 1 +
|
||||
tools/virsh-pool.c | 3 +
|
||||
22 files changed, 265 insertions(+), 5 deletions(-)
|
||||
|
||||
diff --git a/include/libvirt/libvirt-storage.h b/include/libvirt/libvirt-storage.h
|
||||
index aaad4a3da1..5f5daa8341 100644
|
||||
--- a/include/libvirt/libvirt-storage.h
|
||||
+++ b/include/libvirt/libvirt-storage.h
|
||||
@@ -326,6 +326,7 @@ typedef enum {
|
||||
VIR_CONNECT_LIST_STORAGE_POOLS_ZFS = 1 << 17, /* (Since: 1.2.8) */
|
||||
VIR_CONNECT_LIST_STORAGE_POOLS_VSTORAGE = 1 << 18, /* (Since: 3.1.0) */
|
||||
VIR_CONNECT_LIST_STORAGE_POOLS_ISCSI_DIRECT = 1 << 19, /* (Since: 5.6.0) */
|
||||
+ VIR_CONNECT_LIST_STORAGE_POOLS_VITASTOR = 1 << 20, /* (Since: 5.0.0) */
|
||||
} virConnectListAllStoragePoolsFlags;
|
||||
|
||||
int virConnectListAllStoragePools(virConnectPtr conn,
|
||||
diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
|
||||
index 52a5796ad2..089697b2a3 100644
|
||||
--- a/src/conf/domain_conf.c
|
||||
+++ b/src/conf/domain_conf.c
|
||||
@@ -7191,7 +7191,8 @@ virDomainDiskSourceNetworkParse(xmlNodePtr node,
|
||||
src->configFile = virXPathString("string(./config/@file)", ctxt);
|
||||
|
||||
if (src->protocol == VIR_STORAGE_NET_PROTOCOL_HTTP ||
|
||||
- src->protocol == VIR_STORAGE_NET_PROTOCOL_HTTPS)
|
||||
+ src->protocol == VIR_STORAGE_NET_PROTOCOL_HTTPS ||
|
||||
+ src->protocol == VIR_STORAGE_NET_PROTOCOL_VITASTOR)
|
||||
src->query = virXMLPropString(node, "query");
|
||||
|
||||
if (virDomainStorageNetworkParseHosts(node, ctxt, &src->hosts, &src->nhosts) < 0)
|
||||
@@ -30657,6 +30658,7 @@ virDomainStorageSourceTranslateSourcePool(virStorageSource *src,
|
||||
|
||||
case VIR_STORAGE_POOL_MPATH:
|
||||
case VIR_STORAGE_POOL_RBD:
|
||||
+ case VIR_STORAGE_POOL_VITASTOR:
|
||||
case VIR_STORAGE_POOL_SHEEPDOG:
|
||||
case VIR_STORAGE_POOL_GLUSTER:
|
||||
case VIR_STORAGE_POOL_LAST:
|
||||
diff --git a/src/conf/domain_validate.c b/src/conf/domain_validate.c
|
||||
index faa7659f07..01b907d60d 100644
|
||||
--- a/src/conf/domain_validate.c
|
||||
+++ b/src/conf/domain_validate.c
|
||||
@@ -495,6 +495,7 @@ virDomainDiskDefValidateSourceChainOne(const virStorageSource *src)
|
||||
case VIR_STORAGE_NET_PROTOCOL_RBD:
|
||||
break;
|
||||
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
case VIR_STORAGE_NET_PROTOCOL_NBD:
|
||||
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
|
||||
case VIR_STORAGE_NET_PROTOCOL_GLUSTER:
|
||||
@@ -541,7 +542,7 @@ virDomainDiskDefValidateSourceChainOne(const virStorageSource *src)
|
||||
}
|
||||
}
|
||||
|
||||
- /* internal snapshots and config files are currently supported only with rbd: */
|
||||
+ /* internal snapshots are currently supported only with rbd: */
|
||||
if (virStorageSourceGetActualType(src) != VIR_STORAGE_TYPE_NETWORK &&
|
||||
src->protocol != VIR_STORAGE_NET_PROTOCOL_RBD) {
|
||||
if (src->snapshot) {
|
||||
@@ -549,10 +550,15 @@ virDomainDiskDefValidateSourceChainOne(const virStorageSource *src)
|
||||
_("<snapshot> element is currently supported only with 'rbd' disks"));
|
||||
return -1;
|
||||
}
|
||||
+ }
|
||||
|
||||
+ /* config files are currently supported only with rbd and vitastor: */
|
||||
+ if (virStorageSourceGetActualType(src) != VIR_STORAGE_TYPE_NETWORK &&
|
||||
+ src->protocol != VIR_STORAGE_NET_PROTOCOL_RBD &&
|
||||
+ src->protocol != VIR_STORAGE_NET_PROTOCOL_VITASTOR) {
|
||||
if (src->configFile) {
|
||||
virReportError(VIR_ERR_XML_ERROR, "%s",
|
||||
- _("<config> element is currently supported only with 'rbd' disks"));
|
||||
+ _("<config> element is currently supported only with 'rbd' and 'vitastor' disks"));
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
diff --git a/src/conf/schemas/domaincommon.rng b/src/conf/schemas/domaincommon.rng
|
||||
index df44cd9857..4bb72fc697 100644
|
||||
--- a/src/conf/schemas/domaincommon.rng
|
||||
+++ b/src/conf/schemas/domaincommon.rng
|
||||
@@ -1997,6 +1997,35 @@
|
||||
</element>
|
||||
</define>
|
||||
|
||||
+ <define name="diskSourceNetworkProtocolVitastor">
|
||||
+ <element name="source">
|
||||
+ <interleave>
|
||||
+ <attribute name="protocol">
|
||||
+ <value>vitastor</value>
|
||||
+ </attribute>
|
||||
+ <ref name="diskSourceCommon"/>
|
||||
+ <optional>
|
||||
+ <attribute name="name"/>
|
||||
+ </optional>
|
||||
+ <optional>
|
||||
+ <attribute name="query"/>
|
||||
+ </optional>
|
||||
+ <zeroOrMore>
|
||||
+ <ref name="diskSourceNetworkHost"/>
|
||||
+ </zeroOrMore>
|
||||
+ <optional>
|
||||
+ <element name="config">
|
||||
+ <attribute name="file">
|
||||
+ <ref name="absFilePath"/>
|
||||
+ </attribute>
|
||||
+ <empty/>
|
||||
+ </element>
|
||||
+ </optional>
|
||||
+ <empty/>
|
||||
+ </interleave>
|
||||
+ </element>
|
||||
+ </define>
|
||||
+
|
||||
<define name="diskSourceNetworkProtocolISCSI">
|
||||
<element name="source">
|
||||
<attribute name="protocol">
|
||||
@@ -2347,6 +2376,7 @@
|
||||
<ref name="diskSourceNetworkProtocolSimple"/>
|
||||
<ref name="diskSourceNetworkProtocolVxHS"/>
|
||||
<ref name="diskSourceNetworkProtocolNFS"/>
|
||||
+ <ref name="diskSourceNetworkProtocolVitastor"/>
|
||||
</choice>
|
||||
</define>
|
||||
|
||||
diff --git a/src/conf/storage_conf.c b/src/conf/storage_conf.c
|
||||
index 68842004b7..1d69a788b6 100644
|
||||
--- a/src/conf/storage_conf.c
|
||||
+++ b/src/conf/storage_conf.c
|
||||
@@ -56,7 +56,7 @@ VIR_ENUM_IMPL(virStoragePool,
|
||||
"logical", "disk", "iscsi",
|
||||
"iscsi-direct", "scsi", "mpath",
|
||||
"rbd", "sheepdog", "gluster",
|
||||
- "zfs", "vstorage",
|
||||
+ "zfs", "vstorage", "vitastor",
|
||||
);
|
||||
|
||||
VIR_ENUM_IMPL(virStoragePoolFormatFileSystem,
|
||||
@@ -242,6 +242,18 @@ static virStoragePoolTypeInfo poolTypeInfo[] = {
|
||||
.formatToString = virStorageFileFormatTypeToString,
|
||||
}
|
||||
},
|
||||
+ {.poolType = VIR_STORAGE_POOL_VITASTOR,
|
||||
+ .poolOptions = {
|
||||
+ .flags = (VIR_STORAGE_POOL_SOURCE_HOST |
|
||||
+ VIR_STORAGE_POOL_SOURCE_NETWORK |
|
||||
+ VIR_STORAGE_POOL_SOURCE_NAME),
|
||||
+ },
|
||||
+ .volOptions = {
|
||||
+ .defaultFormat = VIR_STORAGE_FILE_RAW,
|
||||
+ .formatFromString = virStorageVolumeFormatFromString,
|
||||
+ .formatToString = virStorageFileFormatTypeToString,
|
||||
+ }
|
||||
+ },
|
||||
{.poolType = VIR_STORAGE_POOL_SHEEPDOG,
|
||||
.poolOptions = {
|
||||
.flags = (VIR_STORAGE_POOL_SOURCE_HOST |
|
||||
@@ -538,6 +550,11 @@ virStoragePoolDefParseSource(xmlXPathContextPtr ctxt,
|
||||
_("element 'name' is mandatory for RBD pool"));
|
||||
return -1;
|
||||
}
|
||||
+ if (pool_type == VIR_STORAGE_POOL_VITASTOR && source->name == NULL) {
|
||||
+ virReportError(VIR_ERR_XML_ERROR, "%s",
|
||||
+ _("element 'name' is mandatory for Vitastor pool"));
|
||||
+ return -1;
|
||||
+ }
|
||||
|
||||
if (options->formatFromString) {
|
||||
g_autofree char *format = NULL;
|
||||
@@ -1127,6 +1144,7 @@ virStoragePoolDefFormatBuf(virBuffer *buf,
|
||||
/* RBD, Sheepdog, Gluster and Iscsi-direct devices are not local block devs nor
|
||||
* files, so they don't have a target */
|
||||
if (def->type != VIR_STORAGE_POOL_RBD &&
|
||||
+ def->type != VIR_STORAGE_POOL_VITASTOR &&
|
||||
def->type != VIR_STORAGE_POOL_SHEEPDOG &&
|
||||
def->type != VIR_STORAGE_POOL_GLUSTER &&
|
||||
def->type != VIR_STORAGE_POOL_ISCSI_DIRECT) {
|
||||
diff --git a/src/conf/storage_conf.h b/src/conf/storage_conf.h
|
||||
index fc67957cfe..720c07ef74 100644
|
||||
--- a/src/conf/storage_conf.h
|
||||
+++ b/src/conf/storage_conf.h
|
||||
@@ -103,6 +103,7 @@ typedef enum {
|
||||
VIR_STORAGE_POOL_GLUSTER, /* Gluster device */
|
||||
VIR_STORAGE_POOL_ZFS, /* ZFS */
|
||||
VIR_STORAGE_POOL_VSTORAGE, /* Virtuozzo Storage */
|
||||
+ VIR_STORAGE_POOL_VITASTOR, /* Vitastor */
|
||||
|
||||
VIR_STORAGE_POOL_LAST,
|
||||
} virStoragePoolType;
|
||||
@@ -454,6 +455,7 @@ VIR_ENUM_DECL(virStoragePartedFs);
|
||||
VIR_CONNECT_LIST_STORAGE_POOLS_SCSI | \
|
||||
VIR_CONNECT_LIST_STORAGE_POOLS_MPATH | \
|
||||
VIR_CONNECT_LIST_STORAGE_POOLS_RBD | \
|
||||
+ VIR_CONNECT_LIST_STORAGE_POOLS_VITASTOR | \
|
||||
VIR_CONNECT_LIST_STORAGE_POOLS_SHEEPDOG | \
|
||||
VIR_CONNECT_LIST_STORAGE_POOLS_GLUSTER | \
|
||||
VIR_CONNECT_LIST_STORAGE_POOLS_ZFS | \
|
||||
diff --git a/src/conf/storage_source_conf.c b/src/conf/storage_source_conf.c
|
||||
index 959ec5ed40..e751dd4d6a 100644
|
||||
--- a/src/conf/storage_source_conf.c
|
||||
+++ b/src/conf/storage_source_conf.c
|
||||
@@ -88,6 +88,7 @@ VIR_ENUM_IMPL(virStorageNetProtocol,
|
||||
"ssh",
|
||||
"vxhs",
|
||||
"nfs",
|
||||
+ "vitastor",
|
||||
);
|
||||
|
||||
|
||||
@@ -1301,6 +1302,7 @@ virStorageSourceNetworkDefaultPort(virStorageNetProtocol protocol)
|
||||
case VIR_STORAGE_NET_PROTOCOL_GLUSTER:
|
||||
return 24007;
|
||||
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
case VIR_STORAGE_NET_PROTOCOL_RBD:
|
||||
/* we don't provide a default for RBD */
|
||||
return 0;
|
||||
diff --git a/src/conf/storage_source_conf.h b/src/conf/storage_source_conf.h
|
||||
index 05b4bda16c..b5ed143c39 100644
|
||||
--- a/src/conf/storage_source_conf.h
|
||||
+++ b/src/conf/storage_source_conf.h
|
||||
@@ -129,6 +129,7 @@ typedef enum {
|
||||
VIR_STORAGE_NET_PROTOCOL_SSH,
|
||||
VIR_STORAGE_NET_PROTOCOL_VXHS,
|
||||
VIR_STORAGE_NET_PROTOCOL_NFS,
|
||||
+ VIR_STORAGE_NET_PROTOCOL_VITASTOR,
|
||||
|
||||
VIR_STORAGE_NET_PROTOCOL_LAST
|
||||
} virStorageNetProtocol;
|
||||
diff --git a/src/conf/virstorageobj.c b/src/conf/virstorageobj.c
|
||||
index 59fa5da372..4739167f5f 100644
|
||||
--- a/src/conf/virstorageobj.c
|
||||
+++ b/src/conf/virstorageobj.c
|
||||
@@ -1438,6 +1438,7 @@ virStoragePoolObjSourceFindDuplicateCb(const void *payload,
|
||||
return 1;
|
||||
break;
|
||||
|
||||
+ case VIR_STORAGE_POOL_VITASTOR:
|
||||
case VIR_STORAGE_POOL_ISCSI_DIRECT:
|
||||
case VIR_STORAGE_POOL_RBD:
|
||||
case VIR_STORAGE_POOL_LAST:
|
||||
@@ -1921,6 +1922,8 @@ virStoragePoolObjMatch(virStoragePoolObj *obj,
|
||||
(obj->def->type == VIR_STORAGE_POOL_MPATH)) ||
|
||||
(MATCH(VIR_CONNECT_LIST_STORAGE_POOLS_RBD) &&
|
||||
(obj->def->type == VIR_STORAGE_POOL_RBD)) ||
|
||||
+ (MATCH(VIR_CONNECT_LIST_STORAGE_POOLS_VITASTOR) &&
|
||||
+ (obj->def->type == VIR_STORAGE_POOL_VITASTOR)) ||
|
||||
(MATCH(VIR_CONNECT_LIST_STORAGE_POOLS_SHEEPDOG) &&
|
||||
(obj->def->type == VIR_STORAGE_POOL_SHEEPDOG)) ||
|
||||
(MATCH(VIR_CONNECT_LIST_STORAGE_POOLS_GLUSTER) &&
|
||||
diff --git a/src/libvirt-storage.c b/src/libvirt-storage.c
|
||||
index db7660aac4..561df34709 100644
|
||||
--- a/src/libvirt-storage.c
|
||||
+++ b/src/libvirt-storage.c
|
||||
@@ -94,6 +94,7 @@ virStoragePoolGetConnect(virStoragePoolPtr pool)
|
||||
* VIR_CONNECT_LIST_STORAGE_POOLS_SCSI
|
||||
* VIR_CONNECT_LIST_STORAGE_POOLS_MPATH
|
||||
* VIR_CONNECT_LIST_STORAGE_POOLS_RBD
|
||||
+ * VIR_CONNECT_LIST_STORAGE_POOLS_VITASTOR
|
||||
* VIR_CONNECT_LIST_STORAGE_POOLS_SHEEPDOG
|
||||
* VIR_CONNECT_LIST_STORAGE_POOLS_GLUSTER
|
||||
* VIR_CONNECT_LIST_STORAGE_POOLS_ZFS
|
||||
diff --git a/src/libxl/libxl_conf.c b/src/libxl/libxl_conf.c
|
||||
index 62e1be6672..71a1d42896 100644
|
||||
--- a/src/libxl/libxl_conf.c
|
||||
+++ b/src/libxl/libxl_conf.c
|
||||
@@ -979,6 +979,7 @@ libxlMakeNetworkDiskSrcStr(virStorageSource *src,
|
||||
case VIR_STORAGE_NET_PROTOCOL_SSH:
|
||||
case VIR_STORAGE_NET_PROTOCOL_VXHS:
|
||||
case VIR_STORAGE_NET_PROTOCOL_NFS:
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
case VIR_STORAGE_NET_PROTOCOL_LAST:
|
||||
case VIR_STORAGE_NET_PROTOCOL_NONE:
|
||||
virReportError(VIR_ERR_NO_SUPPORT,
|
||||
diff --git a/src/libxl/xen_xl.c b/src/libxl/xen_xl.c
|
||||
index f175359307..8efcf4c329 100644
|
||||
--- a/src/libxl/xen_xl.c
|
||||
+++ b/src/libxl/xen_xl.c
|
||||
@@ -1456,6 +1456,7 @@ xenFormatXLDiskSrcNet(virStorageSource *src)
|
||||
case VIR_STORAGE_NET_PROTOCOL_SSH:
|
||||
case VIR_STORAGE_NET_PROTOCOL_VXHS:
|
||||
case VIR_STORAGE_NET_PROTOCOL_NFS:
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
case VIR_STORAGE_NET_PROTOCOL_LAST:
|
||||
case VIR_STORAGE_NET_PROTOCOL_NONE:
|
||||
virReportError(VIR_ERR_NO_SUPPORT,
|
||||
diff --git a/src/qemu/qemu_block.c b/src/qemu/qemu_block.c
|
||||
index c9f5cbbf29..dbbac36836 100644
|
||||
--- a/src/qemu/qemu_block.c
|
||||
+++ b/src/qemu/qemu_block.c
|
||||
@@ -758,6 +758,38 @@ qemuBlockStorageSourceGetRBDProps(virStorageSource *src,
|
||||
}
|
||||
|
||||
|
||||
+static virJSONValue *
|
||||
+qemuBlockStorageSourceGetVitastorProps(virStorageSource *src)
|
||||
+{
|
||||
+ virJSONValue *ret = NULL;
|
||||
+ virStorageNetHostDef *host;
|
||||
+ size_t i;
|
||||
+ g_auto(virBuffer) buf = VIR_BUFFER_INITIALIZER;
|
||||
+ g_autofree char *etcd = NULL;
|
||||
+
|
||||
+ for (i = 0; i < src->nhosts; i++) {
|
||||
+ host = src->hosts + i;
|
||||
+ if ((virStorageNetHostTransport)host->transport != VIR_STORAGE_NET_HOST_TRANS_TCP) {
|
||||
+ return NULL;
|
||||
+ }
|
||||
+ virBufferAsprintf(&buf, i > 0 ? ",%s:%u" : "%s:%u", host->name, host->port);
|
||||
+ }
|
||||
+ if (src->nhosts > 0) {
|
||||
+ etcd = virBufferContentAndReset(&buf);
|
||||
+ }
|
||||
+
|
||||
+ if (virJSONValueObjectAdd(&ret,
|
||||
+ "S:etcd-host", etcd,
|
||||
+ "S:etcd-prefix", src->query,
|
||||
+ "S:config-path", src->configFile,
|
||||
+ "s:image", src->path,
|
||||
+ NULL) < 0)
|
||||
+ return NULL;
|
||||
+
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
+
|
||||
static virJSONValue *
|
||||
qemuBlockStorageSourceGetSheepdogProps(virStorageSource *src)
|
||||
{
|
||||
@@ -1140,6 +1172,12 @@ qemuBlockStorageSourceGetBackendProps(virStorageSource *src,
|
||||
return NULL;
|
||||
break;
|
||||
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
+ driver = "vitastor";
|
||||
+ if (!(fileprops = qemuBlockStorageSourceGetVitastorProps(src)))
|
||||
+ return NULL;
|
||||
+ break;
|
||||
+
|
||||
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
|
||||
driver = "sheepdog";
|
||||
if (!(fileprops = qemuBlockStorageSourceGetSheepdogProps(src)))
|
||||
@@ -2020,6 +2058,7 @@ qemuBlockGetBackingStoreString(virStorageSource *src,
|
||||
|
||||
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
|
||||
case VIR_STORAGE_NET_PROTOCOL_RBD:
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
case VIR_STORAGE_NET_PROTOCOL_VXHS:
|
||||
case VIR_STORAGE_NET_PROTOCOL_NFS:
|
||||
case VIR_STORAGE_NET_PROTOCOL_SSH:
|
||||
@@ -2400,6 +2439,12 @@ qemuBlockStorageSourceCreateGetStorageProps(virStorageSource *src,
|
||||
return -1;
|
||||
break;
|
||||
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
+ driver = "vitastor";
|
||||
+ if (!(location = qemuBlockStorageSourceGetVitastorProps(src)))
|
||||
+ return -1;
|
||||
+ break;
|
||||
+
|
||||
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
|
||||
driver = "sheepdog";
|
||||
if (!(location = qemuBlockStorageSourceGetSheepdogProps(src)))
|
||||
diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
|
||||
index 341c543280..61b248fa2c 100644
|
||||
--- a/src/qemu/qemu_domain.c
|
||||
+++ b/src/qemu/qemu_domain.c
|
||||
@@ -5207,7 +5207,8 @@ qemuDomainValidateStorageSource(virStorageSource *src,
|
||||
if (src->query &&
|
||||
(actualType != VIR_STORAGE_TYPE_NETWORK ||
|
||||
(src->protocol != VIR_STORAGE_NET_PROTOCOL_HTTPS &&
|
||||
- src->protocol != VIR_STORAGE_NET_PROTOCOL_HTTP))) {
|
||||
+ src->protocol != VIR_STORAGE_NET_PROTOCOL_HTTP &&
|
||||
+ src->protocol != VIR_STORAGE_NET_PROTOCOL_VITASTOR))) {
|
||||
virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
|
||||
_("query is supported only with HTTP(S) protocols"));
|
||||
return -1;
|
||||
@@ -10387,6 +10388,7 @@ qemuDomainPrepareStorageSourceTLS(virStorageSource *src,
|
||||
break;
|
||||
|
||||
case VIR_STORAGE_NET_PROTOCOL_RBD:
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
|
||||
case VIR_STORAGE_NET_PROTOCOL_GLUSTER:
|
||||
case VIR_STORAGE_NET_PROTOCOL_ISCSI:
|
||||
diff --git a/src/qemu/qemu_snapshot.c b/src/qemu/qemu_snapshot.c
|
||||
index 0cac0c4146..4955ebd8d4 100644
|
||||
--- a/src/qemu/qemu_snapshot.c
|
||||
+++ b/src/qemu/qemu_snapshot.c
|
||||
@@ -423,6 +423,7 @@ qemuSnapshotPrepareDiskExternalInactive(virDomainSnapshotDiskDef *snapdisk,
|
||||
case VIR_STORAGE_NET_PROTOCOL_NONE:
|
||||
case VIR_STORAGE_NET_PROTOCOL_NBD:
|
||||
case VIR_STORAGE_NET_PROTOCOL_RBD:
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
|
||||
case VIR_STORAGE_NET_PROTOCOL_GLUSTER:
|
||||
case VIR_STORAGE_NET_PROTOCOL_ISCSI:
|
||||
@@ -648,6 +649,7 @@ qemuSnapshotPrepareDiskInternal(virDomainDiskDef *disk,
|
||||
case VIR_STORAGE_NET_PROTOCOL_NONE:
|
||||
case VIR_STORAGE_NET_PROTOCOL_NBD:
|
||||
case VIR_STORAGE_NET_PROTOCOL_RBD:
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
|
||||
case VIR_STORAGE_NET_PROTOCOL_GLUSTER:
|
||||
case VIR_STORAGE_NET_PROTOCOL_ISCSI:
|
||||
diff --git a/src/storage/storage_driver.c b/src/storage/storage_driver.c
|
||||
index 314fe930e0..fb615a8b4e 100644
|
||||
--- a/src/storage/storage_driver.c
|
||||
+++ b/src/storage/storage_driver.c
|
||||
@@ -1626,6 +1626,7 @@ storageVolLookupByPathCallback(virStoragePoolObj *obj,
|
||||
|
||||
case VIR_STORAGE_POOL_GLUSTER:
|
||||
case VIR_STORAGE_POOL_RBD:
|
||||
+ case VIR_STORAGE_POOL_VITASTOR:
|
||||
case VIR_STORAGE_POOL_SHEEPDOG:
|
||||
case VIR_STORAGE_POOL_ZFS:
|
||||
case VIR_STORAGE_POOL_LAST:
|
||||
diff --git a/src/storage_file/storage_source_backingstore.c b/src/storage_file/storage_source_backingstore.c
|
||||
index 80681924ea..8a3ade9ec0 100644
|
||||
--- a/src/storage_file/storage_source_backingstore.c
|
||||
+++ b/src/storage_file/storage_source_backingstore.c
|
||||
@@ -287,6 +287,75 @@ virStorageSourceParseRBDColonString(const char *rbdstr,
|
||||
}
|
||||
|
||||
|
||||
+static int
|
||||
+virStorageSourceParseVitastorColonString(const char *colonstr,
|
||||
+ virStorageSource *src)
|
||||
+{
|
||||
+ char *p, *e, *next;
|
||||
+ g_autofree char *options = NULL;
|
||||
+
|
||||
+ /* optionally skip the "vitastor:" prefix if provided */
|
||||
+ if (STRPREFIX(colonstr, "vitastor:"))
|
||||
+ colonstr += strlen("vitastor:");
|
||||
+
|
||||
+ options = g_strdup(colonstr);
|
||||
+
|
||||
+ p = options;
|
||||
+ while (*p) {
|
||||
+ /* find : delimiter or end of string */
|
||||
+ for (e = p; *e && *e != ':'; ++e) {
|
||||
+ if (*e == '\\') {
|
||||
+ e++;
|
||||
+ if (*e == '\0')
|
||||
+ break;
|
||||
+ }
|
||||
+ }
|
||||
+ if (*e == '\0') {
|
||||
+ next = e; /* last kv pair */
|
||||
+ } else {
|
||||
+ next = e + 1;
|
||||
+ *e = '\0';
|
||||
+ }
|
||||
+
|
||||
+ if (STRPREFIX(p, "image=")) {
|
||||
+ src->path = g_strdup(p + strlen("image="));
|
||||
+ } else if (STRPREFIX(p, "etcd-prefix=")) {
|
||||
+ src->query = g_strdup(p + strlen("etcd-prefix="));
|
||||
+ } else if (STRPREFIX(p, "config-path=")) {
|
||||
+ src->configFile = g_strdup(p + strlen("config-path="));
|
||||
+ } else if (STRPREFIX(p, "etcd-host=")) {
|
||||
+ char *h, *sep;
|
||||
+
|
||||
+ h = p + strlen("etcd-host=");
|
||||
+ while (h < e) {
|
||||
+ for (sep = h; sep < e; ++sep) {
|
||||
+ if (*sep == '\\' && (sep[1] == ',' ||
|
||||
+ sep[1] == ';' ||
|
||||
+ sep[1] == ' ')) {
|
||||
+ *sep = '\0';
|
||||
+ sep += 2;
|
||||
+ break;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ if (virStorageSourceRBDAddHost(src, h) < 0)
|
||||
+ return -1;
|
||||
+
|
||||
+ h = sep;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ p = next;
|
||||
+ }
|
||||
+
|
||||
+ if (!src->path) {
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+
|
||||
static int
|
||||
virStorageSourceParseNBDColonString(const char *nbdstr,
|
||||
virStorageSource *src)
|
||||
@@ -399,6 +468,11 @@ virStorageSourceParseBackingColon(virStorageSource *src,
|
||||
return -1;
|
||||
break;
|
||||
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
+ if (virStorageSourceParseVitastorColonString(path, src) < 0)
|
||||
+ return -1;
|
||||
+ break;
|
||||
+
|
||||
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
|
||||
case VIR_STORAGE_NET_PROTOCOL_LAST:
|
||||
case VIR_STORAGE_NET_PROTOCOL_NONE:
|
||||
@@ -975,6 +1049,54 @@ virStorageSourceParseBackingJSONRBD(virStorageSource *src,
|
||||
return 0;
|
||||
}
|
||||
|
||||
+static int
|
||||
+virStorageSourceParseBackingJSONVitastor(virStorageSource *src,
|
||||
+ virJSONValue *json,
|
||||
+ const char *jsonstr G_GNUC_UNUSED,
|
||||
+ int opaque G_GNUC_UNUSED)
|
||||
+{
|
||||
+ const char *filename;
|
||||
+ const char *image = virJSONValueObjectGetString(json, "image");
|
||||
+ const char *conf = virJSONValueObjectGetString(json, "config-path");
|
||||
+ const char *etcd_prefix = virJSONValueObjectGetString(json, "etcd-prefix");
|
||||
+ virJSONValue *servers = virJSONValueObjectGetArray(json, "server");
|
||||
+ size_t nservers;
|
||||
+ size_t i;
|
||||
+
|
||||
+ src->type = VIR_STORAGE_TYPE_NETWORK;
|
||||
+ src->protocol = VIR_STORAGE_NET_PROTOCOL_VITASTOR;
|
||||
+
|
||||
+ /* legacy syntax passed via 'filename' option */
|
||||
+ if ((filename = virJSONValueObjectGetString(json, "filename")))
|
||||
+ return virStorageSourceParseVitastorColonString(filename, src);
|
||||
+
|
||||
+ if (!image) {
|
||||
+ virReportError(VIR_ERR_INVALID_ARG, "%s",
|
||||
+ _("missing image name in Vitastor backing volume "
|
||||
+ "JSON specification"));
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ src->path = g_strdup(image);
|
||||
+ src->configFile = g_strdup(conf);
|
||||
+ src->query = g_strdup(etcd_prefix);
|
||||
+
|
||||
+ if (servers) {
|
||||
+ nservers = virJSONValueArraySize(servers);
|
||||
+
|
||||
+ src->hosts = g_new0(virStorageNetHostDef, nservers);
|
||||
+ src->nhosts = nservers;
|
||||
+
|
||||
+ for (i = 0; i < nservers; i++) {
|
||||
+ if (virStorageSourceParseBackingJSONInetSocketAddress(src->hosts + i,
|
||||
+ virJSONValueArrayGet(servers, i)) < 0)
|
||||
+ return -1;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
static int
|
||||
virStorageSourceParseBackingJSONRaw(virStorageSource *src,
|
||||
virJSONValue *json,
|
||||
@@ -1152,6 +1274,7 @@ static const struct virStorageSourceJSONDriverParser jsonParsers[] = {
|
||||
{"sheepdog", false, virStorageSourceParseBackingJSONSheepdog, 0},
|
||||
{"ssh", false, virStorageSourceParseBackingJSONSSH, 0},
|
||||
{"rbd", false, virStorageSourceParseBackingJSONRBD, 0},
|
||||
+ {"vitastor", false, virStorageSourceParseBackingJSONVitastor, 0},
|
||||
{"raw", true, virStorageSourceParseBackingJSONRaw, 0},
|
||||
{"nfs", false, virStorageSourceParseBackingJSONNFS, 0},
|
||||
{"vxhs", false, virStorageSourceParseBackingJSONVxHS, 0},
|
||||
diff --git a/src/test/test_driver.c b/src/test/test_driver.c
|
||||
index ed545848af..dbfdbe8476 100644
|
||||
--- a/src/test/test_driver.c
|
||||
+++ b/src/test/test_driver.c
|
||||
@@ -7336,6 +7336,7 @@ testStorageVolumeTypeForPool(int pooltype)
|
||||
case VIR_STORAGE_POOL_ISCSI_DIRECT:
|
||||
case VIR_STORAGE_POOL_GLUSTER:
|
||||
case VIR_STORAGE_POOL_RBD:
|
||||
+ case VIR_STORAGE_POOL_VITASTOR:
|
||||
return VIR_STORAGE_VOL_NETWORK;
|
||||
case VIR_STORAGE_POOL_LOGICAL:
|
||||
case VIR_STORAGE_POOL_DISK:
|
||||
diff --git a/tests/storagepoolcapsschemadata/poolcaps-fs.xml b/tests/storagepoolcapsschemadata/poolcaps-fs.xml
|
||||
index eee75af746..8bd0a57bdd 100644
|
||||
--- a/tests/storagepoolcapsschemadata/poolcaps-fs.xml
|
||||
+++ b/tests/storagepoolcapsschemadata/poolcaps-fs.xml
|
||||
@@ -204,4 +204,11 @@
|
||||
</enum>
|
||||
</volOptions>
|
||||
</pool>
|
||||
+ <pool type='vitastor' supported='no'>
|
||||
+ <volOptions>
|
||||
+ <defaultFormat type='raw'/>
|
||||
+ <enum name='targetFormatType'>
|
||||
+ </enum>
|
||||
+ </volOptions>
|
||||
+ </pool>
|
||||
</storagepoolCapabilities>
|
||||
diff --git a/tests/storagepoolcapsschemadata/poolcaps-full.xml b/tests/storagepoolcapsschemadata/poolcaps-full.xml
|
||||
index 805950a937..852df0de16 100644
|
||||
--- a/tests/storagepoolcapsschemadata/poolcaps-full.xml
|
||||
+++ b/tests/storagepoolcapsschemadata/poolcaps-full.xml
|
||||
@@ -204,4 +204,11 @@
|
||||
</enum>
|
||||
</volOptions>
|
||||
</pool>
|
||||
+ <pool type='vitastor' supported='yes'>
|
||||
+ <volOptions>
|
||||
+ <defaultFormat type='raw'/>
|
||||
+ <enum name='targetFormatType'>
|
||||
+ </enum>
|
||||
+ </volOptions>
|
||||
+ </pool>
|
||||
</storagepoolCapabilities>
|
||||
diff --git a/tests/storagepoolxml2argvtest.c b/tests/storagepoolxml2argvtest.c
|
||||
index e8e40d695e..db55fe5f3a 100644
|
||||
--- a/tests/storagepoolxml2argvtest.c
|
||||
+++ b/tests/storagepoolxml2argvtest.c
|
||||
@@ -65,6 +65,7 @@ testCompareXMLToArgvFiles(bool shouldFail,
|
||||
case VIR_STORAGE_POOL_GLUSTER:
|
||||
case VIR_STORAGE_POOL_ZFS:
|
||||
case VIR_STORAGE_POOL_VSTORAGE:
|
||||
+ case VIR_STORAGE_POOL_VITASTOR:
|
||||
case VIR_STORAGE_POOL_LAST:
|
||||
default:
|
||||
VIR_TEST_DEBUG("pool type '%s' has no xml2argv test", defTypeStr);
|
||||
diff --git a/tools/virsh-pool.c b/tools/virsh-pool.c
|
||||
index 36f00cf643..5f5bd3464e 100644
|
||||
--- a/tools/virsh-pool.c
|
||||
+++ b/tools/virsh-pool.c
|
||||
@@ -1223,6 +1223,9 @@ cmdPoolList(vshControl *ctl, const vshCmd *cmd G_GNUC_UNUSED)
|
||||
case VIR_STORAGE_POOL_VSTORAGE:
|
||||
flags |= VIR_CONNECT_LIST_STORAGE_POOLS_VSTORAGE;
|
||||
break;
|
||||
+ case VIR_STORAGE_POOL_VITASTOR:
|
||||
+ flags |= VIR_CONNECT_LIST_STORAGE_POOLS_VITASTOR;
|
||||
+ break;
|
||||
case VIR_STORAGE_POOL_LAST:
|
||||
break;
|
||||
}
|
||||
--
|
||||
2.43.0
|
||||
|
643
patches/libvirt-10.4-vitastor.diff
Normal file
643
patches/libvirt-10.4-vitastor.diff
Normal file
@@ -0,0 +1,643 @@
|
||||
commit 1f7e90e36b2afca0312392979b96d31951a8d66b
|
||||
Author: Vitaliy Filippov <vitalif@yourcmc.ru>
|
||||
Date: Thu Jun 27 01:34:54 2024 +0300
|
||||
|
||||
Add Vitastor support
|
||||
|
||||
diff --git a/include/libvirt/libvirt-storage.h b/include/libvirt/libvirt-storage.h
|
||||
index aaad4a3da1..5f5daa8341 100644
|
||||
--- a/include/libvirt/libvirt-storage.h
|
||||
+++ b/include/libvirt/libvirt-storage.h
|
||||
@@ -326,6 +326,7 @@ typedef enum {
|
||||
VIR_CONNECT_LIST_STORAGE_POOLS_ZFS = 1 << 17, /* (Since: 1.2.8) */
|
||||
VIR_CONNECT_LIST_STORAGE_POOLS_VSTORAGE = 1 << 18, /* (Since: 3.1.0) */
|
||||
VIR_CONNECT_LIST_STORAGE_POOLS_ISCSI_DIRECT = 1 << 19, /* (Since: 5.6.0) */
|
||||
+ VIR_CONNECT_LIST_STORAGE_POOLS_VITASTOR = 1 << 20, /* (Since: 5.0.0) */
|
||||
} virConnectListAllStoragePoolsFlags;
|
||||
|
||||
int virConnectListAllStoragePools(virConnectPtr conn,
|
||||
diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
|
||||
index fde594f811..66537db3e3 100644
|
||||
--- a/src/conf/domain_conf.c
|
||||
+++ b/src/conf/domain_conf.c
|
||||
@@ -7220,7 +7220,8 @@ virDomainDiskSourceNetworkParse(xmlNodePtr node,
|
||||
src->configFile = virXPathString("string(./config/@file)", ctxt);
|
||||
|
||||
if (src->protocol == VIR_STORAGE_NET_PROTOCOL_HTTP ||
|
||||
- src->protocol == VIR_STORAGE_NET_PROTOCOL_HTTPS)
|
||||
+ src->protocol == VIR_STORAGE_NET_PROTOCOL_HTTPS ||
|
||||
+ src->protocol == VIR_STORAGE_NET_PROTOCOL_VITASTOR)
|
||||
src->query = virXMLPropString(node, "query");
|
||||
|
||||
if (virDomainStorageNetworkParseHosts(node, ctxt, &src->hosts, &src->nhosts) < 0)
|
||||
@@ -30734,6 +30735,7 @@ virDomainStorageSourceTranslateSourcePool(virStorageSource *src,
|
||||
|
||||
case VIR_STORAGE_POOL_MPATH:
|
||||
case VIR_STORAGE_POOL_RBD:
|
||||
+ case VIR_STORAGE_POOL_VITASTOR:
|
||||
case VIR_STORAGE_POOL_SHEEPDOG:
|
||||
case VIR_STORAGE_POOL_GLUSTER:
|
||||
case VIR_STORAGE_POOL_LAST:
|
||||
diff --git a/src/conf/domain_validate.c b/src/conf/domain_validate.c
|
||||
index 395e036e8f..8a0190f85b 100644
|
||||
--- a/src/conf/domain_validate.c
|
||||
+++ b/src/conf/domain_validate.c
|
||||
@@ -495,6 +495,7 @@ virDomainDiskDefValidateSourceChainOne(const virStorageSource *src)
|
||||
case VIR_STORAGE_NET_PROTOCOL_RBD:
|
||||
break;
|
||||
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
case VIR_STORAGE_NET_PROTOCOL_NBD:
|
||||
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
|
||||
case VIR_STORAGE_NET_PROTOCOL_GLUSTER:
|
||||
@@ -541,7 +542,7 @@ virDomainDiskDefValidateSourceChainOne(const virStorageSource *src)
|
||||
}
|
||||
}
|
||||
|
||||
- /* internal snapshots and config files are currently supported only with rbd: */
|
||||
+ /* internal snapshots are currently supported only with rbd: */
|
||||
if (virStorageSourceGetActualType(src) != VIR_STORAGE_TYPE_NETWORK &&
|
||||
src->protocol != VIR_STORAGE_NET_PROTOCOL_RBD) {
|
||||
if (src->snapshot) {
|
||||
@@ -549,10 +550,15 @@ virDomainDiskDefValidateSourceChainOne(const virStorageSource *src)
|
||||
_("<snapshot> element is currently supported only with 'rbd' disks"));
|
||||
return -1;
|
||||
}
|
||||
+ }
|
||||
|
||||
+ /* config files are currently supported only with rbd and vitastor: */
|
||||
+ if (virStorageSourceGetActualType(src) != VIR_STORAGE_TYPE_NETWORK &&
|
||||
+ src->protocol != VIR_STORAGE_NET_PROTOCOL_RBD &&
|
||||
+ src->protocol != VIR_STORAGE_NET_PROTOCOL_VITASTOR) {
|
||||
if (src->configFile) {
|
||||
virReportError(VIR_ERR_XML_ERROR, "%s",
|
||||
- _("<config> element is currently supported only with 'rbd' disks"));
|
||||
+ _("<config> element is currently supported only with 'rbd' and 'vitastor' disks"));
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
diff --git a/src/conf/schemas/domaincommon.rng b/src/conf/schemas/domaincommon.rng
|
||||
index a46a824f88..4c5b720643 100644
|
||||
--- a/src/conf/schemas/domaincommon.rng
|
||||
+++ b/src/conf/schemas/domaincommon.rng
|
||||
@@ -1997,6 +1997,35 @@
|
||||
</element>
|
||||
</define>
|
||||
|
||||
+ <define name="diskSourceNetworkProtocolVitastor">
|
||||
+ <element name="source">
|
||||
+ <interleave>
|
||||
+ <attribute name="protocol">
|
||||
+ <value>vitastor</value>
|
||||
+ </attribute>
|
||||
+ <ref name="diskSourceCommon"/>
|
||||
+ <optional>
|
||||
+ <attribute name="name"/>
|
||||
+ </optional>
|
||||
+ <optional>
|
||||
+ <attribute name="query"/>
|
||||
+ </optional>
|
||||
+ <zeroOrMore>
|
||||
+ <ref name="diskSourceNetworkHost"/>
|
||||
+ </zeroOrMore>
|
||||
+ <optional>
|
||||
+ <element name="config">
|
||||
+ <attribute name="file">
|
||||
+ <ref name="absFilePath"/>
|
||||
+ </attribute>
|
||||
+ <empty/>
|
||||
+ </element>
|
||||
+ </optional>
|
||||
+ <empty/>
|
||||
+ </interleave>
|
||||
+ </element>
|
||||
+ </define>
|
||||
+
|
||||
<define name="diskSourceNetworkProtocolISCSI">
|
||||
<element name="source">
|
||||
<attribute name="protocol">
|
||||
@@ -2347,6 +2376,7 @@
|
||||
<ref name="diskSourceNetworkProtocolSimple"/>
|
||||
<ref name="diskSourceNetworkProtocolVxHS"/>
|
||||
<ref name="diskSourceNetworkProtocolNFS"/>
|
||||
+ <ref name="diskSourceNetworkProtocolVitastor"/>
|
||||
</choice>
|
||||
</define>
|
||||
|
||||
diff --git a/src/conf/storage_conf.c b/src/conf/storage_conf.c
|
||||
index 68842004b7..1d69a788b6 100644
|
||||
--- a/src/conf/storage_conf.c
|
||||
+++ b/src/conf/storage_conf.c
|
||||
@@ -56,7 +56,7 @@ VIR_ENUM_IMPL(virStoragePool,
|
||||
"logical", "disk", "iscsi",
|
||||
"iscsi-direct", "scsi", "mpath",
|
||||
"rbd", "sheepdog", "gluster",
|
||||
- "zfs", "vstorage",
|
||||
+ "zfs", "vstorage", "vitastor",
|
||||
);
|
||||
|
||||
VIR_ENUM_IMPL(virStoragePoolFormatFileSystem,
|
||||
@@ -242,6 +242,18 @@ static virStoragePoolTypeInfo poolTypeInfo[] = {
|
||||
.formatToString = virStorageFileFormatTypeToString,
|
||||
}
|
||||
},
|
||||
+ {.poolType = VIR_STORAGE_POOL_VITASTOR,
|
||||
+ .poolOptions = {
|
||||
+ .flags = (VIR_STORAGE_POOL_SOURCE_HOST |
|
||||
+ VIR_STORAGE_POOL_SOURCE_NETWORK |
|
||||
+ VIR_STORAGE_POOL_SOURCE_NAME),
|
||||
+ },
|
||||
+ .volOptions = {
|
||||
+ .defaultFormat = VIR_STORAGE_FILE_RAW,
|
||||
+ .formatFromString = virStorageVolumeFormatFromString,
|
||||
+ .formatToString = virStorageFileFormatTypeToString,
|
||||
+ }
|
||||
+ },
|
||||
{.poolType = VIR_STORAGE_POOL_SHEEPDOG,
|
||||
.poolOptions = {
|
||||
.flags = (VIR_STORAGE_POOL_SOURCE_HOST |
|
||||
@@ -538,6 +550,11 @@ virStoragePoolDefParseSource(xmlXPathContextPtr ctxt,
|
||||
_("element 'name' is mandatory for RBD pool"));
|
||||
return -1;
|
||||
}
|
||||
+ if (pool_type == VIR_STORAGE_POOL_VITASTOR && source->name == NULL) {
|
||||
+ virReportError(VIR_ERR_XML_ERROR, "%s",
|
||||
+ _("element 'name' is mandatory for Vitastor pool"));
|
||||
+ return -1;
|
||||
+ }
|
||||
|
||||
if (options->formatFromString) {
|
||||
g_autofree char *format = NULL;
|
||||
@@ -1127,6 +1144,7 @@ virStoragePoolDefFormatBuf(virBuffer *buf,
|
||||
/* RBD, Sheepdog, Gluster and Iscsi-direct devices are not local block devs nor
|
||||
* files, so they don't have a target */
|
||||
if (def->type != VIR_STORAGE_POOL_RBD &&
|
||||
+ def->type != VIR_STORAGE_POOL_VITASTOR &&
|
||||
def->type != VIR_STORAGE_POOL_SHEEPDOG &&
|
||||
def->type != VIR_STORAGE_POOL_GLUSTER &&
|
||||
def->type != VIR_STORAGE_POOL_ISCSI_DIRECT) {
|
||||
diff --git a/src/conf/storage_conf.h b/src/conf/storage_conf.h
|
||||
index fc67957cfe..720c07ef74 100644
|
||||
--- a/src/conf/storage_conf.h
|
||||
+++ b/src/conf/storage_conf.h
|
||||
@@ -103,6 +103,7 @@ typedef enum {
|
||||
VIR_STORAGE_POOL_GLUSTER, /* Gluster device */
|
||||
VIR_STORAGE_POOL_ZFS, /* ZFS */
|
||||
VIR_STORAGE_POOL_VSTORAGE, /* Virtuozzo Storage */
|
||||
+ VIR_STORAGE_POOL_VITASTOR, /* Vitastor */
|
||||
|
||||
VIR_STORAGE_POOL_LAST,
|
||||
} virStoragePoolType;
|
||||
@@ -454,6 +455,7 @@ VIR_ENUM_DECL(virStoragePartedFs);
|
||||
VIR_CONNECT_LIST_STORAGE_POOLS_SCSI | \
|
||||
VIR_CONNECT_LIST_STORAGE_POOLS_MPATH | \
|
||||
VIR_CONNECT_LIST_STORAGE_POOLS_RBD | \
|
||||
+ VIR_CONNECT_LIST_STORAGE_POOLS_VITASTOR | \
|
||||
VIR_CONNECT_LIST_STORAGE_POOLS_SHEEPDOG | \
|
||||
VIR_CONNECT_LIST_STORAGE_POOLS_GLUSTER | \
|
||||
VIR_CONNECT_LIST_STORAGE_POOLS_ZFS | \
|
||||
diff --git a/src/conf/storage_source_conf.c b/src/conf/storage_source_conf.c
|
||||
index 959ec5ed40..e751dd4d6a 100644
|
||||
--- a/src/conf/storage_source_conf.c
|
||||
+++ b/src/conf/storage_source_conf.c
|
||||
@@ -88,6 +88,7 @@ VIR_ENUM_IMPL(virStorageNetProtocol,
|
||||
"ssh",
|
||||
"vxhs",
|
||||
"nfs",
|
||||
+ "vitastor",
|
||||
);
|
||||
|
||||
|
||||
@@ -1301,6 +1302,7 @@ virStorageSourceNetworkDefaultPort(virStorageNetProtocol protocol)
|
||||
case VIR_STORAGE_NET_PROTOCOL_GLUSTER:
|
||||
return 24007;
|
||||
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
case VIR_STORAGE_NET_PROTOCOL_RBD:
|
||||
/* we don't provide a default for RBD */
|
||||
return 0;
|
||||
diff --git a/src/conf/storage_source_conf.h b/src/conf/storage_source_conf.h
|
||||
index 05b4bda16c..b5ed143c39 100644
|
||||
--- a/src/conf/storage_source_conf.h
|
||||
+++ b/src/conf/storage_source_conf.h
|
||||
@@ -129,6 +129,7 @@ typedef enum {
|
||||
VIR_STORAGE_NET_PROTOCOL_SSH,
|
||||
VIR_STORAGE_NET_PROTOCOL_VXHS,
|
||||
VIR_STORAGE_NET_PROTOCOL_NFS,
|
||||
+ VIR_STORAGE_NET_PROTOCOL_VITASTOR,
|
||||
|
||||
VIR_STORAGE_NET_PROTOCOL_LAST
|
||||
} virStorageNetProtocol;
|
||||
diff --git a/src/conf/virstorageobj.c b/src/conf/virstorageobj.c
|
||||
index 59fa5da372..4739167f5f 100644
|
||||
--- a/src/conf/virstorageobj.c
|
||||
+++ b/src/conf/virstorageobj.c
|
||||
@@ -1438,6 +1438,7 @@ virStoragePoolObjSourceFindDuplicateCb(const void *payload,
|
||||
return 1;
|
||||
break;
|
||||
|
||||
+ case VIR_STORAGE_POOL_VITASTOR:
|
||||
case VIR_STORAGE_POOL_ISCSI_DIRECT:
|
||||
case VIR_STORAGE_POOL_RBD:
|
||||
case VIR_STORAGE_POOL_LAST:
|
||||
@@ -1921,6 +1922,8 @@ virStoragePoolObjMatch(virStoragePoolObj *obj,
|
||||
(obj->def->type == VIR_STORAGE_POOL_MPATH)) ||
|
||||
(MATCH(VIR_CONNECT_LIST_STORAGE_POOLS_RBD) &&
|
||||
(obj->def->type == VIR_STORAGE_POOL_RBD)) ||
|
||||
+ (MATCH(VIR_CONNECT_LIST_STORAGE_POOLS_VITASTOR) &&
|
||||
+ (obj->def->type == VIR_STORAGE_POOL_VITASTOR)) ||
|
||||
(MATCH(VIR_CONNECT_LIST_STORAGE_POOLS_SHEEPDOG) &&
|
||||
(obj->def->type == VIR_STORAGE_POOL_SHEEPDOG)) ||
|
||||
(MATCH(VIR_CONNECT_LIST_STORAGE_POOLS_GLUSTER) &&
|
||||
diff --git a/src/libvirt-storage.c b/src/libvirt-storage.c
|
||||
index db7660aac4..561df34709 100644
|
||||
--- a/src/libvirt-storage.c
|
||||
+++ b/src/libvirt-storage.c
|
||||
@@ -94,6 +94,7 @@ virStoragePoolGetConnect(virStoragePoolPtr pool)
|
||||
* VIR_CONNECT_LIST_STORAGE_POOLS_SCSI
|
||||
* VIR_CONNECT_LIST_STORAGE_POOLS_MPATH
|
||||
* VIR_CONNECT_LIST_STORAGE_POOLS_RBD
|
||||
+ * VIR_CONNECT_LIST_STORAGE_POOLS_VITASTOR
|
||||
* VIR_CONNECT_LIST_STORAGE_POOLS_SHEEPDOG
|
||||
* VIR_CONNECT_LIST_STORAGE_POOLS_GLUSTER
|
||||
* VIR_CONNECT_LIST_STORAGE_POOLS_ZFS
|
||||
diff --git a/src/libxl/libxl_conf.c b/src/libxl/libxl_conf.c
|
||||
index 62e1be6672..71a1d42896 100644
|
||||
--- a/src/libxl/libxl_conf.c
|
||||
+++ b/src/libxl/libxl_conf.c
|
||||
@@ -979,6 +979,7 @@ libxlMakeNetworkDiskSrcStr(virStorageSource *src,
|
||||
case VIR_STORAGE_NET_PROTOCOL_SSH:
|
||||
case VIR_STORAGE_NET_PROTOCOL_VXHS:
|
||||
case VIR_STORAGE_NET_PROTOCOL_NFS:
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
case VIR_STORAGE_NET_PROTOCOL_LAST:
|
||||
case VIR_STORAGE_NET_PROTOCOL_NONE:
|
||||
virReportError(VIR_ERR_NO_SUPPORT,
|
||||
diff --git a/src/libxl/xen_xl.c b/src/libxl/xen_xl.c
|
||||
index 53f6871efc..c34b8cee1a 100644
|
||||
--- a/src/libxl/xen_xl.c
|
||||
+++ b/src/libxl/xen_xl.c
|
||||
@@ -1456,6 +1456,7 @@ xenFormatXLDiskSrcNet(virStorageSource *src)
|
||||
case VIR_STORAGE_NET_PROTOCOL_SSH:
|
||||
case VIR_STORAGE_NET_PROTOCOL_VXHS:
|
||||
case VIR_STORAGE_NET_PROTOCOL_NFS:
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
case VIR_STORAGE_NET_PROTOCOL_LAST:
|
||||
case VIR_STORAGE_NET_PROTOCOL_NONE:
|
||||
virReportError(VIR_ERR_NO_SUPPORT,
|
||||
diff --git a/src/qemu/qemu_block.c b/src/qemu/qemu_block.c
|
||||
index 738b72d7ea..5dd082fc89 100644
|
||||
--- a/src/qemu/qemu_block.c
|
||||
+++ b/src/qemu/qemu_block.c
|
||||
@@ -758,6 +758,38 @@ qemuBlockStorageSourceGetRBDProps(virStorageSource *src,
|
||||
}
|
||||
|
||||
|
||||
+static virJSONValue *
|
||||
+qemuBlockStorageSourceGetVitastorProps(virStorageSource *src)
|
||||
+{
|
||||
+ virJSONValue *ret = NULL;
|
||||
+ virStorageNetHostDef *host;
|
||||
+ size_t i;
|
||||
+ g_auto(virBuffer) buf = VIR_BUFFER_INITIALIZER;
|
||||
+ g_autofree char *etcd = NULL;
|
||||
+
|
||||
+ for (i = 0; i < src->nhosts; i++) {
|
||||
+ host = src->hosts + i;
|
||||
+ if ((virStorageNetHostTransport)host->transport != VIR_STORAGE_NET_HOST_TRANS_TCP) {
|
||||
+ return NULL;
|
||||
+ }
|
||||
+ virBufferAsprintf(&buf, i > 0 ? ",%s:%u" : "%s:%u", host->name, host->port);
|
||||
+ }
|
||||
+ if (src->nhosts > 0) {
|
||||
+ etcd = virBufferContentAndReset(&buf);
|
||||
+ }
|
||||
+
|
||||
+ if (virJSONValueObjectAdd(&ret,
|
||||
+ "S:etcd-host", etcd,
|
||||
+ "S:etcd-prefix", src->query,
|
||||
+ "S:config-path", src->configFile,
|
||||
+ "s:image", src->path,
|
||||
+ NULL) < 0)
|
||||
+ return NULL;
|
||||
+
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
+
|
||||
static virJSONValue *
|
||||
qemuBlockStorageSourceGetSheepdogProps(virStorageSource *src)
|
||||
{
|
||||
@@ -1140,6 +1172,12 @@ qemuBlockStorageSourceGetBackendProps(virStorageSource *src,
|
||||
return NULL;
|
||||
break;
|
||||
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
+ driver = "vitastor";
|
||||
+ if (!(fileprops = qemuBlockStorageSourceGetVitastorProps(src)))
|
||||
+ return NULL;
|
||||
+ break;
|
||||
+
|
||||
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
|
||||
driver = "sheepdog";
|
||||
if (!(fileprops = qemuBlockStorageSourceGetSheepdogProps(src)))
|
||||
@@ -2020,6 +2058,7 @@ qemuBlockGetBackingStoreString(virStorageSource *src,
|
||||
|
||||
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
|
||||
case VIR_STORAGE_NET_PROTOCOL_RBD:
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
case VIR_STORAGE_NET_PROTOCOL_VXHS:
|
||||
case VIR_STORAGE_NET_PROTOCOL_NFS:
|
||||
case VIR_STORAGE_NET_PROTOCOL_SSH:
|
||||
@@ -2400,6 +2439,12 @@ qemuBlockStorageSourceCreateGetStorageProps(virStorageSource *src,
|
||||
return -1;
|
||||
break;
|
||||
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
+ driver = "vitastor";
|
||||
+ if (!(location = qemuBlockStorageSourceGetVitastorProps(src)))
|
||||
+ return -1;
|
||||
+ break;
|
||||
+
|
||||
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
|
||||
driver = "sheepdog";
|
||||
if (!(location = qemuBlockStorageSourceGetSheepdogProps(src)))
|
||||
diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
|
||||
index bda62f2e5c..84b4e5f2b8 100644
|
||||
--- a/src/qemu/qemu_domain.c
|
||||
+++ b/src/qemu/qemu_domain.c
|
||||
@@ -5260,7 +5260,8 @@ qemuDomainValidateStorageSource(virStorageSource *src,
|
||||
if (src->query &&
|
||||
(actualType != VIR_STORAGE_TYPE_NETWORK ||
|
||||
(src->protocol != VIR_STORAGE_NET_PROTOCOL_HTTPS &&
|
||||
- src->protocol != VIR_STORAGE_NET_PROTOCOL_HTTP))) {
|
||||
+ src->protocol != VIR_STORAGE_NET_PROTOCOL_HTTP &&
|
||||
+ src->protocol != VIR_STORAGE_NET_PROTOCOL_VITASTOR))) {
|
||||
virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
|
||||
_("query is supported only with HTTP(S) protocols"));
|
||||
return -1;
|
||||
@@ -10514,6 +10515,7 @@ qemuDomainPrepareStorageSourceTLS(virStorageSource *src,
|
||||
break;
|
||||
|
||||
case VIR_STORAGE_NET_PROTOCOL_RBD:
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
|
||||
case VIR_STORAGE_NET_PROTOCOL_GLUSTER:
|
||||
case VIR_STORAGE_NET_PROTOCOL_ISCSI:
|
||||
diff --git a/src/qemu/qemu_snapshot.c b/src/qemu/qemu_snapshot.c
|
||||
index f5260c4a22..2f9d8406fe 100644
|
||||
--- a/src/qemu/qemu_snapshot.c
|
||||
+++ b/src/qemu/qemu_snapshot.c
|
||||
@@ -423,6 +423,7 @@ qemuSnapshotPrepareDiskExternalInactive(virDomainSnapshotDiskDef *snapdisk,
|
||||
case VIR_STORAGE_NET_PROTOCOL_NONE:
|
||||
case VIR_STORAGE_NET_PROTOCOL_NBD:
|
||||
case VIR_STORAGE_NET_PROTOCOL_RBD:
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
|
||||
case VIR_STORAGE_NET_PROTOCOL_GLUSTER:
|
||||
case VIR_STORAGE_NET_PROTOCOL_ISCSI:
|
||||
@@ -648,6 +649,7 @@ qemuSnapshotPrepareDiskInternal(virDomainDiskDef *disk,
|
||||
case VIR_STORAGE_NET_PROTOCOL_NONE:
|
||||
case VIR_STORAGE_NET_PROTOCOL_NBD:
|
||||
case VIR_STORAGE_NET_PROTOCOL_RBD:
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
|
||||
case VIR_STORAGE_NET_PROTOCOL_GLUSTER:
|
||||
case VIR_STORAGE_NET_PROTOCOL_ISCSI:
|
||||
diff --git a/src/storage/storage_driver.c b/src/storage/storage_driver.c
|
||||
index 86c03762d2..630c6eff1a 100644
|
||||
--- a/src/storage/storage_driver.c
|
||||
+++ b/src/storage/storage_driver.c
|
||||
@@ -1626,6 +1626,7 @@ storageVolLookupByPathCallback(virStoragePoolObj *obj,
|
||||
|
||||
case VIR_STORAGE_POOL_GLUSTER:
|
||||
case VIR_STORAGE_POOL_RBD:
|
||||
+ case VIR_STORAGE_POOL_VITASTOR:
|
||||
case VIR_STORAGE_POOL_SHEEPDOG:
|
||||
case VIR_STORAGE_POOL_ZFS:
|
||||
case VIR_STORAGE_POOL_LAST:
|
||||
diff --git a/src/storage_file/storage_source_backingstore.c b/src/storage_file/storage_source_backingstore.c
|
||||
index 80681924ea..8a3ade9ec0 100644
|
||||
--- a/src/storage_file/storage_source_backingstore.c
|
||||
+++ b/src/storage_file/storage_source_backingstore.c
|
||||
@@ -287,6 +287,75 @@ virStorageSourceParseRBDColonString(const char *rbdstr,
|
||||
}
|
||||
|
||||
|
||||
+static int
|
||||
+virStorageSourceParseVitastorColonString(const char *colonstr,
|
||||
+ virStorageSource *src)
|
||||
+{
|
||||
+ char *p, *e, *next;
|
||||
+ g_autofree char *options = NULL;
|
||||
+
|
||||
+ /* optionally skip the "vitastor:" prefix if provided */
|
||||
+ if (STRPREFIX(colonstr, "vitastor:"))
|
||||
+ colonstr += strlen("vitastor:");
|
||||
+
|
||||
+ options = g_strdup(colonstr);
|
||||
+
|
||||
+ p = options;
|
||||
+ while (*p) {
|
||||
+ /* find : delimiter or end of string */
|
||||
+ for (e = p; *e && *e != ':'; ++e) {
|
||||
+ if (*e == '\\') {
|
||||
+ e++;
|
||||
+ if (*e == '\0')
|
||||
+ break;
|
||||
+ }
|
||||
+ }
|
||||
+ if (*e == '\0') {
|
||||
+ next = e; /* last kv pair */
|
||||
+ } else {
|
||||
+ next = e + 1;
|
||||
+ *e = '\0';
|
||||
+ }
|
||||
+
|
||||
+ if (STRPREFIX(p, "image=")) {
|
||||
+ src->path = g_strdup(p + strlen("image="));
|
||||
+ } else if (STRPREFIX(p, "etcd-prefix=")) {
|
||||
+ src->query = g_strdup(p + strlen("etcd-prefix="));
|
||||
+ } else if (STRPREFIX(p, "config-path=")) {
|
||||
+ src->configFile = g_strdup(p + strlen("config-path="));
|
||||
+ } else if (STRPREFIX(p, "etcd-host=")) {
|
||||
+ char *h, *sep;
|
||||
+
|
||||
+ h = p + strlen("etcd-host=");
|
||||
+ while (h < e) {
|
||||
+ for (sep = h; sep < e; ++sep) {
|
||||
+ if (*sep == '\\' && (sep[1] == ',' ||
|
||||
+ sep[1] == ';' ||
|
||||
+ sep[1] == ' ')) {
|
||||
+ *sep = '\0';
|
||||
+ sep += 2;
|
||||
+ break;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ if (virStorageSourceRBDAddHost(src, h) < 0)
|
||||
+ return -1;
|
||||
+
|
||||
+ h = sep;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ p = next;
|
||||
+ }
|
||||
+
|
||||
+ if (!src->path) {
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+
|
||||
static int
|
||||
virStorageSourceParseNBDColonString(const char *nbdstr,
|
||||
virStorageSource *src)
|
||||
@@ -399,6 +468,11 @@ virStorageSourceParseBackingColon(virStorageSource *src,
|
||||
return -1;
|
||||
break;
|
||||
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
+ if (virStorageSourceParseVitastorColonString(path, src) < 0)
|
||||
+ return -1;
|
||||
+ break;
|
||||
+
|
||||
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
|
||||
case VIR_STORAGE_NET_PROTOCOL_LAST:
|
||||
case VIR_STORAGE_NET_PROTOCOL_NONE:
|
||||
@@ -975,6 +1049,54 @@ virStorageSourceParseBackingJSONRBD(virStorageSource *src,
|
||||
return 0;
|
||||
}
|
||||
|
||||
+static int
|
||||
+virStorageSourceParseBackingJSONVitastor(virStorageSource *src,
|
||||
+ virJSONValue *json,
|
||||
+ const char *jsonstr G_GNUC_UNUSED,
|
||||
+ int opaque G_GNUC_UNUSED)
|
||||
+{
|
||||
+ const char *filename;
|
||||
+ const char *image = virJSONValueObjectGetString(json, "image");
|
||||
+ const char *conf = virJSONValueObjectGetString(json, "config-path");
|
||||
+ const char *etcd_prefix = virJSONValueObjectGetString(json, "etcd-prefix");
|
||||
+ virJSONValue *servers = virJSONValueObjectGetArray(json, "server");
|
||||
+ size_t nservers;
|
||||
+ size_t i;
|
||||
+
|
||||
+ src->type = VIR_STORAGE_TYPE_NETWORK;
|
||||
+ src->protocol = VIR_STORAGE_NET_PROTOCOL_VITASTOR;
|
||||
+
|
||||
+ /* legacy syntax passed via 'filename' option */
|
||||
+ if ((filename = virJSONValueObjectGetString(json, "filename")))
|
||||
+ return virStorageSourceParseVitastorColonString(filename, src);
|
||||
+
|
||||
+ if (!image) {
|
||||
+ virReportError(VIR_ERR_INVALID_ARG, "%s",
|
||||
+ _("missing image name in Vitastor backing volume "
|
||||
+ "JSON specification"));
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ src->path = g_strdup(image);
|
||||
+ src->configFile = g_strdup(conf);
|
||||
+ src->query = g_strdup(etcd_prefix);
|
||||
+
|
||||
+ if (servers) {
|
||||
+ nservers = virJSONValueArraySize(servers);
|
||||
+
|
||||
+ src->hosts = g_new0(virStorageNetHostDef, nservers);
|
||||
+ src->nhosts = nservers;
|
||||
+
|
||||
+ for (i = 0; i < nservers; i++) {
|
||||
+ if (virStorageSourceParseBackingJSONInetSocketAddress(src->hosts + i,
|
||||
+ virJSONValueArrayGet(servers, i)) < 0)
|
||||
+ return -1;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
static int
|
||||
virStorageSourceParseBackingJSONRaw(virStorageSource *src,
|
||||
virJSONValue *json,
|
||||
@@ -1152,6 +1274,7 @@ static const struct virStorageSourceJSONDriverParser jsonParsers[] = {
|
||||
{"sheepdog", false, virStorageSourceParseBackingJSONSheepdog, 0},
|
||||
{"ssh", false, virStorageSourceParseBackingJSONSSH, 0},
|
||||
{"rbd", false, virStorageSourceParseBackingJSONRBD, 0},
|
||||
+ {"vitastor", false, virStorageSourceParseBackingJSONVitastor, 0},
|
||||
{"raw", true, virStorageSourceParseBackingJSONRaw, 0},
|
||||
{"nfs", false, virStorageSourceParseBackingJSONNFS, 0},
|
||||
{"vxhs", false, virStorageSourceParseBackingJSONVxHS, 0},
|
||||
diff --git a/src/test/test_driver.c b/src/test/test_driver.c
|
||||
index d2d1bc43e3..31a92e4a01 100644
|
||||
--- a/src/test/test_driver.c
|
||||
+++ b/src/test/test_driver.c
|
||||
@@ -7339,6 +7339,7 @@ testStorageVolumeTypeForPool(int pooltype)
|
||||
case VIR_STORAGE_POOL_ISCSI_DIRECT:
|
||||
case VIR_STORAGE_POOL_GLUSTER:
|
||||
case VIR_STORAGE_POOL_RBD:
|
||||
+ case VIR_STORAGE_POOL_VITASTOR:
|
||||
return VIR_STORAGE_VOL_NETWORK;
|
||||
case VIR_STORAGE_POOL_LOGICAL:
|
||||
case VIR_STORAGE_POOL_DISK:
|
||||
diff --git a/tests/storagepoolcapsschemadata/poolcaps-fs.xml b/tests/storagepoolcapsschemadata/poolcaps-fs.xml
|
||||
index eee75af746..8bd0a57bdd 100644
|
||||
--- a/tests/storagepoolcapsschemadata/poolcaps-fs.xml
|
||||
+++ b/tests/storagepoolcapsschemadata/poolcaps-fs.xml
|
||||
@@ -204,4 +204,11 @@
|
||||
</enum>
|
||||
</volOptions>
|
||||
</pool>
|
||||
+ <pool type='vitastor' supported='no'>
|
||||
+ <volOptions>
|
||||
+ <defaultFormat type='raw'/>
|
||||
+ <enum name='targetFormatType'>
|
||||
+ </enum>
|
||||
+ </volOptions>
|
||||
+ </pool>
|
||||
</storagepoolCapabilities>
|
||||
diff --git a/tests/storagepoolcapsschemadata/poolcaps-full.xml b/tests/storagepoolcapsschemadata/poolcaps-full.xml
|
||||
index 805950a937..852df0de16 100644
|
||||
--- a/tests/storagepoolcapsschemadata/poolcaps-full.xml
|
||||
+++ b/tests/storagepoolcapsschemadata/poolcaps-full.xml
|
||||
@@ -204,4 +204,11 @@
|
||||
</enum>
|
||||
</volOptions>
|
||||
</pool>
|
||||
+ <pool type='vitastor' supported='yes'>
|
||||
+ <volOptions>
|
||||
+ <defaultFormat type='raw'/>
|
||||
+ <enum name='targetFormatType'>
|
||||
+ </enum>
|
||||
+ </volOptions>
|
||||
+ </pool>
|
||||
</storagepoolCapabilities>
|
||||
diff --git a/tests/storagepoolxml2argvtest.c b/tests/storagepoolxml2argvtest.c
|
||||
index e8e40d695e..db55fe5f3a 100644
|
||||
--- a/tests/storagepoolxml2argvtest.c
|
||||
+++ b/tests/storagepoolxml2argvtest.c
|
||||
@@ -65,6 +65,7 @@ testCompareXMLToArgvFiles(bool shouldFail,
|
||||
case VIR_STORAGE_POOL_GLUSTER:
|
||||
case VIR_STORAGE_POOL_ZFS:
|
||||
case VIR_STORAGE_POOL_VSTORAGE:
|
||||
+ case VIR_STORAGE_POOL_VITASTOR:
|
||||
case VIR_STORAGE_POOL_LAST:
|
||||
default:
|
||||
VIR_TEST_DEBUG("pool type '%s' has no xml2argv test", defTypeStr);
|
||||
diff --git a/tools/virsh-pool.c b/tools/virsh-pool.c
|
||||
index f9aad8ded0..64704b4288 100644
|
||||
--- a/tools/virsh-pool.c
|
||||
+++ b/tools/virsh-pool.c
|
||||
@@ -1187,6 +1187,9 @@ cmdPoolList(vshControl *ctl, const vshCmd *cmd G_GNUC_UNUSED)
|
||||
case VIR_STORAGE_POOL_VSTORAGE:
|
||||
flags |= VIR_CONNECT_LIST_STORAGE_POOLS_VSTORAGE;
|
||||
break;
|
||||
+ case VIR_STORAGE_POOL_VITASTOR:
|
||||
+ flags |= VIR_CONNECT_LIST_STORAGE_POOLS_VITASTOR;
|
||||
+ break;
|
||||
case VIR_STORAGE_POOL_LAST:
|
||||
break;
|
||||
}
|
288
patches/nova-28.diff
Normal file
288
patches/nova-28.diff
Normal file
@@ -0,0 +1,288 @@
|
||||
diff --git a/nova/virt/image/model.py b/nova/virt/image/model.py
|
||||
index 971f7e9c07..ec3fca72cb 100644
|
||||
--- a/nova/virt/image/model.py
|
||||
+++ b/nova/virt/image/model.py
|
||||
@@ -129,3 +129,22 @@ class RBDImage(Image):
|
||||
self.user = user
|
||||
self.password = password
|
||||
self.servers = servers
|
||||
+
|
||||
+
|
||||
+class VitastorImage(Image):
|
||||
+ """Class for images in a remote Vitastor cluster"""
|
||||
+
|
||||
+ def __init__(self, name, etcd_address = None, etcd_prefix = None, config_path = None):
|
||||
+ """Create a new Vitastor image object
|
||||
+
|
||||
+ :param name: name of the image
|
||||
+ :param etcd_address: etcd URL(s) (optional)
|
||||
+ :param etcd_prefix: etcd prefix (optional)
|
||||
+ :param config_path: path to the configuration (optional)
|
||||
+ """
|
||||
+ super(VitastorImage, self).__init__(FORMAT_RAW)
|
||||
+
|
||||
+ self.name = name
|
||||
+ self.etcd_address = etcd_address
|
||||
+ self.etcd_prefix = etcd_prefix
|
||||
+ self.config_path = config_path
|
||||
diff --git a/nova/virt/images.py b/nova/virt/images.py
|
||||
index 5358f3766a..ebe3d6effb 100644
|
||||
--- a/nova/virt/images.py
|
||||
+++ b/nova/virt/images.py
|
||||
@@ -41,7 +41,7 @@ IMAGE_API = glance.API()
|
||||
|
||||
def qemu_img_info(path, format=None):
|
||||
"""Return an object containing the parsed output from qemu-img info."""
|
||||
- if not os.path.exists(path) and not path.startswith('rbd:'):
|
||||
+ if not os.path.exists(path) and not path.startswith('rbd:') and not path.startswith('vitastor:'):
|
||||
raise exception.DiskNotFound(location=path)
|
||||
|
||||
info = nova.privsep.qemu.unprivileged_qemu_img_info(path, format=format)
|
||||
@@ -50,7 +50,7 @@ def qemu_img_info(path, format=None):
|
||||
|
||||
def privileged_qemu_img_info(path, format=None, output_format='json'):
|
||||
"""Return an object containing the parsed output from qemu-img info."""
|
||||
- if not os.path.exists(path) and not path.startswith('rbd:'):
|
||||
+ if not os.path.exists(path) and not path.startswith('rbd:') and not path.startswith('vitastor:'):
|
||||
raise exception.DiskNotFound(location=path)
|
||||
|
||||
info = nova.privsep.qemu.privileged_qemu_img_info(path, format=format)
|
||||
diff --git a/nova/virt/libvirt/config.py b/nova/virt/libvirt/config.py
|
||||
index f9475776b3..a2e18aab67 100644
|
||||
--- a/nova/virt/libvirt/config.py
|
||||
+++ b/nova/virt/libvirt/config.py
|
||||
@@ -1060,6 +1060,8 @@ class LibvirtConfigGuestDisk(LibvirtConfigGuestDevice):
|
||||
self.driver_iommu = False
|
||||
self.source_path = None
|
||||
self.source_protocol = None
|
||||
+ self.source_query = None
|
||||
+ self.source_config = None
|
||||
self.source_name = None
|
||||
self.source_hosts = []
|
||||
self.source_ports = []
|
||||
@@ -1189,6 +1191,10 @@ class LibvirtConfigGuestDisk(LibvirtConfigGuestDevice):
|
||||
source = etree.Element("source", protocol=self.source_protocol)
|
||||
if self.source_name is not None:
|
||||
source.set('name', self.source_name)
|
||||
+ if self.source_query is not None:
|
||||
+ source.set('query', self.source_query)
|
||||
+ if self.source_config is not None:
|
||||
+ source.append(etree.Element('config', file=self.source_config))
|
||||
hosts_info = zip(self.source_hosts, self.source_ports)
|
||||
for name, port in hosts_info:
|
||||
host = etree.Element('host', name=name)
|
||||
diff --git a/nova/virt/libvirt/driver.py b/nova/virt/libvirt/driver.py
|
||||
index 391231c527..f38faa1608 100644
|
||||
--- a/nova/virt/libvirt/driver.py
|
||||
+++ b/nova/virt/libvirt/driver.py
|
||||
@@ -179,6 +179,7 @@ VOLUME_DRIVERS = {
|
||||
'local': 'nova.virt.libvirt.volume.volume.LibvirtVolumeDriver',
|
||||
'fake': 'nova.virt.libvirt.volume.volume.LibvirtFakeVolumeDriver',
|
||||
'rbd': 'nova.virt.libvirt.volume.net.LibvirtNetVolumeDriver',
|
||||
+ 'vitastor': 'nova.virt.libvirt.volume.vitastor.LibvirtVitastorVolumeDriver',
|
||||
'nfs': 'nova.virt.libvirt.volume.nfs.LibvirtNFSVolumeDriver',
|
||||
'smbfs': 'nova.virt.libvirt.volume.smbfs.LibvirtSMBFSVolumeDriver',
|
||||
'fibre_channel': 'nova.virt.libvirt.volume.fibrechannel.LibvirtFibreChannelVolumeDriver', # noqa:E501
|
||||
@@ -385,10 +386,10 @@ class LibvirtDriver(driver.ComputeDriver):
|
||||
# This prevents the risk of one test setting a capability
|
||||
# which bleeds over into other tests.
|
||||
|
||||
- # LVM and RBD require raw images. If we are not configured to
|
||||
+ # LVM, RBD and Vitastor require raw images. If we are not configured to
|
||||
# force convert images into raw format, then we _require_ raw
|
||||
# images only.
|
||||
- raw_only = ('rbd', 'lvm')
|
||||
+ raw_only = ('rbd', 'lvm', 'vitastor')
|
||||
requires_raw_image = (CONF.libvirt.images_type in raw_only and
|
||||
not CONF.force_raw_images)
|
||||
requires_ploop_image = CONF.libvirt.virt_type == 'parallels'
|
||||
@@ -775,12 +776,12 @@ class LibvirtDriver(driver.ComputeDriver):
|
||||
# Some imagebackends are only able to import raw disk images,
|
||||
# and will fail if given any other format. See the bug
|
||||
# https://bugs.launchpad.net/nova/+bug/1816686 for more details.
|
||||
- if CONF.libvirt.images_type in ('rbd',):
|
||||
+ if CONF.libvirt.images_type in ('rbd', 'vitastor'):
|
||||
if not CONF.force_raw_images:
|
||||
msg = _("'[DEFAULT]/force_raw_images = False' is not "
|
||||
- "allowed with '[libvirt]/images_type = rbd'. "
|
||||
+ "allowed with '[libvirt]/images_type = rbd' or 'vitastor'. "
|
||||
"Please check the two configs and if you really "
|
||||
- "do want to use rbd as images_type, set "
|
||||
+ "do want to use rbd or vitastor as images_type, set "
|
||||
"force_raw_images to True.")
|
||||
raise exception.InvalidConfiguration(msg)
|
||||
|
||||
@@ -2603,6 +2604,16 @@ class LibvirtDriver(driver.ComputeDriver):
|
||||
if connection_info['data'].get('auth_enabled'):
|
||||
username = connection_info['data']['auth_username']
|
||||
path = f"rbd:{volume_name}:id={username}"
|
||||
+ elif connection_info['driver_volume_type'] == 'vitastor':
|
||||
+ volume_name = connection_info['data']['name']
|
||||
+ path = 'vitastor:image='+volume_name.replace(':', '\\:')
|
||||
+ for k in [ 'config_path', 'etcd_address', 'etcd_prefix' ]:
|
||||
+ if k in connection_info['data']:
|
||||
+ kk = k
|
||||
+ if kk == 'etcd_address':
|
||||
+ # FIXME use etcd_address in qemu driver
|
||||
+ kk = 'etcd_host'
|
||||
+ path += ":"+kk.replace('_', '-')+"="+connection_info['data'][k].replace(':', '\\:')
|
||||
else:
|
||||
path = 'unknown'
|
||||
raise exception.DiskNotFound(location='unknown')
|
||||
@@ -2827,8 +2838,8 @@ class LibvirtDriver(driver.ComputeDriver):
|
||||
|
||||
image_format = CONF.libvirt.snapshot_image_format or source_type
|
||||
|
||||
- # NOTE(bfilippov): save lvm and rbd as raw
|
||||
- if image_format == 'lvm' or image_format == 'rbd':
|
||||
+ # NOTE(bfilippov): save lvm, rbd and vitastor as raw
|
||||
+ if image_format == 'lvm' or image_format == 'rbd' or image_format == 'vitastor':
|
||||
image_format = 'raw'
|
||||
|
||||
metadata = self._create_snapshot_metadata(instance.image_meta,
|
||||
@@ -2899,7 +2910,7 @@ class LibvirtDriver(driver.ComputeDriver):
|
||||
expected_state=task_states.IMAGE_UPLOADING)
|
||||
|
||||
# TODO(nic): possibly abstract this out to the root_disk
|
||||
- if source_type == 'rbd' and live_snapshot:
|
||||
+ if (source_type == 'rbd' or source_type == 'vitastor') and live_snapshot:
|
||||
# Standard snapshot uses qemu-img convert from RBD which is
|
||||
# not safe to run with live_snapshot.
|
||||
live_snapshot = False
|
||||
@@ -4099,7 +4110,7 @@ class LibvirtDriver(driver.ComputeDriver):
|
||||
# cleanup rescue volume
|
||||
lvm.remove_volumes([lvmdisk for lvmdisk in self._lvm_disks(instance)
|
||||
if lvmdisk.endswith('.rescue')])
|
||||
- if CONF.libvirt.images_type == 'rbd':
|
||||
+ if CONF.libvirt.images_type == 'rbd' or CONF.libvirt.images_type == 'vitastor':
|
||||
filter_fn = lambda disk: (disk.startswith(instance.uuid) and
|
||||
disk.endswith('.rescue'))
|
||||
rbd_utils.RBDDriver().cleanup_volumes(filter_fn)
|
||||
@@ -4356,6 +4367,8 @@ class LibvirtDriver(driver.ComputeDriver):
|
||||
# TODO(mikal): there is a bug here if images_type has
|
||||
# changed since creation of the instance, but I am pretty
|
||||
# sure that this bug already exists.
|
||||
+ if CONF.libvirt.images_type == 'vitastor':
|
||||
+ return 'vitastor'
|
||||
return 'rbd' if CONF.libvirt.images_type == 'rbd' else 'raw'
|
||||
|
||||
@staticmethod
|
||||
@@ -4764,10 +4777,10 @@ class LibvirtDriver(driver.ComputeDriver):
|
||||
finally:
|
||||
# NOTE(mikal): if the config drive was imported into RBD,
|
||||
# then we no longer need the local copy
|
||||
- if CONF.libvirt.images_type == 'rbd':
|
||||
+ if CONF.libvirt.images_type == 'rbd' or CONF.libvirt.images_type == 'vitastor':
|
||||
LOG.info('Deleting local config drive %(path)s '
|
||||
- 'because it was imported into RBD.',
|
||||
- {'path': config_disk_local_path},
|
||||
+ 'because it was imported into %(type)s.',
|
||||
+ {'path': config_disk_local_path, 'type': CONF.libvirt.images_type},
|
||||
instance=instance)
|
||||
os.unlink(config_disk_local_path)
|
||||
|
||||
diff --git a/nova/virt/libvirt/utils.py b/nova/virt/libvirt/utils.py
|
||||
index da2a6e8b8a..52c02e72f1 100644
|
||||
--- a/nova/virt/libvirt/utils.py
|
||||
+++ b/nova/virt/libvirt/utils.py
|
||||
@@ -340,6 +340,10 @@ def find_disk(guest: libvirt_guest.Guest) -> ty.Tuple[str, ty.Optional[str]]:
|
||||
disk_path = disk.source_name
|
||||
if disk_path:
|
||||
disk_path = 'rbd:' + disk_path
|
||||
+ elif not disk_path and disk.source_protocol == 'vitastor':
|
||||
+ disk_path = disk.source_name
|
||||
+ if disk_path:
|
||||
+ disk_path = 'vitastor:' + disk_path
|
||||
|
||||
if not disk_path:
|
||||
raise RuntimeError(_("Can't retrieve root device path "
|
||||
@@ -354,6 +358,8 @@ def get_disk_type_from_path(path: str) -> ty.Optional[str]:
|
||||
return 'lvm'
|
||||
elif path.startswith('rbd:'):
|
||||
return 'rbd'
|
||||
+ elif path.startswith('vitastor:'):
|
||||
+ return 'vitastor'
|
||||
elif (os.path.isdir(path) and
|
||||
os.path.exists(os.path.join(path, "DiskDescriptor.xml"))):
|
||||
return 'ploop'
|
||||
diff --git a/nova/virt/libvirt/volume/vitastor.py b/nova/virt/libvirt/volume/vitastor.py
|
||||
new file mode 100644
|
||||
index 0000000000..0256df62c1
|
||||
--- /dev/null
|
||||
+++ b/nova/virt/libvirt/volume/vitastor.py
|
||||
@@ -0,0 +1,75 @@
|
||||
+# Copyright (c) 2021+, Vitaliy Filippov <vitalif@yourcmc.ru>
|
||||
+#
|
||||
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||
+# not use this file except in compliance with the License. You may obtain
|
||||
+# a copy of the License at
|
||||
+#
|
||||
+# http://www.apache.org/licenses/LICENSE-2.0
|
||||
+#
|
||||
+# Unless required by applicable law or agreed to in writing, software
|
||||
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
+# License for the specific language governing permissions and limitations
|
||||
+# under the License.
|
||||
+
|
||||
+from os_brick import exception as os_brick_exception
|
||||
+from os_brick import initiator
|
||||
+from os_brick.initiator import connector
|
||||
+from oslo_log import log as logging
|
||||
+
|
||||
+import nova.conf
|
||||
+from nova import utils
|
||||
+from nova.virt.libvirt.volume import volume as libvirt_volume
|
||||
+
|
||||
+
|
||||
+CONF = nova.conf.CONF
|
||||
+LOG = logging.getLogger(__name__)
|
||||
+
|
||||
+
|
||||
+class LibvirtVitastorVolumeDriver(libvirt_volume.LibvirtBaseVolumeDriver):
|
||||
+ """Driver to attach Vitastor volumes to libvirt."""
|
||||
+ def __init__(self, host):
|
||||
+ super(LibvirtVitastorVolumeDriver, self).__init__(host, is_block_dev=False)
|
||||
+
|
||||
+ def connect_volume(self, connection_info, instance):
|
||||
+ pass
|
||||
+
|
||||
+ def disconnect_volume(self, connection_info, instance, force=False):
|
||||
+ pass
|
||||
+
|
||||
+ def get_config(self, connection_info, disk_info):
|
||||
+ """Returns xml for libvirt."""
|
||||
+ conf = super(LibvirtVitastorVolumeDriver, self).get_config(connection_info, disk_info)
|
||||
+ conf.source_type = 'network'
|
||||
+ conf.source_protocol = 'vitastor'
|
||||
+ conf.source_name = connection_info['data'].get('name')
|
||||
+ conf.source_query = connection_info['data'].get('etcd_prefix') or None
|
||||
+ conf.source_config = connection_info['data'].get('config_path') or None
|
||||
+ conf.source_hosts = []
|
||||
+ conf.source_ports = []
|
||||
+ addresses = connection_info['data'].get('etcd_address', '')
|
||||
+ if addresses:
|
||||
+ if not isinstance(addresses, list):
|
||||
+ addresses = addresses.split(',')
|
||||
+ for addr in addresses:
|
||||
+ if addr.startswith('https://'):
|
||||
+ raise NotImplementedError('Vitastor block driver does not support SSL for etcd communication yet')
|
||||
+ if addr.startswith('http://'):
|
||||
+ addr = addr[7:]
|
||||
+ addr = addr.rstrip('/')
|
||||
+ if addr.endswith('/v3'):
|
||||
+ addr = addr[0:-3]
|
||||
+ p = addr.find('/')
|
||||
+ if p > 0:
|
||||
+ raise NotImplementedError('libvirt does not support custom URL paths for Vitastor etcd yet. Use /etc/vitastor/vitastor.conf')
|
||||
+ p = addr.find(':')
|
||||
+ port = '2379'
|
||||
+ if p > 0:
|
||||
+ port = addr[p+1:]
|
||||
+ addr = addr[0:p]
|
||||
+ conf.source_hosts.append(addr)
|
||||
+ conf.source_ports.append(port)
|
||||
+ return conf
|
||||
+
|
||||
+ def extend_volume(self, connection_info, instance, requested_size):
|
||||
+ return requested_size
|
193
patches/qemu-8.2-vitastor.patch
Normal file
193
patches/qemu-8.2-vitastor.patch
Normal file
@@ -0,0 +1,193 @@
|
||||
diff --git a/block/meson.build b/block/meson.build
|
||||
index 59ff6d380c..abde3715c2 100644
|
||||
--- a/block/meson.build
|
||||
+++ b/block/meson.build
|
||||
@@ -109,6 +109,7 @@ foreach m : [
|
||||
[libnfs, 'nfs', files('nfs.c')],
|
||||
[libssh, 'ssh', files('ssh.c')],
|
||||
[rbd, 'rbd', files('rbd.c')],
|
||||
+ [vitastor, 'vitastor', files('vitastor.c')],
|
||||
]
|
||||
if m[0].found()
|
||||
module_ss = ss.source_set()
|
||||
diff --git a/meson.build b/meson.build
|
||||
index 6c77d9687d..390683ee71 100644
|
||||
--- a/meson.build
|
||||
+++ b/meson.build
|
||||
@@ -1295,6 +1295,26 @@ if not get_option('rbd').auto() or have_block
|
||||
endif
|
||||
endif
|
||||
|
||||
+vitastor = not_found
|
||||
+if not get_option('vitastor').auto() or have_block
|
||||
+ libvitastor_client = cc.find_library('vitastor_client', has_headers: ['vitastor_c.h'],
|
||||
+ required: get_option('vitastor'))
|
||||
+ if libvitastor_client.found()
|
||||
+ if cc.links('''
|
||||
+ #include <vitastor_c.h>
|
||||
+ int main(void) {
|
||||
+ vitastor_c_create_qemu(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
|
||||
+ return 0;
|
||||
+ }''', dependencies: libvitastor_client)
|
||||
+ vitastor = declare_dependency(dependencies: libvitastor_client)
|
||||
+ elif get_option('vitastor').enabled()
|
||||
+ error('could not link libvitastor_client')
|
||||
+ else
|
||||
+ warning('could not link libvitastor_client, disabling')
|
||||
+ endif
|
||||
+ endif
|
||||
+endif
|
||||
+
|
||||
glusterfs = not_found
|
||||
glusterfs_ftruncate_has_stat = false
|
||||
glusterfs_iocb_has_stat = false
|
||||
@@ -2157,6 +2177,7 @@ endif
|
||||
config_host_data.set('CONFIG_OPENGL', opengl.found())
|
||||
config_host_data.set('CONFIG_PLUGIN', get_option('plugins'))
|
||||
config_host_data.set('CONFIG_RBD', rbd.found())
|
||||
+config_host_data.set('CONFIG_VITASTOR', vitastor.found())
|
||||
config_host_data.set('CONFIG_RDMA', rdma.found())
|
||||
config_host_data.set('CONFIG_RELOCATABLE', get_option('relocatable'))
|
||||
config_host_data.set('CONFIG_SAFESTACK', get_option('safe_stack'))
|
||||
@@ -4356,6 +4377,7 @@ summary_info += {'fdt support': fdt_opt == 'disabled' ? false : fdt_opt}
|
||||
summary_info += {'libcap-ng support': libcap_ng}
|
||||
summary_info += {'bpf support': libbpf}
|
||||
summary_info += {'rbd support': rbd}
|
||||
+summary_info += {'vitastor support': vitastor}
|
||||
summary_info += {'smartcard support': cacard}
|
||||
summary_info += {'U2F support': u2f}
|
||||
summary_info += {'libusb': libusb}
|
||||
diff --git a/meson_options.txt b/meson_options.txt
|
||||
index c9baeda639..85e1df5a56 100644
|
||||
--- a/meson_options.txt
|
||||
+++ b/meson_options.txt
|
||||
@@ -194,6 +194,8 @@ option('lzo', type : 'feature', value : 'auto',
|
||||
description: 'lzo compression support')
|
||||
option('rbd', type : 'feature', value : 'auto',
|
||||
description: 'Ceph block device driver')
|
||||
+option('vitastor', type : 'feature', value : 'auto',
|
||||
+ description: 'Vitastor block device driver')
|
||||
option('opengl', type : 'feature', value : 'auto',
|
||||
description: 'OpenGL support')
|
||||
option('rdma', type : 'feature', value : 'auto',
|
||||
diff --git a/qapi/block-core.json b/qapi/block-core.json
|
||||
index ca390c5700..d2dbaeb279 100644
|
||||
--- a/qapi/block-core.json
|
||||
+++ b/qapi/block-core.json
|
||||
@@ -3201,7 +3201,7 @@
|
||||
'parallels', 'preallocate', 'qcow', 'qcow2', 'qed', 'quorum',
|
||||
'raw', 'rbd',
|
||||
{ 'name': 'replication', 'if': 'CONFIG_REPLICATION' },
|
||||
- 'ssh', 'throttle', 'vdi', 'vhdx',
|
||||
+ 'ssh', 'throttle', 'vdi', 'vhdx', 'vitastor',
|
||||
{ 'name': 'virtio-blk-vfio-pci', 'if': 'CONFIG_BLKIO' },
|
||||
{ 'name': 'virtio-blk-vhost-user', 'if': 'CONFIG_BLKIO' },
|
||||
{ 'name': 'virtio-blk-vhost-vdpa', 'if': 'CONFIG_BLKIO' },
|
||||
@@ -4255,6 +4255,28 @@
|
||||
'*key-secret': 'str',
|
||||
'*server': ['InetSocketAddressBase'] } }
|
||||
|
||||
+##
|
||||
+# @BlockdevOptionsVitastor:
|
||||
+#
|
||||
+# Driver specific block device options for vitastor
|
||||
+#
|
||||
+# @image: Image name
|
||||
+# @inode: Inode number
|
||||
+# @pool: Pool ID
|
||||
+# @size: Desired image size in bytes
|
||||
+# @config-path: Path to Vitastor configuration
|
||||
+# @etcd-host: etcd connection address(es)
|
||||
+# @etcd-prefix: etcd key/value prefix
|
||||
+##
|
||||
+{ 'struct': 'BlockdevOptionsVitastor',
|
||||
+ 'data': { '*inode': 'uint64',
|
||||
+ '*pool': 'uint64',
|
||||
+ '*size': 'uint64',
|
||||
+ '*image': 'str',
|
||||
+ '*config-path': 'str',
|
||||
+ '*etcd-host': 'str',
|
||||
+ '*etcd-prefix': 'str' } }
|
||||
+
|
||||
##
|
||||
# @ReplicationMode:
|
||||
#
|
||||
@@ -4713,6 +4735,7 @@
|
||||
'throttle': 'BlockdevOptionsThrottle',
|
||||
'vdi': 'BlockdevOptionsGenericFormat',
|
||||
'vhdx': 'BlockdevOptionsGenericFormat',
|
||||
+ 'vitastor': 'BlockdevOptionsVitastor',
|
||||
'virtio-blk-vfio-pci':
|
||||
{ 'type': 'BlockdevOptionsVirtioBlkVfioPci',
|
||||
'if': 'CONFIG_BLKIO' },
|
||||
@@ -5148,6 +5171,20 @@
|
||||
'*cluster-size' : 'size',
|
||||
'*encrypt' : 'RbdEncryptionCreateOptions' } }
|
||||
|
||||
+##
|
||||
+# @BlockdevCreateOptionsVitastor:
|
||||
+#
|
||||
+# Driver specific image creation options for Vitastor.
|
||||
+#
|
||||
+# @location: Where to store the new image file. This location cannot
|
||||
+# point to a snapshot.
|
||||
+#
|
||||
+# @size: Size of the virtual disk in bytes
|
||||
+##
|
||||
+{ 'struct': 'BlockdevCreateOptionsVitastor',
|
||||
+ 'data': { 'location': 'BlockdevOptionsVitastor',
|
||||
+ 'size': 'size' } }
|
||||
+
|
||||
##
|
||||
# @BlockdevVmdkSubformat:
|
||||
#
|
||||
@@ -5370,6 +5407,7 @@
|
||||
'ssh': 'BlockdevCreateOptionsSsh',
|
||||
'vdi': 'BlockdevCreateOptionsVdi',
|
||||
'vhdx': 'BlockdevCreateOptionsVhdx',
|
||||
+ 'vitastor': 'BlockdevCreateOptionsVitastor',
|
||||
'vmdk': 'BlockdevCreateOptionsVmdk',
|
||||
'vpc': 'BlockdevCreateOptionsVpc'
|
||||
} }
|
||||
diff --git a/scripts/ci/org.centos/stream/8/x86_64/configure b/scripts/ci/org.centos/stream/8/x86_64/configure
|
||||
index 76781f17f4..ac5fe3aa08 100755
|
||||
--- a/scripts/ci/org.centos/stream/8/x86_64/configure
|
||||
+++ b/scripts/ci/org.centos/stream/8/x86_64/configure
|
||||
@@ -30,7 +30,7 @@
|
||||
--with-suffix="qemu-kvm" \
|
||||
--firmwarepath=/usr/share/qemu-firmware \
|
||||
--target-list="x86_64-softmmu" \
|
||||
---block-drv-rw-whitelist="qcow2,raw,file,host_device,nbd,iscsi,rbd,blkdebug,luks,null-co,nvme,copy-on-read,throttle,gluster" \
|
||||
+--block-drv-rw-whitelist="qcow2,raw,file,host_device,nbd,iscsi,rbd,vitastor,blkdebug,luks,null-co,nvme,copy-on-read,throttle,gluster" \
|
||||
--audio-drv-list="" \
|
||||
--block-drv-ro-whitelist="vmdk,vhdx,vpc,https,ssh" \
|
||||
--with-coroutine=ucontext \
|
||||
@@ -176,6 +176,7 @@
|
||||
--enable-opengl \
|
||||
--enable-pie \
|
||||
--enable-rbd \
|
||||
+--enable-vitastor \
|
||||
--enable-rdma \
|
||||
--enable-seccomp \
|
||||
--enable-snappy \
|
||||
diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh
|
||||
index 680fa3f581..dab422bf04 100644
|
||||
--- a/scripts/meson-buildoptions.sh
|
||||
+++ b/scripts/meson-buildoptions.sh
|
||||
@@ -168,6 +168,7 @@ meson_options_help() {
|
||||
printf "%s\n" ' qed qed image format support'
|
||||
printf "%s\n" ' qga-vss build QGA VSS support (broken with MinGW)'
|
||||
printf "%s\n" ' rbd Ceph block device driver'
|
||||
+ printf "%s\n" ' vitastor Vitastor block device driver'
|
||||
printf "%s\n" ' rdma Enable RDMA-based migration'
|
||||
printf "%s\n" ' replication replication support'
|
||||
printf "%s\n" ' rutabaga-gfx rutabaga_gfx support'
|
||||
@@ -445,6 +446,8 @@ _meson_option_parse() {
|
||||
--disable-qom-cast-debug) printf "%s" -Dqom_cast_debug=false ;;
|
||||
--enable-rbd) printf "%s" -Drbd=enabled ;;
|
||||
--disable-rbd) printf "%s" -Drbd=disabled ;;
|
||||
+ --enable-vitastor) printf "%s" -Dvitastor=enabled ;;
|
||||
+ --disable-vitastor) printf "%s" -Dvitastor=disabled ;;
|
||||
--enable-rdma) printf "%s" -Drdma=enabled ;;
|
||||
--disable-rdma) printf "%s" -Drdma=disabled ;;
|
||||
--enable-relocatable) printf "%s" -Drelocatable=true ;;
|
193
patches/qemu-9.0-vitastor.patch
Normal file
193
patches/qemu-9.0-vitastor.patch
Normal file
@@ -0,0 +1,193 @@
|
||||
diff --git a/block/meson.build b/block/meson.build
|
||||
index e1f03fd773..db0cfb2321 100644
|
||||
--- a/block/meson.build
|
||||
+++ b/block/meson.build
|
||||
@@ -114,6 +114,7 @@ foreach m : [
|
||||
[libnfs, 'nfs', files('nfs.c')],
|
||||
[libssh, 'ssh', files('ssh.c')],
|
||||
[rbd, 'rbd', files('rbd.c')],
|
||||
+ [vitastor, 'vitastor', files('vitastor.c')],
|
||||
]
|
||||
if m[0].found()
|
||||
module_ss = ss.source_set()
|
||||
diff --git a/meson.build b/meson.build
|
||||
index 91a0aa64c6..e8bc710578 100644
|
||||
--- a/meson.build
|
||||
+++ b/meson.build
|
||||
@@ -1452,6 +1452,26 @@ if not get_option('rbd').auto() or have_block
|
||||
endif
|
||||
endif
|
||||
|
||||
+vitastor = not_found
|
||||
+if not get_option('vitastor').auto() or have_block
|
||||
+ libvitastor_client = cc.find_library('vitastor_client', has_headers: ['vitastor_c.h'],
|
||||
+ required: get_option('vitastor'))
|
||||
+ if libvitastor_client.found()
|
||||
+ if cc.links('''
|
||||
+ #include <vitastor_c.h>
|
||||
+ int main(void) {
|
||||
+ vitastor_c_create_qemu(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
|
||||
+ return 0;
|
||||
+ }''', dependencies: libvitastor_client)
|
||||
+ vitastor = declare_dependency(dependencies: libvitastor_client)
|
||||
+ elif get_option('vitastor').enabled()
|
||||
+ error('could not link libvitastor_client')
|
||||
+ else
|
||||
+ warning('could not link libvitastor_client, disabling')
|
||||
+ endif
|
||||
+ endif
|
||||
+endif
|
||||
+
|
||||
glusterfs = not_found
|
||||
glusterfs_ftruncate_has_stat = false
|
||||
glusterfs_iocb_has_stat = false
|
||||
@@ -2250,6 +2270,7 @@ endif
|
||||
config_host_data.set('CONFIG_OPENGL', opengl.found())
|
||||
config_host_data.set('CONFIG_PLUGIN', get_option('plugins'))
|
||||
config_host_data.set('CONFIG_RBD', rbd.found())
|
||||
+config_host_data.set('CONFIG_VITASTOR', vitastor.found())
|
||||
config_host_data.set('CONFIG_RDMA', rdma.found())
|
||||
config_host_data.set('CONFIG_RELOCATABLE', get_option('relocatable'))
|
||||
config_host_data.set('CONFIG_SAFESTACK', get_option('safe_stack'))
|
||||
@@ -4443,6 +4464,7 @@ summary_info += {'fdt support': fdt_opt == 'disabled' ? false : fdt_opt}
|
||||
summary_info += {'libcap-ng support': libcap_ng}
|
||||
summary_info += {'bpf support': libbpf}
|
||||
summary_info += {'rbd support': rbd}
|
||||
+summary_info += {'vitastor support': vitastor}
|
||||
summary_info += {'smartcard support': cacard}
|
||||
summary_info += {'U2F support': u2f}
|
||||
summary_info += {'libusb': libusb}
|
||||
diff --git a/meson_options.txt b/meson_options.txt
|
||||
index 0a99a059ec..16dc440118 100644
|
||||
--- a/meson_options.txt
|
||||
+++ b/meson_options.txt
|
||||
@@ -194,6 +194,8 @@ option('lzo', type : 'feature', value : 'auto',
|
||||
description: 'lzo compression support')
|
||||
option('rbd', type : 'feature', value : 'auto',
|
||||
description: 'Ceph block device driver')
|
||||
+option('vitastor', type : 'feature', value : 'auto',
|
||||
+ description: 'Vitastor block device driver')
|
||||
option('opengl', type : 'feature', value : 'auto',
|
||||
description: 'OpenGL support')
|
||||
option('rdma', type : 'feature', value : 'auto',
|
||||
diff --git a/qapi/block-core.json b/qapi/block-core.json
|
||||
index 746d1694c2..199a146a0b 100644
|
||||
--- a/qapi/block-core.json
|
||||
+++ b/qapi/block-core.json
|
||||
@@ -3203,7 +3203,7 @@
|
||||
'parallels', 'preallocate', 'qcow', 'qcow2', 'qed', 'quorum',
|
||||
'raw', 'rbd',
|
||||
{ 'name': 'replication', 'if': 'CONFIG_REPLICATION' },
|
||||
- 'ssh', 'throttle', 'vdi', 'vhdx',
|
||||
+ 'ssh', 'throttle', 'vdi', 'vhdx', 'vitastor',
|
||||
{ 'name': 'virtio-blk-vfio-pci', 'if': 'CONFIG_BLKIO' },
|
||||
{ 'name': 'virtio-blk-vhost-user', 'if': 'CONFIG_BLKIO' },
|
||||
{ 'name': 'virtio-blk-vhost-vdpa', 'if': 'CONFIG_BLKIO' },
|
||||
@@ -4285,6 +4285,28 @@
|
||||
'*key-secret': 'str',
|
||||
'*server': ['InetSocketAddressBase'] } }
|
||||
|
||||
+##
|
||||
+# @BlockdevOptionsVitastor:
|
||||
+#
|
||||
+# Driver specific block device options for vitastor
|
||||
+#
|
||||
+# @image: Image name
|
||||
+# @inode: Inode number
|
||||
+# @pool: Pool ID
|
||||
+# @size: Desired image size in bytes
|
||||
+# @config-path: Path to Vitastor configuration
|
||||
+# @etcd-host: etcd connection address(es)
|
||||
+# @etcd-prefix: etcd key/value prefix
|
||||
+##
|
||||
+{ 'struct': 'BlockdevOptionsVitastor',
|
||||
+ 'data': { '*inode': 'uint64',
|
||||
+ '*pool': 'uint64',
|
||||
+ '*size': 'uint64',
|
||||
+ '*image': 'str',
|
||||
+ '*config-path': 'str',
|
||||
+ '*etcd-host': 'str',
|
||||
+ '*etcd-prefix': 'str' } }
|
||||
+
|
||||
##
|
||||
# @ReplicationMode:
|
||||
#
|
||||
@@ -4741,6 +4763,7 @@
|
||||
'throttle': 'BlockdevOptionsThrottle',
|
||||
'vdi': 'BlockdevOptionsGenericFormat',
|
||||
'vhdx': 'BlockdevOptionsGenericFormat',
|
||||
+ 'vitastor': 'BlockdevOptionsVitastor',
|
||||
'virtio-blk-vfio-pci':
|
||||
{ 'type': 'BlockdevOptionsVirtioBlkVfioPci',
|
||||
'if': 'CONFIG_BLKIO' },
|
||||
@@ -5180,6 +5203,20 @@
|
||||
'*cluster-size' : 'size',
|
||||
'*encrypt' : 'RbdEncryptionCreateOptions' } }
|
||||
|
||||
+##
|
||||
+# @BlockdevCreateOptionsVitastor:
|
||||
+#
|
||||
+# Driver specific image creation options for Vitastor.
|
||||
+#
|
||||
+# @location: Where to store the new image file. This location cannot
|
||||
+# point to a snapshot.
|
||||
+#
|
||||
+# @size: Size of the virtual disk in bytes
|
||||
+##
|
||||
+{ 'struct': 'BlockdevCreateOptionsVitastor',
|
||||
+ 'data': { 'location': 'BlockdevOptionsVitastor',
|
||||
+ 'size': 'size' } }
|
||||
+
|
||||
##
|
||||
# @BlockdevVmdkSubformat:
|
||||
#
|
||||
@@ -5402,6 +5439,7 @@
|
||||
'ssh': 'BlockdevCreateOptionsSsh',
|
||||
'vdi': 'BlockdevCreateOptionsVdi',
|
||||
'vhdx': 'BlockdevCreateOptionsVhdx',
|
||||
+ 'vitastor': 'BlockdevCreateOptionsVitastor',
|
||||
'vmdk': 'BlockdevCreateOptionsVmdk',
|
||||
'vpc': 'BlockdevCreateOptionsVpc'
|
||||
} }
|
||||
diff --git a/scripts/ci/org.centos/stream/8/x86_64/configure b/scripts/ci/org.centos/stream/8/x86_64/configure
|
||||
index 76781f17f4..ac5fe3aa08 100755
|
||||
--- a/scripts/ci/org.centos/stream/8/x86_64/configure
|
||||
+++ b/scripts/ci/org.centos/stream/8/x86_64/configure
|
||||
@@ -30,7 +30,7 @@
|
||||
--with-suffix="qemu-kvm" \
|
||||
--firmwarepath=/usr/share/qemu-firmware \
|
||||
--target-list="x86_64-softmmu" \
|
||||
---block-drv-rw-whitelist="qcow2,raw,file,host_device,nbd,iscsi,rbd,blkdebug,luks,null-co,nvme,copy-on-read,throttle,gluster" \
|
||||
+--block-drv-rw-whitelist="qcow2,raw,file,host_device,nbd,iscsi,rbd,vitastor,blkdebug,luks,null-co,nvme,copy-on-read,throttle,gluster" \
|
||||
--audio-drv-list="" \
|
||||
--block-drv-ro-whitelist="vmdk,vhdx,vpc,https,ssh" \
|
||||
--with-coroutine=ucontext \
|
||||
@@ -176,6 +176,7 @@
|
||||
--enable-opengl \
|
||||
--enable-pie \
|
||||
--enable-rbd \
|
||||
+--enable-vitastor \
|
||||
--enable-rdma \
|
||||
--enable-seccomp \
|
||||
--enable-snappy \
|
||||
diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh
|
||||
index 680fa3f581..dab422bf04 100644
|
||||
--- a/scripts/meson-buildoptions.sh
|
||||
+++ b/scripts/meson-buildoptions.sh
|
||||
@@ -168,6 +168,7 @@ meson_options_help() {
|
||||
printf "%s\n" ' qed qed image format support'
|
||||
printf "%s\n" ' qga-vss build QGA VSS support (broken with MinGW)'
|
||||
printf "%s\n" ' rbd Ceph block device driver'
|
||||
+ printf "%s\n" ' vitastor Vitastor block device driver'
|
||||
printf "%s\n" ' rdma Enable RDMA-based migration'
|
||||
printf "%s\n" ' replication replication support'
|
||||
printf "%s\n" ' rutabaga-gfx rutabaga_gfx support'
|
||||
@@ -445,6 +446,8 @@ _meson_option_parse() {
|
||||
--disable-qom-cast-debug) printf "%s" -Dqom_cast_debug=false ;;
|
||||
--enable-rbd) printf "%s" -Drbd=enabled ;;
|
||||
--disable-rbd) printf "%s" -Drbd=disabled ;;
|
||||
+ --enable-vitastor) printf "%s" -Dvitastor=enabled ;;
|
||||
+ --disable-vitastor) printf "%s" -Dvitastor=disabled ;;
|
||||
--enable-rdma) printf "%s" -Drdma=enabled ;;
|
||||
--disable-rdma) printf "%s" -Drdma=disabled ;;
|
||||
--enable-relocatable) printf "%s" -Drelocatable=true ;;
|
@@ -18,10 +18,11 @@ fi
|
||||
cd ~/rpmbuild/SPECS
|
||||
rpmbuild -bp fio.spec
|
||||
cd $VITASTOR
|
||||
VER=$(grep ^Version: rpm/vitastor-el7.spec | awk '{print $2}')
|
||||
ln -s ~/rpmbuild/BUILD/fio*/ fio
|
||||
sh copy-fio-includes.sh
|
||||
rm fio
|
||||
mv fio-copy fio
|
||||
FIO=`rpm -qi fio | perl -e 'while(<>) { /^Epoch[\s:]+(\S+)/ && print "$1:"; /^Version[\s:]+(\S+)/ && print $1; /^Release[\s:]+(\S+)/ && print "-$1"; }'`
|
||||
perl -i -pe 's/(Requires:\s*fio)([^\n]+)?/$1 = '$FIO'/' $VITASTOR/rpm/vitastor-el$EL.spec
|
||||
tar --transform 's#^#vitastor-1.6.1/#' --exclude 'rpm/*.rpm' -czf $VITASTOR/../vitastor-1.6.1$(rpm --eval '%dist').tar.gz *
|
||||
tar --transform "s#^#vitastor-$VER/#" --exclude 'rpm/*.rpm' -czf $VITASTOR/../vitastor-$VER$(rpm --eval '%dist').tar.gz *
|
||||
|
@@ -36,7 +36,8 @@ ADD . /root/vitastor
|
||||
RUN set -e; \
|
||||
cd /root/vitastor/rpm; \
|
||||
sh build-tarball.sh; \
|
||||
cp /root/vitastor-1.6.1.el7.tar.gz ~/rpmbuild/SOURCES; \
|
||||
VER=$(grep ^Version: vitastor-el7.spec | awk '{print $2}'); \
|
||||
cp /root/vitastor-$VER.el7.tar.gz ~/rpmbuild/SOURCES; \
|
||||
cp vitastor-el7.spec ~/rpmbuild/SPECS/vitastor.spec; \
|
||||
cd ~/rpmbuild/SPECS/; \
|
||||
rpmbuild -ba vitastor.spec; \
|
||||
|
@@ -1,11 +1,11 @@
|
||||
Name: vitastor
|
||||
Version: 1.6.1
|
||||
Version: 1.7.1
|
||||
Release: 1%{?dist}
|
||||
Summary: Vitastor, a fast software-defined clustered block storage
|
||||
|
||||
License: Vitastor Network Public License 1.1
|
||||
URL: https://vitastor.io/
|
||||
Source0: vitastor-1.6.1.el7.tar.gz
|
||||
Source0: vitastor-1.7.1.el7.tar.gz
|
||||
|
||||
BuildRequires: liburing-devel >= 0.6
|
||||
BuildRequires: gperftools-devel
|
||||
@@ -108,10 +108,11 @@ npm install --production
|
||||
cd ..
|
||||
mkdir -p %buildroot/usr/lib/vitastor
|
||||
cp -r mon %buildroot/usr/lib/vitastor
|
||||
mv %buildroot/usr/lib/vitastor/mon/scripts/make-etcd %buildroot/usr/lib/vitastor/mon/
|
||||
mkdir -p %buildroot/lib/systemd/system
|
||||
cp mon/vitastor.target mon/vitastor-mon.service mon/vitastor-osd@.service %buildroot/lib/systemd/system
|
||||
cp mon/scripts/vitastor.target mon/scripts/vitastor-mon.service mon/scripts/vitastor-osd@.service %buildroot/lib/systemd/system
|
||||
mkdir -p %buildroot/lib/udev/rules.d
|
||||
cp mon/90-vitastor.rules %buildroot/lib/udev/rules.d
|
||||
cp mon/scripts/90-vitastor.rules %buildroot/lib/udev/rules.d
|
||||
|
||||
|
||||
%files
|
||||
@@ -143,6 +144,8 @@ mkdir -p /etc/vitastor
|
||||
groupadd -r -f vitastor 2>/dev/null ||:
|
||||
useradd -r -g vitastor -s /sbin/nologin -c "Vitastor daemons" -M -d /nonexistent vitastor 2>/dev/null ||:
|
||||
mkdir -p /etc/vitastor
|
||||
mkdir -p /var/lib/vitastor
|
||||
chown vitastor:vitastor /var/lib/vitastor
|
||||
|
||||
|
||||
%files -n vitastor-client
|
||||
@@ -160,6 +163,7 @@ mkdir -p /etc/vitastor
|
||||
|
||||
%files -n vitastor-client-devel
|
||||
%_includedir/vitastor_c.h
|
||||
%_includedir/vitastor_kv.h
|
||||
%_libdir/pkgconfig
|
||||
|
||||
|
||||
|
@@ -35,7 +35,8 @@ ADD . /root/vitastor
|
||||
RUN set -e; \
|
||||
cd /root/vitastor/rpm; \
|
||||
sh build-tarball.sh; \
|
||||
cp /root/vitastor-1.6.1.el8.tar.gz ~/rpmbuild/SOURCES; \
|
||||
VER=$(grep ^Version: vitastor-el8.spec | awk '{print $2}'); \
|
||||
cp /root/vitastor-$VER.el8.tar.gz ~/rpmbuild/SOURCES; \
|
||||
cp vitastor-el8.spec ~/rpmbuild/SPECS/vitastor.spec; \
|
||||
cd ~/rpmbuild/SPECS/; \
|
||||
rpmbuild -ba vitastor.spec; \
|
||||
|
@@ -1,11 +1,11 @@
|
||||
Name: vitastor
|
||||
Version: 1.6.1
|
||||
Version: 1.7.1
|
||||
Release: 1%{?dist}
|
||||
Summary: Vitastor, a fast software-defined clustered block storage
|
||||
|
||||
License: Vitastor Network Public License 1.1
|
||||
URL: https://vitastor.io/
|
||||
Source0: vitastor-1.6.1.el8.tar.gz
|
||||
Source0: vitastor-1.7.1.el8.tar.gz
|
||||
|
||||
BuildRequires: liburing-devel >= 0.6
|
||||
BuildRequires: gperftools-devel
|
||||
@@ -105,10 +105,11 @@ npm install --production
|
||||
cd ..
|
||||
mkdir -p %buildroot/usr/lib/vitastor
|
||||
cp -r mon %buildroot/usr/lib/vitastor
|
||||
mv %buildroot/usr/lib/vitastor/mon/scripts/make-etcd %buildroot/usr/lib/vitastor/mon/
|
||||
mkdir -p %buildroot/lib/systemd/system
|
||||
cp mon/vitastor.target mon/vitastor-mon.service mon/vitastor-osd@.service %buildroot/lib/systemd/system
|
||||
cp mon/scripts/vitastor.target mon/scripts/vitastor-mon.service mon/scripts/vitastor-osd@.service %buildroot/lib/systemd/system
|
||||
mkdir -p %buildroot/lib/udev/rules.d
|
||||
cp mon/90-vitastor.rules %buildroot/lib/udev/rules.d
|
||||
cp mon/scripts/90-vitastor.rules %buildroot/lib/udev/rules.d
|
||||
|
||||
|
||||
%files
|
||||
@@ -140,6 +141,8 @@ mkdir -p /etc/vitastor
|
||||
groupadd -r -f vitastor 2>/dev/null ||:
|
||||
useradd -r -g vitastor -s /sbin/nologin -c "Vitastor daemons" -M -d /nonexistent vitastor 2>/dev/null ||:
|
||||
mkdir -p /etc/vitastor
|
||||
mkdir -p /var/lib/vitastor
|
||||
chown vitastor:vitastor /var/lib/vitastor
|
||||
|
||||
|
||||
%files -n vitastor-client
|
||||
@@ -157,6 +160,7 @@ mkdir -p /etc/vitastor
|
||||
|
||||
%files -n vitastor-client-devel
|
||||
%_includedir/vitastor_c.h
|
||||
%_includedir/vitastor_kv.h
|
||||
%_libdir/pkgconfig
|
||||
|
||||
|
||||
|
@@ -18,7 +18,8 @@ ADD . /root/vitastor
|
||||
RUN set -e; \
|
||||
cd /root/vitastor/rpm; \
|
||||
sh build-tarball.sh; \
|
||||
cp /root/vitastor-1.6.1.el9.tar.gz ~/rpmbuild/SOURCES; \
|
||||
VER=$(grep ^Version: vitastor-el9.spec | awk '{print $2}'); \
|
||||
cp /root/vitastor-$VER.el9.tar.gz ~/rpmbuild/SOURCES; \
|
||||
cp vitastor-el9.spec ~/rpmbuild/SPECS/vitastor.spec; \
|
||||
cd ~/rpmbuild/SPECS/; \
|
||||
rpmbuild -ba vitastor.spec; \
|
||||
|
@@ -1,11 +1,11 @@
|
||||
Name: vitastor
|
||||
Version: 1.6.1
|
||||
Version: 1.7.1
|
||||
Release: 1%{?dist}
|
||||
Summary: Vitastor, a fast software-defined clustered block storage
|
||||
|
||||
License: Vitastor Network Public License 1.1
|
||||
URL: https://vitastor.io/
|
||||
Source0: vitastor-1.6.1.el9.tar.gz
|
||||
Source0: vitastor-1.7.1.el9.tar.gz
|
||||
|
||||
BuildRequires: liburing-devel >= 0.6
|
||||
BuildRequires: gperftools-devel
|
||||
@@ -98,10 +98,11 @@ npm install --production
|
||||
cd ..
|
||||
mkdir -p %buildroot/usr/lib/vitastor
|
||||
cp -r mon %buildroot/usr/lib/vitastor
|
||||
mv %buildroot/usr/lib/vitastor/mon/scripts/make-etcd %buildroot/usr/lib/vitastor/mon/
|
||||
mkdir -p %buildroot/lib/systemd/system
|
||||
cp mon/vitastor.target mon/vitastor-mon.service mon/vitastor-osd@.service %buildroot/lib/systemd/system
|
||||
cp mon/scripts/vitastor.target mon/scripts/vitastor-mon.service mon/scripts/vitastor-osd@.service %buildroot/lib/systemd/system
|
||||
mkdir -p %buildroot/lib/udev/rules.d
|
||||
cp mon/90-vitastor.rules %buildroot/lib/udev/rules.d
|
||||
cp mon/scripts/90-vitastor.rules %buildroot/lib/udev/rules.d
|
||||
|
||||
|
||||
%files
|
||||
@@ -133,6 +134,8 @@ mkdir -p /etc/vitastor
|
||||
groupadd -r -f vitastor 2>/dev/null ||:
|
||||
useradd -r -g vitastor -s /sbin/nologin -c "Vitastor daemons" -M -d /nonexistent vitastor 2>/dev/null ||:
|
||||
mkdir -p /etc/vitastor
|
||||
mkdir -p /var/lib/vitastor
|
||||
chown vitastor:vitastor /var/lib/vitastor
|
||||
|
||||
|
||||
%files -n vitastor-client
|
||||
@@ -150,6 +153,7 @@ mkdir -p /etc/vitastor
|
||||
|
||||
%files -n vitastor-client-devel
|
||||
%_includedir/vitastor_c.h
|
||||
%_includedir/vitastor_kv.h
|
||||
%_libdir/pkgconfig
|
||||
|
||||
|
||||
|
@@ -19,7 +19,7 @@ if("${CMAKE_INSTALL_PREFIX}" MATCHES "^/usr/local/?$")
|
||||
set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}")
|
||||
endif()
|
||||
|
||||
add_definitions(-DVERSION="1.6.1")
|
||||
add_definitions(-DVERSION="1.7.1")
|
||||
add_definitions(-D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -Wall -Wno-sign-compare -Wno-comment -Wno-parentheses -Wno-pointer-arith -fdiagnostics-color=always -fno-omit-frame-pointer -I ${CMAKE_SOURCE_DIR}/src)
|
||||
add_link_options(-fno-omit-frame-pointer)
|
||||
if (${WITH_ASAN})
|
||||
|
@@ -366,6 +366,7 @@ resume_0:
|
||||
!flusher->flush_queue.size() || !flusher->dequeuing)
|
||||
{
|
||||
stop_flusher:
|
||||
flusher->dequeuing = false;
|
||||
if (flusher->trim_wanted > 0 && try_trim)
|
||||
{
|
||||
// Attempt forced trim
|
||||
@@ -373,7 +374,6 @@ stop_flusher:
|
||||
flusher->active_flushers++;
|
||||
goto trim_journal;
|
||||
}
|
||||
flusher->dequeuing = false;
|
||||
wait_state = 0;
|
||||
return true;
|
||||
}
|
||||
|
@@ -12,6 +12,7 @@ add_library(vitastor_common STATIC
|
||||
msgr_stop.cpp msgr_op.cpp msgr_send.cpp msgr_receive.cpp ../util/ringloop.cpp ../../json11/json11.cpp
|
||||
http_client.cpp osd_ops.cpp pg_states.cpp ../util/timerfd_manager.cpp ../util/str_util.cpp ${MSGR_RDMA}
|
||||
)
|
||||
target_link_libraries(vitastor_common pthread)
|
||||
target_compile_options(vitastor_common PUBLIC -fPIC)
|
||||
|
||||
# libvitastor_client.so
|
||||
|
@@ -34,7 +34,7 @@ cluster_client_t::cluster_client_t(ring_loop_t *ringloop, timerfd_manager_t *tfd
|
||||
{
|
||||
// peer_osd just dropped connection
|
||||
// determine WHICH dirty_buffers are now obsolete and repeat them
|
||||
if (wb->repeat_ops_for(this, peer_osd) > 0)
|
||||
if (wb->repeat_ops_for(this, peer_osd, 0, 0) > 0)
|
||||
{
|
||||
continue_ops();
|
||||
}
|
||||
@@ -52,7 +52,8 @@ cluster_client_t::cluster_client_t(ring_loop_t *ringloop, timerfd_manager_t *tfd
|
||||
st_cli.tfd = tfd;
|
||||
st_cli.on_load_config_hook = [this](json11::Json::object & cfg) { on_load_config_hook(cfg); };
|
||||
st_cli.on_change_osd_state_hook = [this](uint64_t peer_osd) { on_change_osd_state_hook(peer_osd); };
|
||||
st_cli.on_change_hook = [this](std::map<std::string, etcd_kv_t> & changes) { on_change_hook(changes); };
|
||||
st_cli.on_change_pool_config_hook = [this]() { on_change_pool_config_hook(); };
|
||||
st_cli.on_change_pg_state_hook = [this](pool_id_t pool_id, pg_num_t pg_num, osd_num_t prev_primary) { on_change_pg_state_hook(pool_id, pg_num, prev_primary); };
|
||||
st_cli.on_load_pgs_hook = [this](bool success) { on_load_pgs_hook(success); };
|
||||
st_cli.on_reload_hook = [this]() { st_cli.load_global_config(); };
|
||||
|
||||
@@ -77,11 +78,6 @@ cluster_client_t::~cluster_client_t()
|
||||
|
||||
cluster_op_t::~cluster_op_t()
|
||||
{
|
||||
if (buf)
|
||||
{
|
||||
free(buf);
|
||||
buf = NULL;
|
||||
}
|
||||
if (bitmap_buf)
|
||||
{
|
||||
free(bitmap_buf);
|
||||
@@ -427,7 +423,7 @@ void cluster_client_t::on_load_pgs_hook(bool success)
|
||||
continue_ops();
|
||||
}
|
||||
|
||||
void cluster_client_t::on_change_hook(std::map<std::string, etcd_kv_t> & changes)
|
||||
void cluster_client_t::on_change_pool_config_hook()
|
||||
{
|
||||
for (auto pool_item: st_cli.pool_config)
|
||||
{
|
||||
@@ -450,6 +446,19 @@ void cluster_client_t::on_change_hook(std::map<std::string, etcd_kv_t> & changes
|
||||
continue_ops();
|
||||
}
|
||||
|
||||
void cluster_client_t::on_change_pg_state_hook(pool_id_t pool_id, pg_num_t pg_num, osd_num_t prev_primary)
|
||||
{
|
||||
auto & pg_cfg = st_cli.pool_config[pool_id].pg_config[pg_num];
|
||||
if (pg_cfg.cur_primary != prev_primary)
|
||||
{
|
||||
// Repeat this PG operations because an OSD which stopped being primary may not fsync operations
|
||||
if (wb->repeat_ops_for(this, 0, pool_id, pg_num) > 0)
|
||||
{
|
||||
continue_ops();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool cluster_client_t::get_immediate_commit(uint64_t inode)
|
||||
{
|
||||
if (enable_writeback)
|
||||
@@ -570,6 +579,14 @@ void cluster_client_t::execute_internal(cluster_op_t *op)
|
||||
{
|
||||
op->cur_inode = op->inode;
|
||||
op->retval = 0;
|
||||
op->state = 0;
|
||||
op->retry_after = 0;
|
||||
op->inflight_count = 0;
|
||||
op->done_count = 0;
|
||||
op->part_bitmaps = NULL;
|
||||
op->bitmap_buf_size = 0;
|
||||
op->prev_wait = 0;
|
||||
assert(!op->prev && !op->next);
|
||||
// check alignment, readonly flag and so on
|
||||
if (!check_rw(op))
|
||||
{
|
||||
@@ -600,7 +617,9 @@ void cluster_client_t::execute_internal(cluster_op_t *op)
|
||||
{
|
||||
if (!(op->flags & OP_FLUSH_BUFFER) && !op->version /* no CAS write-repeat */)
|
||||
{
|
||||
wb->copy_write(op, CACHE_WRITTEN);
|
||||
uint64_t flush_id = ++wb->last_flush_id;
|
||||
wb->copy_write(op, CACHE_REPEATING, flush_id);
|
||||
op->flush_id = flush_id;
|
||||
}
|
||||
if (dirty_bytes >= client_max_dirty_bytes || dirty_ops >= client_max_dirty_ops)
|
||||
{
|
||||
@@ -816,6 +835,10 @@ resume_2:
|
||||
auto & pool_cfg = st_cli.pool_config.at(INODE_POOL(op->inode));
|
||||
op->retval = op->len / pool_cfg.bitmap_granularity;
|
||||
}
|
||||
if (op->flush_id)
|
||||
{
|
||||
wb->mark_flush_written(op->inode, op->offset, op->len, op->flush_id);
|
||||
}
|
||||
erase_op(op);
|
||||
return 1;
|
||||
}
|
||||
@@ -988,6 +1011,29 @@ void cluster_client_t::slice_rw(cluster_op_t *op)
|
||||
}
|
||||
}
|
||||
|
||||
bool cluster_client_t::affects_pg(uint64_t inode, uint64_t offset, uint64_t len, pool_id_t pool_id, pg_num_t pg_num)
|
||||
{
|
||||
if (INODE_POOL(inode) != pool_id)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
auto & pool_cfg = st_cli.pool_config.at(INODE_POOL(inode));
|
||||
uint32_t pg_data_size = (pool_cfg.scheme == POOL_SCHEME_REPLICATED ? 1 : pool_cfg.pg_size-pool_cfg.parity_chunks);
|
||||
uint64_t pg_block_size = pool_cfg.data_block_size * pg_data_size;
|
||||
uint64_t first_stripe = (offset / pg_block_size) * pg_block_size;
|
||||
uint64_t last_stripe = len > 0 ? ((offset + len - 1) / pg_block_size) * pg_block_size : first_stripe;
|
||||
if ((last_stripe/pool_cfg.pg_stripe_size) - (first_stripe/pool_cfg.pg_stripe_size) + 1 >= pool_cfg.real_pg_count)
|
||||
{
|
||||
// All PGs are affected
|
||||
return true;
|
||||
}
|
||||
pg_num_t first_pg_num = (first_stripe/pool_cfg.pg_stripe_size) % pool_cfg.real_pg_count + 1; // like map_to_pg()
|
||||
pg_num_t last_pg_num = (last_stripe/pool_cfg.pg_stripe_size) % pool_cfg.real_pg_count + 1; // like map_to_pg()
|
||||
return (first_pg_num <= last_pg_num
|
||||
? (pg_num >= first_pg_num && pg_num <= last_pg_num)
|
||||
: (pg_num >= first_pg_num || pg_num <= last_pg_num));
|
||||
}
|
||||
|
||||
bool cluster_client_t::affects_osd(uint64_t inode, uint64_t offset, uint64_t len, osd_num_t osd)
|
||||
{
|
||||
auto & pool_cfg = st_cli.pool_config.at(INODE_POOL(inode));
|
||||
@@ -1210,7 +1256,9 @@ void cluster_client_t::handle_op_part(cluster_op_part_t *part)
|
||||
// So do all these things after modifying operation state, otherwise we may hit reenterability bugs
|
||||
// FIXME postpone such things to set_immediate here to avoid bugs
|
||||
// Set op->retry_after to retry operation after a short pause (not immediately)
|
||||
if (!op->retry_after)
|
||||
if (!op->retry_after && (op->retval == -EPIPE ||
|
||||
op->retval == -EIO && client_eio_retry_interval ||
|
||||
op->retval == -ENOSPC && client_retry_enospc))
|
||||
{
|
||||
op->retry_after = op->retval != -EPIPE ? client_eio_retry_interval : client_retry_interval;
|
||||
}
|
||||
|
@@ -56,8 +56,6 @@ struct cluster_op_t
|
||||
protected:
|
||||
int state = 0;
|
||||
uint64_t cur_inode; // for snapshot reads
|
||||
void *buf = NULL;
|
||||
cluster_op_t *orig_op = NULL;
|
||||
bool needs_reslice = false;
|
||||
int retry_after = 0;
|
||||
int inflight_count = 0, done_count = 0;
|
||||
@@ -66,6 +64,7 @@ protected:
|
||||
unsigned bitmap_buf_size = 0;
|
||||
cluster_op_t *prev = NULL, *next = NULL;
|
||||
int prev_wait = 0;
|
||||
uint64_t flush_id = 0;
|
||||
friend class cluster_client_t;
|
||||
friend class writeback_cache_t;
|
||||
};
|
||||
@@ -81,6 +80,7 @@ class cluster_client_t
|
||||
ring_loop_t *ringloop;
|
||||
|
||||
std::map<pool_id_t, uint64_t> pg_counts;
|
||||
std::map<pool_pg_num_t, osd_num_t> pg_primary;
|
||||
// client_max_dirty_* is actually "max unsynced", for the case when immediate_commit is off
|
||||
uint64_t client_max_dirty_bytes = 0;
|
||||
uint64_t client_max_dirty_ops = 0;
|
||||
@@ -146,9 +146,11 @@ public:
|
||||
|
||||
protected:
|
||||
bool affects_osd(uint64_t inode, uint64_t offset, uint64_t len, osd_num_t osd);
|
||||
bool affects_pg(uint64_t inode, uint64_t offset, uint64_t len, pool_id_t pool_id, pg_num_t pg_num);
|
||||
void on_load_config_hook(json11::Json::object & config);
|
||||
void on_load_pgs_hook(bool success);
|
||||
void on_change_hook(std::map<std::string, etcd_kv_t> & changes);
|
||||
void on_change_pool_config_hook();
|
||||
void on_change_pg_state_hook(pool_id_t pool_id, pg_num_t pg_num, osd_num_t prev_primary);
|
||||
void on_change_osd_state_hook(uint64_t peer_osd);
|
||||
void execute_internal(cluster_op_t *op);
|
||||
void unshift_op(cluster_op_t *op);
|
||||
|
@@ -46,11 +46,12 @@ public:
|
||||
bool is_left_merged(dirty_buf_it_t dirty_it);
|
||||
bool is_right_merged(dirty_buf_it_t dirty_it);
|
||||
bool is_merged(const dirty_buf_it_t & dirty_it);
|
||||
void copy_write(cluster_op_t *op, int state);
|
||||
int repeat_ops_for(cluster_client_t *cli, osd_num_t peer_osd);
|
||||
void copy_write(cluster_op_t *op, int state, uint64_t new_flush_id = 0);
|
||||
int repeat_ops_for(cluster_client_t *cli, osd_num_t peer_osd, pool_id_t pool_id, pg_num_t pg_num);
|
||||
void start_writebacks(cluster_client_t *cli, int count);
|
||||
bool read_from_cache(cluster_op_t *op, uint32_t bitmap_granularity);
|
||||
void flush_buffers(cluster_client_t *cli, dirty_buf_it_t from_it, dirty_buf_it_t to_it);
|
||||
void mark_flush_written(uint64_t inode, uint64_t offset, uint64_t len, uint64_t flush_id);
|
||||
void fsync_start();
|
||||
void fsync_error();
|
||||
void fsync_ok();
|
||||
|
@@ -71,7 +71,7 @@ bool writeback_cache_t::is_merged(const dirty_buf_it_t & dirty_it)
|
||||
return is_left_merged(dirty_it) || is_right_merged(dirty_it);
|
||||
}
|
||||
|
||||
void writeback_cache_t::copy_write(cluster_op_t *op, int state)
|
||||
void writeback_cache_t::copy_write(cluster_op_t *op, int state, uint64_t new_flush_id)
|
||||
{
|
||||
// Save operation for replay when one of PGs goes out of sync
|
||||
// (primary OSD drops our connection in this case)
|
||||
@@ -180,6 +180,7 @@ void writeback_cache_t::copy_write(cluster_op_t *op, int state)
|
||||
.buf = buf,
|
||||
.len = op->len,
|
||||
.state = state,
|
||||
.flush_id = new_flush_id,
|
||||
.refcnt = refcnt,
|
||||
});
|
||||
if (state == CACHE_DIRTY)
|
||||
@@ -208,7 +209,7 @@ void writeback_cache_t::copy_write(cluster_op_t *op, int state)
|
||||
}
|
||||
}
|
||||
|
||||
int writeback_cache_t::repeat_ops_for(cluster_client_t *cli, osd_num_t peer_osd)
|
||||
int writeback_cache_t::repeat_ops_for(cluster_client_t *cli, osd_num_t peer_osd, pool_id_t pool_id, pg_num_t pg_num)
|
||||
{
|
||||
int repeated = 0;
|
||||
if (dirty_buffers.size())
|
||||
@@ -218,8 +219,11 @@ int writeback_cache_t::repeat_ops_for(cluster_client_t *cli, osd_num_t peer_osd)
|
||||
for (auto wr_it = dirty_buffers.begin(), flush_it = wr_it, last_it = wr_it; ; )
|
||||
{
|
||||
bool end = wr_it == dirty_buffers.end();
|
||||
bool flush_this = !end && wr_it->second.state != CACHE_REPEATING &&
|
||||
cli->affects_osd(wr_it->first.inode, wr_it->first.stripe, wr_it->second.len, peer_osd);
|
||||
bool flush_this = !end && wr_it->second.state != CACHE_REPEATING;
|
||||
if (peer_osd)
|
||||
flush_this = flush_this && cli->affects_osd(wr_it->first.inode, wr_it->first.stripe, wr_it->second.len, peer_osd);
|
||||
if (pool_id && pg_num)
|
||||
flush_this = flush_this && cli->affects_pg(wr_it->first.inode, wr_it->first.stripe, wr_it->second.len, pool_id, pg_num);
|
||||
if (flush_it != wr_it && (end || !flush_this ||
|
||||
wr_it->first.inode != flush_it->first.inode ||
|
||||
wr_it->first.stripe != last_it->first.stripe+last_it->second.len))
|
||||
@@ -265,7 +269,7 @@ void writeback_cache_t::flush_buffers(cluster_client_t *cli, dirty_buf_it_t from
|
||||
writebacks_active++;
|
||||
op->callback = [this, flush_id](cluster_op_t* op)
|
||||
{
|
||||
// Buffer flushes should be always retried, regardless of the error,
|
||||
// Buffer flushes are always retried, regardless of the error,
|
||||
// so they should never result in an error here
|
||||
assert(op->retval == op->len);
|
||||
for (auto fl_it = flushed_buffers.find(flush_id);
|
||||
@@ -277,16 +281,7 @@ void writeback_cache_t::flush_buffers(cluster_client_t *cli, dirty_buf_it_t from
|
||||
}
|
||||
flushed_buffers.erase(fl_it++);
|
||||
}
|
||||
for (auto dirty_it = find_dirty(op->inode, op->offset);
|
||||
dirty_it != dirty_buffers.end() && dirty_it->first.inode == op->inode &&
|
||||
dirty_it->first.stripe < op->offset+op->len; dirty_it++)
|
||||
{
|
||||
if (dirty_it->second.flush_id == flush_id && dirty_it->second.state == CACHE_REPEATING)
|
||||
{
|
||||
dirty_it->second.flush_id = 0;
|
||||
dirty_it->second.state = CACHE_WRITTEN;
|
||||
}
|
||||
}
|
||||
mark_flush_written(op->inode, op->offset, op->len, flush_id);
|
||||
delete op;
|
||||
writebacks_active--;
|
||||
// We can't call execute_internal because it affects an invalid copy of the list here
|
||||
@@ -304,6 +299,20 @@ void writeback_cache_t::flush_buffers(cluster_client_t *cli, dirty_buf_it_t from
|
||||
}
|
||||
}
|
||||
|
||||
void writeback_cache_t::mark_flush_written(uint64_t inode, uint64_t offset, uint64_t len, uint64_t flush_id)
|
||||
{
|
||||
for (auto dirty_it = find_dirty(inode, offset);
|
||||
dirty_it != dirty_buffers.end() && dirty_it->first.inode == inode &&
|
||||
dirty_it->first.stripe < offset+len; dirty_it++)
|
||||
{
|
||||
if (dirty_it->second.flush_id == flush_id && dirty_it->second.state == CACHE_REPEATING)
|
||||
{
|
||||
dirty_it->second.flush_id = 0;
|
||||
dirty_it->second.state = CACHE_WRITTEN;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void writeback_cache_t::start_writebacks(cluster_client_t *cli, int count)
|
||||
{
|
||||
if (!writeback_queue.size())
|
||||
|
@@ -253,7 +253,7 @@ void etcd_state_client_t::parse_config(const json11::Json & config)
|
||||
this->etcd_ws_keepalive_interval = config["etcd_ws_keepalive_interval"].uint64_value();
|
||||
if (this->etcd_ws_keepalive_interval <= 0)
|
||||
{
|
||||
this->etcd_ws_keepalive_interval = 30;
|
||||
this->etcd_ws_keepalive_interval = 5;
|
||||
}
|
||||
this->max_etcd_attempts = config["max_etcd_attempts"].uint64_value();
|
||||
if (this->max_etcd_attempts <= 0)
|
||||
@@ -573,7 +573,7 @@ void etcd_state_client_t::load_global_config()
|
||||
{
|
||||
global_bitmap_granularity = DEFAULT_BITMAP_GRANULARITY;
|
||||
}
|
||||
global_immediate_commit = parse_immediate_commit(global_config["immediate_commit"].string_value());
|
||||
global_immediate_commit = parse_immediate_commit(global_config["immediate_commit"].string_value(), IMMEDIATE_ALL);
|
||||
on_load_config_hook(global_config);
|
||||
});
|
||||
}
|
||||
@@ -867,7 +867,7 @@ void etcd_state_client_t::parse_state(const etcd_kv_t & kv)
|
||||
pc.used_for_fs = pool_item.second["used_for_fs"].as_string();
|
||||
// Immediate Commit Mode
|
||||
pc.immediate_commit = pool_item.second["immediate_commit"].is_string()
|
||||
? parse_immediate_commit(pool_item.second["immediate_commit"].string_value())
|
||||
? parse_immediate_commit(pool_item.second["immediate_commit"].string_value(), IMMEDIATE_ALL)
|
||||
: global_immediate_commit;
|
||||
// PG Stripe Size
|
||||
pc.pg_stripe_size = pool_item.second["pg_stripe_size"].uint64_value();
|
||||
@@ -890,6 +890,10 @@ void etcd_state_client_t::parse_state(const etcd_kv_t & kv)
|
||||
}
|
||||
}
|
||||
}
|
||||
if (on_change_pool_config_hook)
|
||||
{
|
||||
on_change_pool_config_hook();
|
||||
}
|
||||
}
|
||||
else if (key == etcd_prefix+"/config/pgs")
|
||||
{
|
||||
@@ -1028,13 +1032,19 @@ void etcd_state_client_t::parse_state(const etcd_kv_t & kv)
|
||||
else if (value.is_null())
|
||||
{
|
||||
auto & pg_cfg = this->pool_config[pool_id].pg_config[pg_num];
|
||||
auto prev_primary = pg_cfg.cur_primary;
|
||||
pg_cfg.state_exists = false;
|
||||
pg_cfg.cur_primary = 0;
|
||||
pg_cfg.cur_state = 0;
|
||||
if (on_change_pg_state_hook)
|
||||
{
|
||||
on_change_pg_state_hook(pool_id, pg_num, prev_primary);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
auto & pg_cfg = this->pool_config[pool_id].pg_config[pg_num];
|
||||
auto prev_primary = pg_cfg.cur_primary;
|
||||
pg_cfg.state_exists = true;
|
||||
osd_num_t cur_primary = value["primary"].uint64_value();
|
||||
int state = 0;
|
||||
@@ -1065,6 +1075,10 @@ void etcd_state_client_t::parse_state(const etcd_kv_t & kv)
|
||||
}
|
||||
pg_cfg.cur_primary = cur_primary;
|
||||
pg_cfg.cur_state = state;
|
||||
if (on_change_pg_state_hook)
|
||||
{
|
||||
on_change_pg_state_hook(pool_id, pg_num, prev_primary);
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (key.substr(0, etcd_prefix.length()+11) == etcd_prefix+"/osd/state/")
|
||||
@@ -1161,10 +1175,11 @@ void etcd_state_client_t::parse_state(const etcd_kv_t & kv)
|
||||
}
|
||||
}
|
||||
|
||||
// NOTE(review): this hunk shows two variants of the same function inside one
// brace pair; presumably the one-argument signature and the first return are the
// removed version and the two-argument signature and the second return are the
// added version (usual removed-then-added diff order) - confirm.
uint32_t etcd_state_client_t::parse_immediate_commit(const std::string & immediate_commit_str)
|
||||
// Two-argument variant: maps "all" / "small" / "none" to IMMEDIATE_ALL /
// IMMEDIATE_SMALL / IMMEDIATE_NONE and returns default_value for any other string.
uint32_t etcd_state_client_t::parse_immediate_commit(const std::string & immediate_commit_str, uint32_t default_value)
|
||||
{
|
||||
// One-argument variant: every string other than "all" or "small" yields IMMEDIATE_NONE
return immediate_commit_str == "all" ? IMMEDIATE_ALL :
|
||||
(immediate_commit_str == "small" ? IMMEDIATE_SMALL : IMMEDIATE_NONE);
|
||||
// Two-argument variant: "none" must be spelled out explicitly, anything
// unrecognized (including an empty string) falls back to default_value
return (immediate_commit_str == "all" ? IMMEDIATE_ALL :
|
||||
(immediate_commit_str == "small" ? IMMEDIATE_SMALL :
|
||||
(immediate_commit_str == "none" ? IMMEDIATE_NONE : default_value)));
|
||||
}
|
||||
|
||||
uint32_t etcd_state_client_t::parse_scheme(const std::string & scheme)
|
||||
|
@@ -103,7 +103,7 @@ protected:
|
||||
void pick_next_etcd();
|
||||
public:
|
||||
int etcd_keepalive_timeout = 30;
|
||||
int etcd_ws_keepalive_interval = 30;
|
||||
int etcd_ws_keepalive_interval = 5;
|
||||
int max_etcd_attempts = 5;
|
||||
int etcd_quick_timeout = 1000;
|
||||
int etcd_slow_timeout = 5000;
|
||||
@@ -127,6 +127,8 @@ public:
|
||||
std::function<void(json11::Json::object &)> on_load_config_hook;
|
||||
std::function<json11::Json()> load_pgs_checks_hook;
|
||||
std::function<void(bool)> on_load_pgs_hook;
|
||||
std::function<void()> on_change_pool_config_hook;
|
||||
std::function<void(pool_id_t, pg_num_t, osd_num_t)> on_change_pg_state_hook;
|
||||
std::function<void(pool_id_t, pg_num_t)> on_change_pg_history_hook;
|
||||
std::function<void(osd_num_t)> on_change_osd_state_hook;
|
||||
std::function<void()> on_reload_hook;
|
||||
@@ -155,6 +157,6 @@ public:
|
||||
int address_count();
|
||||
~etcd_state_client_t();
|
||||
|
||||
static uint32_t parse_immediate_commit(const std::string & immediate_commit_str);
|
||||
static uint32_t parse_immediate_commit(const std::string & immediate_commit_str, uint32_t default_value);
|
||||
static uint32_t parse_scheme(const std::string & scheme_str);
|
||||
};
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user